{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 36420, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_loss": 0.6326802372932434, "eval_runtime": 177.5382, "eval_samples_per_second": 135.177, "eval_steps_per_second": 16.898, "step": 0 }, { "epoch": 2.7457440966501922e-05, "grad_norm": 2.2561402320861816, "learning_rate": 0.0, "loss": 0.8802, "step": 1 }, { "epoch": 5.4914881933003843e-05, "grad_norm": 2.342005968093872, "learning_rate": 2.0000000000000002e-07, "loss": 0.9029, "step": 2 }, { "epoch": 8.237232289950576e-05, "grad_norm": 2.3655002117156982, "learning_rate": 4.0000000000000003e-07, "loss": 0.8864, "step": 3 }, { "epoch": 0.00010982976386600769, "grad_norm": 4.076173305511475, "learning_rate": 6.000000000000001e-07, "loss": 0.9276, "step": 4 }, { "epoch": 0.0001372872048325096, "grad_norm": 2.35604190826416, "learning_rate": 8.000000000000001e-07, "loss": 0.898, "step": 5 }, { "epoch": 0.00016474464579901152, "grad_norm": 2.186289072036743, "learning_rate": 1.0000000000000002e-06, "loss": 0.8972, "step": 6 }, { "epoch": 0.00019220208676551346, "grad_norm": 1.9669835567474365, "learning_rate": 1.2000000000000002e-06, "loss": 0.8868, "step": 7 }, { "epoch": 0.00021965952773201537, "grad_norm": 1.6937439441680908, "learning_rate": 1.4000000000000001e-06, "loss": 0.914, "step": 8 }, { "epoch": 0.0002471169686985173, "grad_norm": 2.1082160472869873, "learning_rate": 1.6000000000000001e-06, "loss": 0.8594, "step": 9 }, { "epoch": 0.0002745744096650192, "grad_norm": 1.773193359375, "learning_rate": 1.8000000000000001e-06, "loss": 0.8371, "step": 10 }, { "epoch": 0.00030203185063152114, "grad_norm": 1.8144782781600952, "learning_rate": 2.0000000000000003e-06, "loss": 0.8764, "step": 11 }, { "epoch": 0.00032948929159802305, "grad_norm": 1.2651466131210327, "learning_rate": 2.2e-06, "loss": 0.8279, "step": 12 }, { "epoch": 0.000356946732564525, "grad_norm": 1.157173752784729, "learning_rate": 2.4000000000000003e-06, "loss": 0.7459, "step": 13 }, { "epoch": 0.0003844041735310269, "grad_norm": 1.2744603157043457, "learning_rate": 2.6e-06, "loss": 0.8008, "step": 14 }, { "epoch": 0.00041186161449752884, "grad_norm": 1.1443653106689453, "learning_rate": 2.8000000000000003e-06, "loss": 0.7943, "step": 15 }, { "epoch": 0.00043931905546403075, "grad_norm": 1.0647474527359009, "learning_rate": 3e-06, "loss": 0.8079, "step": 16 }, { "epoch": 0.00046677649643053266, "grad_norm": 0.7448004484176636, "learning_rate": 3.2000000000000003e-06, "loss": 0.7663, "step": 17 }, { "epoch": 0.0004942339373970346, "grad_norm": 0.8476930856704712, "learning_rate": 3.4000000000000005e-06, "loss": 0.7532, "step": 18 }, { "epoch": 0.0005216913783635365, "grad_norm": 0.6624916195869446, "learning_rate": 3.6000000000000003e-06, "loss": 0.7995, "step": 19 }, { "epoch": 0.0005491488193300384, "grad_norm": 0.7960836887359619, "learning_rate": 3.8000000000000005e-06, "loss": 0.7677, "step": 20 }, { "epoch": 0.0005766062602965404, "grad_norm": 0.7930276989936829, "learning_rate": 4.000000000000001e-06, "loss": 0.7259, "step": 21 }, { "epoch": 0.0006040637012630423, "grad_norm": 0.90882807970047, "learning_rate": 4.2000000000000004e-06, "loss": 0.7541, "step": 22 }, { "epoch": 0.0006315211422295442, "grad_norm": 0.8168877363204956, "learning_rate": 4.4e-06, "loss": 0.7322, "step": 23 }, { "epoch": 0.0006589785831960461, "grad_norm": 0.703757643699646, "learning_rate": 4.600000000000001e-06, "loss": 0.6697, "step": 24 }, { "epoch": 0.0006864360241625481, "grad_norm": 0.6717430949211121, "learning_rate": 4.800000000000001e-06, "loss": 0.6747, "step": 25 }, { "epoch": 0.00071389346512905, "grad_norm": 0.642096221446991, "learning_rate": 5e-06, "loss": 0.7663, "step": 26 }, { "epoch": 0.0007413509060955519, "grad_norm": 0.6495366096496582, "learning_rate": 5.2e-06, "loss": 0.6048, "step": 27 }, { "epoch": 0.0007688083470620538, "grad_norm": 0.6014453172683716, "learning_rate": 5.400000000000001e-06, "loss": 0.7347, "step": 28 }, { "epoch": 0.0007962657880285557, "grad_norm": 0.5283642411231995, "learning_rate": 5.600000000000001e-06, "loss": 0.6906, "step": 29 }, { "epoch": 0.0008237232289950577, "grad_norm": 0.4862114489078522, "learning_rate": 5.8e-06, "loss": 0.7212, "step": 30 }, { "epoch": 0.0008511806699615595, "grad_norm": 0.5186327695846558, "learning_rate": 6e-06, "loss": 0.6614, "step": 31 }, { "epoch": 0.0008786381109280615, "grad_norm": 0.5351436138153076, "learning_rate": 6.200000000000001e-06, "loss": 0.7566, "step": 32 }, { "epoch": 0.0009060955518945635, "grad_norm": 0.5533176064491272, "learning_rate": 6.4000000000000006e-06, "loss": 0.6553, "step": 33 }, { "epoch": 0.0009335529928610653, "grad_norm": 0.5029674768447876, "learning_rate": 6.600000000000001e-06, "loss": 0.7282, "step": 34 }, { "epoch": 0.0009610104338275673, "grad_norm": 0.44217249751091003, "learning_rate": 6.800000000000001e-06, "loss": 0.619, "step": 35 }, { "epoch": 0.0009884678747940692, "grad_norm": 0.43247437477111816, "learning_rate": 7e-06, "loss": 0.6208, "step": 36 }, { "epoch": 0.001015925315760571, "grad_norm": 0.8083419799804688, "learning_rate": 7.2000000000000005e-06, "loss": 0.6588, "step": 37 }, { "epoch": 0.001043382756727073, "grad_norm": 0.49433842301368713, "learning_rate": 7.4e-06, "loss": 0.6537, "step": 38 }, { "epoch": 0.001070840197693575, "grad_norm": 0.5236935019493103, "learning_rate": 7.600000000000001e-06, "loss": 0.6074, "step": 39 }, { "epoch": 0.001098297638660077, "grad_norm": 0.5073657035827637, "learning_rate": 7.800000000000002e-06, "loss": 0.7987, "step": 40 }, { "epoch": 0.0011257550796265788, "grad_norm": 0.4493747651576996, "learning_rate": 8.000000000000001e-06, "loss": 0.6389, "step": 41 }, { "epoch": 0.0011532125205930808, "grad_norm": 0.5368253588676453, "learning_rate": 8.2e-06, "loss": 0.6919, "step": 42 }, { "epoch": 0.0011806699615595827, "grad_norm": 0.5811830759048462, "learning_rate": 8.400000000000001e-06, "loss": 0.7515, "step": 43 }, { "epoch": 0.0012081274025260845, "grad_norm": 0.43591269850730896, "learning_rate": 8.6e-06, "loss": 0.6522, "step": 44 }, { "epoch": 0.0012355848434925864, "grad_norm": 0.4478546380996704, "learning_rate": 8.8e-06, "loss": 0.7013, "step": 45 }, { "epoch": 0.0012630422844590885, "grad_norm": 0.44441911578178406, "learning_rate": 9e-06, "loss": 0.6555, "step": 46 }, { "epoch": 0.0012904997254255903, "grad_norm": 0.4050477147102356, "learning_rate": 9.200000000000002e-06, "loss": 0.5679, "step": 47 }, { "epoch": 0.0013179571663920922, "grad_norm": 0.4047469198703766, "learning_rate": 9.4e-06, "loss": 0.7032, "step": 48 }, { "epoch": 0.0013454146073585943, "grad_norm": 0.44018757343292236, "learning_rate": 9.600000000000001e-06, "loss": 0.657, "step": 49 }, { "epoch": 0.0013728720483250961, "grad_norm": 0.4194895923137665, "learning_rate": 9.800000000000001e-06, "loss": 0.61, "step": 50 }, { "epoch": 0.001400329489291598, "grad_norm": 0.3997894525527954, "learning_rate": 1e-05, "loss": 0.5872, "step": 51 }, { "epoch": 0.0014277869302581, "grad_norm": 0.4468090534210205, "learning_rate": 1.02e-05, "loss": 0.498, "step": 52 }, { "epoch": 0.001455244371224602, "grad_norm": 0.48776981234550476, "learning_rate": 1.04e-05, "loss": 0.588, "step": 53 }, { "epoch": 0.0014827018121911038, "grad_norm": 0.42795422673225403, "learning_rate": 1.0600000000000002e-05, "loss": 0.6725, "step": 54 }, { "epoch": 0.0015101592531576056, "grad_norm": 0.4143153727054596, "learning_rate": 1.0800000000000002e-05, "loss": 0.6205, "step": 55 }, { "epoch": 0.0015376166941241077, "grad_norm": 0.4018900692462921, "learning_rate": 1.1000000000000001e-05, "loss": 0.5532, "step": 56 }, { "epoch": 0.0015650741350906096, "grad_norm": 0.4289415180683136, "learning_rate": 1.1200000000000001e-05, "loss": 0.5733, "step": 57 }, { "epoch": 0.0015925315760571114, "grad_norm": 0.4622398018836975, "learning_rate": 1.14e-05, "loss": 0.6955, "step": 58 }, { "epoch": 0.0016199890170236135, "grad_norm": 0.492209255695343, "learning_rate": 1.16e-05, "loss": 0.5962, "step": 59 }, { "epoch": 0.0016474464579901153, "grad_norm": 0.38489100337028503, "learning_rate": 1.18e-05, "loss": 0.5443, "step": 60 }, { "epoch": 0.0016749038989566172, "grad_norm": 0.42261120676994324, "learning_rate": 1.2e-05, "loss": 0.6293, "step": 61 }, { "epoch": 0.001702361339923119, "grad_norm": 0.545713484287262, "learning_rate": 1.22e-05, "loss": 0.642, "step": 62 }, { "epoch": 0.0017298187808896211, "grad_norm": 0.46870431303977966, "learning_rate": 1.2400000000000002e-05, "loss": 0.6618, "step": 63 }, { "epoch": 0.001757276221856123, "grad_norm": 0.4857075810432434, "learning_rate": 1.2600000000000001e-05, "loss": 0.5794, "step": 64 }, { "epoch": 0.0017847336628226248, "grad_norm": 0.4711804687976837, "learning_rate": 1.2800000000000001e-05, "loss": 0.6093, "step": 65 }, { "epoch": 0.001812191103789127, "grad_norm": 0.45472607016563416, "learning_rate": 1.3000000000000001e-05, "loss": 0.614, "step": 66 }, { "epoch": 0.0018396485447556288, "grad_norm": 0.5441303849220276, "learning_rate": 1.3200000000000002e-05, "loss": 0.6774, "step": 67 }, { "epoch": 0.0018671059857221306, "grad_norm": 0.4511236548423767, "learning_rate": 1.3400000000000002e-05, "loss": 0.6283, "step": 68 }, { "epoch": 0.0018945634266886327, "grad_norm": 0.441902756690979, "learning_rate": 1.3600000000000002e-05, "loss": 0.6967, "step": 69 }, { "epoch": 0.0019220208676551346, "grad_norm": 0.44374457001686096, "learning_rate": 1.38e-05, "loss": 0.5963, "step": 70 }, { "epoch": 0.0019494783086216364, "grad_norm": 0.4764389097690582, "learning_rate": 1.4e-05, "loss": 0.7037, "step": 71 }, { "epoch": 0.0019769357495881385, "grad_norm": 0.44006603956222534, "learning_rate": 1.4200000000000001e-05, "loss": 0.6305, "step": 72 }, { "epoch": 0.0020043931905546404, "grad_norm": 0.4069029986858368, "learning_rate": 1.4400000000000001e-05, "loss": 0.6288, "step": 73 }, { "epoch": 0.002031850631521142, "grad_norm": 0.3871413469314575, "learning_rate": 1.46e-05, "loss": 0.5926, "step": 74 }, { "epoch": 0.002059308072487644, "grad_norm": 0.4089374542236328, "learning_rate": 1.48e-05, "loss": 0.6765, "step": 75 }, { "epoch": 0.002086765513454146, "grad_norm": 0.41028884053230286, "learning_rate": 1.5000000000000002e-05, "loss": 0.6027, "step": 76 }, { "epoch": 0.002114222954420648, "grad_norm": 0.4265989065170288, "learning_rate": 1.5200000000000002e-05, "loss": 0.5517, "step": 77 }, { "epoch": 0.00214168039538715, "grad_norm": 0.48942798376083374, "learning_rate": 1.54e-05, "loss": 0.6191, "step": 78 }, { "epoch": 0.002169137836353652, "grad_norm": 0.5473577976226807, "learning_rate": 1.5600000000000003e-05, "loss": 0.7313, "step": 79 }, { "epoch": 0.002196595277320154, "grad_norm": 0.5348865389823914, "learning_rate": 1.58e-05, "loss": 0.6606, "step": 80 }, { "epoch": 0.0022240527182866556, "grad_norm": 0.4247622489929199, "learning_rate": 1.6000000000000003e-05, "loss": 0.6122, "step": 81 }, { "epoch": 0.0022515101592531575, "grad_norm": 0.41674304008483887, "learning_rate": 1.62e-05, "loss": 0.5894, "step": 82 }, { "epoch": 0.0022789676002196594, "grad_norm": 0.49677029252052307, "learning_rate": 1.64e-05, "loss": 0.6122, "step": 83 }, { "epoch": 0.0023064250411861617, "grad_norm": 0.4433738887310028, "learning_rate": 1.66e-05, "loss": 0.6413, "step": 84 }, { "epoch": 0.0023338824821526635, "grad_norm": 0.48676344752311707, "learning_rate": 1.6800000000000002e-05, "loss": 0.6909, "step": 85 }, { "epoch": 0.0023613399231191654, "grad_norm": 0.4397391974925995, "learning_rate": 1.7e-05, "loss": 0.563, "step": 86 }, { "epoch": 0.0023887973640856672, "grad_norm": 0.46282798051834106, "learning_rate": 1.72e-05, "loss": 0.7357, "step": 87 }, { "epoch": 0.002416254805052169, "grad_norm": 0.45930078625679016, "learning_rate": 1.7400000000000003e-05, "loss": 0.62, "step": 88 }, { "epoch": 0.002443712246018671, "grad_norm": 0.48151397705078125, "learning_rate": 1.76e-05, "loss": 0.6979, "step": 89 }, { "epoch": 0.002471169686985173, "grad_norm": 0.4736911654472351, "learning_rate": 1.7800000000000002e-05, "loss": 0.6824, "step": 90 }, { "epoch": 0.002498627127951675, "grad_norm": 0.45994314551353455, "learning_rate": 1.8e-05, "loss": 0.6294, "step": 91 }, { "epoch": 0.002526084568918177, "grad_norm": 0.5020555257797241, "learning_rate": 1.8200000000000002e-05, "loss": 0.5782, "step": 92 }, { "epoch": 0.002553542009884679, "grad_norm": 0.5591815114021301, "learning_rate": 1.8400000000000003e-05, "loss": 0.6595, "step": 93 }, { "epoch": 0.0025809994508511807, "grad_norm": 0.44648024439811707, "learning_rate": 1.86e-05, "loss": 0.5676, "step": 94 }, { "epoch": 0.0026084568918176825, "grad_norm": 0.445890873670578, "learning_rate": 1.88e-05, "loss": 0.7295, "step": 95 }, { "epoch": 0.0026359143327841844, "grad_norm": 0.4448269009590149, "learning_rate": 1.9e-05, "loss": 0.5728, "step": 96 }, { "epoch": 0.0026633717737506862, "grad_norm": 0.45189368724823, "learning_rate": 1.9200000000000003e-05, "loss": 0.6471, "step": 97 }, { "epoch": 0.0026908292147171885, "grad_norm": 0.4151735007762909, "learning_rate": 1.94e-05, "loss": 0.5957, "step": 98 }, { "epoch": 0.0027182866556836904, "grad_norm": 0.41519683599472046, "learning_rate": 1.9600000000000002e-05, "loss": 0.5478, "step": 99 }, { "epoch": 0.0027457440966501922, "grad_norm": 0.43746230006217957, "learning_rate": 1.98e-05, "loss": 0.5593, "step": 100 }, { "epoch": 0.002773201537616694, "grad_norm": 0.4645647406578064, "learning_rate": 2e-05, "loss": 0.6259, "step": 101 }, { "epoch": 0.002800658978583196, "grad_norm": 1.1229034662246704, "learning_rate": 1.9999999990673403e-05, "loss": 0.5534, "step": 102 }, { "epoch": 0.002828116419549698, "grad_norm": 0.4796474575996399, "learning_rate": 1.9999999962693618e-05, "loss": 0.6227, "step": 103 }, { "epoch": 0.0028555738605162, "grad_norm": 0.41566044092178345, "learning_rate": 1.999999991606064e-05, "loss": 0.5946, "step": 104 }, { "epoch": 0.002883031301482702, "grad_norm": 0.4952285587787628, "learning_rate": 1.9999999850774467e-05, "loss": 0.6729, "step": 105 }, { "epoch": 0.002910488742449204, "grad_norm": 0.4553510844707489, "learning_rate": 1.9999999766835108e-05, "loss": 0.62, "step": 106 }, { "epoch": 0.0029379461834157057, "grad_norm": 0.4013444781303406, "learning_rate": 1.999999966424255e-05, "loss": 0.5991, "step": 107 }, { "epoch": 0.0029654036243822075, "grad_norm": 0.4763377010822296, "learning_rate": 1.9999999542996812e-05, "loss": 0.6157, "step": 108 }, { "epoch": 0.0029928610653487094, "grad_norm": 0.4436170160770416, "learning_rate": 1.9999999403097873e-05, "loss": 0.5386, "step": 109 }, { "epoch": 0.0030203185063152112, "grad_norm": 0.4334041178226471, "learning_rate": 1.9999999244545753e-05, "loss": 0.6781, "step": 110 }, { "epoch": 0.0030477759472817135, "grad_norm": 0.5235326290130615, "learning_rate": 1.9999999067340433e-05, "loss": 0.6014, "step": 111 }, { "epoch": 0.0030752333882482154, "grad_norm": 0.5019425749778748, "learning_rate": 1.999999887148193e-05, "loss": 0.5865, "step": 112 }, { "epoch": 0.0031026908292147173, "grad_norm": 0.4108220636844635, "learning_rate": 1.9999998656970236e-05, "loss": 0.5724, "step": 113 }, { "epoch": 0.003130148270181219, "grad_norm": 0.46410611271858215, "learning_rate": 1.9999998423805352e-05, "loss": 0.6435, "step": 114 }, { "epoch": 0.003157605711147721, "grad_norm": 0.418171226978302, "learning_rate": 1.9999998171987278e-05, "loss": 0.6485, "step": 115 }, { "epoch": 0.003185063152114223, "grad_norm": 0.4836839437484741, "learning_rate": 1.9999997901516014e-05, "loss": 0.6753, "step": 116 }, { "epoch": 0.0032125205930807247, "grad_norm": 0.41291505098342896, "learning_rate": 1.9999997612391567e-05, "loss": 0.5849, "step": 117 }, { "epoch": 0.003239978034047227, "grad_norm": 0.4702100157737732, "learning_rate": 1.9999997304613932e-05, "loss": 0.7081, "step": 118 }, { "epoch": 0.003267435475013729, "grad_norm": 0.4679124057292938, "learning_rate": 1.9999996978183108e-05, "loss": 0.6667, "step": 119 }, { "epoch": 0.0032948929159802307, "grad_norm": 0.42561307549476624, "learning_rate": 1.99999966330991e-05, "loss": 0.608, "step": 120 }, { "epoch": 0.0033223503569467325, "grad_norm": 0.4386301636695862, "learning_rate": 1.999999626936191e-05, "loss": 0.5862, "step": 121 }, { "epoch": 0.0033498077979132344, "grad_norm": 0.4232906997203827, "learning_rate": 1.9999995886971528e-05, "loss": 0.573, "step": 122 }, { "epoch": 0.0033772652388797363, "grad_norm": 0.45866522192955017, "learning_rate": 1.9999995485927966e-05, "loss": 0.689, "step": 123 }, { "epoch": 0.003404722679846238, "grad_norm": 0.46257665753364563, "learning_rate": 1.9999995066231222e-05, "loss": 0.6425, "step": 124 }, { "epoch": 0.0034321801208127404, "grad_norm": 0.4337705373764038, "learning_rate": 1.999999462788129e-05, "loss": 0.5342, "step": 125 }, { "epoch": 0.0034596375617792423, "grad_norm": 0.409372478723526, "learning_rate": 1.9999994170878182e-05, "loss": 0.5385, "step": 126 }, { "epoch": 0.003487095002745744, "grad_norm": 0.43916985392570496, "learning_rate": 1.9999993695221894e-05, "loss": 0.5918, "step": 127 }, { "epoch": 0.003514552443712246, "grad_norm": 0.4186578691005707, "learning_rate": 1.9999993200912423e-05, "loss": 0.6443, "step": 128 }, { "epoch": 0.003542009884678748, "grad_norm": 0.512024998664856, "learning_rate": 1.9999992687949775e-05, "loss": 0.498, "step": 129 }, { "epoch": 0.0035694673256452497, "grad_norm": 0.7444612979888916, "learning_rate": 1.9999992156333947e-05, "loss": 0.6449, "step": 130 }, { "epoch": 0.003596924766611752, "grad_norm": 0.48492512106895447, "learning_rate": 1.9999991606064942e-05, "loss": 0.6568, "step": 131 }, { "epoch": 0.003624382207578254, "grad_norm": 0.44983240962028503, "learning_rate": 1.999999103714276e-05, "loss": 0.5998, "step": 132 }, { "epoch": 0.0036518396485447557, "grad_norm": 0.4248271584510803, "learning_rate": 1.9999990449567407e-05, "loss": 0.6453, "step": 133 }, { "epoch": 0.0036792970895112576, "grad_norm": 0.4380721151828766, "learning_rate": 1.9999989843338875e-05, "loss": 0.6809, "step": 134 }, { "epoch": 0.0037067545304777594, "grad_norm": 0.4258834719657898, "learning_rate": 1.9999989218457175e-05, "loss": 0.6324, "step": 135 }, { "epoch": 0.0037342119714442613, "grad_norm": 0.4458576738834381, "learning_rate": 1.99999885749223e-05, "loss": 0.6717, "step": 136 }, { "epoch": 0.003761669412410763, "grad_norm": 0.4505946636199951, "learning_rate": 1.9999987912734257e-05, "loss": 0.6018, "step": 137 }, { "epoch": 0.0037891268533772654, "grad_norm": 0.4281530976295471, "learning_rate": 1.9999987231893046e-05, "loss": 0.6672, "step": 138 }, { "epoch": 0.0038165842943437673, "grad_norm": 0.5011027455329895, "learning_rate": 1.9999986532398666e-05, "loss": 0.6524, "step": 139 }, { "epoch": 0.003844041735310269, "grad_norm": 0.4920332133769989, "learning_rate": 1.999998581425112e-05, "loss": 0.5761, "step": 140 }, { "epoch": 0.003871499176276771, "grad_norm": 0.49742692708969116, "learning_rate": 1.9999985077450406e-05, "loss": 0.5844, "step": 141 }, { "epoch": 0.003898956617243273, "grad_norm": 0.4630191922187805, "learning_rate": 1.9999984321996534e-05, "loss": 0.683, "step": 142 }, { "epoch": 0.003926414058209775, "grad_norm": 0.47620368003845215, "learning_rate": 1.999998354788949e-05, "loss": 0.6161, "step": 143 }, { "epoch": 0.003953871499176277, "grad_norm": 0.4642435312271118, "learning_rate": 1.9999982755129292e-05, "loss": 0.6554, "step": 144 }, { "epoch": 0.003981328940142779, "grad_norm": 0.4387148916721344, "learning_rate": 1.9999981943715934e-05, "loss": 0.6853, "step": 145 }, { "epoch": 0.004008786381109281, "grad_norm": 0.4572583734989166, "learning_rate": 1.999998111364942e-05, "loss": 0.6093, "step": 146 }, { "epoch": 0.004036243822075783, "grad_norm": 0.4165478050708771, "learning_rate": 1.9999980264929744e-05, "loss": 0.6117, "step": 147 }, { "epoch": 0.004063701263042284, "grad_norm": 0.4298742711544037, "learning_rate": 1.9999979397556917e-05, "loss": 0.6015, "step": 148 }, { "epoch": 0.004091158704008786, "grad_norm": 0.4505866765975952, "learning_rate": 1.9999978511530937e-05, "loss": 0.558, "step": 149 }, { "epoch": 0.004118616144975288, "grad_norm": 0.4468693137168884, "learning_rate": 1.9999977606851804e-05, "loss": 0.6812, "step": 150 }, { "epoch": 0.00414607358594179, "grad_norm": 0.42061498761177063, "learning_rate": 1.999997668351952e-05, "loss": 0.583, "step": 151 }, { "epoch": 0.004173531026908292, "grad_norm": 0.4557202160358429, "learning_rate": 1.999997574153409e-05, "loss": 0.6236, "step": 152 }, { "epoch": 0.004200988467874794, "grad_norm": 0.5489321947097778, "learning_rate": 1.9999974780895514e-05, "loss": 0.7379, "step": 153 }, { "epoch": 0.004228445908841296, "grad_norm": 0.45926955342292786, "learning_rate": 1.9999973801603793e-05, "loss": 0.6898, "step": 154 }, { "epoch": 0.004255903349807798, "grad_norm": 0.478107750415802, "learning_rate": 1.999997280365893e-05, "loss": 0.6662, "step": 155 }, { "epoch": 0.0042833607907743, "grad_norm": 0.43631061911582947, "learning_rate": 1.9999971787060923e-05, "loss": 0.5895, "step": 156 }, { "epoch": 0.004310818231740802, "grad_norm": 0.4447135031223297, "learning_rate": 1.999997075180978e-05, "loss": 0.657, "step": 157 }, { "epoch": 0.004338275672707304, "grad_norm": 0.41299593448638916, "learning_rate": 1.9999969697905496e-05, "loss": 0.5642, "step": 158 }, { "epoch": 0.004365733113673806, "grad_norm": 0.4609125554561615, "learning_rate": 1.999996862534808e-05, "loss": 0.6649, "step": 159 }, { "epoch": 0.004393190554640308, "grad_norm": 0.4190620183944702, "learning_rate": 1.999996753413753e-05, "loss": 0.6188, "step": 160 }, { "epoch": 0.0044206479956068094, "grad_norm": 0.44755828380584717, "learning_rate": 1.9999966424273852e-05, "loss": 0.6463, "step": 161 }, { "epoch": 0.004448105436573311, "grad_norm": 0.5037378668785095, "learning_rate": 1.9999965295757043e-05, "loss": 0.6094, "step": 162 }, { "epoch": 0.004475562877539813, "grad_norm": 0.422136127948761, "learning_rate": 1.9999964148587106e-05, "loss": 0.6634, "step": 163 }, { "epoch": 0.004503020318506315, "grad_norm": 0.3965300917625427, "learning_rate": 1.9999962982764045e-05, "loss": 0.6735, "step": 164 }, { "epoch": 0.004530477759472817, "grad_norm": 0.38264939188957214, "learning_rate": 1.9999961798287863e-05, "loss": 0.5633, "step": 165 }, { "epoch": 0.004557935200439319, "grad_norm": 0.4847031831741333, "learning_rate": 1.9999960595158558e-05, "loss": 0.5131, "step": 166 }, { "epoch": 0.004585392641405821, "grad_norm": 0.4248157739639282, "learning_rate": 1.9999959373376134e-05, "loss": 0.5826, "step": 167 }, { "epoch": 0.004612850082372323, "grad_norm": 0.40812015533447266, "learning_rate": 1.9999958132940595e-05, "loss": 0.5653, "step": 168 }, { "epoch": 0.004640307523338825, "grad_norm": 0.5641244649887085, "learning_rate": 1.9999956873851943e-05, "loss": 0.6459, "step": 169 }, { "epoch": 0.004667764964305327, "grad_norm": 0.4661983847618103, "learning_rate": 1.9999955596110182e-05, "loss": 0.5959, "step": 170 }, { "epoch": 0.004695222405271829, "grad_norm": 0.4596157371997833, "learning_rate": 1.9999954299715313e-05, "loss": 0.6741, "step": 171 }, { "epoch": 0.004722679846238331, "grad_norm": 0.6327513456344604, "learning_rate": 1.999995298466733e-05, "loss": 0.5499, "step": 172 }, { "epoch": 0.004750137287204833, "grad_norm": 0.4812397062778473, "learning_rate": 1.999995165096625e-05, "loss": 0.6152, "step": 173 }, { "epoch": 0.0047775947281713345, "grad_norm": 0.4263966977596283, "learning_rate": 1.999995029861207e-05, "loss": 0.619, "step": 174 }, { "epoch": 0.004805052169137836, "grad_norm": 0.4864668846130371, "learning_rate": 1.9999948927604787e-05, "loss": 0.6177, "step": 175 }, { "epoch": 0.004832509610104338, "grad_norm": 0.43964195251464844, "learning_rate": 1.9999947537944408e-05, "loss": 0.6115, "step": 176 }, { "epoch": 0.00485996705107084, "grad_norm": 0.45105692744255066, "learning_rate": 1.9999946129630932e-05, "loss": 0.6257, "step": 177 }, { "epoch": 0.004887424492037342, "grad_norm": 0.39107412099838257, "learning_rate": 1.9999944702664372e-05, "loss": 0.6347, "step": 178 }, { "epoch": 0.004914881933003844, "grad_norm": 0.3795750141143799, "learning_rate": 1.9999943257044717e-05, "loss": 0.5037, "step": 179 }, { "epoch": 0.004942339373970346, "grad_norm": 0.39178532361984253, "learning_rate": 1.9999941792771983e-05, "loss": 0.5581, "step": 180 }, { "epoch": 0.0049697968149368475, "grad_norm": 0.4771156907081604, "learning_rate": 1.999994030984616e-05, "loss": 0.6105, "step": 181 }, { "epoch": 0.00499725425590335, "grad_norm": 0.4509790539741516, "learning_rate": 1.999993880826726e-05, "loss": 0.6598, "step": 182 }, { "epoch": 0.005024711696869852, "grad_norm": 0.4423171579837799, "learning_rate": 1.9999937288035278e-05, "loss": 0.6374, "step": 183 }, { "epoch": 0.005052169137836354, "grad_norm": 0.5727972984313965, "learning_rate": 1.9999935749150227e-05, "loss": 0.589, "step": 184 }, { "epoch": 0.005079626578802856, "grad_norm": 0.4115532338619232, "learning_rate": 1.99999341916121e-05, "loss": 0.5308, "step": 185 }, { "epoch": 0.005107084019769358, "grad_norm": 0.7021656036376953, "learning_rate": 1.9999932615420908e-05, "loss": 0.6746, "step": 186 }, { "epoch": 0.0051345414607358595, "grad_norm": 3.469609260559082, "learning_rate": 1.9999931020576646e-05, "loss": 0.6083, "step": 187 }, { "epoch": 0.005161998901702361, "grad_norm": 0.44507789611816406, "learning_rate": 1.9999929407079322e-05, "loss": 0.7131, "step": 188 }, { "epoch": 0.005189456342668863, "grad_norm": 0.41362589597702026, "learning_rate": 1.999992777492894e-05, "loss": 0.5915, "step": 189 }, { "epoch": 0.005216913783635365, "grad_norm": 0.42812520265579224, "learning_rate": 1.99999261241255e-05, "loss": 0.6013, "step": 190 }, { "epoch": 0.005244371224601867, "grad_norm": 0.47485044598579407, "learning_rate": 1.9999924454669008e-05, "loss": 0.6892, "step": 191 }, { "epoch": 0.005271828665568369, "grad_norm": 0.42368319630622864, "learning_rate": 1.9999922766559466e-05, "loss": 0.6236, "step": 192 }, { "epoch": 0.005299286106534871, "grad_norm": 0.4164941608905792, "learning_rate": 1.9999921059796872e-05, "loss": 0.5604, "step": 193 }, { "epoch": 0.0053267435475013725, "grad_norm": 0.3503522276878357, "learning_rate": 1.9999919334381235e-05, "loss": 0.5007, "step": 194 }, { "epoch": 0.005354200988467875, "grad_norm": 0.4173789918422699, "learning_rate": 1.999991759031256e-05, "loss": 0.6528, "step": 195 }, { "epoch": 0.005381658429434377, "grad_norm": 0.3919907808303833, "learning_rate": 1.9999915827590843e-05, "loss": 0.588, "step": 196 }, { "epoch": 0.005409115870400879, "grad_norm": 0.4164595901966095, "learning_rate": 1.9999914046216093e-05, "loss": 0.6306, "step": 197 }, { "epoch": 0.005436573311367381, "grad_norm": 0.4904977083206177, "learning_rate": 1.9999912246188314e-05, "loss": 0.5763, "step": 198 }, { "epoch": 0.005464030752333883, "grad_norm": 0.37329134345054626, "learning_rate": 1.9999910427507502e-05, "loss": 0.5329, "step": 199 }, { "epoch": 0.0054914881933003845, "grad_norm": 0.4144654870033264, "learning_rate": 1.9999908590173672e-05, "loss": 0.6314, "step": 200 }, { "epoch": 0.005518945634266886, "grad_norm": 0.3920156955718994, "learning_rate": 1.9999906734186815e-05, "loss": 0.5881, "step": 201 }, { "epoch": 0.005546403075233388, "grad_norm": 0.4405362606048584, "learning_rate": 1.9999904859546943e-05, "loss": 0.6078, "step": 202 }, { "epoch": 0.00557386051619989, "grad_norm": 0.3813554644584656, "learning_rate": 1.9999902966254058e-05, "loss": 0.6288, "step": 203 }, { "epoch": 0.005601317957166392, "grad_norm": 0.4086970388889313, "learning_rate": 1.999990105430816e-05, "loss": 0.5928, "step": 204 }, { "epoch": 0.005628775398132894, "grad_norm": 0.41574662923812866, "learning_rate": 1.9999899123709255e-05, "loss": 0.5722, "step": 205 }, { "epoch": 0.005656232839099396, "grad_norm": 0.40006983280181885, "learning_rate": 1.999989717445735e-05, "loss": 0.6232, "step": 206 }, { "epoch": 0.0056836902800658975, "grad_norm": 0.48231008648872375, "learning_rate": 1.9999895206552438e-05, "loss": 0.6282, "step": 207 }, { "epoch": 0.0057111477210324, "grad_norm": 0.3890703618526459, "learning_rate": 1.9999893219994533e-05, "loss": 0.6671, "step": 208 }, { "epoch": 0.005738605161998902, "grad_norm": 0.4452843964099884, "learning_rate": 1.999989121478364e-05, "loss": 0.6513, "step": 209 }, { "epoch": 0.005766062602965404, "grad_norm": 0.381786584854126, "learning_rate": 1.9999889190919754e-05, "loss": 0.5881, "step": 210 }, { "epoch": 0.005793520043931906, "grad_norm": 0.3894210159778595, "learning_rate": 1.9999887148402882e-05, "loss": 0.6084, "step": 211 }, { "epoch": 0.005820977484898408, "grad_norm": 0.3597693145275116, "learning_rate": 1.9999885087233034e-05, "loss": 0.5627, "step": 212 }, { "epoch": 0.0058484349258649095, "grad_norm": 0.39207249879837036, "learning_rate": 1.9999883007410205e-05, "loss": 0.5817, "step": 213 }, { "epoch": 0.005875892366831411, "grad_norm": 0.45632675290107727, "learning_rate": 1.9999880908934403e-05, "loss": 0.6772, "step": 214 }, { "epoch": 0.005903349807797913, "grad_norm": 0.4458891749382019, "learning_rate": 1.999987879180563e-05, "loss": 0.67, "step": 215 }, { "epoch": 0.005930807248764415, "grad_norm": 0.46367698907852173, "learning_rate": 1.9999876656023893e-05, "loss": 0.5532, "step": 216 }, { "epoch": 0.005958264689730917, "grad_norm": 0.4678700566291809, "learning_rate": 1.9999874501589195e-05, "loss": 0.6394, "step": 217 }, { "epoch": 0.005985722130697419, "grad_norm": 0.40423232316970825, "learning_rate": 1.999987232850154e-05, "loss": 0.5715, "step": 218 }, { "epoch": 0.006013179571663921, "grad_norm": 0.5475576519966125, "learning_rate": 1.9999870136760933e-05, "loss": 0.5485, "step": 219 }, { "epoch": 0.0060406370126304225, "grad_norm": 0.4014328718185425, "learning_rate": 1.9999867926367372e-05, "loss": 0.6441, "step": 220 }, { "epoch": 0.006068094453596924, "grad_norm": 0.3991987109184265, "learning_rate": 1.999986569732087e-05, "loss": 0.5957, "step": 221 }, { "epoch": 0.006095551894563427, "grad_norm": 0.43559056520462036, "learning_rate": 1.9999863449621423e-05, "loss": 0.66, "step": 222 }, { "epoch": 0.006123009335529929, "grad_norm": 0.46711626648902893, "learning_rate": 1.9999861183269044e-05, "loss": 0.6736, "step": 223 }, { "epoch": 0.006150466776496431, "grad_norm": 0.4080478549003601, "learning_rate": 1.999985889826373e-05, "loss": 0.59, "step": 224 }, { "epoch": 0.006177924217462933, "grad_norm": 0.400508314371109, "learning_rate": 1.9999856594605485e-05, "loss": 0.6484, "step": 225 }, { "epoch": 0.0062053816584294345, "grad_norm": 0.38320261240005493, "learning_rate": 1.999985427229432e-05, "loss": 0.5548, "step": 226 }, { "epoch": 0.006232839099395936, "grad_norm": 0.4422648549079895, "learning_rate": 1.999985193133023e-05, "loss": 0.6269, "step": 227 }, { "epoch": 0.006260296540362438, "grad_norm": 0.3704020082950592, "learning_rate": 1.9999849571713228e-05, "loss": 0.6068, "step": 228 }, { "epoch": 0.00628775398132894, "grad_norm": 0.4695093631744385, "learning_rate": 1.9999847193443314e-05, "loss": 0.697, "step": 229 }, { "epoch": 0.006315211422295442, "grad_norm": 0.441283255815506, "learning_rate": 1.9999844796520495e-05, "loss": 0.6467, "step": 230 }, { "epoch": 0.006342668863261944, "grad_norm": 0.3810816705226898, "learning_rate": 1.9999842380944773e-05, "loss": 0.6547, "step": 231 }, { "epoch": 0.006370126304228446, "grad_norm": 0.4478892683982849, "learning_rate": 1.9999839946716153e-05, "loss": 0.6145, "step": 232 }, { "epoch": 0.0063975837451949475, "grad_norm": 0.3766327202320099, "learning_rate": 1.9999837493834637e-05, "loss": 0.6683, "step": 233 }, { "epoch": 0.006425041186161449, "grad_norm": 0.40446531772613525, "learning_rate": 1.9999835022300236e-05, "loss": 0.563, "step": 234 }, { "epoch": 0.006452498627127952, "grad_norm": 0.5221717357635498, "learning_rate": 1.999983253211295e-05, "loss": 0.5626, "step": 235 }, { "epoch": 0.006479956068094454, "grad_norm": 0.4647086262702942, "learning_rate": 1.999983002327279e-05, "loss": 0.6272, "step": 236 }, { "epoch": 0.006507413509060956, "grad_norm": 0.36568981409072876, "learning_rate": 1.999982749577975e-05, "loss": 0.5632, "step": 237 }, { "epoch": 0.006534870950027458, "grad_norm": 0.41321995854377747, "learning_rate": 1.9999824949633838e-05, "loss": 0.5992, "step": 238 }, { "epoch": 0.0065623283909939595, "grad_norm": 0.3914000988006592, "learning_rate": 1.9999822384835065e-05, "loss": 0.6265, "step": 239 }, { "epoch": 0.006589785831960461, "grad_norm": 0.39914536476135254, "learning_rate": 1.999981980138343e-05, "loss": 0.6101, "step": 240 }, { "epoch": 0.006617243272926963, "grad_norm": 0.416861355304718, "learning_rate": 1.9999817199278942e-05, "loss": 0.5741, "step": 241 }, { "epoch": 0.006644700713893465, "grad_norm": 0.39164242148399353, "learning_rate": 1.99998145785216e-05, "loss": 0.5893, "step": 242 }, { "epoch": 0.006672158154859967, "grad_norm": 0.5115380883216858, "learning_rate": 1.9999811939111417e-05, "loss": 0.6448, "step": 243 }, { "epoch": 0.006699615595826469, "grad_norm": 0.4411737322807312, "learning_rate": 1.9999809281048387e-05, "loss": 0.5703, "step": 244 }, { "epoch": 0.006727073036792971, "grad_norm": 0.576815128326416, "learning_rate": 1.9999806604332527e-05, "loss": 0.63, "step": 245 }, { "epoch": 0.0067545304777594725, "grad_norm": 0.38987287878990173, "learning_rate": 1.999980390896383e-05, "loss": 0.5591, "step": 246 }, { "epoch": 0.006781987918725974, "grad_norm": 0.42383235692977905, "learning_rate": 1.999980119494231e-05, "loss": 0.6196, "step": 247 }, { "epoch": 0.006809445359692476, "grad_norm": 0.4189009964466095, "learning_rate": 1.999979846226797e-05, "loss": 0.6287, "step": 248 }, { "epoch": 0.006836902800658979, "grad_norm": 0.44138169288635254, "learning_rate": 1.999979571094081e-05, "loss": 0.6352, "step": 249 }, { "epoch": 0.006864360241625481, "grad_norm": 0.4521174728870392, "learning_rate": 1.9999792940960847e-05, "loss": 0.5807, "step": 250 }, { "epoch": 0.006891817682591983, "grad_norm": 0.3877543807029724, "learning_rate": 1.9999790152328075e-05, "loss": 0.5806, "step": 251 }, { "epoch": 0.0069192751235584845, "grad_norm": 0.42152369022369385, "learning_rate": 1.9999787345042502e-05, "loss": 0.6312, "step": 252 }, { "epoch": 0.006946732564524986, "grad_norm": 0.4098742604255676, "learning_rate": 1.9999784519104136e-05, "loss": 0.5933, "step": 253 }, { "epoch": 0.006974190005491488, "grad_norm": 0.3835645318031311, "learning_rate": 1.999978167451298e-05, "loss": 0.626, "step": 254 }, { "epoch": 0.00700164744645799, "grad_norm": 0.38852691650390625, "learning_rate": 1.9999778811269035e-05, "loss": 0.644, "step": 255 }, { "epoch": 0.007029104887424492, "grad_norm": 0.6437236666679382, "learning_rate": 1.9999775929372315e-05, "loss": 0.6188, "step": 256 }, { "epoch": 0.007056562328390994, "grad_norm": 0.4205680191516876, "learning_rate": 1.999977302882282e-05, "loss": 0.5978, "step": 257 }, { "epoch": 0.007084019769357496, "grad_norm": 0.41911906003952026, "learning_rate": 1.9999770109620557e-05, "loss": 0.6382, "step": 258 }, { "epoch": 0.0071114772103239975, "grad_norm": 0.4383474290370941, "learning_rate": 1.9999767171765533e-05, "loss": 0.7556, "step": 259 }, { "epoch": 0.007138934651290499, "grad_norm": 0.4251858592033386, "learning_rate": 1.999976421525775e-05, "loss": 0.6369, "step": 260 }, { "epoch": 0.007166392092257001, "grad_norm": 1.1386762857437134, "learning_rate": 1.9999761240097216e-05, "loss": 0.6859, "step": 261 }, { "epoch": 0.007193849533223504, "grad_norm": 0.3800390660762787, "learning_rate": 1.9999758246283936e-05, "loss": 0.5921, "step": 262 }, { "epoch": 0.007221306974190006, "grad_norm": 0.43522900342941284, "learning_rate": 1.9999755233817914e-05, "loss": 0.5619, "step": 263 }, { "epoch": 0.007248764415156508, "grad_norm": 0.33871057629585266, "learning_rate": 1.999975220269916e-05, "loss": 0.576, "step": 264 }, { "epoch": 0.0072762218561230095, "grad_norm": 0.398775190114975, "learning_rate": 1.9999749152927674e-05, "loss": 0.6651, "step": 265 }, { "epoch": 0.007303679297089511, "grad_norm": 0.403473824262619, "learning_rate": 1.9999746084503463e-05, "loss": 0.6252, "step": 266 }, { "epoch": 0.007331136738056013, "grad_norm": 0.5099785327911377, "learning_rate": 1.9999742997426533e-05, "loss": 0.6448, "step": 267 }, { "epoch": 0.007358594179022515, "grad_norm": 0.45460227131843567, "learning_rate": 1.9999739891696897e-05, "loss": 0.6201, "step": 268 }, { "epoch": 0.007386051619989017, "grad_norm": 0.4064958393573761, "learning_rate": 1.999973676731455e-05, "loss": 0.5237, "step": 269 }, { "epoch": 0.007413509060955519, "grad_norm": 0.38697826862335205, "learning_rate": 1.9999733624279502e-05, "loss": 0.6057, "step": 270 }, { "epoch": 0.007440966501922021, "grad_norm": 0.3963126838207245, "learning_rate": 1.999973046259176e-05, "loss": 0.5588, "step": 271 }, { "epoch": 0.0074684239428885225, "grad_norm": 0.3703741431236267, "learning_rate": 1.9999727282251332e-05, "loss": 0.5522, "step": 272 }, { "epoch": 0.007495881383855024, "grad_norm": 0.4151833951473236, "learning_rate": 1.999972408325822e-05, "loss": 0.6224, "step": 273 }, { "epoch": 0.007523338824821526, "grad_norm": 0.4059694707393646, "learning_rate": 1.9999720865612428e-05, "loss": 0.6942, "step": 274 }, { "epoch": 0.007550796265788029, "grad_norm": 0.39608219265937805, "learning_rate": 1.9999717629313968e-05, "loss": 0.6273, "step": 275 }, { "epoch": 0.007578253706754531, "grad_norm": 0.4666145145893097, "learning_rate": 1.999971437436284e-05, "loss": 0.768, "step": 276 }, { "epoch": 0.007605711147721033, "grad_norm": 0.48801395297050476, "learning_rate": 1.9999711100759053e-05, "loss": 0.5688, "step": 277 }, { "epoch": 0.0076331685886875346, "grad_norm": 0.46614870429039, "learning_rate": 1.9999707808502618e-05, "loss": 0.6076, "step": 278 }, { "epoch": 0.007660626029654036, "grad_norm": 0.4650278687477112, "learning_rate": 1.9999704497593532e-05, "loss": 0.7125, "step": 279 }, { "epoch": 0.007688083470620538, "grad_norm": 0.3995112180709839, "learning_rate": 1.9999701168031808e-05, "loss": 0.572, "step": 280 }, { "epoch": 0.00771554091158704, "grad_norm": 0.4158041775226593, "learning_rate": 1.999969781981745e-05, "loss": 0.5786, "step": 281 }, { "epoch": 0.007742998352553542, "grad_norm": 0.42611992359161377, "learning_rate": 1.9999694452950463e-05, "loss": 0.6018, "step": 282 }, { "epoch": 0.007770455793520044, "grad_norm": 0.38140007853507996, "learning_rate": 1.9999691067430853e-05, "loss": 0.5825, "step": 283 }, { "epoch": 0.007797913234486546, "grad_norm": 0.44541677832603455, "learning_rate": 1.9999687663258628e-05, "loss": 0.6411, "step": 284 }, { "epoch": 0.007825370675453048, "grad_norm": 0.40978023409843445, "learning_rate": 1.9999684240433796e-05, "loss": 0.5177, "step": 285 }, { "epoch": 0.00785282811641955, "grad_norm": 0.36902597546577454, "learning_rate": 1.999968079895636e-05, "loss": 0.5387, "step": 286 }, { "epoch": 0.007880285557386051, "grad_norm": 0.5806494951248169, "learning_rate": 1.999967733882633e-05, "loss": 0.5714, "step": 287 }, { "epoch": 0.007907742998352554, "grad_norm": 0.47856956720352173, "learning_rate": 1.9999673860043706e-05, "loss": 0.5658, "step": 288 }, { "epoch": 0.007935200439319055, "grad_norm": 0.40733814239501953, "learning_rate": 1.99996703626085e-05, "loss": 0.6782, "step": 289 }, { "epoch": 0.007962657880285558, "grad_norm": 0.42939478158950806, "learning_rate": 1.999966684652072e-05, "loss": 0.6381, "step": 290 }, { "epoch": 0.007990115321252059, "grad_norm": 0.4574210047721863, "learning_rate": 1.999966331178037e-05, "loss": 0.5677, "step": 291 }, { "epoch": 0.008017572762218561, "grad_norm": 0.4332565367221832, "learning_rate": 1.999965975838745e-05, "loss": 0.6322, "step": 292 }, { "epoch": 0.008045030203185062, "grad_norm": 0.3806329369544983, "learning_rate": 1.999965618634198e-05, "loss": 0.6067, "step": 293 }, { "epoch": 0.008072487644151565, "grad_norm": 0.42823880910873413, "learning_rate": 1.999965259564396e-05, "loss": 0.6378, "step": 294 }, { "epoch": 0.008099945085118066, "grad_norm": 0.42657193541526794, "learning_rate": 1.999964898629339e-05, "loss": 0.6237, "step": 295 }, { "epoch": 0.008127402526084569, "grad_norm": 0.4090295732021332, "learning_rate": 1.999964535829029e-05, "loss": 0.5975, "step": 296 }, { "epoch": 0.008154859967051072, "grad_norm": 0.4147412180900574, "learning_rate": 1.9999641711634656e-05, "loss": 0.6473, "step": 297 }, { "epoch": 0.008182317408017573, "grad_norm": 0.5293698906898499, "learning_rate": 1.9999638046326497e-05, "loss": 0.6546, "step": 298 }, { "epoch": 0.008209774848984075, "grad_norm": 0.4355682134628296, "learning_rate": 1.9999634362365825e-05, "loss": 0.6017, "step": 299 }, { "epoch": 0.008237232289950576, "grad_norm": 0.36183473467826843, "learning_rate": 1.999963065975264e-05, "loss": 0.7503, "step": 300 }, { "epoch": 0.008264689730917079, "grad_norm": 0.39521846175193787, "learning_rate": 1.9999626938486954e-05, "loss": 0.6294, "step": 301 }, { "epoch": 0.00829214717188358, "grad_norm": 0.37467971444129944, "learning_rate": 1.9999623198568775e-05, "loss": 0.5676, "step": 302 }, { "epoch": 0.008319604612850083, "grad_norm": 0.3913743197917938, "learning_rate": 1.9999619439998104e-05, "loss": 0.6654, "step": 303 }, { "epoch": 0.008347062053816584, "grad_norm": 0.36361223459243774, "learning_rate": 1.9999615662774955e-05, "loss": 0.6202, "step": 304 }, { "epoch": 0.008374519494783086, "grad_norm": 0.35502925515174866, "learning_rate": 1.999961186689933e-05, "loss": 0.5438, "step": 305 }, { "epoch": 0.008401976935749587, "grad_norm": 0.41663822531700134, "learning_rate": 1.9999608052371233e-05, "loss": 0.6184, "step": 306 }, { "epoch": 0.00842943437671609, "grad_norm": 0.40139421820640564, "learning_rate": 1.9999604219190678e-05, "loss": 0.6051, "step": 307 }, { "epoch": 0.008456891817682591, "grad_norm": 0.3863654136657715, "learning_rate": 1.999960036735767e-05, "loss": 0.5994, "step": 308 }, { "epoch": 0.008484349258649094, "grad_norm": 0.42359796166419983, "learning_rate": 1.9999596496872217e-05, "loss": 0.5385, "step": 309 }, { "epoch": 0.008511806699615597, "grad_norm": 0.372025728225708, "learning_rate": 1.9999592607734324e-05, "loss": 0.5407, "step": 310 }, { "epoch": 0.008539264140582098, "grad_norm": 0.34968510270118713, "learning_rate": 1.9999588699943997e-05, "loss": 0.5633, "step": 311 }, { "epoch": 0.0085667215815486, "grad_norm": 0.43541666865348816, "learning_rate": 1.999958477350125e-05, "loss": 0.6466, "step": 312 }, { "epoch": 0.008594179022515101, "grad_norm": 0.3847510516643524, "learning_rate": 1.9999580828406082e-05, "loss": 0.6506, "step": 313 }, { "epoch": 0.008621636463481604, "grad_norm": 0.38849589228630066, "learning_rate": 1.999957686465851e-05, "loss": 0.5717, "step": 314 }, { "epoch": 0.008649093904448105, "grad_norm": 0.389700710773468, "learning_rate": 1.9999572882258528e-05, "loss": 0.6487, "step": 315 }, { "epoch": 0.008676551345414608, "grad_norm": 0.5243780016899109, "learning_rate": 1.9999568881206156e-05, "loss": 0.5612, "step": 316 }, { "epoch": 0.008704008786381109, "grad_norm": 0.5033589601516724, "learning_rate": 1.9999564861501394e-05, "loss": 0.6318, "step": 317 }, { "epoch": 0.008731466227347611, "grad_norm": 0.39670807123184204, "learning_rate": 1.999956082314425e-05, "loss": 0.5521, "step": 318 }, { "epoch": 0.008758923668314112, "grad_norm": 0.39557626843452454, "learning_rate": 1.999955676613474e-05, "loss": 0.626, "step": 319 }, { "epoch": 0.008786381109280615, "grad_norm": 0.3835965096950531, "learning_rate": 1.9999552690472862e-05, "loss": 0.5775, "step": 320 }, { "epoch": 0.008813838550247116, "grad_norm": 0.3820382058620453, "learning_rate": 1.9999548596158626e-05, "loss": 0.6041, "step": 321 }, { "epoch": 0.008841295991213619, "grad_norm": 0.36794647574424744, "learning_rate": 1.9999544483192043e-05, "loss": 0.6059, "step": 322 }, { "epoch": 0.008868753432180122, "grad_norm": 0.39219069480895996, "learning_rate": 1.9999540351573114e-05, "loss": 0.6153, "step": 323 }, { "epoch": 0.008896210873146623, "grad_norm": 0.39058953523635864, "learning_rate": 1.999953620130185e-05, "loss": 0.5835, "step": 324 }, { "epoch": 0.008923668314113125, "grad_norm": 0.37050989270210266, "learning_rate": 1.9999532032378266e-05, "loss": 0.5639, "step": 325 }, { "epoch": 0.008951125755079626, "grad_norm": 0.37111926078796387, "learning_rate": 1.999952784480236e-05, "loss": 0.5823, "step": 326 }, { "epoch": 0.008978583196046129, "grad_norm": 0.4303293824195862, "learning_rate": 1.9999523638574144e-05, "loss": 0.5708, "step": 327 }, { "epoch": 0.00900604063701263, "grad_norm": 0.36852791905403137, "learning_rate": 1.999951941369362e-05, "loss": 0.6135, "step": 328 }, { "epoch": 0.009033498077979133, "grad_norm": 0.799198567867279, "learning_rate": 1.9999515170160806e-05, "loss": 0.52, "step": 329 }, { "epoch": 0.009060955518945634, "grad_norm": 0.39080047607421875, "learning_rate": 1.9999510907975702e-05, "loss": 0.5637, "step": 330 }, { "epoch": 0.009088412959912136, "grad_norm": 0.40158334374427795, "learning_rate": 1.9999506627138322e-05, "loss": 0.634, "step": 331 }, { "epoch": 0.009115870400878637, "grad_norm": 0.35106492042541504, "learning_rate": 1.9999502327648667e-05, "loss": 0.5729, "step": 332 }, { "epoch": 0.00914332784184514, "grad_norm": 0.39517080783843994, "learning_rate": 1.9999498009506754e-05, "loss": 0.526, "step": 333 }, { "epoch": 0.009170785282811641, "grad_norm": 0.3985733985900879, "learning_rate": 1.999949367271258e-05, "loss": 0.497, "step": 334 }, { "epoch": 0.009198242723778144, "grad_norm": 0.4160799980163574, "learning_rate": 1.9999489317266162e-05, "loss": 0.6127, "step": 335 }, { "epoch": 0.009225700164744647, "grad_norm": 0.40741023421287537, "learning_rate": 1.99994849431675e-05, "loss": 0.6519, "step": 336 }, { "epoch": 0.009253157605711148, "grad_norm": 0.40619391202926636, "learning_rate": 1.9999480550416615e-05, "loss": 0.5528, "step": 337 }, { "epoch": 0.00928061504667765, "grad_norm": 0.4305838644504547, "learning_rate": 1.99994761390135e-05, "loss": 0.58, "step": 338 }, { "epoch": 0.009308072487644151, "grad_norm": 0.4111979305744171, "learning_rate": 1.9999471708958176e-05, "loss": 0.5665, "step": 339 }, { "epoch": 0.009335529928610654, "grad_norm": 0.43987253308296204, "learning_rate": 1.9999467260250643e-05, "loss": 0.6567, "step": 340 }, { "epoch": 0.009362987369577155, "grad_norm": 0.3774475157260895, "learning_rate": 1.9999462792890913e-05, "loss": 0.5573, "step": 341 }, { "epoch": 0.009390444810543658, "grad_norm": 0.3702661693096161, "learning_rate": 1.9999458306878992e-05, "loss": 0.6083, "step": 342 }, { "epoch": 0.009417902251510159, "grad_norm": 0.4131929874420166, "learning_rate": 1.999945380221489e-05, "loss": 0.6789, "step": 343 }, { "epoch": 0.009445359692476661, "grad_norm": 0.4229578673839569, "learning_rate": 1.9999449278898616e-05, "loss": 0.5318, "step": 344 }, { "epoch": 0.009472817133443162, "grad_norm": 0.38716715574264526, "learning_rate": 1.999944473693018e-05, "loss": 0.6216, "step": 345 }, { "epoch": 0.009500274574409665, "grad_norm": 0.4167757034301758, "learning_rate": 1.9999440176309586e-05, "loss": 0.6254, "step": 346 }, { "epoch": 0.009527732015376166, "grad_norm": 0.38367530703544617, "learning_rate": 1.9999435597036844e-05, "loss": 0.5595, "step": 347 }, { "epoch": 0.009555189456342669, "grad_norm": 0.4259548783302307, "learning_rate": 1.9999430999111962e-05, "loss": 0.5517, "step": 348 }, { "epoch": 0.009582646897309172, "grad_norm": 0.45225411653518677, "learning_rate": 1.9999426382534954e-05, "loss": 0.6619, "step": 349 }, { "epoch": 0.009610104338275673, "grad_norm": 0.3605313301086426, "learning_rate": 1.999942174730582e-05, "loss": 0.5538, "step": 350 }, { "epoch": 0.009637561779242175, "grad_norm": 0.4614499807357788, "learning_rate": 1.9999417093424576e-05, "loss": 0.7169, "step": 351 }, { "epoch": 0.009665019220208676, "grad_norm": 0.35982346534729004, "learning_rate": 1.9999412420891227e-05, "loss": 0.6093, "step": 352 }, { "epoch": 0.009692476661175179, "grad_norm": 0.5104308724403381, "learning_rate": 1.9999407729705778e-05, "loss": 0.4833, "step": 353 }, { "epoch": 0.00971993410214168, "grad_norm": 0.3800354599952698, "learning_rate": 1.9999403019868247e-05, "loss": 0.5606, "step": 354 }, { "epoch": 0.009747391543108183, "grad_norm": 0.4136630594730377, "learning_rate": 1.9999398291378637e-05, "loss": 0.5894, "step": 355 }, { "epoch": 0.009774848984074684, "grad_norm": 0.44301921129226685, "learning_rate": 1.999939354423696e-05, "loss": 0.7001, "step": 356 }, { "epoch": 0.009802306425041186, "grad_norm": 0.7333250045776367, "learning_rate": 1.9999388778443222e-05, "loss": 0.6461, "step": 357 }, { "epoch": 0.009829763866007687, "grad_norm": 0.37262406945228577, "learning_rate": 1.999938399399743e-05, "loss": 0.5953, "step": 358 }, { "epoch": 0.00985722130697419, "grad_norm": 0.3611081540584564, "learning_rate": 1.9999379190899596e-05, "loss": 0.546, "step": 359 }, { "epoch": 0.009884678747940691, "grad_norm": 0.36833998560905457, "learning_rate": 1.9999374369149727e-05, "loss": 0.5865, "step": 360 }, { "epoch": 0.009912136188907194, "grad_norm": 0.4237898290157318, "learning_rate": 1.9999369528747838e-05, "loss": 0.4831, "step": 361 }, { "epoch": 0.009939593629873695, "grad_norm": 0.43199780583381653, "learning_rate": 1.9999364669693927e-05, "loss": 0.5832, "step": 362 }, { "epoch": 0.009967051070840198, "grad_norm": 0.38108187913894653, "learning_rate": 1.9999359791988015e-05, "loss": 0.6026, "step": 363 }, { "epoch": 0.0099945085118067, "grad_norm": 0.38783499598503113, "learning_rate": 1.9999354895630102e-05, "loss": 0.6187, "step": 364 }, { "epoch": 0.010021965952773201, "grad_norm": 0.3971196711063385, "learning_rate": 1.99993499806202e-05, "loss": 0.6038, "step": 365 }, { "epoch": 0.010049423393739704, "grad_norm": 0.38506606221199036, "learning_rate": 1.9999345046958324e-05, "loss": 0.5458, "step": 366 }, { "epoch": 0.010076880834706205, "grad_norm": 0.36890357732772827, "learning_rate": 1.9999340094644473e-05, "loss": 0.6481, "step": 367 }, { "epoch": 0.010104338275672708, "grad_norm": 0.3652532398700714, "learning_rate": 1.9999335123678665e-05, "loss": 0.5744, "step": 368 }, { "epoch": 0.010131795716639209, "grad_norm": 0.3595273792743683, "learning_rate": 1.9999330134060903e-05, "loss": 0.5397, "step": 369 }, { "epoch": 0.010159253157605712, "grad_norm": 0.37918394804000854, "learning_rate": 1.99993251257912e-05, "loss": 0.5599, "step": 370 }, { "epoch": 0.010186710598572212, "grad_norm": 0.5408847332000732, "learning_rate": 1.9999320098869563e-05, "loss": 0.7187, "step": 371 }, { "epoch": 0.010214168039538715, "grad_norm": 0.41097304224967957, "learning_rate": 1.9999315053296002e-05, "loss": 0.6566, "step": 372 }, { "epoch": 0.010241625480505216, "grad_norm": 0.39629796147346497, "learning_rate": 1.999930998907053e-05, "loss": 0.5802, "step": 373 }, { "epoch": 0.010269082921471719, "grad_norm": 0.37485307455062866, "learning_rate": 1.999930490619315e-05, "loss": 0.5374, "step": 374 }, { "epoch": 0.01029654036243822, "grad_norm": 0.4000895321369171, "learning_rate": 1.999929980466388e-05, "loss": 0.5872, "step": 375 }, { "epoch": 0.010323997803404723, "grad_norm": 0.3993488848209381, "learning_rate": 1.999929468448272e-05, "loss": 0.5805, "step": 376 }, { "epoch": 0.010351455244371225, "grad_norm": 0.3904055058956146, "learning_rate": 1.9999289545649687e-05, "loss": 0.6936, "step": 377 }, { "epoch": 0.010378912685337726, "grad_norm": 0.4211486577987671, "learning_rate": 1.9999284388164784e-05, "loss": 0.6477, "step": 378 }, { "epoch": 0.010406370126304229, "grad_norm": 0.39395758509635925, "learning_rate": 1.9999279212028026e-05, "loss": 0.6097, "step": 379 }, { "epoch": 0.01043382756727073, "grad_norm": 0.3844999074935913, "learning_rate": 1.9999274017239423e-05, "loss": 0.5465, "step": 380 }, { "epoch": 0.010461285008237233, "grad_norm": 0.4007297158241272, "learning_rate": 1.999926880379898e-05, "loss": 0.5982, "step": 381 }, { "epoch": 0.010488742449203734, "grad_norm": 0.5679020285606384, "learning_rate": 1.999926357170671e-05, "loss": 0.7846, "step": 382 }, { "epoch": 0.010516199890170237, "grad_norm": 0.4418479800224304, "learning_rate": 1.9999258320962622e-05, "loss": 0.57, "step": 383 }, { "epoch": 0.010543657331136738, "grad_norm": 0.418607234954834, "learning_rate": 1.9999253051566727e-05, "loss": 0.6835, "step": 384 }, { "epoch": 0.01057111477210324, "grad_norm": 0.4055803716182709, "learning_rate": 1.999924776351903e-05, "loss": 0.6286, "step": 385 }, { "epoch": 0.010598572213069741, "grad_norm": 0.3648841977119446, "learning_rate": 1.999924245681955e-05, "loss": 0.5695, "step": 386 }, { "epoch": 0.010626029654036244, "grad_norm": 0.44627705216407776, "learning_rate": 1.9999237131468286e-05, "loss": 0.5807, "step": 387 }, { "epoch": 0.010653487095002745, "grad_norm": 0.3643602430820465, "learning_rate": 1.9999231787465255e-05, "loss": 0.5563, "step": 388 }, { "epoch": 0.010680944535969248, "grad_norm": 0.37555938959121704, "learning_rate": 1.999922642481047e-05, "loss": 0.6015, "step": 389 }, { "epoch": 0.01070840197693575, "grad_norm": 0.49338334798812866, "learning_rate": 1.999922104350393e-05, "loss": 0.6029, "step": 390 }, { "epoch": 0.010735859417902251, "grad_norm": 0.4592312276363373, "learning_rate": 1.9999215643545656e-05, "loss": 0.6884, "step": 391 }, { "epoch": 0.010763316858868754, "grad_norm": 0.3789234459400177, "learning_rate": 1.999921022493565e-05, "loss": 0.5956, "step": 392 }, { "epoch": 0.010790774299835255, "grad_norm": 0.4043075144290924, "learning_rate": 1.9999204787673926e-05, "loss": 0.6012, "step": 393 }, { "epoch": 0.010818231740801758, "grad_norm": 0.4544488191604614, "learning_rate": 1.9999199331760495e-05, "loss": 0.5301, "step": 394 }, { "epoch": 0.010845689181768259, "grad_norm": 0.3869175314903259, "learning_rate": 1.9999193857195368e-05, "loss": 0.5991, "step": 395 }, { "epoch": 0.010873146622734762, "grad_norm": 0.48198366165161133, "learning_rate": 1.999918836397855e-05, "loss": 0.6078, "step": 396 }, { "epoch": 0.010900604063701263, "grad_norm": 0.4633484482765198, "learning_rate": 1.9999182852110053e-05, "loss": 0.7243, "step": 397 }, { "epoch": 0.010928061504667765, "grad_norm": 0.39428600668907166, "learning_rate": 1.9999177321589892e-05, "loss": 0.6114, "step": 398 }, { "epoch": 0.010955518945634266, "grad_norm": 0.3761778175830841, "learning_rate": 1.999917177241807e-05, "loss": 0.5816, "step": 399 }, { "epoch": 0.010982976386600769, "grad_norm": 1.1282670497894287, "learning_rate": 1.9999166204594605e-05, "loss": 0.5831, "step": 400 }, { "epoch": 0.01101043382756727, "grad_norm": 0.4686826765537262, "learning_rate": 1.9999160618119502e-05, "loss": 0.5724, "step": 401 }, { "epoch": 0.011037891268533773, "grad_norm": 0.39842167496681213, "learning_rate": 1.9999155012992774e-05, "loss": 0.5948, "step": 402 }, { "epoch": 0.011065348709500275, "grad_norm": 0.421371728181839, "learning_rate": 1.9999149389214428e-05, "loss": 0.6233, "step": 403 }, { "epoch": 0.011092806150466776, "grad_norm": 0.4203774929046631, "learning_rate": 1.999914374678448e-05, "loss": 0.6396, "step": 404 }, { "epoch": 0.011120263591433279, "grad_norm": 0.4540972411632538, "learning_rate": 1.999913808570294e-05, "loss": 0.6167, "step": 405 }, { "epoch": 0.01114772103239978, "grad_norm": 0.4429083466529846, "learning_rate": 1.999913240596981e-05, "loss": 0.6048, "step": 406 }, { "epoch": 0.011175178473366283, "grad_norm": 0.3499901294708252, "learning_rate": 1.9999126707585108e-05, "loss": 0.5277, "step": 407 }, { "epoch": 0.011202635914332784, "grad_norm": 0.38898661732673645, "learning_rate": 1.9999120990548847e-05, "loss": 0.5162, "step": 408 }, { "epoch": 0.011230093355299287, "grad_norm": 0.37125349044799805, "learning_rate": 1.999911525486103e-05, "loss": 0.5905, "step": 409 }, { "epoch": 0.011257550796265788, "grad_norm": 0.38218599557876587, "learning_rate": 1.9999109500521673e-05, "loss": 0.6646, "step": 410 }, { "epoch": 0.01128500823723229, "grad_norm": 0.3971567451953888, "learning_rate": 1.9999103727530786e-05, "loss": 0.6434, "step": 411 }, { "epoch": 0.011312465678198791, "grad_norm": 0.4186413288116455, "learning_rate": 1.9999097935888382e-05, "loss": 0.6382, "step": 412 }, { "epoch": 0.011339923119165294, "grad_norm": 0.36706238985061646, "learning_rate": 1.9999092125594465e-05, "loss": 0.5521, "step": 413 }, { "epoch": 0.011367380560131795, "grad_norm": 0.4050791561603546, "learning_rate": 1.999908629664905e-05, "loss": 0.6328, "step": 414 }, { "epoch": 0.011394838001098298, "grad_norm": 0.38581904768943787, "learning_rate": 1.9999080449052147e-05, "loss": 0.6358, "step": 415 }, { "epoch": 0.0114222954420648, "grad_norm": 0.4145922362804413, "learning_rate": 1.999907458280377e-05, "loss": 0.6181, "step": 416 }, { "epoch": 0.011449752883031301, "grad_norm": 0.35262760519981384, "learning_rate": 1.999906869790393e-05, "loss": 0.5745, "step": 417 }, { "epoch": 0.011477210323997804, "grad_norm": 0.49101522564888, "learning_rate": 1.999906279435263e-05, "loss": 0.5586, "step": 418 }, { "epoch": 0.011504667764964305, "grad_norm": 0.41676822304725647, "learning_rate": 1.999905687214989e-05, "loss": 0.6504, "step": 419 }, { "epoch": 0.011532125205930808, "grad_norm": 0.3991329073905945, "learning_rate": 1.9999050931295713e-05, "loss": 0.5266, "step": 420 }, { "epoch": 0.011559582646897309, "grad_norm": 0.6659475564956665, "learning_rate": 1.999904497179012e-05, "loss": 0.5735, "step": 421 }, { "epoch": 0.011587040087863812, "grad_norm": 0.34640198945999146, "learning_rate": 1.9999038993633114e-05, "loss": 0.586, "step": 422 }, { "epoch": 0.011614497528830313, "grad_norm": 0.36985453963279724, "learning_rate": 1.9999032996824714e-05, "loss": 0.4776, "step": 423 }, { "epoch": 0.011641954969796815, "grad_norm": 0.4077282249927521, "learning_rate": 1.999902698136492e-05, "loss": 0.514, "step": 424 }, { "epoch": 0.011669412410763316, "grad_norm": 0.41774219274520874, "learning_rate": 1.999902094725375e-05, "loss": 0.6577, "step": 425 }, { "epoch": 0.011696869851729819, "grad_norm": 0.4046265482902527, "learning_rate": 1.999901489449122e-05, "loss": 0.499, "step": 426 }, { "epoch": 0.01172432729269632, "grad_norm": 0.4133409261703491, "learning_rate": 1.999900882307733e-05, "loss": 0.6334, "step": 427 }, { "epoch": 0.011751784733662823, "grad_norm": 0.39341819286346436, "learning_rate": 1.99990027330121e-05, "loss": 0.5969, "step": 428 }, { "epoch": 0.011779242174629324, "grad_norm": 0.4167121946811676, "learning_rate": 1.9998996624295536e-05, "loss": 0.6754, "step": 429 }, { "epoch": 0.011806699615595826, "grad_norm": 0.3867819309234619, "learning_rate": 1.9998990496927652e-05, "loss": 0.606, "step": 430 }, { "epoch": 0.01183415705656233, "grad_norm": 0.3635198771953583, "learning_rate": 1.9998984350908463e-05, "loss": 0.5184, "step": 431 }, { "epoch": 0.01186161449752883, "grad_norm": 0.4112887382507324, "learning_rate": 1.9998978186237977e-05, "loss": 0.6396, "step": 432 }, { "epoch": 0.011889071938495333, "grad_norm": 0.42449551820755005, "learning_rate": 1.9998972002916202e-05, "loss": 0.5869, "step": 433 }, { "epoch": 0.011916529379461834, "grad_norm": 0.39251822233200073, "learning_rate": 1.9998965800943154e-05, "loss": 0.5808, "step": 434 }, { "epoch": 0.011943986820428337, "grad_norm": 0.42648008465766907, "learning_rate": 1.9998959580318844e-05, "loss": 0.5732, "step": 435 }, { "epoch": 0.011971444261394838, "grad_norm": 0.4095357060432434, "learning_rate": 1.9998953341043282e-05, "loss": 0.5974, "step": 436 }, { "epoch": 0.01199890170236134, "grad_norm": 0.4035501480102539, "learning_rate": 1.999894708311648e-05, "loss": 0.635, "step": 437 }, { "epoch": 0.012026359143327841, "grad_norm": 0.3491573631763458, "learning_rate": 1.9998940806538452e-05, "loss": 0.532, "step": 438 }, { "epoch": 0.012053816584294344, "grad_norm": 0.34197482466697693, "learning_rate": 1.999893451130921e-05, "loss": 0.5335, "step": 439 }, { "epoch": 0.012081274025260845, "grad_norm": 0.39727604389190674, "learning_rate": 1.9998928197428763e-05, "loss": 0.6076, "step": 440 }, { "epoch": 0.012108731466227348, "grad_norm": 0.36327365040779114, "learning_rate": 1.9998921864897123e-05, "loss": 0.5861, "step": 441 }, { "epoch": 0.012136188907193849, "grad_norm": 0.46729639172554016, "learning_rate": 1.9998915513714302e-05, "loss": 0.6098, "step": 442 }, { "epoch": 0.012163646348160351, "grad_norm": 0.43233320116996765, "learning_rate": 1.999890914388031e-05, "loss": 0.5463, "step": 443 }, { "epoch": 0.012191103789126854, "grad_norm": 0.4136062264442444, "learning_rate": 1.9998902755395165e-05, "loss": 0.6441, "step": 444 }, { "epoch": 0.012218561230093355, "grad_norm": 0.3878582715988159, "learning_rate": 1.9998896348258875e-05, "loss": 0.6353, "step": 445 }, { "epoch": 0.012246018671059858, "grad_norm": 0.4148087501525879, "learning_rate": 1.999888992247145e-05, "loss": 0.5967, "step": 446 }, { "epoch": 0.012273476112026359, "grad_norm": 0.40921667218208313, "learning_rate": 1.9998883478032906e-05, "loss": 0.6296, "step": 447 }, { "epoch": 0.012300933552992862, "grad_norm": 0.37157028913497925, "learning_rate": 1.9998877014943253e-05, "loss": 0.5351, "step": 448 }, { "epoch": 0.012328390993959363, "grad_norm": 0.3901151418685913, "learning_rate": 1.9998870533202497e-05, "loss": 0.646, "step": 449 }, { "epoch": 0.012355848434925865, "grad_norm": 0.42170265316963196, "learning_rate": 1.9998864032810662e-05, "loss": 0.6252, "step": 450 }, { "epoch": 0.012383305875892366, "grad_norm": 0.4750824272632599, "learning_rate": 1.9998857513767754e-05, "loss": 0.5375, "step": 451 }, { "epoch": 0.012410763316858869, "grad_norm": 0.41642192006111145, "learning_rate": 1.9998850976073788e-05, "loss": 0.5099, "step": 452 }, { "epoch": 0.01243822075782537, "grad_norm": 0.4203537702560425, "learning_rate": 1.999884441972877e-05, "loss": 0.5477, "step": 453 }, { "epoch": 0.012465678198791873, "grad_norm": 0.3524506688117981, "learning_rate": 1.9998837844732714e-05, "loss": 0.5242, "step": 454 }, { "epoch": 0.012493135639758374, "grad_norm": 0.46098700165748596, "learning_rate": 1.9998831251085638e-05, "loss": 0.6856, "step": 455 }, { "epoch": 0.012520593080724876, "grad_norm": 0.37626954913139343, "learning_rate": 1.9998824638787547e-05, "loss": 0.5859, "step": 456 }, { "epoch": 0.01254805052169138, "grad_norm": 0.7488641142845154, "learning_rate": 1.9998818007838458e-05, "loss": 0.6959, "step": 457 }, { "epoch": 0.01257550796265788, "grad_norm": 0.4118775427341461, "learning_rate": 1.9998811358238384e-05, "loss": 0.5697, "step": 458 }, { "epoch": 0.012602965403624383, "grad_norm": 0.4069029688835144, "learning_rate": 1.9998804689987333e-05, "loss": 0.607, "step": 459 }, { "epoch": 0.012630422844590884, "grad_norm": 0.4377652406692505, "learning_rate": 1.9998798003085325e-05, "loss": 0.5939, "step": 460 }, { "epoch": 0.012657880285557387, "grad_norm": 0.37648123502731323, "learning_rate": 1.9998791297532362e-05, "loss": 0.6229, "step": 461 }, { "epoch": 0.012685337726523888, "grad_norm": 0.3883246183395386, "learning_rate": 1.9998784573328466e-05, "loss": 0.6296, "step": 462 }, { "epoch": 0.01271279516749039, "grad_norm": 0.3723233640193939, "learning_rate": 1.9998777830473642e-05, "loss": 0.5839, "step": 463 }, { "epoch": 0.012740252608456891, "grad_norm": 0.5099871754646301, "learning_rate": 1.999877106896791e-05, "loss": 0.5836, "step": 464 }, { "epoch": 0.012767710049423394, "grad_norm": 0.3458348214626312, "learning_rate": 1.9998764288811273e-05, "loss": 0.5643, "step": 465 }, { "epoch": 0.012795167490389895, "grad_norm": 0.4301275610923767, "learning_rate": 1.9998757490003754e-05, "loss": 0.6464, "step": 466 }, { "epoch": 0.012822624931356398, "grad_norm": 0.37440094351768494, "learning_rate": 1.999875067254536e-05, "loss": 0.6508, "step": 467 }, { "epoch": 0.012850082372322899, "grad_norm": 0.4051024913787842, "learning_rate": 1.9998743836436104e-05, "loss": 0.5983, "step": 468 }, { "epoch": 0.012877539813289401, "grad_norm": 0.38167673349380493, "learning_rate": 1.9998736981676e-05, "loss": 0.5775, "step": 469 }, { "epoch": 0.012904997254255904, "grad_norm": 0.4419257342815399, "learning_rate": 1.9998730108265062e-05, "loss": 0.6152, "step": 470 }, { "epoch": 0.012932454695222405, "grad_norm": 0.37587323784828186, "learning_rate": 1.9998723216203298e-05, "loss": 0.5778, "step": 471 }, { "epoch": 0.012959912136188908, "grad_norm": 0.3446073532104492, "learning_rate": 1.9998716305490722e-05, "loss": 0.5312, "step": 472 }, { "epoch": 0.012987369577155409, "grad_norm": 0.5147837996482849, "learning_rate": 1.9998709376127355e-05, "loss": 0.6409, "step": 473 }, { "epoch": 0.013014827018121912, "grad_norm": 0.42770513892173767, "learning_rate": 1.99987024281132e-05, "loss": 0.6254, "step": 474 }, { "epoch": 0.013042284459088413, "grad_norm": 0.39681941270828247, "learning_rate": 1.9998695461448278e-05, "loss": 0.5535, "step": 475 }, { "epoch": 0.013069741900054915, "grad_norm": 0.44756025075912476, "learning_rate": 1.9998688476132594e-05, "loss": 0.6263, "step": 476 }, { "epoch": 0.013097199341021416, "grad_norm": 0.41858989000320435, "learning_rate": 1.9998681472166163e-05, "loss": 0.615, "step": 477 }, { "epoch": 0.013124656781987919, "grad_norm": 0.35515040159225464, "learning_rate": 1.9998674449549002e-05, "loss": 0.5665, "step": 478 }, { "epoch": 0.01315211422295442, "grad_norm": 0.4042092263698578, "learning_rate": 1.9998667408281124e-05, "loss": 0.5971, "step": 479 }, { "epoch": 0.013179571663920923, "grad_norm": 0.3441367745399475, "learning_rate": 1.9998660348362536e-05, "loss": 0.5473, "step": 480 }, { "epoch": 0.013207029104887424, "grad_norm": 0.392115980386734, "learning_rate": 1.999865326979326e-05, "loss": 0.6438, "step": 481 }, { "epoch": 0.013234486545853926, "grad_norm": 0.40628859400749207, "learning_rate": 1.99986461725733e-05, "loss": 0.5723, "step": 482 }, { "epoch": 0.01326194398682043, "grad_norm": 0.3773040175437927, "learning_rate": 1.9998639056702678e-05, "loss": 0.5263, "step": 483 }, { "epoch": 0.01328940142778693, "grad_norm": 0.3874393403530121, "learning_rate": 1.99986319221814e-05, "loss": 0.5682, "step": 484 }, { "epoch": 0.013316858868753433, "grad_norm": 0.4044968783855438, "learning_rate": 1.9998624769009484e-05, "loss": 0.5692, "step": 485 }, { "epoch": 0.013344316309719934, "grad_norm": 0.3392695486545563, "learning_rate": 1.999861759718694e-05, "loss": 0.5705, "step": 486 }, { "epoch": 0.013371773750686437, "grad_norm": 0.37451910972595215, "learning_rate": 1.9998610406713782e-05, "loss": 0.611, "step": 487 }, { "epoch": 0.013399231191652938, "grad_norm": 0.35444578528404236, "learning_rate": 1.9998603197590025e-05, "loss": 0.5331, "step": 488 }, { "epoch": 0.01342668863261944, "grad_norm": 0.46706724166870117, "learning_rate": 1.9998595969815684e-05, "loss": 0.6238, "step": 489 }, { "epoch": 0.013454146073585941, "grad_norm": 0.44799354672431946, "learning_rate": 1.9998588723390768e-05, "loss": 0.6324, "step": 490 }, { "epoch": 0.013481603514552444, "grad_norm": 0.3829394578933716, "learning_rate": 1.9998581458315294e-05, "loss": 0.5986, "step": 491 }, { "epoch": 0.013509060955518945, "grad_norm": 0.3845359683036804, "learning_rate": 1.9998574174589277e-05, "loss": 0.5767, "step": 492 }, { "epoch": 0.013536518396485448, "grad_norm": 0.49297624826431274, "learning_rate": 1.9998566872212725e-05, "loss": 0.6295, "step": 493 }, { "epoch": 0.013563975837451949, "grad_norm": 0.36868372559547424, "learning_rate": 1.9998559551185653e-05, "loss": 0.5755, "step": 494 }, { "epoch": 0.013591433278418451, "grad_norm": 0.3506320118904114, "learning_rate": 1.9998552211508078e-05, "loss": 0.5656, "step": 495 }, { "epoch": 0.013618890719384952, "grad_norm": 0.38464581966400146, "learning_rate": 1.9998544853180013e-05, "loss": 0.6289, "step": 496 }, { "epoch": 0.013646348160351455, "grad_norm": 0.3753840923309326, "learning_rate": 1.999853747620147e-05, "loss": 0.6043, "step": 497 }, { "epoch": 0.013673805601317958, "grad_norm": 0.3963780999183655, "learning_rate": 1.9998530080572465e-05, "loss": 0.5504, "step": 498 }, { "epoch": 0.013701263042284459, "grad_norm": 0.3831250071525574, "learning_rate": 1.9998522666293008e-05, "loss": 0.6155, "step": 499 }, { "epoch": 0.013728720483250962, "grad_norm": 0.3952217996120453, "learning_rate": 1.9998515233363113e-05, "loss": 0.6493, "step": 500 }, { "epoch": 0.013756177924217463, "grad_norm": 0.377018541097641, "learning_rate": 1.9998507781782802e-05, "loss": 0.5851, "step": 501 }, { "epoch": 0.013783635365183965, "grad_norm": 0.36269983649253845, "learning_rate": 1.999850031155208e-05, "loss": 0.6443, "step": 502 }, { "epoch": 0.013811092806150466, "grad_norm": 0.39316627383232117, "learning_rate": 1.999849282267096e-05, "loss": 0.5832, "step": 503 }, { "epoch": 0.013838550247116969, "grad_norm": 0.36537376046180725, "learning_rate": 1.9998485315139466e-05, "loss": 0.5017, "step": 504 }, { "epoch": 0.01386600768808347, "grad_norm": 0.39261430501937866, "learning_rate": 1.99984777889576e-05, "loss": 0.5864, "step": 505 }, { "epoch": 0.013893465129049973, "grad_norm": 0.3777439594268799, "learning_rate": 1.9998470244125385e-05, "loss": 0.62, "step": 506 }, { "epoch": 0.013920922570016474, "grad_norm": 0.3682347536087036, "learning_rate": 1.9998462680642833e-05, "loss": 0.5041, "step": 507 }, { "epoch": 0.013948380010982976, "grad_norm": 0.40784814953804016, "learning_rate": 1.999845509850995e-05, "loss": 0.5333, "step": 508 }, { "epoch": 0.013975837451949477, "grad_norm": 0.4051037132740021, "learning_rate": 1.9998447497726765e-05, "loss": 0.6828, "step": 509 }, { "epoch": 0.01400329489291598, "grad_norm": 0.36823195219039917, "learning_rate": 1.9998439878293282e-05, "loss": 0.5581, "step": 510 }, { "epoch": 0.014030752333882483, "grad_norm": 0.3990444242954254, "learning_rate": 1.9998432240209517e-05, "loss": 0.5556, "step": 511 }, { "epoch": 0.014058209774848984, "grad_norm": 0.4431035816669464, "learning_rate": 1.9998424583475485e-05, "loss": 0.5684, "step": 512 }, { "epoch": 0.014085667215815487, "grad_norm": 0.4089876711368561, "learning_rate": 1.9998416908091197e-05, "loss": 0.617, "step": 513 }, { "epoch": 0.014113124656781988, "grad_norm": 0.3681512176990509, "learning_rate": 1.9998409214056677e-05, "loss": 0.5576, "step": 514 }, { "epoch": 0.01414058209774849, "grad_norm": 0.5452068448066711, "learning_rate": 1.9998401501371928e-05, "loss": 0.5729, "step": 515 }, { "epoch": 0.014168039538714991, "grad_norm": 0.3845811188220978, "learning_rate": 1.999839377003697e-05, "loss": 0.5954, "step": 516 }, { "epoch": 0.014195496979681494, "grad_norm": 0.369366854429245, "learning_rate": 1.9998386020051814e-05, "loss": 0.5675, "step": 517 }, { "epoch": 0.014222954420647995, "grad_norm": 0.5670362114906311, "learning_rate": 1.999837825141648e-05, "loss": 0.5844, "step": 518 }, { "epoch": 0.014250411861614498, "grad_norm": 0.40332505106925964, "learning_rate": 1.999837046413098e-05, "loss": 0.5621, "step": 519 }, { "epoch": 0.014277869302580999, "grad_norm": 0.3800332546234131, "learning_rate": 1.9998362658195328e-05, "loss": 0.6491, "step": 520 }, { "epoch": 0.014305326743547502, "grad_norm": 0.4471629858016968, "learning_rate": 1.9998354833609537e-05, "loss": 0.5931, "step": 521 }, { "epoch": 0.014332784184514002, "grad_norm": 0.3648340702056885, "learning_rate": 1.9998346990373623e-05, "loss": 0.6339, "step": 522 }, { "epoch": 0.014360241625480505, "grad_norm": 0.44361770153045654, "learning_rate": 1.99983391284876e-05, "loss": 0.5688, "step": 523 }, { "epoch": 0.014387699066447008, "grad_norm": 0.37211835384368896, "learning_rate": 1.9998331247951486e-05, "loss": 0.5305, "step": 524 }, { "epoch": 0.014415156507413509, "grad_norm": 0.401266872882843, "learning_rate": 1.9998323348765292e-05, "loss": 0.5714, "step": 525 }, { "epoch": 0.014442613948380012, "grad_norm": 0.3878715932369232, "learning_rate": 1.9998315430929034e-05, "loss": 0.569, "step": 526 }, { "epoch": 0.014470071389346513, "grad_norm": 0.3805721402168274, "learning_rate": 1.9998307494442727e-05, "loss": 0.5861, "step": 527 }, { "epoch": 0.014497528830313015, "grad_norm": 0.4498625099658966, "learning_rate": 1.9998299539306384e-05, "loss": 0.5927, "step": 528 }, { "epoch": 0.014524986271279516, "grad_norm": 0.38131728768348694, "learning_rate": 1.9998291565520023e-05, "loss": 0.6031, "step": 529 }, { "epoch": 0.014552443712246019, "grad_norm": 0.38801324367523193, "learning_rate": 1.9998283573083656e-05, "loss": 0.6366, "step": 530 }, { "epoch": 0.01457990115321252, "grad_norm": 0.402130663394928, "learning_rate": 1.9998275561997297e-05, "loss": 0.6269, "step": 531 }, { "epoch": 0.014607358594179023, "grad_norm": 0.3892827332019806, "learning_rate": 1.9998267532260967e-05, "loss": 0.6156, "step": 532 }, { "epoch": 0.014634816035145524, "grad_norm": 0.4167672395706177, "learning_rate": 1.9998259483874675e-05, "loss": 0.5741, "step": 533 }, { "epoch": 0.014662273476112027, "grad_norm": 0.3267308473587036, "learning_rate": 1.999825141683844e-05, "loss": 0.5749, "step": 534 }, { "epoch": 0.014689730917078528, "grad_norm": 0.3839011490345001, "learning_rate": 1.999824333115227e-05, "loss": 0.5772, "step": 535 }, { "epoch": 0.01471718835804503, "grad_norm": 0.3850117027759552, "learning_rate": 1.999823522681619e-05, "loss": 0.5756, "step": 536 }, { "epoch": 0.014744645799011533, "grad_norm": 0.4164671301841736, "learning_rate": 1.9998227103830208e-05, "loss": 0.6503, "step": 537 }, { "epoch": 0.014772103239978034, "grad_norm": 0.41122978925704956, "learning_rate": 1.999821896219434e-05, "loss": 0.6301, "step": 538 }, { "epoch": 0.014799560680944537, "grad_norm": 0.46953505277633667, "learning_rate": 1.9998210801908607e-05, "loss": 0.5889, "step": 539 }, { "epoch": 0.014827018121911038, "grad_norm": 0.46712812781333923, "learning_rate": 1.9998202622973014e-05, "loss": 0.6586, "step": 540 }, { "epoch": 0.01485447556287754, "grad_norm": 0.46527013182640076, "learning_rate": 1.9998194425387588e-05, "loss": 0.5824, "step": 541 }, { "epoch": 0.014881933003844041, "grad_norm": 0.366098016500473, "learning_rate": 1.9998186209152336e-05, "loss": 0.6099, "step": 542 }, { "epoch": 0.014909390444810544, "grad_norm": 0.3561984896659851, "learning_rate": 1.9998177974267275e-05, "loss": 0.6368, "step": 543 }, { "epoch": 0.014936847885777045, "grad_norm": 0.4592185914516449, "learning_rate": 1.999816972073242e-05, "loss": 0.5208, "step": 544 }, { "epoch": 0.014964305326743548, "grad_norm": 0.37104126811027527, "learning_rate": 1.999816144854779e-05, "loss": 0.5502, "step": 545 }, { "epoch": 0.014991762767710049, "grad_norm": 0.3912109136581421, "learning_rate": 1.9998153157713397e-05, "loss": 0.7315, "step": 546 }, { "epoch": 0.015019220208676552, "grad_norm": 0.5116318464279175, "learning_rate": 1.9998144848229257e-05, "loss": 0.6158, "step": 547 }, { "epoch": 0.015046677649643053, "grad_norm": 0.35095739364624023, "learning_rate": 1.9998136520095386e-05, "loss": 0.5825, "step": 548 }, { "epoch": 0.015074135090609555, "grad_norm": 0.4187716543674469, "learning_rate": 1.99981281733118e-05, "loss": 0.5321, "step": 549 }, { "epoch": 0.015101592531576058, "grad_norm": 0.3867867588996887, "learning_rate": 1.9998119807878513e-05, "loss": 0.5678, "step": 550 }, { "epoch": 0.015129049972542559, "grad_norm": 0.39295196533203125, "learning_rate": 1.999811142379554e-05, "loss": 0.5342, "step": 551 }, { "epoch": 0.015156507413509062, "grad_norm": 0.35886532068252563, "learning_rate": 1.99981030210629e-05, "loss": 0.6229, "step": 552 }, { "epoch": 0.015183964854475563, "grad_norm": 0.3691362738609314, "learning_rate": 1.9998094599680605e-05, "loss": 0.6431, "step": 553 }, { "epoch": 0.015211422295442065, "grad_norm": 0.3991229832172394, "learning_rate": 1.9998086159648678e-05, "loss": 0.6209, "step": 554 }, { "epoch": 0.015238879736408566, "grad_norm": 0.392595499753952, "learning_rate": 1.9998077700967124e-05, "loss": 0.6696, "step": 555 }, { "epoch": 0.015266337177375069, "grad_norm": 0.42975157499313354, "learning_rate": 1.999806922363597e-05, "loss": 0.5418, "step": 556 }, { "epoch": 0.01529379461834157, "grad_norm": 0.39465758204460144, "learning_rate": 1.999806072765522e-05, "loss": 0.6012, "step": 557 }, { "epoch": 0.015321252059308073, "grad_norm": 0.47726041078567505, "learning_rate": 1.9998052213024893e-05, "loss": 0.5806, "step": 558 }, { "epoch": 0.015348709500274574, "grad_norm": 0.978887140750885, "learning_rate": 1.9998043679745012e-05, "loss": 0.6535, "step": 559 }, { "epoch": 0.015376166941241077, "grad_norm": 0.3782835006713867, "learning_rate": 1.999803512781559e-05, "loss": 0.5831, "step": 560 }, { "epoch": 0.015403624382207578, "grad_norm": 0.3892028033733368, "learning_rate": 1.999802655723664e-05, "loss": 0.5753, "step": 561 }, { "epoch": 0.01543108182317408, "grad_norm": 0.39572474360466003, "learning_rate": 1.999801796800818e-05, "loss": 0.6076, "step": 562 }, { "epoch": 0.015458539264140581, "grad_norm": 0.46114587783813477, "learning_rate": 1.9998009360130225e-05, "loss": 0.6362, "step": 563 }, { "epoch": 0.015485996705107084, "grad_norm": 0.3802315592765808, "learning_rate": 1.9998000733602788e-05, "loss": 0.5781, "step": 564 }, { "epoch": 0.015513454146073587, "grad_norm": 0.3613618016242981, "learning_rate": 1.9997992088425895e-05, "loss": 0.5668, "step": 565 }, { "epoch": 0.015540911587040088, "grad_norm": 0.3899209201335907, "learning_rate": 1.9997983424599552e-05, "loss": 0.6582, "step": 566 }, { "epoch": 0.01556836902800659, "grad_norm": 0.3839051127433777, "learning_rate": 1.999797474212378e-05, "loss": 0.5908, "step": 567 }, { "epoch": 0.015595826468973091, "grad_norm": 0.5837864279747009, "learning_rate": 1.9997966040998595e-05, "loss": 0.5791, "step": 568 }, { "epoch": 0.015623283909939594, "grad_norm": 0.39719292521476746, "learning_rate": 1.9997957321224007e-05, "loss": 0.6178, "step": 569 }, { "epoch": 0.015650741350906095, "grad_norm": 0.3915398120880127, "learning_rate": 1.9997948582800045e-05, "loss": 0.5295, "step": 570 }, { "epoch": 0.015678198791872596, "grad_norm": 0.3873436748981476, "learning_rate": 1.9997939825726715e-05, "loss": 0.5852, "step": 571 }, { "epoch": 0.0157056562328391, "grad_norm": 0.3592807948589325, "learning_rate": 1.9997931050004036e-05, "loss": 0.4739, "step": 572 }, { "epoch": 0.0157331136738056, "grad_norm": 0.4087948799133301, "learning_rate": 1.9997922255632023e-05, "loss": 0.6564, "step": 573 }, { "epoch": 0.015760571114772103, "grad_norm": 0.4732983708381653, "learning_rate": 1.9997913442610696e-05, "loss": 0.6002, "step": 574 }, { "epoch": 0.015788028555738604, "grad_norm": 0.3809700906276703, "learning_rate": 1.9997904610940072e-05, "loss": 0.6194, "step": 575 }, { "epoch": 0.015815485996705108, "grad_norm": 0.45309966802597046, "learning_rate": 1.9997895760620157e-05, "loss": 0.6071, "step": 576 }, { "epoch": 0.01584294343767161, "grad_norm": 0.35042840242385864, "learning_rate": 1.9997886891650982e-05, "loss": 0.6006, "step": 577 }, { "epoch": 0.01587040087863811, "grad_norm": 0.33388376235961914, "learning_rate": 1.9997878004032555e-05, "loss": 0.6194, "step": 578 }, { "epoch": 0.015897858319604614, "grad_norm": 0.3959198594093323, "learning_rate": 1.9997869097764897e-05, "loss": 0.5916, "step": 579 }, { "epoch": 0.015925315760571115, "grad_norm": 0.4101382791996002, "learning_rate": 1.999786017284802e-05, "loss": 0.5702, "step": 580 }, { "epoch": 0.015952773201537616, "grad_norm": 0.4070645868778229, "learning_rate": 1.9997851229281942e-05, "loss": 0.5718, "step": 581 }, { "epoch": 0.015980230642504117, "grad_norm": 0.37170371413230896, "learning_rate": 1.999784226706668e-05, "loss": 0.6563, "step": 582 }, { "epoch": 0.016007688083470622, "grad_norm": 0.43527600169181824, "learning_rate": 1.9997833286202255e-05, "loss": 0.603, "step": 583 }, { "epoch": 0.016035145524437123, "grad_norm": 0.35357949137687683, "learning_rate": 1.9997824286688678e-05, "loss": 0.5785, "step": 584 }, { "epoch": 0.016062602965403624, "grad_norm": 0.4094401001930237, "learning_rate": 1.999781526852597e-05, "loss": 0.6416, "step": 585 }, { "epoch": 0.016090060406370125, "grad_norm": 0.4384291172027588, "learning_rate": 1.999780623171414e-05, "loss": 0.642, "step": 586 }, { "epoch": 0.01611751784733663, "grad_norm": 0.34347274899482727, "learning_rate": 1.9997797176253213e-05, "loss": 0.516, "step": 587 }, { "epoch": 0.01614497528830313, "grad_norm": 0.4126909673213959, "learning_rate": 1.9997788102143206e-05, "loss": 0.6108, "step": 588 }, { "epoch": 0.01617243272926963, "grad_norm": 0.37418729066848755, "learning_rate": 1.999777900938413e-05, "loss": 0.5346, "step": 589 }, { "epoch": 0.016199890170236132, "grad_norm": 0.36670830845832825, "learning_rate": 1.9997769897976008e-05, "loss": 0.5593, "step": 590 }, { "epoch": 0.016227347611202637, "grad_norm": 0.41985636949539185, "learning_rate": 1.999776076791885e-05, "loss": 0.58, "step": 591 }, { "epoch": 0.016254805052169138, "grad_norm": 0.42642778158187866, "learning_rate": 1.999775161921268e-05, "loss": 0.6964, "step": 592 }, { "epoch": 0.01628226249313564, "grad_norm": 0.46405890583992004, "learning_rate": 1.9997742451857513e-05, "loss": 0.6636, "step": 593 }, { "epoch": 0.016309719934102143, "grad_norm": 0.38017377257347107, "learning_rate": 1.9997733265853366e-05, "loss": 0.515, "step": 594 }, { "epoch": 0.016337177375068644, "grad_norm": 0.44501325488090515, "learning_rate": 1.9997724061200254e-05, "loss": 0.6398, "step": 595 }, { "epoch": 0.016364634816035145, "grad_norm": 0.36285772919654846, "learning_rate": 1.9997714837898193e-05, "loss": 0.5774, "step": 596 }, { "epoch": 0.016392092257001646, "grad_norm": 0.40975141525268555, "learning_rate": 1.9997705595947208e-05, "loss": 0.5681, "step": 597 }, { "epoch": 0.01641954969796815, "grad_norm": 0.43118762969970703, "learning_rate": 1.999769633534731e-05, "loss": 0.5944, "step": 598 }, { "epoch": 0.01644700713893465, "grad_norm": 0.39138084650039673, "learning_rate": 1.9997687056098515e-05, "loss": 0.6193, "step": 599 }, { "epoch": 0.016474464579901153, "grad_norm": 0.36784979701042175, "learning_rate": 1.9997677758200845e-05, "loss": 0.6735, "step": 600 }, { "epoch": 0.016501922020867654, "grad_norm": 0.36294370889663696, "learning_rate": 1.999766844165431e-05, "loss": 0.6779, "step": 601 }, { "epoch": 0.016529379461834158, "grad_norm": 0.4124161899089813, "learning_rate": 1.999765910645894e-05, "loss": 0.7035, "step": 602 }, { "epoch": 0.01655683690280066, "grad_norm": 0.40208274126052856, "learning_rate": 1.9997649752614744e-05, "loss": 0.6185, "step": 603 }, { "epoch": 0.01658429434376716, "grad_norm": 0.4000442326068878, "learning_rate": 1.9997640380121735e-05, "loss": 0.5312, "step": 604 }, { "epoch": 0.016611751784733664, "grad_norm": 0.36491647362709045, "learning_rate": 1.999763098897994e-05, "loss": 0.6631, "step": 605 }, { "epoch": 0.016639209225700165, "grad_norm": 0.36979371309280396, "learning_rate": 1.999762157918937e-05, "loss": 0.6259, "step": 606 }, { "epoch": 0.016666666666666666, "grad_norm": 0.3710172176361084, "learning_rate": 1.9997612150750044e-05, "loss": 0.6069, "step": 607 }, { "epoch": 0.016694124107633167, "grad_norm": 0.37757453322410583, "learning_rate": 1.9997602703661984e-05, "loss": 0.5597, "step": 608 }, { "epoch": 0.016721581548599672, "grad_norm": 0.42393893003463745, "learning_rate": 1.9997593237925203e-05, "loss": 0.6359, "step": 609 }, { "epoch": 0.016749038989566173, "grad_norm": 0.40653982758522034, "learning_rate": 1.999758375353972e-05, "loss": 0.5559, "step": 610 }, { "epoch": 0.016776496430532674, "grad_norm": 0.4024507403373718, "learning_rate": 1.999757425050555e-05, "loss": 0.5642, "step": 611 }, { "epoch": 0.016803953871499175, "grad_norm": 0.3754747211933136, "learning_rate": 1.9997564728822718e-05, "loss": 0.6193, "step": 612 }, { "epoch": 0.01683141131246568, "grad_norm": 0.46400701999664307, "learning_rate": 1.9997555188491234e-05, "loss": 0.6474, "step": 613 }, { "epoch": 0.01685886875343218, "grad_norm": 0.39160293340682983, "learning_rate": 1.9997545629511118e-05, "loss": 0.5649, "step": 614 }, { "epoch": 0.01688632619439868, "grad_norm": 0.37168920040130615, "learning_rate": 1.999753605188239e-05, "loss": 0.5518, "step": 615 }, { "epoch": 0.016913783635365182, "grad_norm": 0.38634005188941956, "learning_rate": 1.9997526455605064e-05, "loss": 0.6012, "step": 616 }, { "epoch": 0.016941241076331687, "grad_norm": 0.35299113392829895, "learning_rate": 1.999751684067916e-05, "loss": 0.6017, "step": 617 }, { "epoch": 0.016968698517298188, "grad_norm": 0.3434194028377533, "learning_rate": 1.9997507207104697e-05, "loss": 0.6039, "step": 618 }, { "epoch": 0.01699615595826469, "grad_norm": 0.3671810030937195, "learning_rate": 1.9997497554881697e-05, "loss": 0.4956, "step": 619 }, { "epoch": 0.017023613399231193, "grad_norm": 0.3855080306529999, "learning_rate": 1.9997487884010168e-05, "loss": 0.511, "step": 620 }, { "epoch": 0.017051070840197694, "grad_norm": 0.3590529263019562, "learning_rate": 1.9997478194490135e-05, "loss": 0.5682, "step": 621 }, { "epoch": 0.017078528281164195, "grad_norm": 0.3853760063648224, "learning_rate": 1.9997468486321614e-05, "loss": 0.5669, "step": 622 }, { "epoch": 0.017105985722130696, "grad_norm": 0.38928598165512085, "learning_rate": 1.9997458759504623e-05, "loss": 0.6273, "step": 623 }, { "epoch": 0.0171334431630972, "grad_norm": 0.3647083342075348, "learning_rate": 1.999744901403918e-05, "loss": 0.6242, "step": 624 }, { "epoch": 0.0171609006040637, "grad_norm": 0.40483883023262024, "learning_rate": 1.9997439249925307e-05, "loss": 0.5475, "step": 625 }, { "epoch": 0.017188358045030203, "grad_norm": 0.328152060508728, "learning_rate": 1.999742946716302e-05, "loss": 0.492, "step": 626 }, { "epoch": 0.017215815485996704, "grad_norm": 0.3460335433483124, "learning_rate": 1.9997419665752333e-05, "loss": 0.5432, "step": 627 }, { "epoch": 0.017243272926963208, "grad_norm": 0.4378203749656677, "learning_rate": 1.9997409845693264e-05, "loss": 0.6527, "step": 628 }, { "epoch": 0.01727073036792971, "grad_norm": 0.7637513875961304, "learning_rate": 1.9997400006985843e-05, "loss": 0.527, "step": 629 }, { "epoch": 0.01729818780889621, "grad_norm": 0.42557206749916077, "learning_rate": 1.9997390149630073e-05, "loss": 0.6952, "step": 630 }, { "epoch": 0.017325645249862714, "grad_norm": 0.3923731744289398, "learning_rate": 1.9997380273625982e-05, "loss": 0.601, "step": 631 }, { "epoch": 0.017353102690829215, "grad_norm": 0.436728298664093, "learning_rate": 1.9997370378973587e-05, "loss": 0.6006, "step": 632 }, { "epoch": 0.017380560131795716, "grad_norm": 0.37395837903022766, "learning_rate": 1.9997360465672904e-05, "loss": 0.6416, "step": 633 }, { "epoch": 0.017408017572762217, "grad_norm": 0.3573594391345978, "learning_rate": 1.9997350533723953e-05, "loss": 0.5262, "step": 634 }, { "epoch": 0.017435475013728722, "grad_norm": 0.39430806040763855, "learning_rate": 1.9997340583126753e-05, "loss": 0.5805, "step": 635 }, { "epoch": 0.017462932454695223, "grad_norm": 0.441062867641449, "learning_rate": 1.9997330613881322e-05, "loss": 0.5672, "step": 636 }, { "epoch": 0.017490389895661724, "grad_norm": 0.4491402506828308, "learning_rate": 1.999732062598768e-05, "loss": 0.7201, "step": 637 }, { "epoch": 0.017517847336628225, "grad_norm": 0.37788498401641846, "learning_rate": 1.9997310619445843e-05, "loss": 0.5856, "step": 638 }, { "epoch": 0.01754530477759473, "grad_norm": 0.3511843681335449, "learning_rate": 1.999730059425583e-05, "loss": 0.5611, "step": 639 }, { "epoch": 0.01757276221856123, "grad_norm": 0.3528120219707489, "learning_rate": 1.9997290550417664e-05, "loss": 0.5425, "step": 640 }, { "epoch": 0.01760021965952773, "grad_norm": 0.40895235538482666, "learning_rate": 1.9997280487931355e-05, "loss": 0.5981, "step": 641 }, { "epoch": 0.017627677100494232, "grad_norm": 0.5240883827209473, "learning_rate": 1.999727040679693e-05, "loss": 0.6397, "step": 642 }, { "epoch": 0.017655134541460737, "grad_norm": 0.3656834661960602, "learning_rate": 1.9997260307014406e-05, "loss": 0.6139, "step": 643 }, { "epoch": 0.017682591982427238, "grad_norm": 0.3596108555793762, "learning_rate": 1.99972501885838e-05, "loss": 0.5773, "step": 644 }, { "epoch": 0.01771004942339374, "grad_norm": 0.3943532109260559, "learning_rate": 1.999724005150513e-05, "loss": 0.6087, "step": 645 }, { "epoch": 0.017737506864360243, "grad_norm": 0.36053335666656494, "learning_rate": 1.999722989577842e-05, "loss": 0.5835, "step": 646 }, { "epoch": 0.017764964305326744, "grad_norm": 0.42509549856185913, "learning_rate": 1.9997219721403684e-05, "loss": 0.6688, "step": 647 }, { "epoch": 0.017792421746293245, "grad_norm": 0.3831471800804138, "learning_rate": 1.999720952838094e-05, "loss": 0.556, "step": 648 }, { "epoch": 0.017819879187259746, "grad_norm": 0.7185770273208618, "learning_rate": 1.9997199316710213e-05, "loss": 0.6329, "step": 649 }, { "epoch": 0.01784733662822625, "grad_norm": 0.3929024934768677, "learning_rate": 1.9997189086391518e-05, "loss": 0.7131, "step": 650 }, { "epoch": 0.01787479406919275, "grad_norm": 0.4883844554424286, "learning_rate": 1.999717883742487e-05, "loss": 0.5116, "step": 651 }, { "epoch": 0.017902251510159253, "grad_norm": 0.41014277935028076, "learning_rate": 1.99971685698103e-05, "loss": 0.6076, "step": 652 }, { "epoch": 0.017929708951125754, "grad_norm": 0.4055403470993042, "learning_rate": 1.9997158283547814e-05, "loss": 0.5961, "step": 653 }, { "epoch": 0.017957166392092258, "grad_norm": 0.384277880191803, "learning_rate": 1.9997147978637443e-05, "loss": 0.5638, "step": 654 }, { "epoch": 0.01798462383305876, "grad_norm": 0.37508463859558105, "learning_rate": 1.9997137655079194e-05, "loss": 0.5973, "step": 655 }, { "epoch": 0.01801208127402526, "grad_norm": 0.38920071721076965, "learning_rate": 1.9997127312873096e-05, "loss": 0.6345, "step": 656 }, { "epoch": 0.01803953871499176, "grad_norm": 0.38789263367652893, "learning_rate": 1.9997116952019163e-05, "loss": 0.6496, "step": 657 }, { "epoch": 0.018066996155958265, "grad_norm": 0.4327630400657654, "learning_rate": 1.999710657251742e-05, "loss": 0.5824, "step": 658 }, { "epoch": 0.018094453596924766, "grad_norm": 0.39532190561294556, "learning_rate": 1.999709617436788e-05, "loss": 0.6401, "step": 659 }, { "epoch": 0.018121911037891267, "grad_norm": 0.3969400227069855, "learning_rate": 1.9997085757570565e-05, "loss": 0.5944, "step": 660 }, { "epoch": 0.018149368478857772, "grad_norm": 0.5550959706306458, "learning_rate": 1.9997075322125492e-05, "loss": 0.6153, "step": 661 }, { "epoch": 0.018176825919824273, "grad_norm": 0.4067176282405853, "learning_rate": 1.9997064868032687e-05, "loss": 0.6429, "step": 662 }, { "epoch": 0.018204283360790774, "grad_norm": 0.35939744114875793, "learning_rate": 1.9997054395292163e-05, "loss": 0.5708, "step": 663 }, { "epoch": 0.018231740801757275, "grad_norm": 0.39175185561180115, "learning_rate": 1.999704390390394e-05, "loss": 0.5345, "step": 664 }, { "epoch": 0.01825919824272378, "grad_norm": 0.4091104567050934, "learning_rate": 1.9997033393868042e-05, "loss": 0.6652, "step": 665 }, { "epoch": 0.01828665568369028, "grad_norm": 0.4726792275905609, "learning_rate": 1.9997022865184486e-05, "loss": 0.533, "step": 666 }, { "epoch": 0.01831411312465678, "grad_norm": 0.3521459996700287, "learning_rate": 1.9997012317853292e-05, "loss": 0.5429, "step": 667 }, { "epoch": 0.018341570565623282, "grad_norm": 0.3617788553237915, "learning_rate": 1.9997001751874475e-05, "loss": 0.528, "step": 668 }, { "epoch": 0.018369028006589787, "grad_norm": 0.3685790002346039, "learning_rate": 1.9996991167248065e-05, "loss": 0.6191, "step": 669 }, { "epoch": 0.018396485447556288, "grad_norm": 0.3416842818260193, "learning_rate": 1.9996980563974073e-05, "loss": 0.5215, "step": 670 }, { "epoch": 0.01842394288852279, "grad_norm": 0.3642665445804596, "learning_rate": 1.999696994205252e-05, "loss": 0.6163, "step": 671 }, { "epoch": 0.018451400329489293, "grad_norm": 0.3567480444908142, "learning_rate": 1.999695930148343e-05, "loss": 0.5503, "step": 672 }, { "epoch": 0.018478857770455794, "grad_norm": 0.35434848070144653, "learning_rate": 1.999694864226682e-05, "loss": 0.5697, "step": 673 }, { "epoch": 0.018506315211422295, "grad_norm": 0.36857515573501587, "learning_rate": 1.999693796440271e-05, "loss": 0.6676, "step": 674 }, { "epoch": 0.018533772652388796, "grad_norm": 0.4114159643650055, "learning_rate": 1.9996927267891123e-05, "loss": 0.6253, "step": 675 }, { "epoch": 0.0185612300933553, "grad_norm": 0.42688414454460144, "learning_rate": 1.9996916552732073e-05, "loss": 0.6039, "step": 676 }, { "epoch": 0.0185886875343218, "grad_norm": 0.4053291976451874, "learning_rate": 1.999690581892558e-05, "loss": 0.6493, "step": 677 }, { "epoch": 0.018616144975288303, "grad_norm": 0.37900951504707336, "learning_rate": 1.999689506647167e-05, "loss": 0.6184, "step": 678 }, { "epoch": 0.018643602416254804, "grad_norm": 0.38820114731788635, "learning_rate": 1.999688429537036e-05, "loss": 0.5318, "step": 679 }, { "epoch": 0.018671059857221308, "grad_norm": 0.41871899366378784, "learning_rate": 1.9996873505621672e-05, "loss": 0.6151, "step": 680 }, { "epoch": 0.01869851729818781, "grad_norm": 0.42370879650115967, "learning_rate": 1.9996862697225624e-05, "loss": 0.6047, "step": 681 }, { "epoch": 0.01872597473915431, "grad_norm": 0.4112662076950073, "learning_rate": 1.9996851870182233e-05, "loss": 0.6336, "step": 682 }, { "epoch": 0.01875343218012081, "grad_norm": 0.3670344054698944, "learning_rate": 1.999684102449152e-05, "loss": 0.622, "step": 683 }, { "epoch": 0.018780889621087316, "grad_norm": 0.4268692433834076, "learning_rate": 1.9996830160153517e-05, "loss": 0.5497, "step": 684 }, { "epoch": 0.018808347062053817, "grad_norm": 0.3788997530937195, "learning_rate": 1.999681927716823e-05, "loss": 0.5358, "step": 685 }, { "epoch": 0.018835804503020318, "grad_norm": 0.37218910455703735, "learning_rate": 1.9996808375535686e-05, "loss": 0.4709, "step": 686 }, { "epoch": 0.018863261943986822, "grad_norm": 0.42026597261428833, "learning_rate": 1.99967974552559e-05, "loss": 0.7038, "step": 687 }, { "epoch": 0.018890719384953323, "grad_norm": 0.40262511372566223, "learning_rate": 1.99967865163289e-05, "loss": 0.5092, "step": 688 }, { "epoch": 0.018918176825919824, "grad_norm": 0.3546545207500458, "learning_rate": 1.99967755587547e-05, "loss": 0.6381, "step": 689 }, { "epoch": 0.018945634266886325, "grad_norm": 0.3929683268070221, "learning_rate": 1.9996764582533326e-05, "loss": 0.567, "step": 690 }, { "epoch": 0.01897309170785283, "grad_norm": 0.4001508355140686, "learning_rate": 1.9996753587664794e-05, "loss": 0.5886, "step": 691 }, { "epoch": 0.01900054914881933, "grad_norm": 0.35207152366638184, "learning_rate": 1.9996742574149123e-05, "loss": 0.5739, "step": 692 }, { "epoch": 0.01902800658978583, "grad_norm": 0.36567723751068115, "learning_rate": 1.999673154198634e-05, "loss": 0.6034, "step": 693 }, { "epoch": 0.019055464030752332, "grad_norm": 0.40501561760902405, "learning_rate": 1.9996720491176462e-05, "loss": 0.5993, "step": 694 }, { "epoch": 0.019082921471718837, "grad_norm": 0.3868468403816223, "learning_rate": 1.999670942171951e-05, "loss": 0.579, "step": 695 }, { "epoch": 0.019110378912685338, "grad_norm": 0.3892608880996704, "learning_rate": 1.9996698333615503e-05, "loss": 0.6181, "step": 696 }, { "epoch": 0.01913783635365184, "grad_norm": 0.4524693787097931, "learning_rate": 1.9996687226864462e-05, "loss": 0.532, "step": 697 }, { "epoch": 0.019165293794618343, "grad_norm": 0.37429699301719666, "learning_rate": 1.999667610146641e-05, "loss": 0.6278, "step": 698 }, { "epoch": 0.019192751235584844, "grad_norm": 0.3698277175426483, "learning_rate": 1.9996664957421367e-05, "loss": 0.5945, "step": 699 }, { "epoch": 0.019220208676551345, "grad_norm": 0.441307932138443, "learning_rate": 1.9996653794729353e-05, "loss": 0.5737, "step": 700 }, { "epoch": 0.019247666117517846, "grad_norm": 0.3855707049369812, "learning_rate": 1.999664261339039e-05, "loss": 0.5946, "step": 701 }, { "epoch": 0.01927512355848435, "grad_norm": 0.38945701718330383, "learning_rate": 1.99966314134045e-05, "loss": 0.5966, "step": 702 }, { "epoch": 0.01930258099945085, "grad_norm": 0.379617840051651, "learning_rate": 1.9996620194771698e-05, "loss": 0.5929, "step": 703 }, { "epoch": 0.019330038440417353, "grad_norm": 0.3983360528945923, "learning_rate": 1.9996608957492006e-05, "loss": 0.577, "step": 704 }, { "epoch": 0.019357495881383854, "grad_norm": 0.34047651290893555, "learning_rate": 1.9996597701565453e-05, "loss": 0.5414, "step": 705 }, { "epoch": 0.019384953322350358, "grad_norm": 0.4075978994369507, "learning_rate": 1.9996586426992052e-05, "loss": 0.5607, "step": 706 }, { "epoch": 0.01941241076331686, "grad_norm": 0.3390105068683624, "learning_rate": 1.9996575133771828e-05, "loss": 0.5567, "step": 707 }, { "epoch": 0.01943986820428336, "grad_norm": 0.4025273025035858, "learning_rate": 1.9996563821904803e-05, "loss": 0.6369, "step": 708 }, { "epoch": 0.01946732564524986, "grad_norm": 0.42383697628974915, "learning_rate": 1.9996552491390992e-05, "loss": 0.6831, "step": 709 }, { "epoch": 0.019494783086216366, "grad_norm": 0.4264463484287262, "learning_rate": 1.9996541142230425e-05, "loss": 0.6113, "step": 710 }, { "epoch": 0.019522240527182867, "grad_norm": 0.41362059116363525, "learning_rate": 1.999652977442311e-05, "loss": 0.6939, "step": 711 }, { "epoch": 0.019549697968149368, "grad_norm": 0.3867991864681244, "learning_rate": 1.9996518387969083e-05, "loss": 0.5496, "step": 712 }, { "epoch": 0.019577155409115872, "grad_norm": 0.3825870454311371, "learning_rate": 1.999650698286836e-05, "loss": 0.6467, "step": 713 }, { "epoch": 0.019604612850082373, "grad_norm": 0.4010181725025177, "learning_rate": 1.9996495559120957e-05, "loss": 0.6138, "step": 714 }, { "epoch": 0.019632070291048874, "grad_norm": 0.3686906695365906, "learning_rate": 1.99964841167269e-05, "loss": 0.5615, "step": 715 }, { "epoch": 0.019659527732015375, "grad_norm": 0.4058796167373657, "learning_rate": 1.999647265568621e-05, "loss": 0.6292, "step": 716 }, { "epoch": 0.01968698517298188, "grad_norm": 0.37050995230674744, "learning_rate": 1.9996461175998905e-05, "loss": 0.6338, "step": 717 }, { "epoch": 0.01971444261394838, "grad_norm": 0.34270793199539185, "learning_rate": 1.9996449677665014e-05, "loss": 0.4983, "step": 718 }, { "epoch": 0.01974190005491488, "grad_norm": 0.35190996527671814, "learning_rate": 1.9996438160684553e-05, "loss": 0.502, "step": 719 }, { "epoch": 0.019769357495881382, "grad_norm": 0.39960989356040955, "learning_rate": 1.9996426625057547e-05, "loss": 0.5888, "step": 720 }, { "epoch": 0.019796814936847887, "grad_norm": 0.39513394236564636, "learning_rate": 1.9996415070784007e-05, "loss": 0.6204, "step": 721 }, { "epoch": 0.019824272377814388, "grad_norm": 0.35115572810173035, "learning_rate": 1.9996403497863966e-05, "loss": 0.599, "step": 722 }, { "epoch": 0.01985172981878089, "grad_norm": 0.3970882296562195, "learning_rate": 1.9996391906297446e-05, "loss": 0.5573, "step": 723 }, { "epoch": 0.01987918725974739, "grad_norm": 0.44875118136405945, "learning_rate": 1.9996380296084462e-05, "loss": 0.764, "step": 724 }, { "epoch": 0.019906644700713894, "grad_norm": 0.3716571629047394, "learning_rate": 1.9996368667225037e-05, "loss": 0.5655, "step": 725 }, { "epoch": 0.019934102141680395, "grad_norm": 0.4899410307407379, "learning_rate": 1.9996357019719194e-05, "loss": 0.5899, "step": 726 }, { "epoch": 0.019961559582646896, "grad_norm": 0.47499653697013855, "learning_rate": 1.9996345353566955e-05, "loss": 0.5837, "step": 727 }, { "epoch": 0.0199890170236134, "grad_norm": 0.4007241427898407, "learning_rate": 1.9996333668768342e-05, "loss": 0.5501, "step": 728 }, { "epoch": 0.0200164744645799, "grad_norm": 0.42610257863998413, "learning_rate": 1.9996321965323376e-05, "loss": 0.5657, "step": 729 }, { "epoch": 0.020043931905546403, "grad_norm": 0.35504621267318726, "learning_rate": 1.999631024323208e-05, "loss": 0.6636, "step": 730 }, { "epoch": 0.020071389346512904, "grad_norm": 0.5344265699386597, "learning_rate": 1.9996298502494474e-05, "loss": 0.5473, "step": 731 }, { "epoch": 0.020098846787479408, "grad_norm": 0.369143545627594, "learning_rate": 1.9996286743110582e-05, "loss": 0.588, "step": 732 }, { "epoch": 0.02012630422844591, "grad_norm": 0.41597768664360046, "learning_rate": 1.999627496508042e-05, "loss": 0.5127, "step": 733 }, { "epoch": 0.02015376166941241, "grad_norm": 0.37029096484184265, "learning_rate": 1.9996263168404023e-05, "loss": 0.5197, "step": 734 }, { "epoch": 0.02018121911037891, "grad_norm": 0.3724589943885803, "learning_rate": 1.99962513530814e-05, "loss": 0.5545, "step": 735 }, { "epoch": 0.020208676551345416, "grad_norm": 0.40416592359542847, "learning_rate": 1.9996239519112578e-05, "loss": 0.6692, "step": 736 }, { "epoch": 0.020236133992311917, "grad_norm": 0.378384530544281, "learning_rate": 1.9996227666497575e-05, "loss": 0.5671, "step": 737 }, { "epoch": 0.020263591433278418, "grad_norm": 0.39161446690559387, "learning_rate": 1.9996215795236423e-05, "loss": 0.5885, "step": 738 }, { "epoch": 0.020291048874244922, "grad_norm": 0.36782926321029663, "learning_rate": 1.9996203905329138e-05, "loss": 0.5308, "step": 739 }, { "epoch": 0.020318506315211423, "grad_norm": 0.4427297115325928, "learning_rate": 1.9996191996775737e-05, "loss": 0.5512, "step": 740 }, { "epoch": 0.020345963756177924, "grad_norm": 0.39539363980293274, "learning_rate": 1.999618006957625e-05, "loss": 0.5917, "step": 741 }, { "epoch": 0.020373421197144425, "grad_norm": 0.39683249592781067, "learning_rate": 1.99961681237307e-05, "loss": 0.6572, "step": 742 }, { "epoch": 0.02040087863811093, "grad_norm": 0.3967967629432678, "learning_rate": 1.9996156159239104e-05, "loss": 0.6313, "step": 743 }, { "epoch": 0.02042833607907743, "grad_norm": 0.3610052168369293, "learning_rate": 1.9996144176101487e-05, "loss": 0.6616, "step": 744 }, { "epoch": 0.02045579352004393, "grad_norm": 0.3304113447666168, "learning_rate": 1.9996132174317867e-05, "loss": 0.5033, "step": 745 }, { "epoch": 0.020483250961010432, "grad_norm": 0.368051141500473, "learning_rate": 1.9996120153888273e-05, "loss": 0.5672, "step": 746 }, { "epoch": 0.020510708401976937, "grad_norm": 0.3525749146938324, "learning_rate": 1.9996108114812726e-05, "loss": 0.5228, "step": 747 }, { "epoch": 0.020538165842943438, "grad_norm": 0.35352861881256104, "learning_rate": 1.9996096057091246e-05, "loss": 0.5042, "step": 748 }, { "epoch": 0.02056562328390994, "grad_norm": 0.34667524695396423, "learning_rate": 1.9996083980723854e-05, "loss": 0.5905, "step": 749 }, { "epoch": 0.02059308072487644, "grad_norm": 0.3735881745815277, "learning_rate": 1.9996071885710576e-05, "loss": 0.5851, "step": 750 }, { "epoch": 0.020620538165842944, "grad_norm": 0.37982043623924255, "learning_rate": 1.9996059772051434e-05, "loss": 0.5889, "step": 751 }, { "epoch": 0.020647995606809445, "grad_norm": 0.4002512991428375, "learning_rate": 1.9996047639746453e-05, "loss": 0.644, "step": 752 }, { "epoch": 0.020675453047775946, "grad_norm": 0.3585599660873413, "learning_rate": 1.999603548879565e-05, "loss": 0.6271, "step": 753 }, { "epoch": 0.02070291048874245, "grad_norm": 0.36989226937294006, "learning_rate": 1.999602331919905e-05, "loss": 0.6115, "step": 754 }, { "epoch": 0.02073036792970895, "grad_norm": 0.3567410707473755, "learning_rate": 1.9996011130956677e-05, "loss": 0.5691, "step": 755 }, { "epoch": 0.020757825370675453, "grad_norm": 0.43828344345092773, "learning_rate": 1.999599892406855e-05, "loss": 0.5916, "step": 756 }, { "epoch": 0.020785282811641954, "grad_norm": 0.4001257121562958, "learning_rate": 1.99959866985347e-05, "loss": 0.5844, "step": 757 }, { "epoch": 0.020812740252608458, "grad_norm": 0.37951651215553284, "learning_rate": 1.9995974454355143e-05, "loss": 0.6155, "step": 758 }, { "epoch": 0.02084019769357496, "grad_norm": 0.45075759291648865, "learning_rate": 1.99959621915299e-05, "loss": 0.5821, "step": 759 }, { "epoch": 0.02086765513454146, "grad_norm": 0.35843023657798767, "learning_rate": 1.9995949910059002e-05, "loss": 0.6045, "step": 760 }, { "epoch": 0.02089511257550796, "grad_norm": 0.352909117937088, "learning_rate": 1.9995937609942463e-05, "loss": 0.5682, "step": 761 }, { "epoch": 0.020922570016474466, "grad_norm": 0.39389362931251526, "learning_rate": 1.9995925291180313e-05, "loss": 0.6055, "step": 762 }, { "epoch": 0.020950027457440967, "grad_norm": 0.39120009541511536, "learning_rate": 1.999591295377257e-05, "loss": 0.5527, "step": 763 }, { "epoch": 0.020977484898407468, "grad_norm": 0.387358695268631, "learning_rate": 1.9995900597719258e-05, "loss": 0.6594, "step": 764 }, { "epoch": 0.021004942339373972, "grad_norm": 0.36977407336235046, "learning_rate": 1.9995888223020404e-05, "loss": 0.5769, "step": 765 }, { "epoch": 0.021032399780340473, "grad_norm": 0.36938515305519104, "learning_rate": 1.9995875829676027e-05, "loss": 0.5652, "step": 766 }, { "epoch": 0.021059857221306974, "grad_norm": 0.4045946002006531, "learning_rate": 1.999586341768615e-05, "loss": 0.5797, "step": 767 }, { "epoch": 0.021087314662273475, "grad_norm": 0.3764019012451172, "learning_rate": 1.9995850987050796e-05, "loss": 0.5147, "step": 768 }, { "epoch": 0.02111477210323998, "grad_norm": 0.35261985659599304, "learning_rate": 1.9995838537769993e-05, "loss": 0.6225, "step": 769 }, { "epoch": 0.02114222954420648, "grad_norm": 0.3653716444969177, "learning_rate": 1.999582606984376e-05, "loss": 0.6397, "step": 770 }, { "epoch": 0.02116968698517298, "grad_norm": 0.36696428060531616, "learning_rate": 1.999581358327212e-05, "loss": 0.5254, "step": 771 }, { "epoch": 0.021197144426139482, "grad_norm": 0.4574185013771057, "learning_rate": 1.9995801078055098e-05, "loss": 0.6072, "step": 772 }, { "epoch": 0.021224601867105987, "grad_norm": 0.3803882598876953, "learning_rate": 1.999578855419272e-05, "loss": 0.6184, "step": 773 }, { "epoch": 0.021252059308072488, "grad_norm": 0.38566452264785767, "learning_rate": 1.9995776011685003e-05, "loss": 0.6319, "step": 774 }, { "epoch": 0.02127951674903899, "grad_norm": 0.39810946583747864, "learning_rate": 1.999576345053197e-05, "loss": 0.4869, "step": 775 }, { "epoch": 0.02130697419000549, "grad_norm": 0.35804641246795654, "learning_rate": 1.9995750870733653e-05, "loss": 0.5991, "step": 776 }, { "epoch": 0.021334431630971994, "grad_norm": 0.34871742129325867, "learning_rate": 1.999573827229007e-05, "loss": 0.546, "step": 777 }, { "epoch": 0.021361889071938495, "grad_norm": 0.42481130361557007, "learning_rate": 1.9995725655201243e-05, "loss": 0.5686, "step": 778 }, { "epoch": 0.021389346512904996, "grad_norm": 0.4556298851966858, "learning_rate": 1.9995713019467197e-05, "loss": 0.624, "step": 779 }, { "epoch": 0.0214168039538715, "grad_norm": 0.3918381631374359, "learning_rate": 1.9995700365087957e-05, "loss": 0.5028, "step": 780 }, { "epoch": 0.021444261394838002, "grad_norm": 0.37947458028793335, "learning_rate": 1.9995687692063547e-05, "loss": 0.5161, "step": 781 }, { "epoch": 0.021471718835804503, "grad_norm": 0.4046136736869812, "learning_rate": 1.999567500039399e-05, "loss": 0.5377, "step": 782 }, { "epoch": 0.021499176276771004, "grad_norm": 0.3638302683830261, "learning_rate": 1.9995662290079303e-05, "loss": 0.591, "step": 783 }, { "epoch": 0.021526633717737508, "grad_norm": 0.3677133321762085, "learning_rate": 1.999564956111952e-05, "loss": 0.5021, "step": 784 }, { "epoch": 0.02155409115870401, "grad_norm": 0.44340354204177856, "learning_rate": 1.9995636813514658e-05, "loss": 0.6616, "step": 785 }, { "epoch": 0.02158154859967051, "grad_norm": 0.3773060739040375, "learning_rate": 1.9995624047264745e-05, "loss": 0.5845, "step": 786 }, { "epoch": 0.02160900604063701, "grad_norm": 0.3579763174057007, "learning_rate": 1.9995611262369803e-05, "loss": 0.5782, "step": 787 }, { "epoch": 0.021636463481603516, "grad_norm": 0.4122910797595978, "learning_rate": 1.9995598458829853e-05, "loss": 0.567, "step": 788 }, { "epoch": 0.021663920922570017, "grad_norm": 0.32842203974723816, "learning_rate": 1.9995585636644926e-05, "loss": 0.5306, "step": 789 }, { "epoch": 0.021691378363536518, "grad_norm": 0.34808245301246643, "learning_rate": 1.999557279581504e-05, "loss": 0.5881, "step": 790 }, { "epoch": 0.02171883580450302, "grad_norm": 0.3741764426231384, "learning_rate": 1.999555993634022e-05, "loss": 0.5252, "step": 791 }, { "epoch": 0.021746293245469523, "grad_norm": 0.36945030093193054, "learning_rate": 1.9995547058220488e-05, "loss": 0.5682, "step": 792 }, { "epoch": 0.021773750686436024, "grad_norm": 0.3753634989261627, "learning_rate": 1.9995534161455876e-05, "loss": 0.6068, "step": 793 }, { "epoch": 0.021801208127402525, "grad_norm": 0.41255176067352295, "learning_rate": 1.9995521246046398e-05, "loss": 0.5363, "step": 794 }, { "epoch": 0.02182866556836903, "grad_norm": 0.3646013140678406, "learning_rate": 1.9995508311992084e-05, "loss": 0.574, "step": 795 }, { "epoch": 0.02185612300933553, "grad_norm": 0.4076300263404846, "learning_rate": 1.9995495359292957e-05, "loss": 0.5974, "step": 796 }, { "epoch": 0.02188358045030203, "grad_norm": 0.3962678611278534, "learning_rate": 1.999548238794904e-05, "loss": 0.6868, "step": 797 }, { "epoch": 0.021911037891268532, "grad_norm": 0.39624378085136414, "learning_rate": 1.9995469397960362e-05, "loss": 0.604, "step": 798 }, { "epoch": 0.021938495332235037, "grad_norm": 0.38660988211631775, "learning_rate": 1.999545638932694e-05, "loss": 0.6033, "step": 799 }, { "epoch": 0.021965952773201538, "grad_norm": 0.42505213618278503, "learning_rate": 1.9995443362048803e-05, "loss": 0.6325, "step": 800 }, { "epoch": 0.02199341021416804, "grad_norm": 0.3458612859249115, "learning_rate": 1.999543031612597e-05, "loss": 0.5341, "step": 801 }, { "epoch": 0.02202086765513454, "grad_norm": 0.4429183602333069, "learning_rate": 1.9995417251558473e-05, "loss": 0.6424, "step": 802 }, { "epoch": 0.022048325096101044, "grad_norm": 0.4475831091403961, "learning_rate": 1.9995404168346334e-05, "loss": 0.6302, "step": 803 }, { "epoch": 0.022075782537067545, "grad_norm": 0.3606654107570648, "learning_rate": 1.9995391066489573e-05, "loss": 0.5564, "step": 804 }, { "epoch": 0.022103239978034046, "grad_norm": 0.34195756912231445, "learning_rate": 1.999537794598822e-05, "loss": 0.6014, "step": 805 }, { "epoch": 0.02213069741900055, "grad_norm": 0.39407142996788025, "learning_rate": 1.9995364806842294e-05, "loss": 0.5935, "step": 806 }, { "epoch": 0.022158154859967052, "grad_norm": 0.3707405924797058, "learning_rate": 1.9995351649051824e-05, "loss": 0.6438, "step": 807 }, { "epoch": 0.022185612300933553, "grad_norm": 0.4073667824268341, "learning_rate": 1.9995338472616834e-05, "loss": 0.5443, "step": 808 }, { "epoch": 0.022213069741900054, "grad_norm": 0.40615957975387573, "learning_rate": 1.9995325277537348e-05, "loss": 0.5466, "step": 809 }, { "epoch": 0.022240527182866558, "grad_norm": 0.4002559781074524, "learning_rate": 1.9995312063813386e-05, "loss": 0.5455, "step": 810 }, { "epoch": 0.02226798462383306, "grad_norm": 0.3798239827156067, "learning_rate": 1.9995298831444982e-05, "loss": 0.5668, "step": 811 }, { "epoch": 0.02229544206479956, "grad_norm": 0.3723065257072449, "learning_rate": 1.9995285580432153e-05, "loss": 0.5868, "step": 812 }, { "epoch": 0.02232289950576606, "grad_norm": 0.3871300220489502, "learning_rate": 1.9995272310774926e-05, "loss": 0.6292, "step": 813 }, { "epoch": 0.022350356946732566, "grad_norm": 0.38495635986328125, "learning_rate": 1.999525902247333e-05, "loss": 0.6666, "step": 814 }, { "epoch": 0.022377814387699067, "grad_norm": 0.3858591914176941, "learning_rate": 1.999524571552738e-05, "loss": 0.5645, "step": 815 }, { "epoch": 0.022405271828665568, "grad_norm": 0.3730102479457855, "learning_rate": 1.9995232389937108e-05, "loss": 0.5775, "step": 816 }, { "epoch": 0.02243272926963207, "grad_norm": 0.3483995795249939, "learning_rate": 1.999521904570254e-05, "loss": 0.5571, "step": 817 }, { "epoch": 0.022460186710598573, "grad_norm": 0.34828364849090576, "learning_rate": 1.9995205682823696e-05, "loss": 0.6157, "step": 818 }, { "epoch": 0.022487644151565074, "grad_norm": 0.4147208333015442, "learning_rate": 1.9995192301300606e-05, "loss": 0.5925, "step": 819 }, { "epoch": 0.022515101592531575, "grad_norm": 0.36973336338996887, "learning_rate": 1.9995178901133288e-05, "loss": 0.585, "step": 820 }, { "epoch": 0.02254255903349808, "grad_norm": 0.35538971424102783, "learning_rate": 1.9995165482321775e-05, "loss": 0.4958, "step": 821 }, { "epoch": 0.02257001647446458, "grad_norm": 0.3477325439453125, "learning_rate": 1.999515204486609e-05, "loss": 0.548, "step": 822 }, { "epoch": 0.02259747391543108, "grad_norm": 0.38357576727867126, "learning_rate": 1.999513858876625e-05, "loss": 0.5381, "step": 823 }, { "epoch": 0.022624931356397582, "grad_norm": 0.37185072898864746, "learning_rate": 1.9995125114022293e-05, "loss": 0.625, "step": 824 }, { "epoch": 0.022652388797364087, "grad_norm": 0.38245031237602234, "learning_rate": 1.999511162063423e-05, "loss": 0.6348, "step": 825 }, { "epoch": 0.022679846238330588, "grad_norm": 0.41776806116104126, "learning_rate": 1.99950981086021e-05, "loss": 0.5791, "step": 826 }, { "epoch": 0.02270730367929709, "grad_norm": 0.3829968571662903, "learning_rate": 1.9995084577925922e-05, "loss": 0.5873, "step": 827 }, { "epoch": 0.02273476112026359, "grad_norm": 0.3826722502708435, "learning_rate": 1.999507102860572e-05, "loss": 0.6369, "step": 828 }, { "epoch": 0.022762218561230094, "grad_norm": 0.38322916626930237, "learning_rate": 1.999505746064152e-05, "loss": 0.6467, "step": 829 }, { "epoch": 0.022789676002196595, "grad_norm": 0.3978785276412964, "learning_rate": 1.9995043874033347e-05, "loss": 0.6491, "step": 830 }, { "epoch": 0.022817133443163096, "grad_norm": 0.48552563786506653, "learning_rate": 1.999503026878123e-05, "loss": 0.562, "step": 831 }, { "epoch": 0.0228445908841296, "grad_norm": 0.3718736469745636, "learning_rate": 1.9995016644885185e-05, "loss": 0.5921, "step": 832 }, { "epoch": 0.022872048325096102, "grad_norm": 0.36356237530708313, "learning_rate": 1.999500300234525e-05, "loss": 0.5916, "step": 833 }, { "epoch": 0.022899505766062603, "grad_norm": 0.40481147170066833, "learning_rate": 1.9994989341161443e-05, "loss": 0.5249, "step": 834 }, { "epoch": 0.022926963207029104, "grad_norm": 0.40765437483787537, "learning_rate": 1.999497566133379e-05, "loss": 0.6033, "step": 835 }, { "epoch": 0.022954420647995608, "grad_norm": 0.3418324887752533, "learning_rate": 1.999496196286232e-05, "loss": 0.5186, "step": 836 }, { "epoch": 0.02298187808896211, "grad_norm": 0.3712979853153229, "learning_rate": 1.999494824574705e-05, "loss": 0.4847, "step": 837 }, { "epoch": 0.02300933552992861, "grad_norm": 0.3652055561542511, "learning_rate": 1.9994934509988018e-05, "loss": 0.511, "step": 838 }, { "epoch": 0.02303679297089511, "grad_norm": 0.3624974489212036, "learning_rate": 1.999492075558524e-05, "loss": 0.5184, "step": 839 }, { "epoch": 0.023064250411861616, "grad_norm": 0.3694970905780792, "learning_rate": 1.9994906982538744e-05, "loss": 0.6439, "step": 840 }, { "epoch": 0.023091707852828117, "grad_norm": 0.412337064743042, "learning_rate": 1.9994893190848556e-05, "loss": 0.5892, "step": 841 }, { "epoch": 0.023119165293794618, "grad_norm": 0.38939744234085083, "learning_rate": 1.9994879380514708e-05, "loss": 0.5727, "step": 842 }, { "epoch": 0.02314662273476112, "grad_norm": 0.3723776340484619, "learning_rate": 1.9994865551537214e-05, "loss": 0.5406, "step": 843 }, { "epoch": 0.023174080175727623, "grad_norm": 0.4175414741039276, "learning_rate": 1.9994851703916106e-05, "loss": 0.5651, "step": 844 }, { "epoch": 0.023201537616694124, "grad_norm": 0.3855356276035309, "learning_rate": 1.9994837837651412e-05, "loss": 0.6533, "step": 845 }, { "epoch": 0.023228995057660625, "grad_norm": 0.4428286850452423, "learning_rate": 1.9994823952743157e-05, "loss": 0.487, "step": 846 }, { "epoch": 0.02325645249862713, "grad_norm": 0.39513304829597473, "learning_rate": 1.999481004919136e-05, "loss": 0.607, "step": 847 }, { "epoch": 0.02328390993959363, "grad_norm": 0.5215888619422913, "learning_rate": 1.9994796126996056e-05, "loss": 0.5645, "step": 848 }, { "epoch": 0.02331136738056013, "grad_norm": 0.40944522619247437, "learning_rate": 1.9994782186157267e-05, "loss": 0.531, "step": 849 }, { "epoch": 0.023338824821526633, "grad_norm": 0.35032230615615845, "learning_rate": 1.999476822667502e-05, "loss": 0.5174, "step": 850 }, { "epoch": 0.023366282262493137, "grad_norm": 0.4147513508796692, "learning_rate": 1.9994754248549338e-05, "loss": 0.6151, "step": 851 }, { "epoch": 0.023393739703459638, "grad_norm": 0.389904648065567, "learning_rate": 1.999474025178025e-05, "loss": 0.5613, "step": 852 }, { "epoch": 0.02342119714442614, "grad_norm": 0.3529571294784546, "learning_rate": 1.9994726236367784e-05, "loss": 0.5358, "step": 853 }, { "epoch": 0.02344865458539264, "grad_norm": 0.36591753363609314, "learning_rate": 1.999471220231196e-05, "loss": 0.4879, "step": 854 }, { "epoch": 0.023476112026359144, "grad_norm": 0.4022122025489807, "learning_rate": 1.9994698149612806e-05, "loss": 0.6666, "step": 855 }, { "epoch": 0.023503569467325645, "grad_norm": 0.4660525321960449, "learning_rate": 1.9994684078270357e-05, "loss": 0.5235, "step": 856 }, { "epoch": 0.023531026908292146, "grad_norm": 0.4081370234489441, "learning_rate": 1.9994669988284628e-05, "loss": 0.5513, "step": 857 }, { "epoch": 0.023558484349258647, "grad_norm": 0.35713183879852295, "learning_rate": 1.9994655879655653e-05, "loss": 0.5263, "step": 858 }, { "epoch": 0.023585941790225152, "grad_norm": 0.34501883387565613, "learning_rate": 1.999464175238345e-05, "loss": 0.4776, "step": 859 }, { "epoch": 0.023613399231191653, "grad_norm": 0.3845241963863373, "learning_rate": 1.9994627606468052e-05, "loss": 0.5708, "step": 860 }, { "epoch": 0.023640856672158154, "grad_norm": 0.3557480275630951, "learning_rate": 1.999461344190948e-05, "loss": 0.5578, "step": 861 }, { "epoch": 0.02366831411312466, "grad_norm": 0.38580864667892456, "learning_rate": 1.999459925870777e-05, "loss": 0.602, "step": 862 }, { "epoch": 0.02369577155409116, "grad_norm": 0.4131057560443878, "learning_rate": 1.999458505686294e-05, "loss": 0.5707, "step": 863 }, { "epoch": 0.02372322899505766, "grad_norm": 0.35431694984436035, "learning_rate": 1.999457083637502e-05, "loss": 0.6412, "step": 864 }, { "epoch": 0.02375068643602416, "grad_norm": 0.40537914633750916, "learning_rate": 1.9994556597244035e-05, "loss": 0.6022, "step": 865 }, { "epoch": 0.023778143876990666, "grad_norm": 0.3799135386943817, "learning_rate": 1.999454233947001e-05, "loss": 0.6091, "step": 866 }, { "epoch": 0.023805601317957167, "grad_norm": 0.4244007170200348, "learning_rate": 1.9994528063052976e-05, "loss": 0.5542, "step": 867 }, { "epoch": 0.023833058758923668, "grad_norm": 0.5341901779174805, "learning_rate": 1.9994513767992957e-05, "loss": 0.5382, "step": 868 }, { "epoch": 0.02386051619989017, "grad_norm": 0.3517626225948334, "learning_rate": 1.999449945428998e-05, "loss": 0.6505, "step": 869 }, { "epoch": 0.023887973640856673, "grad_norm": 0.4160444438457489, "learning_rate": 1.9994485121944073e-05, "loss": 0.645, "step": 870 }, { "epoch": 0.023915431081823174, "grad_norm": 0.37870872020721436, "learning_rate": 1.9994470770955258e-05, "loss": 0.6245, "step": 871 }, { "epoch": 0.023942888522789675, "grad_norm": 0.37536153197288513, "learning_rate": 1.9994456401323566e-05, "loss": 0.5695, "step": 872 }, { "epoch": 0.02397034596375618, "grad_norm": 0.37391453981399536, "learning_rate": 1.9994442013049025e-05, "loss": 0.5937, "step": 873 }, { "epoch": 0.02399780340472268, "grad_norm": 0.4423391819000244, "learning_rate": 1.9994427606131658e-05, "loss": 0.7197, "step": 874 }, { "epoch": 0.02402526084568918, "grad_norm": 0.3652022182941437, "learning_rate": 1.9994413180571495e-05, "loss": 0.6957, "step": 875 }, { "epoch": 0.024052718286655683, "grad_norm": 0.35951510071754456, "learning_rate": 1.999439873636856e-05, "loss": 0.5976, "step": 876 }, { "epoch": 0.024080175727622187, "grad_norm": 0.3824165165424347, "learning_rate": 1.9994384273522886e-05, "loss": 0.5504, "step": 877 }, { "epoch": 0.024107633168588688, "grad_norm": 0.3506315052509308, "learning_rate": 1.999436979203449e-05, "loss": 0.5528, "step": 878 }, { "epoch": 0.02413509060955519, "grad_norm": 0.37292277812957764, "learning_rate": 1.9994355291903406e-05, "loss": 0.5909, "step": 879 }, { "epoch": 0.02416254805052169, "grad_norm": 0.40433812141418457, "learning_rate": 1.999434077312966e-05, "loss": 0.5879, "step": 880 }, { "epoch": 0.024190005491488194, "grad_norm": 0.38165444135665894, "learning_rate": 1.9994326235713278e-05, "loss": 0.539, "step": 881 }, { "epoch": 0.024217462932454695, "grad_norm": 0.3862334191799164, "learning_rate": 1.999431167965429e-05, "loss": 0.6716, "step": 882 }, { "epoch": 0.024244920373421196, "grad_norm": 0.5726919174194336, "learning_rate": 1.999429710495272e-05, "loss": 0.5252, "step": 883 }, { "epoch": 0.024272377814387697, "grad_norm": 0.4128020107746124, "learning_rate": 1.9994282511608595e-05, "loss": 0.5836, "step": 884 }, { "epoch": 0.024299835255354202, "grad_norm": 0.6838932037353516, "learning_rate": 1.9994267899621942e-05, "loss": 0.6209, "step": 885 }, { "epoch": 0.024327292696320703, "grad_norm": 0.4296434819698334, "learning_rate": 1.999425326899279e-05, "loss": 0.5294, "step": 886 }, { "epoch": 0.024354750137287204, "grad_norm": 0.38976433873176575, "learning_rate": 1.9994238619721166e-05, "loss": 0.6288, "step": 887 }, { "epoch": 0.02438220757825371, "grad_norm": 0.39353659749031067, "learning_rate": 1.99942239518071e-05, "loss": 0.6612, "step": 888 }, { "epoch": 0.02440966501922021, "grad_norm": 0.44789043068885803, "learning_rate": 1.9994209265250614e-05, "loss": 0.6279, "step": 889 }, { "epoch": 0.02443712246018671, "grad_norm": 0.3838953673839569, "learning_rate": 1.999419456005174e-05, "loss": 0.5842, "step": 890 }, { "epoch": 0.02446457990115321, "grad_norm": 0.34852832555770874, "learning_rate": 1.99941798362105e-05, "loss": 0.4732, "step": 891 }, { "epoch": 0.024492037342119716, "grad_norm": 0.39160799980163574, "learning_rate": 1.9994165093726926e-05, "loss": 0.5991, "step": 892 }, { "epoch": 0.024519494783086217, "grad_norm": 0.37991052865982056, "learning_rate": 1.9994150332601045e-05, "loss": 0.6007, "step": 893 }, { "epoch": 0.024546952224052718, "grad_norm": 0.34441566467285156, "learning_rate": 1.9994135552832887e-05, "loss": 0.5211, "step": 894 }, { "epoch": 0.02457440966501922, "grad_norm": 0.3584813177585602, "learning_rate": 1.999412075442247e-05, "loss": 0.5809, "step": 895 }, { "epoch": 0.024601867105985723, "grad_norm": 0.36873099207878113, "learning_rate": 1.9994105937369834e-05, "loss": 0.5957, "step": 896 }, { "epoch": 0.024629324546952224, "grad_norm": 0.35527503490448, "learning_rate": 1.9994091101674997e-05, "loss": 0.5939, "step": 897 }, { "epoch": 0.024656781987918725, "grad_norm": 0.4367346465587616, "learning_rate": 1.999407624733799e-05, "loss": 0.6446, "step": 898 }, { "epoch": 0.02468423942888523, "grad_norm": 0.3605690598487854, "learning_rate": 1.9994061374358848e-05, "loss": 0.5214, "step": 899 }, { "epoch": 0.02471169686985173, "grad_norm": 0.34355461597442627, "learning_rate": 1.9994046482737586e-05, "loss": 0.5625, "step": 900 }, { "epoch": 0.02473915431081823, "grad_norm": 0.3743392527103424, "learning_rate": 1.9994031572474238e-05, "loss": 0.6179, "step": 901 }, { "epoch": 0.024766611751784733, "grad_norm": 0.3856125771999359, "learning_rate": 1.999401664356883e-05, "loss": 0.6375, "step": 902 }, { "epoch": 0.024794069192751237, "grad_norm": 0.40766143798828125, "learning_rate": 1.999400169602139e-05, "loss": 0.6175, "step": 903 }, { "epoch": 0.024821526633717738, "grad_norm": 0.3600632846355438, "learning_rate": 1.9993986729831953e-05, "loss": 0.5247, "step": 904 }, { "epoch": 0.02484898407468424, "grad_norm": 0.40059682726860046, "learning_rate": 1.9993971745000537e-05, "loss": 0.6002, "step": 905 }, { "epoch": 0.02487644151565074, "grad_norm": 0.34220048785209656, "learning_rate": 1.9993956741527176e-05, "loss": 0.4656, "step": 906 }, { "epoch": 0.024903898956617244, "grad_norm": 0.36982086300849915, "learning_rate": 1.9993941719411893e-05, "loss": 0.5617, "step": 907 }, { "epoch": 0.024931356397583745, "grad_norm": 0.31426599621772766, "learning_rate": 1.9993926678654723e-05, "loss": 0.5512, "step": 908 }, { "epoch": 0.024958813838550246, "grad_norm": 0.360524982213974, "learning_rate": 1.9993911619255686e-05, "loss": 0.5855, "step": 909 }, { "epoch": 0.024986271279516747, "grad_norm": 0.3784053325653076, "learning_rate": 1.9993896541214813e-05, "loss": 0.5451, "step": 910 }, { "epoch": 0.025013728720483252, "grad_norm": 0.3463555872440338, "learning_rate": 1.999388144453214e-05, "loss": 0.5203, "step": 911 }, { "epoch": 0.025041186161449753, "grad_norm": 0.3917367458343506, "learning_rate": 1.9993866329207682e-05, "loss": 0.5777, "step": 912 }, { "epoch": 0.025068643602416254, "grad_norm": 0.39325085282325745, "learning_rate": 1.9993851195241478e-05, "loss": 0.6316, "step": 913 }, { "epoch": 0.02509610104338276, "grad_norm": 0.3751402497291565, "learning_rate": 1.999383604263355e-05, "loss": 0.525, "step": 914 }, { "epoch": 0.02512355848434926, "grad_norm": 0.40176859498023987, "learning_rate": 1.9993820871383928e-05, "loss": 0.5949, "step": 915 }, { "epoch": 0.02515101592531576, "grad_norm": 0.34659355878829956, "learning_rate": 1.999380568149264e-05, "loss": 0.588, "step": 916 }, { "epoch": 0.02517847336628226, "grad_norm": 0.3828376531600952, "learning_rate": 1.9993790472959715e-05, "loss": 0.6039, "step": 917 }, { "epoch": 0.025205930807248766, "grad_norm": 0.3541743755340576, "learning_rate": 1.9993775245785183e-05, "loss": 0.4975, "step": 918 }, { "epoch": 0.025233388248215267, "grad_norm": 0.38034483790397644, "learning_rate": 1.999375999996907e-05, "loss": 0.596, "step": 919 }, { "epoch": 0.025260845689181768, "grad_norm": 0.40771231055259705, "learning_rate": 1.9993744735511402e-05, "loss": 0.6564, "step": 920 }, { "epoch": 0.02528830313014827, "grad_norm": 0.381778359413147, "learning_rate": 1.9993729452412214e-05, "loss": 0.624, "step": 921 }, { "epoch": 0.025315760571114773, "grad_norm": 0.37894031405448914, "learning_rate": 1.9993714150671528e-05, "loss": 0.6037, "step": 922 }, { "epoch": 0.025343218012081274, "grad_norm": 0.42819708585739136, "learning_rate": 1.999369883028938e-05, "loss": 0.5761, "step": 923 }, { "epoch": 0.025370675453047775, "grad_norm": 0.3666745722293854, "learning_rate": 1.999368349126579e-05, "loss": 0.5828, "step": 924 }, { "epoch": 0.025398132894014276, "grad_norm": 0.38612958788871765, "learning_rate": 1.9993668133600793e-05, "loss": 0.6499, "step": 925 }, { "epoch": 0.02542559033498078, "grad_norm": 0.4302365183830261, "learning_rate": 1.9993652757294414e-05, "loss": 0.5088, "step": 926 }, { "epoch": 0.02545304777594728, "grad_norm": 0.35726818442344666, "learning_rate": 1.999363736234668e-05, "loss": 0.5408, "step": 927 }, { "epoch": 0.025480505216913783, "grad_norm": 0.39305344223976135, "learning_rate": 1.9993621948757625e-05, "loss": 0.584, "step": 928 }, { "epoch": 0.025507962657880287, "grad_norm": 0.36686354875564575, "learning_rate": 1.999360651652728e-05, "loss": 0.6421, "step": 929 }, { "epoch": 0.025535420098846788, "grad_norm": 0.3874843418598175, "learning_rate": 1.9993591065655665e-05, "loss": 0.6635, "step": 930 }, { "epoch": 0.02556287753981329, "grad_norm": 0.36083051562309265, "learning_rate": 1.999357559614281e-05, "loss": 0.5635, "step": 931 }, { "epoch": 0.02559033498077979, "grad_norm": 0.37970593571662903, "learning_rate": 1.999356010798875e-05, "loss": 0.5698, "step": 932 }, { "epoch": 0.025617792421746294, "grad_norm": 0.4769110083580017, "learning_rate": 1.9993544601193514e-05, "loss": 0.5988, "step": 933 }, { "epoch": 0.025645249862712795, "grad_norm": 0.3667714297771454, "learning_rate": 1.9993529075757126e-05, "loss": 0.5569, "step": 934 }, { "epoch": 0.025672707303679296, "grad_norm": 0.41776660084724426, "learning_rate": 1.9993513531679616e-05, "loss": 0.551, "step": 935 }, { "epoch": 0.025700164744645797, "grad_norm": 0.3929305970668793, "learning_rate": 1.9993497968961012e-05, "loss": 0.5556, "step": 936 }, { "epoch": 0.025727622185612302, "grad_norm": 0.3798709809780121, "learning_rate": 1.9993482387601347e-05, "loss": 0.6442, "step": 937 }, { "epoch": 0.025755079626578803, "grad_norm": 0.37590518593788147, "learning_rate": 1.9993466787600645e-05, "loss": 0.6371, "step": 938 }, { "epoch": 0.025782537067545304, "grad_norm": 0.3935871422290802, "learning_rate": 1.999345116895894e-05, "loss": 0.6423, "step": 939 }, { "epoch": 0.02580999450851181, "grad_norm": 0.41789913177490234, "learning_rate": 1.999343553167626e-05, "loss": 0.5489, "step": 940 }, { "epoch": 0.02583745194947831, "grad_norm": 0.3577101230621338, "learning_rate": 1.9993419875752632e-05, "loss": 0.485, "step": 941 }, { "epoch": 0.02586490939044481, "grad_norm": 0.3502335846424103, "learning_rate": 1.9993404201188084e-05, "loss": 0.5739, "step": 942 }, { "epoch": 0.02589236683141131, "grad_norm": 0.381287544965744, "learning_rate": 1.999338850798265e-05, "loss": 0.5111, "step": 943 }, { "epoch": 0.025919824272377816, "grad_norm": 0.36284980177879333, "learning_rate": 1.9993372796136358e-05, "loss": 0.5962, "step": 944 }, { "epoch": 0.025947281713344317, "grad_norm": 0.38347357511520386, "learning_rate": 1.9993357065649237e-05, "loss": 0.5895, "step": 945 }, { "epoch": 0.025974739154310818, "grad_norm": 0.36783695220947266, "learning_rate": 1.9993341316521314e-05, "loss": 0.6196, "step": 946 }, { "epoch": 0.02600219659527732, "grad_norm": 0.39926159381866455, "learning_rate": 1.9993325548752618e-05, "loss": 0.5397, "step": 947 }, { "epoch": 0.026029654036243823, "grad_norm": 0.41217175126075745, "learning_rate": 1.9993309762343187e-05, "loss": 0.5607, "step": 948 }, { "epoch": 0.026057111477210324, "grad_norm": 0.4106501340866089, "learning_rate": 1.9993293957293037e-05, "loss": 0.6317, "step": 949 }, { "epoch": 0.026084568918176825, "grad_norm": 0.3664214611053467, "learning_rate": 1.9993278133602202e-05, "loss": 0.6514, "step": 950 }, { "epoch": 0.026112026359143326, "grad_norm": 0.5807508826255798, "learning_rate": 1.999326229127072e-05, "loss": 0.6002, "step": 951 }, { "epoch": 0.02613948380010983, "grad_norm": 0.43355438113212585, "learning_rate": 1.999324643029861e-05, "loss": 0.592, "step": 952 }, { "epoch": 0.02616694124107633, "grad_norm": 0.37691181898117065, "learning_rate": 1.999323055068591e-05, "loss": 0.6003, "step": 953 }, { "epoch": 0.026194398682042833, "grad_norm": 0.40786439180374146, "learning_rate": 1.9993214652432645e-05, "loss": 0.4571, "step": 954 }, { "epoch": 0.026221856123009337, "grad_norm": 0.49035903811454773, "learning_rate": 1.9993198735538842e-05, "loss": 0.5913, "step": 955 }, { "epoch": 0.026249313563975838, "grad_norm": 0.38124987483024597, "learning_rate": 1.9993182800004537e-05, "loss": 0.5974, "step": 956 }, { "epoch": 0.02627677100494234, "grad_norm": 0.3930895924568176, "learning_rate": 1.9993166845829758e-05, "loss": 0.5837, "step": 957 }, { "epoch": 0.02630422844590884, "grad_norm": 0.36349180340766907, "learning_rate": 1.999315087301453e-05, "loss": 0.6227, "step": 958 }, { "epoch": 0.026331685886875345, "grad_norm": 0.3604130148887634, "learning_rate": 1.9993134881558887e-05, "loss": 0.5494, "step": 959 }, { "epoch": 0.026359143327841845, "grad_norm": 0.3484971225261688, "learning_rate": 1.9993118871462857e-05, "loss": 0.4991, "step": 960 }, { "epoch": 0.026386600768808346, "grad_norm": 0.3819892108440399, "learning_rate": 1.999310284272647e-05, "loss": 0.5932, "step": 961 }, { "epoch": 0.026414058209774847, "grad_norm": 0.34435275197029114, "learning_rate": 1.999308679534976e-05, "loss": 0.5761, "step": 962 }, { "epoch": 0.026441515650741352, "grad_norm": 0.363552063703537, "learning_rate": 1.9993070729332757e-05, "loss": 0.547, "step": 963 }, { "epoch": 0.026468973091707853, "grad_norm": 0.4074888825416565, "learning_rate": 1.999305464467548e-05, "loss": 0.5508, "step": 964 }, { "epoch": 0.026496430532674354, "grad_norm": 0.43979841470718384, "learning_rate": 1.9993038541377968e-05, "loss": 0.5906, "step": 965 }, { "epoch": 0.02652388797364086, "grad_norm": 0.35189196467399597, "learning_rate": 1.9993022419440253e-05, "loss": 0.5592, "step": 966 }, { "epoch": 0.02655134541460736, "grad_norm": 0.3797662854194641, "learning_rate": 1.9993006278862358e-05, "loss": 0.6101, "step": 967 }, { "epoch": 0.02657880285557386, "grad_norm": 0.6282324194908142, "learning_rate": 1.999299011964432e-05, "loss": 0.5364, "step": 968 }, { "epoch": 0.02660626029654036, "grad_norm": 0.38134366273880005, "learning_rate": 1.9992973941786165e-05, "loss": 0.645, "step": 969 }, { "epoch": 0.026633717737506866, "grad_norm": 0.354566752910614, "learning_rate": 1.9992957745287928e-05, "loss": 0.5745, "step": 970 }, { "epoch": 0.026661175178473367, "grad_norm": 0.35665130615234375, "learning_rate": 1.999294153014963e-05, "loss": 0.5761, "step": 971 }, { "epoch": 0.026688632619439868, "grad_norm": 0.3991217017173767, "learning_rate": 1.9992925296371307e-05, "loss": 0.582, "step": 972 }, { "epoch": 0.02671609006040637, "grad_norm": 0.4167400300502777, "learning_rate": 1.999290904395299e-05, "loss": 0.5691, "step": 973 }, { "epoch": 0.026743547501372873, "grad_norm": 0.3843333423137665, "learning_rate": 1.9992892772894713e-05, "loss": 0.6058, "step": 974 }, { "epoch": 0.026771004942339374, "grad_norm": 0.3385389745235443, "learning_rate": 1.9992876483196495e-05, "loss": 0.501, "step": 975 }, { "epoch": 0.026798462383305875, "grad_norm": 0.35857513546943665, "learning_rate": 1.9992860174858375e-05, "loss": 0.5757, "step": 976 }, { "epoch": 0.026825919824272376, "grad_norm": 0.3490881025791168, "learning_rate": 1.9992843847880384e-05, "loss": 0.5859, "step": 977 }, { "epoch": 0.02685337726523888, "grad_norm": 0.3448120951652527, "learning_rate": 1.9992827502262545e-05, "loss": 0.6027, "step": 978 }, { "epoch": 0.02688083470620538, "grad_norm": 0.34573960304260254, "learning_rate": 1.9992811138004896e-05, "loss": 0.5305, "step": 979 }, { "epoch": 0.026908292147171883, "grad_norm": 0.3538866639137268, "learning_rate": 1.9992794755107466e-05, "loss": 0.5119, "step": 980 }, { "epoch": 0.026935749588138387, "grad_norm": 0.32505765557289124, "learning_rate": 1.9992778353570284e-05, "loss": 0.6215, "step": 981 }, { "epoch": 0.026963207029104888, "grad_norm": 0.4069283902645111, "learning_rate": 1.999276193339338e-05, "loss": 0.6201, "step": 982 }, { "epoch": 0.02699066447007139, "grad_norm": 0.3572087287902832, "learning_rate": 1.9992745494576787e-05, "loss": 0.5313, "step": 983 }, { "epoch": 0.02701812191103789, "grad_norm": 0.3803759217262268, "learning_rate": 1.9992729037120533e-05, "loss": 0.5444, "step": 984 }, { "epoch": 0.027045579352004395, "grad_norm": 0.3695618510246277, "learning_rate": 1.999271256102465e-05, "loss": 0.5511, "step": 985 }, { "epoch": 0.027073036792970896, "grad_norm": 0.34669381380081177, "learning_rate": 1.999269606628917e-05, "loss": 0.6556, "step": 986 }, { "epoch": 0.027100494233937397, "grad_norm": 0.375227689743042, "learning_rate": 1.999267955291412e-05, "loss": 0.6045, "step": 987 }, { "epoch": 0.027127951674903897, "grad_norm": 0.3376089334487915, "learning_rate": 1.9992663020899536e-05, "loss": 0.5405, "step": 988 }, { "epoch": 0.027155409115870402, "grad_norm": 0.33699193596839905, "learning_rate": 1.9992646470245446e-05, "loss": 0.4245, "step": 989 }, { "epoch": 0.027182866556836903, "grad_norm": 0.43719902634620667, "learning_rate": 1.999262990095188e-05, "loss": 0.5687, "step": 990 }, { "epoch": 0.027210323997803404, "grad_norm": 0.4110218584537506, "learning_rate": 1.9992613313018872e-05, "loss": 0.6315, "step": 991 }, { "epoch": 0.027237781438769905, "grad_norm": 0.3578897714614868, "learning_rate": 1.999259670644645e-05, "loss": 0.5946, "step": 992 }, { "epoch": 0.02726523887973641, "grad_norm": 0.3775756061077118, "learning_rate": 1.999258008123464e-05, "loss": 0.6004, "step": 993 }, { "epoch": 0.02729269632070291, "grad_norm": 0.39287376403808594, "learning_rate": 1.9992563437383485e-05, "loss": 0.5453, "step": 994 }, { "epoch": 0.02732015376166941, "grad_norm": 0.33550596237182617, "learning_rate": 1.9992546774893013e-05, "loss": 0.4885, "step": 995 }, { "epoch": 0.027347611202635916, "grad_norm": 0.3771209716796875, "learning_rate": 1.9992530093763248e-05, "loss": 0.5756, "step": 996 }, { "epoch": 0.027375068643602417, "grad_norm": 0.39248737692832947, "learning_rate": 1.9992513393994222e-05, "loss": 0.5959, "step": 997 }, { "epoch": 0.027402526084568918, "grad_norm": 0.35922709107398987, "learning_rate": 1.9992496675585972e-05, "loss": 0.6094, "step": 998 }, { "epoch": 0.02742998352553542, "grad_norm": 0.34583619236946106, "learning_rate": 1.9992479938538528e-05, "loss": 0.5425, "step": 999 }, { "epoch": 0.027457440966501923, "grad_norm": 0.3636701703071594, "learning_rate": 1.999246318285192e-05, "loss": 0.6242, "step": 1000 }, { "epoch": 0.027484898407468424, "grad_norm": 0.8384482860565186, "learning_rate": 1.9992446408526177e-05, "loss": 0.5727, "step": 1001 }, { "epoch": 0.027512355848434925, "grad_norm": 0.33956918120384216, "learning_rate": 1.9992429615561334e-05, "loss": 0.5909, "step": 1002 }, { "epoch": 0.027539813289401426, "grad_norm": 0.4058050811290741, "learning_rate": 1.9992412803957417e-05, "loss": 0.6054, "step": 1003 }, { "epoch": 0.02756727073036793, "grad_norm": 0.36798644065856934, "learning_rate": 1.9992395973714465e-05, "loss": 0.5416, "step": 1004 }, { "epoch": 0.02759472817133443, "grad_norm": 0.3728574514389038, "learning_rate": 1.99923791248325e-05, "loss": 0.5384, "step": 1005 }, { "epoch": 0.027622185612300933, "grad_norm": 0.45467543601989746, "learning_rate": 1.9992362257311565e-05, "loss": 0.5715, "step": 1006 }, { "epoch": 0.027649643053267437, "grad_norm": 0.3525679409503937, "learning_rate": 1.999234537115168e-05, "loss": 0.512, "step": 1007 }, { "epoch": 0.027677100494233938, "grad_norm": 0.3841477334499359, "learning_rate": 1.9992328466352883e-05, "loss": 0.5587, "step": 1008 }, { "epoch": 0.02770455793520044, "grad_norm": 0.35613855719566345, "learning_rate": 1.9992311542915205e-05, "loss": 0.5624, "step": 1009 }, { "epoch": 0.02773201537616694, "grad_norm": 0.39970070123672485, "learning_rate": 1.999229460083868e-05, "loss": 0.6877, "step": 1010 }, { "epoch": 0.027759472817133445, "grad_norm": 0.37386220693588257, "learning_rate": 1.9992277640123336e-05, "loss": 0.4992, "step": 1011 }, { "epoch": 0.027786930258099946, "grad_norm": 0.37003666162490845, "learning_rate": 1.9992260660769198e-05, "loss": 0.6107, "step": 1012 }, { "epoch": 0.027814387699066447, "grad_norm": 0.3546055853366852, "learning_rate": 1.999224366277631e-05, "loss": 0.4786, "step": 1013 }, { "epoch": 0.027841845140032948, "grad_norm": 0.38332098722457886, "learning_rate": 1.99922266461447e-05, "loss": 0.5946, "step": 1014 }, { "epoch": 0.027869302580999452, "grad_norm": 0.35668495297431946, "learning_rate": 1.9992209610874394e-05, "loss": 0.5754, "step": 1015 }, { "epoch": 0.027896760021965953, "grad_norm": 0.3704359233379364, "learning_rate": 1.999219255696543e-05, "loss": 0.6031, "step": 1016 }, { "epoch": 0.027924217462932454, "grad_norm": 0.40456390380859375, "learning_rate": 1.9992175484417837e-05, "loss": 0.6537, "step": 1017 }, { "epoch": 0.027951674903898955, "grad_norm": 0.3857343792915344, "learning_rate": 1.9992158393231647e-05, "loss": 0.5281, "step": 1018 }, { "epoch": 0.02797913234486546, "grad_norm": 0.41663751006126404, "learning_rate": 1.9992141283406897e-05, "loss": 0.6063, "step": 1019 }, { "epoch": 0.02800658978583196, "grad_norm": 0.3914123475551605, "learning_rate": 1.9992124154943607e-05, "loss": 0.6007, "step": 1020 }, { "epoch": 0.02803404722679846, "grad_norm": 0.3512546122074127, "learning_rate": 1.999210700784182e-05, "loss": 0.627, "step": 1021 }, { "epoch": 0.028061504667764966, "grad_norm": 0.4507715702056885, "learning_rate": 1.9992089842101567e-05, "loss": 0.6909, "step": 1022 }, { "epoch": 0.028088962108731467, "grad_norm": 0.37227052450180054, "learning_rate": 1.9992072657722877e-05, "loss": 0.5917, "step": 1023 }, { "epoch": 0.028116419549697968, "grad_norm": 0.3790050446987152, "learning_rate": 1.999205545470578e-05, "loss": 0.582, "step": 1024 }, { "epoch": 0.02814387699066447, "grad_norm": 0.4073377847671509, "learning_rate": 1.999203823305031e-05, "loss": 0.5731, "step": 1025 }, { "epoch": 0.028171334431630973, "grad_norm": 0.44958582520484924, "learning_rate": 1.99920209927565e-05, "loss": 0.6524, "step": 1026 }, { "epoch": 0.028198791872597474, "grad_norm": 0.37123727798461914, "learning_rate": 1.999200373382438e-05, "loss": 0.543, "step": 1027 }, { "epoch": 0.028226249313563975, "grad_norm": 0.35310420393943787, "learning_rate": 1.9991986456253988e-05, "loss": 0.5625, "step": 1028 }, { "epoch": 0.028253706754530476, "grad_norm": 0.3420066237449646, "learning_rate": 1.999196916004535e-05, "loss": 0.5135, "step": 1029 }, { "epoch": 0.02828116419549698, "grad_norm": 0.40854907035827637, "learning_rate": 1.99919518451985e-05, "loss": 0.553, "step": 1030 }, { "epoch": 0.02830862163646348, "grad_norm": 0.3704095184803009, "learning_rate": 1.999193451171347e-05, "loss": 0.5172, "step": 1031 }, { "epoch": 0.028336079077429983, "grad_norm": 0.33348026871681213, "learning_rate": 1.9991917159590298e-05, "loss": 0.4688, "step": 1032 }, { "epoch": 0.028363536518396487, "grad_norm": 0.4510387182235718, "learning_rate": 1.9991899788829005e-05, "loss": 0.6268, "step": 1033 }, { "epoch": 0.028390993959362988, "grad_norm": 0.3433258533477783, "learning_rate": 1.9991882399429636e-05, "loss": 0.5433, "step": 1034 }, { "epoch": 0.02841845140032949, "grad_norm": 0.393830806016922, "learning_rate": 1.9991864991392212e-05, "loss": 0.5921, "step": 1035 }, { "epoch": 0.02844590884129599, "grad_norm": 0.3533097505569458, "learning_rate": 1.9991847564716777e-05, "loss": 0.6632, "step": 1036 }, { "epoch": 0.028473366282262495, "grad_norm": 0.3673514127731323, "learning_rate": 1.999183011940335e-05, "loss": 0.5775, "step": 1037 }, { "epoch": 0.028500823723228996, "grad_norm": 0.3725419342517853, "learning_rate": 1.9991812655451974e-05, "loss": 0.61, "step": 1038 }, { "epoch": 0.028528281164195497, "grad_norm": 0.375331848859787, "learning_rate": 1.999179517286268e-05, "loss": 0.5481, "step": 1039 }, { "epoch": 0.028555738605161998, "grad_norm": 0.5290784239768982, "learning_rate": 1.9991777671635498e-05, "loss": 0.5292, "step": 1040 }, { "epoch": 0.028583196046128502, "grad_norm": 0.35392361879348755, "learning_rate": 1.999176015177046e-05, "loss": 0.5858, "step": 1041 }, { "epoch": 0.028610653487095003, "grad_norm": 0.3944548964500427, "learning_rate": 1.99917426132676e-05, "loss": 0.5517, "step": 1042 }, { "epoch": 0.028638110928061504, "grad_norm": 0.3355981409549713, "learning_rate": 1.9991725056126953e-05, "loss": 0.5331, "step": 1043 }, { "epoch": 0.028665568369028005, "grad_norm": 1.168067216873169, "learning_rate": 1.999170748034855e-05, "loss": 0.6595, "step": 1044 }, { "epoch": 0.02869302580999451, "grad_norm": 0.3683520257472992, "learning_rate": 1.999168988593242e-05, "loss": 0.5855, "step": 1045 }, { "epoch": 0.02872048325096101, "grad_norm": 0.4193560779094696, "learning_rate": 1.9991672272878603e-05, "loss": 0.5877, "step": 1046 }, { "epoch": 0.02874794069192751, "grad_norm": 0.4037628471851349, "learning_rate": 1.9991654641187127e-05, "loss": 0.5953, "step": 1047 }, { "epoch": 0.028775398132894016, "grad_norm": 0.36947256326675415, "learning_rate": 1.999163699085803e-05, "loss": 0.513, "step": 1048 }, { "epoch": 0.028802855573860517, "grad_norm": 0.47230595350265503, "learning_rate": 1.9991619321891336e-05, "loss": 0.5131, "step": 1049 }, { "epoch": 0.028830313014827018, "grad_norm": 0.38674768805503845, "learning_rate": 1.9991601634287083e-05, "loss": 0.5004, "step": 1050 }, { "epoch": 0.02885777045579352, "grad_norm": 0.3782259225845337, "learning_rate": 1.9991583928045306e-05, "loss": 0.5883, "step": 1051 }, { "epoch": 0.028885227896760023, "grad_norm": 0.37078866362571716, "learning_rate": 1.9991566203166037e-05, "loss": 0.5863, "step": 1052 }, { "epoch": 0.028912685337726524, "grad_norm": 0.3927291929721832, "learning_rate": 1.9991548459649305e-05, "loss": 0.5285, "step": 1053 }, { "epoch": 0.028940142778693025, "grad_norm": 0.36980345845222473, "learning_rate": 1.999153069749515e-05, "loss": 0.5759, "step": 1054 }, { "epoch": 0.028967600219659526, "grad_norm": 0.3808493912220001, "learning_rate": 1.99915129167036e-05, "loss": 0.5403, "step": 1055 }, { "epoch": 0.02899505766062603, "grad_norm": 0.42090263962745667, "learning_rate": 1.9991495117274686e-05, "loss": 0.6497, "step": 1056 }, { "epoch": 0.02902251510159253, "grad_norm": 0.38748452067375183, "learning_rate": 1.9991477299208446e-05, "loss": 0.6084, "step": 1057 }, { "epoch": 0.029049972542559033, "grad_norm": 0.39346593618392944, "learning_rate": 1.9991459462504913e-05, "loss": 0.6373, "step": 1058 }, { "epoch": 0.029077429983525534, "grad_norm": 0.3686029613018036, "learning_rate": 1.999144160716412e-05, "loss": 0.5658, "step": 1059 }, { "epoch": 0.029104887424492038, "grad_norm": 0.365413099527359, "learning_rate": 1.99914237331861e-05, "loss": 0.5347, "step": 1060 }, { "epoch": 0.02913234486545854, "grad_norm": 0.34688809514045715, "learning_rate": 1.9991405840570886e-05, "loss": 0.5749, "step": 1061 }, { "epoch": 0.02915980230642504, "grad_norm": 0.3781895041465759, "learning_rate": 1.999138792931851e-05, "loss": 0.5949, "step": 1062 }, { "epoch": 0.029187259747391545, "grad_norm": 0.34147363901138306, "learning_rate": 1.9991369999429006e-05, "loss": 0.5444, "step": 1063 }, { "epoch": 0.029214717188358046, "grad_norm": 0.3793453872203827, "learning_rate": 1.999135205090241e-05, "loss": 0.6387, "step": 1064 }, { "epoch": 0.029242174629324547, "grad_norm": 0.42361509799957275, "learning_rate": 1.999133408373875e-05, "loss": 0.6936, "step": 1065 }, { "epoch": 0.029269632070291048, "grad_norm": 0.3378000855445862, "learning_rate": 1.9991316097938066e-05, "loss": 0.5407, "step": 1066 }, { "epoch": 0.029297089511257552, "grad_norm": 0.41527846455574036, "learning_rate": 1.999129809350039e-05, "loss": 0.6462, "step": 1067 }, { "epoch": 0.029324546952224053, "grad_norm": 0.35297030210494995, "learning_rate": 1.999128007042575e-05, "loss": 0.6128, "step": 1068 }, { "epoch": 0.029352004393190554, "grad_norm": 0.3846484124660492, "learning_rate": 1.9991262028714185e-05, "loss": 0.5488, "step": 1069 }, { "epoch": 0.029379461834157055, "grad_norm": 0.35821110010147095, "learning_rate": 1.999124396836573e-05, "loss": 0.4963, "step": 1070 }, { "epoch": 0.02940691927512356, "grad_norm": 0.4034867286682129, "learning_rate": 1.9991225889380415e-05, "loss": 0.5325, "step": 1071 }, { "epoch": 0.02943437671609006, "grad_norm": 0.4116135537624359, "learning_rate": 1.9991207791758276e-05, "loss": 0.5411, "step": 1072 }, { "epoch": 0.02946183415705656, "grad_norm": 0.3718913793563843, "learning_rate": 1.9991189675499342e-05, "loss": 0.6349, "step": 1073 }, { "epoch": 0.029489291598023066, "grad_norm": 0.36586418747901917, "learning_rate": 1.9991171540603654e-05, "loss": 0.5424, "step": 1074 }, { "epoch": 0.029516749038989567, "grad_norm": 0.3628186881542206, "learning_rate": 1.9991153387071242e-05, "loss": 0.5816, "step": 1075 }, { "epoch": 0.029544206479956068, "grad_norm": 0.3632669746875763, "learning_rate": 1.9991135214902137e-05, "loss": 0.5272, "step": 1076 }, { "epoch": 0.02957166392092257, "grad_norm": 0.36830535531044006, "learning_rate": 1.999111702409638e-05, "loss": 0.5201, "step": 1077 }, { "epoch": 0.029599121361889073, "grad_norm": 0.41280773282051086, "learning_rate": 1.9991098814654e-05, "loss": 0.4716, "step": 1078 }, { "epoch": 0.029626578802855574, "grad_norm": 0.38160622119903564, "learning_rate": 1.9991080586575027e-05, "loss": 0.6599, "step": 1079 }, { "epoch": 0.029654036243822075, "grad_norm": 0.33139970898628235, "learning_rate": 1.9991062339859507e-05, "loss": 0.4846, "step": 1080 }, { "epoch": 0.029681493684788576, "grad_norm": 0.36915552616119385, "learning_rate": 1.9991044074507463e-05, "loss": 0.543, "step": 1081 }, { "epoch": 0.02970895112575508, "grad_norm": 0.42304274439811707, "learning_rate": 1.9991025790518933e-05, "loss": 0.7284, "step": 1082 }, { "epoch": 0.029736408566721582, "grad_norm": 0.3540177047252655, "learning_rate": 1.999100748789395e-05, "loss": 0.611, "step": 1083 }, { "epoch": 0.029763866007688083, "grad_norm": 0.3768143951892853, "learning_rate": 1.9990989166632552e-05, "loss": 0.5706, "step": 1084 }, { "epoch": 0.029791323448654584, "grad_norm": 0.33821359276771545, "learning_rate": 1.9990970826734772e-05, "loss": 0.6365, "step": 1085 }, { "epoch": 0.029818780889621088, "grad_norm": 0.4141998291015625, "learning_rate": 1.9990952468200637e-05, "loss": 0.5755, "step": 1086 }, { "epoch": 0.02984623833058759, "grad_norm": 0.4378417730331421, "learning_rate": 1.9990934091030192e-05, "loss": 0.5097, "step": 1087 }, { "epoch": 0.02987369577155409, "grad_norm": 0.3431118130683899, "learning_rate": 1.9990915695223464e-05, "loss": 0.4902, "step": 1088 }, { "epoch": 0.029901153212520595, "grad_norm": 0.3951015770435333, "learning_rate": 1.999089728078049e-05, "loss": 0.6611, "step": 1089 }, { "epoch": 0.029928610653487096, "grad_norm": 0.3511399030685425, "learning_rate": 1.9990878847701306e-05, "loss": 0.5494, "step": 1090 }, { "epoch": 0.029956068094453597, "grad_norm": 0.4076765477657318, "learning_rate": 1.999086039598594e-05, "loss": 0.6112, "step": 1091 }, { "epoch": 0.029983525535420098, "grad_norm": 0.3971415162086487, "learning_rate": 1.9990841925634434e-05, "loss": 0.5042, "step": 1092 }, { "epoch": 0.030010982976386602, "grad_norm": 0.40476614236831665, "learning_rate": 1.9990823436646817e-05, "loss": 0.5755, "step": 1093 }, { "epoch": 0.030038440417353103, "grad_norm": 0.3497704267501831, "learning_rate": 1.9990804929023127e-05, "loss": 0.487, "step": 1094 }, { "epoch": 0.030065897858319604, "grad_norm": 0.3888041079044342, "learning_rate": 1.9990786402763397e-05, "loss": 0.5982, "step": 1095 }, { "epoch": 0.030093355299286105, "grad_norm": 0.38021576404571533, "learning_rate": 1.9990767857867662e-05, "loss": 0.5539, "step": 1096 }, { "epoch": 0.03012081274025261, "grad_norm": 0.36477571725845337, "learning_rate": 1.9990749294335955e-05, "loss": 0.5914, "step": 1097 }, { "epoch": 0.03014827018121911, "grad_norm": 0.3786374628543854, "learning_rate": 1.999073071216831e-05, "loss": 0.5545, "step": 1098 }, { "epoch": 0.03017572762218561, "grad_norm": 0.36763229966163635, "learning_rate": 1.9990712111364766e-05, "loss": 0.5119, "step": 1099 }, { "epoch": 0.030203185063152116, "grad_norm": 0.3615340292453766, "learning_rate": 1.9990693491925354e-05, "loss": 0.5437, "step": 1100 }, { "epoch": 0.030230642504118617, "grad_norm": 0.39813360571861267, "learning_rate": 1.9990674853850113e-05, "loss": 0.5932, "step": 1101 }, { "epoch": 0.030258099945085118, "grad_norm": 0.45678070187568665, "learning_rate": 1.9990656197139073e-05, "loss": 0.6147, "step": 1102 }, { "epoch": 0.03028555738605162, "grad_norm": 0.4664384722709656, "learning_rate": 1.999063752179227e-05, "loss": 0.5372, "step": 1103 }, { "epoch": 0.030313014827018123, "grad_norm": 0.37013372778892517, "learning_rate": 1.9990618827809742e-05, "loss": 0.5952, "step": 1104 }, { "epoch": 0.030340472267984624, "grad_norm": 0.36184975504875183, "learning_rate": 1.9990600115191515e-05, "loss": 0.5812, "step": 1105 }, { "epoch": 0.030367929708951125, "grad_norm": 0.40883713960647583, "learning_rate": 1.9990581383937635e-05, "loss": 0.6318, "step": 1106 }, { "epoch": 0.030395387149917626, "grad_norm": 0.35589897632598877, "learning_rate": 1.9990562634048133e-05, "loss": 0.52, "step": 1107 }, { "epoch": 0.03042284459088413, "grad_norm": 0.6323639154434204, "learning_rate": 1.9990543865523038e-05, "loss": 0.5457, "step": 1108 }, { "epoch": 0.030450302031850632, "grad_norm": 0.38986527919769287, "learning_rate": 1.99905250783624e-05, "loss": 0.5215, "step": 1109 }, { "epoch": 0.030477759472817133, "grad_norm": 0.38410794734954834, "learning_rate": 1.999050627256623e-05, "loss": 0.5393, "step": 1110 }, { "epoch": 0.030505216913783634, "grad_norm": 0.36383768916130066, "learning_rate": 1.9990487448134587e-05, "loss": 0.6437, "step": 1111 }, { "epoch": 0.030532674354750138, "grad_norm": 0.3929080665111542, "learning_rate": 1.9990468605067493e-05, "loss": 0.6736, "step": 1112 }, { "epoch": 0.03056013179571664, "grad_norm": 0.3574185371398926, "learning_rate": 1.999044974336499e-05, "loss": 0.5548, "step": 1113 }, { "epoch": 0.03058758923668314, "grad_norm": 0.39505741000175476, "learning_rate": 1.9990430863027107e-05, "loss": 0.6055, "step": 1114 }, { "epoch": 0.030615046677649645, "grad_norm": 0.4200162887573242, "learning_rate": 1.999041196405388e-05, "loss": 0.6549, "step": 1115 }, { "epoch": 0.030642504118616146, "grad_norm": 0.401910662651062, "learning_rate": 1.999039304644535e-05, "loss": 0.5333, "step": 1116 }, { "epoch": 0.030669961559582647, "grad_norm": 0.36436882615089417, "learning_rate": 1.9990374110201544e-05, "loss": 0.5921, "step": 1117 }, { "epoch": 0.030697419000549148, "grad_norm": 0.40377339720726013, "learning_rate": 1.9990355155322506e-05, "loss": 0.6137, "step": 1118 }, { "epoch": 0.030724876441515652, "grad_norm": 0.38865959644317627, "learning_rate": 1.9990336181808266e-05, "loss": 0.5947, "step": 1119 }, { "epoch": 0.030752333882482153, "grad_norm": 0.3577164113521576, "learning_rate": 1.999031718965886e-05, "loss": 0.5676, "step": 1120 }, { "epoch": 0.030779791323448654, "grad_norm": 0.3906039893627167, "learning_rate": 1.9990298178874322e-05, "loss": 0.5914, "step": 1121 }, { "epoch": 0.030807248764415155, "grad_norm": 0.41052085161209106, "learning_rate": 1.999027914945469e-05, "loss": 0.5646, "step": 1122 }, { "epoch": 0.03083470620538166, "grad_norm": 0.34488677978515625, "learning_rate": 1.9990260101400003e-05, "loss": 0.5343, "step": 1123 }, { "epoch": 0.03086216364634816, "grad_norm": 0.3855760991573334, "learning_rate": 1.9990241034710288e-05, "loss": 0.6124, "step": 1124 }, { "epoch": 0.03088962108731466, "grad_norm": 0.4068845510482788, "learning_rate": 1.9990221949385588e-05, "loss": 0.6484, "step": 1125 }, { "epoch": 0.030917078528281162, "grad_norm": 0.3437855839729309, "learning_rate": 1.9990202845425935e-05, "loss": 0.4952, "step": 1126 }, { "epoch": 0.030944535969247667, "grad_norm": 0.5546271204948425, "learning_rate": 1.999018372283136e-05, "loss": 0.626, "step": 1127 }, { "epoch": 0.030971993410214168, "grad_norm": 0.3687724173069, "learning_rate": 1.999016458160191e-05, "loss": 0.6101, "step": 1128 }, { "epoch": 0.03099945085118067, "grad_norm": 0.42107245326042175, "learning_rate": 1.9990145421737613e-05, "loss": 0.5464, "step": 1129 }, { "epoch": 0.031026908292147173, "grad_norm": 0.40520480275154114, "learning_rate": 1.9990126243238506e-05, "loss": 0.5827, "step": 1130 }, { "epoch": 0.031054365733113674, "grad_norm": 0.4114346504211426, "learning_rate": 1.9990107046104623e-05, "loss": 0.5725, "step": 1131 }, { "epoch": 0.031081823174080175, "grad_norm": 0.33798307180404663, "learning_rate": 1.9990087830336005e-05, "loss": 0.5956, "step": 1132 }, { "epoch": 0.031109280615046676, "grad_norm": 0.3678004741668701, "learning_rate": 1.9990068595932682e-05, "loss": 0.5856, "step": 1133 }, { "epoch": 0.03113673805601318, "grad_norm": 0.4111005961894989, "learning_rate": 1.9990049342894696e-05, "loss": 0.5893, "step": 1134 }, { "epoch": 0.031164195496979682, "grad_norm": 0.33263298869132996, "learning_rate": 1.9990030071222076e-05, "loss": 0.4931, "step": 1135 }, { "epoch": 0.031191652937946183, "grad_norm": 0.38465067744255066, "learning_rate": 1.9990010780914864e-05, "loss": 0.6097, "step": 1136 }, { "epoch": 0.031219110378912684, "grad_norm": 0.4152178168296814, "learning_rate": 1.9989991471973093e-05, "loss": 0.5101, "step": 1137 }, { "epoch": 0.031246567819879188, "grad_norm": 0.34545642137527466, "learning_rate": 1.99899721443968e-05, "loss": 0.5723, "step": 1138 }, { "epoch": 0.03127402526084569, "grad_norm": 0.32596585154533386, "learning_rate": 1.9989952798186016e-05, "loss": 0.5128, "step": 1139 }, { "epoch": 0.03130148270181219, "grad_norm": 0.3676753342151642, "learning_rate": 1.9989933433340785e-05, "loss": 0.6046, "step": 1140 }, { "epoch": 0.03132894014277869, "grad_norm": 0.38751524686813354, "learning_rate": 1.9989914049861145e-05, "loss": 0.5558, "step": 1141 }, { "epoch": 0.03135639758374519, "grad_norm": 0.3586612045764923, "learning_rate": 1.9989894647747118e-05, "loss": 0.4406, "step": 1142 }, { "epoch": 0.0313838550247117, "grad_norm": 0.4083050489425659, "learning_rate": 1.9989875226998753e-05, "loss": 0.5747, "step": 1143 }, { "epoch": 0.0314113124656782, "grad_norm": 0.4099906086921692, "learning_rate": 1.9989855787616083e-05, "loss": 0.6319, "step": 1144 }, { "epoch": 0.0314387699066447, "grad_norm": 0.42153939604759216, "learning_rate": 1.9989836329599142e-05, "loss": 0.5808, "step": 1145 }, { "epoch": 0.0314662273476112, "grad_norm": 0.366277813911438, "learning_rate": 1.9989816852947968e-05, "loss": 0.53, "step": 1146 }, { "epoch": 0.031493684788577704, "grad_norm": 0.3842029273509979, "learning_rate": 1.9989797357662598e-05, "loss": 0.6357, "step": 1147 }, { "epoch": 0.031521142229544205, "grad_norm": 0.37428995966911316, "learning_rate": 1.9989777843743068e-05, "loss": 0.5063, "step": 1148 }, { "epoch": 0.031548599670510706, "grad_norm": 0.3466823399066925, "learning_rate": 1.9989758311189413e-05, "loss": 0.5349, "step": 1149 }, { "epoch": 0.03157605711147721, "grad_norm": 0.355495810508728, "learning_rate": 1.998973876000167e-05, "loss": 0.5047, "step": 1150 }, { "epoch": 0.031603514552443715, "grad_norm": 0.3858616054058075, "learning_rate": 1.9989719190179877e-05, "loss": 0.5748, "step": 1151 }, { "epoch": 0.031630971993410216, "grad_norm": 0.38830262422561646, "learning_rate": 1.9989699601724067e-05, "loss": 0.5983, "step": 1152 }, { "epoch": 0.03165842943437672, "grad_norm": 0.3450514078140259, "learning_rate": 1.9989679994634285e-05, "loss": 0.6177, "step": 1153 }, { "epoch": 0.03168588687534322, "grad_norm": 0.3923671841621399, "learning_rate": 1.9989660368910557e-05, "loss": 0.5356, "step": 1154 }, { "epoch": 0.03171334431630972, "grad_norm": 0.3242569863796234, "learning_rate": 1.998964072455292e-05, "loss": 0.4951, "step": 1155 }, { "epoch": 0.03174080175727622, "grad_norm": 0.38521701097488403, "learning_rate": 1.9989621061561423e-05, "loss": 0.5055, "step": 1156 }, { "epoch": 0.03176825919824272, "grad_norm": 0.3671351969242096, "learning_rate": 1.998960137993609e-05, "loss": 0.4833, "step": 1157 }, { "epoch": 0.03179571663920923, "grad_norm": 0.34628114104270935, "learning_rate": 1.998958167967696e-05, "loss": 0.539, "step": 1158 }, { "epoch": 0.03182317408017573, "grad_norm": 0.32310134172439575, "learning_rate": 1.9989561960784077e-05, "loss": 0.432, "step": 1159 }, { "epoch": 0.03185063152114223, "grad_norm": 0.3981914222240448, "learning_rate": 1.998954222325747e-05, "loss": 0.6005, "step": 1160 }, { "epoch": 0.03187808896210873, "grad_norm": 0.5005843639373779, "learning_rate": 1.998952246709718e-05, "loss": 0.5962, "step": 1161 }, { "epoch": 0.03190554640307523, "grad_norm": 0.4504685401916504, "learning_rate": 1.9989502692303238e-05, "loss": 0.5613, "step": 1162 }, { "epoch": 0.031933003844041734, "grad_norm": 0.3916843831539154, "learning_rate": 1.9989482898875687e-05, "loss": 0.6205, "step": 1163 }, { "epoch": 0.031960461285008235, "grad_norm": 0.35856735706329346, "learning_rate": 1.9989463086814566e-05, "loss": 0.5617, "step": 1164 }, { "epoch": 0.031987918725974736, "grad_norm": 0.3771199584007263, "learning_rate": 1.9989443256119903e-05, "loss": 0.6294, "step": 1165 }, { "epoch": 0.032015376166941244, "grad_norm": 0.35168740153312683, "learning_rate": 1.9989423406791742e-05, "loss": 0.5323, "step": 1166 }, { "epoch": 0.032042833607907745, "grad_norm": 0.4223398268222809, "learning_rate": 1.998940353883012e-05, "loss": 0.6888, "step": 1167 }, { "epoch": 0.032070291048874246, "grad_norm": 0.39900821447372437, "learning_rate": 1.998938365223507e-05, "loss": 0.6069, "step": 1168 }, { "epoch": 0.03209774848984075, "grad_norm": 0.35758841037750244, "learning_rate": 1.998936374700663e-05, "loss": 0.5155, "step": 1169 }, { "epoch": 0.03212520593080725, "grad_norm": 0.3440866470336914, "learning_rate": 1.9989343823144842e-05, "loss": 0.566, "step": 1170 }, { "epoch": 0.03215266337177375, "grad_norm": 0.35869526863098145, "learning_rate": 1.9989323880649738e-05, "loss": 0.5594, "step": 1171 }, { "epoch": 0.03218012081274025, "grad_norm": 0.39065420627593994, "learning_rate": 1.9989303919521356e-05, "loss": 0.5445, "step": 1172 }, { "epoch": 0.03220757825370676, "grad_norm": 0.3631516993045807, "learning_rate": 1.9989283939759737e-05, "loss": 0.5016, "step": 1173 }, { "epoch": 0.03223503569467326, "grad_norm": 0.3453170657157898, "learning_rate": 1.998926394136491e-05, "loss": 0.5364, "step": 1174 }, { "epoch": 0.03226249313563976, "grad_norm": 0.366641640663147, "learning_rate": 1.998924392433692e-05, "loss": 0.5915, "step": 1175 }, { "epoch": 0.03228995057660626, "grad_norm": 0.3558062016963959, "learning_rate": 1.9989223888675805e-05, "loss": 0.5534, "step": 1176 }, { "epoch": 0.03231740801757276, "grad_norm": 0.3809729218482971, "learning_rate": 1.9989203834381595e-05, "loss": 0.6103, "step": 1177 }, { "epoch": 0.03234486545853926, "grad_norm": 0.3748064339160919, "learning_rate": 1.998918376145433e-05, "loss": 0.5624, "step": 1178 }, { "epoch": 0.032372322899505764, "grad_norm": 0.3522144556045532, "learning_rate": 1.9989163669894055e-05, "loss": 0.5899, "step": 1179 }, { "epoch": 0.032399780340472265, "grad_norm": 0.38437071442604065, "learning_rate": 1.99891435597008e-05, "loss": 0.5974, "step": 1180 }, { "epoch": 0.03242723778143877, "grad_norm": 0.4200361669063568, "learning_rate": 1.9989123430874602e-05, "loss": 0.6342, "step": 1181 }, { "epoch": 0.03245469522240527, "grad_norm": 0.40029168128967285, "learning_rate": 1.99891032834155e-05, "loss": 0.6315, "step": 1182 }, { "epoch": 0.032482152663371774, "grad_norm": 0.4482196271419525, "learning_rate": 1.9989083117323535e-05, "loss": 0.6351, "step": 1183 }, { "epoch": 0.032509610104338275, "grad_norm": 0.3813280463218689, "learning_rate": 1.998906293259874e-05, "loss": 0.6116, "step": 1184 }, { "epoch": 0.032537067545304776, "grad_norm": 0.4204625189304352, "learning_rate": 1.9989042729241154e-05, "loss": 0.4899, "step": 1185 }, { "epoch": 0.03256452498627128, "grad_norm": 0.40458056330680847, "learning_rate": 1.9989022507250815e-05, "loss": 0.519, "step": 1186 }, { "epoch": 0.03259198242723778, "grad_norm": 0.3631324768066406, "learning_rate": 1.9989002266627764e-05, "loss": 0.6147, "step": 1187 }, { "epoch": 0.032619439868204286, "grad_norm": 0.37989094853401184, "learning_rate": 1.9988982007372035e-05, "loss": 0.5939, "step": 1188 }, { "epoch": 0.03264689730917079, "grad_norm": 0.3791643977165222, "learning_rate": 1.998896172948366e-05, "loss": 0.6607, "step": 1189 }, { "epoch": 0.03267435475013729, "grad_norm": 0.34027811884880066, "learning_rate": 1.9988941432962693e-05, "loss": 0.5404, "step": 1190 }, { "epoch": 0.03270181219110379, "grad_norm": 0.3630107045173645, "learning_rate": 1.9988921117809156e-05, "loss": 0.533, "step": 1191 }, { "epoch": 0.03272926963207029, "grad_norm": 0.4039234220981598, "learning_rate": 1.9988900784023096e-05, "loss": 0.5887, "step": 1192 }, { "epoch": 0.03275672707303679, "grad_norm": 0.40917861461639404, "learning_rate": 1.9988880431604544e-05, "loss": 0.6248, "step": 1193 }, { "epoch": 0.03278418451400329, "grad_norm": 0.36764654517173767, "learning_rate": 1.9988860060553547e-05, "loss": 0.5172, "step": 1194 }, { "epoch": 0.0328116419549698, "grad_norm": 0.3944789469242096, "learning_rate": 1.9988839670870136e-05, "loss": 0.6235, "step": 1195 }, { "epoch": 0.0328390993959363, "grad_norm": 0.364243745803833, "learning_rate": 1.9988819262554348e-05, "loss": 0.5731, "step": 1196 }, { "epoch": 0.0328665568369028, "grad_norm": 0.3523035943508148, "learning_rate": 1.9988798835606227e-05, "loss": 0.524, "step": 1197 }, { "epoch": 0.0328940142778693, "grad_norm": 0.32980474829673767, "learning_rate": 1.9988778390025808e-05, "loss": 0.5547, "step": 1198 }, { "epoch": 0.032921471718835804, "grad_norm": 0.369876503944397, "learning_rate": 1.9988757925813127e-05, "loss": 0.5899, "step": 1199 }, { "epoch": 0.032948929159802305, "grad_norm": 0.35442253947257996, "learning_rate": 1.9988737442968228e-05, "loss": 0.5681, "step": 1200 }, { "epoch": 0.032976386600768806, "grad_norm": 0.37036654353141785, "learning_rate": 1.9988716941491142e-05, "loss": 0.5953, "step": 1201 }, { "epoch": 0.03300384404173531, "grad_norm": 0.40857645869255066, "learning_rate": 1.9988696421381913e-05, "loss": 0.5665, "step": 1202 }, { "epoch": 0.033031301482701815, "grad_norm": 0.38524600863456726, "learning_rate": 1.9988675882640578e-05, "loss": 0.5646, "step": 1203 }, { "epoch": 0.033058758923668316, "grad_norm": 1.2550793886184692, "learning_rate": 1.9988655325267175e-05, "loss": 0.6566, "step": 1204 }, { "epoch": 0.03308621636463482, "grad_norm": 0.38972964882850647, "learning_rate": 1.9988634749261737e-05, "loss": 0.6225, "step": 1205 }, { "epoch": 0.03311367380560132, "grad_norm": 0.35768184065818787, "learning_rate": 1.998861415462431e-05, "loss": 0.4799, "step": 1206 }, { "epoch": 0.03314113124656782, "grad_norm": 0.3882080316543579, "learning_rate": 1.998859354135493e-05, "loss": 0.6168, "step": 1207 }, { "epoch": 0.03316858868753432, "grad_norm": 0.34855368733406067, "learning_rate": 1.9988572909453635e-05, "loss": 0.5341, "step": 1208 }, { "epoch": 0.03319604612850082, "grad_norm": 0.3344106674194336, "learning_rate": 1.9988552258920465e-05, "loss": 0.5258, "step": 1209 }, { "epoch": 0.03322350356946733, "grad_norm": 0.44499003887176514, "learning_rate": 1.9988531589755453e-05, "loss": 0.5114, "step": 1210 }, { "epoch": 0.03325096101043383, "grad_norm": 0.3726302683353424, "learning_rate": 1.9988510901958647e-05, "loss": 0.586, "step": 1211 }, { "epoch": 0.03327841845140033, "grad_norm": 0.37287113070487976, "learning_rate": 1.9988490195530078e-05, "loss": 0.6161, "step": 1212 }, { "epoch": 0.03330587589236683, "grad_norm": 0.35752734541893005, "learning_rate": 1.9988469470469785e-05, "loss": 0.5638, "step": 1213 }, { "epoch": 0.03333333333333333, "grad_norm": 0.3563767373561859, "learning_rate": 1.998844872677781e-05, "loss": 0.5995, "step": 1214 }, { "epoch": 0.033360790774299834, "grad_norm": 0.39681869745254517, "learning_rate": 1.9988427964454188e-05, "loss": 0.6785, "step": 1215 }, { "epoch": 0.033388248215266335, "grad_norm": 0.3929237127304077, "learning_rate": 1.998840718349896e-05, "loss": 0.544, "step": 1216 }, { "epoch": 0.033415705656232836, "grad_norm": 0.3811745047569275, "learning_rate": 1.998838638391217e-05, "loss": 0.5654, "step": 1217 }, { "epoch": 0.033443163097199344, "grad_norm": 0.39695554971694946, "learning_rate": 1.9988365565693848e-05, "loss": 0.5741, "step": 1218 }, { "epoch": 0.033470620538165845, "grad_norm": 0.36237284541130066, "learning_rate": 1.9988344728844035e-05, "loss": 0.5865, "step": 1219 }, { "epoch": 0.033498077979132346, "grad_norm": 0.41349294781684875, "learning_rate": 1.9988323873362773e-05, "loss": 0.5854, "step": 1220 }, { "epoch": 0.03352553542009885, "grad_norm": 0.3986033499240875, "learning_rate": 1.99883029992501e-05, "loss": 0.66, "step": 1221 }, { "epoch": 0.03355299286106535, "grad_norm": 0.49217504262924194, "learning_rate": 1.9988282106506052e-05, "loss": 0.4488, "step": 1222 }, { "epoch": 0.03358045030203185, "grad_norm": 0.3621750771999359, "learning_rate": 1.9988261195130668e-05, "loss": 0.4523, "step": 1223 }, { "epoch": 0.03360790774299835, "grad_norm": 0.36717796325683594, "learning_rate": 1.998824026512399e-05, "loss": 0.6954, "step": 1224 }, { "epoch": 0.03363536518396486, "grad_norm": 0.3753148019313812, "learning_rate": 1.998821931648606e-05, "loss": 0.5744, "step": 1225 }, { "epoch": 0.03366282262493136, "grad_norm": 0.34411847591400146, "learning_rate": 1.9988198349216913e-05, "loss": 0.4994, "step": 1226 }, { "epoch": 0.03369028006589786, "grad_norm": 0.39986324310302734, "learning_rate": 1.998817736331658e-05, "loss": 0.5856, "step": 1227 }, { "epoch": 0.03371773750686436, "grad_norm": 0.40316906571388245, "learning_rate": 1.9988156358785117e-05, "loss": 0.5488, "step": 1228 }, { "epoch": 0.03374519494783086, "grad_norm": 0.3437943756580353, "learning_rate": 1.9988135335622553e-05, "loss": 0.5097, "step": 1229 }, { "epoch": 0.03377265238879736, "grad_norm": 0.4427289664745331, "learning_rate": 1.9988114293828927e-05, "loss": 0.6977, "step": 1230 }, { "epoch": 0.033800109829763864, "grad_norm": 0.3682600259780884, "learning_rate": 1.998809323340428e-05, "loss": 0.5585, "step": 1231 }, { "epoch": 0.033827567270730365, "grad_norm": 0.4105755388736725, "learning_rate": 1.9988072154348654e-05, "loss": 0.5968, "step": 1232 }, { "epoch": 0.03385502471169687, "grad_norm": 0.35161149501800537, "learning_rate": 1.998805105666208e-05, "loss": 0.4807, "step": 1233 }, { "epoch": 0.033882482152663373, "grad_norm": 0.36217236518859863, "learning_rate": 1.9988029940344606e-05, "loss": 0.6411, "step": 1234 }, { "epoch": 0.033909939593629874, "grad_norm": 0.374201238155365, "learning_rate": 1.9988008805396268e-05, "loss": 0.5644, "step": 1235 }, { "epoch": 0.033937397034596375, "grad_norm": 0.4078959822654724, "learning_rate": 1.9987987651817108e-05, "loss": 0.6384, "step": 1236 }, { "epoch": 0.033964854475562876, "grad_norm": 0.3493024408817291, "learning_rate": 1.9987966479607164e-05, "loss": 0.508, "step": 1237 }, { "epoch": 0.03399231191652938, "grad_norm": 0.34062570333480835, "learning_rate": 1.998794528876647e-05, "loss": 0.5053, "step": 1238 }, { "epoch": 0.03401976935749588, "grad_norm": 0.38363513350486755, "learning_rate": 1.9987924079295072e-05, "loss": 0.5733, "step": 1239 }, { "epoch": 0.034047226798462386, "grad_norm": 0.37566471099853516, "learning_rate": 1.9987902851193008e-05, "loss": 0.586, "step": 1240 }, { "epoch": 0.03407468423942889, "grad_norm": 0.4337230920791626, "learning_rate": 1.998788160446032e-05, "loss": 0.5801, "step": 1241 }, { "epoch": 0.03410214168039539, "grad_norm": 0.3737572431564331, "learning_rate": 1.9987860339097043e-05, "loss": 0.6166, "step": 1242 }, { "epoch": 0.03412959912136189, "grad_norm": 0.3531170189380646, "learning_rate": 1.998783905510322e-05, "loss": 0.6043, "step": 1243 }, { "epoch": 0.03415705656232839, "grad_norm": 0.4008699059486389, "learning_rate": 1.9987817752478885e-05, "loss": 0.663, "step": 1244 }, { "epoch": 0.03418451400329489, "grad_norm": 0.3955473005771637, "learning_rate": 1.9987796431224088e-05, "loss": 0.6157, "step": 1245 }, { "epoch": 0.03421197144426139, "grad_norm": 0.3951888084411621, "learning_rate": 1.998777509133886e-05, "loss": 0.5827, "step": 1246 }, { "epoch": 0.03423942888522789, "grad_norm": 0.3929520845413208, "learning_rate": 1.998775373282324e-05, "loss": 0.6527, "step": 1247 }, { "epoch": 0.0342668863261944, "grad_norm": 0.4121510684490204, "learning_rate": 1.9987732355677275e-05, "loss": 0.6603, "step": 1248 }, { "epoch": 0.0342943437671609, "grad_norm": 0.35039830207824707, "learning_rate": 1.9987710959901005e-05, "loss": 0.6157, "step": 1249 }, { "epoch": 0.0343218012081274, "grad_norm": 0.375547856092453, "learning_rate": 1.9987689545494465e-05, "loss": 0.5631, "step": 1250 }, { "epoch": 0.034349258649093904, "grad_norm": 0.36310890316963196, "learning_rate": 1.9987668112457692e-05, "loss": 0.5549, "step": 1251 }, { "epoch": 0.034376716090060405, "grad_norm": 0.42688703536987305, "learning_rate": 1.9987646660790735e-05, "loss": 0.6195, "step": 1252 }, { "epoch": 0.034404173531026906, "grad_norm": 0.3575834035873413, "learning_rate": 1.9987625190493624e-05, "loss": 0.5894, "step": 1253 }, { "epoch": 0.03443163097199341, "grad_norm": 0.342949241399765, "learning_rate": 1.998760370156641e-05, "loss": 0.5877, "step": 1254 }, { "epoch": 0.034459088412959915, "grad_norm": 0.3697826862335205, "learning_rate": 1.9987582194009125e-05, "loss": 0.5591, "step": 1255 }, { "epoch": 0.034486545853926416, "grad_norm": 0.3995330333709717, "learning_rate": 1.998756066782181e-05, "loss": 0.6008, "step": 1256 }, { "epoch": 0.03451400329489292, "grad_norm": 0.6094611287117004, "learning_rate": 1.9987539123004506e-05, "loss": 0.6265, "step": 1257 }, { "epoch": 0.03454146073585942, "grad_norm": 0.34113338589668274, "learning_rate": 1.9987517559557257e-05, "loss": 0.5155, "step": 1258 }, { "epoch": 0.03456891817682592, "grad_norm": 0.363423228263855, "learning_rate": 1.99874959774801e-05, "loss": 0.5066, "step": 1259 }, { "epoch": 0.03459637561779242, "grad_norm": 0.3834076225757599, "learning_rate": 1.9987474376773072e-05, "loss": 0.6517, "step": 1260 }, { "epoch": 0.03462383305875892, "grad_norm": 0.36704230308532715, "learning_rate": 1.998745275743622e-05, "loss": 0.531, "step": 1261 }, { "epoch": 0.03465129049972543, "grad_norm": 0.35406729578971863, "learning_rate": 1.998743111946958e-05, "loss": 0.5549, "step": 1262 }, { "epoch": 0.03467874794069193, "grad_norm": 0.3688981533050537, "learning_rate": 1.9987409462873194e-05, "loss": 0.4874, "step": 1263 }, { "epoch": 0.03470620538165843, "grad_norm": 0.4087311923503876, "learning_rate": 1.99873877876471e-05, "loss": 0.5976, "step": 1264 }, { "epoch": 0.03473366282262493, "grad_norm": 0.3871917426586151, "learning_rate": 1.9987366093791346e-05, "loss": 0.5881, "step": 1265 }, { "epoch": 0.03476112026359143, "grad_norm": 0.40225958824157715, "learning_rate": 1.998734438130596e-05, "loss": 0.5821, "step": 1266 }, { "epoch": 0.034788577704557934, "grad_norm": 0.35959210991859436, "learning_rate": 1.998732265019099e-05, "loss": 0.624, "step": 1267 }, { "epoch": 0.034816035145524435, "grad_norm": 0.34677913784980774, "learning_rate": 1.9987300900446476e-05, "loss": 0.5069, "step": 1268 }, { "epoch": 0.034843492586490936, "grad_norm": 0.34094998240470886, "learning_rate": 1.998727913207246e-05, "loss": 0.5799, "step": 1269 }, { "epoch": 0.034870950027457444, "grad_norm": 0.3932151794433594, "learning_rate": 1.998725734506898e-05, "loss": 0.675, "step": 1270 }, { "epoch": 0.034898407468423945, "grad_norm": 0.358003705739975, "learning_rate": 1.9987235539436076e-05, "loss": 0.5519, "step": 1271 }, { "epoch": 0.034925864909390446, "grad_norm": 0.3688274025917053, "learning_rate": 1.9987213715173793e-05, "loss": 0.5101, "step": 1272 }, { "epoch": 0.03495332235035695, "grad_norm": 0.4790780246257782, "learning_rate": 1.998719187228217e-05, "loss": 0.5313, "step": 1273 }, { "epoch": 0.03498077979132345, "grad_norm": 0.5002806186676025, "learning_rate": 1.998717001076124e-05, "loss": 0.6158, "step": 1274 }, { "epoch": 0.03500823723228995, "grad_norm": 0.37346526980400085, "learning_rate": 1.9987148130611056e-05, "loss": 0.5024, "step": 1275 }, { "epoch": 0.03503569467325645, "grad_norm": 0.3586314022541046, "learning_rate": 1.9987126231831652e-05, "loss": 0.6421, "step": 1276 }, { "epoch": 0.03506315211422296, "grad_norm": 0.3577103316783905, "learning_rate": 1.998710431442307e-05, "loss": 0.6036, "step": 1277 }, { "epoch": 0.03509060955518946, "grad_norm": 0.3347107470035553, "learning_rate": 1.998708237838535e-05, "loss": 0.4965, "step": 1278 }, { "epoch": 0.03511806699615596, "grad_norm": 0.45199474692344666, "learning_rate": 1.9987060423718533e-05, "loss": 0.5844, "step": 1279 }, { "epoch": 0.03514552443712246, "grad_norm": 0.39720168709754944, "learning_rate": 1.9987038450422666e-05, "loss": 0.6527, "step": 1280 }, { "epoch": 0.03517298187808896, "grad_norm": 0.4401092231273651, "learning_rate": 1.998701645849778e-05, "loss": 0.5747, "step": 1281 }, { "epoch": 0.03520043931905546, "grad_norm": 0.3654850423336029, "learning_rate": 1.998699444794392e-05, "loss": 0.6415, "step": 1282 }, { "epoch": 0.035227896760021964, "grad_norm": 0.4403817355632782, "learning_rate": 1.998697241876113e-05, "loss": 0.6674, "step": 1283 }, { "epoch": 0.035255354200988465, "grad_norm": 0.35067018866539, "learning_rate": 1.9986950370949446e-05, "loss": 0.5749, "step": 1284 }, { "epoch": 0.03528281164195497, "grad_norm": 0.4076237678527832, "learning_rate": 1.998692830450892e-05, "loss": 0.6251, "step": 1285 }, { "epoch": 0.035310269082921474, "grad_norm": 0.3908742368221283, "learning_rate": 1.9986906219439575e-05, "loss": 0.6396, "step": 1286 }, { "epoch": 0.035337726523887975, "grad_norm": 0.3406464457511902, "learning_rate": 1.9986884115741468e-05, "loss": 0.4885, "step": 1287 }, { "epoch": 0.035365183964854476, "grad_norm": 0.3983194828033447, "learning_rate": 1.9986861993414637e-05, "loss": 0.5469, "step": 1288 }, { "epoch": 0.035392641405820977, "grad_norm": 0.3528475761413574, "learning_rate": 1.9986839852459118e-05, "loss": 0.5619, "step": 1289 }, { "epoch": 0.03542009884678748, "grad_norm": 0.4188925325870514, "learning_rate": 1.9986817692874956e-05, "loss": 0.6016, "step": 1290 }, { "epoch": 0.03544755628775398, "grad_norm": 0.33856260776519775, "learning_rate": 1.998679551466219e-05, "loss": 0.5218, "step": 1291 }, { "epoch": 0.035475013728720486, "grad_norm": 0.39022311568260193, "learning_rate": 1.9986773317820866e-05, "loss": 0.5955, "step": 1292 }, { "epoch": 0.03550247116968699, "grad_norm": 0.38043728470802307, "learning_rate": 1.998675110235102e-05, "loss": 0.6192, "step": 1293 }, { "epoch": 0.03552992861065349, "grad_norm": 0.34716635942459106, "learning_rate": 1.9986728868252694e-05, "loss": 0.5419, "step": 1294 }, { "epoch": 0.03555738605161999, "grad_norm": 0.390146404504776, "learning_rate": 1.9986706615525933e-05, "loss": 0.641, "step": 1295 }, { "epoch": 0.03558484349258649, "grad_norm": 0.39541980624198914, "learning_rate": 1.998668434417078e-05, "loss": 0.6573, "step": 1296 }, { "epoch": 0.03561230093355299, "grad_norm": 0.3927261233329773, "learning_rate": 1.9986662054187268e-05, "loss": 0.5834, "step": 1297 }, { "epoch": 0.03563975837451949, "grad_norm": 0.44010382890701294, "learning_rate": 1.9986639745575445e-05, "loss": 0.6526, "step": 1298 }, { "epoch": 0.03566721581548599, "grad_norm": 0.3553536534309387, "learning_rate": 1.9986617418335355e-05, "loss": 0.6426, "step": 1299 }, { "epoch": 0.0356946732564525, "grad_norm": 0.34526264667510986, "learning_rate": 1.9986595072467035e-05, "loss": 0.6102, "step": 1300 }, { "epoch": 0.035722130697419, "grad_norm": 0.3611888587474823, "learning_rate": 1.9986572707970527e-05, "loss": 0.6482, "step": 1301 }, { "epoch": 0.0357495881383855, "grad_norm": 0.3979525566101074, "learning_rate": 1.998655032484587e-05, "loss": 0.5745, "step": 1302 }, { "epoch": 0.035777045579352004, "grad_norm": 0.34665295481681824, "learning_rate": 1.9986527923093114e-05, "loss": 0.615, "step": 1303 }, { "epoch": 0.035804503020318505, "grad_norm": 0.3473665714263916, "learning_rate": 1.9986505502712295e-05, "loss": 0.5446, "step": 1304 }, { "epoch": 0.035831960461285006, "grad_norm": 0.46632325649261475, "learning_rate": 1.9986483063703453e-05, "loss": 0.5934, "step": 1305 }, { "epoch": 0.03585941790225151, "grad_norm": 0.3431428372859955, "learning_rate": 1.998646060606664e-05, "loss": 0.4752, "step": 1306 }, { "epoch": 0.035886875343218015, "grad_norm": 0.3739601969718933, "learning_rate": 1.9986438129801886e-05, "loss": 0.6084, "step": 1307 }, { "epoch": 0.035914332784184516, "grad_norm": 0.3794978857040405, "learning_rate": 1.9986415634909236e-05, "loss": 0.518, "step": 1308 }, { "epoch": 0.03594179022515102, "grad_norm": 0.3524473011493683, "learning_rate": 1.9986393121388735e-05, "loss": 0.5502, "step": 1309 }, { "epoch": 0.03596924766611752, "grad_norm": 0.3723267614841461, "learning_rate": 1.9986370589240425e-05, "loss": 0.6309, "step": 1310 }, { "epoch": 0.03599670510708402, "grad_norm": 0.41200733184814453, "learning_rate": 1.9986348038464346e-05, "loss": 0.6095, "step": 1311 }, { "epoch": 0.03602416254805052, "grad_norm": 0.3638576865196228, "learning_rate": 1.9986325469060538e-05, "loss": 0.6397, "step": 1312 }, { "epoch": 0.03605161998901702, "grad_norm": 0.4164491295814514, "learning_rate": 1.9986302881029046e-05, "loss": 0.6418, "step": 1313 }, { "epoch": 0.03607907742998352, "grad_norm": 0.3762272298336029, "learning_rate": 1.9986280274369917e-05, "loss": 0.6263, "step": 1314 }, { "epoch": 0.03610653487095003, "grad_norm": 0.3653043210506439, "learning_rate": 1.998625764908318e-05, "loss": 0.567, "step": 1315 }, { "epoch": 0.03613399231191653, "grad_norm": 0.4219542145729065, "learning_rate": 1.9986235005168894e-05, "loss": 0.5943, "step": 1316 }, { "epoch": 0.03616144975288303, "grad_norm": 0.41777148842811584, "learning_rate": 1.998621234262709e-05, "loss": 0.5578, "step": 1317 }, { "epoch": 0.03618890719384953, "grad_norm": 0.3672006130218506, "learning_rate": 1.998618966145781e-05, "loss": 0.5172, "step": 1318 }, { "epoch": 0.036216364634816034, "grad_norm": 0.6433749198913574, "learning_rate": 1.9986166961661098e-05, "loss": 0.5534, "step": 1319 }, { "epoch": 0.036243822075782535, "grad_norm": 0.3567993640899658, "learning_rate": 1.9986144243237002e-05, "loss": 0.6146, "step": 1320 }, { "epoch": 0.036271279516749036, "grad_norm": 0.3429429829120636, "learning_rate": 1.9986121506185557e-05, "loss": 0.5727, "step": 1321 }, { "epoch": 0.036298736957715544, "grad_norm": 0.3574250638484955, "learning_rate": 1.9986098750506807e-05, "loss": 0.5927, "step": 1322 }, { "epoch": 0.036326194398682045, "grad_norm": 0.36117053031921387, "learning_rate": 1.99860759762008e-05, "loss": 0.5578, "step": 1323 }, { "epoch": 0.036353651839648546, "grad_norm": 0.4171261787414551, "learning_rate": 1.9986053183267572e-05, "loss": 0.6365, "step": 1324 }, { "epoch": 0.03638110928061505, "grad_norm": 0.3504883050918579, "learning_rate": 1.9986030371707165e-05, "loss": 0.6009, "step": 1325 }, { "epoch": 0.03640856672158155, "grad_norm": 0.3804073929786682, "learning_rate": 1.9986007541519626e-05, "loss": 0.5007, "step": 1326 }, { "epoch": 0.03643602416254805, "grad_norm": 0.35574838519096375, "learning_rate": 1.9985984692704995e-05, "loss": 0.5779, "step": 1327 }, { "epoch": 0.03646348160351455, "grad_norm": 0.4317134916782379, "learning_rate": 1.9985961825263314e-05, "loss": 0.5392, "step": 1328 }, { "epoch": 0.03649093904448106, "grad_norm": 0.31361091136932373, "learning_rate": 1.998593893919463e-05, "loss": 0.5096, "step": 1329 }, { "epoch": 0.03651839648544756, "grad_norm": 0.39062777161598206, "learning_rate": 1.9985916034498983e-05, "loss": 0.6709, "step": 1330 }, { "epoch": 0.03654585392641406, "grad_norm": 0.38672131299972534, "learning_rate": 1.9985893111176414e-05, "loss": 0.5767, "step": 1331 }, { "epoch": 0.03657331136738056, "grad_norm": 0.3764428198337555, "learning_rate": 1.998587016922697e-05, "loss": 0.4977, "step": 1332 }, { "epoch": 0.03660076880834706, "grad_norm": 0.33594101667404175, "learning_rate": 1.998584720865069e-05, "loss": 0.5536, "step": 1333 }, { "epoch": 0.03662822624931356, "grad_norm": 0.37867122888565063, "learning_rate": 1.9985824229447615e-05, "loss": 0.5461, "step": 1334 }, { "epoch": 0.036655683690280064, "grad_norm": 0.4636308550834656, "learning_rate": 1.9985801231617792e-05, "loss": 0.5721, "step": 1335 }, { "epoch": 0.036683141131246565, "grad_norm": 0.45492222905158997, "learning_rate": 1.9985778215161264e-05, "loss": 0.573, "step": 1336 }, { "epoch": 0.03671059857221307, "grad_norm": 0.3679135739803314, "learning_rate": 1.998575518007807e-05, "loss": 0.5252, "step": 1337 }, { "epoch": 0.036738056013179574, "grad_norm": 0.3396340012550354, "learning_rate": 1.9985732126368258e-05, "loss": 0.5996, "step": 1338 }, { "epoch": 0.036765513454146075, "grad_norm": 0.35321441292762756, "learning_rate": 1.9985709054031868e-05, "loss": 0.5552, "step": 1339 }, { "epoch": 0.036792970895112576, "grad_norm": 0.37329745292663574, "learning_rate": 1.9985685963068942e-05, "loss": 0.6165, "step": 1340 }, { "epoch": 0.03682042833607908, "grad_norm": 0.35924991965293884, "learning_rate": 1.9985662853479525e-05, "loss": 0.4732, "step": 1341 }, { "epoch": 0.03684788577704558, "grad_norm": 0.3975987136363983, "learning_rate": 1.998563972526366e-05, "loss": 0.6623, "step": 1342 }, { "epoch": 0.03687534321801208, "grad_norm": 0.3536795675754547, "learning_rate": 1.9985616578421392e-05, "loss": 0.678, "step": 1343 }, { "epoch": 0.036902800658978586, "grad_norm": 0.41098275780677795, "learning_rate": 1.998559341295276e-05, "loss": 0.5952, "step": 1344 }, { "epoch": 0.03693025809994509, "grad_norm": 0.34572386741638184, "learning_rate": 1.9985570228857808e-05, "loss": 0.5169, "step": 1345 }, { "epoch": 0.03695771554091159, "grad_norm": 0.45085200667381287, "learning_rate": 1.9985547026136586e-05, "loss": 0.5976, "step": 1346 }, { "epoch": 0.03698517298187809, "grad_norm": 0.3493817448616028, "learning_rate": 1.9985523804789127e-05, "loss": 0.6205, "step": 1347 }, { "epoch": 0.03701263042284459, "grad_norm": 0.3858240246772766, "learning_rate": 1.9985500564815478e-05, "loss": 0.5269, "step": 1348 }, { "epoch": 0.03704008786381109, "grad_norm": 0.32471320033073425, "learning_rate": 1.9985477306215686e-05, "loss": 0.5139, "step": 1349 }, { "epoch": 0.03706754530477759, "grad_norm": 0.3415811359882355, "learning_rate": 1.9985454028989792e-05, "loss": 0.5502, "step": 1350 }, { "epoch": 0.03709500274574409, "grad_norm": 0.37088167667388916, "learning_rate": 1.998543073313784e-05, "loss": 0.5629, "step": 1351 }, { "epoch": 0.0371224601867106, "grad_norm": 0.35518884658813477, "learning_rate": 1.9985407418659867e-05, "loss": 0.5587, "step": 1352 }, { "epoch": 0.0371499176276771, "grad_norm": 0.41024041175842285, "learning_rate": 1.9985384085555928e-05, "loss": 0.5694, "step": 1353 }, { "epoch": 0.0371773750686436, "grad_norm": 0.3652222752571106, "learning_rate": 1.998536073382606e-05, "loss": 0.5428, "step": 1354 }, { "epoch": 0.037204832509610104, "grad_norm": 0.3796199858188629, "learning_rate": 1.9985337363470306e-05, "loss": 0.4688, "step": 1355 }, { "epoch": 0.037232289950576605, "grad_norm": 0.35591110587120056, "learning_rate": 1.998531397448871e-05, "loss": 0.5115, "step": 1356 }, { "epoch": 0.037259747391543106, "grad_norm": 0.3940508961677551, "learning_rate": 1.9985290566881316e-05, "loss": 0.5252, "step": 1357 }, { "epoch": 0.03728720483250961, "grad_norm": 0.4166024923324585, "learning_rate": 1.9985267140648174e-05, "loss": 0.5449, "step": 1358 }, { "epoch": 0.037314662273476115, "grad_norm": 0.3663184642791748, "learning_rate": 1.9985243695789316e-05, "loss": 0.5512, "step": 1359 }, { "epoch": 0.037342119714442616, "grad_norm": 0.3589610457420349, "learning_rate": 1.9985220232304794e-05, "loss": 0.569, "step": 1360 }, { "epoch": 0.03736957715540912, "grad_norm": 0.3511161804199219, "learning_rate": 1.9985196750194647e-05, "loss": 0.505, "step": 1361 }, { "epoch": 0.03739703459637562, "grad_norm": 0.36088645458221436, "learning_rate": 1.9985173249458924e-05, "loss": 0.6174, "step": 1362 }, { "epoch": 0.03742449203734212, "grad_norm": 0.4510972797870636, "learning_rate": 1.9985149730097664e-05, "loss": 0.6422, "step": 1363 }, { "epoch": 0.03745194947830862, "grad_norm": 0.36744189262390137, "learning_rate": 1.9985126192110912e-05, "loss": 0.5352, "step": 1364 }, { "epoch": 0.03747940691927512, "grad_norm": 0.3571881055831909, "learning_rate": 1.9985102635498715e-05, "loss": 0.5262, "step": 1365 }, { "epoch": 0.03750686436024162, "grad_norm": 0.37280380725860596, "learning_rate": 1.9985079060261118e-05, "loss": 0.5597, "step": 1366 }, { "epoch": 0.03753432180120813, "grad_norm": 0.3582775294780731, "learning_rate": 1.9985055466398157e-05, "loss": 0.608, "step": 1367 }, { "epoch": 0.03756177924217463, "grad_norm": 0.4250354468822479, "learning_rate": 1.998503185390988e-05, "loss": 0.6902, "step": 1368 }, { "epoch": 0.03758923668314113, "grad_norm": 0.426248162984848, "learning_rate": 1.9985008222796334e-05, "loss": 0.6321, "step": 1369 }, { "epoch": 0.03761669412410763, "grad_norm": 0.40823084115982056, "learning_rate": 1.998498457305756e-05, "loss": 0.5644, "step": 1370 }, { "epoch": 0.037644151565074134, "grad_norm": 0.35153359174728394, "learning_rate": 1.9984960904693604e-05, "loss": 0.5109, "step": 1371 }, { "epoch": 0.037671609006040635, "grad_norm": 0.36713558435440063, "learning_rate": 1.9984937217704507e-05, "loss": 0.5587, "step": 1372 }, { "epoch": 0.037699066447007136, "grad_norm": 0.34770628809928894, "learning_rate": 1.9984913512090317e-05, "loss": 0.4984, "step": 1373 }, { "epoch": 0.037726523887973644, "grad_norm": 0.38485515117645264, "learning_rate": 1.9984889787851073e-05, "loss": 0.5534, "step": 1374 }, { "epoch": 0.037753981328940145, "grad_norm": 0.4647962749004364, "learning_rate": 1.9984866044986827e-05, "loss": 0.5523, "step": 1375 }, { "epoch": 0.037781438769906646, "grad_norm": 0.5933514833450317, "learning_rate": 1.9984842283497618e-05, "loss": 0.5667, "step": 1376 }, { "epoch": 0.03780889621087315, "grad_norm": 0.3392966389656067, "learning_rate": 1.9984818503383493e-05, "loss": 0.5417, "step": 1377 }, { "epoch": 0.03783635365183965, "grad_norm": 0.350118488073349, "learning_rate": 1.998479470464449e-05, "loss": 0.5642, "step": 1378 }, { "epoch": 0.03786381109280615, "grad_norm": 0.3626915514469147, "learning_rate": 1.9984770887280663e-05, "loss": 0.6028, "step": 1379 }, { "epoch": 0.03789126853377265, "grad_norm": 0.4075717628002167, "learning_rate": 1.9984747051292047e-05, "loss": 0.6407, "step": 1380 }, { "epoch": 0.03791872597473915, "grad_norm": 0.37196436524391174, "learning_rate": 1.9984723196678694e-05, "loss": 0.5713, "step": 1381 }, { "epoch": 0.03794618341570566, "grad_norm": 0.33494457602500916, "learning_rate": 1.9984699323440645e-05, "loss": 0.55, "step": 1382 }, { "epoch": 0.03797364085667216, "grad_norm": 0.3736448585987091, "learning_rate": 1.9984675431577944e-05, "loss": 0.4671, "step": 1383 }, { "epoch": 0.03800109829763866, "grad_norm": 0.3956364095211029, "learning_rate": 1.9984651521090635e-05, "loss": 0.6346, "step": 1384 }, { "epoch": 0.03802855573860516, "grad_norm": 0.41002312302589417, "learning_rate": 1.998462759197877e-05, "loss": 0.578, "step": 1385 }, { "epoch": 0.03805601317957166, "grad_norm": 0.33643418550491333, "learning_rate": 1.9984603644242384e-05, "loss": 0.5922, "step": 1386 }, { "epoch": 0.038083470620538164, "grad_norm": 0.38798728585243225, "learning_rate": 1.9984579677881526e-05, "loss": 0.6263, "step": 1387 }, { "epoch": 0.038110928061504665, "grad_norm": 0.36448708176612854, "learning_rate": 1.998455569289624e-05, "loss": 0.5796, "step": 1388 }, { "epoch": 0.03813838550247117, "grad_norm": 0.3806496262550354, "learning_rate": 1.998453168928657e-05, "loss": 0.5845, "step": 1389 }, { "epoch": 0.038165842943437674, "grad_norm": 0.45116952061653137, "learning_rate": 1.9984507667052565e-05, "loss": 0.5427, "step": 1390 }, { "epoch": 0.038193300384404175, "grad_norm": 0.37620508670806885, "learning_rate": 1.998448362619426e-05, "loss": 0.5341, "step": 1391 }, { "epoch": 0.038220757825370676, "grad_norm": 0.4355982840061188, "learning_rate": 1.9984459566711713e-05, "loss": 0.6131, "step": 1392 }, { "epoch": 0.03824821526633718, "grad_norm": 0.40971219539642334, "learning_rate": 1.998443548860496e-05, "loss": 0.6067, "step": 1393 }, { "epoch": 0.03827567270730368, "grad_norm": 0.9071754217147827, "learning_rate": 1.9984411391874048e-05, "loss": 0.5623, "step": 1394 }, { "epoch": 0.03830313014827018, "grad_norm": 0.36216816306114197, "learning_rate": 1.998438727651902e-05, "loss": 0.5762, "step": 1395 }, { "epoch": 0.038330587589236687, "grad_norm": 0.38394609093666077, "learning_rate": 1.9984363142539928e-05, "loss": 0.5427, "step": 1396 }, { "epoch": 0.03835804503020319, "grad_norm": 0.37858355045318604, "learning_rate": 1.998433898993681e-05, "loss": 0.5558, "step": 1397 }, { "epoch": 0.03838550247116969, "grad_norm": 0.3510121703147888, "learning_rate": 1.998431481870971e-05, "loss": 0.4667, "step": 1398 }, { "epoch": 0.03841295991213619, "grad_norm": 0.3656271696090698, "learning_rate": 1.998429062885868e-05, "loss": 0.5903, "step": 1399 }, { "epoch": 0.03844041735310269, "grad_norm": 0.3702053129673004, "learning_rate": 1.9984266420383758e-05, "loss": 0.5269, "step": 1400 }, { "epoch": 0.03846787479406919, "grad_norm": 0.3782820999622345, "learning_rate": 1.9984242193284996e-05, "loss": 0.6312, "step": 1401 }, { "epoch": 0.03849533223503569, "grad_norm": 0.342664510011673, "learning_rate": 1.9984217947562433e-05, "loss": 0.6699, "step": 1402 }, { "epoch": 0.03852278967600219, "grad_norm": 0.3822386860847473, "learning_rate": 1.9984193683216116e-05, "loss": 0.5265, "step": 1403 }, { "epoch": 0.0385502471169687, "grad_norm": 0.33213821053504944, "learning_rate": 1.9984169400246096e-05, "loss": 0.5421, "step": 1404 }, { "epoch": 0.0385777045579352, "grad_norm": 0.39297229051589966, "learning_rate": 1.9984145098652413e-05, "loss": 0.5874, "step": 1405 }, { "epoch": 0.0386051619989017, "grad_norm": 0.38491353392601013, "learning_rate": 1.998412077843511e-05, "loss": 0.575, "step": 1406 }, { "epoch": 0.038632619439868204, "grad_norm": 0.3379661738872528, "learning_rate": 1.9984096439594233e-05, "loss": 0.5119, "step": 1407 }, { "epoch": 0.038660076880834705, "grad_norm": 0.37386050820350647, "learning_rate": 1.9984072082129833e-05, "loss": 0.564, "step": 1408 }, { "epoch": 0.038687534321801206, "grad_norm": 0.3413480520248413, "learning_rate": 1.998404770604195e-05, "loss": 0.553, "step": 1409 }, { "epoch": 0.03871499176276771, "grad_norm": 0.3351229429244995, "learning_rate": 1.9984023311330636e-05, "loss": 0.6059, "step": 1410 }, { "epoch": 0.038742449203734215, "grad_norm": 0.4778900444507599, "learning_rate": 1.998399889799593e-05, "loss": 0.5741, "step": 1411 }, { "epoch": 0.038769906644700716, "grad_norm": 0.3922230303287506, "learning_rate": 1.9983974466037876e-05, "loss": 0.5676, "step": 1412 }, { "epoch": 0.03879736408566722, "grad_norm": 0.39225998520851135, "learning_rate": 1.9983950015456525e-05, "loss": 0.5706, "step": 1413 }, { "epoch": 0.03882482152663372, "grad_norm": 0.35159313678741455, "learning_rate": 1.9983925546251922e-05, "loss": 0.4788, "step": 1414 }, { "epoch": 0.03885227896760022, "grad_norm": 0.7194771766662598, "learning_rate": 1.998390105842411e-05, "loss": 0.5789, "step": 1415 }, { "epoch": 0.03887973640856672, "grad_norm": 0.39846062660217285, "learning_rate": 1.9983876551973138e-05, "loss": 0.5799, "step": 1416 }, { "epoch": 0.03890719384953322, "grad_norm": 0.40458840131759644, "learning_rate": 1.9983852026899048e-05, "loss": 0.619, "step": 1417 }, { "epoch": 0.03893465129049972, "grad_norm": 0.36823511123657227, "learning_rate": 1.998382748320189e-05, "loss": 0.5731, "step": 1418 }, { "epoch": 0.03896210873146623, "grad_norm": 0.36196476221084595, "learning_rate": 1.9983802920881703e-05, "loss": 0.5065, "step": 1419 }, { "epoch": 0.03898956617243273, "grad_norm": 0.36814600229263306, "learning_rate": 1.998377833993854e-05, "loss": 0.6741, "step": 1420 }, { "epoch": 0.03901702361339923, "grad_norm": 0.42277851700782776, "learning_rate": 1.9983753740372442e-05, "loss": 0.564, "step": 1421 }, { "epoch": 0.03904448105436573, "grad_norm": 0.3448120951652527, "learning_rate": 1.9983729122183458e-05, "loss": 0.5764, "step": 1422 }, { "epoch": 0.039071938495332234, "grad_norm": 0.33831626176834106, "learning_rate": 1.9983704485371635e-05, "loss": 0.5491, "step": 1423 }, { "epoch": 0.039099395936298735, "grad_norm": 0.39261531829833984, "learning_rate": 1.9983679829937013e-05, "loss": 0.6168, "step": 1424 }, { "epoch": 0.039126853377265236, "grad_norm": 0.37276944518089294, "learning_rate": 1.9983655155879643e-05, "loss": 0.5465, "step": 1425 }, { "epoch": 0.039154310818231744, "grad_norm": 0.39492225646972656, "learning_rate": 1.9983630463199572e-05, "loss": 0.6269, "step": 1426 }, { "epoch": 0.039181768259198245, "grad_norm": 0.35296735167503357, "learning_rate": 1.998360575189684e-05, "loss": 0.4761, "step": 1427 }, { "epoch": 0.039209225700164746, "grad_norm": 0.402790367603302, "learning_rate": 1.99835810219715e-05, "loss": 0.633, "step": 1428 }, { "epoch": 0.03923668314113125, "grad_norm": 0.3851383924484253, "learning_rate": 1.9983556273423593e-05, "loss": 0.5423, "step": 1429 }, { "epoch": 0.03926414058209775, "grad_norm": 0.36005282402038574, "learning_rate": 1.9983531506253165e-05, "loss": 0.5059, "step": 1430 }, { "epoch": 0.03929159802306425, "grad_norm": 0.35508689284324646, "learning_rate": 1.9983506720460268e-05, "loss": 0.5506, "step": 1431 }, { "epoch": 0.03931905546403075, "grad_norm": 0.3937515914440155, "learning_rate": 1.9983481916044947e-05, "loss": 0.6406, "step": 1432 }, { "epoch": 0.03934651290499725, "grad_norm": 0.35777997970581055, "learning_rate": 1.998345709300724e-05, "loss": 0.6165, "step": 1433 }, { "epoch": 0.03937397034596376, "grad_norm": 0.42319774627685547, "learning_rate": 1.9983432251347205e-05, "loss": 0.5883, "step": 1434 }, { "epoch": 0.03940142778693026, "grad_norm": 0.37995222210884094, "learning_rate": 1.9983407391064878e-05, "loss": 0.5542, "step": 1435 }, { "epoch": 0.03942888522789676, "grad_norm": 0.392892062664032, "learning_rate": 1.998338251216031e-05, "loss": 0.6081, "step": 1436 }, { "epoch": 0.03945634266886326, "grad_norm": 0.4020610451698303, "learning_rate": 1.9983357614633546e-05, "loss": 0.5607, "step": 1437 }, { "epoch": 0.03948380010982976, "grad_norm": 0.4491632580757141, "learning_rate": 1.998333269848464e-05, "loss": 0.5524, "step": 1438 }, { "epoch": 0.039511257550796264, "grad_norm": 0.31460535526275635, "learning_rate": 1.9983307763713628e-05, "loss": 0.5417, "step": 1439 }, { "epoch": 0.039538714991762765, "grad_norm": 0.3834148049354553, "learning_rate": 1.998328281032056e-05, "loss": 0.5629, "step": 1440 }, { "epoch": 0.03956617243272927, "grad_norm": 0.4132012128829956, "learning_rate": 1.9983257838305487e-05, "loss": 0.6206, "step": 1441 }, { "epoch": 0.039593629873695774, "grad_norm": 0.389197438955307, "learning_rate": 1.9983232847668445e-05, "loss": 0.5619, "step": 1442 }, { "epoch": 0.039621087314662275, "grad_norm": 0.3745328187942505, "learning_rate": 1.9983207838409492e-05, "loss": 0.5434, "step": 1443 }, { "epoch": 0.039648544755628776, "grad_norm": 0.4033775329589844, "learning_rate": 1.9983182810528673e-05, "loss": 0.5154, "step": 1444 }, { "epoch": 0.03967600219659528, "grad_norm": 0.452808678150177, "learning_rate": 1.9983157764026027e-05, "loss": 0.5891, "step": 1445 }, { "epoch": 0.03970345963756178, "grad_norm": 0.399745911359787, "learning_rate": 1.9983132698901606e-05, "loss": 0.6336, "step": 1446 }, { "epoch": 0.03973091707852828, "grad_norm": 0.40538227558135986, "learning_rate": 1.998310761515546e-05, "loss": 0.6422, "step": 1447 }, { "epoch": 0.03975837451949478, "grad_norm": 0.36251363158226013, "learning_rate": 1.9983082512787627e-05, "loss": 0.5449, "step": 1448 }, { "epoch": 0.03978583196046129, "grad_norm": 0.32668158411979675, "learning_rate": 1.9983057391798164e-05, "loss": 0.4774, "step": 1449 }, { "epoch": 0.03981328940142779, "grad_norm": 0.3558714687824249, "learning_rate": 1.998303225218711e-05, "loss": 0.5731, "step": 1450 }, { "epoch": 0.03984074684239429, "grad_norm": 0.35155555605888367, "learning_rate": 1.9983007093954515e-05, "loss": 0.582, "step": 1451 }, { "epoch": 0.03986820428336079, "grad_norm": 0.46668800711631775, "learning_rate": 1.9982981917100424e-05, "loss": 0.5368, "step": 1452 }, { "epoch": 0.03989566172432729, "grad_norm": 0.365637868642807, "learning_rate": 1.9982956721624886e-05, "loss": 0.4701, "step": 1453 }, { "epoch": 0.03992311916529379, "grad_norm": 0.3650030791759491, "learning_rate": 1.998293150752795e-05, "loss": 0.5268, "step": 1454 }, { "epoch": 0.039950576606260293, "grad_norm": 0.33889004588127136, "learning_rate": 1.998290627480966e-05, "loss": 0.5089, "step": 1455 }, { "epoch": 0.0399780340472268, "grad_norm": 0.501200258731842, "learning_rate": 1.998288102347006e-05, "loss": 0.5543, "step": 1456 }, { "epoch": 0.0400054914881933, "grad_norm": 0.3795984089374542, "learning_rate": 1.9982855753509205e-05, "loss": 0.5571, "step": 1457 }, { "epoch": 0.0400329489291598, "grad_norm": 0.3601495921611786, "learning_rate": 1.9982830464927135e-05, "loss": 0.5648, "step": 1458 }, { "epoch": 0.040060406370126304, "grad_norm": 0.37256380915641785, "learning_rate": 1.9982805157723903e-05, "loss": 0.5725, "step": 1459 }, { "epoch": 0.040087863811092805, "grad_norm": 0.4040667414665222, "learning_rate": 1.998277983189955e-05, "loss": 0.5779, "step": 1460 }, { "epoch": 0.040115321252059306, "grad_norm": 0.4205073416233063, "learning_rate": 1.9982754487454124e-05, "loss": 0.5996, "step": 1461 }, { "epoch": 0.04014277869302581, "grad_norm": 0.3432098925113678, "learning_rate": 1.998272912438768e-05, "loss": 0.4901, "step": 1462 }, { "epoch": 0.040170236133992315, "grad_norm": 0.44499748945236206, "learning_rate": 1.998270374270026e-05, "loss": 0.6179, "step": 1463 }, { "epoch": 0.040197693574958816, "grad_norm": 0.4408318102359772, "learning_rate": 1.998267834239191e-05, "loss": 0.6108, "step": 1464 }, { "epoch": 0.04022515101592532, "grad_norm": 0.3823871314525604, "learning_rate": 1.9982652923462678e-05, "loss": 0.6101, "step": 1465 }, { "epoch": 0.04025260845689182, "grad_norm": 0.47863680124282837, "learning_rate": 1.9982627485912615e-05, "loss": 0.6014, "step": 1466 }, { "epoch": 0.04028006589785832, "grad_norm": 0.3763941824436188, "learning_rate": 1.9982602029741762e-05, "loss": 0.5441, "step": 1467 }, { "epoch": 0.04030752333882482, "grad_norm": 0.41168203949928284, "learning_rate": 1.998257655495017e-05, "loss": 0.6304, "step": 1468 }, { "epoch": 0.04033498077979132, "grad_norm": 0.5156595706939697, "learning_rate": 1.9982551061537892e-05, "loss": 0.6214, "step": 1469 }, { "epoch": 0.04036243822075782, "grad_norm": 0.36858677864074707, "learning_rate": 1.9982525549504966e-05, "loss": 0.5463, "step": 1470 }, { "epoch": 0.04038989566172433, "grad_norm": 0.35927948355674744, "learning_rate": 1.9982500018851444e-05, "loss": 0.6117, "step": 1471 }, { "epoch": 0.04041735310269083, "grad_norm": 0.3643840253353119, "learning_rate": 1.9982474469577373e-05, "loss": 0.4597, "step": 1472 }, { "epoch": 0.04044481054365733, "grad_norm": 0.40708377957344055, "learning_rate": 1.9982448901682803e-05, "loss": 0.5554, "step": 1473 }, { "epoch": 0.04047226798462383, "grad_norm": 0.3784312307834625, "learning_rate": 1.998242331516778e-05, "loss": 0.6176, "step": 1474 }, { "epoch": 0.040499725425590334, "grad_norm": 0.5704900026321411, "learning_rate": 1.998239771003235e-05, "loss": 0.5954, "step": 1475 }, { "epoch": 0.040527182866556835, "grad_norm": 0.39144372940063477, "learning_rate": 1.9982372086276564e-05, "loss": 0.5635, "step": 1476 }, { "epoch": 0.040554640307523336, "grad_norm": 0.3544127941131592, "learning_rate": 1.9982346443900463e-05, "loss": 0.5578, "step": 1477 }, { "epoch": 0.040582097748489844, "grad_norm": 0.36313650012016296, "learning_rate": 1.9982320782904105e-05, "loss": 0.4876, "step": 1478 }, { "epoch": 0.040609555189456345, "grad_norm": 0.39067843556404114, "learning_rate": 1.998229510328753e-05, "loss": 0.5741, "step": 1479 }, { "epoch": 0.040637012630422846, "grad_norm": 0.35563233494758606, "learning_rate": 1.998226940505079e-05, "loss": 0.5783, "step": 1480 }, { "epoch": 0.04066447007138935, "grad_norm": 0.37560442090034485, "learning_rate": 1.9982243688193935e-05, "loss": 0.5279, "step": 1481 }, { "epoch": 0.04069192751235585, "grad_norm": 0.35053735971450806, "learning_rate": 1.9982217952717004e-05, "loss": 0.5484, "step": 1482 }, { "epoch": 0.04071938495332235, "grad_norm": 0.37565770745277405, "learning_rate": 1.9982192198620052e-05, "loss": 0.5872, "step": 1483 }, { "epoch": 0.04074684239428885, "grad_norm": 0.4358329474925995, "learning_rate": 1.9982166425903128e-05, "loss": 0.4688, "step": 1484 }, { "epoch": 0.04077429983525535, "grad_norm": 0.39445623755455017, "learning_rate": 1.9982140634566275e-05, "loss": 0.5895, "step": 1485 }, { "epoch": 0.04080175727622186, "grad_norm": 0.39675506949424744, "learning_rate": 1.9982114824609544e-05, "loss": 0.5763, "step": 1486 }, { "epoch": 0.04082921471718836, "grad_norm": 0.3610275685787201, "learning_rate": 1.9982088996032982e-05, "loss": 0.5862, "step": 1487 }, { "epoch": 0.04085667215815486, "grad_norm": 0.44647669792175293, "learning_rate": 1.998206314883664e-05, "loss": 0.5574, "step": 1488 }, { "epoch": 0.04088412959912136, "grad_norm": 0.36456337571144104, "learning_rate": 1.9982037283020566e-05, "loss": 0.584, "step": 1489 }, { "epoch": 0.04091158704008786, "grad_norm": 0.3951393961906433, "learning_rate": 1.9982011398584804e-05, "loss": 0.5615, "step": 1490 }, { "epoch": 0.040939044481054364, "grad_norm": 0.3587546944618225, "learning_rate": 1.9981985495529407e-05, "loss": 0.5145, "step": 1491 }, { "epoch": 0.040966501922020865, "grad_norm": 0.36884433031082153, "learning_rate": 1.9981959573854417e-05, "loss": 0.5559, "step": 1492 }, { "epoch": 0.04099395936298737, "grad_norm": 0.3929798901081085, "learning_rate": 1.9981933633559892e-05, "loss": 0.5849, "step": 1493 }, { "epoch": 0.041021416803953874, "grad_norm": 0.40386855602264404, "learning_rate": 1.998190767464587e-05, "loss": 0.5762, "step": 1494 }, { "epoch": 0.041048874244920375, "grad_norm": 0.377131849527359, "learning_rate": 1.9981881697112406e-05, "loss": 0.6043, "step": 1495 }, { "epoch": 0.041076331685886876, "grad_norm": 0.40541398525238037, "learning_rate": 1.998185570095955e-05, "loss": 0.609, "step": 1496 }, { "epoch": 0.04110378912685338, "grad_norm": 0.3475559651851654, "learning_rate": 1.9981829686187342e-05, "loss": 0.6455, "step": 1497 }, { "epoch": 0.04113124656781988, "grad_norm": 0.38848477602005005, "learning_rate": 1.9981803652795838e-05, "loss": 0.6266, "step": 1498 }, { "epoch": 0.04115870400878638, "grad_norm": 0.38136783242225647, "learning_rate": 1.9981777600785083e-05, "loss": 0.6295, "step": 1499 }, { "epoch": 0.04118616144975288, "grad_norm": 0.448990136384964, "learning_rate": 1.998175153015513e-05, "loss": 0.7197, "step": 1500 }, { "epoch": 0.04121361889071939, "grad_norm": 0.3774144947528839, "learning_rate": 1.9981725440906023e-05, "loss": 0.565, "step": 1501 }, { "epoch": 0.04124107633168589, "grad_norm": 0.3946588635444641, "learning_rate": 1.9981699333037813e-05, "loss": 0.5605, "step": 1502 }, { "epoch": 0.04126853377265239, "grad_norm": 0.35287272930145264, "learning_rate": 1.9981673206550548e-05, "loss": 0.5308, "step": 1503 }, { "epoch": 0.04129599121361889, "grad_norm": 0.36341220140457153, "learning_rate": 1.9981647061444275e-05, "loss": 0.5189, "step": 1504 }, { "epoch": 0.04132344865458539, "grad_norm": 0.4458792507648468, "learning_rate": 1.9981620897719046e-05, "loss": 0.5548, "step": 1505 }, { "epoch": 0.04135090609555189, "grad_norm": 0.35031858086586, "learning_rate": 1.998159471537491e-05, "loss": 0.5912, "step": 1506 }, { "epoch": 0.041378363536518394, "grad_norm": 0.37241825461387634, "learning_rate": 1.998156851441191e-05, "loss": 0.5701, "step": 1507 }, { "epoch": 0.0414058209774849, "grad_norm": 0.34442421793937683, "learning_rate": 1.9981542294830102e-05, "loss": 0.5853, "step": 1508 }, { "epoch": 0.0414332784184514, "grad_norm": 0.3662140667438507, "learning_rate": 1.9981516056629528e-05, "loss": 0.5103, "step": 1509 }, { "epoch": 0.0414607358594179, "grad_norm": 0.3637961745262146, "learning_rate": 1.9981489799810245e-05, "loss": 0.5085, "step": 1510 }, { "epoch": 0.041488193300384404, "grad_norm": 0.4506815969944, "learning_rate": 1.9981463524372294e-05, "loss": 0.5686, "step": 1511 }, { "epoch": 0.041515650741350905, "grad_norm": 0.3992023169994354, "learning_rate": 1.9981437230315732e-05, "loss": 0.5688, "step": 1512 }, { "epoch": 0.041543108182317406, "grad_norm": 0.4627644121646881, "learning_rate": 1.9981410917640603e-05, "loss": 0.5711, "step": 1513 }, { "epoch": 0.04157056562328391, "grad_norm": 0.4091407358646393, "learning_rate": 1.9981384586346958e-05, "loss": 0.6807, "step": 1514 }, { "epoch": 0.04159802306425041, "grad_norm": 0.36994150280952454, "learning_rate": 1.998135823643484e-05, "loss": 0.6075, "step": 1515 }, { "epoch": 0.041625480505216916, "grad_norm": 0.4173072576522827, "learning_rate": 1.998133186790431e-05, "loss": 0.6394, "step": 1516 }, { "epoch": 0.04165293794618342, "grad_norm": 0.41299736499786377, "learning_rate": 1.9981305480755404e-05, "loss": 0.6187, "step": 1517 }, { "epoch": 0.04168039538714992, "grad_norm": 0.35795843601226807, "learning_rate": 1.998127907498818e-05, "loss": 0.6062, "step": 1518 }, { "epoch": 0.04170785282811642, "grad_norm": 0.3957805931568146, "learning_rate": 1.9981252650602686e-05, "loss": 0.5766, "step": 1519 }, { "epoch": 0.04173531026908292, "grad_norm": 0.38808673620224, "learning_rate": 1.998122620759897e-05, "loss": 0.5527, "step": 1520 }, { "epoch": 0.04176276771004942, "grad_norm": 0.3621934652328491, "learning_rate": 1.998119974597708e-05, "loss": 0.4586, "step": 1521 }, { "epoch": 0.04179022515101592, "grad_norm": 0.402288556098938, "learning_rate": 1.9981173265737065e-05, "loss": 0.6552, "step": 1522 }, { "epoch": 0.04181768259198243, "grad_norm": 0.41721636056900024, "learning_rate": 1.9981146766878982e-05, "loss": 0.6612, "step": 1523 }, { "epoch": 0.04184514003294893, "grad_norm": 0.6894505620002747, "learning_rate": 1.9981120249402874e-05, "loss": 0.5079, "step": 1524 }, { "epoch": 0.04187259747391543, "grad_norm": 0.3658975064754486, "learning_rate": 1.9981093713308785e-05, "loss": 0.5527, "step": 1525 }, { "epoch": 0.04190005491488193, "grad_norm": 0.3439442813396454, "learning_rate": 1.9981067158596772e-05, "loss": 0.4958, "step": 1526 }, { "epoch": 0.041927512355848434, "grad_norm": 0.4003651440143585, "learning_rate": 1.9981040585266888e-05, "loss": 0.5667, "step": 1527 }, { "epoch": 0.041954969796814935, "grad_norm": 0.35332822799682617, "learning_rate": 1.9981013993319176e-05, "loss": 0.5368, "step": 1528 }, { "epoch": 0.041982427237781436, "grad_norm": 0.4027746617794037, "learning_rate": 1.9980987382753686e-05, "loss": 0.6459, "step": 1529 }, { "epoch": 0.042009884678747944, "grad_norm": 0.3974721431732178, "learning_rate": 1.998096075357047e-05, "loss": 0.6098, "step": 1530 }, { "epoch": 0.042037342119714445, "grad_norm": 0.3503240942955017, "learning_rate": 1.9980934105769577e-05, "loss": 0.4317, "step": 1531 }, { "epoch": 0.042064799560680946, "grad_norm": 0.35331302881240845, "learning_rate": 1.9980907439351053e-05, "loss": 0.6204, "step": 1532 }, { "epoch": 0.04209225700164745, "grad_norm": 0.3674430549144745, "learning_rate": 1.9980880754314955e-05, "loss": 0.6133, "step": 1533 }, { "epoch": 0.04211971444261395, "grad_norm": 0.38715028762817383, "learning_rate": 1.9980854050661326e-05, "loss": 0.5739, "step": 1534 }, { "epoch": 0.04214717188358045, "grad_norm": 0.4007578492164612, "learning_rate": 1.9980827328390222e-05, "loss": 0.577, "step": 1535 }, { "epoch": 0.04217462932454695, "grad_norm": 0.37029266357421875, "learning_rate": 1.9980800587501686e-05, "loss": 0.7246, "step": 1536 }, { "epoch": 0.04220208676551345, "grad_norm": 0.40150314569473267, "learning_rate": 1.9980773827995773e-05, "loss": 0.6511, "step": 1537 }, { "epoch": 0.04222954420647996, "grad_norm": 0.370368629693985, "learning_rate": 1.998074704987253e-05, "loss": 0.581, "step": 1538 }, { "epoch": 0.04225700164744646, "grad_norm": 0.3493688106536865, "learning_rate": 1.998072025313201e-05, "loss": 0.5749, "step": 1539 }, { "epoch": 0.04228445908841296, "grad_norm": 0.35625311732292175, "learning_rate": 1.9980693437774265e-05, "loss": 0.4805, "step": 1540 }, { "epoch": 0.04231191652937946, "grad_norm": 0.39882004261016846, "learning_rate": 1.9980666603799336e-05, "loss": 0.6301, "step": 1541 }, { "epoch": 0.04233937397034596, "grad_norm": 0.3345077931880951, "learning_rate": 1.998063975120728e-05, "loss": 0.4452, "step": 1542 }, { "epoch": 0.042366831411312464, "grad_norm": 0.3945041000843048, "learning_rate": 1.9980612879998144e-05, "loss": 0.5425, "step": 1543 }, { "epoch": 0.042394288852278965, "grad_norm": 0.3532535433769226, "learning_rate": 1.998058599017198e-05, "loss": 0.569, "step": 1544 }, { "epoch": 0.04242174629324547, "grad_norm": 0.37374332547187805, "learning_rate": 1.9980559081728838e-05, "loss": 0.5216, "step": 1545 }, { "epoch": 0.042449203734211974, "grad_norm": 0.35263243317604065, "learning_rate": 1.998053215466877e-05, "loss": 0.5482, "step": 1546 }, { "epoch": 0.042476661175178475, "grad_norm": 0.35178399085998535, "learning_rate": 1.998050520899182e-05, "loss": 0.5585, "step": 1547 }, { "epoch": 0.042504118616144976, "grad_norm": 0.47210147976875305, "learning_rate": 1.9980478244698046e-05, "loss": 0.5626, "step": 1548 }, { "epoch": 0.04253157605711148, "grad_norm": 0.3967672288417816, "learning_rate": 1.998045126178749e-05, "loss": 0.5033, "step": 1549 }, { "epoch": 0.04255903349807798, "grad_norm": 0.36692968010902405, "learning_rate": 1.9980424260260212e-05, "loss": 0.6623, "step": 1550 }, { "epoch": 0.04258649093904448, "grad_norm": 0.3726944923400879, "learning_rate": 1.9980397240116257e-05, "loss": 0.5681, "step": 1551 }, { "epoch": 0.04261394838001098, "grad_norm": 0.45657438039779663, "learning_rate": 1.9980370201355673e-05, "loss": 0.5627, "step": 1552 }, { "epoch": 0.04264140582097749, "grad_norm": 0.4005999267101288, "learning_rate": 1.9980343143978515e-05, "loss": 0.5323, "step": 1553 }, { "epoch": 0.04266886326194399, "grad_norm": 0.3591587543487549, "learning_rate": 1.9980316067984832e-05, "loss": 0.5962, "step": 1554 }, { "epoch": 0.04269632070291049, "grad_norm": 0.36159202456474304, "learning_rate": 1.9980288973374674e-05, "loss": 0.5563, "step": 1555 }, { "epoch": 0.04272377814387699, "grad_norm": 0.48796606063842773, "learning_rate": 1.998026186014809e-05, "loss": 0.5487, "step": 1556 }, { "epoch": 0.04275123558484349, "grad_norm": 0.3308041989803314, "learning_rate": 1.9980234728305134e-05, "loss": 0.5664, "step": 1557 }, { "epoch": 0.04277869302580999, "grad_norm": 0.3321518301963806, "learning_rate": 1.9980207577845854e-05, "loss": 0.4775, "step": 1558 }, { "epoch": 0.042806150466776494, "grad_norm": 0.4029240608215332, "learning_rate": 1.99801804087703e-05, "loss": 0.5318, "step": 1559 }, { "epoch": 0.042833607907743, "grad_norm": 0.3778943419456482, "learning_rate": 1.9980153221078527e-05, "loss": 0.5926, "step": 1560 }, { "epoch": 0.0428610653487095, "grad_norm": 0.4445226490497589, "learning_rate": 1.998012601477058e-05, "loss": 0.5553, "step": 1561 }, { "epoch": 0.042888522789676004, "grad_norm": 0.3678400218486786, "learning_rate": 1.9980098789846517e-05, "loss": 0.5408, "step": 1562 }, { "epoch": 0.042915980230642505, "grad_norm": 1.170819640159607, "learning_rate": 1.998007154630638e-05, "loss": 0.6125, "step": 1563 }, { "epoch": 0.042943437671609005, "grad_norm": 0.3932032883167267, "learning_rate": 1.998004428415023e-05, "loss": 0.546, "step": 1564 }, { "epoch": 0.042970895112575506, "grad_norm": 0.3799107074737549, "learning_rate": 1.9980017003378108e-05, "loss": 0.5912, "step": 1565 }, { "epoch": 0.04299835255354201, "grad_norm": 0.3581136465072632, "learning_rate": 1.997998970399007e-05, "loss": 0.6049, "step": 1566 }, { "epoch": 0.04302580999450851, "grad_norm": 0.3978174328804016, "learning_rate": 1.9979962385986166e-05, "loss": 0.5927, "step": 1567 }, { "epoch": 0.043053267435475016, "grad_norm": 0.355208158493042, "learning_rate": 1.9979935049366446e-05, "loss": 0.5792, "step": 1568 }, { "epoch": 0.04308072487644152, "grad_norm": 0.6855316758155823, "learning_rate": 1.997990769413096e-05, "loss": 0.5728, "step": 1569 }, { "epoch": 0.04310818231740802, "grad_norm": 0.32909727096557617, "learning_rate": 1.9979880320279766e-05, "loss": 0.4973, "step": 1570 }, { "epoch": 0.04313563975837452, "grad_norm": 0.3891008794307709, "learning_rate": 1.9979852927812908e-05, "loss": 0.5509, "step": 1571 }, { "epoch": 0.04316309719934102, "grad_norm": 0.36977487802505493, "learning_rate": 1.9979825516730437e-05, "loss": 0.6191, "step": 1572 }, { "epoch": 0.04319055464030752, "grad_norm": 0.4252360165119171, "learning_rate": 1.997979808703241e-05, "loss": 0.5767, "step": 1573 }, { "epoch": 0.04321801208127402, "grad_norm": 0.3528222143650055, "learning_rate": 1.997977063871887e-05, "loss": 0.5655, "step": 1574 }, { "epoch": 0.04324546952224053, "grad_norm": 0.35072705149650574, "learning_rate": 1.9979743171789875e-05, "loss": 0.4999, "step": 1575 }, { "epoch": 0.04327292696320703, "grad_norm": 0.3999965190887451, "learning_rate": 1.9979715686245473e-05, "loss": 0.6472, "step": 1576 }, { "epoch": 0.04330038440417353, "grad_norm": 0.4214687943458557, "learning_rate": 1.9979688182085716e-05, "loss": 0.5411, "step": 1577 }, { "epoch": 0.04332784184514003, "grad_norm": 0.35870251059532166, "learning_rate": 1.9979660659310658e-05, "loss": 0.5393, "step": 1578 }, { "epoch": 0.043355299286106534, "grad_norm": 0.3755820393562317, "learning_rate": 1.9979633117920343e-05, "loss": 0.5455, "step": 1579 }, { "epoch": 0.043382756727073035, "grad_norm": 0.40620195865631104, "learning_rate": 1.997960555791483e-05, "loss": 0.6688, "step": 1580 }, { "epoch": 0.043410214168039536, "grad_norm": 0.6106656193733215, "learning_rate": 1.997957797929417e-05, "loss": 0.5521, "step": 1581 }, { "epoch": 0.04343767160900604, "grad_norm": 0.3323543071746826, "learning_rate": 1.997955038205841e-05, "loss": 0.5447, "step": 1582 }, { "epoch": 0.043465129049972545, "grad_norm": 0.35335952043533325, "learning_rate": 1.99795227662076e-05, "loss": 0.4805, "step": 1583 }, { "epoch": 0.043492586490939046, "grad_norm": 0.38506680727005005, "learning_rate": 1.99794951317418e-05, "loss": 0.6261, "step": 1584 }, { "epoch": 0.04352004393190555, "grad_norm": 0.36565372347831726, "learning_rate": 1.9979467478661053e-05, "loss": 0.5383, "step": 1585 }, { "epoch": 0.04354750137287205, "grad_norm": 0.3636868894100189, "learning_rate": 1.9979439806965416e-05, "loss": 0.5559, "step": 1586 }, { "epoch": 0.04357495881383855, "grad_norm": 0.36844295263290405, "learning_rate": 1.9979412116654936e-05, "loss": 0.5711, "step": 1587 }, { "epoch": 0.04360241625480505, "grad_norm": 0.39611735939979553, "learning_rate": 1.997938440772967e-05, "loss": 0.568, "step": 1588 }, { "epoch": 0.04362987369577155, "grad_norm": 0.4090544581413269, "learning_rate": 1.9979356680189666e-05, "loss": 0.6429, "step": 1589 }, { "epoch": 0.04365733113673806, "grad_norm": 0.3816700279712677, "learning_rate": 1.9979328934034978e-05, "loss": 0.5315, "step": 1590 }, { "epoch": 0.04368478857770456, "grad_norm": 0.3633674681186676, "learning_rate": 1.9979301169265656e-05, "loss": 0.5525, "step": 1591 }, { "epoch": 0.04371224601867106, "grad_norm": 0.367925763130188, "learning_rate": 1.9979273385881753e-05, "loss": 0.5735, "step": 1592 }, { "epoch": 0.04373970345963756, "grad_norm": 0.4352996349334717, "learning_rate": 1.997924558388332e-05, "loss": 0.609, "step": 1593 }, { "epoch": 0.04376716090060406, "grad_norm": 0.3500840961933136, "learning_rate": 1.9979217763270408e-05, "loss": 0.546, "step": 1594 }, { "epoch": 0.043794618341570564, "grad_norm": 0.4347485601902008, "learning_rate": 1.997918992404307e-05, "loss": 0.6044, "step": 1595 }, { "epoch": 0.043822075782537065, "grad_norm": 0.3432178497314453, "learning_rate": 1.9979162066201357e-05, "loss": 0.5072, "step": 1596 }, { "epoch": 0.04384953322350357, "grad_norm": 0.41670942306518555, "learning_rate": 1.997913418974532e-05, "loss": 0.6063, "step": 1597 }, { "epoch": 0.043876990664470074, "grad_norm": 0.35011470317840576, "learning_rate": 1.9979106294675017e-05, "loss": 0.492, "step": 1598 }, { "epoch": 0.043904448105436575, "grad_norm": 0.4062155783176422, "learning_rate": 1.9979078380990493e-05, "loss": 0.6162, "step": 1599 }, { "epoch": 0.043931905546403076, "grad_norm": 0.351869136095047, "learning_rate": 1.9979050448691804e-05, "loss": 0.6121, "step": 1600 }, { "epoch": 0.04395936298736958, "grad_norm": 0.3907490074634552, "learning_rate": 1.9979022497779003e-05, "loss": 0.6211, "step": 1601 }, { "epoch": 0.04398682042833608, "grad_norm": 0.4329334795475006, "learning_rate": 1.9978994528252135e-05, "loss": 0.6195, "step": 1602 }, { "epoch": 0.04401427786930258, "grad_norm": 0.4141519069671631, "learning_rate": 1.9978966540111264e-05, "loss": 0.582, "step": 1603 }, { "epoch": 0.04404173531026908, "grad_norm": 0.357405424118042, "learning_rate": 1.997893853335643e-05, "loss": 0.5466, "step": 1604 }, { "epoch": 0.04406919275123559, "grad_norm": 0.35625141859054565, "learning_rate": 1.997891050798769e-05, "loss": 0.4719, "step": 1605 }, { "epoch": 0.04409665019220209, "grad_norm": 0.37754884362220764, "learning_rate": 1.99788824640051e-05, "loss": 0.576, "step": 1606 }, { "epoch": 0.04412410763316859, "grad_norm": 0.3458520472049713, "learning_rate": 1.997885440140871e-05, "loss": 0.5297, "step": 1607 }, { "epoch": 0.04415156507413509, "grad_norm": 0.37040504813194275, "learning_rate": 1.9978826320198573e-05, "loss": 0.5742, "step": 1608 }, { "epoch": 0.04417902251510159, "grad_norm": 0.42641469836235046, "learning_rate": 1.9978798220374734e-05, "loss": 0.6425, "step": 1609 }, { "epoch": 0.04420647995606809, "grad_norm": 0.3348250091075897, "learning_rate": 1.9978770101937255e-05, "loss": 0.5658, "step": 1610 }, { "epoch": 0.044233937397034594, "grad_norm": 0.37275129556655884, "learning_rate": 1.9978741964886185e-05, "loss": 0.6063, "step": 1611 }, { "epoch": 0.0442613948380011, "grad_norm": 0.45644208788871765, "learning_rate": 1.9978713809221577e-05, "loss": 0.647, "step": 1612 }, { "epoch": 0.0442888522789676, "grad_norm": 0.3386860191822052, "learning_rate": 1.997868563494348e-05, "loss": 0.5787, "step": 1613 }, { "epoch": 0.044316309719934104, "grad_norm": 0.9339507222175598, "learning_rate": 1.9978657442051955e-05, "loss": 0.5359, "step": 1614 }, { "epoch": 0.044343767160900605, "grad_norm": 0.4120176434516907, "learning_rate": 1.9978629230547045e-05, "loss": 0.529, "step": 1615 }, { "epoch": 0.044371224601867106, "grad_norm": 0.39538586139678955, "learning_rate": 1.9978601000428807e-05, "loss": 0.5648, "step": 1616 }, { "epoch": 0.04439868204283361, "grad_norm": 0.3849935233592987, "learning_rate": 1.9978572751697294e-05, "loss": 0.5375, "step": 1617 }, { "epoch": 0.04442613948380011, "grad_norm": 0.39220142364501953, "learning_rate": 1.9978544484352557e-05, "loss": 0.6646, "step": 1618 }, { "epoch": 0.04445359692476661, "grad_norm": 0.3432864546775818, "learning_rate": 1.997851619839465e-05, "loss": 0.5156, "step": 1619 }, { "epoch": 0.044481054365733116, "grad_norm": 0.353623628616333, "learning_rate": 1.9978487893823626e-05, "loss": 0.5652, "step": 1620 }, { "epoch": 0.04450851180669962, "grad_norm": 0.37998852133750916, "learning_rate": 1.997845957063954e-05, "loss": 0.5714, "step": 1621 }, { "epoch": 0.04453596924766612, "grad_norm": 0.3396022617816925, "learning_rate": 1.9978431228842437e-05, "loss": 0.5608, "step": 1622 }, { "epoch": 0.04456342668863262, "grad_norm": 0.3922903537750244, "learning_rate": 1.9978402868432378e-05, "loss": 0.5809, "step": 1623 }, { "epoch": 0.04459088412959912, "grad_norm": 0.3459933400154114, "learning_rate": 1.997837448940941e-05, "loss": 0.4906, "step": 1624 }, { "epoch": 0.04461834157056562, "grad_norm": 0.3655529022216797, "learning_rate": 1.9978346091773595e-05, "loss": 0.5151, "step": 1625 }, { "epoch": 0.04464579901153212, "grad_norm": 0.43729856610298157, "learning_rate": 1.9978317675524975e-05, "loss": 0.5493, "step": 1626 }, { "epoch": 0.04467325645249863, "grad_norm": 0.3376643657684326, "learning_rate": 1.997828924066361e-05, "loss": 0.5337, "step": 1627 }, { "epoch": 0.04470071389346513, "grad_norm": 0.3751026391983032, "learning_rate": 1.997826078718955e-05, "loss": 0.5673, "step": 1628 }, { "epoch": 0.04472817133443163, "grad_norm": 0.34166353940963745, "learning_rate": 1.997823231510285e-05, "loss": 0.584, "step": 1629 }, { "epoch": 0.04475562877539813, "grad_norm": 0.40826261043548584, "learning_rate": 1.997820382440356e-05, "loss": 0.6144, "step": 1630 }, { "epoch": 0.044783086216364634, "grad_norm": 0.4053293466567993, "learning_rate": 1.9978175315091733e-05, "loss": 0.6462, "step": 1631 }, { "epoch": 0.044810543657331135, "grad_norm": 0.46763524413108826, "learning_rate": 1.9978146787167427e-05, "loss": 0.5399, "step": 1632 }, { "epoch": 0.044838001098297636, "grad_norm": 0.3321897089481354, "learning_rate": 1.9978118240630693e-05, "loss": 0.5876, "step": 1633 }, { "epoch": 0.04486545853926414, "grad_norm": 0.33027157187461853, "learning_rate": 1.9978089675481583e-05, "loss": 0.513, "step": 1634 }, { "epoch": 0.044892915980230645, "grad_norm": 0.39297324419021606, "learning_rate": 1.9978061091720154e-05, "loss": 0.5493, "step": 1635 }, { "epoch": 0.044920373421197146, "grad_norm": 0.3836534023284912, "learning_rate": 1.9978032489346453e-05, "loss": 0.5183, "step": 1636 }, { "epoch": 0.04494783086216365, "grad_norm": 0.35657554864883423, "learning_rate": 1.9978003868360538e-05, "loss": 0.5489, "step": 1637 }, { "epoch": 0.04497528830313015, "grad_norm": 0.36772316694259644, "learning_rate": 1.9977975228762463e-05, "loss": 0.5597, "step": 1638 }, { "epoch": 0.04500274574409665, "grad_norm": 0.3156841993331909, "learning_rate": 1.9977946570552276e-05, "loss": 0.485, "step": 1639 }, { "epoch": 0.04503020318506315, "grad_norm": 0.351209431886673, "learning_rate": 1.9977917893730037e-05, "loss": 0.4685, "step": 1640 }, { "epoch": 0.04505766062602965, "grad_norm": 0.37185484170913696, "learning_rate": 1.9977889198295794e-05, "loss": 0.5357, "step": 1641 }, { "epoch": 0.04508511806699616, "grad_norm": 0.4787532091140747, "learning_rate": 1.9977860484249605e-05, "loss": 0.5116, "step": 1642 }, { "epoch": 0.04511257550796266, "grad_norm": 0.43139660358428955, "learning_rate": 1.9977831751591523e-05, "loss": 0.5733, "step": 1643 }, { "epoch": 0.04514003294892916, "grad_norm": 0.3786834478378296, "learning_rate": 1.99778030003216e-05, "loss": 0.6324, "step": 1644 }, { "epoch": 0.04516749038989566, "grad_norm": 0.33062466979026794, "learning_rate": 1.9977774230439887e-05, "loss": 0.4149, "step": 1645 }, { "epoch": 0.04519494783086216, "grad_norm": 0.38944971561431885, "learning_rate": 1.997774544194644e-05, "loss": 0.6769, "step": 1646 }, { "epoch": 0.045222405271828664, "grad_norm": 0.35034096240997314, "learning_rate": 1.9977716634841315e-05, "loss": 0.61, "step": 1647 }, { "epoch": 0.045249862712795165, "grad_norm": 0.3655945062637329, "learning_rate": 1.9977687809124565e-05, "loss": 0.606, "step": 1648 }, { "epoch": 0.045277320153761666, "grad_norm": 0.3509422838687897, "learning_rate": 1.997765896479624e-05, "loss": 0.4848, "step": 1649 }, { "epoch": 0.045304777594728174, "grad_norm": 0.3329755961894989, "learning_rate": 1.9977630101856402e-05, "loss": 0.5234, "step": 1650 }, { "epoch": 0.045332235035694675, "grad_norm": 0.3257162868976593, "learning_rate": 1.9977601220305093e-05, "loss": 0.5142, "step": 1651 }, { "epoch": 0.045359692476661176, "grad_norm": 1.3141242265701294, "learning_rate": 1.9977572320142374e-05, "loss": 0.5788, "step": 1652 }, { "epoch": 0.04538714991762768, "grad_norm": 0.3501489758491516, "learning_rate": 1.99775434013683e-05, "loss": 0.5493, "step": 1653 }, { "epoch": 0.04541460735859418, "grad_norm": 0.3833545744419098, "learning_rate": 1.9977514463982922e-05, "loss": 0.6259, "step": 1654 }, { "epoch": 0.04544206479956068, "grad_norm": 0.340162992477417, "learning_rate": 1.9977485507986298e-05, "loss": 0.5203, "step": 1655 }, { "epoch": 0.04546952224052718, "grad_norm": 0.41600340604782104, "learning_rate": 1.9977456533378476e-05, "loss": 0.6536, "step": 1656 }, { "epoch": 0.04549697968149369, "grad_norm": 0.4003397524356842, "learning_rate": 1.9977427540159514e-05, "loss": 0.5865, "step": 1657 }, { "epoch": 0.04552443712246019, "grad_norm": 0.39493659138679504, "learning_rate": 1.997739852832947e-05, "loss": 0.538, "step": 1658 }, { "epoch": 0.04555189456342669, "grad_norm": 0.3891777992248535, "learning_rate": 1.9977369497888387e-05, "loss": 0.607, "step": 1659 }, { "epoch": 0.04557935200439319, "grad_norm": 0.42392173409461975, "learning_rate": 1.9977340448836327e-05, "loss": 0.548, "step": 1660 }, { "epoch": 0.04560680944535969, "grad_norm": 0.3889339864253998, "learning_rate": 1.997731138117334e-05, "loss": 0.4738, "step": 1661 }, { "epoch": 0.04563426688632619, "grad_norm": 0.5017538070678711, "learning_rate": 1.9977282294899488e-05, "loss": 0.5853, "step": 1662 }, { "epoch": 0.045661724327292694, "grad_norm": 0.3721320629119873, "learning_rate": 1.9977253190014817e-05, "loss": 0.5246, "step": 1663 }, { "epoch": 0.0456891817682592, "grad_norm": 0.39119404554367065, "learning_rate": 1.9977224066519386e-05, "loss": 0.5307, "step": 1664 }, { "epoch": 0.0457166392092257, "grad_norm": 0.38130030035972595, "learning_rate": 1.997719492441325e-05, "loss": 0.5867, "step": 1665 }, { "epoch": 0.045744096650192204, "grad_norm": 0.3553202748298645, "learning_rate": 1.9977165763696455e-05, "loss": 0.5511, "step": 1666 }, { "epoch": 0.045771554091158705, "grad_norm": 0.48479706048965454, "learning_rate": 1.9977136584369064e-05, "loss": 0.5789, "step": 1667 }, { "epoch": 0.045799011532125206, "grad_norm": 0.3920189142227173, "learning_rate": 1.9977107386431133e-05, "loss": 0.56, "step": 1668 }, { "epoch": 0.04582646897309171, "grad_norm": 0.3380624055862427, "learning_rate": 1.997707816988271e-05, "loss": 0.4645, "step": 1669 }, { "epoch": 0.04585392641405821, "grad_norm": 0.3563838601112366, "learning_rate": 1.997704893472385e-05, "loss": 0.5059, "step": 1670 }, { "epoch": 0.04588138385502471, "grad_norm": 0.3170499801635742, "learning_rate": 1.9977019680954612e-05, "loss": 0.4485, "step": 1671 }, { "epoch": 0.045908841295991216, "grad_norm": 0.38907405734062195, "learning_rate": 1.9976990408575044e-05, "loss": 0.5798, "step": 1672 }, { "epoch": 0.04593629873695772, "grad_norm": 0.35074278712272644, "learning_rate": 1.9976961117585207e-05, "loss": 0.501, "step": 1673 }, { "epoch": 0.04596375617792422, "grad_norm": 0.31192779541015625, "learning_rate": 1.9976931807985155e-05, "loss": 0.4577, "step": 1674 }, { "epoch": 0.04599121361889072, "grad_norm": 0.3812955319881439, "learning_rate": 1.997690247977494e-05, "loss": 0.5114, "step": 1675 }, { "epoch": 0.04601867105985722, "grad_norm": 0.4271005094051361, "learning_rate": 1.9976873132954616e-05, "loss": 0.6572, "step": 1676 }, { "epoch": 0.04604612850082372, "grad_norm": 0.3593882918357849, "learning_rate": 1.997684376752424e-05, "loss": 0.6548, "step": 1677 }, { "epoch": 0.04607358594179022, "grad_norm": 0.3889383375644684, "learning_rate": 1.9976814383483867e-05, "loss": 0.5211, "step": 1678 }, { "epoch": 0.04610104338275673, "grad_norm": 0.37002500891685486, "learning_rate": 1.997678498083355e-05, "loss": 0.612, "step": 1679 }, { "epoch": 0.04612850082372323, "grad_norm": 0.3793419301509857, "learning_rate": 1.9976755559573345e-05, "loss": 0.525, "step": 1680 }, { "epoch": 0.04615595826468973, "grad_norm": 0.3515072464942932, "learning_rate": 1.997672611970331e-05, "loss": 0.4385, "step": 1681 }, { "epoch": 0.04618341570565623, "grad_norm": 0.38817518949508667, "learning_rate": 1.997669666122349e-05, "loss": 0.6007, "step": 1682 }, { "epoch": 0.046210873146622734, "grad_norm": 0.4044186770915985, "learning_rate": 1.997666718413395e-05, "loss": 0.6078, "step": 1683 }, { "epoch": 0.046238330587589235, "grad_norm": 0.45998576283454895, "learning_rate": 1.997663768843474e-05, "loss": 0.5569, "step": 1684 }, { "epoch": 0.046265788028555736, "grad_norm": 0.4107573628425598, "learning_rate": 1.9976608174125918e-05, "loss": 0.4504, "step": 1685 }, { "epoch": 0.04629324546952224, "grad_norm": 0.404364675283432, "learning_rate": 1.9976578641207537e-05, "loss": 0.5889, "step": 1686 }, { "epoch": 0.046320702910488745, "grad_norm": 0.3890608847141266, "learning_rate": 1.9976549089679652e-05, "loss": 0.5216, "step": 1687 }, { "epoch": 0.046348160351455246, "grad_norm": 0.4371418058872223, "learning_rate": 1.9976519519542325e-05, "loss": 0.5445, "step": 1688 }, { "epoch": 0.04637561779242175, "grad_norm": 0.40834856033325195, "learning_rate": 1.9976489930795596e-05, "loss": 0.5831, "step": 1689 }, { "epoch": 0.04640307523338825, "grad_norm": 0.4211842715740204, "learning_rate": 1.9976460323439536e-05, "loss": 0.6272, "step": 1690 }, { "epoch": 0.04643053267435475, "grad_norm": 0.3223666250705719, "learning_rate": 1.997643069747419e-05, "loss": 0.5498, "step": 1691 }, { "epoch": 0.04645799011532125, "grad_norm": 0.36402952671051025, "learning_rate": 1.9976401052899617e-05, "loss": 0.5191, "step": 1692 }, { "epoch": 0.04648544755628775, "grad_norm": 0.7898536324501038, "learning_rate": 1.9976371389715873e-05, "loss": 0.579, "step": 1693 }, { "epoch": 0.04651290499725426, "grad_norm": 0.4162236154079437, "learning_rate": 1.997634170792301e-05, "loss": 0.5199, "step": 1694 }, { "epoch": 0.04654036243822076, "grad_norm": 0.36921828985214233, "learning_rate": 1.9976312007521087e-05, "loss": 0.5826, "step": 1695 }, { "epoch": 0.04656781987918726, "grad_norm": 0.3712034821510315, "learning_rate": 1.9976282288510157e-05, "loss": 0.5095, "step": 1696 }, { "epoch": 0.04659527732015376, "grad_norm": 0.49292856454849243, "learning_rate": 1.9976252550890282e-05, "loss": 0.5593, "step": 1697 }, { "epoch": 0.04662273476112026, "grad_norm": 0.39175304770469666, "learning_rate": 1.9976222794661504e-05, "loss": 0.6076, "step": 1698 }, { "epoch": 0.046650192202086764, "grad_norm": 0.34162437915802, "learning_rate": 1.997619301982389e-05, "loss": 0.5701, "step": 1699 }, { "epoch": 0.046677649643053265, "grad_norm": 0.4131351411342621, "learning_rate": 1.9976163226377493e-05, "loss": 0.5755, "step": 1700 }, { "epoch": 0.046705107084019766, "grad_norm": 0.37309128046035767, "learning_rate": 1.997613341432237e-05, "loss": 0.5804, "step": 1701 }, { "epoch": 0.046732564524986274, "grad_norm": 0.3355151414871216, "learning_rate": 1.9976103583658567e-05, "loss": 0.5347, "step": 1702 }, { "epoch": 0.046760021965952775, "grad_norm": 0.3353118300437927, "learning_rate": 1.997607373438615e-05, "loss": 0.5515, "step": 1703 }, { "epoch": 0.046787479406919276, "grad_norm": 0.3552364408969879, "learning_rate": 1.997604386650517e-05, "loss": 0.5306, "step": 1704 }, { "epoch": 0.04681493684788578, "grad_norm": 0.354777991771698, "learning_rate": 1.9976013980015686e-05, "loss": 0.6186, "step": 1705 }, { "epoch": 0.04684239428885228, "grad_norm": 0.3374803066253662, "learning_rate": 1.9975984074917753e-05, "loss": 0.489, "step": 1706 }, { "epoch": 0.04686985172981878, "grad_norm": 0.3448299169540405, "learning_rate": 1.9975954151211425e-05, "loss": 0.5755, "step": 1707 }, { "epoch": 0.04689730917078528, "grad_norm": 0.3431239724159241, "learning_rate": 1.9975924208896758e-05, "loss": 0.6134, "step": 1708 }, { "epoch": 0.04692476661175179, "grad_norm": 0.42584022879600525, "learning_rate": 1.997589424797381e-05, "loss": 0.6296, "step": 1709 }, { "epoch": 0.04695222405271829, "grad_norm": 0.43279021978378296, "learning_rate": 1.997586426844263e-05, "loss": 0.4994, "step": 1710 }, { "epoch": 0.04697968149368479, "grad_norm": 0.376655638217926, "learning_rate": 1.9975834270303286e-05, "loss": 0.5828, "step": 1711 }, { "epoch": 0.04700713893465129, "grad_norm": 0.42796212434768677, "learning_rate": 1.9975804253555827e-05, "loss": 0.6233, "step": 1712 }, { "epoch": 0.04703459637561779, "grad_norm": 0.3703799843788147, "learning_rate": 1.9975774218200307e-05, "loss": 0.6113, "step": 1713 }, { "epoch": 0.04706205381658429, "grad_norm": 0.3934277594089508, "learning_rate": 1.997574416423678e-05, "loss": 0.6374, "step": 1714 }, { "epoch": 0.047089511257550794, "grad_norm": 0.35334357619285583, "learning_rate": 1.9975714091665313e-05, "loss": 0.5808, "step": 1715 }, { "epoch": 0.047116968698517295, "grad_norm": 0.35406729578971863, "learning_rate": 1.9975684000485952e-05, "loss": 0.5039, "step": 1716 }, { "epoch": 0.0471444261394838, "grad_norm": 0.36556097865104675, "learning_rate": 1.997565389069876e-05, "loss": 0.5895, "step": 1717 }, { "epoch": 0.047171883580450304, "grad_norm": 0.44138839840888977, "learning_rate": 1.9975623762303783e-05, "loss": 0.568, "step": 1718 }, { "epoch": 0.047199341021416805, "grad_norm": 0.3740490972995758, "learning_rate": 1.9975593615301087e-05, "loss": 0.5476, "step": 1719 }, { "epoch": 0.047226798462383306, "grad_norm": 0.4442768096923828, "learning_rate": 1.9975563449690725e-05, "loss": 0.6089, "step": 1720 }, { "epoch": 0.04725425590334981, "grad_norm": 0.3277495801448822, "learning_rate": 1.9975533265472756e-05, "loss": 0.5346, "step": 1721 }, { "epoch": 0.04728171334431631, "grad_norm": 0.3677663207054138, "learning_rate": 1.997550306264723e-05, "loss": 0.6225, "step": 1722 }, { "epoch": 0.04730917078528281, "grad_norm": 0.3433849513530731, "learning_rate": 1.9975472841214206e-05, "loss": 0.5522, "step": 1723 }, { "epoch": 0.04733662822624932, "grad_norm": 0.3698031008243561, "learning_rate": 1.9975442601173747e-05, "loss": 0.5432, "step": 1724 }, { "epoch": 0.04736408566721582, "grad_norm": 0.3758487105369568, "learning_rate": 1.99754123425259e-05, "loss": 0.5289, "step": 1725 }, { "epoch": 0.04739154310818232, "grad_norm": 0.3385457396507263, "learning_rate": 1.9975382065270725e-05, "loss": 0.5202, "step": 1726 }, { "epoch": 0.04741900054914882, "grad_norm": 0.37068232893943787, "learning_rate": 1.997535176940828e-05, "loss": 0.6018, "step": 1727 }, { "epoch": 0.04744645799011532, "grad_norm": 0.4369012415409088, "learning_rate": 1.997532145493862e-05, "loss": 0.6161, "step": 1728 }, { "epoch": 0.04747391543108182, "grad_norm": 0.38823720812797546, "learning_rate": 1.99752911218618e-05, "loss": 0.5609, "step": 1729 }, { "epoch": 0.04750137287204832, "grad_norm": 0.40428680181503296, "learning_rate": 1.997526077017788e-05, "loss": 0.6734, "step": 1730 }, { "epoch": 0.04752883031301483, "grad_norm": 0.3778148889541626, "learning_rate": 1.9975230399886914e-05, "loss": 0.6295, "step": 1731 }, { "epoch": 0.04755628775398133, "grad_norm": 0.5080970525741577, "learning_rate": 1.997520001098896e-05, "loss": 0.6168, "step": 1732 }, { "epoch": 0.04758374519494783, "grad_norm": 0.4745655059814453, "learning_rate": 1.9975169603484073e-05, "loss": 0.5916, "step": 1733 }, { "epoch": 0.04761120263591433, "grad_norm": 0.34783831238746643, "learning_rate": 1.9975139177372312e-05, "loss": 0.53, "step": 1734 }, { "epoch": 0.047638660076880834, "grad_norm": 0.3691735565662384, "learning_rate": 1.9975108732653738e-05, "loss": 0.5076, "step": 1735 }, { "epoch": 0.047666117517847335, "grad_norm": 0.3799746334552765, "learning_rate": 1.9975078269328394e-05, "loss": 0.5494, "step": 1736 }, { "epoch": 0.047693574958813836, "grad_norm": 0.38361912965774536, "learning_rate": 1.997504778739635e-05, "loss": 0.6442, "step": 1737 }, { "epoch": 0.04772103239978034, "grad_norm": 0.4452016055583954, "learning_rate": 1.997501728685766e-05, "loss": 0.5577, "step": 1738 }, { "epoch": 0.047748489840746845, "grad_norm": 1.1470799446105957, "learning_rate": 1.9974986767712373e-05, "loss": 0.5293, "step": 1739 }, { "epoch": 0.047775947281713346, "grad_norm": 0.35418373346328735, "learning_rate": 1.9974956229960555e-05, "loss": 0.5092, "step": 1740 }, { "epoch": 0.04780340472267985, "grad_norm": 0.35853996872901917, "learning_rate": 1.9974925673602263e-05, "loss": 0.5795, "step": 1741 }, { "epoch": 0.04783086216364635, "grad_norm": 0.33686500787734985, "learning_rate": 1.9974895098637546e-05, "loss": 0.5617, "step": 1742 }, { "epoch": 0.04785831960461285, "grad_norm": 0.40688657760620117, "learning_rate": 1.997486450506647e-05, "loss": 0.5466, "step": 1743 }, { "epoch": 0.04788577704557935, "grad_norm": 0.3369748294353485, "learning_rate": 1.9974833892889086e-05, "loss": 0.4981, "step": 1744 }, { "epoch": 0.04791323448654585, "grad_norm": 0.34575265645980835, "learning_rate": 1.9974803262105454e-05, "loss": 0.6352, "step": 1745 }, { "epoch": 0.04794069192751236, "grad_norm": 0.3886786699295044, "learning_rate": 1.997477261271563e-05, "loss": 0.5706, "step": 1746 }, { "epoch": 0.04796814936847886, "grad_norm": 0.38180017471313477, "learning_rate": 1.997474194471967e-05, "loss": 0.5854, "step": 1747 }, { "epoch": 0.04799560680944536, "grad_norm": 0.39295628666877747, "learning_rate": 1.9974711258117635e-05, "loss": 0.5254, "step": 1748 }, { "epoch": 0.04802306425041186, "grad_norm": 0.40700751543045044, "learning_rate": 1.9974680552909578e-05, "loss": 0.5521, "step": 1749 }, { "epoch": 0.04805052169137836, "grad_norm": 0.3621041774749756, "learning_rate": 1.9974649829095562e-05, "loss": 0.5475, "step": 1750 }, { "epoch": 0.048077979132344864, "grad_norm": 0.33428534865379333, "learning_rate": 1.9974619086675633e-05, "loss": 0.5676, "step": 1751 }, { "epoch": 0.048105436573311365, "grad_norm": 0.4122284948825836, "learning_rate": 1.9974588325649864e-05, "loss": 0.622, "step": 1752 }, { "epoch": 0.048132894014277866, "grad_norm": 0.3641304075717926, "learning_rate": 1.99745575460183e-05, "loss": 0.5141, "step": 1753 }, { "epoch": 0.048160351455244374, "grad_norm": 0.3531661331653595, "learning_rate": 1.9974526747781006e-05, "loss": 0.6285, "step": 1754 }, { "epoch": 0.048187808896210875, "grad_norm": 0.40635645389556885, "learning_rate": 1.997449593093803e-05, "loss": 0.52, "step": 1755 }, { "epoch": 0.048215266337177376, "grad_norm": 0.39193737506866455, "learning_rate": 1.997446509548944e-05, "loss": 0.5684, "step": 1756 }, { "epoch": 0.04824272377814388, "grad_norm": 0.3323000967502594, "learning_rate": 1.997443424143529e-05, "loss": 0.5578, "step": 1757 }, { "epoch": 0.04827018121911038, "grad_norm": 0.4137215316295624, "learning_rate": 1.9974403368775636e-05, "loss": 0.6293, "step": 1758 }, { "epoch": 0.04829763866007688, "grad_norm": 0.6278602480888367, "learning_rate": 1.9974372477510534e-05, "loss": 0.6141, "step": 1759 }, { "epoch": 0.04832509610104338, "grad_norm": 0.347959965467453, "learning_rate": 1.9974341567640046e-05, "loss": 0.586, "step": 1760 }, { "epoch": 0.04835255354200989, "grad_norm": 0.36387717723846436, "learning_rate": 1.997431063916423e-05, "loss": 0.5306, "step": 1761 }, { "epoch": 0.04838001098297639, "grad_norm": 0.3765870928764343, "learning_rate": 1.9974279692083135e-05, "loss": 0.5817, "step": 1762 }, { "epoch": 0.04840746842394289, "grad_norm": 0.3361295759677887, "learning_rate": 1.9974248726396828e-05, "loss": 0.5671, "step": 1763 }, { "epoch": 0.04843492586490939, "grad_norm": 0.4213378131389618, "learning_rate": 1.9974217742105364e-05, "loss": 0.6365, "step": 1764 }, { "epoch": 0.04846238330587589, "grad_norm": 0.3795631229877472, "learning_rate": 1.9974186739208798e-05, "loss": 0.5625, "step": 1765 }, { "epoch": 0.04848984074684239, "grad_norm": 0.3485829532146454, "learning_rate": 1.9974155717707194e-05, "loss": 0.5763, "step": 1766 }, { "epoch": 0.048517298187808894, "grad_norm": 0.3717228174209595, "learning_rate": 1.9974124677600603e-05, "loss": 0.6544, "step": 1767 }, { "epoch": 0.048544755628775395, "grad_norm": 0.40393179655075073, "learning_rate": 1.997409361888909e-05, "loss": 0.5798, "step": 1768 }, { "epoch": 0.0485722130697419, "grad_norm": 0.43523648381233215, "learning_rate": 1.997406254157271e-05, "loss": 0.6642, "step": 1769 }, { "epoch": 0.048599670510708404, "grad_norm": 0.3997601568698883, "learning_rate": 1.9974031445651515e-05, "loss": 0.5706, "step": 1770 }, { "epoch": 0.048627127951674905, "grad_norm": 0.35494181513786316, "learning_rate": 1.9974000331125568e-05, "loss": 0.5046, "step": 1771 }, { "epoch": 0.048654585392641406, "grad_norm": 0.3958304822444916, "learning_rate": 1.9973969197994928e-05, "loss": 0.6218, "step": 1772 }, { "epoch": 0.04868204283360791, "grad_norm": 0.3997637927532196, "learning_rate": 1.9973938046259653e-05, "loss": 0.5732, "step": 1773 }, { "epoch": 0.04870950027457441, "grad_norm": 0.3628101050853729, "learning_rate": 1.99739068759198e-05, "loss": 0.491, "step": 1774 }, { "epoch": 0.04873695771554091, "grad_norm": 0.395282506942749, "learning_rate": 1.9973875686975427e-05, "loss": 0.508, "step": 1775 }, { "epoch": 0.04876441515650742, "grad_norm": 0.3738830089569092, "learning_rate": 1.9973844479426593e-05, "loss": 0.5556, "step": 1776 }, { "epoch": 0.04879187259747392, "grad_norm": 0.37502390146255493, "learning_rate": 1.997381325327336e-05, "loss": 0.5722, "step": 1777 }, { "epoch": 0.04881933003844042, "grad_norm": 0.40069982409477234, "learning_rate": 1.9973782008515777e-05, "loss": 0.6521, "step": 1778 }, { "epoch": 0.04884678747940692, "grad_norm": 0.393072247505188, "learning_rate": 1.997375074515391e-05, "loss": 0.5564, "step": 1779 }, { "epoch": 0.04887424492037342, "grad_norm": 0.3785655200481415, "learning_rate": 1.997371946318781e-05, "loss": 0.6237, "step": 1780 }, { "epoch": 0.04890170236133992, "grad_norm": 0.35999783873558044, "learning_rate": 1.9973688162617545e-05, "loss": 0.5459, "step": 1781 }, { "epoch": 0.04892915980230642, "grad_norm": 0.4492824375629425, "learning_rate": 1.9973656843443162e-05, "loss": 0.503, "step": 1782 }, { "epoch": 0.048956617243272924, "grad_norm": 0.39231082797050476, "learning_rate": 1.9973625505664734e-05, "loss": 0.544, "step": 1783 }, { "epoch": 0.04898407468423943, "grad_norm": 0.3255685865879059, "learning_rate": 1.9973594149282305e-05, "loss": 0.4917, "step": 1784 }, { "epoch": 0.04901153212520593, "grad_norm": 0.43804335594177246, "learning_rate": 1.9973562774295942e-05, "loss": 0.6478, "step": 1785 }, { "epoch": 0.04903898956617243, "grad_norm": 0.39409971237182617, "learning_rate": 1.9973531380705703e-05, "loss": 0.6134, "step": 1786 }, { "epoch": 0.049066447007138934, "grad_norm": 0.3804078996181488, "learning_rate": 1.9973499968511643e-05, "loss": 0.5475, "step": 1787 }, { "epoch": 0.049093904448105435, "grad_norm": 0.34638506174087524, "learning_rate": 1.9973468537713822e-05, "loss": 0.6199, "step": 1788 }, { "epoch": 0.049121361889071936, "grad_norm": 0.39052829146385193, "learning_rate": 1.9973437088312298e-05, "loss": 0.6045, "step": 1789 }, { "epoch": 0.04914881933003844, "grad_norm": 0.36124342679977417, "learning_rate": 1.997340562030713e-05, "loss": 0.5874, "step": 1790 }, { "epoch": 0.049176276771004945, "grad_norm": 0.4390476644039154, "learning_rate": 1.997337413369838e-05, "loss": 0.5432, "step": 1791 }, { "epoch": 0.049203734211971446, "grad_norm": 0.34413209557533264, "learning_rate": 1.9973342628486106e-05, "loss": 0.5558, "step": 1792 }, { "epoch": 0.04923119165293795, "grad_norm": 0.39440158009529114, "learning_rate": 1.9973311104670363e-05, "loss": 0.5713, "step": 1793 }, { "epoch": 0.04925864909390445, "grad_norm": 0.36803197860717773, "learning_rate": 1.9973279562251207e-05, "loss": 0.5472, "step": 1794 }, { "epoch": 0.04928610653487095, "grad_norm": 0.3541286289691925, "learning_rate": 1.9973248001228708e-05, "loss": 0.5591, "step": 1795 }, { "epoch": 0.04931356397583745, "grad_norm": 0.4497540295124054, "learning_rate": 1.9973216421602915e-05, "loss": 0.5405, "step": 1796 }, { "epoch": 0.04934102141680395, "grad_norm": 0.33534368872642517, "learning_rate": 1.997318482337389e-05, "loss": 0.5277, "step": 1797 }, { "epoch": 0.04936847885777046, "grad_norm": 0.38229721784591675, "learning_rate": 1.9973153206541695e-05, "loss": 0.5436, "step": 1798 }, { "epoch": 0.04939593629873696, "grad_norm": 0.39799225330352783, "learning_rate": 1.9973121571106385e-05, "loss": 0.5563, "step": 1799 }, { "epoch": 0.04942339373970346, "grad_norm": 0.40024998784065247, "learning_rate": 1.997308991706802e-05, "loss": 0.5555, "step": 1800 }, { "epoch": 0.04945085118066996, "grad_norm": 0.5081374645233154, "learning_rate": 1.9973058244426663e-05, "loss": 0.5557, "step": 1801 }, { "epoch": 0.04947830862163646, "grad_norm": 0.3543853461742401, "learning_rate": 1.9973026553182362e-05, "loss": 0.5063, "step": 1802 }, { "epoch": 0.049505766062602964, "grad_norm": 0.4867360591888428, "learning_rate": 1.9972994843335188e-05, "loss": 0.65, "step": 1803 }, { "epoch": 0.049533223503569465, "grad_norm": 0.36622342467308044, "learning_rate": 1.9972963114885195e-05, "loss": 0.6245, "step": 1804 }, { "epoch": 0.049560680944535966, "grad_norm": 0.37493157386779785, "learning_rate": 1.997293136783244e-05, "loss": 0.5471, "step": 1805 }, { "epoch": 0.049588138385502474, "grad_norm": 0.37974101305007935, "learning_rate": 1.997289960217699e-05, "loss": 0.5557, "step": 1806 }, { "epoch": 0.049615595826468975, "grad_norm": 0.401750773191452, "learning_rate": 1.9972867817918896e-05, "loss": 0.5763, "step": 1807 }, { "epoch": 0.049643053267435476, "grad_norm": 0.3102671802043915, "learning_rate": 1.997283601505822e-05, "loss": 0.4984, "step": 1808 }, { "epoch": 0.04967051070840198, "grad_norm": 1.6420553922653198, "learning_rate": 1.9972804193595022e-05, "loss": 0.55, "step": 1809 }, { "epoch": 0.04969796814936848, "grad_norm": 0.3835165500640869, "learning_rate": 1.9972772353529363e-05, "loss": 0.5268, "step": 1810 }, { "epoch": 0.04972542559033498, "grad_norm": 0.33358123898506165, "learning_rate": 1.99727404948613e-05, "loss": 0.4984, "step": 1811 }, { "epoch": 0.04975288303130148, "grad_norm": 0.35960647463798523, "learning_rate": 1.9972708617590893e-05, "loss": 0.5046, "step": 1812 }, { "epoch": 0.04978034047226799, "grad_norm": 0.33748507499694824, "learning_rate": 1.9972676721718204e-05, "loss": 0.495, "step": 1813 }, { "epoch": 0.04980779791323449, "grad_norm": 0.38767698407173157, "learning_rate": 1.9972644807243286e-05, "loss": 0.5667, "step": 1814 }, { "epoch": 0.04983525535420099, "grad_norm": 0.35820385813713074, "learning_rate": 1.9972612874166203e-05, "loss": 0.6194, "step": 1815 }, { "epoch": 0.04986271279516749, "grad_norm": 0.30981922149658203, "learning_rate": 1.9972580922487016e-05, "loss": 0.4729, "step": 1816 }, { "epoch": 0.04989017023613399, "grad_norm": 0.37196460366249084, "learning_rate": 1.9972548952205783e-05, "loss": 0.5691, "step": 1817 }, { "epoch": 0.04991762767710049, "grad_norm": 0.4547102451324463, "learning_rate": 1.9972516963322564e-05, "loss": 0.6663, "step": 1818 }, { "epoch": 0.049945085118066994, "grad_norm": 0.39466118812561035, "learning_rate": 1.9972484955837414e-05, "loss": 0.5019, "step": 1819 }, { "epoch": 0.049972542559033495, "grad_norm": 0.3360445201396942, "learning_rate": 1.9972452929750398e-05, "loss": 0.5675, "step": 1820 }, { "epoch": 0.05, "grad_norm": 0.49518972635269165, "learning_rate": 1.9972420885061576e-05, "loss": 0.575, "step": 1821 }, { "epoch": 0.050027457440966504, "grad_norm": 0.3713243007659912, "learning_rate": 1.997238882177101e-05, "loss": 0.5736, "step": 1822 }, { "epoch": 0.050054914881933005, "grad_norm": 0.3652655780315399, "learning_rate": 1.9972356739878748e-05, "loss": 0.4706, "step": 1823 }, { "epoch": 0.050082372322899506, "grad_norm": 0.36481449007987976, "learning_rate": 1.997232463938486e-05, "loss": 0.6004, "step": 1824 }, { "epoch": 0.05010982976386601, "grad_norm": 0.39170143008232117, "learning_rate": 1.9972292520289405e-05, "loss": 0.5661, "step": 1825 }, { "epoch": 0.05013728720483251, "grad_norm": 0.4235903024673462, "learning_rate": 1.9972260382592442e-05, "loss": 0.5977, "step": 1826 }, { "epoch": 0.05016474464579901, "grad_norm": 0.36186689138412476, "learning_rate": 1.9972228226294032e-05, "loss": 0.5678, "step": 1827 }, { "epoch": 0.05019220208676552, "grad_norm": 0.4754292964935303, "learning_rate": 1.997219605139423e-05, "loss": 0.6312, "step": 1828 }, { "epoch": 0.05021965952773202, "grad_norm": 0.4184439480304718, "learning_rate": 1.9972163857893103e-05, "loss": 0.5948, "step": 1829 }, { "epoch": 0.05024711696869852, "grad_norm": 0.3711458742618561, "learning_rate": 1.9972131645790705e-05, "loss": 0.5484, "step": 1830 }, { "epoch": 0.05027457440966502, "grad_norm": 0.39244458079338074, "learning_rate": 1.9972099415087102e-05, "loss": 0.5983, "step": 1831 }, { "epoch": 0.05030203185063152, "grad_norm": 0.38231831789016724, "learning_rate": 1.997206716578235e-05, "loss": 0.6482, "step": 1832 }, { "epoch": 0.05032948929159802, "grad_norm": 0.379594624042511, "learning_rate": 1.9972034897876507e-05, "loss": 0.5437, "step": 1833 }, { "epoch": 0.05035694673256452, "grad_norm": 0.3779386878013611, "learning_rate": 1.9972002611369638e-05, "loss": 0.6174, "step": 1834 }, { "epoch": 0.050384404173531024, "grad_norm": 0.3438434898853302, "learning_rate": 1.9971970306261804e-05, "loss": 0.6091, "step": 1835 }, { "epoch": 0.05041186161449753, "grad_norm": 0.3756929337978363, "learning_rate": 1.9971937982553058e-05, "loss": 0.5689, "step": 1836 }, { "epoch": 0.05043931905546403, "grad_norm": 0.3578527271747589, "learning_rate": 1.9971905640243468e-05, "loss": 0.596, "step": 1837 }, { "epoch": 0.050466776496430533, "grad_norm": 0.38765448331832886, "learning_rate": 1.997187327933309e-05, "loss": 0.6344, "step": 1838 }, { "epoch": 0.050494233937397034, "grad_norm": 0.3439255356788635, "learning_rate": 1.9971840899821986e-05, "loss": 0.5319, "step": 1839 }, { "epoch": 0.050521691378363535, "grad_norm": 0.3683120012283325, "learning_rate": 1.9971808501710218e-05, "loss": 0.4952, "step": 1840 }, { "epoch": 0.050549148819330036, "grad_norm": 0.35682010650634766, "learning_rate": 1.9971776084997844e-05, "loss": 0.5197, "step": 1841 }, { "epoch": 0.05057660626029654, "grad_norm": 0.4267752468585968, "learning_rate": 1.997174364968492e-05, "loss": 0.6842, "step": 1842 }, { "epoch": 0.050604063701263045, "grad_norm": 0.3641623854637146, "learning_rate": 1.9971711195771517e-05, "loss": 0.5864, "step": 1843 }, { "epoch": 0.050631521142229546, "grad_norm": 0.38410714268684387, "learning_rate": 1.9971678723257687e-05, "loss": 0.5848, "step": 1844 }, { "epoch": 0.05065897858319605, "grad_norm": 0.4092848598957062, "learning_rate": 1.9971646232143497e-05, "loss": 0.6289, "step": 1845 }, { "epoch": 0.05068643602416255, "grad_norm": 0.3922099769115448, "learning_rate": 1.9971613722429003e-05, "loss": 0.5204, "step": 1846 }, { "epoch": 0.05071389346512905, "grad_norm": 0.348134845495224, "learning_rate": 1.9971581194114264e-05, "loss": 0.5128, "step": 1847 }, { "epoch": 0.05074135090609555, "grad_norm": 0.35261133313179016, "learning_rate": 1.9971548647199347e-05, "loss": 0.5666, "step": 1848 }, { "epoch": 0.05076880834706205, "grad_norm": 0.39459383487701416, "learning_rate": 1.9971516081684307e-05, "loss": 0.5857, "step": 1849 }, { "epoch": 0.05079626578802855, "grad_norm": 0.35726556181907654, "learning_rate": 1.9971483497569207e-05, "loss": 0.4745, "step": 1850 }, { "epoch": 0.05082372322899506, "grad_norm": 0.3958752751350403, "learning_rate": 1.997145089485411e-05, "loss": 0.6552, "step": 1851 }, { "epoch": 0.05085118066996156, "grad_norm": 0.3550335466861725, "learning_rate": 1.9971418273539074e-05, "loss": 0.5541, "step": 1852 }, { "epoch": 0.05087863811092806, "grad_norm": 0.34962233901023865, "learning_rate": 1.9971385633624157e-05, "loss": 0.5461, "step": 1853 }, { "epoch": 0.05090609555189456, "grad_norm": 0.4213283360004425, "learning_rate": 1.9971352975109427e-05, "loss": 0.5573, "step": 1854 }, { "epoch": 0.050933552992861064, "grad_norm": 0.36787307262420654, "learning_rate": 1.9971320297994937e-05, "loss": 0.5772, "step": 1855 }, { "epoch": 0.050961010433827565, "grad_norm": 0.380305677652359, "learning_rate": 1.997128760228076e-05, "loss": 0.6275, "step": 1856 }, { "epoch": 0.050988467874794066, "grad_norm": 0.4087391495704651, "learning_rate": 1.9971254887966943e-05, "loss": 0.6092, "step": 1857 }, { "epoch": 0.051015925315760574, "grad_norm": 0.37025395035743713, "learning_rate": 1.9971222155053555e-05, "loss": 0.5622, "step": 1858 }, { "epoch": 0.051043382756727075, "grad_norm": 0.3716244101524353, "learning_rate": 1.9971189403540654e-05, "loss": 0.6932, "step": 1859 }, { "epoch": 0.051070840197693576, "grad_norm": 0.3616083562374115, "learning_rate": 1.99711566334283e-05, "loss": 0.555, "step": 1860 }, { "epoch": 0.05109829763866008, "grad_norm": 0.33523502945899963, "learning_rate": 1.9971123844716562e-05, "loss": 0.533, "step": 1861 }, { "epoch": 0.05112575507962658, "grad_norm": 0.3595324456691742, "learning_rate": 1.997109103740549e-05, "loss": 0.5759, "step": 1862 }, { "epoch": 0.05115321252059308, "grad_norm": 0.3867854177951813, "learning_rate": 1.9971058211495156e-05, "loss": 0.6165, "step": 1863 }, { "epoch": 0.05118066996155958, "grad_norm": 0.42644843459129333, "learning_rate": 1.9971025366985614e-05, "loss": 0.5937, "step": 1864 }, { "epoch": 0.05120812740252609, "grad_norm": 0.37566736340522766, "learning_rate": 1.997099250387693e-05, "loss": 0.6033, "step": 1865 }, { "epoch": 0.05123558484349259, "grad_norm": 0.3363610804080963, "learning_rate": 1.997095962216916e-05, "loss": 0.5124, "step": 1866 }, { "epoch": 0.05126304228445909, "grad_norm": 0.331209272146225, "learning_rate": 1.9970926721862365e-05, "loss": 0.5225, "step": 1867 }, { "epoch": 0.05129049972542559, "grad_norm": 0.3431551158428192, "learning_rate": 1.9970893802956612e-05, "loss": 0.5133, "step": 1868 }, { "epoch": 0.05131795716639209, "grad_norm": 0.4807696044445038, "learning_rate": 1.997086086545196e-05, "loss": 0.4962, "step": 1869 }, { "epoch": 0.05134541460735859, "grad_norm": 0.41332411766052246, "learning_rate": 1.997082790934847e-05, "loss": 0.6258, "step": 1870 }, { "epoch": 0.051372872048325094, "grad_norm": 0.40335801243782043, "learning_rate": 1.9970794934646206e-05, "loss": 0.545, "step": 1871 }, { "epoch": 0.051400329489291595, "grad_norm": 0.38011857867240906, "learning_rate": 1.9970761941345223e-05, "loss": 0.5686, "step": 1872 }, { "epoch": 0.0514277869302581, "grad_norm": 0.35664230585098267, "learning_rate": 1.997072892944559e-05, "loss": 0.6033, "step": 1873 }, { "epoch": 0.051455244371224604, "grad_norm": 0.37166547775268555, "learning_rate": 1.997069589894736e-05, "loss": 0.589, "step": 1874 }, { "epoch": 0.051482701812191105, "grad_norm": 0.3257485330104828, "learning_rate": 1.9970662849850607e-05, "loss": 0.5676, "step": 1875 }, { "epoch": 0.051510159253157606, "grad_norm": 0.4023725092411041, "learning_rate": 1.997062978215538e-05, "loss": 0.574, "step": 1876 }, { "epoch": 0.05153761669412411, "grad_norm": 0.33804449439048767, "learning_rate": 1.9970596695861748e-05, "loss": 0.567, "step": 1877 }, { "epoch": 0.05156507413509061, "grad_norm": 0.374999076128006, "learning_rate": 1.9970563590969775e-05, "loss": 0.5703, "step": 1878 }, { "epoch": 0.05159253157605711, "grad_norm": 0.36269867420196533, "learning_rate": 1.9970530467479513e-05, "loss": 0.5653, "step": 1879 }, { "epoch": 0.05161998901702362, "grad_norm": 0.3454335629940033, "learning_rate": 1.997049732539103e-05, "loss": 0.5238, "step": 1880 }, { "epoch": 0.05164744645799012, "grad_norm": 0.34760940074920654, "learning_rate": 1.9970464164704387e-05, "loss": 0.5325, "step": 1881 }, { "epoch": 0.05167490389895662, "grad_norm": 0.4599643647670746, "learning_rate": 1.9970430985419652e-05, "loss": 0.6429, "step": 1882 }, { "epoch": 0.05170236133992312, "grad_norm": 0.37288418412208557, "learning_rate": 1.9970397787536875e-05, "loss": 0.5919, "step": 1883 }, { "epoch": 0.05172981878088962, "grad_norm": 0.3666027784347534, "learning_rate": 1.9970364571056128e-05, "loss": 0.5327, "step": 1884 }, { "epoch": 0.05175727622185612, "grad_norm": 0.4380134642124176, "learning_rate": 1.9970331335977464e-05, "loss": 0.6175, "step": 1885 }, { "epoch": 0.05178473366282262, "grad_norm": 0.4335097372531891, "learning_rate": 1.9970298082300956e-05, "loss": 0.6043, "step": 1886 }, { "epoch": 0.051812191103789124, "grad_norm": 0.3626493513584137, "learning_rate": 1.9970264810026653e-05, "loss": 0.5745, "step": 1887 }, { "epoch": 0.05183964854475563, "grad_norm": 0.362321674823761, "learning_rate": 1.9970231519154627e-05, "loss": 0.5524, "step": 1888 }, { "epoch": 0.05186710598572213, "grad_norm": 0.3227526843547821, "learning_rate": 1.9970198209684936e-05, "loss": 0.5296, "step": 1889 }, { "epoch": 0.051894563426688634, "grad_norm": 0.3227916657924652, "learning_rate": 1.9970164881617647e-05, "loss": 0.4682, "step": 1890 }, { "epoch": 0.051922020867655135, "grad_norm": 0.3825254440307617, "learning_rate": 1.9970131534952815e-05, "loss": 0.55, "step": 1891 }, { "epoch": 0.051949478308621636, "grad_norm": 0.3648739755153656, "learning_rate": 1.9970098169690506e-05, "loss": 0.5656, "step": 1892 }, { "epoch": 0.051976935749588137, "grad_norm": 0.40551474690437317, "learning_rate": 1.9970064785830784e-05, "loss": 0.5394, "step": 1893 }, { "epoch": 0.05200439319055464, "grad_norm": 0.3743736743927002, "learning_rate": 1.997003138337371e-05, "loss": 0.5738, "step": 1894 }, { "epoch": 0.052031850631521145, "grad_norm": 0.3500254154205322, "learning_rate": 1.996999796231934e-05, "loss": 0.5572, "step": 1895 }, { "epoch": 0.052059308072487646, "grad_norm": 0.48554638028144836, "learning_rate": 1.9969964522667747e-05, "loss": 0.5766, "step": 1896 }, { "epoch": 0.05208676551345415, "grad_norm": 0.42437511682510376, "learning_rate": 1.9969931064418985e-05, "loss": 0.5791, "step": 1897 }, { "epoch": 0.05211422295442065, "grad_norm": 0.3432849049568176, "learning_rate": 1.996989758757312e-05, "loss": 0.5483, "step": 1898 }, { "epoch": 0.05214168039538715, "grad_norm": 0.3748025596141815, "learning_rate": 1.9969864092130217e-05, "loss": 0.5151, "step": 1899 }, { "epoch": 0.05216913783635365, "grad_norm": 0.47756150364875793, "learning_rate": 1.996983057809033e-05, "loss": 0.562, "step": 1900 }, { "epoch": 0.05219659527732015, "grad_norm": 0.4014303982257843, "learning_rate": 1.996979704545353e-05, "loss": 0.6337, "step": 1901 }, { "epoch": 0.05222405271828665, "grad_norm": 2.7102608680725098, "learning_rate": 1.9969763494219878e-05, "loss": 0.5327, "step": 1902 }, { "epoch": 0.05225151015925316, "grad_norm": 0.3542889654636383, "learning_rate": 1.9969729924389433e-05, "loss": 0.6304, "step": 1903 }, { "epoch": 0.05227896760021966, "grad_norm": 0.3389890193939209, "learning_rate": 1.9969696335962258e-05, "loss": 0.5501, "step": 1904 }, { "epoch": 0.05230642504118616, "grad_norm": 0.43655356764793396, "learning_rate": 1.996966272893842e-05, "loss": 0.6864, "step": 1905 }, { "epoch": 0.05233388248215266, "grad_norm": 0.3507354259490967, "learning_rate": 1.996962910331798e-05, "loss": 0.5368, "step": 1906 }, { "epoch": 0.052361339923119164, "grad_norm": 0.36367684602737427, "learning_rate": 1.9969595459100998e-05, "loss": 0.5665, "step": 1907 }, { "epoch": 0.052388797364085665, "grad_norm": 0.3746825158596039, "learning_rate": 1.9969561796287538e-05, "loss": 0.4528, "step": 1908 }, { "epoch": 0.052416254805052166, "grad_norm": 0.38441458344459534, "learning_rate": 1.9969528114877667e-05, "loss": 0.5754, "step": 1909 }, { "epoch": 0.052443712246018674, "grad_norm": 0.4207031726837158, "learning_rate": 1.996949441487144e-05, "loss": 0.5201, "step": 1910 }, { "epoch": 0.052471169686985175, "grad_norm": 0.4016045331954956, "learning_rate": 1.9969460696268926e-05, "loss": 0.6386, "step": 1911 }, { "epoch": 0.052498627127951676, "grad_norm": 0.36780083179473877, "learning_rate": 1.9969426959070185e-05, "loss": 0.5646, "step": 1912 }, { "epoch": 0.05252608456891818, "grad_norm": 0.36451947689056396, "learning_rate": 1.996939320327528e-05, "loss": 0.5155, "step": 1913 }, { "epoch": 0.05255354200988468, "grad_norm": 0.39193567633628845, "learning_rate": 1.9969359428884277e-05, "loss": 0.56, "step": 1914 }, { "epoch": 0.05258099945085118, "grad_norm": 0.3539312183856964, "learning_rate": 1.9969325635897236e-05, "loss": 0.5687, "step": 1915 }, { "epoch": 0.05260845689181768, "grad_norm": 0.3835406005382538, "learning_rate": 1.996929182431422e-05, "loss": 0.5404, "step": 1916 }, { "epoch": 0.05263591433278418, "grad_norm": 0.35497432947158813, "learning_rate": 1.9969257994135293e-05, "loss": 0.4967, "step": 1917 }, { "epoch": 0.05266337177375069, "grad_norm": 0.3840617537498474, "learning_rate": 1.996922414536052e-05, "loss": 0.5767, "step": 1918 }, { "epoch": 0.05269082921471719, "grad_norm": 0.3723994791507721, "learning_rate": 1.996919027798996e-05, "loss": 0.4957, "step": 1919 }, { "epoch": 0.05271828665568369, "grad_norm": 0.3579917848110199, "learning_rate": 1.9969156392023682e-05, "loss": 0.5527, "step": 1920 }, { "epoch": 0.05274574409665019, "grad_norm": 0.39644885063171387, "learning_rate": 1.996912248746174e-05, "loss": 0.6028, "step": 1921 }, { "epoch": 0.05277320153761669, "grad_norm": 0.36882805824279785, "learning_rate": 1.9969088564304204e-05, "loss": 0.5815, "step": 1922 }, { "epoch": 0.052800658978583194, "grad_norm": 0.39110493659973145, "learning_rate": 1.996905462255114e-05, "loss": 0.5948, "step": 1923 }, { "epoch": 0.052828116419549695, "grad_norm": 0.3762013614177704, "learning_rate": 1.9969020662202606e-05, "loss": 0.5491, "step": 1924 }, { "epoch": 0.0528555738605162, "grad_norm": 0.3976396322250366, "learning_rate": 1.9968986683258666e-05, "loss": 0.6082, "step": 1925 }, { "epoch": 0.052883031301482704, "grad_norm": 0.38490593433380127, "learning_rate": 1.996895268571938e-05, "loss": 0.5644, "step": 1926 }, { "epoch": 0.052910488742449205, "grad_norm": 0.4161694645881653, "learning_rate": 1.9968918669584824e-05, "loss": 0.5839, "step": 1927 }, { "epoch": 0.052937946183415706, "grad_norm": 0.3447498083114624, "learning_rate": 1.9968884634855047e-05, "loss": 0.5461, "step": 1928 }, { "epoch": 0.05296540362438221, "grad_norm": 0.40057653188705444, "learning_rate": 1.996885058153012e-05, "loss": 0.5956, "step": 1929 }, { "epoch": 0.05299286106534871, "grad_norm": 0.3923904001712799, "learning_rate": 1.9968816509610103e-05, "loss": 0.6227, "step": 1930 }, { "epoch": 0.05302031850631521, "grad_norm": 0.4147428870201111, "learning_rate": 1.996878241909506e-05, "loss": 0.6155, "step": 1931 }, { "epoch": 0.05304777594728172, "grad_norm": 0.32197341322898865, "learning_rate": 1.9968748309985062e-05, "loss": 0.4848, "step": 1932 }, { "epoch": 0.05307523338824822, "grad_norm": 0.3821795880794525, "learning_rate": 1.9968714182280165e-05, "loss": 0.5429, "step": 1933 }, { "epoch": 0.05310269082921472, "grad_norm": 0.4186863899230957, "learning_rate": 1.9968680035980434e-05, "loss": 0.5628, "step": 1934 }, { "epoch": 0.05313014827018122, "grad_norm": 0.3584262430667877, "learning_rate": 1.9968645871085932e-05, "loss": 0.5923, "step": 1935 }, { "epoch": 0.05315760571114772, "grad_norm": 0.3366561830043793, "learning_rate": 1.9968611687596722e-05, "loss": 0.4573, "step": 1936 }, { "epoch": 0.05318506315211422, "grad_norm": 0.3582332730293274, "learning_rate": 1.9968577485512876e-05, "loss": 0.5626, "step": 1937 }, { "epoch": 0.05321252059308072, "grad_norm": 0.3172548711299896, "learning_rate": 1.9968543264834444e-05, "loss": 0.512, "step": 1938 }, { "epoch": 0.053239978034047224, "grad_norm": 0.3788275420665741, "learning_rate": 1.99685090255615e-05, "loss": 0.4983, "step": 1939 }, { "epoch": 0.05326743547501373, "grad_norm": 0.3999404013156891, "learning_rate": 1.9968474767694108e-05, "loss": 0.5089, "step": 1940 }, { "epoch": 0.05329489291598023, "grad_norm": 0.3673028349876404, "learning_rate": 1.9968440491232326e-05, "loss": 0.534, "step": 1941 }, { "epoch": 0.053322350356946734, "grad_norm": 0.4109498858451843, "learning_rate": 1.9968406196176222e-05, "loss": 0.596, "step": 1942 }, { "epoch": 0.053349807797913235, "grad_norm": 0.3752717971801758, "learning_rate": 1.9968371882525858e-05, "loss": 0.5523, "step": 1943 }, { "epoch": 0.053377265238879736, "grad_norm": 0.332479864358902, "learning_rate": 1.99683375502813e-05, "loss": 0.5534, "step": 1944 }, { "epoch": 0.05340472267984624, "grad_norm": 0.4025343358516693, "learning_rate": 1.9968303199442613e-05, "loss": 0.5511, "step": 1945 }, { "epoch": 0.05343218012081274, "grad_norm": 0.36609092354774475, "learning_rate": 1.9968268830009854e-05, "loss": 0.5352, "step": 1946 }, { "epoch": 0.053459637561779245, "grad_norm": 0.4145871698856354, "learning_rate": 1.9968234441983096e-05, "loss": 0.6911, "step": 1947 }, { "epoch": 0.053487095002745746, "grad_norm": 0.36430829763412476, "learning_rate": 1.9968200035362395e-05, "loss": 0.5811, "step": 1948 }, { "epoch": 0.05351455244371225, "grad_norm": 0.38215717673301697, "learning_rate": 1.9968165610147824e-05, "loss": 0.5603, "step": 1949 }, { "epoch": 0.05354200988467875, "grad_norm": 0.3737226724624634, "learning_rate": 1.996813116633944e-05, "loss": 0.5536, "step": 1950 }, { "epoch": 0.05356946732564525, "grad_norm": 1.022645115852356, "learning_rate": 1.996809670393731e-05, "loss": 0.5749, "step": 1951 }, { "epoch": 0.05359692476661175, "grad_norm": 0.3291313648223877, "learning_rate": 1.99680622229415e-05, "loss": 0.5191, "step": 1952 }, { "epoch": 0.05362438220757825, "grad_norm": 0.37627366185188293, "learning_rate": 1.9968027723352073e-05, "loss": 0.5853, "step": 1953 }, { "epoch": 0.05365183964854475, "grad_norm": 0.3818928301334381, "learning_rate": 1.996799320516909e-05, "loss": 0.5545, "step": 1954 }, { "epoch": 0.05367929708951126, "grad_norm": 0.3633921444416046, "learning_rate": 1.996795866839262e-05, "loss": 0.4966, "step": 1955 }, { "epoch": 0.05370675453047776, "grad_norm": 0.3660745918750763, "learning_rate": 1.9967924113022725e-05, "loss": 0.5228, "step": 1956 }, { "epoch": 0.05373421197144426, "grad_norm": 0.353555828332901, "learning_rate": 1.996788953905947e-05, "loss": 0.5138, "step": 1957 }, { "epoch": 0.05376166941241076, "grad_norm": 0.3804539442062378, "learning_rate": 1.996785494650292e-05, "loss": 0.5823, "step": 1958 }, { "epoch": 0.053789126853377264, "grad_norm": 0.4119977355003357, "learning_rate": 1.996782033535314e-05, "loss": 0.4778, "step": 1959 }, { "epoch": 0.053816584294343765, "grad_norm": 0.39536595344543457, "learning_rate": 1.9967785705610193e-05, "loss": 0.6366, "step": 1960 }, { "epoch": 0.053844041735310266, "grad_norm": 0.3434202969074249, "learning_rate": 1.9967751057274147e-05, "loss": 0.5028, "step": 1961 }, { "epoch": 0.053871499176276774, "grad_norm": 0.380874902009964, "learning_rate": 1.9967716390345056e-05, "loss": 0.5961, "step": 1962 }, { "epoch": 0.053898956617243275, "grad_norm": 0.33766821026802063, "learning_rate": 1.9967681704823e-05, "loss": 0.645, "step": 1963 }, { "epoch": 0.053926414058209776, "grad_norm": 0.3723110556602478, "learning_rate": 1.9967647000708035e-05, "loss": 0.569, "step": 1964 }, { "epoch": 0.05395387149917628, "grad_norm": 0.5142785310745239, "learning_rate": 1.996761227800023e-05, "loss": 0.6009, "step": 1965 }, { "epoch": 0.05398132894014278, "grad_norm": 0.3816337287425995, "learning_rate": 1.996757753669964e-05, "loss": 0.5039, "step": 1966 }, { "epoch": 0.05400878638110928, "grad_norm": 0.39781779050827026, "learning_rate": 1.996754277680634e-05, "loss": 0.5758, "step": 1967 }, { "epoch": 0.05403624382207578, "grad_norm": 0.33051854372024536, "learning_rate": 1.996750799832039e-05, "loss": 0.5256, "step": 1968 }, { "epoch": 0.05406370126304228, "grad_norm": 0.3353223204612732, "learning_rate": 1.9967473201241856e-05, "loss": 0.5441, "step": 1969 }, { "epoch": 0.05409115870400879, "grad_norm": 0.3472880423069, "learning_rate": 1.996743838557081e-05, "loss": 0.5482, "step": 1970 }, { "epoch": 0.05411861614497529, "grad_norm": 0.32928189635276794, "learning_rate": 1.99674035513073e-05, "loss": 0.544, "step": 1971 }, { "epoch": 0.05414607358594179, "grad_norm": 0.3723282814025879, "learning_rate": 1.996736869845141e-05, "loss": 0.6027, "step": 1972 }, { "epoch": 0.05417353102690829, "grad_norm": 0.37033510208129883, "learning_rate": 1.996733382700319e-05, "loss": 0.5398, "step": 1973 }, { "epoch": 0.05420098846787479, "grad_norm": 0.33934634923934937, "learning_rate": 1.9967298936962712e-05, "loss": 0.4855, "step": 1974 }, { "epoch": 0.054228445908841294, "grad_norm": 0.33855217695236206, "learning_rate": 1.9967264028330043e-05, "loss": 0.5237, "step": 1975 }, { "epoch": 0.054255903349807795, "grad_norm": 0.39195215702056885, "learning_rate": 1.9967229101105244e-05, "loss": 0.5544, "step": 1976 }, { "epoch": 0.0542833607907743, "grad_norm": 0.43763822317123413, "learning_rate": 1.996719415528838e-05, "loss": 0.5685, "step": 1977 }, { "epoch": 0.054310818231740804, "grad_norm": 0.3820432722568512, "learning_rate": 1.996715919087952e-05, "loss": 0.5293, "step": 1978 }, { "epoch": 0.054338275672707305, "grad_norm": 0.49195462465286255, "learning_rate": 1.9967124207878727e-05, "loss": 0.5789, "step": 1979 }, { "epoch": 0.054365733113673806, "grad_norm": 0.38214603066444397, "learning_rate": 1.9967089206286062e-05, "loss": 0.5677, "step": 1980 }, { "epoch": 0.05439319055464031, "grad_norm": 0.3766654133796692, "learning_rate": 1.99670541861016e-05, "loss": 0.543, "step": 1981 }, { "epoch": 0.05442064799560681, "grad_norm": 0.3439669907093048, "learning_rate": 1.99670191473254e-05, "loss": 0.5434, "step": 1982 }, { "epoch": 0.05444810543657331, "grad_norm": 0.394232839345932, "learning_rate": 1.9966984089957525e-05, "loss": 0.5771, "step": 1983 }, { "epoch": 0.05447556287753981, "grad_norm": 0.37702706456184387, "learning_rate": 1.9966949013998047e-05, "loss": 0.5724, "step": 1984 }, { "epoch": 0.05450302031850632, "grad_norm": 0.43418416380882263, "learning_rate": 1.9966913919447026e-05, "loss": 0.6491, "step": 1985 }, { "epoch": 0.05453047775947282, "grad_norm": 0.43077394366264343, "learning_rate": 1.996687880630453e-05, "loss": 0.6297, "step": 1986 }, { "epoch": 0.05455793520043932, "grad_norm": 0.3564531207084656, "learning_rate": 1.9966843674570623e-05, "loss": 0.5039, "step": 1987 }, { "epoch": 0.05458539264140582, "grad_norm": 0.39182329177856445, "learning_rate": 1.9966808524245373e-05, "loss": 0.5408, "step": 1988 }, { "epoch": 0.05461285008237232, "grad_norm": 0.4099125564098358, "learning_rate": 1.9966773355328847e-05, "loss": 0.6587, "step": 1989 }, { "epoch": 0.05464030752333882, "grad_norm": 0.3784736096858978, "learning_rate": 1.9966738167821103e-05, "loss": 0.5272, "step": 1990 }, { "epoch": 0.054667764964305324, "grad_norm": 0.40466517210006714, "learning_rate": 1.996670296172221e-05, "loss": 0.6046, "step": 1991 }, { "epoch": 0.05469522240527183, "grad_norm": 0.35350197553634644, "learning_rate": 1.996666773703224e-05, "loss": 0.565, "step": 1992 }, { "epoch": 0.05472267984623833, "grad_norm": 0.3548986315727234, "learning_rate": 1.996663249375125e-05, "loss": 0.5458, "step": 1993 }, { "epoch": 0.054750137287204834, "grad_norm": 0.3931201994419098, "learning_rate": 1.9966597231879313e-05, "loss": 0.6154, "step": 1994 }, { "epoch": 0.054777594728171335, "grad_norm": 0.32984408736228943, "learning_rate": 1.9966561951416486e-05, "loss": 0.4501, "step": 1995 }, { "epoch": 0.054805052169137836, "grad_norm": 0.36772221326828003, "learning_rate": 1.9966526652362845e-05, "loss": 0.5679, "step": 1996 }, { "epoch": 0.05483250961010434, "grad_norm": 0.5863059163093567, "learning_rate": 1.9966491334718447e-05, "loss": 0.5416, "step": 1997 }, { "epoch": 0.05485996705107084, "grad_norm": 0.35672613978385925, "learning_rate": 1.9966455998483366e-05, "loss": 0.5438, "step": 1998 }, { "epoch": 0.054887424492037346, "grad_norm": 0.3641097843647003, "learning_rate": 1.9966420643657657e-05, "loss": 0.5081, "step": 1999 }, { "epoch": 0.054914881933003847, "grad_norm": 0.3254946768283844, "learning_rate": 1.99663852702414e-05, "loss": 0.5307, "step": 2000 }, { "epoch": 0.05494233937397035, "grad_norm": 0.39165404438972473, "learning_rate": 1.996634987823465e-05, "loss": 0.5616, "step": 2001 }, { "epoch": 0.05496979681493685, "grad_norm": 0.3822854161262512, "learning_rate": 1.996631446763748e-05, "loss": 0.626, "step": 2002 }, { "epoch": 0.05499725425590335, "grad_norm": 0.3435845673084259, "learning_rate": 1.9966279038449947e-05, "loss": 0.5283, "step": 2003 }, { "epoch": 0.05502471169686985, "grad_norm": 0.3328026533126831, "learning_rate": 1.9966243590672123e-05, "loss": 0.611, "step": 2004 }, { "epoch": 0.05505216913783635, "grad_norm": 0.3749738335609436, "learning_rate": 1.9966208124304078e-05, "loss": 0.5491, "step": 2005 }, { "epoch": 0.05507962657880285, "grad_norm": 0.38101232051849365, "learning_rate": 1.996617263934587e-05, "loss": 0.6033, "step": 2006 }, { "epoch": 0.05510708401976936, "grad_norm": 0.3583291471004486, "learning_rate": 1.9966137135797572e-05, "loss": 0.5916, "step": 2007 }, { "epoch": 0.05513454146073586, "grad_norm": 0.3315559923648834, "learning_rate": 1.9966101613659247e-05, "loss": 0.5326, "step": 2008 }, { "epoch": 0.05516199890170236, "grad_norm": 0.38871026039123535, "learning_rate": 1.9966066072930962e-05, "loss": 0.5678, "step": 2009 }, { "epoch": 0.05518945634266886, "grad_norm": 0.45660313963890076, "learning_rate": 1.996603051361278e-05, "loss": 0.6601, "step": 2010 }, { "epoch": 0.055216913783635364, "grad_norm": 0.4075772762298584, "learning_rate": 1.9965994935704773e-05, "loss": 0.6638, "step": 2011 }, { "epoch": 0.055244371224601865, "grad_norm": 0.4331810176372528, "learning_rate": 1.9965959339207005e-05, "loss": 0.6178, "step": 2012 }, { "epoch": 0.055271828665568366, "grad_norm": 0.33049675822257996, "learning_rate": 1.996592372411954e-05, "loss": 0.5172, "step": 2013 }, { "epoch": 0.055299286106534874, "grad_norm": 0.3745536506175995, "learning_rate": 1.996588809044245e-05, "loss": 0.5338, "step": 2014 }, { "epoch": 0.055326743547501375, "grad_norm": 0.3538389801979065, "learning_rate": 1.9965852438175795e-05, "loss": 0.5499, "step": 2015 }, { "epoch": 0.055354200988467876, "grad_norm": 0.32021448016166687, "learning_rate": 1.9965816767319647e-05, "loss": 0.6278, "step": 2016 }, { "epoch": 0.05538165842943438, "grad_norm": 0.3408103585243225, "learning_rate": 1.9965781077874067e-05, "loss": 0.4662, "step": 2017 }, { "epoch": 0.05540911587040088, "grad_norm": 0.4726349413394928, "learning_rate": 1.9965745369839126e-05, "loss": 0.5268, "step": 2018 }, { "epoch": 0.05543657331136738, "grad_norm": 0.37714600563049316, "learning_rate": 1.9965709643214888e-05, "loss": 0.5525, "step": 2019 }, { "epoch": 0.05546403075233388, "grad_norm": 0.3966827392578125, "learning_rate": 1.9965673898001424e-05, "loss": 0.5729, "step": 2020 }, { "epoch": 0.05549148819330038, "grad_norm": 0.47160837054252625, "learning_rate": 1.9965638134198792e-05, "loss": 0.5332, "step": 2021 }, { "epoch": 0.05551894563426689, "grad_norm": 0.45000022649765015, "learning_rate": 1.996560235180707e-05, "loss": 0.6143, "step": 2022 }, { "epoch": 0.05554640307523339, "grad_norm": 0.35568341612815857, "learning_rate": 1.9965566550826316e-05, "loss": 0.5013, "step": 2023 }, { "epoch": 0.05557386051619989, "grad_norm": 0.4731135964393616, "learning_rate": 1.99655307312566e-05, "loss": 0.5195, "step": 2024 }, { "epoch": 0.05560131795716639, "grad_norm": 0.3742121160030365, "learning_rate": 1.996549489309799e-05, "loss": 0.5551, "step": 2025 }, { "epoch": 0.05562877539813289, "grad_norm": 0.3754158914089203, "learning_rate": 1.996545903635055e-05, "loss": 0.4215, "step": 2026 }, { "epoch": 0.055656232839099394, "grad_norm": 0.3337489366531372, "learning_rate": 1.996542316101435e-05, "loss": 0.4872, "step": 2027 }, { "epoch": 0.055683690280065895, "grad_norm": 0.3623185157775879, "learning_rate": 1.9965387267089453e-05, "loss": 0.5688, "step": 2028 }, { "epoch": 0.0557111477210324, "grad_norm": 0.36729708313941956, "learning_rate": 1.996535135457593e-05, "loss": 0.527, "step": 2029 }, { "epoch": 0.055738605161998904, "grad_norm": 0.3253529667854309, "learning_rate": 1.9965315423473842e-05, "loss": 0.5183, "step": 2030 }, { "epoch": 0.055766062602965405, "grad_norm": 0.38443174958229065, "learning_rate": 1.9965279473783267e-05, "loss": 0.6316, "step": 2031 }, { "epoch": 0.055793520043931906, "grad_norm": 0.5800321698188782, "learning_rate": 1.996524350550426e-05, "loss": 0.5619, "step": 2032 }, { "epoch": 0.05582097748489841, "grad_norm": 0.6226053833961487, "learning_rate": 1.9965207518636897e-05, "loss": 0.5755, "step": 2033 }, { "epoch": 0.05584843492586491, "grad_norm": 0.33981379866600037, "learning_rate": 1.9965171513181237e-05, "loss": 0.5159, "step": 2034 }, { "epoch": 0.05587589236683141, "grad_norm": 0.4213826060295105, "learning_rate": 1.996513548913735e-05, "loss": 0.6902, "step": 2035 }, { "epoch": 0.05590334980779791, "grad_norm": 0.45275676250457764, "learning_rate": 1.996509944650531e-05, "loss": 0.4717, "step": 2036 }, { "epoch": 0.05593080724876442, "grad_norm": 0.366296648979187, "learning_rate": 1.996506338528518e-05, "loss": 0.6725, "step": 2037 }, { "epoch": 0.05595826468973092, "grad_norm": 0.3707183301448822, "learning_rate": 1.9965027305477025e-05, "loss": 0.6182, "step": 2038 }, { "epoch": 0.05598572213069742, "grad_norm": 0.3783794045448303, "learning_rate": 1.9964991207080912e-05, "loss": 0.6264, "step": 2039 }, { "epoch": 0.05601317957166392, "grad_norm": 0.3620065152645111, "learning_rate": 1.996495509009691e-05, "loss": 0.6018, "step": 2040 }, { "epoch": 0.05604063701263042, "grad_norm": 0.3712809979915619, "learning_rate": 1.9964918954525086e-05, "loss": 0.5817, "step": 2041 }, { "epoch": 0.05606809445359692, "grad_norm": 0.39888373017311096, "learning_rate": 1.9964882800365508e-05, "loss": 0.6361, "step": 2042 }, { "epoch": 0.056095551894563424, "grad_norm": 0.42834100127220154, "learning_rate": 1.9964846627618246e-05, "loss": 0.5717, "step": 2043 }, { "epoch": 0.05612300933552993, "grad_norm": 0.3709574341773987, "learning_rate": 1.996481043628336e-05, "loss": 0.5782, "step": 2044 }, { "epoch": 0.05615046677649643, "grad_norm": 0.35595080256462097, "learning_rate": 1.9964774226360927e-05, "loss": 0.4876, "step": 2045 }, { "epoch": 0.056177924217462934, "grad_norm": 0.42987295985221863, "learning_rate": 1.9964737997851006e-05, "loss": 0.6421, "step": 2046 }, { "epoch": 0.056205381658429435, "grad_norm": 0.37864950299263, "learning_rate": 1.996470175075367e-05, "loss": 0.6578, "step": 2047 }, { "epoch": 0.056232839099395936, "grad_norm": 0.3595561981201172, "learning_rate": 1.9964665485068985e-05, "loss": 0.4964, "step": 2048 }, { "epoch": 0.05626029654036244, "grad_norm": 0.3331921398639679, "learning_rate": 1.9964629200797015e-05, "loss": 0.5776, "step": 2049 }, { "epoch": 0.05628775398132894, "grad_norm": 0.358014315366745, "learning_rate": 1.9964592897937835e-05, "loss": 0.5617, "step": 2050 }, { "epoch": 0.05631521142229544, "grad_norm": 0.3295709788799286, "learning_rate": 1.996455657649151e-05, "loss": 0.4767, "step": 2051 }, { "epoch": 0.05634266886326195, "grad_norm": 0.3911502957344055, "learning_rate": 1.9964520236458104e-05, "loss": 0.4986, "step": 2052 }, { "epoch": 0.05637012630422845, "grad_norm": 0.3673626184463501, "learning_rate": 1.9964483877837688e-05, "loss": 0.5375, "step": 2053 }, { "epoch": 0.05639758374519495, "grad_norm": 0.3907355070114136, "learning_rate": 1.996444750063033e-05, "loss": 0.5577, "step": 2054 }, { "epoch": 0.05642504118616145, "grad_norm": 0.7137215733528137, "learning_rate": 1.9964411104836097e-05, "loss": 0.6828, "step": 2055 }, { "epoch": 0.05645249862712795, "grad_norm": 0.4165397882461548, "learning_rate": 1.9964374690455055e-05, "loss": 0.673, "step": 2056 }, { "epoch": 0.05647995606809445, "grad_norm": 0.42495349049568176, "learning_rate": 1.9964338257487274e-05, "loss": 0.5968, "step": 2057 }, { "epoch": 0.05650741350906095, "grad_norm": 0.4476298689842224, "learning_rate": 1.9964301805932826e-05, "loss": 0.6406, "step": 2058 }, { "epoch": 0.05653487095002746, "grad_norm": 0.3488427698612213, "learning_rate": 1.996426533579177e-05, "loss": 0.5124, "step": 2059 }, { "epoch": 0.05656232839099396, "grad_norm": 0.37953710556030273, "learning_rate": 1.9964228847064183e-05, "loss": 0.5728, "step": 2060 }, { "epoch": 0.05658978583196046, "grad_norm": 0.32830098271369934, "learning_rate": 1.996419233975013e-05, "loss": 0.5364, "step": 2061 }, { "epoch": 0.05661724327292696, "grad_norm": 0.47980713844299316, "learning_rate": 1.9964155813849674e-05, "loss": 0.5793, "step": 2062 }, { "epoch": 0.056644700713893464, "grad_norm": 0.3663296401500702, "learning_rate": 1.9964119269362887e-05, "loss": 0.5166, "step": 2063 }, { "epoch": 0.056672158154859965, "grad_norm": 0.6913097500801086, "learning_rate": 1.996408270628984e-05, "loss": 0.592, "step": 2064 }, { "epoch": 0.056699615595826466, "grad_norm": 0.3918541371822357, "learning_rate": 1.9964046124630595e-05, "loss": 0.5815, "step": 2065 }, { "epoch": 0.056727073036792974, "grad_norm": 0.3882789611816406, "learning_rate": 1.9964009524385228e-05, "loss": 0.5611, "step": 2066 }, { "epoch": 0.056754530477759475, "grad_norm": 0.3591477572917938, "learning_rate": 1.99639729055538e-05, "loss": 0.5299, "step": 2067 }, { "epoch": 0.056781987918725976, "grad_norm": 0.34083282947540283, "learning_rate": 1.9963936268136383e-05, "loss": 0.6021, "step": 2068 }, { "epoch": 0.05680944535969248, "grad_norm": 0.3997838795185089, "learning_rate": 1.996389961213305e-05, "loss": 0.6232, "step": 2069 }, { "epoch": 0.05683690280065898, "grad_norm": 0.38902944326400757, "learning_rate": 1.9963862937543855e-05, "loss": 0.662, "step": 2070 }, { "epoch": 0.05686436024162548, "grad_norm": 0.41650712490081787, "learning_rate": 1.996382624436888e-05, "loss": 0.6102, "step": 2071 }, { "epoch": 0.05689181768259198, "grad_norm": 0.4586417078971863, "learning_rate": 1.996378953260819e-05, "loss": 0.6335, "step": 2072 }, { "epoch": 0.05691927512355848, "grad_norm": 0.38544201850891113, "learning_rate": 1.9963752802261854e-05, "loss": 0.5136, "step": 2073 }, { "epoch": 0.05694673256452499, "grad_norm": 0.3732902407646179, "learning_rate": 1.9963716053329937e-05, "loss": 0.603, "step": 2074 }, { "epoch": 0.05697419000549149, "grad_norm": 0.42070090770721436, "learning_rate": 1.996367928581251e-05, "loss": 0.4846, "step": 2075 }, { "epoch": 0.05700164744645799, "grad_norm": 0.380567342042923, "learning_rate": 1.996364249970964e-05, "loss": 0.5739, "step": 2076 }, { "epoch": 0.05702910488742449, "grad_norm": 0.37890395522117615, "learning_rate": 1.9963605695021396e-05, "loss": 0.619, "step": 2077 }, { "epoch": 0.05705656232839099, "grad_norm": 0.3676146864891052, "learning_rate": 1.9963568871747846e-05, "loss": 0.4917, "step": 2078 }, { "epoch": 0.057084019769357494, "grad_norm": 0.3497373163700104, "learning_rate": 1.9963532029889062e-05, "loss": 0.493, "step": 2079 }, { "epoch": 0.057111477210323995, "grad_norm": 0.5582741498947144, "learning_rate": 1.996349516944511e-05, "loss": 0.5268, "step": 2080 }, { "epoch": 0.0571389346512905, "grad_norm": 0.35925808548927307, "learning_rate": 1.9963458290416066e-05, "loss": 0.5935, "step": 2081 }, { "epoch": 0.057166392092257004, "grad_norm": 0.371040940284729, "learning_rate": 1.9963421392801985e-05, "loss": 0.6186, "step": 2082 }, { "epoch": 0.057193849533223505, "grad_norm": 0.35558146238327026, "learning_rate": 1.9963384476602944e-05, "loss": 0.5648, "step": 2083 }, { "epoch": 0.057221306974190006, "grad_norm": 0.34265777468681335, "learning_rate": 1.9963347541819012e-05, "loss": 0.5956, "step": 2084 }, { "epoch": 0.05724876441515651, "grad_norm": 0.39103201031684875, "learning_rate": 1.9963310588450258e-05, "loss": 0.5989, "step": 2085 }, { "epoch": 0.05727622185612301, "grad_norm": 0.32918792963027954, "learning_rate": 1.9963273616496747e-05, "loss": 0.611, "step": 2086 }, { "epoch": 0.05730367929708951, "grad_norm": 0.38253819942474365, "learning_rate": 1.9963236625958555e-05, "loss": 0.5723, "step": 2087 }, { "epoch": 0.05733113673805601, "grad_norm": 0.373649001121521, "learning_rate": 1.9963199616835745e-05, "loss": 0.5901, "step": 2088 }, { "epoch": 0.05735859417902252, "grad_norm": 0.3540079593658447, "learning_rate": 1.9963162589128388e-05, "loss": 0.6477, "step": 2089 }, { "epoch": 0.05738605161998902, "grad_norm": 0.40538671612739563, "learning_rate": 1.996312554283655e-05, "loss": 0.6546, "step": 2090 }, { "epoch": 0.05741350906095552, "grad_norm": 0.35379964113235474, "learning_rate": 1.9963088477960305e-05, "loss": 0.5089, "step": 2091 }, { "epoch": 0.05744096650192202, "grad_norm": 0.334396094083786, "learning_rate": 1.9963051394499718e-05, "loss": 0.5228, "step": 2092 }, { "epoch": 0.05746842394288852, "grad_norm": 0.3276139497756958, "learning_rate": 1.9963014292454863e-05, "loss": 0.5395, "step": 2093 }, { "epoch": 0.05749588138385502, "grad_norm": 0.37385308742523193, "learning_rate": 1.996297717182581e-05, "loss": 0.4796, "step": 2094 }, { "epoch": 0.057523338824821524, "grad_norm": 0.38942891359329224, "learning_rate": 1.996294003261262e-05, "loss": 0.6693, "step": 2095 }, { "epoch": 0.05755079626578803, "grad_norm": 0.38757753372192383, "learning_rate": 1.9962902874815367e-05, "loss": 0.5123, "step": 2096 }, { "epoch": 0.05757825370675453, "grad_norm": 0.38430896401405334, "learning_rate": 1.996286569843412e-05, "loss": 0.6012, "step": 2097 }, { "epoch": 0.057605711147721034, "grad_norm": 0.3397964835166931, "learning_rate": 1.996282850346895e-05, "loss": 0.4714, "step": 2098 }, { "epoch": 0.057633168588687535, "grad_norm": 0.3666517436504364, "learning_rate": 1.9962791289919927e-05, "loss": 0.4795, "step": 2099 }, { "epoch": 0.057660626029654036, "grad_norm": 0.4206869900226593, "learning_rate": 1.9962754057787114e-05, "loss": 0.5854, "step": 2100 }, { "epoch": 0.05768808347062054, "grad_norm": 0.3855239450931549, "learning_rate": 1.9962716807070592e-05, "loss": 0.4526, "step": 2101 }, { "epoch": 0.05771554091158704, "grad_norm": 0.36273616552352905, "learning_rate": 1.9962679537770414e-05, "loss": 0.4751, "step": 2102 }, { "epoch": 0.05774299835255354, "grad_norm": 0.36599552631378174, "learning_rate": 1.9962642249886665e-05, "loss": 0.5279, "step": 2103 }, { "epoch": 0.05777045579352005, "grad_norm": 0.3558511734008789, "learning_rate": 1.9962604943419407e-05, "loss": 0.5198, "step": 2104 }, { "epoch": 0.05779791323448655, "grad_norm": 0.3543054163455963, "learning_rate": 1.9962567618368713e-05, "loss": 0.497, "step": 2105 }, { "epoch": 0.05782537067545305, "grad_norm": 0.3627586364746094, "learning_rate": 1.9962530274734648e-05, "loss": 0.5009, "step": 2106 }, { "epoch": 0.05785282811641955, "grad_norm": 0.3531317114830017, "learning_rate": 1.9962492912517287e-05, "loss": 0.493, "step": 2107 }, { "epoch": 0.05788028555738605, "grad_norm": 0.422013521194458, "learning_rate": 1.9962455531716697e-05, "loss": 0.5789, "step": 2108 }, { "epoch": 0.05790774299835255, "grad_norm": 0.35329610109329224, "learning_rate": 1.9962418132332943e-05, "loss": 0.5622, "step": 2109 }, { "epoch": 0.05793520043931905, "grad_norm": 0.7353420257568359, "learning_rate": 1.9962380714366106e-05, "loss": 0.65, "step": 2110 }, { "epoch": 0.05796265788028556, "grad_norm": 0.3453896641731262, "learning_rate": 1.9962343277816244e-05, "loss": 0.6194, "step": 2111 }, { "epoch": 0.05799011532125206, "grad_norm": 0.39047321677207947, "learning_rate": 1.9962305822683434e-05, "loss": 0.5473, "step": 2112 }, { "epoch": 0.05801757276221856, "grad_norm": 0.38865065574645996, "learning_rate": 1.9962268348967745e-05, "loss": 0.5524, "step": 2113 }, { "epoch": 0.05804503020318506, "grad_norm": 0.3683394491672516, "learning_rate": 1.9962230856669243e-05, "loss": 0.5767, "step": 2114 }, { "epoch": 0.058072487644151564, "grad_norm": 0.3613957464694977, "learning_rate": 1.9962193345788005e-05, "loss": 0.5695, "step": 2115 }, { "epoch": 0.058099945085118065, "grad_norm": 0.33223995566368103, "learning_rate": 1.9962155816324097e-05, "loss": 0.5418, "step": 2116 }, { "epoch": 0.058127402526084566, "grad_norm": 0.3556272089481354, "learning_rate": 1.9962118268277587e-05, "loss": 0.5227, "step": 2117 }, { "epoch": 0.05815485996705107, "grad_norm": 0.3495692312717438, "learning_rate": 1.9962080701648546e-05, "loss": 0.5261, "step": 2118 }, { "epoch": 0.058182317408017575, "grad_norm": 0.3517887592315674, "learning_rate": 1.9962043116437046e-05, "loss": 0.5019, "step": 2119 }, { "epoch": 0.058209774848984076, "grad_norm": 0.3904082179069519, "learning_rate": 1.9962005512643157e-05, "loss": 0.5882, "step": 2120 }, { "epoch": 0.05823723228995058, "grad_norm": 0.41929295659065247, "learning_rate": 1.9961967890266948e-05, "loss": 0.5862, "step": 2121 }, { "epoch": 0.05826468973091708, "grad_norm": 0.3513755202293396, "learning_rate": 1.9961930249308486e-05, "loss": 0.561, "step": 2122 }, { "epoch": 0.05829214717188358, "grad_norm": 0.32787853479385376, "learning_rate": 1.996189258976785e-05, "loss": 0.5964, "step": 2123 }, { "epoch": 0.05831960461285008, "grad_norm": 0.34504589438438416, "learning_rate": 1.99618549116451e-05, "loss": 0.5218, "step": 2124 }, { "epoch": 0.05834706205381658, "grad_norm": 0.40498968958854675, "learning_rate": 1.9961817214940315e-05, "loss": 0.5111, "step": 2125 }, { "epoch": 0.05837451949478309, "grad_norm": 0.3565294146537781, "learning_rate": 1.9961779499653557e-05, "loss": 0.4954, "step": 2126 }, { "epoch": 0.05840197693574959, "grad_norm": 0.3774467408657074, "learning_rate": 1.9961741765784904e-05, "loss": 0.568, "step": 2127 }, { "epoch": 0.05842943437671609, "grad_norm": 0.3867673873901367, "learning_rate": 1.996170401333442e-05, "loss": 0.4966, "step": 2128 }, { "epoch": 0.05845689181768259, "grad_norm": 0.36321911215782166, "learning_rate": 1.9961666242302183e-05, "loss": 0.6001, "step": 2129 }, { "epoch": 0.05848434925864909, "grad_norm": 0.35775139927864075, "learning_rate": 1.9961628452688257e-05, "loss": 0.5813, "step": 2130 }, { "epoch": 0.058511806699615594, "grad_norm": 0.5144543051719666, "learning_rate": 1.9961590644492714e-05, "loss": 0.5887, "step": 2131 }, { "epoch": 0.058539264140582095, "grad_norm": 0.4455825686454773, "learning_rate": 1.9961552817715627e-05, "loss": 0.6404, "step": 2132 }, { "epoch": 0.0585667215815486, "grad_norm": 0.4945022165775299, "learning_rate": 1.9961514972357062e-05, "loss": 0.5868, "step": 2133 }, { "epoch": 0.058594179022515104, "grad_norm": 0.36157357692718506, "learning_rate": 1.9961477108417092e-05, "loss": 0.5822, "step": 2134 }, { "epoch": 0.058621636463481605, "grad_norm": 0.3817666471004486, "learning_rate": 1.996143922589579e-05, "loss": 0.5131, "step": 2135 }, { "epoch": 0.058649093904448106, "grad_norm": 0.3608619272708893, "learning_rate": 1.9961401324793226e-05, "loss": 0.5419, "step": 2136 }, { "epoch": 0.05867655134541461, "grad_norm": 0.3493039309978485, "learning_rate": 1.9961363405109466e-05, "loss": 0.5099, "step": 2137 }, { "epoch": 0.05870400878638111, "grad_norm": 0.37090176343917847, "learning_rate": 1.9961325466844587e-05, "loss": 0.5914, "step": 2138 }, { "epoch": 0.05873146622734761, "grad_norm": 0.3708887994289398, "learning_rate": 1.9961287509998655e-05, "loss": 0.5226, "step": 2139 }, { "epoch": 0.05875892366831411, "grad_norm": 0.497320294380188, "learning_rate": 1.996124953457174e-05, "loss": 0.6046, "step": 2140 }, { "epoch": 0.05878638110928062, "grad_norm": 0.3263620436191559, "learning_rate": 1.996121154056392e-05, "loss": 0.5559, "step": 2141 }, { "epoch": 0.05881383855024712, "grad_norm": 0.435139924287796, "learning_rate": 1.996117352797526e-05, "loss": 0.5239, "step": 2142 }, { "epoch": 0.05884129599121362, "grad_norm": 0.3795925974845886, "learning_rate": 1.996113549680583e-05, "loss": 0.4495, "step": 2143 }, { "epoch": 0.05886875343218012, "grad_norm": 0.358169823884964, "learning_rate": 1.9961097447055705e-05, "loss": 0.5846, "step": 2144 }, { "epoch": 0.05889621087314662, "grad_norm": 0.3654024004936218, "learning_rate": 1.9961059378724953e-05, "loss": 0.5682, "step": 2145 }, { "epoch": 0.05892366831411312, "grad_norm": 0.37363845109939575, "learning_rate": 1.9961021291813643e-05, "loss": 0.658, "step": 2146 }, { "epoch": 0.058951125755079624, "grad_norm": 0.36140117049217224, "learning_rate": 1.9960983186321855e-05, "loss": 0.5529, "step": 2147 }, { "epoch": 0.05897858319604613, "grad_norm": 0.4407306909561157, "learning_rate": 1.996094506224965e-05, "loss": 0.584, "step": 2148 }, { "epoch": 0.05900604063701263, "grad_norm": 0.33430108428001404, "learning_rate": 1.9960906919597108e-05, "loss": 0.5232, "step": 2149 }, { "epoch": 0.059033498077979134, "grad_norm": 0.32781267166137695, "learning_rate": 1.9960868758364295e-05, "loss": 0.5856, "step": 2150 }, { "epoch": 0.059060955518945635, "grad_norm": 0.9499065279960632, "learning_rate": 1.9960830578551275e-05, "loss": 0.464, "step": 2151 }, { "epoch": 0.059088412959912136, "grad_norm": 0.3495224714279175, "learning_rate": 1.9960792380158133e-05, "loss": 0.5385, "step": 2152 }, { "epoch": 0.05911587040087864, "grad_norm": 0.3699169456958771, "learning_rate": 1.9960754163184934e-05, "loss": 0.4808, "step": 2153 }, { "epoch": 0.05914332784184514, "grad_norm": 0.3452378809452057, "learning_rate": 1.996071592763175e-05, "loss": 0.594, "step": 2154 }, { "epoch": 0.05917078528281164, "grad_norm": 0.40316420793533325, "learning_rate": 1.9960677673498648e-05, "loss": 0.6708, "step": 2155 }, { "epoch": 0.05919824272377815, "grad_norm": 0.33986324071884155, "learning_rate": 1.9960639400785707e-05, "loss": 0.5426, "step": 2156 }, { "epoch": 0.05922570016474465, "grad_norm": 0.40857383608818054, "learning_rate": 1.996060110949299e-05, "loss": 0.587, "step": 2157 }, { "epoch": 0.05925315760571115, "grad_norm": 0.3709402084350586, "learning_rate": 1.9960562799620576e-05, "loss": 0.5557, "step": 2158 }, { "epoch": 0.05928061504667765, "grad_norm": 0.4072776734828949, "learning_rate": 1.9960524471168533e-05, "loss": 0.4753, "step": 2159 }, { "epoch": 0.05930807248764415, "grad_norm": 0.36226820945739746, "learning_rate": 1.9960486124136932e-05, "loss": 0.5427, "step": 2160 }, { "epoch": 0.05933552992861065, "grad_norm": 0.34444883465766907, "learning_rate": 1.9960447758525846e-05, "loss": 0.5539, "step": 2161 }, { "epoch": 0.05936298736957715, "grad_norm": 0.5509684681892395, "learning_rate": 1.9960409374335346e-05, "loss": 0.4419, "step": 2162 }, { "epoch": 0.05939044481054366, "grad_norm": 0.38677430152893066, "learning_rate": 1.9960370971565504e-05, "loss": 0.599, "step": 2163 }, { "epoch": 0.05941790225151016, "grad_norm": 0.8592802286148071, "learning_rate": 1.996033255021639e-05, "loss": 0.5908, "step": 2164 }, { "epoch": 0.05944535969247666, "grad_norm": 0.38140520453453064, "learning_rate": 1.9960294110288077e-05, "loss": 0.5912, "step": 2165 }, { "epoch": 0.059472817133443164, "grad_norm": 0.3671068251132965, "learning_rate": 1.9960255651780638e-05, "loss": 0.6039, "step": 2166 }, { "epoch": 0.059500274574409664, "grad_norm": 0.38614559173583984, "learning_rate": 1.996021717469414e-05, "loss": 0.4954, "step": 2167 }, { "epoch": 0.059527732015376165, "grad_norm": 0.38289177417755127, "learning_rate": 1.9960178679028664e-05, "loss": 0.5795, "step": 2168 }, { "epoch": 0.059555189456342666, "grad_norm": 0.36286115646362305, "learning_rate": 1.996014016478427e-05, "loss": 0.5734, "step": 2169 }, { "epoch": 0.05958264689730917, "grad_norm": 4.5834455490112305, "learning_rate": 1.996010163196104e-05, "loss": 0.5406, "step": 2170 }, { "epoch": 0.059610104338275675, "grad_norm": 0.4072335958480835, "learning_rate": 1.9960063080559038e-05, "loss": 0.5521, "step": 2171 }, { "epoch": 0.059637561779242176, "grad_norm": 0.39108744263648987, "learning_rate": 1.996002451057834e-05, "loss": 0.5834, "step": 2172 }, { "epoch": 0.05966501922020868, "grad_norm": 0.35315045714378357, "learning_rate": 1.995998592201902e-05, "loss": 0.5252, "step": 2173 }, { "epoch": 0.05969247666117518, "grad_norm": 0.36881497502326965, "learning_rate": 1.9959947314881144e-05, "loss": 0.6557, "step": 2174 }, { "epoch": 0.05971993410214168, "grad_norm": 0.38067081570625305, "learning_rate": 1.995990868916479e-05, "loss": 0.5765, "step": 2175 }, { "epoch": 0.05974739154310818, "grad_norm": 0.4525109827518463, "learning_rate": 1.9959870044870025e-05, "loss": 0.6257, "step": 2176 }, { "epoch": 0.05977484898407468, "grad_norm": 0.38258153200149536, "learning_rate": 1.9959831381996927e-05, "loss": 0.5767, "step": 2177 }, { "epoch": 0.05980230642504119, "grad_norm": 0.3335186839103699, "learning_rate": 1.995979270054556e-05, "loss": 0.5242, "step": 2178 }, { "epoch": 0.05982976386600769, "grad_norm": 0.3927525281906128, "learning_rate": 1.9959754000516005e-05, "loss": 0.5754, "step": 2179 }, { "epoch": 0.05985722130697419, "grad_norm": 0.3810999095439911, "learning_rate": 1.995971528190833e-05, "loss": 0.5785, "step": 2180 }, { "epoch": 0.05988467874794069, "grad_norm": 0.42955848574638367, "learning_rate": 1.9959676544722605e-05, "loss": 0.5668, "step": 2181 }, { "epoch": 0.05991213618890719, "grad_norm": 0.3748985230922699, "learning_rate": 1.9959637788958904e-05, "loss": 0.6214, "step": 2182 }, { "epoch": 0.059939593629873694, "grad_norm": 0.3618476688861847, "learning_rate": 1.99595990146173e-05, "loss": 0.5992, "step": 2183 }, { "epoch": 0.059967051070840195, "grad_norm": 0.38910743594169617, "learning_rate": 1.9959560221697865e-05, "loss": 0.5502, "step": 2184 }, { "epoch": 0.059994508511806696, "grad_norm": 0.357039213180542, "learning_rate": 1.9959521410200674e-05, "loss": 0.5658, "step": 2185 }, { "epoch": 0.060021965952773204, "grad_norm": 0.36861079931259155, "learning_rate": 1.9959482580125796e-05, "loss": 0.5083, "step": 2186 }, { "epoch": 0.060049423393739705, "grad_norm": 0.3543212413787842, "learning_rate": 1.99594437314733e-05, "loss": 0.5093, "step": 2187 }, { "epoch": 0.060076880834706206, "grad_norm": 0.4011175334453583, "learning_rate": 1.995940486424327e-05, "loss": 0.6243, "step": 2188 }, { "epoch": 0.06010433827567271, "grad_norm": 0.6190702319145203, "learning_rate": 1.995936597843577e-05, "loss": 0.636, "step": 2189 }, { "epoch": 0.06013179571663921, "grad_norm": 0.33835357427597046, "learning_rate": 1.995932707405087e-05, "loss": 0.574, "step": 2190 }, { "epoch": 0.06015925315760571, "grad_norm": 0.4024796783924103, "learning_rate": 1.9959288151088646e-05, "loss": 0.5967, "step": 2191 }, { "epoch": 0.06018671059857221, "grad_norm": 0.3792473077774048, "learning_rate": 1.9959249209549172e-05, "loss": 0.6307, "step": 2192 }, { "epoch": 0.06021416803953872, "grad_norm": 0.3466573655605316, "learning_rate": 1.9959210249432522e-05, "loss": 0.5278, "step": 2193 }, { "epoch": 0.06024162548050522, "grad_norm": 0.36213958263397217, "learning_rate": 1.9959171270738765e-05, "loss": 0.5288, "step": 2194 }, { "epoch": 0.06026908292147172, "grad_norm": 0.4089621901512146, "learning_rate": 1.9959132273467973e-05, "loss": 0.5333, "step": 2195 }, { "epoch": 0.06029654036243822, "grad_norm": 0.41900789737701416, "learning_rate": 1.9959093257620227e-05, "loss": 0.7082, "step": 2196 }, { "epoch": 0.06032399780340472, "grad_norm": 0.35812780261039734, "learning_rate": 1.9959054223195588e-05, "loss": 0.6205, "step": 2197 }, { "epoch": 0.06035145524437122, "grad_norm": 0.3465331792831421, "learning_rate": 1.9959015170194134e-05, "loss": 0.5471, "step": 2198 }, { "epoch": 0.060378912685337724, "grad_norm": 0.35243669152259827, "learning_rate": 1.9958976098615937e-05, "loss": 0.6123, "step": 2199 }, { "epoch": 0.06040637012630423, "grad_norm": 0.38020363450050354, "learning_rate": 1.9958937008461077e-05, "loss": 0.6008, "step": 2200 }, { "epoch": 0.06043382756727073, "grad_norm": 0.42968353629112244, "learning_rate": 1.9958897899729616e-05, "loss": 0.5614, "step": 2201 }, { "epoch": 0.060461285008237234, "grad_norm": 0.7304350137710571, "learning_rate": 1.9958858772421635e-05, "loss": 0.5193, "step": 2202 }, { "epoch": 0.060488742449203735, "grad_norm": 0.39762037992477417, "learning_rate": 1.99588196265372e-05, "loss": 0.5339, "step": 2203 }, { "epoch": 0.060516199890170236, "grad_norm": 0.36051496863365173, "learning_rate": 1.995878046207639e-05, "loss": 0.5897, "step": 2204 }, { "epoch": 0.06054365733113674, "grad_norm": 0.33115264773368835, "learning_rate": 1.9958741279039278e-05, "loss": 0.5356, "step": 2205 }, { "epoch": 0.06057111477210324, "grad_norm": 0.3882325291633606, "learning_rate": 1.995870207742593e-05, "loss": 0.6257, "step": 2206 }, { "epoch": 0.06059857221306974, "grad_norm": 0.39441296458244324, "learning_rate": 1.9958662857236427e-05, "loss": 0.6537, "step": 2207 }, { "epoch": 0.06062602965403625, "grad_norm": 0.3537082374095917, "learning_rate": 1.9958623618470842e-05, "loss": 0.6159, "step": 2208 }, { "epoch": 0.06065348709500275, "grad_norm": 0.34285151958465576, "learning_rate": 1.9958584361129243e-05, "loss": 0.522, "step": 2209 }, { "epoch": 0.06068094453596925, "grad_norm": 0.33795827627182007, "learning_rate": 1.9958545085211706e-05, "loss": 0.6028, "step": 2210 }, { "epoch": 0.06070840197693575, "grad_norm": 0.36470019817352295, "learning_rate": 1.9958505790718303e-05, "loss": 0.591, "step": 2211 }, { "epoch": 0.06073585941790225, "grad_norm": 0.3774943947792053, "learning_rate": 1.9958466477649108e-05, "loss": 0.6532, "step": 2212 }, { "epoch": 0.06076331685886875, "grad_norm": 0.4967968165874481, "learning_rate": 1.9958427146004196e-05, "loss": 0.528, "step": 2213 }, { "epoch": 0.06079077429983525, "grad_norm": 0.36023372411727905, "learning_rate": 1.9958387795783642e-05, "loss": 0.5801, "step": 2214 }, { "epoch": 0.06081823174080176, "grad_norm": 0.38253340125083923, "learning_rate": 1.9958348426987513e-05, "loss": 0.6204, "step": 2215 }, { "epoch": 0.06084568918176826, "grad_norm": 0.3428211212158203, "learning_rate": 1.9958309039615886e-05, "loss": 0.5237, "step": 2216 }, { "epoch": 0.06087314662273476, "grad_norm": 0.34544041752815247, "learning_rate": 1.9958269633668834e-05, "loss": 0.4874, "step": 2217 }, { "epoch": 0.060900604063701264, "grad_norm": 0.38082388043403625, "learning_rate": 1.995823020914643e-05, "loss": 0.5675, "step": 2218 }, { "epoch": 0.060928061504667765, "grad_norm": 0.4328271746635437, "learning_rate": 1.995819076604875e-05, "loss": 0.6572, "step": 2219 }, { "epoch": 0.060955518945634266, "grad_norm": 0.39280998706817627, "learning_rate": 1.9958151304375866e-05, "loss": 0.5616, "step": 2220 }, { "epoch": 0.060982976386600767, "grad_norm": 0.3945459723472595, "learning_rate": 1.995811182412785e-05, "loss": 0.6064, "step": 2221 }, { "epoch": 0.06101043382756727, "grad_norm": 0.3881964087486267, "learning_rate": 1.9958072325304777e-05, "loss": 0.5552, "step": 2222 }, { "epoch": 0.061037891268533775, "grad_norm": 0.366852343082428, "learning_rate": 1.9958032807906725e-05, "loss": 0.5253, "step": 2223 }, { "epoch": 0.061065348709500276, "grad_norm": 0.4133734703063965, "learning_rate": 1.995799327193376e-05, "loss": 0.601, "step": 2224 }, { "epoch": 0.06109280615046678, "grad_norm": 0.36045631766319275, "learning_rate": 1.995795371738596e-05, "loss": 0.5718, "step": 2225 }, { "epoch": 0.06112026359143328, "grad_norm": 0.38050082325935364, "learning_rate": 1.9957914144263398e-05, "loss": 0.5908, "step": 2226 }, { "epoch": 0.06114772103239978, "grad_norm": 0.42873719334602356, "learning_rate": 1.9957874552566147e-05, "loss": 0.5557, "step": 2227 }, { "epoch": 0.06117517847336628, "grad_norm": 0.4077243208885193, "learning_rate": 1.9957834942294284e-05, "loss": 0.6465, "step": 2228 }, { "epoch": 0.06120263591433278, "grad_norm": 0.3590684235095978, "learning_rate": 1.995779531344788e-05, "loss": 0.4935, "step": 2229 }, { "epoch": 0.06123009335529929, "grad_norm": 0.35945841670036316, "learning_rate": 1.995775566602701e-05, "loss": 0.4746, "step": 2230 }, { "epoch": 0.06125755079626579, "grad_norm": 0.36589139699935913, "learning_rate": 1.9957716000031748e-05, "loss": 0.5114, "step": 2231 }, { "epoch": 0.06128500823723229, "grad_norm": 0.41690871119499207, "learning_rate": 1.9957676315462166e-05, "loss": 0.539, "step": 2232 }, { "epoch": 0.06131246567819879, "grad_norm": 0.39166584610939026, "learning_rate": 1.9957636612318337e-05, "loss": 0.6121, "step": 2233 }, { "epoch": 0.06133992311916529, "grad_norm": 0.3611486554145813, "learning_rate": 1.995759689060034e-05, "loss": 0.4411, "step": 2234 }, { "epoch": 0.061367380560131794, "grad_norm": 0.34359443187713623, "learning_rate": 1.995755715030825e-05, "loss": 0.4769, "step": 2235 }, { "epoch": 0.061394838001098295, "grad_norm": 0.3115439713001251, "learning_rate": 1.9957517391442134e-05, "loss": 0.47, "step": 2236 }, { "epoch": 0.061422295442064796, "grad_norm": 0.34263476729393005, "learning_rate": 1.995747761400207e-05, "loss": 0.4703, "step": 2237 }, { "epoch": 0.061449752883031304, "grad_norm": 0.35825952887535095, "learning_rate": 1.9957437817988134e-05, "loss": 0.5211, "step": 2238 }, { "epoch": 0.061477210323997805, "grad_norm": 0.3702717125415802, "learning_rate": 1.9957398003400398e-05, "loss": 0.624, "step": 2239 }, { "epoch": 0.061504667764964306, "grad_norm": 0.3753089904785156, "learning_rate": 1.9957358170238933e-05, "loss": 0.6685, "step": 2240 }, { "epoch": 0.06153212520593081, "grad_norm": 0.3798760175704956, "learning_rate": 1.995731831850382e-05, "loss": 0.5347, "step": 2241 }, { "epoch": 0.06155958264689731, "grad_norm": 0.34956783056259155, "learning_rate": 1.9957278448195134e-05, "loss": 0.5167, "step": 2242 }, { "epoch": 0.06158704008786381, "grad_norm": 0.3399032652378082, "learning_rate": 1.9957238559312938e-05, "loss": 0.4865, "step": 2243 }, { "epoch": 0.06161449752883031, "grad_norm": 0.4953603446483612, "learning_rate": 1.9957198651857322e-05, "loss": 0.5925, "step": 2244 }, { "epoch": 0.06164195496979682, "grad_norm": 0.3936220109462738, "learning_rate": 1.9957158725828348e-05, "loss": 0.5858, "step": 2245 }, { "epoch": 0.06166941241076332, "grad_norm": 0.5521969795227051, "learning_rate": 1.9957118781226095e-05, "loss": 0.4939, "step": 2246 }, { "epoch": 0.06169686985172982, "grad_norm": 0.3506004512310028, "learning_rate": 1.995707881805064e-05, "loss": 0.5319, "step": 2247 }, { "epoch": 0.06172432729269632, "grad_norm": 0.4359992742538452, "learning_rate": 1.9957038836302052e-05, "loss": 0.548, "step": 2248 }, { "epoch": 0.06175178473366282, "grad_norm": 0.3661154806613922, "learning_rate": 1.995699883598041e-05, "loss": 0.5042, "step": 2249 }, { "epoch": 0.06177924217462932, "grad_norm": 0.3790014982223511, "learning_rate": 1.9956958817085786e-05, "loss": 0.5444, "step": 2250 }, { "epoch": 0.061806699615595824, "grad_norm": 0.38349899649620056, "learning_rate": 1.995691877961826e-05, "loss": 0.5851, "step": 2251 }, { "epoch": 0.061834157056562325, "grad_norm": 0.40254703164100647, "learning_rate": 1.99568787235779e-05, "loss": 0.5331, "step": 2252 }, { "epoch": 0.06186161449752883, "grad_norm": 0.33847615122795105, "learning_rate": 1.995683864896478e-05, "loss": 0.4876, "step": 2253 }, { "epoch": 0.061889071938495334, "grad_norm": 0.47499531507492065, "learning_rate": 1.9956798555778984e-05, "loss": 0.5626, "step": 2254 }, { "epoch": 0.061916529379461835, "grad_norm": 0.3656770586967468, "learning_rate": 1.9956758444020577e-05, "loss": 0.4675, "step": 2255 }, { "epoch": 0.061943986820428336, "grad_norm": 0.37194526195526123, "learning_rate": 1.9956718313689637e-05, "loss": 0.5739, "step": 2256 }, { "epoch": 0.06197144426139484, "grad_norm": 0.3548049032688141, "learning_rate": 1.9956678164786246e-05, "loss": 0.6028, "step": 2257 }, { "epoch": 0.06199890170236134, "grad_norm": 0.39905068278312683, "learning_rate": 1.9956637997310466e-05, "loss": 0.572, "step": 2258 }, { "epoch": 0.06202635914332784, "grad_norm": 0.33913111686706543, "learning_rate": 1.995659781126238e-05, "loss": 0.6195, "step": 2259 }, { "epoch": 0.06205381658429435, "grad_norm": 0.3583167791366577, "learning_rate": 1.9956557606642063e-05, "loss": 0.5625, "step": 2260 }, { "epoch": 0.06208127402526085, "grad_norm": 0.39295777678489685, "learning_rate": 1.9956517383449587e-05, "loss": 0.6595, "step": 2261 }, { "epoch": 0.06210873146622735, "grad_norm": 0.3511582911014557, "learning_rate": 1.9956477141685025e-05, "loss": 0.5338, "step": 2262 }, { "epoch": 0.06213618890719385, "grad_norm": 0.3706660866737366, "learning_rate": 1.995643688134846e-05, "loss": 0.4894, "step": 2263 }, { "epoch": 0.06216364634816035, "grad_norm": 0.41938576102256775, "learning_rate": 1.995639660243996e-05, "loss": 0.618, "step": 2264 }, { "epoch": 0.06219110378912685, "grad_norm": 0.3404916226863861, "learning_rate": 1.9956356304959607e-05, "loss": 0.5432, "step": 2265 }, { "epoch": 0.06221856123009335, "grad_norm": 0.3488747179508209, "learning_rate": 1.9956315988907464e-05, "loss": 0.5845, "step": 2266 }, { "epoch": 0.06224601867105986, "grad_norm": 0.3924195468425751, "learning_rate": 1.995627565428362e-05, "loss": 0.6417, "step": 2267 }, { "epoch": 0.06227347611202636, "grad_norm": 0.4261990487575531, "learning_rate": 1.9956235301088144e-05, "loss": 0.5988, "step": 2268 }, { "epoch": 0.06230093355299286, "grad_norm": 0.41748592257499695, "learning_rate": 1.9956194929321108e-05, "loss": 0.6291, "step": 2269 }, { "epoch": 0.062328390993959364, "grad_norm": 0.35313042998313904, "learning_rate": 1.9956154538982593e-05, "loss": 0.6238, "step": 2270 }, { "epoch": 0.062355848434925865, "grad_norm": 0.35685089230537415, "learning_rate": 1.995611413007267e-05, "loss": 0.5225, "step": 2271 }, { "epoch": 0.062383305875892366, "grad_norm": 0.32201531529426575, "learning_rate": 1.995607370259142e-05, "loss": 0.5438, "step": 2272 }, { "epoch": 0.06241076331685887, "grad_norm": 0.38347142934799194, "learning_rate": 1.9956033256538914e-05, "loss": 0.6097, "step": 2273 }, { "epoch": 0.06243822075782537, "grad_norm": 0.34300997853279114, "learning_rate": 1.9955992791915227e-05, "loss": 0.5604, "step": 2274 }, { "epoch": 0.062465678198791875, "grad_norm": 0.3473573923110962, "learning_rate": 1.9955952308720438e-05, "loss": 0.5257, "step": 2275 }, { "epoch": 0.062493135639758376, "grad_norm": 0.40131157636642456, "learning_rate": 1.995591180695462e-05, "loss": 0.5358, "step": 2276 }, { "epoch": 0.06252059308072487, "grad_norm": 0.3179967701435089, "learning_rate": 1.9955871286617847e-05, "loss": 0.5663, "step": 2277 }, { "epoch": 0.06254805052169138, "grad_norm": 0.484841525554657, "learning_rate": 1.9955830747710198e-05, "loss": 0.5293, "step": 2278 }, { "epoch": 0.06257550796265789, "grad_norm": 0.34153032302856445, "learning_rate": 1.9955790190231744e-05, "loss": 0.5094, "step": 2279 }, { "epoch": 0.06260296540362438, "grad_norm": 0.415095716714859, "learning_rate": 1.9955749614182567e-05, "loss": 0.6684, "step": 2280 }, { "epoch": 0.06263042284459089, "grad_norm": 0.36621731519699097, "learning_rate": 1.995570901956274e-05, "loss": 0.6181, "step": 2281 }, { "epoch": 0.06265788028555738, "grad_norm": 0.37299177050590515, "learning_rate": 1.9955668406372336e-05, "loss": 0.5142, "step": 2282 }, { "epoch": 0.06268533772652389, "grad_norm": 0.38442304730415344, "learning_rate": 1.9955627774611437e-05, "loss": 0.5777, "step": 2283 }, { "epoch": 0.06271279516749038, "grad_norm": 0.5352806448936462, "learning_rate": 1.995558712428011e-05, "loss": 0.5238, "step": 2284 }, { "epoch": 0.06274025260845689, "grad_norm": 0.3169478476047516, "learning_rate": 1.9955546455378436e-05, "loss": 0.5389, "step": 2285 }, { "epoch": 0.0627677100494234, "grad_norm": 0.3604527711868286, "learning_rate": 1.9955505767906493e-05, "loss": 0.5506, "step": 2286 }, { "epoch": 0.0627951674903899, "grad_norm": 0.4571734368801117, "learning_rate": 1.9955465061864352e-05, "loss": 0.635, "step": 2287 }, { "epoch": 0.0628226249313564, "grad_norm": 0.34403151273727417, "learning_rate": 1.9955424337252095e-05, "loss": 0.5086, "step": 2288 }, { "epoch": 0.0628500823723229, "grad_norm": 0.44114401936531067, "learning_rate": 1.9955383594069792e-05, "loss": 0.612, "step": 2289 }, { "epoch": 0.0628775398132894, "grad_norm": 0.4709815979003906, "learning_rate": 1.995534283231752e-05, "loss": 0.6116, "step": 2290 }, { "epoch": 0.0629049972542559, "grad_norm": 0.3816196024417877, "learning_rate": 1.9955302051995355e-05, "loss": 0.5459, "step": 2291 }, { "epoch": 0.0629324546952224, "grad_norm": 0.38121655583381653, "learning_rate": 1.9955261253103377e-05, "loss": 0.6082, "step": 2292 }, { "epoch": 0.0629599121361889, "grad_norm": 0.377704381942749, "learning_rate": 1.995522043564166e-05, "loss": 0.4812, "step": 2293 }, { "epoch": 0.06298736957715541, "grad_norm": 0.32774117588996887, "learning_rate": 1.9955179599610277e-05, "loss": 0.5398, "step": 2294 }, { "epoch": 0.06301482701812192, "grad_norm": 0.3724874258041382, "learning_rate": 1.9955138745009308e-05, "loss": 0.5277, "step": 2295 }, { "epoch": 0.06304228445908841, "grad_norm": 0.4296702742576599, "learning_rate": 1.995509787183883e-05, "loss": 0.6025, "step": 2296 }, { "epoch": 0.06306974190005492, "grad_norm": 0.34256622195243835, "learning_rate": 1.9955056980098914e-05, "loss": 0.507, "step": 2297 }, { "epoch": 0.06309719934102141, "grad_norm": 0.34510764479637146, "learning_rate": 1.9955016069789638e-05, "loss": 0.5837, "step": 2298 }, { "epoch": 0.06312465678198792, "grad_norm": 0.36298176646232605, "learning_rate": 1.9954975140911083e-05, "loss": 0.6197, "step": 2299 }, { "epoch": 0.06315211422295441, "grad_norm": 0.4207666218280792, "learning_rate": 1.9954934193463322e-05, "loss": 0.5901, "step": 2300 }, { "epoch": 0.06317957166392092, "grad_norm": 0.34578725695610046, "learning_rate": 1.995489322744643e-05, "loss": 0.6002, "step": 2301 }, { "epoch": 0.06320702910488743, "grad_norm": 0.7501724362373352, "learning_rate": 1.9954852242860487e-05, "loss": 0.5604, "step": 2302 }, { "epoch": 0.06323448654585392, "grad_norm": 0.35167786478996277, "learning_rate": 1.9954811239705565e-05, "loss": 0.5193, "step": 2303 }, { "epoch": 0.06326194398682043, "grad_norm": 0.39194926619529724, "learning_rate": 1.9954770217981742e-05, "loss": 0.5909, "step": 2304 }, { "epoch": 0.06328940142778693, "grad_norm": 0.42285704612731934, "learning_rate": 1.9954729177689098e-05, "loss": 0.5684, "step": 2305 }, { "epoch": 0.06331685886875343, "grad_norm": 0.33957818150520325, "learning_rate": 1.995468811882771e-05, "loss": 0.6116, "step": 2306 }, { "epoch": 0.06334431630971993, "grad_norm": 0.46363919973373413, "learning_rate": 1.9954647041397647e-05, "loss": 0.519, "step": 2307 }, { "epoch": 0.06337177375068644, "grad_norm": 0.6705470085144043, "learning_rate": 1.995460594539899e-05, "loss": 0.6881, "step": 2308 }, { "epoch": 0.06339923119165294, "grad_norm": 0.5446553826332092, "learning_rate": 1.9954564830831814e-05, "loss": 0.6161, "step": 2309 }, { "epoch": 0.06342668863261944, "grad_norm": 0.3758768141269684, "learning_rate": 1.9954523697696203e-05, "loss": 0.6039, "step": 2310 }, { "epoch": 0.06345414607358595, "grad_norm": 0.5052087903022766, "learning_rate": 1.9954482545992224e-05, "loss": 0.5199, "step": 2311 }, { "epoch": 0.06348160351455244, "grad_norm": 0.4090321958065033, "learning_rate": 1.9954441375719958e-05, "loss": 0.5276, "step": 2312 }, { "epoch": 0.06350906095551895, "grad_norm": 0.3880351185798645, "learning_rate": 1.9954400186879483e-05, "loss": 0.5743, "step": 2313 }, { "epoch": 0.06353651839648544, "grad_norm": 0.3884424567222595, "learning_rate": 1.9954358979470877e-05, "loss": 0.6075, "step": 2314 }, { "epoch": 0.06356397583745195, "grad_norm": 0.3533661663532257, "learning_rate": 1.995431775349421e-05, "loss": 0.5487, "step": 2315 }, { "epoch": 0.06359143327841846, "grad_norm": 0.4300060570240021, "learning_rate": 1.9954276508949566e-05, "loss": 0.6232, "step": 2316 }, { "epoch": 0.06361889071938495, "grad_norm": 0.34378761053085327, "learning_rate": 1.9954235245837017e-05, "loss": 0.4507, "step": 2317 }, { "epoch": 0.06364634816035146, "grad_norm": 0.35687997937202454, "learning_rate": 1.9954193964156645e-05, "loss": 0.5023, "step": 2318 }, { "epoch": 0.06367380560131795, "grad_norm": 0.3871772289276123, "learning_rate": 1.9954152663908522e-05, "loss": 0.6216, "step": 2319 }, { "epoch": 0.06370126304228446, "grad_norm": 0.33289089798927307, "learning_rate": 1.9954111345092728e-05, "loss": 0.4748, "step": 2320 }, { "epoch": 0.06372872048325096, "grad_norm": 0.33130377531051636, "learning_rate": 1.995407000770934e-05, "loss": 0.5653, "step": 2321 }, { "epoch": 0.06375617792421746, "grad_norm": 0.35807961225509644, "learning_rate": 1.9954028651758435e-05, "loss": 0.5619, "step": 2322 }, { "epoch": 0.06378363536518397, "grad_norm": 0.3314899504184723, "learning_rate": 1.9953987277240087e-05, "loss": 0.5152, "step": 2323 }, { "epoch": 0.06381109280615047, "grad_norm": 0.4388476014137268, "learning_rate": 1.9953945884154378e-05, "loss": 0.583, "step": 2324 }, { "epoch": 0.06383855024711697, "grad_norm": 0.3738574981689453, "learning_rate": 1.9953904472501385e-05, "loss": 0.5615, "step": 2325 }, { "epoch": 0.06386600768808347, "grad_norm": 0.3551120162010193, "learning_rate": 1.995386304228118e-05, "loss": 0.5096, "step": 2326 }, { "epoch": 0.06389346512904998, "grad_norm": 0.40389296412467957, "learning_rate": 1.9953821593493844e-05, "loss": 0.5465, "step": 2327 }, { "epoch": 0.06392092257001647, "grad_norm": 0.343547523021698, "learning_rate": 1.9953780126139453e-05, "loss": 0.5997, "step": 2328 }, { "epoch": 0.06394838001098298, "grad_norm": 0.3800989091396332, "learning_rate": 1.995373864021809e-05, "loss": 0.5982, "step": 2329 }, { "epoch": 0.06397583745194947, "grad_norm": 0.43190714716911316, "learning_rate": 1.995369713572982e-05, "loss": 0.5738, "step": 2330 }, { "epoch": 0.06400329489291598, "grad_norm": 0.35931316018104553, "learning_rate": 1.995365561267473e-05, "loss": 0.5967, "step": 2331 }, { "epoch": 0.06403075233388249, "grad_norm": 0.3362046480178833, "learning_rate": 1.99536140710529e-05, "loss": 0.568, "step": 2332 }, { "epoch": 0.06405820977484898, "grad_norm": 0.3802679479122162, "learning_rate": 1.99535725108644e-05, "loss": 0.5152, "step": 2333 }, { "epoch": 0.06408566721581549, "grad_norm": 0.40355223417282104, "learning_rate": 1.995353093210931e-05, "loss": 0.6161, "step": 2334 }, { "epoch": 0.06411312465678198, "grad_norm": 0.4156351089477539, "learning_rate": 1.9953489334787707e-05, "loss": 0.5688, "step": 2335 }, { "epoch": 0.06414058209774849, "grad_norm": 0.38148412108421326, "learning_rate": 1.9953447718899674e-05, "loss": 0.5619, "step": 2336 }, { "epoch": 0.06416803953871499, "grad_norm": 0.3595396876335144, "learning_rate": 1.995340608444528e-05, "loss": 0.6102, "step": 2337 }, { "epoch": 0.0641954969796815, "grad_norm": 0.392189085483551, "learning_rate": 1.995336443142461e-05, "loss": 0.5758, "step": 2338 }, { "epoch": 0.064222954420648, "grad_norm": 0.36414793133735657, "learning_rate": 1.9953322759837737e-05, "loss": 0.56, "step": 2339 }, { "epoch": 0.0642504118616145, "grad_norm": 0.35379132628440857, "learning_rate": 1.995328106968474e-05, "loss": 0.4934, "step": 2340 }, { "epoch": 0.064277869302581, "grad_norm": 0.3400535583496094, "learning_rate": 1.9953239360965697e-05, "loss": 0.4699, "step": 2341 }, { "epoch": 0.0643053267435475, "grad_norm": 0.3722015619277954, "learning_rate": 1.9953197633680685e-05, "loss": 0.5347, "step": 2342 }, { "epoch": 0.064332784184514, "grad_norm": 0.41912591457366943, "learning_rate": 1.9953155887829785e-05, "loss": 0.619, "step": 2343 }, { "epoch": 0.0643602416254805, "grad_norm": 0.4313147962093353, "learning_rate": 1.9953114123413072e-05, "loss": 0.4778, "step": 2344 }, { "epoch": 0.06438769906644701, "grad_norm": 0.4037111699581146, "learning_rate": 1.9953072340430623e-05, "loss": 0.6029, "step": 2345 }, { "epoch": 0.06441515650741352, "grad_norm": 0.3588685095310211, "learning_rate": 1.995303053888252e-05, "loss": 0.5645, "step": 2346 }, { "epoch": 0.06444261394838001, "grad_norm": 0.3372650444507599, "learning_rate": 1.9952988718768836e-05, "loss": 0.6063, "step": 2347 }, { "epoch": 0.06447007138934652, "grad_norm": 0.4008887708187103, "learning_rate": 1.995294688008965e-05, "loss": 0.6082, "step": 2348 }, { "epoch": 0.06449752883031301, "grad_norm": 0.394862562417984, "learning_rate": 1.9952905022845045e-05, "loss": 0.5774, "step": 2349 }, { "epoch": 0.06452498627127952, "grad_norm": 0.41483163833618164, "learning_rate": 1.9952863147035096e-05, "loss": 0.5861, "step": 2350 }, { "epoch": 0.06455244371224601, "grad_norm": 0.38506001234054565, "learning_rate": 1.995282125265988e-05, "loss": 0.6398, "step": 2351 }, { "epoch": 0.06457990115321252, "grad_norm": 0.46177539229393005, "learning_rate": 1.9952779339719473e-05, "loss": 0.5611, "step": 2352 }, { "epoch": 0.06460735859417903, "grad_norm": 0.347177654504776, "learning_rate": 1.9952737408213957e-05, "loss": 0.5159, "step": 2353 }, { "epoch": 0.06463481603514552, "grad_norm": 0.3991475999355316, "learning_rate": 1.995269545814341e-05, "loss": 0.5205, "step": 2354 }, { "epoch": 0.06466227347611203, "grad_norm": 0.37093180418014526, "learning_rate": 1.995265348950791e-05, "loss": 0.608, "step": 2355 }, { "epoch": 0.06468973091707853, "grad_norm": 0.39164698123931885, "learning_rate": 1.9952611502307535e-05, "loss": 0.5165, "step": 2356 }, { "epoch": 0.06471718835804503, "grad_norm": 0.4027473032474518, "learning_rate": 1.9952569496542363e-05, "loss": 0.6494, "step": 2357 }, { "epoch": 0.06474464579901153, "grad_norm": 0.5456291437149048, "learning_rate": 1.9952527472212472e-05, "loss": 0.5251, "step": 2358 }, { "epoch": 0.06477210323997803, "grad_norm": 0.39037951827049255, "learning_rate": 1.995248542931794e-05, "loss": 0.6345, "step": 2359 }, { "epoch": 0.06479956068094453, "grad_norm": 0.3340372145175934, "learning_rate": 1.9952443367858843e-05, "loss": 0.5391, "step": 2360 }, { "epoch": 0.06482701812191104, "grad_norm": 0.3582030236721039, "learning_rate": 1.995240128783527e-05, "loss": 0.6511, "step": 2361 }, { "epoch": 0.06485447556287754, "grad_norm": 0.4054940938949585, "learning_rate": 1.9952359189247286e-05, "loss": 0.6254, "step": 2362 }, { "epoch": 0.06488193300384404, "grad_norm": 0.37794166803359985, "learning_rate": 1.9952317072094977e-05, "loss": 0.5589, "step": 2363 }, { "epoch": 0.06490939044481055, "grad_norm": 0.41617465019226074, "learning_rate": 1.995227493637842e-05, "loss": 0.4759, "step": 2364 }, { "epoch": 0.06493684788577704, "grad_norm": 0.4125503897666931, "learning_rate": 1.9952232782097697e-05, "loss": 0.5837, "step": 2365 }, { "epoch": 0.06496430532674355, "grad_norm": 0.40291157364845276, "learning_rate": 1.995219060925288e-05, "loss": 0.5432, "step": 2366 }, { "epoch": 0.06499176276771004, "grad_norm": 0.37631815671920776, "learning_rate": 1.9952148417844056e-05, "loss": 0.591, "step": 2367 }, { "epoch": 0.06501922020867655, "grad_norm": 0.33996298909187317, "learning_rate": 1.9952106207871295e-05, "loss": 0.6312, "step": 2368 }, { "epoch": 0.06504667764964306, "grad_norm": 0.3862976133823395, "learning_rate": 1.9952063979334683e-05, "loss": 0.5445, "step": 2369 }, { "epoch": 0.06507413509060955, "grad_norm": 0.4430011510848999, "learning_rate": 1.995202173223429e-05, "loss": 0.5691, "step": 2370 }, { "epoch": 0.06510159253157606, "grad_norm": 0.3977324068546295, "learning_rate": 1.99519794665702e-05, "loss": 0.5261, "step": 2371 }, { "epoch": 0.06512904997254255, "grad_norm": 0.3757913112640381, "learning_rate": 1.9951937182342496e-05, "loss": 0.5547, "step": 2372 }, { "epoch": 0.06515650741350906, "grad_norm": 0.34055274724960327, "learning_rate": 1.995189487955125e-05, "loss": 0.5336, "step": 2373 }, { "epoch": 0.06518396485447556, "grad_norm": 0.4109501838684082, "learning_rate": 1.995185255819655e-05, "loss": 0.6224, "step": 2374 }, { "epoch": 0.06521142229544206, "grad_norm": 0.4686250388622284, "learning_rate": 1.995181021827846e-05, "loss": 0.5278, "step": 2375 }, { "epoch": 0.06523887973640857, "grad_norm": 0.3437930643558502, "learning_rate": 1.9951767859797074e-05, "loss": 0.5356, "step": 2376 }, { "epoch": 0.06526633717737507, "grad_norm": 0.3621160089969635, "learning_rate": 1.995172548275246e-05, "loss": 0.5497, "step": 2377 }, { "epoch": 0.06529379461834157, "grad_norm": 0.4090946912765503, "learning_rate": 1.9951683087144705e-05, "loss": 0.5848, "step": 2378 }, { "epoch": 0.06532125205930807, "grad_norm": 0.3702278435230255, "learning_rate": 1.9951640672973887e-05, "loss": 0.4896, "step": 2379 }, { "epoch": 0.06534870950027458, "grad_norm": 0.42307791113853455, "learning_rate": 1.995159824024008e-05, "loss": 0.4921, "step": 2380 }, { "epoch": 0.06537616694124107, "grad_norm": 0.3260853886604309, "learning_rate": 1.9951555788943364e-05, "loss": 0.529, "step": 2381 }, { "epoch": 0.06540362438220758, "grad_norm": 0.34623345732688904, "learning_rate": 1.9951513319083822e-05, "loss": 0.5616, "step": 2382 }, { "epoch": 0.06543108182317409, "grad_norm": 0.35533228516578674, "learning_rate": 1.9951470830661533e-05, "loss": 0.5077, "step": 2383 }, { "epoch": 0.06545853926414058, "grad_norm": 0.40130531787872314, "learning_rate": 1.9951428323676575e-05, "loss": 0.5544, "step": 2384 }, { "epoch": 0.06548599670510709, "grad_norm": 0.4143134653568268, "learning_rate": 1.9951385798129025e-05, "loss": 0.5961, "step": 2385 }, { "epoch": 0.06551345414607358, "grad_norm": 0.3523617684841156, "learning_rate": 1.9951343254018965e-05, "loss": 0.4967, "step": 2386 }, { "epoch": 0.06554091158704009, "grad_norm": 0.3218756914138794, "learning_rate": 1.9951300691346472e-05, "loss": 0.5101, "step": 2387 }, { "epoch": 0.06556836902800658, "grad_norm": 0.4726930558681488, "learning_rate": 1.9951258110111632e-05, "loss": 0.7072, "step": 2388 }, { "epoch": 0.06559582646897309, "grad_norm": 0.36453983187675476, "learning_rate": 1.9951215510314515e-05, "loss": 0.5275, "step": 2389 }, { "epoch": 0.0656232839099396, "grad_norm": 0.40244221687316895, "learning_rate": 1.9951172891955207e-05, "loss": 0.7143, "step": 2390 }, { "epoch": 0.0656507413509061, "grad_norm": 0.3376240134239197, "learning_rate": 1.9951130255033785e-05, "loss": 0.5386, "step": 2391 }, { "epoch": 0.0656781987918726, "grad_norm": 0.35504403710365295, "learning_rate": 1.9951087599550327e-05, "loss": 0.4933, "step": 2392 }, { "epoch": 0.0657056562328391, "grad_norm": 0.3846206068992615, "learning_rate": 1.9951044925504915e-05, "loss": 0.5841, "step": 2393 }, { "epoch": 0.0657331136738056, "grad_norm": 0.3544868230819702, "learning_rate": 1.9951002232897633e-05, "loss": 0.6378, "step": 2394 }, { "epoch": 0.0657605711147721, "grad_norm": 0.3465864062309265, "learning_rate": 1.9950959521728552e-05, "loss": 0.4862, "step": 2395 }, { "epoch": 0.0657880285557386, "grad_norm": 0.4300004839897156, "learning_rate": 1.9950916791997757e-05, "loss": 0.5508, "step": 2396 }, { "epoch": 0.0658154859967051, "grad_norm": 0.35596659779548645, "learning_rate": 1.9950874043705322e-05, "loss": 0.5493, "step": 2397 }, { "epoch": 0.06584294343767161, "grad_norm": 0.3728666305541992, "learning_rate": 1.9950831276851337e-05, "loss": 0.5294, "step": 2398 }, { "epoch": 0.06587040087863812, "grad_norm": 0.3770555853843689, "learning_rate": 1.9950788491435874e-05, "loss": 0.6146, "step": 2399 }, { "epoch": 0.06589785831960461, "grad_norm": 0.37264615297317505, "learning_rate": 1.9950745687459013e-05, "loss": 0.5663, "step": 2400 }, { "epoch": 0.06592531576057112, "grad_norm": 0.37756600975990295, "learning_rate": 1.9950702864920837e-05, "loss": 0.5783, "step": 2401 }, { "epoch": 0.06595277320153761, "grad_norm": 0.35577312111854553, "learning_rate": 1.9950660023821422e-05, "loss": 0.6352, "step": 2402 }, { "epoch": 0.06598023064250412, "grad_norm": 0.36462417244911194, "learning_rate": 1.995061716416085e-05, "loss": 0.5111, "step": 2403 }, { "epoch": 0.06600768808347061, "grad_norm": 0.35594162344932556, "learning_rate": 1.9950574285939204e-05, "loss": 0.4975, "step": 2404 }, { "epoch": 0.06603514552443712, "grad_norm": 0.3404890298843384, "learning_rate": 1.995053138915656e-05, "loss": 0.4943, "step": 2405 }, { "epoch": 0.06606260296540363, "grad_norm": 0.34419503808021545, "learning_rate": 1.9950488473812997e-05, "loss": 0.6186, "step": 2406 }, { "epoch": 0.06609006040637012, "grad_norm": 0.33428871631622314, "learning_rate": 1.99504455399086e-05, "loss": 0.5512, "step": 2407 }, { "epoch": 0.06611751784733663, "grad_norm": 0.31703981757164, "learning_rate": 1.9950402587443448e-05, "loss": 0.4772, "step": 2408 }, { "epoch": 0.06614497528830313, "grad_norm": 0.35289838910102844, "learning_rate": 1.9950359616417615e-05, "loss": 0.561, "step": 2409 }, { "epoch": 0.06617243272926963, "grad_norm": 0.4048164188861847, "learning_rate": 1.9950316626831186e-05, "loss": 0.5394, "step": 2410 }, { "epoch": 0.06619989017023613, "grad_norm": 0.3773423731327057, "learning_rate": 1.9950273618684243e-05, "loss": 0.6116, "step": 2411 }, { "epoch": 0.06622734761120264, "grad_norm": 0.37130776047706604, "learning_rate": 1.9950230591976862e-05, "loss": 0.5263, "step": 2412 }, { "epoch": 0.06625480505216914, "grad_norm": 0.6545937061309814, "learning_rate": 1.9950187546709127e-05, "loss": 0.6434, "step": 2413 }, { "epoch": 0.06628226249313564, "grad_norm": 0.3710745871067047, "learning_rate": 1.9950144482881114e-05, "loss": 0.6272, "step": 2414 }, { "epoch": 0.06630971993410215, "grad_norm": 0.3910004496574402, "learning_rate": 1.995010140049291e-05, "loss": 0.5648, "step": 2415 }, { "epoch": 0.06633717737506864, "grad_norm": 0.358593225479126, "learning_rate": 1.9950058299544585e-05, "loss": 0.5897, "step": 2416 }, { "epoch": 0.06636463481603515, "grad_norm": 0.33523327112197876, "learning_rate": 1.995001518003623e-05, "loss": 0.4351, "step": 2417 }, { "epoch": 0.06639209225700164, "grad_norm": 0.36261075735092163, "learning_rate": 1.994997204196792e-05, "loss": 0.5554, "step": 2418 }, { "epoch": 0.06641954969796815, "grad_norm": 0.4395487606525421, "learning_rate": 1.9949928885339735e-05, "loss": 0.5593, "step": 2419 }, { "epoch": 0.06644700713893466, "grad_norm": 0.36798161268234253, "learning_rate": 1.9949885710151758e-05, "loss": 0.557, "step": 2420 }, { "epoch": 0.06647446457990115, "grad_norm": 0.37082457542419434, "learning_rate": 1.9949842516404073e-05, "loss": 0.59, "step": 2421 }, { "epoch": 0.06650192202086766, "grad_norm": 0.3638967275619507, "learning_rate": 1.994979930409675e-05, "loss": 0.5234, "step": 2422 }, { "epoch": 0.06652937946183415, "grad_norm": 0.36279240250587463, "learning_rate": 1.9949756073229877e-05, "loss": 0.4795, "step": 2423 }, { "epoch": 0.06655683690280066, "grad_norm": 0.3546316623687744, "learning_rate": 1.9949712823803535e-05, "loss": 0.5204, "step": 2424 }, { "epoch": 0.06658429434376716, "grad_norm": 0.3369678854942322, "learning_rate": 1.9949669555817804e-05, "loss": 0.5588, "step": 2425 }, { "epoch": 0.06661175178473366, "grad_norm": 0.35158663988113403, "learning_rate": 1.994962626927276e-05, "loss": 0.4849, "step": 2426 }, { "epoch": 0.06663920922570016, "grad_norm": 0.3536481261253357, "learning_rate": 1.994958296416849e-05, "loss": 0.5305, "step": 2427 }, { "epoch": 0.06666666666666667, "grad_norm": 0.3814046382904053, "learning_rate": 1.994953964050507e-05, "loss": 0.6067, "step": 2428 }, { "epoch": 0.06669412410763317, "grad_norm": 0.35357314348220825, "learning_rate": 1.9949496298282586e-05, "loss": 0.4843, "step": 2429 }, { "epoch": 0.06672158154859967, "grad_norm": 0.4029700458049774, "learning_rate": 1.9949452937501116e-05, "loss": 0.4546, "step": 2430 }, { "epoch": 0.06674903898956618, "grad_norm": 0.40141725540161133, "learning_rate": 1.9949409558160736e-05, "loss": 0.6528, "step": 2431 }, { "epoch": 0.06677649643053267, "grad_norm": 0.3752491772174835, "learning_rate": 1.994936616026154e-05, "loss": 0.5254, "step": 2432 }, { "epoch": 0.06680395387149918, "grad_norm": 0.41975846886634827, "learning_rate": 1.9949322743803594e-05, "loss": 0.5283, "step": 2433 }, { "epoch": 0.06683141131246567, "grad_norm": 0.3690173327922821, "learning_rate": 1.9949279308786987e-05, "loss": 0.5615, "step": 2434 }, { "epoch": 0.06685886875343218, "grad_norm": 0.33703848719596863, "learning_rate": 1.9949235855211797e-05, "loss": 0.5446, "step": 2435 }, { "epoch": 0.06688632619439869, "grad_norm": 0.3546878397464752, "learning_rate": 1.9949192383078108e-05, "loss": 0.4764, "step": 2436 }, { "epoch": 0.06691378363536518, "grad_norm": 0.4123169183731079, "learning_rate": 1.9949148892386005e-05, "loss": 0.5649, "step": 2437 }, { "epoch": 0.06694124107633169, "grad_norm": 0.34833216667175293, "learning_rate": 1.9949105383135556e-05, "loss": 0.5151, "step": 2438 }, { "epoch": 0.06696869851729818, "grad_norm": 0.3220106065273285, "learning_rate": 1.9949061855326852e-05, "loss": 0.5169, "step": 2439 }, { "epoch": 0.06699615595826469, "grad_norm": 0.4123360216617584, "learning_rate": 1.9949018308959974e-05, "loss": 0.5286, "step": 2440 }, { "epoch": 0.06702361339923119, "grad_norm": 0.34656408429145813, "learning_rate": 1.9948974744035002e-05, "loss": 0.5576, "step": 2441 }, { "epoch": 0.0670510708401977, "grad_norm": 0.37669599056243896, "learning_rate": 1.9948931160552015e-05, "loss": 0.5156, "step": 2442 }, { "epoch": 0.0670785282811642, "grad_norm": 0.42906442284584045, "learning_rate": 1.99488875585111e-05, "loss": 0.5789, "step": 2443 }, { "epoch": 0.0671059857221307, "grad_norm": 0.34615379571914673, "learning_rate": 1.994884393791233e-05, "loss": 0.5442, "step": 2444 }, { "epoch": 0.0671334431630972, "grad_norm": 0.4075731933116913, "learning_rate": 1.9948800298755793e-05, "loss": 0.5074, "step": 2445 }, { "epoch": 0.0671609006040637, "grad_norm": 0.3915160000324249, "learning_rate": 1.9948756641041566e-05, "loss": 0.475, "step": 2446 }, { "epoch": 0.0671883580450302, "grad_norm": 0.3490441143512726, "learning_rate": 1.9948712964769735e-05, "loss": 0.4751, "step": 2447 }, { "epoch": 0.0672158154859967, "grad_norm": 0.39158669114112854, "learning_rate": 1.9948669269940377e-05, "loss": 0.5774, "step": 2448 }, { "epoch": 0.06724327292696321, "grad_norm": 0.35388022661209106, "learning_rate": 1.9948625556553575e-05, "loss": 0.634, "step": 2449 }, { "epoch": 0.06727073036792972, "grad_norm": 0.4043738842010498, "learning_rate": 1.9948581824609416e-05, "loss": 0.5089, "step": 2450 }, { "epoch": 0.06729818780889621, "grad_norm": 0.3756684958934784, "learning_rate": 1.994853807410797e-05, "loss": 0.5308, "step": 2451 }, { "epoch": 0.06732564524986272, "grad_norm": 0.3822261393070221, "learning_rate": 1.9948494305049332e-05, "loss": 0.5578, "step": 2452 }, { "epoch": 0.06735310269082921, "grad_norm": 0.39924734830856323, "learning_rate": 1.994845051743357e-05, "loss": 0.5698, "step": 2453 }, { "epoch": 0.06738056013179572, "grad_norm": 0.3949912190437317, "learning_rate": 1.9948406711260776e-05, "loss": 0.5325, "step": 2454 }, { "epoch": 0.06740801757276221, "grad_norm": 0.3612290620803833, "learning_rate": 1.994836288653103e-05, "loss": 0.5852, "step": 2455 }, { "epoch": 0.06743547501372872, "grad_norm": 0.38283222913742065, "learning_rate": 1.994831904324441e-05, "loss": 0.5586, "step": 2456 }, { "epoch": 0.06746293245469523, "grad_norm": 0.3937833905220032, "learning_rate": 1.9948275181401e-05, "loss": 0.5566, "step": 2457 }, { "epoch": 0.06749038989566172, "grad_norm": 0.34893596172332764, "learning_rate": 1.994823130100088e-05, "loss": 0.5843, "step": 2458 }, { "epoch": 0.06751784733662823, "grad_norm": 0.3804466426372528, "learning_rate": 1.994818740204414e-05, "loss": 0.5362, "step": 2459 }, { "epoch": 0.06754530477759473, "grad_norm": 0.3672961890697479, "learning_rate": 1.994814348453085e-05, "loss": 0.4723, "step": 2460 }, { "epoch": 0.06757276221856123, "grad_norm": 0.35358694195747375, "learning_rate": 1.9948099548461098e-05, "loss": 0.5043, "step": 2461 }, { "epoch": 0.06760021965952773, "grad_norm": 0.3814089596271515, "learning_rate": 1.9948055593834965e-05, "loss": 0.5549, "step": 2462 }, { "epoch": 0.06762767710049424, "grad_norm": 0.366769939661026, "learning_rate": 1.9948011620652533e-05, "loss": 0.6147, "step": 2463 }, { "epoch": 0.06765513454146073, "grad_norm": 0.33227694034576416, "learning_rate": 1.9947967628913885e-05, "loss": 0.5221, "step": 2464 }, { "epoch": 0.06768259198242724, "grad_norm": 0.46404004096984863, "learning_rate": 1.9947923618619104e-05, "loss": 0.6504, "step": 2465 }, { "epoch": 0.06771004942339374, "grad_norm": 0.35283759236335754, "learning_rate": 1.9947879589768267e-05, "loss": 0.5643, "step": 2466 }, { "epoch": 0.06773750686436024, "grad_norm": 0.4014884829521179, "learning_rate": 1.9947835542361462e-05, "loss": 0.5034, "step": 2467 }, { "epoch": 0.06776496430532675, "grad_norm": 0.3382922112941742, "learning_rate": 1.9947791476398768e-05, "loss": 0.494, "step": 2468 }, { "epoch": 0.06779242174629324, "grad_norm": 0.36997005343437195, "learning_rate": 1.9947747391880268e-05, "loss": 0.6103, "step": 2469 }, { "epoch": 0.06781987918725975, "grad_norm": 0.41770434379577637, "learning_rate": 1.9947703288806045e-05, "loss": 0.5704, "step": 2470 }, { "epoch": 0.06784733662822624, "grad_norm": 0.6453843712806702, "learning_rate": 1.994765916717618e-05, "loss": 0.6629, "step": 2471 }, { "epoch": 0.06787479406919275, "grad_norm": 0.382752388715744, "learning_rate": 1.9947615026990755e-05, "loss": 0.5635, "step": 2472 }, { "epoch": 0.06790225151015926, "grad_norm": 0.379184752702713, "learning_rate": 1.9947570868249852e-05, "loss": 0.6075, "step": 2473 }, { "epoch": 0.06792970895112575, "grad_norm": 0.37019670009613037, "learning_rate": 1.9947526690953557e-05, "loss": 0.5596, "step": 2474 }, { "epoch": 0.06795716639209226, "grad_norm": 0.3334265947341919, "learning_rate": 1.9947482495101948e-05, "loss": 0.4917, "step": 2475 }, { "epoch": 0.06798462383305875, "grad_norm": 0.36810392141342163, "learning_rate": 1.994743828069511e-05, "loss": 0.5286, "step": 2476 }, { "epoch": 0.06801208127402526, "grad_norm": 0.34017282724380493, "learning_rate": 1.9947394047733124e-05, "loss": 0.5557, "step": 2477 }, { "epoch": 0.06803953871499176, "grad_norm": 0.36422809958457947, "learning_rate": 1.9947349796216075e-05, "loss": 0.5731, "step": 2478 }, { "epoch": 0.06806699615595826, "grad_norm": 0.36500614881515503, "learning_rate": 1.9947305526144044e-05, "loss": 0.6077, "step": 2479 }, { "epoch": 0.06809445359692477, "grad_norm": 0.3905034363269806, "learning_rate": 1.9947261237517113e-05, "loss": 0.6033, "step": 2480 }, { "epoch": 0.06812191103789127, "grad_norm": 0.36006250977516174, "learning_rate": 1.9947216930335363e-05, "loss": 0.5495, "step": 2481 }, { "epoch": 0.06814936847885777, "grad_norm": 0.3371071219444275, "learning_rate": 1.9947172604598878e-05, "loss": 0.5162, "step": 2482 }, { "epoch": 0.06817682591982427, "grad_norm": 0.37942707538604736, "learning_rate": 1.9947128260307744e-05, "loss": 0.5762, "step": 2483 }, { "epoch": 0.06820428336079078, "grad_norm": 0.38507717847824097, "learning_rate": 1.994708389746204e-05, "loss": 0.5462, "step": 2484 }, { "epoch": 0.06823174080175727, "grad_norm": 0.3783193528652191, "learning_rate": 1.994703951606185e-05, "loss": 0.6364, "step": 2485 }, { "epoch": 0.06825919824272378, "grad_norm": 0.35543766617774963, "learning_rate": 1.994699511610726e-05, "loss": 0.6027, "step": 2486 }, { "epoch": 0.06828665568369029, "grad_norm": 0.34314975142478943, "learning_rate": 1.994695069759834e-05, "loss": 0.5122, "step": 2487 }, { "epoch": 0.06831411312465678, "grad_norm": 0.3640054166316986, "learning_rate": 1.994690626053519e-05, "loss": 0.4736, "step": 2488 }, { "epoch": 0.06834157056562329, "grad_norm": 0.3699384033679962, "learning_rate": 1.9946861804917887e-05, "loss": 0.5929, "step": 2489 }, { "epoch": 0.06836902800658978, "grad_norm": 0.3777710199356079, "learning_rate": 1.9946817330746505e-05, "loss": 0.5186, "step": 2490 }, { "epoch": 0.06839648544755629, "grad_norm": 0.3909810483455658, "learning_rate": 1.9946772838021137e-05, "loss": 0.6544, "step": 2491 }, { "epoch": 0.06842394288852278, "grad_norm": 0.3749752342700958, "learning_rate": 1.9946728326741865e-05, "loss": 0.6736, "step": 2492 }, { "epoch": 0.06845140032948929, "grad_norm": 0.3217942416667938, "learning_rate": 1.994668379690877e-05, "loss": 0.5239, "step": 2493 }, { "epoch": 0.06847885777045579, "grad_norm": 0.3616379499435425, "learning_rate": 1.9946639248521933e-05, "loss": 0.598, "step": 2494 }, { "epoch": 0.0685063152114223, "grad_norm": 0.33275681734085083, "learning_rate": 1.994659468158144e-05, "loss": 0.5187, "step": 2495 }, { "epoch": 0.0685337726523888, "grad_norm": 0.3579504191875458, "learning_rate": 1.9946550096087373e-05, "loss": 0.4963, "step": 2496 }, { "epoch": 0.0685612300933553, "grad_norm": 0.37384963035583496, "learning_rate": 1.9946505492039816e-05, "loss": 0.4772, "step": 2497 }, { "epoch": 0.0685886875343218, "grad_norm": 0.4308069944381714, "learning_rate": 1.994646086943885e-05, "loss": 0.491, "step": 2498 }, { "epoch": 0.0686161449752883, "grad_norm": 0.3591962456703186, "learning_rate": 1.994641622828456e-05, "loss": 0.5769, "step": 2499 }, { "epoch": 0.0686436024162548, "grad_norm": 0.497003436088562, "learning_rate": 1.9946371568577032e-05, "loss": 0.536, "step": 2500 }, { "epoch": 0.0686710598572213, "grad_norm": 0.31872129440307617, "learning_rate": 1.9946326890316345e-05, "loss": 0.4871, "step": 2501 }, { "epoch": 0.06869851729818781, "grad_norm": 0.40617093443870544, "learning_rate": 1.9946282193502583e-05, "loss": 0.5242, "step": 2502 }, { "epoch": 0.06872597473915432, "grad_norm": 0.422521710395813, "learning_rate": 1.994623747813583e-05, "loss": 0.6196, "step": 2503 }, { "epoch": 0.06875343218012081, "grad_norm": 0.3546566665172577, "learning_rate": 1.9946192744216172e-05, "loss": 0.5432, "step": 2504 }, { "epoch": 0.06878088962108732, "grad_norm": 0.4822406768798828, "learning_rate": 1.994614799174369e-05, "loss": 0.5351, "step": 2505 }, { "epoch": 0.06880834706205381, "grad_norm": 0.3569478988647461, "learning_rate": 1.9946103220718463e-05, "loss": 0.5522, "step": 2506 }, { "epoch": 0.06883580450302032, "grad_norm": 0.3895514905452728, "learning_rate": 1.9946058431140587e-05, "loss": 0.5628, "step": 2507 }, { "epoch": 0.06886326194398681, "grad_norm": 0.4540875256061554, "learning_rate": 1.994601362301013e-05, "loss": 0.5085, "step": 2508 }, { "epoch": 0.06889071938495332, "grad_norm": 0.4012500047683716, "learning_rate": 1.9945968796327188e-05, "loss": 0.578, "step": 2509 }, { "epoch": 0.06891817682591983, "grad_norm": 0.3989761769771576, "learning_rate": 1.9945923951091837e-05, "loss": 0.5584, "step": 2510 }, { "epoch": 0.06894563426688632, "grad_norm": 0.3539775013923645, "learning_rate": 1.9945879087304164e-05, "loss": 0.5375, "step": 2511 }, { "epoch": 0.06897309170785283, "grad_norm": 0.5623977184295654, "learning_rate": 1.9945834204964254e-05, "loss": 0.4889, "step": 2512 }, { "epoch": 0.06900054914881933, "grad_norm": 0.38151729106903076, "learning_rate": 1.9945789304072188e-05, "loss": 0.5252, "step": 2513 }, { "epoch": 0.06902800658978583, "grad_norm": 0.35390251874923706, "learning_rate": 1.994574438462805e-05, "loss": 0.4848, "step": 2514 }, { "epoch": 0.06905546403075233, "grad_norm": 0.3761693835258484, "learning_rate": 1.9945699446631927e-05, "loss": 0.4746, "step": 2515 }, { "epoch": 0.06908292147171884, "grad_norm": 0.3529433608055115, "learning_rate": 1.9945654490083894e-05, "loss": 0.5028, "step": 2516 }, { "epoch": 0.06911037891268534, "grad_norm": 0.4220900535583496, "learning_rate": 1.9945609514984047e-05, "loss": 0.5625, "step": 2517 }, { "epoch": 0.06913783635365184, "grad_norm": 0.3410559296607971, "learning_rate": 1.9945564521332458e-05, "loss": 0.5622, "step": 2518 }, { "epoch": 0.06916529379461835, "grad_norm": 0.3723577857017517, "learning_rate": 1.9945519509129224e-05, "loss": 0.566, "step": 2519 }, { "epoch": 0.06919275123558484, "grad_norm": 0.38781461119651794, "learning_rate": 1.994547447837442e-05, "loss": 0.5647, "step": 2520 }, { "epoch": 0.06922020867655135, "grad_norm": 0.40047988295555115, "learning_rate": 1.9945429429068127e-05, "loss": 0.6192, "step": 2521 }, { "epoch": 0.06924766611751784, "grad_norm": 0.3830215036869049, "learning_rate": 1.9945384361210438e-05, "loss": 0.5921, "step": 2522 }, { "epoch": 0.06927512355848435, "grad_norm": 0.3816046416759491, "learning_rate": 1.9945339274801432e-05, "loss": 0.5835, "step": 2523 }, { "epoch": 0.06930258099945086, "grad_norm": 0.5515255331993103, "learning_rate": 1.9945294169841196e-05, "loss": 0.5578, "step": 2524 }, { "epoch": 0.06933003844041735, "grad_norm": 0.3673606514930725, "learning_rate": 1.994524904632981e-05, "loss": 0.5701, "step": 2525 }, { "epoch": 0.06935749588138386, "grad_norm": 0.4832536578178406, "learning_rate": 1.994520390426736e-05, "loss": 0.5459, "step": 2526 }, { "epoch": 0.06938495332235035, "grad_norm": 0.3989959955215454, "learning_rate": 1.9945158743653933e-05, "loss": 0.5179, "step": 2527 }, { "epoch": 0.06941241076331686, "grad_norm": 0.37179499864578247, "learning_rate": 1.994511356448961e-05, "loss": 0.5526, "step": 2528 }, { "epoch": 0.06943986820428336, "grad_norm": 0.3651149272918701, "learning_rate": 1.9945068366774474e-05, "loss": 0.5907, "step": 2529 }, { "epoch": 0.06946732564524986, "grad_norm": 0.36114662885665894, "learning_rate": 1.9945023150508613e-05, "loss": 0.5719, "step": 2530 }, { "epoch": 0.06949478308621636, "grad_norm": 0.3603382706642151, "learning_rate": 1.9944977915692113e-05, "loss": 0.5284, "step": 2531 }, { "epoch": 0.06952224052718287, "grad_norm": 0.350202351808548, "learning_rate": 1.994493266232505e-05, "loss": 0.5453, "step": 2532 }, { "epoch": 0.06954969796814937, "grad_norm": 0.37228116393089294, "learning_rate": 1.9944887390407515e-05, "loss": 0.5289, "step": 2533 }, { "epoch": 0.06957715540911587, "grad_norm": 0.3300260603427887, "learning_rate": 1.9944842099939592e-05, "loss": 0.5354, "step": 2534 }, { "epoch": 0.06960461285008238, "grad_norm": 0.4777238667011261, "learning_rate": 1.994479679092136e-05, "loss": 0.6283, "step": 2535 }, { "epoch": 0.06963207029104887, "grad_norm": 0.36970382928848267, "learning_rate": 1.9944751463352913e-05, "loss": 0.575, "step": 2536 }, { "epoch": 0.06965952773201538, "grad_norm": 0.34163615107536316, "learning_rate": 1.9944706117234332e-05, "loss": 0.5336, "step": 2537 }, { "epoch": 0.06968698517298187, "grad_norm": 0.3359769880771637, "learning_rate": 1.9944660752565697e-05, "loss": 0.5434, "step": 2538 }, { "epoch": 0.06971444261394838, "grad_norm": 0.3446539342403412, "learning_rate": 1.9944615369347097e-05, "loss": 0.5494, "step": 2539 }, { "epoch": 0.06974190005491489, "grad_norm": 0.3866296410560608, "learning_rate": 1.9944569967578615e-05, "loss": 0.5488, "step": 2540 }, { "epoch": 0.06976935749588138, "grad_norm": 0.3613860011100769, "learning_rate": 1.9944524547260334e-05, "loss": 0.5598, "step": 2541 }, { "epoch": 0.06979681493684789, "grad_norm": 0.4183613359928131, "learning_rate": 1.994447910839234e-05, "loss": 0.5789, "step": 2542 }, { "epoch": 0.06982427237781438, "grad_norm": 0.3624984323978424, "learning_rate": 1.9944433650974722e-05, "loss": 0.4791, "step": 2543 }, { "epoch": 0.06985172981878089, "grad_norm": 0.3814305365085602, "learning_rate": 1.994438817500756e-05, "loss": 0.5398, "step": 2544 }, { "epoch": 0.06987918725974739, "grad_norm": 0.33490052819252014, "learning_rate": 1.994434268049094e-05, "loss": 0.5373, "step": 2545 }, { "epoch": 0.0699066447007139, "grad_norm": 0.41264626383781433, "learning_rate": 1.9944297167424946e-05, "loss": 0.5318, "step": 2546 }, { "epoch": 0.0699341021416804, "grad_norm": 0.4095516800880432, "learning_rate": 1.9944251635809667e-05, "loss": 0.5329, "step": 2547 }, { "epoch": 0.0699615595826469, "grad_norm": 0.41390979290008545, "learning_rate": 1.9944206085645183e-05, "loss": 0.5535, "step": 2548 }, { "epoch": 0.0699890170236134, "grad_norm": 0.4160715341567993, "learning_rate": 1.9944160516931582e-05, "loss": 0.6511, "step": 2549 }, { "epoch": 0.0700164744645799, "grad_norm": 0.4027978777885437, "learning_rate": 1.9944114929668946e-05, "loss": 0.5978, "step": 2550 }, { "epoch": 0.0700439319055464, "grad_norm": 0.3429988622665405, "learning_rate": 1.9944069323857365e-05, "loss": 0.442, "step": 2551 }, { "epoch": 0.0700713893465129, "grad_norm": 0.41126179695129395, "learning_rate": 1.9944023699496918e-05, "loss": 0.61, "step": 2552 }, { "epoch": 0.07009884678747941, "grad_norm": 0.36616581678390503, "learning_rate": 1.9943978056587693e-05, "loss": 0.4892, "step": 2553 }, { "epoch": 0.07012630422844592, "grad_norm": 0.3552047610282898, "learning_rate": 1.994393239512978e-05, "loss": 0.6039, "step": 2554 }, { "epoch": 0.07015376166941241, "grad_norm": 0.37143319845199585, "learning_rate": 1.9943886715123252e-05, "loss": 0.6143, "step": 2555 }, { "epoch": 0.07018121911037892, "grad_norm": 0.3834437429904938, "learning_rate": 1.99438410165682e-05, "loss": 0.6, "step": 2556 }, { "epoch": 0.07020867655134541, "grad_norm": 0.377986341714859, "learning_rate": 1.9943795299464717e-05, "loss": 0.5734, "step": 2557 }, { "epoch": 0.07023613399231192, "grad_norm": 0.3634434640407562, "learning_rate": 1.9943749563812883e-05, "loss": 0.543, "step": 2558 }, { "epoch": 0.07026359143327841, "grad_norm": 0.36504262685775757, "learning_rate": 1.994370380961278e-05, "loss": 0.5832, "step": 2559 }, { "epoch": 0.07029104887424492, "grad_norm": 0.3603762090206146, "learning_rate": 1.9943658036864495e-05, "loss": 0.5572, "step": 2560 }, { "epoch": 0.07031850631521142, "grad_norm": 0.3836669325828552, "learning_rate": 1.9943612245568115e-05, "loss": 0.5705, "step": 2561 }, { "epoch": 0.07034596375617792, "grad_norm": 0.32436054944992065, "learning_rate": 1.9943566435723723e-05, "loss": 0.5561, "step": 2562 }, { "epoch": 0.07037342119714443, "grad_norm": 0.3204456865787506, "learning_rate": 1.994352060733141e-05, "loss": 0.4835, "step": 2563 }, { "epoch": 0.07040087863811093, "grad_norm": 0.36620965600013733, "learning_rate": 1.9943474760391253e-05, "loss": 0.5591, "step": 2564 }, { "epoch": 0.07042833607907743, "grad_norm": 0.33208566904067993, "learning_rate": 1.9943428894903346e-05, "loss": 0.5497, "step": 2565 }, { "epoch": 0.07045579352004393, "grad_norm": 0.35680022835731506, "learning_rate": 1.9943383010867768e-05, "loss": 0.5936, "step": 2566 }, { "epoch": 0.07048325096101044, "grad_norm": 0.36450469493865967, "learning_rate": 1.9943337108284608e-05, "loss": 0.5568, "step": 2567 }, { "epoch": 0.07051070840197693, "grad_norm": 0.32450729608535767, "learning_rate": 1.994329118715395e-05, "loss": 0.4821, "step": 2568 }, { "epoch": 0.07053816584294344, "grad_norm": 0.39028382301330566, "learning_rate": 1.994324524747588e-05, "loss": 0.5462, "step": 2569 }, { "epoch": 0.07056562328390995, "grad_norm": 0.3685503304004669, "learning_rate": 1.9943199289250486e-05, "loss": 0.5855, "step": 2570 }, { "epoch": 0.07059308072487644, "grad_norm": 0.31089234352111816, "learning_rate": 1.9943153312477848e-05, "loss": 0.4961, "step": 2571 }, { "epoch": 0.07062053816584295, "grad_norm": 0.35604190826416016, "learning_rate": 1.9943107317158058e-05, "loss": 0.4681, "step": 2572 }, { "epoch": 0.07064799560680944, "grad_norm": 0.3767017126083374, "learning_rate": 1.99430613032912e-05, "loss": 0.4762, "step": 2573 }, { "epoch": 0.07067545304777595, "grad_norm": 0.3224920332431793, "learning_rate": 1.994301527087736e-05, "loss": 0.5132, "step": 2574 }, { "epoch": 0.07070291048874244, "grad_norm": 0.3483562171459198, "learning_rate": 1.994296921991662e-05, "loss": 0.5656, "step": 2575 }, { "epoch": 0.07073036792970895, "grad_norm": 0.37184998393058777, "learning_rate": 1.994292315040907e-05, "loss": 0.5567, "step": 2576 }, { "epoch": 0.07075782537067546, "grad_norm": 0.39539819955825806, "learning_rate": 1.9942877062354797e-05, "loss": 0.5812, "step": 2577 }, { "epoch": 0.07078528281164195, "grad_norm": 0.41819050908088684, "learning_rate": 1.9942830955753885e-05, "loss": 0.6274, "step": 2578 }, { "epoch": 0.07081274025260846, "grad_norm": 0.3662780225276947, "learning_rate": 1.9942784830606418e-05, "loss": 0.5762, "step": 2579 }, { "epoch": 0.07084019769357495, "grad_norm": 0.39505141973495483, "learning_rate": 1.9942738686912484e-05, "loss": 0.6463, "step": 2580 }, { "epoch": 0.07086765513454146, "grad_norm": 0.3770921528339386, "learning_rate": 1.994269252467217e-05, "loss": 0.5053, "step": 2581 }, { "epoch": 0.07089511257550796, "grad_norm": 0.3661443293094635, "learning_rate": 1.9942646343885558e-05, "loss": 0.5327, "step": 2582 }, { "epoch": 0.07092257001647446, "grad_norm": 0.4816809594631195, "learning_rate": 1.9942600144552743e-05, "loss": 0.5706, "step": 2583 }, { "epoch": 0.07095002745744097, "grad_norm": 0.3881710171699524, "learning_rate": 1.99425539266738e-05, "loss": 0.5405, "step": 2584 }, { "epoch": 0.07097748489840747, "grad_norm": 0.3701106607913971, "learning_rate": 1.9942507690248826e-05, "loss": 0.5234, "step": 2585 }, { "epoch": 0.07100494233937397, "grad_norm": 0.5333350896835327, "learning_rate": 1.9942461435277896e-05, "loss": 0.5625, "step": 2586 }, { "epoch": 0.07103239978034047, "grad_norm": 0.47856807708740234, "learning_rate": 1.9942415161761106e-05, "loss": 0.4935, "step": 2587 }, { "epoch": 0.07105985722130698, "grad_norm": 0.34923648834228516, "learning_rate": 1.9942368869698536e-05, "loss": 0.48, "step": 2588 }, { "epoch": 0.07108731466227347, "grad_norm": 0.37891772389411926, "learning_rate": 1.9942322559090277e-05, "loss": 0.5526, "step": 2589 }, { "epoch": 0.07111477210323998, "grad_norm": 0.3615868091583252, "learning_rate": 1.9942276229936412e-05, "loss": 0.5871, "step": 2590 }, { "epoch": 0.07114222954420649, "grad_norm": 0.36254000663757324, "learning_rate": 1.994222988223703e-05, "loss": 0.6181, "step": 2591 }, { "epoch": 0.07116968698517298, "grad_norm": 0.4926735758781433, "learning_rate": 1.9942183515992214e-05, "loss": 0.6063, "step": 2592 }, { "epoch": 0.07119714442613949, "grad_norm": 0.372385710477829, "learning_rate": 1.9942137131202054e-05, "loss": 0.5909, "step": 2593 }, { "epoch": 0.07122460186710598, "grad_norm": 0.41848719120025635, "learning_rate": 1.9942090727866636e-05, "loss": 0.5422, "step": 2594 }, { "epoch": 0.07125205930807249, "grad_norm": 0.31702950596809387, "learning_rate": 1.9942044305986045e-05, "loss": 0.4573, "step": 2595 }, { "epoch": 0.07127951674903898, "grad_norm": 0.39123478531837463, "learning_rate": 1.9941997865560365e-05, "loss": 0.5776, "step": 2596 }, { "epoch": 0.07130697419000549, "grad_norm": 0.39267152547836304, "learning_rate": 1.994195140658969e-05, "loss": 0.5643, "step": 2597 }, { "epoch": 0.07133443163097199, "grad_norm": 0.35269755125045776, "learning_rate": 1.99419049290741e-05, "loss": 0.5256, "step": 2598 }, { "epoch": 0.0713618890719385, "grad_norm": 0.4239741861820221, "learning_rate": 1.9941858433013686e-05, "loss": 0.6336, "step": 2599 }, { "epoch": 0.071389346512905, "grad_norm": 0.37426701188087463, "learning_rate": 1.994181191840853e-05, "loss": 0.4909, "step": 2600 }, { "epoch": 0.0714168039538715, "grad_norm": 0.413070946931839, "learning_rate": 1.9941765385258723e-05, "loss": 0.6359, "step": 2601 }, { "epoch": 0.071444261394838, "grad_norm": 0.4034333825111389, "learning_rate": 1.9941718833564353e-05, "loss": 0.6032, "step": 2602 }, { "epoch": 0.0714717188358045, "grad_norm": 0.3669786751270294, "learning_rate": 1.9941672263325504e-05, "loss": 0.5612, "step": 2603 }, { "epoch": 0.071499176276771, "grad_norm": 0.4065932035446167, "learning_rate": 1.994162567454226e-05, "loss": 0.5675, "step": 2604 }, { "epoch": 0.0715266337177375, "grad_norm": 0.357594758272171, "learning_rate": 1.9941579067214712e-05, "loss": 0.5461, "step": 2605 }, { "epoch": 0.07155409115870401, "grad_norm": 0.4127942621707916, "learning_rate": 1.994153244134295e-05, "loss": 0.6106, "step": 2606 }, { "epoch": 0.07158154859967052, "grad_norm": 0.3475908935070038, "learning_rate": 1.9941485796927053e-05, "loss": 0.5656, "step": 2607 }, { "epoch": 0.07160900604063701, "grad_norm": 0.36307427287101746, "learning_rate": 1.9941439133967116e-05, "loss": 0.4098, "step": 2608 }, { "epoch": 0.07163646348160352, "grad_norm": 0.3814275860786438, "learning_rate": 1.9941392452463218e-05, "loss": 0.4961, "step": 2609 }, { "epoch": 0.07166392092257001, "grad_norm": 0.32227155566215515, "learning_rate": 1.9941345752415452e-05, "loss": 0.5097, "step": 2610 }, { "epoch": 0.07169137836353652, "grad_norm": 0.3656122088432312, "learning_rate": 1.99412990338239e-05, "loss": 0.5816, "step": 2611 }, { "epoch": 0.07171883580450301, "grad_norm": 0.3937719166278839, "learning_rate": 1.9941252296688655e-05, "loss": 0.4721, "step": 2612 }, { "epoch": 0.07174629324546952, "grad_norm": 0.3834493160247803, "learning_rate": 1.99412055410098e-05, "loss": 0.5784, "step": 2613 }, { "epoch": 0.07177375068643603, "grad_norm": 0.3467981517314911, "learning_rate": 1.994115876678743e-05, "loss": 0.5613, "step": 2614 }, { "epoch": 0.07180120812740252, "grad_norm": 0.3624782860279083, "learning_rate": 1.9941111974021622e-05, "loss": 0.5285, "step": 2615 }, { "epoch": 0.07182866556836903, "grad_norm": 0.3708520531654358, "learning_rate": 1.9941065162712467e-05, "loss": 0.6291, "step": 2616 }, { "epoch": 0.07185612300933553, "grad_norm": 0.3851027488708496, "learning_rate": 1.994101833286005e-05, "loss": 0.5669, "step": 2617 }, { "epoch": 0.07188358045030203, "grad_norm": 0.3704240322113037, "learning_rate": 1.9940971484464463e-05, "loss": 0.5297, "step": 2618 }, { "epoch": 0.07191103789126853, "grad_norm": 0.3824291527271271, "learning_rate": 1.9940924617525792e-05, "loss": 0.5336, "step": 2619 }, { "epoch": 0.07193849533223504, "grad_norm": 0.33124932646751404, "learning_rate": 1.9940877732044123e-05, "loss": 0.5206, "step": 2620 }, { "epoch": 0.07196595277320154, "grad_norm": 0.7106751799583435, "learning_rate": 1.9940830828019547e-05, "loss": 0.5201, "step": 2621 }, { "epoch": 0.07199341021416804, "grad_norm": 0.34947019815444946, "learning_rate": 1.9940783905452146e-05, "loss": 0.5384, "step": 2622 }, { "epoch": 0.07202086765513455, "grad_norm": 0.35613083839416504, "learning_rate": 1.9940736964342012e-05, "loss": 0.4831, "step": 2623 }, { "epoch": 0.07204832509610104, "grad_norm": 0.36345064640045166, "learning_rate": 1.9940690004689228e-05, "loss": 0.5687, "step": 2624 }, { "epoch": 0.07207578253706755, "grad_norm": 0.40493467450141907, "learning_rate": 1.9940643026493887e-05, "loss": 0.5381, "step": 2625 }, { "epoch": 0.07210323997803404, "grad_norm": 0.36398592591285706, "learning_rate": 1.9940596029756073e-05, "loss": 0.595, "step": 2626 }, { "epoch": 0.07213069741900055, "grad_norm": 0.38166388869285583, "learning_rate": 1.9940549014475875e-05, "loss": 0.6265, "step": 2627 }, { "epoch": 0.07215815485996704, "grad_norm": 0.4015948176383972, "learning_rate": 1.9940501980653383e-05, "loss": 0.5774, "step": 2628 }, { "epoch": 0.07218561230093355, "grad_norm": 0.4453607201576233, "learning_rate": 1.994045492828868e-05, "loss": 0.6102, "step": 2629 }, { "epoch": 0.07221306974190006, "grad_norm": 0.36877599358558655, "learning_rate": 1.9940407857381852e-05, "loss": 0.5927, "step": 2630 }, { "epoch": 0.07224052718286655, "grad_norm": 0.3988637626171112, "learning_rate": 1.9940360767932996e-05, "loss": 0.5317, "step": 2631 }, { "epoch": 0.07226798462383306, "grad_norm": 0.5136372447013855, "learning_rate": 1.9940313659942192e-05, "loss": 0.5544, "step": 2632 }, { "epoch": 0.07229544206479956, "grad_norm": 0.38091790676116943, "learning_rate": 1.9940266533409532e-05, "loss": 0.6279, "step": 2633 }, { "epoch": 0.07232289950576606, "grad_norm": 0.3934011161327362, "learning_rate": 1.9940219388335104e-05, "loss": 0.5152, "step": 2634 }, { "epoch": 0.07235035694673256, "grad_norm": 0.44202902913093567, "learning_rate": 1.994017222471899e-05, "loss": 0.5914, "step": 2635 }, { "epoch": 0.07237781438769907, "grad_norm": 0.38018250465393066, "learning_rate": 1.9940125042561285e-05, "loss": 0.4886, "step": 2636 }, { "epoch": 0.07240527182866557, "grad_norm": 0.3356010317802429, "learning_rate": 1.9940077841862075e-05, "loss": 0.5247, "step": 2637 }, { "epoch": 0.07243272926963207, "grad_norm": 0.5002802610397339, "learning_rate": 1.9940030622621442e-05, "loss": 0.563, "step": 2638 }, { "epoch": 0.07246018671059858, "grad_norm": 0.36134690046310425, "learning_rate": 1.9939983384839485e-05, "loss": 0.5543, "step": 2639 }, { "epoch": 0.07248764415156507, "grad_norm": 0.3746606111526489, "learning_rate": 1.9939936128516284e-05, "loss": 0.5406, "step": 2640 }, { "epoch": 0.07251510159253158, "grad_norm": 0.36766335368156433, "learning_rate": 1.9939888853651933e-05, "loss": 0.5591, "step": 2641 }, { "epoch": 0.07254255903349807, "grad_norm": 0.3396856188774109, "learning_rate": 1.9939841560246515e-05, "loss": 0.5441, "step": 2642 }, { "epoch": 0.07257001647446458, "grad_norm": 0.3870697319507599, "learning_rate": 1.9939794248300118e-05, "loss": 0.5012, "step": 2643 }, { "epoch": 0.07259747391543109, "grad_norm": 0.44808945059776306, "learning_rate": 1.9939746917812834e-05, "loss": 0.6261, "step": 2644 }, { "epoch": 0.07262493135639758, "grad_norm": 0.4103437662124634, "learning_rate": 1.9939699568784747e-05, "loss": 0.5333, "step": 2645 }, { "epoch": 0.07265238879736409, "grad_norm": 0.36931341886520386, "learning_rate": 1.993965220121595e-05, "loss": 0.553, "step": 2646 }, { "epoch": 0.07267984623833058, "grad_norm": 0.37470391392707825, "learning_rate": 1.9939604815106533e-05, "loss": 0.574, "step": 2647 }, { "epoch": 0.07270730367929709, "grad_norm": 0.3661896288394928, "learning_rate": 1.9939557410456574e-05, "loss": 0.4792, "step": 2648 }, { "epoch": 0.07273476112026359, "grad_norm": 0.35213884711265564, "learning_rate": 1.9939509987266173e-05, "loss": 0.5057, "step": 2649 }, { "epoch": 0.0727622185612301, "grad_norm": 0.3676280081272125, "learning_rate": 1.9939462545535412e-05, "loss": 0.5966, "step": 2650 }, { "epoch": 0.0727896760021966, "grad_norm": 0.3317960798740387, "learning_rate": 1.9939415085264378e-05, "loss": 0.5475, "step": 2651 }, { "epoch": 0.0728171334431631, "grad_norm": 0.3404351770877838, "learning_rate": 1.9939367606453168e-05, "loss": 0.4912, "step": 2652 }, { "epoch": 0.0728445908841296, "grad_norm": 0.3500071167945862, "learning_rate": 1.9939320109101864e-05, "loss": 0.5338, "step": 2653 }, { "epoch": 0.0728720483250961, "grad_norm": 0.3270661532878876, "learning_rate": 1.9939272593210554e-05, "loss": 0.4755, "step": 2654 }, { "epoch": 0.0728995057660626, "grad_norm": 0.42786136269569397, "learning_rate": 1.9939225058779325e-05, "loss": 0.5839, "step": 2655 }, { "epoch": 0.0729269632070291, "grad_norm": 0.34090664982795715, "learning_rate": 1.9939177505808277e-05, "loss": 0.4481, "step": 2656 }, { "epoch": 0.07295442064799561, "grad_norm": 0.35125553607940674, "learning_rate": 1.9939129934297483e-05, "loss": 0.5032, "step": 2657 }, { "epoch": 0.07298187808896212, "grad_norm": 0.37812626361846924, "learning_rate": 1.9939082344247045e-05, "loss": 0.615, "step": 2658 }, { "epoch": 0.07300933552992861, "grad_norm": 0.34745416045188904, "learning_rate": 1.9939034735657042e-05, "loss": 0.6247, "step": 2659 }, { "epoch": 0.07303679297089512, "grad_norm": 0.32580873370170593, "learning_rate": 1.993898710852757e-05, "loss": 0.5678, "step": 2660 }, { "epoch": 0.07306425041186161, "grad_norm": 0.33764344453811646, "learning_rate": 1.9938939462858714e-05, "loss": 0.5297, "step": 2661 }, { "epoch": 0.07309170785282812, "grad_norm": 0.3577463626861572, "learning_rate": 1.9938891798650563e-05, "loss": 0.582, "step": 2662 }, { "epoch": 0.07311916529379461, "grad_norm": 0.3572021722793579, "learning_rate": 1.9938844115903208e-05, "loss": 0.6164, "step": 2663 }, { "epoch": 0.07314662273476112, "grad_norm": 0.35050785541534424, "learning_rate": 1.9938796414616738e-05, "loss": 0.5533, "step": 2664 }, { "epoch": 0.07317408017572762, "grad_norm": 0.37762248516082764, "learning_rate": 1.9938748694791237e-05, "loss": 0.549, "step": 2665 }, { "epoch": 0.07320153761669412, "grad_norm": 0.43513795733451843, "learning_rate": 1.99387009564268e-05, "loss": 0.6419, "step": 2666 }, { "epoch": 0.07322899505766063, "grad_norm": 0.34502169489860535, "learning_rate": 1.993865319952351e-05, "loss": 0.5465, "step": 2667 }, { "epoch": 0.07325645249862713, "grad_norm": 0.41204503178596497, "learning_rate": 1.9938605424081464e-05, "loss": 0.6111, "step": 2668 }, { "epoch": 0.07328390993959363, "grad_norm": 0.3497854173183441, "learning_rate": 1.9938557630100747e-05, "loss": 0.5607, "step": 2669 }, { "epoch": 0.07331136738056013, "grad_norm": 0.36050671339035034, "learning_rate": 1.9938509817581446e-05, "loss": 0.6843, "step": 2670 }, { "epoch": 0.07333882482152664, "grad_norm": 0.3455282747745514, "learning_rate": 1.9938461986523653e-05, "loss": 0.4999, "step": 2671 }, { "epoch": 0.07336628226249313, "grad_norm": 0.35529136657714844, "learning_rate": 1.9938414136927457e-05, "loss": 0.5364, "step": 2672 }, { "epoch": 0.07339373970345964, "grad_norm": 0.3630426824092865, "learning_rate": 1.9938366268792945e-05, "loss": 0.6261, "step": 2673 }, { "epoch": 0.07342119714442615, "grad_norm": 0.41040003299713135, "learning_rate": 1.993831838212021e-05, "loss": 0.6342, "step": 2674 }, { "epoch": 0.07344865458539264, "grad_norm": 0.33441323041915894, "learning_rate": 1.9938270476909338e-05, "loss": 0.5257, "step": 2675 }, { "epoch": 0.07347611202635915, "grad_norm": 0.4260815382003784, "learning_rate": 1.9938222553160418e-05, "loss": 0.5308, "step": 2676 }, { "epoch": 0.07350356946732564, "grad_norm": 0.3370666205883026, "learning_rate": 1.993817461087354e-05, "loss": 0.5218, "step": 2677 }, { "epoch": 0.07353102690829215, "grad_norm": 0.38374847173690796, "learning_rate": 1.99381266500488e-05, "loss": 0.6373, "step": 2678 }, { "epoch": 0.07355848434925864, "grad_norm": 0.402051717042923, "learning_rate": 1.993807867068628e-05, "loss": 0.5807, "step": 2679 }, { "epoch": 0.07358594179022515, "grad_norm": 0.3351864516735077, "learning_rate": 1.993803067278607e-05, "loss": 0.6055, "step": 2680 }, { "epoch": 0.07361339923119166, "grad_norm": 0.382453590631485, "learning_rate": 1.9937982656348262e-05, "loss": 0.5891, "step": 2681 }, { "epoch": 0.07364085667215815, "grad_norm": 0.3522912859916687, "learning_rate": 1.9937934621372942e-05, "loss": 0.5662, "step": 2682 }, { "epoch": 0.07366831411312466, "grad_norm": 0.36873912811279297, "learning_rate": 1.9937886567860205e-05, "loss": 0.5698, "step": 2683 }, { "epoch": 0.07369577155409116, "grad_norm": 0.37353515625, "learning_rate": 1.9937838495810137e-05, "loss": 0.5452, "step": 2684 }, { "epoch": 0.07372322899505766, "grad_norm": 0.3329957127571106, "learning_rate": 1.993779040522283e-05, "loss": 0.5815, "step": 2685 }, { "epoch": 0.07375068643602416, "grad_norm": 0.37043604254722595, "learning_rate": 1.9937742296098367e-05, "loss": 0.6056, "step": 2686 }, { "epoch": 0.07377814387699067, "grad_norm": 0.4267311096191406, "learning_rate": 1.9937694168436846e-05, "loss": 0.5962, "step": 2687 }, { "epoch": 0.07380560131795717, "grad_norm": 0.35558298230171204, "learning_rate": 1.9937646022238355e-05, "loss": 0.6044, "step": 2688 }, { "epoch": 0.07383305875892367, "grad_norm": 0.34687885642051697, "learning_rate": 1.993759785750298e-05, "loss": 0.5431, "step": 2689 }, { "epoch": 0.07386051619989017, "grad_norm": 0.4082138240337372, "learning_rate": 1.9937549674230817e-05, "loss": 0.6081, "step": 2690 }, { "epoch": 0.07388797364085667, "grad_norm": 0.35377195477485657, "learning_rate": 1.993750147242195e-05, "loss": 0.4899, "step": 2691 }, { "epoch": 0.07391543108182318, "grad_norm": 0.40484917163848877, "learning_rate": 1.9937453252076468e-05, "loss": 0.6295, "step": 2692 }, { "epoch": 0.07394288852278967, "grad_norm": 0.39375248551368713, "learning_rate": 1.993740501319447e-05, "loss": 0.5626, "step": 2693 }, { "epoch": 0.07397034596375618, "grad_norm": 0.37707382440567017, "learning_rate": 1.9937356755776033e-05, "loss": 0.6261, "step": 2694 }, { "epoch": 0.07399780340472267, "grad_norm": 0.36441782116889954, "learning_rate": 1.993730847982126e-05, "loss": 0.6041, "step": 2695 }, { "epoch": 0.07402526084568918, "grad_norm": 0.3367854356765747, "learning_rate": 1.9937260185330233e-05, "loss": 0.5212, "step": 2696 }, { "epoch": 0.07405271828665569, "grad_norm": 0.34719085693359375, "learning_rate": 1.9937211872303043e-05, "loss": 0.5571, "step": 2697 }, { "epoch": 0.07408017572762218, "grad_norm": 0.3888559639453888, "learning_rate": 1.9937163540739784e-05, "loss": 0.5951, "step": 2698 }, { "epoch": 0.07410763316858869, "grad_norm": 0.37381651997566223, "learning_rate": 1.993711519064054e-05, "loss": 0.4864, "step": 2699 }, { "epoch": 0.07413509060955518, "grad_norm": 0.3443273603916168, "learning_rate": 1.9937066822005407e-05, "loss": 0.6229, "step": 2700 }, { "epoch": 0.07416254805052169, "grad_norm": 0.3469371199607849, "learning_rate": 1.9937018434834472e-05, "loss": 0.5302, "step": 2701 }, { "epoch": 0.07419000549148819, "grad_norm": 0.36280110478401184, "learning_rate": 1.9936970029127827e-05, "loss": 0.5917, "step": 2702 }, { "epoch": 0.0742174629324547, "grad_norm": 0.3265971839427948, "learning_rate": 1.993692160488556e-05, "loss": 0.503, "step": 2703 }, { "epoch": 0.0742449203734212, "grad_norm": 0.3804887533187866, "learning_rate": 1.9936873162107762e-05, "loss": 0.5456, "step": 2704 }, { "epoch": 0.0742723778143877, "grad_norm": 0.3863884508609772, "learning_rate": 1.9936824700794526e-05, "loss": 0.5528, "step": 2705 }, { "epoch": 0.0742998352553542, "grad_norm": 0.3280385434627533, "learning_rate": 1.9936776220945942e-05, "loss": 0.5653, "step": 2706 }, { "epoch": 0.0743272926963207, "grad_norm": 0.3830016553401947, "learning_rate": 1.9936727722562095e-05, "loss": 0.5479, "step": 2707 }, { "epoch": 0.0743547501372872, "grad_norm": 0.48339787125587463, "learning_rate": 1.993667920564308e-05, "loss": 0.5333, "step": 2708 }, { "epoch": 0.0743822075782537, "grad_norm": 0.3274328112602234, "learning_rate": 1.993663067018899e-05, "loss": 0.5685, "step": 2709 }, { "epoch": 0.07440966501922021, "grad_norm": 0.40899938344955444, "learning_rate": 1.993658211619991e-05, "loss": 0.5827, "step": 2710 }, { "epoch": 0.07443712246018672, "grad_norm": 0.3796854317188263, "learning_rate": 1.9936533543675932e-05, "loss": 0.502, "step": 2711 }, { "epoch": 0.07446457990115321, "grad_norm": 0.3383747637271881, "learning_rate": 1.9936484952617147e-05, "loss": 0.5776, "step": 2712 }, { "epoch": 0.07449203734211972, "grad_norm": 0.3405179977416992, "learning_rate": 1.993643634302365e-05, "loss": 0.593, "step": 2713 }, { "epoch": 0.07451949478308621, "grad_norm": 0.40838173031806946, "learning_rate": 1.9936387714895525e-05, "loss": 0.6096, "step": 2714 }, { "epoch": 0.07454695222405272, "grad_norm": 0.35725435614585876, "learning_rate": 1.993633906823287e-05, "loss": 0.5453, "step": 2715 }, { "epoch": 0.07457440966501921, "grad_norm": 0.3567812740802765, "learning_rate": 1.9936290403035766e-05, "loss": 0.5678, "step": 2716 }, { "epoch": 0.07460186710598572, "grad_norm": 0.4028964340686798, "learning_rate": 1.993624171930431e-05, "loss": 0.6442, "step": 2717 }, { "epoch": 0.07462932454695223, "grad_norm": 0.37923797965049744, "learning_rate": 1.9936193017038594e-05, "loss": 0.5921, "step": 2718 }, { "epoch": 0.07465678198791872, "grad_norm": 0.4018227458000183, "learning_rate": 1.9936144296238705e-05, "loss": 0.4904, "step": 2719 }, { "epoch": 0.07468423942888523, "grad_norm": 0.3664076626300812, "learning_rate": 1.9936095556904735e-05, "loss": 0.5354, "step": 2720 }, { "epoch": 0.07471169686985173, "grad_norm": 0.3344953656196594, "learning_rate": 1.993604679903678e-05, "loss": 0.4649, "step": 2721 }, { "epoch": 0.07473915431081823, "grad_norm": 0.3914625644683838, "learning_rate": 1.993599802263492e-05, "loss": 0.5601, "step": 2722 }, { "epoch": 0.07476661175178473, "grad_norm": 0.35467520356178284, "learning_rate": 1.993594922769926e-05, "loss": 0.5186, "step": 2723 }, { "epoch": 0.07479406919275124, "grad_norm": 1.1804643869400024, "learning_rate": 1.9935900414229875e-05, "loss": 0.6175, "step": 2724 }, { "epoch": 0.07482152663371774, "grad_norm": 0.39759641885757446, "learning_rate": 1.993585158222687e-05, "loss": 0.6662, "step": 2725 }, { "epoch": 0.07484898407468424, "grad_norm": 0.3593357801437378, "learning_rate": 1.993580273169033e-05, "loss": 0.562, "step": 2726 }, { "epoch": 0.07487644151565075, "grad_norm": 0.3799419105052948, "learning_rate": 1.9935753862620347e-05, "loss": 0.4993, "step": 2727 }, { "epoch": 0.07490389895661724, "grad_norm": 0.3500645160675049, "learning_rate": 1.993570497501701e-05, "loss": 0.5517, "step": 2728 }, { "epoch": 0.07493135639758375, "grad_norm": 0.3719981908798218, "learning_rate": 1.9935656068880417e-05, "loss": 0.6272, "step": 2729 }, { "epoch": 0.07495881383855024, "grad_norm": 0.34457510709762573, "learning_rate": 1.9935607144210648e-05, "loss": 0.4795, "step": 2730 }, { "epoch": 0.07498627127951675, "grad_norm": 0.38582348823547363, "learning_rate": 1.9935558201007804e-05, "loss": 0.5951, "step": 2731 }, { "epoch": 0.07501372872048324, "grad_norm": 0.38662785291671753, "learning_rate": 1.993550923927197e-05, "loss": 0.5194, "step": 2732 }, { "epoch": 0.07504118616144975, "grad_norm": 0.413790225982666, "learning_rate": 1.9935460259003244e-05, "loss": 0.7141, "step": 2733 }, { "epoch": 0.07506864360241626, "grad_norm": 0.35950368642807007, "learning_rate": 1.993541126020171e-05, "loss": 0.5413, "step": 2734 }, { "epoch": 0.07509610104338275, "grad_norm": 0.37346598505973816, "learning_rate": 1.9935362242867465e-05, "loss": 0.56, "step": 2735 }, { "epoch": 0.07512355848434926, "grad_norm": 0.66705721616745, "learning_rate": 1.99353132070006e-05, "loss": 0.6236, "step": 2736 }, { "epoch": 0.07515101592531576, "grad_norm": 0.3876762390136719, "learning_rate": 1.9935264152601205e-05, "loss": 0.5416, "step": 2737 }, { "epoch": 0.07517847336628226, "grad_norm": 0.3597573935985565, "learning_rate": 1.9935215079669367e-05, "loss": 0.5831, "step": 2738 }, { "epoch": 0.07520593080724876, "grad_norm": 0.3984428346157074, "learning_rate": 1.9935165988205184e-05, "loss": 0.6571, "step": 2739 }, { "epoch": 0.07523338824821527, "grad_norm": 0.3651711642742157, "learning_rate": 1.993511687820875e-05, "loss": 0.5967, "step": 2740 }, { "epoch": 0.07526084568918177, "grad_norm": 0.35446274280548096, "learning_rate": 1.9935067749680145e-05, "loss": 0.5662, "step": 2741 }, { "epoch": 0.07528830313014827, "grad_norm": 0.3749960660934448, "learning_rate": 1.993501860261947e-05, "loss": 0.4836, "step": 2742 }, { "epoch": 0.07531576057111478, "grad_norm": 0.4806305766105652, "learning_rate": 1.9934969437026815e-05, "loss": 0.7408, "step": 2743 }, { "epoch": 0.07534321801208127, "grad_norm": 0.3815155327320099, "learning_rate": 1.9934920252902272e-05, "loss": 0.5858, "step": 2744 }, { "epoch": 0.07537067545304778, "grad_norm": 0.38132649660110474, "learning_rate": 1.9934871050245932e-05, "loss": 0.5681, "step": 2745 }, { "epoch": 0.07539813289401427, "grad_norm": 0.418954998254776, "learning_rate": 1.9934821829057887e-05, "loss": 0.6034, "step": 2746 }, { "epoch": 0.07542559033498078, "grad_norm": 0.3933415412902832, "learning_rate": 1.9934772589338228e-05, "loss": 0.4925, "step": 2747 }, { "epoch": 0.07545304777594729, "grad_norm": 0.3763349652290344, "learning_rate": 1.9934723331087046e-05, "loss": 0.548, "step": 2748 }, { "epoch": 0.07548050521691378, "grad_norm": 0.3689199984073639, "learning_rate": 1.9934674054304434e-05, "loss": 0.5298, "step": 2749 }, { "epoch": 0.07550796265788029, "grad_norm": 0.3422185480594635, "learning_rate": 1.9934624758990486e-05, "loss": 0.4868, "step": 2750 }, { "epoch": 0.07553542009884678, "grad_norm": 0.37855806946754456, "learning_rate": 1.993457544514529e-05, "loss": 0.5408, "step": 2751 }, { "epoch": 0.07556287753981329, "grad_norm": 0.38406747579574585, "learning_rate": 1.993452611276894e-05, "loss": 0.5341, "step": 2752 }, { "epoch": 0.07559033498077979, "grad_norm": 0.43737491965293884, "learning_rate": 1.9934476761861533e-05, "loss": 0.5955, "step": 2753 }, { "epoch": 0.0756177924217463, "grad_norm": 0.3628994822502136, "learning_rate": 1.9934427392423152e-05, "loss": 0.5448, "step": 2754 }, { "epoch": 0.0756452498627128, "grad_norm": 0.3423815369606018, "learning_rate": 1.9934378004453892e-05, "loss": 0.5301, "step": 2755 }, { "epoch": 0.0756727073036793, "grad_norm": 0.406340628862381, "learning_rate": 1.9934328597953846e-05, "loss": 0.6875, "step": 2756 }, { "epoch": 0.0757001647446458, "grad_norm": 0.3692006766796112, "learning_rate": 1.9934279172923114e-05, "loss": 0.5997, "step": 2757 }, { "epoch": 0.0757276221856123, "grad_norm": 0.435729444026947, "learning_rate": 1.993422972936177e-05, "loss": 0.5786, "step": 2758 }, { "epoch": 0.0757550796265788, "grad_norm": 0.34369081258773804, "learning_rate": 1.9934180267269925e-05, "loss": 0.5418, "step": 2759 }, { "epoch": 0.0757825370675453, "grad_norm": 0.41339290142059326, "learning_rate": 1.9934130786647658e-05, "loss": 0.6335, "step": 2760 }, { "epoch": 0.07580999450851181, "grad_norm": 0.36868974566459656, "learning_rate": 1.993408128749507e-05, "loss": 0.6019, "step": 2761 }, { "epoch": 0.0758374519494783, "grad_norm": 0.3593771457672119, "learning_rate": 1.9934031769812247e-05, "loss": 0.5058, "step": 2762 }, { "epoch": 0.07586490939044481, "grad_norm": 0.3793041706085205, "learning_rate": 1.9933982233599286e-05, "loss": 0.5277, "step": 2763 }, { "epoch": 0.07589236683141132, "grad_norm": 0.3581700921058655, "learning_rate": 1.9933932678856277e-05, "loss": 0.5577, "step": 2764 }, { "epoch": 0.07591982427237781, "grad_norm": 0.3905010521411896, "learning_rate": 1.9933883105583313e-05, "loss": 0.5377, "step": 2765 }, { "epoch": 0.07594728171334432, "grad_norm": 0.36551010608673096, "learning_rate": 1.9933833513780488e-05, "loss": 0.5277, "step": 2766 }, { "epoch": 0.07597473915431081, "grad_norm": 0.35551154613494873, "learning_rate": 1.993378390344789e-05, "loss": 0.5782, "step": 2767 }, { "epoch": 0.07600219659527732, "grad_norm": 0.4203924238681793, "learning_rate": 1.9933734274585616e-05, "loss": 0.5811, "step": 2768 }, { "epoch": 0.07602965403624382, "grad_norm": 0.3575420379638672, "learning_rate": 1.993368462719376e-05, "loss": 0.5321, "step": 2769 }, { "epoch": 0.07605711147721032, "grad_norm": 0.5529618859291077, "learning_rate": 1.9933634961272408e-05, "loss": 0.5298, "step": 2770 }, { "epoch": 0.07608456891817683, "grad_norm": 0.4183511734008789, "learning_rate": 1.9933585276821657e-05, "loss": 0.584, "step": 2771 }, { "epoch": 0.07611202635914333, "grad_norm": 0.3422856032848358, "learning_rate": 1.9933535573841597e-05, "loss": 0.5349, "step": 2772 }, { "epoch": 0.07613948380010983, "grad_norm": 0.6585968136787415, "learning_rate": 1.9933485852332328e-05, "loss": 0.5797, "step": 2773 }, { "epoch": 0.07616694124107633, "grad_norm": 0.3618282675743103, "learning_rate": 1.9933436112293932e-05, "loss": 0.5282, "step": 2774 }, { "epoch": 0.07619439868204284, "grad_norm": 0.38805195689201355, "learning_rate": 1.993338635372651e-05, "loss": 0.4877, "step": 2775 }, { "epoch": 0.07622185612300933, "grad_norm": 0.452722430229187, "learning_rate": 1.9933336576630157e-05, "loss": 0.6424, "step": 2776 }, { "epoch": 0.07624931356397584, "grad_norm": 0.3949277698993683, "learning_rate": 1.9933286781004953e-05, "loss": 0.5361, "step": 2777 }, { "epoch": 0.07627677100494235, "grad_norm": 0.4401606023311615, "learning_rate": 1.9933236966851002e-05, "loss": 0.6415, "step": 2778 }, { "epoch": 0.07630422844590884, "grad_norm": 0.35570091009140015, "learning_rate": 1.9933187134168395e-05, "loss": 0.6253, "step": 2779 }, { "epoch": 0.07633168588687535, "grad_norm": 0.3576837480068207, "learning_rate": 1.993313728295722e-05, "loss": 0.5406, "step": 2780 }, { "epoch": 0.07635914332784184, "grad_norm": 0.37266233563423157, "learning_rate": 1.9933087413217575e-05, "loss": 0.6677, "step": 2781 }, { "epoch": 0.07638660076880835, "grad_norm": 0.3768152594566345, "learning_rate": 1.9933037524949555e-05, "loss": 0.6057, "step": 2782 }, { "epoch": 0.07641405820977484, "grad_norm": 0.3335483968257904, "learning_rate": 1.9932987618153245e-05, "loss": 0.4939, "step": 2783 }, { "epoch": 0.07644151565074135, "grad_norm": 0.3640168011188507, "learning_rate": 1.9932937692828744e-05, "loss": 0.5317, "step": 2784 }, { "epoch": 0.07646897309170786, "grad_norm": 0.35526973009109497, "learning_rate": 1.993288774897615e-05, "loss": 0.6087, "step": 2785 }, { "epoch": 0.07649643053267435, "grad_norm": 0.33862540125846863, "learning_rate": 1.9932837786595542e-05, "loss": 0.4516, "step": 2786 }, { "epoch": 0.07652388797364086, "grad_norm": 0.37815752625465393, "learning_rate": 1.9932787805687024e-05, "loss": 0.4676, "step": 2787 }, { "epoch": 0.07655134541460736, "grad_norm": 0.3728600740432739, "learning_rate": 1.9932737806250687e-05, "loss": 0.6107, "step": 2788 }, { "epoch": 0.07657880285557386, "grad_norm": 0.39178186655044556, "learning_rate": 1.9932687788286626e-05, "loss": 0.5741, "step": 2789 }, { "epoch": 0.07660626029654036, "grad_norm": 0.4382000267505646, "learning_rate": 1.993263775179493e-05, "loss": 0.5623, "step": 2790 }, { "epoch": 0.07663371773750687, "grad_norm": 0.3897673487663269, "learning_rate": 1.9932587696775693e-05, "loss": 0.5485, "step": 2791 }, { "epoch": 0.07666117517847337, "grad_norm": 0.38330283761024475, "learning_rate": 1.993253762322901e-05, "loss": 0.5688, "step": 2792 }, { "epoch": 0.07668863261943987, "grad_norm": 0.423299640417099, "learning_rate": 1.9932487531154975e-05, "loss": 0.5775, "step": 2793 }, { "epoch": 0.07671609006040638, "grad_norm": 0.3697313368320465, "learning_rate": 1.9932437420553685e-05, "loss": 0.5091, "step": 2794 }, { "epoch": 0.07674354750137287, "grad_norm": 0.40409529209136963, "learning_rate": 1.9932387291425223e-05, "loss": 0.5543, "step": 2795 }, { "epoch": 0.07677100494233938, "grad_norm": 0.38750290870666504, "learning_rate": 1.9932337143769694e-05, "loss": 0.5899, "step": 2796 }, { "epoch": 0.07679846238330587, "grad_norm": 0.3626382648944855, "learning_rate": 1.9932286977587183e-05, "loss": 0.5859, "step": 2797 }, { "epoch": 0.07682591982427238, "grad_norm": 0.3714325726032257, "learning_rate": 1.9932236792877784e-05, "loss": 0.5327, "step": 2798 }, { "epoch": 0.07685337726523887, "grad_norm": 0.5344110131263733, "learning_rate": 1.9932186589641597e-05, "loss": 0.5314, "step": 2799 }, { "epoch": 0.07688083470620538, "grad_norm": 0.34094947576522827, "learning_rate": 1.9932136367878715e-05, "loss": 0.4438, "step": 2800 }, { "epoch": 0.07690829214717189, "grad_norm": 0.42264747619628906, "learning_rate": 1.993208612758922e-05, "loss": 0.5256, "step": 2801 }, { "epoch": 0.07693574958813838, "grad_norm": 0.35935506224632263, "learning_rate": 1.9932035868773224e-05, "loss": 0.5352, "step": 2802 }, { "epoch": 0.07696320702910489, "grad_norm": 0.3253059983253479, "learning_rate": 1.9931985591430805e-05, "loss": 0.5526, "step": 2803 }, { "epoch": 0.07699066447007138, "grad_norm": 0.3399925231933594, "learning_rate": 1.9931935295562064e-05, "loss": 0.4883, "step": 2804 }, { "epoch": 0.07701812191103789, "grad_norm": 0.3638738989830017, "learning_rate": 1.9931884981167094e-05, "loss": 0.5109, "step": 2805 }, { "epoch": 0.07704557935200439, "grad_norm": 0.4416954219341278, "learning_rate": 1.993183464824599e-05, "loss": 0.5924, "step": 2806 }, { "epoch": 0.0770730367929709, "grad_norm": 0.38718482851982117, "learning_rate": 1.9931784296798845e-05, "loss": 0.5762, "step": 2807 }, { "epoch": 0.0771004942339374, "grad_norm": 0.33059263229370117, "learning_rate": 1.993173392682575e-05, "loss": 0.5841, "step": 2808 }, { "epoch": 0.0771279516749039, "grad_norm": 0.32983866333961487, "learning_rate": 1.99316835383268e-05, "loss": 0.4634, "step": 2809 }, { "epoch": 0.0771554091158704, "grad_norm": 0.48451074957847595, "learning_rate": 1.9931633131302095e-05, "loss": 0.5628, "step": 2810 }, { "epoch": 0.0771828665568369, "grad_norm": 0.3593306243419647, "learning_rate": 1.9931582705751722e-05, "loss": 0.5369, "step": 2811 }, { "epoch": 0.0772103239978034, "grad_norm": 0.3475426137447357, "learning_rate": 1.9931532261675777e-05, "loss": 0.5185, "step": 2812 }, { "epoch": 0.0772377814387699, "grad_norm": 0.3185271620750427, "learning_rate": 1.9931481799074354e-05, "loss": 0.5471, "step": 2813 }, { "epoch": 0.07726523887973641, "grad_norm": 0.3926304876804352, "learning_rate": 1.993143131794755e-05, "loss": 0.5918, "step": 2814 }, { "epoch": 0.07729269632070292, "grad_norm": 0.36520200967788696, "learning_rate": 1.9931380818295456e-05, "loss": 0.4961, "step": 2815 }, { "epoch": 0.07732015376166941, "grad_norm": 0.3411597013473511, "learning_rate": 1.9931330300118165e-05, "loss": 0.5761, "step": 2816 }, { "epoch": 0.07734761120263592, "grad_norm": 0.4121137261390686, "learning_rate": 1.993127976341578e-05, "loss": 0.4895, "step": 2817 }, { "epoch": 0.07737506864360241, "grad_norm": 0.5671923756599426, "learning_rate": 1.9931229208188382e-05, "loss": 0.5424, "step": 2818 }, { "epoch": 0.07740252608456892, "grad_norm": 0.3693098723888397, "learning_rate": 1.9931178634436073e-05, "loss": 0.5845, "step": 2819 }, { "epoch": 0.07742998352553541, "grad_norm": 0.37176698446273804, "learning_rate": 1.9931128042158944e-05, "loss": 0.5635, "step": 2820 }, { "epoch": 0.07745744096650192, "grad_norm": 0.4026034474372864, "learning_rate": 1.9931077431357095e-05, "loss": 0.6562, "step": 2821 }, { "epoch": 0.07748489840746843, "grad_norm": 0.48332399129867554, "learning_rate": 1.9931026802030616e-05, "loss": 0.5605, "step": 2822 }, { "epoch": 0.07751235584843492, "grad_norm": 0.32080546021461487, "learning_rate": 1.9930976154179604e-05, "loss": 0.4994, "step": 2823 }, { "epoch": 0.07753981328940143, "grad_norm": 0.37902727723121643, "learning_rate": 1.9930925487804148e-05, "loss": 0.599, "step": 2824 }, { "epoch": 0.07756727073036793, "grad_norm": 0.3590219020843506, "learning_rate": 1.993087480290435e-05, "loss": 0.4767, "step": 2825 }, { "epoch": 0.07759472817133443, "grad_norm": 0.37361615896224976, "learning_rate": 1.99308240994803e-05, "loss": 0.5788, "step": 2826 }, { "epoch": 0.07762218561230093, "grad_norm": 0.34117591381073, "learning_rate": 1.9930773377532094e-05, "loss": 0.5361, "step": 2827 }, { "epoch": 0.07764964305326744, "grad_norm": 0.37882521748542786, "learning_rate": 1.9930722637059825e-05, "loss": 0.6524, "step": 2828 }, { "epoch": 0.07767710049423393, "grad_norm": 0.4004732072353363, "learning_rate": 1.993067187806359e-05, "loss": 0.5514, "step": 2829 }, { "epoch": 0.07770455793520044, "grad_norm": 0.35850226879119873, "learning_rate": 1.993062110054348e-05, "loss": 0.6091, "step": 2830 }, { "epoch": 0.07773201537616695, "grad_norm": 0.31252509355545044, "learning_rate": 1.9930570304499596e-05, "loss": 0.4506, "step": 2831 }, { "epoch": 0.07775947281713344, "grad_norm": 0.31133925914764404, "learning_rate": 1.9930519489932022e-05, "loss": 0.5297, "step": 2832 }, { "epoch": 0.07778693025809995, "grad_norm": 0.4107705056667328, "learning_rate": 1.9930468656840868e-05, "loss": 0.6131, "step": 2833 }, { "epoch": 0.07781438769906644, "grad_norm": 0.35909274220466614, "learning_rate": 1.9930417805226218e-05, "loss": 0.5188, "step": 2834 }, { "epoch": 0.07784184514003295, "grad_norm": 0.3245830535888672, "learning_rate": 1.9930366935088167e-05, "loss": 0.5357, "step": 2835 }, { "epoch": 0.07786930258099944, "grad_norm": 0.36577746272087097, "learning_rate": 1.9930316046426813e-05, "loss": 0.7189, "step": 2836 }, { "epoch": 0.07789676002196595, "grad_norm": 0.41771578788757324, "learning_rate": 1.993026513924225e-05, "loss": 0.7226, "step": 2837 }, { "epoch": 0.07792421746293246, "grad_norm": 0.5177350640296936, "learning_rate": 1.9930214213534573e-05, "loss": 0.486, "step": 2838 }, { "epoch": 0.07795167490389895, "grad_norm": 0.39596351981163025, "learning_rate": 1.9930163269303876e-05, "loss": 0.73, "step": 2839 }, { "epoch": 0.07797913234486546, "grad_norm": 0.37434571981430054, "learning_rate": 1.993011230655026e-05, "loss": 0.5959, "step": 2840 }, { "epoch": 0.07800658978583196, "grad_norm": 0.34406188130378723, "learning_rate": 1.9930061325273812e-05, "loss": 0.5164, "step": 2841 }, { "epoch": 0.07803404722679846, "grad_norm": 0.3807098865509033, "learning_rate": 1.993001032547463e-05, "loss": 0.6478, "step": 2842 }, { "epoch": 0.07806150466776496, "grad_norm": 0.3742447793483734, "learning_rate": 1.992995930715281e-05, "loss": 0.5555, "step": 2843 }, { "epoch": 0.07808896210873147, "grad_norm": 0.3860563635826111, "learning_rate": 1.9929908270308446e-05, "loss": 0.6261, "step": 2844 }, { "epoch": 0.07811641954969797, "grad_norm": 0.4248245656490326, "learning_rate": 1.9929857214941637e-05, "loss": 0.5783, "step": 2845 }, { "epoch": 0.07814387699066447, "grad_norm": 0.43635210394859314, "learning_rate": 1.992980614105247e-05, "loss": 0.6263, "step": 2846 }, { "epoch": 0.07817133443163098, "grad_norm": 0.3440542221069336, "learning_rate": 1.9929755048641047e-05, "loss": 0.5511, "step": 2847 }, { "epoch": 0.07819879187259747, "grad_norm": 0.4962725341320038, "learning_rate": 1.9929703937707467e-05, "loss": 0.6372, "step": 2848 }, { "epoch": 0.07822624931356398, "grad_norm": 0.37182122468948364, "learning_rate": 1.9929652808251813e-05, "loss": 0.5708, "step": 2849 }, { "epoch": 0.07825370675453047, "grad_norm": 0.3625844717025757, "learning_rate": 1.9929601660274192e-05, "loss": 0.5736, "step": 2850 }, { "epoch": 0.07828116419549698, "grad_norm": 0.3484930396080017, "learning_rate": 1.9929550493774692e-05, "loss": 0.5258, "step": 2851 }, { "epoch": 0.07830862163646349, "grad_norm": 0.4039776623249054, "learning_rate": 1.992949930875341e-05, "loss": 0.5334, "step": 2852 }, { "epoch": 0.07833607907742998, "grad_norm": 0.45663824677467346, "learning_rate": 1.9929448105210444e-05, "loss": 0.553, "step": 2853 }, { "epoch": 0.07836353651839649, "grad_norm": 0.33520007133483887, "learning_rate": 1.9929396883145887e-05, "loss": 0.5285, "step": 2854 }, { "epoch": 0.07839099395936298, "grad_norm": 0.43612414598464966, "learning_rate": 1.992934564255984e-05, "loss": 0.6272, "step": 2855 }, { "epoch": 0.07841845140032949, "grad_norm": 0.35779187083244324, "learning_rate": 1.992929438345239e-05, "loss": 0.5591, "step": 2856 }, { "epoch": 0.07844590884129599, "grad_norm": 0.3973114788532257, "learning_rate": 1.9929243105823638e-05, "loss": 0.5776, "step": 2857 }, { "epoch": 0.0784733662822625, "grad_norm": 0.47988036274909973, "learning_rate": 1.992919180967368e-05, "loss": 0.6081, "step": 2858 }, { "epoch": 0.078500823723229, "grad_norm": 0.3700225055217743, "learning_rate": 1.992914049500261e-05, "loss": 0.575, "step": 2859 }, { "epoch": 0.0785282811641955, "grad_norm": 0.5254287123680115, "learning_rate": 1.9929089161810525e-05, "loss": 0.6146, "step": 2860 }, { "epoch": 0.078555738605162, "grad_norm": 0.3430519700050354, "learning_rate": 1.9929037810097516e-05, "loss": 0.4668, "step": 2861 }, { "epoch": 0.0785831960461285, "grad_norm": 0.44405412673950195, "learning_rate": 1.9928986439863684e-05, "loss": 0.5619, "step": 2862 }, { "epoch": 0.078610653487095, "grad_norm": 0.34817326068878174, "learning_rate": 1.9928935051109125e-05, "loss": 0.568, "step": 2863 }, { "epoch": 0.0786381109280615, "grad_norm": 0.3485410213470459, "learning_rate": 1.9928883643833933e-05, "loss": 0.5256, "step": 2864 }, { "epoch": 0.07866556836902801, "grad_norm": 0.33600303530693054, "learning_rate": 1.9928832218038202e-05, "loss": 0.4936, "step": 2865 }, { "epoch": 0.0786930258099945, "grad_norm": 2.556021213531494, "learning_rate": 1.9928780773722032e-05, "loss": 0.5674, "step": 2866 }, { "epoch": 0.07872048325096101, "grad_norm": 0.4196873605251312, "learning_rate": 1.9928729310885514e-05, "loss": 0.5025, "step": 2867 }, { "epoch": 0.07874794069192752, "grad_norm": 0.3804168105125427, "learning_rate": 1.9928677829528752e-05, "loss": 0.6343, "step": 2868 }, { "epoch": 0.07877539813289401, "grad_norm": 0.37788933515548706, "learning_rate": 1.9928626329651832e-05, "loss": 0.5502, "step": 2869 }, { "epoch": 0.07880285557386052, "grad_norm": 0.3128436207771301, "learning_rate": 1.9928574811254858e-05, "loss": 0.431, "step": 2870 }, { "epoch": 0.07883031301482701, "grad_norm": 0.378238320350647, "learning_rate": 1.9928523274337922e-05, "loss": 0.5945, "step": 2871 }, { "epoch": 0.07885777045579352, "grad_norm": 0.3867018222808838, "learning_rate": 1.992847171890112e-05, "loss": 0.6275, "step": 2872 }, { "epoch": 0.07888522789676002, "grad_norm": 0.3865893483161926, "learning_rate": 1.992842014494455e-05, "loss": 0.5225, "step": 2873 }, { "epoch": 0.07891268533772652, "grad_norm": 0.34317150712013245, "learning_rate": 1.992836855246831e-05, "loss": 0.5988, "step": 2874 }, { "epoch": 0.07894014277869303, "grad_norm": 0.3600977957248688, "learning_rate": 1.9928316941472493e-05, "loss": 0.5122, "step": 2875 }, { "epoch": 0.07896760021965953, "grad_norm": 0.36574992537498474, "learning_rate": 1.9928265311957194e-05, "loss": 0.6097, "step": 2876 }, { "epoch": 0.07899505766062603, "grad_norm": 0.3493749499320984, "learning_rate": 1.9928213663922512e-05, "loss": 0.4906, "step": 2877 }, { "epoch": 0.07902251510159253, "grad_norm": 0.32657596468925476, "learning_rate": 1.9928161997368546e-05, "loss": 0.5312, "step": 2878 }, { "epoch": 0.07904997254255904, "grad_norm": 0.332365483045578, "learning_rate": 1.9928110312295387e-05, "loss": 0.5685, "step": 2879 }, { "epoch": 0.07907742998352553, "grad_norm": 0.3845902383327484, "learning_rate": 1.9928058608703133e-05, "loss": 0.4743, "step": 2880 }, { "epoch": 0.07910488742449204, "grad_norm": 0.3781723380088806, "learning_rate": 1.992800688659188e-05, "loss": 0.531, "step": 2881 }, { "epoch": 0.07913234486545855, "grad_norm": 0.3839815855026245, "learning_rate": 1.9927955145961727e-05, "loss": 0.5319, "step": 2882 }, { "epoch": 0.07915980230642504, "grad_norm": 0.3775777816772461, "learning_rate": 1.9927903386812772e-05, "loss": 0.4933, "step": 2883 }, { "epoch": 0.07918725974739155, "grad_norm": 0.3619346618652344, "learning_rate": 1.9927851609145105e-05, "loss": 0.5258, "step": 2884 }, { "epoch": 0.07921471718835804, "grad_norm": 0.3510991036891937, "learning_rate": 1.9927799812958824e-05, "loss": 0.6414, "step": 2885 }, { "epoch": 0.07924217462932455, "grad_norm": 0.3552727699279785, "learning_rate": 1.992774799825403e-05, "loss": 0.5913, "step": 2886 }, { "epoch": 0.07926963207029104, "grad_norm": 0.3576870858669281, "learning_rate": 1.9927696165030822e-05, "loss": 0.5615, "step": 2887 }, { "epoch": 0.07929708951125755, "grad_norm": 0.343036413192749, "learning_rate": 1.9927644313289283e-05, "loss": 0.5293, "step": 2888 }, { "epoch": 0.07932454695222406, "grad_norm": 0.38303953409194946, "learning_rate": 1.9927592443029528e-05, "loss": 0.6379, "step": 2889 }, { "epoch": 0.07935200439319055, "grad_norm": 0.3803226351737976, "learning_rate": 1.992754055425164e-05, "loss": 0.6456, "step": 2890 }, { "epoch": 0.07937946183415706, "grad_norm": 0.4017510414123535, "learning_rate": 1.992748864695572e-05, "loss": 0.6087, "step": 2891 }, { "epoch": 0.07940691927512356, "grad_norm": 0.3569180965423584, "learning_rate": 1.9927436721141866e-05, "loss": 0.5977, "step": 2892 }, { "epoch": 0.07943437671609006, "grad_norm": 0.4353973865509033, "learning_rate": 1.9927384776810173e-05, "loss": 0.607, "step": 2893 }, { "epoch": 0.07946183415705656, "grad_norm": 0.45006659626960754, "learning_rate": 1.992733281396074e-05, "loss": 0.534, "step": 2894 }, { "epoch": 0.07948929159802307, "grad_norm": 0.4490526616573334, "learning_rate": 1.992728083259366e-05, "loss": 0.4647, "step": 2895 }, { "epoch": 0.07951674903898956, "grad_norm": 0.3992559313774109, "learning_rate": 1.992722883270904e-05, "loss": 0.5246, "step": 2896 }, { "epoch": 0.07954420647995607, "grad_norm": 0.3607464134693146, "learning_rate": 1.9927176814306964e-05, "loss": 0.4822, "step": 2897 }, { "epoch": 0.07957166392092258, "grad_norm": 0.3901677429676056, "learning_rate": 1.9927124777387537e-05, "loss": 0.5891, "step": 2898 }, { "epoch": 0.07959912136188907, "grad_norm": 0.38980552554130554, "learning_rate": 1.9927072721950854e-05, "loss": 0.5208, "step": 2899 }, { "epoch": 0.07962657880285558, "grad_norm": 0.3503669500350952, "learning_rate": 1.9927020647997012e-05, "loss": 0.5909, "step": 2900 }, { "epoch": 0.07965403624382207, "grad_norm": 0.37473902106285095, "learning_rate": 1.9926968555526108e-05, "loss": 0.558, "step": 2901 }, { "epoch": 0.07968149368478858, "grad_norm": 0.3491728603839874, "learning_rate": 1.9926916444538237e-05, "loss": 0.5312, "step": 2902 }, { "epoch": 0.07970895112575507, "grad_norm": 0.36730116605758667, "learning_rate": 1.99268643150335e-05, "loss": 0.5133, "step": 2903 }, { "epoch": 0.07973640856672158, "grad_norm": 0.3758183419704437, "learning_rate": 1.9926812167011997e-05, "loss": 0.5637, "step": 2904 }, { "epoch": 0.07976386600768809, "grad_norm": 0.3552912771701813, "learning_rate": 1.9926760000473814e-05, "loss": 0.5446, "step": 2905 }, { "epoch": 0.07979132344865458, "grad_norm": 0.371304452419281, "learning_rate": 1.992670781541906e-05, "loss": 0.5928, "step": 2906 }, { "epoch": 0.07981878088962109, "grad_norm": 0.481494665145874, "learning_rate": 1.9926655611847826e-05, "loss": 0.5548, "step": 2907 }, { "epoch": 0.07984623833058759, "grad_norm": 0.325808048248291, "learning_rate": 1.992660338976021e-05, "loss": 0.4954, "step": 2908 }, { "epoch": 0.07987369577155409, "grad_norm": 0.4524689316749573, "learning_rate": 1.992655114915631e-05, "loss": 0.6007, "step": 2909 }, { "epoch": 0.07990115321252059, "grad_norm": 0.3450329601764679, "learning_rate": 1.9926498890036228e-05, "loss": 0.5695, "step": 2910 }, { "epoch": 0.0799286106534871, "grad_norm": 0.3693739175796509, "learning_rate": 1.9926446612400056e-05, "loss": 0.5419, "step": 2911 }, { "epoch": 0.0799560680944536, "grad_norm": 0.4213643968105316, "learning_rate": 1.992639431624789e-05, "loss": 0.5381, "step": 2912 }, { "epoch": 0.0799835255354201, "grad_norm": 0.48941484093666077, "learning_rate": 1.9926342001579833e-05, "loss": 0.662, "step": 2913 }, { "epoch": 0.0800109829763866, "grad_norm": 0.30614471435546875, "learning_rate": 1.9926289668395978e-05, "loss": 0.472, "step": 2914 }, { "epoch": 0.0800384404173531, "grad_norm": 0.38299858570098877, "learning_rate": 1.9926237316696427e-05, "loss": 0.5578, "step": 2915 }, { "epoch": 0.0800658978583196, "grad_norm": 0.3362836539745331, "learning_rate": 1.9926184946481275e-05, "loss": 0.5276, "step": 2916 }, { "epoch": 0.0800933552992861, "grad_norm": 0.38571280241012573, "learning_rate": 1.992613255775062e-05, "loss": 0.6457, "step": 2917 }, { "epoch": 0.08012081274025261, "grad_norm": 0.3981505334377289, "learning_rate": 1.992608015050456e-05, "loss": 0.5975, "step": 2918 }, { "epoch": 0.08014827018121912, "grad_norm": 0.3718164563179016, "learning_rate": 1.992602772474319e-05, "loss": 0.6075, "step": 2919 }, { "epoch": 0.08017572762218561, "grad_norm": 0.3259344696998596, "learning_rate": 1.9925975280466612e-05, "loss": 0.5222, "step": 2920 }, { "epoch": 0.08020318506315212, "grad_norm": 0.3399087190628052, "learning_rate": 1.9925922817674923e-05, "loss": 0.5845, "step": 2921 }, { "epoch": 0.08023064250411861, "grad_norm": 0.3290942311286926, "learning_rate": 1.9925870336368218e-05, "loss": 0.5314, "step": 2922 }, { "epoch": 0.08025809994508512, "grad_norm": 0.3782019317150116, "learning_rate": 1.9925817836546596e-05, "loss": 0.6494, "step": 2923 }, { "epoch": 0.08028555738605161, "grad_norm": 0.3859346807003021, "learning_rate": 1.992576531821016e-05, "loss": 0.3941, "step": 2924 }, { "epoch": 0.08031301482701812, "grad_norm": 0.41803237795829773, "learning_rate": 1.9925712781359e-05, "loss": 0.5989, "step": 2925 }, { "epoch": 0.08034047226798463, "grad_norm": 0.3629205524921417, "learning_rate": 1.992566022599322e-05, "loss": 0.5005, "step": 2926 }, { "epoch": 0.08036792970895112, "grad_norm": 0.38809043169021606, "learning_rate": 1.9925607652112916e-05, "loss": 0.5833, "step": 2927 }, { "epoch": 0.08039538714991763, "grad_norm": 0.35910719633102417, "learning_rate": 1.9925555059718182e-05, "loss": 0.511, "step": 2928 }, { "epoch": 0.08042284459088413, "grad_norm": 0.4892318844795227, "learning_rate": 1.992550244880912e-05, "loss": 0.5588, "step": 2929 }, { "epoch": 0.08045030203185063, "grad_norm": 0.3623504340648651, "learning_rate": 1.9925449819385834e-05, "loss": 0.5922, "step": 2930 }, { "epoch": 0.08047775947281713, "grad_norm": 0.4201202690601349, "learning_rate": 1.9925397171448412e-05, "loss": 0.6213, "step": 2931 }, { "epoch": 0.08050521691378364, "grad_norm": 0.3393542468547821, "learning_rate": 1.992534450499696e-05, "loss": 0.4986, "step": 2932 }, { "epoch": 0.08053267435475013, "grad_norm": 0.38193461298942566, "learning_rate": 1.992529182003157e-05, "loss": 0.4899, "step": 2933 }, { "epoch": 0.08056013179571664, "grad_norm": 0.43295520544052124, "learning_rate": 1.992523911655234e-05, "loss": 0.5158, "step": 2934 }, { "epoch": 0.08058758923668315, "grad_norm": 0.41032472252845764, "learning_rate": 1.9925186394559377e-05, "loss": 0.5848, "step": 2935 }, { "epoch": 0.08061504667764964, "grad_norm": 0.4271424114704132, "learning_rate": 1.992513365405277e-05, "loss": 0.537, "step": 2936 }, { "epoch": 0.08064250411861615, "grad_norm": 0.33038344979286194, "learning_rate": 1.9925080895032622e-05, "loss": 0.5167, "step": 2937 }, { "epoch": 0.08066996155958264, "grad_norm": 0.461454302072525, "learning_rate": 1.992502811749903e-05, "loss": 0.6101, "step": 2938 }, { "epoch": 0.08069741900054915, "grad_norm": 0.39107322692871094, "learning_rate": 1.9924975321452095e-05, "loss": 0.5574, "step": 2939 }, { "epoch": 0.08072487644151564, "grad_norm": 0.38988909125328064, "learning_rate": 1.9924922506891912e-05, "loss": 0.5904, "step": 2940 }, { "epoch": 0.08075233388248215, "grad_norm": 0.4036739468574524, "learning_rate": 1.992486967381858e-05, "loss": 0.5324, "step": 2941 }, { "epoch": 0.08077979132344866, "grad_norm": 0.3991895914077759, "learning_rate": 1.99248168222322e-05, "loss": 0.6519, "step": 2942 }, { "epoch": 0.08080724876441515, "grad_norm": 0.4499501883983612, "learning_rate": 1.9924763952132865e-05, "loss": 0.5253, "step": 2943 }, { "epoch": 0.08083470620538166, "grad_norm": 0.3219243586063385, "learning_rate": 1.992471106352068e-05, "loss": 0.5789, "step": 2944 }, { "epoch": 0.08086216364634816, "grad_norm": 0.33325114846229553, "learning_rate": 1.9924658156395744e-05, "loss": 0.5099, "step": 2945 }, { "epoch": 0.08088962108731466, "grad_norm": 0.3900696635246277, "learning_rate": 1.992460523075815e-05, "loss": 0.5578, "step": 2946 }, { "epoch": 0.08091707852828116, "grad_norm": 0.39600586891174316, "learning_rate": 1.9924552286607998e-05, "loss": 0.5859, "step": 2947 }, { "epoch": 0.08094453596924767, "grad_norm": 0.38592851161956787, "learning_rate": 1.9924499323945392e-05, "loss": 0.6131, "step": 2948 }, { "epoch": 0.08097199341021417, "grad_norm": 0.4094874858856201, "learning_rate": 1.9924446342770426e-05, "loss": 0.5852, "step": 2949 }, { "epoch": 0.08099945085118067, "grad_norm": 0.3711739778518677, "learning_rate": 1.99243933430832e-05, "loss": 0.5757, "step": 2950 }, { "epoch": 0.08102690829214718, "grad_norm": 0.42694398760795593, "learning_rate": 1.9924340324883815e-05, "loss": 0.558, "step": 2951 }, { "epoch": 0.08105436573311367, "grad_norm": 0.38341933488845825, "learning_rate": 1.9924287288172365e-05, "loss": 0.624, "step": 2952 }, { "epoch": 0.08108182317408018, "grad_norm": 0.3375255763530731, "learning_rate": 1.9924234232948952e-05, "loss": 0.51, "step": 2953 }, { "epoch": 0.08110928061504667, "grad_norm": 0.3824305236339569, "learning_rate": 1.9924181159213676e-05, "loss": 0.474, "step": 2954 }, { "epoch": 0.08113673805601318, "grad_norm": 0.3944445848464966, "learning_rate": 1.9924128066966633e-05, "loss": 0.5921, "step": 2955 }, { "epoch": 0.08116419549697969, "grad_norm": 0.3738815188407898, "learning_rate": 1.9924074956207925e-05, "loss": 0.5527, "step": 2956 }, { "epoch": 0.08119165293794618, "grad_norm": 0.4116462469100952, "learning_rate": 1.9924021826937648e-05, "loss": 0.5411, "step": 2957 }, { "epoch": 0.08121911037891269, "grad_norm": 0.359149307012558, "learning_rate": 1.9923968679155906e-05, "loss": 0.5336, "step": 2958 }, { "epoch": 0.08124656781987918, "grad_norm": 0.3946773111820221, "learning_rate": 1.9923915512862795e-05, "loss": 0.5863, "step": 2959 }, { "epoch": 0.08127402526084569, "grad_norm": 0.3651440441608429, "learning_rate": 1.9923862328058412e-05, "loss": 0.6559, "step": 2960 }, { "epoch": 0.08130148270181219, "grad_norm": 0.42253732681274414, "learning_rate": 1.992380912474286e-05, "loss": 0.5675, "step": 2961 }, { "epoch": 0.0813289401427787, "grad_norm": 0.4080330431461334, "learning_rate": 1.9923755902916235e-05, "loss": 0.5773, "step": 2962 }, { "epoch": 0.08135639758374519, "grad_norm": 0.3515836000442505, "learning_rate": 1.9923702662578637e-05, "loss": 0.4322, "step": 2963 }, { "epoch": 0.0813838550247117, "grad_norm": 0.3470718562602997, "learning_rate": 1.9923649403730168e-05, "loss": 0.5893, "step": 2964 }, { "epoch": 0.0814113124656782, "grad_norm": 0.32626453042030334, "learning_rate": 1.9923596126370928e-05, "loss": 0.4671, "step": 2965 }, { "epoch": 0.0814387699066447, "grad_norm": 0.3677840232849121, "learning_rate": 1.992354283050101e-05, "loss": 0.5136, "step": 2966 }, { "epoch": 0.0814662273476112, "grad_norm": 0.3241141736507416, "learning_rate": 1.992348951612052e-05, "loss": 0.4912, "step": 2967 }, { "epoch": 0.0814936847885777, "grad_norm": 0.4776799976825714, "learning_rate": 1.9923436183229552e-05, "loss": 0.5477, "step": 2968 }, { "epoch": 0.08152114222954421, "grad_norm": 0.44733965396881104, "learning_rate": 1.992338283182821e-05, "loss": 0.5244, "step": 2969 }, { "epoch": 0.0815485996705107, "grad_norm": 0.35925260186195374, "learning_rate": 1.9923329461916592e-05, "loss": 0.4831, "step": 2970 }, { "epoch": 0.08157605711147721, "grad_norm": 0.4334067404270172, "learning_rate": 1.9923276073494796e-05, "loss": 0.6141, "step": 2971 }, { "epoch": 0.08160351455244372, "grad_norm": 0.3816995918750763, "learning_rate": 1.9923222666562928e-05, "loss": 0.538, "step": 2972 }, { "epoch": 0.08163097199341021, "grad_norm": 0.37118253111839294, "learning_rate": 1.9923169241121076e-05, "loss": 0.5175, "step": 2973 }, { "epoch": 0.08165842943437672, "grad_norm": 0.3629944324493408, "learning_rate": 1.992311579716935e-05, "loss": 0.541, "step": 2974 }, { "epoch": 0.08168588687534321, "grad_norm": 0.38288435339927673, "learning_rate": 1.992306233470785e-05, "loss": 0.6226, "step": 2975 }, { "epoch": 0.08171334431630972, "grad_norm": 0.4302304983139038, "learning_rate": 1.992300885373666e-05, "loss": 0.6313, "step": 2976 }, { "epoch": 0.08174080175727622, "grad_norm": 0.4304451048374176, "learning_rate": 1.9922955354255903e-05, "loss": 0.6165, "step": 2977 }, { "epoch": 0.08176825919824272, "grad_norm": 0.42249906063079834, "learning_rate": 1.9922901836265662e-05, "loss": 0.6133, "step": 2978 }, { "epoch": 0.08179571663920923, "grad_norm": 0.4036064147949219, "learning_rate": 1.992284829976604e-05, "loss": 0.4748, "step": 2979 }, { "epoch": 0.08182317408017573, "grad_norm": 0.3455677032470703, "learning_rate": 1.9922794744757143e-05, "loss": 0.5184, "step": 2980 }, { "epoch": 0.08185063152114223, "grad_norm": 0.4761973023414612, "learning_rate": 1.9922741171239064e-05, "loss": 0.6345, "step": 2981 }, { "epoch": 0.08187808896210873, "grad_norm": 0.434856116771698, "learning_rate": 1.9922687579211906e-05, "loss": 0.5556, "step": 2982 }, { "epoch": 0.08190554640307524, "grad_norm": 0.37445515394210815, "learning_rate": 1.992263396867577e-05, "loss": 0.5523, "step": 2983 }, { "epoch": 0.08193300384404173, "grad_norm": 0.3755737245082855, "learning_rate": 1.992258033963076e-05, "loss": 0.4932, "step": 2984 }, { "epoch": 0.08196046128500824, "grad_norm": 0.3619568645954132, "learning_rate": 1.9922526692076962e-05, "loss": 0.4652, "step": 2985 }, { "epoch": 0.08198791872597475, "grad_norm": 0.3754488229751587, "learning_rate": 1.992247302601449e-05, "loss": 0.5195, "step": 2986 }, { "epoch": 0.08201537616694124, "grad_norm": 0.36593571305274963, "learning_rate": 1.9922419341443433e-05, "loss": 0.5565, "step": 2987 }, { "epoch": 0.08204283360790775, "grad_norm": 0.34616708755493164, "learning_rate": 1.9922365638363902e-05, "loss": 0.5019, "step": 2988 }, { "epoch": 0.08207029104887424, "grad_norm": 0.3773635923862457, "learning_rate": 1.992231191677599e-05, "loss": 0.4736, "step": 2989 }, { "epoch": 0.08209774848984075, "grad_norm": 0.35893407464027405, "learning_rate": 1.9922258176679803e-05, "loss": 0.6407, "step": 2990 }, { "epoch": 0.08212520593080724, "grad_norm": 0.39510247111320496, "learning_rate": 1.9922204418075434e-05, "loss": 0.5369, "step": 2991 }, { "epoch": 0.08215266337177375, "grad_norm": 0.3639708161354065, "learning_rate": 1.992215064096299e-05, "loss": 0.5451, "step": 2992 }, { "epoch": 0.08218012081274026, "grad_norm": 0.36584392189979553, "learning_rate": 1.9922096845342564e-05, "loss": 0.6017, "step": 2993 }, { "epoch": 0.08220757825370675, "grad_norm": 0.4056588113307953, "learning_rate": 1.9922043031214263e-05, "loss": 0.6251, "step": 2994 }, { "epoch": 0.08223503569467326, "grad_norm": 0.43352216482162476, "learning_rate": 1.9921989198578187e-05, "loss": 0.6132, "step": 2995 }, { "epoch": 0.08226249313563976, "grad_norm": 0.3369750380516052, "learning_rate": 1.9921935347434435e-05, "loss": 0.4993, "step": 2996 }, { "epoch": 0.08228995057660626, "grad_norm": 0.37279918789863586, "learning_rate": 1.9921881477783104e-05, "loss": 0.5179, "step": 2997 }, { "epoch": 0.08231740801757276, "grad_norm": 0.419405460357666, "learning_rate": 1.9921827589624297e-05, "loss": 0.6962, "step": 2998 }, { "epoch": 0.08234486545853927, "grad_norm": 0.42972368001937866, "learning_rate": 1.9921773682958114e-05, "loss": 0.5114, "step": 2999 }, { "epoch": 0.08237232289950576, "grad_norm": 0.35561293363571167, "learning_rate": 1.9921719757784658e-05, "loss": 0.5028, "step": 3000 }, { "epoch": 0.08239978034047227, "grad_norm": 0.3175599277019501, "learning_rate": 1.9921665814104027e-05, "loss": 0.5518, "step": 3001 }, { "epoch": 0.08242723778143878, "grad_norm": 0.36875686049461365, "learning_rate": 1.992161185191633e-05, "loss": 0.5895, "step": 3002 }, { "epoch": 0.08245469522240527, "grad_norm": 0.33067041635513306, "learning_rate": 1.992155787122165e-05, "loss": 0.6587, "step": 3003 }, { "epoch": 0.08248215266337178, "grad_norm": 0.33115819096565247, "learning_rate": 1.99215038720201e-05, "loss": 0.5332, "step": 3004 }, { "epoch": 0.08250961010433827, "grad_norm": 0.3827197849750519, "learning_rate": 1.9921449854311782e-05, "loss": 0.6507, "step": 3005 }, { "epoch": 0.08253706754530478, "grad_norm": 0.4356773793697357, "learning_rate": 1.9921395818096792e-05, "loss": 0.5779, "step": 3006 }, { "epoch": 0.08256452498627127, "grad_norm": 0.4465574324131012, "learning_rate": 1.9921341763375234e-05, "loss": 0.5568, "step": 3007 }, { "epoch": 0.08259198242723778, "grad_norm": 0.39597493410110474, "learning_rate": 1.9921287690147205e-05, "loss": 0.5744, "step": 3008 }, { "epoch": 0.08261943986820429, "grad_norm": 0.3700196146965027, "learning_rate": 1.9921233598412808e-05, "loss": 0.489, "step": 3009 }, { "epoch": 0.08264689730917078, "grad_norm": 0.38273143768310547, "learning_rate": 1.9921179488172146e-05, "loss": 0.6361, "step": 3010 }, { "epoch": 0.08267435475013729, "grad_norm": 0.373741090297699, "learning_rate": 1.9921125359425315e-05, "loss": 0.6075, "step": 3011 }, { "epoch": 0.08270181219110379, "grad_norm": 0.4957992434501648, "learning_rate": 1.992107121217242e-05, "loss": 0.5183, "step": 3012 }, { "epoch": 0.08272926963207029, "grad_norm": 0.3566875755786896, "learning_rate": 1.992101704641356e-05, "loss": 0.4961, "step": 3013 }, { "epoch": 0.08275672707303679, "grad_norm": 0.37970170378685, "learning_rate": 1.9920962862148837e-05, "loss": 0.4791, "step": 3014 }, { "epoch": 0.0827841845140033, "grad_norm": 0.3923743963241577, "learning_rate": 1.9920908659378354e-05, "loss": 0.5504, "step": 3015 }, { "epoch": 0.0828116419549698, "grad_norm": 0.43154841661453247, "learning_rate": 1.9920854438102207e-05, "loss": 0.6419, "step": 3016 }, { "epoch": 0.0828390993959363, "grad_norm": 0.3938272297382355, "learning_rate": 1.99208001983205e-05, "loss": 0.6334, "step": 3017 }, { "epoch": 0.0828665568369028, "grad_norm": 0.3559965193271637, "learning_rate": 1.9920745940033337e-05, "loss": 0.5746, "step": 3018 }, { "epoch": 0.0828940142778693, "grad_norm": 0.3944675624370575, "learning_rate": 1.9920691663240814e-05, "loss": 0.56, "step": 3019 }, { "epoch": 0.0829214717188358, "grad_norm": 0.3873023986816406, "learning_rate": 1.9920637367943037e-05, "loss": 0.6084, "step": 3020 }, { "epoch": 0.0829489291598023, "grad_norm": 0.34189581871032715, "learning_rate": 1.9920583054140104e-05, "loss": 0.5169, "step": 3021 }, { "epoch": 0.08297638660076881, "grad_norm": 0.4123912453651428, "learning_rate": 1.9920528721832116e-05, "loss": 0.6334, "step": 3022 }, { "epoch": 0.08300384404173532, "grad_norm": 0.36992576718330383, "learning_rate": 1.992047437101918e-05, "loss": 0.5991, "step": 3023 }, { "epoch": 0.08303130148270181, "grad_norm": 0.367628276348114, "learning_rate": 1.992042000170139e-05, "loss": 0.5553, "step": 3024 }, { "epoch": 0.08305875892366832, "grad_norm": 0.4319625794887543, "learning_rate": 1.9920365613878845e-05, "loss": 0.4801, "step": 3025 }, { "epoch": 0.08308621636463481, "grad_norm": 0.44401952624320984, "learning_rate": 1.9920311207551658e-05, "loss": 0.5672, "step": 3026 }, { "epoch": 0.08311367380560132, "grad_norm": 0.4952750504016876, "learning_rate": 1.9920256782719924e-05, "loss": 0.5561, "step": 3027 }, { "epoch": 0.08314113124656781, "grad_norm": 0.39820578694343567, "learning_rate": 1.992020233938375e-05, "loss": 0.6143, "step": 3028 }, { "epoch": 0.08316858868753432, "grad_norm": 0.32245948910713196, "learning_rate": 1.9920147877543224e-05, "loss": 0.5352, "step": 3029 }, { "epoch": 0.08319604612850082, "grad_norm": 0.42104482650756836, "learning_rate": 1.9920093397198455e-05, "loss": 0.553, "step": 3030 }, { "epoch": 0.08322350356946732, "grad_norm": 0.3802202641963959, "learning_rate": 1.992003889834955e-05, "loss": 0.5043, "step": 3031 }, { "epoch": 0.08325096101043383, "grad_norm": 0.38581180572509766, "learning_rate": 1.9919984380996607e-05, "loss": 0.5011, "step": 3032 }, { "epoch": 0.08327841845140033, "grad_norm": 0.39783766865730286, "learning_rate": 1.991992984513973e-05, "loss": 0.5504, "step": 3033 }, { "epoch": 0.08330587589236683, "grad_norm": 0.3662765324115753, "learning_rate": 1.991987529077901e-05, "loss": 0.5669, "step": 3034 }, { "epoch": 0.08333333333333333, "grad_norm": 0.35435813665390015, "learning_rate": 1.9919820717914562e-05, "loss": 0.5949, "step": 3035 }, { "epoch": 0.08336079077429984, "grad_norm": 0.3844055235385895, "learning_rate": 1.9919766126546484e-05, "loss": 0.6105, "step": 3036 }, { "epoch": 0.08338824821526633, "grad_norm": 0.32428309321403503, "learning_rate": 1.991971151667487e-05, "loss": 0.5671, "step": 3037 }, { "epoch": 0.08341570565623284, "grad_norm": 0.3872242271900177, "learning_rate": 1.991965688829983e-05, "loss": 0.605, "step": 3038 }, { "epoch": 0.08344316309719935, "grad_norm": 0.3888581693172455, "learning_rate": 1.9919602241421466e-05, "loss": 0.5297, "step": 3039 }, { "epoch": 0.08347062053816584, "grad_norm": 0.34023338556289673, "learning_rate": 1.9919547576039877e-05, "loss": 0.5428, "step": 3040 }, { "epoch": 0.08349807797913235, "grad_norm": 0.3719659745693207, "learning_rate": 1.9919492892155164e-05, "loss": 0.5452, "step": 3041 }, { "epoch": 0.08352553542009884, "grad_norm": 0.4186488687992096, "learning_rate": 1.9919438189767434e-05, "loss": 0.5555, "step": 3042 }, { "epoch": 0.08355299286106535, "grad_norm": 0.37303292751312256, "learning_rate": 1.9919383468876787e-05, "loss": 0.4633, "step": 3043 }, { "epoch": 0.08358045030203184, "grad_norm": 0.415324866771698, "learning_rate": 1.9919328729483318e-05, "loss": 0.5653, "step": 3044 }, { "epoch": 0.08360790774299835, "grad_norm": 0.37103843688964844, "learning_rate": 1.991927397158714e-05, "loss": 0.5828, "step": 3045 }, { "epoch": 0.08363536518396486, "grad_norm": 0.29787832498550415, "learning_rate": 1.9919219195188347e-05, "loss": 0.4469, "step": 3046 }, { "epoch": 0.08366282262493135, "grad_norm": 0.3436926305294037, "learning_rate": 1.9919164400287044e-05, "loss": 0.4913, "step": 3047 }, { "epoch": 0.08369028006589786, "grad_norm": 0.34001094102859497, "learning_rate": 1.9919109586883337e-05, "loss": 0.5923, "step": 3048 }, { "epoch": 0.08371773750686436, "grad_norm": 0.3286226689815521, "learning_rate": 1.9919054754977323e-05, "loss": 0.4359, "step": 3049 }, { "epoch": 0.08374519494783086, "grad_norm": 0.36464717984199524, "learning_rate": 1.9918999904569104e-05, "loss": 0.5533, "step": 3050 }, { "epoch": 0.08377265238879736, "grad_norm": 0.3548579812049866, "learning_rate": 1.991894503565879e-05, "loss": 0.5859, "step": 3051 }, { "epoch": 0.08380010982976387, "grad_norm": 0.3492199182510376, "learning_rate": 1.9918890148246472e-05, "loss": 0.5503, "step": 3052 }, { "epoch": 0.08382756727073037, "grad_norm": 0.3771657645702362, "learning_rate": 1.991883524233226e-05, "loss": 0.5398, "step": 3053 }, { "epoch": 0.08385502471169687, "grad_norm": 0.3610140383243561, "learning_rate": 1.9918780317916258e-05, "loss": 0.5348, "step": 3054 }, { "epoch": 0.08388248215266338, "grad_norm": 0.37360113859176636, "learning_rate": 1.991872537499856e-05, "loss": 0.5061, "step": 3055 }, { "epoch": 0.08390993959362987, "grad_norm": 0.3414529263973236, "learning_rate": 1.9918670413579274e-05, "loss": 0.5572, "step": 3056 }, { "epoch": 0.08393739703459638, "grad_norm": 0.41530749201774597, "learning_rate": 1.9918615433658504e-05, "loss": 0.5713, "step": 3057 }, { "epoch": 0.08396485447556287, "grad_norm": 0.3743249475955963, "learning_rate": 1.991856043523635e-05, "loss": 0.5976, "step": 3058 }, { "epoch": 0.08399231191652938, "grad_norm": 0.3908880949020386, "learning_rate": 1.9918505418312916e-05, "loss": 0.5935, "step": 3059 }, { "epoch": 0.08401976935749589, "grad_norm": 0.37082165479660034, "learning_rate": 1.9918450382888304e-05, "loss": 0.5727, "step": 3060 }, { "epoch": 0.08404722679846238, "grad_norm": 0.4048471450805664, "learning_rate": 1.9918395328962615e-05, "loss": 0.6256, "step": 3061 }, { "epoch": 0.08407468423942889, "grad_norm": 0.346057265996933, "learning_rate": 1.9918340256535954e-05, "loss": 0.5468, "step": 3062 }, { "epoch": 0.08410214168039538, "grad_norm": 0.3500054180622101, "learning_rate": 1.9918285165608424e-05, "loss": 0.5073, "step": 3063 }, { "epoch": 0.08412959912136189, "grad_norm": 0.354735791683197, "learning_rate": 1.991823005618012e-05, "loss": 0.6059, "step": 3064 }, { "epoch": 0.08415705656232839, "grad_norm": 0.3932260572910309, "learning_rate": 1.991817492825116e-05, "loss": 0.4772, "step": 3065 }, { "epoch": 0.0841845140032949, "grad_norm": 0.4607710838317871, "learning_rate": 1.9918119781821636e-05, "loss": 0.5828, "step": 3066 }, { "epoch": 0.08421197144426139, "grad_norm": 0.38526609539985657, "learning_rate": 1.991806461689165e-05, "loss": 0.6468, "step": 3067 }, { "epoch": 0.0842394288852279, "grad_norm": 0.355780690908432, "learning_rate": 1.991800943346131e-05, "loss": 0.49, "step": 3068 }, { "epoch": 0.0842668863261944, "grad_norm": 0.38191473484039307, "learning_rate": 1.9917954231530716e-05, "loss": 0.51, "step": 3069 }, { "epoch": 0.0842943437671609, "grad_norm": 0.3502902686595917, "learning_rate": 1.9917899011099972e-05, "loss": 0.4632, "step": 3070 }, { "epoch": 0.0843218012081274, "grad_norm": 1.0015678405761719, "learning_rate": 1.9917843772169182e-05, "loss": 0.5576, "step": 3071 }, { "epoch": 0.0843492586490939, "grad_norm": 0.3703102171421051, "learning_rate": 1.9917788514738447e-05, "loss": 0.5109, "step": 3072 }, { "epoch": 0.08437671609006041, "grad_norm": 0.3525243401527405, "learning_rate": 1.9917733238807872e-05, "loss": 0.4878, "step": 3073 }, { "epoch": 0.0844041735310269, "grad_norm": 0.4019070267677307, "learning_rate": 1.991767794437756e-05, "loss": 0.5873, "step": 3074 }, { "epoch": 0.08443163097199341, "grad_norm": 0.3399433493614197, "learning_rate": 1.991762263144761e-05, "loss": 0.522, "step": 3075 }, { "epoch": 0.08445908841295992, "grad_norm": 0.4678155779838562, "learning_rate": 1.991756730001813e-05, "loss": 0.5594, "step": 3076 }, { "epoch": 0.08448654585392641, "grad_norm": 0.39123469591140747, "learning_rate": 1.9917511950089224e-05, "loss": 0.5387, "step": 3077 }, { "epoch": 0.08451400329489292, "grad_norm": 0.3664635419845581, "learning_rate": 1.991745658166099e-05, "loss": 0.53, "step": 3078 }, { "epoch": 0.08454146073585941, "grad_norm": 0.3622594475746155, "learning_rate": 1.9917401194733538e-05, "loss": 0.555, "step": 3079 }, { "epoch": 0.08456891817682592, "grad_norm": 0.35493385791778564, "learning_rate": 1.991734578930696e-05, "loss": 0.5901, "step": 3080 }, { "epoch": 0.08459637561779242, "grad_norm": 0.51849764585495, "learning_rate": 1.9917290365381373e-05, "loss": 0.5319, "step": 3081 }, { "epoch": 0.08462383305875892, "grad_norm": 0.34235748648643494, "learning_rate": 1.9917234922956874e-05, "loss": 0.5791, "step": 3082 }, { "epoch": 0.08465129049972543, "grad_norm": 0.39979732036590576, "learning_rate": 1.9917179462033568e-05, "loss": 0.6167, "step": 3083 }, { "epoch": 0.08467874794069193, "grad_norm": 0.38724082708358765, "learning_rate": 1.991712398261155e-05, "loss": 0.6287, "step": 3084 }, { "epoch": 0.08470620538165843, "grad_norm": 0.3478383421897888, "learning_rate": 1.9917068484690937e-05, "loss": 0.521, "step": 3085 }, { "epoch": 0.08473366282262493, "grad_norm": 0.3568466305732727, "learning_rate": 1.9917012968271824e-05, "loss": 0.6104, "step": 3086 }, { "epoch": 0.08476112026359144, "grad_norm": 0.3691876232624054, "learning_rate": 1.9916957433354316e-05, "loss": 0.5038, "step": 3087 }, { "epoch": 0.08478857770455793, "grad_norm": 0.3892597556114197, "learning_rate": 1.991690187993852e-05, "loss": 0.5951, "step": 3088 }, { "epoch": 0.08481603514552444, "grad_norm": 0.3788401484489441, "learning_rate": 1.9916846308024532e-05, "loss": 0.5338, "step": 3089 }, { "epoch": 0.08484349258649095, "grad_norm": 0.3392800986766815, "learning_rate": 1.9916790717612467e-05, "loss": 0.5263, "step": 3090 }, { "epoch": 0.08487095002745744, "grad_norm": 0.36497923731803894, "learning_rate": 1.991673510870242e-05, "loss": 0.5213, "step": 3091 }, { "epoch": 0.08489840746842395, "grad_norm": 0.485361248254776, "learning_rate": 1.9916679481294493e-05, "loss": 0.5762, "step": 3092 }, { "epoch": 0.08492586490939044, "grad_norm": 0.4000490605831146, "learning_rate": 1.9916623835388797e-05, "loss": 0.5841, "step": 3093 }, { "epoch": 0.08495332235035695, "grad_norm": 0.34389007091522217, "learning_rate": 1.991656817098543e-05, "loss": 0.5151, "step": 3094 }, { "epoch": 0.08498077979132344, "grad_norm": 0.35685890913009644, "learning_rate": 1.9916512488084502e-05, "loss": 0.5094, "step": 3095 }, { "epoch": 0.08500823723228995, "grad_norm": 0.43347710371017456, "learning_rate": 1.9916456786686113e-05, "loss": 0.5382, "step": 3096 }, { "epoch": 0.08503569467325645, "grad_norm": 0.38122981786727905, "learning_rate": 1.9916401066790365e-05, "loss": 0.57, "step": 3097 }, { "epoch": 0.08506315211422295, "grad_norm": 0.33660954236984253, "learning_rate": 1.9916345328397365e-05, "loss": 0.489, "step": 3098 }, { "epoch": 0.08509060955518946, "grad_norm": 0.3994135856628418, "learning_rate": 1.9916289571507215e-05, "loss": 0.5919, "step": 3099 }, { "epoch": 0.08511806699615596, "grad_norm": 0.39044275879859924, "learning_rate": 1.991623379612002e-05, "loss": 0.5619, "step": 3100 }, { "epoch": 0.08514552443712246, "grad_norm": 0.3638702929019928, "learning_rate": 1.9916178002235886e-05, "loss": 0.545, "step": 3101 }, { "epoch": 0.08517298187808896, "grad_norm": 0.3598957359790802, "learning_rate": 1.9916122189854918e-05, "loss": 0.5219, "step": 3102 }, { "epoch": 0.08520043931905547, "grad_norm": 0.353473961353302, "learning_rate": 1.991606635897721e-05, "loss": 0.5363, "step": 3103 }, { "epoch": 0.08522789676002196, "grad_norm": 0.3923407196998596, "learning_rate": 1.991601050960288e-05, "loss": 0.6281, "step": 3104 }, { "epoch": 0.08525535420098847, "grad_norm": 0.4042568504810333, "learning_rate": 1.991595464173202e-05, "loss": 0.5601, "step": 3105 }, { "epoch": 0.08528281164195498, "grad_norm": 0.3507134020328522, "learning_rate": 1.9915898755364743e-05, "loss": 0.6007, "step": 3106 }, { "epoch": 0.08531026908292147, "grad_norm": 0.38078415393829346, "learning_rate": 1.991584285050115e-05, "loss": 0.612, "step": 3107 }, { "epoch": 0.08533772652388798, "grad_norm": 0.3405579626560211, "learning_rate": 1.9915786927141344e-05, "loss": 0.5189, "step": 3108 }, { "epoch": 0.08536518396485447, "grad_norm": 0.34907203912734985, "learning_rate": 1.991573098528543e-05, "loss": 0.5221, "step": 3109 }, { "epoch": 0.08539264140582098, "grad_norm": 0.3139292299747467, "learning_rate": 1.9915675024933514e-05, "loss": 0.5014, "step": 3110 }, { "epoch": 0.08542009884678747, "grad_norm": 0.39074277877807617, "learning_rate": 1.99156190460857e-05, "loss": 0.616, "step": 3111 }, { "epoch": 0.08544755628775398, "grad_norm": 0.3312952518463135, "learning_rate": 1.991556304874209e-05, "loss": 0.458, "step": 3112 }, { "epoch": 0.08547501372872049, "grad_norm": 0.6165147423744202, "learning_rate": 1.9915507032902793e-05, "loss": 0.6427, "step": 3113 }, { "epoch": 0.08550247116968698, "grad_norm": 0.33941954374313354, "learning_rate": 1.991545099856791e-05, "loss": 0.5458, "step": 3114 }, { "epoch": 0.08552992861065349, "grad_norm": 0.37801194190979004, "learning_rate": 1.9915394945737543e-05, "loss": 0.5744, "step": 3115 }, { "epoch": 0.08555738605161999, "grad_norm": 0.3633836507797241, "learning_rate": 1.9915338874411803e-05, "loss": 0.555, "step": 3116 }, { "epoch": 0.0855848434925865, "grad_norm": 0.3239288628101349, "learning_rate": 1.9915282784590793e-05, "loss": 0.4792, "step": 3117 }, { "epoch": 0.08561230093355299, "grad_norm": 0.34880468249320984, "learning_rate": 1.991522667627461e-05, "loss": 0.6396, "step": 3118 }, { "epoch": 0.0856397583745195, "grad_norm": 0.38951992988586426, "learning_rate": 1.991517054946337e-05, "loss": 0.5702, "step": 3119 }, { "epoch": 0.085667215815486, "grad_norm": 0.4017128050327301, "learning_rate": 1.991511440415717e-05, "loss": 0.5329, "step": 3120 }, { "epoch": 0.0856946732564525, "grad_norm": 0.3894684314727783, "learning_rate": 1.991505824035612e-05, "loss": 0.5378, "step": 3121 }, { "epoch": 0.085722130697419, "grad_norm": 0.36977410316467285, "learning_rate": 1.9915002058060318e-05, "loss": 0.5595, "step": 3122 }, { "epoch": 0.0857495881383855, "grad_norm": 0.38841933012008667, "learning_rate": 1.9914945857269874e-05, "loss": 0.5602, "step": 3123 }, { "epoch": 0.08577704557935201, "grad_norm": 0.34277161955833435, "learning_rate": 1.9914889637984892e-05, "loss": 0.5242, "step": 3124 }, { "epoch": 0.0858045030203185, "grad_norm": 0.40681585669517517, "learning_rate": 1.9914833400205474e-05, "loss": 0.5235, "step": 3125 }, { "epoch": 0.08583196046128501, "grad_norm": 0.3350759446620941, "learning_rate": 1.9914777143931733e-05, "loss": 0.4867, "step": 3126 }, { "epoch": 0.08585941790225152, "grad_norm": 0.37349146604537964, "learning_rate": 1.9914720869163762e-05, "loss": 0.554, "step": 3127 }, { "epoch": 0.08588687534321801, "grad_norm": 0.38777318596839905, "learning_rate": 1.9914664575901678e-05, "loss": 0.5376, "step": 3128 }, { "epoch": 0.08591433278418452, "grad_norm": 0.4212503731250763, "learning_rate": 1.9914608264145574e-05, "loss": 0.6626, "step": 3129 }, { "epoch": 0.08594179022515101, "grad_norm": 0.4425583481788635, "learning_rate": 1.9914551933895566e-05, "loss": 0.5283, "step": 3130 }, { "epoch": 0.08596924766611752, "grad_norm": 0.35119709372520447, "learning_rate": 1.9914495585151753e-05, "loss": 0.494, "step": 3131 }, { "epoch": 0.08599670510708401, "grad_norm": 0.3854362964630127, "learning_rate": 1.9914439217914242e-05, "loss": 0.6496, "step": 3132 }, { "epoch": 0.08602416254805052, "grad_norm": 0.375833123922348, "learning_rate": 1.9914382832183136e-05, "loss": 0.5762, "step": 3133 }, { "epoch": 0.08605161998901702, "grad_norm": 0.3516687750816345, "learning_rate": 1.9914326427958543e-05, "loss": 0.47, "step": 3134 }, { "epoch": 0.08607907742998352, "grad_norm": 0.34540843963623047, "learning_rate": 1.9914270005240564e-05, "loss": 0.5408, "step": 3135 }, { "epoch": 0.08610653487095003, "grad_norm": 0.3656224012374878, "learning_rate": 1.9914213564029308e-05, "loss": 0.532, "step": 3136 }, { "epoch": 0.08613399231191653, "grad_norm": 0.33878353238105774, "learning_rate": 1.991415710432488e-05, "loss": 0.5682, "step": 3137 }, { "epoch": 0.08616144975288303, "grad_norm": 0.3688545823097229, "learning_rate": 1.9914100626127388e-05, "loss": 0.5719, "step": 3138 }, { "epoch": 0.08618890719384953, "grad_norm": 0.43538740277290344, "learning_rate": 1.991404412943693e-05, "loss": 0.6257, "step": 3139 }, { "epoch": 0.08621636463481604, "grad_norm": 0.3873177766799927, "learning_rate": 1.9913987614253616e-05, "loss": 0.5849, "step": 3140 }, { "epoch": 0.08624382207578253, "grad_norm": 0.3697783350944519, "learning_rate": 1.9913931080577554e-05, "loss": 0.5475, "step": 3141 }, { "epoch": 0.08627127951674904, "grad_norm": 0.34691882133483887, "learning_rate": 1.9913874528408844e-05, "loss": 0.532, "step": 3142 }, { "epoch": 0.08629873695771555, "grad_norm": 0.3783372938632965, "learning_rate": 1.9913817957747593e-05, "loss": 0.6379, "step": 3143 }, { "epoch": 0.08632619439868204, "grad_norm": 0.3556547164916992, "learning_rate": 1.9913761368593908e-05, "loss": 0.4955, "step": 3144 }, { "epoch": 0.08635365183964855, "grad_norm": 0.3570651412010193, "learning_rate": 1.9913704760947898e-05, "loss": 0.572, "step": 3145 }, { "epoch": 0.08638110928061504, "grad_norm": 0.3521994352340698, "learning_rate": 1.991364813480966e-05, "loss": 0.6704, "step": 3146 }, { "epoch": 0.08640856672158155, "grad_norm": 0.39765864610671997, "learning_rate": 1.9913591490179306e-05, "loss": 0.5703, "step": 3147 }, { "epoch": 0.08643602416254804, "grad_norm": 0.3677360713481903, "learning_rate": 1.991353482705694e-05, "loss": 0.5795, "step": 3148 }, { "epoch": 0.08646348160351455, "grad_norm": 0.42152076959609985, "learning_rate": 1.9913478145442667e-05, "loss": 0.5003, "step": 3149 }, { "epoch": 0.08649093904448106, "grad_norm": 0.43698254227638245, "learning_rate": 1.991342144533659e-05, "loss": 0.6216, "step": 3150 }, { "epoch": 0.08651839648544755, "grad_norm": 0.438378244638443, "learning_rate": 1.9913364726738822e-05, "loss": 0.5804, "step": 3151 }, { "epoch": 0.08654585392641406, "grad_norm": 1.5449484586715698, "learning_rate": 1.9913307989649464e-05, "loss": 0.5545, "step": 3152 }, { "epoch": 0.08657331136738056, "grad_norm": 0.38163113594055176, "learning_rate": 1.991325123406862e-05, "loss": 0.633, "step": 3153 }, { "epoch": 0.08660076880834706, "grad_norm": 0.3833751678466797, "learning_rate": 1.9913194459996402e-05, "loss": 0.6094, "step": 3154 }, { "epoch": 0.08662822624931356, "grad_norm": 0.38165175914764404, "learning_rate": 1.991313766743291e-05, "loss": 0.6012, "step": 3155 }, { "epoch": 0.08665568369028007, "grad_norm": 0.37796810269355774, "learning_rate": 1.9913080856378254e-05, "loss": 0.5772, "step": 3156 }, { "epoch": 0.08668314113124657, "grad_norm": 0.37214577198028564, "learning_rate": 1.991302402683254e-05, "loss": 0.5849, "step": 3157 }, { "epoch": 0.08671059857221307, "grad_norm": 0.39197593927383423, "learning_rate": 1.991296717879587e-05, "loss": 0.6396, "step": 3158 }, { "epoch": 0.08673805601317958, "grad_norm": 0.3607301414012909, "learning_rate": 1.9912910312268353e-05, "loss": 0.4805, "step": 3159 }, { "epoch": 0.08676551345414607, "grad_norm": 0.3576720654964447, "learning_rate": 1.9912853427250095e-05, "loss": 0.5125, "step": 3160 }, { "epoch": 0.08679297089511258, "grad_norm": 0.356453001499176, "learning_rate": 1.99127965237412e-05, "loss": 0.5763, "step": 3161 }, { "epoch": 0.08682042833607907, "grad_norm": 0.40957915782928467, "learning_rate": 1.9912739601741774e-05, "loss": 0.566, "step": 3162 }, { "epoch": 0.08684788577704558, "grad_norm": 0.3592242896556854, "learning_rate": 1.9912682661251928e-05, "loss": 0.5737, "step": 3163 }, { "epoch": 0.08687534321801207, "grad_norm": 0.3964890241622925, "learning_rate": 1.9912625702271765e-05, "loss": 0.5506, "step": 3164 }, { "epoch": 0.08690280065897858, "grad_norm": 0.46052658557891846, "learning_rate": 1.991256872480139e-05, "loss": 0.5128, "step": 3165 }, { "epoch": 0.08693025809994509, "grad_norm": 0.3741527497768402, "learning_rate": 1.991251172884091e-05, "loss": 0.5418, "step": 3166 }, { "epoch": 0.08695771554091158, "grad_norm": 0.378779798746109, "learning_rate": 1.991245471439043e-05, "loss": 0.526, "step": 3167 }, { "epoch": 0.08698517298187809, "grad_norm": 0.3797495365142822, "learning_rate": 1.9912397681450066e-05, "loss": 0.5048, "step": 3168 }, { "epoch": 0.08701263042284459, "grad_norm": 0.33703213930130005, "learning_rate": 1.991234063001991e-05, "loss": 0.5335, "step": 3169 }, { "epoch": 0.0870400878638111, "grad_norm": 0.3675273060798645, "learning_rate": 1.9912283560100078e-05, "loss": 0.6253, "step": 3170 }, { "epoch": 0.08706754530477759, "grad_norm": 0.364955335855484, "learning_rate": 1.9912226471690673e-05, "loss": 0.6119, "step": 3171 }, { "epoch": 0.0870950027457441, "grad_norm": 0.44833487272262573, "learning_rate": 1.99121693647918e-05, "loss": 0.4763, "step": 3172 }, { "epoch": 0.0871224601867106, "grad_norm": 0.3704459071159363, "learning_rate": 1.991211223940357e-05, "loss": 0.5657, "step": 3173 }, { "epoch": 0.0871499176276771, "grad_norm": 0.3818432092666626, "learning_rate": 1.9912055095526088e-05, "loss": 0.5817, "step": 3174 }, { "epoch": 0.0871773750686436, "grad_norm": 0.4610784947872162, "learning_rate": 1.9911997933159454e-05, "loss": 0.6047, "step": 3175 }, { "epoch": 0.0872048325096101, "grad_norm": 0.4219365119934082, "learning_rate": 1.9911940752303785e-05, "loss": 0.5953, "step": 3176 }, { "epoch": 0.08723228995057661, "grad_norm": 0.36055439710617065, "learning_rate": 1.9911883552959183e-05, "loss": 0.558, "step": 3177 }, { "epoch": 0.0872597473915431, "grad_norm": 0.344022661447525, "learning_rate": 1.9911826335125752e-05, "loss": 0.4904, "step": 3178 }, { "epoch": 0.08728720483250961, "grad_norm": 0.40213847160339355, "learning_rate": 1.9911769098803605e-05, "loss": 0.474, "step": 3179 }, { "epoch": 0.08731466227347612, "grad_norm": 0.38067054748535156, "learning_rate": 1.9911711843992842e-05, "loss": 0.5852, "step": 3180 }, { "epoch": 0.08734211971444261, "grad_norm": 0.35548821091651917, "learning_rate": 1.9911654570693576e-05, "loss": 0.5455, "step": 3181 }, { "epoch": 0.08736957715540912, "grad_norm": 0.38751277327537537, "learning_rate": 1.991159727890591e-05, "loss": 0.4953, "step": 3182 }, { "epoch": 0.08739703459637561, "grad_norm": 0.4133763909339905, "learning_rate": 1.991153996862995e-05, "loss": 0.5387, "step": 3183 }, { "epoch": 0.08742449203734212, "grad_norm": 0.4060620665550232, "learning_rate": 1.9911482639865803e-05, "loss": 0.5214, "step": 3184 }, { "epoch": 0.08745194947830862, "grad_norm": 0.41407129168510437, "learning_rate": 1.9911425292613578e-05, "loss": 0.5348, "step": 3185 }, { "epoch": 0.08747940691927512, "grad_norm": 0.38008028268814087, "learning_rate": 1.9911367926873385e-05, "loss": 0.6123, "step": 3186 }, { "epoch": 0.08750686436024163, "grad_norm": 0.47845888137817383, "learning_rate": 1.991131054264533e-05, "loss": 0.5662, "step": 3187 }, { "epoch": 0.08753432180120813, "grad_norm": 0.4051632285118103, "learning_rate": 1.9911253139929513e-05, "loss": 0.4652, "step": 3188 }, { "epoch": 0.08756177924217463, "grad_norm": 0.35869109630584717, "learning_rate": 1.9911195718726043e-05, "loss": 0.5244, "step": 3189 }, { "epoch": 0.08758923668314113, "grad_norm": 0.4087565243244171, "learning_rate": 1.9911138279035032e-05, "loss": 0.5899, "step": 3190 }, { "epoch": 0.08761669412410764, "grad_norm": 0.351574182510376, "learning_rate": 1.9911080820856585e-05, "loss": 0.5304, "step": 3191 }, { "epoch": 0.08764415156507413, "grad_norm": 0.40920358896255493, "learning_rate": 1.991102334419081e-05, "loss": 0.5368, "step": 3192 }, { "epoch": 0.08767160900604064, "grad_norm": 0.35106855630874634, "learning_rate": 1.9910965849037814e-05, "loss": 0.4712, "step": 3193 }, { "epoch": 0.08769906644700715, "grad_norm": 0.36936140060424805, "learning_rate": 1.99109083353977e-05, "loss": 0.4543, "step": 3194 }, { "epoch": 0.08772652388797364, "grad_norm": 0.37517067790031433, "learning_rate": 1.9910850803270583e-05, "loss": 0.6124, "step": 3195 }, { "epoch": 0.08775398132894015, "grad_norm": 0.356742262840271, "learning_rate": 1.9910793252656565e-05, "loss": 0.5004, "step": 3196 }, { "epoch": 0.08778143876990664, "grad_norm": 0.39151257276535034, "learning_rate": 1.9910735683555752e-05, "loss": 0.4602, "step": 3197 }, { "epoch": 0.08780889621087315, "grad_norm": 0.33860597014427185, "learning_rate": 1.9910678095968256e-05, "loss": 0.5282, "step": 3198 }, { "epoch": 0.08783635365183964, "grad_norm": 0.3463510274887085, "learning_rate": 1.991062048989418e-05, "loss": 0.4902, "step": 3199 }, { "epoch": 0.08786381109280615, "grad_norm": 0.3738035261631012, "learning_rate": 1.9910562865333634e-05, "loss": 0.4942, "step": 3200 }, { "epoch": 0.08789126853377265, "grad_norm": 0.39467930793762207, "learning_rate": 1.991050522228673e-05, "loss": 0.5634, "step": 3201 }, { "epoch": 0.08791872597473915, "grad_norm": 0.3497433364391327, "learning_rate": 1.9910447560753565e-05, "loss": 0.5018, "step": 3202 }, { "epoch": 0.08794618341570566, "grad_norm": 0.37710538506507874, "learning_rate": 1.9910389880734255e-05, "loss": 0.6506, "step": 3203 }, { "epoch": 0.08797364085667216, "grad_norm": 0.33815863728523254, "learning_rate": 1.9910332182228905e-05, "loss": 0.4454, "step": 3204 }, { "epoch": 0.08800109829763866, "grad_norm": 0.485881507396698, "learning_rate": 1.991027446523762e-05, "loss": 0.5638, "step": 3205 }, { "epoch": 0.08802855573860516, "grad_norm": 0.38838228583335876, "learning_rate": 1.9910216729760512e-05, "loss": 0.4917, "step": 3206 }, { "epoch": 0.08805601317957167, "grad_norm": 0.3868916630744934, "learning_rate": 1.9910158975797686e-05, "loss": 0.5997, "step": 3207 }, { "epoch": 0.08808347062053816, "grad_norm": 0.45558539032936096, "learning_rate": 1.991010120334925e-05, "loss": 0.5815, "step": 3208 }, { "epoch": 0.08811092806150467, "grad_norm": 0.34407952427864075, "learning_rate": 1.9910043412415316e-05, "loss": 0.5396, "step": 3209 }, { "epoch": 0.08813838550247118, "grad_norm": 0.3800027370452881, "learning_rate": 1.9909985602995985e-05, "loss": 0.523, "step": 3210 }, { "epoch": 0.08816584294343767, "grad_norm": 0.43269798159599304, "learning_rate": 1.9909927775091367e-05, "loss": 0.5053, "step": 3211 }, { "epoch": 0.08819330038440418, "grad_norm": 0.4874632656574249, "learning_rate": 1.9909869928701573e-05, "loss": 0.62, "step": 3212 }, { "epoch": 0.08822075782537067, "grad_norm": 0.3243730366230011, "learning_rate": 1.9909812063826707e-05, "loss": 0.4477, "step": 3213 }, { "epoch": 0.08824821526633718, "grad_norm": 0.3117091655731201, "learning_rate": 1.990975418046688e-05, "loss": 0.5566, "step": 3214 }, { "epoch": 0.08827567270730367, "grad_norm": 0.47732481360435486, "learning_rate": 1.99096962786222e-05, "loss": 0.5949, "step": 3215 }, { "epoch": 0.08830313014827018, "grad_norm": 0.3632173240184784, "learning_rate": 1.9909638358292772e-05, "loss": 0.5925, "step": 3216 }, { "epoch": 0.08833058758923669, "grad_norm": 0.38406211137771606, "learning_rate": 1.9909580419478703e-05, "loss": 0.6046, "step": 3217 }, { "epoch": 0.08835804503020318, "grad_norm": 0.3682786524295807, "learning_rate": 1.9909522462180106e-05, "loss": 0.613, "step": 3218 }, { "epoch": 0.08838550247116969, "grad_norm": 0.3922516405582428, "learning_rate": 1.9909464486397087e-05, "loss": 0.5204, "step": 3219 }, { "epoch": 0.08841295991213619, "grad_norm": 0.40254315733909607, "learning_rate": 1.9909406492129757e-05, "loss": 0.5711, "step": 3220 }, { "epoch": 0.0884404173531027, "grad_norm": 0.32269617915153503, "learning_rate": 1.9909348479378216e-05, "loss": 0.4996, "step": 3221 }, { "epoch": 0.08846787479406919, "grad_norm": 0.3453953266143799, "learning_rate": 1.9909290448142583e-05, "loss": 0.5412, "step": 3222 }, { "epoch": 0.0884953322350357, "grad_norm": 0.37068918347358704, "learning_rate": 1.9909232398422956e-05, "loss": 0.5436, "step": 3223 }, { "epoch": 0.0885227896760022, "grad_norm": 0.34113579988479614, "learning_rate": 1.9909174330219448e-05, "loss": 0.5209, "step": 3224 }, { "epoch": 0.0885502471169687, "grad_norm": 0.3979324400424957, "learning_rate": 1.990911624353217e-05, "loss": 0.5778, "step": 3225 }, { "epoch": 0.0885777045579352, "grad_norm": 1.2976291179656982, "learning_rate": 1.9909058138361227e-05, "loss": 0.5628, "step": 3226 }, { "epoch": 0.0886051619989017, "grad_norm": 0.3309061825275421, "learning_rate": 1.9909000014706727e-05, "loss": 0.5404, "step": 3227 }, { "epoch": 0.08863261943986821, "grad_norm": 0.503724217414856, "learning_rate": 1.9908941872568775e-05, "loss": 0.5285, "step": 3228 }, { "epoch": 0.0886600768808347, "grad_norm": 0.40295854210853577, "learning_rate": 1.990888371194749e-05, "loss": 0.5184, "step": 3229 }, { "epoch": 0.08868753432180121, "grad_norm": 0.3858764171600342, "learning_rate": 1.9908825532842972e-05, "loss": 0.5014, "step": 3230 }, { "epoch": 0.0887149917627677, "grad_norm": 0.3580276072025299, "learning_rate": 1.9908767335255333e-05, "loss": 0.4904, "step": 3231 }, { "epoch": 0.08874244920373421, "grad_norm": 0.4700264036655426, "learning_rate": 1.990870911918468e-05, "loss": 0.5487, "step": 3232 }, { "epoch": 0.08876990664470072, "grad_norm": 0.47418081760406494, "learning_rate": 1.9908650884631125e-05, "loss": 0.6401, "step": 3233 }, { "epoch": 0.08879736408566721, "grad_norm": 0.34153982996940613, "learning_rate": 1.9908592631594768e-05, "loss": 0.5517, "step": 3234 }, { "epoch": 0.08882482152663372, "grad_norm": 0.4043576419353485, "learning_rate": 1.9908534360075726e-05, "loss": 0.5917, "step": 3235 }, { "epoch": 0.08885227896760022, "grad_norm": 0.3442865312099457, "learning_rate": 1.9908476070074106e-05, "loss": 0.505, "step": 3236 }, { "epoch": 0.08887973640856672, "grad_norm": 0.4370473325252533, "learning_rate": 1.9908417761590014e-05, "loss": 0.5944, "step": 3237 }, { "epoch": 0.08890719384953322, "grad_norm": 0.350563645362854, "learning_rate": 1.990835943462356e-05, "loss": 0.5141, "step": 3238 }, { "epoch": 0.08893465129049972, "grad_norm": 0.3351345360279083, "learning_rate": 1.9908301089174856e-05, "loss": 0.5673, "step": 3239 }, { "epoch": 0.08896210873146623, "grad_norm": 0.38889992237091064, "learning_rate": 1.9908242725244004e-05, "loss": 0.5701, "step": 3240 }, { "epoch": 0.08898956617243273, "grad_norm": 0.39961302280426025, "learning_rate": 1.990818434283112e-05, "loss": 0.5994, "step": 3241 }, { "epoch": 0.08901702361339923, "grad_norm": 0.361337274312973, "learning_rate": 1.990812594193631e-05, "loss": 0.494, "step": 3242 }, { "epoch": 0.08904448105436573, "grad_norm": 0.47824323177337646, "learning_rate": 1.9908067522559684e-05, "loss": 0.5308, "step": 3243 }, { "epoch": 0.08907193849533224, "grad_norm": 0.34467095136642456, "learning_rate": 1.9908009084701344e-05, "loss": 0.5732, "step": 3244 }, { "epoch": 0.08909939593629873, "grad_norm": 0.42201581597328186, "learning_rate": 1.9907950628361414e-05, "loss": 0.5472, "step": 3245 }, { "epoch": 0.08912685337726524, "grad_norm": 0.36185574531555176, "learning_rate": 1.9907892153539986e-05, "loss": 0.641, "step": 3246 }, { "epoch": 0.08915431081823175, "grad_norm": 0.42578285932540894, "learning_rate": 1.9907833660237178e-05, "loss": 0.6558, "step": 3247 }, { "epoch": 0.08918176825919824, "grad_norm": 0.44082021713256836, "learning_rate": 1.9907775148453103e-05, "loss": 0.5125, "step": 3248 }, { "epoch": 0.08920922570016475, "grad_norm": 0.3511110544204712, "learning_rate": 1.9907716618187857e-05, "loss": 0.5795, "step": 3249 }, { "epoch": 0.08923668314113124, "grad_norm": 0.3756183385848999, "learning_rate": 1.990765806944156e-05, "loss": 0.5426, "step": 3250 }, { "epoch": 0.08926414058209775, "grad_norm": 0.35160455107688904, "learning_rate": 1.9907599502214325e-05, "loss": 0.5534, "step": 3251 }, { "epoch": 0.08929159802306424, "grad_norm": 0.40765464305877686, "learning_rate": 1.990754091650625e-05, "loss": 0.6697, "step": 3252 }, { "epoch": 0.08931905546403075, "grad_norm": 0.37326428294181824, "learning_rate": 1.990748231231745e-05, "loss": 0.6373, "step": 3253 }, { "epoch": 0.08934651290499726, "grad_norm": 0.4163500666618347, "learning_rate": 1.990742368964803e-05, "loss": 0.5298, "step": 3254 }, { "epoch": 0.08937397034596375, "grad_norm": 0.43508976697921753, "learning_rate": 1.9907365048498107e-05, "loss": 0.564, "step": 3255 }, { "epoch": 0.08940142778693026, "grad_norm": 0.34837794303894043, "learning_rate": 1.9907306388867783e-05, "loss": 0.5273, "step": 3256 }, { "epoch": 0.08942888522789676, "grad_norm": 0.36456912755966187, "learning_rate": 1.990724771075717e-05, "loss": 0.5688, "step": 3257 }, { "epoch": 0.08945634266886326, "grad_norm": 0.38311299681663513, "learning_rate": 1.990718901416638e-05, "loss": 0.6306, "step": 3258 }, { "epoch": 0.08948380010982976, "grad_norm": 0.3830330967903137, "learning_rate": 1.9907130299095523e-05, "loss": 0.581, "step": 3259 }, { "epoch": 0.08951125755079627, "grad_norm": 0.36771926283836365, "learning_rate": 1.9907071565544704e-05, "loss": 0.5317, "step": 3260 }, { "epoch": 0.08953871499176277, "grad_norm": 0.6281590461730957, "learning_rate": 1.990701281351403e-05, "loss": 0.4892, "step": 3261 }, { "epoch": 0.08956617243272927, "grad_norm": 0.35221514105796814, "learning_rate": 1.9906954043003624e-05, "loss": 0.4505, "step": 3262 }, { "epoch": 0.08959362987369578, "grad_norm": 0.3861498534679413, "learning_rate": 1.9906895254013577e-05, "loss": 0.5761, "step": 3263 }, { "epoch": 0.08962108731466227, "grad_norm": 0.381055623292923, "learning_rate": 1.990683644654402e-05, "loss": 0.6021, "step": 3264 }, { "epoch": 0.08964854475562878, "grad_norm": 0.38482892513275146, "learning_rate": 1.990677762059504e-05, "loss": 0.5664, "step": 3265 }, { "epoch": 0.08967600219659527, "grad_norm": 0.3514528274536133, "learning_rate": 1.9906718776166762e-05, "loss": 0.5392, "step": 3266 }, { "epoch": 0.08970345963756178, "grad_norm": 0.37568554282188416, "learning_rate": 1.9906659913259294e-05, "loss": 0.6031, "step": 3267 }, { "epoch": 0.08973091707852827, "grad_norm": 0.38094088435173035, "learning_rate": 1.990660103187274e-05, "loss": 0.5906, "step": 3268 }, { "epoch": 0.08975837451949478, "grad_norm": 0.3820612132549286, "learning_rate": 1.9906542132007217e-05, "loss": 0.5201, "step": 3269 }, { "epoch": 0.08978583196046129, "grad_norm": 0.3533404469490051, "learning_rate": 1.9906483213662828e-05, "loss": 0.6214, "step": 3270 }, { "epoch": 0.08981328940142778, "grad_norm": 0.4257062077522278, "learning_rate": 1.9906424276839687e-05, "loss": 0.5819, "step": 3271 }, { "epoch": 0.08984074684239429, "grad_norm": 0.3551250994205475, "learning_rate": 1.9906365321537902e-05, "loss": 0.5308, "step": 3272 }, { "epoch": 0.08986820428336079, "grad_norm": 0.36842861771583557, "learning_rate": 1.9906306347757585e-05, "loss": 0.5757, "step": 3273 }, { "epoch": 0.0898956617243273, "grad_norm": 0.33690592646598816, "learning_rate": 1.9906247355498845e-05, "loss": 0.4816, "step": 3274 }, { "epoch": 0.08992311916529379, "grad_norm": 0.34285932779312134, "learning_rate": 1.990618834476179e-05, "loss": 0.544, "step": 3275 }, { "epoch": 0.0899505766062603, "grad_norm": 0.3523218035697937, "learning_rate": 1.9906129315546537e-05, "loss": 0.5945, "step": 3276 }, { "epoch": 0.0899780340472268, "grad_norm": 0.40730714797973633, "learning_rate": 1.9906070267853187e-05, "loss": 0.5816, "step": 3277 }, { "epoch": 0.0900054914881933, "grad_norm": 0.45997700095176697, "learning_rate": 1.9906011201681854e-05, "loss": 0.5108, "step": 3278 }, { "epoch": 0.0900329489291598, "grad_norm": 0.31813183426856995, "learning_rate": 1.990595211703265e-05, "loss": 0.3245, "step": 3279 }, { "epoch": 0.0900604063701263, "grad_norm": 0.36020800471305847, "learning_rate": 1.9905893013905682e-05, "loss": 0.5138, "step": 3280 }, { "epoch": 0.09008786381109281, "grad_norm": 0.37830880284309387, "learning_rate": 1.9905833892301067e-05, "loss": 0.6027, "step": 3281 }, { "epoch": 0.0901153212520593, "grad_norm": 0.581026554107666, "learning_rate": 1.9905774752218905e-05, "loss": 0.683, "step": 3282 }, { "epoch": 0.09014277869302581, "grad_norm": 0.3953525125980377, "learning_rate": 1.990571559365931e-05, "loss": 0.509, "step": 3283 }, { "epoch": 0.09017023613399232, "grad_norm": 0.35911622643470764, "learning_rate": 1.99056564166224e-05, "loss": 0.5607, "step": 3284 }, { "epoch": 0.09019769357495881, "grad_norm": 0.45917800068855286, "learning_rate": 1.9905597221108274e-05, "loss": 0.5731, "step": 3285 }, { "epoch": 0.09022515101592532, "grad_norm": 0.43529415130615234, "learning_rate": 1.990553800711705e-05, "loss": 0.6893, "step": 3286 }, { "epoch": 0.09025260845689181, "grad_norm": 0.3546833097934723, "learning_rate": 1.9905478774648836e-05, "loss": 0.5174, "step": 3287 }, { "epoch": 0.09028006589785832, "grad_norm": 0.3950420618057251, "learning_rate": 1.990541952370374e-05, "loss": 0.5849, "step": 3288 }, { "epoch": 0.09030752333882482, "grad_norm": 0.38023409247398376, "learning_rate": 1.990536025428188e-05, "loss": 0.5783, "step": 3289 }, { "epoch": 0.09033498077979132, "grad_norm": 0.39877286553382874, "learning_rate": 1.990530096638336e-05, "loss": 0.5562, "step": 3290 }, { "epoch": 0.09036243822075783, "grad_norm": 0.4194156229496002, "learning_rate": 1.9905241660008292e-05, "loss": 0.586, "step": 3291 }, { "epoch": 0.09038989566172433, "grad_norm": 0.3331613838672638, "learning_rate": 1.9905182335156786e-05, "loss": 0.5556, "step": 3292 }, { "epoch": 0.09041735310269083, "grad_norm": 0.3199431002140045, "learning_rate": 1.9905122991828954e-05, "loss": 0.4813, "step": 3293 }, { "epoch": 0.09044481054365733, "grad_norm": 0.34835976362228394, "learning_rate": 1.9905063630024908e-05, "loss": 0.5366, "step": 3294 }, { "epoch": 0.09047226798462384, "grad_norm": 0.4026298522949219, "learning_rate": 1.9905004249744755e-05, "loss": 0.6356, "step": 3295 }, { "epoch": 0.09049972542559033, "grad_norm": 0.3244951367378235, "learning_rate": 1.9904944850988608e-05, "loss": 0.4211, "step": 3296 }, { "epoch": 0.09052718286655684, "grad_norm": 0.3981974422931671, "learning_rate": 1.990488543375658e-05, "loss": 0.6026, "step": 3297 }, { "epoch": 0.09055464030752333, "grad_norm": 0.3729385435581207, "learning_rate": 1.9904825998048776e-05, "loss": 0.5789, "step": 3298 }, { "epoch": 0.09058209774848984, "grad_norm": 0.3340352177619934, "learning_rate": 1.990476654386531e-05, "loss": 0.5189, "step": 3299 }, { "epoch": 0.09060955518945635, "grad_norm": 0.396192342042923, "learning_rate": 1.9904707071206294e-05, "loss": 0.5299, "step": 3300 }, { "epoch": 0.09063701263042284, "grad_norm": 0.33879032731056213, "learning_rate": 1.990464758007184e-05, "loss": 0.5102, "step": 3301 }, { "epoch": 0.09066447007138935, "grad_norm": 0.35069993138313293, "learning_rate": 1.9904588070462055e-05, "loss": 0.5485, "step": 3302 }, { "epoch": 0.09069192751235584, "grad_norm": 0.3700973093509674, "learning_rate": 1.9904528542377052e-05, "loss": 0.6101, "step": 3303 }, { "epoch": 0.09071938495332235, "grad_norm": 0.35640865564346313, "learning_rate": 1.9904468995816946e-05, "loss": 0.5214, "step": 3304 }, { "epoch": 0.09074684239428885, "grad_norm": 0.3724553883075714, "learning_rate": 1.9904409430781838e-05, "loss": 0.5011, "step": 3305 }, { "epoch": 0.09077429983525535, "grad_norm": 0.3634449243545532, "learning_rate": 1.990434984727185e-05, "loss": 0.5365, "step": 3306 }, { "epoch": 0.09080175727622186, "grad_norm": 0.33786654472351074, "learning_rate": 1.990429024528709e-05, "loss": 0.5776, "step": 3307 }, { "epoch": 0.09082921471718836, "grad_norm": 0.36381298303604126, "learning_rate": 1.990423062482766e-05, "loss": 0.6049, "step": 3308 }, { "epoch": 0.09085667215815486, "grad_norm": 0.36748743057250977, "learning_rate": 1.9904170985893685e-05, "loss": 0.5322, "step": 3309 }, { "epoch": 0.09088412959912136, "grad_norm": 0.3544635474681854, "learning_rate": 1.990411132848527e-05, "loss": 0.5203, "step": 3310 }, { "epoch": 0.09091158704008787, "grad_norm": 0.38370415568351746, "learning_rate": 1.9904051652602522e-05, "loss": 0.4434, "step": 3311 }, { "epoch": 0.09093904448105436, "grad_norm": 0.36442410945892334, "learning_rate": 1.9903991958245558e-05, "loss": 0.488, "step": 3312 }, { "epoch": 0.09096650192202087, "grad_norm": 0.3688850402832031, "learning_rate": 1.9903932245414493e-05, "loss": 0.5716, "step": 3313 }, { "epoch": 0.09099395936298738, "grad_norm": 0.39044809341430664, "learning_rate": 1.9903872514109427e-05, "loss": 0.5285, "step": 3314 }, { "epoch": 0.09102141680395387, "grad_norm": 0.35171544551849365, "learning_rate": 1.990381276433048e-05, "loss": 0.5642, "step": 3315 }, { "epoch": 0.09104887424492038, "grad_norm": 0.39038726687431335, "learning_rate": 1.9903752996077762e-05, "loss": 0.5551, "step": 3316 }, { "epoch": 0.09107633168588687, "grad_norm": 0.43575209379196167, "learning_rate": 1.9903693209351384e-05, "loss": 0.5942, "step": 3317 }, { "epoch": 0.09110378912685338, "grad_norm": 0.3155750334262848, "learning_rate": 1.9903633404151458e-05, "loss": 0.4902, "step": 3318 }, { "epoch": 0.09113124656781987, "grad_norm": 0.3335115909576416, "learning_rate": 1.9903573580478092e-05, "loss": 0.5184, "step": 3319 }, { "epoch": 0.09115870400878638, "grad_norm": 0.37053316831588745, "learning_rate": 1.9903513738331402e-05, "loss": 0.5031, "step": 3320 }, { "epoch": 0.09118616144975289, "grad_norm": 0.3350338339805603, "learning_rate": 1.99034538777115e-05, "loss": 0.5159, "step": 3321 }, { "epoch": 0.09121361889071938, "grad_norm": 0.38758522272109985, "learning_rate": 1.9903393998618493e-05, "loss": 0.5561, "step": 3322 }, { "epoch": 0.09124107633168589, "grad_norm": 0.38113933801651, "learning_rate": 1.9903334101052497e-05, "loss": 0.5824, "step": 3323 }, { "epoch": 0.09126853377265239, "grad_norm": 0.37687766551971436, "learning_rate": 1.990327418501362e-05, "loss": 0.6104, "step": 3324 }, { "epoch": 0.0912959912136189, "grad_norm": 0.34970465302467346, "learning_rate": 1.990321425050198e-05, "loss": 0.4723, "step": 3325 }, { "epoch": 0.09132344865458539, "grad_norm": 0.515536367893219, "learning_rate": 1.990315429751768e-05, "loss": 0.5795, "step": 3326 }, { "epoch": 0.0913509060955519, "grad_norm": 0.38595297932624817, "learning_rate": 1.990309432606084e-05, "loss": 0.6075, "step": 3327 }, { "epoch": 0.0913783635365184, "grad_norm": 0.34674564003944397, "learning_rate": 1.9903034336131566e-05, "loss": 0.5248, "step": 3328 }, { "epoch": 0.0914058209774849, "grad_norm": 0.35332274436950684, "learning_rate": 1.9902974327729974e-05, "loss": 0.5464, "step": 3329 }, { "epoch": 0.0914332784184514, "grad_norm": 0.5265550017356873, "learning_rate": 1.9902914300856173e-05, "loss": 0.6205, "step": 3330 }, { "epoch": 0.0914607358594179, "grad_norm": 0.35014790296554565, "learning_rate": 1.9902854255510277e-05, "loss": 0.5144, "step": 3331 }, { "epoch": 0.09148819330038441, "grad_norm": 0.38389652967453003, "learning_rate": 1.9902794191692398e-05, "loss": 0.5643, "step": 3332 }, { "epoch": 0.0915156507413509, "grad_norm": 0.47311073541641235, "learning_rate": 1.9902734109402645e-05, "loss": 0.6407, "step": 3333 }, { "epoch": 0.09154310818231741, "grad_norm": 0.3439444303512573, "learning_rate": 1.9902674008641133e-05, "loss": 0.5394, "step": 3334 }, { "epoch": 0.0915705656232839, "grad_norm": 0.3157111406326294, "learning_rate": 1.9902613889407973e-05, "loss": 0.4521, "step": 3335 }, { "epoch": 0.09159802306425041, "grad_norm": 0.4152909219264984, "learning_rate": 1.9902553751703278e-05, "loss": 0.5966, "step": 3336 }, { "epoch": 0.09162548050521692, "grad_norm": 0.3670664131641388, "learning_rate": 1.9902493595527163e-05, "loss": 0.6135, "step": 3337 }, { "epoch": 0.09165293794618341, "grad_norm": 0.3874143958091736, "learning_rate": 1.9902433420879733e-05, "loss": 0.5597, "step": 3338 }, { "epoch": 0.09168039538714992, "grad_norm": 0.3405759036540985, "learning_rate": 1.9902373227761105e-05, "loss": 0.5361, "step": 3339 }, { "epoch": 0.09170785282811642, "grad_norm": 0.4539763331413269, "learning_rate": 1.990231301617139e-05, "loss": 0.5832, "step": 3340 }, { "epoch": 0.09173531026908292, "grad_norm": 0.32107529044151306, "learning_rate": 1.9902252786110702e-05, "loss": 0.487, "step": 3341 }, { "epoch": 0.09176276771004942, "grad_norm": 0.390642911195755, "learning_rate": 1.990219253757915e-05, "loss": 0.4933, "step": 3342 }, { "epoch": 0.09179022515101593, "grad_norm": 0.3959125280380249, "learning_rate": 1.990213227057685e-05, "loss": 0.5973, "step": 3343 }, { "epoch": 0.09181768259198243, "grad_norm": 0.3482478857040405, "learning_rate": 1.9902071985103913e-05, "loss": 0.5664, "step": 3344 }, { "epoch": 0.09184514003294893, "grad_norm": 0.3694511950016022, "learning_rate": 1.990201168116045e-05, "loss": 0.5507, "step": 3345 }, { "epoch": 0.09187259747391543, "grad_norm": 0.35642778873443604, "learning_rate": 1.9901951358746578e-05, "loss": 0.5391, "step": 3346 }, { "epoch": 0.09190005491488193, "grad_norm": 0.4408516585826874, "learning_rate": 1.9901891017862402e-05, "loss": 0.5298, "step": 3347 }, { "epoch": 0.09192751235584844, "grad_norm": 0.3846726715564728, "learning_rate": 1.990183065850804e-05, "loss": 0.6422, "step": 3348 }, { "epoch": 0.09195496979681493, "grad_norm": 0.5086424946784973, "learning_rate": 1.990177028068361e-05, "loss": 0.6184, "step": 3349 }, { "epoch": 0.09198242723778144, "grad_norm": 0.371178537607193, "learning_rate": 1.9901709884389212e-05, "loss": 0.5762, "step": 3350 }, { "epoch": 0.09200988467874795, "grad_norm": 0.3566742539405823, "learning_rate": 1.9901649469624963e-05, "loss": 0.6229, "step": 3351 }, { "epoch": 0.09203734211971444, "grad_norm": 0.34832340478897095, "learning_rate": 1.990158903639098e-05, "loss": 0.567, "step": 3352 }, { "epoch": 0.09206479956068095, "grad_norm": 0.5979407429695129, "learning_rate": 1.9901528584687374e-05, "loss": 0.6577, "step": 3353 }, { "epoch": 0.09209225700164744, "grad_norm": 0.38518014550209045, "learning_rate": 1.990146811451426e-05, "loss": 0.5362, "step": 3354 }, { "epoch": 0.09211971444261395, "grad_norm": 0.44032055139541626, "learning_rate": 1.990140762587174e-05, "loss": 0.5339, "step": 3355 }, { "epoch": 0.09214717188358044, "grad_norm": 0.3392046391963959, "learning_rate": 1.9901347118759942e-05, "loss": 0.4805, "step": 3356 }, { "epoch": 0.09217462932454695, "grad_norm": 0.946993887424469, "learning_rate": 1.990128659317897e-05, "loss": 0.5842, "step": 3357 }, { "epoch": 0.09220208676551346, "grad_norm": 0.3699509799480438, "learning_rate": 1.9901226049128934e-05, "loss": 0.4787, "step": 3358 }, { "epoch": 0.09222954420647995, "grad_norm": 0.370581716299057, "learning_rate": 1.9901165486609955e-05, "loss": 0.565, "step": 3359 }, { "epoch": 0.09225700164744646, "grad_norm": 0.34993109107017517, "learning_rate": 1.990110490562214e-05, "loss": 0.6178, "step": 3360 }, { "epoch": 0.09228445908841296, "grad_norm": 0.4378306269645691, "learning_rate": 1.9901044306165607e-05, "loss": 0.5873, "step": 3361 }, { "epoch": 0.09231191652937946, "grad_norm": 0.35035014152526855, "learning_rate": 1.9900983688240465e-05, "loss": 0.5738, "step": 3362 }, { "epoch": 0.09233937397034596, "grad_norm": 0.35814690589904785, "learning_rate": 1.9900923051846826e-05, "loss": 0.4746, "step": 3363 }, { "epoch": 0.09236683141131247, "grad_norm": 0.39350980520248413, "learning_rate": 1.990086239698481e-05, "loss": 0.6135, "step": 3364 }, { "epoch": 0.09239428885227896, "grad_norm": 0.40833815932273865, "learning_rate": 1.9900801723654525e-05, "loss": 0.6035, "step": 3365 }, { "epoch": 0.09242174629324547, "grad_norm": 0.3711360991001129, "learning_rate": 1.9900741031856082e-05, "loss": 0.5181, "step": 3366 }, { "epoch": 0.09244920373421198, "grad_norm": 0.38028642535209656, "learning_rate": 1.9900680321589597e-05, "loss": 0.639, "step": 3367 }, { "epoch": 0.09247666117517847, "grad_norm": 0.45201563835144043, "learning_rate": 1.9900619592855184e-05, "loss": 0.617, "step": 3368 }, { "epoch": 0.09250411861614498, "grad_norm": 0.6397286057472229, "learning_rate": 1.9900558845652957e-05, "loss": 0.6061, "step": 3369 }, { "epoch": 0.09253157605711147, "grad_norm": 0.416930615901947, "learning_rate": 1.990049807998303e-05, "loss": 0.509, "step": 3370 }, { "epoch": 0.09255903349807798, "grad_norm": 0.3858082592487335, "learning_rate": 1.9900437295845513e-05, "loss": 0.6243, "step": 3371 }, { "epoch": 0.09258649093904447, "grad_norm": 0.34903237223625183, "learning_rate": 1.9900376493240517e-05, "loss": 0.5225, "step": 3372 }, { "epoch": 0.09261394838001098, "grad_norm": 0.352664589881897, "learning_rate": 1.990031567216816e-05, "loss": 0.6224, "step": 3373 }, { "epoch": 0.09264140582097749, "grad_norm": 0.3579198718070984, "learning_rate": 1.9900254832628553e-05, "loss": 0.5675, "step": 3374 }, { "epoch": 0.09266886326194398, "grad_norm": 0.39280208945274353, "learning_rate": 1.9900193974621815e-05, "loss": 0.5689, "step": 3375 }, { "epoch": 0.09269632070291049, "grad_norm": 0.4074755012989044, "learning_rate": 1.9900133098148052e-05, "loss": 0.5228, "step": 3376 }, { "epoch": 0.09272377814387699, "grad_norm": 0.3777313232421875, "learning_rate": 1.9900072203207384e-05, "loss": 0.5478, "step": 3377 }, { "epoch": 0.0927512355848435, "grad_norm": 0.3825697898864746, "learning_rate": 1.990001128979992e-05, "loss": 0.599, "step": 3378 }, { "epoch": 0.09277869302580999, "grad_norm": 0.3777071535587311, "learning_rate": 1.989995035792578e-05, "loss": 0.5015, "step": 3379 }, { "epoch": 0.0928061504667765, "grad_norm": 0.38451850414276123, "learning_rate": 1.9899889407585067e-05, "loss": 0.6015, "step": 3380 }, { "epoch": 0.092833607907743, "grad_norm": 0.3894476592540741, "learning_rate": 1.98998284387779e-05, "loss": 0.5971, "step": 3381 }, { "epoch": 0.0928610653487095, "grad_norm": 0.35124829411506653, "learning_rate": 1.9899767451504396e-05, "loss": 0.5053, "step": 3382 }, { "epoch": 0.092888522789676, "grad_norm": 0.36521998047828674, "learning_rate": 1.9899706445764666e-05, "loss": 0.5857, "step": 3383 }, { "epoch": 0.0929159802306425, "grad_norm": 0.35375410318374634, "learning_rate": 1.9899645421558823e-05, "loss": 0.5197, "step": 3384 }, { "epoch": 0.09294343767160901, "grad_norm": 0.36403024196624756, "learning_rate": 1.989958437888698e-05, "loss": 0.5606, "step": 3385 }, { "epoch": 0.0929708951125755, "grad_norm": 0.3416014611721039, "learning_rate": 1.9899523317749256e-05, "loss": 0.5924, "step": 3386 }, { "epoch": 0.09299835255354201, "grad_norm": 0.3591092526912689, "learning_rate": 1.989946223814576e-05, "loss": 0.4471, "step": 3387 }, { "epoch": 0.09302580999450852, "grad_norm": 0.3537389039993286, "learning_rate": 1.9899401140076607e-05, "loss": 0.6531, "step": 3388 }, { "epoch": 0.09305326743547501, "grad_norm": 0.3484795093536377, "learning_rate": 1.9899340023541912e-05, "loss": 0.5751, "step": 3389 }, { "epoch": 0.09308072487644152, "grad_norm": 0.38329634070396423, "learning_rate": 1.9899278888541788e-05, "loss": 0.5141, "step": 3390 }, { "epoch": 0.09310818231740801, "grad_norm": 0.3362230956554413, "learning_rate": 1.989921773507635e-05, "loss": 0.4657, "step": 3391 }, { "epoch": 0.09313563975837452, "grad_norm": 0.360150009393692, "learning_rate": 1.9899156563145712e-05, "loss": 0.5389, "step": 3392 }, { "epoch": 0.09316309719934102, "grad_norm": 0.3663862347602844, "learning_rate": 1.9899095372749984e-05, "loss": 0.5404, "step": 3393 }, { "epoch": 0.09319055464030752, "grad_norm": 0.38615378737449646, "learning_rate": 1.9899034163889288e-05, "loss": 0.5797, "step": 3394 }, { "epoch": 0.09321801208127403, "grad_norm": 0.36257484555244446, "learning_rate": 1.989897293656373e-05, "loss": 0.5439, "step": 3395 }, { "epoch": 0.09324546952224053, "grad_norm": 0.35438841581344604, "learning_rate": 1.989891169077343e-05, "loss": 0.5804, "step": 3396 }, { "epoch": 0.09327292696320703, "grad_norm": 0.47881948947906494, "learning_rate": 1.9898850426518504e-05, "loss": 0.6015, "step": 3397 }, { "epoch": 0.09330038440417353, "grad_norm": 0.3705235421657562, "learning_rate": 1.989878914379906e-05, "loss": 0.5405, "step": 3398 }, { "epoch": 0.09332784184514004, "grad_norm": 0.3596845865249634, "learning_rate": 1.9898727842615213e-05, "loss": 0.5738, "step": 3399 }, { "epoch": 0.09335529928610653, "grad_norm": 0.3623288571834564, "learning_rate": 1.989866652296708e-05, "loss": 0.4869, "step": 3400 }, { "epoch": 0.09338275672707304, "grad_norm": 0.3373353183269501, "learning_rate": 1.9898605184854773e-05, "loss": 0.5091, "step": 3401 }, { "epoch": 0.09341021416803953, "grad_norm": 0.35418644547462463, "learning_rate": 1.9898543828278408e-05, "loss": 0.4781, "step": 3402 }, { "epoch": 0.09343767160900604, "grad_norm": 0.3418307900428772, "learning_rate": 1.9898482453238108e-05, "loss": 0.5739, "step": 3403 }, { "epoch": 0.09346512904997255, "grad_norm": 0.38305917382240295, "learning_rate": 1.9898421059733966e-05, "loss": 0.6026, "step": 3404 }, { "epoch": 0.09349258649093904, "grad_norm": 0.3696637451648712, "learning_rate": 1.989835964776612e-05, "loss": 0.6124, "step": 3405 }, { "epoch": 0.09352004393190555, "grad_norm": 0.3388640880584717, "learning_rate": 1.989829821733467e-05, "loss": 0.5274, "step": 3406 }, { "epoch": 0.09354750137287204, "grad_norm": 0.3434932231903076, "learning_rate": 1.9898236768439735e-05, "loss": 0.5355, "step": 3407 }, { "epoch": 0.09357495881383855, "grad_norm": 0.360390841960907, "learning_rate": 1.989817530108143e-05, "loss": 0.5937, "step": 3408 }, { "epoch": 0.09360241625480505, "grad_norm": 0.657248854637146, "learning_rate": 1.9898113815259865e-05, "loss": 0.6228, "step": 3409 }, { "epoch": 0.09362987369577155, "grad_norm": 0.39952245354652405, "learning_rate": 1.9898052310975164e-05, "loss": 0.6106, "step": 3410 }, { "epoch": 0.09365733113673806, "grad_norm": 0.3973071575164795, "learning_rate": 1.989799078822743e-05, "loss": 0.6205, "step": 3411 }, { "epoch": 0.09368478857770456, "grad_norm": 0.34527167677879333, "learning_rate": 1.989792924701679e-05, "loss": 0.5285, "step": 3412 }, { "epoch": 0.09371224601867106, "grad_norm": 2.2323694229125977, "learning_rate": 1.989786768734335e-05, "loss": 0.5093, "step": 3413 }, { "epoch": 0.09373970345963756, "grad_norm": 0.4025181829929352, "learning_rate": 1.989780610920723e-05, "loss": 0.5928, "step": 3414 }, { "epoch": 0.09376716090060407, "grad_norm": 0.35538190603256226, "learning_rate": 1.9897744512608542e-05, "loss": 0.5988, "step": 3415 }, { "epoch": 0.09379461834157056, "grad_norm": 0.3549397587776184, "learning_rate": 1.9897682897547402e-05, "loss": 0.5257, "step": 3416 }, { "epoch": 0.09382207578253707, "grad_norm": 0.3269728124141693, "learning_rate": 1.9897621264023922e-05, "loss": 0.4512, "step": 3417 }, { "epoch": 0.09384953322350358, "grad_norm": 0.3744167983531952, "learning_rate": 1.989755961203822e-05, "loss": 0.6002, "step": 3418 }, { "epoch": 0.09387699066447007, "grad_norm": 0.3477419912815094, "learning_rate": 1.9897497941590412e-05, "loss": 0.5013, "step": 3419 }, { "epoch": 0.09390444810543658, "grad_norm": 0.33220696449279785, "learning_rate": 1.989743625268061e-05, "loss": 0.501, "step": 3420 }, { "epoch": 0.09393190554640307, "grad_norm": 0.36729490756988525, "learning_rate": 1.9897374545308928e-05, "loss": 0.533, "step": 3421 }, { "epoch": 0.09395936298736958, "grad_norm": 0.4926360547542572, "learning_rate": 1.989731281947549e-05, "loss": 0.5656, "step": 3422 }, { "epoch": 0.09398682042833607, "grad_norm": 0.35383841395378113, "learning_rate": 1.98972510751804e-05, "loss": 0.5479, "step": 3423 }, { "epoch": 0.09401427786930258, "grad_norm": 0.3999338448047638, "learning_rate": 1.989718931242378e-05, "loss": 0.5334, "step": 3424 }, { "epoch": 0.09404173531026909, "grad_norm": 0.3541015684604645, "learning_rate": 1.989712753120574e-05, "loss": 0.5031, "step": 3425 }, { "epoch": 0.09406919275123558, "grad_norm": 0.3791353106498718, "learning_rate": 1.9897065731526402e-05, "loss": 0.6643, "step": 3426 }, { "epoch": 0.09409665019220209, "grad_norm": 0.38609379529953003, "learning_rate": 1.9897003913385874e-05, "loss": 0.5845, "step": 3427 }, { "epoch": 0.09412410763316859, "grad_norm": 0.3451629877090454, "learning_rate": 1.989694207678428e-05, "loss": 0.514, "step": 3428 }, { "epoch": 0.0941515650741351, "grad_norm": 0.31919342279434204, "learning_rate": 1.9896880221721727e-05, "loss": 0.5469, "step": 3429 }, { "epoch": 0.09417902251510159, "grad_norm": 0.36091935634613037, "learning_rate": 1.9896818348198336e-05, "loss": 0.5712, "step": 3430 }, { "epoch": 0.0942064799560681, "grad_norm": 0.3924546539783478, "learning_rate": 1.9896756456214214e-05, "loss": 0.5818, "step": 3431 }, { "epoch": 0.09423393739703459, "grad_norm": 0.37218177318573, "learning_rate": 1.9896694545769487e-05, "loss": 0.5645, "step": 3432 }, { "epoch": 0.0942613948380011, "grad_norm": 0.3264714479446411, "learning_rate": 1.9896632616864266e-05, "loss": 0.5092, "step": 3433 }, { "epoch": 0.0942888522789676, "grad_norm": 0.36203232407569885, "learning_rate": 1.989657066949867e-05, "loss": 0.607, "step": 3434 }, { "epoch": 0.0943163097199341, "grad_norm": 0.4394599497318268, "learning_rate": 1.9896508703672804e-05, "loss": 0.5357, "step": 3435 }, { "epoch": 0.09434376716090061, "grad_norm": 0.37444010376930237, "learning_rate": 1.9896446719386794e-05, "loss": 0.5325, "step": 3436 }, { "epoch": 0.0943712246018671, "grad_norm": 0.47420066595077515, "learning_rate": 1.9896384716640754e-05, "loss": 0.5821, "step": 3437 }, { "epoch": 0.09439868204283361, "grad_norm": 0.37569135427474976, "learning_rate": 1.9896322695434797e-05, "loss": 0.5169, "step": 3438 }, { "epoch": 0.0944261394838001, "grad_norm": 0.39137929677963257, "learning_rate": 1.9896260655769037e-05, "loss": 0.6035, "step": 3439 }, { "epoch": 0.09445359692476661, "grad_norm": 0.3653644323348999, "learning_rate": 1.9896198597643593e-05, "loss": 0.6186, "step": 3440 }, { "epoch": 0.09448105436573312, "grad_norm": 0.3543521761894226, "learning_rate": 1.989613652105858e-05, "loss": 0.5046, "step": 3441 }, { "epoch": 0.09450851180669961, "grad_norm": 0.3402203619480133, "learning_rate": 1.989607442601412e-05, "loss": 0.542, "step": 3442 }, { "epoch": 0.09453596924766612, "grad_norm": 0.39587920904159546, "learning_rate": 1.989601231251032e-05, "loss": 0.6175, "step": 3443 }, { "epoch": 0.09456342668863262, "grad_norm": 0.37106457352638245, "learning_rate": 1.9895950180547296e-05, "loss": 0.508, "step": 3444 }, { "epoch": 0.09459088412959912, "grad_norm": 0.39482274651527405, "learning_rate": 1.9895888030125165e-05, "loss": 0.5859, "step": 3445 }, { "epoch": 0.09461834157056562, "grad_norm": 0.35948488116264343, "learning_rate": 1.9895825861244048e-05, "loss": 0.4445, "step": 3446 }, { "epoch": 0.09464579901153213, "grad_norm": 0.3508606553077698, "learning_rate": 1.9895763673904054e-05, "loss": 0.539, "step": 3447 }, { "epoch": 0.09467325645249863, "grad_norm": 0.35918480157852173, "learning_rate": 1.9895701468105304e-05, "loss": 0.4974, "step": 3448 }, { "epoch": 0.09470071389346513, "grad_norm": 0.35035401582717896, "learning_rate": 1.9895639243847914e-05, "loss": 0.5751, "step": 3449 }, { "epoch": 0.09472817133443164, "grad_norm": 0.35891908407211304, "learning_rate": 1.9895577001131995e-05, "loss": 0.5223, "step": 3450 }, { "epoch": 0.09475562877539813, "grad_norm": 0.3968767523765564, "learning_rate": 1.9895514739957667e-05, "loss": 0.535, "step": 3451 }, { "epoch": 0.09478308621636464, "grad_norm": 0.37734493613243103, "learning_rate": 1.989545246032505e-05, "loss": 0.6085, "step": 3452 }, { "epoch": 0.09481054365733113, "grad_norm": 0.35918161273002625, "learning_rate": 1.9895390162234255e-05, "loss": 0.6522, "step": 3453 }, { "epoch": 0.09483800109829764, "grad_norm": 0.49983954429626465, "learning_rate": 1.98953278456854e-05, "loss": 0.6168, "step": 3454 }, { "epoch": 0.09486545853926415, "grad_norm": 0.3979390263557434, "learning_rate": 1.9895265510678593e-05, "loss": 0.5338, "step": 3455 }, { "epoch": 0.09489291598023064, "grad_norm": 0.32399553060531616, "learning_rate": 1.9895203157213964e-05, "loss": 0.524, "step": 3456 }, { "epoch": 0.09492037342119715, "grad_norm": 0.3515511155128479, "learning_rate": 1.9895140785291623e-05, "loss": 0.5191, "step": 3457 }, { "epoch": 0.09494783086216364, "grad_norm": 0.3620997965335846, "learning_rate": 1.9895078394911685e-05, "loss": 0.5637, "step": 3458 }, { "epoch": 0.09497528830313015, "grad_norm": 0.36275407671928406, "learning_rate": 1.9895015986074266e-05, "loss": 0.6176, "step": 3459 }, { "epoch": 0.09500274574409664, "grad_norm": 0.4276838004589081, "learning_rate": 1.9894953558779483e-05, "loss": 0.6303, "step": 3460 }, { "epoch": 0.09503020318506315, "grad_norm": 0.43101316690444946, "learning_rate": 1.9894891113027457e-05, "loss": 0.7029, "step": 3461 }, { "epoch": 0.09505766062602966, "grad_norm": 0.9723101854324341, "learning_rate": 1.98948286488183e-05, "loss": 0.5072, "step": 3462 }, { "epoch": 0.09508511806699615, "grad_norm": 0.3556780219078064, "learning_rate": 1.989476616615213e-05, "loss": 0.6366, "step": 3463 }, { "epoch": 0.09511257550796266, "grad_norm": 0.3611419200897217, "learning_rate": 1.9894703665029063e-05, "loss": 0.5442, "step": 3464 }, { "epoch": 0.09514003294892916, "grad_norm": 0.33991414308547974, "learning_rate": 1.9894641145449218e-05, "loss": 0.5348, "step": 3465 }, { "epoch": 0.09516749038989566, "grad_norm": 0.4211452305316925, "learning_rate": 1.98945786074127e-05, "loss": 0.6589, "step": 3466 }, { "epoch": 0.09519494783086216, "grad_norm": 0.42497605085372925, "learning_rate": 1.9894516050919644e-05, "loss": 0.5349, "step": 3467 }, { "epoch": 0.09522240527182867, "grad_norm": 0.3140662610530853, "learning_rate": 1.9894453475970157e-05, "loss": 0.5303, "step": 3468 }, { "epoch": 0.09524986271279516, "grad_norm": 0.33659735321998596, "learning_rate": 1.9894390882564353e-05, "loss": 0.4863, "step": 3469 }, { "epoch": 0.09527732015376167, "grad_norm": 0.3719843626022339, "learning_rate": 1.9894328270702355e-05, "loss": 0.5224, "step": 3470 }, { "epoch": 0.09530477759472818, "grad_norm": 0.44416165351867676, "learning_rate": 1.9894265640384275e-05, "loss": 0.6584, "step": 3471 }, { "epoch": 0.09533223503569467, "grad_norm": 0.36652106046676636, "learning_rate": 1.989420299161023e-05, "loss": 0.5424, "step": 3472 }, { "epoch": 0.09535969247666118, "grad_norm": 0.38638192415237427, "learning_rate": 1.9894140324380343e-05, "loss": 0.5322, "step": 3473 }, { "epoch": 0.09538714991762767, "grad_norm": 0.3891766667366028, "learning_rate": 1.989407763869472e-05, "loss": 0.542, "step": 3474 }, { "epoch": 0.09541460735859418, "grad_norm": 0.34113895893096924, "learning_rate": 1.989401493455349e-05, "loss": 0.5389, "step": 3475 }, { "epoch": 0.09544206479956067, "grad_norm": 0.34819495677948, "learning_rate": 1.9893952211956763e-05, "loss": 0.5945, "step": 3476 }, { "epoch": 0.09546952224052718, "grad_norm": 0.33019593358039856, "learning_rate": 1.9893889470904656e-05, "loss": 0.6183, "step": 3477 }, { "epoch": 0.09549697968149369, "grad_norm": 0.3333442509174347, "learning_rate": 1.9893826711397287e-05, "loss": 0.491, "step": 3478 }, { "epoch": 0.09552443712246018, "grad_norm": 0.32975566387176514, "learning_rate": 1.9893763933434775e-05, "loss": 0.5944, "step": 3479 }, { "epoch": 0.09555189456342669, "grad_norm": 0.3307519257068634, "learning_rate": 1.9893701137017237e-05, "loss": 0.5, "step": 3480 }, { "epoch": 0.09557935200439319, "grad_norm": 0.3824635148048401, "learning_rate": 1.989363832214479e-05, "loss": 0.5898, "step": 3481 }, { "epoch": 0.0956068094453597, "grad_norm": 0.3317599594593048, "learning_rate": 1.9893575488817544e-05, "loss": 0.4722, "step": 3482 }, { "epoch": 0.09563426688632619, "grad_norm": 0.3494565486907959, "learning_rate": 1.9893512637035622e-05, "loss": 0.5706, "step": 3483 }, { "epoch": 0.0956617243272927, "grad_norm": 0.37023550271987915, "learning_rate": 1.9893449766799146e-05, "loss": 0.4855, "step": 3484 }, { "epoch": 0.0956891817682592, "grad_norm": 0.3768095374107361, "learning_rate": 1.9893386878108228e-05, "loss": 0.5134, "step": 3485 }, { "epoch": 0.0957166392092257, "grad_norm": 0.5182499289512634, "learning_rate": 1.9893323970962983e-05, "loss": 0.5072, "step": 3486 }, { "epoch": 0.0957440966501922, "grad_norm": 0.3611791133880615, "learning_rate": 1.9893261045363535e-05, "loss": 0.5091, "step": 3487 }, { "epoch": 0.0957715540911587, "grad_norm": 0.3808910548686981, "learning_rate": 1.9893198101309995e-05, "loss": 0.5811, "step": 3488 }, { "epoch": 0.09579901153212521, "grad_norm": 0.45492175221443176, "learning_rate": 1.9893135138802483e-05, "loss": 0.6198, "step": 3489 }, { "epoch": 0.0958264689730917, "grad_norm": 0.32423749566078186, "learning_rate": 1.9893072157841118e-05, "loss": 0.491, "step": 3490 }, { "epoch": 0.09585392641405821, "grad_norm": 0.3587932586669922, "learning_rate": 1.9893009158426012e-05, "loss": 0.532, "step": 3491 }, { "epoch": 0.09588138385502472, "grad_norm": 0.3773055672645569, "learning_rate": 1.989294614055729e-05, "loss": 0.5544, "step": 3492 }, { "epoch": 0.09590884129599121, "grad_norm": 1.1692163944244385, "learning_rate": 1.9892883104235065e-05, "loss": 0.5606, "step": 3493 }, { "epoch": 0.09593629873695772, "grad_norm": 0.37721046805381775, "learning_rate": 1.989282004945946e-05, "loss": 0.6271, "step": 3494 }, { "epoch": 0.09596375617792421, "grad_norm": 0.34470996260643005, "learning_rate": 1.989275697623058e-05, "loss": 0.5518, "step": 3495 }, { "epoch": 0.09599121361889072, "grad_norm": 0.4128940999507904, "learning_rate": 1.9892693884548556e-05, "loss": 0.5795, "step": 3496 }, { "epoch": 0.09601867105985722, "grad_norm": 0.39666882157325745, "learning_rate": 1.9892630774413498e-05, "loss": 0.6227, "step": 3497 }, { "epoch": 0.09604612850082372, "grad_norm": 0.3994831442832947, "learning_rate": 1.9892567645825525e-05, "loss": 0.5109, "step": 3498 }, { "epoch": 0.09607358594179022, "grad_norm": 0.35274678468704224, "learning_rate": 1.989250449878476e-05, "loss": 0.5128, "step": 3499 }, { "epoch": 0.09610104338275673, "grad_norm": 0.3613601326942444, "learning_rate": 1.9892441333291315e-05, "loss": 0.5863, "step": 3500 }, { "epoch": 0.09612850082372323, "grad_norm": 0.3756472170352936, "learning_rate": 1.989237814934531e-05, "loss": 0.6302, "step": 3501 }, { "epoch": 0.09615595826468973, "grad_norm": 0.4070020616054535, "learning_rate": 1.9892314946946864e-05, "loss": 0.6044, "step": 3502 }, { "epoch": 0.09618341570565624, "grad_norm": 0.34237125515937805, "learning_rate": 1.9892251726096092e-05, "loss": 0.5237, "step": 3503 }, { "epoch": 0.09621087314662273, "grad_norm": 0.3345862329006195, "learning_rate": 1.9892188486793114e-05, "loss": 0.5011, "step": 3504 }, { "epoch": 0.09623833058758924, "grad_norm": 0.3565434515476227, "learning_rate": 1.9892125229038045e-05, "loss": 0.4762, "step": 3505 }, { "epoch": 0.09626578802855573, "grad_norm": 0.368267685174942, "learning_rate": 1.9892061952831007e-05, "loss": 0.5024, "step": 3506 }, { "epoch": 0.09629324546952224, "grad_norm": 0.36739382147789, "learning_rate": 1.9891998658172115e-05, "loss": 0.6527, "step": 3507 }, { "epoch": 0.09632070291048875, "grad_norm": 0.504655122756958, "learning_rate": 1.989193534506149e-05, "loss": 0.6182, "step": 3508 }, { "epoch": 0.09634816035145524, "grad_norm": 0.34144777059555054, "learning_rate": 1.9891872013499247e-05, "loss": 0.466, "step": 3509 }, { "epoch": 0.09637561779242175, "grad_norm": 0.37317395210266113, "learning_rate": 1.989180866348551e-05, "loss": 0.5797, "step": 3510 }, { "epoch": 0.09640307523338824, "grad_norm": 0.35044172406196594, "learning_rate": 1.9891745295020387e-05, "loss": 0.623, "step": 3511 }, { "epoch": 0.09643053267435475, "grad_norm": 0.4684470295906067, "learning_rate": 1.9891681908104005e-05, "loss": 0.5474, "step": 3512 }, { "epoch": 0.09645799011532125, "grad_norm": 0.38734039664268494, "learning_rate": 1.9891618502736477e-05, "loss": 0.4653, "step": 3513 }, { "epoch": 0.09648544755628775, "grad_norm": 0.3316654562950134, "learning_rate": 1.9891555078917922e-05, "loss": 0.4967, "step": 3514 }, { "epoch": 0.09651290499725426, "grad_norm": 0.34695619344711304, "learning_rate": 1.9891491636648465e-05, "loss": 0.4539, "step": 3515 }, { "epoch": 0.09654036243822076, "grad_norm": 0.3590364456176758, "learning_rate": 1.9891428175928215e-05, "loss": 0.5704, "step": 3516 }, { "epoch": 0.09656781987918726, "grad_norm": 0.36466220021247864, "learning_rate": 1.9891364696757297e-05, "loss": 0.4685, "step": 3517 }, { "epoch": 0.09659527732015376, "grad_norm": 0.3801593780517578, "learning_rate": 1.9891301199135826e-05, "loss": 0.5702, "step": 3518 }, { "epoch": 0.09662273476112027, "grad_norm": 0.4039519429206848, "learning_rate": 1.989123768306392e-05, "loss": 0.5627, "step": 3519 }, { "epoch": 0.09665019220208676, "grad_norm": 0.33174341917037964, "learning_rate": 1.98911741485417e-05, "loss": 0.4603, "step": 3520 }, { "epoch": 0.09667764964305327, "grad_norm": 0.4142787754535675, "learning_rate": 1.9891110595569283e-05, "loss": 0.5153, "step": 3521 }, { "epoch": 0.09670510708401978, "grad_norm": 0.3311069905757904, "learning_rate": 1.9891047024146787e-05, "loss": 0.5475, "step": 3522 }, { "epoch": 0.09673256452498627, "grad_norm": 0.45340561866760254, "learning_rate": 1.9890983434274334e-05, "loss": 0.613, "step": 3523 }, { "epoch": 0.09676002196595278, "grad_norm": 0.35684454441070557, "learning_rate": 1.9890919825952037e-05, "loss": 0.5186, "step": 3524 }, { "epoch": 0.09678747940691927, "grad_norm": 0.3558177053928375, "learning_rate": 1.989085619918002e-05, "loss": 0.4547, "step": 3525 }, { "epoch": 0.09681493684788578, "grad_norm": 0.49769270420074463, "learning_rate": 1.98907925539584e-05, "loss": 0.5751, "step": 3526 }, { "epoch": 0.09684239428885227, "grad_norm": 0.35300588607788086, "learning_rate": 1.989072889028729e-05, "loss": 0.5881, "step": 3527 }, { "epoch": 0.09686985172981878, "grad_norm": 0.37353241443634033, "learning_rate": 1.989066520816682e-05, "loss": 0.5883, "step": 3528 }, { "epoch": 0.09689730917078529, "grad_norm": 0.3843049705028534, "learning_rate": 1.98906015075971e-05, "loss": 0.6096, "step": 3529 }, { "epoch": 0.09692476661175178, "grad_norm": 0.33402693271636963, "learning_rate": 1.989053778857825e-05, "loss": 0.5129, "step": 3530 }, { "epoch": 0.09695222405271829, "grad_norm": 0.35944026708602905, "learning_rate": 1.9890474051110396e-05, "loss": 0.5773, "step": 3531 }, { "epoch": 0.09697968149368479, "grad_norm": 0.35858574509620667, "learning_rate": 1.9890410295193648e-05, "loss": 0.5511, "step": 3532 }, { "epoch": 0.0970071389346513, "grad_norm": 0.37889590859413147, "learning_rate": 1.9890346520828126e-05, "loss": 0.5949, "step": 3533 }, { "epoch": 0.09703459637561779, "grad_norm": 0.41988304257392883, "learning_rate": 1.989028272801395e-05, "loss": 0.5523, "step": 3534 }, { "epoch": 0.0970620538165843, "grad_norm": 0.3533228039741516, "learning_rate": 1.9890218916751247e-05, "loss": 0.4894, "step": 3535 }, { "epoch": 0.09708951125755079, "grad_norm": 0.3477717339992523, "learning_rate": 1.9890155087040125e-05, "loss": 0.5775, "step": 3536 }, { "epoch": 0.0971169686985173, "grad_norm": 0.34496837854385376, "learning_rate": 1.9890091238880706e-05, "loss": 0.5502, "step": 3537 }, { "epoch": 0.0971444261394838, "grad_norm": 0.33033570647239685, "learning_rate": 1.989002737227311e-05, "loss": 0.4924, "step": 3538 }, { "epoch": 0.0971718835804503, "grad_norm": 0.34897157549858093, "learning_rate": 1.9889963487217457e-05, "loss": 0.614, "step": 3539 }, { "epoch": 0.09719934102141681, "grad_norm": 0.3401179015636444, "learning_rate": 1.9889899583713868e-05, "loss": 0.532, "step": 3540 }, { "epoch": 0.0972267984623833, "grad_norm": 0.37104567885398865, "learning_rate": 1.9889835661762457e-05, "loss": 0.5563, "step": 3541 }, { "epoch": 0.09725425590334981, "grad_norm": 0.38156986236572266, "learning_rate": 1.988977172136335e-05, "loss": 0.641, "step": 3542 }, { "epoch": 0.0972817133443163, "grad_norm": 0.34695109724998474, "learning_rate": 1.988970776251666e-05, "loss": 0.6138, "step": 3543 }, { "epoch": 0.09730917078528281, "grad_norm": 0.34380486607551575, "learning_rate": 1.9889643785222505e-05, "loss": 0.558, "step": 3544 }, { "epoch": 0.09733662822624932, "grad_norm": 0.32578280568122864, "learning_rate": 1.9889579789481012e-05, "loss": 0.5645, "step": 3545 }, { "epoch": 0.09736408566721581, "grad_norm": 0.3539084196090698, "learning_rate": 1.9889515775292297e-05, "loss": 0.5663, "step": 3546 }, { "epoch": 0.09739154310818232, "grad_norm": 0.3522771894931793, "learning_rate": 1.9889451742656475e-05, "loss": 0.6001, "step": 3547 }, { "epoch": 0.09741900054914882, "grad_norm": 0.3829791247844696, "learning_rate": 1.988938769157367e-05, "loss": 0.421, "step": 3548 }, { "epoch": 0.09744645799011532, "grad_norm": 0.48487117886543274, "learning_rate": 1.9889323622044e-05, "loss": 0.5747, "step": 3549 }, { "epoch": 0.09747391543108182, "grad_norm": 0.38056084513664246, "learning_rate": 1.988925953406759e-05, "loss": 0.6018, "step": 3550 }, { "epoch": 0.09750137287204833, "grad_norm": 0.44457364082336426, "learning_rate": 1.988919542764455e-05, "loss": 0.5126, "step": 3551 }, { "epoch": 0.09752883031301483, "grad_norm": 0.4096907675266266, "learning_rate": 1.9889131302775007e-05, "loss": 0.6592, "step": 3552 }, { "epoch": 0.09755628775398133, "grad_norm": 0.359571635723114, "learning_rate": 1.9889067159459077e-05, "loss": 0.5318, "step": 3553 }, { "epoch": 0.09758374519494784, "grad_norm": 0.3630951941013336, "learning_rate": 1.988900299769688e-05, "loss": 0.5161, "step": 3554 }, { "epoch": 0.09761120263591433, "grad_norm": 0.3847862184047699, "learning_rate": 1.9888938817488536e-05, "loss": 0.5894, "step": 3555 }, { "epoch": 0.09763866007688084, "grad_norm": 0.3550190031528473, "learning_rate": 1.9888874618834168e-05, "loss": 0.5395, "step": 3556 }, { "epoch": 0.09766611751784733, "grad_norm": 0.33573204278945923, "learning_rate": 1.988881040173389e-05, "loss": 0.5234, "step": 3557 }, { "epoch": 0.09769357495881384, "grad_norm": 0.38134053349494934, "learning_rate": 1.988874616618782e-05, "loss": 0.6045, "step": 3558 }, { "epoch": 0.09772103239978035, "grad_norm": 0.38448596000671387, "learning_rate": 1.988868191219609e-05, "loss": 0.5224, "step": 3559 }, { "epoch": 0.09774848984074684, "grad_norm": 0.3896487057209015, "learning_rate": 1.988861763975881e-05, "loss": 0.5937, "step": 3560 }, { "epoch": 0.09777594728171335, "grad_norm": 0.37976008653640747, "learning_rate": 1.98885533488761e-05, "loss": 0.5414, "step": 3561 }, { "epoch": 0.09780340472267984, "grad_norm": 0.36724749207496643, "learning_rate": 1.988848903954808e-05, "loss": 0.5818, "step": 3562 }, { "epoch": 0.09783086216364635, "grad_norm": 0.35942816734313965, "learning_rate": 1.9888424711774877e-05, "loss": 0.5625, "step": 3563 }, { "epoch": 0.09785831960461285, "grad_norm": 0.3793608844280243, "learning_rate": 1.9888360365556602e-05, "loss": 0.6834, "step": 3564 }, { "epoch": 0.09788577704557935, "grad_norm": 0.4071831703186035, "learning_rate": 1.9888296000893382e-05, "loss": 0.5579, "step": 3565 }, { "epoch": 0.09791323448654585, "grad_norm": 0.4291229546070099, "learning_rate": 1.9888231617785332e-05, "loss": 0.5512, "step": 3566 }, { "epoch": 0.09794069192751235, "grad_norm": 0.33545809984207153, "learning_rate": 1.9888167216232573e-05, "loss": 0.4754, "step": 3567 }, { "epoch": 0.09796814936847886, "grad_norm": 0.3911801874637604, "learning_rate": 1.988810279623523e-05, "loss": 0.4978, "step": 3568 }, { "epoch": 0.09799560680944536, "grad_norm": 0.3877221345901489, "learning_rate": 1.988803835779342e-05, "loss": 0.4528, "step": 3569 }, { "epoch": 0.09802306425041186, "grad_norm": 0.37875303626060486, "learning_rate": 1.9887973900907254e-05, "loss": 0.5478, "step": 3570 }, { "epoch": 0.09805052169137836, "grad_norm": 0.3422386646270752, "learning_rate": 1.988790942557687e-05, "loss": 0.5802, "step": 3571 }, { "epoch": 0.09807797913234487, "grad_norm": 0.36366236209869385, "learning_rate": 1.9887844931802375e-05, "loss": 0.6156, "step": 3572 }, { "epoch": 0.09810543657331136, "grad_norm": 0.44602468609809875, "learning_rate": 1.9887780419583894e-05, "loss": 0.4868, "step": 3573 }, { "epoch": 0.09813289401427787, "grad_norm": 0.3724503219127655, "learning_rate": 1.9887715888921546e-05, "loss": 0.545, "step": 3574 }, { "epoch": 0.09816035145524438, "grad_norm": 0.4117805063724518, "learning_rate": 1.9887651339815455e-05, "loss": 0.5867, "step": 3575 }, { "epoch": 0.09818780889621087, "grad_norm": 0.3923654854297638, "learning_rate": 1.9887586772265736e-05, "loss": 0.5777, "step": 3576 }, { "epoch": 0.09821526633717738, "grad_norm": 0.41430962085723877, "learning_rate": 1.988752218627251e-05, "loss": 0.6211, "step": 3577 }, { "epoch": 0.09824272377814387, "grad_norm": 0.37244170904159546, "learning_rate": 1.98874575818359e-05, "loss": 0.5153, "step": 3578 }, { "epoch": 0.09827018121911038, "grad_norm": 0.3664463460445404, "learning_rate": 1.9887392958956032e-05, "loss": 0.497, "step": 3579 }, { "epoch": 0.09829763866007687, "grad_norm": 0.39220693707466125, "learning_rate": 1.9887328317633013e-05, "loss": 0.5767, "step": 3580 }, { "epoch": 0.09832509610104338, "grad_norm": 0.39501598477363586, "learning_rate": 1.9887263657866974e-05, "loss": 0.4803, "step": 3581 }, { "epoch": 0.09835255354200989, "grad_norm": 0.3672422766685486, "learning_rate": 1.988719897965803e-05, "loss": 0.5547, "step": 3582 }, { "epoch": 0.09838001098297638, "grad_norm": 0.3282439410686493, "learning_rate": 1.988713428300631e-05, "loss": 0.4763, "step": 3583 }, { "epoch": 0.09840746842394289, "grad_norm": 0.3634583353996277, "learning_rate": 1.988706956791193e-05, "loss": 0.6102, "step": 3584 }, { "epoch": 0.09843492586490939, "grad_norm": 0.6410273313522339, "learning_rate": 1.9887004834375e-05, "loss": 0.535, "step": 3585 }, { "epoch": 0.0984623833058759, "grad_norm": 0.6124294996261597, "learning_rate": 1.988694008239566e-05, "loss": 0.5549, "step": 3586 }, { "epoch": 0.09848984074684239, "grad_norm": 0.33909326791763306, "learning_rate": 1.9886875311974014e-05, "loss": 0.5354, "step": 3587 }, { "epoch": 0.0985172981878089, "grad_norm": 0.3788776695728302, "learning_rate": 1.9886810523110192e-05, "loss": 0.5931, "step": 3588 }, { "epoch": 0.0985447556287754, "grad_norm": 0.4040602445602417, "learning_rate": 1.9886745715804315e-05, "loss": 0.5245, "step": 3589 }, { "epoch": 0.0985722130697419, "grad_norm": 0.34940508008003235, "learning_rate": 1.98866808900565e-05, "loss": 0.4769, "step": 3590 }, { "epoch": 0.0985996705107084, "grad_norm": 0.3336066007614136, "learning_rate": 1.9886616045866872e-05, "loss": 0.426, "step": 3591 }, { "epoch": 0.0986271279516749, "grad_norm": 0.36807960271835327, "learning_rate": 1.9886551183235547e-05, "loss": 0.5722, "step": 3592 }, { "epoch": 0.09865458539264141, "grad_norm": 0.3348952531814575, "learning_rate": 1.988648630216265e-05, "loss": 0.5907, "step": 3593 }, { "epoch": 0.0986820428336079, "grad_norm": 0.3802056610584259, "learning_rate": 1.9886421402648303e-05, "loss": 0.538, "step": 3594 }, { "epoch": 0.09870950027457441, "grad_norm": 0.3609468936920166, "learning_rate": 1.9886356484692618e-05, "loss": 0.5057, "step": 3595 }, { "epoch": 0.09873695771554092, "grad_norm": 0.3289881646633148, "learning_rate": 1.9886291548295728e-05, "loss": 0.5684, "step": 3596 }, { "epoch": 0.09876441515650741, "grad_norm": 0.3743343949317932, "learning_rate": 1.9886226593457748e-05, "loss": 0.5325, "step": 3597 }, { "epoch": 0.09879187259747392, "grad_norm": 0.3631688058376312, "learning_rate": 1.98861616201788e-05, "loss": 0.5581, "step": 3598 }, { "epoch": 0.09881933003844041, "grad_norm": 0.3475742042064667, "learning_rate": 1.9886096628459004e-05, "loss": 0.4997, "step": 3599 }, { "epoch": 0.09884678747940692, "grad_norm": 0.39240872859954834, "learning_rate": 1.9886031618298483e-05, "loss": 0.5524, "step": 3600 }, { "epoch": 0.09887424492037342, "grad_norm": 0.3526679277420044, "learning_rate": 1.988596658969736e-05, "loss": 0.4828, "step": 3601 }, { "epoch": 0.09890170236133992, "grad_norm": 0.407236248254776, "learning_rate": 1.9885901542655752e-05, "loss": 0.5865, "step": 3602 }, { "epoch": 0.09892915980230642, "grad_norm": 0.36110520362854004, "learning_rate": 1.9885836477173782e-05, "loss": 0.5074, "step": 3603 }, { "epoch": 0.09895661724327293, "grad_norm": 0.3415331542491913, "learning_rate": 1.9885771393251572e-05, "loss": 0.5664, "step": 3604 }, { "epoch": 0.09898407468423943, "grad_norm": 0.34186282753944397, "learning_rate": 1.9885706290889245e-05, "loss": 0.5343, "step": 3605 }, { "epoch": 0.09901153212520593, "grad_norm": 0.39023756980895996, "learning_rate": 1.988564117008692e-05, "loss": 0.5583, "step": 3606 }, { "epoch": 0.09903898956617244, "grad_norm": 0.36189743876457214, "learning_rate": 1.988557603084472e-05, "loss": 0.5136, "step": 3607 }, { "epoch": 0.09906644700713893, "grad_norm": 0.37914222478866577, "learning_rate": 1.9885510873162764e-05, "loss": 0.561, "step": 3608 }, { "epoch": 0.09909390444810544, "grad_norm": 0.3526715338230133, "learning_rate": 1.9885445697041174e-05, "loss": 0.6085, "step": 3609 }, { "epoch": 0.09912136188907193, "grad_norm": 0.36381399631500244, "learning_rate": 1.9885380502480073e-05, "loss": 0.5324, "step": 3610 }, { "epoch": 0.09914881933003844, "grad_norm": 0.4069061577320099, "learning_rate": 1.9885315289479587e-05, "loss": 0.5786, "step": 3611 }, { "epoch": 0.09917627677100495, "grad_norm": 0.4079681634902954, "learning_rate": 1.9885250058039827e-05, "loss": 0.5896, "step": 3612 }, { "epoch": 0.09920373421197144, "grad_norm": 0.4059285819530487, "learning_rate": 1.9885184808160925e-05, "loss": 0.5825, "step": 3613 }, { "epoch": 0.09923119165293795, "grad_norm": 0.3730524182319641, "learning_rate": 1.9885119539842994e-05, "loss": 0.5691, "step": 3614 }, { "epoch": 0.09925864909390444, "grad_norm": 0.3590240180492401, "learning_rate": 1.988505425308616e-05, "loss": 0.5598, "step": 3615 }, { "epoch": 0.09928610653487095, "grad_norm": 0.3533720374107361, "learning_rate": 1.988498894789055e-05, "loss": 0.6012, "step": 3616 }, { "epoch": 0.09931356397583745, "grad_norm": 0.33175280690193176, "learning_rate": 1.988492362425628e-05, "loss": 0.5197, "step": 3617 }, { "epoch": 0.09934102141680395, "grad_norm": 0.37959831953048706, "learning_rate": 1.9884858282183468e-05, "loss": 0.5025, "step": 3618 }, { "epoch": 0.09936847885777046, "grad_norm": 0.340808242559433, "learning_rate": 1.988479292167224e-05, "loss": 0.5095, "step": 3619 }, { "epoch": 0.09939593629873696, "grad_norm": 0.36388471722602844, "learning_rate": 1.9884727542722724e-05, "loss": 0.4468, "step": 3620 }, { "epoch": 0.09942339373970346, "grad_norm": 0.3724011778831482, "learning_rate": 1.9884662145335033e-05, "loss": 0.614, "step": 3621 }, { "epoch": 0.09945085118066996, "grad_norm": 0.34433940052986145, "learning_rate": 1.9884596729509293e-05, "loss": 0.509, "step": 3622 }, { "epoch": 0.09947830862163647, "grad_norm": 0.458732932806015, "learning_rate": 1.9884531295245626e-05, "loss": 0.5681, "step": 3623 }, { "epoch": 0.09950576606260296, "grad_norm": 0.4233742952346802, "learning_rate": 1.988446584254415e-05, "loss": 0.5868, "step": 3624 }, { "epoch": 0.09953322350356947, "grad_norm": 0.3689969480037689, "learning_rate": 1.9884400371404996e-05, "loss": 0.5987, "step": 3625 }, { "epoch": 0.09956068094453598, "grad_norm": 0.38740184903144836, "learning_rate": 1.9884334881828276e-05, "loss": 0.5966, "step": 3626 }, { "epoch": 0.09958813838550247, "grad_norm": 0.36507171392440796, "learning_rate": 1.988426937381412e-05, "loss": 0.4808, "step": 3627 }, { "epoch": 0.09961559582646898, "grad_norm": 0.3502161204814911, "learning_rate": 1.9884203847362643e-05, "loss": 0.5885, "step": 3628 }, { "epoch": 0.09964305326743547, "grad_norm": 0.4651767611503601, "learning_rate": 1.9884138302473974e-05, "loss": 0.7005, "step": 3629 }, { "epoch": 0.09967051070840198, "grad_norm": 0.3845900893211365, "learning_rate": 1.988407273914823e-05, "loss": 0.5759, "step": 3630 }, { "epoch": 0.09969796814936847, "grad_norm": 0.36267364025115967, "learning_rate": 1.988400715738554e-05, "loss": 0.5762, "step": 3631 }, { "epoch": 0.09972542559033498, "grad_norm": 0.3803366422653198, "learning_rate": 1.9883941557186018e-05, "loss": 0.5841, "step": 3632 }, { "epoch": 0.09975288303130148, "grad_norm": 0.3361320495605469, "learning_rate": 1.988387593854979e-05, "loss": 0.5275, "step": 3633 }, { "epoch": 0.09978034047226798, "grad_norm": 0.39372843503952026, "learning_rate": 1.988381030147698e-05, "loss": 0.5754, "step": 3634 }, { "epoch": 0.09980779791323449, "grad_norm": 0.33151230216026306, "learning_rate": 1.9883744645967713e-05, "loss": 0.508, "step": 3635 }, { "epoch": 0.09983525535420099, "grad_norm": 0.3384150266647339, "learning_rate": 1.9883678972022105e-05, "loss": 0.5156, "step": 3636 }, { "epoch": 0.0998627127951675, "grad_norm": 0.4019555151462555, "learning_rate": 1.988361327964028e-05, "loss": 0.4695, "step": 3637 }, { "epoch": 0.09989017023613399, "grad_norm": 0.37466368079185486, "learning_rate": 1.9883547568822362e-05, "loss": 0.5385, "step": 3638 }, { "epoch": 0.0999176276771005, "grad_norm": 0.4065605401992798, "learning_rate": 1.9883481839568474e-05, "loss": 0.5753, "step": 3639 }, { "epoch": 0.09994508511806699, "grad_norm": 0.36632034182548523, "learning_rate": 1.988341609187874e-05, "loss": 0.5504, "step": 3640 }, { "epoch": 0.0999725425590335, "grad_norm": 0.32964998483657837, "learning_rate": 1.9883350325753276e-05, "loss": 0.5133, "step": 3641 }, { "epoch": 0.1, "grad_norm": 0.3609088361263275, "learning_rate": 1.9883284541192213e-05, "loss": 0.5451, "step": 3642 }, { "epoch": 0.1000274574409665, "grad_norm": 0.3225267231464386, "learning_rate": 1.9883218738195667e-05, "loss": 0.4846, "step": 3643 }, { "epoch": 0.10005491488193301, "grad_norm": 0.3798198103904724, "learning_rate": 1.9883152916763767e-05, "loss": 0.6001, "step": 3644 }, { "epoch": 0.1000823723228995, "grad_norm": 0.34675946831703186, "learning_rate": 1.988308707689663e-05, "loss": 0.508, "step": 3645 }, { "epoch": 0.10010982976386601, "grad_norm": 0.4197950065135956, "learning_rate": 1.9883021218594382e-05, "loss": 0.5485, "step": 3646 }, { "epoch": 0.1001372872048325, "grad_norm": 0.35124471783638, "learning_rate": 1.9882955341857144e-05, "loss": 0.5457, "step": 3647 }, { "epoch": 0.10016474464579901, "grad_norm": 0.40457579493522644, "learning_rate": 1.9882889446685043e-05, "loss": 0.6387, "step": 3648 }, { "epoch": 0.10019220208676552, "grad_norm": 0.37092846632003784, "learning_rate": 1.9882823533078195e-05, "loss": 0.5925, "step": 3649 }, { "epoch": 0.10021965952773201, "grad_norm": 0.47242459654808044, "learning_rate": 1.9882757601036732e-05, "loss": 0.5333, "step": 3650 }, { "epoch": 0.10024711696869852, "grad_norm": 0.49493351578712463, "learning_rate": 1.988269165056077e-05, "loss": 0.6618, "step": 3651 }, { "epoch": 0.10027457440966502, "grad_norm": 0.38014158606529236, "learning_rate": 1.988262568165043e-05, "loss": 0.6363, "step": 3652 }, { "epoch": 0.10030203185063152, "grad_norm": 0.40587669610977173, "learning_rate": 1.9882559694305842e-05, "loss": 0.6299, "step": 3653 }, { "epoch": 0.10032948929159802, "grad_norm": 0.3674899935722351, "learning_rate": 1.9882493688527125e-05, "loss": 0.5582, "step": 3654 }, { "epoch": 0.10035694673256453, "grad_norm": 0.3656216859817505, "learning_rate": 1.9882427664314403e-05, "loss": 0.6827, "step": 3655 }, { "epoch": 0.10038440417353103, "grad_norm": 0.3836021423339844, "learning_rate": 1.98823616216678e-05, "loss": 0.5468, "step": 3656 }, { "epoch": 0.10041186161449753, "grad_norm": 0.39705690741539, "learning_rate": 1.9882295560587442e-05, "loss": 0.5481, "step": 3657 }, { "epoch": 0.10043931905546404, "grad_norm": 0.4003710150718689, "learning_rate": 1.9882229481073443e-05, "loss": 0.6227, "step": 3658 }, { "epoch": 0.10046677649643053, "grad_norm": 0.36636263132095337, "learning_rate": 1.9882163383125934e-05, "loss": 0.6714, "step": 3659 }, { "epoch": 0.10049423393739704, "grad_norm": 0.37111955881118774, "learning_rate": 1.9882097266745036e-05, "loss": 0.545, "step": 3660 }, { "epoch": 0.10052169137836353, "grad_norm": 0.3639630079269409, "learning_rate": 1.9882031131930876e-05, "loss": 0.6285, "step": 3661 }, { "epoch": 0.10054914881933004, "grad_norm": 0.4843692481517792, "learning_rate": 1.988196497868357e-05, "loss": 0.4922, "step": 3662 }, { "epoch": 0.10057660626029655, "grad_norm": 0.5989887714385986, "learning_rate": 1.9881898807003246e-05, "loss": 0.567, "step": 3663 }, { "epoch": 0.10060406370126304, "grad_norm": 0.44745945930480957, "learning_rate": 1.9881832616890027e-05, "loss": 0.5828, "step": 3664 }, { "epoch": 0.10063152114222955, "grad_norm": 0.3645488917827606, "learning_rate": 1.9881766408344037e-05, "loss": 0.5332, "step": 3665 }, { "epoch": 0.10065897858319604, "grad_norm": 1.9621652364730835, "learning_rate": 1.9881700181365397e-05, "loss": 0.4575, "step": 3666 }, { "epoch": 0.10068643602416255, "grad_norm": 0.4034222960472107, "learning_rate": 1.9881633935954235e-05, "loss": 0.4747, "step": 3667 }, { "epoch": 0.10071389346512905, "grad_norm": 0.4081016778945923, "learning_rate": 1.9881567672110668e-05, "loss": 0.5817, "step": 3668 }, { "epoch": 0.10074135090609555, "grad_norm": 0.42450493574142456, "learning_rate": 1.9881501389834827e-05, "loss": 0.542, "step": 3669 }, { "epoch": 0.10076880834706205, "grad_norm": 0.3669437766075134, "learning_rate": 1.988143508912683e-05, "loss": 0.5203, "step": 3670 }, { "epoch": 0.10079626578802856, "grad_norm": 0.38931936025619507, "learning_rate": 1.9881368769986805e-05, "loss": 0.5527, "step": 3671 }, { "epoch": 0.10082372322899506, "grad_norm": 0.36357739567756653, "learning_rate": 1.9881302432414874e-05, "loss": 0.462, "step": 3672 }, { "epoch": 0.10085118066996156, "grad_norm": 0.38488224148750305, "learning_rate": 1.9881236076411158e-05, "loss": 0.5933, "step": 3673 }, { "epoch": 0.10087863811092806, "grad_norm": 0.3460012376308441, "learning_rate": 1.988116970197578e-05, "loss": 0.5468, "step": 3674 }, { "epoch": 0.10090609555189456, "grad_norm": 0.343228280544281, "learning_rate": 1.9881103309108872e-05, "loss": 0.5122, "step": 3675 }, { "epoch": 0.10093355299286107, "grad_norm": 0.4061126112937927, "learning_rate": 1.9881036897810553e-05, "loss": 0.4853, "step": 3676 }, { "epoch": 0.10096101043382756, "grad_norm": 0.3478251099586487, "learning_rate": 1.9880970468080943e-05, "loss": 0.6047, "step": 3677 }, { "epoch": 0.10098846787479407, "grad_norm": 0.3577525317668915, "learning_rate": 1.9880904019920174e-05, "loss": 0.5531, "step": 3678 }, { "epoch": 0.10101592531576058, "grad_norm": 0.37251660227775574, "learning_rate": 1.988083755332836e-05, "loss": 0.5942, "step": 3679 }, { "epoch": 0.10104338275672707, "grad_norm": 0.37358933687210083, "learning_rate": 1.9880771068305633e-05, "loss": 0.5311, "step": 3680 }, { "epoch": 0.10107084019769358, "grad_norm": 0.36396849155426025, "learning_rate": 1.9880704564852112e-05, "loss": 0.5572, "step": 3681 }, { "epoch": 0.10109829763866007, "grad_norm": 0.4194788634777069, "learning_rate": 1.988063804296793e-05, "loss": 0.4283, "step": 3682 }, { "epoch": 0.10112575507962658, "grad_norm": 0.3561704158782959, "learning_rate": 1.9880571502653198e-05, "loss": 0.5556, "step": 3683 }, { "epoch": 0.10115321252059307, "grad_norm": 0.3436970114707947, "learning_rate": 1.988050494390805e-05, "loss": 0.5128, "step": 3684 }, { "epoch": 0.10118066996155958, "grad_norm": 0.369693785905838, "learning_rate": 1.9880438366732605e-05, "loss": 0.4853, "step": 3685 }, { "epoch": 0.10120812740252609, "grad_norm": 0.341208815574646, "learning_rate": 1.988037177112699e-05, "loss": 0.519, "step": 3686 }, { "epoch": 0.10123558484349258, "grad_norm": 0.36568590998649597, "learning_rate": 1.9880305157091327e-05, "loss": 0.5719, "step": 3687 }, { "epoch": 0.10126304228445909, "grad_norm": 0.3450008034706116, "learning_rate": 1.988023852462574e-05, "loss": 0.4703, "step": 3688 }, { "epoch": 0.10129049972542559, "grad_norm": 0.35612383484840393, "learning_rate": 1.9880171873730356e-05, "loss": 0.5182, "step": 3689 }, { "epoch": 0.1013179571663921, "grad_norm": 0.33502310514450073, "learning_rate": 1.98801052044053e-05, "loss": 0.5323, "step": 3690 }, { "epoch": 0.10134541460735859, "grad_norm": 0.7402406334877014, "learning_rate": 1.9880038516650694e-05, "loss": 0.5274, "step": 3691 }, { "epoch": 0.1013728720483251, "grad_norm": 0.39143913984298706, "learning_rate": 1.987997181046666e-05, "loss": 0.5163, "step": 3692 }, { "epoch": 0.1014003294892916, "grad_norm": 0.3534885346889496, "learning_rate": 1.9879905085853328e-05, "loss": 0.5372, "step": 3693 }, { "epoch": 0.1014277869302581, "grad_norm": 0.40381374955177307, "learning_rate": 1.9879838342810818e-05, "loss": 0.5906, "step": 3694 }, { "epoch": 0.1014552443712246, "grad_norm": 0.37369129061698914, "learning_rate": 1.987977158133926e-05, "loss": 0.5029, "step": 3695 }, { "epoch": 0.1014827018121911, "grad_norm": 0.3889693319797516, "learning_rate": 1.987970480143877e-05, "loss": 0.4527, "step": 3696 }, { "epoch": 0.10151015925315761, "grad_norm": 0.35453000664711, "learning_rate": 1.987963800310948e-05, "loss": 0.5734, "step": 3697 }, { "epoch": 0.1015376166941241, "grad_norm": 0.43763452768325806, "learning_rate": 1.9879571186351513e-05, "loss": 0.5543, "step": 3698 }, { "epoch": 0.10156507413509061, "grad_norm": 0.34610670804977417, "learning_rate": 1.987950435116499e-05, "loss": 0.5681, "step": 3699 }, { "epoch": 0.1015925315760571, "grad_norm": 0.4607323706150055, "learning_rate": 1.987943749755004e-05, "loss": 0.6391, "step": 3700 }, { "epoch": 0.10161998901702361, "grad_norm": 0.3961438834667206, "learning_rate": 1.9879370625506783e-05, "loss": 0.6408, "step": 3701 }, { "epoch": 0.10164744645799012, "grad_norm": 0.3470100462436676, "learning_rate": 1.987930373503535e-05, "loss": 0.5275, "step": 3702 }, { "epoch": 0.10167490389895661, "grad_norm": 0.31323787569999695, "learning_rate": 1.9879236826135858e-05, "loss": 0.486, "step": 3703 }, { "epoch": 0.10170236133992312, "grad_norm": 0.36904895305633545, "learning_rate": 1.987916989880844e-05, "loss": 0.5657, "step": 3704 }, { "epoch": 0.10172981878088962, "grad_norm": 0.3412191569805145, "learning_rate": 1.9879102953053215e-05, "loss": 0.5351, "step": 3705 }, { "epoch": 0.10175727622185612, "grad_norm": 0.339280366897583, "learning_rate": 1.9879035988870315e-05, "loss": 0.5507, "step": 3706 }, { "epoch": 0.10178473366282262, "grad_norm": 0.41161108016967773, "learning_rate": 1.9878969006259854e-05, "loss": 0.6781, "step": 3707 }, { "epoch": 0.10181219110378913, "grad_norm": 0.34671422839164734, "learning_rate": 1.9878902005221964e-05, "loss": 0.5794, "step": 3708 }, { "epoch": 0.10183964854475563, "grad_norm": 0.3544754683971405, "learning_rate": 1.987883498575677e-05, "loss": 0.4963, "step": 3709 }, { "epoch": 0.10186710598572213, "grad_norm": 0.342465877532959, "learning_rate": 1.9878767947864396e-05, "loss": 0.5061, "step": 3710 }, { "epoch": 0.10189456342668864, "grad_norm": 0.3471302092075348, "learning_rate": 1.9878700891544966e-05, "loss": 0.4656, "step": 3711 }, { "epoch": 0.10192202086765513, "grad_norm": 0.39402854442596436, "learning_rate": 1.9878633816798608e-05, "loss": 0.4787, "step": 3712 }, { "epoch": 0.10194947830862164, "grad_norm": 0.3881931006908417, "learning_rate": 1.9878566723625444e-05, "loss": 0.5837, "step": 3713 }, { "epoch": 0.10197693574958813, "grad_norm": 0.33060622215270996, "learning_rate": 1.9878499612025598e-05, "loss": 0.4908, "step": 3714 }, { "epoch": 0.10200439319055464, "grad_norm": 0.4108772873878479, "learning_rate": 1.9878432481999196e-05, "loss": 0.4801, "step": 3715 }, { "epoch": 0.10203185063152115, "grad_norm": 0.35200801491737366, "learning_rate": 1.987836533354637e-05, "loss": 0.543, "step": 3716 }, { "epoch": 0.10205930807248764, "grad_norm": 0.3465738594532013, "learning_rate": 1.9878298166667238e-05, "loss": 0.5879, "step": 3717 }, { "epoch": 0.10208676551345415, "grad_norm": 0.3674907088279724, "learning_rate": 1.9878230981361925e-05, "loss": 0.5426, "step": 3718 }, { "epoch": 0.10211422295442064, "grad_norm": 0.35191377997398376, "learning_rate": 1.9878163777630562e-05, "loss": 0.6574, "step": 3719 }, { "epoch": 0.10214168039538715, "grad_norm": 0.3897986114025116, "learning_rate": 1.9878096555473268e-05, "loss": 0.6042, "step": 3720 }, { "epoch": 0.10216913783635365, "grad_norm": 0.3558349609375, "learning_rate": 1.987802931489017e-05, "loss": 0.5657, "step": 3721 }, { "epoch": 0.10219659527732015, "grad_norm": 0.36671921610832214, "learning_rate": 1.98779620558814e-05, "loss": 0.5827, "step": 3722 }, { "epoch": 0.10222405271828666, "grad_norm": 0.34111905097961426, "learning_rate": 1.9877894778447072e-05, "loss": 0.5594, "step": 3723 }, { "epoch": 0.10225151015925316, "grad_norm": 0.379687637090683, "learning_rate": 1.9877827482587323e-05, "loss": 0.6461, "step": 3724 }, { "epoch": 0.10227896760021966, "grad_norm": 0.3512583076953888, "learning_rate": 1.987776016830227e-05, "loss": 0.5029, "step": 3725 }, { "epoch": 0.10230642504118616, "grad_norm": 0.43273666501045227, "learning_rate": 1.987769283559204e-05, "loss": 0.6566, "step": 3726 }, { "epoch": 0.10233388248215267, "grad_norm": 0.7826879620552063, "learning_rate": 1.9877625484456763e-05, "loss": 0.6599, "step": 3727 }, { "epoch": 0.10236133992311916, "grad_norm": 0.3890058994293213, "learning_rate": 1.987755811489656e-05, "loss": 0.5708, "step": 3728 }, { "epoch": 0.10238879736408567, "grad_norm": 0.6155503392219543, "learning_rate": 1.987749072691156e-05, "loss": 0.4309, "step": 3729 }, { "epoch": 0.10241625480505218, "grad_norm": 0.4001244306564331, "learning_rate": 1.987742332050189e-05, "loss": 0.5654, "step": 3730 }, { "epoch": 0.10244371224601867, "grad_norm": 0.3392367660999298, "learning_rate": 1.9877355895667666e-05, "loss": 0.4845, "step": 3731 }, { "epoch": 0.10247116968698518, "grad_norm": 0.40737399458885193, "learning_rate": 1.9877288452409026e-05, "loss": 0.5278, "step": 3732 }, { "epoch": 0.10249862712795167, "grad_norm": 0.3245091438293457, "learning_rate": 1.9877220990726088e-05, "loss": 0.5364, "step": 3733 }, { "epoch": 0.10252608456891818, "grad_norm": 0.39107823371887207, "learning_rate": 1.987715351061898e-05, "loss": 0.4837, "step": 3734 }, { "epoch": 0.10255354200988467, "grad_norm": 0.3771151602268219, "learning_rate": 1.9877086012087833e-05, "loss": 0.623, "step": 3735 }, { "epoch": 0.10258099945085118, "grad_norm": 0.39206475019454956, "learning_rate": 1.9877018495132763e-05, "loss": 0.5623, "step": 3736 }, { "epoch": 0.10260845689181768, "grad_norm": 0.35196352005004883, "learning_rate": 1.9876950959753906e-05, "loss": 0.5885, "step": 3737 }, { "epoch": 0.10263591433278418, "grad_norm": 0.38927483558654785, "learning_rate": 1.9876883405951378e-05, "loss": 0.5413, "step": 3738 }, { "epoch": 0.10266337177375069, "grad_norm": 0.6136228442192078, "learning_rate": 1.9876815833725314e-05, "loss": 0.5748, "step": 3739 }, { "epoch": 0.10269082921471719, "grad_norm": 0.3340201675891876, "learning_rate": 1.9876748243075834e-05, "loss": 0.5677, "step": 3740 }, { "epoch": 0.1027182866556837, "grad_norm": 0.4499374330043793, "learning_rate": 1.9876680634003068e-05, "loss": 0.5531, "step": 3741 }, { "epoch": 0.10274574409665019, "grad_norm": 0.3772997558116913, "learning_rate": 1.987661300650714e-05, "loss": 0.5191, "step": 3742 }, { "epoch": 0.1027732015376167, "grad_norm": 0.3793255686759949, "learning_rate": 1.9876545360588175e-05, "loss": 0.6249, "step": 3743 }, { "epoch": 0.10280065897858319, "grad_norm": 0.4102172255516052, "learning_rate": 1.98764776962463e-05, "loss": 0.5377, "step": 3744 }, { "epoch": 0.1028281164195497, "grad_norm": 0.32869404554367065, "learning_rate": 1.9876410013481643e-05, "loss": 0.4825, "step": 3745 }, { "epoch": 0.1028555738605162, "grad_norm": 0.3301064670085907, "learning_rate": 1.987634231229433e-05, "loss": 0.5309, "step": 3746 }, { "epoch": 0.1028830313014827, "grad_norm": 0.3402499556541443, "learning_rate": 1.9876274592684485e-05, "loss": 0.5436, "step": 3747 }, { "epoch": 0.10291048874244921, "grad_norm": 0.37275901436805725, "learning_rate": 1.9876206854652237e-05, "loss": 0.5614, "step": 3748 }, { "epoch": 0.1029379461834157, "grad_norm": 0.32092857360839844, "learning_rate": 1.987613909819771e-05, "loss": 0.4638, "step": 3749 }, { "epoch": 0.10296540362438221, "grad_norm": 0.4086604118347168, "learning_rate": 1.9876071323321033e-05, "loss": 0.5845, "step": 3750 }, { "epoch": 0.1029928610653487, "grad_norm": 0.33194127678871155, "learning_rate": 1.987600353002233e-05, "loss": 0.4838, "step": 3751 }, { "epoch": 0.10302031850631521, "grad_norm": 0.36046385765075684, "learning_rate": 1.987593571830173e-05, "loss": 0.5821, "step": 3752 }, { "epoch": 0.10304777594728172, "grad_norm": 0.42381903529167175, "learning_rate": 1.987586788815936e-05, "loss": 0.5651, "step": 3753 }, { "epoch": 0.10307523338824821, "grad_norm": 0.3796504735946655, "learning_rate": 1.9875800039595338e-05, "loss": 0.5391, "step": 3754 }, { "epoch": 0.10310269082921472, "grad_norm": 0.3642529249191284, "learning_rate": 1.9875732172609798e-05, "loss": 0.5281, "step": 3755 }, { "epoch": 0.10313014827018122, "grad_norm": 0.3446398377418518, "learning_rate": 1.9875664287202867e-05, "loss": 0.5025, "step": 3756 }, { "epoch": 0.10315760571114772, "grad_norm": 0.3937079608440399, "learning_rate": 1.987559638337467e-05, "loss": 0.4599, "step": 3757 }, { "epoch": 0.10318506315211422, "grad_norm": 0.3722397983074188, "learning_rate": 1.9875528461125336e-05, "loss": 0.5674, "step": 3758 }, { "epoch": 0.10321252059308073, "grad_norm": 0.3857128918170929, "learning_rate": 1.9875460520454987e-05, "loss": 0.5423, "step": 3759 }, { "epoch": 0.10323997803404723, "grad_norm": 0.3713667690753937, "learning_rate": 1.9875392561363755e-05, "loss": 0.5117, "step": 3760 }, { "epoch": 0.10326743547501373, "grad_norm": 0.3877888023853302, "learning_rate": 1.987532458385176e-05, "loss": 0.6346, "step": 3761 }, { "epoch": 0.10329489291598024, "grad_norm": 0.37657803297042847, "learning_rate": 1.9875256587919134e-05, "loss": 0.5142, "step": 3762 }, { "epoch": 0.10332235035694673, "grad_norm": 0.39413389563560486, "learning_rate": 1.9875188573566e-05, "loss": 0.498, "step": 3763 }, { "epoch": 0.10334980779791324, "grad_norm": 0.36739182472229004, "learning_rate": 1.9875120540792495e-05, "loss": 0.5466, "step": 3764 }, { "epoch": 0.10337726523887973, "grad_norm": 0.40594416856765747, "learning_rate": 1.9875052489598732e-05, "loss": 0.637, "step": 3765 }, { "epoch": 0.10340472267984624, "grad_norm": 0.48338863253593445, "learning_rate": 1.9874984419984846e-05, "loss": 0.4924, "step": 3766 }, { "epoch": 0.10343218012081273, "grad_norm": 0.3458135426044464, "learning_rate": 1.9874916331950964e-05, "loss": 0.4986, "step": 3767 }, { "epoch": 0.10345963756177924, "grad_norm": 0.35718342661857605, "learning_rate": 1.9874848225497212e-05, "loss": 0.5293, "step": 3768 }, { "epoch": 0.10348709500274575, "grad_norm": 0.4661085903644562, "learning_rate": 1.9874780100623713e-05, "loss": 0.461, "step": 3769 }, { "epoch": 0.10351455244371224, "grad_norm": 0.39744827151298523, "learning_rate": 1.9874711957330598e-05, "loss": 0.5448, "step": 3770 }, { "epoch": 0.10354200988467875, "grad_norm": 0.33603620529174805, "learning_rate": 1.9874643795617995e-05, "loss": 0.5171, "step": 3771 }, { "epoch": 0.10356946732564525, "grad_norm": 0.43475016951560974, "learning_rate": 1.987457561548603e-05, "loss": 0.6482, "step": 3772 }, { "epoch": 0.10359692476661175, "grad_norm": 0.3865177631378174, "learning_rate": 1.987450741693483e-05, "loss": 0.5457, "step": 3773 }, { "epoch": 0.10362438220757825, "grad_norm": 0.40809866786003113, "learning_rate": 1.987443919996452e-05, "loss": 0.5245, "step": 3774 }, { "epoch": 0.10365183964854476, "grad_norm": 0.4723277986049652, "learning_rate": 1.9874370964575234e-05, "loss": 0.6136, "step": 3775 }, { "epoch": 0.10367929708951126, "grad_norm": 0.3428969383239746, "learning_rate": 1.987430271076709e-05, "loss": 0.566, "step": 3776 }, { "epoch": 0.10370675453047776, "grad_norm": 0.34798505902290344, "learning_rate": 1.9874234438540222e-05, "loss": 0.4591, "step": 3777 }, { "epoch": 0.10373421197144427, "grad_norm": 0.3913669288158417, "learning_rate": 1.9874166147894752e-05, "loss": 0.5721, "step": 3778 }, { "epoch": 0.10376166941241076, "grad_norm": 0.3714759647846222, "learning_rate": 1.9874097838830814e-05, "loss": 0.5103, "step": 3779 }, { "epoch": 0.10378912685337727, "grad_norm": 0.33035799860954285, "learning_rate": 1.987402951134853e-05, "loss": 0.5493, "step": 3780 }, { "epoch": 0.10381658429434376, "grad_norm": 0.4073542058467865, "learning_rate": 1.987396116544803e-05, "loss": 0.4923, "step": 3781 }, { "epoch": 0.10384404173531027, "grad_norm": 0.3643590211868286, "learning_rate": 1.9873892801129444e-05, "loss": 0.5632, "step": 3782 }, { "epoch": 0.10387149917627678, "grad_norm": 0.34655168652534485, "learning_rate": 1.9873824418392895e-05, "loss": 0.4608, "step": 3783 }, { "epoch": 0.10389895661724327, "grad_norm": 0.3425053656101227, "learning_rate": 1.987375601723851e-05, "loss": 0.6181, "step": 3784 }, { "epoch": 0.10392641405820978, "grad_norm": 0.321377158164978, "learning_rate": 1.987368759766642e-05, "loss": 0.5309, "step": 3785 }, { "epoch": 0.10395387149917627, "grad_norm": 0.4320198595523834, "learning_rate": 1.9873619159676753e-05, "loss": 0.5979, "step": 3786 }, { "epoch": 0.10398132894014278, "grad_norm": 0.34884563088417053, "learning_rate": 1.9873550703269635e-05, "loss": 0.4853, "step": 3787 }, { "epoch": 0.10400878638110927, "grad_norm": 0.40331414341926575, "learning_rate": 1.9873482228445192e-05, "loss": 0.6197, "step": 3788 }, { "epoch": 0.10403624382207578, "grad_norm": 0.394056499004364, "learning_rate": 1.9873413735203552e-05, "loss": 0.5205, "step": 3789 }, { "epoch": 0.10406370126304229, "grad_norm": 0.4187641739845276, "learning_rate": 1.9873345223544847e-05, "loss": 0.6397, "step": 3790 }, { "epoch": 0.10409115870400878, "grad_norm": 0.39512133598327637, "learning_rate": 1.98732766934692e-05, "loss": 0.5883, "step": 3791 }, { "epoch": 0.10411861614497529, "grad_norm": 0.4228011965751648, "learning_rate": 1.987320814497674e-05, "loss": 0.603, "step": 3792 }, { "epoch": 0.10414607358594179, "grad_norm": 0.3022291958332062, "learning_rate": 1.9873139578067597e-05, "loss": 0.6128, "step": 3793 }, { "epoch": 0.1041735310269083, "grad_norm": 0.39699631929397583, "learning_rate": 1.98730709927419e-05, "loss": 0.5353, "step": 3794 }, { "epoch": 0.10420098846787479, "grad_norm": 0.33036336302757263, "learning_rate": 1.9873002388999772e-05, "loss": 0.442, "step": 3795 }, { "epoch": 0.1042284459088413, "grad_norm": 0.4097613990306854, "learning_rate": 1.9872933766841344e-05, "loss": 0.6227, "step": 3796 }, { "epoch": 0.1042559033498078, "grad_norm": 0.4052391052246094, "learning_rate": 1.9872865126266742e-05, "loss": 0.5681, "step": 3797 }, { "epoch": 0.1042833607907743, "grad_norm": 0.3291495740413666, "learning_rate": 1.9872796467276096e-05, "loss": 0.5467, "step": 3798 }, { "epoch": 0.1043108182317408, "grad_norm": 0.34201833605766296, "learning_rate": 1.9872727789869534e-05, "loss": 0.5562, "step": 3799 }, { "epoch": 0.1043382756727073, "grad_norm": 0.36519426107406616, "learning_rate": 1.9872659094047184e-05, "loss": 0.4567, "step": 3800 }, { "epoch": 0.10436573311367381, "grad_norm": 0.38032835721969604, "learning_rate": 1.9872590379809173e-05, "loss": 0.5351, "step": 3801 }, { "epoch": 0.1043931905546403, "grad_norm": 0.3412076532840729, "learning_rate": 1.987252164715563e-05, "loss": 0.5292, "step": 3802 }, { "epoch": 0.10442064799560681, "grad_norm": 0.3484349250793457, "learning_rate": 1.9872452896086684e-05, "loss": 0.5571, "step": 3803 }, { "epoch": 0.1044481054365733, "grad_norm": 0.6059137582778931, "learning_rate": 1.9872384126602463e-05, "loss": 0.5296, "step": 3804 }, { "epoch": 0.10447556287753981, "grad_norm": 0.32972705364227295, "learning_rate": 1.9872315338703096e-05, "loss": 0.5582, "step": 3805 }, { "epoch": 0.10450302031850632, "grad_norm": 0.36167091131210327, "learning_rate": 1.9872246532388707e-05, "loss": 0.5228, "step": 3806 }, { "epoch": 0.10453047775947281, "grad_norm": 0.3177085816860199, "learning_rate": 1.9872177707659425e-05, "loss": 0.4593, "step": 3807 }, { "epoch": 0.10455793520043932, "grad_norm": 0.39206618070602417, "learning_rate": 1.9872108864515386e-05, "loss": 0.5746, "step": 3808 }, { "epoch": 0.10458539264140582, "grad_norm": 0.3659050762653351, "learning_rate": 1.987204000295671e-05, "loss": 0.5936, "step": 3809 }, { "epoch": 0.10461285008237232, "grad_norm": 0.577154278755188, "learning_rate": 1.9871971122983532e-05, "loss": 0.5444, "step": 3810 }, { "epoch": 0.10464030752333882, "grad_norm": 0.35058191418647766, "learning_rate": 1.987190222459597e-05, "loss": 0.5623, "step": 3811 }, { "epoch": 0.10466776496430533, "grad_norm": 0.3693815767765045, "learning_rate": 1.9871833307794167e-05, "loss": 0.5382, "step": 3812 }, { "epoch": 0.10469522240527183, "grad_norm": 0.38984403014183044, "learning_rate": 1.9871764372578243e-05, "loss": 0.5758, "step": 3813 }, { "epoch": 0.10472267984623833, "grad_norm": 0.39996346831321716, "learning_rate": 1.9871695418948324e-05, "loss": 0.4671, "step": 3814 }, { "epoch": 0.10475013728720484, "grad_norm": 0.36269518733024597, "learning_rate": 1.9871626446904547e-05, "loss": 0.5834, "step": 3815 }, { "epoch": 0.10477759472817133, "grad_norm": 0.3340657651424408, "learning_rate": 1.987155745644703e-05, "loss": 0.4784, "step": 3816 }, { "epoch": 0.10480505216913784, "grad_norm": 0.5332630276679993, "learning_rate": 1.9871488447575914e-05, "loss": 0.5755, "step": 3817 }, { "epoch": 0.10483250961010433, "grad_norm": 0.406204491853714, "learning_rate": 1.9871419420291317e-05, "loss": 0.6531, "step": 3818 }, { "epoch": 0.10485996705107084, "grad_norm": 0.3646356165409088, "learning_rate": 1.9871350374593376e-05, "loss": 0.5777, "step": 3819 }, { "epoch": 0.10488742449203735, "grad_norm": 0.3302288353443146, "learning_rate": 1.9871281310482212e-05, "loss": 0.5892, "step": 3820 }, { "epoch": 0.10491488193300384, "grad_norm": 0.32823213934898376, "learning_rate": 1.9871212227957962e-05, "loss": 0.5022, "step": 3821 }, { "epoch": 0.10494233937397035, "grad_norm": 0.37528273463249207, "learning_rate": 1.9871143127020747e-05, "loss": 0.5983, "step": 3822 }, { "epoch": 0.10496979681493684, "grad_norm": 0.4582796096801758, "learning_rate": 1.9871074007670702e-05, "loss": 0.578, "step": 3823 }, { "epoch": 0.10499725425590335, "grad_norm": 0.37758493423461914, "learning_rate": 1.9871004869907954e-05, "loss": 0.5223, "step": 3824 }, { "epoch": 0.10502471169686985, "grad_norm": 0.339484840631485, "learning_rate": 1.987093571373263e-05, "loss": 0.5586, "step": 3825 }, { "epoch": 0.10505216913783635, "grad_norm": 0.3803310990333557, "learning_rate": 1.987086653914486e-05, "loss": 0.5, "step": 3826 }, { "epoch": 0.10507962657880286, "grad_norm": 0.416075736284256, "learning_rate": 1.9870797346144772e-05, "loss": 0.5979, "step": 3827 }, { "epoch": 0.10510708401976936, "grad_norm": 0.32400840520858765, "learning_rate": 1.98707281347325e-05, "loss": 0.5511, "step": 3828 }, { "epoch": 0.10513454146073586, "grad_norm": 0.346810907125473, "learning_rate": 1.9870658904908165e-05, "loss": 0.561, "step": 3829 }, { "epoch": 0.10516199890170236, "grad_norm": 0.43753641843795776, "learning_rate": 1.9870589656671907e-05, "loss": 0.5266, "step": 3830 }, { "epoch": 0.10518945634266887, "grad_norm": 0.47127383947372437, "learning_rate": 1.9870520390023843e-05, "loss": 0.6117, "step": 3831 }, { "epoch": 0.10521691378363536, "grad_norm": 0.4203130304813385, "learning_rate": 1.9870451104964114e-05, "loss": 0.5367, "step": 3832 }, { "epoch": 0.10524437122460187, "grad_norm": 0.3533201813697815, "learning_rate": 1.987038180149284e-05, "loss": 0.5412, "step": 3833 }, { "epoch": 0.10527182866556836, "grad_norm": 0.33527401089668274, "learning_rate": 1.9870312479610154e-05, "loss": 0.4716, "step": 3834 }, { "epoch": 0.10529928610653487, "grad_norm": 0.448081374168396, "learning_rate": 1.9870243139316187e-05, "loss": 0.5757, "step": 3835 }, { "epoch": 0.10532674354750138, "grad_norm": 0.41182827949523926, "learning_rate": 1.987017378061106e-05, "loss": 0.5641, "step": 3836 }, { "epoch": 0.10535420098846787, "grad_norm": 0.41510093212127686, "learning_rate": 1.9870104403494914e-05, "loss": 0.5707, "step": 3837 }, { "epoch": 0.10538165842943438, "grad_norm": 0.4042965769767761, "learning_rate": 1.9870035007967873e-05, "loss": 0.5379, "step": 3838 }, { "epoch": 0.10540911587040087, "grad_norm": 0.3770391047000885, "learning_rate": 1.9869965594030066e-05, "loss": 0.6505, "step": 3839 }, { "epoch": 0.10543657331136738, "grad_norm": 0.48352956771850586, "learning_rate": 1.9869896161681622e-05, "loss": 0.5498, "step": 3840 }, { "epoch": 0.10546403075233388, "grad_norm": 0.41967737674713135, "learning_rate": 1.9869826710922676e-05, "loss": 0.4997, "step": 3841 }, { "epoch": 0.10549148819330038, "grad_norm": 0.39243850111961365, "learning_rate": 1.9869757241753347e-05, "loss": 0.6316, "step": 3842 }, { "epoch": 0.10551894563426689, "grad_norm": 0.3841986060142517, "learning_rate": 1.986968775417377e-05, "loss": 0.5987, "step": 3843 }, { "epoch": 0.10554640307523339, "grad_norm": 0.3687991499900818, "learning_rate": 1.9869618248184082e-05, "loss": 0.5208, "step": 3844 }, { "epoch": 0.1055738605161999, "grad_norm": 0.925459623336792, "learning_rate": 1.9869548723784403e-05, "loss": 0.5929, "step": 3845 }, { "epoch": 0.10560131795716639, "grad_norm": 0.47142472863197327, "learning_rate": 1.9869479180974863e-05, "loss": 0.4895, "step": 3846 }, { "epoch": 0.1056287753981329, "grad_norm": 0.375644713640213, "learning_rate": 1.98694096197556e-05, "loss": 0.5071, "step": 3847 }, { "epoch": 0.10565623283909939, "grad_norm": 0.3523799777030945, "learning_rate": 1.986934004012673e-05, "loss": 0.4671, "step": 3848 }, { "epoch": 0.1056836902800659, "grad_norm": 0.3381350338459015, "learning_rate": 1.98692704420884e-05, "loss": 0.5974, "step": 3849 }, { "epoch": 0.1057111477210324, "grad_norm": 0.3653413951396942, "learning_rate": 1.9869200825640723e-05, "loss": 0.4238, "step": 3850 }, { "epoch": 0.1057386051619989, "grad_norm": 0.3673236072063446, "learning_rate": 1.986913119078384e-05, "loss": 0.5607, "step": 3851 }, { "epoch": 0.10576606260296541, "grad_norm": 0.3395763635635376, "learning_rate": 1.9869061537517878e-05, "loss": 0.5204, "step": 3852 }, { "epoch": 0.1057935200439319, "grad_norm": 0.35546445846557617, "learning_rate": 1.9868991865842965e-05, "loss": 0.4429, "step": 3853 }, { "epoch": 0.10582097748489841, "grad_norm": 0.3710554838180542, "learning_rate": 1.9868922175759234e-05, "loss": 0.5599, "step": 3854 }, { "epoch": 0.1058484349258649, "grad_norm": 0.4383479654788971, "learning_rate": 1.9868852467266814e-05, "loss": 0.6035, "step": 3855 }, { "epoch": 0.10587589236683141, "grad_norm": 0.2930375337600708, "learning_rate": 1.986878274036583e-05, "loss": 0.4621, "step": 3856 }, { "epoch": 0.10590334980779792, "grad_norm": 0.33804062008857727, "learning_rate": 1.986871299505642e-05, "loss": 0.5309, "step": 3857 }, { "epoch": 0.10593080724876441, "grad_norm": 0.38290271162986755, "learning_rate": 1.986864323133871e-05, "loss": 0.5569, "step": 3858 }, { "epoch": 0.10595826468973092, "grad_norm": 0.36935073137283325, "learning_rate": 1.986857344921283e-05, "loss": 0.4677, "step": 3859 }, { "epoch": 0.10598572213069742, "grad_norm": 0.4082733988761902, "learning_rate": 1.986850364867891e-05, "loss": 0.6187, "step": 3860 }, { "epoch": 0.10601317957166392, "grad_norm": 0.34845176339149475, "learning_rate": 1.9868433829737085e-05, "loss": 0.5029, "step": 3861 }, { "epoch": 0.10604063701263042, "grad_norm": 0.34293392300605774, "learning_rate": 1.9868363992387477e-05, "loss": 0.6125, "step": 3862 }, { "epoch": 0.10606809445359693, "grad_norm": 0.367667019367218, "learning_rate": 1.986829413663022e-05, "loss": 0.6475, "step": 3863 }, { "epoch": 0.10609555189456343, "grad_norm": 0.3714020252227783, "learning_rate": 1.986822426246545e-05, "loss": 0.6712, "step": 3864 }, { "epoch": 0.10612300933552993, "grad_norm": 0.3475090563297272, "learning_rate": 1.986815436989329e-05, "loss": 0.5974, "step": 3865 }, { "epoch": 0.10615046677649644, "grad_norm": 0.3499235212802887, "learning_rate": 1.986808445891387e-05, "loss": 0.4703, "step": 3866 }, { "epoch": 0.10617792421746293, "grad_norm": 0.37134361267089844, "learning_rate": 1.9868014529527324e-05, "loss": 0.5824, "step": 3867 }, { "epoch": 0.10620538165842944, "grad_norm": 0.3374619781970978, "learning_rate": 1.9867944581733782e-05, "loss": 0.518, "step": 3868 }, { "epoch": 0.10623283909939593, "grad_norm": 0.3901131749153137, "learning_rate": 1.9867874615533373e-05, "loss": 0.5425, "step": 3869 }, { "epoch": 0.10626029654036244, "grad_norm": 0.3555409610271454, "learning_rate": 1.986780463092623e-05, "loss": 0.5846, "step": 3870 }, { "epoch": 0.10628775398132893, "grad_norm": 0.34077024459838867, "learning_rate": 1.9867734627912482e-05, "loss": 0.552, "step": 3871 }, { "epoch": 0.10631521142229544, "grad_norm": 0.4143122732639313, "learning_rate": 1.9867664606492258e-05, "loss": 0.6516, "step": 3872 }, { "epoch": 0.10634266886326195, "grad_norm": 0.36835795640945435, "learning_rate": 1.9867594566665692e-05, "loss": 0.4786, "step": 3873 }, { "epoch": 0.10637012630422844, "grad_norm": 0.3371666967868805, "learning_rate": 1.986752450843291e-05, "loss": 0.554, "step": 3874 }, { "epoch": 0.10639758374519495, "grad_norm": 0.33243823051452637, "learning_rate": 1.986745443179405e-05, "loss": 0.4817, "step": 3875 }, { "epoch": 0.10642504118616145, "grad_norm": 0.41295409202575684, "learning_rate": 1.9867384336749232e-05, "loss": 0.471, "step": 3876 }, { "epoch": 0.10645249862712795, "grad_norm": 0.42155101895332336, "learning_rate": 1.9867314223298593e-05, "loss": 0.5979, "step": 3877 }, { "epoch": 0.10647995606809445, "grad_norm": 0.43157750368118286, "learning_rate": 1.986724409144227e-05, "loss": 0.5077, "step": 3878 }, { "epoch": 0.10650741350906096, "grad_norm": 0.37829843163490295, "learning_rate": 1.9867173941180383e-05, "loss": 0.5335, "step": 3879 }, { "epoch": 0.10653487095002746, "grad_norm": 0.33894768357276917, "learning_rate": 1.986710377251307e-05, "loss": 0.5643, "step": 3880 }, { "epoch": 0.10656232839099396, "grad_norm": 0.39431583881378174, "learning_rate": 1.9867033585440457e-05, "loss": 0.5272, "step": 3881 }, { "epoch": 0.10658978583196047, "grad_norm": 0.3784754276275635, "learning_rate": 1.9866963379962677e-05, "loss": 0.5733, "step": 3882 }, { "epoch": 0.10661724327292696, "grad_norm": 0.3714334964752197, "learning_rate": 1.9866893156079863e-05, "loss": 0.5331, "step": 3883 }, { "epoch": 0.10664470071389347, "grad_norm": 0.3975332975387573, "learning_rate": 1.986682291379214e-05, "loss": 0.5918, "step": 3884 }, { "epoch": 0.10667215815485996, "grad_norm": 0.3915683329105377, "learning_rate": 1.986675265309965e-05, "loss": 0.521, "step": 3885 }, { "epoch": 0.10669961559582647, "grad_norm": 0.33201098442077637, "learning_rate": 1.9866682374002514e-05, "loss": 0.547, "step": 3886 }, { "epoch": 0.10672707303679298, "grad_norm": 0.3589036166667938, "learning_rate": 1.9866612076500865e-05, "loss": 0.5798, "step": 3887 }, { "epoch": 0.10675453047775947, "grad_norm": 0.35575756430625916, "learning_rate": 1.9866541760594837e-05, "loss": 0.5868, "step": 3888 }, { "epoch": 0.10678198791872598, "grad_norm": 0.34954458475112915, "learning_rate": 1.9866471426284558e-05, "loss": 0.4404, "step": 3889 }, { "epoch": 0.10680944535969247, "grad_norm": 0.356108158826828, "learning_rate": 1.9866401073570164e-05, "loss": 0.4828, "step": 3890 }, { "epoch": 0.10683690280065898, "grad_norm": 0.4012312591075897, "learning_rate": 1.986633070245178e-05, "loss": 0.5977, "step": 3891 }, { "epoch": 0.10686436024162548, "grad_norm": 0.3317927122116089, "learning_rate": 1.986626031292954e-05, "loss": 0.5052, "step": 3892 }, { "epoch": 0.10689181768259198, "grad_norm": 0.356916218996048, "learning_rate": 1.9866189905003577e-05, "loss": 0.4527, "step": 3893 }, { "epoch": 0.10691927512355849, "grad_norm": 0.39416465163230896, "learning_rate": 1.986611947867402e-05, "loss": 0.5388, "step": 3894 }, { "epoch": 0.10694673256452498, "grad_norm": 0.4298718571662903, "learning_rate": 1.9866049033941003e-05, "loss": 0.5836, "step": 3895 }, { "epoch": 0.10697419000549149, "grad_norm": 0.34931573271751404, "learning_rate": 1.9865978570804653e-05, "loss": 0.5632, "step": 3896 }, { "epoch": 0.10700164744645799, "grad_norm": 0.37951377034187317, "learning_rate": 1.9865908089265103e-05, "loss": 0.5133, "step": 3897 }, { "epoch": 0.1070291048874245, "grad_norm": 0.3519764244556427, "learning_rate": 1.9865837589322492e-05, "loss": 0.5346, "step": 3898 }, { "epoch": 0.10705656232839099, "grad_norm": 0.35335278511047363, "learning_rate": 1.986576707097694e-05, "loss": 0.5214, "step": 3899 }, { "epoch": 0.1070840197693575, "grad_norm": 0.4014730453491211, "learning_rate": 1.9865696534228583e-05, "loss": 0.608, "step": 3900 }, { "epoch": 0.10711147721032399, "grad_norm": 0.38673245906829834, "learning_rate": 1.9865625979077557e-05, "loss": 0.5819, "step": 3901 }, { "epoch": 0.1071389346512905, "grad_norm": 0.38143277168273926, "learning_rate": 1.9865555405523985e-05, "loss": 0.5511, "step": 3902 }, { "epoch": 0.107166392092257, "grad_norm": 0.3618567883968353, "learning_rate": 1.9865484813568008e-05, "loss": 0.5384, "step": 3903 }, { "epoch": 0.1071938495332235, "grad_norm": 0.3953111469745636, "learning_rate": 1.9865414203209753e-05, "loss": 0.5931, "step": 3904 }, { "epoch": 0.10722130697419001, "grad_norm": 0.37477439641952515, "learning_rate": 1.986534357444935e-05, "loss": 0.5251, "step": 3905 }, { "epoch": 0.1072487644151565, "grad_norm": 0.36615970730781555, "learning_rate": 1.986527292728693e-05, "loss": 0.5913, "step": 3906 }, { "epoch": 0.10727622185612301, "grad_norm": 0.37437349557876587, "learning_rate": 1.9865202261722633e-05, "loss": 0.5727, "step": 3907 }, { "epoch": 0.1073036792970895, "grad_norm": 0.3684615194797516, "learning_rate": 1.986513157775658e-05, "loss": 0.5531, "step": 3908 }, { "epoch": 0.10733113673805601, "grad_norm": 0.3959915041923523, "learning_rate": 1.9865060875388915e-05, "loss": 0.473, "step": 3909 }, { "epoch": 0.10735859417902252, "grad_norm": 0.38141483068466187, "learning_rate": 1.9864990154619755e-05, "loss": 0.5911, "step": 3910 }, { "epoch": 0.10738605161998901, "grad_norm": 0.35106831789016724, "learning_rate": 1.9864919415449243e-05, "loss": 0.5767, "step": 3911 }, { "epoch": 0.10741350906095552, "grad_norm": 0.374546617269516, "learning_rate": 1.9864848657877508e-05, "loss": 0.5956, "step": 3912 }, { "epoch": 0.10744096650192202, "grad_norm": 0.3573896884918213, "learning_rate": 1.986477788190468e-05, "loss": 0.4648, "step": 3913 }, { "epoch": 0.10746842394288852, "grad_norm": 0.4275065064430237, "learning_rate": 1.9864707087530893e-05, "loss": 0.5626, "step": 3914 }, { "epoch": 0.10749588138385502, "grad_norm": 0.34382104873657227, "learning_rate": 1.9864636274756283e-05, "loss": 0.5306, "step": 3915 }, { "epoch": 0.10752333882482153, "grad_norm": 0.41410595178604126, "learning_rate": 1.9864565443580974e-05, "loss": 0.5566, "step": 3916 }, { "epoch": 0.10755079626578803, "grad_norm": 0.3861497938632965, "learning_rate": 1.9864494594005107e-05, "loss": 0.4913, "step": 3917 }, { "epoch": 0.10757825370675453, "grad_norm": 0.39049485325813293, "learning_rate": 1.9864423726028804e-05, "loss": 0.6196, "step": 3918 }, { "epoch": 0.10760571114772104, "grad_norm": 0.3583229184150696, "learning_rate": 1.98643528396522e-05, "loss": 0.5822, "step": 3919 }, { "epoch": 0.10763316858868753, "grad_norm": 0.36045151948928833, "learning_rate": 1.9864281934875434e-05, "loss": 0.5237, "step": 3920 }, { "epoch": 0.10766062602965404, "grad_norm": 0.3459170162677765, "learning_rate": 1.9864211011698635e-05, "loss": 0.5419, "step": 3921 }, { "epoch": 0.10768808347062053, "grad_norm": 0.42030325531959534, "learning_rate": 1.9864140070121932e-05, "loss": 0.5805, "step": 3922 }, { "epoch": 0.10771554091158704, "grad_norm": 0.40273481607437134, "learning_rate": 1.986406911014546e-05, "loss": 0.5432, "step": 3923 }, { "epoch": 0.10774299835255355, "grad_norm": 0.39949485659599304, "learning_rate": 1.9863998131769354e-05, "loss": 0.5173, "step": 3924 }, { "epoch": 0.10777045579352004, "grad_norm": 0.4521189033985138, "learning_rate": 1.986392713499374e-05, "loss": 0.5258, "step": 3925 }, { "epoch": 0.10779791323448655, "grad_norm": 0.40132278203964233, "learning_rate": 1.986385611981875e-05, "loss": 0.5191, "step": 3926 }, { "epoch": 0.10782537067545304, "grad_norm": 0.3166329860687256, "learning_rate": 1.9863785086244525e-05, "loss": 0.491, "step": 3927 }, { "epoch": 0.10785282811641955, "grad_norm": 0.3947601914405823, "learning_rate": 1.986371403427119e-05, "loss": 0.6157, "step": 3928 }, { "epoch": 0.10788028555738605, "grad_norm": 0.38338449597358704, "learning_rate": 1.9863642963898885e-05, "loss": 0.5933, "step": 3929 }, { "epoch": 0.10790774299835255, "grad_norm": 0.4174915552139282, "learning_rate": 1.9863571875127734e-05, "loss": 0.5091, "step": 3930 }, { "epoch": 0.10793520043931906, "grad_norm": 0.39134132862091064, "learning_rate": 1.9863500767957872e-05, "loss": 0.543, "step": 3931 }, { "epoch": 0.10796265788028556, "grad_norm": 0.4175630509853363, "learning_rate": 1.9863429642389434e-05, "loss": 0.5431, "step": 3932 }, { "epoch": 0.10799011532125206, "grad_norm": 0.3955199718475342, "learning_rate": 1.9863358498422554e-05, "loss": 0.5556, "step": 3933 }, { "epoch": 0.10801757276221856, "grad_norm": 0.5020374655723572, "learning_rate": 1.9863287336057362e-05, "loss": 0.6199, "step": 3934 }, { "epoch": 0.10804503020318507, "grad_norm": 0.4130048453807831, "learning_rate": 1.986321615529399e-05, "loss": 0.6334, "step": 3935 }, { "epoch": 0.10807248764415156, "grad_norm": 0.34505125880241394, "learning_rate": 1.9863144956132573e-05, "loss": 0.4968, "step": 3936 }, { "epoch": 0.10809994508511807, "grad_norm": 0.8618636131286621, "learning_rate": 1.986307373857324e-05, "loss": 0.6281, "step": 3937 }, { "epoch": 0.10812740252608456, "grad_norm": 0.388460636138916, "learning_rate": 1.986300250261613e-05, "loss": 0.5592, "step": 3938 }, { "epoch": 0.10815485996705107, "grad_norm": 0.40597933530807495, "learning_rate": 1.986293124826137e-05, "loss": 0.5684, "step": 3939 }, { "epoch": 0.10818231740801758, "grad_norm": 0.36292046308517456, "learning_rate": 1.9862859975509096e-05, "loss": 0.5962, "step": 3940 }, { "epoch": 0.10820977484898407, "grad_norm": 0.45751529932022095, "learning_rate": 1.986278868435944e-05, "loss": 0.5192, "step": 3941 }, { "epoch": 0.10823723228995058, "grad_norm": 0.35921022295951843, "learning_rate": 1.9862717374812535e-05, "loss": 0.5891, "step": 3942 }, { "epoch": 0.10826468973091707, "grad_norm": 0.31273770332336426, "learning_rate": 1.9862646046868516e-05, "loss": 0.4805, "step": 3943 }, { "epoch": 0.10829214717188358, "grad_norm": 0.4266352355480194, "learning_rate": 1.9862574700527514e-05, "loss": 0.5353, "step": 3944 }, { "epoch": 0.10831960461285008, "grad_norm": 0.3353807032108307, "learning_rate": 1.986250333578966e-05, "loss": 0.569, "step": 3945 }, { "epoch": 0.10834706205381658, "grad_norm": 0.3556755781173706, "learning_rate": 1.986243195265509e-05, "loss": 0.5957, "step": 3946 }, { "epoch": 0.10837451949478309, "grad_norm": 0.37778326869010925, "learning_rate": 1.9862360551123937e-05, "loss": 0.5846, "step": 3947 }, { "epoch": 0.10840197693574959, "grad_norm": 0.390471875667572, "learning_rate": 1.9862289131196335e-05, "loss": 0.5808, "step": 3948 }, { "epoch": 0.1084294343767161, "grad_norm": 0.3672283887863159, "learning_rate": 1.9862217692872416e-05, "loss": 0.584, "step": 3949 }, { "epoch": 0.10845689181768259, "grad_norm": 0.35633155703544617, "learning_rate": 1.9862146236152315e-05, "loss": 0.4549, "step": 3950 }, { "epoch": 0.1084843492586491, "grad_norm": 0.3635578453540802, "learning_rate": 1.9862074761036162e-05, "loss": 0.5264, "step": 3951 }, { "epoch": 0.10851180669961559, "grad_norm": 0.32456451654434204, "learning_rate": 1.986200326752409e-05, "loss": 0.6064, "step": 3952 }, { "epoch": 0.1085392641405821, "grad_norm": 0.39114803075790405, "learning_rate": 1.9861931755616237e-05, "loss": 0.5058, "step": 3953 }, { "epoch": 0.1085667215815486, "grad_norm": 0.41415759921073914, "learning_rate": 1.9861860225312733e-05, "loss": 0.6826, "step": 3954 }, { "epoch": 0.1085941790225151, "grad_norm": 0.4190730154514313, "learning_rate": 1.986178867661371e-05, "loss": 0.5927, "step": 3955 }, { "epoch": 0.10862163646348161, "grad_norm": 0.41117870807647705, "learning_rate": 1.9861717109519307e-05, "loss": 0.5698, "step": 3956 }, { "epoch": 0.1086490939044481, "grad_norm": 0.36093124747276306, "learning_rate": 1.9861645524029655e-05, "loss": 0.5381, "step": 3957 }, { "epoch": 0.10867655134541461, "grad_norm": 0.38000255823135376, "learning_rate": 1.986157392014488e-05, "loss": 0.5666, "step": 3958 }, { "epoch": 0.1087040087863811, "grad_norm": 0.3537401258945465, "learning_rate": 1.9861502297865128e-05, "loss": 0.5495, "step": 3959 }, { "epoch": 0.10873146622734761, "grad_norm": 0.37150415778160095, "learning_rate": 1.9861430657190524e-05, "loss": 0.5714, "step": 3960 }, { "epoch": 0.10875892366831412, "grad_norm": 0.3561224937438965, "learning_rate": 1.9861358998121207e-05, "loss": 0.5442, "step": 3961 }, { "epoch": 0.10878638110928061, "grad_norm": 0.3501085937023163, "learning_rate": 1.986128732065731e-05, "loss": 0.5416, "step": 3962 }, { "epoch": 0.10881383855024712, "grad_norm": 0.3778669834136963, "learning_rate": 1.9861215624798956e-05, "loss": 0.5821, "step": 3963 }, { "epoch": 0.10884129599121362, "grad_norm": 0.32850706577301025, "learning_rate": 1.9861143910546293e-05, "loss": 0.576, "step": 3964 }, { "epoch": 0.10886875343218012, "grad_norm": 0.37771663069725037, "learning_rate": 1.986107217789945e-05, "loss": 0.5123, "step": 3965 }, { "epoch": 0.10889621087314662, "grad_norm": 0.372470885515213, "learning_rate": 1.986100042685856e-05, "loss": 0.4771, "step": 3966 }, { "epoch": 0.10892366831411313, "grad_norm": 0.3709075450897217, "learning_rate": 1.9860928657423756e-05, "loss": 0.4778, "step": 3967 }, { "epoch": 0.10895112575507962, "grad_norm": 0.340224951505661, "learning_rate": 1.9860856869595172e-05, "loss": 0.4882, "step": 3968 }, { "epoch": 0.10897858319604613, "grad_norm": 0.3827364444732666, "learning_rate": 1.9860785063372945e-05, "loss": 0.5938, "step": 3969 }, { "epoch": 0.10900604063701264, "grad_norm": 0.39398160576820374, "learning_rate": 1.9860713238757205e-05, "loss": 0.5865, "step": 3970 }, { "epoch": 0.10903349807797913, "grad_norm": 0.34342360496520996, "learning_rate": 1.986064139574809e-05, "loss": 0.5018, "step": 3971 }, { "epoch": 0.10906095551894564, "grad_norm": 0.3409058153629303, "learning_rate": 1.986056953434573e-05, "loss": 0.5361, "step": 3972 }, { "epoch": 0.10908841295991213, "grad_norm": 0.36984938383102417, "learning_rate": 1.986049765455026e-05, "loss": 0.5288, "step": 3973 }, { "epoch": 0.10911587040087864, "grad_norm": 0.3538043200969696, "learning_rate": 1.9860425756361813e-05, "loss": 0.5566, "step": 3974 }, { "epoch": 0.10914332784184513, "grad_norm": 0.34838637709617615, "learning_rate": 1.9860353839780528e-05, "loss": 0.596, "step": 3975 }, { "epoch": 0.10917078528281164, "grad_norm": 0.37920719385147095, "learning_rate": 1.9860281904806535e-05, "loss": 0.4391, "step": 3976 }, { "epoch": 0.10919824272377815, "grad_norm": 0.5259626507759094, "learning_rate": 1.986020995143997e-05, "loss": 0.5645, "step": 3977 }, { "epoch": 0.10922570016474464, "grad_norm": 0.3524862229824066, "learning_rate": 1.9860137979680964e-05, "loss": 0.5079, "step": 3978 }, { "epoch": 0.10925315760571115, "grad_norm": 0.3903754651546478, "learning_rate": 1.9860065989529656e-05, "loss": 0.5793, "step": 3979 }, { "epoch": 0.10928061504667765, "grad_norm": 0.368459552526474, "learning_rate": 1.9859993980986177e-05, "loss": 0.5514, "step": 3980 }, { "epoch": 0.10930807248764415, "grad_norm": 0.4309787452220917, "learning_rate": 1.9859921954050664e-05, "loss": 0.6118, "step": 3981 }, { "epoch": 0.10933552992861065, "grad_norm": 0.35527539253234863, "learning_rate": 1.985984990872325e-05, "loss": 0.5286, "step": 3982 }, { "epoch": 0.10936298736957716, "grad_norm": 0.363607794046402, "learning_rate": 1.9859777845004066e-05, "loss": 0.5984, "step": 3983 }, { "epoch": 0.10939044481054366, "grad_norm": 0.34513360261917114, "learning_rate": 1.985970576289325e-05, "loss": 0.499, "step": 3984 }, { "epoch": 0.10941790225151016, "grad_norm": 0.3455013930797577, "learning_rate": 1.9859633662390938e-05, "loss": 0.5398, "step": 3985 }, { "epoch": 0.10944535969247667, "grad_norm": 0.37129929661750793, "learning_rate": 1.985956154349726e-05, "loss": 0.6246, "step": 3986 }, { "epoch": 0.10947281713344316, "grad_norm": 0.3403526544570923, "learning_rate": 1.9859489406212355e-05, "loss": 0.5358, "step": 3987 }, { "epoch": 0.10950027457440967, "grad_norm": 0.3818034827709198, "learning_rate": 1.9859417250536355e-05, "loss": 0.5606, "step": 3988 }, { "epoch": 0.10952773201537616, "grad_norm": 0.3211546838283539, "learning_rate": 1.9859345076469394e-05, "loss": 0.5412, "step": 3989 }, { "epoch": 0.10955518945634267, "grad_norm": 0.3827301561832428, "learning_rate": 1.985927288401161e-05, "loss": 0.582, "step": 3990 }, { "epoch": 0.10958264689730918, "grad_norm": 0.3984532952308655, "learning_rate": 1.9859200673163132e-05, "loss": 0.5957, "step": 3991 }, { "epoch": 0.10961010433827567, "grad_norm": 0.3133056163787842, "learning_rate": 1.9859128443924097e-05, "loss": 0.5198, "step": 3992 }, { "epoch": 0.10963756177924218, "grad_norm": 0.40924543142318726, "learning_rate": 1.9859056196294645e-05, "loss": 0.5929, "step": 3993 }, { "epoch": 0.10966501922020867, "grad_norm": 0.5046197175979614, "learning_rate": 1.9858983930274908e-05, "loss": 0.5359, "step": 3994 }, { "epoch": 0.10969247666117518, "grad_norm": 0.38889026641845703, "learning_rate": 1.9858911645865015e-05, "loss": 0.4918, "step": 3995 }, { "epoch": 0.10971993410214168, "grad_norm": 0.3846395015716553, "learning_rate": 1.9858839343065105e-05, "loss": 0.6252, "step": 3996 }, { "epoch": 0.10974739154310818, "grad_norm": 0.3960987627506256, "learning_rate": 1.9858767021875317e-05, "loss": 0.4861, "step": 3997 }, { "epoch": 0.10977484898407469, "grad_norm": 0.37865686416625977, "learning_rate": 1.985869468229578e-05, "loss": 0.5672, "step": 3998 }, { "epoch": 0.10980230642504119, "grad_norm": 0.3352625370025635, "learning_rate": 1.985862232432663e-05, "loss": 0.5326, "step": 3999 }, { "epoch": 0.10982976386600769, "grad_norm": 0.3997047245502472, "learning_rate": 1.9858549947968003e-05, "loss": 0.6091, "step": 4000 }, { "epoch": 0.10985722130697419, "grad_norm": 0.4317460358142853, "learning_rate": 1.9858477553220034e-05, "loss": 0.6146, "step": 4001 }, { "epoch": 0.1098846787479407, "grad_norm": 0.3737805485725403, "learning_rate": 1.9858405140082858e-05, "loss": 0.6374, "step": 4002 }, { "epoch": 0.10991213618890719, "grad_norm": 0.43477872014045715, "learning_rate": 1.985833270855661e-05, "loss": 0.6008, "step": 4003 }, { "epoch": 0.1099395936298737, "grad_norm": 0.38140445947647095, "learning_rate": 1.9858260258641426e-05, "loss": 0.585, "step": 4004 }, { "epoch": 0.10996705107084019, "grad_norm": 0.47957196831703186, "learning_rate": 1.9858187790337437e-05, "loss": 0.656, "step": 4005 }, { "epoch": 0.1099945085118067, "grad_norm": 0.37022799253463745, "learning_rate": 1.9858115303644784e-05, "loss": 0.4987, "step": 4006 }, { "epoch": 0.1100219659527732, "grad_norm": 0.36663544178009033, "learning_rate": 1.9858042798563598e-05, "loss": 0.5534, "step": 4007 }, { "epoch": 0.1100494233937397, "grad_norm": 0.3790111541748047, "learning_rate": 1.9857970275094017e-05, "loss": 0.5184, "step": 4008 }, { "epoch": 0.11007688083470621, "grad_norm": 0.36379122734069824, "learning_rate": 1.985789773323618e-05, "loss": 0.6046, "step": 4009 }, { "epoch": 0.1101043382756727, "grad_norm": 0.43821364641189575, "learning_rate": 1.985782517299021e-05, "loss": 0.5057, "step": 4010 }, { "epoch": 0.11013179571663921, "grad_norm": 0.3486274480819702, "learning_rate": 1.9857752594356253e-05, "loss": 0.4844, "step": 4011 }, { "epoch": 0.1101592531576057, "grad_norm": 0.32822877168655396, "learning_rate": 1.985767999733444e-05, "loss": 0.5323, "step": 4012 }, { "epoch": 0.11018671059857221, "grad_norm": 0.350456178188324, "learning_rate": 1.985760738192491e-05, "loss": 0.4883, "step": 4013 }, { "epoch": 0.11021416803953872, "grad_norm": 0.3455464541912079, "learning_rate": 1.9857534748127794e-05, "loss": 0.5194, "step": 4014 }, { "epoch": 0.11024162548050521, "grad_norm": 0.38779500126838684, "learning_rate": 1.985746209594323e-05, "loss": 0.5258, "step": 4015 }, { "epoch": 0.11026908292147172, "grad_norm": 0.3662167489528656, "learning_rate": 1.985738942537135e-05, "loss": 0.552, "step": 4016 }, { "epoch": 0.11029654036243822, "grad_norm": 0.36652469635009766, "learning_rate": 1.98573167364123e-05, "loss": 0.5093, "step": 4017 }, { "epoch": 0.11032399780340472, "grad_norm": 0.41379132866859436, "learning_rate": 1.98572440290662e-05, "loss": 0.5422, "step": 4018 }, { "epoch": 0.11035145524437122, "grad_norm": 0.3547268807888031, "learning_rate": 1.98571713033332e-05, "loss": 0.5235, "step": 4019 }, { "epoch": 0.11037891268533773, "grad_norm": 0.3779987096786499, "learning_rate": 1.9857098559213426e-05, "loss": 0.5709, "step": 4020 }, { "epoch": 0.11040637012630423, "grad_norm": 0.4075092673301697, "learning_rate": 1.9857025796707018e-05, "loss": 0.5713, "step": 4021 }, { "epoch": 0.11043382756727073, "grad_norm": 0.44708681106567383, "learning_rate": 1.9856953015814113e-05, "loss": 0.5571, "step": 4022 }, { "epoch": 0.11046128500823724, "grad_norm": 0.3658026158809662, "learning_rate": 1.985688021653484e-05, "loss": 0.588, "step": 4023 }, { "epoch": 0.11048874244920373, "grad_norm": 0.37584641575813293, "learning_rate": 1.9856807398869345e-05, "loss": 0.5013, "step": 4024 }, { "epoch": 0.11051619989017024, "grad_norm": 0.38247647881507874, "learning_rate": 1.9856734562817756e-05, "loss": 0.5488, "step": 4025 }, { "epoch": 0.11054365733113673, "grad_norm": 0.36218592524528503, "learning_rate": 1.985666170838021e-05, "loss": 0.527, "step": 4026 }, { "epoch": 0.11057111477210324, "grad_norm": 0.3936878740787506, "learning_rate": 1.9856588835556843e-05, "loss": 0.5812, "step": 4027 }, { "epoch": 0.11059857221306975, "grad_norm": 0.3802419602870941, "learning_rate": 1.9856515944347797e-05, "loss": 0.4793, "step": 4028 }, { "epoch": 0.11062602965403624, "grad_norm": 0.3565483093261719, "learning_rate": 1.98564430347532e-05, "loss": 0.4714, "step": 4029 }, { "epoch": 0.11065348709500275, "grad_norm": 0.339617520570755, "learning_rate": 1.985637010677319e-05, "loss": 0.4697, "step": 4030 }, { "epoch": 0.11068094453596924, "grad_norm": 0.33839449286460876, "learning_rate": 1.98562971604079e-05, "loss": 0.6306, "step": 4031 }, { "epoch": 0.11070840197693575, "grad_norm": 0.3494991958141327, "learning_rate": 1.9856224195657476e-05, "loss": 0.5107, "step": 4032 }, { "epoch": 0.11073585941790225, "grad_norm": 0.33218225836753845, "learning_rate": 1.9856151212522048e-05, "loss": 0.5561, "step": 4033 }, { "epoch": 0.11076331685886875, "grad_norm": 0.35247015953063965, "learning_rate": 1.985607821100175e-05, "loss": 0.5988, "step": 4034 }, { "epoch": 0.11079077429983525, "grad_norm": 0.3589276671409607, "learning_rate": 1.9856005191096723e-05, "loss": 0.5791, "step": 4035 }, { "epoch": 0.11081823174080176, "grad_norm": 0.378555566072464, "learning_rate": 1.9855932152807097e-05, "loss": 0.5456, "step": 4036 }, { "epoch": 0.11084568918176826, "grad_norm": 0.38541316986083984, "learning_rate": 1.9855859096133014e-05, "loss": 0.558, "step": 4037 }, { "epoch": 0.11087314662273476, "grad_norm": 0.352606862783432, "learning_rate": 1.985578602107461e-05, "loss": 0.4927, "step": 4038 }, { "epoch": 0.11090060406370127, "grad_norm": 0.35152381658554077, "learning_rate": 1.9855712927632015e-05, "loss": 0.4645, "step": 4039 }, { "epoch": 0.11092806150466776, "grad_norm": 0.3829084038734436, "learning_rate": 1.9855639815805374e-05, "loss": 0.5153, "step": 4040 }, { "epoch": 0.11095551894563427, "grad_norm": 0.38063299655914307, "learning_rate": 1.9855566685594817e-05, "loss": 0.4724, "step": 4041 }, { "epoch": 0.11098297638660076, "grad_norm": 0.35385727882385254, "learning_rate": 1.985549353700048e-05, "loss": 0.5449, "step": 4042 }, { "epoch": 0.11101043382756727, "grad_norm": 0.407499760389328, "learning_rate": 1.985542037002251e-05, "loss": 0.5293, "step": 4043 }, { "epoch": 0.11103789126853378, "grad_norm": 0.3590410649776459, "learning_rate": 1.9855347184661027e-05, "loss": 0.5141, "step": 4044 }, { "epoch": 0.11106534870950027, "grad_norm": 0.5200411081314087, "learning_rate": 1.9855273980916183e-05, "loss": 0.5494, "step": 4045 }, { "epoch": 0.11109280615046678, "grad_norm": 0.3730369210243225, "learning_rate": 1.9855200758788102e-05, "loss": 0.5104, "step": 4046 }, { "epoch": 0.11112026359143327, "grad_norm": 0.35227975249290466, "learning_rate": 1.9855127518276932e-05, "loss": 0.5256, "step": 4047 }, { "epoch": 0.11114772103239978, "grad_norm": 0.37153616547584534, "learning_rate": 1.9855054259382796e-05, "loss": 0.546, "step": 4048 }, { "epoch": 0.11117517847336628, "grad_norm": 0.3858408033847809, "learning_rate": 1.9854980982105846e-05, "loss": 0.5576, "step": 4049 }, { "epoch": 0.11120263591433278, "grad_norm": 0.3595297336578369, "learning_rate": 1.9854907686446205e-05, "loss": 0.5823, "step": 4050 }, { "epoch": 0.11123009335529929, "grad_norm": 0.34671834111213684, "learning_rate": 1.985483437240402e-05, "loss": 0.5383, "step": 4051 }, { "epoch": 0.11125755079626579, "grad_norm": 0.49240052700042725, "learning_rate": 1.985476103997942e-05, "loss": 0.5039, "step": 4052 }, { "epoch": 0.1112850082372323, "grad_norm": 0.3700600564479828, "learning_rate": 1.9854687689172548e-05, "loss": 0.5451, "step": 4053 }, { "epoch": 0.11131246567819879, "grad_norm": 0.3544233739376068, "learning_rate": 1.985461431998354e-05, "loss": 0.5979, "step": 4054 }, { "epoch": 0.1113399231191653, "grad_norm": 0.37619009613990784, "learning_rate": 1.985454093241253e-05, "loss": 0.5601, "step": 4055 }, { "epoch": 0.11136738056013179, "grad_norm": 0.34408509731292725, "learning_rate": 1.9854467526459655e-05, "loss": 0.4718, "step": 4056 }, { "epoch": 0.1113948380010983, "grad_norm": 0.46332937479019165, "learning_rate": 1.9854394102125053e-05, "loss": 0.5365, "step": 4057 }, { "epoch": 0.1114222954420648, "grad_norm": 0.3431167006492615, "learning_rate": 1.985432065940886e-05, "loss": 0.5104, "step": 4058 }, { "epoch": 0.1114497528830313, "grad_norm": 0.3374008536338806, "learning_rate": 1.9854247198311215e-05, "loss": 0.4355, "step": 4059 }, { "epoch": 0.11147721032399781, "grad_norm": 0.3567059636116028, "learning_rate": 1.9854173718832254e-05, "loss": 0.5743, "step": 4060 }, { "epoch": 0.1115046677649643, "grad_norm": 0.3606155812740326, "learning_rate": 1.9854100220972112e-05, "loss": 0.5899, "step": 4061 }, { "epoch": 0.11153212520593081, "grad_norm": 0.35930269956588745, "learning_rate": 1.985402670473093e-05, "loss": 0.5576, "step": 4062 }, { "epoch": 0.1115595826468973, "grad_norm": 0.3509731888771057, "learning_rate": 1.985395317010884e-05, "loss": 0.5373, "step": 4063 }, { "epoch": 0.11158704008786381, "grad_norm": 0.3881068229675293, "learning_rate": 1.9853879617105988e-05, "loss": 0.5766, "step": 4064 }, { "epoch": 0.11161449752883032, "grad_norm": 0.38228127360343933, "learning_rate": 1.98538060457225e-05, "loss": 0.536, "step": 4065 }, { "epoch": 0.11164195496979681, "grad_norm": 0.3625740706920624, "learning_rate": 1.985373245595852e-05, "loss": 0.5957, "step": 4066 }, { "epoch": 0.11166941241076332, "grad_norm": 0.35884225368499756, "learning_rate": 1.9853658847814187e-05, "loss": 0.6206, "step": 4067 }, { "epoch": 0.11169686985172982, "grad_norm": 0.3951311409473419, "learning_rate": 1.9853585221289632e-05, "loss": 0.519, "step": 4068 }, { "epoch": 0.11172432729269632, "grad_norm": 0.35461950302124023, "learning_rate": 1.9853511576384998e-05, "loss": 0.5493, "step": 4069 }, { "epoch": 0.11175178473366282, "grad_norm": 0.3540794253349304, "learning_rate": 1.985343791310042e-05, "loss": 0.4908, "step": 4070 }, { "epoch": 0.11177924217462933, "grad_norm": 0.34181687235832214, "learning_rate": 1.9853364231436032e-05, "loss": 0.5955, "step": 4071 }, { "epoch": 0.11180669961559582, "grad_norm": 0.36895066499710083, "learning_rate": 1.985329053139198e-05, "loss": 0.5873, "step": 4072 }, { "epoch": 0.11183415705656233, "grad_norm": 0.4041939377784729, "learning_rate": 1.985321681296839e-05, "loss": 0.5872, "step": 4073 }, { "epoch": 0.11186161449752884, "grad_norm": 0.3615841865539551, "learning_rate": 1.985314307616541e-05, "loss": 0.5616, "step": 4074 }, { "epoch": 0.11188907193849533, "grad_norm": 0.3597436845302582, "learning_rate": 1.9853069320983173e-05, "loss": 0.5134, "step": 4075 }, { "epoch": 0.11191652937946184, "grad_norm": 0.3820773661136627, "learning_rate": 1.985299554742182e-05, "loss": 0.5648, "step": 4076 }, { "epoch": 0.11194398682042833, "grad_norm": 0.48970553278923035, "learning_rate": 1.985292175548148e-05, "loss": 0.5709, "step": 4077 }, { "epoch": 0.11197144426139484, "grad_norm": 0.3757760524749756, "learning_rate": 1.98528479451623e-05, "loss": 0.5381, "step": 4078 }, { "epoch": 0.11199890170236133, "grad_norm": 0.35946306586265564, "learning_rate": 1.9852774116464414e-05, "loss": 0.5163, "step": 4079 }, { "epoch": 0.11202635914332784, "grad_norm": 0.3923642039299011, "learning_rate": 1.9852700269387957e-05, "loss": 0.5791, "step": 4080 }, { "epoch": 0.11205381658429435, "grad_norm": 0.314471572637558, "learning_rate": 1.985262640393307e-05, "loss": 0.4531, "step": 4081 }, { "epoch": 0.11208127402526084, "grad_norm": 0.37800249457359314, "learning_rate": 1.9852552520099892e-05, "loss": 0.5067, "step": 4082 }, { "epoch": 0.11210873146622735, "grad_norm": 0.3128666579723358, "learning_rate": 1.9852478617888556e-05, "loss": 0.3852, "step": 4083 }, { "epoch": 0.11213618890719385, "grad_norm": 0.41324499249458313, "learning_rate": 1.9852404697299207e-05, "loss": 0.618, "step": 4084 }, { "epoch": 0.11216364634816035, "grad_norm": 0.40907084941864014, "learning_rate": 1.9852330758331977e-05, "loss": 0.5276, "step": 4085 }, { "epoch": 0.11219110378912685, "grad_norm": 0.38857921957969666, "learning_rate": 1.9852256800987006e-05, "loss": 0.5538, "step": 4086 }, { "epoch": 0.11221856123009336, "grad_norm": 0.34586748480796814, "learning_rate": 1.985218282526443e-05, "loss": 0.5477, "step": 4087 }, { "epoch": 0.11224601867105986, "grad_norm": 0.3319321870803833, "learning_rate": 1.9852108831164386e-05, "loss": 0.4691, "step": 4088 }, { "epoch": 0.11227347611202636, "grad_norm": 0.40570083260536194, "learning_rate": 1.985203481868702e-05, "loss": 0.5753, "step": 4089 }, { "epoch": 0.11230093355299287, "grad_norm": 0.33897367119789124, "learning_rate": 1.9851960787832462e-05, "loss": 0.4866, "step": 4090 }, { "epoch": 0.11232839099395936, "grad_norm": 0.3339914381504059, "learning_rate": 1.9851886738600855e-05, "loss": 0.5279, "step": 4091 }, { "epoch": 0.11235584843492587, "grad_norm": 0.43470486998558044, "learning_rate": 1.9851812670992333e-05, "loss": 0.6067, "step": 4092 }, { "epoch": 0.11238330587589236, "grad_norm": 0.4028666913509369, "learning_rate": 1.9851738585007038e-05, "loss": 0.507, "step": 4093 }, { "epoch": 0.11241076331685887, "grad_norm": 0.36804062128067017, "learning_rate": 1.9851664480645102e-05, "loss": 0.5163, "step": 4094 }, { "epoch": 0.11243822075782538, "grad_norm": 0.36617839336395264, "learning_rate": 1.9851590357906668e-05, "loss": 0.6337, "step": 4095 }, { "epoch": 0.11246567819879187, "grad_norm": 0.3709811270236969, "learning_rate": 1.9851516216791878e-05, "loss": 0.5086, "step": 4096 }, { "epoch": 0.11249313563975838, "grad_norm": 0.39433562755584717, "learning_rate": 1.9851442057300863e-05, "loss": 0.6625, "step": 4097 }, { "epoch": 0.11252059308072487, "grad_norm": 0.3529183268547058, "learning_rate": 1.9851367879433764e-05, "loss": 0.4744, "step": 4098 }, { "epoch": 0.11254805052169138, "grad_norm": 0.37831541895866394, "learning_rate": 1.985129368319072e-05, "loss": 0.5098, "step": 4099 }, { "epoch": 0.11257550796265788, "grad_norm": 0.3643631637096405, "learning_rate": 1.985121946857187e-05, "loss": 0.5188, "step": 4100 }, { "epoch": 0.11260296540362438, "grad_norm": 0.36182543635368347, "learning_rate": 1.9851145235577352e-05, "loss": 0.509, "step": 4101 }, { "epoch": 0.11263042284459088, "grad_norm": 0.37973377108573914, "learning_rate": 1.9851070984207302e-05, "loss": 0.5284, "step": 4102 }, { "epoch": 0.11265788028555739, "grad_norm": 0.3289060890674591, "learning_rate": 1.985099671446186e-05, "loss": 0.449, "step": 4103 }, { "epoch": 0.1126853377265239, "grad_norm": 0.40577611327171326, "learning_rate": 1.985092242634117e-05, "loss": 0.5417, "step": 4104 }, { "epoch": 0.11271279516749039, "grad_norm": 0.3411047160625458, "learning_rate": 1.985084811984536e-05, "loss": 0.5529, "step": 4105 }, { "epoch": 0.1127402526084569, "grad_norm": 0.31905806064605713, "learning_rate": 1.985077379497458e-05, "loss": 0.4489, "step": 4106 }, { "epoch": 0.11276771004942339, "grad_norm": 0.38043197989463806, "learning_rate": 1.985069945172896e-05, "loss": 0.5901, "step": 4107 }, { "epoch": 0.1127951674903899, "grad_norm": 0.3472435474395752, "learning_rate": 1.985062509010864e-05, "loss": 0.5745, "step": 4108 }, { "epoch": 0.11282262493135639, "grad_norm": 0.4254503846168518, "learning_rate": 1.985055071011376e-05, "loss": 0.52, "step": 4109 }, { "epoch": 0.1128500823723229, "grad_norm": 0.42407822608947754, "learning_rate": 1.9850476311744462e-05, "loss": 0.5107, "step": 4110 }, { "epoch": 0.11287753981328941, "grad_norm": 0.37014007568359375, "learning_rate": 1.985040189500088e-05, "loss": 0.4598, "step": 4111 }, { "epoch": 0.1129049972542559, "grad_norm": 0.3566095232963562, "learning_rate": 1.9850327459883155e-05, "loss": 0.5822, "step": 4112 }, { "epoch": 0.11293245469522241, "grad_norm": 0.3826529383659363, "learning_rate": 1.9850253006391426e-05, "loss": 0.5052, "step": 4113 }, { "epoch": 0.1129599121361889, "grad_norm": 0.33563855290412903, "learning_rate": 1.985017853452583e-05, "loss": 0.5663, "step": 4114 }, { "epoch": 0.11298736957715541, "grad_norm": 0.4095176160335541, "learning_rate": 1.9850104044286507e-05, "loss": 0.551, "step": 4115 }, { "epoch": 0.1130148270181219, "grad_norm": 0.3572610020637512, "learning_rate": 1.9850029535673598e-05, "loss": 0.5512, "step": 4116 }, { "epoch": 0.11304228445908841, "grad_norm": 0.3405890166759491, "learning_rate": 1.984995500868724e-05, "loss": 0.5373, "step": 4117 }, { "epoch": 0.11306974190005492, "grad_norm": 0.36768409609794617, "learning_rate": 1.9849880463327567e-05, "loss": 0.5107, "step": 4118 }, { "epoch": 0.11309719934102141, "grad_norm": 0.4499264359474182, "learning_rate": 1.984980589959473e-05, "loss": 0.5383, "step": 4119 }, { "epoch": 0.11312465678198792, "grad_norm": 0.36219334602355957, "learning_rate": 1.984973131748886e-05, "loss": 0.5753, "step": 4120 }, { "epoch": 0.11315211422295442, "grad_norm": 0.5831260681152344, "learning_rate": 1.9849656717010094e-05, "loss": 0.5154, "step": 4121 }, { "epoch": 0.11317957166392092, "grad_norm": 0.4036034345626831, "learning_rate": 1.984958209815858e-05, "loss": 0.4951, "step": 4122 }, { "epoch": 0.11320702910488742, "grad_norm": 0.36296018958091736, "learning_rate": 1.9849507460934445e-05, "loss": 0.5371, "step": 4123 }, { "epoch": 0.11323448654585393, "grad_norm": 0.3515297472476959, "learning_rate": 1.984943280533784e-05, "loss": 0.5899, "step": 4124 }, { "epoch": 0.11326194398682043, "grad_norm": 0.3688106834888458, "learning_rate": 1.9849358131368896e-05, "loss": 0.5457, "step": 4125 }, { "epoch": 0.11328940142778693, "grad_norm": 0.3548530042171478, "learning_rate": 1.9849283439027758e-05, "loss": 0.5141, "step": 4126 }, { "epoch": 0.11331685886875344, "grad_norm": 0.4081372618675232, "learning_rate": 1.9849208728314563e-05, "loss": 0.5224, "step": 4127 }, { "epoch": 0.11334431630971993, "grad_norm": 0.34947437047958374, "learning_rate": 1.984913399922945e-05, "loss": 0.5513, "step": 4128 }, { "epoch": 0.11337177375068644, "grad_norm": 7.501532554626465, "learning_rate": 1.984905925177256e-05, "loss": 0.4914, "step": 4129 }, { "epoch": 0.11339923119165293, "grad_norm": 0.4358418583869934, "learning_rate": 1.9848984485944027e-05, "loss": 0.59, "step": 4130 }, { "epoch": 0.11342668863261944, "grad_norm": 0.4103964567184448, "learning_rate": 1.9848909701744e-05, "loss": 0.5567, "step": 4131 }, { "epoch": 0.11345414607358595, "grad_norm": 0.4298754930496216, "learning_rate": 1.9848834899172608e-05, "loss": 0.4509, "step": 4132 }, { "epoch": 0.11348160351455244, "grad_norm": 0.3420647978782654, "learning_rate": 1.9848760078229997e-05, "loss": 0.5147, "step": 4133 }, { "epoch": 0.11350906095551895, "grad_norm": 0.3658965229988098, "learning_rate": 1.9848685238916303e-05, "loss": 0.5536, "step": 4134 }, { "epoch": 0.11353651839648544, "grad_norm": 0.36154550313949585, "learning_rate": 1.984861038123167e-05, "loss": 0.6287, "step": 4135 }, { "epoch": 0.11356397583745195, "grad_norm": 0.3354254364967346, "learning_rate": 1.9848535505176234e-05, "loss": 0.5009, "step": 4136 }, { "epoch": 0.11359143327841845, "grad_norm": 0.325506329536438, "learning_rate": 1.9848460610750137e-05, "loss": 0.5108, "step": 4137 }, { "epoch": 0.11361889071938495, "grad_norm": 0.364805668592453, "learning_rate": 1.984838569795352e-05, "loss": 0.5594, "step": 4138 }, { "epoch": 0.11364634816035145, "grad_norm": 0.3829098343849182, "learning_rate": 1.9848310766786518e-05, "loss": 0.6202, "step": 4139 }, { "epoch": 0.11367380560131796, "grad_norm": 0.3727422058582306, "learning_rate": 1.9848235817249273e-05, "loss": 0.5811, "step": 4140 }, { "epoch": 0.11370126304228446, "grad_norm": 0.3694833815097809, "learning_rate": 1.9848160849341927e-05, "loss": 0.5648, "step": 4141 }, { "epoch": 0.11372872048325096, "grad_norm": 0.3513180613517761, "learning_rate": 1.9848085863064614e-05, "loss": 0.5173, "step": 4142 }, { "epoch": 0.11375617792421747, "grad_norm": 0.35888150334358215, "learning_rate": 1.984801085841748e-05, "loss": 0.5116, "step": 4143 }, { "epoch": 0.11378363536518396, "grad_norm": 0.390251100063324, "learning_rate": 1.9847935835400663e-05, "loss": 0.5636, "step": 4144 }, { "epoch": 0.11381109280615047, "grad_norm": 0.3496544659137726, "learning_rate": 1.9847860794014303e-05, "loss": 0.5625, "step": 4145 }, { "epoch": 0.11383855024711696, "grad_norm": 0.3689925968647003, "learning_rate": 1.984778573425854e-05, "loss": 0.5296, "step": 4146 }, { "epoch": 0.11386600768808347, "grad_norm": 0.3790142238140106, "learning_rate": 1.984771065613351e-05, "loss": 0.5849, "step": 4147 }, { "epoch": 0.11389346512904998, "grad_norm": 0.3604047894477844, "learning_rate": 1.984763555963936e-05, "loss": 0.5147, "step": 4148 }, { "epoch": 0.11392092257001647, "grad_norm": 0.5891048908233643, "learning_rate": 1.9847560444776225e-05, "loss": 0.5687, "step": 4149 }, { "epoch": 0.11394838001098298, "grad_norm": 0.35904812812805176, "learning_rate": 1.984748531154425e-05, "loss": 0.5261, "step": 4150 }, { "epoch": 0.11397583745194947, "grad_norm": 0.33957844972610474, "learning_rate": 1.9847410159943568e-05, "loss": 0.5035, "step": 4151 }, { "epoch": 0.11400329489291598, "grad_norm": 0.3630381226539612, "learning_rate": 1.9847334989974323e-05, "loss": 0.6038, "step": 4152 }, { "epoch": 0.11403075233388248, "grad_norm": 0.34112903475761414, "learning_rate": 1.9847259801636657e-05, "loss": 0.5199, "step": 4153 }, { "epoch": 0.11405820977484898, "grad_norm": 0.34171876311302185, "learning_rate": 1.984718459493071e-05, "loss": 0.5574, "step": 4154 }, { "epoch": 0.11408566721581549, "grad_norm": 0.29017317295074463, "learning_rate": 1.9847109369856618e-05, "loss": 0.3992, "step": 4155 }, { "epoch": 0.11411312465678199, "grad_norm": 0.38415712118148804, "learning_rate": 1.9847034126414525e-05, "loss": 0.6872, "step": 4156 }, { "epoch": 0.1141405820977485, "grad_norm": 0.334358811378479, "learning_rate": 1.984695886460457e-05, "loss": 0.5041, "step": 4157 }, { "epoch": 0.11416803953871499, "grad_norm": 0.4600684642791748, "learning_rate": 1.9846883584426897e-05, "loss": 0.4959, "step": 4158 }, { "epoch": 0.1141954969796815, "grad_norm": 0.3284221589565277, "learning_rate": 1.984680828588164e-05, "loss": 0.5551, "step": 4159 }, { "epoch": 0.11422295442064799, "grad_norm": 0.37273934483528137, "learning_rate": 1.9846732968968946e-05, "loss": 0.6048, "step": 4160 }, { "epoch": 0.1142504118616145, "grad_norm": 0.5212000608444214, "learning_rate": 1.984665763368895e-05, "loss": 0.5511, "step": 4161 }, { "epoch": 0.114277869302581, "grad_norm": 0.36044690012931824, "learning_rate": 1.9846582280041796e-05, "loss": 0.5282, "step": 4162 }, { "epoch": 0.1143053267435475, "grad_norm": 0.32257845997810364, "learning_rate": 1.984650690802762e-05, "loss": 0.482, "step": 4163 }, { "epoch": 0.11433278418451401, "grad_norm": 0.34769800305366516, "learning_rate": 1.9846431517646573e-05, "loss": 0.4921, "step": 4164 }, { "epoch": 0.1143602416254805, "grad_norm": 0.4396451711654663, "learning_rate": 1.9846356108898784e-05, "loss": 0.5081, "step": 4165 }, { "epoch": 0.11438769906644701, "grad_norm": 1.6036192178726196, "learning_rate": 1.9846280681784397e-05, "loss": 0.5874, "step": 4166 }, { "epoch": 0.1144151565074135, "grad_norm": 0.35703063011169434, "learning_rate": 1.9846205236303554e-05, "loss": 0.4805, "step": 4167 }, { "epoch": 0.11444261394838001, "grad_norm": 0.4357331097126007, "learning_rate": 1.9846129772456398e-05, "loss": 0.535, "step": 4168 }, { "epoch": 0.1144700713893465, "grad_norm": 0.35103943943977356, "learning_rate": 1.9846054290243067e-05, "loss": 0.5722, "step": 4169 }, { "epoch": 0.11449752883031301, "grad_norm": 0.41969531774520874, "learning_rate": 1.98459787896637e-05, "loss": 0.5568, "step": 4170 }, { "epoch": 0.11452498627127952, "grad_norm": 0.6014300584793091, "learning_rate": 1.9845903270718443e-05, "loss": 0.5552, "step": 4171 }, { "epoch": 0.11455244371224602, "grad_norm": 0.5062885284423828, "learning_rate": 1.984582773340743e-05, "loss": 0.454, "step": 4172 }, { "epoch": 0.11457990115321252, "grad_norm": 0.38537123799324036, "learning_rate": 1.984575217773081e-05, "loss": 0.5902, "step": 4173 }, { "epoch": 0.11460735859417902, "grad_norm": 0.3320772647857666, "learning_rate": 1.984567660368872e-05, "loss": 0.5944, "step": 4174 }, { "epoch": 0.11463481603514553, "grad_norm": 0.37333858013153076, "learning_rate": 1.9845601011281298e-05, "loss": 0.5906, "step": 4175 }, { "epoch": 0.11466227347611202, "grad_norm": 0.35021984577178955, "learning_rate": 1.984552540050869e-05, "loss": 0.5863, "step": 4176 }, { "epoch": 0.11468973091707853, "grad_norm": 0.3794737160205841, "learning_rate": 1.9845449771371033e-05, "loss": 0.6478, "step": 4177 }, { "epoch": 0.11471718835804504, "grad_norm": 0.3535974323749542, "learning_rate": 1.9845374123868472e-05, "loss": 0.5139, "step": 4178 }, { "epoch": 0.11474464579901153, "grad_norm": 0.3855864405632019, "learning_rate": 1.9845298458001146e-05, "loss": 0.5048, "step": 4179 }, { "epoch": 0.11477210323997804, "grad_norm": 0.3546598255634308, "learning_rate": 1.9845222773769196e-05, "loss": 0.5746, "step": 4180 }, { "epoch": 0.11479956068094453, "grad_norm": 0.3276958763599396, "learning_rate": 1.984514707117276e-05, "loss": 0.4756, "step": 4181 }, { "epoch": 0.11482701812191104, "grad_norm": 0.34229776263237, "learning_rate": 1.9845071350211984e-05, "loss": 0.5029, "step": 4182 }, { "epoch": 0.11485447556287753, "grad_norm": 0.3287321925163269, "learning_rate": 1.984499561088701e-05, "loss": 0.4635, "step": 4183 }, { "epoch": 0.11488193300384404, "grad_norm": 0.3378652036190033, "learning_rate": 1.9844919853197974e-05, "loss": 0.5311, "step": 4184 }, { "epoch": 0.11490939044481055, "grad_norm": 0.4110613763332367, "learning_rate": 1.9844844077145024e-05, "loss": 0.5638, "step": 4185 }, { "epoch": 0.11493684788577704, "grad_norm": 0.47741952538490295, "learning_rate": 1.9844768282728294e-05, "loss": 0.5849, "step": 4186 }, { "epoch": 0.11496430532674355, "grad_norm": 0.3623121678829193, "learning_rate": 1.984469246994793e-05, "loss": 0.5333, "step": 4187 }, { "epoch": 0.11499176276771005, "grad_norm": 0.39770805835723877, "learning_rate": 1.9844616638804073e-05, "loss": 0.5667, "step": 4188 }, { "epoch": 0.11501922020867655, "grad_norm": 0.3497883677482605, "learning_rate": 1.9844540789296863e-05, "loss": 0.4807, "step": 4189 }, { "epoch": 0.11504667764964305, "grad_norm": 0.3554711937904358, "learning_rate": 1.9844464921426443e-05, "loss": 0.5624, "step": 4190 }, { "epoch": 0.11507413509060956, "grad_norm": 0.3578208088874817, "learning_rate": 1.9844389035192954e-05, "loss": 0.5449, "step": 4191 }, { "epoch": 0.11510159253157606, "grad_norm": 0.3598921597003937, "learning_rate": 1.984431313059654e-05, "loss": 0.5522, "step": 4192 }, { "epoch": 0.11512904997254256, "grad_norm": 0.3346679210662842, "learning_rate": 1.9844237207637337e-05, "loss": 0.4762, "step": 4193 }, { "epoch": 0.11515650741350907, "grad_norm": 0.46697360277175903, "learning_rate": 1.984416126631549e-05, "loss": 0.5563, "step": 4194 }, { "epoch": 0.11518396485447556, "grad_norm": 0.3646450638771057, "learning_rate": 1.9844085306631142e-05, "loss": 0.5205, "step": 4195 }, { "epoch": 0.11521142229544207, "grad_norm": 0.35714468359947205, "learning_rate": 1.984400932858443e-05, "loss": 0.5075, "step": 4196 }, { "epoch": 0.11523887973640856, "grad_norm": 0.3419697880744934, "learning_rate": 1.9843933332175502e-05, "loss": 0.5096, "step": 4197 }, { "epoch": 0.11526633717737507, "grad_norm": 0.35312512516975403, "learning_rate": 1.9843857317404496e-05, "loss": 0.4845, "step": 4198 }, { "epoch": 0.11529379461834158, "grad_norm": 0.413107305765152, "learning_rate": 1.984378128427155e-05, "loss": 0.6427, "step": 4199 }, { "epoch": 0.11532125205930807, "grad_norm": 0.36925172805786133, "learning_rate": 1.9843705232776815e-05, "loss": 0.5237, "step": 4200 }, { "epoch": 0.11534870950027458, "grad_norm": 0.35858896374702454, "learning_rate": 1.9843629162920428e-05, "loss": 0.5457, "step": 4201 }, { "epoch": 0.11537616694124107, "grad_norm": 0.681543231010437, "learning_rate": 1.9843553074702528e-05, "loss": 0.4684, "step": 4202 }, { "epoch": 0.11540362438220758, "grad_norm": 0.424679696559906, "learning_rate": 1.9843476968123265e-05, "loss": 0.5888, "step": 4203 }, { "epoch": 0.11543108182317408, "grad_norm": 0.3636133670806885, "learning_rate": 1.984340084318277e-05, "loss": 0.4717, "step": 4204 }, { "epoch": 0.11545853926414058, "grad_norm": 0.46361660957336426, "learning_rate": 1.9843324699881196e-05, "loss": 0.555, "step": 4205 }, { "epoch": 0.11548599670510708, "grad_norm": 0.3949008285999298, "learning_rate": 1.9843248538218675e-05, "loss": 0.5723, "step": 4206 }, { "epoch": 0.11551345414607359, "grad_norm": 0.33406275510787964, "learning_rate": 1.9843172358195356e-05, "loss": 0.5598, "step": 4207 }, { "epoch": 0.1155409115870401, "grad_norm": 0.6197773814201355, "learning_rate": 1.9843096159811375e-05, "loss": 0.5663, "step": 4208 }, { "epoch": 0.11556836902800659, "grad_norm": 0.44938984513282776, "learning_rate": 1.9843019943066885e-05, "loss": 0.6081, "step": 4209 }, { "epoch": 0.1155958264689731, "grad_norm": 0.3471980392932892, "learning_rate": 1.9842943707962016e-05, "loss": 0.4499, "step": 4210 }, { "epoch": 0.11562328390993959, "grad_norm": 0.37636837363243103, "learning_rate": 1.984286745449692e-05, "loss": 0.5757, "step": 4211 }, { "epoch": 0.1156507413509061, "grad_norm": 0.3579261004924774, "learning_rate": 1.984279118267173e-05, "loss": 0.5022, "step": 4212 }, { "epoch": 0.11567819879187259, "grad_norm": 0.3555963337421417, "learning_rate": 1.9842714892486596e-05, "loss": 0.5444, "step": 4213 }, { "epoch": 0.1157056562328391, "grad_norm": 0.32961294054985046, "learning_rate": 1.9842638583941657e-05, "loss": 0.4274, "step": 4214 }, { "epoch": 0.11573311367380561, "grad_norm": 0.35115379095077515, "learning_rate": 1.9842562257037055e-05, "loss": 0.5778, "step": 4215 }, { "epoch": 0.1157605711147721, "grad_norm": 0.4391459822654724, "learning_rate": 1.984248591177293e-05, "loss": 0.5783, "step": 4216 }, { "epoch": 0.11578802855573861, "grad_norm": 0.3486507534980774, "learning_rate": 1.984240954814943e-05, "loss": 0.5172, "step": 4217 }, { "epoch": 0.1158154859967051, "grad_norm": 0.3970213234424591, "learning_rate": 1.9842333166166697e-05, "loss": 0.4913, "step": 4218 }, { "epoch": 0.11584294343767161, "grad_norm": 0.3639698326587677, "learning_rate": 1.984225676582487e-05, "loss": 0.5213, "step": 4219 }, { "epoch": 0.1158704008786381, "grad_norm": 0.34656840562820435, "learning_rate": 1.9842180347124093e-05, "loss": 0.4985, "step": 4220 }, { "epoch": 0.11589785831960461, "grad_norm": 0.46203258633613586, "learning_rate": 1.9842103910064507e-05, "loss": 0.6297, "step": 4221 }, { "epoch": 0.11592531576057112, "grad_norm": 0.40180420875549316, "learning_rate": 1.9842027454646257e-05, "loss": 0.5677, "step": 4222 }, { "epoch": 0.11595277320153762, "grad_norm": 0.4292154908180237, "learning_rate": 1.9841950980869487e-05, "loss": 0.5861, "step": 4223 }, { "epoch": 0.11598023064250412, "grad_norm": 0.38251960277557373, "learning_rate": 1.984187448873433e-05, "loss": 0.5959, "step": 4224 }, { "epoch": 0.11600768808347062, "grad_norm": 0.39926809072494507, "learning_rate": 1.984179797824094e-05, "loss": 0.5663, "step": 4225 }, { "epoch": 0.11603514552443712, "grad_norm": 0.3891090750694275, "learning_rate": 1.984172144938946e-05, "loss": 0.543, "step": 4226 }, { "epoch": 0.11606260296540362, "grad_norm": 0.35699373483657837, "learning_rate": 1.9841644902180024e-05, "loss": 0.5225, "step": 4227 }, { "epoch": 0.11609006040637013, "grad_norm": 0.3626454472541809, "learning_rate": 1.984156833661278e-05, "loss": 0.5572, "step": 4228 }, { "epoch": 0.11611751784733663, "grad_norm": 0.3828919529914856, "learning_rate": 1.984149175268787e-05, "loss": 0.623, "step": 4229 }, { "epoch": 0.11614497528830313, "grad_norm": 0.36768749356269836, "learning_rate": 1.9841415150405435e-05, "loss": 0.5828, "step": 4230 }, { "epoch": 0.11617243272926964, "grad_norm": 0.3888963460922241, "learning_rate": 1.9841338529765623e-05, "loss": 0.5318, "step": 4231 }, { "epoch": 0.11619989017023613, "grad_norm": 0.3801022469997406, "learning_rate": 1.984126189076857e-05, "loss": 0.5379, "step": 4232 }, { "epoch": 0.11622734761120264, "grad_norm": 0.40141505002975464, "learning_rate": 1.9841185233414428e-05, "loss": 0.5517, "step": 4233 }, { "epoch": 0.11625480505216913, "grad_norm": 0.38527941703796387, "learning_rate": 1.984110855770333e-05, "loss": 0.4709, "step": 4234 }, { "epoch": 0.11628226249313564, "grad_norm": 0.3471679389476776, "learning_rate": 1.9841031863635423e-05, "loss": 0.5576, "step": 4235 }, { "epoch": 0.11630971993410213, "grad_norm": 0.3640578091144562, "learning_rate": 1.9840955151210854e-05, "loss": 0.5199, "step": 4236 }, { "epoch": 0.11633717737506864, "grad_norm": 0.41536715626716614, "learning_rate": 1.9840878420429762e-05, "loss": 0.6857, "step": 4237 }, { "epoch": 0.11636463481603515, "grad_norm": 0.3798644542694092, "learning_rate": 1.984080167129229e-05, "loss": 0.5524, "step": 4238 }, { "epoch": 0.11639209225700164, "grad_norm": 0.3628763258457184, "learning_rate": 1.984072490379858e-05, "loss": 0.5834, "step": 4239 }, { "epoch": 0.11641954969796815, "grad_norm": 0.42631757259368896, "learning_rate": 1.984064811794878e-05, "loss": 0.5408, "step": 4240 }, { "epoch": 0.11644700713893465, "grad_norm": 0.34816816449165344, "learning_rate": 1.984057131374303e-05, "loss": 0.5024, "step": 4241 }, { "epoch": 0.11647446457990115, "grad_norm": 0.3807898163795471, "learning_rate": 1.9840494491181475e-05, "loss": 0.5229, "step": 4242 }, { "epoch": 0.11650192202086765, "grad_norm": 0.3971134424209595, "learning_rate": 1.9840417650264257e-05, "loss": 0.5361, "step": 4243 }, { "epoch": 0.11652937946183416, "grad_norm": 0.37945255637168884, "learning_rate": 1.9840340790991516e-05, "loss": 0.5797, "step": 4244 }, { "epoch": 0.11655683690280066, "grad_norm": 0.40804168581962585, "learning_rate": 1.9840263913363402e-05, "loss": 0.5351, "step": 4245 }, { "epoch": 0.11658429434376716, "grad_norm": 0.3872681260108948, "learning_rate": 1.9840187017380054e-05, "loss": 0.6222, "step": 4246 }, { "epoch": 0.11661175178473367, "grad_norm": 0.34761250019073486, "learning_rate": 1.984011010304162e-05, "loss": 0.5602, "step": 4247 }, { "epoch": 0.11663920922570016, "grad_norm": 0.37196213006973267, "learning_rate": 1.9840033170348233e-05, "loss": 0.5545, "step": 4248 }, { "epoch": 0.11666666666666667, "grad_norm": 0.3382221758365631, "learning_rate": 1.9839956219300048e-05, "loss": 0.5562, "step": 4249 }, { "epoch": 0.11669412410763316, "grad_norm": 0.34125590324401855, "learning_rate": 1.9839879249897205e-05, "loss": 0.5171, "step": 4250 }, { "epoch": 0.11672158154859967, "grad_norm": 0.33845072984695435, "learning_rate": 1.9839802262139846e-05, "loss": 0.6263, "step": 4251 }, { "epoch": 0.11674903898956618, "grad_norm": 0.3664212226867676, "learning_rate": 1.9839725256028113e-05, "loss": 0.614, "step": 4252 }, { "epoch": 0.11677649643053267, "grad_norm": 0.4009544849395752, "learning_rate": 1.9839648231562152e-05, "loss": 0.6154, "step": 4253 }, { "epoch": 0.11680395387149918, "grad_norm": 0.3520594537258148, "learning_rate": 1.9839571188742108e-05, "loss": 0.5777, "step": 4254 }, { "epoch": 0.11683141131246567, "grad_norm": 0.4419446587562561, "learning_rate": 1.9839494127568124e-05, "loss": 0.5154, "step": 4255 }, { "epoch": 0.11685886875343218, "grad_norm": 0.3649204969406128, "learning_rate": 1.9839417048040343e-05, "loss": 0.5492, "step": 4256 }, { "epoch": 0.11688632619439868, "grad_norm": 0.3953251540660858, "learning_rate": 1.9839339950158905e-05, "loss": 0.6009, "step": 4257 }, { "epoch": 0.11691378363536518, "grad_norm": 0.3759520351886749, "learning_rate": 1.983926283392396e-05, "loss": 0.5521, "step": 4258 }, { "epoch": 0.11694124107633169, "grad_norm": 0.4148910939693451, "learning_rate": 1.9839185699335653e-05, "loss": 0.4753, "step": 4259 }, { "epoch": 0.11696869851729819, "grad_norm": 0.34228071570396423, "learning_rate": 1.983910854639412e-05, "loss": 0.527, "step": 4260 }, { "epoch": 0.1169961559582647, "grad_norm": 0.3581376373767853, "learning_rate": 1.9839031375099514e-05, "loss": 0.5292, "step": 4261 }, { "epoch": 0.11702361339923119, "grad_norm": 0.3850286900997162, "learning_rate": 1.9838954185451967e-05, "loss": 0.6341, "step": 4262 }, { "epoch": 0.1170510708401977, "grad_norm": 0.3255082666873932, "learning_rate": 1.983887697745164e-05, "loss": 0.5299, "step": 4263 }, { "epoch": 0.11707852828116419, "grad_norm": 0.3631250560283661, "learning_rate": 1.9838799751098657e-05, "loss": 0.5468, "step": 4264 }, { "epoch": 0.1171059857221307, "grad_norm": 0.36529797315597534, "learning_rate": 1.9838722506393176e-05, "loss": 0.6234, "step": 4265 }, { "epoch": 0.1171334431630972, "grad_norm": 0.36167314648628235, "learning_rate": 1.983864524333534e-05, "loss": 0.4918, "step": 4266 }, { "epoch": 0.1171609006040637, "grad_norm": 0.3611524701118469, "learning_rate": 1.983856796192529e-05, "loss": 0.5664, "step": 4267 }, { "epoch": 0.11718835804503021, "grad_norm": 0.3888254761695862, "learning_rate": 1.9838490662163167e-05, "loss": 0.4715, "step": 4268 }, { "epoch": 0.1172158154859967, "grad_norm": 0.4659492075443268, "learning_rate": 1.9838413344049122e-05, "loss": 0.5449, "step": 4269 }, { "epoch": 0.11724327292696321, "grad_norm": 0.40978166460990906, "learning_rate": 1.9838336007583298e-05, "loss": 0.6012, "step": 4270 }, { "epoch": 0.1172707303679297, "grad_norm": 0.3967670798301697, "learning_rate": 1.9838258652765834e-05, "loss": 0.5914, "step": 4271 }, { "epoch": 0.11729818780889621, "grad_norm": 0.3783209025859833, "learning_rate": 1.983818127959688e-05, "loss": 0.5871, "step": 4272 }, { "epoch": 0.1173256452498627, "grad_norm": 0.3439953029155731, "learning_rate": 1.9838103888076573e-05, "loss": 0.5523, "step": 4273 }, { "epoch": 0.11735310269082921, "grad_norm": 0.3168397545814514, "learning_rate": 1.9838026478205064e-05, "loss": 0.5542, "step": 4274 }, { "epoch": 0.11738056013179572, "grad_norm": 0.3501748740673065, "learning_rate": 1.98379490499825e-05, "loss": 0.5215, "step": 4275 }, { "epoch": 0.11740801757276222, "grad_norm": 0.3594259023666382, "learning_rate": 1.9837871603409015e-05, "loss": 0.5607, "step": 4276 }, { "epoch": 0.11743547501372872, "grad_norm": 0.3451467454433441, "learning_rate": 1.9837794138484763e-05, "loss": 0.471, "step": 4277 }, { "epoch": 0.11746293245469522, "grad_norm": 0.36498814821243286, "learning_rate": 1.9837716655209887e-05, "loss": 0.5177, "step": 4278 }, { "epoch": 0.11749038989566173, "grad_norm": 0.3491741120815277, "learning_rate": 1.9837639153584524e-05, "loss": 0.4916, "step": 4279 }, { "epoch": 0.11751784733662822, "grad_norm": 0.43015438318252563, "learning_rate": 1.9837561633608828e-05, "loss": 0.636, "step": 4280 }, { "epoch": 0.11754530477759473, "grad_norm": 0.3575936555862427, "learning_rate": 1.983748409528294e-05, "loss": 0.457, "step": 4281 }, { "epoch": 0.11757276221856124, "grad_norm": 0.4023541808128357, "learning_rate": 1.9837406538607005e-05, "loss": 0.5836, "step": 4282 }, { "epoch": 0.11760021965952773, "grad_norm": 0.37698858976364136, "learning_rate": 1.9837328963581164e-05, "loss": 0.6033, "step": 4283 }, { "epoch": 0.11762767710049424, "grad_norm": 0.32033872604370117, "learning_rate": 1.983725137020557e-05, "loss": 0.5361, "step": 4284 }, { "epoch": 0.11765513454146073, "grad_norm": 0.33542105555534363, "learning_rate": 1.983717375848036e-05, "loss": 0.4519, "step": 4285 }, { "epoch": 0.11768259198242724, "grad_norm": 0.3028580844402313, "learning_rate": 1.9837096128405678e-05, "loss": 0.515, "step": 4286 }, { "epoch": 0.11771004942339373, "grad_norm": 0.3386352062225342, "learning_rate": 1.9837018479981675e-05, "loss": 0.5304, "step": 4287 }, { "epoch": 0.11773750686436024, "grad_norm": 0.38026896119117737, "learning_rate": 1.983694081320849e-05, "loss": 0.6108, "step": 4288 }, { "epoch": 0.11776496430532675, "grad_norm": 0.3676658868789673, "learning_rate": 1.9836863128086274e-05, "loss": 0.5904, "step": 4289 }, { "epoch": 0.11779242174629324, "grad_norm": 0.35059982538223267, "learning_rate": 1.983678542461517e-05, "loss": 0.5941, "step": 4290 }, { "epoch": 0.11781987918725975, "grad_norm": 0.33458882570266724, "learning_rate": 1.983670770279532e-05, "loss": 0.5331, "step": 4291 }, { "epoch": 0.11784733662822625, "grad_norm": 0.33370712399482727, "learning_rate": 1.983662996262687e-05, "loss": 0.4313, "step": 4292 }, { "epoch": 0.11787479406919275, "grad_norm": 0.3713378608226776, "learning_rate": 1.9836552204109964e-05, "loss": 0.536, "step": 4293 }, { "epoch": 0.11790225151015925, "grad_norm": 0.3809208571910858, "learning_rate": 1.983647442724475e-05, "loss": 0.5844, "step": 4294 }, { "epoch": 0.11792970895112576, "grad_norm": 0.34019288420677185, "learning_rate": 1.983639663203137e-05, "loss": 0.6045, "step": 4295 }, { "epoch": 0.11795716639209226, "grad_norm": 0.41290023922920227, "learning_rate": 1.9836318818469978e-05, "loss": 0.7355, "step": 4296 }, { "epoch": 0.11798462383305876, "grad_norm": 0.3842349350452423, "learning_rate": 1.9836240986560705e-05, "loss": 0.6305, "step": 4297 }, { "epoch": 0.11801208127402527, "grad_norm": 0.41502055525779724, "learning_rate": 1.9836163136303705e-05, "loss": 0.5435, "step": 4298 }, { "epoch": 0.11803953871499176, "grad_norm": 0.3837473690509796, "learning_rate": 1.9836085267699122e-05, "loss": 0.5609, "step": 4299 }, { "epoch": 0.11806699615595827, "grad_norm": 0.3647226393222809, "learning_rate": 1.98360073807471e-05, "loss": 0.5895, "step": 4300 }, { "epoch": 0.11809445359692476, "grad_norm": 0.33923929929733276, "learning_rate": 1.9835929475447786e-05, "loss": 0.5228, "step": 4301 }, { "epoch": 0.11812191103789127, "grad_norm": 0.3621585965156555, "learning_rate": 1.9835851551801325e-05, "loss": 0.5352, "step": 4302 }, { "epoch": 0.11814936847885776, "grad_norm": 0.3683701455593109, "learning_rate": 1.983577360980786e-05, "loss": 0.6076, "step": 4303 }, { "epoch": 0.11817682591982427, "grad_norm": 0.3310531675815582, "learning_rate": 1.9835695649467537e-05, "loss": 0.4872, "step": 4304 }, { "epoch": 0.11820428336079078, "grad_norm": 0.3872511386871338, "learning_rate": 1.9835617670780506e-05, "loss": 0.5628, "step": 4305 }, { "epoch": 0.11823174080175727, "grad_norm": 0.38014519214630127, "learning_rate": 1.9835539673746903e-05, "loss": 0.5659, "step": 4306 }, { "epoch": 0.11825919824272378, "grad_norm": 0.3512997627258301, "learning_rate": 1.9835461658366882e-05, "loss": 0.5363, "step": 4307 }, { "epoch": 0.11828665568369028, "grad_norm": 0.3635537922382355, "learning_rate": 1.9835383624640588e-05, "loss": 0.4473, "step": 4308 }, { "epoch": 0.11831411312465678, "grad_norm": 0.38315892219543457, "learning_rate": 1.9835305572568163e-05, "loss": 0.5494, "step": 4309 }, { "epoch": 0.11834157056562328, "grad_norm": 0.33772698044776917, "learning_rate": 1.9835227502149754e-05, "loss": 0.5098, "step": 4310 }, { "epoch": 0.11836902800658979, "grad_norm": 0.30654942989349365, "learning_rate": 1.983514941338551e-05, "loss": 0.5279, "step": 4311 }, { "epoch": 0.1183964854475563, "grad_norm": 0.49251797795295715, "learning_rate": 1.9835071306275567e-05, "loss": 0.5271, "step": 4312 }, { "epoch": 0.11842394288852279, "grad_norm": 0.3411617577075958, "learning_rate": 1.983499318082008e-05, "loss": 0.4674, "step": 4313 }, { "epoch": 0.1184514003294893, "grad_norm": 0.3665089011192322, "learning_rate": 1.9834915037019192e-05, "loss": 0.5278, "step": 4314 }, { "epoch": 0.11847885777045579, "grad_norm": 1.7441482543945312, "learning_rate": 1.983483687487305e-05, "loss": 0.5092, "step": 4315 }, { "epoch": 0.1185063152114223, "grad_norm": 0.37579283118247986, "learning_rate": 1.9834758694381798e-05, "loss": 0.5228, "step": 4316 }, { "epoch": 0.11853377265238879, "grad_norm": 0.3992732763290405, "learning_rate": 1.983468049554558e-05, "loss": 0.6188, "step": 4317 }, { "epoch": 0.1185612300933553, "grad_norm": 0.36934909224510193, "learning_rate": 1.9834602278364544e-05, "loss": 0.565, "step": 4318 }, { "epoch": 0.11858868753432181, "grad_norm": 0.39809319376945496, "learning_rate": 1.9834524042838837e-05, "loss": 0.5739, "step": 4319 }, { "epoch": 0.1186161449752883, "grad_norm": 0.37034982442855835, "learning_rate": 1.9834445788968602e-05, "loss": 0.5429, "step": 4320 }, { "epoch": 0.11864360241625481, "grad_norm": 0.3836788833141327, "learning_rate": 1.9834367516753992e-05, "loss": 0.4707, "step": 4321 }, { "epoch": 0.1186710598572213, "grad_norm": 0.37811779975891113, "learning_rate": 1.983428922619514e-05, "loss": 0.5938, "step": 4322 }, { "epoch": 0.11869851729818781, "grad_norm": 0.3261350095272064, "learning_rate": 1.9834210917292207e-05, "loss": 0.5618, "step": 4323 }, { "epoch": 0.1187259747391543, "grad_norm": 0.36373066902160645, "learning_rate": 1.983413259004533e-05, "loss": 0.5529, "step": 4324 }, { "epoch": 0.11875343218012081, "grad_norm": 0.4428917467594147, "learning_rate": 1.9834054244454655e-05, "loss": 0.6149, "step": 4325 }, { "epoch": 0.11878088962108732, "grad_norm": 0.3130910396575928, "learning_rate": 1.9833975880520332e-05, "loss": 0.5231, "step": 4326 }, { "epoch": 0.11880834706205382, "grad_norm": 0.349582314491272, "learning_rate": 1.9833897498242508e-05, "loss": 0.5839, "step": 4327 }, { "epoch": 0.11883580450302032, "grad_norm": 0.33817189931869507, "learning_rate": 1.983381909762132e-05, "loss": 0.5281, "step": 4328 }, { "epoch": 0.11886326194398682, "grad_norm": 0.40204575657844543, "learning_rate": 1.9833740678656925e-05, "loss": 0.5875, "step": 4329 }, { "epoch": 0.11889071938495333, "grad_norm": 0.37649425864219666, "learning_rate": 1.9833662241349464e-05, "loss": 0.5096, "step": 4330 }, { "epoch": 0.11891817682591982, "grad_norm": 0.4050710201263428, "learning_rate": 1.9833583785699084e-05, "loss": 0.5159, "step": 4331 }, { "epoch": 0.11894563426688633, "grad_norm": 0.3602672219276428, "learning_rate": 1.9833505311705932e-05, "loss": 0.544, "step": 4332 }, { "epoch": 0.11897309170785283, "grad_norm": 0.3396686315536499, "learning_rate": 1.9833426819370156e-05, "loss": 0.5009, "step": 4333 }, { "epoch": 0.11900054914881933, "grad_norm": 0.3839317262172699, "learning_rate": 1.9833348308691898e-05, "loss": 0.5251, "step": 4334 }, { "epoch": 0.11902800658978584, "grad_norm": 0.37074077129364014, "learning_rate": 1.9833269779671308e-05, "loss": 0.6582, "step": 4335 }, { "epoch": 0.11905546403075233, "grad_norm": 0.4075047969818115, "learning_rate": 1.983319123230853e-05, "loss": 0.6552, "step": 4336 }, { "epoch": 0.11908292147171884, "grad_norm": 0.35695329308509827, "learning_rate": 1.9833112666603712e-05, "loss": 0.5325, "step": 4337 }, { "epoch": 0.11911037891268533, "grad_norm": 0.37662944197654724, "learning_rate": 1.9833034082557002e-05, "loss": 0.4535, "step": 4338 }, { "epoch": 0.11913783635365184, "grad_norm": 0.36760398745536804, "learning_rate": 1.9832955480168546e-05, "loss": 0.6185, "step": 4339 }, { "epoch": 0.11916529379461833, "grad_norm": 0.39729052782058716, "learning_rate": 1.983287685943849e-05, "loss": 0.5513, "step": 4340 }, { "epoch": 0.11919275123558484, "grad_norm": 0.33333903551101685, "learning_rate": 1.9832798220366977e-05, "loss": 0.5742, "step": 4341 }, { "epoch": 0.11922020867655135, "grad_norm": 0.36943915486335754, "learning_rate": 1.9832719562954163e-05, "loss": 0.5614, "step": 4342 }, { "epoch": 0.11924766611751784, "grad_norm": 0.3326357305049896, "learning_rate": 1.9832640887200185e-05, "loss": 0.5154, "step": 4343 }, { "epoch": 0.11927512355848435, "grad_norm": 0.36940327286720276, "learning_rate": 1.9832562193105192e-05, "loss": 0.5329, "step": 4344 }, { "epoch": 0.11930258099945085, "grad_norm": 0.412171870470047, "learning_rate": 1.983248348066933e-05, "loss": 0.5513, "step": 4345 }, { "epoch": 0.11933003844041735, "grad_norm": 0.3785339891910553, "learning_rate": 1.9832404749892753e-05, "loss": 0.6072, "step": 4346 }, { "epoch": 0.11935749588138385, "grad_norm": 0.35041067004203796, "learning_rate": 1.9832326000775602e-05, "loss": 0.4891, "step": 4347 }, { "epoch": 0.11938495332235036, "grad_norm": 0.3763292133808136, "learning_rate": 1.9832247233318025e-05, "loss": 0.5586, "step": 4348 }, { "epoch": 0.11941241076331686, "grad_norm": 0.39175355434417725, "learning_rate": 1.983216844752017e-05, "loss": 0.58, "step": 4349 }, { "epoch": 0.11943986820428336, "grad_norm": 0.393155574798584, "learning_rate": 1.983208964338218e-05, "loss": 0.5176, "step": 4350 }, { "epoch": 0.11946732564524987, "grad_norm": 0.5005965828895569, "learning_rate": 1.9832010820904207e-05, "loss": 0.5727, "step": 4351 }, { "epoch": 0.11949478308621636, "grad_norm": 0.32930856943130493, "learning_rate": 1.9831931980086394e-05, "loss": 0.5441, "step": 4352 }, { "epoch": 0.11952224052718287, "grad_norm": 0.3729868233203888, "learning_rate": 1.983185312092889e-05, "loss": 0.5334, "step": 4353 }, { "epoch": 0.11954969796814936, "grad_norm": 0.37040573358535767, "learning_rate": 1.9831774243431842e-05, "loss": 0.5134, "step": 4354 }, { "epoch": 0.11957715540911587, "grad_norm": 0.4061732292175293, "learning_rate": 1.9831695347595398e-05, "loss": 0.5631, "step": 4355 }, { "epoch": 0.11960461285008238, "grad_norm": 0.3730664551258087, "learning_rate": 1.9831616433419704e-05, "loss": 0.5213, "step": 4356 }, { "epoch": 0.11963207029104887, "grad_norm": 0.3753736615180969, "learning_rate": 1.9831537500904906e-05, "loss": 0.5465, "step": 4357 }, { "epoch": 0.11965952773201538, "grad_norm": 0.34763067960739136, "learning_rate": 1.9831458550051158e-05, "loss": 0.5877, "step": 4358 }, { "epoch": 0.11968698517298187, "grad_norm": 0.4155239462852478, "learning_rate": 1.9831379580858597e-05, "loss": 0.5935, "step": 4359 }, { "epoch": 0.11971444261394838, "grad_norm": 0.3937745988368988, "learning_rate": 1.9831300593327377e-05, "loss": 0.539, "step": 4360 }, { "epoch": 0.11974190005491488, "grad_norm": 0.34593167901039124, "learning_rate": 1.9831221587457642e-05, "loss": 0.5118, "step": 4361 }, { "epoch": 0.11976935749588138, "grad_norm": 0.348603755235672, "learning_rate": 1.9831142563249543e-05, "loss": 0.5587, "step": 4362 }, { "epoch": 0.11979681493684789, "grad_norm": 0.3623400926589966, "learning_rate": 1.983106352070322e-05, "loss": 0.5197, "step": 4363 }, { "epoch": 0.11982427237781439, "grad_norm": 0.3395392596721649, "learning_rate": 1.9830984459818832e-05, "loss": 0.5392, "step": 4364 }, { "epoch": 0.1198517298187809, "grad_norm": 0.3711217939853668, "learning_rate": 1.983090538059652e-05, "loss": 0.6533, "step": 4365 }, { "epoch": 0.11987918725974739, "grad_norm": 0.3738940358161926, "learning_rate": 1.983082628303643e-05, "loss": 0.5519, "step": 4366 }, { "epoch": 0.1199066447007139, "grad_norm": 0.3511923551559448, "learning_rate": 1.983074716713871e-05, "loss": 0.446, "step": 4367 }, { "epoch": 0.11993410214168039, "grad_norm": 0.37447214126586914, "learning_rate": 1.9830668032903512e-05, "loss": 0.5112, "step": 4368 }, { "epoch": 0.1199615595826469, "grad_norm": 0.3574022650718689, "learning_rate": 1.9830588880330978e-05, "loss": 0.5966, "step": 4369 }, { "epoch": 0.11998901702361339, "grad_norm": 0.3980100452899933, "learning_rate": 1.9830509709421258e-05, "loss": 0.567, "step": 4370 }, { "epoch": 0.1200164744645799, "grad_norm": 0.9603545069694519, "learning_rate": 1.9830430520174505e-05, "loss": 0.4463, "step": 4371 }, { "epoch": 0.12004393190554641, "grad_norm": 0.34502410888671875, "learning_rate": 1.9830351312590857e-05, "loss": 0.4739, "step": 4372 }, { "epoch": 0.1200713893465129, "grad_norm": 0.34829363226890564, "learning_rate": 1.9830272086670467e-05, "loss": 0.4834, "step": 4373 }, { "epoch": 0.12009884678747941, "grad_norm": 0.3658924102783203, "learning_rate": 1.983019284241348e-05, "loss": 0.545, "step": 4374 }, { "epoch": 0.1201263042284459, "grad_norm": 0.39429062604904175, "learning_rate": 1.983011357982005e-05, "loss": 0.5531, "step": 4375 }, { "epoch": 0.12015376166941241, "grad_norm": 0.35094669461250305, "learning_rate": 1.983003429889032e-05, "loss": 0.5183, "step": 4376 }, { "epoch": 0.1201812191103789, "grad_norm": 0.4926547110080719, "learning_rate": 1.9829954999624434e-05, "loss": 0.577, "step": 4377 }, { "epoch": 0.12020867655134541, "grad_norm": 0.32219961285591125, "learning_rate": 1.9829875682022546e-05, "loss": 0.4787, "step": 4378 }, { "epoch": 0.12023613399231192, "grad_norm": 0.4216304123401642, "learning_rate": 1.9829796346084808e-05, "loss": 0.5561, "step": 4379 }, { "epoch": 0.12026359143327842, "grad_norm": 0.36240512132644653, "learning_rate": 1.9829716991811357e-05, "loss": 0.5583, "step": 4380 }, { "epoch": 0.12029104887424492, "grad_norm": 0.4094868004322052, "learning_rate": 1.982963761920235e-05, "loss": 0.6212, "step": 4381 }, { "epoch": 0.12031850631521142, "grad_norm": 0.41385418176651, "learning_rate": 1.982955822825793e-05, "loss": 0.6435, "step": 4382 }, { "epoch": 0.12034596375617793, "grad_norm": 0.35844698548316956, "learning_rate": 1.9829478818978247e-05, "loss": 0.562, "step": 4383 }, { "epoch": 0.12037342119714442, "grad_norm": 0.43544918298721313, "learning_rate": 1.982939939136345e-05, "loss": 0.5876, "step": 4384 }, { "epoch": 0.12040087863811093, "grad_norm": 0.3851318359375, "learning_rate": 1.982931994541368e-05, "loss": 0.5637, "step": 4385 }, { "epoch": 0.12042833607907744, "grad_norm": 0.35776981711387634, "learning_rate": 1.98292404811291e-05, "loss": 0.585, "step": 4386 }, { "epoch": 0.12045579352004393, "grad_norm": 0.40571829676628113, "learning_rate": 1.9829160998509845e-05, "loss": 0.5985, "step": 4387 }, { "epoch": 0.12048325096101044, "grad_norm": 0.32351094484329224, "learning_rate": 1.982908149755607e-05, "loss": 0.5059, "step": 4388 }, { "epoch": 0.12051070840197693, "grad_norm": 0.3647138476371765, "learning_rate": 1.9829001978267914e-05, "loss": 0.4933, "step": 4389 }, { "epoch": 0.12053816584294344, "grad_norm": 0.38570430874824524, "learning_rate": 1.982892244064554e-05, "loss": 0.5579, "step": 4390 }, { "epoch": 0.12056562328390993, "grad_norm": 0.3582010567188263, "learning_rate": 1.9828842884689085e-05, "loss": 0.5811, "step": 4391 }, { "epoch": 0.12059308072487644, "grad_norm": 0.3458060324192047, "learning_rate": 1.98287633103987e-05, "loss": 0.4998, "step": 4392 }, { "epoch": 0.12062053816584295, "grad_norm": 0.43845903873443604, "learning_rate": 1.9828683717774536e-05, "loss": 0.5588, "step": 4393 }, { "epoch": 0.12064799560680944, "grad_norm": 0.4444512128829956, "learning_rate": 1.9828604106816743e-05, "loss": 0.5135, "step": 4394 }, { "epoch": 0.12067545304777595, "grad_norm": 0.38536563515663147, "learning_rate": 1.982852447752546e-05, "loss": 0.5149, "step": 4395 }, { "epoch": 0.12070291048874245, "grad_norm": 0.5001475811004639, "learning_rate": 1.9828444829900847e-05, "loss": 0.5634, "step": 4396 }, { "epoch": 0.12073036792970895, "grad_norm": 0.37451645731925964, "learning_rate": 1.9828365163943046e-05, "loss": 0.5046, "step": 4397 }, { "epoch": 0.12075782537067545, "grad_norm": 0.4188501834869385, "learning_rate": 1.9828285479652203e-05, "loss": 0.5954, "step": 4398 }, { "epoch": 0.12078528281164196, "grad_norm": 0.38071173429489136, "learning_rate": 1.9828205777028478e-05, "loss": 0.5392, "step": 4399 }, { "epoch": 0.12081274025260846, "grad_norm": 0.3504979908466339, "learning_rate": 1.982812605607201e-05, "loss": 0.5446, "step": 4400 }, { "epoch": 0.12084019769357496, "grad_norm": 0.3245278596878052, "learning_rate": 1.982804631678295e-05, "loss": 0.4406, "step": 4401 }, { "epoch": 0.12086765513454147, "grad_norm": 0.34069398045539856, "learning_rate": 1.9827966559161445e-05, "loss": 0.5259, "step": 4402 }, { "epoch": 0.12089511257550796, "grad_norm": 0.3438992500305176, "learning_rate": 1.9827886783207645e-05, "loss": 0.588, "step": 4403 }, { "epoch": 0.12092257001647447, "grad_norm": 0.3452449142932892, "learning_rate": 1.9827806988921703e-05, "loss": 0.5954, "step": 4404 }, { "epoch": 0.12095002745744096, "grad_norm": 0.40946444869041443, "learning_rate": 1.9827727176303762e-05, "loss": 0.5198, "step": 4405 }, { "epoch": 0.12097748489840747, "grad_norm": 0.3553365170955658, "learning_rate": 1.9827647345353972e-05, "loss": 0.6055, "step": 4406 }, { "epoch": 0.12100494233937396, "grad_norm": 0.3381158411502838, "learning_rate": 1.9827567496072485e-05, "loss": 0.5601, "step": 4407 }, { "epoch": 0.12103239978034047, "grad_norm": 0.3465712368488312, "learning_rate": 1.9827487628459447e-05, "loss": 0.5837, "step": 4408 }, { "epoch": 0.12105985722130698, "grad_norm": 0.37039709091186523, "learning_rate": 1.982740774251501e-05, "loss": 0.5238, "step": 4409 }, { "epoch": 0.12108731466227347, "grad_norm": 0.34742099046707153, "learning_rate": 1.9827327838239322e-05, "loss": 0.5575, "step": 4410 }, { "epoch": 0.12111477210323998, "grad_norm": 0.4354720711708069, "learning_rate": 1.9827247915632527e-05, "loss": 0.5409, "step": 4411 }, { "epoch": 0.12114222954420648, "grad_norm": 0.3863984942436218, "learning_rate": 1.982716797469478e-05, "loss": 0.6206, "step": 4412 }, { "epoch": 0.12116968698517298, "grad_norm": 0.37640589475631714, "learning_rate": 1.9827088015426228e-05, "loss": 0.5816, "step": 4413 }, { "epoch": 0.12119714442613948, "grad_norm": 0.3277275860309601, "learning_rate": 1.982700803782702e-05, "loss": 0.489, "step": 4414 }, { "epoch": 0.12122460186710599, "grad_norm": 0.43336087465286255, "learning_rate": 1.9826928041897307e-05, "loss": 0.555, "step": 4415 }, { "epoch": 0.1212520593080725, "grad_norm": 0.36345458030700684, "learning_rate": 1.9826848027637234e-05, "loss": 0.4991, "step": 4416 }, { "epoch": 0.12127951674903899, "grad_norm": 0.35821524262428284, "learning_rate": 1.982676799504696e-05, "loss": 0.4739, "step": 4417 }, { "epoch": 0.1213069741900055, "grad_norm": 0.4192046523094177, "learning_rate": 1.982668794412662e-05, "loss": 0.5689, "step": 4418 }, { "epoch": 0.12133443163097199, "grad_norm": 0.3432762026786804, "learning_rate": 1.9826607874876374e-05, "loss": 0.5623, "step": 4419 }, { "epoch": 0.1213618890719385, "grad_norm": 0.3570500612258911, "learning_rate": 1.9826527787296367e-05, "loss": 0.5859, "step": 4420 }, { "epoch": 0.12138934651290499, "grad_norm": 0.36303475499153137, "learning_rate": 1.982644768138675e-05, "loss": 0.573, "step": 4421 }, { "epoch": 0.1214168039538715, "grad_norm": 0.361372172832489, "learning_rate": 1.982636755714767e-05, "loss": 0.4629, "step": 4422 }, { "epoch": 0.12144426139483801, "grad_norm": 0.34453481435775757, "learning_rate": 1.982628741457928e-05, "loss": 0.5547, "step": 4423 }, { "epoch": 0.1214717188358045, "grad_norm": 0.38676899671554565, "learning_rate": 1.982620725368173e-05, "loss": 0.6063, "step": 4424 }, { "epoch": 0.12149917627677101, "grad_norm": 0.3573000133037567, "learning_rate": 1.9826127074455162e-05, "loss": 0.5155, "step": 4425 }, { "epoch": 0.1215266337177375, "grad_norm": 0.34688103199005127, "learning_rate": 1.9826046876899734e-05, "loss": 0.5318, "step": 4426 }, { "epoch": 0.12155409115870401, "grad_norm": 0.3390115201473236, "learning_rate": 1.982596666101559e-05, "loss": 0.5128, "step": 4427 }, { "epoch": 0.1215815485996705, "grad_norm": 0.4050641357898712, "learning_rate": 1.9825886426802888e-05, "loss": 0.5821, "step": 4428 }, { "epoch": 0.12160900604063701, "grad_norm": 0.4352811574935913, "learning_rate": 1.9825806174261764e-05, "loss": 0.5786, "step": 4429 }, { "epoch": 0.12163646348160352, "grad_norm": 0.34241983294487, "learning_rate": 1.9825725903392382e-05, "loss": 0.5134, "step": 4430 }, { "epoch": 0.12166392092257002, "grad_norm": 0.7976049184799194, "learning_rate": 1.9825645614194885e-05, "loss": 0.5645, "step": 4431 }, { "epoch": 0.12169137836353652, "grad_norm": 0.37559396028518677, "learning_rate": 1.982556530666942e-05, "loss": 0.4945, "step": 4432 }, { "epoch": 0.12171883580450302, "grad_norm": 0.3490278720855713, "learning_rate": 1.9825484980816138e-05, "loss": 0.5677, "step": 4433 }, { "epoch": 0.12174629324546953, "grad_norm": 0.3461850583553314, "learning_rate": 1.9825404636635194e-05, "loss": 0.3935, "step": 4434 }, { "epoch": 0.12177375068643602, "grad_norm": 0.36581331491470337, "learning_rate": 1.9825324274126733e-05, "loss": 0.6082, "step": 4435 }, { "epoch": 0.12180120812740253, "grad_norm": 0.3520062565803528, "learning_rate": 1.9825243893290906e-05, "loss": 0.6316, "step": 4436 }, { "epoch": 0.12182866556836902, "grad_norm": 0.37399837374687195, "learning_rate": 1.9825163494127864e-05, "loss": 0.4493, "step": 4437 }, { "epoch": 0.12185612300933553, "grad_norm": 0.3887555003166199, "learning_rate": 1.9825083076637757e-05, "loss": 0.4774, "step": 4438 }, { "epoch": 0.12188358045030204, "grad_norm": 0.3307332694530487, "learning_rate": 1.9825002640820733e-05, "loss": 0.513, "step": 4439 }, { "epoch": 0.12191103789126853, "grad_norm": 0.33402732014656067, "learning_rate": 1.9824922186676945e-05, "loss": 0.6136, "step": 4440 }, { "epoch": 0.12193849533223504, "grad_norm": 0.4366951882839203, "learning_rate": 1.982484171420654e-05, "loss": 0.5128, "step": 4441 }, { "epoch": 0.12196595277320153, "grad_norm": 0.44313961267471313, "learning_rate": 1.982476122340967e-05, "loss": 0.441, "step": 4442 }, { "epoch": 0.12199341021416804, "grad_norm": 0.3746240735054016, "learning_rate": 1.9824680714286485e-05, "loss": 0.5133, "step": 4443 }, { "epoch": 0.12202086765513454, "grad_norm": 0.4416984021663666, "learning_rate": 1.982460018683713e-05, "loss": 0.5977, "step": 4444 }, { "epoch": 0.12204832509610104, "grad_norm": 0.33317235112190247, "learning_rate": 1.9824519641061767e-05, "loss": 0.5073, "step": 4445 }, { "epoch": 0.12207578253706755, "grad_norm": 0.3381946086883545, "learning_rate": 1.9824439076960536e-05, "loss": 0.5384, "step": 4446 }, { "epoch": 0.12210323997803404, "grad_norm": 0.3394647240638733, "learning_rate": 1.982435849453359e-05, "loss": 0.5332, "step": 4447 }, { "epoch": 0.12213069741900055, "grad_norm": 0.3510815501213074, "learning_rate": 1.982427789378108e-05, "loss": 0.5833, "step": 4448 }, { "epoch": 0.12215815485996705, "grad_norm": 0.3221791684627533, "learning_rate": 1.982419727470316e-05, "loss": 0.5268, "step": 4449 }, { "epoch": 0.12218561230093355, "grad_norm": 0.35876035690307617, "learning_rate": 1.9824116637299972e-05, "loss": 0.5565, "step": 4450 }, { "epoch": 0.12221306974190005, "grad_norm": 0.5808611512184143, "learning_rate": 1.982403598157167e-05, "loss": 0.5925, "step": 4451 }, { "epoch": 0.12224052718286656, "grad_norm": 0.3830517828464508, "learning_rate": 1.982395530751841e-05, "loss": 0.5246, "step": 4452 }, { "epoch": 0.12226798462383306, "grad_norm": 1.6483724117279053, "learning_rate": 1.9823874615140335e-05, "loss": 0.6146, "step": 4453 }, { "epoch": 0.12229544206479956, "grad_norm": 0.37066078186035156, "learning_rate": 1.98237939044376e-05, "loss": 0.5266, "step": 4454 }, { "epoch": 0.12232289950576607, "grad_norm": 0.3495706021785736, "learning_rate": 1.9823713175410352e-05, "loss": 0.5063, "step": 4455 }, { "epoch": 0.12235035694673256, "grad_norm": 0.3710753321647644, "learning_rate": 1.9823632428058744e-05, "loss": 0.5452, "step": 4456 }, { "epoch": 0.12237781438769907, "grad_norm": 0.34229040145874023, "learning_rate": 1.9823551662382926e-05, "loss": 0.5798, "step": 4457 }, { "epoch": 0.12240527182866556, "grad_norm": 0.44905513525009155, "learning_rate": 1.982347087838305e-05, "loss": 0.5721, "step": 4458 }, { "epoch": 0.12243272926963207, "grad_norm": 0.34064796566963196, "learning_rate": 1.982339007605927e-05, "loss": 0.5656, "step": 4459 }, { "epoch": 0.12246018671059858, "grad_norm": 0.36782774329185486, "learning_rate": 1.9823309255411725e-05, "loss": 0.5016, "step": 4460 }, { "epoch": 0.12248764415156507, "grad_norm": 0.3486008644104004, "learning_rate": 1.9823228416440577e-05, "loss": 0.528, "step": 4461 }, { "epoch": 0.12251510159253158, "grad_norm": 0.3348163664340973, "learning_rate": 1.982314755914597e-05, "loss": 0.57, "step": 4462 }, { "epoch": 0.12254255903349807, "grad_norm": 0.37935537099838257, "learning_rate": 1.9823066683528057e-05, "loss": 0.4647, "step": 4463 }, { "epoch": 0.12257001647446458, "grad_norm": 0.36315077543258667, "learning_rate": 1.9822985789586992e-05, "loss": 0.5554, "step": 4464 }, { "epoch": 0.12259747391543108, "grad_norm": 0.4080066680908203, "learning_rate": 1.9822904877322924e-05, "loss": 0.5817, "step": 4465 }, { "epoch": 0.12262493135639758, "grad_norm": 0.4378693401813507, "learning_rate": 1.9822823946736002e-05, "loss": 0.5017, "step": 4466 }, { "epoch": 0.12265238879736409, "grad_norm": 0.3536355793476105, "learning_rate": 1.9822742997826378e-05, "loss": 0.5724, "step": 4467 }, { "epoch": 0.12267984623833059, "grad_norm": 0.7946407794952393, "learning_rate": 1.9822662030594202e-05, "loss": 0.5324, "step": 4468 }, { "epoch": 0.1227073036792971, "grad_norm": 0.365622878074646, "learning_rate": 1.982258104503963e-05, "loss": 0.5247, "step": 4469 }, { "epoch": 0.12273476112026359, "grad_norm": 0.3522932827472687, "learning_rate": 1.9822500041162808e-05, "loss": 0.5245, "step": 4470 }, { "epoch": 0.1227622185612301, "grad_norm": 0.33957645297050476, "learning_rate": 1.9822419018963886e-05, "loss": 0.4581, "step": 4471 }, { "epoch": 0.12278967600219659, "grad_norm": 0.3887788951396942, "learning_rate": 1.9822337978443017e-05, "loss": 0.4911, "step": 4472 }, { "epoch": 0.1228171334431631, "grad_norm": 0.3589390218257904, "learning_rate": 1.982225691960036e-05, "loss": 0.6144, "step": 4473 }, { "epoch": 0.12284459088412959, "grad_norm": 0.3854289948940277, "learning_rate": 1.982217584243605e-05, "loss": 0.4485, "step": 4474 }, { "epoch": 0.1228720483250961, "grad_norm": 0.3577403426170349, "learning_rate": 1.9822094746950253e-05, "loss": 0.4974, "step": 4475 }, { "epoch": 0.12289950576606261, "grad_norm": 0.3502238690853119, "learning_rate": 1.982201363314311e-05, "loss": 0.6232, "step": 4476 }, { "epoch": 0.1229269632070291, "grad_norm": 0.3329123556613922, "learning_rate": 1.982193250101478e-05, "loss": 0.5471, "step": 4477 }, { "epoch": 0.12295442064799561, "grad_norm": 0.3653741478919983, "learning_rate": 1.9821851350565412e-05, "loss": 0.5619, "step": 4478 }, { "epoch": 0.1229818780889621, "grad_norm": 0.4308152496814728, "learning_rate": 1.9821770181795156e-05, "loss": 0.5205, "step": 4479 }, { "epoch": 0.12300933552992861, "grad_norm": 0.45550432801246643, "learning_rate": 1.982168899470416e-05, "loss": 0.5861, "step": 4480 }, { "epoch": 0.1230367929708951, "grad_norm": 0.4742860794067383, "learning_rate": 1.9821607789292584e-05, "loss": 0.5487, "step": 4481 }, { "epoch": 0.12306425041186161, "grad_norm": 0.3706261217594147, "learning_rate": 1.9821526565560573e-05, "loss": 0.5384, "step": 4482 }, { "epoch": 0.12309170785282812, "grad_norm": 0.35688507556915283, "learning_rate": 1.982144532350828e-05, "loss": 0.5478, "step": 4483 }, { "epoch": 0.12311916529379462, "grad_norm": 0.37396326661109924, "learning_rate": 1.9821364063135855e-05, "loss": 0.5248, "step": 4484 }, { "epoch": 0.12314662273476112, "grad_norm": 0.4188917875289917, "learning_rate": 1.9821282784443454e-05, "loss": 0.5888, "step": 4485 }, { "epoch": 0.12317408017572762, "grad_norm": 0.3921259939670563, "learning_rate": 1.9821201487431224e-05, "loss": 0.5215, "step": 4486 }, { "epoch": 0.12320153761669413, "grad_norm": 0.34143099188804626, "learning_rate": 1.982112017209932e-05, "loss": 0.4988, "step": 4487 }, { "epoch": 0.12322899505766062, "grad_norm": 0.4915735423564911, "learning_rate": 1.9821038838447895e-05, "loss": 0.4973, "step": 4488 }, { "epoch": 0.12325645249862713, "grad_norm": 0.3705565631389618, "learning_rate": 1.9820957486477094e-05, "loss": 0.4905, "step": 4489 }, { "epoch": 0.12328390993959364, "grad_norm": 0.3982454240322113, "learning_rate": 1.9820876116187072e-05, "loss": 0.5099, "step": 4490 }, { "epoch": 0.12331136738056013, "grad_norm": 0.3848978281021118, "learning_rate": 1.9820794727577987e-05, "loss": 0.546, "step": 4491 }, { "epoch": 0.12333882482152664, "grad_norm": 0.3555212616920471, "learning_rate": 1.982071332064998e-05, "loss": 0.4974, "step": 4492 }, { "epoch": 0.12336628226249313, "grad_norm": 0.4396597445011139, "learning_rate": 1.982063189540321e-05, "loss": 0.6043, "step": 4493 }, { "epoch": 0.12339373970345964, "grad_norm": 0.3869277834892273, "learning_rate": 1.982055045183783e-05, "loss": 0.6017, "step": 4494 }, { "epoch": 0.12342119714442613, "grad_norm": 0.3532942831516266, "learning_rate": 1.9820468989953986e-05, "loss": 0.4852, "step": 4495 }, { "epoch": 0.12344865458539264, "grad_norm": 0.33970898389816284, "learning_rate": 1.9820387509751833e-05, "loss": 0.4782, "step": 4496 }, { "epoch": 0.12347611202635915, "grad_norm": 0.4042155146598816, "learning_rate": 1.9820306011231522e-05, "loss": 0.6054, "step": 4497 }, { "epoch": 0.12350356946732564, "grad_norm": 0.39494016766548157, "learning_rate": 1.982022449439321e-05, "loss": 0.6421, "step": 4498 }, { "epoch": 0.12353102690829215, "grad_norm": 0.3989347219467163, "learning_rate": 1.9820142959237042e-05, "loss": 0.6076, "step": 4499 }, { "epoch": 0.12355848434925865, "grad_norm": 0.405865877866745, "learning_rate": 1.9820061405763175e-05, "loss": 0.6346, "step": 4500 }, { "epoch": 0.12358594179022515, "grad_norm": 0.3560645878314972, "learning_rate": 1.9819979833971756e-05, "loss": 0.5277, "step": 4501 }, { "epoch": 0.12361339923119165, "grad_norm": 0.3665539026260376, "learning_rate": 1.981989824386294e-05, "loss": 0.639, "step": 4502 }, { "epoch": 0.12364085667215816, "grad_norm": 0.38791871070861816, "learning_rate": 1.9819816635436883e-05, "loss": 0.6396, "step": 4503 }, { "epoch": 0.12366831411312465, "grad_norm": 0.41537898778915405, "learning_rate": 1.9819735008693734e-05, "loss": 0.519, "step": 4504 }, { "epoch": 0.12369577155409116, "grad_norm": 0.3463146984577179, "learning_rate": 1.9819653363633647e-05, "loss": 0.5834, "step": 4505 }, { "epoch": 0.12372322899505767, "grad_norm": 0.3613133430480957, "learning_rate": 1.9819571700256767e-05, "loss": 0.543, "step": 4506 }, { "epoch": 0.12375068643602416, "grad_norm": 0.3808510899543762, "learning_rate": 1.9819490018563254e-05, "loss": 0.5964, "step": 4507 }, { "epoch": 0.12377814387699067, "grad_norm": 0.4057452380657196, "learning_rate": 1.981940831855326e-05, "loss": 0.5139, "step": 4508 }, { "epoch": 0.12380560131795716, "grad_norm": 0.4041282534599304, "learning_rate": 1.9819326600226933e-05, "loss": 0.5718, "step": 4509 }, { "epoch": 0.12383305875892367, "grad_norm": 0.3776470720767975, "learning_rate": 1.9819244863584427e-05, "loss": 0.5965, "step": 4510 }, { "epoch": 0.12386051619989016, "grad_norm": 0.4001157581806183, "learning_rate": 1.9819163108625898e-05, "loss": 0.5126, "step": 4511 }, { "epoch": 0.12388797364085667, "grad_norm": 0.32709410786628723, "learning_rate": 1.9819081335351497e-05, "loss": 0.5239, "step": 4512 }, { "epoch": 0.12391543108182318, "grad_norm": 0.41309496760368347, "learning_rate": 1.981899954376137e-05, "loss": 0.5328, "step": 4513 }, { "epoch": 0.12394288852278967, "grad_norm": 0.3411867320537567, "learning_rate": 1.9818917733855682e-05, "loss": 0.5097, "step": 4514 }, { "epoch": 0.12397034596375618, "grad_norm": 0.36101090908050537, "learning_rate": 1.9818835905634573e-05, "loss": 0.5307, "step": 4515 }, { "epoch": 0.12399780340472268, "grad_norm": 0.37608838081359863, "learning_rate": 1.9818754059098205e-05, "loss": 0.5236, "step": 4516 }, { "epoch": 0.12402526084568918, "grad_norm": 0.3685913681983948, "learning_rate": 1.9818672194246726e-05, "loss": 0.5244, "step": 4517 }, { "epoch": 0.12405271828665568, "grad_norm": 0.36835530400276184, "learning_rate": 1.9818590311080286e-05, "loss": 0.5735, "step": 4518 }, { "epoch": 0.12408017572762219, "grad_norm": 0.3671985864639282, "learning_rate": 1.9818508409599047e-05, "loss": 0.5545, "step": 4519 }, { "epoch": 0.1241076331685887, "grad_norm": 0.392423152923584, "learning_rate": 1.9818426489803152e-05, "loss": 0.5884, "step": 4520 }, { "epoch": 0.12413509060955519, "grad_norm": 0.34275850653648376, "learning_rate": 1.981834455169276e-05, "loss": 0.5504, "step": 4521 }, { "epoch": 0.1241625480505217, "grad_norm": 0.3889680504798889, "learning_rate": 1.981826259526802e-05, "loss": 0.6218, "step": 4522 }, { "epoch": 0.12419000549148819, "grad_norm": 0.3857469856739044, "learning_rate": 1.9818180620529088e-05, "loss": 0.5784, "step": 4523 }, { "epoch": 0.1242174629324547, "grad_norm": 0.3293091952800751, "learning_rate": 1.9818098627476116e-05, "loss": 0.5917, "step": 4524 }, { "epoch": 0.12424492037342119, "grad_norm": 0.329585999250412, "learning_rate": 1.981801661610925e-05, "loss": 0.4885, "step": 4525 }, { "epoch": 0.1242723778143877, "grad_norm": 0.4473568797111511, "learning_rate": 1.9817934586428657e-05, "loss": 0.5969, "step": 4526 }, { "epoch": 0.12429983525535421, "grad_norm": 0.3558028042316437, "learning_rate": 1.981785253843448e-05, "loss": 0.547, "step": 4527 }, { "epoch": 0.1243272926963207, "grad_norm": 0.3729296028614044, "learning_rate": 1.9817770472126878e-05, "loss": 0.4732, "step": 4528 }, { "epoch": 0.12435475013728721, "grad_norm": 0.3698374927043915, "learning_rate": 1.9817688387505995e-05, "loss": 0.5633, "step": 4529 }, { "epoch": 0.1243822075782537, "grad_norm": 0.3829362392425537, "learning_rate": 1.9817606284571992e-05, "loss": 0.527, "step": 4530 }, { "epoch": 0.12440966501922021, "grad_norm": 0.36799001693725586, "learning_rate": 1.9817524163325022e-05, "loss": 0.5406, "step": 4531 }, { "epoch": 0.1244371224601867, "grad_norm": 0.3593764305114746, "learning_rate": 1.9817442023765237e-05, "loss": 0.5126, "step": 4532 }, { "epoch": 0.12446457990115321, "grad_norm": 0.3810892701148987, "learning_rate": 1.9817359865892785e-05, "loss": 0.5565, "step": 4533 }, { "epoch": 0.12449203734211972, "grad_norm": 0.3468562960624695, "learning_rate": 1.9817277689707827e-05, "loss": 0.5553, "step": 4534 }, { "epoch": 0.12451949478308622, "grad_norm": 0.35383379459381104, "learning_rate": 1.9817195495210508e-05, "loss": 0.5819, "step": 4535 }, { "epoch": 0.12454695222405272, "grad_norm": 0.4723353385925293, "learning_rate": 1.981711328240099e-05, "loss": 0.4977, "step": 4536 }, { "epoch": 0.12457440966501922, "grad_norm": 0.3190907835960388, "learning_rate": 1.981703105127942e-05, "loss": 0.4701, "step": 4537 }, { "epoch": 0.12460186710598573, "grad_norm": 0.3448849022388458, "learning_rate": 1.9816948801845957e-05, "loss": 0.5509, "step": 4538 }, { "epoch": 0.12462932454695222, "grad_norm": 0.3485904633998871, "learning_rate": 1.981686653410075e-05, "loss": 0.4907, "step": 4539 }, { "epoch": 0.12465678198791873, "grad_norm": 0.4145749807357788, "learning_rate": 1.9816784248043952e-05, "loss": 0.5109, "step": 4540 }, { "epoch": 0.12468423942888522, "grad_norm": 0.33381637930870056, "learning_rate": 1.981670194367572e-05, "loss": 0.5699, "step": 4541 }, { "epoch": 0.12471169686985173, "grad_norm": 0.3373650908470154, "learning_rate": 1.9816619620996206e-05, "loss": 0.5492, "step": 4542 }, { "epoch": 0.12473915431081824, "grad_norm": 0.3808574378490448, "learning_rate": 1.9816537280005562e-05, "loss": 0.5352, "step": 4543 }, { "epoch": 0.12476661175178473, "grad_norm": 0.34907492995262146, "learning_rate": 1.9816454920703943e-05, "loss": 0.6171, "step": 4544 }, { "epoch": 0.12479406919275124, "grad_norm": 0.3677372634410858, "learning_rate": 1.98163725430915e-05, "loss": 0.6476, "step": 4545 }, { "epoch": 0.12482152663371773, "grad_norm": 0.4435366988182068, "learning_rate": 1.9816290147168395e-05, "loss": 0.4682, "step": 4546 }, { "epoch": 0.12484898407468424, "grad_norm": 0.3766632676124573, "learning_rate": 1.9816207732934774e-05, "loss": 0.5299, "step": 4547 }, { "epoch": 0.12487644151565074, "grad_norm": 0.34825363755226135, "learning_rate": 1.9816125300390792e-05, "loss": 0.5274, "step": 4548 }, { "epoch": 0.12490389895661724, "grad_norm": 0.460807204246521, "learning_rate": 1.98160428495366e-05, "loss": 0.6617, "step": 4549 }, { "epoch": 0.12493135639758375, "grad_norm": 0.3821741044521332, "learning_rate": 1.9815960380372358e-05, "loss": 0.5602, "step": 4550 }, { "epoch": 0.12495881383855025, "grad_norm": 0.3629308342933655, "learning_rate": 1.9815877892898218e-05, "loss": 0.5247, "step": 4551 }, { "epoch": 0.12498627127951675, "grad_norm": 0.37050512433052063, "learning_rate": 1.9815795387114335e-05, "loss": 0.4854, "step": 4552 }, { "epoch": 0.12501372872048325, "grad_norm": 0.3886166214942932, "learning_rate": 1.9815712863020854e-05, "loss": 0.5081, "step": 4553 }, { "epoch": 0.12504118616144974, "grad_norm": 0.39674025774002075, "learning_rate": 1.981563032061794e-05, "loss": 0.5675, "step": 4554 }, { "epoch": 0.12506864360241626, "grad_norm": 0.36375951766967773, "learning_rate": 1.981554775990574e-05, "loss": 0.5482, "step": 4555 }, { "epoch": 0.12509610104338276, "grad_norm": 0.4314548969268799, "learning_rate": 1.9815465180884416e-05, "loss": 0.6085, "step": 4556 }, { "epoch": 0.12512355848434925, "grad_norm": 0.33327341079711914, "learning_rate": 1.9815382583554114e-05, "loss": 0.5775, "step": 4557 }, { "epoch": 0.12515101592531577, "grad_norm": 0.3660721778869629, "learning_rate": 1.9815299967914988e-05, "loss": 0.4915, "step": 4558 }, { "epoch": 0.12517847336628227, "grad_norm": 0.3982091546058655, "learning_rate": 1.9815217333967198e-05, "loss": 0.5616, "step": 4559 }, { "epoch": 0.12520593080724876, "grad_norm": 0.4000290036201477, "learning_rate": 1.9815134681710895e-05, "loss": 0.5606, "step": 4560 }, { "epoch": 0.12523338824821525, "grad_norm": 0.3396747410297394, "learning_rate": 1.9815052011146233e-05, "loss": 0.5714, "step": 4561 }, { "epoch": 0.12526084568918178, "grad_norm": 0.41182446479797363, "learning_rate": 1.9814969322273368e-05, "loss": 0.5831, "step": 4562 }, { "epoch": 0.12528830313014827, "grad_norm": 0.38821956515312195, "learning_rate": 1.9814886615092447e-05, "loss": 0.5393, "step": 4563 }, { "epoch": 0.12531576057111476, "grad_norm": 0.3326866328716278, "learning_rate": 1.9814803889603634e-05, "loss": 0.5521, "step": 4564 }, { "epoch": 0.1253432180120813, "grad_norm": 0.3558941185474396, "learning_rate": 1.981472114580708e-05, "loss": 0.5037, "step": 4565 }, { "epoch": 0.12537067545304778, "grad_norm": 0.3617771863937378, "learning_rate": 1.9814638383702936e-05, "loss": 0.5158, "step": 4566 }, { "epoch": 0.12539813289401427, "grad_norm": 0.3950182795524597, "learning_rate": 1.981455560329136e-05, "loss": 0.5097, "step": 4567 }, { "epoch": 0.12542559033498077, "grad_norm": 0.35856372117996216, "learning_rate": 1.9814472804572505e-05, "loss": 0.5662, "step": 4568 }, { "epoch": 0.1254530477759473, "grad_norm": 0.4754500985145569, "learning_rate": 1.9814389987546526e-05, "loss": 0.6284, "step": 4569 }, { "epoch": 0.12548050521691378, "grad_norm": 0.4066259264945984, "learning_rate": 1.981430715221358e-05, "loss": 0.5777, "step": 4570 }, { "epoch": 0.12550796265788028, "grad_norm": 0.3523061275482178, "learning_rate": 1.9814224298573818e-05, "loss": 0.5515, "step": 4571 }, { "epoch": 0.1255354200988468, "grad_norm": 0.3919089436531067, "learning_rate": 1.981414142662739e-05, "loss": 0.5592, "step": 4572 }, { "epoch": 0.1255628775398133, "grad_norm": 0.3298614025115967, "learning_rate": 1.981405853637446e-05, "loss": 0.5147, "step": 4573 }, { "epoch": 0.1255903349807798, "grad_norm": 0.479017436504364, "learning_rate": 1.9813975627815178e-05, "loss": 0.5472, "step": 4574 }, { "epoch": 0.12561779242174628, "grad_norm": 0.46824684739112854, "learning_rate": 1.9813892700949703e-05, "loss": 0.4911, "step": 4575 }, { "epoch": 0.1256452498627128, "grad_norm": 0.6302682757377625, "learning_rate": 1.981380975577818e-05, "loss": 0.5693, "step": 4576 }, { "epoch": 0.1256727073036793, "grad_norm": 0.36443573236465454, "learning_rate": 1.9813726792300772e-05, "loss": 0.594, "step": 4577 }, { "epoch": 0.1257001647446458, "grad_norm": 0.38088783621788025, "learning_rate": 1.9813643810517632e-05, "loss": 0.558, "step": 4578 }, { "epoch": 0.12572762218561231, "grad_norm": 0.40735623240470886, "learning_rate": 1.981356081042891e-05, "loss": 0.5335, "step": 4579 }, { "epoch": 0.1257550796265788, "grad_norm": 0.3989115357398987, "learning_rate": 1.981347779203477e-05, "loss": 0.553, "step": 4580 }, { "epoch": 0.1257825370675453, "grad_norm": 0.49549898505210876, "learning_rate": 1.981339475533536e-05, "loss": 0.5686, "step": 4581 }, { "epoch": 0.1258099945085118, "grad_norm": 0.3734446167945862, "learning_rate": 1.9813311700330838e-05, "loss": 0.5999, "step": 4582 }, { "epoch": 0.12583745194947832, "grad_norm": 0.3769661486148834, "learning_rate": 1.9813228627021355e-05, "loss": 0.5373, "step": 4583 }, { "epoch": 0.1258649093904448, "grad_norm": 0.4106237292289734, "learning_rate": 1.981314553540707e-05, "loss": 0.5402, "step": 4584 }, { "epoch": 0.1258923668314113, "grad_norm": 0.3978353440761566, "learning_rate": 1.9813062425488137e-05, "loss": 0.552, "step": 4585 }, { "epoch": 0.1259198242723778, "grad_norm": 0.363187700510025, "learning_rate": 1.9812979297264713e-05, "loss": 0.5798, "step": 4586 }, { "epoch": 0.12594728171334432, "grad_norm": 0.3238270580768585, "learning_rate": 1.9812896150736947e-05, "loss": 0.4637, "step": 4587 }, { "epoch": 0.12597473915431082, "grad_norm": 0.3515819311141968, "learning_rate": 1.9812812985904996e-05, "loss": 0.4821, "step": 4588 }, { "epoch": 0.1260021965952773, "grad_norm": 0.356880784034729, "learning_rate": 1.981272980276902e-05, "loss": 0.4942, "step": 4589 }, { "epoch": 0.12602965403624383, "grad_norm": 0.367489218711853, "learning_rate": 1.9812646601329174e-05, "loss": 0.5193, "step": 4590 }, { "epoch": 0.12605711147721033, "grad_norm": 0.3482157289981842, "learning_rate": 1.9812563381585605e-05, "loss": 0.5327, "step": 4591 }, { "epoch": 0.12608456891817682, "grad_norm": 0.34965598583221436, "learning_rate": 1.9812480143538473e-05, "loss": 0.6748, "step": 4592 }, { "epoch": 0.12611202635914331, "grad_norm": 0.343345046043396, "learning_rate": 1.9812396887187936e-05, "loss": 0.5835, "step": 4593 }, { "epoch": 0.12613948380010984, "grad_norm": 0.43978390097618103, "learning_rate": 1.9812313612534148e-05, "loss": 0.6394, "step": 4594 }, { "epoch": 0.12616694124107633, "grad_norm": 0.3992292582988739, "learning_rate": 1.9812230319577262e-05, "loss": 0.5691, "step": 4595 }, { "epoch": 0.12619439868204282, "grad_norm": 0.3587813973426819, "learning_rate": 1.9812147008317438e-05, "loss": 0.4806, "step": 4596 }, { "epoch": 0.12622185612300935, "grad_norm": 0.3747231364250183, "learning_rate": 1.9812063678754824e-05, "loss": 0.5265, "step": 4597 }, { "epoch": 0.12624931356397584, "grad_norm": 0.3854464590549469, "learning_rate": 1.9811980330889583e-05, "loss": 0.479, "step": 4598 }, { "epoch": 0.12627677100494233, "grad_norm": 0.37662485241889954, "learning_rate": 1.9811896964721865e-05, "loss": 0.5776, "step": 4599 }, { "epoch": 0.12630422844590883, "grad_norm": 0.3242397904396057, "learning_rate": 1.9811813580251827e-05, "loss": 0.4831, "step": 4600 }, { "epoch": 0.12633168588687535, "grad_norm": 0.3922351896762848, "learning_rate": 1.9811730177479624e-05, "loss": 0.6241, "step": 4601 }, { "epoch": 0.12635914332784184, "grad_norm": 0.34332069754600525, "learning_rate": 1.9811646756405417e-05, "loss": 0.501, "step": 4602 }, { "epoch": 0.12638660076880834, "grad_norm": 0.3683393895626068, "learning_rate": 1.9811563317029356e-05, "loss": 0.5545, "step": 4603 }, { "epoch": 0.12641405820977486, "grad_norm": 0.36037373542785645, "learning_rate": 1.9811479859351596e-05, "loss": 0.5479, "step": 4604 }, { "epoch": 0.12644151565074135, "grad_norm": 0.36243706941604614, "learning_rate": 1.9811396383372293e-05, "loss": 0.4875, "step": 4605 }, { "epoch": 0.12646897309170785, "grad_norm": 0.42911162972450256, "learning_rate": 1.9811312889091607e-05, "loss": 0.5095, "step": 4606 }, { "epoch": 0.12649643053267434, "grad_norm": 0.3874106705188751, "learning_rate": 1.981122937650969e-05, "loss": 0.5604, "step": 4607 }, { "epoch": 0.12652388797364086, "grad_norm": 0.41102334856987, "learning_rate": 1.98111458456267e-05, "loss": 0.5224, "step": 4608 }, { "epoch": 0.12655134541460736, "grad_norm": 0.3587576448917389, "learning_rate": 1.9811062296442792e-05, "loss": 0.5293, "step": 4609 }, { "epoch": 0.12657880285557385, "grad_norm": 0.40790796279907227, "learning_rate": 1.981097872895812e-05, "loss": 0.6135, "step": 4610 }, { "epoch": 0.12660626029654037, "grad_norm": 0.3439798057079315, "learning_rate": 1.9810895143172844e-05, "loss": 0.5182, "step": 4611 }, { "epoch": 0.12663371773750687, "grad_norm": 0.35111042857170105, "learning_rate": 1.9810811539087113e-05, "loss": 0.5458, "step": 4612 }, { "epoch": 0.12666117517847336, "grad_norm": 0.4094531834125519, "learning_rate": 1.981072791670109e-05, "loss": 0.544, "step": 4613 }, { "epoch": 0.12668863261943986, "grad_norm": 0.4188336730003357, "learning_rate": 1.981064427601493e-05, "loss": 0.5966, "step": 4614 }, { "epoch": 0.12671609006040638, "grad_norm": 0.3792400360107422, "learning_rate": 1.981056061702878e-05, "loss": 0.5373, "step": 4615 }, { "epoch": 0.12674354750137287, "grad_norm": 0.41572368144989014, "learning_rate": 1.981047693974281e-05, "loss": 0.5726, "step": 4616 }, { "epoch": 0.12677100494233937, "grad_norm": 0.3353620767593384, "learning_rate": 1.981039324415717e-05, "loss": 0.5189, "step": 4617 }, { "epoch": 0.1267984623833059, "grad_norm": 0.3309624493122101, "learning_rate": 1.981030953027201e-05, "loss": 0.4638, "step": 4618 }, { "epoch": 0.12682591982427238, "grad_norm": 0.3376156687736511, "learning_rate": 1.9810225798087498e-05, "loss": 0.5837, "step": 4619 }, { "epoch": 0.12685337726523888, "grad_norm": 0.3366253674030304, "learning_rate": 1.9810142047603777e-05, "loss": 0.4843, "step": 4620 }, { "epoch": 0.12688083470620537, "grad_norm": 0.36876431107521057, "learning_rate": 1.9810058278821017e-05, "loss": 0.5543, "step": 4621 }, { "epoch": 0.1269082921471719, "grad_norm": 0.4178040027618408, "learning_rate": 1.9809974491739364e-05, "loss": 0.5277, "step": 4622 }, { "epoch": 0.12693574958813839, "grad_norm": 0.39849385619163513, "learning_rate": 1.9809890686358977e-05, "loss": 0.5406, "step": 4623 }, { "epoch": 0.12696320702910488, "grad_norm": 0.44090771675109863, "learning_rate": 1.9809806862680012e-05, "loss": 0.633, "step": 4624 }, { "epoch": 0.1269906644700714, "grad_norm": 0.35264307260513306, "learning_rate": 1.9809723020702628e-05, "loss": 0.5185, "step": 4625 }, { "epoch": 0.1270181219110379, "grad_norm": 0.4347602128982544, "learning_rate": 1.9809639160426977e-05, "loss": 0.4859, "step": 4626 }, { "epoch": 0.1270455793520044, "grad_norm": 0.33474084734916687, "learning_rate": 1.9809555281853223e-05, "loss": 0.4794, "step": 4627 }, { "epoch": 0.12707303679297088, "grad_norm": 0.37490859627723694, "learning_rate": 1.9809471384981514e-05, "loss": 0.5467, "step": 4628 }, { "epoch": 0.1271004942339374, "grad_norm": 0.35054439306259155, "learning_rate": 1.9809387469812013e-05, "loss": 0.4803, "step": 4629 }, { "epoch": 0.1271279516749039, "grad_norm": 0.39731365442276, "learning_rate": 1.980930353634487e-05, "loss": 0.4599, "step": 4630 }, { "epoch": 0.1271554091158704, "grad_norm": 0.3572462201118469, "learning_rate": 1.9809219584580245e-05, "loss": 0.5236, "step": 4631 }, { "epoch": 0.12718286655683692, "grad_norm": 0.3291398286819458, "learning_rate": 1.98091356145183e-05, "loss": 0.4892, "step": 4632 }, { "epoch": 0.1272103239978034, "grad_norm": 0.3566029667854309, "learning_rate": 1.980905162615918e-05, "loss": 0.5793, "step": 4633 }, { "epoch": 0.1272377814387699, "grad_norm": 0.3716113567352295, "learning_rate": 1.980896761950305e-05, "loss": 0.6111, "step": 4634 }, { "epoch": 0.1272652388797364, "grad_norm": 0.3750631809234619, "learning_rate": 1.980888359455007e-05, "loss": 0.4202, "step": 4635 }, { "epoch": 0.12729269632070292, "grad_norm": 0.4013785123825073, "learning_rate": 1.9808799551300386e-05, "loss": 0.5263, "step": 4636 }, { "epoch": 0.1273201537616694, "grad_norm": 0.3738197684288025, "learning_rate": 1.980871548975416e-05, "loss": 0.5691, "step": 4637 }, { "epoch": 0.1273476112026359, "grad_norm": 0.3599986732006073, "learning_rate": 1.9808631409911553e-05, "loss": 0.5102, "step": 4638 }, { "epoch": 0.12737506864360243, "grad_norm": 0.3562566936016083, "learning_rate": 1.9808547311772713e-05, "loss": 0.5076, "step": 4639 }, { "epoch": 0.12740252608456892, "grad_norm": 0.4316199719905853, "learning_rate": 1.9808463195337806e-05, "loss": 0.5869, "step": 4640 }, { "epoch": 0.12742998352553542, "grad_norm": 0.3709467053413391, "learning_rate": 1.980837906060698e-05, "loss": 0.5865, "step": 4641 }, { "epoch": 0.1274574409665019, "grad_norm": 0.5408498048782349, "learning_rate": 1.9808294907580402e-05, "loss": 0.6069, "step": 4642 }, { "epoch": 0.12748489840746843, "grad_norm": 0.3829575479030609, "learning_rate": 1.9808210736258217e-05, "loss": 0.5187, "step": 4643 }, { "epoch": 0.12751235584843493, "grad_norm": 0.33770987391471863, "learning_rate": 1.9808126546640592e-05, "loss": 0.5726, "step": 4644 }, { "epoch": 0.12753981328940142, "grad_norm": 0.35748112201690674, "learning_rate": 1.9808042338727685e-05, "loss": 0.5236, "step": 4645 }, { "epoch": 0.12756727073036794, "grad_norm": 0.36625924706459045, "learning_rate": 1.9807958112519642e-05, "loss": 0.6007, "step": 4646 }, { "epoch": 0.12759472817133444, "grad_norm": 0.3561141788959503, "learning_rate": 1.980787386801663e-05, "loss": 0.5816, "step": 4647 }, { "epoch": 0.12762218561230093, "grad_norm": 0.3550746440887451, "learning_rate": 1.9807789605218804e-05, "loss": 0.5106, "step": 4648 }, { "epoch": 0.12764964305326743, "grad_norm": 0.30563101172447205, "learning_rate": 1.9807705324126316e-05, "loss": 0.4876, "step": 4649 }, { "epoch": 0.12767710049423395, "grad_norm": 0.3240388035774231, "learning_rate": 1.980762102473933e-05, "loss": 0.4035, "step": 4650 }, { "epoch": 0.12770455793520044, "grad_norm": 0.3525928854942322, "learning_rate": 1.9807536707058e-05, "loss": 0.5662, "step": 4651 }, { "epoch": 0.12773201537616694, "grad_norm": 0.3636147975921631, "learning_rate": 1.9807452371082486e-05, "loss": 0.5255, "step": 4652 }, { "epoch": 0.12775947281713343, "grad_norm": 0.3470696210861206, "learning_rate": 1.980736801681294e-05, "loss": 0.5777, "step": 4653 }, { "epoch": 0.12778693025809995, "grad_norm": 0.4342823624610901, "learning_rate": 1.9807283644249525e-05, "loss": 0.5906, "step": 4654 }, { "epoch": 0.12781438769906645, "grad_norm": 0.3733419179916382, "learning_rate": 1.9807199253392393e-05, "loss": 0.4728, "step": 4655 }, { "epoch": 0.12784184514003294, "grad_norm": 0.4401317238807678, "learning_rate": 1.9807114844241706e-05, "loss": 0.6095, "step": 4656 }, { "epoch": 0.12786930258099946, "grad_norm": 0.3589775264263153, "learning_rate": 1.9807030416797623e-05, "loss": 0.5125, "step": 4657 }, { "epoch": 0.12789676002196596, "grad_norm": 0.4197657108306885, "learning_rate": 1.980694597106029e-05, "loss": 0.551, "step": 4658 }, { "epoch": 0.12792421746293245, "grad_norm": 0.38242799043655396, "learning_rate": 1.980686150702988e-05, "loss": 0.5215, "step": 4659 }, { "epoch": 0.12795167490389894, "grad_norm": 0.39155423641204834, "learning_rate": 1.980677702470654e-05, "loss": 0.6202, "step": 4660 }, { "epoch": 0.12797913234486546, "grad_norm": 0.3757980167865753, "learning_rate": 1.9806692524090434e-05, "loss": 0.6214, "step": 4661 }, { "epoch": 0.12800658978583196, "grad_norm": 0.3857920467853546, "learning_rate": 1.9806608005181713e-05, "loss": 0.5259, "step": 4662 }, { "epoch": 0.12803404722679845, "grad_norm": 0.3504411578178406, "learning_rate": 1.980652346798054e-05, "loss": 0.5666, "step": 4663 }, { "epoch": 0.12806150466776497, "grad_norm": 0.34494057297706604, "learning_rate": 1.980643891248707e-05, "loss": 0.549, "step": 4664 }, { "epoch": 0.12808896210873147, "grad_norm": 0.3728983998298645, "learning_rate": 1.980635433870146e-05, "loss": 0.4463, "step": 4665 }, { "epoch": 0.12811641954969796, "grad_norm": 0.34523314237594604, "learning_rate": 1.9806269746623875e-05, "loss": 0.4277, "step": 4666 }, { "epoch": 0.12814387699066446, "grad_norm": 0.3845461905002594, "learning_rate": 1.980618513625446e-05, "loss": 0.5737, "step": 4667 }, { "epoch": 0.12817133443163098, "grad_norm": 0.3400353789329529, "learning_rate": 1.9806100507593387e-05, "loss": 0.4713, "step": 4668 }, { "epoch": 0.12819879187259747, "grad_norm": 0.3931112587451935, "learning_rate": 1.98060158606408e-05, "loss": 0.5635, "step": 4669 }, { "epoch": 0.12822624931356397, "grad_norm": 0.424045205116272, "learning_rate": 1.9805931195396868e-05, "loss": 0.5004, "step": 4670 }, { "epoch": 0.1282537067545305, "grad_norm": 0.37648284435272217, "learning_rate": 1.9805846511861744e-05, "loss": 0.5825, "step": 4671 }, { "epoch": 0.12828116419549698, "grad_norm": 0.35095030069351196, "learning_rate": 1.9805761810035588e-05, "loss": 0.4578, "step": 4672 }, { "epoch": 0.12830862163646348, "grad_norm": 0.32021138072013855, "learning_rate": 1.9805677089918554e-05, "loss": 0.4599, "step": 4673 }, { "epoch": 0.12833607907742997, "grad_norm": 0.3179025650024414, "learning_rate": 1.98055923515108e-05, "loss": 0.4724, "step": 4674 }, { "epoch": 0.1283635365183965, "grad_norm": 0.36373889446258545, "learning_rate": 1.9805507594812493e-05, "loss": 0.5513, "step": 4675 }, { "epoch": 0.128390993959363, "grad_norm": 0.34057289361953735, "learning_rate": 1.980542281982378e-05, "loss": 0.545, "step": 4676 }, { "epoch": 0.12841845140032948, "grad_norm": 3.3009727001190186, "learning_rate": 1.9805338026544826e-05, "loss": 0.5835, "step": 4677 }, { "epoch": 0.128445908841296, "grad_norm": 0.33715271949768066, "learning_rate": 1.9805253214975786e-05, "loss": 0.6193, "step": 4678 }, { "epoch": 0.1284733662822625, "grad_norm": 0.3553815186023712, "learning_rate": 1.980516838511682e-05, "loss": 0.5059, "step": 4679 }, { "epoch": 0.128500823723229, "grad_norm": 0.3491773009300232, "learning_rate": 1.9805083536968088e-05, "loss": 0.5758, "step": 4680 }, { "epoch": 0.12852828116419548, "grad_norm": 0.3717077970504761, "learning_rate": 1.9804998670529742e-05, "loss": 0.5182, "step": 4681 }, { "epoch": 0.128555738605162, "grad_norm": 0.3661896586418152, "learning_rate": 1.9804913785801948e-05, "loss": 0.5048, "step": 4682 }, { "epoch": 0.1285831960461285, "grad_norm": 0.4295479953289032, "learning_rate": 1.9804828882784853e-05, "loss": 0.5543, "step": 4683 }, { "epoch": 0.128610653487095, "grad_norm": 0.32970130443573, "learning_rate": 1.980474396147863e-05, "loss": 0.5082, "step": 4684 }, { "epoch": 0.12863811092806152, "grad_norm": 0.3419676125049591, "learning_rate": 1.9804659021883426e-05, "loss": 0.5625, "step": 4685 }, { "epoch": 0.128665568369028, "grad_norm": 0.3935696482658386, "learning_rate": 1.980457406399941e-05, "loss": 0.6187, "step": 4686 }, { "epoch": 0.1286930258099945, "grad_norm": 0.3392459452152252, "learning_rate": 1.980448908782673e-05, "loss": 0.4879, "step": 4687 }, { "epoch": 0.128720483250961, "grad_norm": 0.39766690135002136, "learning_rate": 1.9804404093365546e-05, "loss": 0.6513, "step": 4688 }, { "epoch": 0.12874794069192752, "grad_norm": 0.40386730432510376, "learning_rate": 1.980431908061602e-05, "loss": 0.5999, "step": 4689 }, { "epoch": 0.12877539813289401, "grad_norm": 0.3893100619316101, "learning_rate": 1.980423404957831e-05, "loss": 0.572, "step": 4690 }, { "epoch": 0.1288028555738605, "grad_norm": 0.3883233666419983, "learning_rate": 1.9804149000252578e-05, "loss": 0.4662, "step": 4691 }, { "epoch": 0.12883031301482703, "grad_norm": 0.3404282331466675, "learning_rate": 1.9804063932638974e-05, "loss": 0.5113, "step": 4692 }, { "epoch": 0.12885777045579352, "grad_norm": 0.34899866580963135, "learning_rate": 1.9803978846737667e-05, "loss": 0.5158, "step": 4693 }, { "epoch": 0.12888522789676002, "grad_norm": 0.3995777666568756, "learning_rate": 1.9803893742548807e-05, "loss": 0.5395, "step": 4694 }, { "epoch": 0.1289126853377265, "grad_norm": 0.3717349171638489, "learning_rate": 1.9803808620072556e-05, "loss": 0.6263, "step": 4695 }, { "epoch": 0.12894014277869303, "grad_norm": 0.3653763234615326, "learning_rate": 1.9803723479309075e-05, "loss": 0.5636, "step": 4696 }, { "epoch": 0.12896760021965953, "grad_norm": 0.3593781292438507, "learning_rate": 1.9803638320258518e-05, "loss": 0.4991, "step": 4697 }, { "epoch": 0.12899505766062602, "grad_norm": 0.33587780594825745, "learning_rate": 1.9803553142921048e-05, "loss": 0.5517, "step": 4698 }, { "epoch": 0.12902251510159254, "grad_norm": 0.39920517802238464, "learning_rate": 1.980346794729682e-05, "loss": 0.614, "step": 4699 }, { "epoch": 0.12904997254255904, "grad_norm": 0.33639827370643616, "learning_rate": 1.9803382733386e-05, "loss": 0.575, "step": 4700 }, { "epoch": 0.12907742998352553, "grad_norm": 0.37912821769714355, "learning_rate": 1.980329750118874e-05, "loss": 0.6222, "step": 4701 }, { "epoch": 0.12910488742449203, "grad_norm": 0.33052918314933777, "learning_rate": 1.98032122507052e-05, "loss": 0.4965, "step": 4702 }, { "epoch": 0.12913234486545855, "grad_norm": 0.3617566227912903, "learning_rate": 1.9803126981935543e-05, "loss": 0.603, "step": 4703 }, { "epoch": 0.12915980230642504, "grad_norm": 0.3575611114501953, "learning_rate": 1.9803041694879926e-05, "loss": 0.5267, "step": 4704 }, { "epoch": 0.12918725974739154, "grad_norm": 0.4053252935409546, "learning_rate": 1.9802956389538502e-05, "loss": 0.4742, "step": 4705 }, { "epoch": 0.12921471718835806, "grad_norm": 0.3746340572834015, "learning_rate": 1.9802871065911442e-05, "loss": 0.5456, "step": 4706 }, { "epoch": 0.12924217462932455, "grad_norm": 0.3243280351161957, "learning_rate": 1.9802785723998893e-05, "loss": 0.5274, "step": 4707 }, { "epoch": 0.12926963207029105, "grad_norm": 0.3887551426887512, "learning_rate": 1.9802700363801023e-05, "loss": 0.5664, "step": 4708 }, { "epoch": 0.12929708951125754, "grad_norm": 0.34058913588523865, "learning_rate": 1.980261498531799e-05, "loss": 0.5421, "step": 4709 }, { "epoch": 0.12932454695222406, "grad_norm": 0.38874146342277527, "learning_rate": 1.980252958854995e-05, "loss": 0.5922, "step": 4710 }, { "epoch": 0.12935200439319056, "grad_norm": 0.3872394263744354, "learning_rate": 1.9802444173497063e-05, "loss": 0.4967, "step": 4711 }, { "epoch": 0.12937946183415705, "grad_norm": 0.36738237738609314, "learning_rate": 1.9802358740159487e-05, "loss": 0.4789, "step": 4712 }, { "epoch": 0.12940691927512357, "grad_norm": 0.46389031410217285, "learning_rate": 1.9802273288537387e-05, "loss": 0.6317, "step": 4713 }, { "epoch": 0.12943437671609007, "grad_norm": 0.39626696705818176, "learning_rate": 1.9802187818630915e-05, "loss": 0.5171, "step": 4714 }, { "epoch": 0.12946183415705656, "grad_norm": 0.5049596428871155, "learning_rate": 1.9802102330440238e-05, "loss": 0.4896, "step": 4715 }, { "epoch": 0.12948929159802305, "grad_norm": 0.36169591546058655, "learning_rate": 1.980201682396551e-05, "loss": 0.5107, "step": 4716 }, { "epoch": 0.12951674903898958, "grad_norm": 0.35634663701057434, "learning_rate": 1.9801931299206893e-05, "loss": 0.5953, "step": 4717 }, { "epoch": 0.12954420647995607, "grad_norm": 0.3695346713066101, "learning_rate": 1.9801845756164547e-05, "loss": 0.5403, "step": 4718 }, { "epoch": 0.12957166392092256, "grad_norm": 0.35969969630241394, "learning_rate": 1.9801760194838626e-05, "loss": 0.5527, "step": 4719 }, { "epoch": 0.12959912136188906, "grad_norm": 0.31247997283935547, "learning_rate": 1.9801674615229296e-05, "loss": 0.5927, "step": 4720 }, { "epoch": 0.12962657880285558, "grad_norm": 0.3519132137298584, "learning_rate": 1.9801589017336715e-05, "loss": 0.4857, "step": 4721 }, { "epoch": 0.12965403624382207, "grad_norm": 0.32654133439064026, "learning_rate": 1.9801503401161043e-05, "loss": 0.5428, "step": 4722 }, { "epoch": 0.12968149368478857, "grad_norm": 0.32837679982185364, "learning_rate": 1.9801417766702436e-05, "loss": 0.5056, "step": 4723 }, { "epoch": 0.1297089511257551, "grad_norm": 0.391488641500473, "learning_rate": 1.980133211396106e-05, "loss": 0.5802, "step": 4724 }, { "epoch": 0.12973640856672158, "grad_norm": 0.3670549988746643, "learning_rate": 1.980124644293707e-05, "loss": 0.5567, "step": 4725 }, { "epoch": 0.12976386600768808, "grad_norm": 0.3985184133052826, "learning_rate": 1.9801160753630628e-05, "loss": 0.5998, "step": 4726 }, { "epoch": 0.12979132344865457, "grad_norm": 0.37249550223350525, "learning_rate": 1.980107504604189e-05, "loss": 0.5387, "step": 4727 }, { "epoch": 0.1298187808896211, "grad_norm": 0.36497747898101807, "learning_rate": 1.9800989320171023e-05, "loss": 0.4711, "step": 4728 }, { "epoch": 0.1298462383305876, "grad_norm": 0.35467416048049927, "learning_rate": 1.980090357601818e-05, "loss": 0.5316, "step": 4729 }, { "epoch": 0.12987369577155408, "grad_norm": 0.36455363035202026, "learning_rate": 1.9800817813583526e-05, "loss": 0.5333, "step": 4730 }, { "epoch": 0.1299011532125206, "grad_norm": 0.3497975468635559, "learning_rate": 1.9800732032867215e-05, "loss": 0.654, "step": 4731 }, { "epoch": 0.1299286106534871, "grad_norm": 0.35124337673187256, "learning_rate": 1.9800646233869413e-05, "loss": 0.5405, "step": 4732 }, { "epoch": 0.1299560680944536, "grad_norm": 0.40341323614120483, "learning_rate": 1.9800560416590277e-05, "loss": 0.64, "step": 4733 }, { "epoch": 0.12998352553542009, "grad_norm": 0.3068658411502838, "learning_rate": 1.980047458102997e-05, "loss": 0.5579, "step": 4734 }, { "epoch": 0.1300109829763866, "grad_norm": 0.4347068965435028, "learning_rate": 1.980038872718865e-05, "loss": 0.6138, "step": 4735 }, { "epoch": 0.1300384404173531, "grad_norm": 0.3783550262451172, "learning_rate": 1.9800302855066475e-05, "loss": 0.549, "step": 4736 }, { "epoch": 0.1300658978583196, "grad_norm": 0.3929012715816498, "learning_rate": 1.9800216964663605e-05, "loss": 0.598, "step": 4737 }, { "epoch": 0.13009335529928612, "grad_norm": 0.3782687783241272, "learning_rate": 1.9800131055980206e-05, "loss": 0.6464, "step": 4738 }, { "epoch": 0.1301208127402526, "grad_norm": 0.3863724172115326, "learning_rate": 1.9800045129016437e-05, "loss": 0.6175, "step": 4739 }, { "epoch": 0.1301482701812191, "grad_norm": 0.3933177590370178, "learning_rate": 1.9799959183772453e-05, "loss": 0.546, "step": 4740 }, { "epoch": 0.1301757276221856, "grad_norm": 0.34894153475761414, "learning_rate": 1.9799873220248417e-05, "loss": 0.5249, "step": 4741 }, { "epoch": 0.13020318506315212, "grad_norm": 0.41971153020858765, "learning_rate": 1.979978723844449e-05, "loss": 0.5266, "step": 4742 }, { "epoch": 0.13023064250411862, "grad_norm": 0.3314156234264374, "learning_rate": 1.979970123836083e-05, "loss": 0.5185, "step": 4743 }, { "epoch": 0.1302580999450851, "grad_norm": 0.46580028533935547, "learning_rate": 1.9799615219997604e-05, "loss": 0.5759, "step": 4744 }, { "epoch": 0.13028555738605163, "grad_norm": 0.3579244017601013, "learning_rate": 1.9799529183354967e-05, "loss": 0.5203, "step": 4745 }, { "epoch": 0.13031301482701813, "grad_norm": 0.32934141159057617, "learning_rate": 1.9799443128433078e-05, "loss": 0.4872, "step": 4746 }, { "epoch": 0.13034047226798462, "grad_norm": 0.38458195328712463, "learning_rate": 1.97993570552321e-05, "loss": 0.5031, "step": 4747 }, { "epoch": 0.1303679297089511, "grad_norm": 0.3871532380580902, "learning_rate": 1.9799270963752197e-05, "loss": 0.5257, "step": 4748 }, { "epoch": 0.13039538714991764, "grad_norm": 0.3566650450229645, "learning_rate": 1.979918485399352e-05, "loss": 0.5375, "step": 4749 }, { "epoch": 0.13042284459088413, "grad_norm": 0.3387940526008606, "learning_rate": 1.979909872595624e-05, "loss": 0.4727, "step": 4750 }, { "epoch": 0.13045030203185062, "grad_norm": 0.3862028419971466, "learning_rate": 1.9799012579640517e-05, "loss": 0.5187, "step": 4751 }, { "epoch": 0.13047775947281715, "grad_norm": 0.39067748188972473, "learning_rate": 1.97989264150465e-05, "loss": 0.5272, "step": 4752 }, { "epoch": 0.13050521691378364, "grad_norm": 0.419339656829834, "learning_rate": 1.9798840232174364e-05, "loss": 0.6215, "step": 4753 }, { "epoch": 0.13053267435475013, "grad_norm": 0.3750307261943817, "learning_rate": 1.979875403102426e-05, "loss": 0.5451, "step": 4754 }, { "epoch": 0.13056013179571663, "grad_norm": 0.4008561670780182, "learning_rate": 1.9798667811596352e-05, "loss": 0.5006, "step": 4755 }, { "epoch": 0.13058758923668315, "grad_norm": 0.4110734164714813, "learning_rate": 1.9798581573890804e-05, "loss": 0.615, "step": 4756 }, { "epoch": 0.13061504667764964, "grad_norm": 0.3746647536754608, "learning_rate": 1.9798495317907772e-05, "loss": 0.5267, "step": 4757 }, { "epoch": 0.13064250411861614, "grad_norm": 0.3939198851585388, "learning_rate": 1.9798409043647423e-05, "loss": 0.6504, "step": 4758 }, { "epoch": 0.13066996155958266, "grad_norm": 0.341647207736969, "learning_rate": 1.9798322751109905e-05, "loss": 0.5028, "step": 4759 }, { "epoch": 0.13069741900054915, "grad_norm": 0.3801475167274475, "learning_rate": 1.9798236440295398e-05, "loss": 0.5708, "step": 4760 }, { "epoch": 0.13072487644151565, "grad_norm": 0.3854199945926666, "learning_rate": 1.9798150111204045e-05, "loss": 0.5767, "step": 4761 }, { "epoch": 0.13075233388248214, "grad_norm": 0.37700024247169495, "learning_rate": 1.9798063763836017e-05, "loss": 0.5271, "step": 4762 }, { "epoch": 0.13077979132344866, "grad_norm": 0.7228770852088928, "learning_rate": 1.9797977398191476e-05, "loss": 0.5306, "step": 4763 }, { "epoch": 0.13080724876441516, "grad_norm": 0.3755204975605011, "learning_rate": 1.979789101427058e-05, "loss": 0.5005, "step": 4764 }, { "epoch": 0.13083470620538165, "grad_norm": 0.42763081192970276, "learning_rate": 1.9797804612073485e-05, "loss": 0.4962, "step": 4765 }, { "epoch": 0.13086216364634817, "grad_norm": 0.32806918025016785, "learning_rate": 1.9797718191600358e-05, "loss": 0.5002, "step": 4766 }, { "epoch": 0.13088962108731467, "grad_norm": 0.38588404655456543, "learning_rate": 1.9797631752851364e-05, "loss": 0.6148, "step": 4767 }, { "epoch": 0.13091707852828116, "grad_norm": 0.9077989459037781, "learning_rate": 1.9797545295826658e-05, "loss": 0.5547, "step": 4768 }, { "epoch": 0.13094453596924766, "grad_norm": 0.35402193665504456, "learning_rate": 1.9797458820526403e-05, "loss": 0.5982, "step": 4769 }, { "epoch": 0.13097199341021418, "grad_norm": 0.36603885889053345, "learning_rate": 1.9797372326950758e-05, "loss": 0.5626, "step": 4770 }, { "epoch": 0.13099945085118067, "grad_norm": 0.38114723563194275, "learning_rate": 1.979728581509989e-05, "loss": 0.5507, "step": 4771 }, { "epoch": 0.13102690829214717, "grad_norm": 0.35898557305336, "learning_rate": 1.9797199284973954e-05, "loss": 0.5438, "step": 4772 }, { "epoch": 0.1310543657331137, "grad_norm": 0.3715273141860962, "learning_rate": 1.9797112736573115e-05, "loss": 0.4819, "step": 4773 }, { "epoch": 0.13108182317408018, "grad_norm": 0.40087807178497314, "learning_rate": 1.9797026169897538e-05, "loss": 0.5127, "step": 4774 }, { "epoch": 0.13110928061504667, "grad_norm": 0.3630547523498535, "learning_rate": 1.9796939584947377e-05, "loss": 0.4973, "step": 4775 }, { "epoch": 0.13113673805601317, "grad_norm": 0.3911893367767334, "learning_rate": 1.9796852981722792e-05, "loss": 0.6597, "step": 4776 }, { "epoch": 0.1311641954969797, "grad_norm": 0.37113964557647705, "learning_rate": 1.979676636022396e-05, "loss": 0.6191, "step": 4777 }, { "epoch": 0.13119165293794618, "grad_norm": 0.3561573922634125, "learning_rate": 1.9796679720451025e-05, "loss": 0.4922, "step": 4778 }, { "epoch": 0.13121911037891268, "grad_norm": 0.3172551095485687, "learning_rate": 1.9796593062404157e-05, "loss": 0.4843, "step": 4779 }, { "epoch": 0.1312465678198792, "grad_norm": 0.3621581494808197, "learning_rate": 1.9796506386083515e-05, "loss": 0.5634, "step": 4780 }, { "epoch": 0.1312740252608457, "grad_norm": 0.3462367057800293, "learning_rate": 1.9796419691489263e-05, "loss": 0.4915, "step": 4781 }, { "epoch": 0.1313014827018122, "grad_norm": 0.40557098388671875, "learning_rate": 1.9796332978621562e-05, "loss": 0.5814, "step": 4782 }, { "epoch": 0.13132894014277868, "grad_norm": 0.3666320741176605, "learning_rate": 1.9796246247480572e-05, "loss": 0.5328, "step": 4783 }, { "epoch": 0.1313563975837452, "grad_norm": 0.3556571900844574, "learning_rate": 1.979615949806646e-05, "loss": 0.5149, "step": 4784 }, { "epoch": 0.1313838550247117, "grad_norm": 0.3832613229751587, "learning_rate": 1.979607273037938e-05, "loss": 0.5896, "step": 4785 }, { "epoch": 0.1314113124656782, "grad_norm": 0.3721497058868408, "learning_rate": 1.9795985944419497e-05, "loss": 0.548, "step": 4786 }, { "epoch": 0.1314387699066447, "grad_norm": 0.3426712155342102, "learning_rate": 1.9795899140186977e-05, "loss": 0.5354, "step": 4787 }, { "epoch": 0.1314662273476112, "grad_norm": 0.3597654402256012, "learning_rate": 1.9795812317681978e-05, "loss": 0.6075, "step": 4788 }, { "epoch": 0.1314936847885777, "grad_norm": 0.3309241831302643, "learning_rate": 1.979572547690466e-05, "loss": 0.5315, "step": 4789 }, { "epoch": 0.1315211422295442, "grad_norm": 0.3277648687362671, "learning_rate": 1.979563861785519e-05, "loss": 0.5821, "step": 4790 }, { "epoch": 0.13154859967051072, "grad_norm": 0.37115445733070374, "learning_rate": 1.9795551740533726e-05, "loss": 0.5082, "step": 4791 }, { "epoch": 0.1315760571114772, "grad_norm": 0.43164709210395813, "learning_rate": 1.9795464844940432e-05, "loss": 0.5644, "step": 4792 }, { "epoch": 0.1316035145524437, "grad_norm": 0.45700305700302124, "learning_rate": 1.9795377931075472e-05, "loss": 0.5464, "step": 4793 }, { "epoch": 0.1316309719934102, "grad_norm": 0.4147648215293884, "learning_rate": 1.9795290998939002e-05, "loss": 0.6163, "step": 4794 }, { "epoch": 0.13165842943437672, "grad_norm": 0.36461520195007324, "learning_rate": 1.979520404853119e-05, "loss": 0.584, "step": 4795 }, { "epoch": 0.13168588687534322, "grad_norm": 0.3699861466884613, "learning_rate": 1.97951170798522e-05, "loss": 0.5875, "step": 4796 }, { "epoch": 0.1317133443163097, "grad_norm": 0.4340061843395233, "learning_rate": 1.9795030092902185e-05, "loss": 0.5304, "step": 4797 }, { "epoch": 0.13174080175727623, "grad_norm": 0.36613813042640686, "learning_rate": 1.9794943087681317e-05, "loss": 0.5363, "step": 4798 }, { "epoch": 0.13176825919824273, "grad_norm": 0.9595581293106079, "learning_rate": 1.9794856064189748e-05, "loss": 0.4887, "step": 4799 }, { "epoch": 0.13179571663920922, "grad_norm": 0.4021627604961395, "learning_rate": 1.979476902242765e-05, "loss": 0.6504, "step": 4800 }, { "epoch": 0.13182317408017571, "grad_norm": 0.4145890772342682, "learning_rate": 1.9794681962395182e-05, "loss": 0.5787, "step": 4801 }, { "epoch": 0.13185063152114224, "grad_norm": 0.3672868311405182, "learning_rate": 1.9794594884092504e-05, "loss": 0.5753, "step": 4802 }, { "epoch": 0.13187808896210873, "grad_norm": 0.40493687987327576, "learning_rate": 1.9794507787519783e-05, "loss": 0.573, "step": 4803 }, { "epoch": 0.13190554640307522, "grad_norm": 0.3968786299228668, "learning_rate": 1.9794420672677178e-05, "loss": 0.4645, "step": 4804 }, { "epoch": 0.13193300384404175, "grad_norm": 0.3455352485179901, "learning_rate": 1.979433353956485e-05, "loss": 0.4933, "step": 4805 }, { "epoch": 0.13196046128500824, "grad_norm": 0.383456826210022, "learning_rate": 1.979424638818297e-05, "loss": 0.5916, "step": 4806 }, { "epoch": 0.13198791872597473, "grad_norm": 0.376600444316864, "learning_rate": 1.979415921853169e-05, "loss": 0.5606, "step": 4807 }, { "epoch": 0.13201537616694123, "grad_norm": 0.36070138216018677, "learning_rate": 1.9794072030611175e-05, "loss": 0.5407, "step": 4808 }, { "epoch": 0.13204283360790775, "grad_norm": 0.3542858958244324, "learning_rate": 1.9793984824421596e-05, "loss": 0.5402, "step": 4809 }, { "epoch": 0.13207029104887424, "grad_norm": 0.3590506315231323, "learning_rate": 1.97938975999631e-05, "loss": 0.57, "step": 4810 }, { "epoch": 0.13209774848984074, "grad_norm": 0.46108755469322205, "learning_rate": 1.979381035723587e-05, "loss": 0.5185, "step": 4811 }, { "epoch": 0.13212520593080726, "grad_norm": 0.3627254068851471, "learning_rate": 1.9793723096240052e-05, "loss": 0.5906, "step": 4812 }, { "epoch": 0.13215266337177375, "grad_norm": 0.4196452796459198, "learning_rate": 1.9793635816975816e-05, "loss": 0.5607, "step": 4813 }, { "epoch": 0.13218012081274025, "grad_norm": 0.36930710077285767, "learning_rate": 1.9793548519443325e-05, "loss": 0.5885, "step": 4814 }, { "epoch": 0.13220757825370674, "grad_norm": 0.4373982548713684, "learning_rate": 1.9793461203642736e-05, "loss": 0.5057, "step": 4815 }, { "epoch": 0.13223503569467326, "grad_norm": 0.36196550726890564, "learning_rate": 1.979337386957422e-05, "loss": 0.6055, "step": 4816 }, { "epoch": 0.13226249313563976, "grad_norm": 0.4767090678215027, "learning_rate": 1.9793286517237932e-05, "loss": 0.541, "step": 4817 }, { "epoch": 0.13228995057660625, "grad_norm": 0.3373899459838867, "learning_rate": 1.979319914663404e-05, "loss": 0.5741, "step": 4818 }, { "epoch": 0.13231740801757277, "grad_norm": 0.3310614824295044, "learning_rate": 1.979311175776271e-05, "loss": 0.479, "step": 4819 }, { "epoch": 0.13234486545853927, "grad_norm": 0.5483216047286987, "learning_rate": 1.97930243506241e-05, "loss": 0.501, "step": 4820 }, { "epoch": 0.13237232289950576, "grad_norm": 0.37252137064933777, "learning_rate": 1.979293692521837e-05, "loss": 0.5385, "step": 4821 }, { "epoch": 0.13239978034047226, "grad_norm": 0.3370871841907501, "learning_rate": 1.979284948154569e-05, "loss": 0.4746, "step": 4822 }, { "epoch": 0.13242723778143878, "grad_norm": 0.3453536331653595, "learning_rate": 1.9792762019606218e-05, "loss": 0.5538, "step": 4823 }, { "epoch": 0.13245469522240527, "grad_norm": 0.32206863164901733, "learning_rate": 1.9792674539400122e-05, "loss": 0.5133, "step": 4824 }, { "epoch": 0.13248215266337177, "grad_norm": 0.3509998321533203, "learning_rate": 1.9792587040927563e-05, "loss": 0.5141, "step": 4825 }, { "epoch": 0.1325096101043383, "grad_norm": 0.40161970257759094, "learning_rate": 1.9792499524188702e-05, "loss": 0.5597, "step": 4826 }, { "epoch": 0.13253706754530478, "grad_norm": 0.38260579109191895, "learning_rate": 1.9792411989183704e-05, "loss": 0.625, "step": 4827 }, { "epoch": 0.13256452498627128, "grad_norm": 0.39529919624328613, "learning_rate": 1.9792324435912732e-05, "loss": 0.5666, "step": 4828 }, { "epoch": 0.13259198242723777, "grad_norm": 0.3542931377887726, "learning_rate": 1.9792236864375952e-05, "loss": 0.558, "step": 4829 }, { "epoch": 0.1326194398682043, "grad_norm": 0.3778814971446991, "learning_rate": 1.9792149274573523e-05, "loss": 0.5142, "step": 4830 }, { "epoch": 0.13264689730917079, "grad_norm": 0.37390363216400146, "learning_rate": 1.979206166650561e-05, "loss": 0.5734, "step": 4831 }, { "epoch": 0.13267435475013728, "grad_norm": 0.3479765057563782, "learning_rate": 1.9791974040172376e-05, "loss": 0.5621, "step": 4832 }, { "epoch": 0.1327018121911038, "grad_norm": 0.38255685567855835, "learning_rate": 1.9791886395573986e-05, "loss": 0.5852, "step": 4833 }, { "epoch": 0.1327292696320703, "grad_norm": 0.3987768292427063, "learning_rate": 1.9791798732710605e-05, "loss": 0.5748, "step": 4834 }, { "epoch": 0.1327567270730368, "grad_norm": 0.3850495219230652, "learning_rate": 1.979171105158239e-05, "loss": 0.5382, "step": 4835 }, { "epoch": 0.13278418451400328, "grad_norm": 0.41548630595207214, "learning_rate": 1.979162335218951e-05, "loss": 0.5466, "step": 4836 }, { "epoch": 0.1328116419549698, "grad_norm": 0.4912225604057312, "learning_rate": 1.9791535634532128e-05, "loss": 0.5505, "step": 4837 }, { "epoch": 0.1328390993959363, "grad_norm": 0.40368402004241943, "learning_rate": 1.979144789861041e-05, "loss": 0.5159, "step": 4838 }, { "epoch": 0.1328665568369028, "grad_norm": 0.3249405026435852, "learning_rate": 1.9791360144424514e-05, "loss": 0.374, "step": 4839 }, { "epoch": 0.13289401427786932, "grad_norm": 0.3873373866081238, "learning_rate": 1.9791272371974605e-05, "loss": 0.528, "step": 4840 }, { "epoch": 0.1329214717188358, "grad_norm": 0.36045604944229126, "learning_rate": 1.979118458126085e-05, "loss": 0.5657, "step": 4841 }, { "epoch": 0.1329489291598023, "grad_norm": 0.4165147840976715, "learning_rate": 1.9791096772283408e-05, "loss": 0.5539, "step": 4842 }, { "epoch": 0.1329763866007688, "grad_norm": 0.4338565170764923, "learning_rate": 1.9791008945042448e-05, "loss": 0.6005, "step": 4843 }, { "epoch": 0.13300384404173532, "grad_norm": 0.36060699820518494, "learning_rate": 1.979092109953813e-05, "loss": 0.5776, "step": 4844 }, { "epoch": 0.1330313014827018, "grad_norm": 0.34357690811157227, "learning_rate": 1.9790833235770622e-05, "loss": 0.5238, "step": 4845 }, { "epoch": 0.1330587589236683, "grad_norm": 0.3290780484676361, "learning_rate": 1.979074535374008e-05, "loss": 0.5569, "step": 4846 }, { "epoch": 0.13308621636463483, "grad_norm": 0.38335248827934265, "learning_rate": 1.9790657453446677e-05, "loss": 0.5552, "step": 4847 }, { "epoch": 0.13311367380560132, "grad_norm": 0.35170555114746094, "learning_rate": 1.9790569534890574e-05, "loss": 0.6534, "step": 4848 }, { "epoch": 0.13314113124656782, "grad_norm": 0.3956603705883026, "learning_rate": 1.979048159807193e-05, "loss": 0.63, "step": 4849 }, { "epoch": 0.1331685886875343, "grad_norm": 0.3562845289707184, "learning_rate": 1.9790393642990917e-05, "loss": 0.5712, "step": 4850 }, { "epoch": 0.13319604612850083, "grad_norm": 0.3504672050476074, "learning_rate": 1.9790305669647693e-05, "loss": 0.4978, "step": 4851 }, { "epoch": 0.13322350356946733, "grad_norm": 0.3288407027721405, "learning_rate": 1.9790217678042426e-05, "loss": 0.5141, "step": 4852 }, { "epoch": 0.13325096101043382, "grad_norm": 0.35571756958961487, "learning_rate": 1.9790129668175274e-05, "loss": 0.5459, "step": 4853 }, { "epoch": 0.13327841845140032, "grad_norm": 0.3911959230899811, "learning_rate": 1.979004164004641e-05, "loss": 0.537, "step": 4854 }, { "epoch": 0.13330587589236684, "grad_norm": 0.35158488154411316, "learning_rate": 1.9789953593655994e-05, "loss": 0.5739, "step": 4855 }, { "epoch": 0.13333333333333333, "grad_norm": 0.3217967748641968, "learning_rate": 1.9789865529004188e-05, "loss": 0.5649, "step": 4856 }, { "epoch": 0.13336079077429983, "grad_norm": 0.3467438519001007, "learning_rate": 1.9789777446091157e-05, "loss": 0.5588, "step": 4857 }, { "epoch": 0.13338824821526635, "grad_norm": 0.38607722520828247, "learning_rate": 1.978968934491707e-05, "loss": 0.5828, "step": 4858 }, { "epoch": 0.13341570565623284, "grad_norm": 0.37355121970176697, "learning_rate": 1.9789601225482085e-05, "loss": 0.5332, "step": 4859 }, { "epoch": 0.13344316309719934, "grad_norm": 0.40708932280540466, "learning_rate": 1.978951308778637e-05, "loss": 0.5976, "step": 4860 }, { "epoch": 0.13347062053816583, "grad_norm": 0.36296966671943665, "learning_rate": 1.978942493183009e-05, "loss": 0.6081, "step": 4861 }, { "epoch": 0.13349807797913235, "grad_norm": 0.37403616309165955, "learning_rate": 1.9789336757613406e-05, "loss": 0.5468, "step": 4862 }, { "epoch": 0.13352553542009885, "grad_norm": 0.45402172207832336, "learning_rate": 1.9789248565136488e-05, "loss": 0.5943, "step": 4863 }, { "epoch": 0.13355299286106534, "grad_norm": 0.3953668177127838, "learning_rate": 1.9789160354399494e-05, "loss": 0.5327, "step": 4864 }, { "epoch": 0.13358045030203186, "grad_norm": 0.3832216262817383, "learning_rate": 1.978907212540259e-05, "loss": 0.5273, "step": 4865 }, { "epoch": 0.13360790774299836, "grad_norm": 0.3654977083206177, "learning_rate": 1.9788983878145945e-05, "loss": 0.5229, "step": 4866 }, { "epoch": 0.13363536518396485, "grad_norm": 0.3996317684650421, "learning_rate": 1.978889561262972e-05, "loss": 0.6209, "step": 4867 }, { "epoch": 0.13366282262493134, "grad_norm": 0.3376339077949524, "learning_rate": 1.978880732885408e-05, "loss": 0.5205, "step": 4868 }, { "epoch": 0.13369028006589787, "grad_norm": 0.3975311815738678, "learning_rate": 1.978871902681919e-05, "loss": 0.6138, "step": 4869 }, { "epoch": 0.13371773750686436, "grad_norm": 0.35779356956481934, "learning_rate": 1.9788630706525215e-05, "loss": 0.5395, "step": 4870 }, { "epoch": 0.13374519494783085, "grad_norm": 0.31286463141441345, "learning_rate": 1.9788542367972322e-05, "loss": 0.5043, "step": 4871 }, { "epoch": 0.13377265238879738, "grad_norm": 0.4132097363471985, "learning_rate": 1.9788454011160668e-05, "loss": 0.567, "step": 4872 }, { "epoch": 0.13380010982976387, "grad_norm": 0.3450964689254761, "learning_rate": 1.978836563609043e-05, "loss": 0.5556, "step": 4873 }, { "epoch": 0.13382756727073036, "grad_norm": 0.3646983802318573, "learning_rate": 1.978827724276176e-05, "loss": 0.5836, "step": 4874 }, { "epoch": 0.13385502471169686, "grad_norm": 0.35934266448020935, "learning_rate": 1.9788188831174832e-05, "loss": 0.6229, "step": 4875 }, { "epoch": 0.13388248215266338, "grad_norm": 0.3318850100040436, "learning_rate": 1.978810040132981e-05, "loss": 0.4441, "step": 4876 }, { "epoch": 0.13390993959362987, "grad_norm": 0.34343305230140686, "learning_rate": 1.978801195322685e-05, "loss": 0.5449, "step": 4877 }, { "epoch": 0.13393739703459637, "grad_norm": 0.3646737337112427, "learning_rate": 1.978792348686613e-05, "loss": 0.5331, "step": 4878 }, { "epoch": 0.1339648544755629, "grad_norm": 0.36017486453056335, "learning_rate": 1.9787835002247805e-05, "loss": 0.4994, "step": 4879 }, { "epoch": 0.13399231191652938, "grad_norm": 0.3614514172077179, "learning_rate": 1.9787746499372048e-05, "loss": 0.5393, "step": 4880 }, { "epoch": 0.13401976935749588, "grad_norm": 0.36906108260154724, "learning_rate": 1.9787657978239014e-05, "loss": 0.4727, "step": 4881 }, { "epoch": 0.13404722679846237, "grad_norm": 0.3798219561576843, "learning_rate": 1.978756943884888e-05, "loss": 0.5765, "step": 4882 }, { "epoch": 0.1340746842394289, "grad_norm": 0.36505839228630066, "learning_rate": 1.97874808812018e-05, "loss": 0.5266, "step": 4883 }, { "epoch": 0.1341021416803954, "grad_norm": 0.352092981338501, "learning_rate": 1.9787392305297946e-05, "loss": 0.5291, "step": 4884 }, { "epoch": 0.13412959912136188, "grad_norm": 0.3426252007484436, "learning_rate": 1.9787303711137486e-05, "loss": 0.5751, "step": 4885 }, { "epoch": 0.1341570565623284, "grad_norm": 0.3982907235622406, "learning_rate": 1.9787215098720572e-05, "loss": 0.6256, "step": 4886 }, { "epoch": 0.1341845140032949, "grad_norm": 0.37736284732818604, "learning_rate": 1.9787126468047388e-05, "loss": 0.5722, "step": 4887 }, { "epoch": 0.1342119714442614, "grad_norm": 0.40711498260498047, "learning_rate": 1.9787037819118082e-05, "loss": 0.5686, "step": 4888 }, { "epoch": 0.13423942888522788, "grad_norm": 0.35620665550231934, "learning_rate": 1.978694915193283e-05, "loss": 0.407, "step": 4889 }, { "epoch": 0.1342668863261944, "grad_norm": 0.4262213706970215, "learning_rate": 1.9786860466491794e-05, "loss": 0.6192, "step": 4890 }, { "epoch": 0.1342943437671609, "grad_norm": 0.3600025177001953, "learning_rate": 1.9786771762795137e-05, "loss": 0.503, "step": 4891 }, { "epoch": 0.1343218012081274, "grad_norm": 0.4524364769458771, "learning_rate": 1.9786683040843033e-05, "loss": 0.6354, "step": 4892 }, { "epoch": 0.13434925864909392, "grad_norm": 0.3560301959514618, "learning_rate": 1.9786594300635637e-05, "loss": 0.5314, "step": 4893 }, { "epoch": 0.1343767160900604, "grad_norm": 0.33615347743034363, "learning_rate": 1.978650554217312e-05, "loss": 0.5685, "step": 4894 }, { "epoch": 0.1344041735310269, "grad_norm": 0.32191959023475647, "learning_rate": 1.9786416765455646e-05, "loss": 0.5033, "step": 4895 }, { "epoch": 0.1344316309719934, "grad_norm": 0.36819756031036377, "learning_rate": 1.9786327970483382e-05, "loss": 0.5749, "step": 4896 }, { "epoch": 0.13445908841295992, "grad_norm": 0.3505721092224121, "learning_rate": 1.9786239157256492e-05, "loss": 0.5097, "step": 4897 }, { "epoch": 0.13448654585392641, "grad_norm": 0.4051551818847656, "learning_rate": 1.9786150325775143e-05, "loss": 0.5602, "step": 4898 }, { "epoch": 0.1345140032948929, "grad_norm": 0.3908953368663788, "learning_rate": 1.9786061476039503e-05, "loss": 0.54, "step": 4899 }, { "epoch": 0.13454146073585943, "grad_norm": 0.411014199256897, "learning_rate": 1.978597260804973e-05, "loss": 0.5886, "step": 4900 }, { "epoch": 0.13456891817682592, "grad_norm": 0.46261322498321533, "learning_rate": 1.9785883721805997e-05, "loss": 0.5512, "step": 4901 }, { "epoch": 0.13459637561779242, "grad_norm": 0.4012559652328491, "learning_rate": 1.978579481730847e-05, "loss": 0.5353, "step": 4902 }, { "epoch": 0.1346238330587589, "grad_norm": 0.3576109707355499, "learning_rate": 1.978570589455731e-05, "loss": 0.5644, "step": 4903 }, { "epoch": 0.13465129049972543, "grad_norm": 0.3837830126285553, "learning_rate": 1.9785616953552686e-05, "loss": 0.5107, "step": 4904 }, { "epoch": 0.13467874794069193, "grad_norm": 0.3508562445640564, "learning_rate": 1.978552799429476e-05, "loss": 0.5396, "step": 4905 }, { "epoch": 0.13470620538165842, "grad_norm": 0.3597203195095062, "learning_rate": 1.9785439016783706e-05, "loss": 0.5046, "step": 4906 }, { "epoch": 0.13473366282262494, "grad_norm": 0.9288743138313293, "learning_rate": 1.9785350021019684e-05, "loss": 0.5017, "step": 4907 }, { "epoch": 0.13476112026359144, "grad_norm": 0.3313329219818115, "learning_rate": 1.978526100700286e-05, "loss": 0.5105, "step": 4908 }, { "epoch": 0.13478857770455793, "grad_norm": 0.3363453447818756, "learning_rate": 1.9785171974733402e-05, "loss": 0.4684, "step": 4909 }, { "epoch": 0.13481603514552443, "grad_norm": 0.35958579182624817, "learning_rate": 1.9785082924211474e-05, "loss": 0.4761, "step": 4910 }, { "epoch": 0.13484349258649095, "grad_norm": 0.34296557307243347, "learning_rate": 1.9784993855437244e-05, "loss": 0.5859, "step": 4911 }, { "epoch": 0.13487095002745744, "grad_norm": 0.4491249620914459, "learning_rate": 1.9784904768410877e-05, "loss": 0.4614, "step": 4912 }, { "epoch": 0.13489840746842394, "grad_norm": 0.3616888225078583, "learning_rate": 1.9784815663132538e-05, "loss": 0.606, "step": 4913 }, { "epoch": 0.13492586490939046, "grad_norm": 0.3349001407623291, "learning_rate": 1.9784726539602398e-05, "loss": 0.4706, "step": 4914 }, { "epoch": 0.13495332235035695, "grad_norm": 0.37470510601997375, "learning_rate": 1.978463739782062e-05, "loss": 0.6533, "step": 4915 }, { "epoch": 0.13498077979132345, "grad_norm": 0.4224397540092468, "learning_rate": 1.9784548237787368e-05, "loss": 0.603, "step": 4916 }, { "epoch": 0.13500823723228994, "grad_norm": 0.4052431881427765, "learning_rate": 1.9784459059502816e-05, "loss": 0.5815, "step": 4917 }, { "epoch": 0.13503569467325646, "grad_norm": 0.404073566198349, "learning_rate": 1.978436986296712e-05, "loss": 0.552, "step": 4918 }, { "epoch": 0.13506315211422296, "grad_norm": 0.35327744483947754, "learning_rate": 1.9784280648180448e-05, "loss": 0.6551, "step": 4919 }, { "epoch": 0.13509060955518945, "grad_norm": 0.3622957766056061, "learning_rate": 1.9784191415142975e-05, "loss": 0.5521, "step": 4920 }, { "epoch": 0.13511806699615594, "grad_norm": 0.3593544065952301, "learning_rate": 1.9784102163854862e-05, "loss": 0.5956, "step": 4921 }, { "epoch": 0.13514552443712247, "grad_norm": 0.39253565669059753, "learning_rate": 1.9784012894316276e-05, "loss": 0.6979, "step": 4922 }, { "epoch": 0.13517298187808896, "grad_norm": 0.5700513124465942, "learning_rate": 1.978392360652738e-05, "loss": 0.51, "step": 4923 }, { "epoch": 0.13520043931905545, "grad_norm": 0.4028007984161377, "learning_rate": 1.9783834300488348e-05, "loss": 0.5428, "step": 4924 }, { "epoch": 0.13522789676002198, "grad_norm": 0.3315434753894806, "learning_rate": 1.9783744976199338e-05, "loss": 0.4847, "step": 4925 }, { "epoch": 0.13525535420098847, "grad_norm": 0.34676072001457214, "learning_rate": 1.9783655633660525e-05, "loss": 0.4983, "step": 4926 }, { "epoch": 0.13528281164195496, "grad_norm": 0.43252742290496826, "learning_rate": 1.978356627287207e-05, "loss": 0.546, "step": 4927 }, { "epoch": 0.13531026908292146, "grad_norm": 0.37991413474082947, "learning_rate": 1.9783476893834142e-05, "loss": 0.5254, "step": 4928 }, { "epoch": 0.13533772652388798, "grad_norm": 0.4096761643886566, "learning_rate": 1.9783387496546908e-05, "loss": 0.5806, "step": 4929 }, { "epoch": 0.13536518396485447, "grad_norm": 0.3399903476238251, "learning_rate": 1.978329808101053e-05, "loss": 0.4753, "step": 4930 }, { "epoch": 0.13539264140582097, "grad_norm": 0.38228607177734375, "learning_rate": 1.9783208647225182e-05, "loss": 0.6099, "step": 4931 }, { "epoch": 0.1354200988467875, "grad_norm": 0.3899298906326294, "learning_rate": 1.9783119195191027e-05, "loss": 0.601, "step": 4932 }, { "epoch": 0.13544755628775398, "grad_norm": 0.33892858028411865, "learning_rate": 1.978302972490823e-05, "loss": 0.5131, "step": 4933 }, { "epoch": 0.13547501372872048, "grad_norm": 0.39501917362213135, "learning_rate": 1.9782940236376964e-05, "loss": 0.5184, "step": 4934 }, { "epoch": 0.13550247116968697, "grad_norm": 0.41598808765411377, "learning_rate": 1.978285072959739e-05, "loss": 0.5521, "step": 4935 }, { "epoch": 0.1355299286106535, "grad_norm": 0.31489884853363037, "learning_rate": 1.9782761204569678e-05, "loss": 0.4853, "step": 4936 }, { "epoch": 0.13555738605162, "grad_norm": 0.375687837600708, "learning_rate": 1.978267166129399e-05, "loss": 0.5632, "step": 4937 }, { "epoch": 0.13558484349258648, "grad_norm": 0.46530359983444214, "learning_rate": 1.9782582099770503e-05, "loss": 0.5221, "step": 4938 }, { "epoch": 0.135612300933553, "grad_norm": 0.34890449047088623, "learning_rate": 1.9782492519999375e-05, "loss": 0.5611, "step": 4939 }, { "epoch": 0.1356397583745195, "grad_norm": 0.367064505815506, "learning_rate": 1.9782402921980778e-05, "loss": 0.5406, "step": 4940 }, { "epoch": 0.135667215815486, "grad_norm": 0.34357139468193054, "learning_rate": 1.9782313305714873e-05, "loss": 0.4516, "step": 4941 }, { "epoch": 0.13569467325645249, "grad_norm": 0.33768218755722046, "learning_rate": 1.9782223671201838e-05, "loss": 0.4817, "step": 4942 }, { "epoch": 0.135722130697419, "grad_norm": 0.34148526191711426, "learning_rate": 1.9782134018441826e-05, "loss": 0.4279, "step": 4943 }, { "epoch": 0.1357495881383855, "grad_norm": 0.3914555311203003, "learning_rate": 1.9782044347435017e-05, "loss": 0.597, "step": 4944 }, { "epoch": 0.135777045579352, "grad_norm": 0.40426385402679443, "learning_rate": 1.9781954658181574e-05, "loss": 0.5108, "step": 4945 }, { "epoch": 0.13580450302031852, "grad_norm": 0.39340853691101074, "learning_rate": 1.978186495068166e-05, "loss": 0.5824, "step": 4946 }, { "epoch": 0.135831960461285, "grad_norm": 0.32599565386772156, "learning_rate": 1.978177522493545e-05, "loss": 0.4963, "step": 4947 }, { "epoch": 0.1358594179022515, "grad_norm": 0.3742976486682892, "learning_rate": 1.9781685480943108e-05, "loss": 0.472, "step": 4948 }, { "epoch": 0.135886875343218, "grad_norm": 0.3387261927127838, "learning_rate": 1.9781595718704793e-05, "loss": 0.4988, "step": 4949 }, { "epoch": 0.13591433278418452, "grad_norm": 0.38755255937576294, "learning_rate": 1.9781505938220686e-05, "loss": 0.5627, "step": 4950 }, { "epoch": 0.13594179022515102, "grad_norm": 0.48852458596229553, "learning_rate": 1.9781416139490948e-05, "loss": 0.4642, "step": 4951 }, { "epoch": 0.1359692476661175, "grad_norm": 0.34934186935424805, "learning_rate": 1.9781326322515748e-05, "loss": 0.5211, "step": 4952 }, { "epoch": 0.13599670510708403, "grad_norm": 0.5470390915870667, "learning_rate": 1.978123648729525e-05, "loss": 0.5043, "step": 4953 }, { "epoch": 0.13602416254805053, "grad_norm": 0.7115472555160522, "learning_rate": 1.9781146633829623e-05, "loss": 0.5755, "step": 4954 }, { "epoch": 0.13605161998901702, "grad_norm": 0.42018812894821167, "learning_rate": 1.978105676211904e-05, "loss": 0.4878, "step": 4955 }, { "epoch": 0.1360790774299835, "grad_norm": 0.38933834433555603, "learning_rate": 1.9780966872163662e-05, "loss": 0.6039, "step": 4956 }, { "epoch": 0.13610653487095004, "grad_norm": 0.3920448422431946, "learning_rate": 1.978087696396366e-05, "loss": 0.5669, "step": 4957 }, { "epoch": 0.13613399231191653, "grad_norm": 0.3727545440196991, "learning_rate": 1.97807870375192e-05, "loss": 0.5787, "step": 4958 }, { "epoch": 0.13616144975288302, "grad_norm": 0.3576209843158722, "learning_rate": 1.978069709283045e-05, "loss": 0.5134, "step": 4959 }, { "epoch": 0.13618890719384955, "grad_norm": 0.42967918515205383, "learning_rate": 1.978060712989758e-05, "loss": 0.5237, "step": 4960 }, { "epoch": 0.13621636463481604, "grad_norm": 0.32449549436569214, "learning_rate": 1.9780517148720752e-05, "loss": 0.491, "step": 4961 }, { "epoch": 0.13624382207578253, "grad_norm": 0.35335737466812134, "learning_rate": 1.9780427149300143e-05, "loss": 0.5074, "step": 4962 }, { "epoch": 0.13627127951674903, "grad_norm": 0.3569319546222687, "learning_rate": 1.9780337131635915e-05, "loss": 0.516, "step": 4963 }, { "epoch": 0.13629873695771555, "grad_norm": 0.3274610638618469, "learning_rate": 1.9780247095728234e-05, "loss": 0.4153, "step": 4964 }, { "epoch": 0.13632619439868204, "grad_norm": 0.3309895396232605, "learning_rate": 1.978015704157727e-05, "loss": 0.471, "step": 4965 }, { "epoch": 0.13635365183964854, "grad_norm": 0.3605915307998657, "learning_rate": 1.9780066969183193e-05, "loss": 0.4807, "step": 4966 }, { "epoch": 0.13638110928061506, "grad_norm": 0.3343086540699005, "learning_rate": 1.9779976878546172e-05, "loss": 0.5452, "step": 4967 }, { "epoch": 0.13640856672158155, "grad_norm": 0.3345814049243927, "learning_rate": 1.977988676966637e-05, "loss": 0.5487, "step": 4968 }, { "epoch": 0.13643602416254805, "grad_norm": 0.354967325925827, "learning_rate": 1.977979664254396e-05, "loss": 0.4678, "step": 4969 }, { "epoch": 0.13646348160351454, "grad_norm": 0.39486318826675415, "learning_rate": 1.9779706497179107e-05, "loss": 0.5612, "step": 4970 }, { "epoch": 0.13649093904448106, "grad_norm": 0.4754645526409149, "learning_rate": 1.977961633357198e-05, "loss": 0.447, "step": 4971 }, { "epoch": 0.13651839648544756, "grad_norm": 0.3794698417186737, "learning_rate": 1.9779526151722748e-05, "loss": 0.5947, "step": 4972 }, { "epoch": 0.13654585392641405, "grad_norm": 0.3733673393726349, "learning_rate": 1.9779435951631577e-05, "loss": 0.5024, "step": 4973 }, { "epoch": 0.13657331136738057, "grad_norm": 0.3926224410533905, "learning_rate": 1.9779345733298637e-05, "loss": 0.5836, "step": 4974 }, { "epoch": 0.13660076880834707, "grad_norm": 0.33958786725997925, "learning_rate": 1.9779255496724097e-05, "loss": 0.5435, "step": 4975 }, { "epoch": 0.13662822624931356, "grad_norm": 0.4033879041671753, "learning_rate": 1.9779165241908123e-05, "loss": 0.4929, "step": 4976 }, { "epoch": 0.13665568369028006, "grad_norm": 0.3827058672904968, "learning_rate": 1.9779074968850885e-05, "loss": 0.5298, "step": 4977 }, { "epoch": 0.13668314113124658, "grad_norm": 0.7481989860534668, "learning_rate": 1.9778984677552552e-05, "loss": 0.4931, "step": 4978 }, { "epoch": 0.13671059857221307, "grad_norm": 0.37940120697021484, "learning_rate": 1.977889436801329e-05, "loss": 0.5108, "step": 4979 }, { "epoch": 0.13673805601317957, "grad_norm": 0.37448397278785706, "learning_rate": 1.977880404023327e-05, "loss": 0.605, "step": 4980 }, { "epoch": 0.1367655134541461, "grad_norm": 0.4099752604961395, "learning_rate": 1.9778713694212662e-05, "loss": 0.634, "step": 4981 }, { "epoch": 0.13679297089511258, "grad_norm": 0.4014447033405304, "learning_rate": 1.977862332995163e-05, "loss": 0.5783, "step": 4982 }, { "epoch": 0.13682042833607908, "grad_norm": 0.4280092120170593, "learning_rate": 1.977853294745034e-05, "loss": 0.5758, "step": 4983 }, { "epoch": 0.13684788577704557, "grad_norm": 0.3994157910346985, "learning_rate": 1.9778442546708968e-05, "loss": 0.6057, "step": 4984 }, { "epoch": 0.1368753432180121, "grad_norm": 0.3719986081123352, "learning_rate": 1.977835212772768e-05, "loss": 0.5721, "step": 4985 }, { "epoch": 0.13690280065897859, "grad_norm": 0.3364984095096588, "learning_rate": 1.9778261690506646e-05, "loss": 0.5296, "step": 4986 }, { "epoch": 0.13693025809994508, "grad_norm": 0.34200048446655273, "learning_rate": 1.9778171235046033e-05, "loss": 0.5132, "step": 4987 }, { "epoch": 0.13695771554091157, "grad_norm": 0.36174276471138, "learning_rate": 1.977808076134601e-05, "loss": 0.5723, "step": 4988 }, { "epoch": 0.1369851729818781, "grad_norm": 0.3809409737586975, "learning_rate": 1.9777990269406742e-05, "loss": 0.6174, "step": 4989 }, { "epoch": 0.1370126304228446, "grad_norm": 0.4485138952732086, "learning_rate": 1.9777899759228404e-05, "loss": 0.5459, "step": 4990 }, { "epoch": 0.13704008786381108, "grad_norm": 0.3712345361709595, "learning_rate": 1.9777809230811162e-05, "loss": 0.5421, "step": 4991 }, { "epoch": 0.1370675453047776, "grad_norm": 0.3640673756599426, "learning_rate": 1.9777718684155184e-05, "loss": 0.4841, "step": 4992 }, { "epoch": 0.1370950027457441, "grad_norm": 0.392861008644104, "learning_rate": 1.9777628119260645e-05, "loss": 0.5768, "step": 4993 }, { "epoch": 0.1371224601867106, "grad_norm": 0.337295800447464, "learning_rate": 1.9777537536127704e-05, "loss": 0.5155, "step": 4994 }, { "epoch": 0.1371499176276771, "grad_norm": 0.3368169963359833, "learning_rate": 1.9777446934756535e-05, "loss": 0.5501, "step": 4995 }, { "epoch": 0.1371773750686436, "grad_norm": 0.4147259593009949, "learning_rate": 1.977735631514731e-05, "loss": 0.6063, "step": 4996 }, { "epoch": 0.1372048325096101, "grad_norm": 0.34343671798706055, "learning_rate": 1.977726567730019e-05, "loss": 0.4947, "step": 4997 }, { "epoch": 0.1372322899505766, "grad_norm": 0.3967944085597992, "learning_rate": 1.9777175021215352e-05, "loss": 0.5795, "step": 4998 }, { "epoch": 0.13725974739154312, "grad_norm": 0.3250882029533386, "learning_rate": 1.9777084346892962e-05, "loss": 0.4558, "step": 4999 }, { "epoch": 0.1372872048325096, "grad_norm": 0.39746996760368347, "learning_rate": 1.9776993654333187e-05, "loss": 0.5426, "step": 5000 }, { "epoch": 0.1373146622734761, "grad_norm": 0.4122356176376343, "learning_rate": 1.9776902943536203e-05, "loss": 0.523, "step": 5001 }, { "epoch": 0.1373421197144426, "grad_norm": 0.3609703481197357, "learning_rate": 1.977681221450217e-05, "loss": 0.5056, "step": 5002 }, { "epoch": 0.13736957715540912, "grad_norm": 0.3518233597278595, "learning_rate": 1.9776721467231262e-05, "loss": 0.4903, "step": 5003 }, { "epoch": 0.13739703459637562, "grad_norm": 0.365141361951828, "learning_rate": 1.977663070172365e-05, "loss": 0.5368, "step": 5004 }, { "epoch": 0.1374244920373421, "grad_norm": 0.4940430819988251, "learning_rate": 1.97765399179795e-05, "loss": 0.5058, "step": 5005 }, { "epoch": 0.13745194947830863, "grad_norm": 0.37191492319107056, "learning_rate": 1.9776449115998987e-05, "loss": 0.5355, "step": 5006 }, { "epoch": 0.13747940691927513, "grad_norm": 0.40170803666114807, "learning_rate": 1.9776358295782273e-05, "loss": 0.4981, "step": 5007 }, { "epoch": 0.13750686436024162, "grad_norm": 0.34853240847587585, "learning_rate": 1.9776267457329533e-05, "loss": 0.5415, "step": 5008 }, { "epoch": 0.13753432180120811, "grad_norm": 0.3242327868938446, "learning_rate": 1.977617660064093e-05, "loss": 0.5359, "step": 5009 }, { "epoch": 0.13756177924217464, "grad_norm": 0.35209745168685913, "learning_rate": 1.9776085725716637e-05, "loss": 0.4876, "step": 5010 }, { "epoch": 0.13758923668314113, "grad_norm": 0.47492626309394836, "learning_rate": 1.9775994832556828e-05, "loss": 0.4815, "step": 5011 }, { "epoch": 0.13761669412410762, "grad_norm": 0.43082594871520996, "learning_rate": 1.9775903921161666e-05, "loss": 0.5839, "step": 5012 }, { "epoch": 0.13764415156507415, "grad_norm": 0.39613956212997437, "learning_rate": 1.977581299153132e-05, "loss": 0.5893, "step": 5013 }, { "epoch": 0.13767160900604064, "grad_norm": 0.37491533160209656, "learning_rate": 1.977572204366597e-05, "loss": 0.5288, "step": 5014 }, { "epoch": 0.13769906644700713, "grad_norm": 0.3563674986362457, "learning_rate": 1.9775631077565774e-05, "loss": 0.5016, "step": 5015 }, { "epoch": 0.13772652388797363, "grad_norm": 0.35588786005973816, "learning_rate": 1.9775540093230904e-05, "loss": 0.5031, "step": 5016 }, { "epoch": 0.13775398132894015, "grad_norm": 0.3728574216365814, "learning_rate": 1.9775449090661536e-05, "loss": 0.3755, "step": 5017 }, { "epoch": 0.13778143876990664, "grad_norm": 0.3645084500312805, "learning_rate": 1.977535806985783e-05, "loss": 0.4364, "step": 5018 }, { "epoch": 0.13780889621087314, "grad_norm": 0.32941946387290955, "learning_rate": 1.9775267030819968e-05, "loss": 0.5064, "step": 5019 }, { "epoch": 0.13783635365183966, "grad_norm": 0.3671453893184662, "learning_rate": 1.977517597354811e-05, "loss": 0.5781, "step": 5020 }, { "epoch": 0.13786381109280615, "grad_norm": 0.3550232946872711, "learning_rate": 1.977508489804243e-05, "loss": 0.5248, "step": 5021 }, { "epoch": 0.13789126853377265, "grad_norm": 0.35496723651885986, "learning_rate": 1.9774993804303097e-05, "loss": 0.6194, "step": 5022 }, { "epoch": 0.13791872597473914, "grad_norm": 0.4380885660648346, "learning_rate": 1.977490269233028e-05, "loss": 0.6342, "step": 5023 }, { "epoch": 0.13794618341570566, "grad_norm": 0.3989108204841614, "learning_rate": 1.9774811562124148e-05, "loss": 0.4965, "step": 5024 }, { "epoch": 0.13797364085667216, "grad_norm": 0.32324090600013733, "learning_rate": 1.9774720413684874e-05, "loss": 0.4356, "step": 5025 }, { "epoch": 0.13800109829763865, "grad_norm": 0.3729538023471832, "learning_rate": 1.9774629247012627e-05, "loss": 0.5185, "step": 5026 }, { "epoch": 0.13802855573860517, "grad_norm": 0.4090462028980255, "learning_rate": 1.9774538062107575e-05, "loss": 0.5924, "step": 5027 }, { "epoch": 0.13805601317957167, "grad_norm": 0.3772102892398834, "learning_rate": 1.9774446858969892e-05, "loss": 0.6134, "step": 5028 }, { "epoch": 0.13808347062053816, "grad_norm": 0.34146612882614136, "learning_rate": 1.9774355637599747e-05, "loss": 0.5273, "step": 5029 }, { "epoch": 0.13811092806150466, "grad_norm": 0.3378389775753021, "learning_rate": 1.9774264397997305e-05, "loss": 0.4961, "step": 5030 }, { "epoch": 0.13813838550247118, "grad_norm": 0.3810559809207916, "learning_rate": 1.9774173140162744e-05, "loss": 0.5777, "step": 5031 }, { "epoch": 0.13816584294343767, "grad_norm": 0.3239996135234833, "learning_rate": 1.977408186409623e-05, "loss": 0.5419, "step": 5032 }, { "epoch": 0.13819330038440417, "grad_norm": 0.34209585189819336, "learning_rate": 1.9773990569797932e-05, "loss": 0.4071, "step": 5033 }, { "epoch": 0.1382207578253707, "grad_norm": 0.35367804765701294, "learning_rate": 1.9773899257268024e-05, "loss": 0.514, "step": 5034 }, { "epoch": 0.13824821526633718, "grad_norm": 0.34025809168815613, "learning_rate": 1.977380792650667e-05, "loss": 0.5547, "step": 5035 }, { "epoch": 0.13827567270730368, "grad_norm": 0.371284544467926, "learning_rate": 1.977371657751405e-05, "loss": 0.5993, "step": 5036 }, { "epoch": 0.13830313014827017, "grad_norm": 0.351248562335968, "learning_rate": 1.9773625210290327e-05, "loss": 0.5836, "step": 5037 }, { "epoch": 0.1383305875892367, "grad_norm": 0.3599323332309723, "learning_rate": 1.9773533824835676e-05, "loss": 0.5606, "step": 5038 }, { "epoch": 0.13835804503020319, "grad_norm": 0.4478107988834381, "learning_rate": 1.9773442421150264e-05, "loss": 0.5004, "step": 5039 }, { "epoch": 0.13838550247116968, "grad_norm": 0.4365478754043579, "learning_rate": 1.977335099923426e-05, "loss": 0.6115, "step": 5040 }, { "epoch": 0.1384129599121362, "grad_norm": 0.373989999294281, "learning_rate": 1.9773259559087838e-05, "loss": 0.4948, "step": 5041 }, { "epoch": 0.1384404173531027, "grad_norm": 0.39117470383644104, "learning_rate": 1.9773168100711172e-05, "loss": 0.4943, "step": 5042 }, { "epoch": 0.1384678747940692, "grad_norm": 0.3307638466358185, "learning_rate": 1.9773076624104423e-05, "loss": 0.4717, "step": 5043 }, { "epoch": 0.13849533223503568, "grad_norm": 0.4056659936904907, "learning_rate": 1.977298512926777e-05, "loss": 0.5126, "step": 5044 }, { "epoch": 0.1385227896760022, "grad_norm": 0.3743223249912262, "learning_rate": 1.9772893616201378e-05, "loss": 0.5412, "step": 5045 }, { "epoch": 0.1385502471169687, "grad_norm": 0.394788920879364, "learning_rate": 1.977280208490542e-05, "loss": 0.5713, "step": 5046 }, { "epoch": 0.1385777045579352, "grad_norm": 0.37682902812957764, "learning_rate": 1.9772710535380068e-05, "loss": 0.527, "step": 5047 }, { "epoch": 0.13860516199890172, "grad_norm": 0.34916019439697266, "learning_rate": 1.9772618967625492e-05, "loss": 0.5281, "step": 5048 }, { "epoch": 0.1386326194398682, "grad_norm": 0.44591742753982544, "learning_rate": 1.9772527381641863e-05, "loss": 0.5005, "step": 5049 }, { "epoch": 0.1386600768808347, "grad_norm": 0.3625373840332031, "learning_rate": 1.977243577742935e-05, "loss": 0.5861, "step": 5050 }, { "epoch": 0.1386875343218012, "grad_norm": 0.3989298939704895, "learning_rate": 1.977234415498813e-05, "loss": 0.6209, "step": 5051 }, { "epoch": 0.13871499176276772, "grad_norm": 0.4088045656681061, "learning_rate": 1.9772252514318364e-05, "loss": 0.5534, "step": 5052 }, { "epoch": 0.1387424492037342, "grad_norm": 0.4071587026119232, "learning_rate": 1.977216085542023e-05, "loss": 0.5495, "step": 5053 }, { "epoch": 0.1387699066447007, "grad_norm": 0.42477282881736755, "learning_rate": 1.9772069178293898e-05, "loss": 0.5148, "step": 5054 }, { "epoch": 0.1387973640856672, "grad_norm": 0.49632546305656433, "learning_rate": 1.9771977482939535e-05, "loss": 0.5239, "step": 5055 }, { "epoch": 0.13882482152663372, "grad_norm": 0.3305245637893677, "learning_rate": 1.9771885769357317e-05, "loss": 0.5075, "step": 5056 }, { "epoch": 0.13885227896760022, "grad_norm": 0.35537129640579224, "learning_rate": 1.9771794037547415e-05, "loss": 0.519, "step": 5057 }, { "epoch": 0.1388797364085667, "grad_norm": 0.34813255071640015, "learning_rate": 1.9771702287509997e-05, "loss": 0.5944, "step": 5058 }, { "epoch": 0.13890719384953323, "grad_norm": 0.6297627091407776, "learning_rate": 1.9771610519245233e-05, "loss": 0.5225, "step": 5059 }, { "epoch": 0.13893465129049973, "grad_norm": 0.34805941581726074, "learning_rate": 1.97715187327533e-05, "loss": 0.459, "step": 5060 }, { "epoch": 0.13896210873146622, "grad_norm": 0.3337455093860626, "learning_rate": 1.9771426928034362e-05, "loss": 0.5314, "step": 5061 }, { "epoch": 0.13898956617243272, "grad_norm": 0.36725589632987976, "learning_rate": 1.97713351050886e-05, "loss": 0.5456, "step": 5062 }, { "epoch": 0.13901702361339924, "grad_norm": 0.3686041533946991, "learning_rate": 1.9771243263916177e-05, "loss": 0.5306, "step": 5063 }, { "epoch": 0.13904448105436573, "grad_norm": 0.369164377450943, "learning_rate": 1.9771151404517264e-05, "loss": 0.5484, "step": 5064 }, { "epoch": 0.13907193849533223, "grad_norm": 0.3749980628490448, "learning_rate": 1.9771059526892036e-05, "loss": 0.5986, "step": 5065 }, { "epoch": 0.13909939593629875, "grad_norm": 0.37189480662345886, "learning_rate": 1.9770967631040664e-05, "loss": 0.576, "step": 5066 }, { "epoch": 0.13912685337726524, "grad_norm": 0.35205352306365967, "learning_rate": 1.977087571696332e-05, "loss": 0.5761, "step": 5067 }, { "epoch": 0.13915431081823174, "grad_norm": 0.40013763308525085, "learning_rate": 1.9770783784660176e-05, "loss": 0.4838, "step": 5068 }, { "epoch": 0.13918176825919823, "grad_norm": 0.4290878176689148, "learning_rate": 1.9770691834131398e-05, "loss": 0.5796, "step": 5069 }, { "epoch": 0.13920922570016475, "grad_norm": 0.3411395847797394, "learning_rate": 1.977059986537716e-05, "loss": 0.5201, "step": 5070 }, { "epoch": 0.13923668314113125, "grad_norm": 0.3632297217845917, "learning_rate": 1.9770507878397638e-05, "loss": 0.5265, "step": 5071 }, { "epoch": 0.13926414058209774, "grad_norm": 0.359386682510376, "learning_rate": 1.9770415873192998e-05, "loss": 0.499, "step": 5072 }, { "epoch": 0.13929159802306426, "grad_norm": 0.4210584759712219, "learning_rate": 1.9770323849763418e-05, "loss": 0.5479, "step": 5073 }, { "epoch": 0.13931905546403076, "grad_norm": 0.35762128233909607, "learning_rate": 1.9770231808109063e-05, "loss": 0.5826, "step": 5074 }, { "epoch": 0.13934651290499725, "grad_norm": 0.38777491450309753, "learning_rate": 1.9770139748230107e-05, "loss": 0.6524, "step": 5075 }, { "epoch": 0.13937397034596374, "grad_norm": 0.3771539628505707, "learning_rate": 1.9770047670126725e-05, "loss": 0.5418, "step": 5076 }, { "epoch": 0.13940142778693027, "grad_norm": 0.42269107699394226, "learning_rate": 1.976995557379908e-05, "loss": 0.5886, "step": 5077 }, { "epoch": 0.13942888522789676, "grad_norm": 0.3785474896430969, "learning_rate": 1.9769863459247356e-05, "loss": 0.553, "step": 5078 }, { "epoch": 0.13945634266886325, "grad_norm": 0.42848441004753113, "learning_rate": 1.9769771326471715e-05, "loss": 0.5497, "step": 5079 }, { "epoch": 0.13948380010982978, "grad_norm": 0.36823713779449463, "learning_rate": 1.9769679175472335e-05, "loss": 0.5746, "step": 5080 }, { "epoch": 0.13951125755079627, "grad_norm": 0.33764955401420593, "learning_rate": 1.976958700624938e-05, "loss": 0.5815, "step": 5081 }, { "epoch": 0.13953871499176276, "grad_norm": 0.6321456432342529, "learning_rate": 1.976949481880303e-05, "loss": 0.5499, "step": 5082 }, { "epoch": 0.13956617243272926, "grad_norm": 0.35167545080184937, "learning_rate": 1.976940261313346e-05, "loss": 0.5254, "step": 5083 }, { "epoch": 0.13959362987369578, "grad_norm": 0.40513819456100464, "learning_rate": 1.976931038924083e-05, "loss": 0.4834, "step": 5084 }, { "epoch": 0.13962108731466227, "grad_norm": 0.7617354393005371, "learning_rate": 1.9769218147125316e-05, "loss": 0.5471, "step": 5085 }, { "epoch": 0.13964854475562877, "grad_norm": 0.4126875400543213, "learning_rate": 1.9769125886787095e-05, "loss": 0.6449, "step": 5086 }, { "epoch": 0.1396760021965953, "grad_norm": 0.3729192912578583, "learning_rate": 1.9769033608226336e-05, "loss": 0.5512, "step": 5087 }, { "epoch": 0.13970345963756178, "grad_norm": 0.363986611366272, "learning_rate": 1.9768941311443212e-05, "loss": 0.5668, "step": 5088 }, { "epoch": 0.13973091707852828, "grad_norm": 0.3733406066894531, "learning_rate": 1.9768848996437895e-05, "loss": 0.6401, "step": 5089 }, { "epoch": 0.13975837451949477, "grad_norm": 0.33875253796577454, "learning_rate": 1.9768756663210554e-05, "loss": 0.5083, "step": 5090 }, { "epoch": 0.1397858319604613, "grad_norm": 0.34692931175231934, "learning_rate": 1.976866431176137e-05, "loss": 0.546, "step": 5091 }, { "epoch": 0.1398132894014278, "grad_norm": 0.3689880073070526, "learning_rate": 1.9768571942090505e-05, "loss": 0.5323, "step": 5092 }, { "epoch": 0.13984074684239428, "grad_norm": 0.38928279280662537, "learning_rate": 1.9768479554198134e-05, "loss": 0.5698, "step": 5093 }, { "epoch": 0.1398682042833608, "grad_norm": 0.3700495958328247, "learning_rate": 1.9768387148084432e-05, "loss": 0.552, "step": 5094 }, { "epoch": 0.1398956617243273, "grad_norm": 0.3634013831615448, "learning_rate": 1.9768294723749574e-05, "loss": 0.5251, "step": 5095 }, { "epoch": 0.1399231191652938, "grad_norm": 0.364368200302124, "learning_rate": 1.9768202281193726e-05, "loss": 0.6216, "step": 5096 }, { "epoch": 0.13995057660626029, "grad_norm": 0.38359904289245605, "learning_rate": 1.9768109820417064e-05, "loss": 0.5025, "step": 5097 }, { "epoch": 0.1399780340472268, "grad_norm": 0.3865695595741272, "learning_rate": 1.9768017341419754e-05, "loss": 0.5129, "step": 5098 }, { "epoch": 0.1400054914881933, "grad_norm": 0.3774087727069855, "learning_rate": 1.976792484420198e-05, "loss": 0.5829, "step": 5099 }, { "epoch": 0.1400329489291598, "grad_norm": 0.45045655965805054, "learning_rate": 1.9767832328763907e-05, "loss": 0.5406, "step": 5100 }, { "epoch": 0.14006040637012632, "grad_norm": 0.3464818000793457, "learning_rate": 1.9767739795105708e-05, "loss": 0.5252, "step": 5101 }, { "epoch": 0.1400878638110928, "grad_norm": 0.38034719228744507, "learning_rate": 1.976764724322756e-05, "loss": 0.5336, "step": 5102 }, { "epoch": 0.1401153212520593, "grad_norm": 0.3678613603115082, "learning_rate": 1.976755467312963e-05, "loss": 0.5254, "step": 5103 }, { "epoch": 0.1401427786930258, "grad_norm": 0.36178499460220337, "learning_rate": 1.9767462084812094e-05, "loss": 0.581, "step": 5104 }, { "epoch": 0.14017023613399232, "grad_norm": 0.3591462969779968, "learning_rate": 1.9767369478275123e-05, "loss": 0.5102, "step": 5105 }, { "epoch": 0.14019769357495881, "grad_norm": 0.38280192017555237, "learning_rate": 1.976727685351889e-05, "loss": 0.5385, "step": 5106 }, { "epoch": 0.1402251510159253, "grad_norm": 0.3649687170982361, "learning_rate": 1.976718421054357e-05, "loss": 0.5251, "step": 5107 }, { "epoch": 0.14025260845689183, "grad_norm": 0.4072813391685486, "learning_rate": 1.9767091549349335e-05, "loss": 0.5607, "step": 5108 }, { "epoch": 0.14028006589785832, "grad_norm": 0.36535027623176575, "learning_rate": 1.9766998869936352e-05, "loss": 0.5097, "step": 5109 }, { "epoch": 0.14030752333882482, "grad_norm": 0.36129873991012573, "learning_rate": 1.9766906172304803e-05, "loss": 0.492, "step": 5110 }, { "epoch": 0.1403349807797913, "grad_norm": 0.33539286255836487, "learning_rate": 1.9766813456454855e-05, "loss": 0.5206, "step": 5111 }, { "epoch": 0.14036243822075783, "grad_norm": 0.3805730640888214, "learning_rate": 1.9766720722386685e-05, "loss": 0.5678, "step": 5112 }, { "epoch": 0.14038989566172433, "grad_norm": 0.37901657819747925, "learning_rate": 1.9766627970100463e-05, "loss": 0.5469, "step": 5113 }, { "epoch": 0.14041735310269082, "grad_norm": 0.3843391239643097, "learning_rate": 1.9766535199596362e-05, "loss": 0.5739, "step": 5114 }, { "epoch": 0.14044481054365734, "grad_norm": 0.3886151611804962, "learning_rate": 1.9766442410874554e-05, "loss": 0.533, "step": 5115 }, { "epoch": 0.14047226798462384, "grad_norm": 0.40498775243759155, "learning_rate": 1.9766349603935215e-05, "loss": 0.5406, "step": 5116 }, { "epoch": 0.14049972542559033, "grad_norm": 0.3319019675254822, "learning_rate": 1.9766256778778518e-05, "loss": 0.5118, "step": 5117 }, { "epoch": 0.14052718286655683, "grad_norm": 0.3901922404766083, "learning_rate": 1.9766163935404633e-05, "loss": 0.4975, "step": 5118 }, { "epoch": 0.14055464030752335, "grad_norm": 0.34731176495552063, "learning_rate": 1.976607107381374e-05, "loss": 0.5772, "step": 5119 }, { "epoch": 0.14058209774848984, "grad_norm": 0.36220449209213257, "learning_rate": 1.9765978194006005e-05, "loss": 0.5396, "step": 5120 }, { "epoch": 0.14060955518945634, "grad_norm": 0.35474881529808044, "learning_rate": 1.97658852959816e-05, "loss": 0.5028, "step": 5121 }, { "epoch": 0.14063701263042283, "grad_norm": 0.31767410039901733, "learning_rate": 1.9765792379740704e-05, "loss": 0.5368, "step": 5122 }, { "epoch": 0.14066447007138935, "grad_norm": 0.38548019528388977, "learning_rate": 1.976569944528349e-05, "loss": 0.4788, "step": 5123 }, { "epoch": 0.14069192751235585, "grad_norm": 0.38302141427993774, "learning_rate": 1.9765606492610133e-05, "loss": 0.5324, "step": 5124 }, { "epoch": 0.14071938495332234, "grad_norm": 0.35954320430755615, "learning_rate": 1.9765513521720797e-05, "loss": 0.5093, "step": 5125 }, { "epoch": 0.14074684239428886, "grad_norm": 0.3440805673599243, "learning_rate": 1.9765420532615663e-05, "loss": 0.5685, "step": 5126 }, { "epoch": 0.14077429983525536, "grad_norm": 0.3343869745731354, "learning_rate": 1.9765327525294905e-05, "loss": 0.4488, "step": 5127 }, { "epoch": 0.14080175727622185, "grad_norm": 0.4121215343475342, "learning_rate": 1.9765234499758693e-05, "loss": 0.5567, "step": 5128 }, { "epoch": 0.14082921471718834, "grad_norm": 0.3899824321269989, "learning_rate": 1.97651414560072e-05, "loss": 0.4749, "step": 5129 }, { "epoch": 0.14085667215815487, "grad_norm": 0.32759928703308105, "learning_rate": 1.9765048394040604e-05, "loss": 0.4922, "step": 5130 }, { "epoch": 0.14088412959912136, "grad_norm": 0.5473260879516602, "learning_rate": 1.9764955313859075e-05, "loss": 0.5576, "step": 5131 }, { "epoch": 0.14091158704008785, "grad_norm": 0.3910808265209198, "learning_rate": 1.9764862215462787e-05, "loss": 0.575, "step": 5132 }, { "epoch": 0.14093904448105438, "grad_norm": 0.3842034935951233, "learning_rate": 1.9764769098851916e-05, "loss": 0.5557, "step": 5133 }, { "epoch": 0.14096650192202087, "grad_norm": 0.384150892496109, "learning_rate": 1.9764675964026634e-05, "loss": 0.4754, "step": 5134 }, { "epoch": 0.14099395936298736, "grad_norm": 0.37913212180137634, "learning_rate": 1.9764582810987116e-05, "loss": 0.5225, "step": 5135 }, { "epoch": 0.14102141680395386, "grad_norm": 0.33545777201652527, "learning_rate": 1.9764489639733533e-05, "loss": 0.5391, "step": 5136 }, { "epoch": 0.14104887424492038, "grad_norm": 0.3970257639884949, "learning_rate": 1.976439645026606e-05, "loss": 0.638, "step": 5137 }, { "epoch": 0.14107633168588687, "grad_norm": 6.840550422668457, "learning_rate": 1.9764303242584874e-05, "loss": 0.5736, "step": 5138 }, { "epoch": 0.14110378912685337, "grad_norm": 0.3973082900047302, "learning_rate": 1.976421001669014e-05, "loss": 0.5846, "step": 5139 }, { "epoch": 0.1411312465678199, "grad_norm": 0.3658939599990845, "learning_rate": 1.9764116772582044e-05, "loss": 0.5129, "step": 5140 }, { "epoch": 0.14115870400878638, "grad_norm": 0.36921167373657227, "learning_rate": 1.976402351026075e-05, "loss": 0.5756, "step": 5141 }, { "epoch": 0.14118616144975288, "grad_norm": 0.40854158997535706, "learning_rate": 1.976393022972644e-05, "loss": 0.5263, "step": 5142 }, { "epoch": 0.14121361889071937, "grad_norm": 0.39392220973968506, "learning_rate": 1.976383693097928e-05, "loss": 0.5207, "step": 5143 }, { "epoch": 0.1412410763316859, "grad_norm": 0.3888837695121765, "learning_rate": 1.9763743614019448e-05, "loss": 0.5721, "step": 5144 }, { "epoch": 0.1412685337726524, "grad_norm": 0.41442108154296875, "learning_rate": 1.976365027884712e-05, "loss": 0.4956, "step": 5145 }, { "epoch": 0.14129599121361888, "grad_norm": 0.366674542427063, "learning_rate": 1.9763556925462467e-05, "loss": 0.6122, "step": 5146 }, { "epoch": 0.1413234486545854, "grad_norm": 0.6076432466506958, "learning_rate": 1.9763463553865666e-05, "loss": 0.6184, "step": 5147 }, { "epoch": 0.1413509060955519, "grad_norm": 0.3274708688259125, "learning_rate": 1.9763370164056884e-05, "loss": 0.5578, "step": 5148 }, { "epoch": 0.1413783635365184, "grad_norm": 0.40332868695259094, "learning_rate": 1.9763276756036305e-05, "loss": 0.5948, "step": 5149 }, { "epoch": 0.1414058209774849, "grad_norm": 0.3823903203010559, "learning_rate": 1.9763183329804097e-05, "loss": 0.6232, "step": 5150 }, { "epoch": 0.1414332784184514, "grad_norm": 0.31382521986961365, "learning_rate": 1.9763089885360436e-05, "loss": 0.4756, "step": 5151 }, { "epoch": 0.1414607358594179, "grad_norm": 0.35142281651496887, "learning_rate": 1.9762996422705497e-05, "loss": 0.516, "step": 5152 }, { "epoch": 0.1414881933003844, "grad_norm": 0.3624790608882904, "learning_rate": 1.9762902941839453e-05, "loss": 0.6893, "step": 5153 }, { "epoch": 0.14151565074135092, "grad_norm": 0.37174686789512634, "learning_rate": 1.9762809442762483e-05, "loss": 0.5457, "step": 5154 }, { "epoch": 0.1415431081823174, "grad_norm": 0.3992995619773865, "learning_rate": 1.9762715925474755e-05, "loss": 0.5806, "step": 5155 }, { "epoch": 0.1415705656232839, "grad_norm": 0.378126859664917, "learning_rate": 1.9762622389976445e-05, "loss": 0.5076, "step": 5156 }, { "epoch": 0.1415980230642504, "grad_norm": 0.3404308557510376, "learning_rate": 1.976252883626773e-05, "loss": 0.5297, "step": 5157 }, { "epoch": 0.14162548050521692, "grad_norm": 0.33897069096565247, "learning_rate": 1.976243526434878e-05, "loss": 0.5019, "step": 5158 }, { "epoch": 0.14165293794618342, "grad_norm": 0.37925100326538086, "learning_rate": 1.9762341674219775e-05, "loss": 0.4427, "step": 5159 }, { "epoch": 0.1416803953871499, "grad_norm": 0.365349143743515, "learning_rate": 1.9762248065880884e-05, "loss": 0.5487, "step": 5160 }, { "epoch": 0.14170785282811643, "grad_norm": 0.35825568437576294, "learning_rate": 1.976215443933229e-05, "loss": 0.5854, "step": 5161 }, { "epoch": 0.14173531026908293, "grad_norm": 0.3822456896305084, "learning_rate": 1.9762060794574156e-05, "loss": 0.4915, "step": 5162 }, { "epoch": 0.14176276771004942, "grad_norm": 0.33348333835601807, "learning_rate": 1.9761967131606665e-05, "loss": 0.4296, "step": 5163 }, { "epoch": 0.14179022515101591, "grad_norm": 0.3722299635410309, "learning_rate": 1.976187345042999e-05, "loss": 0.5952, "step": 5164 }, { "epoch": 0.14181768259198244, "grad_norm": 0.35679900646209717, "learning_rate": 1.9761779751044308e-05, "loss": 0.5979, "step": 5165 }, { "epoch": 0.14184514003294893, "grad_norm": 0.4097611904144287, "learning_rate": 1.976168603344979e-05, "loss": 0.5242, "step": 5166 }, { "epoch": 0.14187259747391542, "grad_norm": 0.39235377311706543, "learning_rate": 1.9761592297646607e-05, "loss": 0.5282, "step": 5167 }, { "epoch": 0.14190005491488195, "grad_norm": 0.3546358048915863, "learning_rate": 1.9761498543634946e-05, "loss": 0.5689, "step": 5168 }, { "epoch": 0.14192751235584844, "grad_norm": 0.41730985045433044, "learning_rate": 1.9761404771414973e-05, "loss": 0.6503, "step": 5169 }, { "epoch": 0.14195496979681493, "grad_norm": 0.44483160972595215, "learning_rate": 1.976131098098686e-05, "loss": 0.4986, "step": 5170 }, { "epoch": 0.14198242723778143, "grad_norm": 0.3717547357082367, "learning_rate": 1.976121717235079e-05, "loss": 0.5891, "step": 5171 }, { "epoch": 0.14200988467874795, "grad_norm": 0.3958486020565033, "learning_rate": 1.9761123345506937e-05, "loss": 0.5606, "step": 5172 }, { "epoch": 0.14203734211971444, "grad_norm": 0.3545449674129486, "learning_rate": 1.976102950045547e-05, "loss": 0.5833, "step": 5173 }, { "epoch": 0.14206479956068094, "grad_norm": 0.34818708896636963, "learning_rate": 1.9760935637196565e-05, "loss": 0.5216, "step": 5174 }, { "epoch": 0.14209225700164746, "grad_norm": 0.3399266302585602, "learning_rate": 1.9760841755730403e-05, "loss": 0.5616, "step": 5175 }, { "epoch": 0.14211971444261395, "grad_norm": 0.3955525755882263, "learning_rate": 1.976074785605716e-05, "loss": 0.5102, "step": 5176 }, { "epoch": 0.14214717188358045, "grad_norm": 0.3750360310077667, "learning_rate": 1.9760653938177003e-05, "loss": 0.5888, "step": 5177 }, { "epoch": 0.14217462932454694, "grad_norm": 0.39386892318725586, "learning_rate": 1.976056000209011e-05, "loss": 0.589, "step": 5178 }, { "epoch": 0.14220208676551346, "grad_norm": 0.376517653465271, "learning_rate": 1.976046604779666e-05, "loss": 0.5306, "step": 5179 }, { "epoch": 0.14222954420647996, "grad_norm": 0.36512553691864014, "learning_rate": 1.9760372075296823e-05, "loss": 0.5631, "step": 5180 }, { "epoch": 0.14225700164744645, "grad_norm": 0.34393811225891113, "learning_rate": 1.976027808459078e-05, "loss": 0.5934, "step": 5181 }, { "epoch": 0.14228445908841297, "grad_norm": 0.528462290763855, "learning_rate": 1.9760184075678698e-05, "loss": 0.5977, "step": 5182 }, { "epoch": 0.14231191652937947, "grad_norm": 0.41343313455581665, "learning_rate": 1.976009004856076e-05, "loss": 0.5762, "step": 5183 }, { "epoch": 0.14233937397034596, "grad_norm": 0.41506296396255493, "learning_rate": 1.975999600323714e-05, "loss": 0.477, "step": 5184 }, { "epoch": 0.14236683141131246, "grad_norm": 0.38322389125823975, "learning_rate": 1.9759901939708014e-05, "loss": 0.5447, "step": 5185 }, { "epoch": 0.14239428885227898, "grad_norm": 0.4538055658340454, "learning_rate": 1.9759807857973553e-05, "loss": 0.5417, "step": 5186 }, { "epoch": 0.14242174629324547, "grad_norm": 0.37027794122695923, "learning_rate": 1.9759713758033937e-05, "loss": 0.5197, "step": 5187 }, { "epoch": 0.14244920373421197, "grad_norm": 0.527476966381073, "learning_rate": 1.975961963988934e-05, "loss": 0.5738, "step": 5188 }, { "epoch": 0.14247666117517846, "grad_norm": 0.393109530210495, "learning_rate": 1.9759525503539936e-05, "loss": 0.5933, "step": 5189 }, { "epoch": 0.14250411861614498, "grad_norm": 0.35848256945610046, "learning_rate": 1.9759431348985903e-05, "loss": 0.5367, "step": 5190 }, { "epoch": 0.14253157605711148, "grad_norm": 0.4073767364025116, "learning_rate": 1.9759337176227417e-05, "loss": 0.6019, "step": 5191 }, { "epoch": 0.14255903349807797, "grad_norm": 0.3970816135406494, "learning_rate": 1.9759242985264652e-05, "loss": 0.5613, "step": 5192 }, { "epoch": 0.1425864909390445, "grad_norm": 0.3618789315223694, "learning_rate": 1.9759148776097783e-05, "loss": 0.605, "step": 5193 }, { "epoch": 0.14261394838001099, "grad_norm": 0.42001697421073914, "learning_rate": 1.9759054548726987e-05, "loss": 0.5526, "step": 5194 }, { "epoch": 0.14264140582097748, "grad_norm": 0.3464806377887726, "learning_rate": 1.975896030315244e-05, "loss": 0.5651, "step": 5195 }, { "epoch": 0.14266886326194397, "grad_norm": 0.37471550703048706, "learning_rate": 1.9758866039374315e-05, "loss": 0.6256, "step": 5196 }, { "epoch": 0.1426963207029105, "grad_norm": 0.31307339668273926, "learning_rate": 1.975877175739279e-05, "loss": 0.5061, "step": 5197 }, { "epoch": 0.142723778143877, "grad_norm": 0.39606142044067383, "learning_rate": 1.9758677457208044e-05, "loss": 0.6322, "step": 5198 }, { "epoch": 0.14275123558484348, "grad_norm": 0.34814879298210144, "learning_rate": 1.975858313882025e-05, "loss": 0.4519, "step": 5199 }, { "epoch": 0.14277869302581, "grad_norm": 0.3955276608467102, "learning_rate": 1.9758488802229585e-05, "loss": 0.5791, "step": 5200 }, { "epoch": 0.1428061504667765, "grad_norm": 0.41657453775405884, "learning_rate": 1.975839444743622e-05, "loss": 0.6048, "step": 5201 }, { "epoch": 0.142833607907743, "grad_norm": 0.39284393191337585, "learning_rate": 1.975830007444034e-05, "loss": 0.5717, "step": 5202 }, { "epoch": 0.1428610653487095, "grad_norm": 0.36834201216697693, "learning_rate": 1.9758205683242105e-05, "loss": 0.5417, "step": 5203 }, { "epoch": 0.142888522789676, "grad_norm": 0.34614330530166626, "learning_rate": 1.975811127384171e-05, "loss": 0.4698, "step": 5204 }, { "epoch": 0.1429159802306425, "grad_norm": 0.36794713139533997, "learning_rate": 1.9758016846239325e-05, "loss": 0.5515, "step": 5205 }, { "epoch": 0.142943437671609, "grad_norm": 0.42639344930648804, "learning_rate": 1.975792240043512e-05, "loss": 0.5981, "step": 5206 }, { "epoch": 0.14297089511257552, "grad_norm": 0.4046439528465271, "learning_rate": 1.975782793642928e-05, "loss": 0.5896, "step": 5207 }, { "epoch": 0.142998352553542, "grad_norm": 0.5351312160491943, "learning_rate": 1.975773345422197e-05, "loss": 0.4552, "step": 5208 }, { "epoch": 0.1430258099945085, "grad_norm": 0.406574547290802, "learning_rate": 1.975763895381338e-05, "loss": 0.5978, "step": 5209 }, { "epoch": 0.143053267435475, "grad_norm": 0.3574973940849304, "learning_rate": 1.9757544435203674e-05, "loss": 0.6184, "step": 5210 }, { "epoch": 0.14308072487644152, "grad_norm": 0.4015454947948456, "learning_rate": 1.9757449898393033e-05, "loss": 0.5376, "step": 5211 }, { "epoch": 0.14310818231740802, "grad_norm": 0.3470827043056488, "learning_rate": 1.9757355343381636e-05, "loss": 0.5299, "step": 5212 }, { "epoch": 0.1431356397583745, "grad_norm": 0.37330299615859985, "learning_rate": 1.9757260770169656e-05, "loss": 0.569, "step": 5213 }, { "epoch": 0.14316309719934103, "grad_norm": 0.2965843081474304, "learning_rate": 1.975716617875727e-05, "loss": 0.5072, "step": 5214 }, { "epoch": 0.14319055464030753, "grad_norm": 0.385028600692749, "learning_rate": 1.9757071569144658e-05, "loss": 0.607, "step": 5215 }, { "epoch": 0.14321801208127402, "grad_norm": 0.37850111722946167, "learning_rate": 1.9756976941331992e-05, "loss": 0.5178, "step": 5216 }, { "epoch": 0.14324546952224051, "grad_norm": 0.3664033114910126, "learning_rate": 1.9756882295319448e-05, "loss": 0.5034, "step": 5217 }, { "epoch": 0.14327292696320704, "grad_norm": 0.352734237909317, "learning_rate": 1.9756787631107205e-05, "loss": 0.5825, "step": 5218 }, { "epoch": 0.14330038440417353, "grad_norm": 0.3812539875507355, "learning_rate": 1.975669294869544e-05, "loss": 0.5891, "step": 5219 }, { "epoch": 0.14332784184514002, "grad_norm": 0.5099696516990662, "learning_rate": 1.975659824808433e-05, "loss": 0.6266, "step": 5220 }, { "epoch": 0.14335529928610655, "grad_norm": 0.3631584942340851, "learning_rate": 1.9756503529274047e-05, "loss": 0.5621, "step": 5221 }, { "epoch": 0.14338275672707304, "grad_norm": 0.4031742811203003, "learning_rate": 1.9756408792264774e-05, "loss": 0.4833, "step": 5222 }, { "epoch": 0.14341021416803953, "grad_norm": 2.6287312507629395, "learning_rate": 1.9756314037056686e-05, "loss": 0.435, "step": 5223 }, { "epoch": 0.14343767160900603, "grad_norm": 0.3566969633102417, "learning_rate": 1.9756219263649953e-05, "loss": 0.586, "step": 5224 }, { "epoch": 0.14346512904997255, "grad_norm": 0.3356954753398895, "learning_rate": 1.975612447204476e-05, "loss": 0.5166, "step": 5225 }, { "epoch": 0.14349258649093904, "grad_norm": 0.3739411532878876, "learning_rate": 1.9756029662241283e-05, "loss": 0.6034, "step": 5226 }, { "epoch": 0.14352004393190554, "grad_norm": 0.3644901514053345, "learning_rate": 1.9755934834239695e-05, "loss": 0.5225, "step": 5227 }, { "epoch": 0.14354750137287206, "grad_norm": 0.33727386593818665, "learning_rate": 1.9755839988040177e-05, "loss": 0.5029, "step": 5228 }, { "epoch": 0.14357495881383855, "grad_norm": 0.3365439474582672, "learning_rate": 1.97557451236429e-05, "loss": 0.506, "step": 5229 }, { "epoch": 0.14360241625480505, "grad_norm": 0.3865739107131958, "learning_rate": 1.9755650241048044e-05, "loss": 0.5803, "step": 5230 }, { "epoch": 0.14362987369577154, "grad_norm": 0.35372963547706604, "learning_rate": 1.975555534025579e-05, "loss": 0.5267, "step": 5231 }, { "epoch": 0.14365733113673806, "grad_norm": 0.33166149258613586, "learning_rate": 1.9755460421266312e-05, "loss": 0.5187, "step": 5232 }, { "epoch": 0.14368478857770456, "grad_norm": 0.3612311780452728, "learning_rate": 1.975536548407979e-05, "loss": 0.5579, "step": 5233 }, { "epoch": 0.14371224601867105, "grad_norm": 0.3520779609680176, "learning_rate": 1.975527052869639e-05, "loss": 0.5884, "step": 5234 }, { "epoch": 0.14373970345963757, "grad_norm": 0.40141236782073975, "learning_rate": 1.9755175555116298e-05, "loss": 0.5518, "step": 5235 }, { "epoch": 0.14376716090060407, "grad_norm": 0.40416714549064636, "learning_rate": 1.9755080563339694e-05, "loss": 0.4877, "step": 5236 }, { "epoch": 0.14379461834157056, "grad_norm": 0.4022332429885864, "learning_rate": 1.975498555336675e-05, "loss": 0.5412, "step": 5237 }, { "epoch": 0.14382207578253706, "grad_norm": 0.3340296745300293, "learning_rate": 1.975489052519764e-05, "loss": 0.5334, "step": 5238 }, { "epoch": 0.14384953322350358, "grad_norm": 0.44120121002197266, "learning_rate": 1.9754795478832552e-05, "loss": 0.5343, "step": 5239 }, { "epoch": 0.14387699066447007, "grad_norm": 0.3596265912055969, "learning_rate": 1.9754700414271657e-05, "loss": 0.5109, "step": 5240 }, { "epoch": 0.14390444810543657, "grad_norm": 0.34817934036254883, "learning_rate": 1.975460533151513e-05, "loss": 0.4888, "step": 5241 }, { "epoch": 0.1439319055464031, "grad_norm": 0.3679596781730652, "learning_rate": 1.975451023056315e-05, "loss": 0.6343, "step": 5242 }, { "epoch": 0.14395936298736958, "grad_norm": 0.39662593603134155, "learning_rate": 1.9754415111415898e-05, "loss": 0.5984, "step": 5243 }, { "epoch": 0.14398682042833608, "grad_norm": 0.3538244068622589, "learning_rate": 1.9754319974073544e-05, "loss": 0.5434, "step": 5244 }, { "epoch": 0.14401427786930257, "grad_norm": 0.6787261366844177, "learning_rate": 1.9754224818536272e-05, "loss": 0.5315, "step": 5245 }, { "epoch": 0.1440417353102691, "grad_norm": 0.352488249540329, "learning_rate": 1.9754129644804262e-05, "loss": 0.619, "step": 5246 }, { "epoch": 0.1440691927512356, "grad_norm": 0.36517342925071716, "learning_rate": 1.9754034452877686e-05, "loss": 0.5934, "step": 5247 }, { "epoch": 0.14409665019220208, "grad_norm": 0.385572224855423, "learning_rate": 1.9753939242756716e-05, "loss": 0.558, "step": 5248 }, { "epoch": 0.1441241076331686, "grad_norm": 0.3565976023674011, "learning_rate": 1.975384401444154e-05, "loss": 0.4891, "step": 5249 }, { "epoch": 0.1441515650741351, "grad_norm": 0.6695986986160278, "learning_rate": 1.9753748767932333e-05, "loss": 0.5972, "step": 5250 }, { "epoch": 0.1441790225151016, "grad_norm": 0.39466309547424316, "learning_rate": 1.9753653503229272e-05, "loss": 0.5192, "step": 5251 }, { "epoch": 0.14420647995606808, "grad_norm": 0.3316930830478668, "learning_rate": 1.9753558220332533e-05, "loss": 0.5443, "step": 5252 }, { "epoch": 0.1442339373970346, "grad_norm": 0.34133392572402954, "learning_rate": 1.9753462919242295e-05, "loss": 0.5074, "step": 5253 }, { "epoch": 0.1442613948380011, "grad_norm": 0.351509153842926, "learning_rate": 1.9753367599958735e-05, "loss": 0.5466, "step": 5254 }, { "epoch": 0.1442888522789676, "grad_norm": 0.34685635566711426, "learning_rate": 1.9753272262482032e-05, "loss": 0.574, "step": 5255 }, { "epoch": 0.1443163097199341, "grad_norm": 0.3003350794315338, "learning_rate": 1.9753176906812362e-05, "loss": 0.5031, "step": 5256 }, { "epoch": 0.1443437671609006, "grad_norm": 0.3655555546283722, "learning_rate": 1.975308153294991e-05, "loss": 0.5941, "step": 5257 }, { "epoch": 0.1443712246018671, "grad_norm": 0.3401818871498108, "learning_rate": 1.975298614089484e-05, "loss": 0.4819, "step": 5258 }, { "epoch": 0.1443986820428336, "grad_norm": 0.4256969094276428, "learning_rate": 1.9752890730647342e-05, "loss": 0.6073, "step": 5259 }, { "epoch": 0.14442613948380012, "grad_norm": 0.3619632124900818, "learning_rate": 1.975279530220759e-05, "loss": 0.5001, "step": 5260 }, { "epoch": 0.14445359692476661, "grad_norm": 0.3467947840690613, "learning_rate": 1.975269985557576e-05, "loss": 0.5365, "step": 5261 }, { "epoch": 0.1444810543657331, "grad_norm": 0.3316633701324463, "learning_rate": 1.975260439075204e-05, "loss": 0.5045, "step": 5262 }, { "epoch": 0.1445085118066996, "grad_norm": 0.3911300301551819, "learning_rate": 1.975250890773659e-05, "loss": 0.6252, "step": 5263 }, { "epoch": 0.14453596924766612, "grad_norm": 0.4673580825328827, "learning_rate": 1.9752413406529604e-05, "loss": 0.5916, "step": 5264 }, { "epoch": 0.14456342668863262, "grad_norm": 0.45395970344543457, "learning_rate": 1.975231788713125e-05, "loss": 0.5286, "step": 5265 }, { "epoch": 0.1445908841295991, "grad_norm": 0.33042073249816895, "learning_rate": 1.9752222349541716e-05, "loss": 0.5295, "step": 5266 }, { "epoch": 0.14461834157056563, "grad_norm": 0.3314532935619354, "learning_rate": 1.9752126793761173e-05, "loss": 0.492, "step": 5267 }, { "epoch": 0.14464579901153213, "grad_norm": 0.49460774660110474, "learning_rate": 1.9752031219789798e-05, "loss": 0.4964, "step": 5268 }, { "epoch": 0.14467325645249862, "grad_norm": 0.3714655637741089, "learning_rate": 1.9751935627627774e-05, "loss": 0.4882, "step": 5269 }, { "epoch": 0.14470071389346512, "grad_norm": 0.3792128264904022, "learning_rate": 1.975184001727528e-05, "loss": 0.5328, "step": 5270 }, { "epoch": 0.14472817133443164, "grad_norm": 0.39238178730010986, "learning_rate": 1.975174438873249e-05, "loss": 0.5964, "step": 5271 }, { "epoch": 0.14475562877539813, "grad_norm": 0.37898609042167664, "learning_rate": 1.975164874199958e-05, "loss": 0.6393, "step": 5272 }, { "epoch": 0.14478308621636463, "grad_norm": 0.34487801790237427, "learning_rate": 1.975155307707674e-05, "loss": 0.487, "step": 5273 }, { "epoch": 0.14481054365733115, "grad_norm": 0.37276890873908997, "learning_rate": 1.975145739396414e-05, "loss": 0.5379, "step": 5274 }, { "epoch": 0.14483800109829764, "grad_norm": 0.42593756318092346, "learning_rate": 1.9751361692661954e-05, "loss": 0.4926, "step": 5275 }, { "epoch": 0.14486545853926414, "grad_norm": 0.38760584592819214, "learning_rate": 1.9751265973170372e-05, "loss": 0.5328, "step": 5276 }, { "epoch": 0.14489291598023063, "grad_norm": 0.3921535611152649, "learning_rate": 1.9751170235489562e-05, "loss": 0.5033, "step": 5277 }, { "epoch": 0.14492037342119715, "grad_norm": 0.3716851472854614, "learning_rate": 1.975107447961971e-05, "loss": 0.5427, "step": 5278 }, { "epoch": 0.14494783086216365, "grad_norm": 0.3654526174068451, "learning_rate": 1.9750978705560994e-05, "loss": 0.5302, "step": 5279 }, { "epoch": 0.14497528830313014, "grad_norm": 0.3662642538547516, "learning_rate": 1.975088291331359e-05, "loss": 0.5099, "step": 5280 }, { "epoch": 0.14500274574409666, "grad_norm": 0.3811503052711487, "learning_rate": 1.9750787102877673e-05, "loss": 0.617, "step": 5281 }, { "epoch": 0.14503020318506316, "grad_norm": 0.35303762555122375, "learning_rate": 1.9750691274253428e-05, "loss": 0.514, "step": 5282 }, { "epoch": 0.14505766062602965, "grad_norm": 0.4815235137939453, "learning_rate": 1.9750595427441035e-05, "loss": 0.5843, "step": 5283 }, { "epoch": 0.14508511806699614, "grad_norm": 0.38327503204345703, "learning_rate": 1.9750499562440665e-05, "loss": 0.6647, "step": 5284 }, { "epoch": 0.14511257550796267, "grad_norm": 0.373838871717453, "learning_rate": 1.9750403679252503e-05, "loss": 0.5464, "step": 5285 }, { "epoch": 0.14514003294892916, "grad_norm": 0.3460679054260254, "learning_rate": 1.9750307777876728e-05, "loss": 0.5276, "step": 5286 }, { "epoch": 0.14516749038989565, "grad_norm": 0.36705532670021057, "learning_rate": 1.9750211858313515e-05, "loss": 0.5588, "step": 5287 }, { "epoch": 0.14519494783086218, "grad_norm": 0.39007768034935, "learning_rate": 1.9750115920563044e-05, "loss": 0.5722, "step": 5288 }, { "epoch": 0.14522240527182867, "grad_norm": 0.34954696893692017, "learning_rate": 1.97500199646255e-05, "loss": 0.5083, "step": 5289 }, { "epoch": 0.14524986271279516, "grad_norm": 0.38864612579345703, "learning_rate": 1.974992399050105e-05, "loss": 0.5455, "step": 5290 }, { "epoch": 0.14527732015376166, "grad_norm": 0.32096025347709656, "learning_rate": 1.9749827998189882e-05, "loss": 0.5444, "step": 5291 }, { "epoch": 0.14530477759472818, "grad_norm": 0.3865492045879364, "learning_rate": 1.9749731987692177e-05, "loss": 0.655, "step": 5292 }, { "epoch": 0.14533223503569467, "grad_norm": 0.3834858238697052, "learning_rate": 1.9749635959008108e-05, "loss": 0.5836, "step": 5293 }, { "epoch": 0.14535969247666117, "grad_norm": 0.3700025975704193, "learning_rate": 1.9749539912137852e-05, "loss": 0.5749, "step": 5294 }, { "epoch": 0.1453871499176277, "grad_norm": 0.3751637041568756, "learning_rate": 1.9749443847081596e-05, "loss": 0.519, "step": 5295 }, { "epoch": 0.14541460735859418, "grad_norm": 0.36881428956985474, "learning_rate": 1.974934776383952e-05, "loss": 0.5064, "step": 5296 }, { "epoch": 0.14544206479956068, "grad_norm": 0.356995552778244, "learning_rate": 1.974925166241179e-05, "loss": 0.5934, "step": 5297 }, { "epoch": 0.14546952224052717, "grad_norm": 0.5500452518463135, "learning_rate": 1.97491555427986e-05, "loss": 0.6416, "step": 5298 }, { "epoch": 0.1454969796814937, "grad_norm": 0.44850045442581177, "learning_rate": 1.974905940500012e-05, "loss": 0.55, "step": 5299 }, { "epoch": 0.1455244371224602, "grad_norm": 0.35959866642951965, "learning_rate": 1.9748963249016535e-05, "loss": 0.5386, "step": 5300 }, { "epoch": 0.14555189456342668, "grad_norm": 0.3935684859752655, "learning_rate": 1.9748867074848022e-05, "loss": 0.5113, "step": 5301 }, { "epoch": 0.1455793520043932, "grad_norm": 0.3863079845905304, "learning_rate": 1.9748770882494762e-05, "loss": 0.6613, "step": 5302 }, { "epoch": 0.1456068094453597, "grad_norm": 0.3907838463783264, "learning_rate": 1.9748674671956927e-05, "loss": 0.4867, "step": 5303 }, { "epoch": 0.1456342668863262, "grad_norm": 0.331279993057251, "learning_rate": 1.974857844323471e-05, "loss": 0.4832, "step": 5304 }, { "epoch": 0.14566172432729269, "grad_norm": 0.3659103512763977, "learning_rate": 1.9748482196328275e-05, "loss": 0.5406, "step": 5305 }, { "epoch": 0.1456891817682592, "grad_norm": 0.398784339427948, "learning_rate": 1.9748385931237815e-05, "loss": 0.5818, "step": 5306 }, { "epoch": 0.1457166392092257, "grad_norm": 0.4081275761127472, "learning_rate": 1.97482896479635e-05, "loss": 0.5664, "step": 5307 }, { "epoch": 0.1457440966501922, "grad_norm": 0.3598687946796417, "learning_rate": 1.9748193346505515e-05, "loss": 0.5345, "step": 5308 }, { "epoch": 0.14577155409115872, "grad_norm": 0.366371750831604, "learning_rate": 1.974809702686404e-05, "loss": 0.5731, "step": 5309 }, { "epoch": 0.1457990115321252, "grad_norm": 0.4857545495033264, "learning_rate": 1.974800068903925e-05, "loss": 0.6214, "step": 5310 }, { "epoch": 0.1458264689730917, "grad_norm": 0.383028119802475, "learning_rate": 1.974790433303133e-05, "loss": 0.6039, "step": 5311 }, { "epoch": 0.1458539264140582, "grad_norm": 0.35723817348480225, "learning_rate": 1.9747807958840456e-05, "loss": 0.4647, "step": 5312 }, { "epoch": 0.14588138385502472, "grad_norm": 0.3220714330673218, "learning_rate": 1.974771156646681e-05, "loss": 0.4194, "step": 5313 }, { "epoch": 0.14590884129599122, "grad_norm": 0.37543076276779175, "learning_rate": 1.974761515591057e-05, "loss": 0.5383, "step": 5314 }, { "epoch": 0.1459362987369577, "grad_norm": 0.3483539819717407, "learning_rate": 1.9747518727171916e-05, "loss": 0.5228, "step": 5315 }, { "epoch": 0.14596375617792423, "grad_norm": 0.41193923354148865, "learning_rate": 1.974742228025103e-05, "loss": 0.6158, "step": 5316 }, { "epoch": 0.14599121361889073, "grad_norm": 0.43999892473220825, "learning_rate": 1.9747325815148088e-05, "loss": 0.5995, "step": 5317 }, { "epoch": 0.14601867105985722, "grad_norm": 0.40057849884033203, "learning_rate": 1.9747229331863276e-05, "loss": 0.5819, "step": 5318 }, { "epoch": 0.1460461285008237, "grad_norm": 0.3874513506889343, "learning_rate": 1.9747132830396766e-05, "loss": 0.5409, "step": 5319 }, { "epoch": 0.14607358594179023, "grad_norm": 0.3776434361934662, "learning_rate": 1.9747036310748746e-05, "loss": 0.581, "step": 5320 }, { "epoch": 0.14610104338275673, "grad_norm": 0.3883746266365051, "learning_rate": 1.9746939772919393e-05, "loss": 0.579, "step": 5321 }, { "epoch": 0.14612850082372322, "grad_norm": 0.36094146966934204, "learning_rate": 1.974684321690888e-05, "loss": 0.4632, "step": 5322 }, { "epoch": 0.14615595826468972, "grad_norm": 0.33588096499443054, "learning_rate": 1.9746746642717404e-05, "loss": 0.4656, "step": 5323 }, { "epoch": 0.14618341570565624, "grad_norm": 0.3692617416381836, "learning_rate": 1.9746650050345126e-05, "loss": 0.5322, "step": 5324 }, { "epoch": 0.14621087314662273, "grad_norm": 0.33624306321144104, "learning_rate": 1.974655343979224e-05, "loss": 0.572, "step": 5325 }, { "epoch": 0.14623833058758923, "grad_norm": 0.40420469641685486, "learning_rate": 1.9746456811058917e-05, "loss": 0.5374, "step": 5326 }, { "epoch": 0.14626578802855575, "grad_norm": 0.3463273048400879, "learning_rate": 1.974636016414534e-05, "loss": 0.4844, "step": 5327 }, { "epoch": 0.14629324546952224, "grad_norm": 0.40737441182136536, "learning_rate": 1.9746263499051697e-05, "loss": 0.4701, "step": 5328 }, { "epoch": 0.14632070291048874, "grad_norm": 0.39390110969543457, "learning_rate": 1.9746166815778158e-05, "loss": 0.63, "step": 5329 }, { "epoch": 0.14634816035145523, "grad_norm": 0.44744783639907837, "learning_rate": 1.974607011432491e-05, "loss": 0.5225, "step": 5330 }, { "epoch": 0.14637561779242175, "grad_norm": 0.3700573444366455, "learning_rate": 1.9745973394692125e-05, "loss": 0.6246, "step": 5331 }, { "epoch": 0.14640307523338825, "grad_norm": 0.3573521673679352, "learning_rate": 1.9745876656879998e-05, "loss": 0.5922, "step": 5332 }, { "epoch": 0.14643053267435474, "grad_norm": 0.37474939227104187, "learning_rate": 1.9745779900888694e-05, "loss": 0.4701, "step": 5333 }, { "epoch": 0.14645799011532126, "grad_norm": 0.37685492634773254, "learning_rate": 1.97456831267184e-05, "loss": 0.5486, "step": 5334 }, { "epoch": 0.14648544755628776, "grad_norm": 0.36680108308792114, "learning_rate": 1.9745586334369296e-05, "loss": 0.5099, "step": 5335 }, { "epoch": 0.14651290499725425, "grad_norm": 0.32809144258499146, "learning_rate": 1.9745489523841566e-05, "loss": 0.5017, "step": 5336 }, { "epoch": 0.14654036243822074, "grad_norm": 0.37393873929977417, "learning_rate": 1.9745392695135387e-05, "loss": 0.6129, "step": 5337 }, { "epoch": 0.14656781987918727, "grad_norm": 0.3478635847568512, "learning_rate": 1.974529584825094e-05, "loss": 0.5659, "step": 5338 }, { "epoch": 0.14659527732015376, "grad_norm": 0.3196670711040497, "learning_rate": 1.9745198983188404e-05, "loss": 0.4785, "step": 5339 }, { "epoch": 0.14662273476112025, "grad_norm": 0.36025553941726685, "learning_rate": 1.9745102099947964e-05, "loss": 0.5346, "step": 5340 }, { "epoch": 0.14665019220208678, "grad_norm": 0.3553926646709442, "learning_rate": 1.97450051985298e-05, "loss": 0.5227, "step": 5341 }, { "epoch": 0.14667764964305327, "grad_norm": 0.37404894828796387, "learning_rate": 1.9744908278934085e-05, "loss": 0.5534, "step": 5342 }, { "epoch": 0.14670510708401976, "grad_norm": 0.37603169679641724, "learning_rate": 1.974481134116101e-05, "loss": 0.5271, "step": 5343 }, { "epoch": 0.14673256452498626, "grad_norm": 0.39141950011253357, "learning_rate": 1.974471438521075e-05, "loss": 0.5164, "step": 5344 }, { "epoch": 0.14676002196595278, "grad_norm": 0.39432278275489807, "learning_rate": 1.9744617411083487e-05, "loss": 0.4167, "step": 5345 }, { "epoch": 0.14678747940691927, "grad_norm": 0.4112486243247986, "learning_rate": 1.9744520418779406e-05, "loss": 0.5713, "step": 5346 }, { "epoch": 0.14681493684788577, "grad_norm": 0.41011151671409607, "learning_rate": 1.9744423408298683e-05, "loss": 0.4953, "step": 5347 }, { "epoch": 0.1468423942888523, "grad_norm": 0.5496740937232971, "learning_rate": 1.97443263796415e-05, "loss": 0.5736, "step": 5348 }, { "epoch": 0.14686985172981878, "grad_norm": 0.41244834661483765, "learning_rate": 1.974422933280804e-05, "loss": 0.5883, "step": 5349 }, { "epoch": 0.14689730917078528, "grad_norm": 0.36061879992485046, "learning_rate": 1.9744132267798476e-05, "loss": 0.581, "step": 5350 }, { "epoch": 0.14692476661175177, "grad_norm": 0.39280039072036743, "learning_rate": 1.9744035184613e-05, "loss": 0.5917, "step": 5351 }, { "epoch": 0.1469522240527183, "grad_norm": 0.335227370262146, "learning_rate": 1.9743938083251792e-05, "loss": 0.4464, "step": 5352 }, { "epoch": 0.1469796814936848, "grad_norm": 0.3693096339702606, "learning_rate": 1.974384096371502e-05, "loss": 0.5371, "step": 5353 }, { "epoch": 0.14700713893465128, "grad_norm": 0.3495848774909973, "learning_rate": 1.9743743826002884e-05, "loss": 0.4827, "step": 5354 }, { "epoch": 0.1470345963756178, "grad_norm": 0.4408680200576782, "learning_rate": 1.974364667011555e-05, "loss": 0.5473, "step": 5355 }, { "epoch": 0.1470620538165843, "grad_norm": 0.3825119137763977, "learning_rate": 1.974354949605321e-05, "loss": 0.553, "step": 5356 }, { "epoch": 0.1470895112575508, "grad_norm": 0.33028921484947205, "learning_rate": 1.9743452303816038e-05, "loss": 0.5312, "step": 5357 }, { "epoch": 0.1471169686985173, "grad_norm": 0.37295761704444885, "learning_rate": 1.9743355093404215e-05, "loss": 0.5906, "step": 5358 }, { "epoch": 0.1471444261394838, "grad_norm": 0.32657378911972046, "learning_rate": 1.974325786481793e-05, "loss": 0.4269, "step": 5359 }, { "epoch": 0.1471718835804503, "grad_norm": 0.3780403137207031, "learning_rate": 1.9743160618057355e-05, "loss": 0.4779, "step": 5360 }, { "epoch": 0.1471993410214168, "grad_norm": 0.3739623427391052, "learning_rate": 1.974306335312268e-05, "loss": 0.493, "step": 5361 }, { "epoch": 0.14722679846238332, "grad_norm": 0.3955196142196655, "learning_rate": 1.9742966070014077e-05, "loss": 0.5096, "step": 5362 }, { "epoch": 0.1472542559033498, "grad_norm": 0.36828434467315674, "learning_rate": 1.9742868768731737e-05, "loss": 0.5018, "step": 5363 }, { "epoch": 0.1472817133443163, "grad_norm": 0.3341670632362366, "learning_rate": 1.9742771449275837e-05, "loss": 0.5224, "step": 5364 }, { "epoch": 0.1473091707852828, "grad_norm": 0.3714386522769928, "learning_rate": 1.9742674111646554e-05, "loss": 0.5212, "step": 5365 }, { "epoch": 0.14733662822624932, "grad_norm": 0.37426215410232544, "learning_rate": 1.9742576755844078e-05, "loss": 0.4912, "step": 5366 }, { "epoch": 0.14736408566721582, "grad_norm": 0.34451964497566223, "learning_rate": 1.9742479381868587e-05, "loss": 0.5936, "step": 5367 }, { "epoch": 0.1473915431081823, "grad_norm": 0.3622000813484192, "learning_rate": 1.9742381989720262e-05, "loss": 0.5603, "step": 5368 }, { "epoch": 0.14741900054914883, "grad_norm": 0.4219096899032593, "learning_rate": 1.9742284579399286e-05, "loss": 0.5926, "step": 5369 }, { "epoch": 0.14744645799011533, "grad_norm": 0.32407671213150024, "learning_rate": 1.974218715090584e-05, "loss": 0.4682, "step": 5370 }, { "epoch": 0.14747391543108182, "grad_norm": 0.3334480822086334, "learning_rate": 1.9742089704240104e-05, "loss": 0.4844, "step": 5371 }, { "epoch": 0.14750137287204831, "grad_norm": 0.37841692566871643, "learning_rate": 1.9741992239402265e-05, "loss": 0.5434, "step": 5372 }, { "epoch": 0.14752883031301484, "grad_norm": 0.3657386898994446, "learning_rate": 1.9741894756392495e-05, "loss": 0.4558, "step": 5373 }, { "epoch": 0.14755628775398133, "grad_norm": 0.39582133293151855, "learning_rate": 1.974179725521099e-05, "loss": 0.5888, "step": 5374 }, { "epoch": 0.14758374519494782, "grad_norm": 0.3713113069534302, "learning_rate": 1.9741699735857916e-05, "loss": 0.5564, "step": 5375 }, { "epoch": 0.14761120263591435, "grad_norm": 0.35794365406036377, "learning_rate": 1.9741602198333466e-05, "loss": 0.508, "step": 5376 }, { "epoch": 0.14763866007688084, "grad_norm": 0.38769251108169556, "learning_rate": 1.974150464263782e-05, "loss": 0.6128, "step": 5377 }, { "epoch": 0.14766611751784733, "grad_norm": 0.44010472297668457, "learning_rate": 1.9741407068771157e-05, "loss": 0.6198, "step": 5378 }, { "epoch": 0.14769357495881383, "grad_norm": 0.35897526144981384, "learning_rate": 1.974130947673366e-05, "loss": 0.4612, "step": 5379 }, { "epoch": 0.14772103239978035, "grad_norm": 0.46248939633369446, "learning_rate": 1.9741211866525517e-05, "loss": 0.5973, "step": 5380 }, { "epoch": 0.14774848984074684, "grad_norm": 0.4053727686405182, "learning_rate": 1.97411142381469e-05, "loss": 0.5631, "step": 5381 }, { "epoch": 0.14777594728171334, "grad_norm": 0.3468163311481476, "learning_rate": 1.9741016591597995e-05, "loss": 0.5992, "step": 5382 }, { "epoch": 0.14780340472267986, "grad_norm": 0.3356316387653351, "learning_rate": 1.9740918926878988e-05, "loss": 0.4298, "step": 5383 }, { "epoch": 0.14783086216364635, "grad_norm": 0.3818214535713196, "learning_rate": 1.9740821243990056e-05, "loss": 0.6327, "step": 5384 }, { "epoch": 0.14785831960461285, "grad_norm": 0.37206265330314636, "learning_rate": 1.9740723542931387e-05, "loss": 0.5447, "step": 5385 }, { "epoch": 0.14788577704557934, "grad_norm": 0.3761729598045349, "learning_rate": 1.9740625823703156e-05, "loss": 0.552, "step": 5386 }, { "epoch": 0.14791323448654586, "grad_norm": 0.32449936866760254, "learning_rate": 1.974052808630555e-05, "loss": 0.4601, "step": 5387 }, { "epoch": 0.14794069192751236, "grad_norm": 0.4250233471393585, "learning_rate": 1.974043033073875e-05, "loss": 0.5163, "step": 5388 }, { "epoch": 0.14796814936847885, "grad_norm": 0.3570390045642853, "learning_rate": 1.974033255700294e-05, "loss": 0.5659, "step": 5389 }, { "epoch": 0.14799560680944535, "grad_norm": 0.3714039623737335, "learning_rate": 1.97402347650983e-05, "loss": 0.6185, "step": 5390 }, { "epoch": 0.14802306425041187, "grad_norm": 0.4664088487625122, "learning_rate": 1.9740136955025012e-05, "loss": 0.5648, "step": 5391 }, { "epoch": 0.14805052169137836, "grad_norm": 0.3889542520046234, "learning_rate": 1.9740039126783262e-05, "loss": 0.5461, "step": 5392 }, { "epoch": 0.14807797913234486, "grad_norm": 0.39194533228874207, "learning_rate": 1.973994128037323e-05, "loss": 0.5357, "step": 5393 }, { "epoch": 0.14810543657331138, "grad_norm": 0.35966774821281433, "learning_rate": 1.9739843415795102e-05, "loss": 0.5134, "step": 5394 }, { "epoch": 0.14813289401427787, "grad_norm": 0.43552324175834656, "learning_rate": 1.973974553304905e-05, "loss": 0.6745, "step": 5395 }, { "epoch": 0.14816035145524437, "grad_norm": 0.4363223910331726, "learning_rate": 1.9739647632135273e-05, "loss": 0.4888, "step": 5396 }, { "epoch": 0.14818780889621086, "grad_norm": 0.3487359881401062, "learning_rate": 1.973954971305394e-05, "loss": 0.4945, "step": 5397 }, { "epoch": 0.14821526633717738, "grad_norm": 0.3342610001564026, "learning_rate": 1.9739451775805238e-05, "loss": 0.4479, "step": 5398 }, { "epoch": 0.14824272377814388, "grad_norm": 0.3421074450016022, "learning_rate": 1.9739353820389347e-05, "loss": 0.518, "step": 5399 }, { "epoch": 0.14827018121911037, "grad_norm": 0.4065733253955841, "learning_rate": 1.9739255846806457e-05, "loss": 0.5219, "step": 5400 }, { "epoch": 0.1482976386600769, "grad_norm": 0.3942786157131195, "learning_rate": 1.9739157855056746e-05, "loss": 0.5276, "step": 5401 }, { "epoch": 0.14832509610104339, "grad_norm": 0.429015189409256, "learning_rate": 1.9739059845140394e-05, "loss": 0.5998, "step": 5402 }, { "epoch": 0.14835255354200988, "grad_norm": 0.4054042398929596, "learning_rate": 1.9738961817057595e-05, "loss": 0.6032, "step": 5403 }, { "epoch": 0.14838001098297637, "grad_norm": 0.3672468364238739, "learning_rate": 1.9738863770808517e-05, "loss": 0.5609, "step": 5404 }, { "epoch": 0.1484074684239429, "grad_norm": 0.36872121691703796, "learning_rate": 1.973876570639335e-05, "loss": 0.5642, "step": 5405 }, { "epoch": 0.1484349258649094, "grad_norm": 0.4199083149433136, "learning_rate": 1.973866762381228e-05, "loss": 0.5518, "step": 5406 }, { "epoch": 0.14846238330587588, "grad_norm": 0.3219093680381775, "learning_rate": 1.9738569523065484e-05, "loss": 0.466, "step": 5407 }, { "epoch": 0.1484898407468424, "grad_norm": 0.40219223499298096, "learning_rate": 1.9738471404153146e-05, "loss": 0.5625, "step": 5408 }, { "epoch": 0.1485172981878089, "grad_norm": 0.34217825531959534, "learning_rate": 1.9738373267075455e-05, "loss": 0.4688, "step": 5409 }, { "epoch": 0.1485447556287754, "grad_norm": 0.4533117413520813, "learning_rate": 1.9738275111832586e-05, "loss": 0.5593, "step": 5410 }, { "epoch": 0.1485722130697419, "grad_norm": 0.4782189428806305, "learning_rate": 1.973817693842473e-05, "loss": 0.6073, "step": 5411 }, { "epoch": 0.1485996705107084, "grad_norm": 0.4053649306297302, "learning_rate": 1.9738078746852063e-05, "loss": 0.5427, "step": 5412 }, { "epoch": 0.1486271279516749, "grad_norm": 0.3140270411968231, "learning_rate": 1.973798053711477e-05, "loss": 0.4673, "step": 5413 }, { "epoch": 0.1486545853926414, "grad_norm": 0.4638076722621918, "learning_rate": 1.973788230921304e-05, "loss": 0.5994, "step": 5414 }, { "epoch": 0.14868204283360792, "grad_norm": 0.41284942626953125, "learning_rate": 1.9737784063147047e-05, "loss": 0.5398, "step": 5415 }, { "epoch": 0.1487095002745744, "grad_norm": 0.4148675501346588, "learning_rate": 1.973768579891698e-05, "loss": 0.6447, "step": 5416 }, { "epoch": 0.1487369577155409, "grad_norm": 0.3828205168247223, "learning_rate": 1.973758751652302e-05, "loss": 0.6837, "step": 5417 }, { "epoch": 0.1487644151565074, "grad_norm": 0.32863932847976685, "learning_rate": 1.9737489215965353e-05, "loss": 0.5437, "step": 5418 }, { "epoch": 0.14879187259747392, "grad_norm": 0.35620376467704773, "learning_rate": 1.973739089724416e-05, "loss": 0.6147, "step": 5419 }, { "epoch": 0.14881933003844042, "grad_norm": 0.4132196009159088, "learning_rate": 1.9737292560359626e-05, "loss": 0.5861, "step": 5420 }, { "epoch": 0.1488467874794069, "grad_norm": 0.33130204677581787, "learning_rate": 1.9737194205311935e-05, "loss": 0.515, "step": 5421 }, { "epoch": 0.14887424492037343, "grad_norm": 0.4014488160610199, "learning_rate": 1.9737095832101265e-05, "loss": 0.6329, "step": 5422 }, { "epoch": 0.14890170236133993, "grad_norm": 0.3727554976940155, "learning_rate": 1.9736997440727806e-05, "loss": 0.5733, "step": 5423 }, { "epoch": 0.14892915980230642, "grad_norm": 0.47951358556747437, "learning_rate": 1.9736899031191742e-05, "loss": 0.5966, "step": 5424 }, { "epoch": 0.14895661724327292, "grad_norm": 0.3686008155345917, "learning_rate": 1.973680060349325e-05, "loss": 0.5355, "step": 5425 }, { "epoch": 0.14898407468423944, "grad_norm": 0.321787029504776, "learning_rate": 1.9736702157632515e-05, "loss": 0.5372, "step": 5426 }, { "epoch": 0.14901153212520593, "grad_norm": 0.44427403807640076, "learning_rate": 1.973660369360973e-05, "loss": 0.5212, "step": 5427 }, { "epoch": 0.14903898956617243, "grad_norm": 0.33534571528434753, "learning_rate": 1.973650521142506e-05, "loss": 0.5026, "step": 5428 }, { "epoch": 0.14906644700713895, "grad_norm": 0.37985658645629883, "learning_rate": 1.9736406711078714e-05, "loss": 0.4968, "step": 5429 }, { "epoch": 0.14909390444810544, "grad_norm": 0.48362603783607483, "learning_rate": 1.9736308192570855e-05, "loss": 0.4581, "step": 5430 }, { "epoch": 0.14912136188907194, "grad_norm": 0.40638208389282227, "learning_rate": 1.973620965590167e-05, "loss": 0.6695, "step": 5431 }, { "epoch": 0.14914881933003843, "grad_norm": 0.34846746921539307, "learning_rate": 1.9736111101071355e-05, "loss": 0.5015, "step": 5432 }, { "epoch": 0.14917627677100495, "grad_norm": 0.3912896513938904, "learning_rate": 1.973601252808008e-05, "loss": 0.5661, "step": 5433 }, { "epoch": 0.14920373421197144, "grad_norm": 0.379562646150589, "learning_rate": 1.9735913936928036e-05, "loss": 0.5895, "step": 5434 }, { "epoch": 0.14923119165293794, "grad_norm": 0.35137906670570374, "learning_rate": 1.9735815327615403e-05, "loss": 0.5226, "step": 5435 }, { "epoch": 0.14925864909390446, "grad_norm": 0.31851595640182495, "learning_rate": 1.973571670014237e-05, "loss": 0.5527, "step": 5436 }, { "epoch": 0.14928610653487095, "grad_norm": 0.3791787028312683, "learning_rate": 1.9735618054509115e-05, "loss": 0.5498, "step": 5437 }, { "epoch": 0.14931356397583745, "grad_norm": 0.3747737407684326, "learning_rate": 1.973551939071583e-05, "loss": 0.6288, "step": 5438 }, { "epoch": 0.14934102141680394, "grad_norm": 0.37143221497535706, "learning_rate": 1.9735420708762688e-05, "loss": 0.5986, "step": 5439 }, { "epoch": 0.14936847885777046, "grad_norm": 0.3512117564678192, "learning_rate": 1.9735322008649884e-05, "loss": 0.577, "step": 5440 }, { "epoch": 0.14939593629873696, "grad_norm": 0.37085437774658203, "learning_rate": 1.9735223290377595e-05, "loss": 0.5922, "step": 5441 }, { "epoch": 0.14942339373970345, "grad_norm": 0.3812800645828247, "learning_rate": 1.9735124553946008e-05, "loss": 0.5298, "step": 5442 }, { "epoch": 0.14945085118066997, "grad_norm": 0.39554622769355774, "learning_rate": 1.9735025799355307e-05, "loss": 0.5809, "step": 5443 }, { "epoch": 0.14947830862163647, "grad_norm": 0.36180579662323, "learning_rate": 1.9734927026605674e-05, "loss": 0.4868, "step": 5444 }, { "epoch": 0.14950576606260296, "grad_norm": 0.36902672052383423, "learning_rate": 1.9734828235697295e-05, "loss": 0.5551, "step": 5445 }, { "epoch": 0.14953322350356946, "grad_norm": 0.3490082621574402, "learning_rate": 1.973472942663036e-05, "loss": 0.6011, "step": 5446 }, { "epoch": 0.14956068094453598, "grad_norm": 0.3847252428531647, "learning_rate": 1.973463059940504e-05, "loss": 0.5294, "step": 5447 }, { "epoch": 0.14958813838550247, "grad_norm": 0.38596588373184204, "learning_rate": 1.973453175402153e-05, "loss": 0.5368, "step": 5448 }, { "epoch": 0.14961559582646897, "grad_norm": 0.40037351846694946, "learning_rate": 1.9734432890480014e-05, "loss": 0.5508, "step": 5449 }, { "epoch": 0.1496430532674355, "grad_norm": 0.5406760573387146, "learning_rate": 1.973433400878067e-05, "loss": 0.5212, "step": 5450 }, { "epoch": 0.14967051070840198, "grad_norm": 0.3883829116821289, "learning_rate": 1.973423510892369e-05, "loss": 0.6091, "step": 5451 }, { "epoch": 0.14969796814936848, "grad_norm": 0.35628417134284973, "learning_rate": 1.9734136190909252e-05, "loss": 0.5195, "step": 5452 }, { "epoch": 0.14972542559033497, "grad_norm": 0.3494766354560852, "learning_rate": 1.9734037254737545e-05, "loss": 0.5857, "step": 5453 }, { "epoch": 0.1497528830313015, "grad_norm": 0.4444551169872284, "learning_rate": 1.9733938300408747e-05, "loss": 0.5893, "step": 5454 }, { "epoch": 0.149780340472268, "grad_norm": 0.40822646021842957, "learning_rate": 1.9733839327923053e-05, "loss": 0.5272, "step": 5455 }, { "epoch": 0.14980779791323448, "grad_norm": 0.3623168468475342, "learning_rate": 1.973374033728064e-05, "loss": 0.5543, "step": 5456 }, { "epoch": 0.14983525535420097, "grad_norm": 0.40058866143226624, "learning_rate": 1.9733641328481693e-05, "loss": 0.4685, "step": 5457 }, { "epoch": 0.1498627127951675, "grad_norm": 0.3532308340072632, "learning_rate": 1.97335423015264e-05, "loss": 0.5587, "step": 5458 }, { "epoch": 0.149890170236134, "grad_norm": 0.3472833037376404, "learning_rate": 1.9733443256414943e-05, "loss": 0.4933, "step": 5459 }, { "epoch": 0.14991762767710048, "grad_norm": 0.32734501361846924, "learning_rate": 1.973334419314751e-05, "loss": 0.5674, "step": 5460 }, { "epoch": 0.149945085118067, "grad_norm": 0.3325420320034027, "learning_rate": 1.9733245111724282e-05, "loss": 0.4844, "step": 5461 }, { "epoch": 0.1499725425590335, "grad_norm": 0.35602131485939026, "learning_rate": 1.9733146012145445e-05, "loss": 0.5324, "step": 5462 }, { "epoch": 0.15, "grad_norm": 0.38359367847442627, "learning_rate": 1.9733046894411184e-05, "loss": 0.5913, "step": 5463 }, { "epoch": 0.1500274574409665, "grad_norm": 0.3943618834018707, "learning_rate": 1.9732947758521684e-05, "loss": 0.5638, "step": 5464 }, { "epoch": 0.150054914881933, "grad_norm": 0.3892349600791931, "learning_rate": 1.9732848604477134e-05, "loss": 0.5858, "step": 5465 }, { "epoch": 0.1500823723228995, "grad_norm": 0.38026872277259827, "learning_rate": 1.973274943227771e-05, "loss": 0.5516, "step": 5466 }, { "epoch": 0.150109829763866, "grad_norm": 0.361870139837265, "learning_rate": 1.9732650241923602e-05, "loss": 0.5461, "step": 5467 }, { "epoch": 0.15013728720483252, "grad_norm": 0.3927292227745056, "learning_rate": 1.9732551033415e-05, "loss": 0.5586, "step": 5468 }, { "epoch": 0.15016474464579901, "grad_norm": 0.4120394289493561, "learning_rate": 1.973245180675208e-05, "loss": 0.597, "step": 5469 }, { "epoch": 0.1501922020867655, "grad_norm": 0.357098251581192, "learning_rate": 1.9732352561935033e-05, "loss": 0.5158, "step": 5470 }, { "epoch": 0.150219659527732, "grad_norm": 0.36201396584510803, "learning_rate": 1.973225329896404e-05, "loss": 0.5449, "step": 5471 }, { "epoch": 0.15024711696869852, "grad_norm": 0.3254477381706238, "learning_rate": 1.973215401783929e-05, "loss": 0.436, "step": 5472 }, { "epoch": 0.15027457440966502, "grad_norm": 0.35944077372550964, "learning_rate": 1.9732054718560968e-05, "loss": 0.5849, "step": 5473 }, { "epoch": 0.1503020318506315, "grad_norm": 0.3625255525112152, "learning_rate": 1.9731955401129254e-05, "loss": 0.5405, "step": 5474 }, { "epoch": 0.15032948929159803, "grad_norm": 0.3702065646648407, "learning_rate": 1.973185606554434e-05, "loss": 0.5636, "step": 5475 }, { "epoch": 0.15035694673256453, "grad_norm": 0.33432573080062866, "learning_rate": 1.973175671180641e-05, "loss": 0.4879, "step": 5476 }, { "epoch": 0.15038440417353102, "grad_norm": 0.37046605348587036, "learning_rate": 1.9731657339915645e-05, "loss": 0.5376, "step": 5477 }, { "epoch": 0.15041186161449752, "grad_norm": 0.41332754492759705, "learning_rate": 1.9731557949872232e-05, "loss": 0.5696, "step": 5478 }, { "epoch": 0.15043931905546404, "grad_norm": 0.3858721852302551, "learning_rate": 1.973145854167636e-05, "loss": 0.5648, "step": 5479 }, { "epoch": 0.15046677649643053, "grad_norm": 0.3690875470638275, "learning_rate": 1.973135911532821e-05, "loss": 0.4709, "step": 5480 }, { "epoch": 0.15049423393739703, "grad_norm": 0.35418081283569336, "learning_rate": 1.9731259670827973e-05, "loss": 0.6074, "step": 5481 }, { "epoch": 0.15052169137836355, "grad_norm": 0.3447156250476837, "learning_rate": 1.9731160208175825e-05, "loss": 0.4247, "step": 5482 }, { "epoch": 0.15054914881933004, "grad_norm": 0.3723205626010895, "learning_rate": 1.973106072737196e-05, "loss": 0.6548, "step": 5483 }, { "epoch": 0.15057660626029654, "grad_norm": 0.4221436083316803, "learning_rate": 1.973096122841656e-05, "loss": 0.5544, "step": 5484 }, { "epoch": 0.15060406370126303, "grad_norm": 0.3566027581691742, "learning_rate": 1.9730861711309813e-05, "loss": 0.5151, "step": 5485 }, { "epoch": 0.15063152114222955, "grad_norm": 0.38376685976982117, "learning_rate": 1.9730762176051902e-05, "loss": 0.5585, "step": 5486 }, { "epoch": 0.15065897858319605, "grad_norm": 0.3500271439552307, "learning_rate": 1.9730662622643017e-05, "loss": 0.563, "step": 5487 }, { "epoch": 0.15068643602416254, "grad_norm": 0.38316860795021057, "learning_rate": 1.973056305108334e-05, "loss": 0.5512, "step": 5488 }, { "epoch": 0.15071389346512906, "grad_norm": 0.32794636487960815, "learning_rate": 1.9730463461373055e-05, "loss": 0.5247, "step": 5489 }, { "epoch": 0.15074135090609556, "grad_norm": 0.3793143928050995, "learning_rate": 1.973036385351235e-05, "loss": 0.5294, "step": 5490 }, { "epoch": 0.15076880834706205, "grad_norm": 0.37142741680145264, "learning_rate": 1.9730264227501413e-05, "loss": 0.5825, "step": 5491 }, { "epoch": 0.15079626578802854, "grad_norm": 0.3818708062171936, "learning_rate": 1.9730164583340425e-05, "loss": 0.5395, "step": 5492 }, { "epoch": 0.15082372322899507, "grad_norm": 0.3467860817909241, "learning_rate": 1.9730064921029577e-05, "loss": 0.5504, "step": 5493 }, { "epoch": 0.15085118066996156, "grad_norm": 0.37573227286338806, "learning_rate": 1.972996524056905e-05, "loss": 0.5533, "step": 5494 }, { "epoch": 0.15087863811092805, "grad_norm": 0.41943517327308655, "learning_rate": 1.9729865541959035e-05, "loss": 0.6091, "step": 5495 }, { "epoch": 0.15090609555189458, "grad_norm": 0.3596145510673523, "learning_rate": 1.9729765825199714e-05, "loss": 0.5461, "step": 5496 }, { "epoch": 0.15093355299286107, "grad_norm": 0.44140323996543884, "learning_rate": 1.972966609029127e-05, "loss": 0.5648, "step": 5497 }, { "epoch": 0.15096101043382756, "grad_norm": 0.36005473136901855, "learning_rate": 1.97295663372339e-05, "loss": 0.5603, "step": 5498 }, { "epoch": 0.15098846787479406, "grad_norm": 0.3879510462284088, "learning_rate": 1.9729466566027783e-05, "loss": 0.6081, "step": 5499 }, { "epoch": 0.15101592531576058, "grad_norm": 0.3433636724948883, "learning_rate": 1.9729366776673103e-05, "loss": 0.5526, "step": 5500 }, { "epoch": 0.15104338275672707, "grad_norm": 0.34557685256004333, "learning_rate": 1.9729266969170048e-05, "loss": 0.5649, "step": 5501 }, { "epoch": 0.15107084019769357, "grad_norm": 0.32958993315696716, "learning_rate": 1.972916714351881e-05, "loss": 0.4302, "step": 5502 }, { "epoch": 0.1510982976386601, "grad_norm": 0.34973689913749695, "learning_rate": 1.9729067299719563e-05, "loss": 0.5256, "step": 5503 }, { "epoch": 0.15112575507962658, "grad_norm": 0.3709667921066284, "learning_rate": 1.9728967437772506e-05, "loss": 0.5818, "step": 5504 }, { "epoch": 0.15115321252059308, "grad_norm": 0.3390612006187439, "learning_rate": 1.972886755767782e-05, "loss": 0.5358, "step": 5505 }, { "epoch": 0.15118066996155957, "grad_norm": 0.3532524108886719, "learning_rate": 1.9728767659435685e-05, "loss": 0.5602, "step": 5506 }, { "epoch": 0.1512081274025261, "grad_norm": 0.3373757302761078, "learning_rate": 1.9728667743046296e-05, "loss": 0.4957, "step": 5507 }, { "epoch": 0.1512355848434926, "grad_norm": 0.47588032484054565, "learning_rate": 1.9728567808509837e-05, "loss": 0.53, "step": 5508 }, { "epoch": 0.15126304228445908, "grad_norm": 0.38193178176879883, "learning_rate": 1.9728467855826497e-05, "loss": 0.6181, "step": 5509 }, { "epoch": 0.1512904997254256, "grad_norm": 0.3587850034236908, "learning_rate": 1.9728367884996454e-05, "loss": 0.5358, "step": 5510 }, { "epoch": 0.1513179571663921, "grad_norm": 0.37715083360671997, "learning_rate": 1.9728267896019907e-05, "loss": 0.5014, "step": 5511 }, { "epoch": 0.1513454146073586, "grad_norm": 0.32873043417930603, "learning_rate": 1.972816788889703e-05, "loss": 0.5294, "step": 5512 }, { "epoch": 0.15137287204832509, "grad_norm": 0.3735576868057251, "learning_rate": 1.9728067863628015e-05, "loss": 0.504, "step": 5513 }, { "epoch": 0.1514003294892916, "grad_norm": 0.3857606053352356, "learning_rate": 1.972796782021305e-05, "loss": 0.6554, "step": 5514 }, { "epoch": 0.1514277869302581, "grad_norm": 0.38550594449043274, "learning_rate": 1.9727867758652318e-05, "loss": 0.454, "step": 5515 }, { "epoch": 0.1514552443712246, "grad_norm": 0.34088799357414246, "learning_rate": 1.972776767894601e-05, "loss": 0.4998, "step": 5516 }, { "epoch": 0.15148270181219112, "grad_norm": 0.37846365571022034, "learning_rate": 1.9727667581094313e-05, "loss": 0.5778, "step": 5517 }, { "epoch": 0.1515101592531576, "grad_norm": 0.38862621784210205, "learning_rate": 1.972756746509741e-05, "loss": 0.619, "step": 5518 }, { "epoch": 0.1515376166941241, "grad_norm": 0.3352360129356384, "learning_rate": 1.9727467330955485e-05, "loss": 0.5275, "step": 5519 }, { "epoch": 0.1515650741350906, "grad_norm": 0.35910022258758545, "learning_rate": 1.9727367178668732e-05, "loss": 0.566, "step": 5520 }, { "epoch": 0.15159253157605712, "grad_norm": 0.38554370403289795, "learning_rate": 1.9727267008237334e-05, "loss": 0.5136, "step": 5521 }, { "epoch": 0.15161998901702362, "grad_norm": 0.343098908662796, "learning_rate": 1.9727166819661477e-05, "loss": 0.5412, "step": 5522 }, { "epoch": 0.1516474464579901, "grad_norm": 0.37904587388038635, "learning_rate": 1.9727066612941352e-05, "loss": 0.5571, "step": 5523 }, { "epoch": 0.1516749038989566, "grad_norm": 0.3893482983112335, "learning_rate": 1.9726966388077143e-05, "loss": 0.5691, "step": 5524 }, { "epoch": 0.15170236133992313, "grad_norm": 0.4430007040500641, "learning_rate": 1.9726866145069035e-05, "loss": 0.6242, "step": 5525 }, { "epoch": 0.15172981878088962, "grad_norm": 0.30272871255874634, "learning_rate": 1.9726765883917217e-05, "loss": 0.4733, "step": 5526 }, { "epoch": 0.1517572762218561, "grad_norm": 0.3406831920146942, "learning_rate": 1.972666560462188e-05, "loss": 0.4935, "step": 5527 }, { "epoch": 0.15178473366282264, "grad_norm": 0.3792722523212433, "learning_rate": 1.9726565307183203e-05, "loss": 0.5832, "step": 5528 }, { "epoch": 0.15181219110378913, "grad_norm": 0.3837742209434509, "learning_rate": 1.972646499160138e-05, "loss": 0.5561, "step": 5529 }, { "epoch": 0.15183964854475562, "grad_norm": 0.3891392648220062, "learning_rate": 1.972636465787659e-05, "loss": 0.5031, "step": 5530 }, { "epoch": 0.15186710598572212, "grad_norm": 0.4031456410884857, "learning_rate": 1.9726264306009036e-05, "loss": 0.541, "step": 5531 }, { "epoch": 0.15189456342668864, "grad_norm": 0.33621543645858765, "learning_rate": 1.9726163935998888e-05, "loss": 0.5663, "step": 5532 }, { "epoch": 0.15192202086765513, "grad_norm": 0.36366885900497437, "learning_rate": 1.972606354784634e-05, "loss": 0.6321, "step": 5533 }, { "epoch": 0.15194947830862163, "grad_norm": 0.3497453033924103, "learning_rate": 1.972596314155158e-05, "loss": 0.5924, "step": 5534 }, { "epoch": 0.15197693574958815, "grad_norm": 0.3571999669075012, "learning_rate": 1.9725862717114796e-05, "loss": 0.5954, "step": 5535 }, { "epoch": 0.15200439319055464, "grad_norm": 0.36435437202453613, "learning_rate": 1.9725762274536175e-05, "loss": 0.5434, "step": 5536 }, { "epoch": 0.15203185063152114, "grad_norm": 0.3968261480331421, "learning_rate": 1.97256618138159e-05, "loss": 0.5937, "step": 5537 }, { "epoch": 0.15205930807248763, "grad_norm": 0.3839634954929352, "learning_rate": 1.972556133495416e-05, "loss": 0.5538, "step": 5538 }, { "epoch": 0.15208676551345415, "grad_norm": 0.6172838807106018, "learning_rate": 1.9725460837951148e-05, "loss": 0.5342, "step": 5539 }, { "epoch": 0.15211422295442065, "grad_norm": 0.3880188465118408, "learning_rate": 1.9725360322807048e-05, "loss": 0.53, "step": 5540 }, { "epoch": 0.15214168039538714, "grad_norm": 0.40512752532958984, "learning_rate": 1.9725259789522043e-05, "loss": 0.5645, "step": 5541 }, { "epoch": 0.15216913783635366, "grad_norm": 0.3824261426925659, "learning_rate": 1.972515923809633e-05, "loss": 0.5065, "step": 5542 }, { "epoch": 0.15219659527732016, "grad_norm": 0.36385175585746765, "learning_rate": 1.9725058668530085e-05, "loss": 0.5621, "step": 5543 }, { "epoch": 0.15222405271828665, "grad_norm": 0.3566296100616455, "learning_rate": 1.9724958080823508e-05, "loss": 0.5892, "step": 5544 }, { "epoch": 0.15225151015925315, "grad_norm": 0.41889527440071106, "learning_rate": 1.9724857474976775e-05, "loss": 0.5937, "step": 5545 }, { "epoch": 0.15227896760021967, "grad_norm": 0.3974285125732422, "learning_rate": 1.972475685099008e-05, "loss": 0.5296, "step": 5546 }, { "epoch": 0.15230642504118616, "grad_norm": 0.3210245370864868, "learning_rate": 1.9724656208863614e-05, "loss": 0.5419, "step": 5547 }, { "epoch": 0.15233388248215265, "grad_norm": 0.3521832525730133, "learning_rate": 1.9724555548597556e-05, "loss": 0.5812, "step": 5548 }, { "epoch": 0.15236133992311918, "grad_norm": 0.3567545413970947, "learning_rate": 1.97244548701921e-05, "loss": 0.5357, "step": 5549 }, { "epoch": 0.15238879736408567, "grad_norm": 0.34361639618873596, "learning_rate": 1.972435417364743e-05, "loss": 0.5095, "step": 5550 }, { "epoch": 0.15241625480505216, "grad_norm": 0.3825651705265045, "learning_rate": 1.9724253458963737e-05, "loss": 0.5615, "step": 5551 }, { "epoch": 0.15244371224601866, "grad_norm": 0.3785964548587799, "learning_rate": 1.972415272614121e-05, "loss": 0.5819, "step": 5552 }, { "epoch": 0.15247116968698518, "grad_norm": 0.39576196670532227, "learning_rate": 1.9724051975180032e-05, "loss": 0.6905, "step": 5553 }, { "epoch": 0.15249862712795167, "grad_norm": 0.42916107177734375, "learning_rate": 1.9723951206080394e-05, "loss": 0.607, "step": 5554 }, { "epoch": 0.15252608456891817, "grad_norm": 0.3302153944969177, "learning_rate": 1.9723850418842484e-05, "loss": 0.4805, "step": 5555 }, { "epoch": 0.1525535420098847, "grad_norm": 0.3506641983985901, "learning_rate": 1.9723749613466488e-05, "loss": 0.5067, "step": 5556 }, { "epoch": 0.15258099945085118, "grad_norm": 0.4178961515426636, "learning_rate": 1.9723648789952596e-05, "loss": 0.5738, "step": 5557 }, { "epoch": 0.15260845689181768, "grad_norm": 0.36456209421157837, "learning_rate": 1.9723547948300995e-05, "loss": 0.6381, "step": 5558 }, { "epoch": 0.15263591433278417, "grad_norm": 0.34125974774360657, "learning_rate": 1.9723447088511877e-05, "loss": 0.4905, "step": 5559 }, { "epoch": 0.1526633717737507, "grad_norm": 0.41266191005706787, "learning_rate": 1.9723346210585422e-05, "loss": 0.5414, "step": 5560 }, { "epoch": 0.1526908292147172, "grad_norm": 0.38236403465270996, "learning_rate": 1.9723245314521827e-05, "loss": 0.557, "step": 5561 }, { "epoch": 0.15271828665568368, "grad_norm": 0.3475516140460968, "learning_rate": 1.9723144400321274e-05, "loss": 0.4895, "step": 5562 }, { "epoch": 0.1527457440966502, "grad_norm": 0.38912731409072876, "learning_rate": 1.9723043467983957e-05, "loss": 0.5621, "step": 5563 }, { "epoch": 0.1527732015376167, "grad_norm": 0.40371137857437134, "learning_rate": 1.9722942517510055e-05, "loss": 0.5787, "step": 5564 }, { "epoch": 0.1528006589785832, "grad_norm": 0.3598966896533966, "learning_rate": 1.9722841548899768e-05, "loss": 0.6024, "step": 5565 }, { "epoch": 0.1528281164195497, "grad_norm": 0.3343079090118408, "learning_rate": 1.9722740562153273e-05, "loss": 0.5271, "step": 5566 }, { "epoch": 0.1528555738605162, "grad_norm": 0.376792848110199, "learning_rate": 1.9722639557270765e-05, "loss": 0.5958, "step": 5567 }, { "epoch": 0.1528830313014827, "grad_norm": 0.3950939178466797, "learning_rate": 1.9722538534252432e-05, "loss": 0.5511, "step": 5568 }, { "epoch": 0.1529104887424492, "grad_norm": 0.378921240568161, "learning_rate": 1.9722437493098462e-05, "loss": 0.6029, "step": 5569 }, { "epoch": 0.15293794618341572, "grad_norm": 0.32411178946495056, "learning_rate": 1.9722336433809043e-05, "loss": 0.6283, "step": 5570 }, { "epoch": 0.1529654036243822, "grad_norm": 0.3505442440509796, "learning_rate": 1.9722235356384364e-05, "loss": 0.4478, "step": 5571 }, { "epoch": 0.1529928610653487, "grad_norm": 0.3823806047439575, "learning_rate": 1.972213426082461e-05, "loss": 0.5893, "step": 5572 }, { "epoch": 0.1530203185063152, "grad_norm": 0.33084216713905334, "learning_rate": 1.9722033147129974e-05, "loss": 0.51, "step": 5573 }, { "epoch": 0.15304777594728172, "grad_norm": 0.3912903964519501, "learning_rate": 1.9721932015300647e-05, "loss": 0.5463, "step": 5574 }, { "epoch": 0.15307523338824822, "grad_norm": 0.35563844442367554, "learning_rate": 1.972183086533681e-05, "loss": 0.544, "step": 5575 }, { "epoch": 0.1531026908292147, "grad_norm": 0.3753843903541565, "learning_rate": 1.9721729697238658e-05, "loss": 0.5364, "step": 5576 }, { "epoch": 0.15313014827018123, "grad_norm": 0.3997265696525574, "learning_rate": 1.9721628511006376e-05, "loss": 0.6538, "step": 5577 }, { "epoch": 0.15315760571114773, "grad_norm": 0.4251525402069092, "learning_rate": 1.972152730664015e-05, "loss": 0.5494, "step": 5578 }, { "epoch": 0.15318506315211422, "grad_norm": 0.3555522561073303, "learning_rate": 1.972142608414018e-05, "loss": 0.5594, "step": 5579 }, { "epoch": 0.15321252059308071, "grad_norm": 0.3251302242279053, "learning_rate": 1.9721324843506643e-05, "loss": 0.5197, "step": 5580 }, { "epoch": 0.15323997803404724, "grad_norm": 0.41119250655174255, "learning_rate": 1.9721223584739735e-05, "loss": 0.5744, "step": 5581 }, { "epoch": 0.15326743547501373, "grad_norm": 0.39105573296546936, "learning_rate": 1.972112230783964e-05, "loss": 0.5899, "step": 5582 }, { "epoch": 0.15329489291598022, "grad_norm": 0.40060991048812866, "learning_rate": 1.9721021012806553e-05, "loss": 0.6122, "step": 5583 }, { "epoch": 0.15332235035694675, "grad_norm": 0.3241768181324005, "learning_rate": 1.9720919699640654e-05, "loss": 0.5145, "step": 5584 }, { "epoch": 0.15334980779791324, "grad_norm": 0.3501599133014679, "learning_rate": 1.9720818368342142e-05, "loss": 0.4828, "step": 5585 }, { "epoch": 0.15337726523887973, "grad_norm": 0.32264870405197144, "learning_rate": 1.97207170189112e-05, "loss": 0.5323, "step": 5586 }, { "epoch": 0.15340472267984623, "grad_norm": 0.38283437490463257, "learning_rate": 1.9720615651348014e-05, "loss": 0.5986, "step": 5587 }, { "epoch": 0.15343218012081275, "grad_norm": 0.3666941523551941, "learning_rate": 1.9720514265652785e-05, "loss": 0.6173, "step": 5588 }, { "epoch": 0.15345963756177924, "grad_norm": 0.8631507754325867, "learning_rate": 1.9720412861825688e-05, "loss": 0.5501, "step": 5589 }, { "epoch": 0.15348709500274574, "grad_norm": 0.37115368247032166, "learning_rate": 1.9720311439866918e-05, "loss": 0.4932, "step": 5590 }, { "epoch": 0.15351455244371223, "grad_norm": 0.3938896954059601, "learning_rate": 1.972020999977667e-05, "loss": 0.5539, "step": 5591 }, { "epoch": 0.15354200988467875, "grad_norm": 0.9908981919288635, "learning_rate": 1.9720108541555126e-05, "loss": 0.5734, "step": 5592 }, { "epoch": 0.15356946732564525, "grad_norm": 0.4437166750431061, "learning_rate": 1.972000706520248e-05, "loss": 0.4891, "step": 5593 }, { "epoch": 0.15359692476661174, "grad_norm": 0.3536995053291321, "learning_rate": 1.9719905570718912e-05, "loss": 0.5118, "step": 5594 }, { "epoch": 0.15362438220757826, "grad_norm": 0.31357476115226746, "learning_rate": 1.9719804058104622e-05, "loss": 0.529, "step": 5595 }, { "epoch": 0.15365183964854476, "grad_norm": 0.3798641562461853, "learning_rate": 1.9719702527359795e-05, "loss": 0.5156, "step": 5596 }, { "epoch": 0.15367929708951125, "grad_norm": 0.3632446825504303, "learning_rate": 1.971960097848462e-05, "loss": 0.4834, "step": 5597 }, { "epoch": 0.15370675453047775, "grad_norm": 0.3052544891834259, "learning_rate": 1.9719499411479285e-05, "loss": 0.4785, "step": 5598 }, { "epoch": 0.15373421197144427, "grad_norm": 0.363316148519516, "learning_rate": 1.9719397826343986e-05, "loss": 0.5161, "step": 5599 }, { "epoch": 0.15376166941241076, "grad_norm": 0.3966797888278961, "learning_rate": 1.9719296223078905e-05, "loss": 0.5331, "step": 5600 }, { "epoch": 0.15378912685337726, "grad_norm": 0.37271738052368164, "learning_rate": 1.9719194601684236e-05, "loss": 0.4774, "step": 5601 }, { "epoch": 0.15381658429434378, "grad_norm": 0.36824944615364075, "learning_rate": 1.9719092962160165e-05, "loss": 0.5731, "step": 5602 }, { "epoch": 0.15384404173531027, "grad_norm": 0.350877583026886, "learning_rate": 1.971899130450689e-05, "loss": 0.4993, "step": 5603 }, { "epoch": 0.15387149917627677, "grad_norm": 0.3424382507801056, "learning_rate": 1.9718889628724586e-05, "loss": 0.5147, "step": 5604 }, { "epoch": 0.15389895661724326, "grad_norm": 0.3630553185939789, "learning_rate": 1.9718787934813454e-05, "loss": 0.5399, "step": 5605 }, { "epoch": 0.15392641405820978, "grad_norm": 0.3650819659233093, "learning_rate": 1.971868622277368e-05, "loss": 0.5648, "step": 5606 }, { "epoch": 0.15395387149917628, "grad_norm": 0.612394392490387, "learning_rate": 1.9718584492605455e-05, "loss": 0.6312, "step": 5607 }, { "epoch": 0.15398132894014277, "grad_norm": 0.3908075988292694, "learning_rate": 1.9718482744308967e-05, "loss": 0.5172, "step": 5608 }, { "epoch": 0.1540087863811093, "grad_norm": 0.34633272886276245, "learning_rate": 1.971838097788441e-05, "loss": 0.484, "step": 5609 }, { "epoch": 0.15403624382207579, "grad_norm": 0.342155396938324, "learning_rate": 1.971827919333197e-05, "loss": 0.5997, "step": 5610 }, { "epoch": 0.15406370126304228, "grad_norm": 0.37762144207954407, "learning_rate": 1.9718177390651837e-05, "loss": 0.5277, "step": 5611 }, { "epoch": 0.15409115870400877, "grad_norm": 0.35411328077316284, "learning_rate": 1.97180755698442e-05, "loss": 0.5291, "step": 5612 }, { "epoch": 0.1541186161449753, "grad_norm": 0.3767492473125458, "learning_rate": 1.971797373090925e-05, "loss": 0.5582, "step": 5613 }, { "epoch": 0.1541460735859418, "grad_norm": 0.35152357816696167, "learning_rate": 1.971787187384718e-05, "loss": 0.613, "step": 5614 }, { "epoch": 0.15417353102690828, "grad_norm": 0.3290799558162689, "learning_rate": 1.9717769998658175e-05, "loss": 0.5699, "step": 5615 }, { "epoch": 0.1542009884678748, "grad_norm": 0.3586711287498474, "learning_rate": 1.971766810534243e-05, "loss": 0.6129, "step": 5616 }, { "epoch": 0.1542284459088413, "grad_norm": 0.35451260209083557, "learning_rate": 1.971756619390013e-05, "loss": 0.5973, "step": 5617 }, { "epoch": 0.1542559033498078, "grad_norm": 0.40031322836875916, "learning_rate": 1.9717464264331468e-05, "loss": 0.6035, "step": 5618 }, { "epoch": 0.1542833607907743, "grad_norm": 0.37324970960617065, "learning_rate": 1.9717362316636635e-05, "loss": 0.5302, "step": 5619 }, { "epoch": 0.1543108182317408, "grad_norm": 0.5888229012489319, "learning_rate": 1.9717260350815818e-05, "loss": 0.5205, "step": 5620 }, { "epoch": 0.1543382756727073, "grad_norm": 0.4044210910797119, "learning_rate": 1.971715836686921e-05, "loss": 0.5862, "step": 5621 }, { "epoch": 0.1543657331136738, "grad_norm": 0.3922604024410248, "learning_rate": 1.9717056364797003e-05, "loss": 0.5699, "step": 5622 }, { "epoch": 0.15439319055464032, "grad_norm": 0.37618306279182434, "learning_rate": 1.9716954344599378e-05, "loss": 0.4798, "step": 5623 }, { "epoch": 0.1544206479956068, "grad_norm": 0.36229491233825684, "learning_rate": 1.971685230627654e-05, "loss": 0.5031, "step": 5624 }, { "epoch": 0.1544481054365733, "grad_norm": 0.3969488739967346, "learning_rate": 1.9716750249828665e-05, "loss": 0.5701, "step": 5625 }, { "epoch": 0.1544755628775398, "grad_norm": 0.4239669740200043, "learning_rate": 1.971664817525595e-05, "loss": 0.5679, "step": 5626 }, { "epoch": 0.15450302031850632, "grad_norm": 0.38922828435897827, "learning_rate": 1.9716546082558586e-05, "loss": 0.4963, "step": 5627 }, { "epoch": 0.15453047775947282, "grad_norm": 0.37125635147094727, "learning_rate": 1.9716443971736764e-05, "loss": 0.4945, "step": 5628 }, { "epoch": 0.1545579352004393, "grad_norm": 0.360576331615448, "learning_rate": 1.9716341842790673e-05, "loss": 0.4935, "step": 5629 }, { "epoch": 0.15458539264140583, "grad_norm": 0.3835815489292145, "learning_rate": 1.9716239695720502e-05, "loss": 0.5198, "step": 5630 }, { "epoch": 0.15461285008237233, "grad_norm": 0.41859200596809387, "learning_rate": 1.9716137530526446e-05, "loss": 0.578, "step": 5631 }, { "epoch": 0.15464030752333882, "grad_norm": 0.37173429131507874, "learning_rate": 1.9716035347208686e-05, "loss": 0.5734, "step": 5632 }, { "epoch": 0.15466776496430532, "grad_norm": 0.3195829689502716, "learning_rate": 1.9715933145767424e-05, "loss": 0.504, "step": 5633 }, { "epoch": 0.15469522240527184, "grad_norm": 0.3686690330505371, "learning_rate": 1.9715830926202844e-05, "loss": 0.7215, "step": 5634 }, { "epoch": 0.15472267984623833, "grad_norm": 0.381635457277298, "learning_rate": 1.9715728688515137e-05, "loss": 0.4869, "step": 5635 }, { "epoch": 0.15475013728720483, "grad_norm": 0.3400125205516815, "learning_rate": 1.97156264327045e-05, "loss": 0.4129, "step": 5636 }, { "epoch": 0.15477759472817135, "grad_norm": 0.4204837679862976, "learning_rate": 1.9715524158771115e-05, "loss": 0.469, "step": 5637 }, { "epoch": 0.15480505216913784, "grad_norm": 0.3621370792388916, "learning_rate": 1.9715421866715176e-05, "loss": 0.5086, "step": 5638 }, { "epoch": 0.15483250961010434, "grad_norm": 0.3612726926803589, "learning_rate": 1.9715319556536875e-05, "loss": 0.5486, "step": 5639 }, { "epoch": 0.15485996705107083, "grad_norm": 0.3584776818752289, "learning_rate": 1.9715217228236403e-05, "loss": 0.561, "step": 5640 }, { "epoch": 0.15488742449203735, "grad_norm": 0.40012142062187195, "learning_rate": 1.971511488181395e-05, "loss": 0.6588, "step": 5641 }, { "epoch": 0.15491488193300385, "grad_norm": 0.4113002121448517, "learning_rate": 1.971501251726971e-05, "loss": 0.5808, "step": 5642 }, { "epoch": 0.15494233937397034, "grad_norm": 0.3712954521179199, "learning_rate": 1.9714910134603866e-05, "loss": 0.5207, "step": 5643 }, { "epoch": 0.15496979681493686, "grad_norm": 0.37284165620803833, "learning_rate": 1.9714807733816618e-05, "loss": 0.5148, "step": 5644 }, { "epoch": 0.15499725425590336, "grad_norm": 0.3777366578578949, "learning_rate": 1.971470531490815e-05, "loss": 0.6387, "step": 5645 }, { "epoch": 0.15502471169686985, "grad_norm": 0.41760584712028503, "learning_rate": 1.9714602877878656e-05, "loss": 0.6482, "step": 5646 }, { "epoch": 0.15505216913783634, "grad_norm": 0.3293091058731079, "learning_rate": 1.971450042272833e-05, "loss": 0.5473, "step": 5647 }, { "epoch": 0.15507962657880286, "grad_norm": 0.3748513460159302, "learning_rate": 1.9714397949457358e-05, "loss": 0.5429, "step": 5648 }, { "epoch": 0.15510708401976936, "grad_norm": 0.37803444266319275, "learning_rate": 1.9714295458065933e-05, "loss": 0.5869, "step": 5649 }, { "epoch": 0.15513454146073585, "grad_norm": 0.37386223673820496, "learning_rate": 1.9714192948554247e-05, "loss": 0.5914, "step": 5650 }, { "epoch": 0.15516199890170237, "grad_norm": 0.38799989223480225, "learning_rate": 1.971409042092249e-05, "loss": 0.5054, "step": 5651 }, { "epoch": 0.15518945634266887, "grad_norm": 0.4151075482368469, "learning_rate": 1.9713987875170856e-05, "loss": 0.6192, "step": 5652 }, { "epoch": 0.15521691378363536, "grad_norm": 0.3364473581314087, "learning_rate": 1.9713885311299533e-05, "loss": 0.5616, "step": 5653 }, { "epoch": 0.15524437122460186, "grad_norm": 0.33981823921203613, "learning_rate": 1.9713782729308715e-05, "loss": 0.5037, "step": 5654 }, { "epoch": 0.15527182866556838, "grad_norm": 0.33685415983200073, "learning_rate": 1.9713680129198592e-05, "loss": 0.5078, "step": 5655 }, { "epoch": 0.15529928610653487, "grad_norm": 0.36643290519714355, "learning_rate": 1.9713577510969353e-05, "loss": 0.6075, "step": 5656 }, { "epoch": 0.15532674354750137, "grad_norm": 0.4498896896839142, "learning_rate": 1.971347487462119e-05, "loss": 0.5344, "step": 5657 }, { "epoch": 0.15535420098846786, "grad_norm": 0.3430514633655548, "learning_rate": 1.9713372220154302e-05, "loss": 0.5159, "step": 5658 }, { "epoch": 0.15538165842943438, "grad_norm": 0.3936172127723694, "learning_rate": 1.971326954756887e-05, "loss": 0.6037, "step": 5659 }, { "epoch": 0.15540911587040088, "grad_norm": 0.3689296543598175, "learning_rate": 1.9713166856865092e-05, "loss": 0.5211, "step": 5660 }, { "epoch": 0.15543657331136737, "grad_norm": 0.42469772696495056, "learning_rate": 1.9713064148043156e-05, "loss": 0.6194, "step": 5661 }, { "epoch": 0.1554640307523339, "grad_norm": 0.3666783571243286, "learning_rate": 1.971296142110326e-05, "loss": 0.5575, "step": 5662 }, { "epoch": 0.1554914881933004, "grad_norm": 0.5318717956542969, "learning_rate": 1.9712858676045585e-05, "loss": 0.5137, "step": 5663 }, { "epoch": 0.15551894563426688, "grad_norm": 0.3484901785850525, "learning_rate": 1.971275591287033e-05, "loss": 0.5125, "step": 5664 }, { "epoch": 0.15554640307523337, "grad_norm": 0.36983150243759155, "learning_rate": 1.971265313157769e-05, "loss": 0.5397, "step": 5665 }, { "epoch": 0.1555738605161999, "grad_norm": 0.34134283661842346, "learning_rate": 1.9712550332167848e-05, "loss": 0.5883, "step": 5666 }, { "epoch": 0.1556013179571664, "grad_norm": 0.40571022033691406, "learning_rate": 1.9712447514641e-05, "loss": 0.5857, "step": 5667 }, { "epoch": 0.15562877539813288, "grad_norm": 0.37248679995536804, "learning_rate": 1.9712344678997336e-05, "loss": 0.5755, "step": 5668 }, { "epoch": 0.1556562328390994, "grad_norm": 0.5988563895225525, "learning_rate": 1.9712241825237054e-05, "loss": 0.5292, "step": 5669 }, { "epoch": 0.1556836902800659, "grad_norm": 0.3268367350101471, "learning_rate": 1.9712138953360337e-05, "loss": 0.5259, "step": 5670 }, { "epoch": 0.1557111477210324, "grad_norm": 0.368869811296463, "learning_rate": 1.9712036063367384e-05, "loss": 0.557, "step": 5671 }, { "epoch": 0.1557386051619989, "grad_norm": 0.34634846448898315, "learning_rate": 1.971193315525838e-05, "loss": 0.5087, "step": 5672 }, { "epoch": 0.1557660626029654, "grad_norm": 0.350031316280365, "learning_rate": 1.9711830229033527e-05, "loss": 0.4736, "step": 5673 }, { "epoch": 0.1557935200439319, "grad_norm": 0.3859115540981293, "learning_rate": 1.971172728469301e-05, "loss": 0.6293, "step": 5674 }, { "epoch": 0.1558209774848984, "grad_norm": 0.341254323720932, "learning_rate": 1.9711624322237014e-05, "loss": 0.4693, "step": 5675 }, { "epoch": 0.15584843492586492, "grad_norm": 0.367657870054245, "learning_rate": 1.9711521341665745e-05, "loss": 0.5055, "step": 5676 }, { "epoch": 0.15587589236683141, "grad_norm": 0.3540874421596527, "learning_rate": 1.971141834297939e-05, "loss": 0.632, "step": 5677 }, { "epoch": 0.1559033498077979, "grad_norm": 0.8001943230628967, "learning_rate": 1.9711315326178137e-05, "loss": 0.5501, "step": 5678 }, { "epoch": 0.1559308072487644, "grad_norm": 0.3829965591430664, "learning_rate": 1.9711212291262184e-05, "loss": 0.591, "step": 5679 }, { "epoch": 0.15595826468973092, "grad_norm": 0.469049334526062, "learning_rate": 1.9711109238231723e-05, "loss": 0.6327, "step": 5680 }, { "epoch": 0.15598572213069742, "grad_norm": 0.4662819504737854, "learning_rate": 1.971100616708694e-05, "loss": 0.5724, "step": 5681 }, { "epoch": 0.1560131795716639, "grad_norm": 0.34809795022010803, "learning_rate": 1.9710903077828033e-05, "loss": 0.4946, "step": 5682 }, { "epoch": 0.15604063701263043, "grad_norm": 0.42551249265670776, "learning_rate": 1.9710799970455187e-05, "loss": 0.5812, "step": 5683 }, { "epoch": 0.15606809445359693, "grad_norm": 0.4487471580505371, "learning_rate": 1.9710696844968606e-05, "loss": 0.5986, "step": 5684 }, { "epoch": 0.15609555189456342, "grad_norm": 0.350241094827652, "learning_rate": 1.9710593701368476e-05, "loss": 0.5003, "step": 5685 }, { "epoch": 0.15612300933552992, "grad_norm": 0.36718353629112244, "learning_rate": 1.9710490539654987e-05, "loss": 0.5392, "step": 5686 }, { "epoch": 0.15615046677649644, "grad_norm": 0.3456656336784363, "learning_rate": 1.9710387359828332e-05, "loss": 0.5351, "step": 5687 }, { "epoch": 0.15617792421746293, "grad_norm": 0.38504382967948914, "learning_rate": 1.971028416188871e-05, "loss": 0.4954, "step": 5688 }, { "epoch": 0.15620538165842943, "grad_norm": 0.3964003324508667, "learning_rate": 1.9710180945836307e-05, "loss": 0.5494, "step": 5689 }, { "epoch": 0.15623283909939595, "grad_norm": 0.4076349139213562, "learning_rate": 1.971007771167132e-05, "loss": 0.56, "step": 5690 }, { "epoch": 0.15626029654036244, "grad_norm": 0.3611622452735901, "learning_rate": 1.970997445939393e-05, "loss": 0.5346, "step": 5691 }, { "epoch": 0.15628775398132894, "grad_norm": 0.3816754221916199, "learning_rate": 1.9709871189004346e-05, "loss": 0.5617, "step": 5692 }, { "epoch": 0.15631521142229543, "grad_norm": 0.36257824301719666, "learning_rate": 1.9709767900502754e-05, "loss": 0.5404, "step": 5693 }, { "epoch": 0.15634266886326195, "grad_norm": 0.35999318957328796, "learning_rate": 1.9709664593889345e-05, "loss": 0.5649, "step": 5694 }, { "epoch": 0.15637012630422845, "grad_norm": 0.3931587338447571, "learning_rate": 1.970956126916431e-05, "loss": 0.6255, "step": 5695 }, { "epoch": 0.15639758374519494, "grad_norm": 0.6847226023674011, "learning_rate": 1.9709457926327842e-05, "loss": 0.5495, "step": 5696 }, { "epoch": 0.15642504118616146, "grad_norm": 0.39677295088768005, "learning_rate": 1.9709354565380142e-05, "loss": 0.5969, "step": 5697 }, { "epoch": 0.15645249862712796, "grad_norm": 0.36721742153167725, "learning_rate": 1.9709251186321397e-05, "loss": 0.4868, "step": 5698 }, { "epoch": 0.15647995606809445, "grad_norm": 0.38937443494796753, "learning_rate": 1.9709147789151797e-05, "loss": 0.6131, "step": 5699 }, { "epoch": 0.15650741350906094, "grad_norm": 0.40215927362442017, "learning_rate": 1.9709044373871538e-05, "loss": 0.5846, "step": 5700 }, { "epoch": 0.15653487095002747, "grad_norm": 0.3552262783050537, "learning_rate": 1.9708940940480813e-05, "loss": 0.5372, "step": 5701 }, { "epoch": 0.15656232839099396, "grad_norm": 0.5246961712837219, "learning_rate": 1.9708837488979813e-05, "loss": 0.5628, "step": 5702 }, { "epoch": 0.15658978583196045, "grad_norm": 0.3650798499584198, "learning_rate": 1.9708734019368733e-05, "loss": 0.5507, "step": 5703 }, { "epoch": 0.15661724327292698, "grad_norm": 0.3553321361541748, "learning_rate": 1.9708630531647763e-05, "loss": 0.5193, "step": 5704 }, { "epoch": 0.15664470071389347, "grad_norm": 0.4687361419200897, "learning_rate": 1.97085270258171e-05, "loss": 0.4204, "step": 5705 }, { "epoch": 0.15667215815485996, "grad_norm": 0.3787229657173157, "learning_rate": 1.9708423501876937e-05, "loss": 0.5118, "step": 5706 }, { "epoch": 0.15669961559582646, "grad_norm": 0.36400356888771057, "learning_rate": 1.9708319959827467e-05, "loss": 0.5052, "step": 5707 }, { "epoch": 0.15672707303679298, "grad_norm": 0.34466567635536194, "learning_rate": 1.970821639966888e-05, "loss": 0.4726, "step": 5708 }, { "epoch": 0.15675453047775947, "grad_norm": 0.34959614276885986, "learning_rate": 1.970811282140137e-05, "loss": 0.5026, "step": 5709 }, { "epoch": 0.15678198791872597, "grad_norm": 0.747883141040802, "learning_rate": 1.9708009225025134e-05, "loss": 0.5719, "step": 5710 }, { "epoch": 0.1568094453596925, "grad_norm": 0.3982827961444855, "learning_rate": 1.9707905610540358e-05, "loss": 0.5659, "step": 5711 }, { "epoch": 0.15683690280065898, "grad_norm": 0.35307857394218445, "learning_rate": 1.9707801977947243e-05, "loss": 0.5307, "step": 5712 }, { "epoch": 0.15686436024162548, "grad_norm": 0.5110244750976562, "learning_rate": 1.9707698327245977e-05, "loss": 0.5552, "step": 5713 }, { "epoch": 0.15689181768259197, "grad_norm": 0.41765260696411133, "learning_rate": 1.9707594658436756e-05, "loss": 0.5412, "step": 5714 }, { "epoch": 0.1569192751235585, "grad_norm": 0.36512166261672974, "learning_rate": 1.9707490971519773e-05, "loss": 0.564, "step": 5715 }, { "epoch": 0.156946732564525, "grad_norm": 0.46435675024986267, "learning_rate": 1.9707387266495222e-05, "loss": 0.6512, "step": 5716 }, { "epoch": 0.15697419000549148, "grad_norm": 0.3301486670970917, "learning_rate": 1.9707283543363294e-05, "loss": 0.5047, "step": 5717 }, { "epoch": 0.157001647446458, "grad_norm": 0.42851969599723816, "learning_rate": 1.9707179802124185e-05, "loss": 0.5971, "step": 5718 }, { "epoch": 0.1570291048874245, "grad_norm": 0.34520021080970764, "learning_rate": 1.9707076042778085e-05, "loss": 0.5403, "step": 5719 }, { "epoch": 0.157056562328391, "grad_norm": 0.344036728143692, "learning_rate": 1.9706972265325193e-05, "loss": 0.5243, "step": 5720 }, { "epoch": 0.15708401976935749, "grad_norm": 0.4191938042640686, "learning_rate": 1.9706868469765697e-05, "loss": 0.6, "step": 5721 }, { "epoch": 0.157111477210324, "grad_norm": 0.34539875388145447, "learning_rate": 1.97067646560998e-05, "loss": 0.5308, "step": 5722 }, { "epoch": 0.1571389346512905, "grad_norm": 0.34519219398498535, "learning_rate": 1.970666082432768e-05, "loss": 0.4671, "step": 5723 }, { "epoch": 0.157166392092257, "grad_norm": 0.35154062509536743, "learning_rate": 1.9706556974449544e-05, "loss": 0.5274, "step": 5724 }, { "epoch": 0.1571938495332235, "grad_norm": 0.42690640687942505, "learning_rate": 1.9706453106465577e-05, "loss": 0.5548, "step": 5725 }, { "epoch": 0.15722130697419, "grad_norm": 0.3611331582069397, "learning_rate": 1.9706349220375983e-05, "loss": 0.4802, "step": 5726 }, { "epoch": 0.1572487644151565, "grad_norm": 0.3587016463279724, "learning_rate": 1.9706245316180944e-05, "loss": 0.4781, "step": 5727 }, { "epoch": 0.157276221856123, "grad_norm": 0.3511245548725128, "learning_rate": 1.9706141393880664e-05, "loss": 0.5232, "step": 5728 }, { "epoch": 0.15730367929708952, "grad_norm": 0.32656770944595337, "learning_rate": 1.9706037453475333e-05, "loss": 0.473, "step": 5729 }, { "epoch": 0.15733113673805602, "grad_norm": 0.3651609718799591, "learning_rate": 1.970593349496514e-05, "loss": 0.5553, "step": 5730 }, { "epoch": 0.1573585941790225, "grad_norm": 0.3943035304546356, "learning_rate": 1.970582951835029e-05, "loss": 0.6375, "step": 5731 }, { "epoch": 0.157386051619989, "grad_norm": 0.3842829167842865, "learning_rate": 1.9705725523630965e-05, "loss": 0.5426, "step": 5732 }, { "epoch": 0.15741350906095553, "grad_norm": 0.374269962310791, "learning_rate": 1.9705621510807363e-05, "loss": 0.5657, "step": 5733 }, { "epoch": 0.15744096650192202, "grad_norm": 0.351324200630188, "learning_rate": 1.9705517479879682e-05, "loss": 0.4921, "step": 5734 }, { "epoch": 0.1574684239428885, "grad_norm": 0.4011829197406769, "learning_rate": 1.970541343084811e-05, "loss": 0.5277, "step": 5735 }, { "epoch": 0.15749588138385504, "grad_norm": 0.3412875533103943, "learning_rate": 1.970530936371285e-05, "loss": 0.4528, "step": 5736 }, { "epoch": 0.15752333882482153, "grad_norm": 0.33026960492134094, "learning_rate": 1.9705205278474088e-05, "loss": 0.4501, "step": 5737 }, { "epoch": 0.15755079626578802, "grad_norm": 0.33283692598342896, "learning_rate": 1.970510117513202e-05, "loss": 0.4998, "step": 5738 }, { "epoch": 0.15757825370675452, "grad_norm": 0.3229723870754242, "learning_rate": 1.970499705368684e-05, "loss": 0.5547, "step": 5739 }, { "epoch": 0.15760571114772104, "grad_norm": 0.36741751432418823, "learning_rate": 1.9704892914138746e-05, "loss": 0.5698, "step": 5740 }, { "epoch": 0.15763316858868753, "grad_norm": 0.3811276853084564, "learning_rate": 1.9704788756487926e-05, "loss": 0.5202, "step": 5741 }, { "epoch": 0.15766062602965403, "grad_norm": 0.9053453803062439, "learning_rate": 1.9704684580734578e-05, "loss": 0.5738, "step": 5742 }, { "epoch": 0.15768808347062055, "grad_norm": 0.339199036359787, "learning_rate": 1.9704580386878897e-05, "loss": 0.564, "step": 5743 }, { "epoch": 0.15771554091158704, "grad_norm": 0.36432772874832153, "learning_rate": 1.9704476174921077e-05, "loss": 0.4559, "step": 5744 }, { "epoch": 0.15774299835255354, "grad_norm": 0.31987616419792175, "learning_rate": 1.970437194486131e-05, "loss": 0.5067, "step": 5745 }, { "epoch": 0.15777045579352003, "grad_norm": 0.33007457852363586, "learning_rate": 1.970426769669979e-05, "loss": 0.5477, "step": 5746 }, { "epoch": 0.15779791323448655, "grad_norm": 0.39827388525009155, "learning_rate": 1.970416343043672e-05, "loss": 0.5498, "step": 5747 }, { "epoch": 0.15782537067545305, "grad_norm": 0.3407354950904846, "learning_rate": 1.9704059146072283e-05, "loss": 0.5231, "step": 5748 }, { "epoch": 0.15785282811641954, "grad_norm": 0.37217265367507935, "learning_rate": 1.970395484360668e-05, "loss": 0.6176, "step": 5749 }, { "epoch": 0.15788028555738606, "grad_norm": 0.38865357637405396, "learning_rate": 1.9703850523040103e-05, "loss": 0.5176, "step": 5750 }, { "epoch": 0.15790774299835256, "grad_norm": 0.49500975012779236, "learning_rate": 1.970374618437275e-05, "loss": 0.6275, "step": 5751 }, { "epoch": 0.15793520043931905, "grad_norm": 0.3838292956352234, "learning_rate": 1.970364182760481e-05, "loss": 0.5881, "step": 5752 }, { "epoch": 0.15796265788028555, "grad_norm": 0.40469980239868164, "learning_rate": 1.970353745273648e-05, "loss": 0.6751, "step": 5753 }, { "epoch": 0.15799011532125207, "grad_norm": 0.35777559876441956, "learning_rate": 1.970343305976796e-05, "loss": 0.5185, "step": 5754 }, { "epoch": 0.15801757276221856, "grad_norm": 0.3822026550769806, "learning_rate": 1.970332864869944e-05, "loss": 0.4835, "step": 5755 }, { "epoch": 0.15804503020318506, "grad_norm": 0.37380337715148926, "learning_rate": 1.9703224219531113e-05, "loss": 0.606, "step": 5756 }, { "epoch": 0.15807248764415158, "grad_norm": 0.3529263138771057, "learning_rate": 1.9703119772263174e-05, "loss": 0.4886, "step": 5757 }, { "epoch": 0.15809994508511807, "grad_norm": 0.3444855511188507, "learning_rate": 1.9703015306895824e-05, "loss": 0.4678, "step": 5758 }, { "epoch": 0.15812740252608457, "grad_norm": 0.3289617896080017, "learning_rate": 1.9702910823429254e-05, "loss": 0.5192, "step": 5759 }, { "epoch": 0.15815485996705106, "grad_norm": 0.36807742714881897, "learning_rate": 1.9702806321863653e-05, "loss": 0.6219, "step": 5760 }, { "epoch": 0.15818231740801758, "grad_norm": 0.47651106119155884, "learning_rate": 1.970270180219923e-05, "loss": 0.5432, "step": 5761 }, { "epoch": 0.15820977484898407, "grad_norm": 0.4443008005619049, "learning_rate": 1.9702597264436166e-05, "loss": 0.5397, "step": 5762 }, { "epoch": 0.15823723228995057, "grad_norm": 0.3903176486492157, "learning_rate": 1.970249270857466e-05, "loss": 0.626, "step": 5763 }, { "epoch": 0.1582646897309171, "grad_norm": 0.36990347504615784, "learning_rate": 1.9702388134614908e-05, "loss": 0.5844, "step": 5764 }, { "epoch": 0.15829214717188358, "grad_norm": 0.3933398425579071, "learning_rate": 1.9702283542557108e-05, "loss": 0.6212, "step": 5765 }, { "epoch": 0.15831960461285008, "grad_norm": 0.37500086426734924, "learning_rate": 1.9702178932401453e-05, "loss": 0.5286, "step": 5766 }, { "epoch": 0.15834706205381657, "grad_norm": 0.3650386333465576, "learning_rate": 1.9702074304148137e-05, "loss": 0.4469, "step": 5767 }, { "epoch": 0.1583745194947831, "grad_norm": 0.3907754421234131, "learning_rate": 1.9701969657797354e-05, "loss": 0.4996, "step": 5768 }, { "epoch": 0.1584019769357496, "grad_norm": 0.4064387381076813, "learning_rate": 1.9701864993349303e-05, "loss": 0.4686, "step": 5769 }, { "epoch": 0.15842943437671608, "grad_norm": 0.3955513834953308, "learning_rate": 1.970176031080418e-05, "loss": 0.6584, "step": 5770 }, { "epoch": 0.1584568918176826, "grad_norm": 0.35488778352737427, "learning_rate": 1.9701655610162174e-05, "loss": 0.5377, "step": 5771 }, { "epoch": 0.1584843492586491, "grad_norm": 0.35780948400497437, "learning_rate": 1.9701550891423485e-05, "loss": 0.5219, "step": 5772 }, { "epoch": 0.1585118066996156, "grad_norm": 0.3735301196575165, "learning_rate": 1.9701446154588307e-05, "loss": 0.4948, "step": 5773 }, { "epoch": 0.1585392641405821, "grad_norm": 0.35856130719184875, "learning_rate": 1.9701341399656836e-05, "loss": 0.5293, "step": 5774 }, { "epoch": 0.1585667215815486, "grad_norm": 0.37868866324424744, "learning_rate": 1.9701236626629264e-05, "loss": 0.5893, "step": 5775 }, { "epoch": 0.1585941790225151, "grad_norm": 0.371233195066452, "learning_rate": 1.9701131835505793e-05, "loss": 0.5272, "step": 5776 }, { "epoch": 0.1586216364634816, "grad_norm": 0.3807404339313507, "learning_rate": 1.9701027026286614e-05, "loss": 0.65, "step": 5777 }, { "epoch": 0.15864909390444812, "grad_norm": 0.3668667674064636, "learning_rate": 1.9700922198971922e-05, "loss": 0.6067, "step": 5778 }, { "epoch": 0.1586765513454146, "grad_norm": 0.36773329973220825, "learning_rate": 1.9700817353561916e-05, "loss": 0.539, "step": 5779 }, { "epoch": 0.1587040087863811, "grad_norm": 0.33833056688308716, "learning_rate": 1.970071249005679e-05, "loss": 0.5507, "step": 5780 }, { "epoch": 0.1587314662273476, "grad_norm": 0.36202672123908997, "learning_rate": 1.9700607608456734e-05, "loss": 0.6184, "step": 5781 }, { "epoch": 0.15875892366831412, "grad_norm": 0.332453191280365, "learning_rate": 1.9700502708761955e-05, "loss": 0.5238, "step": 5782 }, { "epoch": 0.15878638110928062, "grad_norm": 0.34454667568206787, "learning_rate": 1.970039779097264e-05, "loss": 0.4607, "step": 5783 }, { "epoch": 0.1588138385502471, "grad_norm": 0.34077200293540955, "learning_rate": 1.9700292855088986e-05, "loss": 0.4803, "step": 5784 }, { "epoch": 0.15884129599121363, "grad_norm": 0.39110666513442993, "learning_rate": 1.970018790111119e-05, "loss": 0.5755, "step": 5785 }, { "epoch": 0.15886875343218013, "grad_norm": 0.33956167101860046, "learning_rate": 1.9700082929039447e-05, "loss": 0.489, "step": 5786 }, { "epoch": 0.15889621087314662, "grad_norm": 0.33375829458236694, "learning_rate": 1.9699977938873956e-05, "loss": 0.5401, "step": 5787 }, { "epoch": 0.15892366831411311, "grad_norm": 0.3255741596221924, "learning_rate": 1.969987293061491e-05, "loss": 0.4952, "step": 5788 }, { "epoch": 0.15895112575507964, "grad_norm": 0.3726412057876587, "learning_rate": 1.9699767904262502e-05, "loss": 0.4313, "step": 5789 }, { "epoch": 0.15897858319604613, "grad_norm": 0.39438995718955994, "learning_rate": 1.9699662859816934e-05, "loss": 0.6077, "step": 5790 }, { "epoch": 0.15900604063701262, "grad_norm": 0.3882342278957367, "learning_rate": 1.96995577972784e-05, "loss": 0.5958, "step": 5791 }, { "epoch": 0.15903349807797912, "grad_norm": 0.38058149814605713, "learning_rate": 1.969945271664709e-05, "loss": 0.4781, "step": 5792 }, { "epoch": 0.15906095551894564, "grad_norm": 0.3860342800617218, "learning_rate": 1.969934761792321e-05, "loss": 0.5583, "step": 5793 }, { "epoch": 0.15908841295991213, "grad_norm": 0.40498441457748413, "learning_rate": 1.969924250110695e-05, "loss": 0.559, "step": 5794 }, { "epoch": 0.15911587040087863, "grad_norm": 0.35673463344573975, "learning_rate": 1.9699137366198507e-05, "loss": 0.4347, "step": 5795 }, { "epoch": 0.15914332784184515, "grad_norm": 0.3768269717693329, "learning_rate": 1.9699032213198074e-05, "loss": 0.5653, "step": 5796 }, { "epoch": 0.15917078528281164, "grad_norm": 0.35625141859054565, "learning_rate": 1.9698927042105855e-05, "loss": 0.607, "step": 5797 }, { "epoch": 0.15919824272377814, "grad_norm": 0.3542579114437103, "learning_rate": 1.9698821852922038e-05, "loss": 0.46, "step": 5798 }, { "epoch": 0.15922570016474463, "grad_norm": 0.4216062128543854, "learning_rate": 1.9698716645646824e-05, "loss": 0.579, "step": 5799 }, { "epoch": 0.15925315760571115, "grad_norm": 0.3080407381057739, "learning_rate": 1.969861142028041e-05, "loss": 0.5334, "step": 5800 }, { "epoch": 0.15928061504667765, "grad_norm": 0.34467074275016785, "learning_rate": 1.969850617682299e-05, "loss": 0.548, "step": 5801 }, { "epoch": 0.15930807248764414, "grad_norm": 0.31846246123313904, "learning_rate": 1.9698400915274757e-05, "loss": 0.4931, "step": 5802 }, { "epoch": 0.15933552992861066, "grad_norm": 0.33270037174224854, "learning_rate": 1.9698295635635913e-05, "loss": 0.5774, "step": 5803 }, { "epoch": 0.15936298736957716, "grad_norm": 0.35140112042427063, "learning_rate": 1.9698190337906654e-05, "loss": 0.5147, "step": 5804 }, { "epoch": 0.15939044481054365, "grad_norm": 0.333535760641098, "learning_rate": 1.969808502208717e-05, "loss": 0.5082, "step": 5805 }, { "epoch": 0.15941790225151015, "grad_norm": 0.3509487807750702, "learning_rate": 1.9697979688177664e-05, "loss": 0.5157, "step": 5806 }, { "epoch": 0.15944535969247667, "grad_norm": 0.3680865466594696, "learning_rate": 1.9697874336178333e-05, "loss": 0.5713, "step": 5807 }, { "epoch": 0.15947281713344316, "grad_norm": 0.412695974111557, "learning_rate": 1.969776896608937e-05, "loss": 0.5717, "step": 5808 }, { "epoch": 0.15950027457440966, "grad_norm": 0.3523961901664734, "learning_rate": 1.969766357791097e-05, "loss": 0.5422, "step": 5809 }, { "epoch": 0.15952773201537618, "grad_norm": 0.35627445578575134, "learning_rate": 1.9697558171643337e-05, "loss": 0.5489, "step": 5810 }, { "epoch": 0.15955518945634267, "grad_norm": 0.34650328755378723, "learning_rate": 1.9697452747286664e-05, "loss": 0.5273, "step": 5811 }, { "epoch": 0.15958264689730917, "grad_norm": 0.4469851553440094, "learning_rate": 1.969734730484114e-05, "loss": 0.5952, "step": 5812 }, { "epoch": 0.15961010433827566, "grad_norm": 0.3882896304130554, "learning_rate": 1.9697241844306972e-05, "loss": 0.5978, "step": 5813 }, { "epoch": 0.15963756177924218, "grad_norm": 0.32871153950691223, "learning_rate": 1.9697136365684354e-05, "loss": 0.4977, "step": 5814 }, { "epoch": 0.15966501922020868, "grad_norm": 0.36846068501472473, "learning_rate": 1.969703086897348e-05, "loss": 0.5118, "step": 5815 }, { "epoch": 0.15969247666117517, "grad_norm": 0.3649718463420868, "learning_rate": 1.9696925354174548e-05, "loss": 0.6007, "step": 5816 }, { "epoch": 0.1597199341021417, "grad_norm": 0.4566709101200104, "learning_rate": 1.9696819821287756e-05, "loss": 0.4894, "step": 5817 }, { "epoch": 0.15974739154310819, "grad_norm": 0.3595947325229645, "learning_rate": 1.9696714270313298e-05, "loss": 0.5847, "step": 5818 }, { "epoch": 0.15977484898407468, "grad_norm": 0.39677107334136963, "learning_rate": 1.9696608701251376e-05, "loss": 0.5167, "step": 5819 }, { "epoch": 0.15980230642504117, "grad_norm": 0.34387025237083435, "learning_rate": 1.9696503114102184e-05, "loss": 0.5128, "step": 5820 }, { "epoch": 0.1598297638660077, "grad_norm": 0.3796578347682953, "learning_rate": 1.9696397508865917e-05, "loss": 0.6254, "step": 5821 }, { "epoch": 0.1598572213069742, "grad_norm": 0.3710852861404419, "learning_rate": 1.9696291885542777e-05, "loss": 0.5229, "step": 5822 }, { "epoch": 0.15988467874794068, "grad_norm": 0.45253461599349976, "learning_rate": 1.9696186244132955e-05, "loss": 0.5564, "step": 5823 }, { "epoch": 0.1599121361889072, "grad_norm": 0.37040168046951294, "learning_rate": 1.9696080584636653e-05, "loss": 0.5519, "step": 5824 }, { "epoch": 0.1599395936298737, "grad_norm": 0.31432172656059265, "learning_rate": 1.9695974907054066e-05, "loss": 0.4253, "step": 5825 }, { "epoch": 0.1599670510708402, "grad_norm": 0.4003652036190033, "learning_rate": 1.969586921138539e-05, "loss": 0.552, "step": 5826 }, { "epoch": 0.1599945085118067, "grad_norm": 0.3812129497528076, "learning_rate": 1.969576349763082e-05, "loss": 0.5731, "step": 5827 }, { "epoch": 0.1600219659527732, "grad_norm": 0.3795168399810791, "learning_rate": 1.9695657765790562e-05, "loss": 0.5125, "step": 5828 }, { "epoch": 0.1600494233937397, "grad_norm": 0.37622833251953125, "learning_rate": 1.969555201586481e-05, "loss": 0.4433, "step": 5829 }, { "epoch": 0.1600768808347062, "grad_norm": 0.36082667112350464, "learning_rate": 1.969544624785375e-05, "loss": 0.5781, "step": 5830 }, { "epoch": 0.16010433827567272, "grad_norm": 0.3895476758480072, "learning_rate": 1.9695340461757596e-05, "loss": 0.5414, "step": 5831 }, { "epoch": 0.1601317957166392, "grad_norm": 0.6734892725944519, "learning_rate": 1.9695234657576533e-05, "loss": 0.4872, "step": 5832 }, { "epoch": 0.1601592531576057, "grad_norm": 0.34317663311958313, "learning_rate": 1.9695128835310767e-05, "loss": 0.6032, "step": 5833 }, { "epoch": 0.1601867105985722, "grad_norm": 0.3094826936721802, "learning_rate": 1.969502299496049e-05, "loss": 0.5206, "step": 5834 }, { "epoch": 0.16021416803953872, "grad_norm": 0.4411672055721283, "learning_rate": 1.96949171365259e-05, "loss": 0.6241, "step": 5835 }, { "epoch": 0.16024162548050522, "grad_norm": 0.33684468269348145, "learning_rate": 1.9694811260007195e-05, "loss": 0.4768, "step": 5836 }, { "epoch": 0.1602690829214717, "grad_norm": 0.40342652797698975, "learning_rate": 1.9694705365404576e-05, "loss": 0.5024, "step": 5837 }, { "epoch": 0.16029654036243823, "grad_norm": 0.3723052442073822, "learning_rate": 1.9694599452718233e-05, "loss": 0.5601, "step": 5838 }, { "epoch": 0.16032399780340473, "grad_norm": 0.38121238350868225, "learning_rate": 1.9694493521948367e-05, "loss": 0.5119, "step": 5839 }, { "epoch": 0.16035145524437122, "grad_norm": 0.4798239767551422, "learning_rate": 1.969438757309518e-05, "loss": 0.4974, "step": 5840 }, { "epoch": 0.16037891268533772, "grad_norm": 0.33632585406303406, "learning_rate": 1.9694281606158864e-05, "loss": 0.5377, "step": 5841 }, { "epoch": 0.16040637012630424, "grad_norm": 0.4216699004173279, "learning_rate": 1.969417562113962e-05, "loss": 0.6273, "step": 5842 }, { "epoch": 0.16043382756727073, "grad_norm": 0.3561878502368927, "learning_rate": 1.9694069618037644e-05, "loss": 0.4422, "step": 5843 }, { "epoch": 0.16046128500823723, "grad_norm": 0.325895220041275, "learning_rate": 1.9693963596853134e-05, "loss": 0.4177, "step": 5844 }, { "epoch": 0.16048874244920375, "grad_norm": 0.3450259268283844, "learning_rate": 1.969385755758629e-05, "loss": 0.488, "step": 5845 }, { "epoch": 0.16051619989017024, "grad_norm": 0.349286288022995, "learning_rate": 1.9693751500237303e-05, "loss": 0.4784, "step": 5846 }, { "epoch": 0.16054365733113674, "grad_norm": 0.357086718082428, "learning_rate": 1.969364542480638e-05, "loss": 0.554, "step": 5847 }, { "epoch": 0.16057111477210323, "grad_norm": 0.4401469826698303, "learning_rate": 1.9693539331293716e-05, "loss": 0.5581, "step": 5848 }, { "epoch": 0.16059857221306975, "grad_norm": 0.3392990827560425, "learning_rate": 1.9693433219699503e-05, "loss": 0.4647, "step": 5849 }, { "epoch": 0.16062602965403625, "grad_norm": 0.47106432914733887, "learning_rate": 1.9693327090023943e-05, "loss": 0.5, "step": 5850 }, { "epoch": 0.16065348709500274, "grad_norm": 0.38277336955070496, "learning_rate": 1.9693220942267234e-05, "loss": 0.5098, "step": 5851 }, { "epoch": 0.16068094453596926, "grad_norm": 0.3795066773891449, "learning_rate": 1.9693114776429578e-05, "loss": 0.496, "step": 5852 }, { "epoch": 0.16070840197693576, "grad_norm": 0.4115433096885681, "learning_rate": 1.9693008592511166e-05, "loss": 0.6135, "step": 5853 }, { "epoch": 0.16073585941790225, "grad_norm": 0.3940642178058624, "learning_rate": 1.9692902390512202e-05, "loss": 0.5703, "step": 5854 }, { "epoch": 0.16076331685886874, "grad_norm": 0.39864999055862427, "learning_rate": 1.969279617043288e-05, "loss": 0.6381, "step": 5855 }, { "epoch": 0.16079077429983527, "grad_norm": 0.36092427372932434, "learning_rate": 1.9692689932273402e-05, "loss": 0.5574, "step": 5856 }, { "epoch": 0.16081823174080176, "grad_norm": 0.3470171391963959, "learning_rate": 1.9692583676033958e-05, "loss": 0.4959, "step": 5857 }, { "epoch": 0.16084568918176825, "grad_norm": 0.7340888977050781, "learning_rate": 1.9692477401714756e-05, "loss": 0.5689, "step": 5858 }, { "epoch": 0.16087314662273475, "grad_norm": 0.31496119499206543, "learning_rate": 1.9692371109315987e-05, "loss": 0.466, "step": 5859 }, { "epoch": 0.16090060406370127, "grad_norm": 0.35240063071250916, "learning_rate": 1.9692264798837856e-05, "loss": 0.5502, "step": 5860 }, { "epoch": 0.16092806150466776, "grad_norm": 0.4119267463684082, "learning_rate": 1.969215847028056e-05, "loss": 0.5882, "step": 5861 }, { "epoch": 0.16095551894563426, "grad_norm": 0.383576363325119, "learning_rate": 1.969205212364429e-05, "loss": 0.569, "step": 5862 }, { "epoch": 0.16098297638660078, "grad_norm": 0.3734026849269867, "learning_rate": 1.9691945758929252e-05, "loss": 0.5759, "step": 5863 }, { "epoch": 0.16101043382756727, "grad_norm": 0.4151698052883148, "learning_rate": 1.969183937613564e-05, "loss": 0.5599, "step": 5864 }, { "epoch": 0.16103789126853377, "grad_norm": 0.3528323769569397, "learning_rate": 1.9691732975263657e-05, "loss": 0.5217, "step": 5865 }, { "epoch": 0.16106534870950026, "grad_norm": 0.3257858455181122, "learning_rate": 1.9691626556313497e-05, "loss": 0.5366, "step": 5866 }, { "epoch": 0.16109280615046678, "grad_norm": 0.3453126847743988, "learning_rate": 1.969152011928536e-05, "loss": 0.5172, "step": 5867 }, { "epoch": 0.16112026359143328, "grad_norm": 0.3504716455936432, "learning_rate": 1.9691413664179444e-05, "loss": 0.478, "step": 5868 }, { "epoch": 0.16114772103239977, "grad_norm": 0.45592445135116577, "learning_rate": 1.969130719099595e-05, "loss": 0.5515, "step": 5869 }, { "epoch": 0.1611751784733663, "grad_norm": 0.422553688287735, "learning_rate": 1.9691200699735072e-05, "loss": 0.5849, "step": 5870 }, { "epoch": 0.1612026359143328, "grad_norm": 0.37896567583084106, "learning_rate": 1.9691094190397017e-05, "loss": 0.6102, "step": 5871 }, { "epoch": 0.16123009335529928, "grad_norm": 0.4469166100025177, "learning_rate": 1.9690987662981974e-05, "loss": 0.5411, "step": 5872 }, { "epoch": 0.16125755079626578, "grad_norm": 0.33068227767944336, "learning_rate": 1.9690881117490147e-05, "loss": 0.4687, "step": 5873 }, { "epoch": 0.1612850082372323, "grad_norm": 0.39757853746414185, "learning_rate": 1.9690774553921733e-05, "loss": 0.4867, "step": 5874 }, { "epoch": 0.1613124656781988, "grad_norm": 0.37361449003219604, "learning_rate": 1.9690667972276937e-05, "loss": 0.601, "step": 5875 }, { "epoch": 0.16133992311916528, "grad_norm": 0.33023834228515625, "learning_rate": 1.9690561372555947e-05, "loss": 0.4837, "step": 5876 }, { "epoch": 0.1613673805601318, "grad_norm": 0.4379355311393738, "learning_rate": 1.9690454754758965e-05, "loss": 0.5206, "step": 5877 }, { "epoch": 0.1613948380010983, "grad_norm": 0.38781869411468506, "learning_rate": 1.96903481188862e-05, "loss": 0.5551, "step": 5878 }, { "epoch": 0.1614222954420648, "grad_norm": 0.36046484112739563, "learning_rate": 1.9690241464937837e-05, "loss": 0.6027, "step": 5879 }, { "epoch": 0.1614497528830313, "grad_norm": 0.3675413727760315, "learning_rate": 1.969013479291408e-05, "loss": 0.6005, "step": 5880 }, { "epoch": 0.1614772103239978, "grad_norm": 0.49054136872291565, "learning_rate": 1.9690028102815132e-05, "loss": 0.6094, "step": 5881 }, { "epoch": 0.1615046677649643, "grad_norm": 0.3671773672103882, "learning_rate": 1.968992139464119e-05, "loss": 0.4651, "step": 5882 }, { "epoch": 0.1615321252059308, "grad_norm": 0.3482096791267395, "learning_rate": 1.9689814668392446e-05, "loss": 0.5422, "step": 5883 }, { "epoch": 0.16155958264689732, "grad_norm": 0.3815556764602661, "learning_rate": 1.968970792406911e-05, "loss": 0.5204, "step": 5884 }, { "epoch": 0.16158704008786381, "grad_norm": 0.3987388610839844, "learning_rate": 1.9689601161671374e-05, "loss": 0.6281, "step": 5885 }, { "epoch": 0.1616144975288303, "grad_norm": 0.3597581386566162, "learning_rate": 1.968949438119944e-05, "loss": 0.5691, "step": 5886 }, { "epoch": 0.1616419549697968, "grad_norm": 0.37345170974731445, "learning_rate": 1.9689387582653506e-05, "loss": 0.5491, "step": 5887 }, { "epoch": 0.16166941241076332, "grad_norm": 0.4356056749820709, "learning_rate": 1.9689280766033775e-05, "loss": 0.5302, "step": 5888 }, { "epoch": 0.16169686985172982, "grad_norm": 0.3340286612510681, "learning_rate": 1.968917393134044e-05, "loss": 0.5846, "step": 5889 }, { "epoch": 0.1617243272926963, "grad_norm": 0.4203908443450928, "learning_rate": 1.9689067078573704e-05, "loss": 0.5624, "step": 5890 }, { "epoch": 0.16175178473366283, "grad_norm": 0.3836018741130829, "learning_rate": 1.9688960207733764e-05, "loss": 0.5782, "step": 5891 }, { "epoch": 0.16177924217462933, "grad_norm": 0.37024611234664917, "learning_rate": 1.968885331882082e-05, "loss": 0.4771, "step": 5892 }, { "epoch": 0.16180669961559582, "grad_norm": 0.34736236929893494, "learning_rate": 1.9688746411835072e-05, "loss": 0.5244, "step": 5893 }, { "epoch": 0.16183415705656232, "grad_norm": 0.4740428924560547, "learning_rate": 1.9688639486776723e-05, "loss": 0.5385, "step": 5894 }, { "epoch": 0.16186161449752884, "grad_norm": 0.37892478704452515, "learning_rate": 1.968853254364597e-05, "loss": 0.44, "step": 5895 }, { "epoch": 0.16188907193849533, "grad_norm": 0.3521192967891693, "learning_rate": 1.9688425582443007e-05, "loss": 0.572, "step": 5896 }, { "epoch": 0.16191652937946183, "grad_norm": 0.5501775145530701, "learning_rate": 1.968831860316804e-05, "loss": 0.5192, "step": 5897 }, { "epoch": 0.16194398682042835, "grad_norm": 0.45205530524253845, "learning_rate": 1.968821160582127e-05, "loss": 0.5395, "step": 5898 }, { "epoch": 0.16197144426139484, "grad_norm": 0.33704814314842224, "learning_rate": 1.968810459040289e-05, "loss": 0.564, "step": 5899 }, { "epoch": 0.16199890170236134, "grad_norm": 0.33496204018592834, "learning_rate": 1.9687997556913104e-05, "loss": 0.4916, "step": 5900 }, { "epoch": 0.16202635914332783, "grad_norm": 0.4107848107814789, "learning_rate": 1.968789050535211e-05, "loss": 0.6047, "step": 5901 }, { "epoch": 0.16205381658429435, "grad_norm": 0.3443552553653717, "learning_rate": 1.9687783435720106e-05, "loss": 0.4893, "step": 5902 }, { "epoch": 0.16208127402526085, "grad_norm": 0.355432391166687, "learning_rate": 1.96876763480173e-05, "loss": 0.5388, "step": 5903 }, { "epoch": 0.16210873146622734, "grad_norm": 0.37068697810173035, "learning_rate": 1.9687569242243877e-05, "loss": 0.6622, "step": 5904 }, { "epoch": 0.16213618890719386, "grad_norm": 0.4133038818836212, "learning_rate": 1.968746211840005e-05, "loss": 0.6187, "step": 5905 }, { "epoch": 0.16216364634816036, "grad_norm": 0.47101274132728577, "learning_rate": 1.9687354976486014e-05, "loss": 0.5879, "step": 5906 }, { "epoch": 0.16219110378912685, "grad_norm": 0.4012320041656494, "learning_rate": 1.9687247816501972e-05, "loss": 0.574, "step": 5907 }, { "epoch": 0.16221856123009334, "grad_norm": 0.3571380078792572, "learning_rate": 1.9687140638448115e-05, "loss": 0.5659, "step": 5908 }, { "epoch": 0.16224601867105987, "grad_norm": 0.3719521462917328, "learning_rate": 1.9687033442324654e-05, "loss": 0.5379, "step": 5909 }, { "epoch": 0.16227347611202636, "grad_norm": 0.37979522347450256, "learning_rate": 1.968692622813178e-05, "loss": 0.4966, "step": 5910 }, { "epoch": 0.16230093355299285, "grad_norm": 0.37114787101745605, "learning_rate": 1.96868189958697e-05, "loss": 0.5757, "step": 5911 }, { "epoch": 0.16232839099395938, "grad_norm": 0.3583783209323883, "learning_rate": 1.968671174553861e-05, "loss": 0.5182, "step": 5912 }, { "epoch": 0.16235584843492587, "grad_norm": 0.46979987621307373, "learning_rate": 1.968660447713871e-05, "loss": 0.5884, "step": 5913 }, { "epoch": 0.16238330587589236, "grad_norm": 0.5307267308235168, "learning_rate": 1.9686497190670203e-05, "loss": 0.5149, "step": 5914 }, { "epoch": 0.16241076331685886, "grad_norm": 0.33474233746528625, "learning_rate": 1.9686389886133287e-05, "loss": 0.5521, "step": 5915 }, { "epoch": 0.16243822075782538, "grad_norm": 0.3897472620010376, "learning_rate": 1.968628256352816e-05, "loss": 0.5584, "step": 5916 }, { "epoch": 0.16246567819879187, "grad_norm": 0.4173433184623718, "learning_rate": 1.9686175222855025e-05, "loss": 0.4708, "step": 5917 }, { "epoch": 0.16249313563975837, "grad_norm": 0.3600271940231323, "learning_rate": 1.9686067864114086e-05, "loss": 0.4948, "step": 5918 }, { "epoch": 0.1625205930807249, "grad_norm": 0.40454354882240295, "learning_rate": 1.9685960487305534e-05, "loss": 0.5384, "step": 5919 }, { "epoch": 0.16254805052169138, "grad_norm": 0.3218441605567932, "learning_rate": 1.9685853092429576e-05, "loss": 0.5256, "step": 5920 }, { "epoch": 0.16257550796265788, "grad_norm": 0.34667226672172546, "learning_rate": 1.9685745679486408e-05, "loss": 0.5208, "step": 5921 }, { "epoch": 0.16260296540362437, "grad_norm": 0.3279931843280792, "learning_rate": 1.9685638248476238e-05, "loss": 0.5071, "step": 5922 }, { "epoch": 0.1626304228445909, "grad_norm": 0.3636189103126526, "learning_rate": 1.968553079939926e-05, "loss": 0.588, "step": 5923 }, { "epoch": 0.1626578802855574, "grad_norm": 0.376956969499588, "learning_rate": 1.9685423332255673e-05, "loss": 0.6114, "step": 5924 }, { "epoch": 0.16268533772652388, "grad_norm": 0.40843212604522705, "learning_rate": 1.9685315847045684e-05, "loss": 0.5531, "step": 5925 }, { "epoch": 0.16271279516749038, "grad_norm": 0.371143639087677, "learning_rate": 1.9685208343769485e-05, "loss": 0.5522, "step": 5926 }, { "epoch": 0.1627402526084569, "grad_norm": 0.34312376379966736, "learning_rate": 1.9685100822427286e-05, "loss": 0.4394, "step": 5927 }, { "epoch": 0.1627677100494234, "grad_norm": 0.353338360786438, "learning_rate": 1.968499328301928e-05, "loss": 0.5805, "step": 5928 }, { "epoch": 0.16279516749038989, "grad_norm": 0.3604951500892639, "learning_rate": 1.9684885725545672e-05, "loss": 0.5784, "step": 5929 }, { "epoch": 0.1628226249313564, "grad_norm": 0.42750948667526245, "learning_rate": 1.9684778150006657e-05, "loss": 0.6232, "step": 5930 }, { "epoch": 0.1628500823723229, "grad_norm": 0.3578625023365021, "learning_rate": 1.9684670556402444e-05, "loss": 0.5072, "step": 5931 }, { "epoch": 0.1628775398132894, "grad_norm": 0.351559042930603, "learning_rate": 1.968456294473323e-05, "loss": 0.477, "step": 5932 }, { "epoch": 0.1629049972542559, "grad_norm": 0.39821723103523254, "learning_rate": 1.9684455314999215e-05, "loss": 0.5088, "step": 5933 }, { "epoch": 0.1629324546952224, "grad_norm": 0.37422698736190796, "learning_rate": 1.9684347667200598e-05, "loss": 0.5423, "step": 5934 }, { "epoch": 0.1629599121361889, "grad_norm": 0.36309877038002014, "learning_rate": 1.9684240001337583e-05, "loss": 0.5705, "step": 5935 }, { "epoch": 0.1629873695771554, "grad_norm": 0.3645501136779785, "learning_rate": 1.968413231741037e-05, "loss": 0.4816, "step": 5936 }, { "epoch": 0.16301482701812192, "grad_norm": 0.5422803163528442, "learning_rate": 1.9684024615419156e-05, "loss": 0.5034, "step": 5937 }, { "epoch": 0.16304228445908842, "grad_norm": 0.346344918012619, "learning_rate": 1.968391689536415e-05, "loss": 0.4519, "step": 5938 }, { "epoch": 0.1630697419000549, "grad_norm": 0.3669911026954651, "learning_rate": 1.9683809157245545e-05, "loss": 0.552, "step": 5939 }, { "epoch": 0.1630971993410214, "grad_norm": 0.37110984325408936, "learning_rate": 1.9683701401063543e-05, "loss": 0.564, "step": 5940 }, { "epoch": 0.16312465678198793, "grad_norm": 0.32724159955978394, "learning_rate": 1.968359362681835e-05, "loss": 0.5184, "step": 5941 }, { "epoch": 0.16315211422295442, "grad_norm": 0.3542429506778717, "learning_rate": 1.9683485834510166e-05, "loss": 0.5383, "step": 5942 }, { "epoch": 0.1631795716639209, "grad_norm": 0.36632803082466125, "learning_rate": 1.9683378024139187e-05, "loss": 0.5435, "step": 5943 }, { "epoch": 0.16320702910488744, "grad_norm": 0.351973295211792, "learning_rate": 1.968327019570562e-05, "loss": 0.5528, "step": 5944 }, { "epoch": 0.16323448654585393, "grad_norm": 0.4227403402328491, "learning_rate": 1.9683162349209662e-05, "loss": 0.5964, "step": 5945 }, { "epoch": 0.16326194398682042, "grad_norm": 0.387090802192688, "learning_rate": 1.968305448465152e-05, "loss": 0.5289, "step": 5946 }, { "epoch": 0.16328940142778692, "grad_norm": 0.37157219648361206, "learning_rate": 1.9682946602031385e-05, "loss": 0.6159, "step": 5947 }, { "epoch": 0.16331685886875344, "grad_norm": 0.34170252084732056, "learning_rate": 1.9682838701349465e-05, "loss": 0.5396, "step": 5948 }, { "epoch": 0.16334431630971993, "grad_norm": 0.3778645396232605, "learning_rate": 1.9682730782605964e-05, "loss": 0.6032, "step": 5949 }, { "epoch": 0.16337177375068643, "grad_norm": 0.4253092110157013, "learning_rate": 1.9682622845801077e-05, "loss": 0.558, "step": 5950 }, { "epoch": 0.16339923119165295, "grad_norm": 0.35710448026657104, "learning_rate": 1.968251489093501e-05, "loss": 0.5067, "step": 5951 }, { "epoch": 0.16342668863261944, "grad_norm": 0.3436387777328491, "learning_rate": 1.9682406918007962e-05, "loss": 0.4816, "step": 5952 }, { "epoch": 0.16345414607358594, "grad_norm": 0.3814050853252411, "learning_rate": 1.9682298927020133e-05, "loss": 0.4851, "step": 5953 }, { "epoch": 0.16348160351455243, "grad_norm": 0.3393646776676178, "learning_rate": 1.9682190917971725e-05, "loss": 0.5341, "step": 5954 }, { "epoch": 0.16350906095551895, "grad_norm": 0.3380119502544403, "learning_rate": 1.9682082890862945e-05, "loss": 0.492, "step": 5955 }, { "epoch": 0.16353651839648545, "grad_norm": 0.39427319169044495, "learning_rate": 1.9681974845693986e-05, "loss": 0.6123, "step": 5956 }, { "epoch": 0.16356397583745194, "grad_norm": 0.37993431091308594, "learning_rate": 1.9681866782465055e-05, "loss": 0.5023, "step": 5957 }, { "epoch": 0.16359143327841846, "grad_norm": 0.36897385120391846, "learning_rate": 1.9681758701176354e-05, "loss": 0.5989, "step": 5958 }, { "epoch": 0.16361889071938496, "grad_norm": 0.36335939168930054, "learning_rate": 1.9681650601828085e-05, "loss": 0.5201, "step": 5959 }, { "epoch": 0.16364634816035145, "grad_norm": 0.34224992990493774, "learning_rate": 1.9681542484420442e-05, "loss": 0.482, "step": 5960 }, { "epoch": 0.16367380560131795, "grad_norm": 0.37969404458999634, "learning_rate": 1.9681434348953638e-05, "loss": 0.5353, "step": 5961 }, { "epoch": 0.16370126304228447, "grad_norm": 0.5661979913711548, "learning_rate": 1.9681326195427864e-05, "loss": 0.4596, "step": 5962 }, { "epoch": 0.16372872048325096, "grad_norm": 0.39917105436325073, "learning_rate": 1.968121802384333e-05, "loss": 0.6122, "step": 5963 }, { "epoch": 0.16375617792421746, "grad_norm": 0.356785386800766, "learning_rate": 1.968110983420023e-05, "loss": 0.5192, "step": 5964 }, { "epoch": 0.16378363536518398, "grad_norm": 0.3861936330795288, "learning_rate": 1.9681001626498778e-05, "loss": 0.5599, "step": 5965 }, { "epoch": 0.16381109280615047, "grad_norm": 0.3468678593635559, "learning_rate": 1.9680893400739162e-05, "loss": 0.5591, "step": 5966 }, { "epoch": 0.16383855024711697, "grad_norm": 0.3300604224205017, "learning_rate": 1.968078515692159e-05, "loss": 0.4924, "step": 5967 }, { "epoch": 0.16386600768808346, "grad_norm": 0.34586697816848755, "learning_rate": 1.968067689504627e-05, "loss": 0.5216, "step": 5968 }, { "epoch": 0.16389346512904998, "grad_norm": 0.4419156312942505, "learning_rate": 1.968056861511339e-05, "loss": 0.5635, "step": 5969 }, { "epoch": 0.16392092257001648, "grad_norm": 0.4222278594970703, "learning_rate": 1.9680460317123165e-05, "loss": 0.5015, "step": 5970 }, { "epoch": 0.16394838001098297, "grad_norm": 0.37268081307411194, "learning_rate": 1.968035200107579e-05, "loss": 0.5531, "step": 5971 }, { "epoch": 0.1639758374519495, "grad_norm": 0.4047996401786804, "learning_rate": 1.968024366697147e-05, "loss": 0.5629, "step": 5972 }, { "epoch": 0.16400329489291599, "grad_norm": 0.3732962906360626, "learning_rate": 1.9680135314810407e-05, "loss": 0.4674, "step": 5973 }, { "epoch": 0.16403075233388248, "grad_norm": 0.4580184817314148, "learning_rate": 1.96800269445928e-05, "loss": 0.5887, "step": 5974 }, { "epoch": 0.16405820977484897, "grad_norm": 0.3527466654777527, "learning_rate": 1.9679918556318853e-05, "loss": 0.6166, "step": 5975 }, { "epoch": 0.1640856672158155, "grad_norm": 0.39620742201805115, "learning_rate": 1.967981014998877e-05, "loss": 0.5702, "step": 5976 }, { "epoch": 0.164113124656782, "grad_norm": 0.47037628293037415, "learning_rate": 1.9679701725602748e-05, "loss": 0.5622, "step": 5977 }, { "epoch": 0.16414058209774848, "grad_norm": 0.37670788168907166, "learning_rate": 1.9679593283160998e-05, "loss": 0.4955, "step": 5978 }, { "epoch": 0.164168039538715, "grad_norm": 0.43218541145324707, "learning_rate": 1.967948482266371e-05, "loss": 0.533, "step": 5979 }, { "epoch": 0.1641954969796815, "grad_norm": 0.3815891444683075, "learning_rate": 1.96793763441111e-05, "loss": 0.4516, "step": 5980 }, { "epoch": 0.164222954420648, "grad_norm": 0.3875274360179901, "learning_rate": 1.9679267847503362e-05, "loss": 0.5915, "step": 5981 }, { "epoch": 0.1642504118616145, "grad_norm": 0.5156402587890625, "learning_rate": 1.9679159332840703e-05, "loss": 0.5609, "step": 5982 }, { "epoch": 0.164277869302581, "grad_norm": 0.42766740918159485, "learning_rate": 1.967905080012332e-05, "loss": 0.4794, "step": 5983 }, { "epoch": 0.1643053267435475, "grad_norm": 0.4484354257583618, "learning_rate": 1.9678942249351416e-05, "loss": 0.5642, "step": 5984 }, { "epoch": 0.164332784184514, "grad_norm": 0.32909175753593445, "learning_rate": 1.9678833680525198e-05, "loss": 0.4652, "step": 5985 }, { "epoch": 0.16436024162548052, "grad_norm": 0.36967480182647705, "learning_rate": 1.9678725093644866e-05, "loss": 0.5624, "step": 5986 }, { "epoch": 0.164387699066447, "grad_norm": 0.4283381402492523, "learning_rate": 1.9678616488710623e-05, "loss": 0.5287, "step": 5987 }, { "epoch": 0.1644151565074135, "grad_norm": 0.34213605523109436, "learning_rate": 1.967850786572267e-05, "loss": 0.4511, "step": 5988 }, { "epoch": 0.16444261394838, "grad_norm": 0.35383349657058716, "learning_rate": 1.967839922468121e-05, "loss": 0.4902, "step": 5989 }, { "epoch": 0.16447007138934652, "grad_norm": 0.44964611530303955, "learning_rate": 1.9678290565586447e-05, "loss": 0.5706, "step": 5990 }, { "epoch": 0.16449752883031302, "grad_norm": 0.36725693941116333, "learning_rate": 1.9678181888438583e-05, "loss": 0.5489, "step": 5991 }, { "epoch": 0.1645249862712795, "grad_norm": 0.3923743665218353, "learning_rate": 1.9678073193237824e-05, "loss": 0.6093, "step": 5992 }, { "epoch": 0.164552443712246, "grad_norm": 0.34573930501937866, "learning_rate": 1.9677964479984367e-05, "loss": 0.6047, "step": 5993 }, { "epoch": 0.16457990115321253, "grad_norm": 0.3507407307624817, "learning_rate": 1.9677855748678416e-05, "loss": 0.4967, "step": 5994 }, { "epoch": 0.16460735859417902, "grad_norm": 0.3383448123931885, "learning_rate": 1.9677746999320176e-05, "loss": 0.5133, "step": 5995 }, { "epoch": 0.16463481603514551, "grad_norm": 0.5422051548957825, "learning_rate": 1.967763823190985e-05, "loss": 0.5192, "step": 5996 }, { "epoch": 0.16466227347611204, "grad_norm": 0.39850959181785583, "learning_rate": 1.9677529446447643e-05, "loss": 0.578, "step": 5997 }, { "epoch": 0.16468973091707853, "grad_norm": 0.4056061804294586, "learning_rate": 1.9677420642933752e-05, "loss": 0.5727, "step": 5998 }, { "epoch": 0.16471718835804502, "grad_norm": 0.35046660900115967, "learning_rate": 1.967731182136838e-05, "loss": 0.5965, "step": 5999 }, { "epoch": 0.16474464579901152, "grad_norm": 0.42219918966293335, "learning_rate": 1.9677202981751736e-05, "loss": 0.5332, "step": 6000 }, { "epoch": 0.16477210323997804, "grad_norm": 0.35297176241874695, "learning_rate": 1.967709412408402e-05, "loss": 0.5674, "step": 6001 }, { "epoch": 0.16479956068094453, "grad_norm": 0.34210219979286194, "learning_rate": 1.9676985248365433e-05, "loss": 0.4778, "step": 6002 }, { "epoch": 0.16482701812191103, "grad_norm": 0.40207263827323914, "learning_rate": 1.9676876354596183e-05, "loss": 0.4935, "step": 6003 }, { "epoch": 0.16485447556287755, "grad_norm": 0.4113471806049347, "learning_rate": 1.967676744277647e-05, "loss": 0.6124, "step": 6004 }, { "epoch": 0.16488193300384404, "grad_norm": 0.39527130126953125, "learning_rate": 1.9676658512906492e-05, "loss": 0.5735, "step": 6005 }, { "epoch": 0.16490939044481054, "grad_norm": 0.3421378433704376, "learning_rate": 1.967654956498646e-05, "loss": 0.5129, "step": 6006 }, { "epoch": 0.16493684788577703, "grad_norm": 0.36119911074638367, "learning_rate": 1.9676440599016574e-05, "loss": 0.5684, "step": 6007 }, { "epoch": 0.16496430532674355, "grad_norm": 0.3911708891391754, "learning_rate": 1.967633161499704e-05, "loss": 0.5673, "step": 6008 }, { "epoch": 0.16499176276771005, "grad_norm": 0.36507293581962585, "learning_rate": 1.9676222612928064e-05, "loss": 0.5548, "step": 6009 }, { "epoch": 0.16501922020867654, "grad_norm": 0.3616604208946228, "learning_rate": 1.9676113592809837e-05, "loss": 0.4869, "step": 6010 }, { "epoch": 0.16504667764964306, "grad_norm": 0.3893595039844513, "learning_rate": 1.967600455464257e-05, "loss": 0.6038, "step": 6011 }, { "epoch": 0.16507413509060956, "grad_norm": 0.3508799970149994, "learning_rate": 1.967589549842647e-05, "loss": 0.5639, "step": 6012 }, { "epoch": 0.16510159253157605, "grad_norm": 0.3853070139884949, "learning_rate": 1.9675786424161734e-05, "loss": 0.5146, "step": 6013 }, { "epoch": 0.16512904997254255, "grad_norm": 0.3202778398990631, "learning_rate": 1.9675677331848568e-05, "loss": 0.4645, "step": 6014 }, { "epoch": 0.16515650741350907, "grad_norm": 0.365339994430542, "learning_rate": 1.9675568221487177e-05, "loss": 0.5665, "step": 6015 }, { "epoch": 0.16518396485447556, "grad_norm": 0.3838532865047455, "learning_rate": 1.9675459093077763e-05, "loss": 0.6005, "step": 6016 }, { "epoch": 0.16521142229544206, "grad_norm": 0.3698323965072632, "learning_rate": 1.967534994662053e-05, "loss": 0.5837, "step": 6017 }, { "epoch": 0.16523887973640858, "grad_norm": 0.3577888011932373, "learning_rate": 1.9675240782115682e-05, "loss": 0.4916, "step": 6018 }, { "epoch": 0.16526633717737507, "grad_norm": 0.4284653067588806, "learning_rate": 1.967513159956342e-05, "loss": 0.57, "step": 6019 }, { "epoch": 0.16529379461834157, "grad_norm": 0.37486064434051514, "learning_rate": 1.9675022398963954e-05, "loss": 0.6318, "step": 6020 }, { "epoch": 0.16532125205930806, "grad_norm": 0.359077125787735, "learning_rate": 1.9674913180317478e-05, "loss": 0.5552, "step": 6021 }, { "epoch": 0.16534870950027458, "grad_norm": 0.40331849455833435, "learning_rate": 1.9674803943624202e-05, "loss": 0.6487, "step": 6022 }, { "epoch": 0.16537616694124108, "grad_norm": 0.3378467559814453, "learning_rate": 1.967469468888433e-05, "loss": 0.5305, "step": 6023 }, { "epoch": 0.16540362438220757, "grad_norm": 0.352027952671051, "learning_rate": 1.9674585416098066e-05, "loss": 0.4094, "step": 6024 }, { "epoch": 0.1654310818231741, "grad_norm": 0.3266749382019043, "learning_rate": 1.9674476125265613e-05, "loss": 0.4815, "step": 6025 }, { "epoch": 0.16545853926414059, "grad_norm": 0.41697314381599426, "learning_rate": 1.967436681638717e-05, "loss": 0.6243, "step": 6026 }, { "epoch": 0.16548599670510708, "grad_norm": 0.3512372672557831, "learning_rate": 1.9674257489462947e-05, "loss": 0.5425, "step": 6027 }, { "epoch": 0.16551345414607357, "grad_norm": 0.41652485728263855, "learning_rate": 1.9674148144493145e-05, "loss": 0.578, "step": 6028 }, { "epoch": 0.1655409115870401, "grad_norm": 0.3305037021636963, "learning_rate": 1.9674038781477973e-05, "loss": 0.4872, "step": 6029 }, { "epoch": 0.1655683690280066, "grad_norm": 0.35801994800567627, "learning_rate": 1.9673929400417625e-05, "loss": 0.5714, "step": 6030 }, { "epoch": 0.16559582646897308, "grad_norm": 0.3911196291446686, "learning_rate": 1.9673820001312318e-05, "loss": 0.6437, "step": 6031 }, { "epoch": 0.1656232839099396, "grad_norm": 0.3658948242664337, "learning_rate": 1.9673710584162244e-05, "loss": 0.557, "step": 6032 }, { "epoch": 0.1656507413509061, "grad_norm": 0.38355833292007446, "learning_rate": 1.9673601148967617e-05, "loss": 0.5708, "step": 6033 }, { "epoch": 0.1656781987918726, "grad_norm": 0.3739334046840668, "learning_rate": 1.9673491695728634e-05, "loss": 0.5117, "step": 6034 }, { "epoch": 0.1657056562328391, "grad_norm": 0.38835033774375916, "learning_rate": 1.9673382224445497e-05, "loss": 0.5351, "step": 6035 }, { "epoch": 0.1657331136738056, "grad_norm": 0.4020492732524872, "learning_rate": 1.967327273511842e-05, "loss": 0.546, "step": 6036 }, { "epoch": 0.1657605711147721, "grad_norm": 0.39028358459472656, "learning_rate": 1.9673163227747602e-05, "loss": 0.4968, "step": 6037 }, { "epoch": 0.1657880285557386, "grad_norm": 0.3714902698993683, "learning_rate": 1.9673053702333246e-05, "loss": 0.5953, "step": 6038 }, { "epoch": 0.16581548599670512, "grad_norm": 0.32104188203811646, "learning_rate": 1.967294415887556e-05, "loss": 0.5447, "step": 6039 }, { "epoch": 0.1658429434376716, "grad_norm": 0.4229342043399811, "learning_rate": 1.9672834597374742e-05, "loss": 0.5742, "step": 6040 }, { "epoch": 0.1658704008786381, "grad_norm": 0.33305221796035767, "learning_rate": 1.9672725017831003e-05, "loss": 0.5075, "step": 6041 }, { "epoch": 0.1658978583196046, "grad_norm": 0.6092918515205383, "learning_rate": 1.9672615420244544e-05, "loss": 0.5673, "step": 6042 }, { "epoch": 0.16592531576057112, "grad_norm": 0.3503728210926056, "learning_rate": 1.967250580461557e-05, "loss": 0.5192, "step": 6043 }, { "epoch": 0.16595277320153762, "grad_norm": 0.34510666131973267, "learning_rate": 1.9672396170944284e-05, "loss": 0.5449, "step": 6044 }, { "epoch": 0.1659802306425041, "grad_norm": 0.510109007358551, "learning_rate": 1.9672286519230895e-05, "loss": 0.6686, "step": 6045 }, { "epoch": 0.16600768808347063, "grad_norm": 0.34774476289749146, "learning_rate": 1.9672176849475603e-05, "loss": 0.4627, "step": 6046 }, { "epoch": 0.16603514552443713, "grad_norm": 0.3549306094646454, "learning_rate": 1.967206716167861e-05, "loss": 0.5547, "step": 6047 }, { "epoch": 0.16606260296540362, "grad_norm": 0.3948396146297455, "learning_rate": 1.967195745584013e-05, "loss": 0.6329, "step": 6048 }, { "epoch": 0.16609006040637012, "grad_norm": 0.30357012152671814, "learning_rate": 1.9671847731960362e-05, "loss": 0.4374, "step": 6049 }, { "epoch": 0.16611751784733664, "grad_norm": 0.36608296632766724, "learning_rate": 1.967173799003951e-05, "loss": 0.6455, "step": 6050 }, { "epoch": 0.16614497528830313, "grad_norm": 0.43936023116111755, "learning_rate": 1.967162823007778e-05, "loss": 0.5327, "step": 6051 }, { "epoch": 0.16617243272926963, "grad_norm": 0.37706443667411804, "learning_rate": 1.9671518452075378e-05, "loss": 0.5166, "step": 6052 }, { "epoch": 0.16619989017023615, "grad_norm": 0.390299916267395, "learning_rate": 1.9671408656032508e-05, "loss": 0.5725, "step": 6053 }, { "epoch": 0.16622734761120264, "grad_norm": 0.3787047564983368, "learning_rate": 1.9671298841949368e-05, "loss": 0.5472, "step": 6054 }, { "epoch": 0.16625480505216914, "grad_norm": 0.3114975094795227, "learning_rate": 1.9671189009826174e-05, "loss": 0.476, "step": 6055 }, { "epoch": 0.16628226249313563, "grad_norm": 0.38148245215415955, "learning_rate": 1.9671079159663127e-05, "loss": 0.5246, "step": 6056 }, { "epoch": 0.16630971993410215, "grad_norm": 0.3514304459095001, "learning_rate": 1.9670969291460426e-05, "loss": 0.5677, "step": 6057 }, { "epoch": 0.16633717737506865, "grad_norm": 0.33231183886528015, "learning_rate": 1.9670859405218285e-05, "loss": 0.4606, "step": 6058 }, { "epoch": 0.16636463481603514, "grad_norm": 0.32489141821861267, "learning_rate": 1.9670749500936903e-05, "loss": 0.5342, "step": 6059 }, { "epoch": 0.16639209225700163, "grad_norm": 0.35079091787338257, "learning_rate": 1.9670639578616485e-05, "loss": 0.5881, "step": 6060 }, { "epoch": 0.16641954969796816, "grad_norm": 0.36974725127220154, "learning_rate": 1.9670529638257242e-05, "loss": 0.4705, "step": 6061 }, { "epoch": 0.16644700713893465, "grad_norm": 0.4431898593902588, "learning_rate": 1.967041967985937e-05, "loss": 0.5052, "step": 6062 }, { "epoch": 0.16647446457990114, "grad_norm": 0.35807061195373535, "learning_rate": 1.9670309703423083e-05, "loss": 0.573, "step": 6063 }, { "epoch": 0.16650192202086767, "grad_norm": 0.4240536689758301, "learning_rate": 1.967019970894858e-05, "loss": 0.6404, "step": 6064 }, { "epoch": 0.16652937946183416, "grad_norm": 0.35580208897590637, "learning_rate": 1.967008969643607e-05, "loss": 0.5303, "step": 6065 }, { "epoch": 0.16655683690280065, "grad_norm": 0.38924211263656616, "learning_rate": 1.966997966588575e-05, "loss": 0.6984, "step": 6066 }, { "epoch": 0.16658429434376715, "grad_norm": 0.3990425765514374, "learning_rate": 1.9669869617297837e-05, "loss": 0.48, "step": 6067 }, { "epoch": 0.16661175178473367, "grad_norm": 0.35663846135139465, "learning_rate": 1.9669759550672528e-05, "loss": 0.5654, "step": 6068 }, { "epoch": 0.16663920922570016, "grad_norm": 0.549411952495575, "learning_rate": 1.9669649466010036e-05, "loss": 0.5292, "step": 6069 }, { "epoch": 0.16666666666666666, "grad_norm": 0.469146192073822, "learning_rate": 1.9669539363310556e-05, "loss": 0.5455, "step": 6070 }, { "epoch": 0.16669412410763318, "grad_norm": 0.4251728057861328, "learning_rate": 1.9669429242574303e-05, "loss": 0.5843, "step": 6071 }, { "epoch": 0.16672158154859967, "grad_norm": 0.4439394772052765, "learning_rate": 1.9669319103801476e-05, "loss": 0.6297, "step": 6072 }, { "epoch": 0.16674903898956617, "grad_norm": 0.373017817735672, "learning_rate": 1.9669208946992284e-05, "loss": 0.4998, "step": 6073 }, { "epoch": 0.16677649643053266, "grad_norm": 0.4103248119354248, "learning_rate": 1.9669098772146933e-05, "loss": 0.5273, "step": 6074 }, { "epoch": 0.16680395387149918, "grad_norm": 0.43823814392089844, "learning_rate": 1.9668988579265624e-05, "loss": 0.615, "step": 6075 }, { "epoch": 0.16683141131246568, "grad_norm": 0.33302512764930725, "learning_rate": 1.9668878368348564e-05, "loss": 0.5368, "step": 6076 }, { "epoch": 0.16685886875343217, "grad_norm": 0.3614601492881775, "learning_rate": 1.9668768139395964e-05, "loss": 0.5541, "step": 6077 }, { "epoch": 0.1668863261943987, "grad_norm": 0.47163820266723633, "learning_rate": 1.9668657892408024e-05, "loss": 0.5357, "step": 6078 }, { "epoch": 0.1669137836353652, "grad_norm": 0.36058926582336426, "learning_rate": 1.966854762738495e-05, "loss": 0.5523, "step": 6079 }, { "epoch": 0.16694124107633168, "grad_norm": 0.46985626220703125, "learning_rate": 1.9668437344326947e-05, "loss": 0.6506, "step": 6080 }, { "epoch": 0.16696869851729818, "grad_norm": 0.35088658332824707, "learning_rate": 1.9668327043234225e-05, "loss": 0.5378, "step": 6081 }, { "epoch": 0.1669961559582647, "grad_norm": 0.3433704376220703, "learning_rate": 1.966821672410699e-05, "loss": 0.5781, "step": 6082 }, { "epoch": 0.1670236133992312, "grad_norm": 0.40412694215774536, "learning_rate": 1.9668106386945442e-05, "loss": 0.5413, "step": 6083 }, { "epoch": 0.16705107084019769, "grad_norm": 0.37778741121292114, "learning_rate": 1.966799603174979e-05, "loss": 0.5615, "step": 6084 }, { "epoch": 0.1670785282811642, "grad_norm": 0.40664738416671753, "learning_rate": 1.966788565852024e-05, "loss": 0.4778, "step": 6085 }, { "epoch": 0.1671059857221307, "grad_norm": 0.3747825622558594, "learning_rate": 1.9667775267256998e-05, "loss": 0.5911, "step": 6086 }, { "epoch": 0.1671334431630972, "grad_norm": 0.33845314383506775, "learning_rate": 1.9667664857960268e-05, "loss": 0.5409, "step": 6087 }, { "epoch": 0.1671609006040637, "grad_norm": 0.3360182046890259, "learning_rate": 1.966755443063026e-05, "loss": 0.5054, "step": 6088 }, { "epoch": 0.1671883580450302, "grad_norm": 0.3897232711315155, "learning_rate": 1.9667443985267174e-05, "loss": 0.5546, "step": 6089 }, { "epoch": 0.1672158154859967, "grad_norm": 0.40664052963256836, "learning_rate": 1.9667333521871223e-05, "loss": 0.6336, "step": 6090 }, { "epoch": 0.1672432729269632, "grad_norm": 0.3653057813644409, "learning_rate": 1.9667223040442608e-05, "loss": 0.6095, "step": 6091 }, { "epoch": 0.16727073036792972, "grad_norm": 0.3559306263923645, "learning_rate": 1.9667112540981535e-05, "loss": 0.4986, "step": 6092 }, { "epoch": 0.16729818780889621, "grad_norm": 0.3687642812728882, "learning_rate": 1.9667002023488212e-05, "loss": 0.5231, "step": 6093 }, { "epoch": 0.1673256452498627, "grad_norm": 1.9055484533309937, "learning_rate": 1.966689148796285e-05, "loss": 0.6183, "step": 6094 }, { "epoch": 0.1673531026908292, "grad_norm": 0.3752286434173584, "learning_rate": 1.9666780934405644e-05, "loss": 0.5455, "step": 6095 }, { "epoch": 0.16738056013179572, "grad_norm": 0.4538190960884094, "learning_rate": 1.9666670362816812e-05, "loss": 0.508, "step": 6096 }, { "epoch": 0.16740801757276222, "grad_norm": 0.3904830813407898, "learning_rate": 1.966655977319655e-05, "loss": 0.5337, "step": 6097 }, { "epoch": 0.1674354750137287, "grad_norm": 0.3765058219432831, "learning_rate": 1.9666449165545067e-05, "loss": 0.5847, "step": 6098 }, { "epoch": 0.16746293245469523, "grad_norm": 0.4019477665424347, "learning_rate": 1.9666338539862577e-05, "loss": 0.6267, "step": 6099 }, { "epoch": 0.16749038989566173, "grad_norm": 0.3540938198566437, "learning_rate": 1.9666227896149276e-05, "loss": 0.5022, "step": 6100 }, { "epoch": 0.16751784733662822, "grad_norm": 0.36842676997184753, "learning_rate": 1.9666117234405378e-05, "loss": 0.5791, "step": 6101 }, { "epoch": 0.16754530477759472, "grad_norm": 0.3152039349079132, "learning_rate": 1.9666006554631083e-05, "loss": 0.426, "step": 6102 }, { "epoch": 0.16757276221856124, "grad_norm": 0.364555299282074, "learning_rate": 1.9665895856826604e-05, "loss": 0.5676, "step": 6103 }, { "epoch": 0.16760021965952773, "grad_norm": 0.5145508646965027, "learning_rate": 1.966578514099214e-05, "loss": 0.5058, "step": 6104 }, { "epoch": 0.16762767710049423, "grad_norm": 0.3821084797382355, "learning_rate": 1.9665674407127907e-05, "loss": 0.5779, "step": 6105 }, { "epoch": 0.16765513454146075, "grad_norm": 0.324028342962265, "learning_rate": 1.9665563655234102e-05, "loss": 0.5476, "step": 6106 }, { "epoch": 0.16768259198242724, "grad_norm": 0.3450806736946106, "learning_rate": 1.9665452885310937e-05, "loss": 0.5042, "step": 6107 }, { "epoch": 0.16771004942339374, "grad_norm": 0.4623746871948242, "learning_rate": 1.966534209735862e-05, "loss": 0.5664, "step": 6108 }, { "epoch": 0.16773750686436023, "grad_norm": 0.48113784193992615, "learning_rate": 1.9665231291377353e-05, "loss": 0.6389, "step": 6109 }, { "epoch": 0.16776496430532675, "grad_norm": 0.4000304937362671, "learning_rate": 1.9665120467367346e-05, "loss": 0.6153, "step": 6110 }, { "epoch": 0.16779242174629325, "grad_norm": 0.35962119698524475, "learning_rate": 1.9665009625328802e-05, "loss": 0.5036, "step": 6111 }, { "epoch": 0.16781987918725974, "grad_norm": 0.34091895818710327, "learning_rate": 1.966489876526193e-05, "loss": 0.5801, "step": 6112 }, { "epoch": 0.16784733662822626, "grad_norm": 0.4326756000518799, "learning_rate": 1.9664787887166943e-05, "loss": 0.5627, "step": 6113 }, { "epoch": 0.16787479406919276, "grad_norm": 0.3511867821216583, "learning_rate": 1.9664676991044035e-05, "loss": 0.546, "step": 6114 }, { "epoch": 0.16790225151015925, "grad_norm": 0.37634846568107605, "learning_rate": 1.9664566076893426e-05, "loss": 0.6147, "step": 6115 }, { "epoch": 0.16792970895112574, "grad_norm": 0.38621097803115845, "learning_rate": 1.9664455144715313e-05, "loss": 0.6216, "step": 6116 }, { "epoch": 0.16795716639209227, "grad_norm": 0.326101690530777, "learning_rate": 1.9664344194509906e-05, "loss": 0.5665, "step": 6117 }, { "epoch": 0.16798462383305876, "grad_norm": 0.40964528918266296, "learning_rate": 1.9664233226277416e-05, "loss": 0.5851, "step": 6118 }, { "epoch": 0.16801208127402525, "grad_norm": 0.33603858947753906, "learning_rate": 1.9664122240018045e-05, "loss": 0.4606, "step": 6119 }, { "epoch": 0.16803953871499178, "grad_norm": 0.35454311966896057, "learning_rate": 1.9664011235732e-05, "loss": 0.6291, "step": 6120 }, { "epoch": 0.16806699615595827, "grad_norm": 0.3686046004295349, "learning_rate": 1.9663900213419492e-05, "loss": 0.5673, "step": 6121 }, { "epoch": 0.16809445359692476, "grad_norm": 0.38228264451026917, "learning_rate": 1.9663789173080723e-05, "loss": 0.6138, "step": 6122 }, { "epoch": 0.16812191103789126, "grad_norm": 0.36588242650032043, "learning_rate": 1.9663678114715904e-05, "loss": 0.5195, "step": 6123 }, { "epoch": 0.16814936847885778, "grad_norm": 0.34180575609207153, "learning_rate": 1.966356703832524e-05, "loss": 0.5052, "step": 6124 }, { "epoch": 0.16817682591982427, "grad_norm": 0.36211350560188293, "learning_rate": 1.966345594390894e-05, "loss": 0.5239, "step": 6125 }, { "epoch": 0.16820428336079077, "grad_norm": 0.3310061991214752, "learning_rate": 1.966334483146721e-05, "loss": 0.521, "step": 6126 }, { "epoch": 0.16823174080175726, "grad_norm": 0.3522648215293884, "learning_rate": 1.9663233701000264e-05, "loss": 0.4746, "step": 6127 }, { "epoch": 0.16825919824272378, "grad_norm": 0.3470238149166107, "learning_rate": 1.9663122552508297e-05, "loss": 0.551, "step": 6128 }, { "epoch": 0.16828665568369028, "grad_norm": 0.516315221786499, "learning_rate": 1.9663011385991523e-05, "loss": 0.5962, "step": 6129 }, { "epoch": 0.16831411312465677, "grad_norm": 0.33558204770088196, "learning_rate": 1.966290020145015e-05, "loss": 0.4921, "step": 6130 }, { "epoch": 0.1683415705656233, "grad_norm": 0.3857131898403168, "learning_rate": 1.966278899888438e-05, "loss": 0.6062, "step": 6131 }, { "epoch": 0.1683690280065898, "grad_norm": 0.4149656891822815, "learning_rate": 1.966267777829443e-05, "loss": 0.5913, "step": 6132 }, { "epoch": 0.16839648544755628, "grad_norm": 0.36160099506378174, "learning_rate": 1.9662566539680497e-05, "loss": 0.6703, "step": 6133 }, { "epoch": 0.16842394288852278, "grad_norm": 0.366005003452301, "learning_rate": 1.9662455283042795e-05, "loss": 0.5602, "step": 6134 }, { "epoch": 0.1684514003294893, "grad_norm": 0.39626607298851013, "learning_rate": 1.966234400838153e-05, "loss": 0.5611, "step": 6135 }, { "epoch": 0.1684788577704558, "grad_norm": 0.38875123858451843, "learning_rate": 1.966223271569691e-05, "loss": 0.5704, "step": 6136 }, { "epoch": 0.1685063152114223, "grad_norm": 0.522930383682251, "learning_rate": 1.9662121404989146e-05, "loss": 0.5189, "step": 6137 }, { "epoch": 0.1685337726523888, "grad_norm": 0.3427080512046814, "learning_rate": 1.9662010076258437e-05, "loss": 0.5328, "step": 6138 }, { "epoch": 0.1685612300933553, "grad_norm": 0.3448258340358734, "learning_rate": 1.9661898729504995e-05, "loss": 0.5062, "step": 6139 }, { "epoch": 0.1685886875343218, "grad_norm": 0.38568389415740967, "learning_rate": 1.966178736472903e-05, "loss": 0.5404, "step": 6140 }, { "epoch": 0.1686161449752883, "grad_norm": 0.35096418857574463, "learning_rate": 1.9661675981930747e-05, "loss": 0.5367, "step": 6141 }, { "epoch": 0.1686436024162548, "grad_norm": 0.32657188177108765, "learning_rate": 1.966156458111036e-05, "loss": 0.4732, "step": 6142 }, { "epoch": 0.1686710598572213, "grad_norm": 0.40800753235816956, "learning_rate": 1.9661453162268066e-05, "loss": 0.5081, "step": 6143 }, { "epoch": 0.1686985172981878, "grad_norm": 0.36590394377708435, "learning_rate": 1.9661341725404078e-05, "loss": 0.5049, "step": 6144 }, { "epoch": 0.16872597473915432, "grad_norm": 0.3546043634414673, "learning_rate": 1.9661230270518606e-05, "loss": 0.4865, "step": 6145 }, { "epoch": 0.16875343218012082, "grad_norm": 0.40301400423049927, "learning_rate": 1.9661118797611854e-05, "loss": 0.4854, "step": 6146 }, { "epoch": 0.1687808896210873, "grad_norm": 0.412809282541275, "learning_rate": 1.9661007306684033e-05, "loss": 0.4533, "step": 6147 }, { "epoch": 0.1688083470620538, "grad_norm": 0.3760605752468109, "learning_rate": 1.966089579773535e-05, "loss": 0.5589, "step": 6148 }, { "epoch": 0.16883580450302033, "grad_norm": 0.3326733410358429, "learning_rate": 1.9660784270766014e-05, "loss": 0.5015, "step": 6149 }, { "epoch": 0.16886326194398682, "grad_norm": 0.3640964925289154, "learning_rate": 1.966067272577623e-05, "loss": 0.5146, "step": 6150 }, { "epoch": 0.16889071938495331, "grad_norm": 0.39652398228645325, "learning_rate": 1.966056116276621e-05, "loss": 0.5389, "step": 6151 }, { "epoch": 0.16891817682591984, "grad_norm": 0.3585420846939087, "learning_rate": 1.966044958173616e-05, "loss": 0.4897, "step": 6152 }, { "epoch": 0.16894563426688633, "grad_norm": 0.36327043175697327, "learning_rate": 1.9660337982686287e-05, "loss": 0.4914, "step": 6153 }, { "epoch": 0.16897309170785282, "grad_norm": 0.38483381271362305, "learning_rate": 1.96602263656168e-05, "loss": 0.48, "step": 6154 }, { "epoch": 0.16900054914881932, "grad_norm": 0.38219207525253296, "learning_rate": 1.966011473052791e-05, "loss": 0.5062, "step": 6155 }, { "epoch": 0.16902800658978584, "grad_norm": 0.3424827456474304, "learning_rate": 1.966000307741982e-05, "loss": 0.5056, "step": 6156 }, { "epoch": 0.16905546403075233, "grad_norm": 0.5753840804100037, "learning_rate": 1.9659891406292743e-05, "loss": 0.5576, "step": 6157 }, { "epoch": 0.16908292147171883, "grad_norm": 0.6950632333755493, "learning_rate": 1.9659779717146886e-05, "loss": 0.5298, "step": 6158 }, { "epoch": 0.16911037891268535, "grad_norm": 0.4094807803630829, "learning_rate": 1.9659668009982456e-05, "loss": 0.6332, "step": 6159 }, { "epoch": 0.16913783635365184, "grad_norm": 0.3458978235721588, "learning_rate": 1.965955628479966e-05, "loss": 0.5028, "step": 6160 }, { "epoch": 0.16916529379461834, "grad_norm": 0.35234832763671875, "learning_rate": 1.965944454159871e-05, "loss": 0.4816, "step": 6161 }, { "epoch": 0.16919275123558483, "grad_norm": 0.3659443259239197, "learning_rate": 1.9659332780379817e-05, "loss": 0.5322, "step": 6162 }, { "epoch": 0.16922020867655135, "grad_norm": 0.4937681555747986, "learning_rate": 1.9659221001143183e-05, "loss": 0.5467, "step": 6163 }, { "epoch": 0.16924766611751785, "grad_norm": 0.39413192868232727, "learning_rate": 1.9659109203889017e-05, "loss": 0.5205, "step": 6164 }, { "epoch": 0.16927512355848434, "grad_norm": 0.33611616492271423, "learning_rate": 1.965899738861753e-05, "loss": 0.4527, "step": 6165 }, { "epoch": 0.16930258099945086, "grad_norm": 0.4305313229560852, "learning_rate": 1.965888555532893e-05, "loss": 0.6364, "step": 6166 }, { "epoch": 0.16933003844041736, "grad_norm": 0.37334996461868286, "learning_rate": 1.9658773704023426e-05, "loss": 0.5692, "step": 6167 }, { "epoch": 0.16935749588138385, "grad_norm": 0.551084041595459, "learning_rate": 1.965866183470123e-05, "loss": 0.5589, "step": 6168 }, { "epoch": 0.16938495332235035, "grad_norm": 0.41478878259658813, "learning_rate": 1.9658549947362542e-05, "loss": 0.587, "step": 6169 }, { "epoch": 0.16941241076331687, "grad_norm": 0.33250170946121216, "learning_rate": 1.9658438042007578e-05, "loss": 0.4483, "step": 6170 }, { "epoch": 0.16943986820428336, "grad_norm": 0.4413265287876129, "learning_rate": 1.9658326118636545e-05, "loss": 0.5979, "step": 6171 }, { "epoch": 0.16946732564524986, "grad_norm": 0.3406670093536377, "learning_rate": 1.9658214177249646e-05, "loss": 0.557, "step": 6172 }, { "epoch": 0.16949478308621638, "grad_norm": 0.3202119767665863, "learning_rate": 1.9658102217847102e-05, "loss": 0.4595, "step": 6173 }, { "epoch": 0.16952224052718287, "grad_norm": 0.347635418176651, "learning_rate": 1.965799024042911e-05, "loss": 0.5725, "step": 6174 }, { "epoch": 0.16954969796814937, "grad_norm": 0.48045438528060913, "learning_rate": 1.9657878244995884e-05, "loss": 0.5639, "step": 6175 }, { "epoch": 0.16957715540911586, "grad_norm": 0.35223814845085144, "learning_rate": 1.9657766231547634e-05, "loss": 0.563, "step": 6176 }, { "epoch": 0.16960461285008238, "grad_norm": 0.35095635056495667, "learning_rate": 1.965765420008457e-05, "loss": 0.4613, "step": 6177 }, { "epoch": 0.16963207029104888, "grad_norm": 0.44871848821640015, "learning_rate": 1.9657542150606897e-05, "loss": 0.507, "step": 6178 }, { "epoch": 0.16965952773201537, "grad_norm": 0.34832823276519775, "learning_rate": 1.9657430083114825e-05, "loss": 0.5234, "step": 6179 }, { "epoch": 0.1696869851729819, "grad_norm": 0.3673684000968933, "learning_rate": 1.9657317997608563e-05, "loss": 0.4582, "step": 6180 }, { "epoch": 0.16971444261394839, "grad_norm": 0.3486049771308899, "learning_rate": 1.965720589408832e-05, "loss": 0.6024, "step": 6181 }, { "epoch": 0.16974190005491488, "grad_norm": 0.3625336289405823, "learning_rate": 1.965709377255431e-05, "loss": 0.5002, "step": 6182 }, { "epoch": 0.16976935749588137, "grad_norm": 0.4737614393234253, "learning_rate": 1.9656981633006732e-05, "loss": 0.5246, "step": 6183 }, { "epoch": 0.1697968149368479, "grad_norm": 0.46187612414360046, "learning_rate": 1.9656869475445807e-05, "loss": 0.5963, "step": 6184 }, { "epoch": 0.1698242723778144, "grad_norm": 0.37999269366264343, "learning_rate": 1.965675729987173e-05, "loss": 0.5828, "step": 6185 }, { "epoch": 0.16985172981878088, "grad_norm": 0.3610737919807434, "learning_rate": 1.9656645106284726e-05, "loss": 0.5263, "step": 6186 }, { "epoch": 0.1698791872597474, "grad_norm": 0.41093388199806213, "learning_rate": 1.9656532894684996e-05, "loss": 0.6023, "step": 6187 }, { "epoch": 0.1699066447007139, "grad_norm": 0.36026233434677124, "learning_rate": 1.965642066507275e-05, "loss": 0.519, "step": 6188 }, { "epoch": 0.1699341021416804, "grad_norm": 0.3770429790019989, "learning_rate": 1.9656308417448193e-05, "loss": 0.5038, "step": 6189 }, { "epoch": 0.1699615595826469, "grad_norm": 0.45526590943336487, "learning_rate": 1.9656196151811546e-05, "loss": 0.5307, "step": 6190 }, { "epoch": 0.1699890170236134, "grad_norm": 0.33381035923957825, "learning_rate": 1.9656083868163004e-05, "loss": 0.5565, "step": 6191 }, { "epoch": 0.1700164744645799, "grad_norm": 0.3913244307041168, "learning_rate": 1.965597156650279e-05, "loss": 0.571, "step": 6192 }, { "epoch": 0.1700439319055464, "grad_norm": 0.39559149742126465, "learning_rate": 1.96558592468311e-05, "loss": 0.5428, "step": 6193 }, { "epoch": 0.1700713893465129, "grad_norm": 0.44421499967575073, "learning_rate": 1.9655746909148158e-05, "loss": 0.5955, "step": 6194 }, { "epoch": 0.1700988467874794, "grad_norm": 0.36310213804244995, "learning_rate": 1.9655634553454162e-05, "loss": 0.5284, "step": 6195 }, { "epoch": 0.1701263042284459, "grad_norm": 0.38929590582847595, "learning_rate": 1.9655522179749328e-05, "loss": 0.5386, "step": 6196 }, { "epoch": 0.1701537616694124, "grad_norm": 0.46390002965927124, "learning_rate": 1.9655409788033863e-05, "loss": 0.5667, "step": 6197 }, { "epoch": 0.17018121911037892, "grad_norm": 0.4154791831970215, "learning_rate": 1.9655297378307977e-05, "loss": 0.5805, "step": 6198 }, { "epoch": 0.17020867655134542, "grad_norm": 0.3305093050003052, "learning_rate": 1.9655184950571877e-05, "loss": 0.4995, "step": 6199 }, { "epoch": 0.1702361339923119, "grad_norm": 0.3512718081474304, "learning_rate": 1.965507250482578e-05, "loss": 0.4672, "step": 6200 }, { "epoch": 0.1702635914332784, "grad_norm": 0.3637877106666565, "learning_rate": 1.965496004106989e-05, "loss": 0.6636, "step": 6201 }, { "epoch": 0.17029104887424493, "grad_norm": 0.3550313115119934, "learning_rate": 1.9654847559304416e-05, "loss": 0.5127, "step": 6202 }, { "epoch": 0.17031850631521142, "grad_norm": 0.3590618371963501, "learning_rate": 1.9654735059529573e-05, "loss": 0.5494, "step": 6203 }, { "epoch": 0.17034596375617791, "grad_norm": 0.3455407917499542, "learning_rate": 1.9654622541745563e-05, "loss": 0.5462, "step": 6204 }, { "epoch": 0.17037342119714444, "grad_norm": 0.34545716643333435, "learning_rate": 1.9654510005952602e-05, "loss": 0.5027, "step": 6205 }, { "epoch": 0.17040087863811093, "grad_norm": 0.4074101448059082, "learning_rate": 1.96543974521509e-05, "loss": 0.5335, "step": 6206 }, { "epoch": 0.17042833607907742, "grad_norm": 0.3968016505241394, "learning_rate": 1.9654284880340667e-05, "loss": 0.5599, "step": 6207 }, { "epoch": 0.17045579352004392, "grad_norm": 0.35486242175102234, "learning_rate": 1.9654172290522112e-05, "loss": 0.5689, "step": 6208 }, { "epoch": 0.17048325096101044, "grad_norm": 0.5093222856521606, "learning_rate": 1.965405968269544e-05, "loss": 0.5451, "step": 6209 }, { "epoch": 0.17051070840197693, "grad_norm": 0.3446919620037079, "learning_rate": 1.9653947056860868e-05, "loss": 0.5744, "step": 6210 }, { "epoch": 0.17053816584294343, "grad_norm": 0.37866225838661194, "learning_rate": 1.9653834413018603e-05, "loss": 0.5609, "step": 6211 }, { "epoch": 0.17056562328390995, "grad_norm": 0.3534792959690094, "learning_rate": 1.9653721751168854e-05, "loss": 0.5789, "step": 6212 }, { "epoch": 0.17059308072487644, "grad_norm": 0.32640522718429565, "learning_rate": 1.9653609071311835e-05, "loss": 0.5412, "step": 6213 }, { "epoch": 0.17062053816584294, "grad_norm": 0.4173998534679413, "learning_rate": 1.9653496373447756e-05, "loss": 0.5957, "step": 6214 }, { "epoch": 0.17064799560680943, "grad_norm": 0.3654249906539917, "learning_rate": 1.965338365757682e-05, "loss": 0.4975, "step": 6215 }, { "epoch": 0.17067545304777595, "grad_norm": 0.36075863242149353, "learning_rate": 1.9653270923699246e-05, "loss": 0.5592, "step": 6216 }, { "epoch": 0.17070291048874245, "grad_norm": 0.34971916675567627, "learning_rate": 1.965315817181524e-05, "loss": 0.5862, "step": 6217 }, { "epoch": 0.17073036792970894, "grad_norm": 0.35275882482528687, "learning_rate": 1.9653045401925014e-05, "loss": 0.5106, "step": 6218 }, { "epoch": 0.17075782537067546, "grad_norm": 0.41081342101097107, "learning_rate": 1.9652932614028777e-05, "loss": 0.6205, "step": 6219 }, { "epoch": 0.17078528281164196, "grad_norm": 0.353102445602417, "learning_rate": 1.965281980812674e-05, "loss": 0.5025, "step": 6220 }, { "epoch": 0.17081274025260845, "grad_norm": 0.36874812841415405, "learning_rate": 1.9652706984219114e-05, "loss": 0.5877, "step": 6221 }, { "epoch": 0.17084019769357495, "grad_norm": 0.3708045780658722, "learning_rate": 1.9652594142306105e-05, "loss": 0.5235, "step": 6222 }, { "epoch": 0.17086765513454147, "grad_norm": 0.3688451647758484, "learning_rate": 1.965248128238793e-05, "loss": 0.4493, "step": 6223 }, { "epoch": 0.17089511257550796, "grad_norm": 0.354002445936203, "learning_rate": 1.9652368404464796e-05, "loss": 0.5689, "step": 6224 }, { "epoch": 0.17092257001647446, "grad_norm": 0.3699571490287781, "learning_rate": 1.9652255508536916e-05, "loss": 0.5365, "step": 6225 }, { "epoch": 0.17095002745744098, "grad_norm": 0.3374769687652588, "learning_rate": 1.9652142594604494e-05, "loss": 0.5045, "step": 6226 }, { "epoch": 0.17097748489840747, "grad_norm": 0.3770489990711212, "learning_rate": 1.965202966266775e-05, "loss": 0.5904, "step": 6227 }, { "epoch": 0.17100494233937397, "grad_norm": 0.47822093963623047, "learning_rate": 1.965191671272689e-05, "loss": 0.4312, "step": 6228 }, { "epoch": 0.17103239978034046, "grad_norm": 0.33589649200439453, "learning_rate": 1.9651803744782124e-05, "loss": 0.5141, "step": 6229 }, { "epoch": 0.17105985722130698, "grad_norm": 0.40495795011520386, "learning_rate": 1.9651690758833662e-05, "loss": 0.5202, "step": 6230 }, { "epoch": 0.17108731466227348, "grad_norm": 0.39113038778305054, "learning_rate": 1.9651577754881716e-05, "loss": 0.5744, "step": 6231 }, { "epoch": 0.17111477210323997, "grad_norm": 0.3650939166545868, "learning_rate": 1.96514647329265e-05, "loss": 0.5544, "step": 6232 }, { "epoch": 0.1711422295442065, "grad_norm": 0.3657686114311218, "learning_rate": 1.965135169296822e-05, "loss": 0.5311, "step": 6233 }, { "epoch": 0.171169686985173, "grad_norm": 0.3627791702747345, "learning_rate": 1.965123863500709e-05, "loss": 0.5818, "step": 6234 }, { "epoch": 0.17119714442613948, "grad_norm": 0.3896605670452118, "learning_rate": 1.9651125559043315e-05, "loss": 0.5759, "step": 6235 }, { "epoch": 0.17122460186710597, "grad_norm": 0.4310438632965088, "learning_rate": 1.9651012465077116e-05, "loss": 0.5319, "step": 6236 }, { "epoch": 0.1712520593080725, "grad_norm": 0.37484776973724365, "learning_rate": 1.9650899353108695e-05, "loss": 0.5879, "step": 6237 }, { "epoch": 0.171279516749039, "grad_norm": 0.4604226052761078, "learning_rate": 1.965078622313827e-05, "loss": 0.5692, "step": 6238 }, { "epoch": 0.17130697419000548, "grad_norm": 0.43739521503448486, "learning_rate": 1.9650673075166047e-05, "loss": 0.5234, "step": 6239 }, { "epoch": 0.171334431630972, "grad_norm": 0.3478800058364868, "learning_rate": 1.965055990919224e-05, "loss": 0.4916, "step": 6240 }, { "epoch": 0.1713618890719385, "grad_norm": 0.3420684039592743, "learning_rate": 1.9650446725217056e-05, "loss": 0.582, "step": 6241 }, { "epoch": 0.171389346512905, "grad_norm": 0.3777155578136444, "learning_rate": 1.965033352324071e-05, "loss": 0.537, "step": 6242 }, { "epoch": 0.1714168039538715, "grad_norm": 0.3580748736858368, "learning_rate": 1.965022030326341e-05, "loss": 0.5458, "step": 6243 }, { "epoch": 0.171444261394838, "grad_norm": 0.39381274580955505, "learning_rate": 1.9650107065285372e-05, "loss": 0.5343, "step": 6244 }, { "epoch": 0.1714717188358045, "grad_norm": 0.3732830584049225, "learning_rate": 1.9649993809306802e-05, "loss": 0.5391, "step": 6245 }, { "epoch": 0.171499176276771, "grad_norm": 0.3884325325489044, "learning_rate": 1.9649880535327918e-05, "loss": 0.6073, "step": 6246 }, { "epoch": 0.17152663371773752, "grad_norm": 0.3908616006374359, "learning_rate": 1.9649767243348923e-05, "loss": 0.6546, "step": 6247 }, { "epoch": 0.17155409115870401, "grad_norm": 0.42377960681915283, "learning_rate": 1.9649653933370034e-05, "loss": 0.6062, "step": 6248 }, { "epoch": 0.1715815485996705, "grad_norm": 0.33391210436820984, "learning_rate": 1.964954060539146e-05, "loss": 0.5028, "step": 6249 }, { "epoch": 0.171609006040637, "grad_norm": 0.45648688077926636, "learning_rate": 1.964942725941341e-05, "loss": 0.5591, "step": 6250 }, { "epoch": 0.17163646348160352, "grad_norm": 0.3206802308559418, "learning_rate": 1.96493138954361e-05, "loss": 0.5494, "step": 6251 }, { "epoch": 0.17166392092257002, "grad_norm": 0.40880995988845825, "learning_rate": 1.9649200513459743e-05, "loss": 0.6102, "step": 6252 }, { "epoch": 0.1716913783635365, "grad_norm": 0.34767529368400574, "learning_rate": 1.9649087113484545e-05, "loss": 0.4461, "step": 6253 }, { "epoch": 0.17171883580450303, "grad_norm": 0.3832607865333557, "learning_rate": 1.964897369551072e-05, "loss": 0.518, "step": 6254 }, { "epoch": 0.17174629324546953, "grad_norm": 0.38373807072639465, "learning_rate": 1.964886025953848e-05, "loss": 0.6017, "step": 6255 }, { "epoch": 0.17177375068643602, "grad_norm": 0.37328845262527466, "learning_rate": 1.9648746805568035e-05, "loss": 0.5061, "step": 6256 }, { "epoch": 0.17180120812740252, "grad_norm": 0.3716537058353424, "learning_rate": 1.96486333335996e-05, "loss": 0.52, "step": 6257 }, { "epoch": 0.17182866556836904, "grad_norm": 0.371990829706192, "learning_rate": 1.9648519843633383e-05, "loss": 0.565, "step": 6258 }, { "epoch": 0.17185612300933553, "grad_norm": 0.3506389260292053, "learning_rate": 1.9648406335669595e-05, "loss": 0.5071, "step": 6259 }, { "epoch": 0.17188358045030203, "grad_norm": 0.3233608603477478, "learning_rate": 1.9648292809708455e-05, "loss": 0.5579, "step": 6260 }, { "epoch": 0.17191103789126852, "grad_norm": 0.36530137062072754, "learning_rate": 1.9648179265750165e-05, "loss": 0.5947, "step": 6261 }, { "epoch": 0.17193849533223504, "grad_norm": 0.3990533649921417, "learning_rate": 1.964806570379494e-05, "loss": 0.5365, "step": 6262 }, { "epoch": 0.17196595277320154, "grad_norm": 0.3801145851612091, "learning_rate": 1.9647952123842998e-05, "loss": 0.4898, "step": 6263 }, { "epoch": 0.17199341021416803, "grad_norm": 0.4027240574359894, "learning_rate": 1.9647838525894543e-05, "loss": 0.5881, "step": 6264 }, { "epoch": 0.17202086765513455, "grad_norm": 0.3803488314151764, "learning_rate": 1.964772490994979e-05, "loss": 0.5948, "step": 6265 }, { "epoch": 0.17204832509610105, "grad_norm": 0.8180059790611267, "learning_rate": 1.964761127600895e-05, "loss": 0.5254, "step": 6266 }, { "epoch": 0.17207578253706754, "grad_norm": 0.40044984221458435, "learning_rate": 1.964749762407224e-05, "loss": 0.6298, "step": 6267 }, { "epoch": 0.17210323997803403, "grad_norm": 0.36538684368133545, "learning_rate": 1.964738395413986e-05, "loss": 0.5953, "step": 6268 }, { "epoch": 0.17213069741900056, "grad_norm": 0.34932634234428406, "learning_rate": 1.964727026621204e-05, "loss": 0.5169, "step": 6269 }, { "epoch": 0.17215815485996705, "grad_norm": 0.33127936720848083, "learning_rate": 1.9647156560288974e-05, "loss": 0.4665, "step": 6270 }, { "epoch": 0.17218561230093354, "grad_norm": 0.4179372489452362, "learning_rate": 1.9647042836370888e-05, "loss": 0.67, "step": 6271 }, { "epoch": 0.17221306974190007, "grad_norm": 0.3473834991455078, "learning_rate": 1.9646929094457983e-05, "loss": 0.5288, "step": 6272 }, { "epoch": 0.17224052718286656, "grad_norm": 0.3818121552467346, "learning_rate": 1.964681533455048e-05, "loss": 0.5648, "step": 6273 }, { "epoch": 0.17226798462383305, "grad_norm": 0.3827752470970154, "learning_rate": 1.9646701556648585e-05, "loss": 0.4941, "step": 6274 }, { "epoch": 0.17229544206479955, "grad_norm": 0.3540970981121063, "learning_rate": 1.9646587760752515e-05, "loss": 0.5288, "step": 6275 }, { "epoch": 0.17232289950576607, "grad_norm": 0.38107189536094666, "learning_rate": 1.9646473946862477e-05, "loss": 0.4309, "step": 6276 }, { "epoch": 0.17235035694673256, "grad_norm": 0.3849301338195801, "learning_rate": 1.9646360114978688e-05, "loss": 0.5415, "step": 6277 }, { "epoch": 0.17237781438769906, "grad_norm": 0.40841755270957947, "learning_rate": 1.9646246265101357e-05, "loss": 0.5745, "step": 6278 }, { "epoch": 0.17240527182866558, "grad_norm": 0.4224247634410858, "learning_rate": 1.96461323972307e-05, "loss": 0.573, "step": 6279 }, { "epoch": 0.17243272926963207, "grad_norm": 0.3294138014316559, "learning_rate": 1.9646018511366923e-05, "loss": 0.5042, "step": 6280 }, { "epoch": 0.17246018671059857, "grad_norm": 0.3309253454208374, "learning_rate": 1.964590460751025e-05, "loss": 0.4453, "step": 6281 }, { "epoch": 0.17248764415156506, "grad_norm": 0.41621649265289307, "learning_rate": 1.9645790685660882e-05, "loss": 0.5913, "step": 6282 }, { "epoch": 0.17251510159253158, "grad_norm": 0.3749745190143585, "learning_rate": 1.9645676745819037e-05, "loss": 0.5453, "step": 6283 }, { "epoch": 0.17254255903349808, "grad_norm": 0.3563118278980255, "learning_rate": 1.9645562787984925e-05, "loss": 0.4887, "step": 6284 }, { "epoch": 0.17257001647446457, "grad_norm": 0.40276679396629333, "learning_rate": 1.964544881215876e-05, "loss": 0.5948, "step": 6285 }, { "epoch": 0.1725974739154311, "grad_norm": 0.36393219232559204, "learning_rate": 1.964533481834076e-05, "loss": 0.5534, "step": 6286 }, { "epoch": 0.1726249313563976, "grad_norm": 0.3739548325538635, "learning_rate": 1.9645220806531126e-05, "loss": 0.5336, "step": 6287 }, { "epoch": 0.17265238879736408, "grad_norm": 0.3317618668079376, "learning_rate": 1.9645106776730076e-05, "loss": 0.432, "step": 6288 }, { "epoch": 0.17267984623833058, "grad_norm": 0.34747225046157837, "learning_rate": 1.9644992728937825e-05, "loss": 0.5676, "step": 6289 }, { "epoch": 0.1727073036792971, "grad_norm": 0.3953397572040558, "learning_rate": 1.964487866315459e-05, "loss": 0.6594, "step": 6290 }, { "epoch": 0.1727347611202636, "grad_norm": 0.3388277292251587, "learning_rate": 1.964476457938057e-05, "loss": 0.4777, "step": 6291 }, { "epoch": 0.17276221856123009, "grad_norm": 0.734465479850769, "learning_rate": 1.964465047761599e-05, "loss": 0.637, "step": 6292 }, { "epoch": 0.1727896760021966, "grad_norm": 0.44092825055122375, "learning_rate": 1.9644536357861056e-05, "loss": 0.6321, "step": 6293 }, { "epoch": 0.1728171334431631, "grad_norm": 0.477285772562027, "learning_rate": 1.9644422220115983e-05, "loss": 0.6205, "step": 6294 }, { "epoch": 0.1728445908841296, "grad_norm": 0.3900772035121918, "learning_rate": 1.964430806438099e-05, "loss": 0.4534, "step": 6295 }, { "epoch": 0.1728720483250961, "grad_norm": 0.434730589389801, "learning_rate": 1.9644193890656277e-05, "loss": 0.4242, "step": 6296 }, { "epoch": 0.1728995057660626, "grad_norm": 0.3868487477302551, "learning_rate": 1.964407969894207e-05, "loss": 0.49, "step": 6297 }, { "epoch": 0.1729269632070291, "grad_norm": 0.4060467481613159, "learning_rate": 1.9643965489238574e-05, "loss": 0.6593, "step": 6298 }, { "epoch": 0.1729544206479956, "grad_norm": 0.36478641629219055, "learning_rate": 1.9643851261546006e-05, "loss": 0.5267, "step": 6299 }, { "epoch": 0.17298187808896212, "grad_norm": 0.31271886825561523, "learning_rate": 1.9643737015864576e-05, "loss": 0.507, "step": 6300 }, { "epoch": 0.17300933552992862, "grad_norm": 0.36355090141296387, "learning_rate": 1.9643622752194496e-05, "loss": 0.4224, "step": 6301 }, { "epoch": 0.1730367929708951, "grad_norm": 0.4620617926120758, "learning_rate": 1.9643508470535985e-05, "loss": 0.6253, "step": 6302 }, { "epoch": 0.1730642504118616, "grad_norm": 0.39504072070121765, "learning_rate": 1.9643394170889255e-05, "loss": 0.5086, "step": 6303 }, { "epoch": 0.17309170785282812, "grad_norm": 0.3610641658306122, "learning_rate": 1.9643279853254514e-05, "loss": 0.5337, "step": 6304 }, { "epoch": 0.17311916529379462, "grad_norm": 0.3229556977748871, "learning_rate": 1.9643165517631978e-05, "loss": 0.4702, "step": 6305 }, { "epoch": 0.1731466227347611, "grad_norm": 0.34793514013290405, "learning_rate": 1.964305116402186e-05, "loss": 0.5029, "step": 6306 }, { "epoch": 0.17317408017572763, "grad_norm": 0.34425070881843567, "learning_rate": 1.9642936792424377e-05, "loss": 0.5196, "step": 6307 }, { "epoch": 0.17320153761669413, "grad_norm": 0.36562421917915344, "learning_rate": 1.9642822402839737e-05, "loss": 0.5748, "step": 6308 }, { "epoch": 0.17322899505766062, "grad_norm": 0.37216031551361084, "learning_rate": 1.9642707995268155e-05, "loss": 0.526, "step": 6309 }, { "epoch": 0.17325645249862712, "grad_norm": 0.33419615030288696, "learning_rate": 1.9642593569709845e-05, "loss": 0.5306, "step": 6310 }, { "epoch": 0.17328390993959364, "grad_norm": 0.41445431113243103, "learning_rate": 1.9642479126165022e-05, "loss": 0.513, "step": 6311 }, { "epoch": 0.17331136738056013, "grad_norm": 0.38927289843559265, "learning_rate": 1.9642364664633897e-05, "loss": 0.4936, "step": 6312 }, { "epoch": 0.17333882482152663, "grad_norm": 0.4009464681148529, "learning_rate": 1.9642250185116686e-05, "loss": 0.5966, "step": 6313 }, { "epoch": 0.17336628226249315, "grad_norm": 0.3917927145957947, "learning_rate": 1.96421356876136e-05, "loss": 0.5538, "step": 6314 }, { "epoch": 0.17339373970345964, "grad_norm": 0.3748290240764618, "learning_rate": 1.964202117212485e-05, "loss": 0.5734, "step": 6315 }, { "epoch": 0.17342119714442614, "grad_norm": 0.458927720785141, "learning_rate": 1.9641906638650657e-05, "loss": 0.6385, "step": 6316 }, { "epoch": 0.17344865458539263, "grad_norm": 0.3693960905075073, "learning_rate": 1.964179208719123e-05, "loss": 0.5027, "step": 6317 }, { "epoch": 0.17347611202635915, "grad_norm": 0.4389669895172119, "learning_rate": 1.9641677517746784e-05, "loss": 0.5542, "step": 6318 }, { "epoch": 0.17350356946732565, "grad_norm": 0.3174303472042084, "learning_rate": 1.9641562930317534e-05, "loss": 0.43, "step": 6319 }, { "epoch": 0.17353102690829214, "grad_norm": 0.39448902010917664, "learning_rate": 1.9641448324903688e-05, "loss": 0.5343, "step": 6320 }, { "epoch": 0.17355848434925866, "grad_norm": 0.41735923290252686, "learning_rate": 1.9641333701505465e-05, "loss": 0.4641, "step": 6321 }, { "epoch": 0.17358594179022516, "grad_norm": 0.4339692294597626, "learning_rate": 1.9641219060123076e-05, "loss": 0.542, "step": 6322 }, { "epoch": 0.17361339923119165, "grad_norm": 0.39699801802635193, "learning_rate": 1.964110440075674e-05, "loss": 0.4867, "step": 6323 }, { "epoch": 0.17364085667215814, "grad_norm": 0.3425326645374298, "learning_rate": 1.9640989723406666e-05, "loss": 0.5199, "step": 6324 }, { "epoch": 0.17366831411312467, "grad_norm": 0.40067920088768005, "learning_rate": 1.964087502807307e-05, "loss": 0.508, "step": 6325 }, { "epoch": 0.17369577155409116, "grad_norm": 0.34886229038238525, "learning_rate": 1.964076031475616e-05, "loss": 0.5465, "step": 6326 }, { "epoch": 0.17372322899505765, "grad_norm": 0.32790178060531616, "learning_rate": 1.9640645583456158e-05, "loss": 0.4505, "step": 6327 }, { "epoch": 0.17375068643602415, "grad_norm": 0.4178152084350586, "learning_rate": 1.9640530834173278e-05, "loss": 0.4506, "step": 6328 }, { "epoch": 0.17377814387699067, "grad_norm": 0.3231436610221863, "learning_rate": 1.964041606690773e-05, "loss": 0.5097, "step": 6329 }, { "epoch": 0.17380560131795716, "grad_norm": 0.34856337308883667, "learning_rate": 1.9640301281659724e-05, "loss": 0.5981, "step": 6330 }, { "epoch": 0.17383305875892366, "grad_norm": 0.6018600463867188, "learning_rate": 1.9640186478429485e-05, "loss": 0.4509, "step": 6331 }, { "epoch": 0.17386051619989018, "grad_norm": 0.33788609504699707, "learning_rate": 1.964007165721722e-05, "loss": 0.4822, "step": 6332 }, { "epoch": 0.17388797364085667, "grad_norm": 0.3100905418395996, "learning_rate": 1.9639956818023144e-05, "loss": 0.4975, "step": 6333 }, { "epoch": 0.17391543108182317, "grad_norm": 0.3466983735561371, "learning_rate": 1.963984196084747e-05, "loss": 0.5216, "step": 6334 }, { "epoch": 0.17394288852278966, "grad_norm": 0.3879547119140625, "learning_rate": 1.9639727085690415e-05, "loss": 0.5636, "step": 6335 }, { "epoch": 0.17397034596375618, "grad_norm": 0.35947099328041077, "learning_rate": 1.9639612192552192e-05, "loss": 0.5285, "step": 6336 }, { "epoch": 0.17399780340472268, "grad_norm": 0.4750586450099945, "learning_rate": 1.963949728143302e-05, "loss": 0.6105, "step": 6337 }, { "epoch": 0.17402526084568917, "grad_norm": 0.4179497957229614, "learning_rate": 1.9639382352333107e-05, "loss": 0.5225, "step": 6338 }, { "epoch": 0.1740527182866557, "grad_norm": 0.4021699130535126, "learning_rate": 1.9639267405252668e-05, "loss": 0.6459, "step": 6339 }, { "epoch": 0.1740801757276222, "grad_norm": 0.3746078312397003, "learning_rate": 1.963915244019192e-05, "loss": 0.4356, "step": 6340 }, { "epoch": 0.17410763316858868, "grad_norm": 0.3728315532207489, "learning_rate": 1.9639037457151072e-05, "loss": 0.5758, "step": 6341 }, { "epoch": 0.17413509060955518, "grad_norm": 0.327722430229187, "learning_rate": 1.963892245613035e-05, "loss": 0.4156, "step": 6342 }, { "epoch": 0.1741625480505217, "grad_norm": 0.4068165719509125, "learning_rate": 1.9638807437129955e-05, "loss": 0.5343, "step": 6343 }, { "epoch": 0.1741900054914882, "grad_norm": 0.35643288493156433, "learning_rate": 1.963869240015011e-05, "loss": 0.5954, "step": 6344 }, { "epoch": 0.1742174629324547, "grad_norm": 0.3351595401763916, "learning_rate": 1.9638577345191028e-05, "loss": 0.4953, "step": 6345 }, { "epoch": 0.1742449203734212, "grad_norm": 0.34982210397720337, "learning_rate": 1.963846227225292e-05, "loss": 0.5311, "step": 6346 }, { "epoch": 0.1742723778143877, "grad_norm": 0.36710089445114136, "learning_rate": 1.9638347181336008e-05, "loss": 0.5392, "step": 6347 }, { "epoch": 0.1742998352553542, "grad_norm": 0.34796142578125, "learning_rate": 1.9638232072440503e-05, "loss": 0.5725, "step": 6348 }, { "epoch": 0.1743272926963207, "grad_norm": 0.36021822690963745, "learning_rate": 1.9638116945566614e-05, "loss": 0.553, "step": 6349 }, { "epoch": 0.1743547501372872, "grad_norm": 0.36497893929481506, "learning_rate": 1.9638001800714563e-05, "loss": 0.5909, "step": 6350 }, { "epoch": 0.1743822075782537, "grad_norm": 0.3209497332572937, "learning_rate": 1.9637886637884563e-05, "loss": 0.4572, "step": 6351 }, { "epoch": 0.1744096650192202, "grad_norm": 0.3253692090511322, "learning_rate": 1.9637771457076828e-05, "loss": 0.5359, "step": 6352 }, { "epoch": 0.17443712246018672, "grad_norm": 0.34648871421813965, "learning_rate": 1.9637656258291574e-05, "loss": 0.4315, "step": 6353 }, { "epoch": 0.17446457990115322, "grad_norm": 0.3678458333015442, "learning_rate": 1.9637541041529014e-05, "loss": 0.5235, "step": 6354 }, { "epoch": 0.1744920373421197, "grad_norm": 0.342326819896698, "learning_rate": 1.9637425806789366e-05, "loss": 0.5383, "step": 6355 }, { "epoch": 0.1745194947830862, "grad_norm": 0.3421344459056854, "learning_rate": 1.963731055407284e-05, "loss": 0.5501, "step": 6356 }, { "epoch": 0.17454695222405273, "grad_norm": 0.3886779844760895, "learning_rate": 1.9637195283379652e-05, "loss": 0.5708, "step": 6357 }, { "epoch": 0.17457440966501922, "grad_norm": 0.360370397567749, "learning_rate": 1.9637079994710026e-05, "loss": 0.484, "step": 6358 }, { "epoch": 0.17460186710598571, "grad_norm": 0.40196460485458374, "learning_rate": 1.9636964688064165e-05, "loss": 0.5759, "step": 6359 }, { "epoch": 0.17462932454695224, "grad_norm": 0.4451005458831787, "learning_rate": 1.963684936344229e-05, "loss": 0.5913, "step": 6360 }, { "epoch": 0.17465678198791873, "grad_norm": 0.3576388955116272, "learning_rate": 1.9636734020844614e-05, "loss": 0.56, "step": 6361 }, { "epoch": 0.17468423942888522, "grad_norm": 0.37611058354377747, "learning_rate": 1.9636618660271354e-05, "loss": 0.6004, "step": 6362 }, { "epoch": 0.17471169686985172, "grad_norm": 0.5051586627960205, "learning_rate": 1.963650328172273e-05, "loss": 0.5222, "step": 6363 }, { "epoch": 0.17473915431081824, "grad_norm": 0.3770478665828705, "learning_rate": 1.9636387885198946e-05, "loss": 0.6314, "step": 6364 }, { "epoch": 0.17476661175178473, "grad_norm": 0.3351290225982666, "learning_rate": 1.9636272470700224e-05, "loss": 0.5561, "step": 6365 }, { "epoch": 0.17479406919275123, "grad_norm": 0.3918443024158478, "learning_rate": 1.963615703822678e-05, "loss": 0.5063, "step": 6366 }, { "epoch": 0.17482152663371775, "grad_norm": 0.3676370084285736, "learning_rate": 1.9636041587778824e-05, "loss": 0.5228, "step": 6367 }, { "epoch": 0.17484898407468424, "grad_norm": 0.3402837812900543, "learning_rate": 1.9635926119356575e-05, "loss": 0.5562, "step": 6368 }, { "epoch": 0.17487644151565074, "grad_norm": 0.39500725269317627, "learning_rate": 1.9635810632960254e-05, "loss": 0.5944, "step": 6369 }, { "epoch": 0.17490389895661723, "grad_norm": 0.3503170311450958, "learning_rate": 1.9635695128590066e-05, "loss": 0.495, "step": 6370 }, { "epoch": 0.17493135639758375, "grad_norm": 0.35058295726776123, "learning_rate": 1.9635579606246232e-05, "loss": 0.5029, "step": 6371 }, { "epoch": 0.17495881383855025, "grad_norm": 0.40651723742485046, "learning_rate": 1.963546406592897e-05, "loss": 0.5571, "step": 6372 }, { "epoch": 0.17498627127951674, "grad_norm": 0.3694835603237152, "learning_rate": 1.9635348507638486e-05, "loss": 0.5008, "step": 6373 }, { "epoch": 0.17501372872048326, "grad_norm": 0.3399786949157715, "learning_rate": 1.9635232931375005e-05, "loss": 0.5716, "step": 6374 }, { "epoch": 0.17504118616144976, "grad_norm": 0.39695432782173157, "learning_rate": 1.963511733713874e-05, "loss": 0.627, "step": 6375 }, { "epoch": 0.17506864360241625, "grad_norm": 0.365743488073349, "learning_rate": 1.9635001724929906e-05, "loss": 0.6279, "step": 6376 }, { "epoch": 0.17509610104338275, "grad_norm": 0.3500584363937378, "learning_rate": 1.9634886094748718e-05, "loss": 0.5325, "step": 6377 }, { "epoch": 0.17512355848434927, "grad_norm": 0.43675854802131653, "learning_rate": 1.9634770446595396e-05, "loss": 0.5701, "step": 6378 }, { "epoch": 0.17515101592531576, "grad_norm": 0.3514039218425751, "learning_rate": 1.9634654780470148e-05, "loss": 0.5671, "step": 6379 }, { "epoch": 0.17517847336628226, "grad_norm": 0.3806209862232208, "learning_rate": 1.9634539096373193e-05, "loss": 0.5211, "step": 6380 }, { "epoch": 0.17520593080724878, "grad_norm": 0.4500492513179779, "learning_rate": 1.963442339430475e-05, "loss": 0.5731, "step": 6381 }, { "epoch": 0.17523338824821527, "grad_norm": 0.3718188405036926, "learning_rate": 1.963430767426503e-05, "loss": 0.5612, "step": 6382 }, { "epoch": 0.17526084568918177, "grad_norm": 0.4168740510940552, "learning_rate": 1.9634191936254253e-05, "loss": 0.5218, "step": 6383 }, { "epoch": 0.17528830313014826, "grad_norm": 0.34360915422439575, "learning_rate": 1.9634076180272633e-05, "loss": 0.5029, "step": 6384 }, { "epoch": 0.17531576057111478, "grad_norm": 0.359855055809021, "learning_rate": 1.9633960406320386e-05, "loss": 0.5945, "step": 6385 }, { "epoch": 0.17534321801208128, "grad_norm": 0.35300198197364807, "learning_rate": 1.963384461439773e-05, "loss": 0.5319, "step": 6386 }, { "epoch": 0.17537067545304777, "grad_norm": 0.3802572190761566, "learning_rate": 1.9633728804504874e-05, "loss": 0.5856, "step": 6387 }, { "epoch": 0.1753981328940143, "grad_norm": 0.39630258083343506, "learning_rate": 1.9633612976642043e-05, "loss": 0.5146, "step": 6388 }, { "epoch": 0.17542559033498079, "grad_norm": 0.3693968951702118, "learning_rate": 1.963349713080945e-05, "loss": 0.55, "step": 6389 }, { "epoch": 0.17545304777594728, "grad_norm": 0.4086429178714752, "learning_rate": 1.9633381267007308e-05, "loss": 0.578, "step": 6390 }, { "epoch": 0.17548050521691377, "grad_norm": 0.4700150489807129, "learning_rate": 1.9633265385235834e-05, "loss": 0.488, "step": 6391 }, { "epoch": 0.1755079626578803, "grad_norm": 0.3728618025779724, "learning_rate": 1.9633149485495247e-05, "loss": 0.5163, "step": 6392 }, { "epoch": 0.1755354200988468, "grad_norm": 0.35823559761047363, "learning_rate": 1.9633033567785765e-05, "loss": 0.5552, "step": 6393 }, { "epoch": 0.17556287753981328, "grad_norm": 0.3725329637527466, "learning_rate": 1.9632917632107596e-05, "loss": 0.6232, "step": 6394 }, { "epoch": 0.17559033498077978, "grad_norm": 0.35217419266700745, "learning_rate": 1.9632801678460965e-05, "loss": 0.608, "step": 6395 }, { "epoch": 0.1756177924217463, "grad_norm": 0.3340470790863037, "learning_rate": 1.963268570684608e-05, "loss": 0.616, "step": 6396 }, { "epoch": 0.1756452498627128, "grad_norm": 0.32194504141807556, "learning_rate": 1.9632569717263164e-05, "loss": 0.4188, "step": 6397 }, { "epoch": 0.1756727073036793, "grad_norm": 0.386080801486969, "learning_rate": 1.9632453709712434e-05, "loss": 0.597, "step": 6398 }, { "epoch": 0.1757001647446458, "grad_norm": 0.38633960485458374, "learning_rate": 1.96323376841941e-05, "loss": 0.5767, "step": 6399 }, { "epoch": 0.1757276221856123, "grad_norm": 0.40756142139434814, "learning_rate": 1.9632221640708378e-05, "loss": 0.5511, "step": 6400 }, { "epoch": 0.1757550796265788, "grad_norm": 0.4353269934654236, "learning_rate": 1.9632105579255497e-05, "loss": 0.6244, "step": 6401 }, { "epoch": 0.1757825370675453, "grad_norm": 0.4366411864757538, "learning_rate": 1.963198949983566e-05, "loss": 0.5699, "step": 6402 }, { "epoch": 0.1758099945085118, "grad_norm": 0.37848639488220215, "learning_rate": 1.9631873402449087e-05, "loss": 0.5527, "step": 6403 }, { "epoch": 0.1758374519494783, "grad_norm": 0.33644554018974304, "learning_rate": 1.9631757287096e-05, "loss": 0.539, "step": 6404 }, { "epoch": 0.1758649093904448, "grad_norm": 0.37744849920272827, "learning_rate": 1.963164115377661e-05, "loss": 0.5777, "step": 6405 }, { "epoch": 0.17589236683141132, "grad_norm": 0.40562304854393005, "learning_rate": 1.9631525002491136e-05, "loss": 0.5727, "step": 6406 }, { "epoch": 0.17591982427237782, "grad_norm": 0.37823766469955444, "learning_rate": 1.9631408833239793e-05, "loss": 0.6612, "step": 6407 }, { "epoch": 0.1759472817133443, "grad_norm": 0.3302900791168213, "learning_rate": 1.9631292646022797e-05, "loss": 0.4957, "step": 6408 }, { "epoch": 0.1759747391543108, "grad_norm": 0.3334633409976959, "learning_rate": 1.9631176440840368e-05, "loss": 0.4909, "step": 6409 }, { "epoch": 0.17600219659527733, "grad_norm": 0.38149914145469666, "learning_rate": 1.9631060217692722e-05, "loss": 0.6034, "step": 6410 }, { "epoch": 0.17602965403624382, "grad_norm": 0.3516790270805359, "learning_rate": 1.9630943976580073e-05, "loss": 0.5003, "step": 6411 }, { "epoch": 0.17605711147721032, "grad_norm": 0.3697787821292877, "learning_rate": 1.9630827717502642e-05, "loss": 0.5082, "step": 6412 }, { "epoch": 0.17608456891817684, "grad_norm": 0.3624064028263092, "learning_rate": 1.9630711440460638e-05, "loss": 0.6165, "step": 6413 }, { "epoch": 0.17611202635914333, "grad_norm": 0.3812905251979828, "learning_rate": 1.963059514545429e-05, "loss": 0.6276, "step": 6414 }, { "epoch": 0.17613948380010983, "grad_norm": 0.35011351108551025, "learning_rate": 1.9630478832483802e-05, "loss": 0.5937, "step": 6415 }, { "epoch": 0.17616694124107632, "grad_norm": 0.3502063751220703, "learning_rate": 1.96303625015494e-05, "loss": 0.5393, "step": 6416 }, { "epoch": 0.17619439868204284, "grad_norm": 0.3670531213283539, "learning_rate": 1.96302461526513e-05, "loss": 0.5753, "step": 6417 }, { "epoch": 0.17622185612300933, "grad_norm": 0.35940277576446533, "learning_rate": 1.9630129785789717e-05, "loss": 0.4311, "step": 6418 }, { "epoch": 0.17624931356397583, "grad_norm": 0.3208222985267639, "learning_rate": 1.9630013400964868e-05, "loss": 0.5306, "step": 6419 }, { "epoch": 0.17627677100494235, "grad_norm": 0.3396967053413391, "learning_rate": 1.962989699817697e-05, "loss": 0.5182, "step": 6420 }, { "epoch": 0.17630422844590884, "grad_norm": 0.3762543797492981, "learning_rate": 1.962978057742624e-05, "loss": 0.471, "step": 6421 }, { "epoch": 0.17633168588687534, "grad_norm": 0.40506893396377563, "learning_rate": 1.9629664138712898e-05, "loss": 0.5752, "step": 6422 }, { "epoch": 0.17635914332784183, "grad_norm": 0.374995619058609, "learning_rate": 1.9629547682037157e-05, "loss": 0.6179, "step": 6423 }, { "epoch": 0.17638660076880835, "grad_norm": 0.3484170734882355, "learning_rate": 1.9629431207399236e-05, "loss": 0.4731, "step": 6424 }, { "epoch": 0.17641405820977485, "grad_norm": 0.3957699239253998, "learning_rate": 1.9629314714799354e-05, "loss": 0.628, "step": 6425 }, { "epoch": 0.17644151565074134, "grad_norm": 0.36423155665397644, "learning_rate": 1.9629198204237726e-05, "loss": 0.5406, "step": 6426 }, { "epoch": 0.17646897309170786, "grad_norm": 0.39159882068634033, "learning_rate": 1.962908167571457e-05, "loss": 0.6011, "step": 6427 }, { "epoch": 0.17649643053267436, "grad_norm": 0.37953871488571167, "learning_rate": 1.96289651292301e-05, "loss": 0.6316, "step": 6428 }, { "epoch": 0.17652388797364085, "grad_norm": 0.38535475730895996, "learning_rate": 1.9628848564784543e-05, "loss": 0.5554, "step": 6429 }, { "epoch": 0.17655134541460735, "grad_norm": 0.37640494108200073, "learning_rate": 1.9628731982378108e-05, "loss": 0.615, "step": 6430 }, { "epoch": 0.17657880285557387, "grad_norm": 0.41096341609954834, "learning_rate": 1.9628615382011014e-05, "loss": 0.4458, "step": 6431 }, { "epoch": 0.17660626029654036, "grad_norm": 1.0131949186325073, "learning_rate": 1.962849876368348e-05, "loss": 0.5573, "step": 6432 }, { "epoch": 0.17663371773750686, "grad_norm": 0.4212898910045624, "learning_rate": 1.962838212739572e-05, "loss": 0.5766, "step": 6433 }, { "epoch": 0.17666117517847338, "grad_norm": 0.37767308950424194, "learning_rate": 1.962826547314796e-05, "loss": 0.5755, "step": 6434 }, { "epoch": 0.17668863261943987, "grad_norm": 0.3814872205257416, "learning_rate": 1.9628148800940407e-05, "loss": 0.6103, "step": 6435 }, { "epoch": 0.17671609006040637, "grad_norm": 0.3215424418449402, "learning_rate": 1.962803211077328e-05, "loss": 0.4802, "step": 6436 }, { "epoch": 0.17674354750137286, "grad_norm": 0.36425426602363586, "learning_rate": 1.9627915402646808e-05, "loss": 0.5513, "step": 6437 }, { "epoch": 0.17677100494233938, "grad_norm": 0.36910152435302734, "learning_rate": 1.9627798676561197e-05, "loss": 0.5033, "step": 6438 }, { "epoch": 0.17679846238330588, "grad_norm": 0.5513092875480652, "learning_rate": 1.962768193251667e-05, "loss": 0.5381, "step": 6439 }, { "epoch": 0.17682591982427237, "grad_norm": 0.3624698519706726, "learning_rate": 1.9627565170513444e-05, "loss": 0.5308, "step": 6440 }, { "epoch": 0.1768533772652389, "grad_norm": 0.37560200691223145, "learning_rate": 1.9627448390551736e-05, "loss": 0.4949, "step": 6441 }, { "epoch": 0.1768808347062054, "grad_norm": 0.3342743217945099, "learning_rate": 1.962733159263176e-05, "loss": 0.521, "step": 6442 }, { "epoch": 0.17690829214717188, "grad_norm": 0.4421239495277405, "learning_rate": 1.9627214776753742e-05, "loss": 0.5194, "step": 6443 }, { "epoch": 0.17693574958813837, "grad_norm": 0.37814947962760925, "learning_rate": 1.9627097942917896e-05, "loss": 0.5377, "step": 6444 }, { "epoch": 0.1769632070291049, "grad_norm": 0.608311116695404, "learning_rate": 1.9626981091124436e-05, "loss": 0.561, "step": 6445 }, { "epoch": 0.1769906644700714, "grad_norm": 0.33120518922805786, "learning_rate": 1.962686422137359e-05, "loss": 0.4672, "step": 6446 }, { "epoch": 0.17701812191103788, "grad_norm": 0.5176143050193787, "learning_rate": 1.9626747333665565e-05, "loss": 0.5516, "step": 6447 }, { "epoch": 0.1770455793520044, "grad_norm": 0.42105114459991455, "learning_rate": 1.9626630428000583e-05, "loss": 0.5555, "step": 6448 }, { "epoch": 0.1770730367929709, "grad_norm": 0.3341221213340759, "learning_rate": 1.9626513504378865e-05, "loss": 0.5196, "step": 6449 }, { "epoch": 0.1771004942339374, "grad_norm": 0.37322095036506653, "learning_rate": 1.9626396562800628e-05, "loss": 0.5776, "step": 6450 }, { "epoch": 0.1771279516749039, "grad_norm": 0.36091160774230957, "learning_rate": 1.9626279603266085e-05, "loss": 0.5891, "step": 6451 }, { "epoch": 0.1771554091158704, "grad_norm": 0.34840884804725647, "learning_rate": 1.962616262577546e-05, "loss": 0.5028, "step": 6452 }, { "epoch": 0.1771828665568369, "grad_norm": 0.39045479893684387, "learning_rate": 1.962604563032897e-05, "loss": 0.5611, "step": 6453 }, { "epoch": 0.1772103239978034, "grad_norm": 0.45066890120506287, "learning_rate": 1.9625928616926836e-05, "loss": 0.4467, "step": 6454 }, { "epoch": 0.17723778143876992, "grad_norm": 0.3778124153614044, "learning_rate": 1.9625811585569266e-05, "loss": 0.5346, "step": 6455 }, { "epoch": 0.17726523887973641, "grad_norm": 0.381182461977005, "learning_rate": 1.962569453625649e-05, "loss": 0.4932, "step": 6456 }, { "epoch": 0.1772926963207029, "grad_norm": 0.3399115204811096, "learning_rate": 1.962557746898872e-05, "loss": 0.5037, "step": 6457 }, { "epoch": 0.1773201537616694, "grad_norm": 0.32503873109817505, "learning_rate": 1.9625460383766177e-05, "loss": 0.514, "step": 6458 }, { "epoch": 0.17734761120263592, "grad_norm": 0.358661949634552, "learning_rate": 1.9625343280589077e-05, "loss": 0.4876, "step": 6459 }, { "epoch": 0.17737506864360242, "grad_norm": 0.43296414613723755, "learning_rate": 1.962522615945764e-05, "loss": 0.5611, "step": 6460 }, { "epoch": 0.1774025260845689, "grad_norm": 0.4234931468963623, "learning_rate": 1.9625109020372085e-05, "loss": 0.5488, "step": 6461 }, { "epoch": 0.1774299835255354, "grad_norm": 0.37605953216552734, "learning_rate": 1.962499186333263e-05, "loss": 0.4267, "step": 6462 }, { "epoch": 0.17745744096650193, "grad_norm": 0.319707989692688, "learning_rate": 1.962487468833949e-05, "loss": 0.4843, "step": 6463 }, { "epoch": 0.17748489840746842, "grad_norm": 0.3673328161239624, "learning_rate": 1.962475749539289e-05, "loss": 0.5378, "step": 6464 }, { "epoch": 0.17751235584843492, "grad_norm": 0.3181294798851013, "learning_rate": 1.9624640284493045e-05, "loss": 0.4759, "step": 6465 }, { "epoch": 0.17753981328940144, "grad_norm": 0.3667600750923157, "learning_rate": 1.9624523055640172e-05, "loss": 0.4326, "step": 6466 }, { "epoch": 0.17756727073036793, "grad_norm": 0.42284926772117615, "learning_rate": 1.9624405808834494e-05, "loss": 0.5874, "step": 6467 }, { "epoch": 0.17759472817133443, "grad_norm": 0.3395390212535858, "learning_rate": 1.9624288544076226e-05, "loss": 0.4754, "step": 6468 }, { "epoch": 0.17762218561230092, "grad_norm": 0.3464043140411377, "learning_rate": 1.962417126136559e-05, "loss": 0.5304, "step": 6469 }, { "epoch": 0.17764964305326744, "grad_norm": 0.345477819442749, "learning_rate": 1.96240539607028e-05, "loss": 0.5944, "step": 6470 }, { "epoch": 0.17767710049423394, "grad_norm": 0.3609393537044525, "learning_rate": 1.962393664208808e-05, "loss": 0.4542, "step": 6471 }, { "epoch": 0.17770455793520043, "grad_norm": 0.37982383370399475, "learning_rate": 1.9623819305521645e-05, "loss": 0.5055, "step": 6472 }, { "epoch": 0.17773201537616695, "grad_norm": 0.341413289308548, "learning_rate": 1.9623701951003717e-05, "loss": 0.5869, "step": 6473 }, { "epoch": 0.17775947281713345, "grad_norm": 0.34792789816856384, "learning_rate": 1.9623584578534514e-05, "loss": 0.5154, "step": 6474 }, { "epoch": 0.17778693025809994, "grad_norm": 0.3972724378108978, "learning_rate": 1.962346718811425e-05, "loss": 0.5417, "step": 6475 }, { "epoch": 0.17781438769906643, "grad_norm": 0.4011193811893463, "learning_rate": 1.9623349779743152e-05, "loss": 0.5466, "step": 6476 }, { "epoch": 0.17784184514003296, "grad_norm": 0.34570643305778503, "learning_rate": 1.9623232353421434e-05, "loss": 0.5971, "step": 6477 }, { "epoch": 0.17786930258099945, "grad_norm": 0.361625999212265, "learning_rate": 1.9623114909149316e-05, "loss": 0.5217, "step": 6478 }, { "epoch": 0.17789676002196594, "grad_norm": 0.4410037398338318, "learning_rate": 1.962299744692702e-05, "loss": 0.5999, "step": 6479 }, { "epoch": 0.17792421746293247, "grad_norm": 0.3798205256462097, "learning_rate": 1.962287996675476e-05, "loss": 0.5198, "step": 6480 }, { "epoch": 0.17795167490389896, "grad_norm": 0.4130730628967285, "learning_rate": 1.962276246863276e-05, "loss": 0.5762, "step": 6481 }, { "epoch": 0.17797913234486545, "grad_norm": 0.37875789403915405, "learning_rate": 1.9622644952561233e-05, "loss": 0.5641, "step": 6482 }, { "epoch": 0.17800658978583195, "grad_norm": 0.4934975802898407, "learning_rate": 1.9622527418540404e-05, "loss": 0.5819, "step": 6483 }, { "epoch": 0.17803404722679847, "grad_norm": 0.36091047525405884, "learning_rate": 1.9622409866570492e-05, "loss": 0.5, "step": 6484 }, { "epoch": 0.17806150466776496, "grad_norm": 0.33989667892456055, "learning_rate": 1.962229229665171e-05, "loss": 0.5643, "step": 6485 }, { "epoch": 0.17808896210873146, "grad_norm": 0.3599645793437958, "learning_rate": 1.9622174708784284e-05, "loss": 0.5775, "step": 6486 }, { "epoch": 0.17811641954969798, "grad_norm": 0.7547571063041687, "learning_rate": 1.9622057102968434e-05, "loss": 0.5833, "step": 6487 }, { "epoch": 0.17814387699066447, "grad_norm": 0.3046639561653137, "learning_rate": 1.9621939479204373e-05, "loss": 0.549, "step": 6488 }, { "epoch": 0.17817133443163097, "grad_norm": 0.32650133967399597, "learning_rate": 1.9621821837492324e-05, "loss": 0.4548, "step": 6489 }, { "epoch": 0.17819879187259746, "grad_norm": 0.3586108088493347, "learning_rate": 1.9621704177832507e-05, "loss": 0.5622, "step": 6490 }, { "epoch": 0.17822624931356398, "grad_norm": 0.36318275332450867, "learning_rate": 1.9621586500225143e-05, "loss": 0.4764, "step": 6491 }, { "epoch": 0.17825370675453048, "grad_norm": 0.4827343225479126, "learning_rate": 1.9621468804670445e-05, "loss": 0.5939, "step": 6492 }, { "epoch": 0.17828116419549697, "grad_norm": 0.39023643732070923, "learning_rate": 1.962135109116864e-05, "loss": 0.512, "step": 6493 }, { "epoch": 0.1783086216364635, "grad_norm": 0.3939274549484253, "learning_rate": 1.9621233359719946e-05, "loss": 0.5771, "step": 6494 }, { "epoch": 0.17833607907743, "grad_norm": 0.36213183403015137, "learning_rate": 1.9621115610324578e-05, "loss": 0.4758, "step": 6495 }, { "epoch": 0.17836353651839648, "grad_norm": 0.3655412495136261, "learning_rate": 1.9620997842982757e-05, "loss": 0.5309, "step": 6496 }, { "epoch": 0.17839099395936298, "grad_norm": 0.36725351214408875, "learning_rate": 1.962088005769471e-05, "loss": 0.5387, "step": 6497 }, { "epoch": 0.1784184514003295, "grad_norm": 0.3786783516407013, "learning_rate": 1.9620762254460647e-05, "loss": 0.4109, "step": 6498 }, { "epoch": 0.178445908841296, "grad_norm": 0.3747915029525757, "learning_rate": 1.9620644433280792e-05, "loss": 0.5941, "step": 6499 }, { "epoch": 0.17847336628226249, "grad_norm": 0.3261842131614685, "learning_rate": 1.9620526594155365e-05, "loss": 0.5169, "step": 6500 }, { "epoch": 0.178500823723229, "grad_norm": 0.37986063957214355, "learning_rate": 1.9620408737084586e-05, "loss": 0.6116, "step": 6501 }, { "epoch": 0.1785282811641955, "grad_norm": 0.4959363341331482, "learning_rate": 1.9620290862068675e-05, "loss": 0.5989, "step": 6502 }, { "epoch": 0.178555738605162, "grad_norm": 0.32684022188186646, "learning_rate": 1.962017296910785e-05, "loss": 0.4652, "step": 6503 }, { "epoch": 0.1785831960461285, "grad_norm": 0.2913042902946472, "learning_rate": 1.962005505820233e-05, "loss": 0.4077, "step": 6504 }, { "epoch": 0.178610653487095, "grad_norm": 0.34130430221557617, "learning_rate": 1.961993712935234e-05, "loss": 0.4612, "step": 6505 }, { "epoch": 0.1786381109280615, "grad_norm": 0.3453182876110077, "learning_rate": 1.9619819182558094e-05, "loss": 0.4867, "step": 6506 }, { "epoch": 0.178665568369028, "grad_norm": 0.3485899567604065, "learning_rate": 1.9619701217819815e-05, "loss": 0.4725, "step": 6507 }, { "epoch": 0.17869302580999452, "grad_norm": 0.3775183856487274, "learning_rate": 1.9619583235137726e-05, "loss": 0.5559, "step": 6508 }, { "epoch": 0.17872048325096102, "grad_norm": 0.37497255206108093, "learning_rate": 1.9619465234512044e-05, "loss": 0.5232, "step": 6509 }, { "epoch": 0.1787479406919275, "grad_norm": 0.3948402404785156, "learning_rate": 1.9619347215942985e-05, "loss": 0.5916, "step": 6510 }, { "epoch": 0.178775398132894, "grad_norm": 0.37442898750305176, "learning_rate": 1.9619229179430777e-05, "loss": 0.4675, "step": 6511 }, { "epoch": 0.17880285557386053, "grad_norm": 0.34123048186302185, "learning_rate": 1.9619111124975635e-05, "loss": 0.5515, "step": 6512 }, { "epoch": 0.17883031301482702, "grad_norm": 0.3985118567943573, "learning_rate": 1.9618993052577782e-05, "loss": 0.5369, "step": 6513 }, { "epoch": 0.1788577704557935, "grad_norm": 0.33410462737083435, "learning_rate": 1.9618874962237436e-05, "loss": 0.4894, "step": 6514 }, { "epoch": 0.17888522789676004, "grad_norm": 0.3095585107803345, "learning_rate": 1.9618756853954818e-05, "loss": 0.4385, "step": 6515 }, { "epoch": 0.17891268533772653, "grad_norm": 0.3615621030330658, "learning_rate": 1.9618638727730152e-05, "loss": 0.5585, "step": 6516 }, { "epoch": 0.17894014277869302, "grad_norm": 0.3658291697502136, "learning_rate": 1.961852058356365e-05, "loss": 0.4886, "step": 6517 }, { "epoch": 0.17896760021965952, "grad_norm": 0.38632744550704956, "learning_rate": 1.961840242145554e-05, "loss": 0.5957, "step": 6518 }, { "epoch": 0.17899505766062604, "grad_norm": 0.34505167603492737, "learning_rate": 1.9618284241406038e-05, "loss": 0.507, "step": 6519 }, { "epoch": 0.17902251510159253, "grad_norm": 0.47317299246788025, "learning_rate": 1.9618166043415367e-05, "loss": 0.5056, "step": 6520 }, { "epoch": 0.17904997254255903, "grad_norm": 0.5056530833244324, "learning_rate": 1.9618047827483745e-05, "loss": 0.5116, "step": 6521 }, { "epoch": 0.17907742998352555, "grad_norm": 0.41631048917770386, "learning_rate": 1.96179295936114e-05, "loss": 0.4989, "step": 6522 }, { "epoch": 0.17910488742449204, "grad_norm": 0.3865056037902832, "learning_rate": 1.9617811341798537e-05, "loss": 0.4802, "step": 6523 }, { "epoch": 0.17913234486545854, "grad_norm": 0.42819109559059143, "learning_rate": 1.9617693072045395e-05, "loss": 0.5655, "step": 6524 }, { "epoch": 0.17915980230642503, "grad_norm": 0.38726502656936646, "learning_rate": 1.9617574784352183e-05, "loss": 0.5293, "step": 6525 }, { "epoch": 0.17918725974739155, "grad_norm": 0.3588806390762329, "learning_rate": 1.961745647871912e-05, "loss": 0.5258, "step": 6526 }, { "epoch": 0.17921471718835805, "grad_norm": 0.3579312562942505, "learning_rate": 1.9617338155146437e-05, "loss": 0.5274, "step": 6527 }, { "epoch": 0.17924217462932454, "grad_norm": 0.4317275583744049, "learning_rate": 1.9617219813634344e-05, "loss": 0.5147, "step": 6528 }, { "epoch": 0.17926963207029104, "grad_norm": 0.41484734416007996, "learning_rate": 1.961710145418307e-05, "loss": 0.5511, "step": 6529 }, { "epoch": 0.17929708951125756, "grad_norm": 0.32726767659187317, "learning_rate": 1.9616983076792833e-05, "loss": 0.5408, "step": 6530 }, { "epoch": 0.17932454695222405, "grad_norm": 0.4116527736186981, "learning_rate": 1.961686468146385e-05, "loss": 0.5183, "step": 6531 }, { "epoch": 0.17935200439319054, "grad_norm": 0.33590108156204224, "learning_rate": 1.961674626819635e-05, "loss": 0.5396, "step": 6532 }, { "epoch": 0.17937946183415707, "grad_norm": 0.32384952902793884, "learning_rate": 1.9616627836990543e-05, "loss": 0.496, "step": 6533 }, { "epoch": 0.17940691927512356, "grad_norm": 0.3532908856868744, "learning_rate": 1.9616509387846658e-05, "loss": 0.4062, "step": 6534 }, { "epoch": 0.17943437671609005, "grad_norm": 0.3925130069255829, "learning_rate": 1.9616390920764912e-05, "loss": 0.5593, "step": 6535 }, { "epoch": 0.17946183415705655, "grad_norm": 0.4175868332386017, "learning_rate": 1.9616272435745533e-05, "loss": 0.6246, "step": 6536 }, { "epoch": 0.17948929159802307, "grad_norm": 0.34742704033851624, "learning_rate": 1.9616153932788733e-05, "loss": 0.4864, "step": 6537 }, { "epoch": 0.17951674903898956, "grad_norm": 0.3763229250907898, "learning_rate": 1.9616035411894737e-05, "loss": 0.5038, "step": 6538 }, { "epoch": 0.17954420647995606, "grad_norm": 0.37009817361831665, "learning_rate": 1.9615916873063767e-05, "loss": 0.5427, "step": 6539 }, { "epoch": 0.17957166392092258, "grad_norm": 0.3447690010070801, "learning_rate": 1.9615798316296045e-05, "loss": 0.5838, "step": 6540 }, { "epoch": 0.17959912136188907, "grad_norm": 0.46372726559638977, "learning_rate": 1.9615679741591784e-05, "loss": 0.5603, "step": 6541 }, { "epoch": 0.17962657880285557, "grad_norm": 0.3795663118362427, "learning_rate": 1.9615561148951214e-05, "loss": 0.5842, "step": 6542 }, { "epoch": 0.17965403624382206, "grad_norm": 0.3541017472743988, "learning_rate": 1.9615442538374555e-05, "loss": 0.5125, "step": 6543 }, { "epoch": 0.17968149368478858, "grad_norm": 0.4066811203956604, "learning_rate": 1.9615323909862027e-05, "loss": 0.5624, "step": 6544 }, { "epoch": 0.17970895112575508, "grad_norm": 0.3183562457561493, "learning_rate": 1.9615205263413852e-05, "loss": 0.5093, "step": 6545 }, { "epoch": 0.17973640856672157, "grad_norm": 0.41035863757133484, "learning_rate": 1.9615086599030245e-05, "loss": 0.5609, "step": 6546 }, { "epoch": 0.1797638660076881, "grad_norm": 0.46291038393974304, "learning_rate": 1.9614967916711437e-05, "loss": 0.5101, "step": 6547 }, { "epoch": 0.1797913234486546, "grad_norm": 0.4031343460083008, "learning_rate": 1.9614849216457645e-05, "loss": 0.5251, "step": 6548 }, { "epoch": 0.17981878088962108, "grad_norm": 0.37913015484809875, "learning_rate": 1.961473049826909e-05, "loss": 0.6316, "step": 6549 }, { "epoch": 0.17984623833058758, "grad_norm": 0.36164912581443787, "learning_rate": 1.9614611762145996e-05, "loss": 0.5409, "step": 6550 }, { "epoch": 0.1798736957715541, "grad_norm": 0.4459293484687805, "learning_rate": 1.9614493008088576e-05, "loss": 0.4943, "step": 6551 }, { "epoch": 0.1799011532125206, "grad_norm": 0.39832809567451477, "learning_rate": 1.9614374236097063e-05, "loss": 0.6195, "step": 6552 }, { "epoch": 0.1799286106534871, "grad_norm": 0.37687790393829346, "learning_rate": 1.9614255446171674e-05, "loss": 0.5681, "step": 6553 }, { "epoch": 0.1799560680944536, "grad_norm": 0.4239349365234375, "learning_rate": 1.961413663831263e-05, "loss": 0.6174, "step": 6554 }, { "epoch": 0.1799835255354201, "grad_norm": 0.38683775067329407, "learning_rate": 1.9614017812520148e-05, "loss": 0.5497, "step": 6555 }, { "epoch": 0.1800109829763866, "grad_norm": 0.3462676703929901, "learning_rate": 1.9613898968794462e-05, "loss": 0.5197, "step": 6556 }, { "epoch": 0.1800384404173531, "grad_norm": 0.36432063579559326, "learning_rate": 1.961378010713578e-05, "loss": 0.5348, "step": 6557 }, { "epoch": 0.1800658978583196, "grad_norm": 0.35698938369750977, "learning_rate": 1.961366122754433e-05, "loss": 0.5294, "step": 6558 }, { "epoch": 0.1800933552992861, "grad_norm": 0.3782150149345398, "learning_rate": 1.9613542330020336e-05, "loss": 0.4948, "step": 6559 }, { "epoch": 0.1801208127402526, "grad_norm": 0.36652326583862305, "learning_rate": 1.9613423414564017e-05, "loss": 0.5846, "step": 6560 }, { "epoch": 0.18014827018121912, "grad_norm": 0.4745424687862396, "learning_rate": 1.9613304481175594e-05, "loss": 0.6733, "step": 6561 }, { "epoch": 0.18017572762218562, "grad_norm": 0.49931764602661133, "learning_rate": 1.961318552985529e-05, "loss": 0.5284, "step": 6562 }, { "epoch": 0.1802031850631521, "grad_norm": 0.37175244092941284, "learning_rate": 1.9613066560603326e-05, "loss": 0.535, "step": 6563 }, { "epoch": 0.1802306425041186, "grad_norm": 0.36032170057296753, "learning_rate": 1.9612947573419925e-05, "loss": 0.5767, "step": 6564 }, { "epoch": 0.18025809994508513, "grad_norm": 0.37396153807640076, "learning_rate": 1.961282856830531e-05, "loss": 0.4551, "step": 6565 }, { "epoch": 0.18028555738605162, "grad_norm": 0.40200379490852356, "learning_rate": 1.9612709545259703e-05, "loss": 0.5484, "step": 6566 }, { "epoch": 0.18031301482701811, "grad_norm": 0.44111600518226624, "learning_rate": 1.9612590504283324e-05, "loss": 0.4964, "step": 6567 }, { "epoch": 0.18034047226798464, "grad_norm": 0.42541831731796265, "learning_rate": 1.9612471445376395e-05, "loss": 0.5271, "step": 6568 }, { "epoch": 0.18036792970895113, "grad_norm": 0.4291318655014038, "learning_rate": 1.9612352368539137e-05, "loss": 0.5871, "step": 6569 }, { "epoch": 0.18039538714991762, "grad_norm": 0.336080402135849, "learning_rate": 1.9612233273771774e-05, "loss": 0.4928, "step": 6570 }, { "epoch": 0.18042284459088412, "grad_norm": 0.3522418141365051, "learning_rate": 1.9612114161074533e-05, "loss": 0.4903, "step": 6571 }, { "epoch": 0.18045030203185064, "grad_norm": 0.37974119186401367, "learning_rate": 1.9611995030447627e-05, "loss": 0.4994, "step": 6572 }, { "epoch": 0.18047775947281713, "grad_norm": 0.833219587802887, "learning_rate": 1.961187588189128e-05, "loss": 0.573, "step": 6573 }, { "epoch": 0.18050521691378363, "grad_norm": 0.4648749530315399, "learning_rate": 1.9611756715405722e-05, "loss": 0.5346, "step": 6574 }, { "epoch": 0.18053267435475015, "grad_norm": 0.31090807914733887, "learning_rate": 1.9611637530991167e-05, "loss": 0.541, "step": 6575 }, { "epoch": 0.18056013179571664, "grad_norm": 0.6276656985282898, "learning_rate": 1.961151832864784e-05, "loss": 0.559, "step": 6576 }, { "epoch": 0.18058758923668314, "grad_norm": 0.3842560350894928, "learning_rate": 1.9611399108375962e-05, "loss": 0.5556, "step": 6577 }, { "epoch": 0.18061504667764963, "grad_norm": 0.35249221324920654, "learning_rate": 1.9611279870175763e-05, "loss": 0.5701, "step": 6578 }, { "epoch": 0.18064250411861615, "grad_norm": 0.3619592785835266, "learning_rate": 1.9611160614047455e-05, "loss": 0.4796, "step": 6579 }, { "epoch": 0.18066996155958265, "grad_norm": 0.37015992403030396, "learning_rate": 1.9611041339991267e-05, "loss": 0.5812, "step": 6580 }, { "epoch": 0.18069741900054914, "grad_norm": 0.44630226492881775, "learning_rate": 1.9610922048007418e-05, "loss": 0.5477, "step": 6581 }, { "epoch": 0.18072487644151566, "grad_norm": 0.4043484032154083, "learning_rate": 1.9610802738096132e-05, "loss": 0.5643, "step": 6582 }, { "epoch": 0.18075233388248216, "grad_norm": 0.3628946840763092, "learning_rate": 1.9610683410257635e-05, "loss": 0.5816, "step": 6583 }, { "epoch": 0.18077979132344865, "grad_norm": 0.4177703261375427, "learning_rate": 1.961056406449214e-05, "loss": 0.5619, "step": 6584 }, { "epoch": 0.18080724876441515, "grad_norm": 0.47375360131263733, "learning_rate": 1.9610444700799883e-05, "loss": 0.562, "step": 6585 }, { "epoch": 0.18083470620538167, "grad_norm": 0.40963634848594666, "learning_rate": 1.9610325319181075e-05, "loss": 0.5467, "step": 6586 }, { "epoch": 0.18086216364634816, "grad_norm": 0.3774314522743225, "learning_rate": 1.9610205919635944e-05, "loss": 0.5998, "step": 6587 }, { "epoch": 0.18088962108731466, "grad_norm": 0.4255989193916321, "learning_rate": 1.9610086502164708e-05, "loss": 0.5866, "step": 6588 }, { "epoch": 0.18091707852828118, "grad_norm": 0.38105064630508423, "learning_rate": 1.9609967066767596e-05, "loss": 0.5948, "step": 6589 }, { "epoch": 0.18094453596924767, "grad_norm": 0.3226192593574524, "learning_rate": 1.960984761344483e-05, "loss": 0.5672, "step": 6590 }, { "epoch": 0.18097199341021417, "grad_norm": 0.33413857221603394, "learning_rate": 1.9609728142196628e-05, "loss": 0.5268, "step": 6591 }, { "epoch": 0.18099945085118066, "grad_norm": 0.3308675289154053, "learning_rate": 1.960960865302322e-05, "loss": 0.5193, "step": 6592 }, { "epoch": 0.18102690829214718, "grad_norm": 0.3591359555721283, "learning_rate": 1.9609489145924823e-05, "loss": 0.5873, "step": 6593 }, { "epoch": 0.18105436573311368, "grad_norm": 0.35315337777137756, "learning_rate": 1.960936962090166e-05, "loss": 0.5802, "step": 6594 }, { "epoch": 0.18108182317408017, "grad_norm": 0.380507230758667, "learning_rate": 1.9609250077953955e-05, "loss": 0.5422, "step": 6595 }, { "epoch": 0.18110928061504666, "grad_norm": 0.3569798171520233, "learning_rate": 1.9609130517081936e-05, "loss": 0.4997, "step": 6596 }, { "epoch": 0.18113673805601319, "grad_norm": 0.32641392946243286, "learning_rate": 1.9609010938285818e-05, "loss": 0.5357, "step": 6597 }, { "epoch": 0.18116419549697968, "grad_norm": 0.4071410894393921, "learning_rate": 1.9608891341565833e-05, "loss": 0.5302, "step": 6598 }, { "epoch": 0.18119165293794617, "grad_norm": 0.4328916072845459, "learning_rate": 1.9608771726922193e-05, "loss": 0.5877, "step": 6599 }, { "epoch": 0.1812191103789127, "grad_norm": 0.3360719382762909, "learning_rate": 1.960865209435513e-05, "loss": 0.6043, "step": 6600 }, { "epoch": 0.1812465678198792, "grad_norm": 0.4246176779270172, "learning_rate": 1.960853244386486e-05, "loss": 0.5568, "step": 6601 }, { "epoch": 0.18127402526084568, "grad_norm": 0.3815915286540985, "learning_rate": 1.9608412775451616e-05, "loss": 0.5772, "step": 6602 }, { "epoch": 0.18130148270181218, "grad_norm": 0.3796115219593048, "learning_rate": 1.9608293089115612e-05, "loss": 0.4624, "step": 6603 }, { "epoch": 0.1813289401427787, "grad_norm": 0.408968448638916, "learning_rate": 1.9608173384857076e-05, "loss": 0.4709, "step": 6604 }, { "epoch": 0.1813563975837452, "grad_norm": 0.3403010070323944, "learning_rate": 1.960805366267623e-05, "loss": 0.517, "step": 6605 }, { "epoch": 0.1813838550247117, "grad_norm": 0.3846514821052551, "learning_rate": 1.9607933922573296e-05, "loss": 0.562, "step": 6606 }, { "epoch": 0.1814113124656782, "grad_norm": 0.33959582448005676, "learning_rate": 1.9607814164548504e-05, "loss": 0.4731, "step": 6607 }, { "epoch": 0.1814387699066447, "grad_norm": 0.32449421286582947, "learning_rate": 1.9607694388602066e-05, "loss": 0.5145, "step": 6608 }, { "epoch": 0.1814662273476112, "grad_norm": 0.7601893544197083, "learning_rate": 1.9607574594734213e-05, "loss": 0.5012, "step": 6609 }, { "epoch": 0.1814936847885777, "grad_norm": 0.3440118134021759, "learning_rate": 1.9607454782945166e-05, "loss": 0.4708, "step": 6610 }, { "epoch": 0.1815211422295442, "grad_norm": 0.36644262075424194, "learning_rate": 1.9607334953235152e-05, "loss": 0.387, "step": 6611 }, { "epoch": 0.1815485996705107, "grad_norm": 0.368236243724823, "learning_rate": 1.960721510560439e-05, "loss": 0.547, "step": 6612 }, { "epoch": 0.1815760571114772, "grad_norm": 0.3474389910697937, "learning_rate": 1.960709524005311e-05, "loss": 0.5262, "step": 6613 }, { "epoch": 0.18160351455244372, "grad_norm": 0.37778881192207336, "learning_rate": 1.9606975356581527e-05, "loss": 0.4798, "step": 6614 }, { "epoch": 0.18163097199341022, "grad_norm": 0.34977850317955017, "learning_rate": 1.960685545518987e-05, "loss": 0.5012, "step": 6615 }, { "epoch": 0.1816584294343767, "grad_norm": 0.3307528495788574, "learning_rate": 1.960673553587836e-05, "loss": 0.4949, "step": 6616 }, { "epoch": 0.1816858868753432, "grad_norm": 0.42286619544029236, "learning_rate": 1.9606615598647224e-05, "loss": 0.467, "step": 6617 }, { "epoch": 0.18171334431630973, "grad_norm": 0.37802568078041077, "learning_rate": 1.9606495643496682e-05, "loss": 0.5148, "step": 6618 }, { "epoch": 0.18174080175727622, "grad_norm": 0.33488020300865173, "learning_rate": 1.960637567042696e-05, "loss": 0.5194, "step": 6619 }, { "epoch": 0.18176825919824272, "grad_norm": 0.3429882526397705, "learning_rate": 1.9606255679438282e-05, "loss": 0.5369, "step": 6620 }, { "epoch": 0.18179571663920924, "grad_norm": 0.43393421173095703, "learning_rate": 1.9606135670530872e-05, "loss": 0.5289, "step": 6621 }, { "epoch": 0.18182317408017573, "grad_norm": 0.3814091682434082, "learning_rate": 1.960601564370495e-05, "loss": 0.5861, "step": 6622 }, { "epoch": 0.18185063152114223, "grad_norm": 0.3789820671081543, "learning_rate": 1.9605895598960746e-05, "loss": 0.5173, "step": 6623 }, { "epoch": 0.18187808896210872, "grad_norm": 0.3469187915325165, "learning_rate": 1.960577553629848e-05, "loss": 0.4075, "step": 6624 }, { "epoch": 0.18190554640307524, "grad_norm": 0.3718814253807068, "learning_rate": 1.9605655455718377e-05, "loss": 0.4723, "step": 6625 }, { "epoch": 0.18193300384404174, "grad_norm": 0.38630571961402893, "learning_rate": 1.960553535722066e-05, "loss": 0.5965, "step": 6626 }, { "epoch": 0.18196046128500823, "grad_norm": 0.4062061607837677, "learning_rate": 1.9605415240805556e-05, "loss": 0.4945, "step": 6627 }, { "epoch": 0.18198791872597475, "grad_norm": 0.3063819110393524, "learning_rate": 1.9605295106473285e-05, "loss": 0.467, "step": 6628 }, { "epoch": 0.18201537616694125, "grad_norm": 0.37494996190071106, "learning_rate": 1.9605174954224074e-05, "loss": 0.6977, "step": 6629 }, { "epoch": 0.18204283360790774, "grad_norm": 0.3210318088531494, "learning_rate": 1.9605054784058145e-05, "loss": 0.4574, "step": 6630 }, { "epoch": 0.18207029104887423, "grad_norm": 0.4004106819629669, "learning_rate": 1.960493459597572e-05, "loss": 0.5671, "step": 6631 }, { "epoch": 0.18209774848984076, "grad_norm": 0.4476136565208435, "learning_rate": 1.9604814389977032e-05, "loss": 0.5584, "step": 6632 }, { "epoch": 0.18212520593080725, "grad_norm": 0.34398287534713745, "learning_rate": 1.96046941660623e-05, "loss": 0.4851, "step": 6633 }, { "epoch": 0.18215266337177374, "grad_norm": 0.4284767508506775, "learning_rate": 1.9604573924231745e-05, "loss": 0.5355, "step": 6634 }, { "epoch": 0.18218012081274026, "grad_norm": 0.34190985560417175, "learning_rate": 1.9604453664485594e-05, "loss": 0.5136, "step": 6635 }, { "epoch": 0.18220757825370676, "grad_norm": 0.3798222839832306, "learning_rate": 1.9604333386824072e-05, "loss": 0.4169, "step": 6636 }, { "epoch": 0.18223503569467325, "grad_norm": 0.3595619201660156, "learning_rate": 1.9604213091247404e-05, "loss": 0.598, "step": 6637 }, { "epoch": 0.18226249313563975, "grad_norm": 0.4001055061817169, "learning_rate": 1.9604092777755812e-05, "loss": 0.6254, "step": 6638 }, { "epoch": 0.18228995057660627, "grad_norm": 0.3701055645942688, "learning_rate": 1.9603972446349525e-05, "loss": 0.5338, "step": 6639 }, { "epoch": 0.18231740801757276, "grad_norm": 0.4521777033805847, "learning_rate": 1.9603852097028758e-05, "loss": 0.4755, "step": 6640 }, { "epoch": 0.18234486545853926, "grad_norm": 0.3986315131187439, "learning_rate": 1.9603731729793746e-05, "loss": 0.5989, "step": 6641 }, { "epoch": 0.18237232289950578, "grad_norm": 0.39256805181503296, "learning_rate": 1.960361134464471e-05, "loss": 0.5795, "step": 6642 }, { "epoch": 0.18239978034047227, "grad_norm": 0.586524248123169, "learning_rate": 1.960349094158187e-05, "loss": 0.5527, "step": 6643 }, { "epoch": 0.18242723778143877, "grad_norm": 0.3523845374584198, "learning_rate": 1.9603370520605457e-05, "loss": 0.4464, "step": 6644 }, { "epoch": 0.18245469522240526, "grad_norm": 0.35071879625320435, "learning_rate": 1.960325008171569e-05, "loss": 0.533, "step": 6645 }, { "epoch": 0.18248215266337178, "grad_norm": 0.3633764088153839, "learning_rate": 1.96031296249128e-05, "loss": 0.5729, "step": 6646 }, { "epoch": 0.18250961010433828, "grad_norm": 0.37920546531677246, "learning_rate": 1.9603009150197007e-05, "loss": 0.5671, "step": 6647 }, { "epoch": 0.18253706754530477, "grad_norm": 0.3546657860279083, "learning_rate": 1.960288865756854e-05, "loss": 0.5268, "step": 6648 }, { "epoch": 0.1825645249862713, "grad_norm": 0.3585810363292694, "learning_rate": 1.9602768147027617e-05, "loss": 0.5103, "step": 6649 }, { "epoch": 0.1825919824272378, "grad_norm": 0.50678950548172, "learning_rate": 1.9602647618574466e-05, "loss": 0.6073, "step": 6650 }, { "epoch": 0.18261943986820428, "grad_norm": 0.37133702635765076, "learning_rate": 1.9602527072209313e-05, "loss": 0.5358, "step": 6651 }, { "epoch": 0.18264689730917077, "grad_norm": 0.6753482222557068, "learning_rate": 1.9602406507932384e-05, "loss": 0.4233, "step": 6652 }, { "epoch": 0.1826743547501373, "grad_norm": 0.3662383556365967, "learning_rate": 1.96022859257439e-05, "loss": 0.6022, "step": 6653 }, { "epoch": 0.1827018121911038, "grad_norm": 0.37322214245796204, "learning_rate": 1.960216532564409e-05, "loss": 0.5604, "step": 6654 }, { "epoch": 0.18272926963207028, "grad_norm": 0.3577675223350525, "learning_rate": 1.9602044707633174e-05, "loss": 0.5433, "step": 6655 }, { "epoch": 0.1827567270730368, "grad_norm": 0.3609428107738495, "learning_rate": 1.9601924071711383e-05, "loss": 0.5977, "step": 6656 }, { "epoch": 0.1827841845140033, "grad_norm": 0.31481990218162537, "learning_rate": 1.9601803417878938e-05, "loss": 0.4566, "step": 6657 }, { "epoch": 0.1828116419549698, "grad_norm": 0.4299263656139374, "learning_rate": 1.9601682746136066e-05, "loss": 0.6007, "step": 6658 }, { "epoch": 0.1828390993959363, "grad_norm": 0.37897035479545593, "learning_rate": 1.960156205648299e-05, "loss": 0.4972, "step": 6659 }, { "epoch": 0.1828665568369028, "grad_norm": 0.3300919234752655, "learning_rate": 1.960144134891994e-05, "loss": 0.514, "step": 6660 }, { "epoch": 0.1828940142778693, "grad_norm": 0.40170082449913025, "learning_rate": 1.960132062344713e-05, "loss": 0.4872, "step": 6661 }, { "epoch": 0.1829214717188358, "grad_norm": 0.3318944275379181, "learning_rate": 1.96011998800648e-05, "loss": 0.5076, "step": 6662 }, { "epoch": 0.1829489291598023, "grad_norm": 0.38876667618751526, "learning_rate": 1.9601079118773162e-05, "loss": 0.4879, "step": 6663 }, { "epoch": 0.18297638660076881, "grad_norm": 0.37409788370132446, "learning_rate": 1.9600958339572452e-05, "loss": 0.5259, "step": 6664 }, { "epoch": 0.1830038440417353, "grad_norm": 0.3315151333808899, "learning_rate": 1.9600837542462886e-05, "loss": 0.5197, "step": 6665 }, { "epoch": 0.1830313014827018, "grad_norm": 0.6452762484550476, "learning_rate": 1.9600716727444696e-05, "loss": 0.598, "step": 6666 }, { "epoch": 0.18305875892366832, "grad_norm": 0.3477502167224884, "learning_rate": 1.960059589451811e-05, "loss": 0.5288, "step": 6667 }, { "epoch": 0.18308621636463482, "grad_norm": 0.3696436285972595, "learning_rate": 1.9600475043683344e-05, "loss": 0.5245, "step": 6668 }, { "epoch": 0.1831136738056013, "grad_norm": 0.37194985151290894, "learning_rate": 1.9600354174940625e-05, "loss": 0.451, "step": 6669 }, { "epoch": 0.1831411312465678, "grad_norm": 0.40646710991859436, "learning_rate": 1.9600233288290184e-05, "loss": 0.527, "step": 6670 }, { "epoch": 0.18316858868753433, "grad_norm": 0.3552835285663605, "learning_rate": 1.9600112383732243e-05, "loss": 0.5376, "step": 6671 }, { "epoch": 0.18319604612850082, "grad_norm": 0.3644622564315796, "learning_rate": 1.9599991461267032e-05, "loss": 0.5348, "step": 6672 }, { "epoch": 0.18322350356946732, "grad_norm": 0.35038691759109497, "learning_rate": 1.959987052089477e-05, "loss": 0.5235, "step": 6673 }, { "epoch": 0.18325096101043384, "grad_norm": 0.36963406205177307, "learning_rate": 1.9599749562615684e-05, "loss": 0.6213, "step": 6674 }, { "epoch": 0.18327841845140033, "grad_norm": 0.39860793948173523, "learning_rate": 1.9599628586430004e-05, "loss": 0.4727, "step": 6675 }, { "epoch": 0.18330587589236683, "grad_norm": 0.36047565937042236, "learning_rate": 1.9599507592337953e-05, "loss": 0.5209, "step": 6676 }, { "epoch": 0.18333333333333332, "grad_norm": 0.3773106336593628, "learning_rate": 1.9599386580339754e-05, "loss": 0.5616, "step": 6677 }, { "epoch": 0.18336079077429984, "grad_norm": 0.4006684422492981, "learning_rate": 1.9599265550435634e-05, "loss": 0.58, "step": 6678 }, { "epoch": 0.18338824821526634, "grad_norm": 0.37613150477409363, "learning_rate": 1.9599144502625824e-05, "loss": 0.5614, "step": 6679 }, { "epoch": 0.18341570565623283, "grad_norm": 0.46269986033439636, "learning_rate": 1.9599023436910544e-05, "loss": 0.5156, "step": 6680 }, { "epoch": 0.18344316309719935, "grad_norm": 0.5378171801567078, "learning_rate": 1.9598902353290024e-05, "loss": 0.4752, "step": 6681 }, { "epoch": 0.18347062053816585, "grad_norm": 0.3583143651485443, "learning_rate": 1.9598781251764485e-05, "loss": 0.5165, "step": 6682 }, { "epoch": 0.18349807797913234, "grad_norm": 0.34920769929885864, "learning_rate": 1.9598660132334156e-05, "loss": 0.4413, "step": 6683 }, { "epoch": 0.18352553542009883, "grad_norm": 0.3512398302555084, "learning_rate": 1.9598538994999262e-05, "loss": 0.5468, "step": 6684 }, { "epoch": 0.18355299286106536, "grad_norm": 0.37508997321128845, "learning_rate": 1.959841783976003e-05, "loss": 0.5553, "step": 6685 }, { "epoch": 0.18358045030203185, "grad_norm": 0.441540390253067, "learning_rate": 1.9598296666616685e-05, "loss": 0.4951, "step": 6686 }, { "epoch": 0.18360790774299834, "grad_norm": 0.32495248317718506, "learning_rate": 1.9598175475569453e-05, "loss": 0.5471, "step": 6687 }, { "epoch": 0.18363536518396487, "grad_norm": 0.3588579595088959, "learning_rate": 1.959805426661856e-05, "loss": 0.5766, "step": 6688 }, { "epoch": 0.18366282262493136, "grad_norm": 0.30517950654029846, "learning_rate": 1.9597933039764234e-05, "loss": 0.4042, "step": 6689 }, { "epoch": 0.18369028006589785, "grad_norm": 0.35640642046928406, "learning_rate": 1.9597811795006697e-05, "loss": 0.5535, "step": 6690 }, { "epoch": 0.18371773750686435, "grad_norm": 0.3426697850227356, "learning_rate": 1.959769053234618e-05, "loss": 0.5005, "step": 6691 }, { "epoch": 0.18374519494783087, "grad_norm": 0.3562275767326355, "learning_rate": 1.9597569251782907e-05, "loss": 0.5053, "step": 6692 }, { "epoch": 0.18377265238879736, "grad_norm": 0.3417372703552246, "learning_rate": 1.9597447953317105e-05, "loss": 0.5253, "step": 6693 }, { "epoch": 0.18380010982976386, "grad_norm": 0.35134291648864746, "learning_rate": 1.9597326636948994e-05, "loss": 0.5072, "step": 6694 }, { "epoch": 0.18382756727073038, "grad_norm": 0.40764763951301575, "learning_rate": 1.959720530267881e-05, "loss": 0.4605, "step": 6695 }, { "epoch": 0.18385502471169687, "grad_norm": 0.4107544720172882, "learning_rate": 1.9597083950506776e-05, "loss": 0.5272, "step": 6696 }, { "epoch": 0.18388248215266337, "grad_norm": 0.3437783718109131, "learning_rate": 1.9596962580433113e-05, "loss": 0.4842, "step": 6697 }, { "epoch": 0.18390993959362986, "grad_norm": 0.3789130449295044, "learning_rate": 1.9596841192458056e-05, "loss": 0.5946, "step": 6698 }, { "epoch": 0.18393739703459638, "grad_norm": 0.3519393801689148, "learning_rate": 1.959671978658183e-05, "loss": 0.5054, "step": 6699 }, { "epoch": 0.18396485447556288, "grad_norm": 0.3724072277545929, "learning_rate": 1.9596598362804655e-05, "loss": 0.511, "step": 6700 }, { "epoch": 0.18399231191652937, "grad_norm": 0.3829008638858795, "learning_rate": 1.9596476921126757e-05, "loss": 0.5145, "step": 6701 }, { "epoch": 0.1840197693574959, "grad_norm": 0.3328899145126343, "learning_rate": 1.959635546154837e-05, "loss": 0.5134, "step": 6702 }, { "epoch": 0.1840472267984624, "grad_norm": 0.37000131607055664, "learning_rate": 1.959623398406972e-05, "loss": 0.5689, "step": 6703 }, { "epoch": 0.18407468423942888, "grad_norm": 0.39848434925079346, "learning_rate": 1.9596112488691027e-05, "loss": 0.5255, "step": 6704 }, { "epoch": 0.18410214168039538, "grad_norm": 0.39122459292411804, "learning_rate": 1.9595990975412523e-05, "loss": 0.6085, "step": 6705 }, { "epoch": 0.1841295991213619, "grad_norm": 0.37932607531547546, "learning_rate": 1.9595869444234434e-05, "loss": 0.5976, "step": 6706 }, { "epoch": 0.1841570565623284, "grad_norm": 0.39840081334114075, "learning_rate": 1.9595747895156987e-05, "loss": 0.5048, "step": 6707 }, { "epoch": 0.18418451400329489, "grad_norm": 0.4509585201740265, "learning_rate": 1.9595626328180405e-05, "loss": 0.5943, "step": 6708 }, { "epoch": 0.1842119714442614, "grad_norm": 0.3566094934940338, "learning_rate": 1.9595504743304916e-05, "loss": 0.5614, "step": 6709 }, { "epoch": 0.1842394288852279, "grad_norm": 0.38042911887168884, "learning_rate": 1.959538314053075e-05, "loss": 0.4996, "step": 6710 }, { "epoch": 0.1842668863261944, "grad_norm": 0.5622090101242065, "learning_rate": 1.9595261519858132e-05, "loss": 0.5194, "step": 6711 }, { "epoch": 0.1842943437671609, "grad_norm": 0.33112847805023193, "learning_rate": 1.959513988128729e-05, "loss": 0.5218, "step": 6712 }, { "epoch": 0.1843218012081274, "grad_norm": 0.3660420775413513, "learning_rate": 1.959501822481845e-05, "loss": 0.493, "step": 6713 }, { "epoch": 0.1843492586490939, "grad_norm": 0.35983988642692566, "learning_rate": 1.9594896550451835e-05, "loss": 0.5816, "step": 6714 }, { "epoch": 0.1843767160900604, "grad_norm": 0.33008211851119995, "learning_rate": 1.9594774858187676e-05, "loss": 0.5586, "step": 6715 }, { "epoch": 0.18440417353102692, "grad_norm": 0.3390534818172455, "learning_rate": 1.9594653148026203e-05, "loss": 0.4777, "step": 6716 }, { "epoch": 0.18443163097199342, "grad_norm": 0.360431969165802, "learning_rate": 1.959453141996764e-05, "loss": 0.5251, "step": 6717 }, { "epoch": 0.1844590884129599, "grad_norm": 0.3713293671607971, "learning_rate": 1.959440967401221e-05, "loss": 0.5239, "step": 6718 }, { "epoch": 0.1844865458539264, "grad_norm": 0.3547488749027252, "learning_rate": 1.9594287910160145e-05, "loss": 0.5391, "step": 6719 }, { "epoch": 0.18451400329489293, "grad_norm": 0.3685436248779297, "learning_rate": 1.9594166128411672e-05, "loss": 0.5793, "step": 6720 }, { "epoch": 0.18454146073585942, "grad_norm": 0.3591940402984619, "learning_rate": 1.9594044328767015e-05, "loss": 0.5288, "step": 6721 }, { "epoch": 0.1845689181768259, "grad_norm": 0.30893898010253906, "learning_rate": 1.9593922511226406e-05, "loss": 0.4966, "step": 6722 }, { "epoch": 0.18459637561779244, "grad_norm": 0.3758653402328491, "learning_rate": 1.959380067579007e-05, "loss": 0.5593, "step": 6723 }, { "epoch": 0.18462383305875893, "grad_norm": 0.3378475606441498, "learning_rate": 1.959367882245823e-05, "loss": 0.5111, "step": 6724 }, { "epoch": 0.18465129049972542, "grad_norm": 0.35130152106285095, "learning_rate": 1.959355695123112e-05, "loss": 0.523, "step": 6725 }, { "epoch": 0.18467874794069192, "grad_norm": 0.32864147424697876, "learning_rate": 1.9593435062108962e-05, "loss": 0.5466, "step": 6726 }, { "epoch": 0.18470620538165844, "grad_norm": 0.37267106771469116, "learning_rate": 1.959331315509199e-05, "loss": 0.537, "step": 6727 }, { "epoch": 0.18473366282262493, "grad_norm": 0.38670453429222107, "learning_rate": 1.9593191230180424e-05, "loss": 0.5722, "step": 6728 }, { "epoch": 0.18476112026359143, "grad_norm": 0.3192785978317261, "learning_rate": 1.9593069287374497e-05, "loss": 0.4945, "step": 6729 }, { "epoch": 0.18478857770455792, "grad_norm": 0.3513008952140808, "learning_rate": 1.9592947326674426e-05, "loss": 0.5073, "step": 6730 }, { "epoch": 0.18481603514552444, "grad_norm": 0.38058778643608093, "learning_rate": 1.9592825348080456e-05, "loss": 0.5119, "step": 6731 }, { "epoch": 0.18484349258649094, "grad_norm": 0.3522785007953644, "learning_rate": 1.95927033515928e-05, "loss": 0.5045, "step": 6732 }, { "epoch": 0.18487095002745743, "grad_norm": 0.40654295682907104, "learning_rate": 1.959258133721169e-05, "loss": 0.5941, "step": 6733 }, { "epoch": 0.18489840746842395, "grad_norm": 0.4667496085166931, "learning_rate": 1.959245930493736e-05, "loss": 0.5367, "step": 6734 }, { "epoch": 0.18492586490939045, "grad_norm": 0.4088931679725647, "learning_rate": 1.9592337254770028e-05, "loss": 0.5446, "step": 6735 }, { "epoch": 0.18495332235035694, "grad_norm": 0.3413527309894562, "learning_rate": 1.9592215186709924e-05, "loss": 0.543, "step": 6736 }, { "epoch": 0.18498077979132344, "grad_norm": 0.411981463432312, "learning_rate": 1.959209310075728e-05, "loss": 0.6398, "step": 6737 }, { "epoch": 0.18500823723228996, "grad_norm": 0.4007175862789154, "learning_rate": 1.959197099691232e-05, "loss": 0.5316, "step": 6738 }, { "epoch": 0.18503569467325645, "grad_norm": 1.1644816398620605, "learning_rate": 1.9591848875175274e-05, "loss": 0.5249, "step": 6739 }, { "epoch": 0.18506315211422295, "grad_norm": 0.41457629203796387, "learning_rate": 1.9591726735546364e-05, "loss": 0.5607, "step": 6740 }, { "epoch": 0.18509060955518947, "grad_norm": 0.37312108278274536, "learning_rate": 1.9591604578025825e-05, "loss": 0.581, "step": 6741 }, { "epoch": 0.18511806699615596, "grad_norm": 0.37413543462753296, "learning_rate": 1.959148240261388e-05, "loss": 0.5594, "step": 6742 }, { "epoch": 0.18514552443712246, "grad_norm": 0.34128063917160034, "learning_rate": 1.9591360209310766e-05, "loss": 0.5457, "step": 6743 }, { "epoch": 0.18517298187808895, "grad_norm": 0.37640324234962463, "learning_rate": 1.95912379981167e-05, "loss": 0.5524, "step": 6744 }, { "epoch": 0.18520043931905547, "grad_norm": 0.39246833324432373, "learning_rate": 1.9591115769031913e-05, "loss": 0.5822, "step": 6745 }, { "epoch": 0.18522789676002197, "grad_norm": 0.4212758541107178, "learning_rate": 1.959099352205663e-05, "loss": 0.5571, "step": 6746 }, { "epoch": 0.18525535420098846, "grad_norm": 0.33912530541419983, "learning_rate": 1.9590871257191088e-05, "loss": 0.4844, "step": 6747 }, { "epoch": 0.18528281164195498, "grad_norm": 0.3508831858634949, "learning_rate": 1.959074897443551e-05, "loss": 0.4508, "step": 6748 }, { "epoch": 0.18531026908292147, "grad_norm": 0.3268592953681946, "learning_rate": 1.9590626673790125e-05, "loss": 0.5058, "step": 6749 }, { "epoch": 0.18533772652388797, "grad_norm": 0.33366236090660095, "learning_rate": 1.9590504355255158e-05, "loss": 0.4969, "step": 6750 }, { "epoch": 0.18536518396485446, "grad_norm": 0.3933591842651367, "learning_rate": 1.9590382018830842e-05, "loss": 0.5265, "step": 6751 }, { "epoch": 0.18539264140582098, "grad_norm": 0.3208042085170746, "learning_rate": 1.95902596645174e-05, "loss": 0.3922, "step": 6752 }, { "epoch": 0.18542009884678748, "grad_norm": 0.35445287823677063, "learning_rate": 1.9590137292315067e-05, "loss": 0.5696, "step": 6753 }, { "epoch": 0.18544755628775397, "grad_norm": 0.38244447112083435, "learning_rate": 1.959001490222406e-05, "loss": 0.5438, "step": 6754 }, { "epoch": 0.1854750137287205, "grad_norm": 0.3867049217224121, "learning_rate": 1.958989249424462e-05, "loss": 0.5239, "step": 6755 }, { "epoch": 0.185502471169687, "grad_norm": 0.3641013503074646, "learning_rate": 1.9589770068376966e-05, "loss": 0.4816, "step": 6756 }, { "epoch": 0.18552992861065348, "grad_norm": 0.385949045419693, "learning_rate": 1.9589647624621335e-05, "loss": 0.5934, "step": 6757 }, { "epoch": 0.18555738605161998, "grad_norm": 0.3570936620235443, "learning_rate": 1.9589525162977946e-05, "loss": 0.5603, "step": 6758 }, { "epoch": 0.1855848434925865, "grad_norm": 0.42774534225463867, "learning_rate": 1.9589402683447034e-05, "loss": 0.5201, "step": 6759 }, { "epoch": 0.185612300933553, "grad_norm": 0.3746121823787689, "learning_rate": 1.9589280186028826e-05, "loss": 0.4315, "step": 6760 }, { "epoch": 0.1856397583745195, "grad_norm": 0.3705534040927887, "learning_rate": 1.958915767072355e-05, "loss": 0.5596, "step": 6761 }, { "epoch": 0.185667215815486, "grad_norm": 0.43801939487457275, "learning_rate": 1.9589035137531433e-05, "loss": 0.6298, "step": 6762 }, { "epoch": 0.1856946732564525, "grad_norm": 0.3556654751300812, "learning_rate": 1.9588912586452705e-05, "loss": 0.5103, "step": 6763 }, { "epoch": 0.185722130697419, "grad_norm": 0.36112627387046814, "learning_rate": 1.9588790017487595e-05, "loss": 0.5946, "step": 6764 }, { "epoch": 0.1857495881383855, "grad_norm": 0.351998507976532, "learning_rate": 1.958866743063633e-05, "loss": 0.506, "step": 6765 }, { "epoch": 0.185777045579352, "grad_norm": 0.3649598956108093, "learning_rate": 1.9588544825899144e-05, "loss": 0.5496, "step": 6766 }, { "epoch": 0.1858045030203185, "grad_norm": 0.349015474319458, "learning_rate": 1.958842220327626e-05, "loss": 0.5857, "step": 6767 }, { "epoch": 0.185831960461285, "grad_norm": 0.555534303188324, "learning_rate": 1.9588299562767905e-05, "loss": 0.5371, "step": 6768 }, { "epoch": 0.18585941790225152, "grad_norm": 0.40861591696739197, "learning_rate": 1.958817690437431e-05, "loss": 0.5323, "step": 6769 }, { "epoch": 0.18588687534321802, "grad_norm": 0.37565916776657104, "learning_rate": 1.958805422809571e-05, "loss": 0.5789, "step": 6770 }, { "epoch": 0.1859143327841845, "grad_norm": 0.32951074838638306, "learning_rate": 1.958793153393233e-05, "loss": 0.527, "step": 6771 }, { "epoch": 0.185941790225151, "grad_norm": 0.3565272092819214, "learning_rate": 1.9587808821884394e-05, "loss": 0.5697, "step": 6772 }, { "epoch": 0.18596924766611753, "grad_norm": 0.7992799878120422, "learning_rate": 1.9587686091952132e-05, "loss": 0.522, "step": 6773 }, { "epoch": 0.18599670510708402, "grad_norm": 0.3551509380340576, "learning_rate": 1.958756334413578e-05, "loss": 0.4987, "step": 6774 }, { "epoch": 0.18602416254805051, "grad_norm": 0.3657396137714386, "learning_rate": 1.958744057843556e-05, "loss": 0.5902, "step": 6775 }, { "epoch": 0.18605161998901704, "grad_norm": 0.3613986670970917, "learning_rate": 1.9587317794851704e-05, "loss": 0.5304, "step": 6776 }, { "epoch": 0.18607907742998353, "grad_norm": 0.7609866261482239, "learning_rate": 1.958719499338444e-05, "loss": 0.5423, "step": 6777 }, { "epoch": 0.18610653487095002, "grad_norm": 0.43186846375465393, "learning_rate": 1.9587072174034e-05, "loss": 0.5101, "step": 6778 }, { "epoch": 0.18613399231191652, "grad_norm": 0.3713214099407196, "learning_rate": 1.9586949336800606e-05, "loss": 0.5974, "step": 6779 }, { "epoch": 0.18616144975288304, "grad_norm": 0.7383443713188171, "learning_rate": 1.9586826481684494e-05, "loss": 0.5713, "step": 6780 }, { "epoch": 0.18618890719384953, "grad_norm": 0.527539849281311, "learning_rate": 1.958670360868589e-05, "loss": 0.6145, "step": 6781 }, { "epoch": 0.18621636463481603, "grad_norm": 0.3888605237007141, "learning_rate": 1.958658071780503e-05, "loss": 0.6001, "step": 6782 }, { "epoch": 0.18624382207578255, "grad_norm": 0.34986022114753723, "learning_rate": 1.958645780904213e-05, "loss": 0.5309, "step": 6783 }, { "epoch": 0.18627127951674904, "grad_norm": 0.44435322284698486, "learning_rate": 1.958633488239743e-05, "loss": 0.5877, "step": 6784 }, { "epoch": 0.18629873695771554, "grad_norm": 0.3781454861164093, "learning_rate": 1.9586211937871155e-05, "loss": 0.5375, "step": 6785 }, { "epoch": 0.18632619439868203, "grad_norm": 0.3649424612522125, "learning_rate": 1.9586088975463534e-05, "loss": 0.5073, "step": 6786 }, { "epoch": 0.18635365183964855, "grad_norm": 0.34935101866722107, "learning_rate": 1.9585965995174798e-05, "loss": 0.4495, "step": 6787 }, { "epoch": 0.18638110928061505, "grad_norm": 0.3906448185443878, "learning_rate": 1.9585842997005175e-05, "loss": 0.565, "step": 6788 }, { "epoch": 0.18640856672158154, "grad_norm": 0.3720589280128479, "learning_rate": 1.9585719980954897e-05, "loss": 0.5435, "step": 6789 }, { "epoch": 0.18643602416254806, "grad_norm": 0.36351558566093445, "learning_rate": 1.958559694702419e-05, "loss": 0.5594, "step": 6790 }, { "epoch": 0.18646348160351456, "grad_norm": 0.31544938683509827, "learning_rate": 1.958547389521329e-05, "loss": 0.4473, "step": 6791 }, { "epoch": 0.18649093904448105, "grad_norm": 0.39252379536628723, "learning_rate": 1.9585350825522417e-05, "loss": 0.5522, "step": 6792 }, { "epoch": 0.18651839648544755, "grad_norm": 0.3611988425254822, "learning_rate": 1.9585227737951808e-05, "loss": 0.5863, "step": 6793 }, { "epoch": 0.18654585392641407, "grad_norm": 0.36890915036201477, "learning_rate": 1.9585104632501688e-05, "loss": 0.5302, "step": 6794 }, { "epoch": 0.18657331136738056, "grad_norm": 0.38087841868400574, "learning_rate": 1.9584981509172292e-05, "loss": 0.5253, "step": 6795 }, { "epoch": 0.18660076880834706, "grad_norm": 0.3283732533454895, "learning_rate": 1.9584858367963843e-05, "loss": 0.5147, "step": 6796 }, { "epoch": 0.18662822624931355, "grad_norm": 0.399641215801239, "learning_rate": 1.9584735208876575e-05, "loss": 0.4525, "step": 6797 }, { "epoch": 0.18665568369028007, "grad_norm": 0.3760416805744171, "learning_rate": 1.9584612031910714e-05, "loss": 0.4979, "step": 6798 }, { "epoch": 0.18668314113124657, "grad_norm": 0.4187765121459961, "learning_rate": 1.95844888370665e-05, "loss": 0.538, "step": 6799 }, { "epoch": 0.18671059857221306, "grad_norm": 0.4055293798446655, "learning_rate": 1.958436562434415e-05, "loss": 0.6165, "step": 6800 }, { "epoch": 0.18673805601317958, "grad_norm": 0.3894589841365814, "learning_rate": 1.95842423937439e-05, "loss": 0.5605, "step": 6801 }, { "epoch": 0.18676551345414608, "grad_norm": 0.3786599338054657, "learning_rate": 1.9584119145265977e-05, "loss": 0.5537, "step": 6802 }, { "epoch": 0.18679297089511257, "grad_norm": 0.42943623661994934, "learning_rate": 1.9583995878910613e-05, "loss": 0.5497, "step": 6803 }, { "epoch": 0.18682042833607906, "grad_norm": 0.40102970600128174, "learning_rate": 1.958387259467804e-05, "loss": 0.5631, "step": 6804 }, { "epoch": 0.18684788577704559, "grad_norm": 0.36541756987571716, "learning_rate": 1.9583749292568484e-05, "loss": 0.5318, "step": 6805 }, { "epoch": 0.18687534321801208, "grad_norm": 0.3666214644908905, "learning_rate": 1.958362597258218e-05, "loss": 0.553, "step": 6806 }, { "epoch": 0.18690280065897857, "grad_norm": 0.5189576148986816, "learning_rate": 1.9583502634719354e-05, "loss": 0.5501, "step": 6807 }, { "epoch": 0.1869302580999451, "grad_norm": 0.34714779257774353, "learning_rate": 1.9583379278980237e-05, "loss": 0.4784, "step": 6808 }, { "epoch": 0.1869577155409116, "grad_norm": 0.38772812485694885, "learning_rate": 1.9583255905365057e-05, "loss": 0.5522, "step": 6809 }, { "epoch": 0.18698517298187808, "grad_norm": 0.39513394236564636, "learning_rate": 1.9583132513874045e-05, "loss": 0.545, "step": 6810 }, { "epoch": 0.18701263042284458, "grad_norm": 0.3575737774372101, "learning_rate": 1.9583009104507437e-05, "loss": 0.539, "step": 6811 }, { "epoch": 0.1870400878638111, "grad_norm": 0.4082486927509308, "learning_rate": 1.9582885677265455e-05, "loss": 0.5866, "step": 6812 }, { "epoch": 0.1870675453047776, "grad_norm": 0.38610196113586426, "learning_rate": 1.958276223214833e-05, "loss": 0.6093, "step": 6813 }, { "epoch": 0.1870950027457441, "grad_norm": 0.4153306484222412, "learning_rate": 1.9582638769156303e-05, "loss": 0.6159, "step": 6814 }, { "epoch": 0.1871224601867106, "grad_norm": 0.3909022808074951, "learning_rate": 1.958251528828959e-05, "loss": 0.5829, "step": 6815 }, { "epoch": 0.1871499176276771, "grad_norm": 0.3837563097476959, "learning_rate": 1.958239178954843e-05, "loss": 0.5611, "step": 6816 }, { "epoch": 0.1871773750686436, "grad_norm": 0.38818663358688354, "learning_rate": 1.958226827293305e-05, "loss": 0.5532, "step": 6817 }, { "epoch": 0.1872048325096101, "grad_norm": 0.3570185601711273, "learning_rate": 1.958214473844368e-05, "loss": 0.519, "step": 6818 }, { "epoch": 0.1872322899505766, "grad_norm": 0.39876121282577515, "learning_rate": 1.9582021186080556e-05, "loss": 0.5541, "step": 6819 }, { "epoch": 0.1872597473915431, "grad_norm": 0.3844282627105713, "learning_rate": 1.9581897615843898e-05, "loss": 0.5838, "step": 6820 }, { "epoch": 0.1872872048325096, "grad_norm": 0.40826576948165894, "learning_rate": 1.9581774027733947e-05, "loss": 0.5071, "step": 6821 }, { "epoch": 0.18731466227347612, "grad_norm": 0.3945906162261963, "learning_rate": 1.958165042175093e-05, "loss": 0.4977, "step": 6822 }, { "epoch": 0.18734211971444262, "grad_norm": 0.3626886010169983, "learning_rate": 1.9581526797895075e-05, "loss": 0.4982, "step": 6823 }, { "epoch": 0.1873695771554091, "grad_norm": 0.441259503364563, "learning_rate": 1.9581403156166618e-05, "loss": 0.5975, "step": 6824 }, { "epoch": 0.1873970345963756, "grad_norm": 0.3822338581085205, "learning_rate": 1.9581279496565782e-05, "loss": 0.5875, "step": 6825 }, { "epoch": 0.18742449203734213, "grad_norm": 0.36095020174980164, "learning_rate": 1.95811558190928e-05, "loss": 0.6033, "step": 6826 }, { "epoch": 0.18745194947830862, "grad_norm": 0.4864550232887268, "learning_rate": 1.9581032123747907e-05, "loss": 0.5338, "step": 6827 }, { "epoch": 0.18747940691927512, "grad_norm": 0.3608875870704651, "learning_rate": 1.9580908410531333e-05, "loss": 0.5929, "step": 6828 }, { "epoch": 0.18750686436024164, "grad_norm": 0.5104290246963501, "learning_rate": 1.9580784679443305e-05, "loss": 0.4842, "step": 6829 }, { "epoch": 0.18753432180120813, "grad_norm": 0.3639203608036041, "learning_rate": 1.9580660930484057e-05, "loss": 0.5468, "step": 6830 }, { "epoch": 0.18756177924217463, "grad_norm": 0.4148023724555969, "learning_rate": 1.9580537163653817e-05, "loss": 0.5649, "step": 6831 }, { "epoch": 0.18758923668314112, "grad_norm": 0.3456474542617798, "learning_rate": 1.958041337895282e-05, "loss": 0.5843, "step": 6832 }, { "epoch": 0.18761669412410764, "grad_norm": 0.3674614429473877, "learning_rate": 1.9580289576381292e-05, "loss": 0.5368, "step": 6833 }, { "epoch": 0.18764415156507414, "grad_norm": 0.38668695092201233, "learning_rate": 1.9580165755939467e-05, "loss": 0.5621, "step": 6834 }, { "epoch": 0.18767160900604063, "grad_norm": 0.3668268322944641, "learning_rate": 1.958004191762757e-05, "loss": 0.4718, "step": 6835 }, { "epoch": 0.18769906644700715, "grad_norm": 0.36033424735069275, "learning_rate": 1.9579918061445845e-05, "loss": 0.5545, "step": 6836 }, { "epoch": 0.18772652388797365, "grad_norm": 0.48583075404167175, "learning_rate": 1.9579794187394512e-05, "loss": 0.6826, "step": 6837 }, { "epoch": 0.18775398132894014, "grad_norm": 0.3291285037994385, "learning_rate": 1.9579670295473806e-05, "loss": 0.5118, "step": 6838 }, { "epoch": 0.18778143876990663, "grad_norm": 0.3703378736972809, "learning_rate": 1.9579546385683957e-05, "loss": 0.6616, "step": 6839 }, { "epoch": 0.18780889621087316, "grad_norm": 0.38657477498054504, "learning_rate": 1.95794224580252e-05, "loss": 0.5587, "step": 6840 }, { "epoch": 0.18783635365183965, "grad_norm": 0.36306384205818176, "learning_rate": 1.9579298512497758e-05, "loss": 0.5605, "step": 6841 }, { "epoch": 0.18786381109280614, "grad_norm": 0.35069623589515686, "learning_rate": 1.9579174549101867e-05, "loss": 0.5088, "step": 6842 }, { "epoch": 0.18789126853377267, "grad_norm": 0.40265753865242004, "learning_rate": 1.957905056783776e-05, "loss": 0.5499, "step": 6843 }, { "epoch": 0.18791872597473916, "grad_norm": 0.29761531949043274, "learning_rate": 1.9578926568705667e-05, "loss": 0.4424, "step": 6844 }, { "epoch": 0.18794618341570565, "grad_norm": 0.4590238928794861, "learning_rate": 1.9578802551705818e-05, "loss": 0.5865, "step": 6845 }, { "epoch": 0.18797364085667215, "grad_norm": 0.3460122346878052, "learning_rate": 1.9578678516838443e-05, "loss": 0.4932, "step": 6846 }, { "epoch": 0.18800109829763867, "grad_norm": 0.34033727645874023, "learning_rate": 1.957855446410378e-05, "loss": 0.4905, "step": 6847 }, { "epoch": 0.18802855573860516, "grad_norm": 0.34941911697387695, "learning_rate": 1.9578430393502052e-05, "loss": 0.5207, "step": 6848 }, { "epoch": 0.18805601317957166, "grad_norm": 0.35217875242233276, "learning_rate": 1.9578306305033493e-05, "loss": 0.4919, "step": 6849 }, { "epoch": 0.18808347062053818, "grad_norm": 0.3916667699813843, "learning_rate": 1.957818219869834e-05, "loss": 0.5571, "step": 6850 }, { "epoch": 0.18811092806150467, "grad_norm": 0.3776398301124573, "learning_rate": 1.957805807449682e-05, "loss": 0.5623, "step": 6851 }, { "epoch": 0.18813838550247117, "grad_norm": 0.38278159499168396, "learning_rate": 1.957793393242916e-05, "loss": 0.474, "step": 6852 }, { "epoch": 0.18816584294343766, "grad_norm": 0.3364105224609375, "learning_rate": 1.95778097724956e-05, "loss": 0.5273, "step": 6853 }, { "epoch": 0.18819330038440418, "grad_norm": 0.37100768089294434, "learning_rate": 1.9577685594696367e-05, "loss": 0.5498, "step": 6854 }, { "epoch": 0.18822075782537068, "grad_norm": 0.34563401341438293, "learning_rate": 1.9577561399031692e-05, "loss": 0.5326, "step": 6855 }, { "epoch": 0.18824821526633717, "grad_norm": 0.43228229880332947, "learning_rate": 1.9577437185501812e-05, "loss": 0.5766, "step": 6856 }, { "epoch": 0.1882756727073037, "grad_norm": 0.3489593267440796, "learning_rate": 1.957731295410695e-05, "loss": 0.5441, "step": 6857 }, { "epoch": 0.1883031301482702, "grad_norm": 0.35675302147865295, "learning_rate": 1.9577188704847344e-05, "loss": 0.5205, "step": 6858 }, { "epoch": 0.18833058758923668, "grad_norm": 0.3516134023666382, "learning_rate": 1.9577064437723226e-05, "loss": 0.4966, "step": 6859 }, { "epoch": 0.18835804503020318, "grad_norm": 0.39622431993484497, "learning_rate": 1.9576940152734828e-05, "loss": 0.6286, "step": 6860 }, { "epoch": 0.1883855024711697, "grad_norm": 0.4257540702819824, "learning_rate": 1.957681584988238e-05, "loss": 0.5941, "step": 6861 }, { "epoch": 0.1884129599121362, "grad_norm": 0.36149847507476807, "learning_rate": 1.957669152916611e-05, "loss": 0.5653, "step": 6862 }, { "epoch": 0.18844041735310268, "grad_norm": 0.36363133788108826, "learning_rate": 1.9576567190586257e-05, "loss": 0.4977, "step": 6863 }, { "epoch": 0.18846787479406918, "grad_norm": 0.36831215023994446, "learning_rate": 1.9576442834143047e-05, "loss": 0.5491, "step": 6864 }, { "epoch": 0.1884953322350357, "grad_norm": 0.40143856406211853, "learning_rate": 1.9576318459836715e-05, "loss": 0.62, "step": 6865 }, { "epoch": 0.1885227896760022, "grad_norm": 0.39163464307785034, "learning_rate": 1.9576194067667495e-05, "loss": 0.6031, "step": 6866 }, { "epoch": 0.1885502471169687, "grad_norm": 0.3671136498451233, "learning_rate": 1.9576069657635614e-05, "loss": 0.5444, "step": 6867 }, { "epoch": 0.1885777045579352, "grad_norm": 0.4096602499485016, "learning_rate": 1.9575945229741305e-05, "loss": 0.5598, "step": 6868 }, { "epoch": 0.1886051619989017, "grad_norm": 0.4224018454551697, "learning_rate": 1.9575820783984807e-05, "loss": 0.5479, "step": 6869 }, { "epoch": 0.1886326194398682, "grad_norm": 0.506557822227478, "learning_rate": 1.9575696320366345e-05, "loss": 0.5493, "step": 6870 }, { "epoch": 0.1886600768808347, "grad_norm": 0.36617833375930786, "learning_rate": 1.957557183888615e-05, "loss": 0.581, "step": 6871 }, { "epoch": 0.18868753432180121, "grad_norm": 0.3464500606060028, "learning_rate": 1.957544733954446e-05, "loss": 0.4837, "step": 6872 }, { "epoch": 0.1887149917627677, "grad_norm": 0.3941195607185364, "learning_rate": 1.957532282234151e-05, "loss": 0.5904, "step": 6873 }, { "epoch": 0.1887424492037342, "grad_norm": 0.3146474063396454, "learning_rate": 1.957519828727752e-05, "loss": 0.4368, "step": 6874 }, { "epoch": 0.18876990664470072, "grad_norm": 0.3891701400279999, "learning_rate": 1.957507373435273e-05, "loss": 0.602, "step": 6875 }, { "epoch": 0.18879736408566722, "grad_norm": 0.3965761661529541, "learning_rate": 1.9574949163567373e-05, "loss": 0.4592, "step": 6876 }, { "epoch": 0.1888248215266337, "grad_norm": 0.353738397359848, "learning_rate": 1.9574824574921682e-05, "loss": 0.5322, "step": 6877 }, { "epoch": 0.1888522789676002, "grad_norm": 0.4667591452598572, "learning_rate": 1.9574699968415884e-05, "loss": 0.5746, "step": 6878 }, { "epoch": 0.18887973640856673, "grad_norm": 0.3589186668395996, "learning_rate": 1.9574575344050213e-05, "loss": 0.5249, "step": 6879 }, { "epoch": 0.18890719384953322, "grad_norm": 0.42578983306884766, "learning_rate": 1.9574450701824907e-05, "loss": 0.6439, "step": 6880 }, { "epoch": 0.18893465129049972, "grad_norm": 0.34998226165771484, "learning_rate": 1.957432604174019e-05, "loss": 0.5326, "step": 6881 }, { "epoch": 0.18896210873146624, "grad_norm": 0.35209110379219055, "learning_rate": 1.9574201363796303e-05, "loss": 0.4835, "step": 6882 }, { "epoch": 0.18898956617243273, "grad_norm": 0.37573570013046265, "learning_rate": 1.9574076667993473e-05, "loss": 0.5019, "step": 6883 }, { "epoch": 0.18901702361339923, "grad_norm": 0.48956596851348877, "learning_rate": 1.9573951954331935e-05, "loss": 0.5878, "step": 6884 }, { "epoch": 0.18904448105436572, "grad_norm": 0.3590899109840393, "learning_rate": 1.9573827222811925e-05, "loss": 0.5709, "step": 6885 }, { "epoch": 0.18907193849533224, "grad_norm": 0.32039064168930054, "learning_rate": 1.9573702473433664e-05, "loss": 0.4467, "step": 6886 }, { "epoch": 0.18909939593629874, "grad_norm": 0.3242812752723694, "learning_rate": 1.9573577706197398e-05, "loss": 0.5435, "step": 6887 }, { "epoch": 0.18912685337726523, "grad_norm": 0.34487634897232056, "learning_rate": 1.957345292110335e-05, "loss": 0.5011, "step": 6888 }, { "epoch": 0.18915431081823175, "grad_norm": 0.419750839471817, "learning_rate": 1.9573328118151756e-05, "loss": 0.6627, "step": 6889 }, { "epoch": 0.18918176825919825, "grad_norm": 0.3542127013206482, "learning_rate": 1.9573203297342855e-05, "loss": 0.5175, "step": 6890 }, { "epoch": 0.18920922570016474, "grad_norm": 0.7070826888084412, "learning_rate": 1.9573078458676873e-05, "loss": 0.4743, "step": 6891 }, { "epoch": 0.18923668314113123, "grad_norm": 0.35835543274879456, "learning_rate": 1.957295360215404e-05, "loss": 0.55, "step": 6892 }, { "epoch": 0.18926414058209776, "grad_norm": 0.3788932263851166, "learning_rate": 1.95728287277746e-05, "loss": 0.5016, "step": 6893 }, { "epoch": 0.18929159802306425, "grad_norm": 0.373090535402298, "learning_rate": 1.9572703835538776e-05, "loss": 0.4741, "step": 6894 }, { "epoch": 0.18931905546403074, "grad_norm": 0.3625165522098541, "learning_rate": 1.9572578925446805e-05, "loss": 0.4836, "step": 6895 }, { "epoch": 0.18934651290499727, "grad_norm": 0.3365139365196228, "learning_rate": 1.9572453997498915e-05, "loss": 0.5459, "step": 6896 }, { "epoch": 0.18937397034596376, "grad_norm": 0.3525559604167938, "learning_rate": 1.9572329051695344e-05, "loss": 0.5182, "step": 6897 }, { "epoch": 0.18940142778693025, "grad_norm": 0.3484532833099365, "learning_rate": 1.957220408803633e-05, "loss": 0.499, "step": 6898 }, { "epoch": 0.18942888522789675, "grad_norm": 0.3612794280052185, "learning_rate": 1.9572079106522092e-05, "loss": 0.4309, "step": 6899 }, { "epoch": 0.18945634266886327, "grad_norm": 0.4073112905025482, "learning_rate": 1.957195410715288e-05, "loss": 0.5053, "step": 6900 }, { "epoch": 0.18948380010982976, "grad_norm": 0.4449286460876465, "learning_rate": 1.9571829089928913e-05, "loss": 0.4792, "step": 6901 }, { "epoch": 0.18951125755079626, "grad_norm": 0.4298200011253357, "learning_rate": 1.957170405485043e-05, "loss": 0.5624, "step": 6902 }, { "epoch": 0.18953871499176278, "grad_norm": 0.4122362434864044, "learning_rate": 1.9571579001917666e-05, "loss": 0.5347, "step": 6903 }, { "epoch": 0.18956617243272927, "grad_norm": 0.4166927933692932, "learning_rate": 1.957145393113085e-05, "loss": 0.5836, "step": 6904 }, { "epoch": 0.18959362987369577, "grad_norm": 0.6806356906890869, "learning_rate": 1.9571328842490218e-05, "loss": 0.4892, "step": 6905 }, { "epoch": 0.18962108731466226, "grad_norm": 0.47306039929389954, "learning_rate": 1.9571203735996006e-05, "loss": 0.5874, "step": 6906 }, { "epoch": 0.18964854475562878, "grad_norm": 0.40287163853645325, "learning_rate": 1.957107861164844e-05, "loss": 0.675, "step": 6907 }, { "epoch": 0.18967600219659528, "grad_norm": 0.3965892791748047, "learning_rate": 1.957095346944776e-05, "loss": 0.5873, "step": 6908 }, { "epoch": 0.18970345963756177, "grad_norm": 0.36189836263656616, "learning_rate": 1.95708283093942e-05, "loss": 0.5078, "step": 6909 }, { "epoch": 0.1897309170785283, "grad_norm": 0.3958370089530945, "learning_rate": 1.9570703131487984e-05, "loss": 0.5232, "step": 6910 }, { "epoch": 0.1897583745194948, "grad_norm": 0.34994015097618103, "learning_rate": 1.9570577935729354e-05, "loss": 0.5146, "step": 6911 }, { "epoch": 0.18978583196046128, "grad_norm": 0.3360840976238251, "learning_rate": 1.9570452722118546e-05, "loss": 0.5117, "step": 6912 }, { "epoch": 0.18981328940142778, "grad_norm": 0.3502565622329712, "learning_rate": 1.9570327490655787e-05, "loss": 0.4983, "step": 6913 }, { "epoch": 0.1898407468423943, "grad_norm": 0.34775620698928833, "learning_rate": 1.957020224134131e-05, "loss": 0.5462, "step": 6914 }, { "epoch": 0.1898682042833608, "grad_norm": 0.32616209983825684, "learning_rate": 1.9570076974175352e-05, "loss": 0.524, "step": 6915 }, { "epoch": 0.18989566172432729, "grad_norm": 0.622235119342804, "learning_rate": 1.956995168915815e-05, "loss": 0.5386, "step": 6916 }, { "epoch": 0.1899231191652938, "grad_norm": 0.36962130665779114, "learning_rate": 1.956982638628993e-05, "loss": 0.6172, "step": 6917 }, { "epoch": 0.1899505766062603, "grad_norm": 0.356423556804657, "learning_rate": 1.956970106557093e-05, "loss": 0.5311, "step": 6918 }, { "epoch": 0.1899780340472268, "grad_norm": 0.43130388855934143, "learning_rate": 1.9569575727001384e-05, "loss": 0.5775, "step": 6919 }, { "epoch": 0.1900054914881933, "grad_norm": 0.38675811886787415, "learning_rate": 1.9569450370581525e-05, "loss": 0.546, "step": 6920 }, { "epoch": 0.1900329489291598, "grad_norm": 0.34962788224220276, "learning_rate": 1.9569324996311586e-05, "loss": 0.5709, "step": 6921 }, { "epoch": 0.1900604063701263, "grad_norm": 0.3248359262943268, "learning_rate": 1.95691996041918e-05, "loss": 0.5863, "step": 6922 }, { "epoch": 0.1900878638110928, "grad_norm": 0.3379427492618561, "learning_rate": 1.9569074194222406e-05, "loss": 0.5491, "step": 6923 }, { "epoch": 0.19011532125205932, "grad_norm": 0.40340539813041687, "learning_rate": 1.9568948766403633e-05, "loss": 0.5619, "step": 6924 }, { "epoch": 0.19014277869302582, "grad_norm": 0.4092773497104645, "learning_rate": 1.9568823320735718e-05, "loss": 0.5902, "step": 6925 }, { "epoch": 0.1901702361339923, "grad_norm": 0.33309510350227356, "learning_rate": 1.956869785721889e-05, "loss": 0.5837, "step": 6926 }, { "epoch": 0.1901976935749588, "grad_norm": 0.3484421670436859, "learning_rate": 1.956857237585339e-05, "loss": 0.5071, "step": 6927 }, { "epoch": 0.19022515101592533, "grad_norm": 0.364927738904953, "learning_rate": 1.956844687663945e-05, "loss": 0.4728, "step": 6928 }, { "epoch": 0.19025260845689182, "grad_norm": 0.3537469506263733, "learning_rate": 1.9568321359577298e-05, "loss": 0.5719, "step": 6929 }, { "epoch": 0.1902800658978583, "grad_norm": 0.3491513133049011, "learning_rate": 1.9568195824667174e-05, "loss": 0.5196, "step": 6930 }, { "epoch": 0.1903075233388248, "grad_norm": 0.4068663716316223, "learning_rate": 1.9568070271909314e-05, "loss": 0.5905, "step": 6931 }, { "epoch": 0.19033498077979133, "grad_norm": 0.3350856304168701, "learning_rate": 1.9567944701303947e-05, "loss": 0.5652, "step": 6932 }, { "epoch": 0.19036243822075782, "grad_norm": 0.3768492639064789, "learning_rate": 1.956781911285131e-05, "loss": 0.579, "step": 6933 }, { "epoch": 0.19038989566172432, "grad_norm": 0.34556999802589417, "learning_rate": 1.9567693506551632e-05, "loss": 0.4936, "step": 6934 }, { "epoch": 0.19041735310269084, "grad_norm": 0.37416672706604004, "learning_rate": 1.9567567882405157e-05, "loss": 0.567, "step": 6935 }, { "epoch": 0.19044481054365733, "grad_norm": 0.40244197845458984, "learning_rate": 1.9567442240412115e-05, "loss": 0.563, "step": 6936 }, { "epoch": 0.19047226798462383, "grad_norm": 0.3270292580127716, "learning_rate": 1.956731658057274e-05, "loss": 0.4459, "step": 6937 }, { "epoch": 0.19049972542559032, "grad_norm": 0.3339061737060547, "learning_rate": 1.956719090288726e-05, "loss": 0.5165, "step": 6938 }, { "epoch": 0.19052718286655684, "grad_norm": 0.3981926441192627, "learning_rate": 1.956706520735592e-05, "loss": 0.4263, "step": 6939 }, { "epoch": 0.19055464030752334, "grad_norm": 0.40066128969192505, "learning_rate": 1.9566939493978953e-05, "loss": 0.4971, "step": 6940 }, { "epoch": 0.19058209774848983, "grad_norm": 0.3356865644454956, "learning_rate": 1.9566813762756584e-05, "loss": 0.5175, "step": 6941 }, { "epoch": 0.19060955518945635, "grad_norm": 0.39213845133781433, "learning_rate": 1.9566688013689056e-05, "loss": 0.4426, "step": 6942 }, { "epoch": 0.19063701263042285, "grad_norm": 0.3634313941001892, "learning_rate": 1.9566562246776604e-05, "loss": 0.616, "step": 6943 }, { "epoch": 0.19066447007138934, "grad_norm": 0.39019617438316345, "learning_rate": 1.956643646201946e-05, "loss": 0.5721, "step": 6944 }, { "epoch": 0.19069192751235584, "grad_norm": 0.4079192280769348, "learning_rate": 1.9566310659417857e-05, "loss": 0.4959, "step": 6945 }, { "epoch": 0.19071938495332236, "grad_norm": 0.3635096848011017, "learning_rate": 1.956618483897203e-05, "loss": 0.4588, "step": 6946 }, { "epoch": 0.19074684239428885, "grad_norm": 0.4046266973018646, "learning_rate": 1.9566059000682217e-05, "loss": 0.542, "step": 6947 }, { "epoch": 0.19077429983525535, "grad_norm": 0.3891264498233795, "learning_rate": 1.956593314454865e-05, "loss": 0.52, "step": 6948 }, { "epoch": 0.19080175727622187, "grad_norm": 0.39161258935928345, "learning_rate": 1.9565807270571567e-05, "loss": 0.5359, "step": 6949 }, { "epoch": 0.19082921471718836, "grad_norm": 0.43789616227149963, "learning_rate": 1.95656813787512e-05, "loss": 0.5174, "step": 6950 }, { "epoch": 0.19085667215815486, "grad_norm": 0.37501177191734314, "learning_rate": 1.956555546908778e-05, "loss": 0.6246, "step": 6951 }, { "epoch": 0.19088412959912135, "grad_norm": 0.3913848400115967, "learning_rate": 1.956542954158155e-05, "loss": 0.5828, "step": 6952 }, { "epoch": 0.19091158704008787, "grad_norm": 0.4014701843261719, "learning_rate": 1.9565303596232738e-05, "loss": 0.5447, "step": 6953 }, { "epoch": 0.19093904448105437, "grad_norm": 0.3197363018989563, "learning_rate": 1.9565177633041587e-05, "loss": 0.4677, "step": 6954 }, { "epoch": 0.19096650192202086, "grad_norm": 0.34348398447036743, "learning_rate": 1.9565051652008323e-05, "loss": 0.5528, "step": 6955 }, { "epoch": 0.19099395936298738, "grad_norm": 0.34984290599823, "learning_rate": 1.9564925653133185e-05, "loss": 0.4813, "step": 6956 }, { "epoch": 0.19102141680395388, "grad_norm": 0.3776915371417999, "learning_rate": 1.9564799636416406e-05, "loss": 0.509, "step": 6957 }, { "epoch": 0.19104887424492037, "grad_norm": 0.3569590449333191, "learning_rate": 1.9564673601858226e-05, "loss": 0.5147, "step": 6958 }, { "epoch": 0.19107633168588686, "grad_norm": 0.3705546259880066, "learning_rate": 1.9564547549458876e-05, "loss": 0.5665, "step": 6959 }, { "epoch": 0.19110378912685339, "grad_norm": 0.3040412962436676, "learning_rate": 1.9564421479218593e-05, "loss": 0.5245, "step": 6960 }, { "epoch": 0.19113124656781988, "grad_norm": 0.36125874519348145, "learning_rate": 1.956429539113761e-05, "loss": 0.5273, "step": 6961 }, { "epoch": 0.19115870400878637, "grad_norm": 0.39060819149017334, "learning_rate": 1.9564169285216166e-05, "loss": 0.5504, "step": 6962 }, { "epoch": 0.1911861614497529, "grad_norm": 0.38899141550064087, "learning_rate": 1.956404316145449e-05, "loss": 0.5614, "step": 6963 }, { "epoch": 0.1912136188907194, "grad_norm": 0.3639388680458069, "learning_rate": 1.9563917019852827e-05, "loss": 0.5602, "step": 6964 }, { "epoch": 0.19124107633168588, "grad_norm": 0.3669624328613281, "learning_rate": 1.95637908604114e-05, "loss": 0.5448, "step": 6965 }, { "epoch": 0.19126853377265238, "grad_norm": 0.39606618881225586, "learning_rate": 1.956366468313045e-05, "loss": 0.5014, "step": 6966 }, { "epoch": 0.1912959912136189, "grad_norm": 0.3647729456424713, "learning_rate": 1.956353848801022e-05, "loss": 0.536, "step": 6967 }, { "epoch": 0.1913234486545854, "grad_norm": 0.4668254256248474, "learning_rate": 1.9563412275050933e-05, "loss": 0.5897, "step": 6968 }, { "epoch": 0.1913509060955519, "grad_norm": 0.45616406202316284, "learning_rate": 1.956328604425283e-05, "loss": 0.5883, "step": 6969 }, { "epoch": 0.1913783635365184, "grad_norm": 0.3435705006122589, "learning_rate": 1.956315979561615e-05, "loss": 0.4694, "step": 6970 }, { "epoch": 0.1914058209774849, "grad_norm": 0.40309232473373413, "learning_rate": 1.956303352914112e-05, "loss": 0.524, "step": 6971 }, { "epoch": 0.1914332784184514, "grad_norm": 0.3847871422767639, "learning_rate": 1.9562907244827983e-05, "loss": 0.5878, "step": 6972 }, { "epoch": 0.1914607358594179, "grad_norm": 0.37726402282714844, "learning_rate": 1.956278094267697e-05, "loss": 0.4876, "step": 6973 }, { "epoch": 0.1914881933003844, "grad_norm": 0.3669898509979248, "learning_rate": 1.9562654622688325e-05, "loss": 0.5974, "step": 6974 }, { "epoch": 0.1915156507413509, "grad_norm": 0.3792892396450043, "learning_rate": 1.956252828486227e-05, "loss": 0.4739, "step": 6975 }, { "epoch": 0.1915431081823174, "grad_norm": 0.4032416045665741, "learning_rate": 1.956240192919905e-05, "loss": 0.5378, "step": 6976 }, { "epoch": 0.19157056562328392, "grad_norm": 0.3781232237815857, "learning_rate": 1.9562275555698898e-05, "loss": 0.5348, "step": 6977 }, { "epoch": 0.19159802306425042, "grad_norm": 0.3404254913330078, "learning_rate": 1.956214916436205e-05, "loss": 0.4797, "step": 6978 }, { "epoch": 0.1916254805052169, "grad_norm": 0.37766364216804504, "learning_rate": 1.956202275518874e-05, "loss": 0.6121, "step": 6979 }, { "epoch": 0.1916529379461834, "grad_norm": 0.32225149869918823, "learning_rate": 1.956189632817921e-05, "loss": 0.5117, "step": 6980 }, { "epoch": 0.19168039538714993, "grad_norm": 0.45331841707229614, "learning_rate": 1.956176988333369e-05, "loss": 0.5688, "step": 6981 }, { "epoch": 0.19170785282811642, "grad_norm": 0.4000934064388275, "learning_rate": 1.9561643420652415e-05, "loss": 0.4954, "step": 6982 }, { "epoch": 0.19173531026908291, "grad_norm": 0.35431960225105286, "learning_rate": 1.9561516940135627e-05, "loss": 0.4943, "step": 6983 }, { "epoch": 0.19176276771004944, "grad_norm": 0.39247363805770874, "learning_rate": 1.9561390441783557e-05, "loss": 0.5184, "step": 6984 }, { "epoch": 0.19179022515101593, "grad_norm": 0.6845055818557739, "learning_rate": 1.956126392559644e-05, "loss": 0.513, "step": 6985 }, { "epoch": 0.19181768259198242, "grad_norm": 0.4050014615058899, "learning_rate": 1.9561137391574515e-05, "loss": 0.5543, "step": 6986 }, { "epoch": 0.19184514003294892, "grad_norm": 0.4189422130584717, "learning_rate": 1.956101083971802e-05, "loss": 0.6427, "step": 6987 }, { "epoch": 0.19187259747391544, "grad_norm": 0.3994913697242737, "learning_rate": 1.9560884270027186e-05, "loss": 0.5599, "step": 6988 }, { "epoch": 0.19190005491488193, "grad_norm": 0.3419458866119385, "learning_rate": 1.9560757682502252e-05, "loss": 0.5459, "step": 6989 }, { "epoch": 0.19192751235584843, "grad_norm": 0.37614187598228455, "learning_rate": 1.9560631077143456e-05, "loss": 0.6211, "step": 6990 }, { "epoch": 0.19195496979681495, "grad_norm": 0.4427298605442047, "learning_rate": 1.9560504453951026e-05, "loss": 0.5894, "step": 6991 }, { "epoch": 0.19198242723778144, "grad_norm": 0.3692069351673126, "learning_rate": 1.9560377812925208e-05, "loss": 0.4909, "step": 6992 }, { "epoch": 0.19200988467874794, "grad_norm": 0.35063228011131287, "learning_rate": 1.9560251154066232e-05, "loss": 0.5294, "step": 6993 }, { "epoch": 0.19203734211971443, "grad_norm": 0.46396633982658386, "learning_rate": 1.9560124477374337e-05, "loss": 0.5342, "step": 6994 }, { "epoch": 0.19206479956068095, "grad_norm": 0.40436574816703796, "learning_rate": 1.955999778284976e-05, "loss": 0.5713, "step": 6995 }, { "epoch": 0.19209225700164745, "grad_norm": 0.34070324897766113, "learning_rate": 1.9559871070492734e-05, "loss": 0.4958, "step": 6996 }, { "epoch": 0.19211971444261394, "grad_norm": 0.38829436898231506, "learning_rate": 1.9559744340303498e-05, "loss": 0.5949, "step": 6997 }, { "epoch": 0.19214717188358044, "grad_norm": 0.36199212074279785, "learning_rate": 1.9559617592282287e-05, "loss": 0.4665, "step": 6998 }, { "epoch": 0.19217462932454696, "grad_norm": 0.4077532887458801, "learning_rate": 1.955949082642934e-05, "loss": 0.6246, "step": 6999 }, { "epoch": 0.19220208676551345, "grad_norm": 0.8044859766960144, "learning_rate": 1.9559364042744887e-05, "loss": 0.6253, "step": 7000 }, { "epoch": 0.19222954420647995, "grad_norm": 0.37125468254089355, "learning_rate": 1.9559237241229175e-05, "loss": 0.5385, "step": 7001 }, { "epoch": 0.19225700164744647, "grad_norm": 0.3773503601551056, "learning_rate": 1.955911042188243e-05, "loss": 0.5636, "step": 7002 }, { "epoch": 0.19228445908841296, "grad_norm": 0.3741331696510315, "learning_rate": 1.9558983584704896e-05, "loss": 0.505, "step": 7003 }, { "epoch": 0.19231191652937946, "grad_norm": 0.3699350953102112, "learning_rate": 1.9558856729696803e-05, "loss": 0.5747, "step": 7004 }, { "epoch": 0.19233937397034595, "grad_norm": 0.3491131365299225, "learning_rate": 1.9558729856858396e-05, "loss": 0.5475, "step": 7005 }, { "epoch": 0.19236683141131247, "grad_norm": 0.3777410387992859, "learning_rate": 1.9558602966189905e-05, "loss": 0.5264, "step": 7006 }, { "epoch": 0.19239428885227897, "grad_norm": 0.3943873345851898, "learning_rate": 1.955847605769157e-05, "loss": 0.5645, "step": 7007 }, { "epoch": 0.19242174629324546, "grad_norm": 0.4020509123802185, "learning_rate": 1.9558349131363625e-05, "loss": 0.497, "step": 7008 }, { "epoch": 0.19244920373421198, "grad_norm": 0.3494105935096741, "learning_rate": 1.9558222187206308e-05, "loss": 0.5004, "step": 7009 }, { "epoch": 0.19247666117517848, "grad_norm": 0.35440894961357117, "learning_rate": 1.955809522521986e-05, "loss": 0.5305, "step": 7010 }, { "epoch": 0.19250411861614497, "grad_norm": 0.38787394762039185, "learning_rate": 1.9557968245404507e-05, "loss": 0.4617, "step": 7011 }, { "epoch": 0.19253157605711146, "grad_norm": 0.39531490206718445, "learning_rate": 1.9557841247760495e-05, "loss": 0.5392, "step": 7012 }, { "epoch": 0.19255903349807799, "grad_norm": 0.3669515550136566, "learning_rate": 1.955771423228806e-05, "loss": 0.587, "step": 7013 }, { "epoch": 0.19258649093904448, "grad_norm": 0.4034994840621948, "learning_rate": 1.9557587198987438e-05, "loss": 0.5638, "step": 7014 }, { "epoch": 0.19261394838001097, "grad_norm": 0.3339743912220001, "learning_rate": 1.9557460147858865e-05, "loss": 0.504, "step": 7015 }, { "epoch": 0.1926414058209775, "grad_norm": 0.3385424315929413, "learning_rate": 1.955733307890258e-05, "loss": 0.5177, "step": 7016 }, { "epoch": 0.192668863261944, "grad_norm": 0.344289094209671, "learning_rate": 1.9557205992118815e-05, "loss": 0.5282, "step": 7017 }, { "epoch": 0.19269632070291048, "grad_norm": 0.3443506956100464, "learning_rate": 1.955707888750781e-05, "loss": 0.4742, "step": 7018 }, { "epoch": 0.19272377814387698, "grad_norm": 0.3377014398574829, "learning_rate": 1.9556951765069806e-05, "loss": 0.4955, "step": 7019 }, { "epoch": 0.1927512355848435, "grad_norm": 0.3553220331668854, "learning_rate": 1.9556824624805035e-05, "loss": 0.5678, "step": 7020 }, { "epoch": 0.19277869302581, "grad_norm": 0.32762786746025085, "learning_rate": 1.9556697466713738e-05, "loss": 0.518, "step": 7021 }, { "epoch": 0.1928061504667765, "grad_norm": 0.3683374524116516, "learning_rate": 1.9556570290796148e-05, "loss": 0.4317, "step": 7022 }, { "epoch": 0.192833607907743, "grad_norm": 0.35216841101646423, "learning_rate": 1.9556443097052505e-05, "loss": 0.4583, "step": 7023 }, { "epoch": 0.1928610653487095, "grad_norm": 0.37241801619529724, "learning_rate": 1.9556315885483044e-05, "loss": 0.5315, "step": 7024 }, { "epoch": 0.192888522789676, "grad_norm": 0.34667956829071045, "learning_rate": 1.9556188656088005e-05, "loss": 0.5932, "step": 7025 }, { "epoch": 0.1929159802306425, "grad_norm": 0.3337510824203491, "learning_rate": 1.9556061408867622e-05, "loss": 0.5288, "step": 7026 }, { "epoch": 0.192943437671609, "grad_norm": 0.3353162407875061, "learning_rate": 1.955593414382214e-05, "loss": 0.5025, "step": 7027 }, { "epoch": 0.1929708951125755, "grad_norm": 0.3487168550491333, "learning_rate": 1.9555806860951785e-05, "loss": 0.5534, "step": 7028 }, { "epoch": 0.192998352553542, "grad_norm": 0.36860448122024536, "learning_rate": 1.9555679560256803e-05, "loss": 0.5171, "step": 7029 }, { "epoch": 0.19302580999450852, "grad_norm": 0.3501944839954376, "learning_rate": 1.9555552241737428e-05, "loss": 0.5378, "step": 7030 }, { "epoch": 0.19305326743547502, "grad_norm": 0.5548388957977295, "learning_rate": 1.95554249053939e-05, "loss": 0.675, "step": 7031 }, { "epoch": 0.1930807248764415, "grad_norm": 0.32186439633369446, "learning_rate": 1.9555297551226454e-05, "loss": 0.483, "step": 7032 }, { "epoch": 0.193108182317408, "grad_norm": 0.35725170373916626, "learning_rate": 1.9555170179235326e-05, "loss": 0.5159, "step": 7033 }, { "epoch": 0.19313563975837453, "grad_norm": 0.9094733595848083, "learning_rate": 1.955504278942076e-05, "loss": 0.4956, "step": 7034 }, { "epoch": 0.19316309719934102, "grad_norm": 0.30891624093055725, "learning_rate": 1.955491538178299e-05, "loss": 0.4183, "step": 7035 }, { "epoch": 0.19319055464030752, "grad_norm": 0.32969704270362854, "learning_rate": 1.955478795632225e-05, "loss": 0.5475, "step": 7036 }, { "epoch": 0.19321801208127404, "grad_norm": 0.35502755641937256, "learning_rate": 1.955466051303878e-05, "loss": 0.4936, "step": 7037 }, { "epoch": 0.19324546952224053, "grad_norm": 0.34554553031921387, "learning_rate": 1.9554533051932824e-05, "loss": 0.5049, "step": 7038 }, { "epoch": 0.19327292696320703, "grad_norm": 0.37126660346984863, "learning_rate": 1.955440557300461e-05, "loss": 0.479, "step": 7039 }, { "epoch": 0.19330038440417352, "grad_norm": 0.4616793096065521, "learning_rate": 1.955427807625438e-05, "loss": 0.4941, "step": 7040 }, { "epoch": 0.19332784184514004, "grad_norm": 0.35193684697151184, "learning_rate": 1.9554150561682374e-05, "loss": 0.587, "step": 7041 }, { "epoch": 0.19335529928610654, "grad_norm": 0.36055055260658264, "learning_rate": 1.9554023029288823e-05, "loss": 0.5468, "step": 7042 }, { "epoch": 0.19338275672707303, "grad_norm": 0.36816155910491943, "learning_rate": 1.9553895479073977e-05, "loss": 0.6131, "step": 7043 }, { "epoch": 0.19341021416803955, "grad_norm": 0.3417634665966034, "learning_rate": 1.9553767911038066e-05, "loss": 0.5746, "step": 7044 }, { "epoch": 0.19343767160900605, "grad_norm": 0.3691891133785248, "learning_rate": 1.9553640325181323e-05, "loss": 0.5051, "step": 7045 }, { "epoch": 0.19346512904997254, "grad_norm": 0.34039413928985596, "learning_rate": 1.9553512721503994e-05, "loss": 0.5046, "step": 7046 }, { "epoch": 0.19349258649093903, "grad_norm": 0.361599862575531, "learning_rate": 1.9553385100006318e-05, "loss": 0.45, "step": 7047 }, { "epoch": 0.19352004393190556, "grad_norm": 0.3631707429885864, "learning_rate": 1.9553257460688525e-05, "loss": 0.5249, "step": 7048 }, { "epoch": 0.19354750137287205, "grad_norm": 0.736594557762146, "learning_rate": 1.955312980355086e-05, "loss": 0.5581, "step": 7049 }, { "epoch": 0.19357495881383854, "grad_norm": 0.4240773320198059, "learning_rate": 1.955300212859356e-05, "loss": 0.6069, "step": 7050 }, { "epoch": 0.19360241625480507, "grad_norm": 0.35737890005111694, "learning_rate": 1.955287443581686e-05, "loss": 0.5218, "step": 7051 }, { "epoch": 0.19362987369577156, "grad_norm": 0.38303494453430176, "learning_rate": 1.9552746725221003e-05, "loss": 0.6008, "step": 7052 }, { "epoch": 0.19365733113673805, "grad_norm": 0.37311851978302, "learning_rate": 1.955261899680622e-05, "loss": 0.6081, "step": 7053 }, { "epoch": 0.19368478857770455, "grad_norm": 0.3554532825946808, "learning_rate": 1.9552491250572758e-05, "loss": 0.6025, "step": 7054 }, { "epoch": 0.19371224601867107, "grad_norm": 0.33497458696365356, "learning_rate": 1.955236348652085e-05, "loss": 0.5215, "step": 7055 }, { "epoch": 0.19373970345963756, "grad_norm": 0.3799905478954315, "learning_rate": 1.9552235704650732e-05, "loss": 0.5028, "step": 7056 }, { "epoch": 0.19376716090060406, "grad_norm": 0.4394189119338989, "learning_rate": 1.9552107904962653e-05, "loss": 0.5432, "step": 7057 }, { "epoch": 0.19379461834157058, "grad_norm": 0.3874707818031311, "learning_rate": 1.9551980087456837e-05, "loss": 0.6236, "step": 7058 }, { "epoch": 0.19382207578253707, "grad_norm": 0.3800000250339508, "learning_rate": 1.9551852252133533e-05, "loss": 0.5927, "step": 7059 }, { "epoch": 0.19384953322350357, "grad_norm": 0.3664473593235016, "learning_rate": 1.9551724398992973e-05, "loss": 0.5339, "step": 7060 }, { "epoch": 0.19387699066447006, "grad_norm": 0.3939146399497986, "learning_rate": 1.95515965280354e-05, "loss": 0.6566, "step": 7061 }, { "epoch": 0.19390444810543658, "grad_norm": 0.4141204059123993, "learning_rate": 1.955146863926105e-05, "loss": 0.6651, "step": 7062 }, { "epoch": 0.19393190554640308, "grad_norm": 0.3623238205909729, "learning_rate": 1.9551340732670167e-05, "loss": 0.5329, "step": 7063 }, { "epoch": 0.19395936298736957, "grad_norm": 0.35530078411102295, "learning_rate": 1.955121280826298e-05, "loss": 0.5935, "step": 7064 }, { "epoch": 0.19398682042833607, "grad_norm": 0.3710092008113861, "learning_rate": 1.9551084866039736e-05, "loss": 0.5622, "step": 7065 }, { "epoch": 0.1940142778693026, "grad_norm": 0.3602275252342224, "learning_rate": 1.955095690600067e-05, "loss": 0.6223, "step": 7066 }, { "epoch": 0.19404173531026908, "grad_norm": 0.3741648495197296, "learning_rate": 1.955082892814602e-05, "loss": 0.5677, "step": 7067 }, { "epoch": 0.19406919275123558, "grad_norm": 0.3664383590221405, "learning_rate": 1.9550700932476023e-05, "loss": 0.5994, "step": 7068 }, { "epoch": 0.1940966501922021, "grad_norm": 0.4249798357486725, "learning_rate": 1.9550572918990926e-05, "loss": 0.5886, "step": 7069 }, { "epoch": 0.1941241076331686, "grad_norm": 0.36835092306137085, "learning_rate": 1.9550444887690958e-05, "loss": 0.5749, "step": 7070 }, { "epoch": 0.19415156507413509, "grad_norm": 0.9168388247489929, "learning_rate": 1.9550316838576365e-05, "loss": 0.4887, "step": 7071 }, { "epoch": 0.19417902251510158, "grad_norm": 0.3361561894416809, "learning_rate": 1.9550188771647383e-05, "loss": 0.4964, "step": 7072 }, { "epoch": 0.1942064799560681, "grad_norm": 0.38099122047424316, "learning_rate": 1.9550060686904247e-05, "loss": 0.5402, "step": 7073 }, { "epoch": 0.1942339373970346, "grad_norm": 0.38914793729782104, "learning_rate": 1.9549932584347205e-05, "loss": 0.5691, "step": 7074 }, { "epoch": 0.1942613948380011, "grad_norm": 0.3655528128147125, "learning_rate": 1.9549804463976488e-05, "loss": 0.562, "step": 7075 }, { "epoch": 0.1942888522789676, "grad_norm": 0.34185680747032166, "learning_rate": 1.9549676325792336e-05, "loss": 0.5355, "step": 7076 }, { "epoch": 0.1943163097199341, "grad_norm": 0.44784706830978394, "learning_rate": 1.9549548169794992e-05, "loss": 0.5154, "step": 7077 }, { "epoch": 0.1943437671609006, "grad_norm": 0.4111733138561249, "learning_rate": 1.9549419995984692e-05, "loss": 0.5346, "step": 7078 }, { "epoch": 0.1943712246018671, "grad_norm": 0.4037534296512604, "learning_rate": 1.954929180436168e-05, "loss": 0.6513, "step": 7079 }, { "epoch": 0.19439868204283361, "grad_norm": 0.3821345269680023, "learning_rate": 1.9549163594926185e-05, "loss": 0.5449, "step": 7080 }, { "epoch": 0.1944261394838001, "grad_norm": 0.40433260798454285, "learning_rate": 1.9549035367678453e-05, "loss": 0.5432, "step": 7081 }, { "epoch": 0.1944535969247666, "grad_norm": 0.40730172395706177, "learning_rate": 1.9548907122618727e-05, "loss": 0.5429, "step": 7082 }, { "epoch": 0.19448105436573312, "grad_norm": 0.3996466398239136, "learning_rate": 1.954877885974724e-05, "loss": 0.5716, "step": 7083 }, { "epoch": 0.19450851180669962, "grad_norm": 0.4058230519294739, "learning_rate": 1.9548650579064226e-05, "loss": 0.5286, "step": 7084 }, { "epoch": 0.1945359692476661, "grad_norm": 0.37513747811317444, "learning_rate": 1.9548522280569938e-05, "loss": 0.5185, "step": 7085 }, { "epoch": 0.1945634266886326, "grad_norm": 0.362341046333313, "learning_rate": 1.9548393964264606e-05, "loss": 0.547, "step": 7086 }, { "epoch": 0.19459088412959913, "grad_norm": 0.44074681401252747, "learning_rate": 1.9548265630148472e-05, "loss": 0.5489, "step": 7087 }, { "epoch": 0.19461834157056562, "grad_norm": 0.38960081338882446, "learning_rate": 1.9548137278221776e-05, "loss": 0.5169, "step": 7088 }, { "epoch": 0.19464579901153212, "grad_norm": 0.3788408935070038, "learning_rate": 1.9548008908484756e-05, "loss": 0.533, "step": 7089 }, { "epoch": 0.19467325645249864, "grad_norm": 0.37933051586151123, "learning_rate": 1.9547880520937655e-05, "loss": 0.4978, "step": 7090 }, { "epoch": 0.19470071389346513, "grad_norm": 0.3765076994895935, "learning_rate": 1.9547752115580703e-05, "loss": 0.5499, "step": 7091 }, { "epoch": 0.19472817133443163, "grad_norm": 0.3727859556674957, "learning_rate": 1.9547623692414152e-05, "loss": 0.4743, "step": 7092 }, { "epoch": 0.19475562877539812, "grad_norm": 0.36264777183532715, "learning_rate": 1.9547495251438232e-05, "loss": 0.4938, "step": 7093 }, { "epoch": 0.19478308621636464, "grad_norm": 0.595095694065094, "learning_rate": 1.9547366792653187e-05, "loss": 0.5597, "step": 7094 }, { "epoch": 0.19481054365733114, "grad_norm": 0.33324581384658813, "learning_rate": 1.9547238316059257e-05, "loss": 0.5454, "step": 7095 }, { "epoch": 0.19483800109829763, "grad_norm": 0.3286076784133911, "learning_rate": 1.954710982165668e-05, "loss": 0.5155, "step": 7096 }, { "epoch": 0.19486545853926415, "grad_norm": 0.39441075921058655, "learning_rate": 1.9546981309445695e-05, "loss": 0.5928, "step": 7097 }, { "epoch": 0.19489291598023065, "grad_norm": 0.3677467107772827, "learning_rate": 1.9546852779426545e-05, "loss": 0.5252, "step": 7098 }, { "epoch": 0.19492037342119714, "grad_norm": 0.3494504988193512, "learning_rate": 1.9546724231599468e-05, "loss": 0.5204, "step": 7099 }, { "epoch": 0.19494783086216363, "grad_norm": 0.43648621439933777, "learning_rate": 1.95465956659647e-05, "loss": 0.526, "step": 7100 }, { "epoch": 0.19497528830313016, "grad_norm": 0.34320247173309326, "learning_rate": 1.9546467082522486e-05, "loss": 0.536, "step": 7101 }, { "epoch": 0.19500274574409665, "grad_norm": 0.32266420125961304, "learning_rate": 1.9546338481273065e-05, "loss": 0.4496, "step": 7102 }, { "epoch": 0.19503020318506314, "grad_norm": 0.3357849419116974, "learning_rate": 1.9546209862216674e-05, "loss": 0.575, "step": 7103 }, { "epoch": 0.19505766062602967, "grad_norm": 0.372760146856308, "learning_rate": 1.9546081225353553e-05, "loss": 0.4671, "step": 7104 }, { "epoch": 0.19508511806699616, "grad_norm": 0.38168057799339294, "learning_rate": 1.954595257068395e-05, "loss": 0.5125, "step": 7105 }, { "epoch": 0.19511257550796265, "grad_norm": 0.3345735967159271, "learning_rate": 1.9545823898208094e-05, "loss": 0.5129, "step": 7106 }, { "epoch": 0.19514003294892915, "grad_norm": 0.3664141297340393, "learning_rate": 1.954569520792623e-05, "loss": 0.5156, "step": 7107 }, { "epoch": 0.19516749038989567, "grad_norm": 0.37676674127578735, "learning_rate": 1.9545566499838596e-05, "loss": 0.5049, "step": 7108 }, { "epoch": 0.19519494783086216, "grad_norm": 0.422270804643631, "learning_rate": 1.9545437773945438e-05, "loss": 0.578, "step": 7109 }, { "epoch": 0.19522240527182866, "grad_norm": 0.473947674036026, "learning_rate": 1.954530903024699e-05, "loss": 0.5399, "step": 7110 }, { "epoch": 0.19524986271279518, "grad_norm": 0.37826886773109436, "learning_rate": 1.9545180268743492e-05, "loss": 0.6155, "step": 7111 }, { "epoch": 0.19527732015376167, "grad_norm": 0.35873594880104065, "learning_rate": 1.9545051489435193e-05, "loss": 0.4885, "step": 7112 }, { "epoch": 0.19530477759472817, "grad_norm": 0.369585782289505, "learning_rate": 1.9544922692322322e-05, "loss": 0.5725, "step": 7113 }, { "epoch": 0.19533223503569466, "grad_norm": 0.3691712021827698, "learning_rate": 1.9544793877405123e-05, "loss": 0.5744, "step": 7114 }, { "epoch": 0.19535969247666118, "grad_norm": 0.39016222953796387, "learning_rate": 1.9544665044683838e-05, "loss": 0.5968, "step": 7115 }, { "epoch": 0.19538714991762768, "grad_norm": 0.3748838007450104, "learning_rate": 1.954453619415871e-05, "loss": 0.598, "step": 7116 }, { "epoch": 0.19541460735859417, "grad_norm": 0.3429998457431793, "learning_rate": 1.954440732582997e-05, "loss": 0.5423, "step": 7117 }, { "epoch": 0.1954420647995607, "grad_norm": 0.3752644658088684, "learning_rate": 1.9544278439697864e-05, "loss": 0.5473, "step": 7118 }, { "epoch": 0.1954695222405272, "grad_norm": 0.39227351546287537, "learning_rate": 1.954414953576264e-05, "loss": 0.5222, "step": 7119 }, { "epoch": 0.19549697968149368, "grad_norm": 0.3240601420402527, "learning_rate": 1.954402061402452e-05, "loss": 0.529, "step": 7120 }, { "epoch": 0.19552443712246018, "grad_norm": 0.39040884375572205, "learning_rate": 1.9543891674483767e-05, "loss": 0.49, "step": 7121 }, { "epoch": 0.1955518945634267, "grad_norm": 0.3687610924243927, "learning_rate": 1.95437627171406e-05, "loss": 0.4753, "step": 7122 }, { "epoch": 0.1955793520043932, "grad_norm": 0.37360528111457825, "learning_rate": 1.9543633741995275e-05, "loss": 0.5386, "step": 7123 }, { "epoch": 0.1956068094453597, "grad_norm": 0.36997178196907043, "learning_rate": 1.9543504749048025e-05, "loss": 0.5374, "step": 7124 }, { "epoch": 0.1956342668863262, "grad_norm": 0.3902798295021057, "learning_rate": 1.9543375738299096e-05, "loss": 0.5533, "step": 7125 }, { "epoch": 0.1956617243272927, "grad_norm": 0.3777187764644623, "learning_rate": 1.9543246709748722e-05, "loss": 0.5266, "step": 7126 }, { "epoch": 0.1956891817682592, "grad_norm": 0.324217826128006, "learning_rate": 1.954311766339715e-05, "loss": 0.525, "step": 7127 }, { "epoch": 0.1957166392092257, "grad_norm": 0.4059572219848633, "learning_rate": 1.9542988599244615e-05, "loss": 0.4792, "step": 7128 }, { "epoch": 0.1957440966501922, "grad_norm": 0.34937992691993713, "learning_rate": 1.9542859517291363e-05, "loss": 0.476, "step": 7129 }, { "epoch": 0.1957715540911587, "grad_norm": 0.46754807233810425, "learning_rate": 1.954273041753763e-05, "loss": 0.4902, "step": 7130 }, { "epoch": 0.1957990115321252, "grad_norm": 0.3189769685268402, "learning_rate": 1.9542601299983658e-05, "loss": 0.5168, "step": 7131 }, { "epoch": 0.1958264689730917, "grad_norm": 0.3236069083213806, "learning_rate": 1.954247216462969e-05, "loss": 0.486, "step": 7132 }, { "epoch": 0.19585392641405822, "grad_norm": 0.3994380235671997, "learning_rate": 1.9542343011475967e-05, "loss": 0.4839, "step": 7133 }, { "epoch": 0.1958813838550247, "grad_norm": 0.3506726920604706, "learning_rate": 1.954221384052273e-05, "loss": 0.5285, "step": 7134 }, { "epoch": 0.1959088412959912, "grad_norm": 0.3980531394481659, "learning_rate": 1.9542084651770213e-05, "loss": 0.5184, "step": 7135 }, { "epoch": 0.19593629873695773, "grad_norm": 0.35364046692848206, "learning_rate": 1.9541955445218668e-05, "loss": 0.4596, "step": 7136 }, { "epoch": 0.19596375617792422, "grad_norm": 0.3507658541202545, "learning_rate": 1.954182622086833e-05, "loss": 0.5934, "step": 7137 }, { "epoch": 0.19599121361889071, "grad_norm": 0.39827755093574524, "learning_rate": 1.954169697871944e-05, "loss": 0.5393, "step": 7138 }, { "epoch": 0.1960186710598572, "grad_norm": 0.41258329153060913, "learning_rate": 1.954156771877224e-05, "loss": 0.5903, "step": 7139 }, { "epoch": 0.19604612850082373, "grad_norm": 0.33346524834632874, "learning_rate": 1.954143844102697e-05, "loss": 0.5391, "step": 7140 }, { "epoch": 0.19607358594179022, "grad_norm": 0.36685311794281006, "learning_rate": 1.9541309145483872e-05, "loss": 0.5959, "step": 7141 }, { "epoch": 0.19610104338275672, "grad_norm": 0.4047291874885559, "learning_rate": 1.9541179832143187e-05, "loss": 0.6385, "step": 7142 }, { "epoch": 0.19612850082372324, "grad_norm": 0.34613046050071716, "learning_rate": 1.954105050100516e-05, "loss": 0.519, "step": 7143 }, { "epoch": 0.19615595826468973, "grad_norm": 0.34517884254455566, "learning_rate": 1.9540921152070026e-05, "loss": 0.493, "step": 7144 }, { "epoch": 0.19618341570565623, "grad_norm": 0.3584359288215637, "learning_rate": 1.954079178533803e-05, "loss": 0.4932, "step": 7145 }, { "epoch": 0.19621087314662272, "grad_norm": 0.4661368131637573, "learning_rate": 1.9540662400809408e-05, "loss": 0.5303, "step": 7146 }, { "epoch": 0.19623833058758924, "grad_norm": 0.37627050280570984, "learning_rate": 1.9540532998484408e-05, "loss": 0.5054, "step": 7147 }, { "epoch": 0.19626578802855574, "grad_norm": 0.35841119289398193, "learning_rate": 1.954040357836327e-05, "loss": 0.5564, "step": 7148 }, { "epoch": 0.19629324546952223, "grad_norm": 0.4122997224330902, "learning_rate": 1.9540274140446237e-05, "loss": 0.5458, "step": 7149 }, { "epoch": 0.19632070291048875, "grad_norm": 0.35828328132629395, "learning_rate": 1.9540144684733543e-05, "loss": 0.5018, "step": 7150 }, { "epoch": 0.19634816035145525, "grad_norm": 0.3851050138473511, "learning_rate": 1.9540015211225436e-05, "loss": 0.4864, "step": 7151 }, { "epoch": 0.19637561779242174, "grad_norm": 0.36378082633018494, "learning_rate": 1.9539885719922153e-05, "loss": 0.4787, "step": 7152 }, { "epoch": 0.19640307523338824, "grad_norm": 0.39105677604675293, "learning_rate": 1.9539756210823944e-05, "loss": 0.5556, "step": 7153 }, { "epoch": 0.19643053267435476, "grad_norm": 0.3436042070388794, "learning_rate": 1.9539626683931044e-05, "loss": 0.562, "step": 7154 }, { "epoch": 0.19645799011532125, "grad_norm": 0.3398236334323883, "learning_rate": 1.9539497139243692e-05, "loss": 0.4782, "step": 7155 }, { "epoch": 0.19648544755628775, "grad_norm": 0.3495853543281555, "learning_rate": 1.9539367576762136e-05, "loss": 0.4932, "step": 7156 }, { "epoch": 0.19651290499725427, "grad_norm": 0.44746723771095276, "learning_rate": 1.9539237996486614e-05, "loss": 0.6078, "step": 7157 }, { "epoch": 0.19654036243822076, "grad_norm": 0.39059901237487793, "learning_rate": 1.953910839841737e-05, "loss": 0.4787, "step": 7158 }, { "epoch": 0.19656781987918726, "grad_norm": 0.3783426582813263, "learning_rate": 1.953897878255464e-05, "loss": 0.552, "step": 7159 }, { "epoch": 0.19659527732015375, "grad_norm": 0.38346585631370544, "learning_rate": 1.9538849148898672e-05, "loss": 0.535, "step": 7160 }, { "epoch": 0.19662273476112027, "grad_norm": 0.30041077733039856, "learning_rate": 1.953871949744971e-05, "loss": 0.4641, "step": 7161 }, { "epoch": 0.19665019220208677, "grad_norm": 0.36442095041275024, "learning_rate": 1.9538589828207985e-05, "loss": 0.5903, "step": 7162 }, { "epoch": 0.19667764964305326, "grad_norm": 2.016639471054077, "learning_rate": 1.9538460141173748e-05, "loss": 0.641, "step": 7163 }, { "epoch": 0.19670510708401978, "grad_norm": 0.5272617936134338, "learning_rate": 1.9538330436347238e-05, "loss": 0.5308, "step": 7164 }, { "epoch": 0.19673256452498628, "grad_norm": 0.3539971709251404, "learning_rate": 1.9538200713728703e-05, "loss": 0.5316, "step": 7165 }, { "epoch": 0.19676002196595277, "grad_norm": 0.3413262963294983, "learning_rate": 1.9538070973318373e-05, "loss": 0.5218, "step": 7166 }, { "epoch": 0.19678747940691926, "grad_norm": 0.32233503460884094, "learning_rate": 1.95379412151165e-05, "loss": 0.4572, "step": 7167 }, { "epoch": 0.19681493684788579, "grad_norm": 0.49622124433517456, "learning_rate": 1.953781143912332e-05, "loss": 0.5181, "step": 7168 }, { "epoch": 0.19684239428885228, "grad_norm": 0.3376036286354065, "learning_rate": 1.953768164533908e-05, "loss": 0.5897, "step": 7169 }, { "epoch": 0.19686985172981877, "grad_norm": 0.6925176978111267, "learning_rate": 1.953755183376402e-05, "loss": 0.5564, "step": 7170 }, { "epoch": 0.1968973091707853, "grad_norm": 0.6746611595153809, "learning_rate": 1.953742200439838e-05, "loss": 0.487, "step": 7171 }, { "epoch": 0.1969247666117518, "grad_norm": 0.3919059634208679, "learning_rate": 1.9537292157242405e-05, "loss": 0.6005, "step": 7172 }, { "epoch": 0.19695222405271828, "grad_norm": 0.3767068684101105, "learning_rate": 1.953716229229633e-05, "loss": 0.4442, "step": 7173 }, { "epoch": 0.19697968149368478, "grad_norm": 0.37462007999420166, "learning_rate": 1.9537032409560414e-05, "loss": 0.6061, "step": 7174 }, { "epoch": 0.1970071389346513, "grad_norm": 0.4104886054992676, "learning_rate": 1.9536902509034883e-05, "loss": 0.5115, "step": 7175 }, { "epoch": 0.1970345963756178, "grad_norm": 0.36642777919769287, "learning_rate": 1.9536772590719987e-05, "loss": 0.6339, "step": 7176 }, { "epoch": 0.1970620538165843, "grad_norm": 0.4304368793964386, "learning_rate": 1.9536642654615963e-05, "loss": 0.5584, "step": 7177 }, { "epoch": 0.1970895112575508, "grad_norm": 0.3695346415042877, "learning_rate": 1.9536512700723057e-05, "loss": 0.5586, "step": 7178 }, { "epoch": 0.1971169686985173, "grad_norm": 0.4176194369792938, "learning_rate": 1.953638272904151e-05, "loss": 0.5453, "step": 7179 }, { "epoch": 0.1971444261394838, "grad_norm": 0.357776015996933, "learning_rate": 1.953625273957157e-05, "loss": 0.5602, "step": 7180 }, { "epoch": 0.1971718835804503, "grad_norm": 0.39299970865249634, "learning_rate": 1.9536122732313476e-05, "loss": 0.4265, "step": 7181 }, { "epoch": 0.1971993410214168, "grad_norm": 0.38582319021224976, "learning_rate": 1.9535992707267465e-05, "loss": 0.5775, "step": 7182 }, { "epoch": 0.1972267984623833, "grad_norm": 0.33422353863716125, "learning_rate": 1.9535862664433786e-05, "loss": 0.4638, "step": 7183 }, { "epoch": 0.1972542559033498, "grad_norm": 0.3745768368244171, "learning_rate": 1.953573260381268e-05, "loss": 0.6488, "step": 7184 }, { "epoch": 0.19728171334431632, "grad_norm": 0.3630523681640625, "learning_rate": 1.9535602525404388e-05, "loss": 0.4787, "step": 7185 }, { "epoch": 0.19730917078528282, "grad_norm": 0.39216434955596924, "learning_rate": 1.9535472429209155e-05, "loss": 0.517, "step": 7186 }, { "epoch": 0.1973366282262493, "grad_norm": 0.33977261185646057, "learning_rate": 1.9535342315227225e-05, "loss": 0.4784, "step": 7187 }, { "epoch": 0.1973640856672158, "grad_norm": 0.3722745180130005, "learning_rate": 1.953521218345883e-05, "loss": 0.4741, "step": 7188 }, { "epoch": 0.19739154310818233, "grad_norm": 0.40505266189575195, "learning_rate": 1.9535082033904228e-05, "loss": 0.6026, "step": 7189 }, { "epoch": 0.19741900054914882, "grad_norm": 0.3552279770374298, "learning_rate": 1.9534951866563655e-05, "loss": 0.5548, "step": 7190 }, { "epoch": 0.19744645799011531, "grad_norm": 0.6264123916625977, "learning_rate": 1.953482168143735e-05, "loss": 0.4778, "step": 7191 }, { "epoch": 0.19747391543108184, "grad_norm": 0.3956158459186554, "learning_rate": 1.953469147852556e-05, "loss": 0.5932, "step": 7192 }, { "epoch": 0.19750137287204833, "grad_norm": 0.39580288529396057, "learning_rate": 1.9534561257828533e-05, "loss": 0.5117, "step": 7193 }, { "epoch": 0.19752883031301482, "grad_norm": 0.3647053837776184, "learning_rate": 1.95344310193465e-05, "loss": 0.5876, "step": 7194 }, { "epoch": 0.19755628775398132, "grad_norm": 0.369294673204422, "learning_rate": 1.953430076307971e-05, "loss": 0.4724, "step": 7195 }, { "epoch": 0.19758374519494784, "grad_norm": 0.3545277416706085, "learning_rate": 1.953417048902841e-05, "loss": 0.4994, "step": 7196 }, { "epoch": 0.19761120263591433, "grad_norm": 0.3297555148601532, "learning_rate": 1.9534040197192837e-05, "loss": 0.5086, "step": 7197 }, { "epoch": 0.19763866007688083, "grad_norm": 0.35315993428230286, "learning_rate": 1.9533909887573236e-05, "loss": 0.4819, "step": 7198 }, { "epoch": 0.19766611751784732, "grad_norm": 0.36271387338638306, "learning_rate": 1.953377956016985e-05, "loss": 0.488, "step": 7199 }, { "epoch": 0.19769357495881384, "grad_norm": 0.3455018103122711, "learning_rate": 1.953364921498292e-05, "loss": 0.49, "step": 7200 }, { "epoch": 0.19772103239978034, "grad_norm": 0.41672009229660034, "learning_rate": 1.9533518852012692e-05, "loss": 0.5471, "step": 7201 }, { "epoch": 0.19774848984074683, "grad_norm": 0.4056854844093323, "learning_rate": 1.9533388471259413e-05, "loss": 0.4609, "step": 7202 }, { "epoch": 0.19777594728171335, "grad_norm": 0.3947330713272095, "learning_rate": 1.953325807272332e-05, "loss": 0.546, "step": 7203 }, { "epoch": 0.19780340472267985, "grad_norm": 0.35936641693115234, "learning_rate": 1.9533127656404657e-05, "loss": 0.5069, "step": 7204 }, { "epoch": 0.19783086216364634, "grad_norm": 0.3275044858455658, "learning_rate": 1.9532997222303668e-05, "loss": 0.4886, "step": 7205 }, { "epoch": 0.19785831960461284, "grad_norm": 0.41177043318748474, "learning_rate": 1.9532866770420597e-05, "loss": 0.4495, "step": 7206 }, { "epoch": 0.19788577704557936, "grad_norm": 0.3257928490638733, "learning_rate": 1.953273630075569e-05, "loss": 0.5104, "step": 7207 }, { "epoch": 0.19791323448654585, "grad_norm": 0.41101768612861633, "learning_rate": 1.9532605813309185e-05, "loss": 0.5408, "step": 7208 }, { "epoch": 0.19794069192751235, "grad_norm": 0.3737339973449707, "learning_rate": 1.9532475308081324e-05, "loss": 0.5636, "step": 7209 }, { "epoch": 0.19796814936847887, "grad_norm": 0.3762427568435669, "learning_rate": 1.9532344785072358e-05, "loss": 0.5922, "step": 7210 }, { "epoch": 0.19799560680944536, "grad_norm": 0.459736168384552, "learning_rate": 1.9532214244282527e-05, "loss": 0.5316, "step": 7211 }, { "epoch": 0.19802306425041186, "grad_norm": 0.3514285981655121, "learning_rate": 1.9532083685712072e-05, "loss": 0.4972, "step": 7212 }, { "epoch": 0.19805052169137835, "grad_norm": 0.43646520376205444, "learning_rate": 1.953195310936124e-05, "loss": 0.535, "step": 7213 }, { "epoch": 0.19807797913234487, "grad_norm": 0.34438374638557434, "learning_rate": 1.9531822515230276e-05, "loss": 0.4892, "step": 7214 }, { "epoch": 0.19810543657331137, "grad_norm": 0.36947011947631836, "learning_rate": 1.9531691903319415e-05, "loss": 0.5635, "step": 7215 }, { "epoch": 0.19813289401427786, "grad_norm": 0.34953573346138, "learning_rate": 1.953156127362891e-05, "loss": 0.5776, "step": 7216 }, { "epoch": 0.19816035145524438, "grad_norm": 0.31937530636787415, "learning_rate": 1.9531430626159e-05, "loss": 0.4799, "step": 7217 }, { "epoch": 0.19818780889621088, "grad_norm": 0.3912700116634369, "learning_rate": 1.9531299960909928e-05, "loss": 0.6182, "step": 7218 }, { "epoch": 0.19821526633717737, "grad_norm": 0.38298895955085754, "learning_rate": 1.953116927788194e-05, "loss": 0.5679, "step": 7219 }, { "epoch": 0.19824272377814386, "grad_norm": 0.34369972348213196, "learning_rate": 1.9531038577075284e-05, "loss": 0.4537, "step": 7220 }, { "epoch": 0.1982701812191104, "grad_norm": 0.3912615180015564, "learning_rate": 1.9530907858490195e-05, "loss": 0.5986, "step": 7221 }, { "epoch": 0.19829763866007688, "grad_norm": 0.3950369656085968, "learning_rate": 1.953077712212692e-05, "loss": 0.5873, "step": 7222 }, { "epoch": 0.19832509610104337, "grad_norm": 0.3334648013114929, "learning_rate": 1.953064636798571e-05, "loss": 0.4981, "step": 7223 }, { "epoch": 0.1983525535420099, "grad_norm": 0.380347341299057, "learning_rate": 1.95305155960668e-05, "loss": 0.5684, "step": 7224 }, { "epoch": 0.1983800109829764, "grad_norm": 0.3821478486061096, "learning_rate": 1.953038480637043e-05, "loss": 0.5324, "step": 7225 }, { "epoch": 0.19840746842394288, "grad_norm": 0.3779415488243103, "learning_rate": 1.9530253998896857e-05, "loss": 0.4894, "step": 7226 }, { "epoch": 0.19843492586490938, "grad_norm": 0.4097645580768585, "learning_rate": 1.953012317364632e-05, "loss": 0.6155, "step": 7227 }, { "epoch": 0.1984623833058759, "grad_norm": 0.407749205827713, "learning_rate": 1.9529992330619056e-05, "loss": 0.5623, "step": 7228 }, { "epoch": 0.1984898407468424, "grad_norm": 0.453078955411911, "learning_rate": 1.9529861469815316e-05, "loss": 0.4845, "step": 7229 }, { "epoch": 0.1985172981878089, "grad_norm": 0.3514716923236847, "learning_rate": 1.9529730591235346e-05, "loss": 0.5976, "step": 7230 }, { "epoch": 0.1985447556287754, "grad_norm": 0.3212401866912842, "learning_rate": 1.9529599694879383e-05, "loss": 0.4459, "step": 7231 }, { "epoch": 0.1985722130697419, "grad_norm": 0.41688039898872375, "learning_rate": 1.952946878074768e-05, "loss": 0.4856, "step": 7232 }, { "epoch": 0.1985996705107084, "grad_norm": 0.3676583468914032, "learning_rate": 1.952933784884047e-05, "loss": 0.5004, "step": 7233 }, { "epoch": 0.1986271279516749, "grad_norm": 0.3243754506111145, "learning_rate": 1.952920689915801e-05, "loss": 0.4736, "step": 7234 }, { "epoch": 0.19865458539264141, "grad_norm": 0.3447073996067047, "learning_rate": 1.9529075931700535e-05, "loss": 0.4623, "step": 7235 }, { "epoch": 0.1986820428336079, "grad_norm": 0.36044448614120483, "learning_rate": 1.9528944946468292e-05, "loss": 0.5093, "step": 7236 }, { "epoch": 0.1987095002745744, "grad_norm": 0.3483608067035675, "learning_rate": 1.9528813943461523e-05, "loss": 0.5598, "step": 7237 }, { "epoch": 0.19873695771554092, "grad_norm": 0.38116654753685, "learning_rate": 1.9528682922680476e-05, "loss": 0.5669, "step": 7238 }, { "epoch": 0.19876441515650742, "grad_norm": 0.3651668131351471, "learning_rate": 1.9528551884125395e-05, "loss": 0.4976, "step": 7239 }, { "epoch": 0.1987918725974739, "grad_norm": 0.32412225008010864, "learning_rate": 1.9528420827796526e-05, "loss": 0.5661, "step": 7240 }, { "epoch": 0.1988193300384404, "grad_norm": 0.355745792388916, "learning_rate": 1.9528289753694108e-05, "loss": 0.5558, "step": 7241 }, { "epoch": 0.19884678747940693, "grad_norm": 0.3472450077533722, "learning_rate": 1.952815866181839e-05, "loss": 0.5157, "step": 7242 }, { "epoch": 0.19887424492037342, "grad_norm": 0.33475399017333984, "learning_rate": 1.952802755216961e-05, "loss": 0.4584, "step": 7243 }, { "epoch": 0.19890170236133992, "grad_norm": 0.3952518105506897, "learning_rate": 1.9527896424748025e-05, "loss": 0.5297, "step": 7244 }, { "epoch": 0.19892915980230644, "grad_norm": 0.38270044326782227, "learning_rate": 1.952776527955387e-05, "loss": 0.5259, "step": 7245 }, { "epoch": 0.19895661724327293, "grad_norm": 0.4353136420249939, "learning_rate": 1.952763411658739e-05, "loss": 0.5541, "step": 7246 }, { "epoch": 0.19898407468423943, "grad_norm": 0.3596036434173584, "learning_rate": 1.9527502935848832e-05, "loss": 0.492, "step": 7247 }, { "epoch": 0.19901153212520592, "grad_norm": 0.39277714490890503, "learning_rate": 1.9527371737338443e-05, "loss": 0.5848, "step": 7248 }, { "epoch": 0.19903898956617244, "grad_norm": 0.3556603789329529, "learning_rate": 1.9527240521056462e-05, "loss": 0.5664, "step": 7249 }, { "epoch": 0.19906644700713894, "grad_norm": 0.40850144624710083, "learning_rate": 1.9527109287003138e-05, "loss": 0.5624, "step": 7250 }, { "epoch": 0.19909390444810543, "grad_norm": 0.34166961908340454, "learning_rate": 1.9526978035178713e-05, "loss": 0.4499, "step": 7251 }, { "epoch": 0.19912136188907195, "grad_norm": 0.32886263728141785, "learning_rate": 1.9526846765583435e-05, "loss": 0.4667, "step": 7252 }, { "epoch": 0.19914881933003845, "grad_norm": 0.3720371127128601, "learning_rate": 1.9526715478217545e-05, "loss": 0.549, "step": 7253 }, { "epoch": 0.19917627677100494, "grad_norm": 0.3854215443134308, "learning_rate": 1.9526584173081293e-05, "loss": 0.5358, "step": 7254 }, { "epoch": 0.19920373421197143, "grad_norm": 0.4011201560497284, "learning_rate": 1.9526452850174917e-05, "loss": 0.5591, "step": 7255 }, { "epoch": 0.19923119165293796, "grad_norm": 0.33162906765937805, "learning_rate": 1.952632150949867e-05, "loss": 0.5235, "step": 7256 }, { "epoch": 0.19925864909390445, "grad_norm": 0.34256017208099365, "learning_rate": 1.9526190151052787e-05, "loss": 0.5689, "step": 7257 }, { "epoch": 0.19928610653487094, "grad_norm": 0.4785984456539154, "learning_rate": 1.9526058774837525e-05, "loss": 0.5547, "step": 7258 }, { "epoch": 0.19931356397583747, "grad_norm": 0.3598606288433075, "learning_rate": 1.952592738085312e-05, "loss": 0.4014, "step": 7259 }, { "epoch": 0.19934102141680396, "grad_norm": 0.35917675495147705, "learning_rate": 1.9525795969099822e-05, "loss": 0.511, "step": 7260 }, { "epoch": 0.19936847885777045, "grad_norm": 0.3319828510284424, "learning_rate": 1.9525664539577877e-05, "loss": 0.5112, "step": 7261 }, { "epoch": 0.19939593629873695, "grad_norm": 0.4017603397369385, "learning_rate": 1.952553309228752e-05, "loss": 0.6258, "step": 7262 }, { "epoch": 0.19942339373970347, "grad_norm": 0.37855392694473267, "learning_rate": 1.952540162722901e-05, "loss": 0.5565, "step": 7263 }, { "epoch": 0.19945085118066996, "grad_norm": 0.3728278875350952, "learning_rate": 1.9525270144402582e-05, "loss": 0.5611, "step": 7264 }, { "epoch": 0.19947830862163646, "grad_norm": 0.42211565375328064, "learning_rate": 1.9525138643808487e-05, "loss": 0.5032, "step": 7265 }, { "epoch": 0.19950576606260295, "grad_norm": 0.3804474174976349, "learning_rate": 1.9525007125446968e-05, "loss": 0.5952, "step": 7266 }, { "epoch": 0.19953322350356947, "grad_norm": 0.36546412110328674, "learning_rate": 1.952487558931827e-05, "loss": 0.6175, "step": 7267 }, { "epoch": 0.19956068094453597, "grad_norm": 0.35153576731681824, "learning_rate": 1.9524744035422637e-05, "loss": 0.5121, "step": 7268 }, { "epoch": 0.19958813838550246, "grad_norm": 0.385099321603775, "learning_rate": 1.9524612463760322e-05, "loss": 0.5623, "step": 7269 }, { "epoch": 0.19961559582646898, "grad_norm": 0.3399818539619446, "learning_rate": 1.952448087433156e-05, "loss": 0.5185, "step": 7270 }, { "epoch": 0.19964305326743548, "grad_norm": 0.4528478682041168, "learning_rate": 1.9524349267136603e-05, "loss": 0.5886, "step": 7271 }, { "epoch": 0.19967051070840197, "grad_norm": 0.36903467774391174, "learning_rate": 1.9524217642175696e-05, "loss": 0.6044, "step": 7272 }, { "epoch": 0.19969796814936847, "grad_norm": 0.3784032166004181, "learning_rate": 1.9524085999449083e-05, "loss": 0.6529, "step": 7273 }, { "epoch": 0.199725425590335, "grad_norm": 0.35832545161247253, "learning_rate": 1.9523954338957007e-05, "loss": 0.5627, "step": 7274 }, { "epoch": 0.19975288303130148, "grad_norm": 0.4117189049720764, "learning_rate": 1.952382266069972e-05, "loss": 0.5711, "step": 7275 }, { "epoch": 0.19978034047226798, "grad_norm": 0.4110107719898224, "learning_rate": 1.9523690964677462e-05, "loss": 0.5575, "step": 7276 }, { "epoch": 0.1998077979132345, "grad_norm": 0.3601077198982239, "learning_rate": 1.952355925089048e-05, "loss": 0.5116, "step": 7277 }, { "epoch": 0.199835255354201, "grad_norm": 0.36038461327552795, "learning_rate": 1.952342751933902e-05, "loss": 0.5262, "step": 7278 }, { "epoch": 0.19986271279516749, "grad_norm": 0.4073856472969055, "learning_rate": 1.9523295770023334e-05, "loss": 0.6271, "step": 7279 }, { "epoch": 0.19989017023613398, "grad_norm": 0.3714544177055359, "learning_rate": 1.9523164002943654e-05, "loss": 0.5467, "step": 7280 }, { "epoch": 0.1999176276771005, "grad_norm": 0.35000598430633545, "learning_rate": 1.952303221810024e-05, "loss": 0.5675, "step": 7281 }, { "epoch": 0.199945085118067, "grad_norm": 1.7407808303833008, "learning_rate": 1.952290041549333e-05, "loss": 0.5475, "step": 7282 }, { "epoch": 0.1999725425590335, "grad_norm": 0.3054966330528259, "learning_rate": 1.9522768595123168e-05, "loss": 0.4296, "step": 7283 }, { "epoch": 0.2, "grad_norm": 0.33471494913101196, "learning_rate": 1.9522636756990008e-05, "loss": 0.4848, "step": 7284 }, { "epoch": 0.2000274574409665, "grad_norm": 0.37647995352745056, "learning_rate": 1.952250490109409e-05, "loss": 0.5895, "step": 7285 }, { "epoch": 0.200054914881933, "grad_norm": 0.34489938616752625, "learning_rate": 1.9522373027435655e-05, "loss": 0.5708, "step": 7286 }, { "epoch": 0.2000823723228995, "grad_norm": 0.45866742730140686, "learning_rate": 1.9522241136014965e-05, "loss": 0.482, "step": 7287 }, { "epoch": 0.20010982976386602, "grad_norm": 0.3989732563495636, "learning_rate": 1.9522109226832247e-05, "loss": 0.5117, "step": 7288 }, { "epoch": 0.2001372872048325, "grad_norm": 0.32786762714385986, "learning_rate": 1.9521977299887764e-05, "loss": 0.5022, "step": 7289 }, { "epoch": 0.200164744645799, "grad_norm": 0.3497236371040344, "learning_rate": 1.9521845355181748e-05, "loss": 0.5104, "step": 7290 }, { "epoch": 0.20019220208676552, "grad_norm": 0.4128434360027313, "learning_rate": 1.9521713392714458e-05, "loss": 0.4664, "step": 7291 }, { "epoch": 0.20021965952773202, "grad_norm": 0.332742840051651, "learning_rate": 1.9521581412486127e-05, "loss": 0.5013, "step": 7292 }, { "epoch": 0.2002471169686985, "grad_norm": 0.4773743152618408, "learning_rate": 1.9521449414497013e-05, "loss": 0.549, "step": 7293 }, { "epoch": 0.200274574409665, "grad_norm": 0.4046773612499237, "learning_rate": 1.9521317398747352e-05, "loss": 0.6262, "step": 7294 }, { "epoch": 0.20030203185063153, "grad_norm": 0.3849119544029236, "learning_rate": 1.9521185365237396e-05, "loss": 0.5346, "step": 7295 }, { "epoch": 0.20032948929159802, "grad_norm": 0.35975560545921326, "learning_rate": 1.9521053313967392e-05, "loss": 0.5089, "step": 7296 }, { "epoch": 0.20035694673256452, "grad_norm": 0.36791449785232544, "learning_rate": 1.9520921244937583e-05, "loss": 0.5369, "step": 7297 }, { "epoch": 0.20038440417353104, "grad_norm": 0.38655492663383484, "learning_rate": 1.952078915814822e-05, "loss": 0.6763, "step": 7298 }, { "epoch": 0.20041186161449753, "grad_norm": 0.3122752010822296, "learning_rate": 1.9520657053599547e-05, "loss": 0.4254, "step": 7299 }, { "epoch": 0.20043931905546403, "grad_norm": 0.37392082810401917, "learning_rate": 1.952052493129181e-05, "loss": 0.5373, "step": 7300 }, { "epoch": 0.20046677649643052, "grad_norm": 0.35789406299591064, "learning_rate": 1.9520392791225255e-05, "loss": 0.6196, "step": 7301 }, { "epoch": 0.20049423393739704, "grad_norm": 0.5537394881248474, "learning_rate": 1.9520260633400126e-05, "loss": 0.5176, "step": 7302 }, { "epoch": 0.20052169137836354, "grad_norm": 0.3785388469696045, "learning_rate": 1.9520128457816673e-05, "loss": 0.507, "step": 7303 }, { "epoch": 0.20054914881933003, "grad_norm": 0.3671973645687103, "learning_rate": 1.9519996264475143e-05, "loss": 0.5078, "step": 7304 }, { "epoch": 0.20057660626029655, "grad_norm": 0.38311681151390076, "learning_rate": 1.9519864053375782e-05, "loss": 0.5446, "step": 7305 }, { "epoch": 0.20060406370126305, "grad_norm": 0.38404810428619385, "learning_rate": 1.9519731824518836e-05, "loss": 0.4427, "step": 7306 }, { "epoch": 0.20063152114222954, "grad_norm": 0.4808088541030884, "learning_rate": 1.951959957790455e-05, "loss": 0.5144, "step": 7307 }, { "epoch": 0.20065897858319603, "grad_norm": 0.47841259837150574, "learning_rate": 1.9519467313533177e-05, "loss": 0.5221, "step": 7308 }, { "epoch": 0.20068643602416256, "grad_norm": 0.3672653138637543, "learning_rate": 1.9519335031404953e-05, "loss": 0.4775, "step": 7309 }, { "epoch": 0.20071389346512905, "grad_norm": 0.42646127939224243, "learning_rate": 1.9519202731520133e-05, "loss": 0.6006, "step": 7310 }, { "epoch": 0.20074135090609554, "grad_norm": 0.35583093762397766, "learning_rate": 1.9519070413878965e-05, "loss": 0.4687, "step": 7311 }, { "epoch": 0.20076880834706207, "grad_norm": 0.5379402041435242, "learning_rate": 1.951893807848169e-05, "loss": 0.6122, "step": 7312 }, { "epoch": 0.20079626578802856, "grad_norm": 0.4611837863922119, "learning_rate": 1.9518805725328557e-05, "loss": 0.5346, "step": 7313 }, { "epoch": 0.20082372322899505, "grad_norm": 0.3678635358810425, "learning_rate": 1.9518673354419815e-05, "loss": 0.4801, "step": 7314 }, { "epoch": 0.20085118066996155, "grad_norm": 0.3484295904636383, "learning_rate": 1.9518540965755707e-05, "loss": 0.5877, "step": 7315 }, { "epoch": 0.20087863811092807, "grad_norm": 0.34005439281463623, "learning_rate": 1.9518408559336483e-05, "loss": 0.5507, "step": 7316 }, { "epoch": 0.20090609555189456, "grad_norm": 0.35982435941696167, "learning_rate": 1.951827613516239e-05, "loss": 0.5501, "step": 7317 }, { "epoch": 0.20093355299286106, "grad_norm": 0.3501453995704651, "learning_rate": 1.9518143693233675e-05, "loss": 0.5305, "step": 7318 }, { "epoch": 0.20096101043382758, "grad_norm": 0.3547631800174713, "learning_rate": 1.9518011233550584e-05, "loss": 0.5318, "step": 7319 }, { "epoch": 0.20098846787479407, "grad_norm": 0.41490888595581055, "learning_rate": 1.951787875611336e-05, "loss": 0.5947, "step": 7320 }, { "epoch": 0.20101592531576057, "grad_norm": 0.3756621479988098, "learning_rate": 1.951774626092226e-05, "loss": 0.5938, "step": 7321 }, { "epoch": 0.20104338275672706, "grad_norm": 0.38661646842956543, "learning_rate": 1.9517613747977523e-05, "loss": 0.5915, "step": 7322 }, { "epoch": 0.20107084019769358, "grad_norm": 0.3998299837112427, "learning_rate": 1.9517481217279396e-05, "loss": 0.4543, "step": 7323 }, { "epoch": 0.20109829763866008, "grad_norm": 0.32125037908554077, "learning_rate": 1.9517348668828133e-05, "loss": 0.5313, "step": 7324 }, { "epoch": 0.20112575507962657, "grad_norm": 0.3847087025642395, "learning_rate": 1.9517216102623978e-05, "loss": 0.5969, "step": 7325 }, { "epoch": 0.2011532125205931, "grad_norm": 0.3381783366203308, "learning_rate": 1.951708351866717e-05, "loss": 0.5447, "step": 7326 }, { "epoch": 0.2011806699615596, "grad_norm": 0.37442320585250854, "learning_rate": 1.9516950916957973e-05, "loss": 0.5833, "step": 7327 }, { "epoch": 0.20120812740252608, "grad_norm": 0.43819111585617065, "learning_rate": 1.951681829749662e-05, "loss": 0.5307, "step": 7328 }, { "epoch": 0.20123558484349258, "grad_norm": 0.36493250727653503, "learning_rate": 1.9516685660283366e-05, "loss": 0.5221, "step": 7329 }, { "epoch": 0.2012630422844591, "grad_norm": 0.4357490539550781, "learning_rate": 1.9516553005318452e-05, "loss": 0.5254, "step": 7330 }, { "epoch": 0.2012904997254256, "grad_norm": 0.383034884929657, "learning_rate": 1.951642033260213e-05, "loss": 0.546, "step": 7331 }, { "epoch": 0.2013179571663921, "grad_norm": 0.3640297055244446, "learning_rate": 1.951628764213465e-05, "loss": 0.5413, "step": 7332 }, { "epoch": 0.20134541460735858, "grad_norm": 0.34441515803337097, "learning_rate": 1.9516154933916253e-05, "loss": 0.5343, "step": 7333 }, { "epoch": 0.2013728720483251, "grad_norm": 0.3524656593799591, "learning_rate": 1.9516022207947193e-05, "loss": 0.5556, "step": 7334 }, { "epoch": 0.2014003294892916, "grad_norm": 0.36219149827957153, "learning_rate": 1.9515889464227717e-05, "loss": 0.555, "step": 7335 }, { "epoch": 0.2014277869302581, "grad_norm": 0.34537792205810547, "learning_rate": 1.9515756702758063e-05, "loss": 0.4698, "step": 7336 }, { "epoch": 0.2014552443712246, "grad_norm": 0.4025348126888275, "learning_rate": 1.9515623923538487e-05, "loss": 0.5352, "step": 7337 }, { "epoch": 0.2014827018121911, "grad_norm": 0.3460020422935486, "learning_rate": 1.9515491126569238e-05, "loss": 0.5559, "step": 7338 }, { "epoch": 0.2015101592531576, "grad_norm": 0.3857753574848175, "learning_rate": 1.951535831185056e-05, "loss": 0.4493, "step": 7339 }, { "epoch": 0.2015376166941241, "grad_norm": 0.39023688435554504, "learning_rate": 1.9515225479382702e-05, "loss": 0.5817, "step": 7340 }, { "epoch": 0.20156507413509062, "grad_norm": 0.36615413427352905, "learning_rate": 1.951509262916591e-05, "loss": 0.5104, "step": 7341 }, { "epoch": 0.2015925315760571, "grad_norm": 0.3541872203350067, "learning_rate": 1.9514959761200435e-05, "loss": 0.5908, "step": 7342 }, { "epoch": 0.2016199890170236, "grad_norm": 0.3829677104949951, "learning_rate": 1.9514826875486525e-05, "loss": 0.568, "step": 7343 }, { "epoch": 0.20164744645799013, "grad_norm": 0.4017241597175598, "learning_rate": 1.9514693972024424e-05, "loss": 0.5495, "step": 7344 }, { "epoch": 0.20167490389895662, "grad_norm": 0.36972206830978394, "learning_rate": 1.9514561050814382e-05, "loss": 0.5559, "step": 7345 }, { "epoch": 0.20170236133992311, "grad_norm": 0.34825921058654785, "learning_rate": 1.9514428111856648e-05, "loss": 0.4808, "step": 7346 }, { "epoch": 0.2017298187808896, "grad_norm": 0.32623592019081116, "learning_rate": 1.951429515515147e-05, "loss": 0.5539, "step": 7347 }, { "epoch": 0.20175727622185613, "grad_norm": 0.3953423500061035, "learning_rate": 1.951416218069909e-05, "loss": 0.4878, "step": 7348 }, { "epoch": 0.20178473366282262, "grad_norm": 0.37235966324806213, "learning_rate": 1.9514029188499765e-05, "loss": 0.5559, "step": 7349 }, { "epoch": 0.20181219110378912, "grad_norm": 0.3385111093521118, "learning_rate": 1.9513896178553734e-05, "loss": 0.4926, "step": 7350 }, { "epoch": 0.20183964854475564, "grad_norm": 0.354172945022583, "learning_rate": 1.9513763150861255e-05, "loss": 0.4642, "step": 7351 }, { "epoch": 0.20186710598572213, "grad_norm": 0.34014031291007996, "learning_rate": 1.9513630105422572e-05, "loss": 0.5624, "step": 7352 }, { "epoch": 0.20189456342668863, "grad_norm": 0.3470444679260254, "learning_rate": 1.951349704223793e-05, "loss": 0.4593, "step": 7353 }, { "epoch": 0.20192202086765512, "grad_norm": 0.31937283277511597, "learning_rate": 1.9513363961307582e-05, "loss": 0.5544, "step": 7354 }, { "epoch": 0.20194947830862164, "grad_norm": 0.41197383403778076, "learning_rate": 1.951323086263177e-05, "loss": 0.5179, "step": 7355 }, { "epoch": 0.20197693574958814, "grad_norm": 0.48682957887649536, "learning_rate": 1.951309774621075e-05, "loss": 0.6141, "step": 7356 }, { "epoch": 0.20200439319055463, "grad_norm": 0.32521429657936096, "learning_rate": 1.9512964612044763e-05, "loss": 0.5302, "step": 7357 }, { "epoch": 0.20203185063152115, "grad_norm": 0.37742552161216736, "learning_rate": 1.9512831460134066e-05, "loss": 0.6019, "step": 7358 }, { "epoch": 0.20205930807248765, "grad_norm": 0.3743424713611603, "learning_rate": 1.95126982904789e-05, "loss": 0.551, "step": 7359 }, { "epoch": 0.20208676551345414, "grad_norm": 0.3655437231063843, "learning_rate": 1.9512565103079513e-05, "loss": 0.4784, "step": 7360 }, { "epoch": 0.20211422295442064, "grad_norm": 0.35205432772636414, "learning_rate": 1.9512431897936156e-05, "loss": 0.4429, "step": 7361 }, { "epoch": 0.20214168039538716, "grad_norm": 0.36893174052238464, "learning_rate": 1.951229867504908e-05, "loss": 0.4682, "step": 7362 }, { "epoch": 0.20216913783635365, "grad_norm": 0.36047354340553284, "learning_rate": 1.951216543441853e-05, "loss": 0.5339, "step": 7363 }, { "epoch": 0.20219659527732015, "grad_norm": 0.3535478711128235, "learning_rate": 1.9512032176044756e-05, "loss": 0.4651, "step": 7364 }, { "epoch": 0.20222405271828667, "grad_norm": 0.3515976667404175, "learning_rate": 1.9511898899928007e-05, "loss": 0.5185, "step": 7365 }, { "epoch": 0.20225151015925316, "grad_norm": 0.3961832821369171, "learning_rate": 1.951176560606853e-05, "loss": 0.5227, "step": 7366 }, { "epoch": 0.20227896760021966, "grad_norm": 0.33784645795822144, "learning_rate": 1.951163229446657e-05, "loss": 0.5414, "step": 7367 }, { "epoch": 0.20230642504118615, "grad_norm": 0.3836570978164673, "learning_rate": 1.9511498965122383e-05, "loss": 0.6672, "step": 7368 }, { "epoch": 0.20233388248215267, "grad_norm": 0.3401603400707245, "learning_rate": 1.9511365618036216e-05, "loss": 0.4424, "step": 7369 }, { "epoch": 0.20236133992311917, "grad_norm": 0.4185061454772949, "learning_rate": 1.9511232253208317e-05, "loss": 0.5626, "step": 7370 }, { "epoch": 0.20238879736408566, "grad_norm": 0.38302484154701233, "learning_rate": 1.9511098870638934e-05, "loss": 0.6222, "step": 7371 }, { "epoch": 0.20241625480505218, "grad_norm": 0.3807424008846283, "learning_rate": 1.9510965470328316e-05, "loss": 0.5937, "step": 7372 }, { "epoch": 0.20244371224601868, "grad_norm": 0.36280813813209534, "learning_rate": 1.9510832052276712e-05, "loss": 0.5736, "step": 7373 }, { "epoch": 0.20247116968698517, "grad_norm": 0.30268439650535583, "learning_rate": 1.9510698616484366e-05, "loss": 0.4508, "step": 7374 }, { "epoch": 0.20249862712795166, "grad_norm": 0.3752821981906891, "learning_rate": 1.9510565162951538e-05, "loss": 0.523, "step": 7375 }, { "epoch": 0.20252608456891819, "grad_norm": 0.4249717891216278, "learning_rate": 1.951043169167847e-05, "loss": 0.4806, "step": 7376 }, { "epoch": 0.20255354200988468, "grad_norm": 0.39721015095710754, "learning_rate": 1.951029820266541e-05, "loss": 0.5368, "step": 7377 }, { "epoch": 0.20258099945085117, "grad_norm": 0.38381874561309814, "learning_rate": 1.9510164695912603e-05, "loss": 0.5875, "step": 7378 }, { "epoch": 0.2026084568918177, "grad_norm": 0.3454338610172272, "learning_rate": 1.9510031171420308e-05, "loss": 0.5221, "step": 7379 }, { "epoch": 0.2026359143327842, "grad_norm": 0.3470473885536194, "learning_rate": 1.950989762918877e-05, "loss": 0.4671, "step": 7380 }, { "epoch": 0.20266337177375068, "grad_norm": 0.3788517117500305, "learning_rate": 1.950976406921824e-05, "loss": 0.5021, "step": 7381 }, { "epoch": 0.20269082921471718, "grad_norm": 0.3841521441936493, "learning_rate": 1.9509630491508963e-05, "loss": 0.562, "step": 7382 }, { "epoch": 0.2027182866556837, "grad_norm": 0.3664693236351013, "learning_rate": 1.9509496896061192e-05, "loss": 0.4937, "step": 7383 }, { "epoch": 0.2027457440966502, "grad_norm": 0.3343413174152374, "learning_rate": 1.950936328287517e-05, "loss": 0.5455, "step": 7384 }, { "epoch": 0.2027732015376167, "grad_norm": 0.4551730751991272, "learning_rate": 1.950922965195115e-05, "loss": 0.5455, "step": 7385 }, { "epoch": 0.2028006589785832, "grad_norm": 0.4085313081741333, "learning_rate": 1.9509096003289386e-05, "loss": 0.555, "step": 7386 }, { "epoch": 0.2028281164195497, "grad_norm": 0.36422106623649597, "learning_rate": 1.950896233689012e-05, "loss": 0.5937, "step": 7387 }, { "epoch": 0.2028555738605162, "grad_norm": 0.3654335141181946, "learning_rate": 1.9508828652753607e-05, "loss": 0.5371, "step": 7388 }, { "epoch": 0.2028830313014827, "grad_norm": 0.34930387139320374, "learning_rate": 1.9508694950880093e-05, "loss": 0.4635, "step": 7389 }, { "epoch": 0.2029104887424492, "grad_norm": 0.346162885427475, "learning_rate": 1.9508561231269825e-05, "loss": 0.5455, "step": 7390 }, { "epoch": 0.2029379461834157, "grad_norm": 0.38914257287979126, "learning_rate": 1.950842749392306e-05, "loss": 0.5214, "step": 7391 }, { "epoch": 0.2029654036243822, "grad_norm": 0.40267452597618103, "learning_rate": 1.9508293738840038e-05, "loss": 0.5633, "step": 7392 }, { "epoch": 0.20299286106534872, "grad_norm": 0.33594340085983276, "learning_rate": 1.9508159966021017e-05, "loss": 0.4994, "step": 7393 }, { "epoch": 0.20302031850631522, "grad_norm": 0.3360845446586609, "learning_rate": 1.950802617546624e-05, "loss": 0.4815, "step": 7394 }, { "epoch": 0.2030477759472817, "grad_norm": 0.36471429467201233, "learning_rate": 1.9507892367175963e-05, "loss": 0.5172, "step": 7395 }, { "epoch": 0.2030752333882482, "grad_norm": 0.40203022956848145, "learning_rate": 1.9507758541150432e-05, "loss": 0.6034, "step": 7396 }, { "epoch": 0.20310269082921473, "grad_norm": 0.37335720658302307, "learning_rate": 1.9507624697389895e-05, "loss": 0.5717, "step": 7397 }, { "epoch": 0.20313014827018122, "grad_norm": 0.942424476146698, "learning_rate": 1.9507490835894605e-05, "loss": 0.5968, "step": 7398 }, { "epoch": 0.20315760571114772, "grad_norm": 0.3348281979560852, "learning_rate": 1.950735695666481e-05, "loss": 0.6025, "step": 7399 }, { "epoch": 0.2031850631521142, "grad_norm": 0.3216911852359772, "learning_rate": 1.950722305970076e-05, "loss": 0.4775, "step": 7400 }, { "epoch": 0.20321252059308073, "grad_norm": 0.3893691599369049, "learning_rate": 1.95070891450027e-05, "loss": 0.5993, "step": 7401 }, { "epoch": 0.20323997803404723, "grad_norm": 0.3540497124195099, "learning_rate": 1.9506955212570892e-05, "loss": 0.6153, "step": 7402 }, { "epoch": 0.20326743547501372, "grad_norm": 0.4128074645996094, "learning_rate": 1.9506821262405574e-05, "loss": 0.556, "step": 7403 }, { "epoch": 0.20329489291598024, "grad_norm": 0.3790435492992401, "learning_rate": 1.9506687294506998e-05, "loss": 0.5678, "step": 7404 }, { "epoch": 0.20332235035694673, "grad_norm": 4.062503814697266, "learning_rate": 1.950655330887542e-05, "loss": 0.6926, "step": 7405 }, { "epoch": 0.20334980779791323, "grad_norm": 0.40454739332199097, "learning_rate": 1.9506419305511085e-05, "loss": 0.5258, "step": 7406 }, { "epoch": 0.20337726523887972, "grad_norm": 0.3350772559642792, "learning_rate": 1.9506285284414245e-05, "loss": 0.5694, "step": 7407 }, { "epoch": 0.20340472267984624, "grad_norm": 0.3232493996620178, "learning_rate": 1.9506151245585147e-05, "loss": 0.394, "step": 7408 }, { "epoch": 0.20343218012081274, "grad_norm": 0.3331725001335144, "learning_rate": 1.950601718902404e-05, "loss": 0.4905, "step": 7409 }, { "epoch": 0.20345963756177923, "grad_norm": 0.36789557337760925, "learning_rate": 1.9505883114731184e-05, "loss": 0.49, "step": 7410 }, { "epoch": 0.20348709500274575, "grad_norm": 0.35910680890083313, "learning_rate": 1.9505749022706818e-05, "loss": 0.4764, "step": 7411 }, { "epoch": 0.20351455244371225, "grad_norm": 0.3915785849094391, "learning_rate": 1.9505614912951193e-05, "loss": 0.5085, "step": 7412 }, { "epoch": 0.20354200988467874, "grad_norm": 0.37323465943336487, "learning_rate": 1.950548078546457e-05, "loss": 0.6104, "step": 7413 }, { "epoch": 0.20356946732564524, "grad_norm": 0.3650323152542114, "learning_rate": 1.9505346640247187e-05, "loss": 0.4727, "step": 7414 }, { "epoch": 0.20359692476661176, "grad_norm": 0.4937528073787689, "learning_rate": 1.95052124772993e-05, "loss": 0.4801, "step": 7415 }, { "epoch": 0.20362438220757825, "grad_norm": 0.35039907693862915, "learning_rate": 1.9505078296621157e-05, "loss": 0.5747, "step": 7416 }, { "epoch": 0.20365183964854475, "grad_norm": 0.34304288029670715, "learning_rate": 1.9504944098213007e-05, "loss": 0.5953, "step": 7417 }, { "epoch": 0.20367929708951127, "grad_norm": 0.3442210853099823, "learning_rate": 1.9504809882075105e-05, "loss": 0.5233, "step": 7418 }, { "epoch": 0.20370675453047776, "grad_norm": 0.35135722160339355, "learning_rate": 1.95046756482077e-05, "loss": 0.4981, "step": 7419 }, { "epoch": 0.20373421197144426, "grad_norm": 0.3934701085090637, "learning_rate": 1.950454139661104e-05, "loss": 0.5674, "step": 7420 }, { "epoch": 0.20376166941241075, "grad_norm": 0.3908854126930237, "learning_rate": 1.9504407127285377e-05, "loss": 0.5464, "step": 7421 }, { "epoch": 0.20378912685337727, "grad_norm": 0.3426850736141205, "learning_rate": 1.9504272840230963e-05, "loss": 0.484, "step": 7422 }, { "epoch": 0.20381658429434377, "grad_norm": 0.4569788873195648, "learning_rate": 1.9504138535448045e-05, "loss": 0.5434, "step": 7423 }, { "epoch": 0.20384404173531026, "grad_norm": 0.36274266242980957, "learning_rate": 1.9504004212936872e-05, "loss": 0.5862, "step": 7424 }, { "epoch": 0.20387149917627678, "grad_norm": 0.3782878518104553, "learning_rate": 1.9503869872697703e-05, "loss": 0.5124, "step": 7425 }, { "epoch": 0.20389895661724328, "grad_norm": 0.36932921409606934, "learning_rate": 1.9503735514730785e-05, "loss": 0.5111, "step": 7426 }, { "epoch": 0.20392641405820977, "grad_norm": 0.3320193290710449, "learning_rate": 1.9503601139036362e-05, "loss": 0.609, "step": 7427 }, { "epoch": 0.20395387149917626, "grad_norm": 0.47825369238853455, "learning_rate": 1.950346674561469e-05, "loss": 0.5782, "step": 7428 }, { "epoch": 0.2039813289401428, "grad_norm": 0.3573855459690094, "learning_rate": 1.9503332334466024e-05, "loss": 0.5557, "step": 7429 }, { "epoch": 0.20400878638110928, "grad_norm": 0.3699520528316498, "learning_rate": 1.9503197905590607e-05, "loss": 0.5409, "step": 7430 }, { "epoch": 0.20403624382207577, "grad_norm": 0.3708404004573822, "learning_rate": 1.950306345898869e-05, "loss": 0.4878, "step": 7431 }, { "epoch": 0.2040637012630423, "grad_norm": 0.37477177381515503, "learning_rate": 1.950292899466053e-05, "loss": 0.6121, "step": 7432 }, { "epoch": 0.2040911587040088, "grad_norm": 0.3681904375553131, "learning_rate": 1.9502794512606375e-05, "loss": 0.4563, "step": 7433 }, { "epoch": 0.20411861614497528, "grad_norm": 0.38901153206825256, "learning_rate": 1.9502660012826474e-05, "loss": 0.5073, "step": 7434 }, { "epoch": 0.20414607358594178, "grad_norm": 0.4143115282058716, "learning_rate": 1.950252549532108e-05, "loss": 0.5616, "step": 7435 }, { "epoch": 0.2041735310269083, "grad_norm": 0.34499984979629517, "learning_rate": 1.950239096009044e-05, "loss": 0.5479, "step": 7436 }, { "epoch": 0.2042009884678748, "grad_norm": 0.45897531509399414, "learning_rate": 1.9502256407134813e-05, "loss": 0.526, "step": 7437 }, { "epoch": 0.2042284459088413, "grad_norm": 0.3583773672580719, "learning_rate": 1.950212183645444e-05, "loss": 0.5565, "step": 7438 }, { "epoch": 0.2042559033498078, "grad_norm": 0.42982640862464905, "learning_rate": 1.950198724804958e-05, "loss": 0.5528, "step": 7439 }, { "epoch": 0.2042833607907743, "grad_norm": 0.37345951795578003, "learning_rate": 1.9501852641920483e-05, "loss": 0.5203, "step": 7440 }, { "epoch": 0.2043108182317408, "grad_norm": 0.32849639654159546, "learning_rate": 1.9501718018067395e-05, "loss": 0.5254, "step": 7441 }, { "epoch": 0.2043382756727073, "grad_norm": 0.3645012378692627, "learning_rate": 1.950158337649057e-05, "loss": 0.5449, "step": 7442 }, { "epoch": 0.20436573311367381, "grad_norm": 0.37232595682144165, "learning_rate": 1.9501448717190258e-05, "loss": 0.5909, "step": 7443 }, { "epoch": 0.2043931905546403, "grad_norm": 0.32305845618247986, "learning_rate": 1.9501314040166716e-05, "loss": 0.5585, "step": 7444 }, { "epoch": 0.2044206479956068, "grad_norm": 0.9522542953491211, "learning_rate": 1.950117934542019e-05, "loss": 0.4839, "step": 7445 }, { "epoch": 0.20444810543657332, "grad_norm": 0.3621273934841156, "learning_rate": 1.9501044632950932e-05, "loss": 0.5928, "step": 7446 }, { "epoch": 0.20447556287753982, "grad_norm": 0.3878495991230011, "learning_rate": 1.950090990275919e-05, "loss": 0.5525, "step": 7447 }, { "epoch": 0.2045030203185063, "grad_norm": 0.36488908529281616, "learning_rate": 1.9500775154845222e-05, "loss": 0.6066, "step": 7448 }, { "epoch": 0.2045304777594728, "grad_norm": 0.345989465713501, "learning_rate": 1.9500640389209275e-05, "loss": 0.5049, "step": 7449 }, { "epoch": 0.20455793520043933, "grad_norm": 0.34257805347442627, "learning_rate": 1.9500505605851602e-05, "loss": 0.5215, "step": 7450 }, { "epoch": 0.20458539264140582, "grad_norm": 0.34737899899482727, "learning_rate": 1.9500370804772456e-05, "loss": 0.5714, "step": 7451 }, { "epoch": 0.20461285008237232, "grad_norm": 0.34886297583580017, "learning_rate": 1.9500235985972083e-05, "loss": 0.5165, "step": 7452 }, { "epoch": 0.20464030752333884, "grad_norm": 0.37414979934692383, "learning_rate": 1.950010114945074e-05, "loss": 0.519, "step": 7453 }, { "epoch": 0.20466776496430533, "grad_norm": 0.36315059661865234, "learning_rate": 1.949996629520867e-05, "loss": 0.6013, "step": 7454 }, { "epoch": 0.20469522240527183, "grad_norm": 0.5032866597175598, "learning_rate": 1.949983142324614e-05, "loss": 0.5549, "step": 7455 }, { "epoch": 0.20472267984623832, "grad_norm": 0.3336421251296997, "learning_rate": 1.9499696533563385e-05, "loss": 0.4733, "step": 7456 }, { "epoch": 0.20475013728720484, "grad_norm": 0.34455493092536926, "learning_rate": 1.949956162616067e-05, "loss": 0.5516, "step": 7457 }, { "epoch": 0.20477759472817134, "grad_norm": 0.37362754344940186, "learning_rate": 1.9499426701038236e-05, "loss": 0.5631, "step": 7458 }, { "epoch": 0.20480505216913783, "grad_norm": 0.34454748034477234, "learning_rate": 1.9499291758196342e-05, "loss": 0.4897, "step": 7459 }, { "epoch": 0.20483250961010435, "grad_norm": 0.34226006269454956, "learning_rate": 1.9499156797635234e-05, "loss": 0.5312, "step": 7460 }, { "epoch": 0.20485996705107085, "grad_norm": 0.3489041030406952, "learning_rate": 1.9499021819355168e-05, "loss": 0.5001, "step": 7461 }, { "epoch": 0.20488742449203734, "grad_norm": 0.3512673079967499, "learning_rate": 1.9498886823356397e-05, "loss": 0.514, "step": 7462 }, { "epoch": 0.20491488193300383, "grad_norm": 0.4462568461894989, "learning_rate": 1.949875180963917e-05, "loss": 0.5459, "step": 7463 }, { "epoch": 0.20494233937397036, "grad_norm": 0.3507233262062073, "learning_rate": 1.9498616778203735e-05, "loss": 0.5351, "step": 7464 }, { "epoch": 0.20496979681493685, "grad_norm": 0.36092373728752136, "learning_rate": 1.9498481729050353e-05, "loss": 0.5782, "step": 7465 }, { "epoch": 0.20499725425590334, "grad_norm": 0.3713226914405823, "learning_rate": 1.9498346662179267e-05, "loss": 0.5547, "step": 7466 }, { "epoch": 0.20502471169686984, "grad_norm": 0.3731040060520172, "learning_rate": 1.949821157759074e-05, "loss": 0.604, "step": 7467 }, { "epoch": 0.20505216913783636, "grad_norm": 0.36185961961746216, "learning_rate": 1.949807647528501e-05, "loss": 0.5346, "step": 7468 }, { "epoch": 0.20507962657880285, "grad_norm": 0.45466071367263794, "learning_rate": 1.949794135526234e-05, "loss": 0.5899, "step": 7469 }, { "epoch": 0.20510708401976935, "grad_norm": 0.3466683626174927, "learning_rate": 1.9497806217522975e-05, "loss": 0.5138, "step": 7470 }, { "epoch": 0.20513454146073587, "grad_norm": 0.3795687258243561, "learning_rate": 1.9497671062067168e-05, "loss": 0.4833, "step": 7471 }, { "epoch": 0.20516199890170236, "grad_norm": 0.3838491141796112, "learning_rate": 1.949753588889518e-05, "loss": 0.5565, "step": 7472 }, { "epoch": 0.20518945634266886, "grad_norm": 0.35425469279289246, "learning_rate": 1.9497400698007252e-05, "loss": 0.5556, "step": 7473 }, { "epoch": 0.20521691378363535, "grad_norm": 0.3542131185531616, "learning_rate": 1.949726548940364e-05, "loss": 0.5686, "step": 7474 }, { "epoch": 0.20524437122460187, "grad_norm": 0.37023600935935974, "learning_rate": 1.9497130263084597e-05, "loss": 0.5366, "step": 7475 }, { "epoch": 0.20527182866556837, "grad_norm": 0.40987759828567505, "learning_rate": 1.9496995019050377e-05, "loss": 0.5729, "step": 7476 }, { "epoch": 0.20529928610653486, "grad_norm": 0.35138148069381714, "learning_rate": 1.949685975730123e-05, "loss": 0.5626, "step": 7477 }, { "epoch": 0.20532674354750138, "grad_norm": 0.32947590947151184, "learning_rate": 1.9496724477837406e-05, "loss": 0.4729, "step": 7478 }, { "epoch": 0.20535420098846788, "grad_norm": 0.39107081294059753, "learning_rate": 1.949658918065916e-05, "loss": 0.6181, "step": 7479 }, { "epoch": 0.20538165842943437, "grad_norm": 0.4130382239818573, "learning_rate": 1.9496453865766748e-05, "loss": 0.531, "step": 7480 }, { "epoch": 0.20540911587040087, "grad_norm": 0.36393144726753235, "learning_rate": 1.949631853316041e-05, "loss": 0.558, "step": 7481 }, { "epoch": 0.2054365733113674, "grad_norm": 0.3440810441970825, "learning_rate": 1.9496183182840415e-05, "loss": 0.443, "step": 7482 }, { "epoch": 0.20546403075233388, "grad_norm": 0.9952693581581116, "learning_rate": 1.9496047814807006e-05, "loss": 0.5398, "step": 7483 }, { "epoch": 0.20549148819330038, "grad_norm": 0.35108742117881775, "learning_rate": 1.9495912429060437e-05, "loss": 0.5284, "step": 7484 }, { "epoch": 0.2055189456342669, "grad_norm": 0.5456353425979614, "learning_rate": 1.9495777025600962e-05, "loss": 0.3654, "step": 7485 }, { "epoch": 0.2055464030752334, "grad_norm": 0.378020316362381, "learning_rate": 1.949564160442883e-05, "loss": 0.5853, "step": 7486 }, { "epoch": 0.20557386051619989, "grad_norm": 0.3525356948375702, "learning_rate": 1.9495506165544295e-05, "loss": 0.5693, "step": 7487 }, { "epoch": 0.20560131795716638, "grad_norm": 0.3635256290435791, "learning_rate": 1.949537070894761e-05, "loss": 0.6041, "step": 7488 }, { "epoch": 0.2056287753981329, "grad_norm": 0.3216346204280853, "learning_rate": 1.949523523463903e-05, "loss": 0.4887, "step": 7489 }, { "epoch": 0.2056562328390994, "grad_norm": 0.35648542642593384, "learning_rate": 1.9495099742618806e-05, "loss": 0.5051, "step": 7490 }, { "epoch": 0.2056836902800659, "grad_norm": 0.3705017864704132, "learning_rate": 1.9494964232887193e-05, "loss": 0.4549, "step": 7491 }, { "epoch": 0.2057111477210324, "grad_norm": 0.35166114568710327, "learning_rate": 1.9494828705444436e-05, "loss": 0.5827, "step": 7492 }, { "epoch": 0.2057386051619989, "grad_norm": 0.3724901080131531, "learning_rate": 1.9494693160290796e-05, "loss": 0.4704, "step": 7493 }, { "epoch": 0.2057660626029654, "grad_norm": 0.3477155268192291, "learning_rate": 1.949455759742652e-05, "loss": 0.5008, "step": 7494 }, { "epoch": 0.2057935200439319, "grad_norm": 0.3310941457748413, "learning_rate": 1.9494422016851867e-05, "loss": 0.4863, "step": 7495 }, { "epoch": 0.20582097748489842, "grad_norm": 0.364769846200943, "learning_rate": 1.9494286418567086e-05, "loss": 0.514, "step": 7496 }, { "epoch": 0.2058484349258649, "grad_norm": 0.36607009172439575, "learning_rate": 1.949415080257243e-05, "loss": 0.4901, "step": 7497 }, { "epoch": 0.2058758923668314, "grad_norm": 0.37459835410118103, "learning_rate": 1.9494015168868152e-05, "loss": 0.5698, "step": 7498 }, { "epoch": 0.20590334980779793, "grad_norm": 0.7836223244667053, "learning_rate": 1.9493879517454507e-05, "loss": 0.5407, "step": 7499 }, { "epoch": 0.20593080724876442, "grad_norm": 0.3678928315639496, "learning_rate": 1.9493743848331744e-05, "loss": 0.5654, "step": 7500 }, { "epoch": 0.2059582646897309, "grad_norm": 0.32342708110809326, "learning_rate": 1.949360816150012e-05, "loss": 0.4634, "step": 7501 }, { "epoch": 0.2059857221306974, "grad_norm": 0.3260608911514282, "learning_rate": 1.949347245695989e-05, "loss": 0.5718, "step": 7502 }, { "epoch": 0.20601317957166393, "grad_norm": 0.40840449929237366, "learning_rate": 1.94933367347113e-05, "loss": 0.5584, "step": 7503 }, { "epoch": 0.20604063701263042, "grad_norm": 0.3426133692264557, "learning_rate": 1.949320099475461e-05, "loss": 0.4608, "step": 7504 }, { "epoch": 0.20606809445359692, "grad_norm": 0.37744447588920593, "learning_rate": 1.9493065237090067e-05, "loss": 0.5405, "step": 7505 }, { "epoch": 0.20609555189456344, "grad_norm": 0.3186626732349396, "learning_rate": 1.9492929461717928e-05, "loss": 0.4646, "step": 7506 }, { "epoch": 0.20612300933552993, "grad_norm": 0.40855786204338074, "learning_rate": 1.949279366863845e-05, "loss": 0.5015, "step": 7507 }, { "epoch": 0.20615046677649643, "grad_norm": 0.39541903138160706, "learning_rate": 1.949265785785188e-05, "loss": 0.5277, "step": 7508 }, { "epoch": 0.20617792421746292, "grad_norm": 0.3273670971393585, "learning_rate": 1.949252202935847e-05, "loss": 0.5505, "step": 7509 }, { "epoch": 0.20620538165842944, "grad_norm": 0.39661481976509094, "learning_rate": 1.949238618315848e-05, "loss": 0.5713, "step": 7510 }, { "epoch": 0.20623283909939594, "grad_norm": 0.3211910128593445, "learning_rate": 1.949225031925216e-05, "loss": 0.4672, "step": 7511 }, { "epoch": 0.20626029654036243, "grad_norm": 0.3473476469516754, "learning_rate": 1.949211443763976e-05, "loss": 0.4468, "step": 7512 }, { "epoch": 0.20628775398132895, "grad_norm": 0.4172080457210541, "learning_rate": 1.949197853832154e-05, "loss": 0.5541, "step": 7513 }, { "epoch": 0.20631521142229545, "grad_norm": 0.35693198442459106, "learning_rate": 1.9491842621297752e-05, "loss": 0.5727, "step": 7514 }, { "epoch": 0.20634266886326194, "grad_norm": 0.3937622606754303, "learning_rate": 1.9491706686568645e-05, "loss": 0.5893, "step": 7515 }, { "epoch": 0.20637012630422844, "grad_norm": 0.4228106439113617, "learning_rate": 1.9491570734134476e-05, "loss": 0.5457, "step": 7516 }, { "epoch": 0.20639758374519496, "grad_norm": 0.3606700897216797, "learning_rate": 1.94914347639955e-05, "loss": 0.5494, "step": 7517 }, { "epoch": 0.20642504118616145, "grad_norm": 0.3906656503677368, "learning_rate": 1.949129877615197e-05, "loss": 0.5727, "step": 7518 }, { "epoch": 0.20645249862712794, "grad_norm": 0.3916078209877014, "learning_rate": 1.9491162770604134e-05, "loss": 0.5683, "step": 7519 }, { "epoch": 0.20647995606809447, "grad_norm": 0.3519538640975952, "learning_rate": 1.9491026747352255e-05, "loss": 0.5653, "step": 7520 }, { "epoch": 0.20650741350906096, "grad_norm": 0.3471977114677429, "learning_rate": 1.9490890706396577e-05, "loss": 0.504, "step": 7521 }, { "epoch": 0.20653487095002745, "grad_norm": 0.39811068773269653, "learning_rate": 1.949075464773736e-05, "loss": 0.537, "step": 7522 }, { "epoch": 0.20656232839099395, "grad_norm": 0.414059579372406, "learning_rate": 1.949061857137486e-05, "loss": 0.6011, "step": 7523 }, { "epoch": 0.20658978583196047, "grad_norm": 0.40160319209098816, "learning_rate": 1.9490482477309327e-05, "loss": 0.5724, "step": 7524 }, { "epoch": 0.20661724327292696, "grad_norm": 0.34640300273895264, "learning_rate": 1.9490346365541013e-05, "loss": 0.5071, "step": 7525 }, { "epoch": 0.20664470071389346, "grad_norm": 0.41131648421287537, "learning_rate": 1.9490210236070175e-05, "loss": 0.5574, "step": 7526 }, { "epoch": 0.20667215815485998, "grad_norm": 0.337809681892395, "learning_rate": 1.9490074088897064e-05, "loss": 0.4609, "step": 7527 }, { "epoch": 0.20669961559582647, "grad_norm": 0.3415927588939667, "learning_rate": 1.948993792402194e-05, "loss": 0.535, "step": 7528 }, { "epoch": 0.20672707303679297, "grad_norm": 0.5304617285728455, "learning_rate": 1.948980174144505e-05, "loss": 0.4696, "step": 7529 }, { "epoch": 0.20675453047775946, "grad_norm": 0.3284441828727722, "learning_rate": 1.9489665541166648e-05, "loss": 0.4358, "step": 7530 }, { "epoch": 0.20678198791872598, "grad_norm": 0.36018145084381104, "learning_rate": 1.9489529323186996e-05, "loss": 0.4855, "step": 7531 }, { "epoch": 0.20680944535969248, "grad_norm": 0.36659812927246094, "learning_rate": 1.948939308750634e-05, "loss": 0.4312, "step": 7532 }, { "epoch": 0.20683690280065897, "grad_norm": 0.3751612901687622, "learning_rate": 1.9489256834124937e-05, "loss": 0.5172, "step": 7533 }, { "epoch": 0.20686436024162547, "grad_norm": 0.40616172552108765, "learning_rate": 1.9489120563043045e-05, "loss": 0.545, "step": 7534 }, { "epoch": 0.206891817682592, "grad_norm": 0.37372636795043945, "learning_rate": 1.9488984274260912e-05, "loss": 0.5708, "step": 7535 }, { "epoch": 0.20691927512355848, "grad_norm": 0.45384055376052856, "learning_rate": 1.9488847967778796e-05, "loss": 0.5404, "step": 7536 }, { "epoch": 0.20694673256452498, "grad_norm": 0.385635644197464, "learning_rate": 1.948871164359695e-05, "loss": 0.4954, "step": 7537 }, { "epoch": 0.2069741900054915, "grad_norm": 0.37352365255355835, "learning_rate": 1.9488575301715626e-05, "loss": 0.6172, "step": 7538 }, { "epoch": 0.207001647446458, "grad_norm": 0.3299863338470459, "learning_rate": 1.9488438942135084e-05, "loss": 0.5538, "step": 7539 }, { "epoch": 0.2070291048874245, "grad_norm": 0.5008835196495056, "learning_rate": 1.9488302564855576e-05, "loss": 0.4997, "step": 7540 }, { "epoch": 0.20705656232839098, "grad_norm": 0.7937750816345215, "learning_rate": 1.948816616987735e-05, "loss": 0.5392, "step": 7541 }, { "epoch": 0.2070840197693575, "grad_norm": 0.5317788124084473, "learning_rate": 1.9488029757200668e-05, "loss": 0.4696, "step": 7542 }, { "epoch": 0.207111477210324, "grad_norm": 0.39890098571777344, "learning_rate": 1.9487893326825783e-05, "loss": 0.5104, "step": 7543 }, { "epoch": 0.2071389346512905, "grad_norm": 0.39517533779144287, "learning_rate": 1.9487756878752952e-05, "loss": 0.47, "step": 7544 }, { "epoch": 0.207166392092257, "grad_norm": 0.3346083462238312, "learning_rate": 1.948762041298242e-05, "loss": 0.5186, "step": 7545 }, { "epoch": 0.2071938495332235, "grad_norm": 0.37469974160194397, "learning_rate": 1.948748392951445e-05, "loss": 0.6002, "step": 7546 }, { "epoch": 0.20722130697419, "grad_norm": 0.5224581956863403, "learning_rate": 1.9487347428349295e-05, "loss": 0.4765, "step": 7547 }, { "epoch": 0.2072487644151565, "grad_norm": 0.3902285695075989, "learning_rate": 1.948721090948721e-05, "loss": 0.5348, "step": 7548 }, { "epoch": 0.20727622185612302, "grad_norm": 0.8881040215492249, "learning_rate": 1.9487074372928448e-05, "loss": 0.4758, "step": 7549 }, { "epoch": 0.2073036792970895, "grad_norm": 0.38446030020713806, "learning_rate": 1.9486937818673267e-05, "loss": 0.5388, "step": 7550 }, { "epoch": 0.207331136738056, "grad_norm": 0.35867998003959656, "learning_rate": 1.9486801246721914e-05, "loss": 0.5149, "step": 7551 }, { "epoch": 0.20735859417902253, "grad_norm": 0.39368197321891785, "learning_rate": 1.948666465707465e-05, "loss": 0.5402, "step": 7552 }, { "epoch": 0.20738605161998902, "grad_norm": 0.36504340171813965, "learning_rate": 1.948652804973173e-05, "loss": 0.5504, "step": 7553 }, { "epoch": 0.20741350906095551, "grad_norm": 0.3094841539859772, "learning_rate": 1.9486391424693408e-05, "loss": 0.493, "step": 7554 }, { "epoch": 0.207440966501922, "grad_norm": 0.3840715289115906, "learning_rate": 1.948625478195994e-05, "loss": 0.5289, "step": 7555 }, { "epoch": 0.20746842394288853, "grad_norm": 0.40064117312431335, "learning_rate": 1.9486118121531575e-05, "loss": 0.4444, "step": 7556 }, { "epoch": 0.20749588138385502, "grad_norm": 0.3870463967323303, "learning_rate": 1.9485981443408576e-05, "loss": 0.6025, "step": 7557 }, { "epoch": 0.20752333882482152, "grad_norm": 0.34878480434417725, "learning_rate": 1.948584474759119e-05, "loss": 0.5113, "step": 7558 }, { "epoch": 0.20755079626578804, "grad_norm": 0.34857890009880066, "learning_rate": 1.948570803407968e-05, "loss": 0.492, "step": 7559 }, { "epoch": 0.20757825370675453, "grad_norm": 0.35261139273643494, "learning_rate": 1.9485571302874292e-05, "loss": 0.525, "step": 7560 }, { "epoch": 0.20760571114772103, "grad_norm": 0.38633492588996887, "learning_rate": 1.948543455397529e-05, "loss": 0.534, "step": 7561 }, { "epoch": 0.20763316858868752, "grad_norm": 0.7279139161109924, "learning_rate": 1.9485297787382925e-05, "loss": 0.576, "step": 7562 }, { "epoch": 0.20766062602965404, "grad_norm": 0.3838392198085785, "learning_rate": 1.948516100309745e-05, "loss": 0.477, "step": 7563 }, { "epoch": 0.20768808347062054, "grad_norm": 0.35583823919296265, "learning_rate": 1.9485024201119127e-05, "loss": 0.5427, "step": 7564 }, { "epoch": 0.20771554091158703, "grad_norm": 0.3963923156261444, "learning_rate": 1.94848873814482e-05, "loss": 0.6007, "step": 7565 }, { "epoch": 0.20774299835255355, "grad_norm": 0.34324321150779724, "learning_rate": 1.9484750544084936e-05, "loss": 0.4685, "step": 7566 }, { "epoch": 0.20777045579352005, "grad_norm": 0.42678889632225037, "learning_rate": 1.9484613689029585e-05, "loss": 0.603, "step": 7567 }, { "epoch": 0.20779791323448654, "grad_norm": 0.3567548990249634, "learning_rate": 1.94844768162824e-05, "loss": 0.5712, "step": 7568 }, { "epoch": 0.20782537067545304, "grad_norm": 0.43005841970443726, "learning_rate": 1.948433992584364e-05, "loss": 0.5862, "step": 7569 }, { "epoch": 0.20785282811641956, "grad_norm": 1.48933744430542, "learning_rate": 1.9484203017713557e-05, "loss": 0.4989, "step": 7570 }, { "epoch": 0.20788028555738605, "grad_norm": 0.3668833076953888, "learning_rate": 1.9484066091892408e-05, "loss": 0.5629, "step": 7571 }, { "epoch": 0.20790774299835255, "grad_norm": 0.48281916975975037, "learning_rate": 1.948392914838045e-05, "loss": 0.5414, "step": 7572 }, { "epoch": 0.20793520043931907, "grad_norm": 0.38930875062942505, "learning_rate": 1.948379218717794e-05, "loss": 0.5588, "step": 7573 }, { "epoch": 0.20796265788028556, "grad_norm": 0.3609628677368164, "learning_rate": 1.948365520828513e-05, "loss": 0.5194, "step": 7574 }, { "epoch": 0.20799011532125206, "grad_norm": 0.4016500413417816, "learning_rate": 1.948351821170227e-05, "loss": 0.4534, "step": 7575 }, { "epoch": 0.20801757276221855, "grad_norm": 0.35170140862464905, "learning_rate": 1.948338119742963e-05, "loss": 0.5715, "step": 7576 }, { "epoch": 0.20804503020318507, "grad_norm": 0.3507955074310303, "learning_rate": 1.9483244165467453e-05, "loss": 0.4482, "step": 7577 }, { "epoch": 0.20807248764415157, "grad_norm": 0.3529643416404724, "learning_rate": 1.9483107115815996e-05, "loss": 0.5237, "step": 7578 }, { "epoch": 0.20809994508511806, "grad_norm": 0.5187897682189941, "learning_rate": 1.9482970048475524e-05, "loss": 0.527, "step": 7579 }, { "epoch": 0.20812740252608458, "grad_norm": 0.35961034893989563, "learning_rate": 1.9482832963446282e-05, "loss": 0.5209, "step": 7580 }, { "epoch": 0.20815485996705108, "grad_norm": 0.3582625091075897, "learning_rate": 1.9482695860728534e-05, "loss": 0.536, "step": 7581 }, { "epoch": 0.20818231740801757, "grad_norm": 0.3997671604156494, "learning_rate": 1.9482558740322524e-05, "loss": 0.534, "step": 7582 }, { "epoch": 0.20820977484898406, "grad_norm": 0.3752930760383606, "learning_rate": 1.9482421602228525e-05, "loss": 0.5547, "step": 7583 }, { "epoch": 0.20823723228995059, "grad_norm": 0.4523254334926605, "learning_rate": 1.9482284446446778e-05, "loss": 0.6777, "step": 7584 }, { "epoch": 0.20826468973091708, "grad_norm": 0.38860979676246643, "learning_rate": 1.9482147272977543e-05, "loss": 0.5459, "step": 7585 }, { "epoch": 0.20829214717188357, "grad_norm": 0.3776243329048157, "learning_rate": 1.948201008182108e-05, "loss": 0.5005, "step": 7586 }, { "epoch": 0.2083196046128501, "grad_norm": 0.48295915126800537, "learning_rate": 1.948187287297764e-05, "loss": 0.5243, "step": 7587 }, { "epoch": 0.2083470620538166, "grad_norm": 0.3734515309333801, "learning_rate": 1.948173564644748e-05, "loss": 0.5674, "step": 7588 }, { "epoch": 0.20837451949478308, "grad_norm": 0.3722502887248993, "learning_rate": 1.9481598402230862e-05, "loss": 0.5113, "step": 7589 }, { "epoch": 0.20840197693574958, "grad_norm": 0.36111605167388916, "learning_rate": 1.9481461140328033e-05, "loss": 0.5037, "step": 7590 }, { "epoch": 0.2084294343767161, "grad_norm": 0.40063241124153137, "learning_rate": 1.948132386073925e-05, "loss": 0.5119, "step": 7591 }, { "epoch": 0.2084568918176826, "grad_norm": 0.407366544008255, "learning_rate": 1.948118656346478e-05, "loss": 0.566, "step": 7592 }, { "epoch": 0.2084843492586491, "grad_norm": 0.4047910273075104, "learning_rate": 1.9481049248504863e-05, "loss": 0.6004, "step": 7593 }, { "epoch": 0.2085118066996156, "grad_norm": 0.3414928615093231, "learning_rate": 1.9480911915859767e-05, "loss": 0.4888, "step": 7594 }, { "epoch": 0.2085392641405821, "grad_norm": 0.5145168900489807, "learning_rate": 1.9480774565529744e-05, "loss": 0.5782, "step": 7595 }, { "epoch": 0.2085667215815486, "grad_norm": 0.3232507109642029, "learning_rate": 1.9480637197515048e-05, "loss": 0.4727, "step": 7596 }, { "epoch": 0.2085941790225151, "grad_norm": 0.375011146068573, "learning_rate": 1.948049981181594e-05, "loss": 0.4958, "step": 7597 }, { "epoch": 0.2086216364634816, "grad_norm": 0.4271661937236786, "learning_rate": 1.9480362408432674e-05, "loss": 0.595, "step": 7598 }, { "epoch": 0.2086490939044481, "grad_norm": 0.3530728816986084, "learning_rate": 1.9480224987365506e-05, "loss": 0.5761, "step": 7599 }, { "epoch": 0.2086765513454146, "grad_norm": 0.42961248755455017, "learning_rate": 1.9480087548614693e-05, "loss": 0.5569, "step": 7600 }, { "epoch": 0.2087040087863811, "grad_norm": 0.35871684551239014, "learning_rate": 1.947995009218049e-05, "loss": 0.5674, "step": 7601 }, { "epoch": 0.20873146622734762, "grad_norm": 0.4190363883972168, "learning_rate": 1.9479812618063156e-05, "loss": 0.5029, "step": 7602 }, { "epoch": 0.2087589236683141, "grad_norm": 0.38788285851478577, "learning_rate": 1.9479675126262943e-05, "loss": 0.5421, "step": 7603 }, { "epoch": 0.2087863811092806, "grad_norm": 0.36662906408309937, "learning_rate": 1.9479537616780115e-05, "loss": 0.5245, "step": 7604 }, { "epoch": 0.20881383855024713, "grad_norm": 0.3465394675731659, "learning_rate": 1.9479400089614922e-05, "loss": 0.4834, "step": 7605 }, { "epoch": 0.20884129599121362, "grad_norm": 0.34957262873649597, "learning_rate": 1.9479262544767618e-05, "loss": 0.4832, "step": 7606 }, { "epoch": 0.20886875343218012, "grad_norm": 0.3421435058116913, "learning_rate": 1.9479124982238467e-05, "loss": 0.4614, "step": 7607 }, { "epoch": 0.2088962108731466, "grad_norm": 0.38553664088249207, "learning_rate": 1.9478987402027724e-05, "loss": 0.5922, "step": 7608 }, { "epoch": 0.20892366831411313, "grad_norm": 0.6419899463653564, "learning_rate": 1.9478849804135646e-05, "loss": 0.573, "step": 7609 }, { "epoch": 0.20895112575507963, "grad_norm": 0.40198925137519836, "learning_rate": 1.9478712188562487e-05, "loss": 0.5646, "step": 7610 }, { "epoch": 0.20897858319604612, "grad_norm": 0.35192954540252686, "learning_rate": 1.94785745553085e-05, "loss": 0.5508, "step": 7611 }, { "epoch": 0.20900604063701264, "grad_norm": 0.4219098389148712, "learning_rate": 1.947843690437395e-05, "loss": 0.6408, "step": 7612 }, { "epoch": 0.20903349807797914, "grad_norm": 0.4437122344970703, "learning_rate": 1.9478299235759088e-05, "loss": 0.5916, "step": 7613 }, { "epoch": 0.20906095551894563, "grad_norm": 0.3997351825237274, "learning_rate": 1.9478161549464174e-05, "loss": 0.5318, "step": 7614 }, { "epoch": 0.20908841295991212, "grad_norm": 0.5187093615531921, "learning_rate": 1.9478023845489462e-05, "loss": 0.6048, "step": 7615 }, { "epoch": 0.20911587040087865, "grad_norm": 0.3538030982017517, "learning_rate": 1.947788612383521e-05, "loss": 0.4998, "step": 7616 }, { "epoch": 0.20914332784184514, "grad_norm": 0.3669971525669098, "learning_rate": 1.9477748384501678e-05, "loss": 0.5451, "step": 7617 }, { "epoch": 0.20917078528281163, "grad_norm": 0.3813173472881317, "learning_rate": 1.9477610627489118e-05, "loss": 0.5209, "step": 7618 }, { "epoch": 0.20919824272377815, "grad_norm": 0.3493728041648865, "learning_rate": 1.947747285279779e-05, "loss": 0.471, "step": 7619 }, { "epoch": 0.20922570016474465, "grad_norm": 0.363414466381073, "learning_rate": 1.9477335060427954e-05, "loss": 0.4192, "step": 7620 }, { "epoch": 0.20925315760571114, "grad_norm": 0.39078351855278015, "learning_rate": 1.9477197250379862e-05, "loss": 0.5647, "step": 7621 }, { "epoch": 0.20928061504667764, "grad_norm": 0.35664811730384827, "learning_rate": 1.947705942265377e-05, "loss": 0.5332, "step": 7622 }, { "epoch": 0.20930807248764416, "grad_norm": 0.3787766098976135, "learning_rate": 1.9476921577249935e-05, "loss": 0.6096, "step": 7623 }, { "epoch": 0.20933552992861065, "grad_norm": 0.37204113602638245, "learning_rate": 1.9476783714168623e-05, "loss": 0.57, "step": 7624 }, { "epoch": 0.20936298736957715, "grad_norm": 0.4101165533065796, "learning_rate": 1.9476645833410078e-05, "loss": 0.5939, "step": 7625 }, { "epoch": 0.20939044481054367, "grad_norm": 0.3835039734840393, "learning_rate": 1.947650793497457e-05, "loss": 0.5811, "step": 7626 }, { "epoch": 0.20941790225151016, "grad_norm": 0.35198166966438293, "learning_rate": 1.9476370018862346e-05, "loss": 0.5427, "step": 7627 }, { "epoch": 0.20944535969247666, "grad_norm": 0.3503159284591675, "learning_rate": 1.9476232085073668e-05, "loss": 0.5157, "step": 7628 }, { "epoch": 0.20947281713344315, "grad_norm": 0.3532663583755493, "learning_rate": 1.9476094133608793e-05, "loss": 0.5397, "step": 7629 }, { "epoch": 0.20950027457440967, "grad_norm": 0.3778485655784607, "learning_rate": 1.9475956164467978e-05, "loss": 0.5557, "step": 7630 }, { "epoch": 0.20952773201537617, "grad_norm": 0.3534981608390808, "learning_rate": 1.9475818177651483e-05, "loss": 0.5216, "step": 7631 }, { "epoch": 0.20955518945634266, "grad_norm": 0.37158554792404175, "learning_rate": 1.9475680173159558e-05, "loss": 0.6198, "step": 7632 }, { "epoch": 0.20958264689730918, "grad_norm": 0.33717790246009827, "learning_rate": 1.947554215099247e-05, "loss": 0.5407, "step": 7633 }, { "epoch": 0.20961010433827568, "grad_norm": 0.4234338104724884, "learning_rate": 1.947540411115047e-05, "loss": 0.5494, "step": 7634 }, { "epoch": 0.20963756177924217, "grad_norm": 0.3379095196723938, "learning_rate": 1.9475266053633818e-05, "loss": 0.4766, "step": 7635 }, { "epoch": 0.20966501922020866, "grad_norm": 0.3719825744628906, "learning_rate": 1.9475127978442767e-05, "loss": 0.6212, "step": 7636 }, { "epoch": 0.2096924766611752, "grad_norm": 0.37886571884155273, "learning_rate": 1.9474989885577584e-05, "loss": 0.5207, "step": 7637 }, { "epoch": 0.20971993410214168, "grad_norm": 0.37721848487854004, "learning_rate": 1.9474851775038516e-05, "loss": 0.5293, "step": 7638 }, { "epoch": 0.20974739154310817, "grad_norm": 0.38806986808776855, "learning_rate": 1.9474713646825828e-05, "loss": 0.5895, "step": 7639 }, { "epoch": 0.2097748489840747, "grad_norm": 0.3770739436149597, "learning_rate": 1.947457550093977e-05, "loss": 0.588, "step": 7640 }, { "epoch": 0.2098023064250412, "grad_norm": 0.3505558371543884, "learning_rate": 1.9474437337380608e-05, "loss": 0.5031, "step": 7641 }, { "epoch": 0.20982976386600768, "grad_norm": 0.3607693016529083, "learning_rate": 1.94742991561486e-05, "loss": 0.505, "step": 7642 }, { "epoch": 0.20985722130697418, "grad_norm": 0.38945865631103516, "learning_rate": 1.9474160957243997e-05, "loss": 0.574, "step": 7643 }, { "epoch": 0.2098846787479407, "grad_norm": 0.3453112840652466, "learning_rate": 1.947402274066706e-05, "loss": 0.4886, "step": 7644 }, { "epoch": 0.2099121361889072, "grad_norm": 0.37124085426330566, "learning_rate": 1.9473884506418048e-05, "loss": 0.5024, "step": 7645 }, { "epoch": 0.2099395936298737, "grad_norm": 0.3769884705543518, "learning_rate": 1.9473746254497214e-05, "loss": 0.5225, "step": 7646 }, { "epoch": 0.2099670510708402, "grad_norm": 0.34181246161460876, "learning_rate": 1.9473607984904824e-05, "loss": 0.5634, "step": 7647 }, { "epoch": 0.2099945085118067, "grad_norm": 0.3925192356109619, "learning_rate": 1.947346969764113e-05, "loss": 0.4693, "step": 7648 }, { "epoch": 0.2100219659527732, "grad_norm": 0.4676647186279297, "learning_rate": 1.9473331392706392e-05, "loss": 0.5835, "step": 7649 }, { "epoch": 0.2100494233937397, "grad_norm": 0.3543460965156555, "learning_rate": 1.9473193070100866e-05, "loss": 0.5609, "step": 7650 }, { "epoch": 0.21007688083470621, "grad_norm": 0.33256974816322327, "learning_rate": 1.9473054729824813e-05, "loss": 0.4807, "step": 7651 }, { "epoch": 0.2101043382756727, "grad_norm": 0.3847549259662628, "learning_rate": 1.9472916371878487e-05, "loss": 0.5844, "step": 7652 }, { "epoch": 0.2101317957166392, "grad_norm": 0.38168856501579285, "learning_rate": 1.947277799626215e-05, "loss": 0.5506, "step": 7653 }, { "epoch": 0.21015925315760572, "grad_norm": 0.40839627385139465, "learning_rate": 1.947263960297606e-05, "loss": 0.5083, "step": 7654 }, { "epoch": 0.21018671059857222, "grad_norm": 0.41970112919807434, "learning_rate": 1.947250119202047e-05, "loss": 0.5679, "step": 7655 }, { "epoch": 0.2102141680395387, "grad_norm": 0.3623751103878021, "learning_rate": 1.9472362763395646e-05, "loss": 0.5218, "step": 7656 }, { "epoch": 0.2102416254805052, "grad_norm": 0.3482477068901062, "learning_rate": 1.9472224317101843e-05, "loss": 0.5485, "step": 7657 }, { "epoch": 0.21026908292147173, "grad_norm": 0.4256426990032196, "learning_rate": 1.9472085853139315e-05, "loss": 0.5505, "step": 7658 }, { "epoch": 0.21029654036243822, "grad_norm": 0.38097429275512695, "learning_rate": 1.9471947371508326e-05, "loss": 0.571, "step": 7659 }, { "epoch": 0.21032399780340472, "grad_norm": 0.41050899028778076, "learning_rate": 1.9471808872209132e-05, "loss": 0.5228, "step": 7660 }, { "epoch": 0.21035145524437124, "grad_norm": 0.36769217252731323, "learning_rate": 1.947167035524199e-05, "loss": 0.5461, "step": 7661 }, { "epoch": 0.21037891268533773, "grad_norm": 0.40046578645706177, "learning_rate": 1.9471531820607162e-05, "loss": 0.539, "step": 7662 }, { "epoch": 0.21040637012630423, "grad_norm": 0.4364890456199646, "learning_rate": 1.9471393268304898e-05, "loss": 0.4454, "step": 7663 }, { "epoch": 0.21043382756727072, "grad_norm": 0.3513913154602051, "learning_rate": 1.947125469833547e-05, "loss": 0.5173, "step": 7664 }, { "epoch": 0.21046128500823724, "grad_norm": 0.3839256167411804, "learning_rate": 1.9471116110699127e-05, "loss": 0.5594, "step": 7665 }, { "epoch": 0.21048874244920374, "grad_norm": 0.3834983706474304, "learning_rate": 1.9470977505396127e-05, "loss": 0.6565, "step": 7666 }, { "epoch": 0.21051619989017023, "grad_norm": 0.35657617449760437, "learning_rate": 1.9470838882426735e-05, "loss": 0.5306, "step": 7667 }, { "epoch": 0.21054365733113672, "grad_norm": 0.33958151936531067, "learning_rate": 1.9470700241791203e-05, "loss": 0.5854, "step": 7668 }, { "epoch": 0.21057111477210325, "grad_norm": 0.42710384726524353, "learning_rate": 1.9470561583489792e-05, "loss": 0.5282, "step": 7669 }, { "epoch": 0.21059857221306974, "grad_norm": 0.37558797001838684, "learning_rate": 1.9470422907522764e-05, "loss": 0.5626, "step": 7670 }, { "epoch": 0.21062602965403623, "grad_norm": 0.35391250252723694, "learning_rate": 1.9470284213890373e-05, "loss": 0.5831, "step": 7671 }, { "epoch": 0.21065348709500276, "grad_norm": 0.3747095763683319, "learning_rate": 1.947014550259288e-05, "loss": 0.5323, "step": 7672 }, { "epoch": 0.21068094453596925, "grad_norm": 0.3469755947589874, "learning_rate": 1.947000677363054e-05, "loss": 0.4097, "step": 7673 }, { "epoch": 0.21070840197693574, "grad_norm": 0.3303808271884918, "learning_rate": 1.9469868027003618e-05, "loss": 0.5152, "step": 7674 }, { "epoch": 0.21073585941790224, "grad_norm": 0.3417487144470215, "learning_rate": 1.9469729262712368e-05, "loss": 0.5053, "step": 7675 }, { "epoch": 0.21076331685886876, "grad_norm": 0.3354649245738983, "learning_rate": 1.9469590480757053e-05, "loss": 0.4341, "step": 7676 }, { "epoch": 0.21079077429983525, "grad_norm": 0.44170454144477844, "learning_rate": 1.9469451681137926e-05, "loss": 0.5935, "step": 7677 }, { "epoch": 0.21081823174080175, "grad_norm": 0.35220542550086975, "learning_rate": 1.9469312863855255e-05, "loss": 0.4873, "step": 7678 }, { "epoch": 0.21084568918176827, "grad_norm": 0.38703009486198425, "learning_rate": 1.9469174028909287e-05, "loss": 0.5801, "step": 7679 }, { "epoch": 0.21087314662273476, "grad_norm": 0.32918980717658997, "learning_rate": 1.946903517630029e-05, "loss": 0.5066, "step": 7680 }, { "epoch": 0.21090060406370126, "grad_norm": 0.4387950599193573, "learning_rate": 1.946889630602852e-05, "loss": 0.5686, "step": 7681 }, { "epoch": 0.21092806150466775, "grad_norm": 0.4177253246307373, "learning_rate": 1.9468757418094238e-05, "loss": 0.4867, "step": 7682 }, { "epoch": 0.21095551894563427, "grad_norm": 0.35858961939811707, "learning_rate": 1.9468618512497696e-05, "loss": 0.5274, "step": 7683 }, { "epoch": 0.21098297638660077, "grad_norm": 0.4139188230037689, "learning_rate": 1.9468479589239165e-05, "loss": 0.5649, "step": 7684 }, { "epoch": 0.21101043382756726, "grad_norm": 0.3729940354824066, "learning_rate": 1.9468340648318893e-05, "loss": 0.5132, "step": 7685 }, { "epoch": 0.21103789126853378, "grad_norm": 0.43315044045448303, "learning_rate": 1.9468201689737144e-05, "loss": 0.5743, "step": 7686 }, { "epoch": 0.21106534870950028, "grad_norm": 0.3565322458744049, "learning_rate": 1.946806271349418e-05, "loss": 0.5038, "step": 7687 }, { "epoch": 0.21109280615046677, "grad_norm": 0.36181968450546265, "learning_rate": 1.9467923719590254e-05, "loss": 0.5503, "step": 7688 }, { "epoch": 0.21112026359143327, "grad_norm": 0.30538374185562134, "learning_rate": 1.946778470802563e-05, "loss": 0.545, "step": 7689 }, { "epoch": 0.2111477210323998, "grad_norm": 0.37401947379112244, "learning_rate": 1.9467645678800565e-05, "loss": 0.4703, "step": 7690 }, { "epoch": 0.21117517847336628, "grad_norm": 0.5387868285179138, "learning_rate": 1.9467506631915323e-05, "loss": 0.5836, "step": 7691 }, { "epoch": 0.21120263591433278, "grad_norm": 0.37364670634269714, "learning_rate": 1.9467367567370155e-05, "loss": 0.5191, "step": 7692 }, { "epoch": 0.2112300933552993, "grad_norm": 0.4309769570827484, "learning_rate": 1.9467228485165326e-05, "loss": 0.6035, "step": 7693 }, { "epoch": 0.2112575507962658, "grad_norm": 0.656316339969635, "learning_rate": 1.9467089385301093e-05, "loss": 0.637, "step": 7694 }, { "epoch": 0.21128500823723229, "grad_norm": 0.3901107609272003, "learning_rate": 1.9466950267777718e-05, "loss": 0.5611, "step": 7695 }, { "epoch": 0.21131246567819878, "grad_norm": 0.34908527135849, "learning_rate": 1.946681113259546e-05, "loss": 0.4274, "step": 7696 }, { "epoch": 0.2113399231191653, "grad_norm": 0.3590952754020691, "learning_rate": 1.9466671979754575e-05, "loss": 0.3912, "step": 7697 }, { "epoch": 0.2113673805601318, "grad_norm": 0.37690168619155884, "learning_rate": 1.946653280925533e-05, "loss": 0.5621, "step": 7698 }, { "epoch": 0.2113948380010983, "grad_norm": 0.3536081314086914, "learning_rate": 1.9466393621097976e-05, "loss": 0.4488, "step": 7699 }, { "epoch": 0.2114222954420648, "grad_norm": 0.36269640922546387, "learning_rate": 1.946625441528278e-05, "loss": 0.4956, "step": 7700 }, { "epoch": 0.2114497528830313, "grad_norm": 0.36642515659332275, "learning_rate": 1.9466115191809995e-05, "loss": 0.5487, "step": 7701 }, { "epoch": 0.2114772103239978, "grad_norm": 0.37331146001815796, "learning_rate": 1.9465975950679884e-05, "loss": 0.5501, "step": 7702 }, { "epoch": 0.2115046677649643, "grad_norm": 0.3674823045730591, "learning_rate": 1.946583669189271e-05, "loss": 0.5283, "step": 7703 }, { "epoch": 0.21153212520593082, "grad_norm": 0.3422505557537079, "learning_rate": 1.9465697415448726e-05, "loss": 0.5448, "step": 7704 }, { "epoch": 0.2115595826468973, "grad_norm": 0.37669679522514343, "learning_rate": 1.9465558121348197e-05, "loss": 0.6219, "step": 7705 }, { "epoch": 0.2115870400878638, "grad_norm": 0.32592153549194336, "learning_rate": 1.946541880959138e-05, "loss": 0.5223, "step": 7706 }, { "epoch": 0.21161449752883033, "grad_norm": 0.36265498399734497, "learning_rate": 1.9465279480178537e-05, "loss": 0.536, "step": 7707 }, { "epoch": 0.21164195496979682, "grad_norm": 0.39926716685295105, "learning_rate": 1.9465140133109927e-05, "loss": 0.5005, "step": 7708 }, { "epoch": 0.2116694124107633, "grad_norm": 0.33597373962402344, "learning_rate": 1.946500076838581e-05, "loss": 0.495, "step": 7709 }, { "epoch": 0.2116968698517298, "grad_norm": 0.3775760531425476, "learning_rate": 1.946486138600644e-05, "loss": 0.5253, "step": 7710 }, { "epoch": 0.21172432729269633, "grad_norm": 0.36405226588249207, "learning_rate": 1.946472198597209e-05, "loss": 0.5675, "step": 7711 }, { "epoch": 0.21175178473366282, "grad_norm": 0.3907422721385956, "learning_rate": 1.946458256828301e-05, "loss": 0.5013, "step": 7712 }, { "epoch": 0.21177924217462932, "grad_norm": 0.3653850257396698, "learning_rate": 1.9464443132939463e-05, "loss": 0.539, "step": 7713 }, { "epoch": 0.21180669961559584, "grad_norm": 0.33872750401496887, "learning_rate": 1.9464303679941706e-05, "loss": 0.4829, "step": 7714 }, { "epoch": 0.21183415705656233, "grad_norm": 0.3511808514595032, "learning_rate": 1.9464164209290007e-05, "loss": 0.5203, "step": 7715 }, { "epoch": 0.21186161449752883, "grad_norm": 0.35137391090393066, "learning_rate": 1.9464024720984617e-05, "loss": 0.542, "step": 7716 }, { "epoch": 0.21188907193849532, "grad_norm": 0.33773982524871826, "learning_rate": 1.9463885215025803e-05, "loss": 0.5342, "step": 7717 }, { "epoch": 0.21191652937946184, "grad_norm": 0.34832075238227844, "learning_rate": 1.9463745691413823e-05, "loss": 0.5604, "step": 7718 }, { "epoch": 0.21194398682042834, "grad_norm": 0.33251723647117615, "learning_rate": 1.9463606150148937e-05, "loss": 0.5422, "step": 7719 }, { "epoch": 0.21197144426139483, "grad_norm": 0.3877292275428772, "learning_rate": 1.9463466591231404e-05, "loss": 0.593, "step": 7720 }, { "epoch": 0.21199890170236135, "grad_norm": 0.32499247789382935, "learning_rate": 1.9463327014661484e-05, "loss": 0.4878, "step": 7721 }, { "epoch": 0.21202635914332785, "grad_norm": 0.45507898926734924, "learning_rate": 1.946318742043944e-05, "loss": 0.5905, "step": 7722 }, { "epoch": 0.21205381658429434, "grad_norm": 0.3592537045478821, "learning_rate": 1.946304780856553e-05, "loss": 0.5617, "step": 7723 }, { "epoch": 0.21208127402526084, "grad_norm": 0.36364272236824036, "learning_rate": 1.9462908179040015e-05, "loss": 0.4983, "step": 7724 }, { "epoch": 0.21210873146622736, "grad_norm": 0.3402278423309326, "learning_rate": 1.946276853186316e-05, "loss": 0.6051, "step": 7725 }, { "epoch": 0.21213618890719385, "grad_norm": 1.2872556447982788, "learning_rate": 1.946262886703522e-05, "loss": 0.579, "step": 7726 }, { "epoch": 0.21216364634816035, "grad_norm": 0.3633585274219513, "learning_rate": 1.9462489184556453e-05, "loss": 0.5368, "step": 7727 }, { "epoch": 0.21219110378912687, "grad_norm": 0.3664308488368988, "learning_rate": 1.9462349484427127e-05, "loss": 0.5376, "step": 7728 }, { "epoch": 0.21221856123009336, "grad_norm": 0.43939539790153503, "learning_rate": 1.94622097666475e-05, "loss": 0.4712, "step": 7729 }, { "epoch": 0.21224601867105986, "grad_norm": 0.4222705066204071, "learning_rate": 1.946207003121783e-05, "loss": 0.4837, "step": 7730 }, { "epoch": 0.21227347611202635, "grad_norm": 0.4053070545196533, "learning_rate": 1.9461930278138382e-05, "loss": 0.5362, "step": 7731 }, { "epoch": 0.21230093355299287, "grad_norm": 0.35824474692344666, "learning_rate": 1.9461790507409412e-05, "loss": 0.5348, "step": 7732 }, { "epoch": 0.21232839099395936, "grad_norm": 2.450716733932495, "learning_rate": 1.9461650719031182e-05, "loss": 0.4265, "step": 7733 }, { "epoch": 0.21235584843492586, "grad_norm": 0.47630947828292847, "learning_rate": 1.9461510913003953e-05, "loss": 0.6345, "step": 7734 }, { "epoch": 0.21238330587589235, "grad_norm": 0.38426467776298523, "learning_rate": 1.946137108932799e-05, "loss": 0.5179, "step": 7735 }, { "epoch": 0.21241076331685887, "grad_norm": 0.34811273217201233, "learning_rate": 1.9461231248003546e-05, "loss": 0.5383, "step": 7736 }, { "epoch": 0.21243822075782537, "grad_norm": 0.3579137325286865, "learning_rate": 1.946109138903089e-05, "loss": 0.5503, "step": 7737 }, { "epoch": 0.21246567819879186, "grad_norm": 0.35739371180534363, "learning_rate": 1.946095151241028e-05, "loss": 0.5221, "step": 7738 }, { "epoch": 0.21249313563975838, "grad_norm": 0.41838210821151733, "learning_rate": 1.946081161814197e-05, "loss": 0.5313, "step": 7739 }, { "epoch": 0.21252059308072488, "grad_norm": 0.3637886047363281, "learning_rate": 1.946067170622623e-05, "loss": 0.5063, "step": 7740 }, { "epoch": 0.21254805052169137, "grad_norm": 0.3488861322402954, "learning_rate": 1.946053177666332e-05, "loss": 0.5999, "step": 7741 }, { "epoch": 0.21257550796265787, "grad_norm": 0.3828712999820709, "learning_rate": 1.9460391829453496e-05, "loss": 0.5904, "step": 7742 }, { "epoch": 0.2126029654036244, "grad_norm": 0.40868139266967773, "learning_rate": 1.9460251864597022e-05, "loss": 0.5636, "step": 7743 }, { "epoch": 0.21263042284459088, "grad_norm": 0.33385640382766724, "learning_rate": 1.946011188209416e-05, "loss": 0.5471, "step": 7744 }, { "epoch": 0.21265788028555738, "grad_norm": 0.38346022367477417, "learning_rate": 1.945997188194517e-05, "loss": 0.5311, "step": 7745 }, { "epoch": 0.2126853377265239, "grad_norm": 0.3262152671813965, "learning_rate": 1.945983186415031e-05, "loss": 0.471, "step": 7746 }, { "epoch": 0.2127127951674904, "grad_norm": 0.410839319229126, "learning_rate": 1.9459691828709847e-05, "loss": 0.5733, "step": 7747 }, { "epoch": 0.2127402526084569, "grad_norm": 0.3856702744960785, "learning_rate": 1.9459551775624036e-05, "loss": 0.5099, "step": 7748 }, { "epoch": 0.21276771004942338, "grad_norm": 0.48778197169303894, "learning_rate": 1.9459411704893147e-05, "loss": 0.4901, "step": 7749 }, { "epoch": 0.2127951674903899, "grad_norm": 0.34644731879234314, "learning_rate": 1.9459271616517436e-05, "loss": 0.4881, "step": 7750 }, { "epoch": 0.2128226249313564, "grad_norm": 0.3588396906852722, "learning_rate": 1.945913151049716e-05, "loss": 0.4686, "step": 7751 }, { "epoch": 0.2128500823723229, "grad_norm": 0.39102795720100403, "learning_rate": 1.9458991386832588e-05, "loss": 0.5387, "step": 7752 }, { "epoch": 0.2128775398132894, "grad_norm": 0.33956971764564514, "learning_rate": 1.945885124552398e-05, "loss": 0.4511, "step": 7753 }, { "epoch": 0.2129049972542559, "grad_norm": 0.3745773136615753, "learning_rate": 1.9458711086571593e-05, "loss": 0.552, "step": 7754 }, { "epoch": 0.2129324546952224, "grad_norm": 0.3400760889053345, "learning_rate": 1.945857090997569e-05, "loss": 0.5615, "step": 7755 }, { "epoch": 0.2129599121361889, "grad_norm": 0.38846296072006226, "learning_rate": 1.9458430715736534e-05, "loss": 0.6239, "step": 7756 }, { "epoch": 0.21298736957715542, "grad_norm": 0.41156572103500366, "learning_rate": 1.9458290503854383e-05, "loss": 0.6212, "step": 7757 }, { "epoch": 0.2130148270181219, "grad_norm": 0.32920360565185547, "learning_rate": 1.945815027432951e-05, "loss": 0.5645, "step": 7758 }, { "epoch": 0.2130422844590884, "grad_norm": 0.35983172059059143, "learning_rate": 1.945801002716216e-05, "loss": 0.4652, "step": 7759 }, { "epoch": 0.21306974190005493, "grad_norm": 0.3461645245552063, "learning_rate": 1.9457869762352604e-05, "loss": 0.5258, "step": 7760 }, { "epoch": 0.21309719934102142, "grad_norm": 0.327502965927124, "learning_rate": 1.9457729479901103e-05, "loss": 0.5173, "step": 7761 }, { "epoch": 0.21312465678198791, "grad_norm": 0.33881354331970215, "learning_rate": 1.9457589179807922e-05, "loss": 0.4745, "step": 7762 }, { "epoch": 0.2131521142229544, "grad_norm": 0.38264113664627075, "learning_rate": 1.9457448862073314e-05, "loss": 0.5875, "step": 7763 }, { "epoch": 0.21317957166392093, "grad_norm": 0.34533512592315674, "learning_rate": 1.9457308526697548e-05, "loss": 0.5339, "step": 7764 }, { "epoch": 0.21320702910488742, "grad_norm": 0.43047794699668884, "learning_rate": 1.945716817368088e-05, "loss": 0.5035, "step": 7765 }, { "epoch": 0.21323448654585392, "grad_norm": 0.3788689970970154, "learning_rate": 1.945702780302358e-05, "loss": 0.503, "step": 7766 }, { "epoch": 0.21326194398682044, "grad_norm": 0.38941720128059387, "learning_rate": 1.94568874147259e-05, "loss": 0.6281, "step": 7767 }, { "epoch": 0.21328940142778693, "grad_norm": 0.3785932660102844, "learning_rate": 1.9456747008788107e-05, "loss": 0.4516, "step": 7768 }, { "epoch": 0.21331685886875343, "grad_norm": 0.31275665760040283, "learning_rate": 1.9456606585210465e-05, "loss": 0.4924, "step": 7769 }, { "epoch": 0.21334431630971992, "grad_norm": 0.42561084032058716, "learning_rate": 1.9456466143993232e-05, "loss": 0.5882, "step": 7770 }, { "epoch": 0.21337177375068644, "grad_norm": 0.3582158386707306, "learning_rate": 1.945632568513667e-05, "loss": 0.508, "step": 7771 }, { "epoch": 0.21339923119165294, "grad_norm": 0.3860912621021271, "learning_rate": 1.9456185208641045e-05, "loss": 0.5579, "step": 7772 }, { "epoch": 0.21342668863261943, "grad_norm": 0.35531359910964966, "learning_rate": 1.9456044714506618e-05, "loss": 0.5235, "step": 7773 }, { "epoch": 0.21345414607358595, "grad_norm": 0.3715299069881439, "learning_rate": 1.9455904202733645e-05, "loss": 0.5249, "step": 7774 }, { "epoch": 0.21348160351455245, "grad_norm": 0.36395663022994995, "learning_rate": 1.94557636733224e-05, "loss": 0.4949, "step": 7775 }, { "epoch": 0.21350906095551894, "grad_norm": 0.3640040159225464, "learning_rate": 1.9455623126273127e-05, "loss": 0.5214, "step": 7776 }, { "epoch": 0.21353651839648544, "grad_norm": 0.37844139337539673, "learning_rate": 1.945548256158611e-05, "loss": 0.5375, "step": 7777 }, { "epoch": 0.21356397583745196, "grad_norm": 0.35938823223114014, "learning_rate": 1.9455341979261596e-05, "loss": 0.5216, "step": 7778 }, { "epoch": 0.21359143327841845, "grad_norm": 0.38652148842811584, "learning_rate": 1.945520137929985e-05, "loss": 0.4868, "step": 7779 }, { "epoch": 0.21361889071938495, "grad_norm": 0.3520498275756836, "learning_rate": 1.9455060761701134e-05, "loss": 0.5502, "step": 7780 }, { "epoch": 0.21364634816035147, "grad_norm": 0.3801784813404083, "learning_rate": 1.9454920126465716e-05, "loss": 0.4967, "step": 7781 }, { "epoch": 0.21367380560131796, "grad_norm": 0.3093474209308624, "learning_rate": 1.9454779473593852e-05, "loss": 0.4482, "step": 7782 }, { "epoch": 0.21370126304228446, "grad_norm": 0.5183829665184021, "learning_rate": 1.9454638803085807e-05, "loss": 0.5829, "step": 7783 }, { "epoch": 0.21372872048325095, "grad_norm": 0.3645957112312317, "learning_rate": 1.9454498114941845e-05, "loss": 0.4891, "step": 7784 }, { "epoch": 0.21375617792421747, "grad_norm": 0.3807961046695709, "learning_rate": 1.945435740916222e-05, "loss": 0.5381, "step": 7785 }, { "epoch": 0.21378363536518397, "grad_norm": 0.4211444854736328, "learning_rate": 1.9454216685747206e-05, "loss": 0.5974, "step": 7786 }, { "epoch": 0.21381109280615046, "grad_norm": 0.3753012716770172, "learning_rate": 1.9454075944697065e-05, "loss": 0.6361, "step": 7787 }, { "epoch": 0.21383855024711698, "grad_norm": 0.37476488947868347, "learning_rate": 1.945393518601205e-05, "loss": 0.5664, "step": 7788 }, { "epoch": 0.21386600768808348, "grad_norm": 0.38222450017929077, "learning_rate": 1.9453794409692425e-05, "loss": 0.533, "step": 7789 }, { "epoch": 0.21389346512904997, "grad_norm": 0.370902955532074, "learning_rate": 1.9453653615738463e-05, "loss": 0.5158, "step": 7790 }, { "epoch": 0.21392092257001646, "grad_norm": 0.35353022813796997, "learning_rate": 1.9453512804150416e-05, "loss": 0.6049, "step": 7791 }, { "epoch": 0.21394838001098299, "grad_norm": 0.320016086101532, "learning_rate": 1.945337197492855e-05, "loss": 0.4474, "step": 7792 }, { "epoch": 0.21397583745194948, "grad_norm": 0.34728676080703735, "learning_rate": 1.945323112807313e-05, "loss": 0.4769, "step": 7793 }, { "epoch": 0.21400329489291597, "grad_norm": 0.40206435322761536, "learning_rate": 1.9453090263584416e-05, "loss": 0.4866, "step": 7794 }, { "epoch": 0.2140307523338825, "grad_norm": 0.3609091639518738, "learning_rate": 1.9452949381462672e-05, "loss": 0.5287, "step": 7795 }, { "epoch": 0.214058209774849, "grad_norm": 0.34392276406288147, "learning_rate": 1.945280848170816e-05, "loss": 0.4632, "step": 7796 }, { "epoch": 0.21408566721581548, "grad_norm": 0.4118764400482178, "learning_rate": 1.9452667564321143e-05, "loss": 0.5209, "step": 7797 }, { "epoch": 0.21411312465678198, "grad_norm": 0.3453790545463562, "learning_rate": 1.9452526629301883e-05, "loss": 0.5549, "step": 7798 }, { "epoch": 0.2141405820977485, "grad_norm": 0.35427603125572205, "learning_rate": 1.9452385676650646e-05, "loss": 0.5463, "step": 7799 }, { "epoch": 0.214168039538715, "grad_norm": 0.3975958228111267, "learning_rate": 1.945224470636769e-05, "loss": 0.6021, "step": 7800 }, { "epoch": 0.2141954969796815, "grad_norm": 0.3883931636810303, "learning_rate": 1.9452103718453283e-05, "loss": 0.5291, "step": 7801 }, { "epoch": 0.21422295442064798, "grad_norm": 0.35460758209228516, "learning_rate": 1.9451962712907684e-05, "loss": 0.4754, "step": 7802 }, { "epoch": 0.2142504118616145, "grad_norm": 0.3371031880378723, "learning_rate": 1.945182168973116e-05, "loss": 0.5006, "step": 7803 }, { "epoch": 0.214277869302581, "grad_norm": 0.37003234028816223, "learning_rate": 1.9451680648923973e-05, "loss": 0.5842, "step": 7804 }, { "epoch": 0.2143053267435475, "grad_norm": 0.3449018895626068, "learning_rate": 1.9451539590486385e-05, "loss": 0.5423, "step": 7805 }, { "epoch": 0.214332784184514, "grad_norm": 0.3767179548740387, "learning_rate": 1.9451398514418656e-05, "loss": 0.5095, "step": 7806 }, { "epoch": 0.2143602416254805, "grad_norm": 0.38707834482192993, "learning_rate": 1.9451257420721052e-05, "loss": 0.6203, "step": 7807 }, { "epoch": 0.214387699066447, "grad_norm": 0.37045225501060486, "learning_rate": 1.945111630939384e-05, "loss": 0.5496, "step": 7808 }, { "epoch": 0.2144151565074135, "grad_norm": 0.3707679510116577, "learning_rate": 1.9450975180437278e-05, "loss": 0.5286, "step": 7809 }, { "epoch": 0.21444261394838002, "grad_norm": 0.4984795153141022, "learning_rate": 1.9450834033851628e-05, "loss": 0.5781, "step": 7810 }, { "epoch": 0.2144700713893465, "grad_norm": 0.410521924495697, "learning_rate": 1.9450692869637158e-05, "loss": 0.6318, "step": 7811 }, { "epoch": 0.214497528830313, "grad_norm": 0.38461872935295105, "learning_rate": 1.9450551687794134e-05, "loss": 0.5601, "step": 7812 }, { "epoch": 0.21452498627127953, "grad_norm": 0.4145320951938629, "learning_rate": 1.945041048832281e-05, "loss": 0.5345, "step": 7813 }, { "epoch": 0.21455244371224602, "grad_norm": 0.4532339572906494, "learning_rate": 1.945026927122345e-05, "loss": 0.4754, "step": 7814 }, { "epoch": 0.21457990115321252, "grad_norm": 0.38856783509254456, "learning_rate": 1.945012803649633e-05, "loss": 0.506, "step": 7815 }, { "epoch": 0.214607358594179, "grad_norm": 0.37192535400390625, "learning_rate": 1.9449986784141703e-05, "loss": 0.5293, "step": 7816 }, { "epoch": 0.21463481603514553, "grad_norm": 0.3196938633918762, "learning_rate": 1.9449845514159832e-05, "loss": 0.4074, "step": 7817 }, { "epoch": 0.21466227347611203, "grad_norm": 0.3958622217178345, "learning_rate": 1.9449704226550985e-05, "loss": 0.4945, "step": 7818 }, { "epoch": 0.21468973091707852, "grad_norm": 0.3842329978942871, "learning_rate": 1.9449562921315423e-05, "loss": 0.5904, "step": 7819 }, { "epoch": 0.21471718835804504, "grad_norm": 0.4619813561439514, "learning_rate": 1.944942159845341e-05, "loss": 0.5369, "step": 7820 }, { "epoch": 0.21474464579901154, "grad_norm": 0.43263882398605347, "learning_rate": 1.944928025796521e-05, "loss": 0.4265, "step": 7821 }, { "epoch": 0.21477210323997803, "grad_norm": 0.33089905977249146, "learning_rate": 1.9449138899851086e-05, "loss": 0.5698, "step": 7822 }, { "epoch": 0.21479956068094452, "grad_norm": 0.397299200296402, "learning_rate": 1.9448997524111304e-05, "loss": 0.5658, "step": 7823 }, { "epoch": 0.21482701812191105, "grad_norm": 0.35092252492904663, "learning_rate": 1.9448856130746124e-05, "loss": 0.5345, "step": 7824 }, { "epoch": 0.21485447556287754, "grad_norm": 0.37571823596954346, "learning_rate": 1.9448714719755815e-05, "loss": 0.4825, "step": 7825 }, { "epoch": 0.21488193300384403, "grad_norm": 0.39365723729133606, "learning_rate": 1.9448573291140635e-05, "loss": 0.4763, "step": 7826 }, { "epoch": 0.21490939044481056, "grad_norm": 0.3824669122695923, "learning_rate": 1.9448431844900847e-05, "loss": 0.4753, "step": 7827 }, { "epoch": 0.21493684788577705, "grad_norm": 0.3541501462459564, "learning_rate": 1.9448290381036722e-05, "loss": 0.4938, "step": 7828 }, { "epoch": 0.21496430532674354, "grad_norm": 0.3657459616661072, "learning_rate": 1.9448148899548518e-05, "loss": 0.5147, "step": 7829 }, { "epoch": 0.21499176276771004, "grad_norm": 0.3974591791629791, "learning_rate": 1.9448007400436503e-05, "loss": 0.5172, "step": 7830 }, { "epoch": 0.21501922020867656, "grad_norm": 0.38809505105018616, "learning_rate": 1.9447865883700937e-05, "loss": 0.5293, "step": 7831 }, { "epoch": 0.21504667764964305, "grad_norm": 0.36427539587020874, "learning_rate": 1.9447724349342086e-05, "loss": 0.5458, "step": 7832 }, { "epoch": 0.21507413509060955, "grad_norm": 0.3454245924949646, "learning_rate": 1.9447582797360215e-05, "loss": 0.4544, "step": 7833 }, { "epoch": 0.21510159253157607, "grad_norm": 0.35093578696250916, "learning_rate": 1.9447441227755584e-05, "loss": 0.4833, "step": 7834 }, { "epoch": 0.21512904997254256, "grad_norm": 0.3398698568344116, "learning_rate": 1.944729964052846e-05, "loss": 0.5151, "step": 7835 }, { "epoch": 0.21515650741350906, "grad_norm": 0.3350887596607208, "learning_rate": 1.944715803567911e-05, "loss": 0.5495, "step": 7836 }, { "epoch": 0.21518396485447555, "grad_norm": 0.34562286734580994, "learning_rate": 1.944701641320779e-05, "loss": 0.5365, "step": 7837 }, { "epoch": 0.21521142229544207, "grad_norm": 0.3703630268573761, "learning_rate": 1.9446874773114775e-05, "loss": 0.5576, "step": 7838 }, { "epoch": 0.21523887973640857, "grad_norm": 0.3719813823699951, "learning_rate": 1.944673311540032e-05, "loss": 0.5251, "step": 7839 }, { "epoch": 0.21526633717737506, "grad_norm": 0.38200628757476807, "learning_rate": 1.9446591440064692e-05, "loss": 0.5187, "step": 7840 }, { "epoch": 0.21529379461834158, "grad_norm": 0.38458025455474854, "learning_rate": 1.944644974710816e-05, "loss": 0.5332, "step": 7841 }, { "epoch": 0.21532125205930808, "grad_norm": 0.3553345501422882, "learning_rate": 1.944630803653098e-05, "loss": 0.5498, "step": 7842 }, { "epoch": 0.21534870950027457, "grad_norm": 0.41316306591033936, "learning_rate": 1.9446166308333424e-05, "loss": 0.4549, "step": 7843 }, { "epoch": 0.21537616694124107, "grad_norm": 0.3650861084461212, "learning_rate": 1.944602456251575e-05, "loss": 0.5081, "step": 7844 }, { "epoch": 0.2154036243822076, "grad_norm": 0.36110904812812805, "learning_rate": 1.9445882799078228e-05, "loss": 0.5488, "step": 7845 }, { "epoch": 0.21543108182317408, "grad_norm": 0.3443514406681061, "learning_rate": 1.9445741018021122e-05, "loss": 0.5205, "step": 7846 }, { "epoch": 0.21545853926414057, "grad_norm": 0.38647717237472534, "learning_rate": 1.9445599219344687e-05, "loss": 0.4975, "step": 7847 }, { "epoch": 0.2154859967051071, "grad_norm": 0.3483750522136688, "learning_rate": 1.94454574030492e-05, "loss": 0.4812, "step": 7848 }, { "epoch": 0.2155134541460736, "grad_norm": 0.35276123881340027, "learning_rate": 1.9445315569134918e-05, "loss": 0.5544, "step": 7849 }, { "epoch": 0.21554091158704008, "grad_norm": 0.43551766872406006, "learning_rate": 1.944517371760211e-05, "loss": 0.5955, "step": 7850 }, { "epoch": 0.21556836902800658, "grad_norm": 0.6050980687141418, "learning_rate": 1.9445031848451035e-05, "loss": 0.473, "step": 7851 }, { "epoch": 0.2155958264689731, "grad_norm": 0.37531015276908875, "learning_rate": 1.9444889961681964e-05, "loss": 0.5373, "step": 7852 }, { "epoch": 0.2156232839099396, "grad_norm": 0.4050746262073517, "learning_rate": 1.944474805729516e-05, "loss": 0.5934, "step": 7853 }, { "epoch": 0.2156507413509061, "grad_norm": 0.3454146087169647, "learning_rate": 1.9444606135290884e-05, "loss": 0.5084, "step": 7854 }, { "epoch": 0.2156781987918726, "grad_norm": 0.41142481565475464, "learning_rate": 1.9444464195669403e-05, "loss": 0.527, "step": 7855 }, { "epoch": 0.2157056562328391, "grad_norm": 0.4680810272693634, "learning_rate": 1.9444322238430982e-05, "loss": 0.4867, "step": 7856 }, { "epoch": 0.2157331136738056, "grad_norm": 0.3439626395702362, "learning_rate": 1.944418026357589e-05, "loss": 0.4977, "step": 7857 }, { "epoch": 0.2157605711147721, "grad_norm": 0.368764728307724, "learning_rate": 1.9444038271104383e-05, "loss": 0.5187, "step": 7858 }, { "epoch": 0.21578802855573861, "grad_norm": 0.32216280698776245, "learning_rate": 1.944389626101673e-05, "loss": 0.5532, "step": 7859 }, { "epoch": 0.2158154859967051, "grad_norm": 0.3852599263191223, "learning_rate": 1.94437542333132e-05, "loss": 0.4917, "step": 7860 }, { "epoch": 0.2158429434376716, "grad_norm": 0.38828572630882263, "learning_rate": 1.944361218799405e-05, "loss": 0.6249, "step": 7861 }, { "epoch": 0.21587040087863812, "grad_norm": 0.3365877568721771, "learning_rate": 1.9443470125059552e-05, "loss": 0.4793, "step": 7862 }, { "epoch": 0.21589785831960462, "grad_norm": 0.3742566406726837, "learning_rate": 1.944332804450997e-05, "loss": 0.5, "step": 7863 }, { "epoch": 0.2159253157605711, "grad_norm": 0.33727753162384033, "learning_rate": 1.9443185946345564e-05, "loss": 0.4964, "step": 7864 }, { "epoch": 0.2159527732015376, "grad_norm": 0.341051310300827, "learning_rate": 1.94430438305666e-05, "loss": 0.4436, "step": 7865 }, { "epoch": 0.21598023064250413, "grad_norm": 0.3972136378288269, "learning_rate": 1.944290169717335e-05, "loss": 0.5906, "step": 7866 }, { "epoch": 0.21600768808347062, "grad_norm": 0.34802401065826416, "learning_rate": 1.9442759546166076e-05, "loss": 0.5184, "step": 7867 }, { "epoch": 0.21603514552443712, "grad_norm": 0.3707738518714905, "learning_rate": 1.9442617377545037e-05, "loss": 0.55, "step": 7868 }, { "epoch": 0.2160626029654036, "grad_norm": 0.3427625894546509, "learning_rate": 1.9442475191310503e-05, "loss": 0.5346, "step": 7869 }, { "epoch": 0.21609006040637013, "grad_norm": 0.380463570356369, "learning_rate": 1.9442332987462742e-05, "loss": 0.5556, "step": 7870 }, { "epoch": 0.21611751784733663, "grad_norm": 0.3904848098754883, "learning_rate": 1.9442190766002012e-05, "loss": 0.6003, "step": 7871 }, { "epoch": 0.21614497528830312, "grad_norm": 0.34269508719444275, "learning_rate": 1.9442048526928587e-05, "loss": 0.4615, "step": 7872 }, { "epoch": 0.21617243272926964, "grad_norm": 0.35646456480026245, "learning_rate": 1.944190627024273e-05, "loss": 0.56, "step": 7873 }, { "epoch": 0.21619989017023614, "grad_norm": 0.3351840674877167, "learning_rate": 1.9441763995944697e-05, "loss": 0.5237, "step": 7874 }, { "epoch": 0.21622734761120263, "grad_norm": 0.3447469174861908, "learning_rate": 1.9441621704034766e-05, "loss": 0.537, "step": 7875 }, { "epoch": 0.21625480505216912, "grad_norm": 0.36922597885131836, "learning_rate": 1.9441479394513197e-05, "loss": 0.5575, "step": 7876 }, { "epoch": 0.21628226249313565, "grad_norm": 0.32156461477279663, "learning_rate": 1.9441337067380252e-05, "loss": 0.5062, "step": 7877 }, { "epoch": 0.21630971993410214, "grad_norm": 0.3258417248725891, "learning_rate": 1.9441194722636204e-05, "loss": 0.4724, "step": 7878 }, { "epoch": 0.21633717737506863, "grad_norm": 0.3396294116973877, "learning_rate": 1.944105236028131e-05, "loss": 0.4424, "step": 7879 }, { "epoch": 0.21636463481603516, "grad_norm": 0.3740018308162689, "learning_rate": 1.944090998031584e-05, "loss": 0.6026, "step": 7880 }, { "epoch": 0.21639209225700165, "grad_norm": 0.44629162549972534, "learning_rate": 1.944076758274007e-05, "loss": 0.5874, "step": 7881 }, { "epoch": 0.21641954969796814, "grad_norm": 0.4096331000328064, "learning_rate": 1.9440625167554247e-05, "loss": 0.6143, "step": 7882 }, { "epoch": 0.21644700713893464, "grad_norm": 0.35968008637428284, "learning_rate": 1.9440482734758644e-05, "loss": 0.4896, "step": 7883 }, { "epoch": 0.21647446457990116, "grad_norm": 0.34813886880874634, "learning_rate": 1.9440340284353528e-05, "loss": 0.5317, "step": 7884 }, { "epoch": 0.21650192202086765, "grad_norm": 0.34971025586128235, "learning_rate": 1.944019781633917e-05, "loss": 0.5402, "step": 7885 }, { "epoch": 0.21652937946183415, "grad_norm": 0.371437668800354, "learning_rate": 1.9440055330715822e-05, "loss": 0.5017, "step": 7886 }, { "epoch": 0.21655683690280067, "grad_norm": 1.4274760484695435, "learning_rate": 1.9439912827483765e-05, "loss": 0.4852, "step": 7887 }, { "epoch": 0.21658429434376716, "grad_norm": 0.3596683740615845, "learning_rate": 1.943977030664325e-05, "loss": 0.5525, "step": 7888 }, { "epoch": 0.21661175178473366, "grad_norm": 0.3330322206020355, "learning_rate": 1.943962776819456e-05, "loss": 0.4811, "step": 7889 }, { "epoch": 0.21663920922570015, "grad_norm": 0.3559999465942383, "learning_rate": 1.9439485212137943e-05, "loss": 0.6012, "step": 7890 }, { "epoch": 0.21666666666666667, "grad_norm": 0.355718195438385, "learning_rate": 1.9439342638473676e-05, "loss": 0.5839, "step": 7891 }, { "epoch": 0.21669412410763317, "grad_norm": 0.35481736063957214, "learning_rate": 1.9439200047202024e-05, "loss": 0.5444, "step": 7892 }, { "epoch": 0.21672158154859966, "grad_norm": 0.370109885931015, "learning_rate": 1.943905743832325e-05, "loss": 0.5166, "step": 7893 }, { "epoch": 0.21674903898956618, "grad_norm": 0.31574854254722595, "learning_rate": 1.9438914811837623e-05, "loss": 0.4847, "step": 7894 }, { "epoch": 0.21677649643053268, "grad_norm": 0.3373795747756958, "learning_rate": 1.9438772167745406e-05, "loss": 0.5453, "step": 7895 }, { "epoch": 0.21680395387149917, "grad_norm": 0.34027326107025146, "learning_rate": 1.9438629506046868e-05, "loss": 0.5282, "step": 7896 }, { "epoch": 0.21683141131246567, "grad_norm": 0.3506055772304535, "learning_rate": 1.943848682674227e-05, "loss": 0.5414, "step": 7897 }, { "epoch": 0.2168588687534322, "grad_norm": 0.37641438841819763, "learning_rate": 1.9438344129831886e-05, "loss": 0.4736, "step": 7898 }, { "epoch": 0.21688632619439868, "grad_norm": 0.3529817461967468, "learning_rate": 1.9438201415315974e-05, "loss": 0.528, "step": 7899 }, { "epoch": 0.21691378363536518, "grad_norm": 0.37758827209472656, "learning_rate": 1.9438058683194808e-05, "loss": 0.5531, "step": 7900 }, { "epoch": 0.2169412410763317, "grad_norm": 0.3631792366504669, "learning_rate": 1.9437915933468648e-05, "loss": 0.4541, "step": 7901 }, { "epoch": 0.2169686985172982, "grad_norm": 0.3837461471557617, "learning_rate": 1.9437773166137762e-05, "loss": 0.4999, "step": 7902 }, { "epoch": 0.21699615595826469, "grad_norm": 0.3627293109893799, "learning_rate": 1.9437630381202415e-05, "loss": 0.5277, "step": 7903 }, { "epoch": 0.21702361339923118, "grad_norm": 0.3513796329498291, "learning_rate": 1.943748757866288e-05, "loss": 0.5393, "step": 7904 }, { "epoch": 0.2170510708401977, "grad_norm": 0.3478952944278717, "learning_rate": 1.9437344758519415e-05, "loss": 0.5572, "step": 7905 }, { "epoch": 0.2170785282811642, "grad_norm": 0.36101892590522766, "learning_rate": 1.943720192077229e-05, "loss": 0.4934, "step": 7906 }, { "epoch": 0.2171059857221307, "grad_norm": 0.3258741796016693, "learning_rate": 1.943705906542177e-05, "loss": 0.5146, "step": 7907 }, { "epoch": 0.2171334431630972, "grad_norm": 0.3595561385154724, "learning_rate": 1.9436916192468126e-05, "loss": 0.5377, "step": 7908 }, { "epoch": 0.2171609006040637, "grad_norm": 0.49214884638786316, "learning_rate": 1.9436773301911622e-05, "loss": 0.6015, "step": 7909 }, { "epoch": 0.2171883580450302, "grad_norm": 0.3790237605571747, "learning_rate": 1.9436630393752524e-05, "loss": 0.4787, "step": 7910 }, { "epoch": 0.2172158154859967, "grad_norm": 0.36321330070495605, "learning_rate": 1.9436487467991094e-05, "loss": 0.53, "step": 7911 }, { "epoch": 0.21724327292696322, "grad_norm": 0.3699352741241455, "learning_rate": 1.9436344524627606e-05, "loss": 0.4051, "step": 7912 }, { "epoch": 0.2172707303679297, "grad_norm": 0.3265607953071594, "learning_rate": 1.9436201563662328e-05, "loss": 0.4663, "step": 7913 }, { "epoch": 0.2172981878088962, "grad_norm": 0.38447192311286926, "learning_rate": 1.9436058585095513e-05, "loss": 0.6017, "step": 7914 }, { "epoch": 0.21732564524986273, "grad_norm": 0.3323126435279846, "learning_rate": 1.9435915588927445e-05, "loss": 0.5288, "step": 7915 }, { "epoch": 0.21735310269082922, "grad_norm": 0.4829493463039398, "learning_rate": 1.943577257515838e-05, "loss": 0.6018, "step": 7916 }, { "epoch": 0.2173805601317957, "grad_norm": 0.32431560754776, "learning_rate": 1.943562954378859e-05, "loss": 0.4971, "step": 7917 }, { "epoch": 0.2174080175727622, "grad_norm": 0.3597950041294098, "learning_rate": 1.9435486494818337e-05, "loss": 0.6569, "step": 7918 }, { "epoch": 0.21743547501372873, "grad_norm": 0.34114962816238403, "learning_rate": 1.943534342824789e-05, "loss": 0.5559, "step": 7919 }, { "epoch": 0.21746293245469522, "grad_norm": 0.3888196051120758, "learning_rate": 1.9435200344077512e-05, "loss": 0.5553, "step": 7920 }, { "epoch": 0.21749038989566172, "grad_norm": 0.34974297881126404, "learning_rate": 1.943505724230748e-05, "loss": 0.5862, "step": 7921 }, { "epoch": 0.21751784733662824, "grad_norm": 0.41655224561691284, "learning_rate": 1.9434914122938055e-05, "loss": 0.5174, "step": 7922 }, { "epoch": 0.21754530477759473, "grad_norm": 0.36801937222480774, "learning_rate": 1.9434770985969502e-05, "loss": 0.5648, "step": 7923 }, { "epoch": 0.21757276221856123, "grad_norm": 0.3897433578968048, "learning_rate": 1.9434627831402092e-05, "loss": 0.4758, "step": 7924 }, { "epoch": 0.21760021965952772, "grad_norm": 0.36516886949539185, "learning_rate": 1.9434484659236088e-05, "loss": 0.5588, "step": 7925 }, { "epoch": 0.21762767710049424, "grad_norm": 0.3792569041252136, "learning_rate": 1.943434146947176e-05, "loss": 0.5564, "step": 7926 }, { "epoch": 0.21765513454146074, "grad_norm": 0.40502235293388367, "learning_rate": 1.943419826210937e-05, "loss": 0.6092, "step": 7927 }, { "epoch": 0.21768259198242723, "grad_norm": 0.43261590600013733, "learning_rate": 1.9434055037149192e-05, "loss": 0.4696, "step": 7928 }, { "epoch": 0.21771004942339375, "grad_norm": 0.36214739084243774, "learning_rate": 1.943391179459149e-05, "loss": 0.518, "step": 7929 }, { "epoch": 0.21773750686436025, "grad_norm": 0.36184144020080566, "learning_rate": 1.9433768534436535e-05, "loss": 0.5994, "step": 7930 }, { "epoch": 0.21776496430532674, "grad_norm": 0.41555866599082947, "learning_rate": 1.9433625256684588e-05, "loss": 0.4974, "step": 7931 }, { "epoch": 0.21779242174629324, "grad_norm": 0.3799848258495331, "learning_rate": 1.9433481961335918e-05, "loss": 0.5232, "step": 7932 }, { "epoch": 0.21781987918725976, "grad_norm": 0.34598562121391296, "learning_rate": 1.9433338648390795e-05, "loss": 0.5145, "step": 7933 }, { "epoch": 0.21784733662822625, "grad_norm": 0.3671753704547882, "learning_rate": 1.9433195317849485e-05, "loss": 0.6042, "step": 7934 }, { "epoch": 0.21787479406919275, "grad_norm": 0.3436928689479828, "learning_rate": 1.9433051969712254e-05, "loss": 0.5384, "step": 7935 }, { "epoch": 0.21790225151015924, "grad_norm": 0.3844831585884094, "learning_rate": 1.9432908603979367e-05, "loss": 0.6113, "step": 7936 }, { "epoch": 0.21792970895112576, "grad_norm": 0.3374062776565552, "learning_rate": 1.94327652206511e-05, "loss": 0.5226, "step": 7937 }, { "epoch": 0.21795716639209226, "grad_norm": 0.3655458092689514, "learning_rate": 1.9432621819727714e-05, "loss": 0.6314, "step": 7938 }, { "epoch": 0.21798462383305875, "grad_norm": 0.34000229835510254, "learning_rate": 1.9432478401209475e-05, "loss": 0.3956, "step": 7939 }, { "epoch": 0.21801208127402527, "grad_norm": 0.36685314774513245, "learning_rate": 1.9432334965096658e-05, "loss": 0.5144, "step": 7940 }, { "epoch": 0.21803953871499177, "grad_norm": 0.3515413999557495, "learning_rate": 1.9432191511389524e-05, "loss": 0.4726, "step": 7941 }, { "epoch": 0.21806699615595826, "grad_norm": 0.35673612356185913, "learning_rate": 1.943204804008834e-05, "loss": 0.5079, "step": 7942 }, { "epoch": 0.21809445359692475, "grad_norm": 0.39238718152046204, "learning_rate": 1.943190455119338e-05, "loss": 0.5358, "step": 7943 }, { "epoch": 0.21812191103789128, "grad_norm": 0.3443711996078491, "learning_rate": 1.9431761044704904e-05, "loss": 0.5929, "step": 7944 }, { "epoch": 0.21814936847885777, "grad_norm": 0.35231325030326843, "learning_rate": 1.9431617520623185e-05, "loss": 0.4959, "step": 7945 }, { "epoch": 0.21817682591982426, "grad_norm": 0.39489567279815674, "learning_rate": 1.943147397894849e-05, "loss": 0.6586, "step": 7946 }, { "epoch": 0.21820428336079079, "grad_norm": 0.3550316095352173, "learning_rate": 1.943133041968108e-05, "loss": 0.5165, "step": 7947 }, { "epoch": 0.21823174080175728, "grad_norm": 0.43636754155158997, "learning_rate": 1.9431186842821238e-05, "loss": 0.6744, "step": 7948 }, { "epoch": 0.21825919824272377, "grad_norm": 0.3834889829158783, "learning_rate": 1.943104324836922e-05, "loss": 0.5526, "step": 7949 }, { "epoch": 0.21828665568369027, "grad_norm": 0.7978171110153198, "learning_rate": 1.943089963632529e-05, "loss": 0.6308, "step": 7950 }, { "epoch": 0.2183141131246568, "grad_norm": 0.43618106842041016, "learning_rate": 1.9430756006689727e-05, "loss": 0.5239, "step": 7951 }, { "epoch": 0.21834157056562328, "grad_norm": 0.34848111867904663, "learning_rate": 1.9430612359462795e-05, "loss": 0.546, "step": 7952 }, { "epoch": 0.21836902800658978, "grad_norm": 0.3319282829761505, "learning_rate": 1.943046869464476e-05, "loss": 0.5227, "step": 7953 }, { "epoch": 0.2183964854475563, "grad_norm": 0.38722464442253113, "learning_rate": 1.943032501223589e-05, "loss": 0.4816, "step": 7954 }, { "epoch": 0.2184239428885228, "grad_norm": 0.3183745741844177, "learning_rate": 1.9430181312236453e-05, "loss": 0.4417, "step": 7955 }, { "epoch": 0.2184514003294893, "grad_norm": 0.3960619866847992, "learning_rate": 1.943003759464672e-05, "loss": 0.507, "step": 7956 }, { "epoch": 0.21847885777045578, "grad_norm": 0.3683792054653168, "learning_rate": 1.942989385946696e-05, "loss": 0.5329, "step": 7957 }, { "epoch": 0.2185063152114223, "grad_norm": 0.35868459939956665, "learning_rate": 1.9429750106697435e-05, "loss": 0.5143, "step": 7958 }, { "epoch": 0.2185337726523888, "grad_norm": 0.45949098467826843, "learning_rate": 1.9429606336338417e-05, "loss": 0.4886, "step": 7959 }, { "epoch": 0.2185612300933553, "grad_norm": 0.3489017188549042, "learning_rate": 1.942946254839017e-05, "loss": 0.4686, "step": 7960 }, { "epoch": 0.2185886875343218, "grad_norm": 0.38500669598579407, "learning_rate": 1.942931874285297e-05, "loss": 0.5871, "step": 7961 }, { "epoch": 0.2186161449752883, "grad_norm": 0.3436225950717926, "learning_rate": 1.942917491972708e-05, "loss": 0.5024, "step": 7962 }, { "epoch": 0.2186436024162548, "grad_norm": 0.4188995659351349, "learning_rate": 1.942903107901277e-05, "loss": 0.5545, "step": 7963 }, { "epoch": 0.2186710598572213, "grad_norm": 0.35615140199661255, "learning_rate": 1.9428887220710305e-05, "loss": 0.5169, "step": 7964 }, { "epoch": 0.21869851729818782, "grad_norm": 0.37379491329193115, "learning_rate": 1.942874334481996e-05, "loss": 0.522, "step": 7965 }, { "epoch": 0.2187259747391543, "grad_norm": 0.3643708825111389, "learning_rate": 1.9428599451341997e-05, "loss": 0.6121, "step": 7966 }, { "epoch": 0.2187534321801208, "grad_norm": 0.38752424716949463, "learning_rate": 1.9428455540276688e-05, "loss": 0.5857, "step": 7967 }, { "epoch": 0.21878088962108733, "grad_norm": 0.35472923517227173, "learning_rate": 1.94283116116243e-05, "loss": 0.4401, "step": 7968 }, { "epoch": 0.21880834706205382, "grad_norm": 0.38868609070777893, "learning_rate": 1.94281676653851e-05, "loss": 0.6214, "step": 7969 }, { "epoch": 0.21883580450302031, "grad_norm": 0.41988715529441833, "learning_rate": 1.942802370155936e-05, "loss": 0.615, "step": 7970 }, { "epoch": 0.2188632619439868, "grad_norm": 0.38401252031326294, "learning_rate": 1.9427879720147347e-05, "loss": 0.5053, "step": 7971 }, { "epoch": 0.21889071938495333, "grad_norm": 0.36533430218696594, "learning_rate": 1.942773572114933e-05, "loss": 0.5848, "step": 7972 }, { "epoch": 0.21891817682591982, "grad_norm": 0.39511051774024963, "learning_rate": 1.9427591704565572e-05, "loss": 0.4584, "step": 7973 }, { "epoch": 0.21894563426688632, "grad_norm": 0.40671393275260925, "learning_rate": 1.942744767039635e-05, "loss": 0.5543, "step": 7974 }, { "epoch": 0.21897309170785284, "grad_norm": 0.349764347076416, "learning_rate": 1.942730361864193e-05, "loss": 0.5454, "step": 7975 }, { "epoch": 0.21900054914881933, "grad_norm": 0.3443093001842499, "learning_rate": 1.9427159549302578e-05, "loss": 0.5439, "step": 7976 }, { "epoch": 0.21902800658978583, "grad_norm": 0.3339177966117859, "learning_rate": 1.9427015462378565e-05, "loss": 0.6097, "step": 7977 }, { "epoch": 0.21905546403075232, "grad_norm": 0.44275394082069397, "learning_rate": 1.942687135787016e-05, "loss": 0.5317, "step": 7978 }, { "epoch": 0.21908292147171884, "grad_norm": 0.4820781648159027, "learning_rate": 1.942672723577763e-05, "loss": 0.6381, "step": 7979 }, { "epoch": 0.21911037891268534, "grad_norm": 0.34550103545188904, "learning_rate": 1.9426583096101246e-05, "loss": 0.5536, "step": 7980 }, { "epoch": 0.21913783635365183, "grad_norm": 0.4455774426460266, "learning_rate": 1.9426438938841278e-05, "loss": 0.4805, "step": 7981 }, { "epoch": 0.21916529379461835, "grad_norm": 0.4327555000782013, "learning_rate": 1.942629476399799e-05, "loss": 0.6124, "step": 7982 }, { "epoch": 0.21919275123558485, "grad_norm": 0.3599054217338562, "learning_rate": 1.9426150571571653e-05, "loss": 0.669, "step": 7983 }, { "epoch": 0.21922020867655134, "grad_norm": 0.3785346448421478, "learning_rate": 1.942600636156254e-05, "loss": 0.5789, "step": 7984 }, { "epoch": 0.21924766611751784, "grad_norm": 0.382394939661026, "learning_rate": 1.9425862133970914e-05, "loss": 0.5672, "step": 7985 }, { "epoch": 0.21927512355848436, "grad_norm": 0.3413185775279999, "learning_rate": 1.9425717888797048e-05, "loss": 0.4851, "step": 7986 }, { "epoch": 0.21930258099945085, "grad_norm": 0.3566800653934479, "learning_rate": 1.942557362604121e-05, "loss": 0.4765, "step": 7987 }, { "epoch": 0.21933003844041735, "grad_norm": 0.39806798100471497, "learning_rate": 1.9425429345703668e-05, "loss": 0.5156, "step": 7988 }, { "epoch": 0.21935749588138387, "grad_norm": 0.35243725776672363, "learning_rate": 1.9425285047784694e-05, "loss": 0.5506, "step": 7989 }, { "epoch": 0.21938495332235036, "grad_norm": 0.356689453125, "learning_rate": 1.942514073228455e-05, "loss": 0.5237, "step": 7990 }, { "epoch": 0.21941241076331686, "grad_norm": 0.3501352071762085, "learning_rate": 1.9424996399203518e-05, "loss": 0.5511, "step": 7991 }, { "epoch": 0.21943986820428335, "grad_norm": 0.3923778235912323, "learning_rate": 1.9424852048541852e-05, "loss": 0.5639, "step": 7992 }, { "epoch": 0.21946732564524987, "grad_norm": 0.36888450384140015, "learning_rate": 1.9424707680299834e-05, "loss": 0.4783, "step": 7993 }, { "epoch": 0.21949478308621637, "grad_norm": 0.36135852336883545, "learning_rate": 1.942456329447773e-05, "loss": 0.558, "step": 7994 }, { "epoch": 0.21952224052718286, "grad_norm": 0.3470737934112549, "learning_rate": 1.9424418891075803e-05, "loss": 0.5454, "step": 7995 }, { "epoch": 0.21954969796814938, "grad_norm": 0.45633184909820557, "learning_rate": 1.9424274470094326e-05, "loss": 0.5677, "step": 7996 }, { "epoch": 0.21957715540911588, "grad_norm": 0.3937956392765045, "learning_rate": 1.942413003153357e-05, "loss": 0.6229, "step": 7997 }, { "epoch": 0.21960461285008237, "grad_norm": 0.39656075835227966, "learning_rate": 1.9423985575393806e-05, "loss": 0.4988, "step": 7998 }, { "epoch": 0.21963207029104886, "grad_norm": 0.4055325984954834, "learning_rate": 1.94238411016753e-05, "loss": 0.5542, "step": 7999 }, { "epoch": 0.21965952773201539, "grad_norm": 0.34276989102363586, "learning_rate": 1.9423696610378322e-05, "loss": 0.4681, "step": 8000 }, { "epoch": 0.21968698517298188, "grad_norm": 0.36218729615211487, "learning_rate": 1.9423552101503143e-05, "loss": 0.5736, "step": 8001 }, { "epoch": 0.21971444261394837, "grad_norm": 0.3875357210636139, "learning_rate": 1.942340757505003e-05, "loss": 0.5509, "step": 8002 }, { "epoch": 0.21974190005491487, "grad_norm": 0.33614692091941833, "learning_rate": 1.9423263031019258e-05, "loss": 0.4746, "step": 8003 }, { "epoch": 0.2197693574958814, "grad_norm": 0.3934895396232605, "learning_rate": 1.942311846941109e-05, "loss": 0.5298, "step": 8004 }, { "epoch": 0.21979681493684788, "grad_norm": 0.3472811281681061, "learning_rate": 1.94229738902258e-05, "loss": 0.484, "step": 8005 }, { "epoch": 0.21982427237781438, "grad_norm": 0.3462182879447937, "learning_rate": 1.9422829293463654e-05, "loss": 0.4763, "step": 8006 }, { "epoch": 0.2198517298187809, "grad_norm": 0.37922170758247375, "learning_rate": 1.9422684679124924e-05, "loss": 0.6223, "step": 8007 }, { "epoch": 0.2198791872597474, "grad_norm": 0.37897342443466187, "learning_rate": 1.9422540047209885e-05, "loss": 0.507, "step": 8008 }, { "epoch": 0.2199066447007139, "grad_norm": 0.4368216395378113, "learning_rate": 1.9422395397718796e-05, "loss": 0.6185, "step": 8009 }, { "epoch": 0.21993410214168038, "grad_norm": 0.3697028160095215, "learning_rate": 1.9422250730651933e-05, "loss": 0.5036, "step": 8010 }, { "epoch": 0.2199615595826469, "grad_norm": 0.4216284453868866, "learning_rate": 1.9422106046009567e-05, "loss": 0.542, "step": 8011 }, { "epoch": 0.2199890170236134, "grad_norm": 0.4419436454772949, "learning_rate": 1.9421961343791965e-05, "loss": 0.511, "step": 8012 }, { "epoch": 0.2200164744645799, "grad_norm": 0.39433273673057556, "learning_rate": 1.9421816623999396e-05, "loss": 0.5903, "step": 8013 }, { "epoch": 0.2200439319055464, "grad_norm": 0.35068029165267944, "learning_rate": 1.9421671886632135e-05, "loss": 0.5622, "step": 8014 }, { "epoch": 0.2200713893465129, "grad_norm": 0.35394105315208435, "learning_rate": 1.9421527131690446e-05, "loss": 0.5761, "step": 8015 }, { "epoch": 0.2200988467874794, "grad_norm": 0.3729163408279419, "learning_rate": 1.94213823591746e-05, "loss": 0.4959, "step": 8016 }, { "epoch": 0.2201263042284459, "grad_norm": 0.38093140721321106, "learning_rate": 1.9421237569084877e-05, "loss": 0.463, "step": 8017 }, { "epoch": 0.22015376166941242, "grad_norm": 0.34641382098197937, "learning_rate": 1.9421092761421532e-05, "loss": 0.4343, "step": 8018 }, { "epoch": 0.2201812191103789, "grad_norm": 0.38729119300842285, "learning_rate": 1.9420947936184846e-05, "loss": 0.6339, "step": 8019 }, { "epoch": 0.2202086765513454, "grad_norm": 0.37823647260665894, "learning_rate": 1.9420803093375086e-05, "loss": 0.5138, "step": 8020 }, { "epoch": 0.22023613399231193, "grad_norm": 0.3326253890991211, "learning_rate": 1.942065823299252e-05, "loss": 0.4414, "step": 8021 }, { "epoch": 0.22026359143327842, "grad_norm": 0.3608744144439697, "learning_rate": 1.9420513355037414e-05, "loss": 0.5158, "step": 8022 }, { "epoch": 0.22029104887424492, "grad_norm": 0.3428604006767273, "learning_rate": 1.942036845951005e-05, "loss": 0.4724, "step": 8023 }, { "epoch": 0.2203185063152114, "grad_norm": 0.35999351739883423, "learning_rate": 1.9420223546410692e-05, "loss": 0.5302, "step": 8024 }, { "epoch": 0.22034596375617793, "grad_norm": 0.3220391869544983, "learning_rate": 1.942007861573961e-05, "loss": 0.4553, "step": 8025 }, { "epoch": 0.22037342119714443, "grad_norm": 0.3695027828216553, "learning_rate": 1.9419933667497074e-05, "loss": 0.5158, "step": 8026 }, { "epoch": 0.22040087863811092, "grad_norm": 0.3768795430660248, "learning_rate": 1.9419788701683358e-05, "loss": 0.6201, "step": 8027 }, { "epoch": 0.22042833607907744, "grad_norm": 0.3422100245952606, "learning_rate": 1.9419643718298726e-05, "loss": 0.531, "step": 8028 }, { "epoch": 0.22045579352004394, "grad_norm": 0.3567357361316681, "learning_rate": 1.9419498717343455e-05, "loss": 0.5153, "step": 8029 }, { "epoch": 0.22048325096101043, "grad_norm": 0.3580523431301117, "learning_rate": 1.9419353698817808e-05, "loss": 0.5734, "step": 8030 }, { "epoch": 0.22051070840197692, "grad_norm": 0.3664233982563019, "learning_rate": 1.9419208662722065e-05, "loss": 0.5356, "step": 8031 }, { "epoch": 0.22053816584294345, "grad_norm": 0.33101823925971985, "learning_rate": 1.941906360905649e-05, "loss": 0.4896, "step": 8032 }, { "epoch": 0.22056562328390994, "grad_norm": 0.3464110791683197, "learning_rate": 1.9418918537821355e-05, "loss": 0.4816, "step": 8033 }, { "epoch": 0.22059308072487643, "grad_norm": 0.35937443375587463, "learning_rate": 1.941877344901693e-05, "loss": 0.5487, "step": 8034 }, { "epoch": 0.22062053816584296, "grad_norm": 0.3409962058067322, "learning_rate": 1.9418628342643486e-05, "loss": 0.5046, "step": 8035 }, { "epoch": 0.22064799560680945, "grad_norm": 0.3625980615615845, "learning_rate": 1.9418483218701295e-05, "loss": 0.5077, "step": 8036 }, { "epoch": 0.22067545304777594, "grad_norm": 0.39992594718933105, "learning_rate": 1.9418338077190628e-05, "loss": 0.5232, "step": 8037 }, { "epoch": 0.22070291048874244, "grad_norm": 0.41398885846138, "learning_rate": 1.9418192918111752e-05, "loss": 0.6121, "step": 8038 }, { "epoch": 0.22073036792970896, "grad_norm": 0.40345078706741333, "learning_rate": 1.9418047741464942e-05, "loss": 0.6386, "step": 8039 }, { "epoch": 0.22075782537067545, "grad_norm": 0.3615337908267975, "learning_rate": 1.9417902547250465e-05, "loss": 0.4559, "step": 8040 }, { "epoch": 0.22078528281164195, "grad_norm": 0.364635169506073, "learning_rate": 1.9417757335468596e-05, "loss": 0.5159, "step": 8041 }, { "epoch": 0.22081274025260847, "grad_norm": 0.43943163752555847, "learning_rate": 1.9417612106119606e-05, "loss": 0.5454, "step": 8042 }, { "epoch": 0.22084019769357496, "grad_norm": 0.3705514967441559, "learning_rate": 1.941746685920376e-05, "loss": 0.5734, "step": 8043 }, { "epoch": 0.22086765513454146, "grad_norm": 0.37002331018447876, "learning_rate": 1.9417321594721333e-05, "loss": 0.5527, "step": 8044 }, { "epoch": 0.22089511257550795, "grad_norm": 0.3963075280189514, "learning_rate": 1.9417176312672596e-05, "loss": 0.5722, "step": 8045 }, { "epoch": 0.22092257001647447, "grad_norm": 0.3318404257297516, "learning_rate": 1.941703101305782e-05, "loss": 0.4843, "step": 8046 }, { "epoch": 0.22095002745744097, "grad_norm": 1.0186270475387573, "learning_rate": 1.9416885695877274e-05, "loss": 0.5053, "step": 8047 }, { "epoch": 0.22097748489840746, "grad_norm": 0.4562040865421295, "learning_rate": 1.9416740361131233e-05, "loss": 0.5391, "step": 8048 }, { "epoch": 0.22100494233937398, "grad_norm": 0.41872820258140564, "learning_rate": 1.9416595008819964e-05, "loss": 0.5594, "step": 8049 }, { "epoch": 0.22103239978034048, "grad_norm": 0.34873166680336, "learning_rate": 1.941644963894374e-05, "loss": 0.528, "step": 8050 }, { "epoch": 0.22105985722130697, "grad_norm": 0.5339556932449341, "learning_rate": 1.9416304251502832e-05, "loss": 0.6856, "step": 8051 }, { "epoch": 0.22108731466227347, "grad_norm": 0.39145246148109436, "learning_rate": 1.9416158846497515e-05, "loss": 0.6462, "step": 8052 }, { "epoch": 0.22111477210324, "grad_norm": 0.3652844727039337, "learning_rate": 1.9416013423928054e-05, "loss": 0.5627, "step": 8053 }, { "epoch": 0.22114222954420648, "grad_norm": 0.33739838004112244, "learning_rate": 1.9415867983794722e-05, "loss": 0.4182, "step": 8054 }, { "epoch": 0.22116968698517298, "grad_norm": 0.3650475740432739, "learning_rate": 1.941572252609779e-05, "loss": 0.529, "step": 8055 }, { "epoch": 0.2211971444261395, "grad_norm": 0.3355696499347687, "learning_rate": 1.941557705083753e-05, "loss": 0.5541, "step": 8056 }, { "epoch": 0.221224601867106, "grad_norm": 0.32872310280799866, "learning_rate": 1.9415431558014215e-05, "loss": 0.5699, "step": 8057 }, { "epoch": 0.22125205930807249, "grad_norm": 0.36793744564056396, "learning_rate": 1.9415286047628116e-05, "loss": 0.5362, "step": 8058 }, { "epoch": 0.22127951674903898, "grad_norm": 0.358155757188797, "learning_rate": 1.9415140519679503e-05, "loss": 0.5759, "step": 8059 }, { "epoch": 0.2213069741900055, "grad_norm": 0.32244130969047546, "learning_rate": 1.9414994974168645e-05, "loss": 0.4608, "step": 8060 }, { "epoch": 0.221334431630972, "grad_norm": 0.3774900734424591, "learning_rate": 1.941484941109582e-05, "loss": 0.5871, "step": 8061 }, { "epoch": 0.2213618890719385, "grad_norm": 0.3488394618034363, "learning_rate": 1.9414703830461294e-05, "loss": 0.5162, "step": 8062 }, { "epoch": 0.221389346512905, "grad_norm": 0.37092679738998413, "learning_rate": 1.9414558232265342e-05, "loss": 0.524, "step": 8063 }, { "epoch": 0.2214168039538715, "grad_norm": 0.356804221868515, "learning_rate": 1.9414412616508236e-05, "loss": 0.5263, "step": 8064 }, { "epoch": 0.221444261394838, "grad_norm": 0.39769163727760315, "learning_rate": 1.9414266983190243e-05, "loss": 0.5771, "step": 8065 }, { "epoch": 0.2214717188358045, "grad_norm": 0.41833820939064026, "learning_rate": 1.9414121332311637e-05, "loss": 0.6255, "step": 8066 }, { "epoch": 0.22149917627677101, "grad_norm": 0.3350055515766144, "learning_rate": 1.941397566387269e-05, "loss": 0.5677, "step": 8067 }, { "epoch": 0.2215266337177375, "grad_norm": 0.3791565001010895, "learning_rate": 1.9413829977873676e-05, "loss": 0.5524, "step": 8068 }, { "epoch": 0.221554091158704, "grad_norm": 0.33183881640434265, "learning_rate": 1.9413684274314862e-05, "loss": 0.5177, "step": 8069 }, { "epoch": 0.2215815485996705, "grad_norm": 0.4252963066101074, "learning_rate": 1.9413538553196524e-05, "loss": 0.5096, "step": 8070 }, { "epoch": 0.22160900604063702, "grad_norm": 0.384536474943161, "learning_rate": 1.941339281451893e-05, "loss": 0.5441, "step": 8071 }, { "epoch": 0.2216364634816035, "grad_norm": 0.35665157437324524, "learning_rate": 1.941324705828236e-05, "loss": 0.5445, "step": 8072 }, { "epoch": 0.22166392092257, "grad_norm": 0.35787519812583923, "learning_rate": 1.9413101284487072e-05, "loss": 0.5839, "step": 8073 }, { "epoch": 0.22169137836353653, "grad_norm": 0.40504369139671326, "learning_rate": 1.9412955493133348e-05, "loss": 0.4853, "step": 8074 }, { "epoch": 0.22171883580450302, "grad_norm": 0.34215256571769714, "learning_rate": 1.941280968422146e-05, "loss": 0.5166, "step": 8075 }, { "epoch": 0.22174629324546952, "grad_norm": 0.34900760650634766, "learning_rate": 1.941266385775168e-05, "loss": 0.5137, "step": 8076 }, { "epoch": 0.221773750686436, "grad_norm": 0.33578115701675415, "learning_rate": 1.941251801372427e-05, "loss": 0.5322, "step": 8077 }, { "epoch": 0.22180120812740253, "grad_norm": 0.379616379737854, "learning_rate": 1.941237215213952e-05, "loss": 0.4597, "step": 8078 }, { "epoch": 0.22182866556836903, "grad_norm": 0.34200215339660645, "learning_rate": 1.9412226272997684e-05, "loss": 0.5655, "step": 8079 }, { "epoch": 0.22185612300933552, "grad_norm": 0.372405081987381, "learning_rate": 1.9412080376299044e-05, "loss": 0.5579, "step": 8080 }, { "epoch": 0.22188358045030204, "grad_norm": 0.3557495176792145, "learning_rate": 1.941193446204387e-05, "loss": 0.5228, "step": 8081 }, { "epoch": 0.22191103789126854, "grad_norm": 0.3343425989151001, "learning_rate": 1.9411788530232433e-05, "loss": 0.4215, "step": 8082 }, { "epoch": 0.22193849533223503, "grad_norm": 0.40734240412712097, "learning_rate": 1.9411642580865007e-05, "loss": 0.4984, "step": 8083 }, { "epoch": 0.22196595277320152, "grad_norm": 0.333194375038147, "learning_rate": 1.941149661394187e-05, "loss": 0.5733, "step": 8084 }, { "epoch": 0.22199341021416805, "grad_norm": 0.3769598603248596, "learning_rate": 1.941135062946328e-05, "loss": 0.4221, "step": 8085 }, { "epoch": 0.22202086765513454, "grad_norm": 0.3758370876312256, "learning_rate": 1.9411204627429517e-05, "loss": 0.4936, "step": 8086 }, { "epoch": 0.22204832509610103, "grad_norm": 0.3733516037464142, "learning_rate": 1.9411058607840858e-05, "loss": 0.5278, "step": 8087 }, { "epoch": 0.22207578253706756, "grad_norm": 0.44331783056259155, "learning_rate": 1.941091257069757e-05, "loss": 0.561, "step": 8088 }, { "epoch": 0.22210323997803405, "grad_norm": 0.3653470575809479, "learning_rate": 1.941076651599993e-05, "loss": 0.5397, "step": 8089 }, { "epoch": 0.22213069741900054, "grad_norm": 0.44359949231147766, "learning_rate": 1.94106204437482e-05, "loss": 0.4615, "step": 8090 }, { "epoch": 0.22215815485996704, "grad_norm": 0.35146793723106384, "learning_rate": 1.9410474353942663e-05, "loss": 0.5512, "step": 8091 }, { "epoch": 0.22218561230093356, "grad_norm": 0.3815726637840271, "learning_rate": 1.9410328246583586e-05, "loss": 0.5272, "step": 8092 }, { "epoch": 0.22221306974190005, "grad_norm": 0.3618924617767334, "learning_rate": 1.9410182121671245e-05, "loss": 0.5908, "step": 8093 }, { "epoch": 0.22224052718286655, "grad_norm": 0.3695196211338043, "learning_rate": 1.941003597920591e-05, "loss": 0.611, "step": 8094 }, { "epoch": 0.22226798462383307, "grad_norm": 0.4186045527458191, "learning_rate": 1.9409889819187856e-05, "loss": 0.5999, "step": 8095 }, { "epoch": 0.22229544206479956, "grad_norm": 0.3763331174850464, "learning_rate": 1.9409743641617353e-05, "loss": 0.5191, "step": 8096 }, { "epoch": 0.22232289950576606, "grad_norm": 0.3983600437641144, "learning_rate": 1.9409597446494678e-05, "loss": 0.5703, "step": 8097 }, { "epoch": 0.22235035694673255, "grad_norm": 0.39477670192718506, "learning_rate": 1.9409451233820098e-05, "loss": 0.5549, "step": 8098 }, { "epoch": 0.22237781438769907, "grad_norm": 0.369409441947937, "learning_rate": 1.9409305003593887e-05, "loss": 0.4906, "step": 8099 }, { "epoch": 0.22240527182866557, "grad_norm": 0.3776870369911194, "learning_rate": 1.940915875581632e-05, "loss": 0.5514, "step": 8100 }, { "epoch": 0.22243272926963206, "grad_norm": 0.5322780609130859, "learning_rate": 1.940901249048767e-05, "loss": 0.5302, "step": 8101 }, { "epoch": 0.22246018671059858, "grad_norm": 1.0888235569000244, "learning_rate": 1.9408866207608206e-05, "loss": 0.5463, "step": 8102 }, { "epoch": 0.22248764415156508, "grad_norm": 0.37611550092697144, "learning_rate": 1.9408719907178207e-05, "loss": 0.5496, "step": 8103 }, { "epoch": 0.22251510159253157, "grad_norm": 0.41753020882606506, "learning_rate": 1.940857358919794e-05, "loss": 0.5997, "step": 8104 }, { "epoch": 0.22254255903349807, "grad_norm": 0.3216085135936737, "learning_rate": 1.940842725366768e-05, "loss": 0.4694, "step": 8105 }, { "epoch": 0.2225700164744646, "grad_norm": 0.37054505944252014, "learning_rate": 1.9408280900587704e-05, "loss": 0.4918, "step": 8106 }, { "epoch": 0.22259747391543108, "grad_norm": 0.34465891122817993, "learning_rate": 1.9408134529958277e-05, "loss": 0.466, "step": 8107 }, { "epoch": 0.22262493135639758, "grad_norm": 0.37950631976127625, "learning_rate": 1.940798814177968e-05, "loss": 0.5828, "step": 8108 }, { "epoch": 0.2226523887973641, "grad_norm": 0.34023621678352356, "learning_rate": 1.9407841736052183e-05, "loss": 0.4421, "step": 8109 }, { "epoch": 0.2226798462383306, "grad_norm": 0.4022599160671234, "learning_rate": 1.9407695312776054e-05, "loss": 0.5185, "step": 8110 }, { "epoch": 0.2227073036792971, "grad_norm": 0.3752758800983429, "learning_rate": 1.940754887195157e-05, "loss": 0.5481, "step": 8111 }, { "epoch": 0.22273476112026358, "grad_norm": 0.3862849473953247, "learning_rate": 1.940740241357901e-05, "loss": 0.4948, "step": 8112 }, { "epoch": 0.2227622185612301, "grad_norm": 0.3702285885810852, "learning_rate": 1.940725593765864e-05, "loss": 0.5327, "step": 8113 }, { "epoch": 0.2227896760021966, "grad_norm": 0.35002318024635315, "learning_rate": 1.9407109444190736e-05, "loss": 0.5123, "step": 8114 }, { "epoch": 0.2228171334431631, "grad_norm": 0.34253624081611633, "learning_rate": 1.940696293317557e-05, "loss": 0.5392, "step": 8115 }, { "epoch": 0.2228445908841296, "grad_norm": 0.42128393054008484, "learning_rate": 1.9406816404613415e-05, "loss": 0.5638, "step": 8116 }, { "epoch": 0.2228720483250961, "grad_norm": 0.40481773018836975, "learning_rate": 1.9406669858504542e-05, "loss": 0.5295, "step": 8117 }, { "epoch": 0.2228995057660626, "grad_norm": 0.3671500086784363, "learning_rate": 1.9406523294849232e-05, "loss": 0.5792, "step": 8118 }, { "epoch": 0.2229269632070291, "grad_norm": 0.4110405445098877, "learning_rate": 1.940637671364775e-05, "loss": 0.5985, "step": 8119 }, { "epoch": 0.22295442064799562, "grad_norm": 0.3592608869075775, "learning_rate": 1.940623011490038e-05, "loss": 0.4866, "step": 8120 }, { "epoch": 0.2229818780889621, "grad_norm": 0.3975278437137604, "learning_rate": 1.9406083498607385e-05, "loss": 0.5696, "step": 8121 }, { "epoch": 0.2230093355299286, "grad_norm": 0.8165764212608337, "learning_rate": 1.9405936864769043e-05, "loss": 0.5169, "step": 8122 }, { "epoch": 0.22303679297089513, "grad_norm": 0.3947144150733948, "learning_rate": 1.9405790213385623e-05, "loss": 0.4833, "step": 8123 }, { "epoch": 0.22306425041186162, "grad_norm": 0.331704705953598, "learning_rate": 1.9405643544457403e-05, "loss": 0.4864, "step": 8124 }, { "epoch": 0.22309170785282811, "grad_norm": 0.3743974566459656, "learning_rate": 1.940549685798466e-05, "loss": 0.6104, "step": 8125 }, { "epoch": 0.2231191652937946, "grad_norm": 0.39051494002342224, "learning_rate": 1.9405350153967658e-05, "loss": 0.5959, "step": 8126 }, { "epoch": 0.22314662273476113, "grad_norm": 0.34079813957214355, "learning_rate": 1.9405203432406684e-05, "loss": 0.4791, "step": 8127 }, { "epoch": 0.22317408017572762, "grad_norm": 0.3683178126811981, "learning_rate": 1.9405056693301998e-05, "loss": 0.5776, "step": 8128 }, { "epoch": 0.22320153761669412, "grad_norm": 0.307147353887558, "learning_rate": 1.940490993665388e-05, "loss": 0.4954, "step": 8129 }, { "epoch": 0.22322899505766064, "grad_norm": 0.3666066527366638, "learning_rate": 1.9404763162462603e-05, "loss": 0.5417, "step": 8130 }, { "epoch": 0.22325645249862713, "grad_norm": 0.3381190598011017, "learning_rate": 1.9404616370728443e-05, "loss": 0.4771, "step": 8131 }, { "epoch": 0.22328390993959363, "grad_norm": 0.33160486817359924, "learning_rate": 1.9404469561451668e-05, "loss": 0.4289, "step": 8132 }, { "epoch": 0.22331136738056012, "grad_norm": 0.4484253525733948, "learning_rate": 1.9404322734632563e-05, "loss": 0.4942, "step": 8133 }, { "epoch": 0.22333882482152664, "grad_norm": 0.3430327773094177, "learning_rate": 1.940417589027139e-05, "loss": 0.4425, "step": 8134 }, { "epoch": 0.22336628226249314, "grad_norm": 0.38501861691474915, "learning_rate": 1.9404029028368426e-05, "loss": 0.4384, "step": 8135 }, { "epoch": 0.22339373970345963, "grad_norm": 0.3726552426815033, "learning_rate": 1.9403882148923947e-05, "loss": 0.5079, "step": 8136 }, { "epoch": 0.22342119714442613, "grad_norm": 0.39679890871047974, "learning_rate": 1.9403735251938228e-05, "loss": 0.5707, "step": 8137 }, { "epoch": 0.22344865458539265, "grad_norm": 0.33097171783447266, "learning_rate": 1.9403588337411538e-05, "loss": 0.4561, "step": 8138 }, { "epoch": 0.22347611202635914, "grad_norm": 0.370498389005661, "learning_rate": 1.9403441405344158e-05, "loss": 0.559, "step": 8139 }, { "epoch": 0.22350356946732564, "grad_norm": 0.3464675545692444, "learning_rate": 1.9403294455736358e-05, "loss": 0.5593, "step": 8140 }, { "epoch": 0.22353102690829216, "grad_norm": 0.3214951455593109, "learning_rate": 1.9403147488588414e-05, "loss": 0.455, "step": 8141 }, { "epoch": 0.22355848434925865, "grad_norm": 0.3698200583457947, "learning_rate": 1.94030005039006e-05, "loss": 0.4109, "step": 8142 }, { "epoch": 0.22358594179022515, "grad_norm": 0.37049373984336853, "learning_rate": 1.940285350167318e-05, "loss": 0.5085, "step": 8143 }, { "epoch": 0.22361339923119164, "grad_norm": 0.3983760178089142, "learning_rate": 1.940270648190645e-05, "loss": 0.5381, "step": 8144 }, { "epoch": 0.22364085667215816, "grad_norm": 0.38429495692253113, "learning_rate": 1.9402559444600663e-05, "loss": 0.6073, "step": 8145 }, { "epoch": 0.22366831411312466, "grad_norm": 0.3701890707015991, "learning_rate": 1.9402412389756104e-05, "loss": 0.5944, "step": 8146 }, { "epoch": 0.22369577155409115, "grad_norm": 0.4375746548175812, "learning_rate": 1.940226531737304e-05, "loss": 0.6594, "step": 8147 }, { "epoch": 0.22372322899505767, "grad_norm": 0.3941718339920044, "learning_rate": 1.940211822745176e-05, "loss": 0.6006, "step": 8148 }, { "epoch": 0.22375068643602417, "grad_norm": 0.34504422545433044, "learning_rate": 1.9401971119992523e-05, "loss": 0.4602, "step": 8149 }, { "epoch": 0.22377814387699066, "grad_norm": 0.3628522455692291, "learning_rate": 1.9401823994995608e-05, "loss": 0.4945, "step": 8150 }, { "epoch": 0.22380560131795715, "grad_norm": 0.6254026293754578, "learning_rate": 1.9401676852461292e-05, "loss": 0.5244, "step": 8151 }, { "epoch": 0.22383305875892368, "grad_norm": 0.3853122889995575, "learning_rate": 1.940152969238985e-05, "loss": 0.5559, "step": 8152 }, { "epoch": 0.22386051619989017, "grad_norm": 0.40778622031211853, "learning_rate": 1.940138251478155e-05, "loss": 0.5079, "step": 8153 }, { "epoch": 0.22388797364085666, "grad_norm": 0.3807878792285919, "learning_rate": 1.9401235319636674e-05, "loss": 0.5529, "step": 8154 }, { "epoch": 0.22391543108182319, "grad_norm": 0.4939849078655243, "learning_rate": 1.9401088106955494e-05, "loss": 0.6086, "step": 8155 }, { "epoch": 0.22394288852278968, "grad_norm": 0.5908517241477966, "learning_rate": 1.9400940876738283e-05, "loss": 0.5496, "step": 8156 }, { "epoch": 0.22397034596375617, "grad_norm": 0.35418739914894104, "learning_rate": 1.9400793628985318e-05, "loss": 0.5532, "step": 8157 }, { "epoch": 0.22399780340472267, "grad_norm": 0.8612314462661743, "learning_rate": 1.9400646363696873e-05, "loss": 0.5032, "step": 8158 }, { "epoch": 0.2240252608456892, "grad_norm": 0.36516737937927246, "learning_rate": 1.940049908087322e-05, "loss": 0.4895, "step": 8159 }, { "epoch": 0.22405271828665568, "grad_norm": 0.35089412331581116, "learning_rate": 1.9400351780514638e-05, "loss": 0.5366, "step": 8160 }, { "epoch": 0.22408017572762218, "grad_norm": 0.37289080023765564, "learning_rate": 1.9400204462621398e-05, "loss": 0.5755, "step": 8161 }, { "epoch": 0.2241076331685887, "grad_norm": 0.3228849470615387, "learning_rate": 1.9400057127193777e-05, "loss": 0.4443, "step": 8162 }, { "epoch": 0.2241350906095552, "grad_norm": 0.35565316677093506, "learning_rate": 1.939990977423205e-05, "loss": 0.5044, "step": 8163 }, { "epoch": 0.2241625480505217, "grad_norm": 0.3674179017543793, "learning_rate": 1.9399762403736492e-05, "loss": 0.5079, "step": 8164 }, { "epoch": 0.22419000549148818, "grad_norm": 0.38229286670684814, "learning_rate": 1.9399615015707373e-05, "loss": 0.5112, "step": 8165 }, { "epoch": 0.2242174629324547, "grad_norm": 0.4192439615726471, "learning_rate": 1.9399467610144977e-05, "loss": 0.5181, "step": 8166 }, { "epoch": 0.2242449203734212, "grad_norm": 0.3487095534801483, "learning_rate": 1.939932018704957e-05, "loss": 0.5, "step": 8167 }, { "epoch": 0.2242723778143877, "grad_norm": 0.3799746036529541, "learning_rate": 1.9399172746421434e-05, "loss": 0.5262, "step": 8168 }, { "epoch": 0.2242998352553542, "grad_norm": 0.34884634613990784, "learning_rate": 1.939902528826084e-05, "loss": 0.4214, "step": 8169 }, { "epoch": 0.2243272926963207, "grad_norm": 0.3416674733161926, "learning_rate": 1.9398877812568063e-05, "loss": 0.512, "step": 8170 }, { "epoch": 0.2243547501372872, "grad_norm": 0.34479090571403503, "learning_rate": 1.939873031934338e-05, "loss": 0.577, "step": 8171 }, { "epoch": 0.2243822075782537, "grad_norm": 0.3860377371311188, "learning_rate": 1.9398582808587062e-05, "loss": 0.6078, "step": 8172 }, { "epoch": 0.22440966501922022, "grad_norm": 0.35651302337646484, "learning_rate": 1.9398435280299394e-05, "loss": 0.5435, "step": 8173 }, { "epoch": 0.2244371224601867, "grad_norm": 0.33736810088157654, "learning_rate": 1.9398287734480637e-05, "loss": 0.4772, "step": 8174 }, { "epoch": 0.2244645799011532, "grad_norm": 0.389241099357605, "learning_rate": 1.939814017113108e-05, "loss": 0.6196, "step": 8175 }, { "epoch": 0.22449203734211973, "grad_norm": 0.3848176896572113, "learning_rate": 1.939799259025099e-05, "loss": 0.5439, "step": 8176 }, { "epoch": 0.22451949478308622, "grad_norm": 0.3971244990825653, "learning_rate": 1.9397844991840646e-05, "loss": 0.6287, "step": 8177 }, { "epoch": 0.22454695222405271, "grad_norm": 0.43334460258483887, "learning_rate": 1.9397697375900314e-05, "loss": 0.6153, "step": 8178 }, { "epoch": 0.2245744096650192, "grad_norm": 0.35335347056388855, "learning_rate": 1.9397549742430283e-05, "loss": 0.5235, "step": 8179 }, { "epoch": 0.22460186710598573, "grad_norm": 0.3384202718734741, "learning_rate": 1.9397402091430827e-05, "loss": 0.4728, "step": 8180 }, { "epoch": 0.22462932454695222, "grad_norm": 0.3337043225765228, "learning_rate": 1.9397254422902208e-05, "loss": 0.4552, "step": 8181 }, { "epoch": 0.22465678198791872, "grad_norm": 0.3568907678127289, "learning_rate": 1.9397106736844716e-05, "loss": 0.5303, "step": 8182 }, { "epoch": 0.22468423942888524, "grad_norm": 0.32075825333595276, "learning_rate": 1.939695903325862e-05, "loss": 0.4616, "step": 8183 }, { "epoch": 0.22471169686985173, "grad_norm": 0.3909532427787781, "learning_rate": 1.939681131214419e-05, "loss": 0.5744, "step": 8184 }, { "epoch": 0.22473915431081823, "grad_norm": 0.4133884906768799, "learning_rate": 1.9396663573501718e-05, "loss": 0.5627, "step": 8185 }, { "epoch": 0.22476661175178472, "grad_norm": 0.3536604344844818, "learning_rate": 1.9396515817331463e-05, "loss": 0.5931, "step": 8186 }, { "epoch": 0.22479406919275124, "grad_norm": 0.35507336258888245, "learning_rate": 1.9396368043633708e-05, "loss": 0.5154, "step": 8187 }, { "epoch": 0.22482152663371774, "grad_norm": 0.3671905994415283, "learning_rate": 1.9396220252408727e-05, "loss": 0.5213, "step": 8188 }, { "epoch": 0.22484898407468423, "grad_norm": 0.36801913380622864, "learning_rate": 1.93960724436568e-05, "loss": 0.5455, "step": 8189 }, { "epoch": 0.22487644151565075, "grad_norm": 0.3268343210220337, "learning_rate": 1.9395924617378196e-05, "loss": 0.4839, "step": 8190 }, { "epoch": 0.22490389895661725, "grad_norm": 0.33030569553375244, "learning_rate": 1.939577677357319e-05, "loss": 0.4389, "step": 8191 }, { "epoch": 0.22493135639758374, "grad_norm": 0.3602979779243469, "learning_rate": 1.9395628912242064e-05, "loss": 0.5704, "step": 8192 }, { "epoch": 0.22495881383855024, "grad_norm": 0.3248290419578552, "learning_rate": 1.9395481033385094e-05, "loss": 0.489, "step": 8193 }, { "epoch": 0.22498627127951676, "grad_norm": 0.40111038088798523, "learning_rate": 1.9395333137002552e-05, "loss": 0.6126, "step": 8194 }, { "epoch": 0.22501372872048325, "grad_norm": 0.3910524249076843, "learning_rate": 1.9395185223094718e-05, "loss": 0.548, "step": 8195 }, { "epoch": 0.22504118616144975, "grad_norm": 0.3715423047542572, "learning_rate": 1.939503729166186e-05, "loss": 0.5686, "step": 8196 }, { "epoch": 0.22506864360241627, "grad_norm": 0.36621761322021484, "learning_rate": 1.939488934270426e-05, "loss": 0.6206, "step": 8197 }, { "epoch": 0.22509610104338276, "grad_norm": 0.3830116093158722, "learning_rate": 1.9394741376222194e-05, "loss": 0.5879, "step": 8198 }, { "epoch": 0.22512355848434926, "grad_norm": 0.3857790231704712, "learning_rate": 1.939459339221594e-05, "loss": 0.4498, "step": 8199 }, { "epoch": 0.22515101592531575, "grad_norm": 0.3503985106945038, "learning_rate": 1.9394445390685766e-05, "loss": 0.4833, "step": 8200 }, { "epoch": 0.22517847336628227, "grad_norm": 0.40891578793525696, "learning_rate": 1.9394297371631955e-05, "loss": 0.4714, "step": 8201 }, { "epoch": 0.22520593080724877, "grad_norm": 0.3622020483016968, "learning_rate": 1.939414933505478e-05, "loss": 0.5441, "step": 8202 }, { "epoch": 0.22523338824821526, "grad_norm": 0.4088347256183624, "learning_rate": 1.9394001280954517e-05, "loss": 0.6546, "step": 8203 }, { "epoch": 0.22526084568918175, "grad_norm": 0.35266146063804626, "learning_rate": 1.939385320933145e-05, "loss": 0.5449, "step": 8204 }, { "epoch": 0.22528830313014828, "grad_norm": 0.38363903760910034, "learning_rate": 1.939370512018584e-05, "loss": 0.4649, "step": 8205 }, { "epoch": 0.22531576057111477, "grad_norm": 0.32043397426605225, "learning_rate": 1.9393557013517978e-05, "loss": 0.4311, "step": 8206 }, { "epoch": 0.22534321801208126, "grad_norm": 0.35859671235084534, "learning_rate": 1.939340888932813e-05, "loss": 0.5176, "step": 8207 }, { "epoch": 0.2253706754530478, "grad_norm": 0.37504032254219055, "learning_rate": 1.939326074761658e-05, "loss": 0.5584, "step": 8208 }, { "epoch": 0.22539813289401428, "grad_norm": 0.39444440603256226, "learning_rate": 1.93931125883836e-05, "loss": 0.4843, "step": 8209 }, { "epoch": 0.22542559033498077, "grad_norm": 0.3432862460613251, "learning_rate": 1.939296441162946e-05, "loss": 0.5092, "step": 8210 }, { "epoch": 0.22545304777594727, "grad_norm": 0.4030492901802063, "learning_rate": 1.9392816217354454e-05, "loss": 0.5344, "step": 8211 }, { "epoch": 0.2254805052169138, "grad_norm": 0.3438984155654907, "learning_rate": 1.939266800555884e-05, "loss": 0.5418, "step": 8212 }, { "epoch": 0.22550796265788028, "grad_norm": 0.4286535084247589, "learning_rate": 1.9392519776242907e-05, "loss": 0.4657, "step": 8213 }, { "epoch": 0.22553542009884678, "grad_norm": 0.3363277018070221, "learning_rate": 1.9392371529406927e-05, "loss": 0.4964, "step": 8214 }, { "epoch": 0.2255628775398133, "grad_norm": 0.3975432813167572, "learning_rate": 1.9392223265051175e-05, "loss": 0.5285, "step": 8215 }, { "epoch": 0.2255903349807798, "grad_norm": 0.3355986773967743, "learning_rate": 1.939207498317593e-05, "loss": 0.5071, "step": 8216 }, { "epoch": 0.2256177924217463, "grad_norm": 0.3327268958091736, "learning_rate": 1.9391926683781466e-05, "loss": 0.4665, "step": 8217 }, { "epoch": 0.22564524986271278, "grad_norm": 0.3756018877029419, "learning_rate": 1.939177836686806e-05, "loss": 0.6186, "step": 8218 }, { "epoch": 0.2256727073036793, "grad_norm": 0.371738463640213, "learning_rate": 1.939163003243599e-05, "loss": 0.632, "step": 8219 }, { "epoch": 0.2257001647446458, "grad_norm": 0.3623785078525543, "learning_rate": 1.9391481680485535e-05, "loss": 0.5015, "step": 8220 }, { "epoch": 0.2257276221856123, "grad_norm": 0.35255128145217896, "learning_rate": 1.9391333311016968e-05, "loss": 0.5564, "step": 8221 }, { "epoch": 0.22575507962657881, "grad_norm": 0.34015733003616333, "learning_rate": 1.9391184924030568e-05, "loss": 0.5311, "step": 8222 }, { "epoch": 0.2257825370675453, "grad_norm": 0.33660411834716797, "learning_rate": 1.939103651952661e-05, "loss": 0.3982, "step": 8223 }, { "epoch": 0.2258099945085118, "grad_norm": 0.425221711397171, "learning_rate": 1.9390888097505373e-05, "loss": 0.5843, "step": 8224 }, { "epoch": 0.2258374519494783, "grad_norm": 0.37836596369743347, "learning_rate": 1.939073965796713e-05, "loss": 0.5103, "step": 8225 }, { "epoch": 0.22586490939044482, "grad_norm": 0.3715420961380005, "learning_rate": 1.9390591200912162e-05, "loss": 0.5559, "step": 8226 }, { "epoch": 0.2258923668314113, "grad_norm": 0.49609532952308655, "learning_rate": 1.9390442726340742e-05, "loss": 0.601, "step": 8227 }, { "epoch": 0.2259198242723778, "grad_norm": 0.3451451361179352, "learning_rate": 1.939029423425315e-05, "loss": 0.5227, "step": 8228 }, { "epoch": 0.22594728171334433, "grad_norm": 0.33748912811279297, "learning_rate": 1.9390145724649665e-05, "loss": 0.5151, "step": 8229 }, { "epoch": 0.22597473915431082, "grad_norm": 0.3437998294830322, "learning_rate": 1.938999719753056e-05, "loss": 0.4683, "step": 8230 }, { "epoch": 0.22600219659527732, "grad_norm": 0.40850162506103516, "learning_rate": 1.9389848652896113e-05, "loss": 0.5356, "step": 8231 }, { "epoch": 0.2260296540362438, "grad_norm": 1.3040398359298706, "learning_rate": 1.9389700090746602e-05, "loss": 0.5616, "step": 8232 }, { "epoch": 0.22605711147721033, "grad_norm": 0.42423245310783386, "learning_rate": 1.9389551511082303e-05, "loss": 0.5635, "step": 8233 }, { "epoch": 0.22608456891817683, "grad_norm": 0.3468954563140869, "learning_rate": 1.9389402913903495e-05, "loss": 0.54, "step": 8234 }, { "epoch": 0.22611202635914332, "grad_norm": 0.3918916881084442, "learning_rate": 1.938925429921045e-05, "loss": 0.4775, "step": 8235 }, { "epoch": 0.22613948380010984, "grad_norm": 0.4532139301300049, "learning_rate": 1.938910566700345e-05, "loss": 0.6267, "step": 8236 }, { "epoch": 0.22616694124107634, "grad_norm": 0.3539528250694275, "learning_rate": 1.9388957017282775e-05, "loss": 0.4387, "step": 8237 }, { "epoch": 0.22619439868204283, "grad_norm": 0.36046403646469116, "learning_rate": 1.9388808350048697e-05, "loss": 0.5509, "step": 8238 }, { "epoch": 0.22622185612300932, "grad_norm": 0.3788404166698456, "learning_rate": 1.9388659665301494e-05, "loss": 0.5497, "step": 8239 }, { "epoch": 0.22624931356397585, "grad_norm": 0.3916245400905609, "learning_rate": 1.938851096304144e-05, "loss": 0.5745, "step": 8240 }, { "epoch": 0.22627677100494234, "grad_norm": 0.3724147379398346, "learning_rate": 1.9388362243268823e-05, "loss": 0.5605, "step": 8241 }, { "epoch": 0.22630422844590883, "grad_norm": 0.3624235987663269, "learning_rate": 1.9388213505983914e-05, "loss": 0.5241, "step": 8242 }, { "epoch": 0.22633168588687536, "grad_norm": 0.5791997909545898, "learning_rate": 1.9388064751186985e-05, "loss": 0.5601, "step": 8243 }, { "epoch": 0.22635914332784185, "grad_norm": 0.4999411702156067, "learning_rate": 1.9387915978878324e-05, "loss": 0.5273, "step": 8244 }, { "epoch": 0.22638660076880834, "grad_norm": 0.4118257761001587, "learning_rate": 1.93877671890582e-05, "loss": 0.5306, "step": 8245 }, { "epoch": 0.22641405820977484, "grad_norm": 0.3871108293533325, "learning_rate": 1.9387618381726897e-05, "loss": 0.5492, "step": 8246 }, { "epoch": 0.22644151565074136, "grad_norm": 0.37442949414253235, "learning_rate": 1.9387469556884687e-05, "loss": 0.5591, "step": 8247 }, { "epoch": 0.22646897309170785, "grad_norm": 0.37944644689559937, "learning_rate": 1.938732071453185e-05, "loss": 0.5953, "step": 8248 }, { "epoch": 0.22649643053267435, "grad_norm": 0.4290885329246521, "learning_rate": 1.9387171854668665e-05, "loss": 0.579, "step": 8249 }, { "epoch": 0.22652388797364087, "grad_norm": 0.3293841481208801, "learning_rate": 1.938702297729541e-05, "loss": 0.4879, "step": 8250 }, { "epoch": 0.22655134541460736, "grad_norm": 0.44695019721984863, "learning_rate": 1.938687408241236e-05, "loss": 0.5447, "step": 8251 }, { "epoch": 0.22657880285557386, "grad_norm": 0.36266106367111206, "learning_rate": 1.938672517001979e-05, "loss": 0.5833, "step": 8252 }, { "epoch": 0.22660626029654035, "grad_norm": 0.38067343831062317, "learning_rate": 1.9386576240117987e-05, "loss": 0.4788, "step": 8253 }, { "epoch": 0.22663371773750687, "grad_norm": 0.3255939483642578, "learning_rate": 1.938642729270722e-05, "loss": 0.4983, "step": 8254 }, { "epoch": 0.22666117517847337, "grad_norm": 0.3380301892757416, "learning_rate": 1.9386278327787772e-05, "loss": 0.3782, "step": 8255 }, { "epoch": 0.22668863261943986, "grad_norm": 0.3777279257774353, "learning_rate": 1.938612934535992e-05, "loss": 0.525, "step": 8256 }, { "epoch": 0.22671609006040638, "grad_norm": 0.3482208847999573, "learning_rate": 1.9385980345423938e-05, "loss": 0.5632, "step": 8257 }, { "epoch": 0.22674354750137288, "grad_norm": 0.3751506507396698, "learning_rate": 1.938583132798011e-05, "loss": 0.4955, "step": 8258 }, { "epoch": 0.22677100494233937, "grad_norm": 0.48257002234458923, "learning_rate": 1.938568229302871e-05, "loss": 0.642, "step": 8259 }, { "epoch": 0.22679846238330587, "grad_norm": 0.36828184127807617, "learning_rate": 1.9385533240570018e-05, "loss": 0.5496, "step": 8260 }, { "epoch": 0.2268259198242724, "grad_norm": 0.36173102259635925, "learning_rate": 1.938538417060431e-05, "loss": 0.5731, "step": 8261 }, { "epoch": 0.22685337726523888, "grad_norm": 0.404882550239563, "learning_rate": 1.9385235083131863e-05, "loss": 0.5991, "step": 8262 }, { "epoch": 0.22688083470620538, "grad_norm": 0.42122867703437805, "learning_rate": 1.938508597815296e-05, "loss": 0.5175, "step": 8263 }, { "epoch": 0.2269082921471719, "grad_norm": 0.41757726669311523, "learning_rate": 1.938493685566788e-05, "loss": 0.5439, "step": 8264 }, { "epoch": 0.2269357495881384, "grad_norm": 0.3838428854942322, "learning_rate": 1.938478771567689e-05, "loss": 0.5365, "step": 8265 }, { "epoch": 0.22696320702910489, "grad_norm": 0.36478307843208313, "learning_rate": 1.9384638558180277e-05, "loss": 0.4969, "step": 8266 }, { "epoch": 0.22699066447007138, "grad_norm": 0.3522917628288269, "learning_rate": 1.9384489383178322e-05, "loss": 0.4406, "step": 8267 }, { "epoch": 0.2270181219110379, "grad_norm": 0.3297838568687439, "learning_rate": 1.9384340190671293e-05, "loss": 0.4684, "step": 8268 }, { "epoch": 0.2270455793520044, "grad_norm": 0.36850079894065857, "learning_rate": 1.9384190980659482e-05, "loss": 0.5012, "step": 8269 }, { "epoch": 0.2270730367929709, "grad_norm": 0.3305157721042633, "learning_rate": 1.9384041753143155e-05, "loss": 0.5177, "step": 8270 }, { "epoch": 0.22710049423393738, "grad_norm": 0.38315239548683167, "learning_rate": 1.9383892508122594e-05, "loss": 0.5439, "step": 8271 }, { "epoch": 0.2271279516749039, "grad_norm": 0.3304203450679779, "learning_rate": 1.938374324559808e-05, "loss": 0.4765, "step": 8272 }, { "epoch": 0.2271554091158704, "grad_norm": 0.3577759265899658, "learning_rate": 1.938359396556989e-05, "loss": 0.4821, "step": 8273 }, { "epoch": 0.2271828665568369, "grad_norm": 0.43631523847579956, "learning_rate": 1.9383444668038302e-05, "loss": 0.4951, "step": 8274 }, { "epoch": 0.22721032399780342, "grad_norm": 0.3672175407409668, "learning_rate": 1.9383295353003597e-05, "loss": 0.4348, "step": 8275 }, { "epoch": 0.2272377814387699, "grad_norm": 0.3975318968296051, "learning_rate": 1.9383146020466047e-05, "loss": 0.5396, "step": 8276 }, { "epoch": 0.2272652388797364, "grad_norm": 0.3787493109703064, "learning_rate": 1.938299667042594e-05, "loss": 0.4477, "step": 8277 }, { "epoch": 0.2272926963207029, "grad_norm": 0.3545624911785126, "learning_rate": 1.9382847302883548e-05, "loss": 0.5539, "step": 8278 }, { "epoch": 0.22732015376166942, "grad_norm": 0.34871143102645874, "learning_rate": 1.938269791783915e-05, "loss": 0.4938, "step": 8279 }, { "epoch": 0.2273476112026359, "grad_norm": 0.37139931321144104, "learning_rate": 1.9382548515293026e-05, "loss": 0.5272, "step": 8280 }, { "epoch": 0.2273750686436024, "grad_norm": 0.3813760578632355, "learning_rate": 1.9382399095245453e-05, "loss": 0.4886, "step": 8281 }, { "epoch": 0.22740252608456893, "grad_norm": 0.3447664678096771, "learning_rate": 1.9382249657696716e-05, "loss": 0.586, "step": 8282 }, { "epoch": 0.22742998352553542, "grad_norm": 0.3689160645008087, "learning_rate": 1.938210020264708e-05, "loss": 0.633, "step": 8283 }, { "epoch": 0.22745744096650192, "grad_norm": 0.3481333553791046, "learning_rate": 1.9381950730096842e-05, "loss": 0.5802, "step": 8284 }, { "epoch": 0.2274848984074684, "grad_norm": 0.32448044419288635, "learning_rate": 1.9381801240046268e-05, "loss": 0.4434, "step": 8285 }, { "epoch": 0.22751235584843493, "grad_norm": 0.3732805848121643, "learning_rate": 1.9381651732495638e-05, "loss": 0.5638, "step": 8286 }, { "epoch": 0.22753981328940143, "grad_norm": 0.3274799883365631, "learning_rate": 1.9381502207445236e-05, "loss": 0.5158, "step": 8287 }, { "epoch": 0.22756727073036792, "grad_norm": 0.41455018520355225, "learning_rate": 1.9381352664895337e-05, "loss": 0.4542, "step": 8288 }, { "epoch": 0.22759472817133444, "grad_norm": 0.4866873323917389, "learning_rate": 1.938120310484622e-05, "loss": 0.5796, "step": 8289 }, { "epoch": 0.22762218561230094, "grad_norm": 0.33844077587127686, "learning_rate": 1.9381053527298167e-05, "loss": 0.5087, "step": 8290 }, { "epoch": 0.22764964305326743, "grad_norm": 0.3990241289138794, "learning_rate": 1.938090393225146e-05, "loss": 0.5487, "step": 8291 }, { "epoch": 0.22767710049423392, "grad_norm": 0.394076406955719, "learning_rate": 1.9380754319706365e-05, "loss": 0.4936, "step": 8292 }, { "epoch": 0.22770455793520045, "grad_norm": 0.37080851197242737, "learning_rate": 1.938060468966317e-05, "loss": 0.5943, "step": 8293 }, { "epoch": 0.22773201537616694, "grad_norm": 0.33536213636398315, "learning_rate": 1.9380455042122158e-05, "loss": 0.5476, "step": 8294 }, { "epoch": 0.22775947281713343, "grad_norm": 0.37987062335014343, "learning_rate": 1.93803053770836e-05, "loss": 0.534, "step": 8295 }, { "epoch": 0.22778693025809996, "grad_norm": 0.3935850262641907, "learning_rate": 1.938015569454778e-05, "loss": 0.5242, "step": 8296 }, { "epoch": 0.22781438769906645, "grad_norm": 0.3473701775074005, "learning_rate": 1.9380005994514978e-05, "loss": 0.4842, "step": 8297 }, { "epoch": 0.22784184514003294, "grad_norm": 0.46271824836730957, "learning_rate": 1.9379856276985466e-05, "loss": 0.5722, "step": 8298 }, { "epoch": 0.22786930258099944, "grad_norm": 0.4648285210132599, "learning_rate": 1.937970654195953e-05, "loss": 0.5331, "step": 8299 }, { "epoch": 0.22789676002196596, "grad_norm": 0.3610915243625641, "learning_rate": 1.937955678943745e-05, "loss": 0.5108, "step": 8300 }, { "epoch": 0.22792421746293245, "grad_norm": 0.7849158048629761, "learning_rate": 1.93794070194195e-05, "loss": 0.6301, "step": 8301 }, { "epoch": 0.22795167490389895, "grad_norm": 0.3651820123195648, "learning_rate": 1.9379257231905964e-05, "loss": 0.5084, "step": 8302 }, { "epoch": 0.22797913234486547, "grad_norm": 0.35178637504577637, "learning_rate": 1.9379107426897122e-05, "loss": 0.5794, "step": 8303 }, { "epoch": 0.22800658978583196, "grad_norm": 0.41504713892936707, "learning_rate": 1.9378957604393245e-05, "loss": 0.541, "step": 8304 }, { "epoch": 0.22803404722679846, "grad_norm": 0.35744708776474, "learning_rate": 1.9378807764394625e-05, "loss": 0.4818, "step": 8305 }, { "epoch": 0.22806150466776495, "grad_norm": 0.36151817440986633, "learning_rate": 1.937865790690153e-05, "loss": 0.5922, "step": 8306 }, { "epoch": 0.22808896210873147, "grad_norm": 0.3591896891593933, "learning_rate": 1.937850803191425e-05, "loss": 0.5875, "step": 8307 }, { "epoch": 0.22811641954969797, "grad_norm": 0.40182918310165405, "learning_rate": 1.9378358139433057e-05, "loss": 0.5161, "step": 8308 }, { "epoch": 0.22814387699066446, "grad_norm": 0.5497351288795471, "learning_rate": 1.9378208229458232e-05, "loss": 0.5613, "step": 8309 }, { "epoch": 0.22817133443163098, "grad_norm": 0.3608291447162628, "learning_rate": 1.9378058301990056e-05, "loss": 0.5138, "step": 8310 }, { "epoch": 0.22819879187259748, "grad_norm": 0.349816232919693, "learning_rate": 1.9377908357028803e-05, "loss": 0.5403, "step": 8311 }, { "epoch": 0.22822624931356397, "grad_norm": 0.400680273771286, "learning_rate": 1.9377758394574765e-05, "loss": 0.5153, "step": 8312 }, { "epoch": 0.22825370675453047, "grad_norm": 0.3917526304721832, "learning_rate": 1.9377608414628212e-05, "loss": 0.5556, "step": 8313 }, { "epoch": 0.228281164195497, "grad_norm": 0.34170255064964294, "learning_rate": 1.9377458417189426e-05, "loss": 0.5511, "step": 8314 }, { "epoch": 0.22830862163646348, "grad_norm": 0.40340352058410645, "learning_rate": 1.937730840225869e-05, "loss": 0.5398, "step": 8315 }, { "epoch": 0.22833607907742998, "grad_norm": 0.3228435218334198, "learning_rate": 1.937715836983628e-05, "loss": 0.5185, "step": 8316 }, { "epoch": 0.2283635365183965, "grad_norm": 0.3587694764137268, "learning_rate": 1.9377008319922473e-05, "loss": 0.5958, "step": 8317 }, { "epoch": 0.228390993959363, "grad_norm": 0.5970098376274109, "learning_rate": 1.9376858252517556e-05, "loss": 0.4964, "step": 8318 }, { "epoch": 0.2284184514003295, "grad_norm": 0.3690517842769623, "learning_rate": 1.9376708167621805e-05, "loss": 0.4465, "step": 8319 }, { "epoch": 0.22844590884129598, "grad_norm": 0.37074998021125793, "learning_rate": 1.93765580652355e-05, "loss": 0.5765, "step": 8320 }, { "epoch": 0.2284733662822625, "grad_norm": 0.3895581066608429, "learning_rate": 1.9376407945358923e-05, "loss": 0.5424, "step": 8321 }, { "epoch": 0.228500823723229, "grad_norm": 0.43124139308929443, "learning_rate": 1.937625780799235e-05, "loss": 0.5628, "step": 8322 }, { "epoch": 0.2285282811641955, "grad_norm": 0.40431898832321167, "learning_rate": 1.937610765313607e-05, "loss": 0.5221, "step": 8323 }, { "epoch": 0.228555738605162, "grad_norm": 0.43194764852523804, "learning_rate": 1.937595748079035e-05, "loss": 0.6126, "step": 8324 }, { "epoch": 0.2285831960461285, "grad_norm": 0.35418859124183655, "learning_rate": 1.9375807290955476e-05, "loss": 0.4963, "step": 8325 }, { "epoch": 0.228610653487095, "grad_norm": 0.37133777141571045, "learning_rate": 1.937565708363173e-05, "loss": 0.5561, "step": 8326 }, { "epoch": 0.2286381109280615, "grad_norm": 0.36189162731170654, "learning_rate": 1.9375506858819395e-05, "loss": 0.5532, "step": 8327 }, { "epoch": 0.22866556836902802, "grad_norm": 0.4030328094959259, "learning_rate": 1.9375356616518742e-05, "loss": 0.47, "step": 8328 }, { "epoch": 0.2286930258099945, "grad_norm": 0.3655211627483368, "learning_rate": 1.9375206356730064e-05, "loss": 0.5113, "step": 8329 }, { "epoch": 0.228720483250961, "grad_norm": 0.39692947268486023, "learning_rate": 1.937505607945363e-05, "loss": 0.5941, "step": 8330 }, { "epoch": 0.22874794069192753, "grad_norm": 0.3223343789577484, "learning_rate": 1.9374905784689723e-05, "loss": 0.4245, "step": 8331 }, { "epoch": 0.22877539813289402, "grad_norm": 0.37122848629951477, "learning_rate": 1.9374755472438623e-05, "loss": 0.6966, "step": 8332 }, { "epoch": 0.22880285557386051, "grad_norm": 0.30354607105255127, "learning_rate": 1.9374605142700616e-05, "loss": 0.4847, "step": 8333 }, { "epoch": 0.228830313014827, "grad_norm": 0.34248772263526917, "learning_rate": 1.9374454795475976e-05, "loss": 0.5452, "step": 8334 }, { "epoch": 0.22885777045579353, "grad_norm": 0.47256413102149963, "learning_rate": 1.9374304430764988e-05, "loss": 0.551, "step": 8335 }, { "epoch": 0.22888522789676002, "grad_norm": 0.3832658529281616, "learning_rate": 1.937415404856793e-05, "loss": 0.5174, "step": 8336 }, { "epoch": 0.22891268533772652, "grad_norm": 0.35880351066589355, "learning_rate": 1.9374003648885083e-05, "loss": 0.5184, "step": 8337 }, { "epoch": 0.228940142778693, "grad_norm": 0.36333250999450684, "learning_rate": 1.9373853231716728e-05, "loss": 0.5274, "step": 8338 }, { "epoch": 0.22896760021965953, "grad_norm": 0.4805937111377716, "learning_rate": 1.937370279706314e-05, "loss": 0.4883, "step": 8339 }, { "epoch": 0.22899505766062603, "grad_norm": 0.5494170784950256, "learning_rate": 1.937355234492461e-05, "loss": 0.5185, "step": 8340 }, { "epoch": 0.22902251510159252, "grad_norm": 0.3449389636516571, "learning_rate": 1.9373401875301407e-05, "loss": 0.4778, "step": 8341 }, { "epoch": 0.22904997254255904, "grad_norm": 0.33531445264816284, "learning_rate": 1.9373251388193823e-05, "loss": 0.5024, "step": 8342 }, { "epoch": 0.22907742998352554, "grad_norm": 0.41718944907188416, "learning_rate": 1.937310088360213e-05, "loss": 0.5451, "step": 8343 }, { "epoch": 0.22910488742449203, "grad_norm": 0.3556210398674011, "learning_rate": 1.9372950361526617e-05, "loss": 0.5563, "step": 8344 }, { "epoch": 0.22913234486545853, "grad_norm": 0.3673871159553528, "learning_rate": 1.9372799821967558e-05, "loss": 0.4622, "step": 8345 }, { "epoch": 0.22915980230642505, "grad_norm": 0.34238117933273315, "learning_rate": 1.9372649264925237e-05, "loss": 0.5268, "step": 8346 }, { "epoch": 0.22918725974739154, "grad_norm": 0.42789629101753235, "learning_rate": 1.937249869039993e-05, "loss": 0.5243, "step": 8347 }, { "epoch": 0.22921471718835804, "grad_norm": 0.39402592182159424, "learning_rate": 1.9372348098391925e-05, "loss": 0.4856, "step": 8348 }, { "epoch": 0.22924217462932456, "grad_norm": 0.3448143005371094, "learning_rate": 1.9372197488901497e-05, "loss": 0.5623, "step": 8349 }, { "epoch": 0.22926963207029105, "grad_norm": 0.39573538303375244, "learning_rate": 1.9372046861928932e-05, "loss": 0.5098, "step": 8350 }, { "epoch": 0.22929708951125755, "grad_norm": 0.360020250082016, "learning_rate": 1.937189621747451e-05, "loss": 0.5175, "step": 8351 }, { "epoch": 0.22932454695222404, "grad_norm": 0.33992478251457214, "learning_rate": 1.9371745555538504e-05, "loss": 0.4483, "step": 8352 }, { "epoch": 0.22935200439319056, "grad_norm": 0.36409255862236023, "learning_rate": 1.9371594876121207e-05, "loss": 0.5202, "step": 8353 }, { "epoch": 0.22937946183415706, "grad_norm": 0.3408110737800598, "learning_rate": 1.9371444179222894e-05, "loss": 0.4872, "step": 8354 }, { "epoch": 0.22940691927512355, "grad_norm": 0.4453859329223633, "learning_rate": 1.9371293464843844e-05, "loss": 0.436, "step": 8355 }, { "epoch": 0.22943437671609007, "grad_norm": 0.3627023994922638, "learning_rate": 1.937114273298434e-05, "loss": 0.5081, "step": 8356 }, { "epoch": 0.22946183415705657, "grad_norm": 0.35361307859420776, "learning_rate": 1.9370991983644667e-05, "loss": 0.5822, "step": 8357 }, { "epoch": 0.22948929159802306, "grad_norm": 0.3736143708229065, "learning_rate": 1.9370841216825103e-05, "loss": 0.5692, "step": 8358 }, { "epoch": 0.22951674903898955, "grad_norm": 0.4052828550338745, "learning_rate": 1.9370690432525926e-05, "loss": 0.5243, "step": 8359 }, { "epoch": 0.22954420647995608, "grad_norm": 0.3726356327533722, "learning_rate": 1.9370539630747425e-05, "loss": 0.5305, "step": 8360 }, { "epoch": 0.22957166392092257, "grad_norm": 0.3779815137386322, "learning_rate": 1.9370388811489873e-05, "loss": 0.5178, "step": 8361 }, { "epoch": 0.22959912136188906, "grad_norm": 0.4221555292606354, "learning_rate": 1.9370237974753556e-05, "loss": 0.548, "step": 8362 }, { "epoch": 0.22962657880285559, "grad_norm": 0.3809456527233124, "learning_rate": 1.9370087120538755e-05, "loss": 0.4832, "step": 8363 }, { "epoch": 0.22965403624382208, "grad_norm": 0.44110214710235596, "learning_rate": 1.9369936248845755e-05, "loss": 0.5036, "step": 8364 }, { "epoch": 0.22968149368478857, "grad_norm": 0.360573410987854, "learning_rate": 1.936978535967483e-05, "loss": 0.4149, "step": 8365 }, { "epoch": 0.22970895112575507, "grad_norm": 0.3249720335006714, "learning_rate": 1.936963445302626e-05, "loss": 0.524, "step": 8366 }, { "epoch": 0.2297364085667216, "grad_norm": 0.3680763840675354, "learning_rate": 1.936948352890034e-05, "loss": 0.5011, "step": 8367 }, { "epoch": 0.22976386600768808, "grad_norm": 0.31878310441970825, "learning_rate": 1.9369332587297336e-05, "loss": 0.4144, "step": 8368 }, { "epoch": 0.22979132344865458, "grad_norm": 0.38002678751945496, "learning_rate": 1.936918162821754e-05, "loss": 0.5736, "step": 8369 }, { "epoch": 0.2298187808896211, "grad_norm": 0.3544250428676605, "learning_rate": 1.936903065166123e-05, "loss": 0.5988, "step": 8370 }, { "epoch": 0.2298462383305876, "grad_norm": 0.4053061902523041, "learning_rate": 1.9368879657628685e-05, "loss": 0.4748, "step": 8371 }, { "epoch": 0.2298736957715541, "grad_norm": 0.36695608496665955, "learning_rate": 1.936872864612019e-05, "loss": 0.5766, "step": 8372 }, { "epoch": 0.22990115321252058, "grad_norm": 0.3581576943397522, "learning_rate": 1.9368577617136025e-05, "loss": 0.5771, "step": 8373 }, { "epoch": 0.2299286106534871, "grad_norm": 0.8321191072463989, "learning_rate": 1.9368426570676475e-05, "loss": 0.4991, "step": 8374 }, { "epoch": 0.2299560680944536, "grad_norm": 0.37169933319091797, "learning_rate": 1.9368275506741822e-05, "loss": 0.3949, "step": 8375 }, { "epoch": 0.2299835255354201, "grad_norm": 0.4076373875141144, "learning_rate": 1.936812442533234e-05, "loss": 0.4871, "step": 8376 }, { "epoch": 0.2300109829763866, "grad_norm": 0.4650156497955322, "learning_rate": 1.936797332644832e-05, "loss": 0.4494, "step": 8377 }, { "epoch": 0.2300384404173531, "grad_norm": 0.38125163316726685, "learning_rate": 1.9367822210090034e-05, "loss": 0.5277, "step": 8378 }, { "epoch": 0.2300658978583196, "grad_norm": 0.39368772506713867, "learning_rate": 1.9367671076257775e-05, "loss": 0.5699, "step": 8379 }, { "epoch": 0.2300933552992861, "grad_norm": 0.3228253722190857, "learning_rate": 1.936751992495182e-05, "loss": 0.4354, "step": 8380 }, { "epoch": 0.23012081274025262, "grad_norm": 0.35213181376457214, "learning_rate": 1.9367368756172444e-05, "loss": 0.5301, "step": 8381 }, { "epoch": 0.2301482701812191, "grad_norm": 0.4002475440502167, "learning_rate": 1.936721756991994e-05, "loss": 0.5426, "step": 8382 }, { "epoch": 0.2301757276221856, "grad_norm": 0.3616049885749817, "learning_rate": 1.9367066366194586e-05, "loss": 0.4724, "step": 8383 }, { "epoch": 0.23020318506315213, "grad_norm": 0.3268059194087982, "learning_rate": 1.9366915144996665e-05, "loss": 0.4045, "step": 8384 }, { "epoch": 0.23023064250411862, "grad_norm": 0.3999198079109192, "learning_rate": 1.9366763906326454e-05, "loss": 0.5548, "step": 8385 }, { "epoch": 0.23025809994508512, "grad_norm": 0.3608124256134033, "learning_rate": 1.9366612650184243e-05, "loss": 0.5682, "step": 8386 }, { "epoch": 0.2302855573860516, "grad_norm": 0.420049786567688, "learning_rate": 1.9366461376570308e-05, "loss": 0.5517, "step": 8387 }, { "epoch": 0.23031301482701813, "grad_norm": 0.3834000527858734, "learning_rate": 1.9366310085484935e-05, "loss": 0.5566, "step": 8388 }, { "epoch": 0.23034047226798463, "grad_norm": 0.3702384829521179, "learning_rate": 1.93661587769284e-05, "loss": 0.5881, "step": 8389 }, { "epoch": 0.23036792970895112, "grad_norm": 0.42525824904441833, "learning_rate": 1.936600745090099e-05, "loss": 0.5553, "step": 8390 }, { "epoch": 0.23039538714991764, "grad_norm": 0.3670484721660614, "learning_rate": 1.9365856107402988e-05, "loss": 0.5382, "step": 8391 }, { "epoch": 0.23042284459088413, "grad_norm": 0.3988783061504364, "learning_rate": 1.9365704746434677e-05, "loss": 0.5892, "step": 8392 }, { "epoch": 0.23045030203185063, "grad_norm": 0.41409626603126526, "learning_rate": 1.9365553367996335e-05, "loss": 0.5549, "step": 8393 }, { "epoch": 0.23047775947281712, "grad_norm": 0.3989344537258148, "learning_rate": 1.936540197208825e-05, "loss": 0.5479, "step": 8394 }, { "epoch": 0.23050521691378364, "grad_norm": 0.348515123128891, "learning_rate": 1.9365250558710697e-05, "loss": 0.5679, "step": 8395 }, { "epoch": 0.23053267435475014, "grad_norm": 0.369470477104187, "learning_rate": 1.9365099127863966e-05, "loss": 0.6099, "step": 8396 }, { "epoch": 0.23056013179571663, "grad_norm": 0.35255682468414307, "learning_rate": 1.9364947679548333e-05, "loss": 0.5293, "step": 8397 }, { "epoch": 0.23058758923668315, "grad_norm": 0.3967011272907257, "learning_rate": 1.936479621376409e-05, "loss": 0.4981, "step": 8398 }, { "epoch": 0.23061504667764965, "grad_norm": 0.37821337580680847, "learning_rate": 1.9364644730511505e-05, "loss": 0.6683, "step": 8399 }, { "epoch": 0.23064250411861614, "grad_norm": 0.5256261229515076, "learning_rate": 1.9364493229790872e-05, "loss": 0.5095, "step": 8400 }, { "epoch": 0.23066996155958264, "grad_norm": 0.4660869538784027, "learning_rate": 1.936434171160247e-05, "loss": 0.5405, "step": 8401 }, { "epoch": 0.23069741900054916, "grad_norm": 0.32417139410972595, "learning_rate": 1.9364190175946588e-05, "loss": 0.6054, "step": 8402 }, { "epoch": 0.23072487644151565, "grad_norm": 0.36102524399757385, "learning_rate": 1.9364038622823496e-05, "loss": 0.5149, "step": 8403 }, { "epoch": 0.23075233388248215, "grad_norm": 0.35940611362457275, "learning_rate": 1.9363887052233483e-05, "loss": 0.518, "step": 8404 }, { "epoch": 0.23077979132344864, "grad_norm": 0.36217042803764343, "learning_rate": 1.9363735464176835e-05, "loss": 0.534, "step": 8405 }, { "epoch": 0.23080724876441516, "grad_norm": 0.5451474785804749, "learning_rate": 1.936358385865383e-05, "loss": 0.5027, "step": 8406 }, { "epoch": 0.23083470620538166, "grad_norm": 0.36515772342681885, "learning_rate": 1.936343223566475e-05, "loss": 0.5295, "step": 8407 }, { "epoch": 0.23086216364634815, "grad_norm": 0.4011467397212982, "learning_rate": 1.936328059520989e-05, "loss": 0.6563, "step": 8408 }, { "epoch": 0.23088962108731467, "grad_norm": 0.4163006544113159, "learning_rate": 1.9363128937289516e-05, "loss": 0.5445, "step": 8409 }, { "epoch": 0.23091707852828117, "grad_norm": 0.3622475564479828, "learning_rate": 1.9362977261903917e-05, "loss": 0.4908, "step": 8410 }, { "epoch": 0.23094453596924766, "grad_norm": 0.323786199092865, "learning_rate": 1.936282556905338e-05, "loss": 0.4534, "step": 8411 }, { "epoch": 0.23097199341021415, "grad_norm": 0.41697025299072266, "learning_rate": 1.9362673858738183e-05, "loss": 0.6305, "step": 8412 }, { "epoch": 0.23099945085118068, "grad_norm": 0.34213608503341675, "learning_rate": 1.9362522130958612e-05, "loss": 0.521, "step": 8413 }, { "epoch": 0.23102690829214717, "grad_norm": 0.3749963343143463, "learning_rate": 1.936237038571495e-05, "loss": 0.5196, "step": 8414 }, { "epoch": 0.23105436573311366, "grad_norm": 0.4436042904853821, "learning_rate": 1.9362218623007476e-05, "loss": 0.582, "step": 8415 }, { "epoch": 0.2310818231740802, "grad_norm": 0.45022717118263245, "learning_rate": 1.936206684283648e-05, "loss": 0.63, "step": 8416 }, { "epoch": 0.23110928061504668, "grad_norm": 0.37875375151634216, "learning_rate": 1.936191504520224e-05, "loss": 0.5056, "step": 8417 }, { "epoch": 0.23113673805601317, "grad_norm": 0.34428831934928894, "learning_rate": 1.936176323010504e-05, "loss": 0.5388, "step": 8418 }, { "epoch": 0.23116419549697967, "grad_norm": 0.42678502202033997, "learning_rate": 1.9361611397545164e-05, "loss": 0.5922, "step": 8419 }, { "epoch": 0.2311916529379462, "grad_norm": 0.40828803181648254, "learning_rate": 1.9361459547522895e-05, "loss": 0.5532, "step": 8420 }, { "epoch": 0.23121911037891268, "grad_norm": 0.40800923109054565, "learning_rate": 1.9361307680038517e-05, "loss": 0.5837, "step": 8421 }, { "epoch": 0.23124656781987918, "grad_norm": 0.3530968427658081, "learning_rate": 1.936115579509231e-05, "loss": 0.5601, "step": 8422 }, { "epoch": 0.2312740252608457, "grad_norm": 0.36083531379699707, "learning_rate": 1.936100389268456e-05, "loss": 0.5005, "step": 8423 }, { "epoch": 0.2313014827018122, "grad_norm": 0.3541795015335083, "learning_rate": 1.9360851972815553e-05, "loss": 0.5592, "step": 8424 }, { "epoch": 0.2313289401427787, "grad_norm": 0.37679022550582886, "learning_rate": 1.9360700035485567e-05, "loss": 0.5128, "step": 8425 }, { "epoch": 0.23135639758374518, "grad_norm": 0.4327252507209778, "learning_rate": 1.9360548080694888e-05, "loss": 0.6306, "step": 8426 }, { "epoch": 0.2313838550247117, "grad_norm": 0.48539143800735474, "learning_rate": 1.93603961084438e-05, "loss": 0.5395, "step": 8427 }, { "epoch": 0.2314113124656782, "grad_norm": 0.3696403205394745, "learning_rate": 1.9360244118732584e-05, "loss": 0.5279, "step": 8428 }, { "epoch": 0.2314387699066447, "grad_norm": 0.390292227268219, "learning_rate": 1.936009211156153e-05, "loss": 0.5573, "step": 8429 }, { "epoch": 0.23146622734761121, "grad_norm": 0.3671209216117859, "learning_rate": 1.9359940086930914e-05, "loss": 0.5187, "step": 8430 }, { "epoch": 0.2314936847885777, "grad_norm": 0.32470062375068665, "learning_rate": 1.935978804484102e-05, "loss": 0.517, "step": 8431 }, { "epoch": 0.2315211422295442, "grad_norm": 0.3870636820793152, "learning_rate": 1.9359635985292135e-05, "loss": 0.5423, "step": 8432 }, { "epoch": 0.2315485996705107, "grad_norm": 0.33690956234931946, "learning_rate": 1.9359483908284546e-05, "loss": 0.4664, "step": 8433 }, { "epoch": 0.23157605711147722, "grad_norm": 0.3568219542503357, "learning_rate": 1.9359331813818526e-05, "loss": 0.5389, "step": 8434 }, { "epoch": 0.2316035145524437, "grad_norm": 0.33700230717658997, "learning_rate": 1.935917970189437e-05, "loss": 0.4946, "step": 8435 }, { "epoch": 0.2316309719934102, "grad_norm": 0.41333431005477905, "learning_rate": 1.9359027572512353e-05, "loss": 0.5301, "step": 8436 }, { "epoch": 0.23165842943437673, "grad_norm": 0.4045794606208801, "learning_rate": 1.9358875425672762e-05, "loss": 0.5401, "step": 8437 }, { "epoch": 0.23168588687534322, "grad_norm": 0.3702659606933594, "learning_rate": 1.9358723261375883e-05, "loss": 0.522, "step": 8438 }, { "epoch": 0.23171334431630972, "grad_norm": 0.36181724071502686, "learning_rate": 1.9358571079622e-05, "loss": 0.521, "step": 8439 }, { "epoch": 0.2317408017572762, "grad_norm": 0.3695557713508606, "learning_rate": 1.9358418880411395e-05, "loss": 0.4926, "step": 8440 }, { "epoch": 0.23176825919824273, "grad_norm": 0.40611955523490906, "learning_rate": 1.935826666374435e-05, "loss": 0.5905, "step": 8441 }, { "epoch": 0.23179571663920923, "grad_norm": 0.5479353666305542, "learning_rate": 1.935811442962115e-05, "loss": 0.6161, "step": 8442 }, { "epoch": 0.23182317408017572, "grad_norm": 0.4138200283050537, "learning_rate": 1.9357962178042083e-05, "loss": 0.5386, "step": 8443 }, { "epoch": 0.23185063152114224, "grad_norm": 0.36864376068115234, "learning_rate": 1.9357809909007428e-05, "loss": 0.5716, "step": 8444 }, { "epoch": 0.23187808896210874, "grad_norm": 0.4255823493003845, "learning_rate": 1.935765762251747e-05, "loss": 0.6117, "step": 8445 }, { "epoch": 0.23190554640307523, "grad_norm": 0.4143540859222412, "learning_rate": 1.9357505318572496e-05, "loss": 0.5111, "step": 8446 }, { "epoch": 0.23193300384404172, "grad_norm": 0.8079360127449036, "learning_rate": 1.9357352997172784e-05, "loss": 0.6535, "step": 8447 }, { "epoch": 0.23196046128500825, "grad_norm": 0.38722383975982666, "learning_rate": 1.9357200658318626e-05, "loss": 0.5619, "step": 8448 }, { "epoch": 0.23198791872597474, "grad_norm": 0.30461180210113525, "learning_rate": 1.93570483020103e-05, "loss": 0.4615, "step": 8449 }, { "epoch": 0.23201537616694123, "grad_norm": 0.38834893703460693, "learning_rate": 1.9356895928248097e-05, "loss": 0.5416, "step": 8450 }, { "epoch": 0.23204283360790776, "grad_norm": 0.4207307994365692, "learning_rate": 1.9356743537032294e-05, "loss": 0.6292, "step": 8451 }, { "epoch": 0.23207029104887425, "grad_norm": 0.3035129904747009, "learning_rate": 1.9356591128363177e-05, "loss": 0.4341, "step": 8452 }, { "epoch": 0.23209774848984074, "grad_norm": 0.3611961603164673, "learning_rate": 1.935643870224103e-05, "loss": 0.584, "step": 8453 }, { "epoch": 0.23212520593080724, "grad_norm": 0.4158090651035309, "learning_rate": 1.935628625866614e-05, "loss": 0.492, "step": 8454 }, { "epoch": 0.23215266337177376, "grad_norm": 0.5046626329421997, "learning_rate": 1.9356133797638793e-05, "loss": 0.5711, "step": 8455 }, { "epoch": 0.23218012081274025, "grad_norm": 0.4193890690803528, "learning_rate": 1.9355981319159267e-05, "loss": 0.4891, "step": 8456 }, { "epoch": 0.23220757825370675, "grad_norm": 0.33180728554725647, "learning_rate": 1.935582882322785e-05, "loss": 0.6084, "step": 8457 }, { "epoch": 0.23223503569467327, "grad_norm": 0.40174567699432373, "learning_rate": 1.9355676309844828e-05, "loss": 0.6026, "step": 8458 }, { "epoch": 0.23226249313563976, "grad_norm": 0.38191354274749756, "learning_rate": 1.935552377901048e-05, "loss": 0.55, "step": 8459 }, { "epoch": 0.23228995057660626, "grad_norm": 0.3343692123889923, "learning_rate": 1.93553712307251e-05, "loss": 0.4343, "step": 8460 }, { "epoch": 0.23231740801757275, "grad_norm": 0.36166682839393616, "learning_rate": 1.9355218664988958e-05, "loss": 0.5238, "step": 8461 }, { "epoch": 0.23234486545853927, "grad_norm": 0.426285058259964, "learning_rate": 1.9355066081802353e-05, "loss": 0.5877, "step": 8462 }, { "epoch": 0.23237232289950577, "grad_norm": 0.359115332365036, "learning_rate": 1.9354913481165565e-05, "loss": 0.5131, "step": 8463 }, { "epoch": 0.23239978034047226, "grad_norm": 0.3586997091770172, "learning_rate": 1.9354760863078875e-05, "loss": 0.4571, "step": 8464 }, { "epoch": 0.23242723778143878, "grad_norm": 0.40124601125717163, "learning_rate": 1.935460822754257e-05, "loss": 0.5754, "step": 8465 }, { "epoch": 0.23245469522240528, "grad_norm": 0.350619912147522, "learning_rate": 1.9354455574556942e-05, "loss": 0.5586, "step": 8466 }, { "epoch": 0.23248215266337177, "grad_norm": 0.466869056224823, "learning_rate": 1.935430290412226e-05, "loss": 0.6146, "step": 8467 }, { "epoch": 0.23250961010433827, "grad_norm": 0.5188923478126526, "learning_rate": 1.9354150216238823e-05, "loss": 0.5365, "step": 8468 }, { "epoch": 0.2325370675453048, "grad_norm": 0.37030351161956787, "learning_rate": 1.935399751090691e-05, "loss": 0.53, "step": 8469 }, { "epoch": 0.23256452498627128, "grad_norm": 0.38124024868011475, "learning_rate": 1.9353844788126805e-05, "loss": 0.5574, "step": 8470 }, { "epoch": 0.23259198242723778, "grad_norm": 0.3539254069328308, "learning_rate": 1.935369204789879e-05, "loss": 0.5557, "step": 8471 }, { "epoch": 0.23261943986820427, "grad_norm": 0.32976844906806946, "learning_rate": 1.9353539290223158e-05, "loss": 0.5195, "step": 8472 }, { "epoch": 0.2326468973091708, "grad_norm": 0.36041536927223206, "learning_rate": 1.9353386515100188e-05, "loss": 0.5452, "step": 8473 }, { "epoch": 0.23267435475013729, "grad_norm": 0.3967084288597107, "learning_rate": 1.935323372253017e-05, "loss": 0.5089, "step": 8474 }, { "epoch": 0.23270181219110378, "grad_norm": 0.3400379419326782, "learning_rate": 1.9353080912513383e-05, "loss": 0.5392, "step": 8475 }, { "epoch": 0.2327292696320703, "grad_norm": 0.3672572672367096, "learning_rate": 1.9352928085050116e-05, "loss": 0.5357, "step": 8476 }, { "epoch": 0.2327567270730368, "grad_norm": 0.3795188069343567, "learning_rate": 1.9352775240140654e-05, "loss": 0.5341, "step": 8477 }, { "epoch": 0.2327841845140033, "grad_norm": 0.35329607129096985, "learning_rate": 1.935262237778528e-05, "loss": 0.4511, "step": 8478 }, { "epoch": 0.23281164195496978, "grad_norm": 0.3488442003726959, "learning_rate": 1.935246949798428e-05, "loss": 0.498, "step": 8479 }, { "epoch": 0.2328390993959363, "grad_norm": 0.36890390515327454, "learning_rate": 1.935231660073794e-05, "loss": 0.5839, "step": 8480 }, { "epoch": 0.2328665568369028, "grad_norm": 0.38353490829467773, "learning_rate": 1.9352163686046546e-05, "loss": 0.5816, "step": 8481 }, { "epoch": 0.2328940142778693, "grad_norm": 0.40030544996261597, "learning_rate": 1.935201075391038e-05, "loss": 0.5792, "step": 8482 }, { "epoch": 0.23292147171883582, "grad_norm": 0.31404563784599304, "learning_rate": 1.9351857804329726e-05, "loss": 0.4443, "step": 8483 }, { "epoch": 0.2329489291598023, "grad_norm": 0.39186206459999084, "learning_rate": 1.935170483730488e-05, "loss": 0.5634, "step": 8484 }, { "epoch": 0.2329763866007688, "grad_norm": 0.4767107367515564, "learning_rate": 1.9351551852836114e-05, "loss": 0.5729, "step": 8485 }, { "epoch": 0.2330038440417353, "grad_norm": 0.40498772263526917, "learning_rate": 1.9351398850923722e-05, "loss": 0.4944, "step": 8486 }, { "epoch": 0.23303130148270182, "grad_norm": 0.33704230189323425, "learning_rate": 1.9351245831567984e-05, "loss": 0.5257, "step": 8487 }, { "epoch": 0.2330587589236683, "grad_norm": 0.39260613918304443, "learning_rate": 1.9351092794769188e-05, "loss": 0.48, "step": 8488 }, { "epoch": 0.2330862163646348, "grad_norm": 0.3843478858470917, "learning_rate": 1.9350939740527622e-05, "loss": 0.5323, "step": 8489 }, { "epoch": 0.23311367380560133, "grad_norm": 0.40935003757476807, "learning_rate": 1.9350786668843568e-05, "loss": 0.6, "step": 8490 }, { "epoch": 0.23314113124656782, "grad_norm": 0.31980761885643005, "learning_rate": 1.9350633579717312e-05, "loss": 0.4532, "step": 8491 }, { "epoch": 0.23316858868753432, "grad_norm": 0.3711133301258087, "learning_rate": 1.935048047314914e-05, "loss": 0.5505, "step": 8492 }, { "epoch": 0.2331960461285008, "grad_norm": 0.34983018040657043, "learning_rate": 1.935032734913934e-05, "loss": 0.5025, "step": 8493 }, { "epoch": 0.23322350356946733, "grad_norm": 0.38475221395492554, "learning_rate": 1.935017420768819e-05, "loss": 0.4905, "step": 8494 }, { "epoch": 0.23325096101043383, "grad_norm": 0.33185693621635437, "learning_rate": 1.9350021048795983e-05, "loss": 0.4465, "step": 8495 }, { "epoch": 0.23327841845140032, "grad_norm": 0.32868966460227966, "learning_rate": 1.9349867872463006e-05, "loss": 0.3982, "step": 8496 }, { "epoch": 0.23330587589236684, "grad_norm": 0.35981109738349915, "learning_rate": 1.9349714678689537e-05, "loss": 0.5208, "step": 8497 }, { "epoch": 0.23333333333333334, "grad_norm": 0.3587247431278229, "learning_rate": 1.934956146747587e-05, "loss": 0.4582, "step": 8498 }, { "epoch": 0.23336079077429983, "grad_norm": 0.37437930703163147, "learning_rate": 1.934940823882228e-05, "loss": 0.5096, "step": 8499 }, { "epoch": 0.23338824821526633, "grad_norm": 0.43049880862236023, "learning_rate": 1.9349254992729064e-05, "loss": 0.5483, "step": 8500 }, { "epoch": 0.23341570565623285, "grad_norm": 0.3522612452507019, "learning_rate": 1.9349101729196505e-05, "loss": 0.5217, "step": 8501 }, { "epoch": 0.23344316309719934, "grad_norm": 0.3774700164794922, "learning_rate": 1.934894844822489e-05, "loss": 0.5569, "step": 8502 }, { "epoch": 0.23347062053816584, "grad_norm": 0.45812085270881653, "learning_rate": 1.9348795149814497e-05, "loss": 0.5448, "step": 8503 }, { "epoch": 0.23349807797913236, "grad_norm": 0.3713299632072449, "learning_rate": 1.934864183396562e-05, "loss": 0.5349, "step": 8504 }, { "epoch": 0.23352553542009885, "grad_norm": 0.40539610385894775, "learning_rate": 1.9348488500678536e-05, "loss": 0.5124, "step": 8505 }, { "epoch": 0.23355299286106534, "grad_norm": 0.3698674440383911, "learning_rate": 1.9348335149953544e-05, "loss": 0.5315, "step": 8506 }, { "epoch": 0.23358045030203184, "grad_norm": 0.3760128617286682, "learning_rate": 1.934818178179092e-05, "loss": 0.5914, "step": 8507 }, { "epoch": 0.23360790774299836, "grad_norm": 0.3199860751628876, "learning_rate": 1.9348028396190955e-05, "loss": 0.4828, "step": 8508 }, { "epoch": 0.23363536518396485, "grad_norm": 0.456095814704895, "learning_rate": 1.9347874993153934e-05, "loss": 0.4599, "step": 8509 }, { "epoch": 0.23366282262493135, "grad_norm": 0.3267587423324585, "learning_rate": 1.9347721572680143e-05, "loss": 0.5056, "step": 8510 }, { "epoch": 0.23369028006589787, "grad_norm": 0.36335471272468567, "learning_rate": 1.9347568134769864e-05, "loss": 0.4779, "step": 8511 }, { "epoch": 0.23371773750686436, "grad_norm": 0.34659016132354736, "learning_rate": 1.934741467942339e-05, "loss": 0.5216, "step": 8512 }, { "epoch": 0.23374519494783086, "grad_norm": 0.3411429524421692, "learning_rate": 1.9347261206641006e-05, "loss": 0.4959, "step": 8513 }, { "epoch": 0.23377265238879735, "grad_norm": 0.4224499464035034, "learning_rate": 1.9347107716422993e-05, "loss": 0.5419, "step": 8514 }, { "epoch": 0.23380010982976387, "grad_norm": 0.3756140470504761, "learning_rate": 1.9346954208769644e-05, "loss": 0.532, "step": 8515 }, { "epoch": 0.23382756727073037, "grad_norm": 0.3705260455608368, "learning_rate": 1.934680068368124e-05, "loss": 0.6022, "step": 8516 }, { "epoch": 0.23385502471169686, "grad_norm": 0.36000990867614746, "learning_rate": 1.9346647141158072e-05, "loss": 0.4101, "step": 8517 }, { "epoch": 0.23388248215266338, "grad_norm": 0.3387793302536011, "learning_rate": 1.934649358120042e-05, "loss": 0.5209, "step": 8518 }, { "epoch": 0.23390993959362988, "grad_norm": 0.3457236588001251, "learning_rate": 1.9346340003808576e-05, "loss": 0.5341, "step": 8519 }, { "epoch": 0.23393739703459637, "grad_norm": 0.37173834443092346, "learning_rate": 1.9346186408982824e-05, "loss": 0.5326, "step": 8520 }, { "epoch": 0.23396485447556287, "grad_norm": 0.33690914511680603, "learning_rate": 1.9346032796723454e-05, "loss": 0.5025, "step": 8521 }, { "epoch": 0.2339923119165294, "grad_norm": 0.36679011583328247, "learning_rate": 1.934587916703075e-05, "loss": 0.5139, "step": 8522 }, { "epoch": 0.23401976935749588, "grad_norm": 0.3778705298900604, "learning_rate": 1.9345725519905e-05, "loss": 0.524, "step": 8523 }, { "epoch": 0.23404722679846238, "grad_norm": 0.3421902358531952, "learning_rate": 1.9345571855346485e-05, "loss": 0.5231, "step": 8524 }, { "epoch": 0.2340746842394289, "grad_norm": 0.34022143483161926, "learning_rate": 1.9345418173355493e-05, "loss": 0.4637, "step": 8525 }, { "epoch": 0.2341021416803954, "grad_norm": 0.41050609946250916, "learning_rate": 1.934526447393232e-05, "loss": 0.6575, "step": 8526 }, { "epoch": 0.2341295991213619, "grad_norm": 0.32567453384399414, "learning_rate": 1.9345110757077245e-05, "loss": 0.4852, "step": 8527 }, { "epoch": 0.23415705656232838, "grad_norm": 0.31403446197509766, "learning_rate": 1.9344957022790554e-05, "loss": 0.5109, "step": 8528 }, { "epoch": 0.2341845140032949, "grad_norm": 0.3811957836151123, "learning_rate": 1.9344803271072535e-05, "loss": 0.5722, "step": 8529 }, { "epoch": 0.2342119714442614, "grad_norm": 0.33398640155792236, "learning_rate": 1.934464950192348e-05, "loss": 0.4123, "step": 8530 }, { "epoch": 0.2342394288852279, "grad_norm": 0.3418843150138855, "learning_rate": 1.9344495715343665e-05, "loss": 0.5059, "step": 8531 }, { "epoch": 0.2342668863261944, "grad_norm": 0.37118762731552124, "learning_rate": 1.9344341911333386e-05, "loss": 0.5615, "step": 8532 }, { "epoch": 0.2342943437671609, "grad_norm": 0.32620885968208313, "learning_rate": 1.9344188089892927e-05, "loss": 0.5386, "step": 8533 }, { "epoch": 0.2343218012081274, "grad_norm": 0.33162638545036316, "learning_rate": 1.934403425102257e-05, "loss": 0.5294, "step": 8534 }, { "epoch": 0.2343492586490939, "grad_norm": 0.3626667559146881, "learning_rate": 1.9343880394722613e-05, "loss": 0.4637, "step": 8535 }, { "epoch": 0.23437671609006042, "grad_norm": 0.3506741225719452, "learning_rate": 1.9343726520993337e-05, "loss": 0.5251, "step": 8536 }, { "epoch": 0.2344041735310269, "grad_norm": 0.39587530493736267, "learning_rate": 1.9343572629835027e-05, "loss": 0.5515, "step": 8537 }, { "epoch": 0.2344316309719934, "grad_norm": 0.39913907647132874, "learning_rate": 1.9343418721247973e-05, "loss": 0.562, "step": 8538 }, { "epoch": 0.2344590884129599, "grad_norm": 0.39188653230667114, "learning_rate": 1.934326479523246e-05, "loss": 0.5122, "step": 8539 }, { "epoch": 0.23448654585392642, "grad_norm": 0.4758952558040619, "learning_rate": 1.9343110851788775e-05, "loss": 0.5019, "step": 8540 }, { "epoch": 0.23451400329489291, "grad_norm": 0.343119740486145, "learning_rate": 1.934295689091721e-05, "loss": 0.5287, "step": 8541 }, { "epoch": 0.2345414607358594, "grad_norm": 0.38913148641586304, "learning_rate": 1.9342802912618044e-05, "loss": 0.5387, "step": 8542 }, { "epoch": 0.23456891817682593, "grad_norm": 0.3456226885318756, "learning_rate": 1.934264891689157e-05, "loss": 0.5832, "step": 8543 }, { "epoch": 0.23459637561779242, "grad_norm": 0.5934251546859741, "learning_rate": 1.9342494903738073e-05, "loss": 0.5993, "step": 8544 }, { "epoch": 0.23462383305875892, "grad_norm": 0.3693557381629944, "learning_rate": 1.9342340873157844e-05, "loss": 0.4517, "step": 8545 }, { "epoch": 0.2346512904997254, "grad_norm": 0.37915730476379395, "learning_rate": 1.9342186825151167e-05, "loss": 0.5811, "step": 8546 }, { "epoch": 0.23467874794069193, "grad_norm": 0.3659084141254425, "learning_rate": 1.934203275971833e-05, "loss": 0.5591, "step": 8547 }, { "epoch": 0.23470620538165843, "grad_norm": 0.42994675040245056, "learning_rate": 1.934187867685962e-05, "loss": 0.5376, "step": 8548 }, { "epoch": 0.23473366282262492, "grad_norm": 0.3818734884262085, "learning_rate": 1.934172457657532e-05, "loss": 0.565, "step": 8549 }, { "epoch": 0.23476112026359144, "grad_norm": 0.38331976532936096, "learning_rate": 1.934157045886573e-05, "loss": 0.5526, "step": 8550 }, { "epoch": 0.23478857770455794, "grad_norm": 0.36273208260536194, "learning_rate": 1.9341416323731125e-05, "loss": 0.5598, "step": 8551 }, { "epoch": 0.23481603514552443, "grad_norm": 0.4888930022716522, "learning_rate": 1.93412621711718e-05, "loss": 0.5482, "step": 8552 }, { "epoch": 0.23484349258649093, "grad_norm": 0.3193429708480835, "learning_rate": 1.9341108001188037e-05, "loss": 0.4724, "step": 8553 }, { "epoch": 0.23487095002745745, "grad_norm": 0.3565026819705963, "learning_rate": 1.9340953813780126e-05, "loss": 0.4839, "step": 8554 }, { "epoch": 0.23489840746842394, "grad_norm": 0.34403276443481445, "learning_rate": 1.9340799608948354e-05, "loss": 0.554, "step": 8555 }, { "epoch": 0.23492586490939044, "grad_norm": 0.3737739622592926, "learning_rate": 1.9340645386693013e-05, "loss": 0.6271, "step": 8556 }, { "epoch": 0.23495332235035696, "grad_norm": 0.36897990107536316, "learning_rate": 1.9340491147014387e-05, "loss": 0.4926, "step": 8557 }, { "epoch": 0.23498077979132345, "grad_norm": 0.3662615418434143, "learning_rate": 1.934033688991276e-05, "loss": 0.5011, "step": 8558 }, { "epoch": 0.23500823723228995, "grad_norm": 0.4747711420059204, "learning_rate": 1.934018261538843e-05, "loss": 0.5553, "step": 8559 }, { "epoch": 0.23503569467325644, "grad_norm": 0.34206700325012207, "learning_rate": 1.934002832344167e-05, "loss": 0.5311, "step": 8560 }, { "epoch": 0.23506315211422296, "grad_norm": 0.33285316824913025, "learning_rate": 1.9339874014072785e-05, "loss": 0.5162, "step": 8561 }, { "epoch": 0.23509060955518946, "grad_norm": 0.43468138575553894, "learning_rate": 1.9339719687282045e-05, "loss": 0.5606, "step": 8562 }, { "epoch": 0.23511806699615595, "grad_norm": 0.4893453121185303, "learning_rate": 1.933956534306975e-05, "loss": 0.5115, "step": 8563 }, { "epoch": 0.23514552443712247, "grad_norm": 0.6238056421279907, "learning_rate": 1.9339410981436192e-05, "loss": 0.5326, "step": 8564 }, { "epoch": 0.23517298187808897, "grad_norm": 0.3759390413761139, "learning_rate": 1.9339256602381642e-05, "loss": 0.5065, "step": 8565 }, { "epoch": 0.23520043931905546, "grad_norm": 0.3776780664920807, "learning_rate": 1.9339102205906403e-05, "loss": 0.5139, "step": 8566 }, { "epoch": 0.23522789676002195, "grad_norm": 0.36807072162628174, "learning_rate": 1.9338947792010755e-05, "loss": 0.5033, "step": 8567 }, { "epoch": 0.23525535420098848, "grad_norm": 0.39904293417930603, "learning_rate": 1.933879336069499e-05, "loss": 0.4902, "step": 8568 }, { "epoch": 0.23528281164195497, "grad_norm": 0.3751380741596222, "learning_rate": 1.9338638911959397e-05, "loss": 0.5457, "step": 8569 }, { "epoch": 0.23531026908292146, "grad_norm": 0.3144533336162567, "learning_rate": 1.9338484445804256e-05, "loss": 0.489, "step": 8570 }, { "epoch": 0.23533772652388799, "grad_norm": 0.39271610975265503, "learning_rate": 1.933832996222987e-05, "loss": 0.5953, "step": 8571 }, { "epoch": 0.23536518396485448, "grad_norm": 0.3701516389846802, "learning_rate": 1.933817546123651e-05, "loss": 0.6106, "step": 8572 }, { "epoch": 0.23539264140582097, "grad_norm": 0.319723516702652, "learning_rate": 1.9338020942824477e-05, "loss": 0.4863, "step": 8573 }, { "epoch": 0.23542009884678747, "grad_norm": 0.45861396193504333, "learning_rate": 1.9337866406994053e-05, "loss": 0.5314, "step": 8574 }, { "epoch": 0.235447556287754, "grad_norm": 0.3413717448711395, "learning_rate": 1.9337711853745527e-05, "loss": 0.5968, "step": 8575 }, { "epoch": 0.23547501372872048, "grad_norm": 0.35395121574401855, "learning_rate": 1.933755728307919e-05, "loss": 0.5707, "step": 8576 }, { "epoch": 0.23550247116968698, "grad_norm": 0.3424719274044037, "learning_rate": 1.9337402694995325e-05, "loss": 0.5367, "step": 8577 }, { "epoch": 0.2355299286106535, "grad_norm": 0.34722939133644104, "learning_rate": 1.9337248089494225e-05, "loss": 0.4912, "step": 8578 }, { "epoch": 0.23555738605162, "grad_norm": 0.36852777004241943, "learning_rate": 1.933709346657618e-05, "loss": 0.5822, "step": 8579 }, { "epoch": 0.2355848434925865, "grad_norm": 0.36839136481285095, "learning_rate": 1.9336938826241475e-05, "loss": 0.5439, "step": 8580 }, { "epoch": 0.23561230093355298, "grad_norm": 0.3577512800693512, "learning_rate": 1.9336784168490395e-05, "loss": 0.46, "step": 8581 }, { "epoch": 0.2356397583745195, "grad_norm": 0.3609701991081238, "learning_rate": 1.9336629493323237e-05, "loss": 0.4488, "step": 8582 }, { "epoch": 0.235667215815486, "grad_norm": 0.3604409098625183, "learning_rate": 1.9336474800740285e-05, "loss": 0.4946, "step": 8583 }, { "epoch": 0.2356946732564525, "grad_norm": 0.3667648434638977, "learning_rate": 1.9336320090741828e-05, "loss": 0.4684, "step": 8584 }, { "epoch": 0.235722130697419, "grad_norm": 0.3735356032848358, "learning_rate": 1.933616536332815e-05, "loss": 0.4953, "step": 8585 }, { "epoch": 0.2357495881383855, "grad_norm": 0.4693150222301483, "learning_rate": 1.9336010618499545e-05, "loss": 0.6141, "step": 8586 }, { "epoch": 0.235777045579352, "grad_norm": 0.3749711513519287, "learning_rate": 1.9335855856256302e-05, "loss": 0.5125, "step": 8587 }, { "epoch": 0.2358045030203185, "grad_norm": 0.4062865078449249, "learning_rate": 1.933570107659871e-05, "loss": 0.5863, "step": 8588 }, { "epoch": 0.23583196046128502, "grad_norm": 0.3656039237976074, "learning_rate": 1.9335546279527053e-05, "loss": 0.5084, "step": 8589 }, { "epoch": 0.2358594179022515, "grad_norm": 0.430649071931839, "learning_rate": 1.9335391465041623e-05, "loss": 0.5555, "step": 8590 }, { "epoch": 0.235886875343218, "grad_norm": 0.37617939710617065, "learning_rate": 1.9335236633142707e-05, "loss": 0.5231, "step": 8591 }, { "epoch": 0.23591433278418453, "grad_norm": 0.381004273891449, "learning_rate": 1.93350817838306e-05, "loss": 0.5093, "step": 8592 }, { "epoch": 0.23594179022515102, "grad_norm": 0.41566330194473267, "learning_rate": 1.9334926917105582e-05, "loss": 0.5188, "step": 8593 }, { "epoch": 0.23596924766611752, "grad_norm": 0.34515780210494995, "learning_rate": 1.933477203296795e-05, "loss": 0.4878, "step": 8594 }, { "epoch": 0.235996705107084, "grad_norm": 0.3470410704612732, "learning_rate": 1.9334617131417984e-05, "loss": 0.562, "step": 8595 }, { "epoch": 0.23602416254805053, "grad_norm": 0.38418757915496826, "learning_rate": 1.933446221245598e-05, "loss": 0.478, "step": 8596 }, { "epoch": 0.23605161998901703, "grad_norm": 0.36442896723747253, "learning_rate": 1.9334307276082225e-05, "loss": 0.4602, "step": 8597 }, { "epoch": 0.23607907742998352, "grad_norm": 0.3872376084327698, "learning_rate": 1.9334152322297006e-05, "loss": 0.5611, "step": 8598 }, { "epoch": 0.23610653487095004, "grad_norm": 0.360520601272583, "learning_rate": 1.933399735110062e-05, "loss": 0.5742, "step": 8599 }, { "epoch": 0.23613399231191654, "grad_norm": 0.36925315856933594, "learning_rate": 1.9333842362493343e-05, "loss": 0.4882, "step": 8600 }, { "epoch": 0.23616144975288303, "grad_norm": 0.38667699694633484, "learning_rate": 1.933368735647547e-05, "loss": 0.4368, "step": 8601 }, { "epoch": 0.23618890719384952, "grad_norm": 0.32521966099739075, "learning_rate": 1.9333532333047295e-05, "loss": 0.4535, "step": 8602 }, { "epoch": 0.23621636463481605, "grad_norm": 0.3459606468677521, "learning_rate": 1.9333377292209105e-05, "loss": 0.5399, "step": 8603 }, { "epoch": 0.23624382207578254, "grad_norm": 0.3724338710308075, "learning_rate": 1.9333222233961183e-05, "loss": 0.4513, "step": 8604 }, { "epoch": 0.23627127951674903, "grad_norm": 0.38877207040786743, "learning_rate": 1.9333067158303823e-05, "loss": 0.513, "step": 8605 }, { "epoch": 0.23629873695771553, "grad_norm": 0.36853349208831787, "learning_rate": 1.9332912065237318e-05, "loss": 0.6043, "step": 8606 }, { "epoch": 0.23632619439868205, "grad_norm": 0.3518364131450653, "learning_rate": 1.933275695476195e-05, "loss": 0.5441, "step": 8607 }, { "epoch": 0.23635365183964854, "grad_norm": 0.49680325388908386, "learning_rate": 1.9332601826878013e-05, "loss": 0.5386, "step": 8608 }, { "epoch": 0.23638110928061504, "grad_norm": 0.4230257570743561, "learning_rate": 1.9332446681585794e-05, "loss": 0.5936, "step": 8609 }, { "epoch": 0.23640856672158156, "grad_norm": 0.34685561060905457, "learning_rate": 1.9332291518885583e-05, "loss": 0.5675, "step": 8610 }, { "epoch": 0.23643602416254805, "grad_norm": 0.355356365442276, "learning_rate": 1.933213633877767e-05, "loss": 0.5668, "step": 8611 }, { "epoch": 0.23646348160351455, "grad_norm": 0.3643311858177185, "learning_rate": 1.933198114126235e-05, "loss": 0.5343, "step": 8612 }, { "epoch": 0.23649093904448104, "grad_norm": 0.4126100242137909, "learning_rate": 1.93318259263399e-05, "loss": 0.6064, "step": 8613 }, { "epoch": 0.23651839648544756, "grad_norm": 0.3455587327480316, "learning_rate": 1.9331670694010616e-05, "loss": 0.4876, "step": 8614 }, { "epoch": 0.23654585392641406, "grad_norm": 0.36429563164711, "learning_rate": 1.933151544427479e-05, "loss": 0.4638, "step": 8615 }, { "epoch": 0.23657331136738055, "grad_norm": 0.37373897433280945, "learning_rate": 1.933136017713271e-05, "loss": 0.4388, "step": 8616 }, { "epoch": 0.23660076880834707, "grad_norm": 0.35950934886932373, "learning_rate": 1.9331204892584665e-05, "loss": 0.5498, "step": 8617 }, { "epoch": 0.23662822624931357, "grad_norm": 0.4094233214855194, "learning_rate": 1.933104959063094e-05, "loss": 0.5879, "step": 8618 }, { "epoch": 0.23665568369028006, "grad_norm": 0.3661895990371704, "learning_rate": 1.9330894271271834e-05, "loss": 0.6273, "step": 8619 }, { "epoch": 0.23668314113124655, "grad_norm": 0.4045199155807495, "learning_rate": 1.933073893450763e-05, "loss": 0.561, "step": 8620 }, { "epoch": 0.23671059857221308, "grad_norm": 0.41713860630989075, "learning_rate": 1.9330583580338624e-05, "loss": 0.5908, "step": 8621 }, { "epoch": 0.23673805601317957, "grad_norm": 0.3813289403915405, "learning_rate": 1.93304282087651e-05, "loss": 0.5269, "step": 8622 }, { "epoch": 0.23676551345414606, "grad_norm": 0.3825836181640625, "learning_rate": 1.9330272819787347e-05, "loss": 0.4669, "step": 8623 }, { "epoch": 0.2367929708951126, "grad_norm": 1.2098512649536133, "learning_rate": 1.933011741340566e-05, "loss": 0.4349, "step": 8624 }, { "epoch": 0.23682042833607908, "grad_norm": 0.3369964361190796, "learning_rate": 1.9329961989620325e-05, "loss": 0.5635, "step": 8625 }, { "epoch": 0.23684788577704557, "grad_norm": 0.3384723365306854, "learning_rate": 1.932980654843163e-05, "loss": 0.445, "step": 8626 }, { "epoch": 0.23687534321801207, "grad_norm": 0.418923944234848, "learning_rate": 1.932965108983987e-05, "loss": 0.5836, "step": 8627 }, { "epoch": 0.2369028006589786, "grad_norm": 0.30650240182876587, "learning_rate": 1.9329495613845337e-05, "loss": 0.4898, "step": 8628 }, { "epoch": 0.23693025809994508, "grad_norm": 0.4277849793434143, "learning_rate": 1.9329340120448313e-05, "loss": 0.576, "step": 8629 }, { "epoch": 0.23695771554091158, "grad_norm": 0.3599676787853241, "learning_rate": 1.9329184609649095e-05, "loss": 0.5354, "step": 8630 }, { "epoch": 0.2369851729818781, "grad_norm": 0.40852537751197815, "learning_rate": 1.9329029081447968e-05, "loss": 0.6004, "step": 8631 }, { "epoch": 0.2370126304228446, "grad_norm": 0.3445485234260559, "learning_rate": 1.9328873535845223e-05, "loss": 0.559, "step": 8632 }, { "epoch": 0.2370400878638111, "grad_norm": 0.35131844878196716, "learning_rate": 1.9328717972841155e-05, "loss": 0.4857, "step": 8633 }, { "epoch": 0.23706754530477758, "grad_norm": 0.39153027534484863, "learning_rate": 1.932856239243605e-05, "loss": 0.5592, "step": 8634 }, { "epoch": 0.2370950027457441, "grad_norm": 0.37044039368629456, "learning_rate": 1.9328406794630196e-05, "loss": 0.4759, "step": 8635 }, { "epoch": 0.2371224601867106, "grad_norm": 0.36120831966400146, "learning_rate": 1.9328251179423888e-05, "loss": 0.5587, "step": 8636 }, { "epoch": 0.2371499176276771, "grad_norm": 0.41824790835380554, "learning_rate": 1.9328095546817415e-05, "loss": 0.4619, "step": 8637 }, { "epoch": 0.23717737506864361, "grad_norm": 0.47333577275276184, "learning_rate": 1.9327939896811063e-05, "loss": 0.6522, "step": 8638 }, { "epoch": 0.2372048325096101, "grad_norm": 0.36484938859939575, "learning_rate": 1.932778422940513e-05, "loss": 0.5696, "step": 8639 }, { "epoch": 0.2372322899505766, "grad_norm": 0.3691408634185791, "learning_rate": 1.93276285445999e-05, "loss": 0.5048, "step": 8640 }, { "epoch": 0.2372597473915431, "grad_norm": 0.38754594326019287, "learning_rate": 1.9327472842395666e-05, "loss": 0.5647, "step": 8641 }, { "epoch": 0.23728720483250962, "grad_norm": 0.33950772881507874, "learning_rate": 1.932731712279272e-05, "loss": 0.4531, "step": 8642 }, { "epoch": 0.2373146622734761, "grad_norm": 0.3555997908115387, "learning_rate": 1.9327161385791348e-05, "loss": 0.5162, "step": 8643 }, { "epoch": 0.2373421197144426, "grad_norm": 0.34732234477996826, "learning_rate": 1.9327005631391844e-05, "loss": 0.4566, "step": 8644 }, { "epoch": 0.23736957715540913, "grad_norm": 0.3446820378303528, "learning_rate": 1.93268498595945e-05, "loss": 0.4989, "step": 8645 }, { "epoch": 0.23739703459637562, "grad_norm": 0.3615168035030365, "learning_rate": 1.93266940703996e-05, "loss": 0.5684, "step": 8646 }, { "epoch": 0.23742449203734212, "grad_norm": 0.3305380642414093, "learning_rate": 1.9326538263807442e-05, "loss": 0.5011, "step": 8647 }, { "epoch": 0.2374519494783086, "grad_norm": 0.3770204484462738, "learning_rate": 1.9326382439818313e-05, "loss": 0.524, "step": 8648 }, { "epoch": 0.23747940691927513, "grad_norm": 0.344301700592041, "learning_rate": 1.9326226598432503e-05, "loss": 0.5387, "step": 8649 }, { "epoch": 0.23750686436024163, "grad_norm": 0.3580450415611267, "learning_rate": 1.9326070739650307e-05, "loss": 0.4837, "step": 8650 }, { "epoch": 0.23753432180120812, "grad_norm": 0.3390201926231384, "learning_rate": 1.932591486347201e-05, "loss": 0.4695, "step": 8651 }, { "epoch": 0.23756177924217464, "grad_norm": 0.38818982243537903, "learning_rate": 1.9325758969897904e-05, "loss": 0.551, "step": 8652 }, { "epoch": 0.23758923668314114, "grad_norm": 0.39187362790107727, "learning_rate": 1.932560305892828e-05, "loss": 0.5617, "step": 8653 }, { "epoch": 0.23761669412410763, "grad_norm": 0.7717456221580505, "learning_rate": 1.9325447130563435e-05, "loss": 0.5353, "step": 8654 }, { "epoch": 0.23764415156507412, "grad_norm": 0.3545801341533661, "learning_rate": 1.932529118480365e-05, "loss": 0.5838, "step": 8655 }, { "epoch": 0.23767160900604065, "grad_norm": 0.3680892288684845, "learning_rate": 1.932513522164922e-05, "loss": 0.5548, "step": 8656 }, { "epoch": 0.23769906644700714, "grad_norm": 0.546219527721405, "learning_rate": 1.9324979241100442e-05, "loss": 0.5219, "step": 8657 }, { "epoch": 0.23772652388797363, "grad_norm": 0.3776657283306122, "learning_rate": 1.93248232431576e-05, "loss": 0.4143, "step": 8658 }, { "epoch": 0.23775398132894016, "grad_norm": 0.3670983612537384, "learning_rate": 1.9324667227820985e-05, "loss": 0.4836, "step": 8659 }, { "epoch": 0.23778143876990665, "grad_norm": 0.3388442397117615, "learning_rate": 1.932451119509089e-05, "loss": 0.4669, "step": 8660 }, { "epoch": 0.23780889621087314, "grad_norm": 0.5087418556213379, "learning_rate": 1.9324355144967606e-05, "loss": 0.5226, "step": 8661 }, { "epoch": 0.23783635365183964, "grad_norm": 0.3978942632675171, "learning_rate": 1.9324199077451423e-05, "loss": 0.6166, "step": 8662 }, { "epoch": 0.23786381109280616, "grad_norm": 0.3360753059387207, "learning_rate": 1.9324042992542635e-05, "loss": 0.4061, "step": 8663 }, { "epoch": 0.23789126853377265, "grad_norm": 0.3519030213356018, "learning_rate": 1.9323886890241526e-05, "loss": 0.5506, "step": 8664 }, { "epoch": 0.23791872597473915, "grad_norm": 0.43568044900894165, "learning_rate": 1.9323730770548398e-05, "loss": 0.4782, "step": 8665 }, { "epoch": 0.23794618341570567, "grad_norm": 0.3485465943813324, "learning_rate": 1.9323574633463532e-05, "loss": 0.6398, "step": 8666 }, { "epoch": 0.23797364085667216, "grad_norm": 0.37835636734962463, "learning_rate": 1.9323418478987226e-05, "loss": 0.5909, "step": 8667 }, { "epoch": 0.23800109829763866, "grad_norm": 0.3837113380432129, "learning_rate": 1.9323262307119765e-05, "loss": 0.5251, "step": 8668 }, { "epoch": 0.23802855573860515, "grad_norm": 0.3711813986301422, "learning_rate": 1.9323106117861448e-05, "loss": 0.5265, "step": 8669 }, { "epoch": 0.23805601317957167, "grad_norm": 0.4221333861351013, "learning_rate": 1.932294991121256e-05, "loss": 0.5585, "step": 8670 }, { "epoch": 0.23808347062053817, "grad_norm": 0.3598327338695526, "learning_rate": 1.9322793687173397e-05, "loss": 0.5587, "step": 8671 }, { "epoch": 0.23811092806150466, "grad_norm": 0.36905649304389954, "learning_rate": 1.932263744574425e-05, "loss": 0.5679, "step": 8672 }, { "epoch": 0.23813838550247116, "grad_norm": 0.4212762117385864, "learning_rate": 1.9322481186925404e-05, "loss": 0.6078, "step": 8673 }, { "epoch": 0.23816584294343768, "grad_norm": 0.3412187993526459, "learning_rate": 1.932232491071716e-05, "loss": 0.5184, "step": 8674 }, { "epoch": 0.23819330038440417, "grad_norm": 0.6421278119087219, "learning_rate": 1.93221686171198e-05, "loss": 0.5635, "step": 8675 }, { "epoch": 0.23822075782537067, "grad_norm": 0.38099056482315063, "learning_rate": 1.9322012306133623e-05, "loss": 0.6067, "step": 8676 }, { "epoch": 0.2382482152663372, "grad_norm": 0.39919885993003845, "learning_rate": 1.9321855977758916e-05, "loss": 0.4628, "step": 8677 }, { "epoch": 0.23827567270730368, "grad_norm": 0.3473449647426605, "learning_rate": 1.9321699631995974e-05, "loss": 0.4701, "step": 8678 }, { "epoch": 0.23830313014827018, "grad_norm": 0.48190218210220337, "learning_rate": 1.9321543268845085e-05, "loss": 0.5611, "step": 8679 }, { "epoch": 0.23833058758923667, "grad_norm": 0.3256576657295227, "learning_rate": 1.9321386888306545e-05, "loss": 0.5129, "step": 8680 }, { "epoch": 0.2383580450302032, "grad_norm": 0.3790212571620941, "learning_rate": 1.9321230490380644e-05, "loss": 0.5258, "step": 8681 }, { "epoch": 0.23838550247116969, "grad_norm": 0.37949302792549133, "learning_rate": 1.932107407506767e-05, "loss": 0.5427, "step": 8682 }, { "epoch": 0.23841295991213618, "grad_norm": 0.35003527998924255, "learning_rate": 1.932091764236792e-05, "loss": 0.4264, "step": 8683 }, { "epoch": 0.2384404173531027, "grad_norm": 0.3364484906196594, "learning_rate": 1.9320761192281686e-05, "loss": 0.5557, "step": 8684 }, { "epoch": 0.2384678747940692, "grad_norm": 0.3961993157863617, "learning_rate": 1.9320604724809254e-05, "loss": 0.5091, "step": 8685 }, { "epoch": 0.2384953322350357, "grad_norm": 0.4192810356616974, "learning_rate": 1.9320448239950922e-05, "loss": 0.5523, "step": 8686 }, { "epoch": 0.23852278967600218, "grad_norm": 0.3533271551132202, "learning_rate": 1.9320291737706975e-05, "loss": 0.5796, "step": 8687 }, { "epoch": 0.2385502471169687, "grad_norm": 0.31446099281311035, "learning_rate": 1.9320135218077713e-05, "loss": 0.5019, "step": 8688 }, { "epoch": 0.2385777045579352, "grad_norm": 0.36499038338661194, "learning_rate": 1.9319978681063426e-05, "loss": 0.4543, "step": 8689 }, { "epoch": 0.2386051619989017, "grad_norm": 0.3847835063934326, "learning_rate": 1.9319822126664397e-05, "loss": 0.5299, "step": 8690 }, { "epoch": 0.23863261943986822, "grad_norm": 0.30964967608451843, "learning_rate": 1.931966555488093e-05, "loss": 0.4225, "step": 8691 }, { "epoch": 0.2386600768808347, "grad_norm": 0.3615293800830841, "learning_rate": 1.931950896571331e-05, "loss": 0.512, "step": 8692 }, { "epoch": 0.2386875343218012, "grad_norm": 0.4327225685119629, "learning_rate": 1.9319352359161836e-05, "loss": 0.5558, "step": 8693 }, { "epoch": 0.2387149917627677, "grad_norm": 0.39535006880760193, "learning_rate": 1.9319195735226793e-05, "loss": 0.595, "step": 8694 }, { "epoch": 0.23874244920373422, "grad_norm": 0.33427637815475464, "learning_rate": 1.9319039093908475e-05, "loss": 0.4256, "step": 8695 }, { "epoch": 0.2387699066447007, "grad_norm": 0.37088125944137573, "learning_rate": 1.9318882435207175e-05, "loss": 0.5316, "step": 8696 }, { "epoch": 0.2387973640856672, "grad_norm": 0.3383795917034149, "learning_rate": 1.9318725759123186e-05, "loss": 0.4943, "step": 8697 }, { "epoch": 0.23882482152663373, "grad_norm": 0.34637120366096497, "learning_rate": 1.9318569065656797e-05, "loss": 0.4782, "step": 8698 }, { "epoch": 0.23885227896760022, "grad_norm": 0.36923933029174805, "learning_rate": 1.9318412354808306e-05, "loss": 0.485, "step": 8699 }, { "epoch": 0.23887973640856672, "grad_norm": 0.37204357981681824, "learning_rate": 1.9318255626577997e-05, "loss": 0.5142, "step": 8700 }, { "epoch": 0.2389071938495332, "grad_norm": 0.3451897203922272, "learning_rate": 1.9318098880966173e-05, "loss": 0.535, "step": 8701 }, { "epoch": 0.23893465129049973, "grad_norm": 0.3818557858467102, "learning_rate": 1.9317942117973114e-05, "loss": 0.5331, "step": 8702 }, { "epoch": 0.23896210873146623, "grad_norm": 0.4101245105266571, "learning_rate": 1.9317785337599125e-05, "loss": 0.4711, "step": 8703 }, { "epoch": 0.23898956617243272, "grad_norm": 0.36616313457489014, "learning_rate": 1.9317628539844488e-05, "loss": 0.5482, "step": 8704 }, { "epoch": 0.23901702361339924, "grad_norm": 0.5665686726570129, "learning_rate": 1.93174717247095e-05, "loss": 0.4904, "step": 8705 }, { "epoch": 0.23904448105436574, "grad_norm": 0.4397428631782532, "learning_rate": 1.9317314892194458e-05, "loss": 0.576, "step": 8706 }, { "epoch": 0.23907193849533223, "grad_norm": 0.3776445984840393, "learning_rate": 1.9317158042299647e-05, "loss": 0.5142, "step": 8707 }, { "epoch": 0.23909939593629873, "grad_norm": 0.4118364751338959, "learning_rate": 1.9317001175025363e-05, "loss": 0.5683, "step": 8708 }, { "epoch": 0.23912685337726525, "grad_norm": 0.382700115442276, "learning_rate": 1.9316844290371898e-05, "loss": 0.5419, "step": 8709 }, { "epoch": 0.23915431081823174, "grad_norm": 0.3689383864402771, "learning_rate": 1.9316687388339543e-05, "loss": 0.5332, "step": 8710 }, { "epoch": 0.23918176825919824, "grad_norm": 0.3800449073314667, "learning_rate": 1.9316530468928596e-05, "loss": 0.5174, "step": 8711 }, { "epoch": 0.23920922570016476, "grad_norm": 0.365041583776474, "learning_rate": 1.9316373532139345e-05, "loss": 0.4734, "step": 8712 }, { "epoch": 0.23923668314113125, "grad_norm": 0.32409483194351196, "learning_rate": 1.931621657797208e-05, "loss": 0.5256, "step": 8713 }, { "epoch": 0.23926414058209775, "grad_norm": 0.3906497061252594, "learning_rate": 1.9316059606427103e-05, "loss": 0.5504, "step": 8714 }, { "epoch": 0.23929159802306424, "grad_norm": 0.3529214560985565, "learning_rate": 1.93159026175047e-05, "loss": 0.5371, "step": 8715 }, { "epoch": 0.23931905546403076, "grad_norm": 0.37346965074539185, "learning_rate": 1.9315745611205167e-05, "loss": 0.5235, "step": 8716 }, { "epoch": 0.23934651290499726, "grad_norm": 0.3317355513572693, "learning_rate": 1.931558858752879e-05, "loss": 0.4945, "step": 8717 }, { "epoch": 0.23937397034596375, "grad_norm": 0.34470245242118835, "learning_rate": 1.9315431546475872e-05, "loss": 0.4978, "step": 8718 }, { "epoch": 0.23940142778693027, "grad_norm": 0.39471694827079773, "learning_rate": 1.93152744880467e-05, "loss": 0.5327, "step": 8719 }, { "epoch": 0.23942888522789676, "grad_norm": 0.3433377146720886, "learning_rate": 1.9315117412241568e-05, "loss": 0.5221, "step": 8720 }, { "epoch": 0.23945634266886326, "grad_norm": 0.39297083020210266, "learning_rate": 1.9314960319060768e-05, "loss": 0.52, "step": 8721 }, { "epoch": 0.23948380010982975, "grad_norm": 0.3467468321323395, "learning_rate": 1.9314803208504592e-05, "loss": 0.4845, "step": 8722 }, { "epoch": 0.23951125755079627, "grad_norm": 0.4292147159576416, "learning_rate": 1.9314646080573337e-05, "loss": 0.5611, "step": 8723 }, { "epoch": 0.23953871499176277, "grad_norm": 0.3624248802661896, "learning_rate": 1.93144889352673e-05, "loss": 0.5289, "step": 8724 }, { "epoch": 0.23956617243272926, "grad_norm": 0.3910753130912781, "learning_rate": 1.9314331772586758e-05, "loss": 0.5386, "step": 8725 }, { "epoch": 0.23959362987369578, "grad_norm": 0.33419308066368103, "learning_rate": 1.931417459253202e-05, "loss": 0.522, "step": 8726 }, { "epoch": 0.23962108731466228, "grad_norm": 0.37947797775268555, "learning_rate": 1.931401739510337e-05, "loss": 0.4804, "step": 8727 }, { "epoch": 0.23964854475562877, "grad_norm": 0.3607715964317322, "learning_rate": 1.9313860180301112e-05, "loss": 0.5428, "step": 8728 }, { "epoch": 0.23967600219659527, "grad_norm": 0.33891913294792175, "learning_rate": 1.9313702948125526e-05, "loss": 0.5156, "step": 8729 }, { "epoch": 0.2397034596375618, "grad_norm": 0.4001785218715668, "learning_rate": 1.9313545698576912e-05, "loss": 0.5244, "step": 8730 }, { "epoch": 0.23973091707852828, "grad_norm": 0.4216938018798828, "learning_rate": 1.9313388431655563e-05, "loss": 0.5651, "step": 8731 }, { "epoch": 0.23975837451949478, "grad_norm": 0.35378292202949524, "learning_rate": 1.9313231147361776e-05, "loss": 0.569, "step": 8732 }, { "epoch": 0.2397858319604613, "grad_norm": 0.3383517861366272, "learning_rate": 1.9313073845695837e-05, "loss": 0.4619, "step": 8733 }, { "epoch": 0.2398132894014278, "grad_norm": 0.3643558621406555, "learning_rate": 1.9312916526658042e-05, "loss": 0.5067, "step": 8734 }, { "epoch": 0.2398407468423943, "grad_norm": 0.33568063378334045, "learning_rate": 1.9312759190248686e-05, "loss": 0.445, "step": 8735 }, { "epoch": 0.23986820428336078, "grad_norm": 0.3454386293888092, "learning_rate": 1.9312601836468063e-05, "loss": 0.5796, "step": 8736 }, { "epoch": 0.2398956617243273, "grad_norm": 0.36241310834884644, "learning_rate": 1.9312444465316467e-05, "loss": 0.5299, "step": 8737 }, { "epoch": 0.2399231191652938, "grad_norm": 0.4499903619289398, "learning_rate": 1.9312287076794188e-05, "loss": 0.6538, "step": 8738 }, { "epoch": 0.2399505766062603, "grad_norm": 0.44785594940185547, "learning_rate": 1.931212967090152e-05, "loss": 0.5807, "step": 8739 }, { "epoch": 0.23997803404722678, "grad_norm": 0.5055844187736511, "learning_rate": 1.931197224763876e-05, "loss": 0.4739, "step": 8740 }, { "epoch": 0.2400054914881933, "grad_norm": 0.3270832598209381, "learning_rate": 1.93118148070062e-05, "loss": 0.5022, "step": 8741 }, { "epoch": 0.2400329489291598, "grad_norm": 0.35608938336372375, "learning_rate": 1.9311657349004133e-05, "loss": 0.5874, "step": 8742 }, { "epoch": 0.2400604063701263, "grad_norm": 0.3691215217113495, "learning_rate": 1.9311499873632852e-05, "loss": 0.5293, "step": 8743 }, { "epoch": 0.24008786381109282, "grad_norm": 0.49517306685447693, "learning_rate": 1.931134238089265e-05, "loss": 0.5066, "step": 8744 }, { "epoch": 0.2401153212520593, "grad_norm": 0.3925166428089142, "learning_rate": 1.9311184870783828e-05, "loss": 0.5097, "step": 8745 }, { "epoch": 0.2401427786930258, "grad_norm": 0.3840397298336029, "learning_rate": 1.931102734330667e-05, "loss": 0.5847, "step": 8746 }, { "epoch": 0.2401702361339923, "grad_norm": 0.31489264965057373, "learning_rate": 1.9310869798461476e-05, "loss": 0.4619, "step": 8747 }, { "epoch": 0.24019769357495882, "grad_norm": 0.376049667596817, "learning_rate": 1.9310712236248535e-05, "loss": 0.478, "step": 8748 }, { "epoch": 0.24022515101592531, "grad_norm": 0.3488622307777405, "learning_rate": 1.931055465666815e-05, "loss": 0.5954, "step": 8749 }, { "epoch": 0.2402526084568918, "grad_norm": 0.35344359278678894, "learning_rate": 1.9310397059720606e-05, "loss": 0.5639, "step": 8750 }, { "epoch": 0.24028006589785833, "grad_norm": 0.4201376140117645, "learning_rate": 1.93102394454062e-05, "loss": 0.5325, "step": 8751 }, { "epoch": 0.24030752333882482, "grad_norm": 0.3509422242641449, "learning_rate": 1.9310081813725225e-05, "loss": 0.5505, "step": 8752 }, { "epoch": 0.24033498077979132, "grad_norm": 0.3810165226459503, "learning_rate": 1.930992416467798e-05, "loss": 0.4481, "step": 8753 }, { "epoch": 0.2403624382207578, "grad_norm": 0.42471763491630554, "learning_rate": 1.930976649826475e-05, "loss": 0.6068, "step": 8754 }, { "epoch": 0.24038989566172433, "grad_norm": 0.388739675283432, "learning_rate": 1.9309608814485836e-05, "loss": 0.5523, "step": 8755 }, { "epoch": 0.24041735310269083, "grad_norm": 0.352566659450531, "learning_rate": 1.930945111334153e-05, "loss": 0.5423, "step": 8756 }, { "epoch": 0.24044481054365732, "grad_norm": 0.4109717905521393, "learning_rate": 1.9309293394832125e-05, "loss": 0.5499, "step": 8757 }, { "epoch": 0.24047226798462384, "grad_norm": 1.2370877265930176, "learning_rate": 1.930913565895792e-05, "loss": 0.594, "step": 8758 }, { "epoch": 0.24049972542559034, "grad_norm": 0.4043298363685608, "learning_rate": 1.9308977905719203e-05, "loss": 0.604, "step": 8759 }, { "epoch": 0.24052718286655683, "grad_norm": 0.3620893657207489, "learning_rate": 1.9308820135116275e-05, "loss": 0.517, "step": 8760 }, { "epoch": 0.24055464030752333, "grad_norm": 0.38919132947921753, "learning_rate": 1.9308662347149423e-05, "loss": 0.568, "step": 8761 }, { "epoch": 0.24058209774848985, "grad_norm": 0.3775987923145294, "learning_rate": 1.9308504541818944e-05, "loss": 0.5283, "step": 8762 }, { "epoch": 0.24060955518945634, "grad_norm": 0.3933328688144684, "learning_rate": 1.9308346719125136e-05, "loss": 0.5502, "step": 8763 }, { "epoch": 0.24063701263042284, "grad_norm": 0.3943025469779968, "learning_rate": 1.930818887906829e-05, "loss": 0.4934, "step": 8764 }, { "epoch": 0.24066447007138936, "grad_norm": 0.32819873094558716, "learning_rate": 1.93080310216487e-05, "loss": 0.487, "step": 8765 }, { "epoch": 0.24069192751235585, "grad_norm": 0.3633822500705719, "learning_rate": 1.930787314686666e-05, "loss": 0.5238, "step": 8766 }, { "epoch": 0.24071938495332235, "grad_norm": 0.45222803950309753, "learning_rate": 1.9307715254722468e-05, "loss": 0.6686, "step": 8767 }, { "epoch": 0.24074684239428884, "grad_norm": 0.3346841633319855, "learning_rate": 1.9307557345216413e-05, "loss": 0.5024, "step": 8768 }, { "epoch": 0.24077429983525536, "grad_norm": 0.37977755069732666, "learning_rate": 1.9307399418348798e-05, "loss": 0.4808, "step": 8769 }, { "epoch": 0.24080175727622186, "grad_norm": 0.374072402715683, "learning_rate": 1.930724147411991e-05, "loss": 0.447, "step": 8770 }, { "epoch": 0.24082921471718835, "grad_norm": 0.3875643014907837, "learning_rate": 1.930708351253005e-05, "loss": 0.6378, "step": 8771 }, { "epoch": 0.24085667215815487, "grad_norm": 0.3488048017024994, "learning_rate": 1.9306925533579503e-05, "loss": 0.5223, "step": 8772 }, { "epoch": 0.24088412959912137, "grad_norm": 0.33946987986564636, "learning_rate": 1.930676753726857e-05, "loss": 0.4766, "step": 8773 }, { "epoch": 0.24091158704008786, "grad_norm": 0.3605501055717468, "learning_rate": 1.9306609523597547e-05, "loss": 0.5711, "step": 8774 }, { "epoch": 0.24093904448105435, "grad_norm": 0.41460269689559937, "learning_rate": 1.9306451492566726e-05, "loss": 0.5989, "step": 8775 }, { "epoch": 0.24096650192202088, "grad_norm": 0.3465343415737152, "learning_rate": 1.9306293444176403e-05, "loss": 0.4464, "step": 8776 }, { "epoch": 0.24099395936298737, "grad_norm": 0.33681365847587585, "learning_rate": 1.930613537842687e-05, "loss": 0.5007, "step": 8777 }, { "epoch": 0.24102141680395386, "grad_norm": 0.33834904432296753, "learning_rate": 1.9305977295318428e-05, "loss": 0.5151, "step": 8778 }, { "epoch": 0.24104887424492039, "grad_norm": 0.3941114842891693, "learning_rate": 1.9305819194851367e-05, "loss": 0.5245, "step": 8779 }, { "epoch": 0.24107633168588688, "grad_norm": 0.3722449541091919, "learning_rate": 1.9305661077025982e-05, "loss": 0.4733, "step": 8780 }, { "epoch": 0.24110378912685337, "grad_norm": 0.37828347086906433, "learning_rate": 1.9305502941842574e-05, "loss": 0.5233, "step": 8781 }, { "epoch": 0.24113124656781987, "grad_norm": 0.39776018261909485, "learning_rate": 1.930534478930143e-05, "loss": 0.3782, "step": 8782 }, { "epoch": 0.2411587040087864, "grad_norm": 0.3979213535785675, "learning_rate": 1.9305186619402846e-05, "loss": 0.5411, "step": 8783 }, { "epoch": 0.24118616144975288, "grad_norm": 0.387920081615448, "learning_rate": 1.930502843214712e-05, "loss": 0.5494, "step": 8784 }, { "epoch": 0.24121361889071938, "grad_norm": 0.3695265054702759, "learning_rate": 1.9304870227534547e-05, "loss": 0.5182, "step": 8785 }, { "epoch": 0.2412410763316859, "grad_norm": 0.33115866780281067, "learning_rate": 1.930471200556542e-05, "loss": 0.4944, "step": 8786 }, { "epoch": 0.2412685337726524, "grad_norm": 0.38284412026405334, "learning_rate": 1.930455376624004e-05, "loss": 0.6048, "step": 8787 }, { "epoch": 0.2412959912136189, "grad_norm": 0.3611135482788086, "learning_rate": 1.9304395509558694e-05, "loss": 0.4594, "step": 8788 }, { "epoch": 0.24132344865458538, "grad_norm": 0.3107925355434418, "learning_rate": 1.930423723552168e-05, "loss": 0.4426, "step": 8789 }, { "epoch": 0.2413509060955519, "grad_norm": 0.37070703506469727, "learning_rate": 1.9304078944129293e-05, "loss": 0.5816, "step": 8790 }, { "epoch": 0.2413783635365184, "grad_norm": 0.4081724286079407, "learning_rate": 1.930392063538183e-05, "loss": 0.565, "step": 8791 }, { "epoch": 0.2414058209774849, "grad_norm": 0.36235496401786804, "learning_rate": 1.930376230927959e-05, "loss": 0.4946, "step": 8792 }, { "epoch": 0.2414332784184514, "grad_norm": 0.36039504408836365, "learning_rate": 1.9303603965822858e-05, "loss": 0.5361, "step": 8793 }, { "epoch": 0.2414607358594179, "grad_norm": 0.4050566256046295, "learning_rate": 1.930344560501194e-05, "loss": 0.4838, "step": 8794 }, { "epoch": 0.2414881933003844, "grad_norm": 0.36476171016693115, "learning_rate": 1.9303287226847122e-05, "loss": 0.5862, "step": 8795 }, { "epoch": 0.2415156507413509, "grad_norm": 0.4001944065093994, "learning_rate": 1.9303128831328707e-05, "loss": 0.4375, "step": 8796 }, { "epoch": 0.24154310818231742, "grad_norm": 0.3582918047904968, "learning_rate": 1.9302970418456986e-05, "loss": 0.5176, "step": 8797 }, { "epoch": 0.2415705656232839, "grad_norm": 0.3351721465587616, "learning_rate": 1.9302811988232256e-05, "loss": 0.5227, "step": 8798 }, { "epoch": 0.2415980230642504, "grad_norm": 0.3836292624473572, "learning_rate": 1.9302653540654814e-05, "loss": 0.5974, "step": 8799 }, { "epoch": 0.24162548050521693, "grad_norm": 0.3444044888019562, "learning_rate": 1.9302495075724955e-05, "loss": 0.4979, "step": 8800 }, { "epoch": 0.24165293794618342, "grad_norm": 0.4008808732032776, "learning_rate": 1.930233659344297e-05, "loss": 0.5521, "step": 8801 }, { "epoch": 0.24168039538714992, "grad_norm": 0.392223984003067, "learning_rate": 1.930217809380916e-05, "loss": 0.5106, "step": 8802 }, { "epoch": 0.2417078528281164, "grad_norm": 0.31517598032951355, "learning_rate": 1.9302019576823824e-05, "loss": 0.4558, "step": 8803 }, { "epoch": 0.24173531026908293, "grad_norm": 0.33643049001693726, "learning_rate": 1.9301861042487243e-05, "loss": 0.4479, "step": 8804 }, { "epoch": 0.24176276771004943, "grad_norm": 0.4739621579647064, "learning_rate": 1.930170249079973e-05, "loss": 0.5035, "step": 8805 }, { "epoch": 0.24179022515101592, "grad_norm": 0.39752477407455444, "learning_rate": 1.930154392176157e-05, "loss": 0.5977, "step": 8806 }, { "epoch": 0.2418176825919824, "grad_norm": 0.37203168869018555, "learning_rate": 1.9301385335373063e-05, "loss": 0.5173, "step": 8807 }, { "epoch": 0.24184514003294894, "grad_norm": 0.36752045154571533, "learning_rate": 1.9301226731634505e-05, "loss": 0.5604, "step": 8808 }, { "epoch": 0.24187259747391543, "grad_norm": 0.37713170051574707, "learning_rate": 1.9301068110546185e-05, "loss": 0.5305, "step": 8809 }, { "epoch": 0.24190005491488192, "grad_norm": 0.360727995634079, "learning_rate": 1.9300909472108407e-05, "loss": 0.4903, "step": 8810 }, { "epoch": 0.24192751235584845, "grad_norm": 0.46317192912101746, "learning_rate": 1.9300750816321467e-05, "loss": 0.6315, "step": 8811 }, { "epoch": 0.24195496979681494, "grad_norm": 0.3427072763442993, "learning_rate": 1.9300592143185656e-05, "loss": 0.5287, "step": 8812 }, { "epoch": 0.24198242723778143, "grad_norm": 0.38186538219451904, "learning_rate": 1.930043345270127e-05, "loss": 0.5232, "step": 8813 }, { "epoch": 0.24200988467874793, "grad_norm": 0.33103299140930176, "learning_rate": 1.9300274744868615e-05, "loss": 0.4996, "step": 8814 }, { "epoch": 0.24203734211971445, "grad_norm": 0.4639434814453125, "learning_rate": 1.9300116019687972e-05, "loss": 0.6244, "step": 8815 }, { "epoch": 0.24206479956068094, "grad_norm": 0.372749388217926, "learning_rate": 1.929995727715965e-05, "loss": 0.5585, "step": 8816 }, { "epoch": 0.24209225700164744, "grad_norm": 0.36547544598579407, "learning_rate": 1.9299798517283935e-05, "loss": 0.4899, "step": 8817 }, { "epoch": 0.24211971444261396, "grad_norm": 0.3483883738517761, "learning_rate": 1.9299639740061127e-05, "loss": 0.461, "step": 8818 }, { "epoch": 0.24214717188358045, "grad_norm": 0.3713272213935852, "learning_rate": 1.9299480945491528e-05, "loss": 0.5495, "step": 8819 }, { "epoch": 0.24217462932454695, "grad_norm": 0.3524238169193268, "learning_rate": 1.9299322133575426e-05, "loss": 0.5915, "step": 8820 }, { "epoch": 0.24220208676551344, "grad_norm": 0.37169042229652405, "learning_rate": 1.929916330431312e-05, "loss": 0.5572, "step": 8821 }, { "epoch": 0.24222954420647996, "grad_norm": 0.3970213830471039, "learning_rate": 1.9299004457704906e-05, "loss": 0.5227, "step": 8822 }, { "epoch": 0.24225700164744646, "grad_norm": 0.3722146153450012, "learning_rate": 1.9298845593751077e-05, "loss": 0.5587, "step": 8823 }, { "epoch": 0.24228445908841295, "grad_norm": 0.37391263246536255, "learning_rate": 1.9298686712451938e-05, "loss": 0.4914, "step": 8824 }, { "epoch": 0.24231191652937947, "grad_norm": 0.37321266531944275, "learning_rate": 1.929852781380778e-05, "loss": 0.5748, "step": 8825 }, { "epoch": 0.24233937397034597, "grad_norm": 0.3978263735771179, "learning_rate": 1.92983688978189e-05, "loss": 0.6019, "step": 8826 }, { "epoch": 0.24236683141131246, "grad_norm": 0.3377845883369446, "learning_rate": 1.9298209964485594e-05, "loss": 0.5967, "step": 8827 }, { "epoch": 0.24239428885227896, "grad_norm": 0.3157040476799011, "learning_rate": 1.929805101380816e-05, "loss": 0.5186, "step": 8828 }, { "epoch": 0.24242174629324548, "grad_norm": 0.370583176612854, "learning_rate": 1.929789204578689e-05, "loss": 0.547, "step": 8829 }, { "epoch": 0.24244920373421197, "grad_norm": 0.36539730429649353, "learning_rate": 1.9297733060422086e-05, "loss": 0.4814, "step": 8830 }, { "epoch": 0.24247666117517847, "grad_norm": 0.3976539671421051, "learning_rate": 1.9297574057714042e-05, "loss": 0.5703, "step": 8831 }, { "epoch": 0.242504118616145, "grad_norm": 0.417807012796402, "learning_rate": 1.9297415037663056e-05, "loss": 0.5014, "step": 8832 }, { "epoch": 0.24253157605711148, "grad_norm": 0.4118380546569824, "learning_rate": 1.9297256000269423e-05, "loss": 0.5271, "step": 8833 }, { "epoch": 0.24255903349807797, "grad_norm": 0.414305180311203, "learning_rate": 1.9297096945533437e-05, "loss": 0.5311, "step": 8834 }, { "epoch": 0.24258649093904447, "grad_norm": 0.4518028795719147, "learning_rate": 1.9296937873455404e-05, "loss": 0.588, "step": 8835 }, { "epoch": 0.242613948380011, "grad_norm": 0.36559179425239563, "learning_rate": 1.929677878403561e-05, "loss": 0.515, "step": 8836 }, { "epoch": 0.24264140582097748, "grad_norm": 0.32477259635925293, "learning_rate": 1.9296619677274358e-05, "loss": 0.4318, "step": 8837 }, { "epoch": 0.24266886326194398, "grad_norm": 0.3472536504268646, "learning_rate": 1.9296460553171944e-05, "loss": 0.448, "step": 8838 }, { "epoch": 0.2426963207029105, "grad_norm": 0.3629855811595917, "learning_rate": 1.929630141172866e-05, "loss": 0.5885, "step": 8839 }, { "epoch": 0.242723778143877, "grad_norm": 0.37134453654289246, "learning_rate": 1.929614225294481e-05, "loss": 0.5195, "step": 8840 }, { "epoch": 0.2427512355848435, "grad_norm": 0.35730916261672974, "learning_rate": 1.9295983076820687e-05, "loss": 0.5588, "step": 8841 }, { "epoch": 0.24277869302580998, "grad_norm": 0.31360548734664917, "learning_rate": 1.9295823883356592e-05, "loss": 0.3532, "step": 8842 }, { "epoch": 0.2428061504667765, "grad_norm": 0.3425596058368683, "learning_rate": 1.9295664672552814e-05, "loss": 0.5087, "step": 8843 }, { "epoch": 0.242833607907743, "grad_norm": 0.3623373508453369, "learning_rate": 1.9295505444409658e-05, "loss": 0.5199, "step": 8844 }, { "epoch": 0.2428610653487095, "grad_norm": 0.38768088817596436, "learning_rate": 1.9295346198927413e-05, "loss": 0.4664, "step": 8845 }, { "epoch": 0.24288852278967601, "grad_norm": 0.3735514283180237, "learning_rate": 1.9295186936106386e-05, "loss": 0.4974, "step": 8846 }, { "epoch": 0.2429159802306425, "grad_norm": 0.3841317594051361, "learning_rate": 1.9295027655946863e-05, "loss": 0.5432, "step": 8847 }, { "epoch": 0.242943437671609, "grad_norm": 0.3236297369003296, "learning_rate": 1.929486835844915e-05, "loss": 0.4912, "step": 8848 }, { "epoch": 0.2429708951125755, "grad_norm": 0.40409380197525024, "learning_rate": 1.929470904361354e-05, "loss": 0.5848, "step": 8849 }, { "epoch": 0.24299835255354202, "grad_norm": 0.395097941160202, "learning_rate": 1.9294549711440334e-05, "loss": 0.553, "step": 8850 }, { "epoch": 0.2430258099945085, "grad_norm": 0.35927122831344604, "learning_rate": 1.9294390361929825e-05, "loss": 0.4683, "step": 8851 }, { "epoch": 0.243053267435475, "grad_norm": 0.405039519071579, "learning_rate": 1.929423099508231e-05, "loss": 0.5249, "step": 8852 }, { "epoch": 0.24308072487644153, "grad_norm": 0.39939430356025696, "learning_rate": 1.9294071610898088e-05, "loss": 0.5465, "step": 8853 }, { "epoch": 0.24310818231740802, "grad_norm": 0.389818012714386, "learning_rate": 1.9293912209377455e-05, "loss": 0.6072, "step": 8854 }, { "epoch": 0.24313563975837452, "grad_norm": 0.32416054606437683, "learning_rate": 1.9293752790520712e-05, "loss": 0.4839, "step": 8855 }, { "epoch": 0.243163097199341, "grad_norm": 0.45535510778427124, "learning_rate": 1.929359335432815e-05, "loss": 0.4554, "step": 8856 }, { "epoch": 0.24319055464030753, "grad_norm": 0.3382176458835602, "learning_rate": 1.929343390080008e-05, "loss": 0.5426, "step": 8857 }, { "epoch": 0.24321801208127403, "grad_norm": 0.38820210099220276, "learning_rate": 1.9293274429936783e-05, "loss": 0.5334, "step": 8858 }, { "epoch": 0.24324546952224052, "grad_norm": 0.35064661502838135, "learning_rate": 1.929311494173856e-05, "loss": 0.4797, "step": 8859 }, { "epoch": 0.24327292696320704, "grad_norm": 0.3924979567527771, "learning_rate": 1.9292955436205715e-05, "loss": 0.4988, "step": 8860 }, { "epoch": 0.24330038440417354, "grad_norm": 0.3647165894508362, "learning_rate": 1.9292795913338543e-05, "loss": 0.5226, "step": 8861 }, { "epoch": 0.24332784184514003, "grad_norm": 0.330352783203125, "learning_rate": 1.9292636373137337e-05, "loss": 0.494, "step": 8862 }, { "epoch": 0.24335529928610652, "grad_norm": 0.37149685621261597, "learning_rate": 1.92924768156024e-05, "loss": 0.5742, "step": 8863 }, { "epoch": 0.24338275672707305, "grad_norm": 0.34584543108940125, "learning_rate": 1.929231724073403e-05, "loss": 0.5643, "step": 8864 }, { "epoch": 0.24341021416803954, "grad_norm": 0.3584834933280945, "learning_rate": 1.929215764853252e-05, "loss": 0.5031, "step": 8865 }, { "epoch": 0.24343767160900603, "grad_norm": 0.40060994029045105, "learning_rate": 1.929199803899817e-05, "loss": 0.5779, "step": 8866 }, { "epoch": 0.24346512904997256, "grad_norm": 0.41579535603523254, "learning_rate": 1.929183841213128e-05, "loss": 0.5293, "step": 8867 }, { "epoch": 0.24349258649093905, "grad_norm": 0.3597356379032135, "learning_rate": 1.929167876793215e-05, "loss": 0.5284, "step": 8868 }, { "epoch": 0.24352004393190554, "grad_norm": 0.36807945370674133, "learning_rate": 1.9291519106401065e-05, "loss": 0.5301, "step": 8869 }, { "epoch": 0.24354750137287204, "grad_norm": 0.3535303771495819, "learning_rate": 1.9291359427538336e-05, "loss": 0.5599, "step": 8870 }, { "epoch": 0.24357495881383856, "grad_norm": 0.38852062821388245, "learning_rate": 1.9291199731344255e-05, "loss": 0.4815, "step": 8871 }, { "epoch": 0.24360241625480505, "grad_norm": 0.4124987721443176, "learning_rate": 1.9291040017819122e-05, "loss": 0.5497, "step": 8872 }, { "epoch": 0.24362987369577155, "grad_norm": 0.35583651065826416, "learning_rate": 1.9290880286963233e-05, "loss": 0.5856, "step": 8873 }, { "epoch": 0.24365733113673804, "grad_norm": 0.41346269845962524, "learning_rate": 1.9290720538776888e-05, "loss": 0.5309, "step": 8874 }, { "epoch": 0.24368478857770456, "grad_norm": 0.3889893591403961, "learning_rate": 1.9290560773260384e-05, "loss": 0.5298, "step": 8875 }, { "epoch": 0.24371224601867106, "grad_norm": 0.8197377920150757, "learning_rate": 1.9290400990414017e-05, "loss": 0.6535, "step": 8876 }, { "epoch": 0.24373970345963755, "grad_norm": 0.3761925995349884, "learning_rate": 1.9290241190238087e-05, "loss": 0.568, "step": 8877 }, { "epoch": 0.24376716090060407, "grad_norm": 0.3553387224674225, "learning_rate": 1.9290081372732893e-05, "loss": 0.4877, "step": 8878 }, { "epoch": 0.24379461834157057, "grad_norm": 0.39596959948539734, "learning_rate": 1.9289921537898736e-05, "loss": 0.5323, "step": 8879 }, { "epoch": 0.24382207578253706, "grad_norm": 0.456145316362381, "learning_rate": 1.9289761685735905e-05, "loss": 0.475, "step": 8880 }, { "epoch": 0.24384953322350356, "grad_norm": 0.45268484950065613, "learning_rate": 1.9289601816244708e-05, "loss": 0.5306, "step": 8881 }, { "epoch": 0.24387699066447008, "grad_norm": 0.33775749802589417, "learning_rate": 1.9289441929425435e-05, "loss": 0.492, "step": 8882 }, { "epoch": 0.24390444810543657, "grad_norm": 0.39855024218559265, "learning_rate": 1.928928202527839e-05, "loss": 0.5747, "step": 8883 }, { "epoch": 0.24393190554640307, "grad_norm": 0.36737215518951416, "learning_rate": 1.9289122103803866e-05, "loss": 0.5899, "step": 8884 }, { "epoch": 0.2439593629873696, "grad_norm": 0.4010552763938904, "learning_rate": 1.9288962165002168e-05, "loss": 0.5359, "step": 8885 }, { "epoch": 0.24398682042833608, "grad_norm": 0.34777408838272095, "learning_rate": 1.9288802208873588e-05, "loss": 0.5474, "step": 8886 }, { "epoch": 0.24401427786930258, "grad_norm": 0.3867762088775635, "learning_rate": 1.928864223541843e-05, "loss": 0.4944, "step": 8887 }, { "epoch": 0.24404173531026907, "grad_norm": 0.5653756260871887, "learning_rate": 1.9288482244636987e-05, "loss": 0.5047, "step": 8888 }, { "epoch": 0.2440691927512356, "grad_norm": 0.37851250171661377, "learning_rate": 1.9288322236529563e-05, "loss": 0.6185, "step": 8889 }, { "epoch": 0.24409665019220209, "grad_norm": 0.3492375314235687, "learning_rate": 1.9288162211096452e-05, "loss": 0.5129, "step": 8890 }, { "epoch": 0.24412410763316858, "grad_norm": 0.6131927371025085, "learning_rate": 1.9288002168337953e-05, "loss": 0.5597, "step": 8891 }, { "epoch": 0.2441515650741351, "grad_norm": 0.39827340841293335, "learning_rate": 1.928784210825437e-05, "loss": 0.5758, "step": 8892 }, { "epoch": 0.2441790225151016, "grad_norm": 0.36514872312545776, "learning_rate": 1.9287682030845995e-05, "loss": 0.5919, "step": 8893 }, { "epoch": 0.2442064799560681, "grad_norm": 0.35158678889274597, "learning_rate": 1.9287521936113124e-05, "loss": 0.4632, "step": 8894 }, { "epoch": 0.24423393739703458, "grad_norm": 0.3559192717075348, "learning_rate": 1.9287361824056065e-05, "loss": 0.5547, "step": 8895 }, { "epoch": 0.2442613948380011, "grad_norm": 0.3226472735404968, "learning_rate": 1.9287201694675112e-05, "loss": 0.5593, "step": 8896 }, { "epoch": 0.2442888522789676, "grad_norm": 0.33776840567588806, "learning_rate": 1.9287041547970563e-05, "loss": 0.524, "step": 8897 }, { "epoch": 0.2443163097199341, "grad_norm": 0.38217613101005554, "learning_rate": 1.9286881383942716e-05, "loss": 0.5466, "step": 8898 }, { "epoch": 0.24434376716090062, "grad_norm": 0.33925744891166687, "learning_rate": 1.9286721202591874e-05, "loss": 0.5294, "step": 8899 }, { "epoch": 0.2443712246018671, "grad_norm": 0.3478332757949829, "learning_rate": 1.928656100391833e-05, "loss": 0.616, "step": 8900 }, { "epoch": 0.2443986820428336, "grad_norm": 0.35395899415016174, "learning_rate": 1.928640078792239e-05, "loss": 0.431, "step": 8901 }, { "epoch": 0.2444261394838001, "grad_norm": 0.6209436058998108, "learning_rate": 1.9286240554604344e-05, "loss": 0.5106, "step": 8902 }, { "epoch": 0.24445359692476662, "grad_norm": 0.3773331344127655, "learning_rate": 1.9286080303964495e-05, "loss": 0.4877, "step": 8903 }, { "epoch": 0.2444810543657331, "grad_norm": 0.38577529788017273, "learning_rate": 1.9285920036003145e-05, "loss": 0.5772, "step": 8904 }, { "epoch": 0.2445085118066996, "grad_norm": 0.3526794910430908, "learning_rate": 1.928575975072059e-05, "loss": 0.4405, "step": 8905 }, { "epoch": 0.24453596924766613, "grad_norm": 0.36106443405151367, "learning_rate": 1.928559944811713e-05, "loss": 0.5256, "step": 8906 }, { "epoch": 0.24456342668863262, "grad_norm": 0.36406657099723816, "learning_rate": 1.9285439128193063e-05, "loss": 0.5531, "step": 8907 }, { "epoch": 0.24459088412959912, "grad_norm": 0.38600608706474304, "learning_rate": 1.928527879094869e-05, "loss": 0.5544, "step": 8908 }, { "epoch": 0.2446183415705656, "grad_norm": 0.4203549325466156, "learning_rate": 1.9285118436384305e-05, "loss": 0.5535, "step": 8909 }, { "epoch": 0.24464579901153213, "grad_norm": 0.33707916736602783, "learning_rate": 1.9284958064500215e-05, "loss": 0.5341, "step": 8910 }, { "epoch": 0.24467325645249863, "grad_norm": 0.3737312853336334, "learning_rate": 1.9284797675296713e-05, "loss": 0.5123, "step": 8911 }, { "epoch": 0.24470071389346512, "grad_norm": 0.3659346401691437, "learning_rate": 1.9284637268774098e-05, "loss": 0.578, "step": 8912 }, { "epoch": 0.24472817133443164, "grad_norm": 0.3864384889602661, "learning_rate": 1.9284476844932674e-05, "loss": 0.4758, "step": 8913 }, { "epoch": 0.24475562877539814, "grad_norm": 0.3191125988960266, "learning_rate": 1.9284316403772733e-05, "loss": 0.4794, "step": 8914 }, { "epoch": 0.24478308621636463, "grad_norm": 0.3681592345237732, "learning_rate": 1.9284155945294584e-05, "loss": 0.5158, "step": 8915 }, { "epoch": 0.24481054365733113, "grad_norm": 0.33160915970802307, "learning_rate": 1.928399546949852e-05, "loss": 0.4838, "step": 8916 }, { "epoch": 0.24483800109829765, "grad_norm": 0.3539070785045624, "learning_rate": 1.928383497638484e-05, "loss": 0.5238, "step": 8917 }, { "epoch": 0.24486545853926414, "grad_norm": 0.3628561794757843, "learning_rate": 1.9283674465953843e-05, "loss": 0.5326, "step": 8918 }, { "epoch": 0.24489291598023064, "grad_norm": 0.3470414876937866, "learning_rate": 1.928351393820583e-05, "loss": 0.4669, "step": 8919 }, { "epoch": 0.24492037342119716, "grad_norm": 0.3995512127876282, "learning_rate": 1.9283353393141104e-05, "loss": 0.5369, "step": 8920 }, { "epoch": 0.24494783086216365, "grad_norm": 0.4245094656944275, "learning_rate": 1.928319283075996e-05, "loss": 0.5865, "step": 8921 }, { "epoch": 0.24497528830313015, "grad_norm": 0.4900933802127838, "learning_rate": 1.92830322510627e-05, "loss": 0.51, "step": 8922 }, { "epoch": 0.24500274574409664, "grad_norm": 0.3695622682571411, "learning_rate": 1.928287165404962e-05, "loss": 0.6239, "step": 8923 }, { "epoch": 0.24503020318506316, "grad_norm": 0.44630488753318787, "learning_rate": 1.928271103972102e-05, "loss": 0.5141, "step": 8924 }, { "epoch": 0.24505766062602966, "grad_norm": 0.4476328492164612, "learning_rate": 1.92825504080772e-05, "loss": 0.5495, "step": 8925 }, { "epoch": 0.24508511806699615, "grad_norm": 0.3979041576385498, "learning_rate": 1.9282389759118466e-05, "loss": 0.4479, "step": 8926 }, { "epoch": 0.24511257550796267, "grad_norm": 0.361213356256485, "learning_rate": 1.9282229092845113e-05, "loss": 0.606, "step": 8927 }, { "epoch": 0.24514003294892917, "grad_norm": 0.36312371492385864, "learning_rate": 1.928206840925744e-05, "loss": 0.5019, "step": 8928 }, { "epoch": 0.24516749038989566, "grad_norm": 0.4203580319881439, "learning_rate": 1.928190770835574e-05, "loss": 0.6118, "step": 8929 }, { "epoch": 0.24519494783086215, "grad_norm": 0.7821816205978394, "learning_rate": 1.9281746990140324e-05, "loss": 0.4961, "step": 8930 }, { "epoch": 0.24522240527182868, "grad_norm": 0.3967643678188324, "learning_rate": 1.9281586254611487e-05, "loss": 0.5739, "step": 8931 }, { "epoch": 0.24524986271279517, "grad_norm": 0.3846569061279297, "learning_rate": 1.928142550176953e-05, "loss": 0.5654, "step": 8932 }, { "epoch": 0.24527732015376166, "grad_norm": 0.49462729692459106, "learning_rate": 1.9281264731614753e-05, "loss": 0.5365, "step": 8933 }, { "epoch": 0.24530477759472818, "grad_norm": 0.41566550731658936, "learning_rate": 1.9281103944147452e-05, "loss": 0.5698, "step": 8934 }, { "epoch": 0.24533223503569468, "grad_norm": 0.3757728934288025, "learning_rate": 1.9280943139367933e-05, "loss": 0.5646, "step": 8935 }, { "epoch": 0.24535969247666117, "grad_norm": 0.3690943419933319, "learning_rate": 1.9280782317276493e-05, "loss": 0.4971, "step": 8936 }, { "epoch": 0.24538714991762767, "grad_norm": 0.46816757321357727, "learning_rate": 1.928062147787343e-05, "loss": 0.5058, "step": 8937 }, { "epoch": 0.2454146073585942, "grad_norm": 0.3572867512702942, "learning_rate": 1.928046062115905e-05, "loss": 0.5684, "step": 8938 }, { "epoch": 0.24544206479956068, "grad_norm": 0.3656623363494873, "learning_rate": 1.9280299747133644e-05, "loss": 0.5626, "step": 8939 }, { "epoch": 0.24546952224052718, "grad_norm": 0.34860438108444214, "learning_rate": 1.9280138855797518e-05, "loss": 0.5718, "step": 8940 }, { "epoch": 0.24549697968149367, "grad_norm": 0.35981255769729614, "learning_rate": 1.927997794715097e-05, "loss": 0.4679, "step": 8941 }, { "epoch": 0.2455244371224602, "grad_norm": 0.39586910605430603, "learning_rate": 1.9279817021194304e-05, "loss": 0.6404, "step": 8942 }, { "epoch": 0.2455518945634267, "grad_norm": 0.3561554551124573, "learning_rate": 1.9279656077927815e-05, "loss": 0.5013, "step": 8943 }, { "epoch": 0.24557935200439318, "grad_norm": 0.37397658824920654, "learning_rate": 1.927949511735181e-05, "loss": 0.5021, "step": 8944 }, { "epoch": 0.2456068094453597, "grad_norm": 0.34936854243278503, "learning_rate": 1.927933413946658e-05, "loss": 0.4787, "step": 8945 }, { "epoch": 0.2456342668863262, "grad_norm": 0.3514274060726166, "learning_rate": 1.9279173144272435e-05, "loss": 0.4472, "step": 8946 }, { "epoch": 0.2456617243272927, "grad_norm": 0.3639184236526489, "learning_rate": 1.927901213176967e-05, "loss": 0.5288, "step": 8947 }, { "epoch": 0.24568918176825918, "grad_norm": 0.36766672134399414, "learning_rate": 1.927885110195858e-05, "loss": 0.5239, "step": 8948 }, { "epoch": 0.2457166392092257, "grad_norm": 0.39966461062431335, "learning_rate": 1.9278690054839476e-05, "loss": 0.5708, "step": 8949 }, { "epoch": 0.2457440966501922, "grad_norm": 0.5669352412223816, "learning_rate": 1.9278528990412652e-05, "loss": 0.6326, "step": 8950 }, { "epoch": 0.2457715540911587, "grad_norm": 0.7845436930656433, "learning_rate": 1.9278367908678414e-05, "loss": 0.5685, "step": 8951 }, { "epoch": 0.24579901153212522, "grad_norm": 0.38865017890930176, "learning_rate": 1.9278206809637053e-05, "loss": 0.6086, "step": 8952 }, { "epoch": 0.2458264689730917, "grad_norm": 0.3581632673740387, "learning_rate": 1.927804569328888e-05, "loss": 0.5535, "step": 8953 }, { "epoch": 0.2458539264140582, "grad_norm": 0.3529069721698761, "learning_rate": 1.9277884559634184e-05, "loss": 0.4727, "step": 8954 }, { "epoch": 0.2458813838550247, "grad_norm": 0.40914320945739746, "learning_rate": 1.9277723408673278e-05, "loss": 0.6066, "step": 8955 }, { "epoch": 0.24590884129599122, "grad_norm": 0.3754177689552307, "learning_rate": 1.9277562240406452e-05, "loss": 0.4961, "step": 8956 }, { "epoch": 0.24593629873695771, "grad_norm": 0.3791096806526184, "learning_rate": 1.927740105483402e-05, "loss": 0.532, "step": 8957 }, { "epoch": 0.2459637561779242, "grad_norm": 0.3577145040035248, "learning_rate": 1.9277239851956264e-05, "loss": 0.5199, "step": 8958 }, { "epoch": 0.24599121361889073, "grad_norm": 0.3663995862007141, "learning_rate": 1.9277078631773502e-05, "loss": 0.5314, "step": 8959 }, { "epoch": 0.24601867105985722, "grad_norm": 0.38312116265296936, "learning_rate": 1.9276917394286022e-05, "loss": 0.5449, "step": 8960 }, { "epoch": 0.24604612850082372, "grad_norm": 0.36814674735069275, "learning_rate": 1.9276756139494134e-05, "loss": 0.5114, "step": 8961 }, { "epoch": 0.2460735859417902, "grad_norm": 0.33360207080841064, "learning_rate": 1.927659486739813e-05, "loss": 0.5612, "step": 8962 }, { "epoch": 0.24610104338275673, "grad_norm": 0.34528860449790955, "learning_rate": 1.927643357799832e-05, "loss": 0.5092, "step": 8963 }, { "epoch": 0.24612850082372323, "grad_norm": 0.4145629405975342, "learning_rate": 1.9276272271295e-05, "loss": 0.5454, "step": 8964 }, { "epoch": 0.24615595826468972, "grad_norm": 0.3989708423614502, "learning_rate": 1.9276110947288472e-05, "loss": 0.5243, "step": 8965 }, { "epoch": 0.24618341570565624, "grad_norm": 0.38464537262916565, "learning_rate": 1.9275949605979036e-05, "loss": 0.5368, "step": 8966 }, { "epoch": 0.24621087314662274, "grad_norm": 0.34941402077674866, "learning_rate": 1.9275788247366997e-05, "loss": 0.4657, "step": 8967 }, { "epoch": 0.24623833058758923, "grad_norm": 0.3891195058822632, "learning_rate": 1.927562687145265e-05, "loss": 0.5631, "step": 8968 }, { "epoch": 0.24626578802855573, "grad_norm": 0.426717072725296, "learning_rate": 1.9275465478236296e-05, "loss": 0.6311, "step": 8969 }, { "epoch": 0.24629324546952225, "grad_norm": 0.3893952965736389, "learning_rate": 1.927530406771824e-05, "loss": 0.5565, "step": 8970 }, { "epoch": 0.24632070291048874, "grad_norm": 0.3718827962875366, "learning_rate": 1.927514263989878e-05, "loss": 0.5062, "step": 8971 }, { "epoch": 0.24634816035145524, "grad_norm": 0.3970291316509247, "learning_rate": 1.927498119477822e-05, "loss": 0.4903, "step": 8972 }, { "epoch": 0.24637561779242176, "grad_norm": 0.3338545262813568, "learning_rate": 1.9274819732356862e-05, "loss": 0.4879, "step": 8973 }, { "epoch": 0.24640307523338825, "grad_norm": 0.3473609685897827, "learning_rate": 1.9274658252635002e-05, "loss": 0.551, "step": 8974 }, { "epoch": 0.24643053267435475, "grad_norm": 0.43231162428855896, "learning_rate": 1.9274496755612944e-05, "loss": 0.5739, "step": 8975 }, { "epoch": 0.24645799011532124, "grad_norm": 0.3601270318031311, "learning_rate": 1.927433524129099e-05, "loss": 0.5445, "step": 8976 }, { "epoch": 0.24648544755628776, "grad_norm": 0.3769501745700836, "learning_rate": 1.9274173709669443e-05, "loss": 0.4978, "step": 8977 }, { "epoch": 0.24651290499725426, "grad_norm": 0.36306682229042053, "learning_rate": 1.92740121607486e-05, "loss": 0.5059, "step": 8978 }, { "epoch": 0.24654036243822075, "grad_norm": 0.3761597275733948, "learning_rate": 1.927385059452877e-05, "loss": 0.5094, "step": 8979 }, { "epoch": 0.24656781987918727, "grad_norm": 0.3937368094921112, "learning_rate": 1.927368901101024e-05, "loss": 0.5333, "step": 8980 }, { "epoch": 0.24659527732015377, "grad_norm": 0.3871310353279114, "learning_rate": 1.9273527410193325e-05, "loss": 0.4599, "step": 8981 }, { "epoch": 0.24662273476112026, "grad_norm": 0.40378662943840027, "learning_rate": 1.927336579207832e-05, "loss": 0.5339, "step": 8982 }, { "epoch": 0.24665019220208675, "grad_norm": 0.4014345705509186, "learning_rate": 1.9273204156665528e-05, "loss": 0.5092, "step": 8983 }, { "epoch": 0.24667764964305328, "grad_norm": 0.76580810546875, "learning_rate": 1.9273042503955254e-05, "loss": 0.4853, "step": 8984 }, { "epoch": 0.24670510708401977, "grad_norm": 0.4220210909843445, "learning_rate": 1.927288083394779e-05, "loss": 0.5448, "step": 8985 }, { "epoch": 0.24673256452498626, "grad_norm": 0.39838936924934387, "learning_rate": 1.9272719146643448e-05, "loss": 0.5401, "step": 8986 }, { "epoch": 0.24676002196595279, "grad_norm": 0.583463191986084, "learning_rate": 1.9272557442042522e-05, "loss": 0.3948, "step": 8987 }, { "epoch": 0.24678747940691928, "grad_norm": 0.4246950149536133, "learning_rate": 1.927239572014532e-05, "loss": 0.5896, "step": 8988 }, { "epoch": 0.24681493684788577, "grad_norm": 0.3343854546546936, "learning_rate": 1.927223398095214e-05, "loss": 0.5284, "step": 8989 }, { "epoch": 0.24684239428885227, "grad_norm": 0.37356263399124146, "learning_rate": 1.9272072224463285e-05, "loss": 0.5533, "step": 8990 }, { "epoch": 0.2468698517298188, "grad_norm": 0.385189414024353, "learning_rate": 1.9271910450679057e-05, "loss": 0.57, "step": 8991 }, { "epoch": 0.24689730917078528, "grad_norm": 0.36856862902641296, "learning_rate": 1.9271748659599754e-05, "loss": 0.5755, "step": 8992 }, { "epoch": 0.24692476661175178, "grad_norm": 0.37300851941108704, "learning_rate": 1.927158685122568e-05, "loss": 0.617, "step": 8993 }, { "epoch": 0.2469522240527183, "grad_norm": 0.34357428550720215, "learning_rate": 1.927142502555714e-05, "loss": 0.4788, "step": 8994 }, { "epoch": 0.2469796814936848, "grad_norm": 0.31360888481140137, "learning_rate": 1.9271263182594433e-05, "loss": 0.5382, "step": 8995 }, { "epoch": 0.2470071389346513, "grad_norm": 0.4268900454044342, "learning_rate": 1.9271101322337858e-05, "loss": 0.5587, "step": 8996 }, { "epoch": 0.24703459637561778, "grad_norm": 0.3806796073913574, "learning_rate": 1.9270939444787723e-05, "loss": 0.631, "step": 8997 }, { "epoch": 0.2470620538165843, "grad_norm": 0.39507320523262024, "learning_rate": 1.9270777549944326e-05, "loss": 0.5204, "step": 8998 }, { "epoch": 0.2470895112575508, "grad_norm": 0.42569592595100403, "learning_rate": 1.927061563780797e-05, "loss": 0.459, "step": 8999 }, { "epoch": 0.2471169686985173, "grad_norm": 0.3510018289089203, "learning_rate": 1.927045370837896e-05, "loss": 0.499, "step": 9000 }, { "epoch": 0.2471444261394838, "grad_norm": 0.3838255703449249, "learning_rate": 1.9270291761657592e-05, "loss": 0.592, "step": 9001 }, { "epoch": 0.2471718835804503, "grad_norm": 0.4676567316055298, "learning_rate": 1.9270129797644167e-05, "loss": 0.5353, "step": 9002 }, { "epoch": 0.2471993410214168, "grad_norm": 0.33687251806259155, "learning_rate": 1.9269967816339e-05, "loss": 0.4403, "step": 9003 }, { "epoch": 0.2472267984623833, "grad_norm": 0.3682221472263336, "learning_rate": 1.926980581774238e-05, "loss": 0.5887, "step": 9004 }, { "epoch": 0.24725425590334982, "grad_norm": 0.4365924596786499, "learning_rate": 1.9269643801854612e-05, "loss": 0.5392, "step": 9005 }, { "epoch": 0.2472817133443163, "grad_norm": 0.36387577652931213, "learning_rate": 1.9269481768676003e-05, "loss": 0.6194, "step": 9006 }, { "epoch": 0.2473091707852828, "grad_norm": 0.33847740292549133, "learning_rate": 1.926931971820685e-05, "loss": 0.4756, "step": 9007 }, { "epoch": 0.2473366282262493, "grad_norm": 0.3706493675708771, "learning_rate": 1.9269157650447457e-05, "loss": 0.5391, "step": 9008 }, { "epoch": 0.24736408566721582, "grad_norm": 0.3875819742679596, "learning_rate": 1.926899556539813e-05, "loss": 0.547, "step": 9009 }, { "epoch": 0.24739154310818232, "grad_norm": 0.43154144287109375, "learning_rate": 1.926883346305916e-05, "loss": 0.5863, "step": 9010 }, { "epoch": 0.2474190005491488, "grad_norm": 0.39727526903152466, "learning_rate": 1.9268671343430865e-05, "loss": 0.5918, "step": 9011 }, { "epoch": 0.24744645799011533, "grad_norm": 0.40581876039505005, "learning_rate": 1.9268509206513536e-05, "loss": 0.5159, "step": 9012 }, { "epoch": 0.24747391543108183, "grad_norm": 0.3444885015487671, "learning_rate": 1.926834705230748e-05, "loss": 0.5097, "step": 9013 }, { "epoch": 0.24750137287204832, "grad_norm": 0.44204992055892944, "learning_rate": 1.9268184880813e-05, "loss": 0.5792, "step": 9014 }, { "epoch": 0.2475288303130148, "grad_norm": 0.3651072084903717, "learning_rate": 1.9268022692030396e-05, "loss": 0.57, "step": 9015 }, { "epoch": 0.24755628775398134, "grad_norm": 0.3853370249271393, "learning_rate": 1.9267860485959972e-05, "loss": 0.6042, "step": 9016 }, { "epoch": 0.24758374519494783, "grad_norm": 0.4136281907558441, "learning_rate": 1.926769826260203e-05, "loss": 0.5159, "step": 9017 }, { "epoch": 0.24761120263591432, "grad_norm": 0.3485356569290161, "learning_rate": 1.9267536021956873e-05, "loss": 0.5276, "step": 9018 }, { "epoch": 0.24763866007688085, "grad_norm": 0.38352078199386597, "learning_rate": 1.9267373764024803e-05, "loss": 0.5688, "step": 9019 }, { "epoch": 0.24766611751784734, "grad_norm": 0.4677684009075165, "learning_rate": 1.9267211488806126e-05, "loss": 0.4172, "step": 9020 }, { "epoch": 0.24769357495881383, "grad_norm": 0.40758442878723145, "learning_rate": 1.9267049196301137e-05, "loss": 0.4897, "step": 9021 }, { "epoch": 0.24772103239978033, "grad_norm": 0.3817583918571472, "learning_rate": 1.9266886886510146e-05, "loss": 0.4406, "step": 9022 }, { "epoch": 0.24774848984074685, "grad_norm": 0.4267972409725189, "learning_rate": 1.9266724559433453e-05, "loss": 0.5551, "step": 9023 }, { "epoch": 0.24777594728171334, "grad_norm": 0.37184616923332214, "learning_rate": 1.9266562215071364e-05, "loss": 0.5566, "step": 9024 }, { "epoch": 0.24780340472267984, "grad_norm": 0.33697089552879333, "learning_rate": 1.9266399853424173e-05, "loss": 0.4411, "step": 9025 }, { "epoch": 0.24783086216364636, "grad_norm": 0.4017350375652313, "learning_rate": 1.9266237474492194e-05, "loss": 0.5092, "step": 9026 }, { "epoch": 0.24785831960461285, "grad_norm": 0.337526798248291, "learning_rate": 1.9266075078275724e-05, "loss": 0.5091, "step": 9027 }, { "epoch": 0.24788577704557935, "grad_norm": 0.35568729043006897, "learning_rate": 1.9265912664775063e-05, "loss": 0.5595, "step": 9028 }, { "epoch": 0.24791323448654584, "grad_norm": 0.39410164952278137, "learning_rate": 1.926575023399052e-05, "loss": 0.5127, "step": 9029 }, { "epoch": 0.24794069192751236, "grad_norm": 0.4824616312980652, "learning_rate": 1.92655877859224e-05, "loss": 0.5496, "step": 9030 }, { "epoch": 0.24796814936847886, "grad_norm": 0.37118831276893616, "learning_rate": 1.9265425320570995e-05, "loss": 0.3518, "step": 9031 }, { "epoch": 0.24799560680944535, "grad_norm": 0.36401259899139404, "learning_rate": 1.9265262837936616e-05, "loss": 0.5597, "step": 9032 }, { "epoch": 0.24802306425041187, "grad_norm": 0.376029908657074, "learning_rate": 1.9265100338019563e-05, "loss": 0.6042, "step": 9033 }, { "epoch": 0.24805052169137837, "grad_norm": 0.3486684262752533, "learning_rate": 1.926493782082014e-05, "loss": 0.4698, "step": 9034 }, { "epoch": 0.24807797913234486, "grad_norm": 0.3893037438392639, "learning_rate": 1.9264775286338658e-05, "loss": 0.5891, "step": 9035 }, { "epoch": 0.24810543657331136, "grad_norm": 0.3350045084953308, "learning_rate": 1.926461273457541e-05, "loss": 0.4763, "step": 9036 }, { "epoch": 0.24813289401427788, "grad_norm": 0.3900049924850464, "learning_rate": 1.92644501655307e-05, "loss": 0.4506, "step": 9037 }, { "epoch": 0.24816035145524437, "grad_norm": 0.3707972764968872, "learning_rate": 1.9264287579204835e-05, "loss": 0.514, "step": 9038 }, { "epoch": 0.24818780889621087, "grad_norm": 0.3202662765979767, "learning_rate": 1.926412497559812e-05, "loss": 0.551, "step": 9039 }, { "epoch": 0.2482152663371774, "grad_norm": 0.3974660038948059, "learning_rate": 1.926396235471085e-05, "loss": 0.6303, "step": 9040 }, { "epoch": 0.24824272377814388, "grad_norm": 0.33950406312942505, "learning_rate": 1.9263799716543335e-05, "loss": 0.5168, "step": 9041 }, { "epoch": 0.24827018121911038, "grad_norm": 0.42187732458114624, "learning_rate": 1.9263637061095878e-05, "loss": 0.5361, "step": 9042 }, { "epoch": 0.24829763866007687, "grad_norm": 0.36966729164123535, "learning_rate": 1.926347438836878e-05, "loss": 0.5227, "step": 9043 }, { "epoch": 0.2483250961010434, "grad_norm": 0.40615132451057434, "learning_rate": 1.9263311698362347e-05, "loss": 0.5481, "step": 9044 }, { "epoch": 0.24835255354200989, "grad_norm": 0.3797815442085266, "learning_rate": 1.926314899107688e-05, "loss": 0.5094, "step": 9045 }, { "epoch": 0.24838001098297638, "grad_norm": 0.35387295484542847, "learning_rate": 1.9262986266512683e-05, "loss": 0.6453, "step": 9046 }, { "epoch": 0.2484074684239429, "grad_norm": 0.3629722595214844, "learning_rate": 1.926282352467006e-05, "loss": 0.551, "step": 9047 }, { "epoch": 0.2484349258649094, "grad_norm": 0.363050639629364, "learning_rate": 1.9262660765549318e-05, "loss": 0.5453, "step": 9048 }, { "epoch": 0.2484623833058759, "grad_norm": 0.386135995388031, "learning_rate": 1.9262497989150753e-05, "loss": 0.5826, "step": 9049 }, { "epoch": 0.24848984074684238, "grad_norm": 0.3985317647457123, "learning_rate": 1.9262335195474678e-05, "loss": 0.5664, "step": 9050 }, { "epoch": 0.2485172981878089, "grad_norm": 0.33118829131126404, "learning_rate": 1.926217238452139e-05, "loss": 0.5278, "step": 9051 }, { "epoch": 0.2485447556287754, "grad_norm": 0.3617837727069855, "learning_rate": 1.9262009556291193e-05, "loss": 0.4041, "step": 9052 }, { "epoch": 0.2485722130697419, "grad_norm": 0.3425613045692444, "learning_rate": 1.9261846710784393e-05, "loss": 0.5787, "step": 9053 }, { "epoch": 0.24859967051070841, "grad_norm": 0.38179564476013184, "learning_rate": 1.926168384800129e-05, "loss": 0.527, "step": 9054 }, { "epoch": 0.2486271279516749, "grad_norm": 0.35539910197257996, "learning_rate": 1.9261520967942194e-05, "loss": 0.4829, "step": 9055 }, { "epoch": 0.2486545853926414, "grad_norm": 0.390514612197876, "learning_rate": 1.9261358070607406e-05, "loss": 0.4971, "step": 9056 }, { "epoch": 0.2486820428336079, "grad_norm": 0.37187421321868896, "learning_rate": 1.926119515599723e-05, "loss": 0.5342, "step": 9057 }, { "epoch": 0.24870950027457442, "grad_norm": 0.36795029044151306, "learning_rate": 1.9261032224111963e-05, "loss": 0.5298, "step": 9058 }, { "epoch": 0.2487369577155409, "grad_norm": 0.38844868540763855, "learning_rate": 1.9260869274951922e-05, "loss": 0.5267, "step": 9059 }, { "epoch": 0.2487644151565074, "grad_norm": 0.3975600600242615, "learning_rate": 1.9260706308517402e-05, "loss": 0.527, "step": 9060 }, { "epoch": 0.24879187259747393, "grad_norm": 0.4517129361629486, "learning_rate": 1.9260543324808706e-05, "loss": 0.4864, "step": 9061 }, { "epoch": 0.24881933003844042, "grad_norm": 0.3742983341217041, "learning_rate": 1.9260380323826145e-05, "loss": 0.6081, "step": 9062 }, { "epoch": 0.24884678747940692, "grad_norm": 0.35753780603408813, "learning_rate": 1.9260217305570018e-05, "loss": 0.5292, "step": 9063 }, { "epoch": 0.2488742449203734, "grad_norm": 0.3447599709033966, "learning_rate": 1.926005427004063e-05, "loss": 0.5304, "step": 9064 }, { "epoch": 0.24890170236133993, "grad_norm": 0.3454020023345947, "learning_rate": 1.9259891217238283e-05, "loss": 0.4844, "step": 9065 }, { "epoch": 0.24892915980230643, "grad_norm": 0.4065542221069336, "learning_rate": 1.9259728147163285e-05, "loss": 0.5627, "step": 9066 }, { "epoch": 0.24895661724327292, "grad_norm": 0.3810174763202667, "learning_rate": 1.9259565059815937e-05, "loss": 0.5858, "step": 9067 }, { "epoch": 0.24898407468423944, "grad_norm": 0.35013991594314575, "learning_rate": 1.925940195519655e-05, "loss": 0.4389, "step": 9068 }, { "epoch": 0.24901153212520594, "grad_norm": 0.34617388248443604, "learning_rate": 1.925923883330542e-05, "loss": 0.4939, "step": 9069 }, { "epoch": 0.24903898956617243, "grad_norm": 0.3777387738227844, "learning_rate": 1.9259075694142855e-05, "loss": 0.5673, "step": 9070 }, { "epoch": 0.24906644700713892, "grad_norm": 0.5668032169342041, "learning_rate": 1.925891253770916e-05, "loss": 0.5416, "step": 9071 }, { "epoch": 0.24909390444810545, "grad_norm": 0.33152204751968384, "learning_rate": 1.9258749364004633e-05, "loss": 0.5333, "step": 9072 }, { "epoch": 0.24912136188907194, "grad_norm": 0.3562043011188507, "learning_rate": 1.9258586173029587e-05, "loss": 0.5659, "step": 9073 }, { "epoch": 0.24914881933003843, "grad_norm": 0.36649617552757263, "learning_rate": 1.9258422964784325e-05, "loss": 0.5329, "step": 9074 }, { "epoch": 0.24917627677100493, "grad_norm": 0.35291269421577454, "learning_rate": 1.9258259739269146e-05, "loss": 0.4792, "step": 9075 }, { "epoch": 0.24920373421197145, "grad_norm": 0.3677527904510498, "learning_rate": 1.9258096496484357e-05, "loss": 0.5322, "step": 9076 }, { "epoch": 0.24923119165293794, "grad_norm": 0.3584049642086029, "learning_rate": 1.9257933236430265e-05, "loss": 0.44, "step": 9077 }, { "epoch": 0.24925864909390444, "grad_norm": 0.36686909198760986, "learning_rate": 1.9257769959107172e-05, "loss": 0.5843, "step": 9078 }, { "epoch": 0.24928610653487096, "grad_norm": 0.3654935657978058, "learning_rate": 1.9257606664515385e-05, "loss": 0.5871, "step": 9079 }, { "epoch": 0.24931356397583745, "grad_norm": 0.361832857131958, "learning_rate": 1.9257443352655202e-05, "loss": 0.5555, "step": 9080 }, { "epoch": 0.24934102141680395, "grad_norm": 0.346492201089859, "learning_rate": 1.9257280023526938e-05, "loss": 0.5168, "step": 9081 }, { "epoch": 0.24936847885777044, "grad_norm": 0.4902135729789734, "learning_rate": 1.925711667713089e-05, "loss": 0.5912, "step": 9082 }, { "epoch": 0.24939593629873696, "grad_norm": 0.37343859672546387, "learning_rate": 1.9256953313467365e-05, "loss": 0.4454, "step": 9083 }, { "epoch": 0.24942339373970346, "grad_norm": 0.3479512929916382, "learning_rate": 1.9256789932536666e-05, "loss": 0.5548, "step": 9084 }, { "epoch": 0.24945085118066995, "grad_norm": 0.3330829441547394, "learning_rate": 1.9256626534339102e-05, "loss": 0.4285, "step": 9085 }, { "epoch": 0.24947830862163647, "grad_norm": 0.3404856324195862, "learning_rate": 1.9256463118874972e-05, "loss": 0.5112, "step": 9086 }, { "epoch": 0.24950576606260297, "grad_norm": 0.3757345378398895, "learning_rate": 1.925629968614459e-05, "loss": 0.4858, "step": 9087 }, { "epoch": 0.24953322350356946, "grad_norm": 0.3925851583480835, "learning_rate": 1.9256136236148248e-05, "loss": 0.5326, "step": 9088 }, { "epoch": 0.24956068094453596, "grad_norm": 0.3609011769294739, "learning_rate": 1.9255972768886264e-05, "loss": 0.5085, "step": 9089 }, { "epoch": 0.24958813838550248, "grad_norm": 0.36159613728523254, "learning_rate": 1.925580928435893e-05, "loss": 0.4564, "step": 9090 }, { "epoch": 0.24961559582646897, "grad_norm": 0.40588635206222534, "learning_rate": 1.925564578256656e-05, "loss": 0.5392, "step": 9091 }, { "epoch": 0.24964305326743547, "grad_norm": 0.37397056818008423, "learning_rate": 1.9255482263509456e-05, "loss": 0.5158, "step": 9092 }, { "epoch": 0.249670510708402, "grad_norm": 0.33622536063194275, "learning_rate": 1.9255318727187927e-05, "loss": 0.4883, "step": 9093 }, { "epoch": 0.24969796814936848, "grad_norm": 0.3653191924095154, "learning_rate": 1.925515517360227e-05, "loss": 0.5091, "step": 9094 }, { "epoch": 0.24972542559033498, "grad_norm": 0.4347001612186432, "learning_rate": 1.9254991602752802e-05, "loss": 0.5127, "step": 9095 }, { "epoch": 0.24975288303130147, "grad_norm": 0.3373603820800781, "learning_rate": 1.9254828014639813e-05, "loss": 0.5666, "step": 9096 }, { "epoch": 0.249780340472268, "grad_norm": 0.35183706879615784, "learning_rate": 1.9254664409263618e-05, "loss": 0.5874, "step": 9097 }, { "epoch": 0.2498077979132345, "grad_norm": 0.3919627368450165, "learning_rate": 1.9254500786624523e-05, "loss": 0.5808, "step": 9098 }, { "epoch": 0.24983525535420098, "grad_norm": 0.3264128863811493, "learning_rate": 1.9254337146722828e-05, "loss": 0.5347, "step": 9099 }, { "epoch": 0.2498627127951675, "grad_norm": 0.35240277647972107, "learning_rate": 1.9254173489558842e-05, "loss": 0.5619, "step": 9100 }, { "epoch": 0.249890170236134, "grad_norm": 0.3960556387901306, "learning_rate": 1.9254009815132867e-05, "loss": 0.5311, "step": 9101 }, { "epoch": 0.2499176276771005, "grad_norm": 0.3834902048110962, "learning_rate": 1.9253846123445214e-05, "loss": 0.5991, "step": 9102 }, { "epoch": 0.24994508511806698, "grad_norm": 0.3542090952396393, "learning_rate": 1.9253682414496178e-05, "loss": 0.5842, "step": 9103 }, { "epoch": 0.2499725425590335, "grad_norm": 0.3413252830505371, "learning_rate": 1.9253518688286075e-05, "loss": 0.5371, "step": 9104 }, { "epoch": 0.25, "grad_norm": 0.4094061255455017, "learning_rate": 1.9253354944815205e-05, "loss": 0.5411, "step": 9105 }, { "epoch": 0.2500274574409665, "grad_norm": 0.35323366522789, "learning_rate": 1.9253191184083877e-05, "loss": 0.5061, "step": 9106 }, { "epoch": 0.250054914881933, "grad_norm": 0.3559894263744354, "learning_rate": 1.9253027406092393e-05, "loss": 0.6088, "step": 9107 }, { "epoch": 0.2500823723228995, "grad_norm": 0.3637772500514984, "learning_rate": 1.9252863610841058e-05, "loss": 0.4797, "step": 9108 }, { "epoch": 0.25010982976386603, "grad_norm": 0.39542654156684875, "learning_rate": 1.9252699798330183e-05, "loss": 0.5604, "step": 9109 }, { "epoch": 0.2501372872048325, "grad_norm": 0.3327066898345947, "learning_rate": 1.9252535968560066e-05, "loss": 0.5078, "step": 9110 }, { "epoch": 0.250164744645799, "grad_norm": 0.3499424457550049, "learning_rate": 1.9252372121531016e-05, "loss": 0.5564, "step": 9111 }, { "epoch": 0.2501922020867655, "grad_norm": 0.34591567516326904, "learning_rate": 1.9252208257243337e-05, "loss": 0.5128, "step": 9112 }, { "epoch": 0.250219659527732, "grad_norm": 0.34285977482795715, "learning_rate": 1.9252044375697342e-05, "loss": 0.5446, "step": 9113 }, { "epoch": 0.2502471169686985, "grad_norm": 0.3922332525253296, "learning_rate": 1.925188047689333e-05, "loss": 0.5419, "step": 9114 }, { "epoch": 0.250274574409665, "grad_norm": 0.37338587641716003, "learning_rate": 1.9251716560831608e-05, "loss": 0.5767, "step": 9115 }, { "epoch": 0.25030203185063155, "grad_norm": 0.39025434851646423, "learning_rate": 1.9251552627512483e-05, "loss": 0.5276, "step": 9116 }, { "epoch": 0.25032948929159804, "grad_norm": 0.4520682990550995, "learning_rate": 1.9251388676936257e-05, "loss": 0.473, "step": 9117 }, { "epoch": 0.25035694673256453, "grad_norm": 0.4202898442745209, "learning_rate": 1.925122470910324e-05, "loss": 0.6, "step": 9118 }, { "epoch": 0.250384404173531, "grad_norm": 0.385065495967865, "learning_rate": 1.9251060724013734e-05, "loss": 0.4979, "step": 9119 }, { "epoch": 0.2504118616144975, "grad_norm": 0.3776172995567322, "learning_rate": 1.9250896721668047e-05, "loss": 0.5492, "step": 9120 }, { "epoch": 0.250439319055464, "grad_norm": 0.33749276399612427, "learning_rate": 1.9250732702066487e-05, "loss": 0.572, "step": 9121 }, { "epoch": 0.2504667764964305, "grad_norm": 0.3705171048641205, "learning_rate": 1.925056866520936e-05, "loss": 0.4921, "step": 9122 }, { "epoch": 0.25049423393739706, "grad_norm": 0.4013906717300415, "learning_rate": 1.9250404611096967e-05, "loss": 0.5236, "step": 9123 }, { "epoch": 0.25052169137836355, "grad_norm": 0.33239617943763733, "learning_rate": 1.9250240539729618e-05, "loss": 0.4815, "step": 9124 }, { "epoch": 0.25054914881933005, "grad_norm": 0.35447174310684204, "learning_rate": 1.9250076451107618e-05, "loss": 0.5196, "step": 9125 }, { "epoch": 0.25057660626029654, "grad_norm": 0.36241912841796875, "learning_rate": 1.9249912345231273e-05, "loss": 0.5522, "step": 9126 }, { "epoch": 0.25060406370126304, "grad_norm": 0.3687591254711151, "learning_rate": 1.924974822210089e-05, "loss": 0.5475, "step": 9127 }, { "epoch": 0.25063152114222953, "grad_norm": 0.378627210855484, "learning_rate": 1.924958408171677e-05, "loss": 0.5797, "step": 9128 }, { "epoch": 0.250658978583196, "grad_norm": 0.4018658399581909, "learning_rate": 1.9249419924079228e-05, "loss": 0.5348, "step": 9129 }, { "epoch": 0.2506864360241626, "grad_norm": 0.39601489901542664, "learning_rate": 1.9249255749188566e-05, "loss": 0.597, "step": 9130 }, { "epoch": 0.25071389346512907, "grad_norm": 0.36341652274131775, "learning_rate": 1.924909155704509e-05, "loss": 0.5714, "step": 9131 }, { "epoch": 0.25074135090609556, "grad_norm": 0.4147484302520752, "learning_rate": 1.9248927347649103e-05, "loss": 0.4959, "step": 9132 }, { "epoch": 0.25076880834706206, "grad_norm": 0.32254934310913086, "learning_rate": 1.924876312100092e-05, "loss": 0.5, "step": 9133 }, { "epoch": 0.25079626578802855, "grad_norm": 0.4903514087200165, "learning_rate": 1.9248598877100837e-05, "loss": 0.5348, "step": 9134 }, { "epoch": 0.25082372322899504, "grad_norm": 0.3636152744293213, "learning_rate": 1.924843461594917e-05, "loss": 0.5581, "step": 9135 }, { "epoch": 0.25085118066996154, "grad_norm": 0.3985777497291565, "learning_rate": 1.9248270337546215e-05, "loss": 0.445, "step": 9136 }, { "epoch": 0.2508786381109281, "grad_norm": 0.38671907782554626, "learning_rate": 1.9248106041892287e-05, "loss": 0.5526, "step": 9137 }, { "epoch": 0.2509060955518946, "grad_norm": 0.5352598428726196, "learning_rate": 1.924794172898769e-05, "loss": 0.5271, "step": 9138 }, { "epoch": 0.2509335529928611, "grad_norm": 0.3920799195766449, "learning_rate": 1.9247777398832727e-05, "loss": 0.5585, "step": 9139 }, { "epoch": 0.25096101043382757, "grad_norm": 0.36504101753234863, "learning_rate": 1.924761305142771e-05, "loss": 0.5137, "step": 9140 }, { "epoch": 0.25098846787479406, "grad_norm": 0.36053213477134705, "learning_rate": 1.9247448686772944e-05, "loss": 0.5174, "step": 9141 }, { "epoch": 0.25101592531576056, "grad_norm": 0.3781697154045105, "learning_rate": 1.9247284304868734e-05, "loss": 0.5912, "step": 9142 }, { "epoch": 0.25104338275672705, "grad_norm": 0.5993843078613281, "learning_rate": 1.924711990571539e-05, "loss": 0.63, "step": 9143 }, { "epoch": 0.2510708401976936, "grad_norm": 0.34873393177986145, "learning_rate": 1.924695548931321e-05, "loss": 0.498, "step": 9144 }, { "epoch": 0.2510982976386601, "grad_norm": 0.42013177275657654, "learning_rate": 1.924679105566251e-05, "loss": 0.5858, "step": 9145 }, { "epoch": 0.2511257550796266, "grad_norm": 0.3837541937828064, "learning_rate": 1.9246626604763595e-05, "loss": 0.5561, "step": 9146 }, { "epoch": 0.2511532125205931, "grad_norm": 0.35581403970718384, "learning_rate": 1.924646213661677e-05, "loss": 0.4361, "step": 9147 }, { "epoch": 0.2511806699615596, "grad_norm": 0.35335561633110046, "learning_rate": 1.924629765122234e-05, "loss": 0.514, "step": 9148 }, { "epoch": 0.25120812740252607, "grad_norm": 0.3804866373538971, "learning_rate": 1.9246133148580616e-05, "loss": 0.5291, "step": 9149 }, { "epoch": 0.25123558484349257, "grad_norm": 0.6132171750068665, "learning_rate": 1.92459686286919e-05, "loss": 0.5025, "step": 9150 }, { "epoch": 0.2512630422844591, "grad_norm": 0.43740227818489075, "learning_rate": 1.92458040915565e-05, "loss": 0.4598, "step": 9151 }, { "epoch": 0.2512904997254256, "grad_norm": 0.3644731938838959, "learning_rate": 1.924563953717473e-05, "loss": 0.531, "step": 9152 }, { "epoch": 0.2513179571663921, "grad_norm": 0.3786477744579315, "learning_rate": 1.924547496554689e-05, "loss": 0.5158, "step": 9153 }, { "epoch": 0.2513454146073586, "grad_norm": 0.37483397126197815, "learning_rate": 1.9245310376673286e-05, "loss": 0.5406, "step": 9154 }, { "epoch": 0.2513728720483251, "grad_norm": 0.33854225277900696, "learning_rate": 1.9245145770554227e-05, "loss": 0.4676, "step": 9155 }, { "epoch": 0.2514003294892916, "grad_norm": 0.3552713096141815, "learning_rate": 1.9244981147190024e-05, "loss": 0.464, "step": 9156 }, { "epoch": 0.2514277869302581, "grad_norm": 1.5122889280319214, "learning_rate": 1.924481650658098e-05, "loss": 0.5131, "step": 9157 }, { "epoch": 0.25145524437122463, "grad_norm": 0.3461609482765198, "learning_rate": 1.9244651848727398e-05, "loss": 0.5404, "step": 9158 }, { "epoch": 0.2514827018121911, "grad_norm": 0.34808599948883057, "learning_rate": 1.9244487173629594e-05, "loss": 0.5084, "step": 9159 }, { "epoch": 0.2515101592531576, "grad_norm": 0.3515630066394806, "learning_rate": 1.924432248128787e-05, "loss": 0.4892, "step": 9160 }, { "epoch": 0.2515376166941241, "grad_norm": 0.3981937766075134, "learning_rate": 1.9244157771702533e-05, "loss": 0.5492, "step": 9161 }, { "epoch": 0.2515650741350906, "grad_norm": 0.35601192712783813, "learning_rate": 1.924399304487389e-05, "loss": 0.4734, "step": 9162 }, { "epoch": 0.2515925315760571, "grad_norm": 0.40008625388145447, "learning_rate": 1.924382830080225e-05, "loss": 0.5631, "step": 9163 }, { "epoch": 0.2516199890170236, "grad_norm": 0.340348482131958, "learning_rate": 1.9243663539487924e-05, "loss": 0.544, "step": 9164 }, { "epoch": 0.25164744645799014, "grad_norm": 0.4126564562320709, "learning_rate": 1.9243498760931216e-05, "loss": 0.6353, "step": 9165 }, { "epoch": 0.25167490389895664, "grad_norm": 0.3793872892856598, "learning_rate": 1.9243333965132426e-05, "loss": 0.6266, "step": 9166 }, { "epoch": 0.25170236133992313, "grad_norm": 0.3849842846393585, "learning_rate": 1.9243169152091875e-05, "loss": 0.5569, "step": 9167 }, { "epoch": 0.2517298187808896, "grad_norm": 0.3575260043144226, "learning_rate": 1.924300432180986e-05, "loss": 0.521, "step": 9168 }, { "epoch": 0.2517572762218561, "grad_norm": 0.3284849524497986, "learning_rate": 1.9242839474286688e-05, "loss": 0.5083, "step": 9169 }, { "epoch": 0.2517847336628226, "grad_norm": 0.4159833490848541, "learning_rate": 1.9242674609522676e-05, "loss": 0.5428, "step": 9170 }, { "epoch": 0.2518121911037891, "grad_norm": 0.3966629207134247, "learning_rate": 1.9242509727518125e-05, "loss": 0.5851, "step": 9171 }, { "epoch": 0.2518396485447556, "grad_norm": 0.4097462296485901, "learning_rate": 1.9242344828273343e-05, "loss": 0.5266, "step": 9172 }, { "epoch": 0.25186710598572215, "grad_norm": 0.35925450921058655, "learning_rate": 1.9242179911788636e-05, "loss": 0.504, "step": 9173 }, { "epoch": 0.25189456342668864, "grad_norm": 0.35132086277008057, "learning_rate": 1.9242014978064317e-05, "loss": 0.4713, "step": 9174 }, { "epoch": 0.25192202086765514, "grad_norm": 0.35621213912963867, "learning_rate": 1.9241850027100686e-05, "loss": 0.5747, "step": 9175 }, { "epoch": 0.25194947830862163, "grad_norm": 0.39318573474884033, "learning_rate": 1.924168505889806e-05, "loss": 0.5541, "step": 9176 }, { "epoch": 0.2519769357495881, "grad_norm": 0.3449374735355377, "learning_rate": 1.924152007345674e-05, "loss": 0.5397, "step": 9177 }, { "epoch": 0.2520043931905546, "grad_norm": 0.4337042272090912, "learning_rate": 1.924135507077704e-05, "loss": 0.618, "step": 9178 }, { "epoch": 0.2520318506315211, "grad_norm": 0.3307179808616638, "learning_rate": 1.9241190050859254e-05, "loss": 0.4957, "step": 9179 }, { "epoch": 0.25205930807248766, "grad_norm": 0.3720493018627167, "learning_rate": 1.92410250137037e-05, "loss": 0.5049, "step": 9180 }, { "epoch": 0.25208676551345416, "grad_norm": 0.3644431531429291, "learning_rate": 1.924085995931069e-05, "loss": 0.627, "step": 9181 }, { "epoch": 0.25211422295442065, "grad_norm": 0.35852593183517456, "learning_rate": 1.9240694887680527e-05, "loss": 0.5763, "step": 9182 }, { "epoch": 0.25214168039538715, "grad_norm": 0.35912227630615234, "learning_rate": 1.924052979881352e-05, "loss": 0.5236, "step": 9183 }, { "epoch": 0.25216913783635364, "grad_norm": 0.4230536222457886, "learning_rate": 1.924036469270997e-05, "loss": 0.4853, "step": 9184 }, { "epoch": 0.25219659527732013, "grad_norm": 0.9716504216194153, "learning_rate": 1.9240199569370194e-05, "loss": 0.5131, "step": 9185 }, { "epoch": 0.25222405271828663, "grad_norm": 0.3638480603694916, "learning_rate": 1.9240034428794497e-05, "loss": 0.4831, "step": 9186 }, { "epoch": 0.2522515101592532, "grad_norm": 0.34507960081100464, "learning_rate": 1.9239869270983184e-05, "loss": 0.4442, "step": 9187 }, { "epoch": 0.2522789676002197, "grad_norm": 0.42061883211135864, "learning_rate": 1.923970409593657e-05, "loss": 0.5762, "step": 9188 }, { "epoch": 0.25230642504118617, "grad_norm": 0.4057043194770813, "learning_rate": 1.9239538903654957e-05, "loss": 0.596, "step": 9189 }, { "epoch": 0.25233388248215266, "grad_norm": 0.3289967179298401, "learning_rate": 1.9239373694138656e-05, "loss": 0.4932, "step": 9190 }, { "epoch": 0.25236133992311915, "grad_norm": 0.3397117853164673, "learning_rate": 1.923920846738797e-05, "loss": 0.4928, "step": 9191 }, { "epoch": 0.25238879736408565, "grad_norm": 0.4395521283149719, "learning_rate": 1.9239043223403216e-05, "loss": 0.5614, "step": 9192 }, { "epoch": 0.25241625480505214, "grad_norm": 0.3447589874267578, "learning_rate": 1.9238877962184695e-05, "loss": 0.5056, "step": 9193 }, { "epoch": 0.2524437122460187, "grad_norm": 0.38349661231040955, "learning_rate": 1.923871268373272e-05, "loss": 0.5388, "step": 9194 }, { "epoch": 0.2524711696869852, "grad_norm": 0.3629039227962494, "learning_rate": 1.9238547388047593e-05, "loss": 0.5468, "step": 9195 }, { "epoch": 0.2524986271279517, "grad_norm": 0.40116602182388306, "learning_rate": 1.923838207512963e-05, "loss": 0.5782, "step": 9196 }, { "epoch": 0.2525260845689182, "grad_norm": 0.3317120671272278, "learning_rate": 1.9238216744979133e-05, "loss": 0.5139, "step": 9197 }, { "epoch": 0.25255354200988467, "grad_norm": 0.3759322762489319, "learning_rate": 1.923805139759642e-05, "loss": 0.4666, "step": 9198 }, { "epoch": 0.25258099945085116, "grad_norm": 0.3652150630950928, "learning_rate": 1.923788603298179e-05, "loss": 0.489, "step": 9199 }, { "epoch": 0.25260845689181766, "grad_norm": 0.36475786566734314, "learning_rate": 1.923772065113555e-05, "loss": 0.5617, "step": 9200 }, { "epoch": 0.2526359143327842, "grad_norm": 0.36925405263900757, "learning_rate": 1.9237555252058015e-05, "loss": 0.452, "step": 9201 }, { "epoch": 0.2526633717737507, "grad_norm": 0.34392330050468445, "learning_rate": 1.923738983574949e-05, "loss": 0.4494, "step": 9202 }, { "epoch": 0.2526908292147172, "grad_norm": 0.39312809705734253, "learning_rate": 1.923722440221029e-05, "loss": 0.4405, "step": 9203 }, { "epoch": 0.2527182866556837, "grad_norm": 0.31688985228538513, "learning_rate": 1.923705895144071e-05, "loss": 0.4994, "step": 9204 }, { "epoch": 0.2527457440966502, "grad_norm": 0.35526227951049805, "learning_rate": 1.9236893483441073e-05, "loss": 0.4939, "step": 9205 }, { "epoch": 0.2527732015376167, "grad_norm": 0.3921359181404114, "learning_rate": 1.9236727998211678e-05, "loss": 0.5171, "step": 9206 }, { "epoch": 0.25280065897858317, "grad_norm": 0.3670036494731903, "learning_rate": 1.923656249575284e-05, "loss": 0.6245, "step": 9207 }, { "epoch": 0.2528281164195497, "grad_norm": 0.3445891737937927, "learning_rate": 1.9236396976064868e-05, "loss": 0.5461, "step": 9208 }, { "epoch": 0.2528555738605162, "grad_norm": 0.34589987993240356, "learning_rate": 1.9236231439148062e-05, "loss": 0.453, "step": 9209 }, { "epoch": 0.2528830313014827, "grad_norm": 0.42586272954940796, "learning_rate": 1.9236065885002735e-05, "loss": 0.5655, "step": 9210 }, { "epoch": 0.2529104887424492, "grad_norm": 0.3948447108268738, "learning_rate": 1.9235900313629202e-05, "loss": 0.604, "step": 9211 }, { "epoch": 0.2529379461834157, "grad_norm": 0.3970859944820404, "learning_rate": 1.9235734725027767e-05, "loss": 0.6507, "step": 9212 }, { "epoch": 0.2529654036243822, "grad_norm": 0.3473385274410248, "learning_rate": 1.9235569119198736e-05, "loss": 0.4937, "step": 9213 }, { "epoch": 0.2529928610653487, "grad_norm": 0.3550540506839752, "learning_rate": 1.923540349614242e-05, "loss": 0.509, "step": 9214 }, { "epoch": 0.25302031850631523, "grad_norm": 0.38992270827293396, "learning_rate": 1.9235237855859135e-05, "loss": 0.4707, "step": 9215 }, { "epoch": 0.25304777594728173, "grad_norm": 0.3983781337738037, "learning_rate": 1.9235072198349182e-05, "loss": 0.5918, "step": 9216 }, { "epoch": 0.2530752333882482, "grad_norm": 0.3403855860233307, "learning_rate": 1.923490652361287e-05, "loss": 0.5399, "step": 9217 }, { "epoch": 0.2531026908292147, "grad_norm": 0.3685276210308075, "learning_rate": 1.923474083165051e-05, "loss": 0.5242, "step": 9218 }, { "epoch": 0.2531301482701812, "grad_norm": 0.38868582248687744, "learning_rate": 1.923457512246241e-05, "loss": 0.5678, "step": 9219 }, { "epoch": 0.2531576057111477, "grad_norm": 0.42382553219795227, "learning_rate": 1.923440939604888e-05, "loss": 0.6371, "step": 9220 }, { "epoch": 0.2531850631521142, "grad_norm": 0.34444892406463623, "learning_rate": 1.923424365241023e-05, "loss": 0.5487, "step": 9221 }, { "epoch": 0.25321252059308075, "grad_norm": 0.4879078269004822, "learning_rate": 1.9234077891546768e-05, "loss": 0.5373, "step": 9222 }, { "epoch": 0.25323997803404724, "grad_norm": 0.8280101418495178, "learning_rate": 1.9233912113458805e-05, "loss": 0.4845, "step": 9223 }, { "epoch": 0.25326743547501374, "grad_norm": 0.6367643475532532, "learning_rate": 1.923374631814665e-05, "loss": 0.6264, "step": 9224 }, { "epoch": 0.25329489291598023, "grad_norm": 0.34076905250549316, "learning_rate": 1.923358050561061e-05, "loss": 0.512, "step": 9225 }, { "epoch": 0.2533223503569467, "grad_norm": 0.32174184918403625, "learning_rate": 1.9233414675850993e-05, "loss": 0.4263, "step": 9226 }, { "epoch": 0.2533498077979132, "grad_norm": 0.5080952644348145, "learning_rate": 1.9233248828868114e-05, "loss": 0.5216, "step": 9227 }, { "epoch": 0.2533772652388797, "grad_norm": 0.3670526146888733, "learning_rate": 1.9233082964662277e-05, "loss": 0.5875, "step": 9228 }, { "epoch": 0.25340472267984626, "grad_norm": 0.3749102056026459, "learning_rate": 1.9232917083233794e-05, "loss": 0.4949, "step": 9229 }, { "epoch": 0.25343218012081276, "grad_norm": 0.34109166264533997, "learning_rate": 1.9232751184582973e-05, "loss": 0.4756, "step": 9230 }, { "epoch": 0.25345963756177925, "grad_norm": 0.35943323373794556, "learning_rate": 1.9232585268710125e-05, "loss": 0.599, "step": 9231 }, { "epoch": 0.25348709500274574, "grad_norm": 0.3242088854312897, "learning_rate": 1.9232419335615556e-05, "loss": 0.4555, "step": 9232 }, { "epoch": 0.25351455244371224, "grad_norm": 0.38507041335105896, "learning_rate": 1.923225338529958e-05, "loss": 0.5683, "step": 9233 }, { "epoch": 0.25354200988467873, "grad_norm": 0.3613778054714203, "learning_rate": 1.9232087417762507e-05, "loss": 0.4852, "step": 9234 }, { "epoch": 0.2535694673256452, "grad_norm": 0.4349735975265503, "learning_rate": 1.9231921433004644e-05, "loss": 0.5542, "step": 9235 }, { "epoch": 0.2535969247666118, "grad_norm": 0.3407600224018097, "learning_rate": 1.92317554310263e-05, "loss": 0.5194, "step": 9236 }, { "epoch": 0.25362438220757827, "grad_norm": 0.41521039605140686, "learning_rate": 1.9231589411827785e-05, "loss": 0.4776, "step": 9237 }, { "epoch": 0.25365183964854476, "grad_norm": 0.3628336787223816, "learning_rate": 1.923142337540941e-05, "loss": 0.5593, "step": 9238 }, { "epoch": 0.25367929708951126, "grad_norm": 0.3978654742240906, "learning_rate": 1.9231257321771485e-05, "loss": 0.6509, "step": 9239 }, { "epoch": 0.25370675453047775, "grad_norm": 0.3913373053073883, "learning_rate": 1.923109125091432e-05, "loss": 0.5849, "step": 9240 }, { "epoch": 0.25373421197144425, "grad_norm": 0.33332937955856323, "learning_rate": 1.9230925162838223e-05, "loss": 0.4854, "step": 9241 }, { "epoch": 0.25376166941241074, "grad_norm": 0.3485899269580841, "learning_rate": 1.92307590575435e-05, "loss": 0.4307, "step": 9242 }, { "epoch": 0.2537891268533773, "grad_norm": 0.6639112830162048, "learning_rate": 1.9230592935030468e-05, "loss": 0.511, "step": 9243 }, { "epoch": 0.2538165842943438, "grad_norm": 0.37942221760749817, "learning_rate": 1.9230426795299433e-05, "loss": 0.523, "step": 9244 }, { "epoch": 0.2538440417353103, "grad_norm": 0.3961610198020935, "learning_rate": 1.923026063835071e-05, "loss": 0.6266, "step": 9245 }, { "epoch": 0.25387149917627677, "grad_norm": 0.3878762722015381, "learning_rate": 1.92300944641846e-05, "loss": 0.5532, "step": 9246 }, { "epoch": 0.25389895661724327, "grad_norm": 0.36735787987709045, "learning_rate": 1.922992827280142e-05, "loss": 0.5224, "step": 9247 }, { "epoch": 0.25392641405820976, "grad_norm": 0.34473443031311035, "learning_rate": 1.9229762064201482e-05, "loss": 0.5019, "step": 9248 }, { "epoch": 0.25395387149917625, "grad_norm": 0.3501138687133789, "learning_rate": 1.9229595838385086e-05, "loss": 0.5429, "step": 9249 }, { "epoch": 0.2539813289401428, "grad_norm": 0.35584867000579834, "learning_rate": 1.9229429595352552e-05, "loss": 0.5294, "step": 9250 }, { "epoch": 0.2540087863811093, "grad_norm": 0.34628039598464966, "learning_rate": 1.9229263335104185e-05, "loss": 0.4581, "step": 9251 }, { "epoch": 0.2540362438220758, "grad_norm": 0.4224596619606018, "learning_rate": 1.9229097057640294e-05, "loss": 0.5535, "step": 9252 }, { "epoch": 0.2540637012630423, "grad_norm": 0.4321525990962982, "learning_rate": 1.9228930762961194e-05, "loss": 0.462, "step": 9253 }, { "epoch": 0.2540911587040088, "grad_norm": 0.3775803744792938, "learning_rate": 1.922876445106719e-05, "loss": 0.4175, "step": 9254 }, { "epoch": 0.2541186161449753, "grad_norm": 0.3562146723270416, "learning_rate": 1.9228598121958598e-05, "loss": 0.5485, "step": 9255 }, { "epoch": 0.25414607358594177, "grad_norm": 0.3525570034980774, "learning_rate": 1.9228431775635722e-05, "loss": 0.5507, "step": 9256 }, { "epoch": 0.2541735310269083, "grad_norm": 0.45910903811454773, "learning_rate": 1.9228265412098877e-05, "loss": 0.5401, "step": 9257 }, { "epoch": 0.2542009884678748, "grad_norm": 0.36710599064826965, "learning_rate": 1.922809903134837e-05, "loss": 0.4806, "step": 9258 }, { "epoch": 0.2542284459088413, "grad_norm": 0.4092499315738678, "learning_rate": 1.9227932633384516e-05, "loss": 0.5703, "step": 9259 }, { "epoch": 0.2542559033498078, "grad_norm": 0.42364785075187683, "learning_rate": 1.922776621820762e-05, "loss": 0.5615, "step": 9260 }, { "epoch": 0.2542833607907743, "grad_norm": 0.3695901036262512, "learning_rate": 1.9227599785817997e-05, "loss": 0.4508, "step": 9261 }, { "epoch": 0.2543108182317408, "grad_norm": 0.3634258210659027, "learning_rate": 1.9227433336215954e-05, "loss": 0.5141, "step": 9262 }, { "epoch": 0.2543382756727073, "grad_norm": 0.3354853093624115, "learning_rate": 1.9227266869401805e-05, "loss": 0.4969, "step": 9263 }, { "epoch": 0.25436573311367383, "grad_norm": 0.5445295572280884, "learning_rate": 1.9227100385375855e-05, "loss": 0.4972, "step": 9264 }, { "epoch": 0.2543931905546403, "grad_norm": 0.32671359181404114, "learning_rate": 1.922693388413842e-05, "loss": 0.5754, "step": 9265 }, { "epoch": 0.2544206479956068, "grad_norm": 0.40394893288612366, "learning_rate": 1.9226767365689805e-05, "loss": 0.5225, "step": 9266 }, { "epoch": 0.2544481054365733, "grad_norm": 0.33649009466171265, "learning_rate": 1.9226600830030326e-05, "loss": 0.5506, "step": 9267 }, { "epoch": 0.2544755628775398, "grad_norm": 0.351272314786911, "learning_rate": 1.9226434277160293e-05, "loss": 0.5165, "step": 9268 }, { "epoch": 0.2545030203185063, "grad_norm": 0.3700573146343231, "learning_rate": 1.9226267707080013e-05, "loss": 0.6038, "step": 9269 }, { "epoch": 0.2545304777594728, "grad_norm": 0.3656693994998932, "learning_rate": 1.9226101119789803e-05, "loss": 0.45, "step": 9270 }, { "epoch": 0.25455793520043934, "grad_norm": 0.4011545181274414, "learning_rate": 1.9225934515289967e-05, "loss": 0.5431, "step": 9271 }, { "epoch": 0.25458539264140584, "grad_norm": 0.4228493571281433, "learning_rate": 1.9225767893580817e-05, "loss": 0.6088, "step": 9272 }, { "epoch": 0.25461285008237233, "grad_norm": 0.3713211715221405, "learning_rate": 1.922560125466267e-05, "loss": 0.495, "step": 9273 }, { "epoch": 0.2546403075233388, "grad_norm": 0.3894771635532379, "learning_rate": 1.922543459853583e-05, "loss": 0.5902, "step": 9274 }, { "epoch": 0.2546677649643053, "grad_norm": 0.37792837619781494, "learning_rate": 1.9225267925200607e-05, "loss": 0.5177, "step": 9275 }, { "epoch": 0.2546952224052718, "grad_norm": 0.4030459523200989, "learning_rate": 1.9225101234657318e-05, "loss": 0.6547, "step": 9276 }, { "epoch": 0.2547226798462383, "grad_norm": 0.35420218110084534, "learning_rate": 1.922493452690627e-05, "loss": 0.5242, "step": 9277 }, { "epoch": 0.25475013728720486, "grad_norm": 1.379102349281311, "learning_rate": 1.9224767801947775e-05, "loss": 0.4847, "step": 9278 }, { "epoch": 0.25477759472817135, "grad_norm": 0.36212506890296936, "learning_rate": 1.9224601059782144e-05, "loss": 0.5858, "step": 9279 }, { "epoch": 0.25480505216913785, "grad_norm": 0.4349627196788788, "learning_rate": 1.9224434300409686e-05, "loss": 0.5702, "step": 9280 }, { "epoch": 0.25483250961010434, "grad_norm": 0.4370262026786804, "learning_rate": 1.922426752383072e-05, "loss": 0.5002, "step": 9281 }, { "epoch": 0.25485996705107083, "grad_norm": 0.34502363204956055, "learning_rate": 1.9224100730045542e-05, "loss": 0.5468, "step": 9282 }, { "epoch": 0.25488742449203733, "grad_norm": 0.39965078234672546, "learning_rate": 1.922393391905448e-05, "loss": 0.5532, "step": 9283 }, { "epoch": 0.2549148819330038, "grad_norm": 0.3468093276023865, "learning_rate": 1.9223767090857833e-05, "loss": 0.4682, "step": 9284 }, { "epoch": 0.2549423393739704, "grad_norm": 0.37292858958244324, "learning_rate": 1.9223600245455918e-05, "loss": 0.5013, "step": 9285 }, { "epoch": 0.25496979681493687, "grad_norm": 0.3804837167263031, "learning_rate": 1.9223433382849043e-05, "loss": 0.5112, "step": 9286 }, { "epoch": 0.25499725425590336, "grad_norm": 0.4023738205432892, "learning_rate": 1.9223266503037522e-05, "loss": 0.6053, "step": 9287 }, { "epoch": 0.25502471169686985, "grad_norm": 0.34250831604003906, "learning_rate": 1.9223099606021668e-05, "loss": 0.4574, "step": 9288 }, { "epoch": 0.25505216913783635, "grad_norm": 0.4080987572669983, "learning_rate": 1.9222932691801786e-05, "loss": 0.4927, "step": 9289 }, { "epoch": 0.25507962657880284, "grad_norm": 0.38583827018737793, "learning_rate": 1.9222765760378193e-05, "loss": 0.5698, "step": 9290 }, { "epoch": 0.25510708401976934, "grad_norm": 0.32917115092277527, "learning_rate": 1.9222598811751195e-05, "loss": 0.5429, "step": 9291 }, { "epoch": 0.2551345414607359, "grad_norm": 0.3686443567276001, "learning_rate": 1.922243184592111e-05, "loss": 0.5069, "step": 9292 }, { "epoch": 0.2551619989017024, "grad_norm": 0.40303730964660645, "learning_rate": 1.9222264862888247e-05, "loss": 0.4944, "step": 9293 }, { "epoch": 0.2551894563426689, "grad_norm": 0.3628910481929779, "learning_rate": 1.9222097862652914e-05, "loss": 0.4955, "step": 9294 }, { "epoch": 0.25521691378363537, "grad_norm": 0.3929886817932129, "learning_rate": 1.9221930845215426e-05, "loss": 0.511, "step": 9295 }, { "epoch": 0.25524437122460186, "grad_norm": 0.3697390556335449, "learning_rate": 1.9221763810576095e-05, "loss": 0.5384, "step": 9296 }, { "epoch": 0.25527182866556836, "grad_norm": 0.36886462569236755, "learning_rate": 1.922159675873523e-05, "loss": 0.6082, "step": 9297 }, { "epoch": 0.25529928610653485, "grad_norm": 0.3694456219673157, "learning_rate": 1.9221429689693143e-05, "loss": 0.4883, "step": 9298 }, { "epoch": 0.2553267435475014, "grad_norm": 0.3711444139480591, "learning_rate": 1.9221262603450146e-05, "loss": 0.4986, "step": 9299 }, { "epoch": 0.2553542009884679, "grad_norm": 0.33560970425605774, "learning_rate": 1.922109550000655e-05, "loss": 0.4831, "step": 9300 }, { "epoch": 0.2553816584294344, "grad_norm": 0.33212780952453613, "learning_rate": 1.9220928379362673e-05, "loss": 0.52, "step": 9301 }, { "epoch": 0.2554091158704009, "grad_norm": 0.3715454936027527, "learning_rate": 1.9220761241518818e-05, "loss": 0.5738, "step": 9302 }, { "epoch": 0.2554365733113674, "grad_norm": 0.34120985865592957, "learning_rate": 1.9220594086475302e-05, "loss": 0.4611, "step": 9303 }, { "epoch": 0.25546403075233387, "grad_norm": 0.3942822813987732, "learning_rate": 1.9220426914232436e-05, "loss": 0.57, "step": 9304 }, { "epoch": 0.25549148819330036, "grad_norm": 0.4363870918750763, "learning_rate": 1.922025972479053e-05, "loss": 0.4826, "step": 9305 }, { "epoch": 0.25551894563426686, "grad_norm": 0.3561538755893707, "learning_rate": 1.9220092518149895e-05, "loss": 0.5519, "step": 9306 }, { "epoch": 0.2555464030752334, "grad_norm": 0.4997478425502777, "learning_rate": 1.9219925294310848e-05, "loss": 0.5006, "step": 9307 }, { "epoch": 0.2555738605161999, "grad_norm": 0.33099740743637085, "learning_rate": 1.9219758053273694e-05, "loss": 0.4171, "step": 9308 }, { "epoch": 0.2556013179571664, "grad_norm": 0.3670232594013214, "learning_rate": 1.921959079503875e-05, "loss": 0.5307, "step": 9309 }, { "epoch": 0.2556287753981329, "grad_norm": 0.34771132469177246, "learning_rate": 1.9219423519606328e-05, "loss": 0.5403, "step": 9310 }, { "epoch": 0.2556562328390994, "grad_norm": 0.3830222487449646, "learning_rate": 1.9219256226976736e-05, "loss": 0.4736, "step": 9311 }, { "epoch": 0.2556836902800659, "grad_norm": 0.4084498882293701, "learning_rate": 1.921908891715029e-05, "loss": 0.5383, "step": 9312 }, { "epoch": 0.2557111477210324, "grad_norm": 0.3903753459453583, "learning_rate": 1.92189215901273e-05, "loss": 0.5245, "step": 9313 }, { "epoch": 0.2557386051619989, "grad_norm": 0.3190092146396637, "learning_rate": 1.921875424590808e-05, "loss": 0.5152, "step": 9314 }, { "epoch": 0.2557660626029654, "grad_norm": 0.42319542169570923, "learning_rate": 1.9218586884492943e-05, "loss": 0.52, "step": 9315 }, { "epoch": 0.2557935200439319, "grad_norm": 0.4614998996257782, "learning_rate": 1.9218419505882195e-05, "loss": 0.5767, "step": 9316 }, { "epoch": 0.2558209774848984, "grad_norm": 0.4192696213722229, "learning_rate": 1.9218252110076157e-05, "loss": 0.5856, "step": 9317 }, { "epoch": 0.2558484349258649, "grad_norm": 0.41604986786842346, "learning_rate": 1.9218084697075133e-05, "loss": 0.6127, "step": 9318 }, { "epoch": 0.2558758923668314, "grad_norm": 0.33703628182411194, "learning_rate": 1.921791726687944e-05, "loss": 0.516, "step": 9319 }, { "epoch": 0.2559033498077979, "grad_norm": 0.36875078082084656, "learning_rate": 1.921774981948939e-05, "loss": 0.5644, "step": 9320 }, { "epoch": 0.25593080724876444, "grad_norm": 0.3895040452480316, "learning_rate": 1.9217582354905295e-05, "loss": 0.4861, "step": 9321 }, { "epoch": 0.25595826468973093, "grad_norm": 0.35173675417900085, "learning_rate": 1.9217414873127466e-05, "loss": 0.5614, "step": 9322 }, { "epoch": 0.2559857221306974, "grad_norm": 0.34261536598205566, "learning_rate": 1.9217247374156216e-05, "loss": 0.5037, "step": 9323 }, { "epoch": 0.2560131795716639, "grad_norm": 0.3492025136947632, "learning_rate": 1.9217079857991858e-05, "loss": 0.4765, "step": 9324 }, { "epoch": 0.2560406370126304, "grad_norm": 0.3745185434818268, "learning_rate": 1.9216912324634703e-05, "loss": 0.5136, "step": 9325 }, { "epoch": 0.2560680944535969, "grad_norm": 0.3573043644428253, "learning_rate": 1.921674477408507e-05, "loss": 0.5757, "step": 9326 }, { "epoch": 0.2560955518945634, "grad_norm": 0.5591436624526978, "learning_rate": 1.9216577206343262e-05, "loss": 0.5943, "step": 9327 }, { "epoch": 0.25612300933552995, "grad_norm": 0.3741816282272339, "learning_rate": 1.9216409621409597e-05, "loss": 0.4811, "step": 9328 }, { "epoch": 0.25615046677649644, "grad_norm": 0.3403530716896057, "learning_rate": 1.9216242019284385e-05, "loss": 0.4476, "step": 9329 }, { "epoch": 0.25617792421746294, "grad_norm": 0.33798351883888245, "learning_rate": 1.921607439996794e-05, "loss": 0.5307, "step": 9330 }, { "epoch": 0.25620538165842943, "grad_norm": 0.34892329573631287, "learning_rate": 1.921590676346058e-05, "loss": 0.5301, "step": 9331 }, { "epoch": 0.2562328390993959, "grad_norm": 0.3473939001560211, "learning_rate": 1.921573910976261e-05, "loss": 0.5707, "step": 9332 }, { "epoch": 0.2562602965403624, "grad_norm": 0.395912230014801, "learning_rate": 1.921557143887434e-05, "loss": 0.5873, "step": 9333 }, { "epoch": 0.2562877539813289, "grad_norm": 0.3834340572357178, "learning_rate": 1.9215403750796093e-05, "loss": 0.4938, "step": 9334 }, { "epoch": 0.25631521142229546, "grad_norm": 0.4441642761230469, "learning_rate": 1.9215236045528177e-05, "loss": 0.5523, "step": 9335 }, { "epoch": 0.25634266886326196, "grad_norm": 0.37556761503219604, "learning_rate": 1.9215068323070903e-05, "loss": 0.4894, "step": 9336 }, { "epoch": 0.25637012630422845, "grad_norm": 0.3497155010700226, "learning_rate": 1.9214900583424586e-05, "loss": 0.456, "step": 9337 }, { "epoch": 0.25639758374519495, "grad_norm": 0.4276037812232971, "learning_rate": 1.9214732826589538e-05, "loss": 0.4984, "step": 9338 }, { "epoch": 0.25642504118616144, "grad_norm": 0.42994052171707153, "learning_rate": 1.9214565052566073e-05, "loss": 0.5867, "step": 9339 }, { "epoch": 0.25645249862712793, "grad_norm": 0.38128891587257385, "learning_rate": 1.92143972613545e-05, "loss": 0.5429, "step": 9340 }, { "epoch": 0.25647995606809443, "grad_norm": 0.3462149202823639, "learning_rate": 1.921422945295514e-05, "loss": 0.4942, "step": 9341 }, { "epoch": 0.256507413509061, "grad_norm": 0.3939410150051117, "learning_rate": 1.9214061627368298e-05, "loss": 0.6007, "step": 9342 }, { "epoch": 0.25653487095002747, "grad_norm": 0.3880685269832611, "learning_rate": 1.9213893784594293e-05, "loss": 0.6385, "step": 9343 }, { "epoch": 0.25656232839099397, "grad_norm": 0.42445260286331177, "learning_rate": 1.9213725924633434e-05, "loss": 0.5674, "step": 9344 }, { "epoch": 0.25658978583196046, "grad_norm": 0.3889710605144501, "learning_rate": 1.9213558047486036e-05, "loss": 0.5342, "step": 9345 }, { "epoch": 0.25661724327292695, "grad_norm": 0.4134369492530823, "learning_rate": 1.921339015315241e-05, "loss": 0.5765, "step": 9346 }, { "epoch": 0.25664470071389345, "grad_norm": 0.37270528078079224, "learning_rate": 1.9213222241632873e-05, "loss": 0.4983, "step": 9347 }, { "epoch": 0.25667215815485994, "grad_norm": 0.38544169068336487, "learning_rate": 1.9213054312927735e-05, "loss": 0.4265, "step": 9348 }, { "epoch": 0.2566996155958265, "grad_norm": 0.3660299479961395, "learning_rate": 1.9212886367037308e-05, "loss": 0.5608, "step": 9349 }, { "epoch": 0.256727073036793, "grad_norm": 0.3559917211532593, "learning_rate": 1.921271840396191e-05, "loss": 0.5724, "step": 9350 }, { "epoch": 0.2567545304777595, "grad_norm": 0.3443695306777954, "learning_rate": 1.921255042370185e-05, "loss": 0.5572, "step": 9351 }, { "epoch": 0.256781987918726, "grad_norm": 0.37343403697013855, "learning_rate": 1.9212382426257445e-05, "loss": 0.5567, "step": 9352 }, { "epoch": 0.25680944535969247, "grad_norm": 0.41103556752204895, "learning_rate": 1.9212214411629007e-05, "loss": 0.5543, "step": 9353 }, { "epoch": 0.25683690280065896, "grad_norm": 0.4135097861289978, "learning_rate": 1.921204637981685e-05, "loss": 0.5267, "step": 9354 }, { "epoch": 0.25686436024162546, "grad_norm": 0.3880484402179718, "learning_rate": 1.9211878330821282e-05, "loss": 0.6162, "step": 9355 }, { "epoch": 0.256891817682592, "grad_norm": 0.37275198101997375, "learning_rate": 1.9211710264642622e-05, "loss": 0.5013, "step": 9356 }, { "epoch": 0.2569192751235585, "grad_norm": 0.44539740681648254, "learning_rate": 1.9211542181281184e-05, "loss": 0.5022, "step": 9357 }, { "epoch": 0.256946732564525, "grad_norm": 0.3565678298473358, "learning_rate": 1.921137408073728e-05, "loss": 0.4677, "step": 9358 }, { "epoch": 0.2569741900054915, "grad_norm": 0.4411122798919678, "learning_rate": 1.9211205963011223e-05, "loss": 0.5473, "step": 9359 }, { "epoch": 0.257001647446458, "grad_norm": 0.34746164083480835, "learning_rate": 1.9211037828103327e-05, "loss": 0.5528, "step": 9360 }, { "epoch": 0.2570291048874245, "grad_norm": 0.4477424621582031, "learning_rate": 1.9210869676013906e-05, "loss": 0.4895, "step": 9361 }, { "epoch": 0.25705656232839097, "grad_norm": 0.5270004272460938, "learning_rate": 1.9210701506743273e-05, "loss": 0.5554, "step": 9362 }, { "epoch": 0.2570840197693575, "grad_norm": 0.4253723621368408, "learning_rate": 1.921053332029174e-05, "loss": 0.5987, "step": 9363 }, { "epoch": 0.257111477210324, "grad_norm": 0.3637545704841614, "learning_rate": 1.9210365116659624e-05, "loss": 0.5204, "step": 9364 }, { "epoch": 0.2571389346512905, "grad_norm": 0.35714495182037354, "learning_rate": 1.9210196895847236e-05, "loss": 0.5286, "step": 9365 }, { "epoch": 0.257166392092257, "grad_norm": 0.3479013442993164, "learning_rate": 1.921002865785489e-05, "loss": 0.4622, "step": 9366 }, { "epoch": 0.2571938495332235, "grad_norm": 0.4222853481769562, "learning_rate": 1.9209860402682907e-05, "loss": 0.5599, "step": 9367 }, { "epoch": 0.25722130697419, "grad_norm": 0.5828927159309387, "learning_rate": 1.9209692130331592e-05, "loss": 0.5573, "step": 9368 }, { "epoch": 0.2572487644151565, "grad_norm": 0.35766535997390747, "learning_rate": 1.9209523840801264e-05, "loss": 0.5647, "step": 9369 }, { "epoch": 0.25727622185612303, "grad_norm": 0.3310072720050812, "learning_rate": 1.920935553409223e-05, "loss": 0.5529, "step": 9370 }, { "epoch": 0.2573036792970895, "grad_norm": 0.36336129903793335, "learning_rate": 1.9209187210204812e-05, "loss": 0.4414, "step": 9371 }, { "epoch": 0.257331136738056, "grad_norm": 0.33759063482284546, "learning_rate": 1.920901886913932e-05, "loss": 0.5615, "step": 9372 }, { "epoch": 0.2573585941790225, "grad_norm": 0.3640938699245453, "learning_rate": 1.920885051089607e-05, "loss": 0.5396, "step": 9373 }, { "epoch": 0.257386051619989, "grad_norm": 0.3611994683742523, "learning_rate": 1.9208682135475373e-05, "loss": 0.4953, "step": 9374 }, { "epoch": 0.2574135090609555, "grad_norm": 0.39750203490257263, "learning_rate": 1.920851374287754e-05, "loss": 0.5691, "step": 9375 }, { "epoch": 0.257440966501922, "grad_norm": 0.35357576608657837, "learning_rate": 1.9208345333102896e-05, "loss": 0.4823, "step": 9376 }, { "epoch": 0.25746842394288855, "grad_norm": 0.3235712945461273, "learning_rate": 1.920817690615175e-05, "loss": 0.5088, "step": 9377 }, { "epoch": 0.25749588138385504, "grad_norm": 0.5578622221946716, "learning_rate": 1.9208008462024412e-05, "loss": 0.6277, "step": 9378 }, { "epoch": 0.25752333882482153, "grad_norm": 0.386970192193985, "learning_rate": 1.92078400007212e-05, "loss": 0.4655, "step": 9379 }, { "epoch": 0.25755079626578803, "grad_norm": 0.3529108762741089, "learning_rate": 1.920767152224243e-05, "loss": 0.4666, "step": 9380 }, { "epoch": 0.2575782537067545, "grad_norm": 0.47067561745643616, "learning_rate": 1.920750302658841e-05, "loss": 0.6155, "step": 9381 }, { "epoch": 0.257605711147721, "grad_norm": 0.3230997622013092, "learning_rate": 1.920733451375946e-05, "loss": 0.4763, "step": 9382 }, { "epoch": 0.2576331685886875, "grad_norm": 0.3728141188621521, "learning_rate": 1.9207165983755892e-05, "loss": 0.5555, "step": 9383 }, { "epoch": 0.25766062602965406, "grad_norm": 0.42391595244407654, "learning_rate": 1.9206997436578023e-05, "loss": 0.5773, "step": 9384 }, { "epoch": 0.25768808347062055, "grad_norm": 0.4148499071598053, "learning_rate": 1.920682887222616e-05, "loss": 0.5336, "step": 9385 }, { "epoch": 0.25771554091158705, "grad_norm": 0.3777301013469696, "learning_rate": 1.9206660290700628e-05, "loss": 0.4701, "step": 9386 }, { "epoch": 0.25774299835255354, "grad_norm": 0.3644326329231262, "learning_rate": 1.9206491692001732e-05, "loss": 0.5486, "step": 9387 }, { "epoch": 0.25777045579352004, "grad_norm": 0.39396777749061584, "learning_rate": 1.9206323076129792e-05, "loss": 0.5719, "step": 9388 }, { "epoch": 0.25779791323448653, "grad_norm": 0.37625572085380554, "learning_rate": 1.9206154443085123e-05, "loss": 0.5887, "step": 9389 }, { "epoch": 0.257825370675453, "grad_norm": 0.36311405897140503, "learning_rate": 1.9205985792868037e-05, "loss": 0.5327, "step": 9390 }, { "epoch": 0.2578528281164196, "grad_norm": 0.41146159172058105, "learning_rate": 1.9205817125478846e-05, "loss": 0.5308, "step": 9391 }, { "epoch": 0.25788028555738607, "grad_norm": 0.3573685586452484, "learning_rate": 1.9205648440917874e-05, "loss": 0.5466, "step": 9392 }, { "epoch": 0.25790774299835256, "grad_norm": 0.39070719480514526, "learning_rate": 1.9205479739185423e-05, "loss": 0.6015, "step": 9393 }, { "epoch": 0.25793520043931906, "grad_norm": 0.3390437066555023, "learning_rate": 1.9205311020281822e-05, "loss": 0.4797, "step": 9394 }, { "epoch": 0.25796265788028555, "grad_norm": 0.35160815715789795, "learning_rate": 1.9205142284207372e-05, "loss": 0.485, "step": 9395 }, { "epoch": 0.25799011532125204, "grad_norm": 0.3834616243839264, "learning_rate": 1.9204973530962396e-05, "loss": 0.5494, "step": 9396 }, { "epoch": 0.25801757276221854, "grad_norm": 0.3805585205554962, "learning_rate": 1.9204804760547204e-05, "loss": 0.5374, "step": 9397 }, { "epoch": 0.2580450302031851, "grad_norm": 0.36873164772987366, "learning_rate": 1.9204635972962116e-05, "loss": 0.4568, "step": 9398 }, { "epoch": 0.2580724876441516, "grad_norm": 0.31688961386680603, "learning_rate": 1.9204467168207443e-05, "loss": 0.4768, "step": 9399 }, { "epoch": 0.2580999450851181, "grad_norm": 0.37426334619522095, "learning_rate": 1.92042983462835e-05, "loss": 0.4801, "step": 9400 }, { "epoch": 0.25812740252608457, "grad_norm": 0.3795463740825653, "learning_rate": 1.9204129507190604e-05, "loss": 0.5224, "step": 9401 }, { "epoch": 0.25815485996705106, "grad_norm": 0.4310855567455292, "learning_rate": 1.920396065092907e-05, "loss": 0.5745, "step": 9402 }, { "epoch": 0.25818231740801756, "grad_norm": 0.3663748502731323, "learning_rate": 1.920379177749921e-05, "loss": 0.5381, "step": 9403 }, { "epoch": 0.25820977484898405, "grad_norm": 0.36408692598342896, "learning_rate": 1.9203622886901342e-05, "loss": 0.5402, "step": 9404 }, { "epoch": 0.2582372322899506, "grad_norm": 0.3618702292442322, "learning_rate": 1.920345397913578e-05, "loss": 0.5389, "step": 9405 }, { "epoch": 0.2582646897309171, "grad_norm": 0.38373878598213196, "learning_rate": 1.9203285054202836e-05, "loss": 0.5052, "step": 9406 }, { "epoch": 0.2582921471718836, "grad_norm": 0.32435715198516846, "learning_rate": 1.920311611210283e-05, "loss": 0.376, "step": 9407 }, { "epoch": 0.2583196046128501, "grad_norm": 0.4073440730571747, "learning_rate": 1.9202947152836078e-05, "loss": 0.6131, "step": 9408 }, { "epoch": 0.2583470620538166, "grad_norm": 0.3742148280143738, "learning_rate": 1.920277817640289e-05, "loss": 0.548, "step": 9409 }, { "epoch": 0.2583745194947831, "grad_norm": 0.36369067430496216, "learning_rate": 1.9202609182803584e-05, "loss": 0.4942, "step": 9410 }, { "epoch": 0.25840197693574957, "grad_norm": 0.3462900221347809, "learning_rate": 1.9202440172038474e-05, "loss": 0.5, "step": 9411 }, { "epoch": 0.2584294343767161, "grad_norm": 0.35675254464149475, "learning_rate": 1.9202271144107875e-05, "loss": 0.532, "step": 9412 }, { "epoch": 0.2584568918176826, "grad_norm": 0.3716842830181122, "learning_rate": 1.9202102099012103e-05, "loss": 0.6108, "step": 9413 }, { "epoch": 0.2584843492586491, "grad_norm": 0.38760700821876526, "learning_rate": 1.9201933036751476e-05, "loss": 0.5948, "step": 9414 }, { "epoch": 0.2585118066996156, "grad_norm": 0.38309627771377563, "learning_rate": 1.9201763957326305e-05, "loss": 0.5496, "step": 9415 }, { "epoch": 0.2585392641405821, "grad_norm": 0.3586679697036743, "learning_rate": 1.920159486073691e-05, "loss": 0.5491, "step": 9416 }, { "epoch": 0.2585667215815486, "grad_norm": 0.39460381865501404, "learning_rate": 1.92014257469836e-05, "loss": 0.552, "step": 9417 }, { "epoch": 0.2585941790225151, "grad_norm": 0.3503015339374542, "learning_rate": 1.92012566160667e-05, "loss": 0.6555, "step": 9418 }, { "epoch": 0.25862163646348163, "grad_norm": 0.32782843708992004, "learning_rate": 1.9201087467986516e-05, "loss": 0.4853, "step": 9419 }, { "epoch": 0.2586490939044481, "grad_norm": 0.3259095847606659, "learning_rate": 1.9200918302743365e-05, "loss": 0.4735, "step": 9420 }, { "epoch": 0.2586765513454146, "grad_norm": 0.37474367022514343, "learning_rate": 1.920074912033757e-05, "loss": 0.5339, "step": 9421 }, { "epoch": 0.2587040087863811, "grad_norm": 0.4079749286174774, "learning_rate": 1.9200579920769435e-05, "loss": 0.5886, "step": 9422 }, { "epoch": 0.2587314662273476, "grad_norm": 0.38050031661987305, "learning_rate": 1.9200410704039285e-05, "loss": 0.6123, "step": 9423 }, { "epoch": 0.2587589236683141, "grad_norm": 0.3240785598754883, "learning_rate": 1.9200241470147435e-05, "loss": 0.5014, "step": 9424 }, { "epoch": 0.2587863811092806, "grad_norm": 0.3648619055747986, "learning_rate": 1.9200072219094195e-05, "loss": 0.6159, "step": 9425 }, { "epoch": 0.25881383855024714, "grad_norm": 0.3824846148490906, "learning_rate": 1.9199902950879886e-05, "loss": 0.5362, "step": 9426 }, { "epoch": 0.25884129599121364, "grad_norm": 0.3598269522190094, "learning_rate": 1.9199733665504817e-05, "loss": 0.537, "step": 9427 }, { "epoch": 0.25886875343218013, "grad_norm": 0.3789520859718323, "learning_rate": 1.9199564362969315e-05, "loss": 0.5695, "step": 9428 }, { "epoch": 0.2588962108731466, "grad_norm": 0.4428604245185852, "learning_rate": 1.9199395043273688e-05, "loss": 0.5574, "step": 9429 }, { "epoch": 0.2589236683141131, "grad_norm": 0.3672722578048706, "learning_rate": 1.919922570641825e-05, "loss": 0.4251, "step": 9430 }, { "epoch": 0.2589511257550796, "grad_norm": 0.3708668053150177, "learning_rate": 1.919905635240332e-05, "loss": 0.5602, "step": 9431 }, { "epoch": 0.2589785831960461, "grad_norm": 0.37353041768074036, "learning_rate": 1.919888698122922e-05, "loss": 0.5621, "step": 9432 }, { "epoch": 0.25900604063701266, "grad_norm": 0.37249499559402466, "learning_rate": 1.9198717592896256e-05, "loss": 0.5303, "step": 9433 }, { "epoch": 0.25903349807797915, "grad_norm": 0.35409191250801086, "learning_rate": 1.9198548187404746e-05, "loss": 0.6018, "step": 9434 }, { "epoch": 0.25906095551894565, "grad_norm": 0.36687397956848145, "learning_rate": 1.919837876475501e-05, "loss": 0.4825, "step": 9435 }, { "epoch": 0.25908841295991214, "grad_norm": 0.34235846996307373, "learning_rate": 1.9198209324947358e-05, "loss": 0.5198, "step": 9436 }, { "epoch": 0.25911587040087863, "grad_norm": 0.3279513418674469, "learning_rate": 1.9198039867982113e-05, "loss": 0.5297, "step": 9437 }, { "epoch": 0.25914332784184513, "grad_norm": 0.3567107021808624, "learning_rate": 1.919787039385959e-05, "loss": 0.5059, "step": 9438 }, { "epoch": 0.2591707852828116, "grad_norm": 0.43064436316490173, "learning_rate": 1.91977009025801e-05, "loss": 0.7024, "step": 9439 }, { "epoch": 0.2591982427237781, "grad_norm": 0.3340536653995514, "learning_rate": 1.9197531394143965e-05, "loss": 0.4398, "step": 9440 }, { "epoch": 0.25922570016474467, "grad_norm": 0.37037140130996704, "learning_rate": 1.9197361868551498e-05, "loss": 0.5502, "step": 9441 }, { "epoch": 0.25925315760571116, "grad_norm": 0.3282790184020996, "learning_rate": 1.919719232580301e-05, "loss": 0.4547, "step": 9442 }, { "epoch": 0.25928061504667765, "grad_norm": 0.4376263916492462, "learning_rate": 1.919702276589883e-05, "loss": 0.5073, "step": 9443 }, { "epoch": 0.25930807248764415, "grad_norm": 0.3813644051551819, "learning_rate": 1.9196853188839264e-05, "loss": 0.5741, "step": 9444 }, { "epoch": 0.25933552992861064, "grad_norm": 0.37996840476989746, "learning_rate": 1.9196683594624633e-05, "loss": 0.4946, "step": 9445 }, { "epoch": 0.25936298736957714, "grad_norm": 0.3667914569377899, "learning_rate": 1.919651398325525e-05, "loss": 0.4566, "step": 9446 }, { "epoch": 0.25939044481054363, "grad_norm": 0.3937976062297821, "learning_rate": 1.9196344354731433e-05, "loss": 0.5584, "step": 9447 }, { "epoch": 0.2594179022515102, "grad_norm": 0.3504382371902466, "learning_rate": 1.9196174709053503e-05, "loss": 0.5789, "step": 9448 }, { "epoch": 0.2594453596924767, "grad_norm": 0.359549880027771, "learning_rate": 1.9196005046221765e-05, "loss": 0.5128, "step": 9449 }, { "epoch": 0.25947281713344317, "grad_norm": 0.34907567501068115, "learning_rate": 1.9195835366236547e-05, "loss": 0.5578, "step": 9450 }, { "epoch": 0.25950027457440966, "grad_norm": 0.33749520778656006, "learning_rate": 1.9195665669098163e-05, "loss": 0.4476, "step": 9451 }, { "epoch": 0.25952773201537616, "grad_norm": 0.4322696924209595, "learning_rate": 1.9195495954806925e-05, "loss": 0.6148, "step": 9452 }, { "epoch": 0.25955518945634265, "grad_norm": 0.3985159695148468, "learning_rate": 1.9195326223363147e-05, "loss": 0.5592, "step": 9453 }, { "epoch": 0.25958264689730914, "grad_norm": 0.37949469685554504, "learning_rate": 1.919515647476716e-05, "loss": 0.4844, "step": 9454 }, { "epoch": 0.2596101043382757, "grad_norm": 0.391836941242218, "learning_rate": 1.9194986709019266e-05, "loss": 0.5415, "step": 9455 }, { "epoch": 0.2596375617792422, "grad_norm": 0.38724151253700256, "learning_rate": 1.919481692611979e-05, "loss": 0.5332, "step": 9456 }, { "epoch": 0.2596650192202087, "grad_norm": 0.34634166955947876, "learning_rate": 1.9194647126069043e-05, "loss": 0.5125, "step": 9457 }, { "epoch": 0.2596924766611752, "grad_norm": 0.405081182718277, "learning_rate": 1.9194477308867347e-05, "loss": 0.5509, "step": 9458 }, { "epoch": 0.25971993410214167, "grad_norm": 0.3887420892715454, "learning_rate": 1.9194307474515015e-05, "loss": 0.5518, "step": 9459 }, { "epoch": 0.25974739154310816, "grad_norm": 0.37182143330574036, "learning_rate": 1.9194137623012365e-05, "loss": 0.581, "step": 9460 }, { "epoch": 0.25977484898407466, "grad_norm": 0.3479037880897522, "learning_rate": 1.9193967754359715e-05, "loss": 0.5338, "step": 9461 }, { "epoch": 0.2598023064250412, "grad_norm": 0.34946316480636597, "learning_rate": 1.9193797868557377e-05, "loss": 0.5952, "step": 9462 }, { "epoch": 0.2598297638660077, "grad_norm": 0.3115597665309906, "learning_rate": 1.9193627965605673e-05, "loss": 0.3976, "step": 9463 }, { "epoch": 0.2598572213069742, "grad_norm": 0.40065428614616394, "learning_rate": 1.9193458045504924e-05, "loss": 0.4622, "step": 9464 }, { "epoch": 0.2598846787479407, "grad_norm": 0.3280889093875885, "learning_rate": 1.9193288108255437e-05, "loss": 0.4875, "step": 9465 }, { "epoch": 0.2599121361889072, "grad_norm": 0.3393799960613251, "learning_rate": 1.919311815385753e-05, "loss": 0.4773, "step": 9466 }, { "epoch": 0.2599395936298737, "grad_norm": 0.34150591492652893, "learning_rate": 1.919294818231153e-05, "loss": 0.4708, "step": 9467 }, { "epoch": 0.25996705107084017, "grad_norm": 0.4000703692436218, "learning_rate": 1.9192778193617746e-05, "loss": 0.5074, "step": 9468 }, { "epoch": 0.2599945085118067, "grad_norm": 0.45190656185150146, "learning_rate": 1.91926081877765e-05, "loss": 0.5634, "step": 9469 }, { "epoch": 0.2600219659527732, "grad_norm": 0.39426058530807495, "learning_rate": 1.9192438164788098e-05, "loss": 0.4571, "step": 9470 }, { "epoch": 0.2600494233937397, "grad_norm": 1.2232091426849365, "learning_rate": 1.919226812465287e-05, "loss": 0.5066, "step": 9471 }, { "epoch": 0.2600768808347062, "grad_norm": 0.36946648359298706, "learning_rate": 1.9192098067371127e-05, "loss": 0.5637, "step": 9472 }, { "epoch": 0.2601043382756727, "grad_norm": 0.38049912452697754, "learning_rate": 1.9191927992943187e-05, "loss": 0.5257, "step": 9473 }, { "epoch": 0.2601317957166392, "grad_norm": 0.3689614534378052, "learning_rate": 1.9191757901369366e-05, "loss": 0.4212, "step": 9474 }, { "epoch": 0.2601592531576057, "grad_norm": 0.41822826862335205, "learning_rate": 1.9191587792649985e-05, "loss": 0.5407, "step": 9475 }, { "epoch": 0.26018671059857224, "grad_norm": 0.3599544167518616, "learning_rate": 1.9191417666785356e-05, "loss": 0.5085, "step": 9476 }, { "epoch": 0.26021416803953873, "grad_norm": 0.3625636100769043, "learning_rate": 1.9191247523775805e-05, "loss": 0.4296, "step": 9477 }, { "epoch": 0.2602416254805052, "grad_norm": 0.34985464811325073, "learning_rate": 1.9191077363621643e-05, "loss": 0.5353, "step": 9478 }, { "epoch": 0.2602690829214717, "grad_norm": 0.36791735887527466, "learning_rate": 1.9190907186323185e-05, "loss": 0.558, "step": 9479 }, { "epoch": 0.2602965403624382, "grad_norm": 0.34052157402038574, "learning_rate": 1.9190736991880752e-05, "loss": 0.4388, "step": 9480 }, { "epoch": 0.2603239978034047, "grad_norm": 0.3950366675853729, "learning_rate": 1.919056678029466e-05, "loss": 0.4852, "step": 9481 }, { "epoch": 0.2603514552443712, "grad_norm": 0.4166395664215088, "learning_rate": 1.919039655156523e-05, "loss": 0.5958, "step": 9482 }, { "epoch": 0.26037891268533775, "grad_norm": 0.36381885409355164, "learning_rate": 1.9190226305692777e-05, "loss": 0.5336, "step": 9483 }, { "epoch": 0.26040637012630424, "grad_norm": 0.42643511295318604, "learning_rate": 1.919005604267762e-05, "loss": 0.5836, "step": 9484 }, { "epoch": 0.26043382756727074, "grad_norm": 0.33563244342803955, "learning_rate": 1.9189885762520075e-05, "loss": 0.5112, "step": 9485 }, { "epoch": 0.26046128500823723, "grad_norm": 0.36456364393234253, "learning_rate": 1.918971546522046e-05, "loss": 0.5826, "step": 9486 }, { "epoch": 0.2604887424492037, "grad_norm": 0.36982157826423645, "learning_rate": 1.918954515077909e-05, "loss": 0.5324, "step": 9487 }, { "epoch": 0.2605161998901702, "grad_norm": 0.4584997594356537, "learning_rate": 1.918937481919629e-05, "loss": 0.5325, "step": 9488 }, { "epoch": 0.2605436573311367, "grad_norm": 0.3793880343437195, "learning_rate": 1.9189204470472365e-05, "loss": 0.4719, "step": 9489 }, { "epoch": 0.26057111477210326, "grad_norm": 0.4369315803050995, "learning_rate": 1.918903410460765e-05, "loss": 0.5407, "step": 9490 }, { "epoch": 0.26059857221306976, "grad_norm": 0.5877314209938049, "learning_rate": 1.918886372160245e-05, "loss": 0.4972, "step": 9491 }, { "epoch": 0.26062602965403625, "grad_norm": 0.41305211186408997, "learning_rate": 1.9188693321457083e-05, "loss": 0.5245, "step": 9492 }, { "epoch": 0.26065348709500274, "grad_norm": 0.3475630581378937, "learning_rate": 1.9188522904171877e-05, "loss": 0.5137, "step": 9493 }, { "epoch": 0.26068094453596924, "grad_norm": 0.3432077169418335, "learning_rate": 1.918835246974714e-05, "loss": 0.5028, "step": 9494 }, { "epoch": 0.26070840197693573, "grad_norm": 0.34927037358283997, "learning_rate": 1.918818201818319e-05, "loss": 0.5297, "step": 9495 }, { "epoch": 0.2607358594179022, "grad_norm": 0.3665786385536194, "learning_rate": 1.918801154948035e-05, "loss": 0.5275, "step": 9496 }, { "epoch": 0.2607633168588688, "grad_norm": 0.34769123792648315, "learning_rate": 1.9187841063638935e-05, "loss": 0.5663, "step": 9497 }, { "epoch": 0.26079077429983527, "grad_norm": 0.35744988918304443, "learning_rate": 1.9187670560659267e-05, "loss": 0.5366, "step": 9498 }, { "epoch": 0.26081823174080176, "grad_norm": 0.3775603771209717, "learning_rate": 1.918750004054166e-05, "loss": 0.5453, "step": 9499 }, { "epoch": 0.26084568918176826, "grad_norm": 0.39013585448265076, "learning_rate": 1.9187329503286433e-05, "loss": 0.5383, "step": 9500 }, { "epoch": 0.26087314662273475, "grad_norm": 0.4151013493537903, "learning_rate": 1.9187158948893904e-05, "loss": 0.569, "step": 9501 }, { "epoch": 0.26090060406370125, "grad_norm": 0.35886621475219727, "learning_rate": 1.918698837736439e-05, "loss": 0.522, "step": 9502 }, { "epoch": 0.26092806150466774, "grad_norm": 0.41683346033096313, "learning_rate": 1.9186817788698212e-05, "loss": 0.4657, "step": 9503 }, { "epoch": 0.2609555189456343, "grad_norm": 0.32532039284706116, "learning_rate": 1.9186647182895686e-05, "loss": 0.4216, "step": 9504 }, { "epoch": 0.2609829763866008, "grad_norm": 0.3871030807495117, "learning_rate": 1.918647655995713e-05, "loss": 0.5127, "step": 9505 }, { "epoch": 0.2610104338275673, "grad_norm": 0.3389013111591339, "learning_rate": 1.9186305919882866e-05, "loss": 0.485, "step": 9506 }, { "epoch": 0.2610378912685338, "grad_norm": 0.40613073110580444, "learning_rate": 1.9186135262673206e-05, "loss": 0.5195, "step": 9507 }, { "epoch": 0.26106534870950027, "grad_norm": 0.3572169542312622, "learning_rate": 1.9185964588328476e-05, "loss": 0.5143, "step": 9508 }, { "epoch": 0.26109280615046676, "grad_norm": 0.36299630999565125, "learning_rate": 1.918579389684899e-05, "loss": 0.5144, "step": 9509 }, { "epoch": 0.26112026359143325, "grad_norm": 0.3462035655975342, "learning_rate": 1.9185623188235062e-05, "loss": 0.4629, "step": 9510 }, { "epoch": 0.2611477210323998, "grad_norm": 0.35392338037490845, "learning_rate": 1.918545246248702e-05, "loss": 0.6023, "step": 9511 }, { "epoch": 0.2611751784733663, "grad_norm": 0.3871360123157501, "learning_rate": 1.9185281719605174e-05, "loss": 0.4806, "step": 9512 }, { "epoch": 0.2612026359143328, "grad_norm": 0.4602441191673279, "learning_rate": 1.918511095958985e-05, "loss": 0.4911, "step": 9513 }, { "epoch": 0.2612300933552993, "grad_norm": 0.4180428683757782, "learning_rate": 1.918494018244136e-05, "loss": 0.574, "step": 9514 }, { "epoch": 0.2612575507962658, "grad_norm": 0.39070966839790344, "learning_rate": 1.9184769388160022e-05, "loss": 0.5824, "step": 9515 }, { "epoch": 0.2612850082372323, "grad_norm": 0.36380016803741455, "learning_rate": 1.9184598576746164e-05, "loss": 0.4366, "step": 9516 }, { "epoch": 0.26131246567819877, "grad_norm": 0.3573151230812073, "learning_rate": 1.9184427748200095e-05, "loss": 0.4965, "step": 9517 }, { "epoch": 0.2613399231191653, "grad_norm": 0.34474125504493713, "learning_rate": 1.9184256902522135e-05, "loss": 0.4917, "step": 9518 }, { "epoch": 0.2613673805601318, "grad_norm": 0.3957749903202057, "learning_rate": 1.9184086039712608e-05, "loss": 0.6056, "step": 9519 }, { "epoch": 0.2613948380010983, "grad_norm": 0.40790924429893494, "learning_rate": 1.9183915159771826e-05, "loss": 0.5259, "step": 9520 }, { "epoch": 0.2614222954420648, "grad_norm": 0.41063570976257324, "learning_rate": 1.9183744262700114e-05, "loss": 0.6009, "step": 9521 }, { "epoch": 0.2614497528830313, "grad_norm": 0.35424110293388367, "learning_rate": 1.9183573348497788e-05, "loss": 0.4744, "step": 9522 }, { "epoch": 0.2614772103239978, "grad_norm": 0.3755441904067993, "learning_rate": 1.9183402417165162e-05, "loss": 0.5374, "step": 9523 }, { "epoch": 0.2615046677649643, "grad_norm": 0.39821648597717285, "learning_rate": 1.9183231468702562e-05, "loss": 0.5477, "step": 9524 }, { "epoch": 0.26153212520593083, "grad_norm": 0.3581530451774597, "learning_rate": 1.9183060503110303e-05, "loss": 0.5325, "step": 9525 }, { "epoch": 0.2615595826468973, "grad_norm": 0.3808218240737915, "learning_rate": 1.918288952038871e-05, "loss": 0.6346, "step": 9526 }, { "epoch": 0.2615870400878638, "grad_norm": 0.36927419900894165, "learning_rate": 1.9182718520538095e-05, "loss": 0.5593, "step": 9527 }, { "epoch": 0.2616144975288303, "grad_norm": 0.3387787640094757, "learning_rate": 1.918254750355878e-05, "loss": 0.4724, "step": 9528 }, { "epoch": 0.2616419549697968, "grad_norm": 0.5253043174743652, "learning_rate": 1.918237646945108e-05, "loss": 0.5357, "step": 9529 }, { "epoch": 0.2616694124107633, "grad_norm": 0.31770265102386475, "learning_rate": 1.918220541821532e-05, "loss": 0.5194, "step": 9530 }, { "epoch": 0.2616968698517298, "grad_norm": 0.35823407769203186, "learning_rate": 1.9182034349851814e-05, "loss": 0.5099, "step": 9531 }, { "epoch": 0.26172432729269635, "grad_norm": 0.36212533712387085, "learning_rate": 1.9181863264360886e-05, "loss": 0.5166, "step": 9532 }, { "epoch": 0.26175178473366284, "grad_norm": 0.36044299602508545, "learning_rate": 1.918169216174285e-05, "loss": 0.5694, "step": 9533 }, { "epoch": 0.26177924217462933, "grad_norm": 0.38584983348846436, "learning_rate": 1.918152104199803e-05, "loss": 0.5278, "step": 9534 }, { "epoch": 0.26180669961559583, "grad_norm": 0.35913383960723877, "learning_rate": 1.9181349905126742e-05, "loss": 0.4868, "step": 9535 }, { "epoch": 0.2618341570565623, "grad_norm": 0.32784292101860046, "learning_rate": 1.9181178751129307e-05, "loss": 0.4187, "step": 9536 }, { "epoch": 0.2618616144975288, "grad_norm": 0.3950020968914032, "learning_rate": 1.918100758000604e-05, "loss": 0.5302, "step": 9537 }, { "epoch": 0.2618890719384953, "grad_norm": 0.3308124244213104, "learning_rate": 1.918083639175727e-05, "loss": 0.5441, "step": 9538 }, { "epoch": 0.26191652937946186, "grad_norm": 0.3867616355419159, "learning_rate": 1.9180665186383303e-05, "loss": 0.575, "step": 9539 }, { "epoch": 0.26194398682042835, "grad_norm": 0.3432632386684418, "learning_rate": 1.918049396388447e-05, "loss": 0.5281, "step": 9540 }, { "epoch": 0.26197144426139485, "grad_norm": 0.4088667929172516, "learning_rate": 1.918032272426108e-05, "loss": 0.5744, "step": 9541 }, { "epoch": 0.26199890170236134, "grad_norm": 0.5341666340827942, "learning_rate": 1.9180151467513465e-05, "loss": 0.5968, "step": 9542 }, { "epoch": 0.26202635914332784, "grad_norm": 0.4142821729183197, "learning_rate": 1.9179980193641935e-05, "loss": 0.4704, "step": 9543 }, { "epoch": 0.26205381658429433, "grad_norm": 0.4774053394794464, "learning_rate": 1.917980890264681e-05, "loss": 0.5291, "step": 9544 }, { "epoch": 0.2620812740252608, "grad_norm": 0.3450198471546173, "learning_rate": 1.9179637594528414e-05, "loss": 0.4904, "step": 9545 }, { "epoch": 0.2621087314662274, "grad_norm": 0.3376244902610779, "learning_rate": 1.9179466269287063e-05, "loss": 0.5289, "step": 9546 }, { "epoch": 0.26213618890719387, "grad_norm": 0.3433992862701416, "learning_rate": 1.9179294926923077e-05, "loss": 0.5682, "step": 9547 }, { "epoch": 0.26216364634816036, "grad_norm": 0.2906266152858734, "learning_rate": 1.9179123567436777e-05, "loss": 0.4544, "step": 9548 }, { "epoch": 0.26219110378912686, "grad_norm": 0.3786798119544983, "learning_rate": 1.9178952190828482e-05, "loss": 0.5017, "step": 9549 }, { "epoch": 0.26221856123009335, "grad_norm": 0.43893924355506897, "learning_rate": 1.9178780797098514e-05, "loss": 0.4954, "step": 9550 }, { "epoch": 0.26224601867105984, "grad_norm": 0.40444415807724, "learning_rate": 1.9178609386247188e-05, "loss": 0.4769, "step": 9551 }, { "epoch": 0.26227347611202634, "grad_norm": 0.3869764506816864, "learning_rate": 1.917843795827482e-05, "loss": 0.5012, "step": 9552 }, { "epoch": 0.2623009335529929, "grad_norm": 0.369089812040329, "learning_rate": 1.9178266513181744e-05, "loss": 0.5107, "step": 9553 }, { "epoch": 0.2623283909939594, "grad_norm": 0.36887305974960327, "learning_rate": 1.917809505096827e-05, "loss": 0.5492, "step": 9554 }, { "epoch": 0.2623558484349259, "grad_norm": 0.4875061511993408, "learning_rate": 1.9177923571634717e-05, "loss": 0.5542, "step": 9555 }, { "epoch": 0.26238330587589237, "grad_norm": 0.3922513723373413, "learning_rate": 1.917775207518141e-05, "loss": 0.5484, "step": 9556 }, { "epoch": 0.26241076331685886, "grad_norm": 0.36044880747795105, "learning_rate": 1.9177580561608668e-05, "loss": 0.4466, "step": 9557 }, { "epoch": 0.26243822075782536, "grad_norm": 0.37549281120300293, "learning_rate": 1.9177409030916806e-05, "loss": 0.5171, "step": 9558 }, { "epoch": 0.26246567819879185, "grad_norm": 0.35882240533828735, "learning_rate": 1.9177237483106147e-05, "loss": 0.6259, "step": 9559 }, { "epoch": 0.2624931356397584, "grad_norm": 0.3721841275691986, "learning_rate": 1.917706591817701e-05, "loss": 0.5209, "step": 9560 }, { "epoch": 0.2625205930807249, "grad_norm": 0.35287773609161377, "learning_rate": 1.9176894336129717e-05, "loss": 0.532, "step": 9561 }, { "epoch": 0.2625480505216914, "grad_norm": 0.35146135091781616, "learning_rate": 1.917672273696459e-05, "loss": 0.5406, "step": 9562 }, { "epoch": 0.2625755079626579, "grad_norm": 0.40884673595428467, "learning_rate": 1.9176551120681942e-05, "loss": 0.5381, "step": 9563 }, { "epoch": 0.2626029654036244, "grad_norm": 0.3548591136932373, "learning_rate": 1.91763794872821e-05, "loss": 0.5852, "step": 9564 }, { "epoch": 0.26263042284459087, "grad_norm": 0.6528147459030151, "learning_rate": 1.917620783676538e-05, "loss": 0.4871, "step": 9565 }, { "epoch": 0.26265788028555737, "grad_norm": 0.35479700565338135, "learning_rate": 1.9176036169132103e-05, "loss": 0.5341, "step": 9566 }, { "epoch": 0.2626853377265239, "grad_norm": 0.3318730592727661, "learning_rate": 1.9175864484382593e-05, "loss": 0.42, "step": 9567 }, { "epoch": 0.2627127951674904, "grad_norm": 0.43123915791511536, "learning_rate": 1.9175692782517163e-05, "loss": 0.5072, "step": 9568 }, { "epoch": 0.2627402526084569, "grad_norm": 0.3558466136455536, "learning_rate": 1.917552106353614e-05, "loss": 0.487, "step": 9569 }, { "epoch": 0.2627677100494234, "grad_norm": 0.3710677921772003, "learning_rate": 1.9175349327439844e-05, "loss": 0.5034, "step": 9570 }, { "epoch": 0.2627951674903899, "grad_norm": 0.3625600337982178, "learning_rate": 1.9175177574228588e-05, "loss": 0.5482, "step": 9571 }, { "epoch": 0.2628226249313564, "grad_norm": 0.37285807728767395, "learning_rate": 1.9175005803902703e-05, "loss": 0.5354, "step": 9572 }, { "epoch": 0.2628500823723229, "grad_norm": 0.37697094678878784, "learning_rate": 1.91748340164625e-05, "loss": 0.6026, "step": 9573 }, { "epoch": 0.2628775398132894, "grad_norm": 0.427742063999176, "learning_rate": 1.9174662211908305e-05, "loss": 0.4564, "step": 9574 }, { "epoch": 0.2629049972542559, "grad_norm": 0.3344784379005432, "learning_rate": 1.9174490390240435e-05, "loss": 0.5301, "step": 9575 }, { "epoch": 0.2629324546952224, "grad_norm": 0.4011137783527374, "learning_rate": 1.9174318551459214e-05, "loss": 0.5045, "step": 9576 }, { "epoch": 0.2629599121361889, "grad_norm": 0.8655737042427063, "learning_rate": 1.917414669556496e-05, "loss": 0.4776, "step": 9577 }, { "epoch": 0.2629873695771554, "grad_norm": 0.37418410181999207, "learning_rate": 1.9173974822557993e-05, "loss": 0.5568, "step": 9578 }, { "epoch": 0.2630148270181219, "grad_norm": 0.36712846159935, "learning_rate": 1.9173802932438637e-05, "loss": 0.4976, "step": 9579 }, { "epoch": 0.2630422844590884, "grad_norm": 0.38154181838035583, "learning_rate": 1.917363102520721e-05, "loss": 0.4971, "step": 9580 }, { "epoch": 0.2630697419000549, "grad_norm": 0.3725656270980835, "learning_rate": 1.9173459100864033e-05, "loss": 0.5227, "step": 9581 }, { "epoch": 0.26309719934102144, "grad_norm": 0.3549153804779053, "learning_rate": 1.9173287159409425e-05, "loss": 0.4697, "step": 9582 }, { "epoch": 0.26312465678198793, "grad_norm": 0.32899147272109985, "learning_rate": 1.917311520084371e-05, "loss": 0.5183, "step": 9583 }, { "epoch": 0.2631521142229544, "grad_norm": 0.3964860141277313, "learning_rate": 1.917294322516721e-05, "loss": 0.5777, "step": 9584 }, { "epoch": 0.2631795716639209, "grad_norm": 0.35303834080696106, "learning_rate": 1.9172771232380244e-05, "loss": 0.6091, "step": 9585 }, { "epoch": 0.2632070291048874, "grad_norm": 0.4135599434375763, "learning_rate": 1.9172599222483128e-05, "loss": 0.6001, "step": 9586 }, { "epoch": 0.2632344865458539, "grad_norm": 0.3965635299682617, "learning_rate": 1.917242719547619e-05, "loss": 0.4871, "step": 9587 }, { "epoch": 0.2632619439868204, "grad_norm": 0.3555094003677368, "learning_rate": 1.9172255151359746e-05, "loss": 0.4738, "step": 9588 }, { "epoch": 0.26328940142778695, "grad_norm": 0.33029139041900635, "learning_rate": 1.9172083090134117e-05, "loss": 0.4596, "step": 9589 }, { "epoch": 0.26331685886875345, "grad_norm": 0.41901499032974243, "learning_rate": 1.9171911011799625e-05, "loss": 0.6084, "step": 9590 }, { "epoch": 0.26334431630971994, "grad_norm": 0.3587934076786041, "learning_rate": 1.9171738916356596e-05, "loss": 0.5308, "step": 9591 }, { "epoch": 0.26337177375068643, "grad_norm": 0.34647253155708313, "learning_rate": 1.9171566803805347e-05, "loss": 0.5515, "step": 9592 }, { "epoch": 0.2633992311916529, "grad_norm": 0.3451676666736603, "learning_rate": 1.9171394674146195e-05, "loss": 0.4881, "step": 9593 }, { "epoch": 0.2634266886326194, "grad_norm": 0.4046671986579895, "learning_rate": 1.9171222527379468e-05, "loss": 0.5588, "step": 9594 }, { "epoch": 0.2634541460735859, "grad_norm": 0.3560144305229187, "learning_rate": 1.917105036350548e-05, "loss": 0.5265, "step": 9595 }, { "epoch": 0.26348160351455246, "grad_norm": 0.3412487208843231, "learning_rate": 1.9170878182524558e-05, "loss": 0.4738, "step": 9596 }, { "epoch": 0.26350906095551896, "grad_norm": 0.3334677815437317, "learning_rate": 1.9170705984437022e-05, "loss": 0.51, "step": 9597 }, { "epoch": 0.26353651839648545, "grad_norm": 0.3341671824455261, "learning_rate": 1.9170533769243192e-05, "loss": 0.4559, "step": 9598 }, { "epoch": 0.26356397583745195, "grad_norm": 0.3877742290496826, "learning_rate": 1.917036153694339e-05, "loss": 0.4878, "step": 9599 }, { "epoch": 0.26359143327841844, "grad_norm": 0.383279949426651, "learning_rate": 1.9170189287537936e-05, "loss": 0.6668, "step": 9600 }, { "epoch": 0.26361889071938494, "grad_norm": 0.30659568309783936, "learning_rate": 1.9170017021027153e-05, "loss": 0.4629, "step": 9601 }, { "epoch": 0.26364634816035143, "grad_norm": 0.34990280866622925, "learning_rate": 1.916984473741136e-05, "loss": 0.488, "step": 9602 }, { "epoch": 0.263673805601318, "grad_norm": 0.3550940454006195, "learning_rate": 1.916967243669088e-05, "loss": 0.5919, "step": 9603 }, { "epoch": 0.2637012630422845, "grad_norm": 0.392974317073822, "learning_rate": 1.9169500118866034e-05, "loss": 0.5459, "step": 9604 }, { "epoch": 0.26372872048325097, "grad_norm": 0.3230685889720917, "learning_rate": 1.9169327783937145e-05, "loss": 0.6285, "step": 9605 }, { "epoch": 0.26375617792421746, "grad_norm": 0.4024016261100769, "learning_rate": 1.9169155431904535e-05, "loss": 0.5661, "step": 9606 }, { "epoch": 0.26378363536518395, "grad_norm": 0.34372881054878235, "learning_rate": 1.9168983062768522e-05, "loss": 0.5269, "step": 9607 }, { "epoch": 0.26381109280615045, "grad_norm": 0.34878993034362793, "learning_rate": 1.916881067652943e-05, "loss": 0.4844, "step": 9608 }, { "epoch": 0.26383855024711694, "grad_norm": 0.38800927996635437, "learning_rate": 1.9168638273187575e-05, "loss": 0.5056, "step": 9609 }, { "epoch": 0.2638660076880835, "grad_norm": 0.35882869362831116, "learning_rate": 1.9168465852743286e-05, "loss": 0.5151, "step": 9610 }, { "epoch": 0.26389346512905, "grad_norm": 0.40235576033592224, "learning_rate": 1.9168293415196887e-05, "loss": 0.5709, "step": 9611 }, { "epoch": 0.2639209225700165, "grad_norm": 0.3672046363353729, "learning_rate": 1.916812096054869e-05, "loss": 0.4532, "step": 9612 }, { "epoch": 0.263948380010983, "grad_norm": 0.3578701317310333, "learning_rate": 1.916794848879902e-05, "loss": 0.5409, "step": 9613 }, { "epoch": 0.26397583745194947, "grad_norm": 0.44952693581581116, "learning_rate": 1.9167775999948203e-05, "loss": 0.5958, "step": 9614 }, { "epoch": 0.26400329489291596, "grad_norm": 0.352177232503891, "learning_rate": 1.9167603493996555e-05, "loss": 0.6156, "step": 9615 }, { "epoch": 0.26403075233388246, "grad_norm": 0.3794274628162384, "learning_rate": 1.9167430970944402e-05, "loss": 0.5287, "step": 9616 }, { "epoch": 0.264058209774849, "grad_norm": 0.4086071848869324, "learning_rate": 1.9167258430792067e-05, "loss": 0.5552, "step": 9617 }, { "epoch": 0.2640856672158155, "grad_norm": 0.3740593492984772, "learning_rate": 1.9167085873539865e-05, "loss": 0.5087, "step": 9618 }, { "epoch": 0.264113124656782, "grad_norm": 0.37112903594970703, "learning_rate": 1.9166913299188124e-05, "loss": 0.6133, "step": 9619 }, { "epoch": 0.2641405820977485, "grad_norm": 0.45442551374435425, "learning_rate": 1.9166740707737163e-05, "loss": 0.4923, "step": 9620 }, { "epoch": 0.264168039538715, "grad_norm": 0.4267108738422394, "learning_rate": 1.9166568099187307e-05, "loss": 0.5127, "step": 9621 }, { "epoch": 0.2641954969796815, "grad_norm": 0.3346796929836273, "learning_rate": 1.916639547353887e-05, "loss": 0.4951, "step": 9622 }, { "epoch": 0.26422295442064797, "grad_norm": 0.3688947558403015, "learning_rate": 1.9166222830792188e-05, "loss": 0.5071, "step": 9623 }, { "epoch": 0.2642504118616145, "grad_norm": 0.36276134848594666, "learning_rate": 1.916605017094757e-05, "loss": 0.5383, "step": 9624 }, { "epoch": 0.264277869302581, "grad_norm": 0.37570056319236755, "learning_rate": 1.9165877494005345e-05, "loss": 0.5961, "step": 9625 }, { "epoch": 0.2643053267435475, "grad_norm": 0.3653617799282074, "learning_rate": 1.9165704799965833e-05, "loss": 0.5136, "step": 9626 }, { "epoch": 0.264332784184514, "grad_norm": 0.3883560001850128, "learning_rate": 1.9165532088829354e-05, "loss": 0.5519, "step": 9627 }, { "epoch": 0.2643602416254805, "grad_norm": 0.3660111129283905, "learning_rate": 1.9165359360596233e-05, "loss": 0.5241, "step": 9628 }, { "epoch": 0.264387699066447, "grad_norm": 0.38687291741371155, "learning_rate": 1.9165186615266792e-05, "loss": 0.5431, "step": 9629 }, { "epoch": 0.2644151565074135, "grad_norm": 0.38031086325645447, "learning_rate": 1.9165013852841353e-05, "loss": 0.5243, "step": 9630 }, { "epoch": 0.26444261394838003, "grad_norm": 0.37998342514038086, "learning_rate": 1.9164841073320238e-05, "loss": 0.5546, "step": 9631 }, { "epoch": 0.26447007138934653, "grad_norm": 0.36830297112464905, "learning_rate": 1.9164668276703772e-05, "loss": 0.524, "step": 9632 }, { "epoch": 0.264497528830313, "grad_norm": 0.3662182092666626, "learning_rate": 1.916449546299227e-05, "loss": 0.5106, "step": 9633 }, { "epoch": 0.2645249862712795, "grad_norm": 0.39061954617500305, "learning_rate": 1.916432263218606e-05, "loss": 0.5605, "step": 9634 }, { "epoch": 0.264552443712246, "grad_norm": 0.42209678888320923, "learning_rate": 1.9164149784285464e-05, "loss": 0.6107, "step": 9635 }, { "epoch": 0.2645799011532125, "grad_norm": 0.38872209191322327, "learning_rate": 1.9163976919290806e-05, "loss": 0.4836, "step": 9636 }, { "epoch": 0.264607358594179, "grad_norm": 0.3437592387199402, "learning_rate": 1.9163804037202404e-05, "loss": 0.557, "step": 9637 }, { "epoch": 0.26463481603514555, "grad_norm": 0.33125877380371094, "learning_rate": 1.9163631138020582e-05, "loss": 0.5307, "step": 9638 }, { "epoch": 0.26466227347611204, "grad_norm": 0.33722245693206787, "learning_rate": 1.9163458221745665e-05, "loss": 0.492, "step": 9639 }, { "epoch": 0.26468973091707854, "grad_norm": 0.367610365152359, "learning_rate": 1.9163285288377973e-05, "loss": 0.5687, "step": 9640 }, { "epoch": 0.26471718835804503, "grad_norm": 0.6498546600341797, "learning_rate": 1.916311233791783e-05, "loss": 0.5727, "step": 9641 }, { "epoch": 0.2647446457990115, "grad_norm": 0.404351145029068, "learning_rate": 1.9162939370365555e-05, "loss": 0.5695, "step": 9642 }, { "epoch": 0.264772103239978, "grad_norm": 0.35110992193222046, "learning_rate": 1.916276638572148e-05, "loss": 0.5452, "step": 9643 }, { "epoch": 0.2647995606809445, "grad_norm": 0.3296765387058258, "learning_rate": 1.9162593383985916e-05, "loss": 0.4481, "step": 9644 }, { "epoch": 0.26482701812191106, "grad_norm": 0.3731238842010498, "learning_rate": 1.9162420365159193e-05, "loss": 0.478, "step": 9645 }, { "epoch": 0.26485447556287756, "grad_norm": 0.3715347945690155, "learning_rate": 1.916224732924163e-05, "loss": 0.5003, "step": 9646 }, { "epoch": 0.26488193300384405, "grad_norm": 0.33556532859802246, "learning_rate": 1.9162074276233554e-05, "loss": 0.5378, "step": 9647 }, { "epoch": 0.26490939044481054, "grad_norm": 0.38038182258605957, "learning_rate": 1.9161901206135283e-05, "loss": 0.4609, "step": 9648 }, { "epoch": 0.26493684788577704, "grad_norm": 0.36881354451179504, "learning_rate": 1.9161728118947142e-05, "loss": 0.4816, "step": 9649 }, { "epoch": 0.26496430532674353, "grad_norm": 0.3789440989494324, "learning_rate": 1.9161555014669455e-05, "loss": 0.5452, "step": 9650 }, { "epoch": 0.26499176276771, "grad_norm": 0.37304946780204773, "learning_rate": 1.9161381893302543e-05, "loss": 0.5518, "step": 9651 }, { "epoch": 0.2650192202086766, "grad_norm": 0.4110119938850403, "learning_rate": 1.916120875484673e-05, "loss": 0.5955, "step": 9652 }, { "epoch": 0.26504667764964307, "grad_norm": 0.3528750538825989, "learning_rate": 1.916103559930234e-05, "loss": 0.4578, "step": 9653 }, { "epoch": 0.26507413509060956, "grad_norm": 0.3554224967956543, "learning_rate": 1.9160862426669694e-05, "loss": 0.5499, "step": 9654 }, { "epoch": 0.26510159253157606, "grad_norm": 0.4055248498916626, "learning_rate": 1.9160689236949116e-05, "loss": 0.5534, "step": 9655 }, { "epoch": 0.26512904997254255, "grad_norm": 0.3381199538707733, "learning_rate": 1.916051603014093e-05, "loss": 0.5125, "step": 9656 }, { "epoch": 0.26515650741350905, "grad_norm": 0.35418254137039185, "learning_rate": 1.9160342806245456e-05, "loss": 0.5727, "step": 9657 }, { "epoch": 0.26518396485447554, "grad_norm": 0.42246386408805847, "learning_rate": 1.9160169565263018e-05, "loss": 0.5844, "step": 9658 }, { "epoch": 0.2652114222954421, "grad_norm": 0.3594554662704468, "learning_rate": 1.9159996307193946e-05, "loss": 0.5483, "step": 9659 }, { "epoch": 0.2652388797364086, "grad_norm": 0.3888269364833832, "learning_rate": 1.9159823032038552e-05, "loss": 0.506, "step": 9660 }, { "epoch": 0.2652663371773751, "grad_norm": 0.3493714928627014, "learning_rate": 1.9159649739797163e-05, "loss": 0.4455, "step": 9661 }, { "epoch": 0.26529379461834157, "grad_norm": 0.47876495122909546, "learning_rate": 1.915947643047011e-05, "loss": 0.5399, "step": 9662 }, { "epoch": 0.26532125205930807, "grad_norm": 0.40291666984558105, "learning_rate": 1.9159303104057706e-05, "loss": 0.5226, "step": 9663 }, { "epoch": 0.26534870950027456, "grad_norm": 0.3761269748210907, "learning_rate": 1.915912976056028e-05, "loss": 0.496, "step": 9664 }, { "epoch": 0.26537616694124105, "grad_norm": 0.3822683095932007, "learning_rate": 1.915895639997815e-05, "loss": 0.4768, "step": 9665 }, { "epoch": 0.2654036243822076, "grad_norm": 0.3779137432575226, "learning_rate": 1.9158783022311644e-05, "loss": 0.5691, "step": 9666 }, { "epoch": 0.2654310818231741, "grad_norm": 0.4052309989929199, "learning_rate": 1.9158609627561087e-05, "loss": 0.4986, "step": 9667 }, { "epoch": 0.2654585392641406, "grad_norm": 0.43523305654525757, "learning_rate": 1.9158436215726797e-05, "loss": 0.4736, "step": 9668 }, { "epoch": 0.2654859967051071, "grad_norm": 0.598957359790802, "learning_rate": 1.9158262786809107e-05, "loss": 0.633, "step": 9669 }, { "epoch": 0.2655134541460736, "grad_norm": 0.4731094241142273, "learning_rate": 1.915808934080833e-05, "loss": 0.6567, "step": 9670 }, { "epoch": 0.2655409115870401, "grad_norm": 0.41556647419929504, "learning_rate": 1.9157915877724792e-05, "loss": 0.4223, "step": 9671 }, { "epoch": 0.26556836902800657, "grad_norm": 0.3959093987941742, "learning_rate": 1.915774239755882e-05, "loss": 0.4651, "step": 9672 }, { "epoch": 0.2655958264689731, "grad_norm": 0.3465990126132965, "learning_rate": 1.9157568900310734e-05, "loss": 0.4876, "step": 9673 }, { "epoch": 0.2656232839099396, "grad_norm": 0.3708794414997101, "learning_rate": 1.915739538598086e-05, "loss": 0.4798, "step": 9674 }, { "epoch": 0.2656507413509061, "grad_norm": 0.4300672709941864, "learning_rate": 1.915722185456952e-05, "loss": 0.5542, "step": 9675 }, { "epoch": 0.2656781987918726, "grad_norm": 0.3363136053085327, "learning_rate": 1.915704830607704e-05, "loss": 0.5387, "step": 9676 }, { "epoch": 0.2657056562328391, "grad_norm": 0.4030798375606537, "learning_rate": 1.9156874740503743e-05, "loss": 0.5011, "step": 9677 }, { "epoch": 0.2657331136738056, "grad_norm": 0.3293135464191437, "learning_rate": 1.915670115784995e-05, "loss": 0.4442, "step": 9678 }, { "epoch": 0.2657605711147721, "grad_norm": 0.4067402184009552, "learning_rate": 1.9156527558115988e-05, "loss": 0.6114, "step": 9679 }, { "epoch": 0.26578802855573863, "grad_norm": 0.5132409334182739, "learning_rate": 1.9156353941302178e-05, "loss": 0.4927, "step": 9680 }, { "epoch": 0.2658154859967051, "grad_norm": 0.4042612612247467, "learning_rate": 1.9156180307408846e-05, "loss": 0.4884, "step": 9681 }, { "epoch": 0.2658429434376716, "grad_norm": 0.3470262885093689, "learning_rate": 1.9156006656436318e-05, "loss": 0.4649, "step": 9682 }, { "epoch": 0.2658704008786381, "grad_norm": 0.4658263325691223, "learning_rate": 1.9155832988384912e-05, "loss": 0.612, "step": 9683 }, { "epoch": 0.2658978583196046, "grad_norm": 0.3036240339279175, "learning_rate": 1.9155659303254957e-05, "loss": 0.432, "step": 9684 }, { "epoch": 0.2659253157605711, "grad_norm": 0.3453723192214966, "learning_rate": 1.915548560104678e-05, "loss": 0.4734, "step": 9685 }, { "epoch": 0.2659527732015376, "grad_norm": 0.3761102259159088, "learning_rate": 1.915531188176069e-05, "loss": 0.4547, "step": 9686 }, { "epoch": 0.26598023064250415, "grad_norm": 0.38731110095977783, "learning_rate": 1.9155138145397027e-05, "loss": 0.5732, "step": 9687 }, { "epoch": 0.26600768808347064, "grad_norm": 0.38174399733543396, "learning_rate": 1.915496439195611e-05, "loss": 0.5151, "step": 9688 }, { "epoch": 0.26603514552443713, "grad_norm": 0.3666574954986572, "learning_rate": 1.9154790621438262e-05, "loss": 0.4831, "step": 9689 }, { "epoch": 0.2660626029654036, "grad_norm": 0.44414129853248596, "learning_rate": 1.915461683384381e-05, "loss": 0.4982, "step": 9690 }, { "epoch": 0.2660900604063701, "grad_norm": 0.40528759360313416, "learning_rate": 1.915444302917307e-05, "loss": 0.5717, "step": 9691 }, { "epoch": 0.2661175178473366, "grad_norm": 0.3392919600009918, "learning_rate": 1.9154269207426374e-05, "loss": 0.5271, "step": 9692 }, { "epoch": 0.2661449752883031, "grad_norm": 0.38686496019363403, "learning_rate": 1.9154095368604046e-05, "loss": 0.4953, "step": 9693 }, { "epoch": 0.26617243272926966, "grad_norm": 0.41040706634521484, "learning_rate": 1.9153921512706407e-05, "loss": 0.5589, "step": 9694 }, { "epoch": 0.26619989017023615, "grad_norm": 0.3274059593677521, "learning_rate": 1.9153747639733788e-05, "loss": 0.5578, "step": 9695 }, { "epoch": 0.26622734761120265, "grad_norm": 0.3710768222808838, "learning_rate": 1.9153573749686502e-05, "loss": 0.521, "step": 9696 }, { "epoch": 0.26625480505216914, "grad_norm": 0.33454635739326477, "learning_rate": 1.9153399842564878e-05, "loss": 0.4592, "step": 9697 }, { "epoch": 0.26628226249313564, "grad_norm": 0.3518390655517578, "learning_rate": 1.9153225918369245e-05, "loss": 0.5498, "step": 9698 }, { "epoch": 0.26630971993410213, "grad_norm": 0.40558212995529175, "learning_rate": 1.9153051977099926e-05, "loss": 0.5929, "step": 9699 }, { "epoch": 0.2663371773750686, "grad_norm": 0.36725592613220215, "learning_rate": 1.9152878018757244e-05, "loss": 0.4901, "step": 9700 }, { "epoch": 0.2663646348160352, "grad_norm": 12.186029434204102, "learning_rate": 1.9152704043341522e-05, "loss": 0.5255, "step": 9701 }, { "epoch": 0.26639209225700167, "grad_norm": 0.32981520891189575, "learning_rate": 1.9152530050853082e-05, "loss": 0.4988, "step": 9702 }, { "epoch": 0.26641954969796816, "grad_norm": 1.2789078950881958, "learning_rate": 1.9152356041292256e-05, "loss": 0.4802, "step": 9703 }, { "epoch": 0.26644700713893466, "grad_norm": 0.34066396951675415, "learning_rate": 1.9152182014659366e-05, "loss": 0.4901, "step": 9704 }, { "epoch": 0.26647446457990115, "grad_norm": 0.41185519099235535, "learning_rate": 1.9152007970954733e-05, "loss": 0.6412, "step": 9705 }, { "epoch": 0.26650192202086764, "grad_norm": 0.891452431678772, "learning_rate": 1.9151833910178683e-05, "loss": 0.5312, "step": 9706 }, { "epoch": 0.26652937946183414, "grad_norm": 0.41768109798431396, "learning_rate": 1.9151659832331545e-05, "loss": 0.4994, "step": 9707 }, { "epoch": 0.26655683690280063, "grad_norm": 0.35634541511535645, "learning_rate": 1.915148573741364e-05, "loss": 0.5043, "step": 9708 }, { "epoch": 0.2665842943437672, "grad_norm": 2.5922248363494873, "learning_rate": 1.915131162542529e-05, "loss": 0.5656, "step": 9709 }, { "epoch": 0.2666117517847337, "grad_norm": 0.38412413001060486, "learning_rate": 1.9151137496366828e-05, "loss": 0.5962, "step": 9710 }, { "epoch": 0.26663920922570017, "grad_norm": 0.37335050106048584, "learning_rate": 1.9150963350238568e-05, "loss": 0.4948, "step": 9711 }, { "epoch": 0.26666666666666666, "grad_norm": 0.41074469685554504, "learning_rate": 1.915078918704085e-05, "loss": 0.5451, "step": 9712 }, { "epoch": 0.26669412410763316, "grad_norm": 0.36663976311683655, "learning_rate": 1.915061500677398e-05, "loss": 0.5506, "step": 9713 }, { "epoch": 0.26672158154859965, "grad_norm": 0.32534879446029663, "learning_rate": 1.9150440809438296e-05, "loss": 0.4235, "step": 9714 }, { "epoch": 0.26674903898956615, "grad_norm": 0.3794516324996948, "learning_rate": 1.9150266595034122e-05, "loss": 0.5989, "step": 9715 }, { "epoch": 0.2667764964305327, "grad_norm": 0.392097532749176, "learning_rate": 1.9150092363561777e-05, "loss": 0.5068, "step": 9716 }, { "epoch": 0.2668039538714992, "grad_norm": 0.3764788806438446, "learning_rate": 1.914991811502159e-05, "loss": 0.4465, "step": 9717 }, { "epoch": 0.2668314113124657, "grad_norm": 0.3451516926288605, "learning_rate": 1.9149743849413887e-05, "loss": 0.5162, "step": 9718 }, { "epoch": 0.2668588687534322, "grad_norm": 0.35021522641181946, "learning_rate": 1.914956956673899e-05, "loss": 0.5427, "step": 9719 }, { "epoch": 0.26688632619439867, "grad_norm": 0.4173634350299835, "learning_rate": 1.9149395266997226e-05, "loss": 0.5881, "step": 9720 }, { "epoch": 0.26691378363536516, "grad_norm": 0.33796870708465576, "learning_rate": 1.9149220950188917e-05, "loss": 0.4808, "step": 9721 }, { "epoch": 0.26694124107633166, "grad_norm": 0.6692757606506348, "learning_rate": 1.9149046616314395e-05, "loss": 0.602, "step": 9722 }, { "epoch": 0.2669686985172982, "grad_norm": 0.4141981303691864, "learning_rate": 1.9148872265373978e-05, "loss": 0.5389, "step": 9723 }, { "epoch": 0.2669961559582647, "grad_norm": 0.362693727016449, "learning_rate": 1.9148697897367996e-05, "loss": 0.5547, "step": 9724 }, { "epoch": 0.2670236133992312, "grad_norm": 0.3432229459285736, "learning_rate": 1.9148523512296773e-05, "loss": 0.5044, "step": 9725 }, { "epoch": 0.2670510708401977, "grad_norm": 0.36700043082237244, "learning_rate": 1.914834911016063e-05, "loss": 0.48, "step": 9726 }, { "epoch": 0.2670785282811642, "grad_norm": 0.3539049029350281, "learning_rate": 1.91481746909599e-05, "loss": 0.4678, "step": 9727 }, { "epoch": 0.2671059857221307, "grad_norm": 0.33634528517723083, "learning_rate": 1.9148000254694903e-05, "loss": 0.5355, "step": 9728 }, { "epoch": 0.2671334431630972, "grad_norm": 0.6096423268318176, "learning_rate": 1.914782580136597e-05, "loss": 0.5652, "step": 9729 }, { "epoch": 0.2671609006040637, "grad_norm": 0.37629780173301697, "learning_rate": 1.9147651330973417e-05, "loss": 0.5001, "step": 9730 }, { "epoch": 0.2671883580450302, "grad_norm": 0.35613054037094116, "learning_rate": 1.9147476843517574e-05, "loss": 0.5549, "step": 9731 }, { "epoch": 0.2672158154859967, "grad_norm": 0.3602292537689209, "learning_rate": 1.914730233899877e-05, "loss": 0.5752, "step": 9732 }, { "epoch": 0.2672432729269632, "grad_norm": 0.3413795828819275, "learning_rate": 1.9147127817417327e-05, "loss": 0.5173, "step": 9733 }, { "epoch": 0.2672707303679297, "grad_norm": 0.37886103987693787, "learning_rate": 1.914695327877357e-05, "loss": 0.4866, "step": 9734 }, { "epoch": 0.2672981878088962, "grad_norm": 0.36669865250587463, "learning_rate": 1.9146778723067826e-05, "loss": 0.5272, "step": 9735 }, { "epoch": 0.2673256452498627, "grad_norm": 0.3531798720359802, "learning_rate": 1.9146604150300425e-05, "loss": 0.5531, "step": 9736 }, { "epoch": 0.26735310269082924, "grad_norm": 0.4010038673877716, "learning_rate": 1.9146429560471685e-05, "loss": 0.6124, "step": 9737 }, { "epoch": 0.26738056013179573, "grad_norm": 0.39246878027915955, "learning_rate": 1.9146254953581934e-05, "loss": 0.5279, "step": 9738 }, { "epoch": 0.2674080175727622, "grad_norm": 0.3997604548931122, "learning_rate": 1.9146080329631496e-05, "loss": 0.4994, "step": 9739 }, { "epoch": 0.2674354750137287, "grad_norm": 0.36739468574523926, "learning_rate": 1.91459056886207e-05, "loss": 0.6005, "step": 9740 }, { "epoch": 0.2674629324546952, "grad_norm": 0.4121066927909851, "learning_rate": 1.9145731030549873e-05, "loss": 0.5236, "step": 9741 }, { "epoch": 0.2674903898956617, "grad_norm": 0.37619736790657043, "learning_rate": 1.914555635541934e-05, "loss": 0.4976, "step": 9742 }, { "epoch": 0.2675178473366282, "grad_norm": 0.3541381061077118, "learning_rate": 1.914538166322942e-05, "loss": 0.572, "step": 9743 }, { "epoch": 0.26754530477759475, "grad_norm": 0.3646325170993805, "learning_rate": 1.914520695398045e-05, "loss": 0.5426, "step": 9744 }, { "epoch": 0.26757276221856124, "grad_norm": 0.3175213038921356, "learning_rate": 1.9145032227672747e-05, "loss": 0.4039, "step": 9745 }, { "epoch": 0.26760021965952774, "grad_norm": 0.41148555278778076, "learning_rate": 1.9144857484306642e-05, "loss": 0.4795, "step": 9746 }, { "epoch": 0.26762767710049423, "grad_norm": 0.6565341353416443, "learning_rate": 1.914468272388246e-05, "loss": 0.5675, "step": 9747 }, { "epoch": 0.2676551345414607, "grad_norm": 0.43212229013442993, "learning_rate": 1.9144507946400522e-05, "loss": 0.5796, "step": 9748 }, { "epoch": 0.2676825919824272, "grad_norm": 0.37087878584861755, "learning_rate": 1.9144333151861162e-05, "loss": 0.5407, "step": 9749 }, { "epoch": 0.2677100494233937, "grad_norm": 0.3386351466178894, "learning_rate": 1.91441583402647e-05, "loss": 0.5366, "step": 9750 }, { "epoch": 0.26773750686436026, "grad_norm": 0.3601735532283783, "learning_rate": 1.9143983511611463e-05, "loss": 0.537, "step": 9751 }, { "epoch": 0.26776496430532676, "grad_norm": 0.3241231143474579, "learning_rate": 1.914380866590178e-05, "loss": 0.4037, "step": 9752 }, { "epoch": 0.26779242174629325, "grad_norm": 0.41108840703964233, "learning_rate": 1.9143633803135974e-05, "loss": 0.5729, "step": 9753 }, { "epoch": 0.26781987918725975, "grad_norm": 0.4378061890602112, "learning_rate": 1.914345892331438e-05, "loss": 0.5661, "step": 9754 }, { "epoch": 0.26784733662822624, "grad_norm": 0.37385082244873047, "learning_rate": 1.9143284026437307e-05, "loss": 0.558, "step": 9755 }, { "epoch": 0.26787479406919273, "grad_norm": 0.44738465547561646, "learning_rate": 1.9143109112505092e-05, "loss": 0.5883, "step": 9756 }, { "epoch": 0.26790225151015923, "grad_norm": 0.3492004871368408, "learning_rate": 1.914293418151807e-05, "loss": 0.4841, "step": 9757 }, { "epoch": 0.2679297089511258, "grad_norm": 0.3484858274459839, "learning_rate": 1.9142759233476548e-05, "loss": 0.4678, "step": 9758 }, { "epoch": 0.26795716639209227, "grad_norm": 0.3618752062320709, "learning_rate": 1.9142584268380866e-05, "loss": 0.4551, "step": 9759 }, { "epoch": 0.26798462383305877, "grad_norm": 0.4073898494243622, "learning_rate": 1.9142409286231345e-05, "loss": 0.5035, "step": 9760 }, { "epoch": 0.26801208127402526, "grad_norm": 0.3515893518924713, "learning_rate": 1.9142234287028313e-05, "loss": 0.4867, "step": 9761 }, { "epoch": 0.26803953871499175, "grad_norm": 0.32989564538002014, "learning_rate": 1.91420592707721e-05, "loss": 0.4032, "step": 9762 }, { "epoch": 0.26806699615595825, "grad_norm": 0.4137049615383148, "learning_rate": 1.914188423746302e-05, "loss": 0.4766, "step": 9763 }, { "epoch": 0.26809445359692474, "grad_norm": 0.47461962699890137, "learning_rate": 1.9141709187101414e-05, "loss": 0.636, "step": 9764 }, { "epoch": 0.2681219110378913, "grad_norm": 0.37120887637138367, "learning_rate": 1.9141534119687605e-05, "loss": 0.5088, "step": 9765 }, { "epoch": 0.2681493684788578, "grad_norm": 0.47231078147888184, "learning_rate": 1.914135903522191e-05, "loss": 0.5103, "step": 9766 }, { "epoch": 0.2681768259198243, "grad_norm": 0.5834417939186096, "learning_rate": 1.9141183933704668e-05, "loss": 0.4296, "step": 9767 }, { "epoch": 0.2682042833607908, "grad_norm": 0.37957099080085754, "learning_rate": 1.91410088151362e-05, "loss": 0.4987, "step": 9768 }, { "epoch": 0.26823174080175727, "grad_norm": 0.3324994444847107, "learning_rate": 1.9140833679516832e-05, "loss": 0.4339, "step": 9769 }, { "epoch": 0.26825919824272376, "grad_norm": 0.3767721354961395, "learning_rate": 1.914065852684689e-05, "loss": 0.5255, "step": 9770 }, { "epoch": 0.26828665568369026, "grad_norm": 0.3966728448867798, "learning_rate": 1.9140483357126706e-05, "loss": 0.52, "step": 9771 }, { "epoch": 0.2683141131246568, "grad_norm": 0.373540461063385, "learning_rate": 1.9140308170356604e-05, "loss": 0.5236, "step": 9772 }, { "epoch": 0.2683415705656233, "grad_norm": 0.3543161153793335, "learning_rate": 1.914013296653691e-05, "loss": 0.5812, "step": 9773 }, { "epoch": 0.2683690280065898, "grad_norm": 0.5518162250518799, "learning_rate": 1.9139957745667944e-05, "loss": 0.5956, "step": 9774 }, { "epoch": 0.2683964854475563, "grad_norm": 0.3820234537124634, "learning_rate": 1.9139782507750048e-05, "loss": 0.6127, "step": 9775 }, { "epoch": 0.2684239428885228, "grad_norm": 0.5409272909164429, "learning_rate": 1.9139607252783534e-05, "loss": 0.5323, "step": 9776 }, { "epoch": 0.2684514003294893, "grad_norm": 0.3401220142841339, "learning_rate": 1.913943198076874e-05, "loss": 0.5102, "step": 9777 }, { "epoch": 0.26847885777045577, "grad_norm": 0.3793210983276367, "learning_rate": 1.9139256691705985e-05, "loss": 0.5254, "step": 9778 }, { "epoch": 0.2685063152114223, "grad_norm": 0.44602257013320923, "learning_rate": 1.91390813855956e-05, "loss": 0.5057, "step": 9779 }, { "epoch": 0.2685337726523888, "grad_norm": 0.39210301637649536, "learning_rate": 1.9138906062437916e-05, "loss": 0.5318, "step": 9780 }, { "epoch": 0.2685612300933553, "grad_norm": 0.4559965431690216, "learning_rate": 1.913873072223325e-05, "loss": 0.4473, "step": 9781 }, { "epoch": 0.2685886875343218, "grad_norm": 0.31834569573402405, "learning_rate": 1.913855536498194e-05, "loss": 0.5, "step": 9782 }, { "epoch": 0.2686161449752883, "grad_norm": 0.35518959164619446, "learning_rate": 1.9138379990684303e-05, "loss": 0.5259, "step": 9783 }, { "epoch": 0.2686436024162548, "grad_norm": 0.39839065074920654, "learning_rate": 1.913820459934067e-05, "loss": 0.4609, "step": 9784 }, { "epoch": 0.2686710598572213, "grad_norm": 0.35992231965065, "learning_rate": 1.9138029190951372e-05, "loss": 0.5665, "step": 9785 }, { "epoch": 0.26869851729818783, "grad_norm": 0.3677034378051758, "learning_rate": 1.913785376551673e-05, "loss": 0.5876, "step": 9786 }, { "epoch": 0.2687259747391543, "grad_norm": 0.33809253573417664, "learning_rate": 1.9137678323037073e-05, "loss": 0.4672, "step": 9787 }, { "epoch": 0.2687534321801208, "grad_norm": 0.49835261702537537, "learning_rate": 1.9137502863512735e-05, "loss": 0.5511, "step": 9788 }, { "epoch": 0.2687808896210873, "grad_norm": 0.41913217306137085, "learning_rate": 1.9137327386944035e-05, "loss": 0.5076, "step": 9789 }, { "epoch": 0.2688083470620538, "grad_norm": 0.3479680120944977, "learning_rate": 1.9137151893331304e-05, "loss": 0.4966, "step": 9790 }, { "epoch": 0.2688358045030203, "grad_norm": 0.3717440962791443, "learning_rate": 1.9136976382674867e-05, "loss": 0.4866, "step": 9791 }, { "epoch": 0.2688632619439868, "grad_norm": 0.36988821625709534, "learning_rate": 1.9136800854975056e-05, "loss": 0.534, "step": 9792 }, { "epoch": 0.26889071938495335, "grad_norm": 0.4553162157535553, "learning_rate": 1.913662531023219e-05, "loss": 0.5426, "step": 9793 }, { "epoch": 0.26891817682591984, "grad_norm": 0.4279744625091553, "learning_rate": 1.9136449748446606e-05, "loss": 0.4615, "step": 9794 }, { "epoch": 0.26894563426688634, "grad_norm": 0.3745644688606262, "learning_rate": 1.9136274169618626e-05, "loss": 0.5117, "step": 9795 }, { "epoch": 0.26897309170785283, "grad_norm": 0.3508593738079071, "learning_rate": 1.9136098573748577e-05, "loss": 0.514, "step": 9796 }, { "epoch": 0.2690005491488193, "grad_norm": 0.45678842067718506, "learning_rate": 1.9135922960836792e-05, "loss": 0.6062, "step": 9797 }, { "epoch": 0.2690280065897858, "grad_norm": 0.3407766819000244, "learning_rate": 1.913574733088359e-05, "loss": 0.5492, "step": 9798 }, { "epoch": 0.2690554640307523, "grad_norm": 0.38647571206092834, "learning_rate": 1.9135571683889307e-05, "loss": 0.5625, "step": 9799 }, { "epoch": 0.26908292147171886, "grad_norm": 0.42047086358070374, "learning_rate": 1.9135396019854268e-05, "loss": 0.4911, "step": 9800 }, { "epoch": 0.26911037891268536, "grad_norm": 0.34407517313957214, "learning_rate": 1.9135220338778797e-05, "loss": 0.5344, "step": 9801 }, { "epoch": 0.26913783635365185, "grad_norm": 0.3305038511753082, "learning_rate": 1.9135044640663225e-05, "loss": 0.4769, "step": 9802 }, { "epoch": 0.26916529379461834, "grad_norm": 0.5723841190338135, "learning_rate": 1.9134868925507878e-05, "loss": 0.5245, "step": 9803 }, { "epoch": 0.26919275123558484, "grad_norm": 0.3831455707550049, "learning_rate": 1.9134693193313087e-05, "loss": 0.5333, "step": 9804 }, { "epoch": 0.26922020867655133, "grad_norm": 0.3809056580066681, "learning_rate": 1.913451744407918e-05, "loss": 0.5814, "step": 9805 }, { "epoch": 0.2692476661175178, "grad_norm": 0.3578951954841614, "learning_rate": 1.9134341677806477e-05, "loss": 0.4907, "step": 9806 }, { "epoch": 0.2692751235584844, "grad_norm": 0.3252653479576111, "learning_rate": 1.9134165894495315e-05, "loss": 0.4658, "step": 9807 }, { "epoch": 0.26930258099945087, "grad_norm": 0.3932402729988098, "learning_rate": 1.9133990094146017e-05, "loss": 0.5325, "step": 9808 }, { "epoch": 0.26933003844041736, "grad_norm": 0.3751697242259979, "learning_rate": 1.9133814276758913e-05, "loss": 0.4848, "step": 9809 }, { "epoch": 0.26935749588138386, "grad_norm": 0.34531140327453613, "learning_rate": 1.913363844233433e-05, "loss": 0.5235, "step": 9810 }, { "epoch": 0.26938495332235035, "grad_norm": 0.35406339168548584, "learning_rate": 1.9133462590872595e-05, "loss": 0.5223, "step": 9811 }, { "epoch": 0.26941241076331685, "grad_norm": 0.3575029969215393, "learning_rate": 1.913328672237404e-05, "loss": 0.4226, "step": 9812 }, { "epoch": 0.26943986820428334, "grad_norm": 0.4238944947719574, "learning_rate": 1.913311083683899e-05, "loss": 0.5413, "step": 9813 }, { "epoch": 0.2694673256452499, "grad_norm": 0.3711632192134857, "learning_rate": 1.9132934934267767e-05, "loss": 0.55, "step": 9814 }, { "epoch": 0.2694947830862164, "grad_norm": 0.345323383808136, "learning_rate": 1.9132759014660713e-05, "loss": 0.5165, "step": 9815 }, { "epoch": 0.2695222405271829, "grad_norm": 0.3540407121181488, "learning_rate": 1.9132583078018145e-05, "loss": 0.5934, "step": 9816 }, { "epoch": 0.26954969796814937, "grad_norm": 0.32070431113243103, "learning_rate": 1.9132407124340394e-05, "loss": 0.4986, "step": 9817 }, { "epoch": 0.26957715540911587, "grad_norm": 0.3500041365623474, "learning_rate": 1.9132231153627793e-05, "loss": 0.4066, "step": 9818 }, { "epoch": 0.26960461285008236, "grad_norm": 0.35406187176704407, "learning_rate": 1.9132055165880662e-05, "loss": 0.6191, "step": 9819 }, { "epoch": 0.26963207029104885, "grad_norm": 1.061446189880371, "learning_rate": 1.9131879161099336e-05, "loss": 0.5011, "step": 9820 }, { "epoch": 0.2696595277320154, "grad_norm": 0.34080320596694946, "learning_rate": 1.9131703139284143e-05, "loss": 0.4704, "step": 9821 }, { "epoch": 0.2696869851729819, "grad_norm": 0.3683513402938843, "learning_rate": 1.9131527100435406e-05, "loss": 0.5345, "step": 9822 }, { "epoch": 0.2697144426139484, "grad_norm": 0.40094754099845886, "learning_rate": 1.9131351044553456e-05, "loss": 0.6297, "step": 9823 }, { "epoch": 0.2697419000549149, "grad_norm": 0.38259661197662354, "learning_rate": 1.9131174971638625e-05, "loss": 0.5272, "step": 9824 }, { "epoch": 0.2697693574958814, "grad_norm": 0.336723268032074, "learning_rate": 1.9130998881691233e-05, "loss": 0.6052, "step": 9825 }, { "epoch": 0.2697968149368479, "grad_norm": 0.3405775725841522, "learning_rate": 1.913082277471162e-05, "loss": 0.5587, "step": 9826 }, { "epoch": 0.26982427237781437, "grad_norm": 0.43415552377700806, "learning_rate": 1.91306466507001e-05, "loss": 0.526, "step": 9827 }, { "epoch": 0.2698517298187809, "grad_norm": 0.32873955368995667, "learning_rate": 1.9130470509657018e-05, "loss": 0.5046, "step": 9828 }, { "epoch": 0.2698791872597474, "grad_norm": 0.3966353237628937, "learning_rate": 1.913029435158269e-05, "loss": 0.5816, "step": 9829 }, { "epoch": 0.2699066447007139, "grad_norm": 0.39512449502944946, "learning_rate": 1.9130118176477452e-05, "loss": 0.5135, "step": 9830 }, { "epoch": 0.2699341021416804, "grad_norm": 0.39724794030189514, "learning_rate": 1.912994198434163e-05, "loss": 0.5595, "step": 9831 }, { "epoch": 0.2699615595826469, "grad_norm": 0.3550942838191986, "learning_rate": 1.912976577517555e-05, "loss": 0.4725, "step": 9832 }, { "epoch": 0.2699890170236134, "grad_norm": 0.34817275404930115, "learning_rate": 1.9129589548979543e-05, "loss": 0.5192, "step": 9833 }, { "epoch": 0.2700164744645799, "grad_norm": 0.4286346435546875, "learning_rate": 1.9129413305753936e-05, "loss": 0.5685, "step": 9834 }, { "epoch": 0.27004393190554643, "grad_norm": 0.3743230998516083, "learning_rate": 1.9129237045499064e-05, "loss": 0.493, "step": 9835 }, { "epoch": 0.2700713893465129, "grad_norm": 0.35164231061935425, "learning_rate": 1.9129060768215248e-05, "loss": 0.508, "step": 9836 }, { "epoch": 0.2700988467874794, "grad_norm": 0.34301045536994934, "learning_rate": 1.912888447390282e-05, "loss": 0.5341, "step": 9837 }, { "epoch": 0.2701263042284459, "grad_norm": 0.41521045565605164, "learning_rate": 1.912870816256211e-05, "loss": 0.5097, "step": 9838 }, { "epoch": 0.2701537616694124, "grad_norm": 0.33203360438346863, "learning_rate": 1.9128531834193445e-05, "loss": 0.4112, "step": 9839 }, { "epoch": 0.2701812191103789, "grad_norm": 0.41138365864753723, "learning_rate": 1.9128355488797155e-05, "loss": 0.4383, "step": 9840 }, { "epoch": 0.2702086765513454, "grad_norm": 0.3815813362598419, "learning_rate": 1.912817912637357e-05, "loss": 0.5802, "step": 9841 }, { "epoch": 0.2702361339923119, "grad_norm": 0.4031226933002472, "learning_rate": 1.9128002746923014e-05, "loss": 0.4641, "step": 9842 }, { "epoch": 0.27026359143327844, "grad_norm": 0.3417733907699585, "learning_rate": 1.912782635044582e-05, "loss": 0.4844, "step": 9843 }, { "epoch": 0.27029104887424493, "grad_norm": 0.4594157636165619, "learning_rate": 1.9127649936942317e-05, "loss": 0.5427, "step": 9844 }, { "epoch": 0.2703185063152114, "grad_norm": 0.3856273293495178, "learning_rate": 1.9127473506412836e-05, "loss": 0.541, "step": 9845 }, { "epoch": 0.2703459637561779, "grad_norm": 0.3583327531814575, "learning_rate": 1.9127297058857705e-05, "loss": 0.5102, "step": 9846 }, { "epoch": 0.2703734211971444, "grad_norm": 0.3427186906337738, "learning_rate": 1.9127120594277245e-05, "loss": 0.5359, "step": 9847 }, { "epoch": 0.2704008786381109, "grad_norm": 0.39706024527549744, "learning_rate": 1.91269441126718e-05, "loss": 0.4787, "step": 9848 }, { "epoch": 0.2704283360790774, "grad_norm": 0.35180458426475525, "learning_rate": 1.9126767614041686e-05, "loss": 0.5576, "step": 9849 }, { "epoch": 0.27045579352004395, "grad_norm": 0.3995959758758545, "learning_rate": 1.912659109838724e-05, "loss": 0.568, "step": 9850 }, { "epoch": 0.27048325096101045, "grad_norm": 0.33609065413475037, "learning_rate": 1.9126414565708788e-05, "loss": 0.4897, "step": 9851 }, { "epoch": 0.27051070840197694, "grad_norm": 0.35539305210113525, "learning_rate": 1.912623801600666e-05, "loss": 0.5311, "step": 9852 }, { "epoch": 0.27053816584294343, "grad_norm": 0.3461754322052002, "learning_rate": 1.9126061449281183e-05, "loss": 0.4742, "step": 9853 }, { "epoch": 0.27056562328390993, "grad_norm": 0.419450581073761, "learning_rate": 1.9125884865532695e-05, "loss": 0.5289, "step": 9854 }, { "epoch": 0.2705930807248764, "grad_norm": 0.36920684576034546, "learning_rate": 1.9125708264761514e-05, "loss": 0.5316, "step": 9855 }, { "epoch": 0.2706205381658429, "grad_norm": 0.509120762348175, "learning_rate": 1.9125531646967973e-05, "loss": 0.5789, "step": 9856 }, { "epoch": 0.27064799560680947, "grad_norm": 0.40556105971336365, "learning_rate": 1.9125355012152407e-05, "loss": 0.5234, "step": 9857 }, { "epoch": 0.27067545304777596, "grad_norm": 0.4005683958530426, "learning_rate": 1.912517836031514e-05, "loss": 0.5272, "step": 9858 }, { "epoch": 0.27070291048874245, "grad_norm": 0.3622051775455475, "learning_rate": 1.9125001691456502e-05, "loss": 0.5044, "step": 9859 }, { "epoch": 0.27073036792970895, "grad_norm": 0.35783475637435913, "learning_rate": 1.9124825005576823e-05, "loss": 0.6155, "step": 9860 }, { "epoch": 0.27075782537067544, "grad_norm": 0.4288792610168457, "learning_rate": 1.9124648302676437e-05, "loss": 0.525, "step": 9861 }, { "epoch": 0.27078528281164194, "grad_norm": 0.3309309184551239, "learning_rate": 1.9124471582755666e-05, "loss": 0.5416, "step": 9862 }, { "epoch": 0.27081274025260843, "grad_norm": 0.36693888902664185, "learning_rate": 1.9124294845814842e-05, "loss": 0.5579, "step": 9863 }, { "epoch": 0.270840197693575, "grad_norm": 0.5052103996276855, "learning_rate": 1.91241180918543e-05, "loss": 0.537, "step": 9864 }, { "epoch": 0.2708676551345415, "grad_norm": 0.43067431449890137, "learning_rate": 1.9123941320874367e-05, "loss": 0.5768, "step": 9865 }, { "epoch": 0.27089511257550797, "grad_norm": 0.3520979583263397, "learning_rate": 1.9123764532875366e-05, "loss": 0.5159, "step": 9866 }, { "epoch": 0.27092257001647446, "grad_norm": 0.42264533042907715, "learning_rate": 1.9123587727857636e-05, "loss": 0.5545, "step": 9867 }, { "epoch": 0.27095002745744096, "grad_norm": 0.3593469262123108, "learning_rate": 1.91234109058215e-05, "loss": 0.4808, "step": 9868 }, { "epoch": 0.27097748489840745, "grad_norm": 0.3687663674354553, "learning_rate": 1.9123234066767294e-05, "loss": 0.5531, "step": 9869 }, { "epoch": 0.27100494233937394, "grad_norm": 0.3696453273296356, "learning_rate": 1.912305721069534e-05, "loss": 0.4925, "step": 9870 }, { "epoch": 0.2710323997803405, "grad_norm": 0.4145413339138031, "learning_rate": 1.912288033760598e-05, "loss": 0.6008, "step": 9871 }, { "epoch": 0.271059857221307, "grad_norm": 1.381593942642212, "learning_rate": 1.912270344749953e-05, "loss": 0.4836, "step": 9872 }, { "epoch": 0.2710873146622735, "grad_norm": 0.4204270541667938, "learning_rate": 1.9122526540376327e-05, "loss": 0.437, "step": 9873 }, { "epoch": 0.27111477210324, "grad_norm": 0.38020676374435425, "learning_rate": 1.9122349616236705e-05, "loss": 0.5648, "step": 9874 }, { "epoch": 0.27114222954420647, "grad_norm": 0.37495705485343933, "learning_rate": 1.9122172675080985e-05, "loss": 0.5662, "step": 9875 }, { "epoch": 0.27116968698517296, "grad_norm": 0.3831397593021393, "learning_rate": 1.9121995716909503e-05, "loss": 0.4566, "step": 9876 }, { "epoch": 0.27119714442613946, "grad_norm": 0.346337229013443, "learning_rate": 1.9121818741722587e-05, "loss": 0.5676, "step": 9877 }, { "epoch": 0.271224601867106, "grad_norm": 0.3712306618690491, "learning_rate": 1.912164174952057e-05, "loss": 0.5637, "step": 9878 }, { "epoch": 0.2712520593080725, "grad_norm": 0.39312639832496643, "learning_rate": 1.912146474030378e-05, "loss": 0.514, "step": 9879 }, { "epoch": 0.271279516749039, "grad_norm": 0.3729567527770996, "learning_rate": 1.9121287714072543e-05, "loss": 0.5227, "step": 9880 }, { "epoch": 0.2713069741900055, "grad_norm": 0.3381660282611847, "learning_rate": 1.9121110670827193e-05, "loss": 0.4594, "step": 9881 }, { "epoch": 0.271334431630972, "grad_norm": 0.3260592222213745, "learning_rate": 1.9120933610568066e-05, "loss": 0.4101, "step": 9882 }, { "epoch": 0.2713618890719385, "grad_norm": 0.35581591725349426, "learning_rate": 1.9120756533295482e-05, "loss": 0.5291, "step": 9883 }, { "epoch": 0.27138934651290497, "grad_norm": 0.6947402358055115, "learning_rate": 1.9120579439009777e-05, "loss": 0.5759, "step": 9884 }, { "epoch": 0.2714168039538715, "grad_norm": 0.3291179835796356, "learning_rate": 1.912040232771128e-05, "loss": 0.5098, "step": 9885 }, { "epoch": 0.271444261394838, "grad_norm": 0.4356599748134613, "learning_rate": 1.9120225199400323e-05, "loss": 0.5764, "step": 9886 }, { "epoch": 0.2714717188358045, "grad_norm": 0.34648871421813965, "learning_rate": 1.9120048054077237e-05, "loss": 0.5097, "step": 9887 }, { "epoch": 0.271499176276771, "grad_norm": 0.38144561648368835, "learning_rate": 1.9119870891742346e-05, "loss": 0.4987, "step": 9888 }, { "epoch": 0.2715266337177375, "grad_norm": 0.336112380027771, "learning_rate": 1.9119693712395986e-05, "loss": 0.5384, "step": 9889 }, { "epoch": 0.271554091158704, "grad_norm": 0.3938659727573395, "learning_rate": 1.911951651603849e-05, "loss": 0.6183, "step": 9890 }, { "epoch": 0.2715815485996705, "grad_norm": 0.43657591938972473, "learning_rate": 1.9119339302670187e-05, "loss": 0.5843, "step": 9891 }, { "epoch": 0.27160900604063704, "grad_norm": 0.4665045142173767, "learning_rate": 1.91191620722914e-05, "loss": 0.542, "step": 9892 }, { "epoch": 0.27163646348160353, "grad_norm": 0.41740739345550537, "learning_rate": 1.9118984824902464e-05, "loss": 0.471, "step": 9893 }, { "epoch": 0.27166392092257, "grad_norm": 0.3649424612522125, "learning_rate": 1.9118807560503717e-05, "loss": 0.5623, "step": 9894 }, { "epoch": 0.2716913783635365, "grad_norm": 0.3550761938095093, "learning_rate": 1.911863027909548e-05, "loss": 0.4918, "step": 9895 }, { "epoch": 0.271718835804503, "grad_norm": 0.396675169467926, "learning_rate": 1.911845298067809e-05, "loss": 0.5367, "step": 9896 }, { "epoch": 0.2717462932454695, "grad_norm": 0.3661268949508667, "learning_rate": 1.9118275665251872e-05, "loss": 0.5139, "step": 9897 }, { "epoch": 0.271773750686436, "grad_norm": 0.37169986963272095, "learning_rate": 1.9118098332817155e-05, "loss": 0.5154, "step": 9898 }, { "epoch": 0.27180120812740255, "grad_norm": 0.3403613567352295, "learning_rate": 1.9117920983374284e-05, "loss": 0.4897, "step": 9899 }, { "epoch": 0.27182866556836904, "grad_norm": 0.3278340697288513, "learning_rate": 1.9117743616923575e-05, "loss": 0.4479, "step": 9900 }, { "epoch": 0.27185612300933554, "grad_norm": 0.4376673698425293, "learning_rate": 1.9117566233465362e-05, "loss": 0.5406, "step": 9901 }, { "epoch": 0.27188358045030203, "grad_norm": 0.4957289397716522, "learning_rate": 1.9117388832999984e-05, "loss": 0.585, "step": 9902 }, { "epoch": 0.2719110378912685, "grad_norm": 0.3694450259208679, "learning_rate": 1.9117211415527764e-05, "loss": 0.5186, "step": 9903 }, { "epoch": 0.271938495332235, "grad_norm": 0.34558039903640747, "learning_rate": 1.911703398104903e-05, "loss": 0.4704, "step": 9904 }, { "epoch": 0.2719659527732015, "grad_norm": 0.37124475836753845, "learning_rate": 1.9116856529564124e-05, "loss": 0.4675, "step": 9905 }, { "epoch": 0.27199341021416806, "grad_norm": 0.3943414092063904, "learning_rate": 1.9116679061073368e-05, "loss": 0.5972, "step": 9906 }, { "epoch": 0.27202086765513456, "grad_norm": 0.4420850872993469, "learning_rate": 1.9116501575577094e-05, "loss": 0.6081, "step": 9907 }, { "epoch": 0.27204832509610105, "grad_norm": 0.3354905843734741, "learning_rate": 1.911632407307564e-05, "loss": 0.4853, "step": 9908 }, { "epoch": 0.27207578253706755, "grad_norm": 0.33850884437561035, "learning_rate": 1.911614655356933e-05, "loss": 0.5847, "step": 9909 }, { "epoch": 0.27210323997803404, "grad_norm": 0.37864333391189575, "learning_rate": 1.9115969017058495e-05, "loss": 0.5625, "step": 9910 }, { "epoch": 0.27213069741900053, "grad_norm": 0.42382922768592834, "learning_rate": 1.9115791463543472e-05, "loss": 0.6441, "step": 9911 }, { "epoch": 0.272158154859967, "grad_norm": 0.36682820320129395, "learning_rate": 1.9115613893024586e-05, "loss": 0.5443, "step": 9912 }, { "epoch": 0.2721856123009336, "grad_norm": 0.4664061665534973, "learning_rate": 1.911543630550217e-05, "loss": 0.5219, "step": 9913 }, { "epoch": 0.27221306974190007, "grad_norm": 0.37746065855026245, "learning_rate": 1.9115258700976554e-05, "loss": 0.5582, "step": 9914 }, { "epoch": 0.27224052718286657, "grad_norm": 0.34339815378189087, "learning_rate": 1.9115081079448076e-05, "loss": 0.5328, "step": 9915 }, { "epoch": 0.27226798462383306, "grad_norm": 0.38903892040252686, "learning_rate": 1.9114903440917062e-05, "loss": 0.4052, "step": 9916 }, { "epoch": 0.27229544206479955, "grad_norm": 0.3646758496761322, "learning_rate": 1.9114725785383843e-05, "loss": 0.5102, "step": 9917 }, { "epoch": 0.27232289950576605, "grad_norm": 0.3549175262451172, "learning_rate": 1.911454811284875e-05, "loss": 0.4975, "step": 9918 }, { "epoch": 0.27235035694673254, "grad_norm": 0.3610672354698181, "learning_rate": 1.9114370423312118e-05, "loss": 0.491, "step": 9919 }, { "epoch": 0.2723778143876991, "grad_norm": 0.35971638560295105, "learning_rate": 1.9114192716774275e-05, "loss": 0.4716, "step": 9920 }, { "epoch": 0.2724052718286656, "grad_norm": 0.3618254065513611, "learning_rate": 1.9114014993235552e-05, "loss": 0.4952, "step": 9921 }, { "epoch": 0.2724327292696321, "grad_norm": 0.4243094325065613, "learning_rate": 1.9113837252696282e-05, "loss": 0.5846, "step": 9922 }, { "epoch": 0.2724601867105986, "grad_norm": 0.35683801770210266, "learning_rate": 1.91136594951568e-05, "loss": 0.525, "step": 9923 }, { "epoch": 0.27248764415156507, "grad_norm": 0.3508235812187195, "learning_rate": 1.9113481720617434e-05, "loss": 0.5451, "step": 9924 }, { "epoch": 0.27251510159253156, "grad_norm": 0.38714319467544556, "learning_rate": 1.9113303929078514e-05, "loss": 0.5318, "step": 9925 }, { "epoch": 0.27254255903349806, "grad_norm": 0.3994044363498688, "learning_rate": 1.9113126120540374e-05, "loss": 0.5152, "step": 9926 }, { "epoch": 0.2725700164744646, "grad_norm": 0.3863588869571686, "learning_rate": 1.9112948295003347e-05, "loss": 0.4705, "step": 9927 }, { "epoch": 0.2725974739154311, "grad_norm": 0.44262945652008057, "learning_rate": 1.911277045246776e-05, "loss": 0.5849, "step": 9928 }, { "epoch": 0.2726249313563976, "grad_norm": 0.36223524808883667, "learning_rate": 1.9112592592933948e-05, "loss": 0.4574, "step": 9929 }, { "epoch": 0.2726523887973641, "grad_norm": 0.4359566569328308, "learning_rate": 1.9112414716402244e-05, "loss": 0.5759, "step": 9930 }, { "epoch": 0.2726798462383306, "grad_norm": 0.3343963623046875, "learning_rate": 1.9112236822872977e-05, "loss": 0.4844, "step": 9931 }, { "epoch": 0.2727073036792971, "grad_norm": 0.3669431507587433, "learning_rate": 1.9112058912346483e-05, "loss": 0.449, "step": 9932 }, { "epoch": 0.27273476112026357, "grad_norm": 0.3658734858036041, "learning_rate": 1.9111880984823088e-05, "loss": 0.4988, "step": 9933 }, { "epoch": 0.2727622185612301, "grad_norm": 0.3613303601741791, "learning_rate": 1.9111703040303125e-05, "loss": 0.5287, "step": 9934 }, { "epoch": 0.2727896760021966, "grad_norm": 0.3479010760784149, "learning_rate": 1.911152507878693e-05, "loss": 0.4995, "step": 9935 }, { "epoch": 0.2728171334431631, "grad_norm": 0.36884674429893494, "learning_rate": 1.9111347100274833e-05, "loss": 0.5592, "step": 9936 }, { "epoch": 0.2728445908841296, "grad_norm": 0.32190725207328796, "learning_rate": 1.9111169104767167e-05, "loss": 0.4531, "step": 9937 }, { "epoch": 0.2728720483250961, "grad_norm": 0.35047444701194763, "learning_rate": 1.911099109226426e-05, "loss": 0.5092, "step": 9938 }, { "epoch": 0.2728995057660626, "grad_norm": 0.3900201916694641, "learning_rate": 1.9110813062766448e-05, "loss": 0.4392, "step": 9939 }, { "epoch": 0.2729269632070291, "grad_norm": 0.3737871050834656, "learning_rate": 1.9110635016274063e-05, "loss": 0.579, "step": 9940 }, { "epoch": 0.27295442064799563, "grad_norm": 0.33886560797691345, "learning_rate": 1.9110456952787432e-05, "loss": 0.5252, "step": 9941 }, { "epoch": 0.2729818780889621, "grad_norm": 0.46305903792381287, "learning_rate": 1.9110278872306895e-05, "loss": 0.5625, "step": 9942 }, { "epoch": 0.2730093355299286, "grad_norm": 0.5367904305458069, "learning_rate": 1.911010077483278e-05, "loss": 0.5828, "step": 9943 }, { "epoch": 0.2730367929708951, "grad_norm": 0.33565518260002136, "learning_rate": 1.9109922660365417e-05, "loss": 0.5064, "step": 9944 }, { "epoch": 0.2730642504118616, "grad_norm": 0.36408793926239014, "learning_rate": 1.910974452890514e-05, "loss": 0.5084, "step": 9945 }, { "epoch": 0.2730917078528281, "grad_norm": 0.4075987935066223, "learning_rate": 1.9109566380452283e-05, "loss": 0.4868, "step": 9946 }, { "epoch": 0.2731191652937946, "grad_norm": 0.3790868818759918, "learning_rate": 1.910938821500718e-05, "loss": 0.5465, "step": 9947 }, { "epoch": 0.27314662273476115, "grad_norm": 0.3407665491104126, "learning_rate": 1.910921003257016e-05, "loss": 0.5163, "step": 9948 }, { "epoch": 0.27317408017572764, "grad_norm": 0.3683985769748688, "learning_rate": 1.9109031833141554e-05, "loss": 0.5194, "step": 9949 }, { "epoch": 0.27320153761669413, "grad_norm": 0.32936447858810425, "learning_rate": 1.9108853616721698e-05, "loss": 0.386, "step": 9950 }, { "epoch": 0.27322899505766063, "grad_norm": 0.4124986231327057, "learning_rate": 1.910867538331092e-05, "loss": 0.5568, "step": 9951 }, { "epoch": 0.2732564524986271, "grad_norm": 0.35784968733787537, "learning_rate": 1.910849713290956e-05, "loss": 0.5283, "step": 9952 }, { "epoch": 0.2732839099395936, "grad_norm": 0.39179691672325134, "learning_rate": 1.910831886551794e-05, "loss": 0.5587, "step": 9953 }, { "epoch": 0.2733113673805601, "grad_norm": 0.40338844060897827, "learning_rate": 1.9108140581136403e-05, "loss": 0.5416, "step": 9954 }, { "epoch": 0.27333882482152666, "grad_norm": 0.38420209288597107, "learning_rate": 1.9107962279765276e-05, "loss": 0.4498, "step": 9955 }, { "epoch": 0.27336628226249315, "grad_norm": 0.34317854046821594, "learning_rate": 1.910778396140489e-05, "loss": 0.5104, "step": 9956 }, { "epoch": 0.27339373970345965, "grad_norm": 0.37836387753486633, "learning_rate": 1.910760562605558e-05, "loss": 0.6248, "step": 9957 }, { "epoch": 0.27342119714442614, "grad_norm": 0.3767372667789459, "learning_rate": 1.9107427273717684e-05, "loss": 0.4832, "step": 9958 }, { "epoch": 0.27344865458539264, "grad_norm": 0.33637264370918274, "learning_rate": 1.9107248904391525e-05, "loss": 0.4897, "step": 9959 }, { "epoch": 0.27347611202635913, "grad_norm": 0.3900202512741089, "learning_rate": 1.910707051807744e-05, "loss": 0.4915, "step": 9960 }, { "epoch": 0.2735035694673256, "grad_norm": 0.39375507831573486, "learning_rate": 1.9106892114775763e-05, "loss": 0.5079, "step": 9961 }, { "epoch": 0.2735310269082922, "grad_norm": 0.4811682105064392, "learning_rate": 1.9106713694486823e-05, "loss": 0.5258, "step": 9962 }, { "epoch": 0.27355848434925867, "grad_norm": 0.44188427925109863, "learning_rate": 1.9106535257210956e-05, "loss": 0.4965, "step": 9963 }, { "epoch": 0.27358594179022516, "grad_norm": 2.236426830291748, "learning_rate": 1.9106356802948498e-05, "loss": 0.5074, "step": 9964 }, { "epoch": 0.27361339923119166, "grad_norm": 0.34329482913017273, "learning_rate": 1.9106178331699776e-05, "loss": 0.4345, "step": 9965 }, { "epoch": 0.27364085667215815, "grad_norm": 0.36275172233581543, "learning_rate": 1.9105999843465123e-05, "loss": 0.5576, "step": 9966 }, { "epoch": 0.27366831411312464, "grad_norm": 0.4447198212146759, "learning_rate": 1.9105821338244877e-05, "loss": 0.6074, "step": 9967 }, { "epoch": 0.27369577155409114, "grad_norm": 0.347940593957901, "learning_rate": 1.910564281603937e-05, "loss": 0.5254, "step": 9968 }, { "epoch": 0.2737232289950577, "grad_norm": 0.7880483865737915, "learning_rate": 1.9105464276848928e-05, "loss": 0.5391, "step": 9969 }, { "epoch": 0.2737506864360242, "grad_norm": 0.38076260685920715, "learning_rate": 1.910528572067389e-05, "loss": 0.5729, "step": 9970 }, { "epoch": 0.2737781438769907, "grad_norm": 1.6311343908309937, "learning_rate": 1.910510714751459e-05, "loss": 0.4575, "step": 9971 }, { "epoch": 0.27380560131795717, "grad_norm": 0.37366747856140137, "learning_rate": 1.910492855737136e-05, "loss": 0.5605, "step": 9972 }, { "epoch": 0.27383305875892366, "grad_norm": 0.5087101459503174, "learning_rate": 1.910474995024453e-05, "loss": 0.6478, "step": 9973 }, { "epoch": 0.27386051619989016, "grad_norm": 0.3841054141521454, "learning_rate": 1.9104571326134435e-05, "loss": 0.5645, "step": 9974 }, { "epoch": 0.27388797364085665, "grad_norm": 0.4028850495815277, "learning_rate": 1.9104392685041407e-05, "loss": 0.625, "step": 9975 }, { "epoch": 0.27391543108182315, "grad_norm": 0.32923951745033264, "learning_rate": 1.9104214026965783e-05, "loss": 0.5419, "step": 9976 }, { "epoch": 0.2739428885227897, "grad_norm": 0.3664994537830353, "learning_rate": 1.9104035351907896e-05, "loss": 0.5487, "step": 9977 }, { "epoch": 0.2739703459637562, "grad_norm": 0.44592389464378357, "learning_rate": 1.910385665986808e-05, "loss": 0.5639, "step": 9978 }, { "epoch": 0.2739978034047227, "grad_norm": 0.33739885687828064, "learning_rate": 1.910367795084666e-05, "loss": 0.5456, "step": 9979 }, { "epoch": 0.2740252608456892, "grad_norm": 0.3400880694389343, "learning_rate": 1.9103499224843976e-05, "loss": 0.5241, "step": 9980 }, { "epoch": 0.27405271828665567, "grad_norm": 0.3730035424232483, "learning_rate": 1.9103320481860362e-05, "loss": 0.5853, "step": 9981 }, { "epoch": 0.27408017572762217, "grad_norm": 0.3500209450721741, "learning_rate": 1.9103141721896147e-05, "loss": 0.4626, "step": 9982 }, { "epoch": 0.27410763316858866, "grad_norm": 0.33709242939949036, "learning_rate": 1.910296294495167e-05, "loss": 0.5073, "step": 9983 }, { "epoch": 0.2741350906095552, "grad_norm": 0.5570001602172852, "learning_rate": 1.910278415102726e-05, "loss": 0.5044, "step": 9984 }, { "epoch": 0.2741625480505217, "grad_norm": 0.3342267572879791, "learning_rate": 1.9102605340123254e-05, "loss": 0.4663, "step": 9985 }, { "epoch": 0.2741900054914882, "grad_norm": 0.34972089529037476, "learning_rate": 1.9102426512239983e-05, "loss": 0.5377, "step": 9986 }, { "epoch": 0.2742174629324547, "grad_norm": 0.3424040973186493, "learning_rate": 1.910224766737778e-05, "loss": 0.5509, "step": 9987 }, { "epoch": 0.2742449203734212, "grad_norm": 0.3814975619316101, "learning_rate": 1.9102068805536983e-05, "loss": 0.5742, "step": 9988 }, { "epoch": 0.2742723778143877, "grad_norm": 0.344881147146225, "learning_rate": 1.910188992671792e-05, "loss": 0.4989, "step": 9989 }, { "epoch": 0.2742998352553542, "grad_norm": 0.3487935960292816, "learning_rate": 1.9101711030920928e-05, "loss": 0.498, "step": 9990 }, { "epoch": 0.2743272926963207, "grad_norm": 0.3517645001411438, "learning_rate": 1.9101532118146342e-05, "loss": 0.5103, "step": 9991 }, { "epoch": 0.2743547501372872, "grad_norm": 0.3459113538265228, "learning_rate": 1.910135318839449e-05, "loss": 0.5651, "step": 9992 }, { "epoch": 0.2743822075782537, "grad_norm": 0.37721192836761475, "learning_rate": 1.9101174241665715e-05, "loss": 0.5566, "step": 9993 }, { "epoch": 0.2744096650192202, "grad_norm": 0.3718890845775604, "learning_rate": 1.910099527796034e-05, "loss": 0.5301, "step": 9994 }, { "epoch": 0.2744371224601867, "grad_norm": 0.38843467831611633, "learning_rate": 1.9100816297278707e-05, "loss": 0.5005, "step": 9995 }, { "epoch": 0.2744645799011532, "grad_norm": 0.4001559019088745, "learning_rate": 1.9100637299621147e-05, "loss": 0.5637, "step": 9996 }, { "epoch": 0.2744920373421197, "grad_norm": 0.3482390344142914, "learning_rate": 1.9100458284987992e-05, "loss": 0.5526, "step": 9997 }, { "epoch": 0.27451949478308624, "grad_norm": 0.43493956327438354, "learning_rate": 1.910027925337958e-05, "loss": 0.507, "step": 9998 }, { "epoch": 0.27454695222405273, "grad_norm": 0.35914936661720276, "learning_rate": 1.9100100204796243e-05, "loss": 0.5376, "step": 9999 }, { "epoch": 0.2745744096650192, "grad_norm": 0.35638219118118286, "learning_rate": 1.9099921139238312e-05, "loss": 0.561, "step": 10000 }, { "epoch": 0.2746018671059857, "grad_norm": 0.3287396728992462, "learning_rate": 1.9099742056706123e-05, "loss": 0.4849, "step": 10001 }, { "epoch": 0.2746293245469522, "grad_norm": 0.4176933169364929, "learning_rate": 1.9099562957200013e-05, "loss": 0.499, "step": 10002 }, { "epoch": 0.2746567819879187, "grad_norm": 0.41822922229766846, "learning_rate": 1.9099383840720315e-05, "loss": 0.6528, "step": 10003 }, { "epoch": 0.2746842394288852, "grad_norm": 0.37375423312187195, "learning_rate": 1.909920470726736e-05, "loss": 0.5349, "step": 10004 }, { "epoch": 0.27471169686985175, "grad_norm": 0.43775495886802673, "learning_rate": 1.9099025556841485e-05, "loss": 0.5967, "step": 10005 }, { "epoch": 0.27473915431081825, "grad_norm": 0.35610273480415344, "learning_rate": 1.9098846389443018e-05, "loss": 0.5, "step": 10006 }, { "epoch": 0.27476661175178474, "grad_norm": 0.45750105381011963, "learning_rate": 1.9098667205072305e-05, "loss": 0.5583, "step": 10007 }, { "epoch": 0.27479406919275123, "grad_norm": 0.3684680461883545, "learning_rate": 1.9098488003729673e-05, "loss": 0.55, "step": 10008 }, { "epoch": 0.2748215266337177, "grad_norm": 0.5242764949798584, "learning_rate": 1.9098308785415456e-05, "loss": 0.5899, "step": 10009 }, { "epoch": 0.2748489840746842, "grad_norm": 0.4170685112476349, "learning_rate": 1.909812955012999e-05, "loss": 0.5977, "step": 10010 }, { "epoch": 0.2748764415156507, "grad_norm": 0.3485099673271179, "learning_rate": 1.9097950297873606e-05, "loss": 0.4667, "step": 10011 }, { "epoch": 0.27490389895661727, "grad_norm": 0.9485474228858948, "learning_rate": 1.9097771028646647e-05, "loss": 0.5536, "step": 10012 }, { "epoch": 0.27493135639758376, "grad_norm": 0.4386463761329651, "learning_rate": 1.9097591742449434e-05, "loss": 0.5065, "step": 10013 }, { "epoch": 0.27495881383855025, "grad_norm": 0.37624701857566833, "learning_rate": 1.9097412439282312e-05, "loss": 0.5391, "step": 10014 }, { "epoch": 0.27498627127951675, "grad_norm": 0.37362465262413025, "learning_rate": 1.9097233119145614e-05, "loss": 0.5316, "step": 10015 }, { "epoch": 0.27501372872048324, "grad_norm": 0.3448387086391449, "learning_rate": 1.909705378203967e-05, "loss": 0.5078, "step": 10016 }, { "epoch": 0.27504118616144974, "grad_norm": 0.48027241230010986, "learning_rate": 1.909687442796482e-05, "loss": 0.4422, "step": 10017 }, { "epoch": 0.27506864360241623, "grad_norm": 0.3532547056674957, "learning_rate": 1.909669505692139e-05, "loss": 0.5375, "step": 10018 }, { "epoch": 0.2750961010433828, "grad_norm": 0.31640633940696716, "learning_rate": 1.9096515668909725e-05, "loss": 0.5288, "step": 10019 }, { "epoch": 0.2751235584843493, "grad_norm": 0.3431938588619232, "learning_rate": 1.9096336263930154e-05, "loss": 0.4186, "step": 10020 }, { "epoch": 0.27515101592531577, "grad_norm": 0.35133492946624756, "learning_rate": 1.9096156841983013e-05, "loss": 0.4638, "step": 10021 }, { "epoch": 0.27517847336628226, "grad_norm": 0.34118378162384033, "learning_rate": 1.9095977403068636e-05, "loss": 0.5211, "step": 10022 }, { "epoch": 0.27520593080724876, "grad_norm": 0.3888697028160095, "learning_rate": 1.909579794718736e-05, "loss": 0.564, "step": 10023 }, { "epoch": 0.27523338824821525, "grad_norm": 0.3812296688556671, "learning_rate": 1.9095618474339518e-05, "loss": 0.5277, "step": 10024 }, { "epoch": 0.27526084568918174, "grad_norm": 0.4377080500125885, "learning_rate": 1.909543898452544e-05, "loss": 0.5405, "step": 10025 }, { "epoch": 0.2752883031301483, "grad_norm": 0.3324609100818634, "learning_rate": 1.909525947774547e-05, "loss": 0.3646, "step": 10026 }, { "epoch": 0.2753157605711148, "grad_norm": 0.4445863664150238, "learning_rate": 1.9095079953999936e-05, "loss": 0.5441, "step": 10027 }, { "epoch": 0.2753432180120813, "grad_norm": 0.3771135210990906, "learning_rate": 1.9094900413289173e-05, "loss": 0.5223, "step": 10028 }, { "epoch": 0.2753706754530478, "grad_norm": 0.4525003433227539, "learning_rate": 1.909472085561352e-05, "loss": 0.5974, "step": 10029 }, { "epoch": 0.27539813289401427, "grad_norm": 0.394182026386261, "learning_rate": 1.909454128097331e-05, "loss": 0.505, "step": 10030 }, { "epoch": 0.27542559033498076, "grad_norm": 0.37022989988327026, "learning_rate": 1.9094361689368882e-05, "loss": 0.5132, "step": 10031 }, { "epoch": 0.27545304777594726, "grad_norm": 0.3969801962375641, "learning_rate": 1.909418208080056e-05, "loss": 0.5498, "step": 10032 }, { "epoch": 0.2754805052169138, "grad_norm": 0.36887598037719727, "learning_rate": 1.909400245526869e-05, "loss": 0.5229, "step": 10033 }, { "epoch": 0.2755079626578803, "grad_norm": 0.3238699734210968, "learning_rate": 1.9093822812773602e-05, "loss": 0.5168, "step": 10034 }, { "epoch": 0.2755354200988468, "grad_norm": 0.37131747603416443, "learning_rate": 1.909364315331563e-05, "loss": 0.4795, "step": 10035 }, { "epoch": 0.2755628775398133, "grad_norm": 0.3810587227344513, "learning_rate": 1.9093463476895113e-05, "loss": 0.526, "step": 10036 }, { "epoch": 0.2755903349807798, "grad_norm": 0.37247276306152344, "learning_rate": 1.9093283783512384e-05, "loss": 0.4977, "step": 10037 }, { "epoch": 0.2756177924217463, "grad_norm": 0.3957775831222534, "learning_rate": 1.909310407316778e-05, "loss": 0.5182, "step": 10038 }, { "epoch": 0.27564524986271277, "grad_norm": 0.3680056631565094, "learning_rate": 1.9092924345861635e-05, "loss": 0.4943, "step": 10039 }, { "epoch": 0.2756727073036793, "grad_norm": 0.3110438287258148, "learning_rate": 1.909274460159428e-05, "loss": 0.4951, "step": 10040 }, { "epoch": 0.2757001647446458, "grad_norm": 0.3851597309112549, "learning_rate": 1.909256484036606e-05, "loss": 0.5345, "step": 10041 }, { "epoch": 0.2757276221856123, "grad_norm": 0.4439169466495514, "learning_rate": 1.90923850621773e-05, "loss": 0.526, "step": 10042 }, { "epoch": 0.2757550796265788, "grad_norm": 0.3456805944442749, "learning_rate": 1.909220526702834e-05, "loss": 0.5797, "step": 10043 }, { "epoch": 0.2757825370675453, "grad_norm": 0.36618614196777344, "learning_rate": 1.9092025454919517e-05, "loss": 0.5618, "step": 10044 }, { "epoch": 0.2758099945085118, "grad_norm": 0.40008407831192017, "learning_rate": 1.9091845625851163e-05, "loss": 0.5406, "step": 10045 }, { "epoch": 0.2758374519494783, "grad_norm": 0.3378714919090271, "learning_rate": 1.9091665779823617e-05, "loss": 0.4501, "step": 10046 }, { "epoch": 0.27586490939044483, "grad_norm": 0.36479124426841736, "learning_rate": 1.9091485916837214e-05, "loss": 0.5154, "step": 10047 }, { "epoch": 0.27589236683141133, "grad_norm": 0.40910428762435913, "learning_rate": 1.9091306036892284e-05, "loss": 0.6026, "step": 10048 }, { "epoch": 0.2759198242723778, "grad_norm": 0.3771435618400574, "learning_rate": 1.909112613998917e-05, "loss": 0.5912, "step": 10049 }, { "epoch": 0.2759472817133443, "grad_norm": 0.35349783301353455, "learning_rate": 1.90909462261282e-05, "loss": 0.502, "step": 10050 }, { "epoch": 0.2759747391543108, "grad_norm": 0.3553089499473572, "learning_rate": 1.909076629530972e-05, "loss": 0.5741, "step": 10051 }, { "epoch": 0.2760021965952773, "grad_norm": 0.3832581341266632, "learning_rate": 1.9090586347534055e-05, "loss": 0.549, "step": 10052 }, { "epoch": 0.2760296540362438, "grad_norm": 0.3852662742137909, "learning_rate": 1.9090406382801547e-05, "loss": 0.4671, "step": 10053 }, { "epoch": 0.27605711147721035, "grad_norm": 0.35346144437789917, "learning_rate": 1.9090226401112527e-05, "loss": 0.6014, "step": 10054 }, { "epoch": 0.27608456891817684, "grad_norm": 0.3751943111419678, "learning_rate": 1.9090046402467337e-05, "loss": 0.5021, "step": 10055 }, { "epoch": 0.27611202635914334, "grad_norm": 0.34393438696861267, "learning_rate": 1.908986638686631e-05, "loss": 0.5256, "step": 10056 }, { "epoch": 0.27613948380010983, "grad_norm": 0.3526577055454254, "learning_rate": 1.9089686354309775e-05, "loss": 0.5283, "step": 10057 }, { "epoch": 0.2761669412410763, "grad_norm": 0.3358345329761505, "learning_rate": 1.908950630479808e-05, "loss": 0.5004, "step": 10058 }, { "epoch": 0.2761943986820428, "grad_norm": 0.3643590211868286, "learning_rate": 1.9089326238331552e-05, "loss": 0.485, "step": 10059 }, { "epoch": 0.2762218561230093, "grad_norm": 0.36023953557014465, "learning_rate": 1.908914615491053e-05, "loss": 0.5774, "step": 10060 }, { "epoch": 0.27624931356397586, "grad_norm": 0.34720706939697266, "learning_rate": 1.908896605453535e-05, "loss": 0.5323, "step": 10061 }, { "epoch": 0.27627677100494236, "grad_norm": 0.5511215329170227, "learning_rate": 1.9088785937206345e-05, "loss": 0.3961, "step": 10062 }, { "epoch": 0.27630422844590885, "grad_norm": 0.43661054968833923, "learning_rate": 1.9088605802923855e-05, "loss": 0.5243, "step": 10063 }, { "epoch": 0.27633168588687534, "grad_norm": 0.3310297727584839, "learning_rate": 1.9088425651688217e-05, "loss": 0.4851, "step": 10064 }, { "epoch": 0.27635914332784184, "grad_norm": 0.3422190248966217, "learning_rate": 1.908824548349976e-05, "loss": 0.5268, "step": 10065 }, { "epoch": 0.27638660076880833, "grad_norm": 0.39735254645347595, "learning_rate": 1.9088065298358826e-05, "loss": 0.5052, "step": 10066 }, { "epoch": 0.2764140582097748, "grad_norm": 0.4234541058540344, "learning_rate": 1.908788509626575e-05, "loss": 0.5657, "step": 10067 }, { "epoch": 0.2764415156507414, "grad_norm": 0.3662709593772888, "learning_rate": 1.908770487722087e-05, "loss": 0.5877, "step": 10068 }, { "epoch": 0.27646897309170787, "grad_norm": 0.3970843255519867, "learning_rate": 1.9087524641224516e-05, "loss": 0.6228, "step": 10069 }, { "epoch": 0.27649643053267436, "grad_norm": 0.3679914176464081, "learning_rate": 1.9087344388277033e-05, "loss": 0.5827, "step": 10070 }, { "epoch": 0.27652388797364086, "grad_norm": 0.6151641607284546, "learning_rate": 1.9087164118378747e-05, "loss": 0.4618, "step": 10071 }, { "epoch": 0.27655134541460735, "grad_norm": 0.36288532614707947, "learning_rate": 1.9086983831530004e-05, "loss": 0.5853, "step": 10072 }, { "epoch": 0.27657880285557385, "grad_norm": 0.3736637234687805, "learning_rate": 1.9086803527731135e-05, "loss": 0.6437, "step": 10073 }, { "epoch": 0.27660626029654034, "grad_norm": 0.3885047435760498, "learning_rate": 1.9086623206982476e-05, "loss": 0.5996, "step": 10074 }, { "epoch": 0.2766337177375069, "grad_norm": 0.3286890387535095, "learning_rate": 1.9086442869284366e-05, "loss": 0.478, "step": 10075 }, { "epoch": 0.2766611751784734, "grad_norm": 0.3712425231933594, "learning_rate": 1.9086262514637137e-05, "loss": 0.5561, "step": 10076 }, { "epoch": 0.2766886326194399, "grad_norm": 0.4538847804069519, "learning_rate": 1.908608214304113e-05, "loss": 0.607, "step": 10077 }, { "epoch": 0.27671609006040637, "grad_norm": 0.40046167373657227, "learning_rate": 1.908590175449668e-05, "loss": 0.5749, "step": 10078 }, { "epoch": 0.27674354750137287, "grad_norm": 0.3507947027683258, "learning_rate": 1.9085721349004124e-05, "loss": 0.5634, "step": 10079 }, { "epoch": 0.27677100494233936, "grad_norm": 0.34595856070518494, "learning_rate": 1.9085540926563796e-05, "loss": 0.5347, "step": 10080 }, { "epoch": 0.27679846238330585, "grad_norm": 0.3872843086719513, "learning_rate": 1.9085360487176037e-05, "loss": 0.5467, "step": 10081 }, { "epoch": 0.2768259198242724, "grad_norm": 0.4008041024208069, "learning_rate": 1.908518003084118e-05, "loss": 0.5371, "step": 10082 }, { "epoch": 0.2768533772652389, "grad_norm": 0.434414267539978, "learning_rate": 1.908499955755956e-05, "loss": 0.5672, "step": 10083 }, { "epoch": 0.2768808347062054, "grad_norm": 0.4168112576007843, "learning_rate": 1.9084819067331522e-05, "loss": 0.5745, "step": 10084 }, { "epoch": 0.2769082921471719, "grad_norm": 0.322084903717041, "learning_rate": 1.9084638560157393e-05, "loss": 0.5764, "step": 10085 }, { "epoch": 0.2769357495881384, "grad_norm": 0.4214191138744354, "learning_rate": 1.9084458036037517e-05, "loss": 0.6041, "step": 10086 }, { "epoch": 0.2769632070291049, "grad_norm": 0.35040736198425293, "learning_rate": 1.9084277494972224e-05, "loss": 0.5666, "step": 10087 }, { "epoch": 0.27699066447007137, "grad_norm": 0.401603102684021, "learning_rate": 1.9084096936961854e-05, "loss": 0.6, "step": 10088 }, { "epoch": 0.2770181219110379, "grad_norm": 0.35348135232925415, "learning_rate": 1.9083916362006746e-05, "loss": 0.5161, "step": 10089 }, { "epoch": 0.2770455793520044, "grad_norm": 0.37455788254737854, "learning_rate": 1.9083735770107232e-05, "loss": 0.5396, "step": 10090 }, { "epoch": 0.2770730367929709, "grad_norm": 0.3767058253288269, "learning_rate": 1.908355516126365e-05, "loss": 0.6047, "step": 10091 }, { "epoch": 0.2771004942339374, "grad_norm": 0.3493451774120331, "learning_rate": 1.9083374535476343e-05, "loss": 0.4437, "step": 10092 }, { "epoch": 0.2771279516749039, "grad_norm": 0.412237286567688, "learning_rate": 1.9083193892745643e-05, "loss": 0.5136, "step": 10093 }, { "epoch": 0.2771554091158704, "grad_norm": 0.36062684655189514, "learning_rate": 1.9083013233071885e-05, "loss": 0.5589, "step": 10094 }, { "epoch": 0.2771828665568369, "grad_norm": 0.3837268352508545, "learning_rate": 1.908283255645541e-05, "loss": 0.4594, "step": 10095 }, { "epoch": 0.27721032399780343, "grad_norm": 0.36245501041412354, "learning_rate": 1.908265186289655e-05, "loss": 0.5059, "step": 10096 }, { "epoch": 0.2772377814387699, "grad_norm": 0.3449246883392334, "learning_rate": 1.908247115239565e-05, "loss": 0.5351, "step": 10097 }, { "epoch": 0.2772652388797364, "grad_norm": 0.3645075857639313, "learning_rate": 1.9082290424953043e-05, "loss": 0.453, "step": 10098 }, { "epoch": 0.2772926963207029, "grad_norm": 0.3478483259677887, "learning_rate": 1.908210968056906e-05, "loss": 0.5082, "step": 10099 }, { "epoch": 0.2773201537616694, "grad_norm": 0.4178157150745392, "learning_rate": 1.908192891924405e-05, "loss": 0.5687, "step": 10100 }, { "epoch": 0.2773476112026359, "grad_norm": 0.3862474858760834, "learning_rate": 1.908174814097834e-05, "loss": 0.5234, "step": 10101 }, { "epoch": 0.2773750686436024, "grad_norm": 0.37392503023147583, "learning_rate": 1.9081567345772273e-05, "loss": 0.526, "step": 10102 }, { "epoch": 0.27740252608456895, "grad_norm": 0.3900866210460663, "learning_rate": 1.9081386533626184e-05, "loss": 0.4953, "step": 10103 }, { "epoch": 0.27742998352553544, "grad_norm": 0.3395434021949768, "learning_rate": 1.908120570454041e-05, "loss": 0.5767, "step": 10104 }, { "epoch": 0.27745744096650193, "grad_norm": 0.38716092705726624, "learning_rate": 1.908102485851529e-05, "loss": 0.4954, "step": 10105 }, { "epoch": 0.2774848984074684, "grad_norm": 0.3187010884284973, "learning_rate": 1.908084399555116e-05, "loss": 0.3911, "step": 10106 }, { "epoch": 0.2775123558484349, "grad_norm": 0.3850921392440796, "learning_rate": 1.9080663115648357e-05, "loss": 0.493, "step": 10107 }, { "epoch": 0.2775398132894014, "grad_norm": 0.4332759380340576, "learning_rate": 1.908048221880722e-05, "loss": 0.5413, "step": 10108 }, { "epoch": 0.2775672707303679, "grad_norm": 0.3489910960197449, "learning_rate": 1.9080301305028084e-05, "loss": 0.6139, "step": 10109 }, { "epoch": 0.2775947281713344, "grad_norm": 0.3139532804489136, "learning_rate": 1.9080120374311288e-05, "loss": 0.4808, "step": 10110 }, { "epoch": 0.27762218561230095, "grad_norm": 0.3745000660419464, "learning_rate": 1.907993942665717e-05, "loss": 0.5263, "step": 10111 }, { "epoch": 0.27764964305326745, "grad_norm": 0.37539970874786377, "learning_rate": 1.907975846206607e-05, "loss": 0.4593, "step": 10112 }, { "epoch": 0.27767710049423394, "grad_norm": 0.4050968587398529, "learning_rate": 1.9079577480538318e-05, "loss": 0.6491, "step": 10113 }, { "epoch": 0.27770455793520044, "grad_norm": 0.33210405707359314, "learning_rate": 1.9079396482074258e-05, "loss": 0.4967, "step": 10114 }, { "epoch": 0.27773201537616693, "grad_norm": 0.3818451166152954, "learning_rate": 1.9079215466674225e-05, "loss": 0.5637, "step": 10115 }, { "epoch": 0.2777594728171334, "grad_norm": 0.32986488938331604, "learning_rate": 1.907903443433856e-05, "loss": 0.528, "step": 10116 }, { "epoch": 0.2777869302580999, "grad_norm": 0.5922122597694397, "learning_rate": 1.9078853385067594e-05, "loss": 0.5113, "step": 10117 }, { "epoch": 0.27781438769906647, "grad_norm": 0.4134388267993927, "learning_rate": 1.9078672318861674e-05, "loss": 0.5446, "step": 10118 }, { "epoch": 0.27784184514003296, "grad_norm": 0.33276763558387756, "learning_rate": 1.9078491235721127e-05, "loss": 0.5273, "step": 10119 }, { "epoch": 0.27786930258099946, "grad_norm": 0.43442440032958984, "learning_rate": 1.90783101356463e-05, "loss": 0.5171, "step": 10120 }, { "epoch": 0.27789676002196595, "grad_norm": 0.38945823907852173, "learning_rate": 1.907812901863753e-05, "loss": 0.4784, "step": 10121 }, { "epoch": 0.27792421746293244, "grad_norm": 0.3631054162979126, "learning_rate": 1.9077947884695143e-05, "loss": 0.5169, "step": 10122 }, { "epoch": 0.27795167490389894, "grad_norm": 0.38212332129478455, "learning_rate": 1.9077766733819492e-05, "loss": 0.5769, "step": 10123 }, { "epoch": 0.27797913234486543, "grad_norm": 0.32767829298973083, "learning_rate": 1.907758556601091e-05, "loss": 0.509, "step": 10124 }, { "epoch": 0.278006589785832, "grad_norm": 0.32863789796829224, "learning_rate": 1.9077404381269732e-05, "loss": 0.4128, "step": 10125 }, { "epoch": 0.2780340472267985, "grad_norm": 0.3218472898006439, "learning_rate": 1.9077223179596295e-05, "loss": 0.5674, "step": 10126 }, { "epoch": 0.27806150466776497, "grad_norm": 0.3329183757305145, "learning_rate": 1.9077041960990943e-05, "loss": 0.4593, "step": 10127 }, { "epoch": 0.27808896210873146, "grad_norm": 0.38333770632743835, "learning_rate": 1.907686072545401e-05, "loss": 0.5353, "step": 10128 }, { "epoch": 0.27811641954969796, "grad_norm": 0.36337265372276306, "learning_rate": 1.9076679472985833e-05, "loss": 0.542, "step": 10129 }, { "epoch": 0.27814387699066445, "grad_norm": 0.41443076729774475, "learning_rate": 1.907649820358675e-05, "loss": 0.5857, "step": 10130 }, { "epoch": 0.27817133443163095, "grad_norm": 0.35546642541885376, "learning_rate": 1.9076316917257108e-05, "loss": 0.5417, "step": 10131 }, { "epoch": 0.2781987918725975, "grad_norm": 0.3215763568878174, "learning_rate": 1.9076135613997234e-05, "loss": 0.4407, "step": 10132 }, { "epoch": 0.278226249313564, "grad_norm": 0.3689330816268921, "learning_rate": 1.9075954293807468e-05, "loss": 0.4773, "step": 10133 }, { "epoch": 0.2782537067545305, "grad_norm": 0.38261327147483826, "learning_rate": 1.9075772956688153e-05, "loss": 0.5576, "step": 10134 }, { "epoch": 0.278281164195497, "grad_norm": 0.3439863324165344, "learning_rate": 1.9075591602639628e-05, "loss": 0.6457, "step": 10135 }, { "epoch": 0.27830862163646347, "grad_norm": 0.4377201497554779, "learning_rate": 1.907541023166222e-05, "loss": 0.5721, "step": 10136 }, { "epoch": 0.27833607907742997, "grad_norm": 0.42143985629081726, "learning_rate": 1.907522884375628e-05, "loss": 0.5791, "step": 10137 }, { "epoch": 0.27836353651839646, "grad_norm": 0.3778749108314514, "learning_rate": 1.9075047438922144e-05, "loss": 0.5706, "step": 10138 }, { "epoch": 0.278390993959363, "grad_norm": 0.35900846123695374, "learning_rate": 1.9074866017160145e-05, "loss": 0.5332, "step": 10139 }, { "epoch": 0.2784184514003295, "grad_norm": 0.3700673282146454, "learning_rate": 1.907468457847063e-05, "loss": 0.4912, "step": 10140 }, { "epoch": 0.278445908841296, "grad_norm": 0.351886123418808, "learning_rate": 1.9074503122853923e-05, "loss": 0.528, "step": 10141 }, { "epoch": 0.2784733662822625, "grad_norm": 0.3374916911125183, "learning_rate": 1.9074321650310377e-05, "loss": 0.4631, "step": 10142 }, { "epoch": 0.278500823723229, "grad_norm": 0.5390721559524536, "learning_rate": 1.9074140160840325e-05, "loss": 0.5108, "step": 10143 }, { "epoch": 0.2785282811641955, "grad_norm": 0.44561368227005005, "learning_rate": 1.9073958654444102e-05, "loss": 0.5725, "step": 10144 }, { "epoch": 0.278555738605162, "grad_norm": 0.38934701681137085, "learning_rate": 1.9073777131122053e-05, "loss": 0.498, "step": 10145 }, { "epoch": 0.2785831960461285, "grad_norm": 0.38667458295822144, "learning_rate": 1.9073595590874513e-05, "loss": 0.5318, "step": 10146 }, { "epoch": 0.278610653487095, "grad_norm": 0.42917266488075256, "learning_rate": 1.9073414033701822e-05, "loss": 0.6572, "step": 10147 }, { "epoch": 0.2786381109280615, "grad_norm": 0.3867585361003876, "learning_rate": 1.907323245960432e-05, "loss": 0.5475, "step": 10148 }, { "epoch": 0.278665568369028, "grad_norm": 0.39309927821159363, "learning_rate": 1.9073050868582338e-05, "loss": 0.4587, "step": 10149 }, { "epoch": 0.2786930258099945, "grad_norm": 0.41154101490974426, "learning_rate": 1.9072869260636226e-05, "loss": 0.5833, "step": 10150 }, { "epoch": 0.278720483250961, "grad_norm": 0.4631204605102539, "learning_rate": 1.907268763576631e-05, "loss": 0.5148, "step": 10151 }, { "epoch": 0.2787479406919275, "grad_norm": 0.3848029673099518, "learning_rate": 1.9072505993972944e-05, "loss": 0.5737, "step": 10152 }, { "epoch": 0.27877539813289404, "grad_norm": 0.4893612861633301, "learning_rate": 1.9072324335256453e-05, "loss": 0.5169, "step": 10153 }, { "epoch": 0.27880285557386053, "grad_norm": 0.3600632846355438, "learning_rate": 1.9072142659617182e-05, "loss": 0.5295, "step": 10154 }, { "epoch": 0.278830313014827, "grad_norm": 0.3742150068283081, "learning_rate": 1.9071960967055473e-05, "loss": 0.484, "step": 10155 }, { "epoch": 0.2788577704557935, "grad_norm": 0.30332812666893005, "learning_rate": 1.907177925757166e-05, "loss": 0.3949, "step": 10156 }, { "epoch": 0.27888522789676, "grad_norm": 0.41561776399612427, "learning_rate": 1.907159753116608e-05, "loss": 0.4925, "step": 10157 }, { "epoch": 0.2789126853377265, "grad_norm": 0.3246718943119049, "learning_rate": 1.907141578783908e-05, "loss": 0.4219, "step": 10158 }, { "epoch": 0.278940142778693, "grad_norm": 0.7736389636993408, "learning_rate": 1.907123402759099e-05, "loss": 0.5496, "step": 10159 }, { "epoch": 0.27896760021965955, "grad_norm": 0.3723120391368866, "learning_rate": 1.9071052250422157e-05, "loss": 0.5017, "step": 10160 }, { "epoch": 0.27899505766062604, "grad_norm": 0.3314211368560791, "learning_rate": 1.9070870456332914e-05, "loss": 0.4901, "step": 10161 }, { "epoch": 0.27902251510159254, "grad_norm": 0.5713589787483215, "learning_rate": 1.9070688645323602e-05, "loss": 0.5831, "step": 10162 }, { "epoch": 0.27904997254255903, "grad_norm": 0.4533842206001282, "learning_rate": 1.9070506817394566e-05, "loss": 0.5662, "step": 10163 }, { "epoch": 0.2790774299835255, "grad_norm": 0.36232590675354004, "learning_rate": 1.9070324972546132e-05, "loss": 0.4933, "step": 10164 }, { "epoch": 0.279104887424492, "grad_norm": 0.359088271856308, "learning_rate": 1.907014311077865e-05, "loss": 0.5385, "step": 10165 }, { "epoch": 0.2791323448654585, "grad_norm": 0.35530003905296326, "learning_rate": 1.906996123209246e-05, "loss": 0.5996, "step": 10166 }, { "epoch": 0.27915980230642506, "grad_norm": 0.32721996307373047, "learning_rate": 1.9069779336487895e-05, "loss": 0.4387, "step": 10167 }, { "epoch": 0.27918725974739156, "grad_norm": 0.42004910111427307, "learning_rate": 1.9069597423965296e-05, "loss": 0.5734, "step": 10168 }, { "epoch": 0.27921471718835805, "grad_norm": 0.3662331998348236, "learning_rate": 1.9069415494525002e-05, "loss": 0.5515, "step": 10169 }, { "epoch": 0.27924217462932455, "grad_norm": 0.3793543875217438, "learning_rate": 1.9069233548167357e-05, "loss": 0.5693, "step": 10170 }, { "epoch": 0.27926963207029104, "grad_norm": 0.3882600665092468, "learning_rate": 1.9069051584892695e-05, "loss": 0.4352, "step": 10171 }, { "epoch": 0.27929708951125753, "grad_norm": 0.36040449142456055, "learning_rate": 1.9068869604701356e-05, "loss": 0.5906, "step": 10172 }, { "epoch": 0.27932454695222403, "grad_norm": 0.38540560007095337, "learning_rate": 1.9068687607593684e-05, "loss": 0.5937, "step": 10173 }, { "epoch": 0.2793520043931906, "grad_norm": 0.41839760541915894, "learning_rate": 1.9068505593570012e-05, "loss": 0.601, "step": 10174 }, { "epoch": 0.2793794618341571, "grad_norm": 0.4477398693561554, "learning_rate": 1.9068323562630683e-05, "loss": 0.4873, "step": 10175 }, { "epoch": 0.27940691927512357, "grad_norm": 0.33766305446624756, "learning_rate": 1.9068141514776037e-05, "loss": 0.5178, "step": 10176 }, { "epoch": 0.27943437671609006, "grad_norm": 0.3489362895488739, "learning_rate": 1.9067959450006415e-05, "loss": 0.4489, "step": 10177 }, { "epoch": 0.27946183415705655, "grad_norm": 0.35088905692100525, "learning_rate": 1.9067777368322154e-05, "loss": 0.5559, "step": 10178 }, { "epoch": 0.27948929159802305, "grad_norm": 0.3465668857097626, "learning_rate": 1.9067595269723588e-05, "loss": 0.4735, "step": 10179 }, { "epoch": 0.27951674903898954, "grad_norm": 0.3686895966529846, "learning_rate": 1.906741315421107e-05, "loss": 0.4561, "step": 10180 }, { "epoch": 0.2795442064799561, "grad_norm": 0.3835863173007965, "learning_rate": 1.906723102178493e-05, "loss": 0.4938, "step": 10181 }, { "epoch": 0.2795716639209226, "grad_norm": 0.34615838527679443, "learning_rate": 1.9067048872445508e-05, "loss": 0.426, "step": 10182 }, { "epoch": 0.2795991213618891, "grad_norm": 0.3825620412826538, "learning_rate": 1.906686670619315e-05, "loss": 0.5551, "step": 10183 }, { "epoch": 0.2796265788028556, "grad_norm": 0.3846849799156189, "learning_rate": 1.906668452302819e-05, "loss": 0.5441, "step": 10184 }, { "epoch": 0.27965403624382207, "grad_norm": 0.41443169116973877, "learning_rate": 1.906650232295097e-05, "loss": 0.561, "step": 10185 }, { "epoch": 0.27968149368478856, "grad_norm": 0.3504103720188141, "learning_rate": 1.9066320105961828e-05, "loss": 0.4993, "step": 10186 }, { "epoch": 0.27970895112575506, "grad_norm": 0.3688082993030548, "learning_rate": 1.906613787206111e-05, "loss": 0.538, "step": 10187 }, { "epoch": 0.2797364085667216, "grad_norm": 0.3797212839126587, "learning_rate": 1.9065955621249146e-05, "loss": 0.5555, "step": 10188 }, { "epoch": 0.2797638660076881, "grad_norm": 0.36018460988998413, "learning_rate": 1.906577335352628e-05, "loss": 0.4415, "step": 10189 }, { "epoch": 0.2797913234486546, "grad_norm": 0.40526458621025085, "learning_rate": 1.9065591068892857e-05, "loss": 0.5063, "step": 10190 }, { "epoch": 0.2798187808896211, "grad_norm": 0.3413203954696655, "learning_rate": 1.9065408767349214e-05, "loss": 0.4511, "step": 10191 }, { "epoch": 0.2798462383305876, "grad_norm": 0.6199982762336731, "learning_rate": 1.9065226448895688e-05, "loss": 0.4441, "step": 10192 }, { "epoch": 0.2798736957715541, "grad_norm": 0.3361359238624573, "learning_rate": 1.9065044113532622e-05, "loss": 0.4888, "step": 10193 }, { "epoch": 0.27990115321252057, "grad_norm": 0.3666882812976837, "learning_rate": 1.9064861761260352e-05, "loss": 0.5495, "step": 10194 }, { "epoch": 0.2799286106534871, "grad_norm": 0.36797451972961426, "learning_rate": 1.906467939207923e-05, "loss": 0.5132, "step": 10195 }, { "epoch": 0.2799560680944536, "grad_norm": 0.37044480443000793, "learning_rate": 1.9064497005989582e-05, "loss": 0.5285, "step": 10196 }, { "epoch": 0.2799835255354201, "grad_norm": 0.402975857257843, "learning_rate": 1.9064314602991755e-05, "loss": 0.5082, "step": 10197 }, { "epoch": 0.2800109829763866, "grad_norm": 0.36571329832077026, "learning_rate": 1.9064132183086087e-05, "loss": 0.5364, "step": 10198 }, { "epoch": 0.2800384404173531, "grad_norm": 0.3385966122150421, "learning_rate": 1.906394974627292e-05, "loss": 0.4887, "step": 10199 }, { "epoch": 0.2800658978583196, "grad_norm": 0.35743266344070435, "learning_rate": 1.9063767292552598e-05, "loss": 0.5452, "step": 10200 }, { "epoch": 0.2800933552992861, "grad_norm": 0.35939881205558777, "learning_rate": 1.9063584821925452e-05, "loss": 0.499, "step": 10201 }, { "epoch": 0.28012081274025263, "grad_norm": 0.4043385684490204, "learning_rate": 1.906340233439183e-05, "loss": 0.48, "step": 10202 }, { "epoch": 0.28014827018121913, "grad_norm": 0.35582882165908813, "learning_rate": 1.906321982995207e-05, "loss": 0.5291, "step": 10203 }, { "epoch": 0.2801757276221856, "grad_norm": 0.3591715693473816, "learning_rate": 1.906303730860651e-05, "loss": 0.5424, "step": 10204 }, { "epoch": 0.2802031850631521, "grad_norm": 0.3288976550102234, "learning_rate": 1.9062854770355494e-05, "loss": 0.5059, "step": 10205 }, { "epoch": 0.2802306425041186, "grad_norm": 0.4004463851451874, "learning_rate": 1.9062672215199365e-05, "loss": 0.4726, "step": 10206 }, { "epoch": 0.2802580999450851, "grad_norm": 0.43167829513549805, "learning_rate": 1.9062489643138455e-05, "loss": 0.4857, "step": 10207 }, { "epoch": 0.2802855573860516, "grad_norm": 0.3386596739292145, "learning_rate": 1.906230705417311e-05, "loss": 0.4794, "step": 10208 }, { "epoch": 0.28031301482701815, "grad_norm": 0.39179620146751404, "learning_rate": 1.9062124448303674e-05, "loss": 0.5464, "step": 10209 }, { "epoch": 0.28034047226798464, "grad_norm": 0.3380320370197296, "learning_rate": 1.906194182553048e-05, "loss": 0.4602, "step": 10210 }, { "epoch": 0.28036792970895114, "grad_norm": 0.39554399251937866, "learning_rate": 1.906175918585387e-05, "loss": 0.4546, "step": 10211 }, { "epoch": 0.28039538714991763, "grad_norm": 0.3588031232357025, "learning_rate": 1.9061576529274194e-05, "loss": 0.5789, "step": 10212 }, { "epoch": 0.2804228445908841, "grad_norm": 0.3855004906654358, "learning_rate": 1.906139385579178e-05, "loss": 0.4887, "step": 10213 }, { "epoch": 0.2804503020318506, "grad_norm": 0.3578420877456665, "learning_rate": 1.9061211165406978e-05, "loss": 0.5373, "step": 10214 }, { "epoch": 0.2804777594728171, "grad_norm": 0.4339151382446289, "learning_rate": 1.906102845812012e-05, "loss": 0.6163, "step": 10215 }, { "epoch": 0.28050521691378366, "grad_norm": 0.36802688241004944, "learning_rate": 1.9060845733931557e-05, "loss": 0.5449, "step": 10216 }, { "epoch": 0.28053267435475016, "grad_norm": 0.35641390085220337, "learning_rate": 1.906066299284162e-05, "loss": 0.5185, "step": 10217 }, { "epoch": 0.28056013179571665, "grad_norm": 0.42019885778427124, "learning_rate": 1.906048023485066e-05, "loss": 0.5287, "step": 10218 }, { "epoch": 0.28058758923668314, "grad_norm": 0.9418049454689026, "learning_rate": 1.906029745995901e-05, "loss": 0.5956, "step": 10219 }, { "epoch": 0.28061504667764964, "grad_norm": 0.4017864763736725, "learning_rate": 1.9060114668167015e-05, "loss": 0.5762, "step": 10220 }, { "epoch": 0.28064250411861613, "grad_norm": 0.4477185010910034, "learning_rate": 1.9059931859475013e-05, "loss": 0.5904, "step": 10221 }, { "epoch": 0.2806699615595826, "grad_norm": 0.391867458820343, "learning_rate": 1.9059749033883345e-05, "loss": 0.5087, "step": 10222 }, { "epoch": 0.2806974190005492, "grad_norm": 0.4331045150756836, "learning_rate": 1.905956619139236e-05, "loss": 0.4722, "step": 10223 }, { "epoch": 0.28072487644151567, "grad_norm": 0.3518742322921753, "learning_rate": 1.9059383332002386e-05, "loss": 0.5398, "step": 10224 }, { "epoch": 0.28075233388248216, "grad_norm": 0.40507814288139343, "learning_rate": 1.905920045571377e-05, "loss": 0.4985, "step": 10225 }, { "epoch": 0.28077979132344866, "grad_norm": 0.41753992438316345, "learning_rate": 1.9059017562526857e-05, "loss": 0.5772, "step": 10226 }, { "epoch": 0.28080724876441515, "grad_norm": 0.3756506145000458, "learning_rate": 1.9058834652441985e-05, "loss": 0.474, "step": 10227 }, { "epoch": 0.28083470620538165, "grad_norm": 0.38233691453933716, "learning_rate": 1.9058651725459494e-05, "loss": 0.5558, "step": 10228 }, { "epoch": 0.28086216364634814, "grad_norm": 0.3718664348125458, "learning_rate": 1.9058468781579726e-05, "loss": 0.4859, "step": 10229 }, { "epoch": 0.2808896210873147, "grad_norm": 0.3687560558319092, "learning_rate": 1.9058285820803023e-05, "loss": 0.5513, "step": 10230 }, { "epoch": 0.2809170785282812, "grad_norm": 0.3517082929611206, "learning_rate": 1.9058102843129726e-05, "loss": 0.5684, "step": 10231 }, { "epoch": 0.2809445359692477, "grad_norm": 0.3281862437725067, "learning_rate": 1.905791984856018e-05, "loss": 0.4272, "step": 10232 }, { "epoch": 0.28097199341021417, "grad_norm": 0.38546091318130493, "learning_rate": 1.9057736837094717e-05, "loss": 0.5397, "step": 10233 }, { "epoch": 0.28099945085118067, "grad_norm": 0.3665340840816498, "learning_rate": 1.9057553808733685e-05, "loss": 0.5956, "step": 10234 }, { "epoch": 0.28102690829214716, "grad_norm": 0.4210297763347626, "learning_rate": 1.905737076347743e-05, "loss": 0.5453, "step": 10235 }, { "epoch": 0.28105436573311365, "grad_norm": 0.6254693865776062, "learning_rate": 1.9057187701326278e-05, "loss": 0.5024, "step": 10236 }, { "epoch": 0.2810818231740802, "grad_norm": 0.3718787133693695, "learning_rate": 1.9057004622280586e-05, "loss": 0.5036, "step": 10237 }, { "epoch": 0.2811092806150467, "grad_norm": 0.3613419532775879, "learning_rate": 1.9056821526340687e-05, "loss": 0.4794, "step": 10238 }, { "epoch": 0.2811367380560132, "grad_norm": 0.3742837905883789, "learning_rate": 1.905663841350693e-05, "loss": 0.5, "step": 10239 }, { "epoch": 0.2811641954969797, "grad_norm": 0.3746374547481537, "learning_rate": 1.905645528377965e-05, "loss": 0.5264, "step": 10240 }, { "epoch": 0.2811916529379462, "grad_norm": 0.3873957097530365, "learning_rate": 1.9056272137159187e-05, "loss": 0.4652, "step": 10241 }, { "epoch": 0.2812191103789127, "grad_norm": 0.435569167137146, "learning_rate": 1.905608897364589e-05, "loss": 0.5122, "step": 10242 }, { "epoch": 0.28124656781987917, "grad_norm": 0.3504176139831543, "learning_rate": 1.9055905793240096e-05, "loss": 0.5106, "step": 10243 }, { "epoch": 0.28127402526084566, "grad_norm": 0.557307243347168, "learning_rate": 1.9055722595942144e-05, "loss": 0.5317, "step": 10244 }, { "epoch": 0.2813014827018122, "grad_norm": 0.3631347715854645, "learning_rate": 1.9055539381752383e-05, "loss": 0.5336, "step": 10245 }, { "epoch": 0.2813289401427787, "grad_norm": 0.38810497522354126, "learning_rate": 1.9055356150671146e-05, "loss": 0.5798, "step": 10246 }, { "epoch": 0.2813563975837452, "grad_norm": 0.48672375082969666, "learning_rate": 1.9055172902698787e-05, "loss": 0.4459, "step": 10247 }, { "epoch": 0.2813838550247117, "grad_norm": 0.36992624402046204, "learning_rate": 1.9054989637835636e-05, "loss": 0.5024, "step": 10248 }, { "epoch": 0.2814113124656782, "grad_norm": 0.41024020314216614, "learning_rate": 1.905480635608204e-05, "loss": 0.5298, "step": 10249 }, { "epoch": 0.2814387699066447, "grad_norm": 0.335920512676239, "learning_rate": 1.905462305743834e-05, "loss": 0.5634, "step": 10250 }, { "epoch": 0.2814662273476112, "grad_norm": 0.3628586232662201, "learning_rate": 1.9054439741904878e-05, "loss": 0.5077, "step": 10251 }, { "epoch": 0.2814936847885777, "grad_norm": 0.37049973011016846, "learning_rate": 1.9054256409481996e-05, "loss": 0.5212, "step": 10252 }, { "epoch": 0.2815211422295442, "grad_norm": 0.35797208547592163, "learning_rate": 1.9054073060170036e-05, "loss": 0.5179, "step": 10253 }, { "epoch": 0.2815485996705107, "grad_norm": 0.33772796392440796, "learning_rate": 1.9053889693969337e-05, "loss": 0.5285, "step": 10254 }, { "epoch": 0.2815760571114772, "grad_norm": 0.3518511950969696, "learning_rate": 1.905370631088025e-05, "loss": 0.5371, "step": 10255 }, { "epoch": 0.2816035145524437, "grad_norm": 0.39319294691085815, "learning_rate": 1.9053522910903107e-05, "loss": 0.5605, "step": 10256 }, { "epoch": 0.2816309719934102, "grad_norm": 0.32929757237434387, "learning_rate": 1.9053339494038258e-05, "loss": 0.5231, "step": 10257 }, { "epoch": 0.2816584294343767, "grad_norm": 0.36477240920066833, "learning_rate": 1.9053156060286037e-05, "loss": 0.5549, "step": 10258 }, { "epoch": 0.28168588687534324, "grad_norm": 0.4101243317127228, "learning_rate": 1.9052972609646792e-05, "loss": 0.4713, "step": 10259 }, { "epoch": 0.28171334431630973, "grad_norm": 0.32519930601119995, "learning_rate": 1.9052789142120868e-05, "loss": 0.4925, "step": 10260 }, { "epoch": 0.2817408017572762, "grad_norm": 0.4037676751613617, "learning_rate": 1.9052605657708596e-05, "loss": 0.5852, "step": 10261 }, { "epoch": 0.2817682591982427, "grad_norm": 0.37784698605537415, "learning_rate": 1.905242215641033e-05, "loss": 0.5586, "step": 10262 }, { "epoch": 0.2817957166392092, "grad_norm": 0.3382710814476013, "learning_rate": 1.9052238638226404e-05, "loss": 0.5199, "step": 10263 }, { "epoch": 0.2818231740801757, "grad_norm": 0.38013482093811035, "learning_rate": 1.9052055103157165e-05, "loss": 0.5628, "step": 10264 }, { "epoch": 0.2818506315211422, "grad_norm": 0.3392690122127533, "learning_rate": 1.9051871551202957e-05, "loss": 0.4881, "step": 10265 }, { "epoch": 0.28187808896210875, "grad_norm": 0.39749979972839355, "learning_rate": 1.9051687982364116e-05, "loss": 0.4633, "step": 10266 }, { "epoch": 0.28190554640307525, "grad_norm": 0.35572412610054016, "learning_rate": 1.905150439664099e-05, "loss": 0.499, "step": 10267 }, { "epoch": 0.28193300384404174, "grad_norm": 0.3532596528530121, "learning_rate": 1.905132079403392e-05, "loss": 0.4846, "step": 10268 }, { "epoch": 0.28196046128500823, "grad_norm": 0.40593889355659485, "learning_rate": 1.9051137174543247e-05, "loss": 0.5043, "step": 10269 }, { "epoch": 0.28198791872597473, "grad_norm": 0.38289058208465576, "learning_rate": 1.905095353816931e-05, "loss": 0.5013, "step": 10270 }, { "epoch": 0.2820153761669412, "grad_norm": 0.4139348864555359, "learning_rate": 1.905076988491246e-05, "loss": 0.5588, "step": 10271 }, { "epoch": 0.2820428336079077, "grad_norm": 0.3624190390110016, "learning_rate": 1.9050586214773036e-05, "loss": 0.4814, "step": 10272 }, { "epoch": 0.28207029104887427, "grad_norm": 0.419989675283432, "learning_rate": 1.9050402527751378e-05, "loss": 0.5568, "step": 10273 }, { "epoch": 0.28209774848984076, "grad_norm": 0.43515145778656006, "learning_rate": 1.905021882384783e-05, "loss": 0.6187, "step": 10274 }, { "epoch": 0.28212520593080725, "grad_norm": 0.37258821725845337, "learning_rate": 1.9050035103062742e-05, "loss": 0.4791, "step": 10275 }, { "epoch": 0.28215266337177375, "grad_norm": 0.3481035530567169, "learning_rate": 1.9049851365396445e-05, "loss": 0.5248, "step": 10276 }, { "epoch": 0.28218012081274024, "grad_norm": 0.33561205863952637, "learning_rate": 1.9049667610849287e-05, "loss": 0.4745, "step": 10277 }, { "epoch": 0.28220757825370674, "grad_norm": 0.3939867317676544, "learning_rate": 1.9049483839421612e-05, "loss": 0.5011, "step": 10278 }, { "epoch": 0.28223503569467323, "grad_norm": 0.3458270728588104, "learning_rate": 1.904930005111376e-05, "loss": 0.5115, "step": 10279 }, { "epoch": 0.2822624931356398, "grad_norm": 0.3392016887664795, "learning_rate": 1.9049116245926076e-05, "loss": 0.4466, "step": 10280 }, { "epoch": 0.2822899505766063, "grad_norm": 0.3804769814014435, "learning_rate": 1.9048932423858903e-05, "loss": 0.4978, "step": 10281 }, { "epoch": 0.28231740801757277, "grad_norm": 0.3667837679386139, "learning_rate": 1.904874858491258e-05, "loss": 0.5212, "step": 10282 }, { "epoch": 0.28234486545853926, "grad_norm": 0.3595873713493347, "learning_rate": 1.9048564729087452e-05, "loss": 0.523, "step": 10283 }, { "epoch": 0.28237232289950576, "grad_norm": 0.4001471996307373, "learning_rate": 1.9048380856383866e-05, "loss": 0.5854, "step": 10284 }, { "epoch": 0.28239978034047225, "grad_norm": 0.35025468468666077, "learning_rate": 1.904819696680216e-05, "loss": 0.5408, "step": 10285 }, { "epoch": 0.28242723778143874, "grad_norm": 0.42405906319618225, "learning_rate": 1.904801306034268e-05, "loss": 0.4308, "step": 10286 }, { "epoch": 0.2824546952224053, "grad_norm": 0.563441812992096, "learning_rate": 1.904782913700577e-05, "loss": 0.4373, "step": 10287 }, { "epoch": 0.2824821526633718, "grad_norm": 0.34220945835113525, "learning_rate": 1.9047645196791768e-05, "loss": 0.5138, "step": 10288 }, { "epoch": 0.2825096101043383, "grad_norm": 0.416213721036911, "learning_rate": 1.904746123970102e-05, "loss": 0.5523, "step": 10289 }, { "epoch": 0.2825370675453048, "grad_norm": 0.3685774505138397, "learning_rate": 1.9047277265733873e-05, "loss": 0.5898, "step": 10290 }, { "epoch": 0.28256452498627127, "grad_norm": 0.36235812306404114, "learning_rate": 1.9047093274890662e-05, "loss": 0.5627, "step": 10291 }, { "epoch": 0.28259198242723776, "grad_norm": 0.3979489207267761, "learning_rate": 1.9046909267171735e-05, "loss": 0.5024, "step": 10292 }, { "epoch": 0.28261943986820426, "grad_norm": 0.3816218078136444, "learning_rate": 1.9046725242577436e-05, "loss": 0.482, "step": 10293 }, { "epoch": 0.2826468973091708, "grad_norm": 0.3656329810619354, "learning_rate": 1.9046541201108105e-05, "loss": 0.4833, "step": 10294 }, { "epoch": 0.2826743547501373, "grad_norm": 0.36685308814048767, "learning_rate": 1.904635714276409e-05, "loss": 0.5568, "step": 10295 }, { "epoch": 0.2827018121911038, "grad_norm": 0.3623228967189789, "learning_rate": 1.904617306754573e-05, "loss": 0.4581, "step": 10296 }, { "epoch": 0.2827292696320703, "grad_norm": 0.377581387758255, "learning_rate": 1.904598897545337e-05, "loss": 0.5795, "step": 10297 }, { "epoch": 0.2827567270730368, "grad_norm": 0.39367231726646423, "learning_rate": 1.9045804866487357e-05, "loss": 0.4831, "step": 10298 }, { "epoch": 0.2827841845140033, "grad_norm": 0.34587156772613525, "learning_rate": 1.904562074064803e-05, "loss": 0.4489, "step": 10299 }, { "epoch": 0.2828116419549698, "grad_norm": 0.35807543992996216, "learning_rate": 1.9045436597935727e-05, "loss": 0.5196, "step": 10300 }, { "epoch": 0.2828390993959363, "grad_norm": 0.35670167207717896, "learning_rate": 1.9045252438350803e-05, "loss": 0.5742, "step": 10301 }, { "epoch": 0.2828665568369028, "grad_norm": 0.38472217321395874, "learning_rate": 1.9045068261893596e-05, "loss": 0.6194, "step": 10302 }, { "epoch": 0.2828940142778693, "grad_norm": 0.35421690344810486, "learning_rate": 1.9044884068564448e-05, "loss": 0.5947, "step": 10303 }, { "epoch": 0.2829214717188358, "grad_norm": 0.3976437747478485, "learning_rate": 1.9044699858363705e-05, "loss": 0.5429, "step": 10304 }, { "epoch": 0.2829489291598023, "grad_norm": 0.40571755170822144, "learning_rate": 1.9044515631291712e-05, "loss": 0.5753, "step": 10305 }, { "epoch": 0.2829763866007688, "grad_norm": 0.3495918810367584, "learning_rate": 1.904433138734881e-05, "loss": 0.4796, "step": 10306 }, { "epoch": 0.2830038440417353, "grad_norm": 0.3505289852619171, "learning_rate": 1.9044147126535343e-05, "loss": 0.4182, "step": 10307 }, { "epoch": 0.28303130148270184, "grad_norm": 0.3644457459449768, "learning_rate": 1.904396284885165e-05, "loss": 0.423, "step": 10308 }, { "epoch": 0.28305875892366833, "grad_norm": 0.4123876690864563, "learning_rate": 1.9043778554298088e-05, "loss": 0.5481, "step": 10309 }, { "epoch": 0.2830862163646348, "grad_norm": 0.3858984708786011, "learning_rate": 1.9043594242874987e-05, "loss": 0.4867, "step": 10310 }, { "epoch": 0.2831136738056013, "grad_norm": 0.3421988785266876, "learning_rate": 1.90434099145827e-05, "loss": 0.5592, "step": 10311 }, { "epoch": 0.2831411312465678, "grad_norm": 0.38252657651901245, "learning_rate": 1.9043225569421567e-05, "loss": 0.5392, "step": 10312 }, { "epoch": 0.2831685886875343, "grad_norm": 0.4140589237213135, "learning_rate": 1.9043041207391927e-05, "loss": 0.5569, "step": 10313 }, { "epoch": 0.2831960461285008, "grad_norm": 0.3463423550128937, "learning_rate": 1.9042856828494134e-05, "loss": 0.5894, "step": 10314 }, { "epoch": 0.28322350356946735, "grad_norm": 0.32841095328330994, "learning_rate": 1.9042672432728526e-05, "loss": 0.5212, "step": 10315 }, { "epoch": 0.28325096101043384, "grad_norm": 0.29680681228637695, "learning_rate": 1.9042488020095447e-05, "loss": 0.4559, "step": 10316 }, { "epoch": 0.28327841845140034, "grad_norm": 0.3770354986190796, "learning_rate": 1.904230359059524e-05, "loss": 0.5308, "step": 10317 }, { "epoch": 0.28330587589236683, "grad_norm": 0.46051642298698425, "learning_rate": 1.9042119144228254e-05, "loss": 0.5178, "step": 10318 }, { "epoch": 0.2833333333333333, "grad_norm": 0.35680311918258667, "learning_rate": 1.904193468099483e-05, "loss": 0.5406, "step": 10319 }, { "epoch": 0.2833607907742998, "grad_norm": 0.3562031090259552, "learning_rate": 1.904175020089531e-05, "loss": 0.469, "step": 10320 }, { "epoch": 0.2833882482152663, "grad_norm": 0.4270978271961212, "learning_rate": 1.904156570393004e-05, "loss": 0.5421, "step": 10321 }, { "epoch": 0.28341570565623286, "grad_norm": 0.4014187753200531, "learning_rate": 1.9041381190099366e-05, "loss": 0.4804, "step": 10322 }, { "epoch": 0.28344316309719936, "grad_norm": 0.6620213985443115, "learning_rate": 1.904119665940363e-05, "loss": 0.4841, "step": 10323 }, { "epoch": 0.28347062053816585, "grad_norm": 0.3710618317127228, "learning_rate": 1.9041012111843175e-05, "loss": 0.5531, "step": 10324 }, { "epoch": 0.28349807797913235, "grad_norm": 0.3191380798816681, "learning_rate": 1.904082754741835e-05, "loss": 0.5167, "step": 10325 }, { "epoch": 0.28352553542009884, "grad_norm": 0.3532584607601166, "learning_rate": 1.904064296612949e-05, "loss": 0.4982, "step": 10326 }, { "epoch": 0.28355299286106533, "grad_norm": 0.4938264489173889, "learning_rate": 1.904045836797695e-05, "loss": 0.5024, "step": 10327 }, { "epoch": 0.28358045030203183, "grad_norm": 0.3586426377296448, "learning_rate": 1.904027375296107e-05, "loss": 0.5873, "step": 10328 }, { "epoch": 0.2836079077429984, "grad_norm": 0.3490108847618103, "learning_rate": 1.9040089121082195e-05, "loss": 0.5111, "step": 10329 }, { "epoch": 0.28363536518396487, "grad_norm": 0.3585154712200165, "learning_rate": 1.9039904472340667e-05, "loss": 0.4822, "step": 10330 }, { "epoch": 0.28366282262493137, "grad_norm": 0.32075005769729614, "learning_rate": 1.903971980673683e-05, "loss": 0.465, "step": 10331 }, { "epoch": 0.28369028006589786, "grad_norm": 0.3793329894542694, "learning_rate": 1.903953512427103e-05, "loss": 0.5536, "step": 10332 }, { "epoch": 0.28371773750686435, "grad_norm": 0.34835758805274963, "learning_rate": 1.903935042494362e-05, "loss": 0.4981, "step": 10333 }, { "epoch": 0.28374519494783085, "grad_norm": 0.37638190388679504, "learning_rate": 1.9039165708754927e-05, "loss": 0.4462, "step": 10334 }, { "epoch": 0.28377265238879734, "grad_norm": 0.3612581789493561, "learning_rate": 1.903898097570531e-05, "loss": 0.5221, "step": 10335 }, { "epoch": 0.2838001098297639, "grad_norm": 0.3648439943790436, "learning_rate": 1.9038796225795108e-05, "loss": 0.5755, "step": 10336 }, { "epoch": 0.2838275672707304, "grad_norm": 0.4192727208137512, "learning_rate": 1.903861145902466e-05, "loss": 0.5768, "step": 10337 }, { "epoch": 0.2838550247116969, "grad_norm": 0.35826629400253296, "learning_rate": 1.9038426675394326e-05, "loss": 0.5561, "step": 10338 }, { "epoch": 0.2838824821526634, "grad_norm": 0.38933679461479187, "learning_rate": 1.9038241874904437e-05, "loss": 0.5025, "step": 10339 }, { "epoch": 0.28390993959362987, "grad_norm": 0.41523459553718567, "learning_rate": 1.9038057057555342e-05, "loss": 0.5513, "step": 10340 }, { "epoch": 0.28393739703459636, "grad_norm": 0.40151193737983704, "learning_rate": 1.9037872223347385e-05, "loss": 0.5289, "step": 10341 }, { "epoch": 0.28396485447556286, "grad_norm": 0.41636037826538086, "learning_rate": 1.9037687372280914e-05, "loss": 0.592, "step": 10342 }, { "epoch": 0.2839923119165294, "grad_norm": 0.388034462928772, "learning_rate": 1.9037502504356272e-05, "loss": 0.4716, "step": 10343 }, { "epoch": 0.2840197693574959, "grad_norm": 0.4041734039783478, "learning_rate": 1.9037317619573804e-05, "loss": 0.5712, "step": 10344 }, { "epoch": 0.2840472267984624, "grad_norm": 0.3613959550857544, "learning_rate": 1.9037132717933852e-05, "loss": 0.4339, "step": 10345 }, { "epoch": 0.2840746842394289, "grad_norm": 0.3556935489177704, "learning_rate": 1.9036947799436766e-05, "loss": 0.5083, "step": 10346 }, { "epoch": 0.2841021416803954, "grad_norm": 0.33111143112182617, "learning_rate": 1.9036762864082883e-05, "loss": 0.5632, "step": 10347 }, { "epoch": 0.2841295991213619, "grad_norm": 0.3536824882030487, "learning_rate": 1.9036577911872557e-05, "loss": 0.4581, "step": 10348 }, { "epoch": 0.28415705656232837, "grad_norm": 0.445364773273468, "learning_rate": 1.903639294280613e-05, "loss": 0.5658, "step": 10349 }, { "epoch": 0.2841845140032949, "grad_norm": 0.4457208514213562, "learning_rate": 1.9036207956883944e-05, "loss": 0.4817, "step": 10350 }, { "epoch": 0.2842119714442614, "grad_norm": 0.3674617111682892, "learning_rate": 1.9036022954106347e-05, "loss": 0.4991, "step": 10351 }, { "epoch": 0.2842394288852279, "grad_norm": 0.3404936194419861, "learning_rate": 1.9035837934473683e-05, "loss": 0.4797, "step": 10352 }, { "epoch": 0.2842668863261944, "grad_norm": 0.34075498580932617, "learning_rate": 1.9035652897986296e-05, "loss": 0.5655, "step": 10353 }, { "epoch": 0.2842943437671609, "grad_norm": 0.36926889419555664, "learning_rate": 1.9035467844644532e-05, "loss": 0.5504, "step": 10354 }, { "epoch": 0.2843218012081274, "grad_norm": 0.403548002243042, "learning_rate": 1.903528277444874e-05, "loss": 0.4535, "step": 10355 }, { "epoch": 0.2843492586490939, "grad_norm": 0.35838234424591064, "learning_rate": 1.903509768739926e-05, "loss": 0.4789, "step": 10356 }, { "epoch": 0.28437671609006043, "grad_norm": 0.41408780217170715, "learning_rate": 1.903491258349644e-05, "loss": 0.5157, "step": 10357 }, { "epoch": 0.2844041735310269, "grad_norm": 0.3383234739303589, "learning_rate": 1.903472746274062e-05, "loss": 0.4915, "step": 10358 }, { "epoch": 0.2844316309719934, "grad_norm": 0.3544083833694458, "learning_rate": 1.9034542325132154e-05, "loss": 0.3826, "step": 10359 }, { "epoch": 0.2844590884129599, "grad_norm": 0.38491493463516235, "learning_rate": 1.9034357170671383e-05, "loss": 0.488, "step": 10360 }, { "epoch": 0.2844865458539264, "grad_norm": 0.3857385516166687, "learning_rate": 1.9034171999358655e-05, "loss": 0.4575, "step": 10361 }, { "epoch": 0.2845140032948929, "grad_norm": 0.4501913785934448, "learning_rate": 1.903398681119431e-05, "loss": 0.5972, "step": 10362 }, { "epoch": 0.2845414607358594, "grad_norm": 0.4114726781845093, "learning_rate": 1.903380160617869e-05, "loss": 0.589, "step": 10363 }, { "epoch": 0.28456891817682595, "grad_norm": 0.39547839760780334, "learning_rate": 1.9033616384312157e-05, "loss": 0.5664, "step": 10364 }, { "epoch": 0.28459637561779244, "grad_norm": 0.44773146510124207, "learning_rate": 1.903343114559504e-05, "loss": 0.519, "step": 10365 }, { "epoch": 0.28462383305875893, "grad_norm": 0.3704400360584259, "learning_rate": 1.9033245890027695e-05, "loss": 0.5283, "step": 10366 }, { "epoch": 0.28465129049972543, "grad_norm": 0.3868197202682495, "learning_rate": 1.9033060617610458e-05, "loss": 0.5418, "step": 10367 }, { "epoch": 0.2846787479406919, "grad_norm": 0.36931097507476807, "learning_rate": 1.9032875328343686e-05, "loss": 0.5132, "step": 10368 }, { "epoch": 0.2847062053816584, "grad_norm": 0.37181660532951355, "learning_rate": 1.9032690022227714e-05, "loss": 0.5227, "step": 10369 }, { "epoch": 0.2847336628226249, "grad_norm": 0.32040759921073914, "learning_rate": 1.9032504699262895e-05, "loss": 0.5053, "step": 10370 }, { "epoch": 0.28476112026359146, "grad_norm": 0.3438103199005127, "learning_rate": 1.903231935944957e-05, "loss": 0.4426, "step": 10371 }, { "epoch": 0.28478857770455795, "grad_norm": 0.355803906917572, "learning_rate": 1.903213400278809e-05, "loss": 0.5556, "step": 10372 }, { "epoch": 0.28481603514552445, "grad_norm": 0.3536284267902374, "learning_rate": 1.9031948629278792e-05, "loss": 0.4523, "step": 10373 }, { "epoch": 0.28484349258649094, "grad_norm": 0.42222172021865845, "learning_rate": 1.9031763238922033e-05, "loss": 0.512, "step": 10374 }, { "epoch": 0.28487095002745744, "grad_norm": 0.35044607520103455, "learning_rate": 1.9031577831718147e-05, "loss": 0.5002, "step": 10375 }, { "epoch": 0.28489840746842393, "grad_norm": 0.37926939129829407, "learning_rate": 1.9031392407667488e-05, "loss": 0.4952, "step": 10376 }, { "epoch": 0.2849258649093904, "grad_norm": 0.384799987077713, "learning_rate": 1.90312069667704e-05, "loss": 0.491, "step": 10377 }, { "epoch": 0.2849533223503569, "grad_norm": 0.34999439120292664, "learning_rate": 1.903102150902723e-05, "loss": 0.5001, "step": 10378 }, { "epoch": 0.28498077979132347, "grad_norm": 0.4538396894931793, "learning_rate": 1.903083603443832e-05, "loss": 0.5454, "step": 10379 }, { "epoch": 0.28500823723228996, "grad_norm": 0.35642197728157043, "learning_rate": 1.903065054300402e-05, "loss": 0.5763, "step": 10380 }, { "epoch": 0.28503569467325646, "grad_norm": 0.3210013806819916, "learning_rate": 1.9030465034724676e-05, "loss": 0.4529, "step": 10381 }, { "epoch": 0.28506315211422295, "grad_norm": 0.3584228456020355, "learning_rate": 1.903027950960063e-05, "loss": 0.6149, "step": 10382 }, { "epoch": 0.28509060955518944, "grad_norm": 0.3775506913661957, "learning_rate": 1.903009396763223e-05, "loss": 0.5353, "step": 10383 }, { "epoch": 0.28511806699615594, "grad_norm": 0.34134209156036377, "learning_rate": 1.9029908408819826e-05, "loss": 0.5348, "step": 10384 }, { "epoch": 0.28514552443712243, "grad_norm": 0.3560941219329834, "learning_rate": 1.902972283316376e-05, "loss": 0.563, "step": 10385 }, { "epoch": 0.285172981878089, "grad_norm": 0.8777788281440735, "learning_rate": 1.9029537240664377e-05, "loss": 0.5038, "step": 10386 }, { "epoch": 0.2852004393190555, "grad_norm": 0.3336629569530487, "learning_rate": 1.9029351631322025e-05, "loss": 0.451, "step": 10387 }, { "epoch": 0.28522789676002197, "grad_norm": 0.3367331027984619, "learning_rate": 1.9029166005137053e-05, "loss": 0.4986, "step": 10388 }, { "epoch": 0.28525535420098846, "grad_norm": 0.36165305972099304, "learning_rate": 1.9028980362109803e-05, "loss": 0.5106, "step": 10389 }, { "epoch": 0.28528281164195496, "grad_norm": 0.3331947326660156, "learning_rate": 1.9028794702240624e-05, "loss": 0.5285, "step": 10390 }, { "epoch": 0.28531026908292145, "grad_norm": 0.4914761781692505, "learning_rate": 1.902860902552986e-05, "loss": 0.5716, "step": 10391 }, { "epoch": 0.28533772652388795, "grad_norm": 0.4121670126914978, "learning_rate": 1.9028423331977855e-05, "loss": 0.6018, "step": 10392 }, { "epoch": 0.2853651839648545, "grad_norm": 0.362193763256073, "learning_rate": 1.9028237621584967e-05, "loss": 0.4432, "step": 10393 }, { "epoch": 0.285392641405821, "grad_norm": 0.3541114032268524, "learning_rate": 1.902805189435153e-05, "loss": 0.5461, "step": 10394 }, { "epoch": 0.2854200988467875, "grad_norm": 0.35656654834747314, "learning_rate": 1.9027866150277894e-05, "loss": 0.6156, "step": 10395 }, { "epoch": 0.285447556287754, "grad_norm": 0.38039037585258484, "learning_rate": 1.9027680389364406e-05, "loss": 0.4478, "step": 10396 }, { "epoch": 0.2854750137287205, "grad_norm": 0.35162752866744995, "learning_rate": 1.9027494611611416e-05, "loss": 0.4809, "step": 10397 }, { "epoch": 0.28550247116968697, "grad_norm": 0.4025368392467499, "learning_rate": 1.902730881701926e-05, "loss": 0.6054, "step": 10398 }, { "epoch": 0.28552992861065346, "grad_norm": 0.3677777051925659, "learning_rate": 1.9027123005588302e-05, "loss": 0.5127, "step": 10399 }, { "epoch": 0.28555738605162, "grad_norm": 0.35755929350852966, "learning_rate": 1.902693717731887e-05, "loss": 0.5056, "step": 10400 }, { "epoch": 0.2855848434925865, "grad_norm": 0.42068102955818176, "learning_rate": 1.9026751332211324e-05, "loss": 0.5748, "step": 10401 }, { "epoch": 0.285612300933553, "grad_norm": 0.40213748812675476, "learning_rate": 1.9026565470266007e-05, "loss": 0.5408, "step": 10402 }, { "epoch": 0.2856397583745195, "grad_norm": 0.3552500605583191, "learning_rate": 1.902637959148326e-05, "loss": 0.5415, "step": 10403 }, { "epoch": 0.285667215815486, "grad_norm": 0.3328021168708801, "learning_rate": 1.9026193695863436e-05, "loss": 0.5291, "step": 10404 }, { "epoch": 0.2856946732564525, "grad_norm": 0.3589495122432709, "learning_rate": 1.902600778340688e-05, "loss": 0.4675, "step": 10405 }, { "epoch": 0.285722130697419, "grad_norm": 0.36390942335128784, "learning_rate": 1.9025821854113937e-05, "loss": 0.4683, "step": 10406 }, { "epoch": 0.2857495881383855, "grad_norm": 0.37147483229637146, "learning_rate": 1.902563590798496e-05, "loss": 0.5254, "step": 10407 }, { "epoch": 0.285777045579352, "grad_norm": 0.361863911151886, "learning_rate": 1.9025449945020287e-05, "loss": 0.4741, "step": 10408 }, { "epoch": 0.2858045030203185, "grad_norm": 0.3356306552886963, "learning_rate": 1.9025263965220272e-05, "loss": 0.55, "step": 10409 }, { "epoch": 0.285831960461285, "grad_norm": 0.3445087671279907, "learning_rate": 1.9025077968585257e-05, "loss": 0.4535, "step": 10410 }, { "epoch": 0.2858594179022515, "grad_norm": 0.3716329038143158, "learning_rate": 1.902489195511559e-05, "loss": 0.5666, "step": 10411 }, { "epoch": 0.285886875343218, "grad_norm": 0.39139917492866516, "learning_rate": 1.9024705924811624e-05, "loss": 0.5451, "step": 10412 }, { "epoch": 0.2859143327841845, "grad_norm": 0.4575875401496887, "learning_rate": 1.90245198776737e-05, "loss": 0.6697, "step": 10413 }, { "epoch": 0.28594179022515104, "grad_norm": 0.39047345519065857, "learning_rate": 1.9024333813702164e-05, "loss": 0.5396, "step": 10414 }, { "epoch": 0.28596924766611753, "grad_norm": 0.3881571590900421, "learning_rate": 1.9024147732897363e-05, "loss": 0.5305, "step": 10415 }, { "epoch": 0.285996705107084, "grad_norm": 0.3369690477848053, "learning_rate": 1.902396163525965e-05, "loss": 0.5106, "step": 10416 }, { "epoch": 0.2860241625480505, "grad_norm": 0.46756622195243835, "learning_rate": 1.902377552078937e-05, "loss": 0.6251, "step": 10417 }, { "epoch": 0.286051619989017, "grad_norm": 0.4553997218608856, "learning_rate": 1.9023589389486866e-05, "loss": 0.527, "step": 10418 }, { "epoch": 0.2860790774299835, "grad_norm": 0.3492216467857361, "learning_rate": 1.9023403241352487e-05, "loss": 0.5554, "step": 10419 }, { "epoch": 0.28610653487095, "grad_norm": 0.3446822166442871, "learning_rate": 1.902321707638658e-05, "loss": 0.5038, "step": 10420 }, { "epoch": 0.28613399231191655, "grad_norm": 0.3892175257205963, "learning_rate": 1.9023030894589496e-05, "loss": 0.5155, "step": 10421 }, { "epoch": 0.28616144975288305, "grad_norm": 0.34333235025405884, "learning_rate": 1.902284469596158e-05, "loss": 0.4969, "step": 10422 }, { "epoch": 0.28618890719384954, "grad_norm": 0.37184762954711914, "learning_rate": 1.9022658480503175e-05, "loss": 0.5483, "step": 10423 }, { "epoch": 0.28621636463481603, "grad_norm": 0.36515378952026367, "learning_rate": 1.9022472248214633e-05, "loss": 0.5681, "step": 10424 }, { "epoch": 0.28624382207578253, "grad_norm": 0.357617050409317, "learning_rate": 1.9022285999096305e-05, "loss": 0.5211, "step": 10425 }, { "epoch": 0.286271279516749, "grad_norm": 0.3632064759731293, "learning_rate": 1.902209973314853e-05, "loss": 0.5389, "step": 10426 }, { "epoch": 0.2862987369577155, "grad_norm": 0.33186075091362, "learning_rate": 1.9021913450371664e-05, "loss": 0.5183, "step": 10427 }, { "epoch": 0.28632619439868207, "grad_norm": 0.3152519166469574, "learning_rate": 1.9021727150766046e-05, "loss": 0.5506, "step": 10428 }, { "epoch": 0.28635365183964856, "grad_norm": 0.35798463225364685, "learning_rate": 1.9021540834332027e-05, "loss": 0.4908, "step": 10429 }, { "epoch": 0.28638110928061505, "grad_norm": 0.38702595233917236, "learning_rate": 1.9021354501069958e-05, "loss": 0.4768, "step": 10430 }, { "epoch": 0.28640856672158155, "grad_norm": 0.43462061882019043, "learning_rate": 1.902116815098018e-05, "loss": 0.5861, "step": 10431 }, { "epoch": 0.28643602416254804, "grad_norm": 0.32983705401420593, "learning_rate": 1.9020981784063045e-05, "loss": 0.5116, "step": 10432 }, { "epoch": 0.28646348160351454, "grad_norm": 0.3761734068393707, "learning_rate": 1.9020795400318903e-05, "loss": 0.6156, "step": 10433 }, { "epoch": 0.28649093904448103, "grad_norm": 0.3590378761291504, "learning_rate": 1.9020608999748096e-05, "loss": 0.5087, "step": 10434 }, { "epoch": 0.2865183964854476, "grad_norm": 0.42511725425720215, "learning_rate": 1.9020422582350976e-05, "loss": 0.4726, "step": 10435 }, { "epoch": 0.2865458539264141, "grad_norm": 0.33881792426109314, "learning_rate": 1.9020236148127885e-05, "loss": 0.4451, "step": 10436 }, { "epoch": 0.28657331136738057, "grad_norm": 0.3795412480831146, "learning_rate": 1.902004969707918e-05, "loss": 0.5303, "step": 10437 }, { "epoch": 0.28660076880834706, "grad_norm": 0.3286045789718628, "learning_rate": 1.9019863229205197e-05, "loss": 0.4895, "step": 10438 }, { "epoch": 0.28662822624931356, "grad_norm": 0.3766331374645233, "learning_rate": 1.90196767445063e-05, "loss": 0.6271, "step": 10439 }, { "epoch": 0.28665568369028005, "grad_norm": 0.36473169922828674, "learning_rate": 1.9019490242982818e-05, "loss": 0.5477, "step": 10440 }, { "epoch": 0.28668314113124654, "grad_norm": 0.5746028423309326, "learning_rate": 1.901930372463511e-05, "loss": 0.6216, "step": 10441 }, { "epoch": 0.2867105985722131, "grad_norm": 0.3696066439151764, "learning_rate": 1.9019117189463522e-05, "loss": 0.5485, "step": 10442 }, { "epoch": 0.2867380560131796, "grad_norm": 0.35725805163383484, "learning_rate": 1.90189306374684e-05, "loss": 0.5893, "step": 10443 }, { "epoch": 0.2867655134541461, "grad_norm": 0.33947044610977173, "learning_rate": 1.90187440686501e-05, "loss": 0.5341, "step": 10444 }, { "epoch": 0.2867929708951126, "grad_norm": 0.34681540727615356, "learning_rate": 1.9018557483008958e-05, "loss": 0.5369, "step": 10445 }, { "epoch": 0.28682042833607907, "grad_norm": 0.34683138132095337, "learning_rate": 1.901837088054533e-05, "loss": 0.513, "step": 10446 }, { "epoch": 0.28684788577704556, "grad_norm": 0.3450472056865692, "learning_rate": 1.9018184261259564e-05, "loss": 0.5602, "step": 10447 }, { "epoch": 0.28687534321801206, "grad_norm": 0.3499576151371002, "learning_rate": 1.9017997625152004e-05, "loss": 0.5332, "step": 10448 }, { "epoch": 0.2869028006589786, "grad_norm": 0.3635813295841217, "learning_rate": 1.9017810972223004e-05, "loss": 0.5448, "step": 10449 }, { "epoch": 0.2869302580999451, "grad_norm": 0.3411414325237274, "learning_rate": 1.9017624302472902e-05, "loss": 0.5107, "step": 10450 }, { "epoch": 0.2869577155409116, "grad_norm": 0.362878680229187, "learning_rate": 1.9017437615902057e-05, "loss": 0.5618, "step": 10451 }, { "epoch": 0.2869851729818781, "grad_norm": 0.6208773255348206, "learning_rate": 1.901725091251081e-05, "loss": 0.4807, "step": 10452 }, { "epoch": 0.2870126304228446, "grad_norm": 0.33850955963134766, "learning_rate": 1.9017064192299514e-05, "loss": 0.5163, "step": 10453 }, { "epoch": 0.2870400878638111, "grad_norm": 0.34463855624198914, "learning_rate": 1.9016877455268518e-05, "loss": 0.4788, "step": 10454 }, { "epoch": 0.28706754530477757, "grad_norm": 0.3369885981082916, "learning_rate": 1.901669070141816e-05, "loss": 0.5661, "step": 10455 }, { "epoch": 0.2870950027457441, "grad_norm": 0.39134877920150757, "learning_rate": 1.9016503930748803e-05, "loss": 0.5798, "step": 10456 }, { "epoch": 0.2871224601867106, "grad_norm": 0.3710697293281555, "learning_rate": 1.9016317143260786e-05, "loss": 0.4912, "step": 10457 }, { "epoch": 0.2871499176276771, "grad_norm": 0.3803185820579529, "learning_rate": 1.901613033895446e-05, "loss": 0.5342, "step": 10458 }, { "epoch": 0.2871773750686436, "grad_norm": 0.34467628598213196, "learning_rate": 1.9015943517830173e-05, "loss": 0.5311, "step": 10459 }, { "epoch": 0.2872048325096101, "grad_norm": 0.4370225667953491, "learning_rate": 1.9015756679888273e-05, "loss": 0.5603, "step": 10460 }, { "epoch": 0.2872322899505766, "grad_norm": 0.3366222381591797, "learning_rate": 1.9015569825129112e-05, "loss": 0.4821, "step": 10461 }, { "epoch": 0.2872597473915431, "grad_norm": 0.3841160833835602, "learning_rate": 1.9015382953553034e-05, "loss": 0.5617, "step": 10462 }, { "epoch": 0.28728720483250964, "grad_norm": 0.4222491383552551, "learning_rate": 1.9015196065160387e-05, "loss": 0.507, "step": 10463 }, { "epoch": 0.28731466227347613, "grad_norm": 0.3281455934047699, "learning_rate": 1.9015009159951528e-05, "loss": 0.4563, "step": 10464 }, { "epoch": 0.2873421197144426, "grad_norm": 0.37589216232299805, "learning_rate": 1.9014822237926794e-05, "loss": 0.5162, "step": 10465 }, { "epoch": 0.2873695771554091, "grad_norm": 0.3575749099254608, "learning_rate": 1.9014635299086542e-05, "loss": 0.5889, "step": 10466 }, { "epoch": 0.2873970345963756, "grad_norm": 0.3571198284626007, "learning_rate": 1.9014448343431118e-05, "loss": 0.5163, "step": 10467 }, { "epoch": 0.2874244920373421, "grad_norm": 0.6338431239128113, "learning_rate": 1.901426137096087e-05, "loss": 0.5042, "step": 10468 }, { "epoch": 0.2874519494783086, "grad_norm": 0.37023860216140747, "learning_rate": 1.901407438167615e-05, "loss": 0.5356, "step": 10469 }, { "epoch": 0.28747940691927515, "grad_norm": 0.32283490896224976, "learning_rate": 1.90138873755773e-05, "loss": 0.5144, "step": 10470 }, { "epoch": 0.28750686436024164, "grad_norm": 0.3237452805042267, "learning_rate": 1.9013700352664675e-05, "loss": 0.4279, "step": 10471 }, { "epoch": 0.28753432180120814, "grad_norm": 0.4035041332244873, "learning_rate": 1.9013513312938622e-05, "loss": 0.5385, "step": 10472 }, { "epoch": 0.28756177924217463, "grad_norm": 0.37745827436447144, "learning_rate": 1.9013326256399493e-05, "loss": 0.5875, "step": 10473 }, { "epoch": 0.2875892366831411, "grad_norm": 0.38245484232902527, "learning_rate": 1.901313918304763e-05, "loss": 0.5214, "step": 10474 }, { "epoch": 0.2876166941241076, "grad_norm": 0.367196261882782, "learning_rate": 1.9012952092883384e-05, "loss": 0.5762, "step": 10475 }, { "epoch": 0.2876441515650741, "grad_norm": 0.39956963062286377, "learning_rate": 1.9012764985907113e-05, "loss": 0.5052, "step": 10476 }, { "epoch": 0.28767160900604066, "grad_norm": 0.42284533381462097, "learning_rate": 1.901257786211915e-05, "loss": 0.5383, "step": 10477 }, { "epoch": 0.28769906644700716, "grad_norm": 0.36900678277015686, "learning_rate": 1.901239072151986e-05, "loss": 0.512, "step": 10478 }, { "epoch": 0.28772652388797365, "grad_norm": 0.42578569054603577, "learning_rate": 1.9012203564109583e-05, "loss": 0.4248, "step": 10479 }, { "epoch": 0.28775398132894014, "grad_norm": 0.325700581073761, "learning_rate": 1.9012016389888667e-05, "loss": 0.5181, "step": 10480 }, { "epoch": 0.28778143876990664, "grad_norm": 0.35645052790641785, "learning_rate": 1.9011829198857467e-05, "loss": 0.4844, "step": 10481 }, { "epoch": 0.28780889621087313, "grad_norm": 0.3549627661705017, "learning_rate": 1.901164199101633e-05, "loss": 0.5782, "step": 10482 }, { "epoch": 0.2878363536518396, "grad_norm": 0.5051921010017395, "learning_rate": 1.9011454766365602e-05, "loss": 0.4939, "step": 10483 }, { "epoch": 0.2878638110928062, "grad_norm": 0.37499353289604187, "learning_rate": 1.9011267524905635e-05, "loss": 0.5197, "step": 10484 }, { "epoch": 0.28789126853377267, "grad_norm": 0.350375771522522, "learning_rate": 1.901108026663678e-05, "loss": 0.4597, "step": 10485 }, { "epoch": 0.28791872597473916, "grad_norm": 0.383234441280365, "learning_rate": 1.9010892991559383e-05, "loss": 0.546, "step": 10486 }, { "epoch": 0.28794618341570566, "grad_norm": 0.3967646360397339, "learning_rate": 1.9010705699673793e-05, "loss": 0.408, "step": 10487 }, { "epoch": 0.28797364085667215, "grad_norm": 0.3999048173427582, "learning_rate": 1.9010518390980362e-05, "loss": 0.6573, "step": 10488 }, { "epoch": 0.28800109829763865, "grad_norm": 0.3686874806880951, "learning_rate": 1.901033106547944e-05, "loss": 0.5857, "step": 10489 }, { "epoch": 0.28802855573860514, "grad_norm": 0.36177563667297363, "learning_rate": 1.9010143723171374e-05, "loss": 0.5195, "step": 10490 }, { "epoch": 0.2880560131795717, "grad_norm": 0.3890845477581024, "learning_rate": 1.9009956364056515e-05, "loss": 0.6105, "step": 10491 }, { "epoch": 0.2880834706205382, "grad_norm": 0.3787180185317993, "learning_rate": 1.900976898813521e-05, "loss": 0.5284, "step": 10492 }, { "epoch": 0.2881109280615047, "grad_norm": 0.4012282192707062, "learning_rate": 1.900958159540781e-05, "loss": 0.5966, "step": 10493 }, { "epoch": 0.2881383855024712, "grad_norm": 0.3944941461086273, "learning_rate": 1.9009394185874667e-05, "loss": 0.5324, "step": 10494 }, { "epoch": 0.28816584294343767, "grad_norm": 0.3732452392578125, "learning_rate": 1.9009206759536125e-05, "loss": 0.4759, "step": 10495 }, { "epoch": 0.28819330038440416, "grad_norm": 0.38772061467170715, "learning_rate": 1.900901931639254e-05, "loss": 0.5844, "step": 10496 }, { "epoch": 0.28822075782537065, "grad_norm": 0.4300769567489624, "learning_rate": 1.9008831856444257e-05, "loss": 0.5706, "step": 10497 }, { "epoch": 0.2882482152663372, "grad_norm": 0.39787742495536804, "learning_rate": 1.900864437969163e-05, "loss": 0.5595, "step": 10498 }, { "epoch": 0.2882756727073037, "grad_norm": 0.34762898087501526, "learning_rate": 1.9008456886135006e-05, "loss": 0.4767, "step": 10499 }, { "epoch": 0.2883031301482702, "grad_norm": 0.37809187173843384, "learning_rate": 1.900826937577473e-05, "loss": 0.492, "step": 10500 }, { "epoch": 0.2883305875892367, "grad_norm": 0.3552244305610657, "learning_rate": 1.900808184861116e-05, "loss": 0.4816, "step": 10501 }, { "epoch": 0.2883580450302032, "grad_norm": 0.37785398960113525, "learning_rate": 1.9007894304644644e-05, "loss": 0.5483, "step": 10502 }, { "epoch": 0.2883855024711697, "grad_norm": 0.3766702711582184, "learning_rate": 1.9007706743875526e-05, "loss": 0.5138, "step": 10503 }, { "epoch": 0.28841295991213617, "grad_norm": 0.34409067034721375, "learning_rate": 1.900751916630416e-05, "loss": 0.512, "step": 10504 }, { "epoch": 0.2884404173531027, "grad_norm": 0.38180670142173767, "learning_rate": 1.90073315719309e-05, "loss": 0.5173, "step": 10505 }, { "epoch": 0.2884678747940692, "grad_norm": 0.3249037265777588, "learning_rate": 1.9007143960756086e-05, "loss": 0.4774, "step": 10506 }, { "epoch": 0.2884953322350357, "grad_norm": 0.34776920080184937, "learning_rate": 1.900695633278008e-05, "loss": 0.4916, "step": 10507 }, { "epoch": 0.2885227896760022, "grad_norm": 0.3667697310447693, "learning_rate": 1.9006768688003224e-05, "loss": 0.5879, "step": 10508 }, { "epoch": 0.2885502471169687, "grad_norm": 0.37004387378692627, "learning_rate": 1.9006581026425866e-05, "loss": 0.5585, "step": 10509 }, { "epoch": 0.2885777045579352, "grad_norm": 0.33895349502563477, "learning_rate": 1.9006393348048365e-05, "loss": 0.5357, "step": 10510 }, { "epoch": 0.2886051619989017, "grad_norm": 0.3531481623649597, "learning_rate": 1.9006205652871062e-05, "loss": 0.4712, "step": 10511 }, { "epoch": 0.2886326194398682, "grad_norm": 0.3935193121433258, "learning_rate": 1.9006017940894312e-05, "loss": 0.5155, "step": 10512 }, { "epoch": 0.2886600768808347, "grad_norm": 0.45821988582611084, "learning_rate": 1.9005830212118465e-05, "loss": 0.5595, "step": 10513 }, { "epoch": 0.2886875343218012, "grad_norm": 0.36502715945243835, "learning_rate": 1.9005642466543868e-05, "loss": 0.5625, "step": 10514 }, { "epoch": 0.2887149917627677, "grad_norm": 0.37807443737983704, "learning_rate": 1.9005454704170877e-05, "loss": 0.5065, "step": 10515 }, { "epoch": 0.2887424492037342, "grad_norm": 0.3204372823238373, "learning_rate": 1.900526692499984e-05, "loss": 0.4533, "step": 10516 }, { "epoch": 0.2887699066447007, "grad_norm": 0.3946102261543274, "learning_rate": 1.9005079129031102e-05, "loss": 0.5679, "step": 10517 }, { "epoch": 0.2887973640856672, "grad_norm": 0.3674132227897644, "learning_rate": 1.9004891316265016e-05, "loss": 0.5508, "step": 10518 }, { "epoch": 0.2888248215266337, "grad_norm": 0.3421477675437927, "learning_rate": 1.900470348670194e-05, "loss": 0.451, "step": 10519 }, { "epoch": 0.28885227896760024, "grad_norm": 0.3460724949836731, "learning_rate": 1.9004515640342212e-05, "loss": 0.4643, "step": 10520 }, { "epoch": 0.28887973640856673, "grad_norm": 0.3642425537109375, "learning_rate": 1.9004327777186195e-05, "loss": 0.5223, "step": 10521 }, { "epoch": 0.28890719384953323, "grad_norm": 0.5207915306091309, "learning_rate": 1.9004139897234225e-05, "loss": 0.4952, "step": 10522 }, { "epoch": 0.2889346512904997, "grad_norm": 1.336111068725586, "learning_rate": 1.9003952000486666e-05, "loss": 0.5729, "step": 10523 }, { "epoch": 0.2889621087314662, "grad_norm": 0.3569989502429962, "learning_rate": 1.900376408694386e-05, "loss": 0.625, "step": 10524 }, { "epoch": 0.2889895661724327, "grad_norm": 0.37392285466194153, "learning_rate": 1.900357615660616e-05, "loss": 0.4312, "step": 10525 }, { "epoch": 0.2890170236133992, "grad_norm": 0.48103228211402893, "learning_rate": 1.900338820947392e-05, "loss": 0.5405, "step": 10526 }, { "epoch": 0.28904448105436575, "grad_norm": 0.35615968704223633, "learning_rate": 1.9003200245547484e-05, "loss": 0.5112, "step": 10527 }, { "epoch": 0.28907193849533225, "grad_norm": 0.3727186918258667, "learning_rate": 1.9003012264827206e-05, "loss": 0.5191, "step": 10528 }, { "epoch": 0.28909939593629874, "grad_norm": 0.31604960560798645, "learning_rate": 1.900282426731344e-05, "loss": 0.3951, "step": 10529 }, { "epoch": 0.28912685337726524, "grad_norm": 0.3875981867313385, "learning_rate": 1.900263625300653e-05, "loss": 0.5023, "step": 10530 }, { "epoch": 0.28915431081823173, "grad_norm": 0.41025760769844055, "learning_rate": 1.9002448221906834e-05, "loss": 0.554, "step": 10531 }, { "epoch": 0.2891817682591982, "grad_norm": 0.3608320653438568, "learning_rate": 1.9002260174014696e-05, "loss": 0.5487, "step": 10532 }, { "epoch": 0.2892092257001647, "grad_norm": 0.3547680079936981, "learning_rate": 1.9002072109330468e-05, "loss": 0.5077, "step": 10533 }, { "epoch": 0.28923668314113127, "grad_norm": 0.39259544014930725, "learning_rate": 1.9001884027854504e-05, "loss": 0.5284, "step": 10534 }, { "epoch": 0.28926414058209776, "grad_norm": 0.34032854437828064, "learning_rate": 1.9001695929587158e-05, "loss": 0.5149, "step": 10535 }, { "epoch": 0.28929159802306426, "grad_norm": 0.420770525932312, "learning_rate": 1.900150781452877e-05, "loss": 0.5844, "step": 10536 }, { "epoch": 0.28931905546403075, "grad_norm": 0.44021326303482056, "learning_rate": 1.9001319682679698e-05, "loss": 0.5513, "step": 10537 }, { "epoch": 0.28934651290499724, "grad_norm": 0.4814611077308655, "learning_rate": 1.9001131534040292e-05, "loss": 0.5505, "step": 10538 }, { "epoch": 0.28937397034596374, "grad_norm": 0.3775991201400757, "learning_rate": 1.9000943368610904e-05, "loss": 0.5364, "step": 10539 }, { "epoch": 0.28940142778693023, "grad_norm": 0.37884852290153503, "learning_rate": 1.9000755186391883e-05, "loss": 0.4734, "step": 10540 }, { "epoch": 0.2894288852278968, "grad_norm": 0.39455530047416687, "learning_rate": 1.9000566987383582e-05, "loss": 0.5162, "step": 10541 }, { "epoch": 0.2894563426688633, "grad_norm": 0.39992010593414307, "learning_rate": 1.900037877158635e-05, "loss": 0.5079, "step": 10542 }, { "epoch": 0.28948380010982977, "grad_norm": 0.376645028591156, "learning_rate": 1.9000190539000536e-05, "loss": 0.6127, "step": 10543 }, { "epoch": 0.28951125755079626, "grad_norm": 0.36446720361709595, "learning_rate": 1.9000002289626497e-05, "loss": 0.5725, "step": 10544 }, { "epoch": 0.28953871499176276, "grad_norm": 0.33712437748908997, "learning_rate": 1.8999814023464584e-05, "loss": 0.5047, "step": 10545 }, { "epoch": 0.28956617243272925, "grad_norm": 0.41511058807373047, "learning_rate": 1.899962574051514e-05, "loss": 0.5084, "step": 10546 }, { "epoch": 0.28959362987369575, "grad_norm": 0.3432982563972473, "learning_rate": 1.899943744077853e-05, "loss": 0.4455, "step": 10547 }, { "epoch": 0.2896210873146623, "grad_norm": 0.424612432718277, "learning_rate": 1.8999249124255088e-05, "loss": 0.6561, "step": 10548 }, { "epoch": 0.2896485447556288, "grad_norm": 0.39852529764175415, "learning_rate": 1.899906079094518e-05, "loss": 0.47, "step": 10549 }, { "epoch": 0.2896760021965953, "grad_norm": 0.38231784105300903, "learning_rate": 1.8998872440849148e-05, "loss": 0.5367, "step": 10550 }, { "epoch": 0.2897034596375618, "grad_norm": 0.3322747051715851, "learning_rate": 1.8998684073967346e-05, "loss": 0.5665, "step": 10551 }, { "epoch": 0.28973091707852827, "grad_norm": 0.33011916279792786, "learning_rate": 1.8998495690300126e-05, "loss": 0.5235, "step": 10552 }, { "epoch": 0.28975837451949477, "grad_norm": 0.3492213189601898, "learning_rate": 1.8998307289847844e-05, "loss": 0.5483, "step": 10553 }, { "epoch": 0.28978583196046126, "grad_norm": 0.35531044006347656, "learning_rate": 1.8998118872610845e-05, "loss": 0.5252, "step": 10554 }, { "epoch": 0.2898132894014278, "grad_norm": 0.39919111132621765, "learning_rate": 1.899793043858948e-05, "loss": 0.5636, "step": 10555 }, { "epoch": 0.2898407468423943, "grad_norm": 0.36453473567962646, "learning_rate": 1.8997741987784105e-05, "loss": 0.5846, "step": 10556 }, { "epoch": 0.2898682042833608, "grad_norm": 0.36717137694358826, "learning_rate": 1.899755352019507e-05, "loss": 0.5509, "step": 10557 }, { "epoch": 0.2898956617243273, "grad_norm": 0.6730944514274597, "learning_rate": 1.8997365035822727e-05, "loss": 0.526, "step": 10558 }, { "epoch": 0.2899231191652938, "grad_norm": 0.3569776713848114, "learning_rate": 1.8997176534667426e-05, "loss": 0.4946, "step": 10559 }, { "epoch": 0.2899505766062603, "grad_norm": 0.3382580578327179, "learning_rate": 1.8996988016729516e-05, "loss": 0.5395, "step": 10560 }, { "epoch": 0.2899780340472268, "grad_norm": 0.4099234938621521, "learning_rate": 1.8996799482009355e-05, "loss": 0.4891, "step": 10561 }, { "epoch": 0.2900054914881933, "grad_norm": 0.3214190602302551, "learning_rate": 1.899661093050729e-05, "loss": 0.3988, "step": 10562 }, { "epoch": 0.2900329489291598, "grad_norm": 0.3363940417766571, "learning_rate": 1.8996422362223672e-05, "loss": 0.4861, "step": 10563 }, { "epoch": 0.2900604063701263, "grad_norm": 1.281022548675537, "learning_rate": 1.8996233777158858e-05, "loss": 0.5633, "step": 10564 }, { "epoch": 0.2900878638110928, "grad_norm": 0.42590969800949097, "learning_rate": 1.8996045175313196e-05, "loss": 0.5534, "step": 10565 }, { "epoch": 0.2901153212520593, "grad_norm": 0.471465528011322, "learning_rate": 1.899585655668704e-05, "loss": 0.491, "step": 10566 }, { "epoch": 0.2901427786930258, "grad_norm": 0.35766884684562683, "learning_rate": 1.899566792128074e-05, "loss": 0.5064, "step": 10567 }, { "epoch": 0.2901702361339923, "grad_norm": 0.3660731017589569, "learning_rate": 1.8995479269094646e-05, "loss": 0.4616, "step": 10568 }, { "epoch": 0.29019769357495884, "grad_norm": 0.43302810192108154, "learning_rate": 1.8995290600129115e-05, "loss": 0.5236, "step": 10569 }, { "epoch": 0.29022515101592533, "grad_norm": 0.41010960936546326, "learning_rate": 1.8995101914384494e-05, "loss": 0.6231, "step": 10570 }, { "epoch": 0.2902526084568918, "grad_norm": 0.3536451458930969, "learning_rate": 1.899491321186114e-05, "loss": 0.5187, "step": 10571 }, { "epoch": 0.2902800658978583, "grad_norm": 0.38244542479515076, "learning_rate": 1.8994724492559398e-05, "loss": 0.5704, "step": 10572 }, { "epoch": 0.2903075233388248, "grad_norm": 0.3789508044719696, "learning_rate": 1.8994535756479628e-05, "loss": 0.5542, "step": 10573 }, { "epoch": 0.2903349807797913, "grad_norm": 0.40209096670150757, "learning_rate": 1.8994347003622174e-05, "loss": 0.5307, "step": 10574 }, { "epoch": 0.2903624382207578, "grad_norm": 0.3571290969848633, "learning_rate": 1.8994158233987394e-05, "loss": 0.6003, "step": 10575 }, { "epoch": 0.29038989566172435, "grad_norm": 0.36364102363586426, "learning_rate": 1.8993969447575638e-05, "loss": 0.506, "step": 10576 }, { "epoch": 0.29041735310269085, "grad_norm": 0.3656938076019287, "learning_rate": 1.899378064438726e-05, "loss": 0.5182, "step": 10577 }, { "epoch": 0.29044481054365734, "grad_norm": 0.3556077182292938, "learning_rate": 1.899359182442261e-05, "loss": 0.4828, "step": 10578 }, { "epoch": 0.29047226798462383, "grad_norm": 0.36831703782081604, "learning_rate": 1.899340298768204e-05, "loss": 0.5143, "step": 10579 }, { "epoch": 0.2904997254255903, "grad_norm": 0.45847517251968384, "learning_rate": 1.8993214134165904e-05, "loss": 0.5039, "step": 10580 }, { "epoch": 0.2905271828665568, "grad_norm": 0.34670403599739075, "learning_rate": 1.8993025263874552e-05, "loss": 0.5534, "step": 10581 }, { "epoch": 0.2905546403075233, "grad_norm": 0.33571258187294006, "learning_rate": 1.8992836376808337e-05, "loss": 0.5395, "step": 10582 }, { "epoch": 0.29058209774848986, "grad_norm": 0.35840147733688354, "learning_rate": 1.8992647472967613e-05, "loss": 0.4174, "step": 10583 }, { "epoch": 0.29060955518945636, "grad_norm": 0.345600962638855, "learning_rate": 1.8992458552352733e-05, "loss": 0.4831, "step": 10584 }, { "epoch": 0.29063701263042285, "grad_norm": 0.38233110308647156, "learning_rate": 1.8992269614964046e-05, "loss": 0.5464, "step": 10585 }, { "epoch": 0.29066447007138935, "grad_norm": 0.3238309919834137, "learning_rate": 1.8992080660801904e-05, "loss": 0.5218, "step": 10586 }, { "epoch": 0.29069192751235584, "grad_norm": 0.42455628514289856, "learning_rate": 1.8991891689866667e-05, "loss": 0.5752, "step": 10587 }, { "epoch": 0.29071938495332234, "grad_norm": 0.3436873257160187, "learning_rate": 1.899170270215868e-05, "loss": 0.4937, "step": 10588 }, { "epoch": 0.29074684239428883, "grad_norm": 0.35423097014427185, "learning_rate": 1.8991513697678296e-05, "loss": 0.565, "step": 10589 }, { "epoch": 0.2907742998352554, "grad_norm": 0.3527736961841583, "learning_rate": 1.899132467642587e-05, "loss": 0.5101, "step": 10590 }, { "epoch": 0.2908017572762219, "grad_norm": 0.3552381694316864, "learning_rate": 1.8991135638401754e-05, "loss": 0.581, "step": 10591 }, { "epoch": 0.29082921471718837, "grad_norm": 0.3791722059249878, "learning_rate": 1.89909465836063e-05, "loss": 0.4683, "step": 10592 }, { "epoch": 0.29085667215815486, "grad_norm": 0.43254151940345764, "learning_rate": 1.899075751203986e-05, "loss": 0.5116, "step": 10593 }, { "epoch": 0.29088412959912135, "grad_norm": 0.6379194855690002, "learning_rate": 1.899056842370279e-05, "loss": 0.5228, "step": 10594 }, { "epoch": 0.29091158704008785, "grad_norm": 0.36204150319099426, "learning_rate": 1.899037931859544e-05, "loss": 0.5567, "step": 10595 }, { "epoch": 0.29093904448105434, "grad_norm": 0.46727314591407776, "learning_rate": 1.8990190196718163e-05, "loss": 0.5828, "step": 10596 }, { "epoch": 0.2909665019220209, "grad_norm": 0.466789186000824, "learning_rate": 1.8990001058071314e-05, "loss": 0.4491, "step": 10597 }, { "epoch": 0.2909939593629874, "grad_norm": 0.45434898138046265, "learning_rate": 1.898981190265524e-05, "loss": 0.5592, "step": 10598 }, { "epoch": 0.2910214168039539, "grad_norm": 0.7237570881843567, "learning_rate": 1.89896227304703e-05, "loss": 0.5324, "step": 10599 }, { "epoch": 0.2910488742449204, "grad_norm": 0.40640079975128174, "learning_rate": 1.8989433541516842e-05, "loss": 0.4076, "step": 10600 }, { "epoch": 0.29107633168588687, "grad_norm": 0.36548855900764465, "learning_rate": 1.8989244335795222e-05, "loss": 0.4758, "step": 10601 }, { "epoch": 0.29110378912685336, "grad_norm": 0.35590359568595886, "learning_rate": 1.8989055113305796e-05, "loss": 0.5635, "step": 10602 }, { "epoch": 0.29113124656781986, "grad_norm": 0.3500351905822754, "learning_rate": 1.898886587404891e-05, "loss": 0.427, "step": 10603 }, { "epoch": 0.2911587040087864, "grad_norm": 0.3665693700313568, "learning_rate": 1.8988676618024916e-05, "loss": 0.5828, "step": 10604 }, { "epoch": 0.2911861614497529, "grad_norm": 0.3762263357639313, "learning_rate": 1.898848734523418e-05, "loss": 0.5123, "step": 10605 }, { "epoch": 0.2912136188907194, "grad_norm": 0.354993999004364, "learning_rate": 1.8988298055677038e-05, "loss": 0.4763, "step": 10606 }, { "epoch": 0.2912410763316859, "grad_norm": 0.351469486951828, "learning_rate": 1.8988108749353857e-05, "loss": 0.5309, "step": 10607 }, { "epoch": 0.2912685337726524, "grad_norm": 0.4178650975227356, "learning_rate": 1.8987919426264984e-05, "loss": 0.5358, "step": 10608 }, { "epoch": 0.2912959912136189, "grad_norm": 0.4530799686908722, "learning_rate": 1.898773008641077e-05, "loss": 0.5837, "step": 10609 }, { "epoch": 0.29132344865458537, "grad_norm": 0.5075688362121582, "learning_rate": 1.8987540729791573e-05, "loss": 0.4582, "step": 10610 }, { "epoch": 0.2913509060955519, "grad_norm": 0.3918704688549042, "learning_rate": 1.8987351356407744e-05, "loss": 0.5408, "step": 10611 }, { "epoch": 0.2913783635365184, "grad_norm": 0.37213873863220215, "learning_rate": 1.8987161966259635e-05, "loss": 0.5944, "step": 10612 }, { "epoch": 0.2914058209774849, "grad_norm": 0.40451282262802124, "learning_rate": 1.8986972559347602e-05, "loss": 0.545, "step": 10613 }, { "epoch": 0.2914332784184514, "grad_norm": 0.3403902053833008, "learning_rate": 1.8986783135671997e-05, "loss": 0.5075, "step": 10614 }, { "epoch": 0.2914607358594179, "grad_norm": 0.33036917448043823, "learning_rate": 1.8986593695233174e-05, "loss": 0.5548, "step": 10615 }, { "epoch": 0.2914881933003844, "grad_norm": 0.35949158668518066, "learning_rate": 1.898640423803148e-05, "loss": 0.5522, "step": 10616 }, { "epoch": 0.2915156507413509, "grad_norm": 0.3353768587112427, "learning_rate": 1.898621476406728e-05, "loss": 0.5099, "step": 10617 }, { "epoch": 0.29154310818231743, "grad_norm": 0.3969521224498749, "learning_rate": 1.898602527334092e-05, "loss": 0.5388, "step": 10618 }, { "epoch": 0.29157056562328393, "grad_norm": 0.5075987577438354, "learning_rate": 1.8985835765852753e-05, "loss": 0.5726, "step": 10619 }, { "epoch": 0.2915980230642504, "grad_norm": 0.3636513352394104, "learning_rate": 1.8985646241603137e-05, "loss": 0.5276, "step": 10620 }, { "epoch": 0.2916254805052169, "grad_norm": 0.3523928225040436, "learning_rate": 1.898545670059242e-05, "loss": 0.5162, "step": 10621 }, { "epoch": 0.2916529379461834, "grad_norm": 0.3505879342556, "learning_rate": 1.8985267142820962e-05, "loss": 0.5123, "step": 10622 }, { "epoch": 0.2916803953871499, "grad_norm": 0.3566543459892273, "learning_rate": 1.8985077568289108e-05, "loss": 0.4946, "step": 10623 }, { "epoch": 0.2917078528281164, "grad_norm": 0.39260542392730713, "learning_rate": 1.898488797699722e-05, "loss": 0.5302, "step": 10624 }, { "epoch": 0.29173531026908295, "grad_norm": 0.3354779779911041, "learning_rate": 1.898469836894565e-05, "loss": 0.4769, "step": 10625 }, { "epoch": 0.29176276771004944, "grad_norm": 0.3801306486129761, "learning_rate": 1.8984508744134747e-05, "loss": 0.4616, "step": 10626 }, { "epoch": 0.29179022515101594, "grad_norm": 0.3731025457382202, "learning_rate": 1.8984319102564867e-05, "loss": 0.4498, "step": 10627 }, { "epoch": 0.29181768259198243, "grad_norm": 0.38671189546585083, "learning_rate": 1.898412944423637e-05, "loss": 0.563, "step": 10628 }, { "epoch": 0.2918451400329489, "grad_norm": 0.33593514561653137, "learning_rate": 1.89839397691496e-05, "loss": 0.5782, "step": 10629 }, { "epoch": 0.2918725974739154, "grad_norm": 0.34059178829193115, "learning_rate": 1.8983750077304912e-05, "loss": 0.5726, "step": 10630 }, { "epoch": 0.2919000549148819, "grad_norm": 0.32741212844848633, "learning_rate": 1.8983560368702667e-05, "loss": 0.5409, "step": 10631 }, { "epoch": 0.29192751235584846, "grad_norm": 0.3799813687801361, "learning_rate": 1.8983370643343214e-05, "loss": 0.5612, "step": 10632 }, { "epoch": 0.29195496979681496, "grad_norm": 0.3772587180137634, "learning_rate": 1.8983180901226905e-05, "loss": 0.5145, "step": 10633 }, { "epoch": 0.29198242723778145, "grad_norm": 0.3765260577201843, "learning_rate": 1.89829911423541e-05, "loss": 0.5202, "step": 10634 }, { "epoch": 0.29200988467874794, "grad_norm": 0.4091521203517914, "learning_rate": 1.8982801366725147e-05, "loss": 0.4754, "step": 10635 }, { "epoch": 0.29203734211971444, "grad_norm": 0.62028568983078, "learning_rate": 1.8982611574340404e-05, "loss": 0.489, "step": 10636 }, { "epoch": 0.29206479956068093, "grad_norm": 0.42159050703048706, "learning_rate": 1.8982421765200224e-05, "loss": 0.5379, "step": 10637 }, { "epoch": 0.2920922570016474, "grad_norm": 0.32403329014778137, "learning_rate": 1.898223193930496e-05, "loss": 0.5124, "step": 10638 }, { "epoch": 0.292119714442614, "grad_norm": 0.3590332567691803, "learning_rate": 1.8982042096654964e-05, "loss": 0.5353, "step": 10639 }, { "epoch": 0.29214717188358047, "grad_norm": 0.3689941167831421, "learning_rate": 1.89818522372506e-05, "loss": 0.5249, "step": 10640 }, { "epoch": 0.29217462932454696, "grad_norm": 0.4200587570667267, "learning_rate": 1.8981662361092206e-05, "loss": 0.4941, "step": 10641 }, { "epoch": 0.29220208676551346, "grad_norm": 0.34544819593429565, "learning_rate": 1.898147246818015e-05, "loss": 0.4489, "step": 10642 }, { "epoch": 0.29222954420647995, "grad_norm": 0.4147844910621643, "learning_rate": 1.898128255851478e-05, "loss": 0.5329, "step": 10643 }, { "epoch": 0.29225700164744645, "grad_norm": 0.3494395911693573, "learning_rate": 1.898109263209645e-05, "loss": 0.4889, "step": 10644 }, { "epoch": 0.29228445908841294, "grad_norm": 0.5508231520652771, "learning_rate": 1.898090268892552e-05, "loss": 0.5276, "step": 10645 }, { "epoch": 0.29231191652937943, "grad_norm": 0.3594970703125, "learning_rate": 1.8980712729002337e-05, "loss": 0.4781, "step": 10646 }, { "epoch": 0.292339373970346, "grad_norm": 0.37386807799339294, "learning_rate": 1.898052275232726e-05, "loss": 0.5329, "step": 10647 }, { "epoch": 0.2923668314113125, "grad_norm": 0.45178818702697754, "learning_rate": 1.8980332758900642e-05, "loss": 0.5677, "step": 10648 }, { "epoch": 0.29239428885227897, "grad_norm": 0.36211520433425903, "learning_rate": 1.8980142748722837e-05, "loss": 0.5231, "step": 10649 }, { "epoch": 0.29242174629324547, "grad_norm": 0.4018748700618744, "learning_rate": 1.8979952721794196e-05, "loss": 0.5871, "step": 10650 }, { "epoch": 0.29244920373421196, "grad_norm": 0.39492884278297424, "learning_rate": 1.8979762678115082e-05, "loss": 0.5733, "step": 10651 }, { "epoch": 0.29247666117517845, "grad_norm": 0.30207088589668274, "learning_rate": 1.897957261768584e-05, "loss": 0.4986, "step": 10652 }, { "epoch": 0.29250411861614495, "grad_norm": 0.32317227125167847, "learning_rate": 1.8979382540506835e-05, "loss": 0.4459, "step": 10653 }, { "epoch": 0.2925315760571115, "grad_norm": 0.3597392737865448, "learning_rate": 1.897919244657841e-05, "loss": 0.4494, "step": 10654 }, { "epoch": 0.292559033498078, "grad_norm": 0.37299197912216187, "learning_rate": 1.897900233590093e-05, "loss": 0.505, "step": 10655 }, { "epoch": 0.2925864909390445, "grad_norm": 0.42168453335762024, "learning_rate": 1.8978812208474742e-05, "loss": 0.584, "step": 10656 }, { "epoch": 0.292613948380011, "grad_norm": 0.361549437046051, "learning_rate": 1.8978622064300204e-05, "loss": 0.574, "step": 10657 }, { "epoch": 0.2926414058209775, "grad_norm": 0.38631999492645264, "learning_rate": 1.897843190337767e-05, "loss": 0.5602, "step": 10658 }, { "epoch": 0.29266886326194397, "grad_norm": 0.3645487129688263, "learning_rate": 1.8978241725707495e-05, "loss": 0.5663, "step": 10659 }, { "epoch": 0.29269632070291046, "grad_norm": 0.4305476248264313, "learning_rate": 1.8978051531290035e-05, "loss": 0.5551, "step": 10660 }, { "epoch": 0.292723778143877, "grad_norm": 0.32242971658706665, "learning_rate": 1.897786132012564e-05, "loss": 0.5104, "step": 10661 }, { "epoch": 0.2927512355848435, "grad_norm": 0.3539206087589264, "learning_rate": 1.897767109221467e-05, "loss": 0.5456, "step": 10662 }, { "epoch": 0.29277869302581, "grad_norm": 0.3587137460708618, "learning_rate": 1.8977480847557482e-05, "loss": 0.475, "step": 10663 }, { "epoch": 0.2928061504667765, "grad_norm": 0.3514917194843292, "learning_rate": 1.897729058615442e-05, "loss": 0.5413, "step": 10664 }, { "epoch": 0.292833607907743, "grad_norm": 0.3490965962409973, "learning_rate": 1.8977100308005854e-05, "loss": 0.5728, "step": 10665 }, { "epoch": 0.2928610653487095, "grad_norm": 0.3661758303642273, "learning_rate": 1.8976910013112123e-05, "loss": 0.5357, "step": 10666 }, { "epoch": 0.292888522789676, "grad_norm": 0.35939550399780273, "learning_rate": 1.8976719701473594e-05, "loss": 0.5825, "step": 10667 }, { "epoch": 0.2929159802306425, "grad_norm": 0.3733099400997162, "learning_rate": 1.8976529373090616e-05, "loss": 0.5297, "step": 10668 }, { "epoch": 0.292943437671609, "grad_norm": 0.44955357909202576, "learning_rate": 1.8976339027963546e-05, "loss": 0.4997, "step": 10669 }, { "epoch": 0.2929708951125755, "grad_norm": 0.3806009888648987, "learning_rate": 1.897614866609274e-05, "loss": 0.6011, "step": 10670 }, { "epoch": 0.292998352553542, "grad_norm": 0.39469921588897705, "learning_rate": 1.897595828747855e-05, "loss": 0.5321, "step": 10671 }, { "epoch": 0.2930258099945085, "grad_norm": 0.3487900495529175, "learning_rate": 1.8975767892121336e-05, "loss": 0.5609, "step": 10672 }, { "epoch": 0.293053267435475, "grad_norm": 0.33829739689826965, "learning_rate": 1.8975577480021447e-05, "loss": 0.4949, "step": 10673 }, { "epoch": 0.2930807248764415, "grad_norm": 0.41052666306495667, "learning_rate": 1.8975387051179244e-05, "loss": 0.5489, "step": 10674 }, { "epoch": 0.29310818231740804, "grad_norm": 0.3558547794818878, "learning_rate": 1.8975196605595076e-05, "loss": 0.5016, "step": 10675 }, { "epoch": 0.29313563975837453, "grad_norm": 0.41525760293006897, "learning_rate": 1.8975006143269304e-05, "loss": 0.5591, "step": 10676 }, { "epoch": 0.293163097199341, "grad_norm": 0.3811076581478119, "learning_rate": 1.8974815664202283e-05, "loss": 0.5101, "step": 10677 }, { "epoch": 0.2931905546403075, "grad_norm": 0.3430793881416321, "learning_rate": 1.8974625168394363e-05, "loss": 0.5123, "step": 10678 }, { "epoch": 0.293218012081274, "grad_norm": 0.37612423300743103, "learning_rate": 1.8974434655845907e-05, "loss": 0.5084, "step": 10679 }, { "epoch": 0.2932454695222405, "grad_norm": 0.7070287466049194, "learning_rate": 1.8974244126557262e-05, "loss": 0.5816, "step": 10680 }, { "epoch": 0.293272926963207, "grad_norm": 0.37966230511665344, "learning_rate": 1.8974053580528786e-05, "loss": 0.5478, "step": 10681 }, { "epoch": 0.29330038440417355, "grad_norm": 0.37395399808883667, "learning_rate": 1.8973863017760838e-05, "loss": 0.4388, "step": 10682 }, { "epoch": 0.29332784184514005, "grad_norm": 0.3886961340904236, "learning_rate": 1.8973672438253774e-05, "loss": 0.5208, "step": 10683 }, { "epoch": 0.29335529928610654, "grad_norm": 0.3940000832080841, "learning_rate": 1.8973481842007943e-05, "loss": 0.5334, "step": 10684 }, { "epoch": 0.29338275672707304, "grad_norm": 0.3680715560913086, "learning_rate": 1.8973291229023708e-05, "loss": 0.5249, "step": 10685 }, { "epoch": 0.29341021416803953, "grad_norm": 0.3418077230453491, "learning_rate": 1.8973100599301417e-05, "loss": 0.4969, "step": 10686 }, { "epoch": 0.293437671609006, "grad_norm": 0.36081090569496155, "learning_rate": 1.897290995284143e-05, "loss": 0.4295, "step": 10687 }, { "epoch": 0.2934651290499725, "grad_norm": 0.417817085981369, "learning_rate": 1.8972719289644103e-05, "loss": 0.5146, "step": 10688 }, { "epoch": 0.29349258649093907, "grad_norm": 0.6383808851242065, "learning_rate": 1.8972528609709794e-05, "loss": 0.549, "step": 10689 }, { "epoch": 0.29352004393190556, "grad_norm": 0.3753795027732849, "learning_rate": 1.8972337913038853e-05, "loss": 0.5689, "step": 10690 }, { "epoch": 0.29354750137287206, "grad_norm": 0.40231436491012573, "learning_rate": 1.8972147199631635e-05, "loss": 0.5544, "step": 10691 }, { "epoch": 0.29357495881383855, "grad_norm": 0.35932618379592896, "learning_rate": 1.8971956469488503e-05, "loss": 0.4925, "step": 10692 }, { "epoch": 0.29360241625480504, "grad_norm": 0.7994845509529114, "learning_rate": 1.897176572260981e-05, "loss": 0.4321, "step": 10693 }, { "epoch": 0.29362987369577154, "grad_norm": 0.38827452063560486, "learning_rate": 1.8971574958995906e-05, "loss": 0.564, "step": 10694 }, { "epoch": 0.29365733113673803, "grad_norm": 0.4744679927825928, "learning_rate": 1.8971384178647152e-05, "loss": 0.4762, "step": 10695 }, { "epoch": 0.2936847885777046, "grad_norm": 0.386243999004364, "learning_rate": 1.8971193381563907e-05, "loss": 0.4425, "step": 10696 }, { "epoch": 0.2937122460186711, "grad_norm": 0.3495384156703949, "learning_rate": 1.897100256774652e-05, "loss": 0.5209, "step": 10697 }, { "epoch": 0.29373970345963757, "grad_norm": 0.3609231114387512, "learning_rate": 1.897081173719535e-05, "loss": 0.5279, "step": 10698 }, { "epoch": 0.29376716090060406, "grad_norm": 0.3890160024166107, "learning_rate": 1.897062088991075e-05, "loss": 0.5719, "step": 10699 }, { "epoch": 0.29379461834157056, "grad_norm": 0.34962525963783264, "learning_rate": 1.8970430025893085e-05, "loss": 0.4854, "step": 10700 }, { "epoch": 0.29382207578253705, "grad_norm": 0.3605354130268097, "learning_rate": 1.89702391451427e-05, "loss": 0.5268, "step": 10701 }, { "epoch": 0.29384953322350355, "grad_norm": 0.4129225015640259, "learning_rate": 1.897004824765996e-05, "loss": 0.4104, "step": 10702 }, { "epoch": 0.2938769906644701, "grad_norm": 0.3510945439338684, "learning_rate": 1.8969857333445218e-05, "loss": 0.4675, "step": 10703 }, { "epoch": 0.2939044481054366, "grad_norm": 0.39796701073646545, "learning_rate": 1.8969666402498824e-05, "loss": 0.5441, "step": 10704 }, { "epoch": 0.2939319055464031, "grad_norm": 0.3555264174938202, "learning_rate": 1.896947545482114e-05, "loss": 0.6661, "step": 10705 }, { "epoch": 0.2939593629873696, "grad_norm": 0.3644757568836212, "learning_rate": 1.8969284490412528e-05, "loss": 0.4444, "step": 10706 }, { "epoch": 0.29398682042833607, "grad_norm": 0.35775241255760193, "learning_rate": 1.896909350927333e-05, "loss": 0.5771, "step": 10707 }, { "epoch": 0.29401427786930256, "grad_norm": 0.38780707120895386, "learning_rate": 1.8968902511403914e-05, "loss": 0.5008, "step": 10708 }, { "epoch": 0.29404173531026906, "grad_norm": 0.3810136020183563, "learning_rate": 1.8968711496804634e-05, "loss": 0.4859, "step": 10709 }, { "epoch": 0.2940691927512356, "grad_norm": 0.38880324363708496, "learning_rate": 1.896852046547584e-05, "loss": 0.5811, "step": 10710 }, { "epoch": 0.2940966501922021, "grad_norm": 0.34620943665504456, "learning_rate": 1.8968329417417896e-05, "loss": 0.5437, "step": 10711 }, { "epoch": 0.2941241076331686, "grad_norm": 0.3644077479839325, "learning_rate": 1.8968138352631152e-05, "loss": 0.4247, "step": 10712 }, { "epoch": 0.2941515650741351, "grad_norm": 0.34870901703834534, "learning_rate": 1.896794727111597e-05, "loss": 0.5363, "step": 10713 }, { "epoch": 0.2941790225151016, "grad_norm": 0.4676774740219116, "learning_rate": 1.8967756172872707e-05, "loss": 0.5235, "step": 10714 }, { "epoch": 0.2942064799560681, "grad_norm": 0.3515307605266571, "learning_rate": 1.896756505790171e-05, "loss": 0.4944, "step": 10715 }, { "epoch": 0.2942339373970346, "grad_norm": 0.35742703080177307, "learning_rate": 1.896737392620335e-05, "loss": 0.5574, "step": 10716 }, { "epoch": 0.2942613948380011, "grad_norm": 0.33269786834716797, "learning_rate": 1.896718277777797e-05, "loss": 0.4496, "step": 10717 }, { "epoch": 0.2942888522789676, "grad_norm": 0.4283444583415985, "learning_rate": 1.8966991612625932e-05, "loss": 0.5117, "step": 10718 }, { "epoch": 0.2943163097199341, "grad_norm": 0.3926866054534912, "learning_rate": 1.8966800430747593e-05, "loss": 0.5748, "step": 10719 }, { "epoch": 0.2943437671609006, "grad_norm": 0.39476293325424194, "learning_rate": 1.896660923214331e-05, "loss": 0.5296, "step": 10720 }, { "epoch": 0.2943712246018671, "grad_norm": 0.3824125826358795, "learning_rate": 1.8966418016813443e-05, "loss": 0.5509, "step": 10721 }, { "epoch": 0.2943986820428336, "grad_norm": 0.4133097529411316, "learning_rate": 1.8966226784758342e-05, "loss": 0.5274, "step": 10722 }, { "epoch": 0.2944261394838001, "grad_norm": 0.34004297852516174, "learning_rate": 1.8966035535978363e-05, "loss": 0.5118, "step": 10723 }, { "epoch": 0.29445359692476664, "grad_norm": 0.3975982666015625, "learning_rate": 1.896584427047387e-05, "loss": 0.5356, "step": 10724 }, { "epoch": 0.29448105436573313, "grad_norm": 0.3809800446033478, "learning_rate": 1.8965652988245214e-05, "loss": 0.4528, "step": 10725 }, { "epoch": 0.2945085118066996, "grad_norm": 0.3797608017921448, "learning_rate": 1.8965461689292756e-05, "loss": 0.5172, "step": 10726 }, { "epoch": 0.2945359692476661, "grad_norm": 0.3808315396308899, "learning_rate": 1.8965270373616845e-05, "loss": 0.4681, "step": 10727 }, { "epoch": 0.2945634266886326, "grad_norm": 0.3571028709411621, "learning_rate": 1.8965079041217848e-05, "loss": 0.5899, "step": 10728 }, { "epoch": 0.2945908841295991, "grad_norm": 0.37878352403640747, "learning_rate": 1.8964887692096116e-05, "loss": 0.6057, "step": 10729 }, { "epoch": 0.2946183415705656, "grad_norm": 0.3301376402378082, "learning_rate": 1.896469632625201e-05, "loss": 0.5044, "step": 10730 }, { "epoch": 0.29464579901153215, "grad_norm": 0.3999161720275879, "learning_rate": 1.896450494368588e-05, "loss": 0.5327, "step": 10731 }, { "epoch": 0.29467325645249864, "grad_norm": 0.8599289059638977, "learning_rate": 1.896431354439809e-05, "loss": 0.4956, "step": 10732 }, { "epoch": 0.29470071389346514, "grad_norm": 0.3344440758228302, "learning_rate": 1.8964122128388992e-05, "loss": 0.4491, "step": 10733 }, { "epoch": 0.29472817133443163, "grad_norm": 0.39317572116851807, "learning_rate": 1.8963930695658946e-05, "loss": 0.521, "step": 10734 }, { "epoch": 0.2947556287753981, "grad_norm": 0.8964151740074158, "learning_rate": 1.896373924620831e-05, "loss": 0.5063, "step": 10735 }, { "epoch": 0.2947830862163646, "grad_norm": 0.412073016166687, "learning_rate": 1.8963547780037436e-05, "loss": 0.6236, "step": 10736 }, { "epoch": 0.2948105436573311, "grad_norm": 0.4031982421875, "learning_rate": 1.8963356297146688e-05, "loss": 0.5961, "step": 10737 }, { "epoch": 0.29483800109829766, "grad_norm": 0.3629284203052521, "learning_rate": 1.8963164797536414e-05, "loss": 0.4738, "step": 10738 }, { "epoch": 0.29486545853926416, "grad_norm": 0.36002859473228455, "learning_rate": 1.8962973281206984e-05, "loss": 0.46, "step": 10739 }, { "epoch": 0.29489291598023065, "grad_norm": 0.3896573781967163, "learning_rate": 1.8962781748158746e-05, "loss": 0.4905, "step": 10740 }, { "epoch": 0.29492037342119715, "grad_norm": 0.3493849039077759, "learning_rate": 1.8962590198392057e-05, "loss": 0.5057, "step": 10741 }, { "epoch": 0.29494783086216364, "grad_norm": 0.35759106278419495, "learning_rate": 1.8962398631907278e-05, "loss": 0.5894, "step": 10742 }, { "epoch": 0.29497528830313013, "grad_norm": 0.3753238022327423, "learning_rate": 1.896220704870477e-05, "loss": 0.5197, "step": 10743 }, { "epoch": 0.29500274574409663, "grad_norm": 0.40956246852874756, "learning_rate": 1.896201544878488e-05, "loss": 0.5725, "step": 10744 }, { "epoch": 0.2950302031850632, "grad_norm": 0.3645186424255371, "learning_rate": 1.896182383214797e-05, "loss": 0.5235, "step": 10745 }, { "epoch": 0.29505766062602967, "grad_norm": 0.3461386561393738, "learning_rate": 1.89616321987944e-05, "loss": 0.5132, "step": 10746 }, { "epoch": 0.29508511806699617, "grad_norm": 0.3824485242366791, "learning_rate": 1.8961440548724525e-05, "loss": 0.4629, "step": 10747 }, { "epoch": 0.29511257550796266, "grad_norm": 0.3827904164791107, "learning_rate": 1.8961248881938706e-05, "loss": 0.434, "step": 10748 }, { "epoch": 0.29514003294892915, "grad_norm": 0.48615413904190063, "learning_rate": 1.8961057198437295e-05, "loss": 0.5412, "step": 10749 }, { "epoch": 0.29516749038989565, "grad_norm": 0.4157959520816803, "learning_rate": 1.8960865498220654e-05, "loss": 0.5921, "step": 10750 }, { "epoch": 0.29519494783086214, "grad_norm": 0.4102717339992523, "learning_rate": 1.8960673781289136e-05, "loss": 0.4758, "step": 10751 }, { "epoch": 0.2952224052718287, "grad_norm": 0.35036247968673706, "learning_rate": 1.8960482047643106e-05, "loss": 0.4619, "step": 10752 }, { "epoch": 0.2952498627127952, "grad_norm": 0.36394867300987244, "learning_rate": 1.8960290297282914e-05, "loss": 0.5925, "step": 10753 }, { "epoch": 0.2952773201537617, "grad_norm": 0.3919617831707001, "learning_rate": 1.8960098530208923e-05, "loss": 0.5275, "step": 10754 }, { "epoch": 0.2953047775947282, "grad_norm": 0.3491906225681305, "learning_rate": 1.8959906746421484e-05, "loss": 0.4655, "step": 10755 }, { "epoch": 0.29533223503569467, "grad_norm": 0.35618141293525696, "learning_rate": 1.8959714945920963e-05, "loss": 0.6225, "step": 10756 }, { "epoch": 0.29535969247666116, "grad_norm": 0.3772583603858948, "learning_rate": 1.895952312870771e-05, "loss": 0.5359, "step": 10757 }, { "epoch": 0.29538714991762766, "grad_norm": 0.406768262386322, "learning_rate": 1.895933129478209e-05, "loss": 0.5385, "step": 10758 }, { "epoch": 0.2954146073585942, "grad_norm": 0.3257448077201843, "learning_rate": 1.895913944414446e-05, "loss": 0.4436, "step": 10759 }, { "epoch": 0.2954420647995607, "grad_norm": 0.34982961416244507, "learning_rate": 1.8958947576795174e-05, "loss": 0.4768, "step": 10760 }, { "epoch": 0.2954695222405272, "grad_norm": 0.35456717014312744, "learning_rate": 1.895875569273459e-05, "loss": 0.4307, "step": 10761 }, { "epoch": 0.2954969796814937, "grad_norm": 0.3436332643032074, "learning_rate": 1.8958563791963067e-05, "loss": 0.4552, "step": 10762 }, { "epoch": 0.2955244371224602, "grad_norm": 0.38015687465667725, "learning_rate": 1.8958371874480964e-05, "loss": 0.5585, "step": 10763 }, { "epoch": 0.2955518945634267, "grad_norm": 0.37490296363830566, "learning_rate": 1.895817994028864e-05, "loss": 0.5001, "step": 10764 }, { "epoch": 0.29557935200439317, "grad_norm": 0.37402117252349854, "learning_rate": 1.8957987989386448e-05, "loss": 0.52, "step": 10765 }, { "epoch": 0.2956068094453597, "grad_norm": 0.40306907892227173, "learning_rate": 1.8957796021774753e-05, "loss": 0.5567, "step": 10766 }, { "epoch": 0.2956342668863262, "grad_norm": 0.3416852355003357, "learning_rate": 1.8957604037453907e-05, "loss": 0.4812, "step": 10767 }, { "epoch": 0.2956617243272927, "grad_norm": 0.3615260720252991, "learning_rate": 1.8957412036424272e-05, "loss": 0.4837, "step": 10768 }, { "epoch": 0.2956891817682592, "grad_norm": 0.3758598268032074, "learning_rate": 1.89572200186862e-05, "loss": 0.4532, "step": 10769 }, { "epoch": 0.2957166392092257, "grad_norm": 0.4432010054588318, "learning_rate": 1.895702798424006e-05, "loss": 0.5066, "step": 10770 }, { "epoch": 0.2957440966501922, "grad_norm": 0.3523518741130829, "learning_rate": 1.8956835933086203e-05, "loss": 0.5782, "step": 10771 }, { "epoch": 0.2957715540911587, "grad_norm": 0.47596898674964905, "learning_rate": 1.8956643865224987e-05, "loss": 0.4943, "step": 10772 }, { "epoch": 0.29579901153212523, "grad_norm": 0.36161455512046814, "learning_rate": 1.8956451780656772e-05, "loss": 0.5132, "step": 10773 }, { "epoch": 0.2958264689730917, "grad_norm": 0.36001235246658325, "learning_rate": 1.8956259679381913e-05, "loss": 0.5229, "step": 10774 }, { "epoch": 0.2958539264140582, "grad_norm": 0.35767462849617004, "learning_rate": 1.8956067561400772e-05, "loss": 0.4461, "step": 10775 }, { "epoch": 0.2958813838550247, "grad_norm": 0.3993496596813202, "learning_rate": 1.895587542671371e-05, "loss": 0.5866, "step": 10776 }, { "epoch": 0.2959088412959912, "grad_norm": 0.34247928857803345, "learning_rate": 1.895568327532108e-05, "loss": 0.5829, "step": 10777 }, { "epoch": 0.2959362987369577, "grad_norm": 0.4307877719402313, "learning_rate": 1.895549110722324e-05, "loss": 0.5422, "step": 10778 }, { "epoch": 0.2959637561779242, "grad_norm": 0.34574517607688904, "learning_rate": 1.8955298922420556e-05, "loss": 0.4858, "step": 10779 }, { "epoch": 0.2959912136188907, "grad_norm": 0.38505983352661133, "learning_rate": 1.8955106720913377e-05, "loss": 0.5945, "step": 10780 }, { "epoch": 0.29601867105985724, "grad_norm": 0.4265718460083008, "learning_rate": 1.895491450270207e-05, "loss": 0.5641, "step": 10781 }, { "epoch": 0.29604612850082374, "grad_norm": 0.3774309456348419, "learning_rate": 1.8954722267786986e-05, "loss": 0.5249, "step": 10782 }, { "epoch": 0.29607358594179023, "grad_norm": 0.40438979864120483, "learning_rate": 1.8954530016168485e-05, "loss": 0.5457, "step": 10783 }, { "epoch": 0.2961010433827567, "grad_norm": 0.3367365002632141, "learning_rate": 1.895433774784693e-05, "loss": 0.4888, "step": 10784 }, { "epoch": 0.2961285008237232, "grad_norm": 0.3954540491104126, "learning_rate": 1.895414546282268e-05, "loss": 0.5069, "step": 10785 }, { "epoch": 0.2961559582646897, "grad_norm": 0.41326645016670227, "learning_rate": 1.8953953161096085e-05, "loss": 0.5623, "step": 10786 }, { "epoch": 0.2961834157056562, "grad_norm": 0.40745478868484497, "learning_rate": 1.8953760842667514e-05, "loss": 0.5332, "step": 10787 }, { "epoch": 0.29621087314662276, "grad_norm": 0.38341668248176575, "learning_rate": 1.895356850753732e-05, "loss": 0.4969, "step": 10788 }, { "epoch": 0.29623833058758925, "grad_norm": 0.4105336368083954, "learning_rate": 1.8953376155705864e-05, "loss": 0.6407, "step": 10789 }, { "epoch": 0.29626578802855574, "grad_norm": 0.3423267900943756, "learning_rate": 1.8953183787173505e-05, "loss": 0.4796, "step": 10790 }, { "epoch": 0.29629324546952224, "grad_norm": 0.37765052914619446, "learning_rate": 1.8952991401940598e-05, "loss": 0.5262, "step": 10791 }, { "epoch": 0.29632070291048873, "grad_norm": 0.33892494440078735, "learning_rate": 1.8952799000007505e-05, "loss": 0.54, "step": 10792 }, { "epoch": 0.2963481603514552, "grad_norm": 0.39131444692611694, "learning_rate": 1.8952606581374584e-05, "loss": 0.4029, "step": 10793 }, { "epoch": 0.2963756177924217, "grad_norm": 0.4347560405731201, "learning_rate": 1.8952414146042195e-05, "loss": 0.5395, "step": 10794 }, { "epoch": 0.29640307523338827, "grad_norm": 0.38824957609176636, "learning_rate": 1.8952221694010698e-05, "loss": 0.5775, "step": 10795 }, { "epoch": 0.29643053267435476, "grad_norm": 0.3228376805782318, "learning_rate": 1.895202922528045e-05, "loss": 0.4573, "step": 10796 }, { "epoch": 0.29645799011532126, "grad_norm": 0.40403038263320923, "learning_rate": 1.895183673985181e-05, "loss": 0.491, "step": 10797 }, { "epoch": 0.29648544755628775, "grad_norm": 0.32940474152565, "learning_rate": 1.8951644237725137e-05, "loss": 0.5031, "step": 10798 }, { "epoch": 0.29651290499725425, "grad_norm": 0.3782308101654053, "learning_rate": 1.895145171890079e-05, "loss": 0.5925, "step": 10799 }, { "epoch": 0.29654036243822074, "grad_norm": 0.36629363894462585, "learning_rate": 1.8951259183379127e-05, "loss": 0.5115, "step": 10800 }, { "epoch": 0.29656781987918723, "grad_norm": 0.38566383719444275, "learning_rate": 1.895106663116051e-05, "loss": 0.5162, "step": 10801 }, { "epoch": 0.2965952773201538, "grad_norm": 0.3724574148654938, "learning_rate": 1.8950874062245298e-05, "loss": 0.4638, "step": 10802 }, { "epoch": 0.2966227347611203, "grad_norm": 0.3856683671474457, "learning_rate": 1.895068147663385e-05, "loss": 0.5624, "step": 10803 }, { "epoch": 0.29665019220208677, "grad_norm": 0.3430090844631195, "learning_rate": 1.895048887432652e-05, "loss": 0.4519, "step": 10804 }, { "epoch": 0.29667764964305327, "grad_norm": 0.38990867137908936, "learning_rate": 1.8950296255323675e-05, "loss": 0.524, "step": 10805 }, { "epoch": 0.29670510708401976, "grad_norm": 0.31869977712631226, "learning_rate": 1.895010361962567e-05, "loss": 0.4839, "step": 10806 }, { "epoch": 0.29673256452498625, "grad_norm": 0.4051859676837921, "learning_rate": 1.8949910967232866e-05, "loss": 0.4615, "step": 10807 }, { "epoch": 0.29676002196595275, "grad_norm": 0.5086353421211243, "learning_rate": 1.894971829814562e-05, "loss": 0.4516, "step": 10808 }, { "epoch": 0.2967874794069193, "grad_norm": 0.3583243489265442, "learning_rate": 1.8949525612364296e-05, "loss": 0.56, "step": 10809 }, { "epoch": 0.2968149368478858, "grad_norm": 0.34585142135620117, "learning_rate": 1.8949332909889246e-05, "loss": 0.508, "step": 10810 }, { "epoch": 0.2968423942888523, "grad_norm": 0.3738400936126709, "learning_rate": 1.8949140190720836e-05, "loss": 0.4975, "step": 10811 }, { "epoch": 0.2968698517298188, "grad_norm": 0.501816987991333, "learning_rate": 1.8948947454859426e-05, "loss": 0.4902, "step": 10812 }, { "epoch": 0.2968973091707853, "grad_norm": 0.4884268641471863, "learning_rate": 1.894875470230537e-05, "loss": 1.0136, "step": 10813 }, { "epoch": 0.29692476661175177, "grad_norm": 0.4167112410068512, "learning_rate": 1.894856193305903e-05, "loss": 0.5, "step": 10814 }, { "epoch": 0.29695222405271826, "grad_norm": 0.3739244043827057, "learning_rate": 1.8948369147120768e-05, "loss": 0.5406, "step": 10815 }, { "epoch": 0.2969796814936848, "grad_norm": 0.37864840030670166, "learning_rate": 1.894817634449094e-05, "loss": 0.5474, "step": 10816 }, { "epoch": 0.2970071389346513, "grad_norm": 0.3553939461708069, "learning_rate": 1.8947983525169906e-05, "loss": 0.5815, "step": 10817 }, { "epoch": 0.2970345963756178, "grad_norm": 0.4425792992115021, "learning_rate": 1.8947790689158028e-05, "loss": 0.554, "step": 10818 }, { "epoch": 0.2970620538165843, "grad_norm": 1.8045668601989746, "learning_rate": 1.8947597836455664e-05, "loss": 0.5429, "step": 10819 }, { "epoch": 0.2970895112575508, "grad_norm": 0.3370573818683624, "learning_rate": 1.8947404967063176e-05, "loss": 0.4308, "step": 10820 }, { "epoch": 0.2971169686985173, "grad_norm": 0.41697758436203003, "learning_rate": 1.894721208098092e-05, "loss": 0.5071, "step": 10821 }, { "epoch": 0.2971444261394838, "grad_norm": 0.38402098417282104, "learning_rate": 1.894701917820926e-05, "loss": 0.5711, "step": 10822 }, { "epoch": 0.2971718835804503, "grad_norm": 0.4011977016925812, "learning_rate": 1.8946826258748552e-05, "loss": 0.5497, "step": 10823 }, { "epoch": 0.2971993410214168, "grad_norm": 0.4152972400188446, "learning_rate": 1.894663332259916e-05, "loss": 0.5524, "step": 10824 }, { "epoch": 0.2972267984623833, "grad_norm": 0.3698970675468445, "learning_rate": 1.894644036976144e-05, "loss": 0.5423, "step": 10825 }, { "epoch": 0.2972542559033498, "grad_norm": 0.3738550841808319, "learning_rate": 1.8946247400235753e-05, "loss": 0.5507, "step": 10826 }, { "epoch": 0.2972817133443163, "grad_norm": 0.4567551612854004, "learning_rate": 1.894605441402246e-05, "loss": 0.571, "step": 10827 }, { "epoch": 0.2973091707852828, "grad_norm": 0.3612056374549866, "learning_rate": 1.8945861411121916e-05, "loss": 0.5048, "step": 10828 }, { "epoch": 0.2973366282262493, "grad_norm": 0.4756089150905609, "learning_rate": 1.894566839153449e-05, "loss": 0.4877, "step": 10829 }, { "epoch": 0.29736408566721584, "grad_norm": 0.4706931412220001, "learning_rate": 1.8945475355260538e-05, "loss": 0.5432, "step": 10830 }, { "epoch": 0.29739154310818233, "grad_norm": 0.33526045083999634, "learning_rate": 1.8945282302300415e-05, "loss": 0.5719, "step": 10831 }, { "epoch": 0.2974190005491488, "grad_norm": 0.37052178382873535, "learning_rate": 1.8945089232654488e-05, "loss": 0.5342, "step": 10832 }, { "epoch": 0.2974464579901153, "grad_norm": 0.3655042350292206, "learning_rate": 1.8944896146323117e-05, "loss": 0.5706, "step": 10833 }, { "epoch": 0.2974739154310818, "grad_norm": 0.33872321248054504, "learning_rate": 1.8944703043306657e-05, "loss": 0.527, "step": 10834 }, { "epoch": 0.2975013728720483, "grad_norm": 0.6805616021156311, "learning_rate": 1.894450992360547e-05, "loss": 0.5659, "step": 10835 }, { "epoch": 0.2975288303130148, "grad_norm": 0.324282705783844, "learning_rate": 1.8944316787219917e-05, "loss": 0.4753, "step": 10836 }, { "epoch": 0.29755628775398135, "grad_norm": 0.38324716687202454, "learning_rate": 1.8944123634150362e-05, "loss": 0.561, "step": 10837 }, { "epoch": 0.29758374519494785, "grad_norm": 0.36725595593452454, "learning_rate": 1.894393046439716e-05, "loss": 0.5573, "step": 10838 }, { "epoch": 0.29761120263591434, "grad_norm": 0.35875651240348816, "learning_rate": 1.8943737277960676e-05, "loss": 0.5518, "step": 10839 }, { "epoch": 0.29763866007688083, "grad_norm": 0.4366750121116638, "learning_rate": 1.894354407484126e-05, "loss": 0.5713, "step": 10840 }, { "epoch": 0.29766611751784733, "grad_norm": 0.3965785801410675, "learning_rate": 1.8943350855039288e-05, "loss": 0.4631, "step": 10841 }, { "epoch": 0.2976935749588138, "grad_norm": 0.458773136138916, "learning_rate": 1.8943157618555104e-05, "loss": 0.556, "step": 10842 }, { "epoch": 0.2977210323997803, "grad_norm": 0.339426189661026, "learning_rate": 1.8942964365389085e-05, "loss": 0.4292, "step": 10843 }, { "epoch": 0.29774848984074687, "grad_norm": 0.3585992753505707, "learning_rate": 1.8942771095541578e-05, "loss": 0.5333, "step": 10844 }, { "epoch": 0.29777594728171336, "grad_norm": 0.37483206391334534, "learning_rate": 1.894257780901295e-05, "loss": 0.4779, "step": 10845 }, { "epoch": 0.29780340472267985, "grad_norm": 0.3996804356575012, "learning_rate": 1.8942384505803562e-05, "loss": 0.4891, "step": 10846 }, { "epoch": 0.29783086216364635, "grad_norm": 0.3551115095615387, "learning_rate": 1.894219118591377e-05, "loss": 0.5872, "step": 10847 }, { "epoch": 0.29785831960461284, "grad_norm": 0.5261492133140564, "learning_rate": 1.894199784934394e-05, "loss": 0.55, "step": 10848 }, { "epoch": 0.29788577704557934, "grad_norm": 0.43165284395217896, "learning_rate": 1.894180449609443e-05, "loss": 0.6002, "step": 10849 }, { "epoch": 0.29791323448654583, "grad_norm": 0.3774586021900177, "learning_rate": 1.8941611126165597e-05, "loss": 0.5531, "step": 10850 }, { "epoch": 0.2979406919275124, "grad_norm": 0.3163756728172302, "learning_rate": 1.8941417739557806e-05, "loss": 0.5142, "step": 10851 }, { "epoch": 0.2979681493684789, "grad_norm": 0.37798184156417847, "learning_rate": 1.8941224336271423e-05, "loss": 0.5171, "step": 10852 }, { "epoch": 0.29799560680944537, "grad_norm": 0.337776243686676, "learning_rate": 1.89410309163068e-05, "loss": 0.4721, "step": 10853 }, { "epoch": 0.29802306425041186, "grad_norm": 0.36611634492874146, "learning_rate": 1.8940837479664297e-05, "loss": 0.5413, "step": 10854 }, { "epoch": 0.29805052169137836, "grad_norm": 0.38074663281440735, "learning_rate": 1.8940644026344283e-05, "loss": 0.4903, "step": 10855 }, { "epoch": 0.29807797913234485, "grad_norm": 0.4038684368133545, "learning_rate": 1.8940450556347112e-05, "loss": 0.5315, "step": 10856 }, { "epoch": 0.29810543657331134, "grad_norm": 0.4130924344062805, "learning_rate": 1.8940257069673147e-05, "loss": 0.5714, "step": 10857 }, { "epoch": 0.2981328940142779, "grad_norm": 0.34778186678886414, "learning_rate": 1.894006356632275e-05, "loss": 0.4828, "step": 10858 }, { "epoch": 0.2981603514552444, "grad_norm": 0.39373865723609924, "learning_rate": 1.893987004629628e-05, "loss": 0.6504, "step": 10859 }, { "epoch": 0.2981878088962109, "grad_norm": 0.4166102409362793, "learning_rate": 1.89396765095941e-05, "loss": 0.6126, "step": 10860 }, { "epoch": 0.2982152663371774, "grad_norm": 0.37797075510025024, "learning_rate": 1.8939482956216573e-05, "loss": 0.5222, "step": 10861 }, { "epoch": 0.29824272377814387, "grad_norm": 0.3921407163143158, "learning_rate": 1.8939289386164055e-05, "loss": 0.5674, "step": 10862 }, { "epoch": 0.29827018121911036, "grad_norm": 0.3702322244644165, "learning_rate": 1.8939095799436908e-05, "loss": 0.5663, "step": 10863 }, { "epoch": 0.29829763866007686, "grad_norm": 0.36403998732566833, "learning_rate": 1.8938902196035494e-05, "loss": 0.4339, "step": 10864 }, { "epoch": 0.2983250961010434, "grad_norm": 0.3975658714771271, "learning_rate": 1.8938708575960175e-05, "loss": 0.5348, "step": 10865 }, { "epoch": 0.2983525535420099, "grad_norm": 0.3963545560836792, "learning_rate": 1.8938514939211315e-05, "loss": 0.4667, "step": 10866 }, { "epoch": 0.2983800109829764, "grad_norm": 0.40832045674324036, "learning_rate": 1.8938321285789267e-05, "loss": 0.4775, "step": 10867 }, { "epoch": 0.2984074684239429, "grad_norm": 0.3746139407157898, "learning_rate": 1.8938127615694397e-05, "loss": 0.4818, "step": 10868 }, { "epoch": 0.2984349258649094, "grad_norm": 0.36171770095825195, "learning_rate": 1.893793392892707e-05, "loss": 0.5496, "step": 10869 }, { "epoch": 0.2984623833058759, "grad_norm": 0.3743970990180969, "learning_rate": 1.893774022548764e-05, "loss": 0.5395, "step": 10870 }, { "epoch": 0.29848984074684237, "grad_norm": 0.37003394961357117, "learning_rate": 1.8937546505376474e-05, "loss": 0.509, "step": 10871 }, { "epoch": 0.2985172981878089, "grad_norm": 0.4097583293914795, "learning_rate": 1.8937352768593933e-05, "loss": 0.5206, "step": 10872 }, { "epoch": 0.2985447556287754, "grad_norm": 0.3751893639564514, "learning_rate": 1.8937159015140372e-05, "loss": 0.5733, "step": 10873 }, { "epoch": 0.2985722130697419, "grad_norm": 0.39508625864982605, "learning_rate": 1.893696524501616e-05, "loss": 0.4766, "step": 10874 }, { "epoch": 0.2985996705107084, "grad_norm": 0.3583570122718811, "learning_rate": 1.8936771458221655e-05, "loss": 0.4721, "step": 10875 }, { "epoch": 0.2986271279516749, "grad_norm": 0.37513467669487, "learning_rate": 1.893657765475722e-05, "loss": 0.5363, "step": 10876 }, { "epoch": 0.2986545853926414, "grad_norm": 0.3767146170139313, "learning_rate": 1.8936383834623214e-05, "loss": 0.5867, "step": 10877 }, { "epoch": 0.2986820428336079, "grad_norm": 0.428607314825058, "learning_rate": 1.893618999782e-05, "loss": 0.5477, "step": 10878 }, { "epoch": 0.29870950027457444, "grad_norm": 0.32567232847213745, "learning_rate": 1.8935996144347938e-05, "loss": 0.4484, "step": 10879 }, { "epoch": 0.29873695771554093, "grad_norm": 0.3490471839904785, "learning_rate": 1.8935802274207392e-05, "loss": 0.5881, "step": 10880 }, { "epoch": 0.2987644151565074, "grad_norm": 0.398642361164093, "learning_rate": 1.893560838739873e-05, "loss": 0.5796, "step": 10881 }, { "epoch": 0.2987918725974739, "grad_norm": 0.35972437262535095, "learning_rate": 1.8935414483922296e-05, "loss": 0.5341, "step": 10882 }, { "epoch": 0.2988193300384404, "grad_norm": 0.3990684151649475, "learning_rate": 1.8935220563778468e-05, "loss": 0.5463, "step": 10883 }, { "epoch": 0.2988467874794069, "grad_norm": 0.4865363538265228, "learning_rate": 1.89350266269676e-05, "loss": 0.5355, "step": 10884 }, { "epoch": 0.2988742449203734, "grad_norm": 0.33082637190818787, "learning_rate": 1.8934832673490057e-05, "loss": 0.4935, "step": 10885 }, { "epoch": 0.29890170236133995, "grad_norm": 0.36841699481010437, "learning_rate": 1.89346387033462e-05, "loss": 0.5268, "step": 10886 }, { "epoch": 0.29892915980230644, "grad_norm": 0.3702302873134613, "learning_rate": 1.8934444716536385e-05, "loss": 0.4521, "step": 10887 }, { "epoch": 0.29895661724327294, "grad_norm": 0.3535923957824707, "learning_rate": 1.893425071306098e-05, "loss": 0.5261, "step": 10888 }, { "epoch": 0.29898407468423943, "grad_norm": 0.35352078080177307, "learning_rate": 1.893405669292035e-05, "loss": 0.5131, "step": 10889 }, { "epoch": 0.2990115321252059, "grad_norm": 0.45138221979141235, "learning_rate": 1.8933862656114853e-05, "loss": 0.4558, "step": 10890 }, { "epoch": 0.2990389895661724, "grad_norm": 0.3415718376636505, "learning_rate": 1.8933668602644847e-05, "loss": 0.4999, "step": 10891 }, { "epoch": 0.2990664470071389, "grad_norm": 0.47314587235450745, "learning_rate": 1.89334745325107e-05, "loss": 0.5296, "step": 10892 }, { "epoch": 0.29909390444810546, "grad_norm": 0.42635369300842285, "learning_rate": 1.893328044571277e-05, "loss": 0.614, "step": 10893 }, { "epoch": 0.29912136188907196, "grad_norm": 0.35159507393836975, "learning_rate": 1.8933086342251426e-05, "loss": 0.4873, "step": 10894 }, { "epoch": 0.29914881933003845, "grad_norm": 0.35765641927719116, "learning_rate": 1.893289222212702e-05, "loss": 0.4631, "step": 10895 }, { "epoch": 0.29917627677100495, "grad_norm": 0.4177396595478058, "learning_rate": 1.893269808533992e-05, "loss": 0.5504, "step": 10896 }, { "epoch": 0.29920373421197144, "grad_norm": 0.4810444414615631, "learning_rate": 1.8932503931890487e-05, "loss": 0.5831, "step": 10897 }, { "epoch": 0.29923119165293793, "grad_norm": 0.33061009645462036, "learning_rate": 1.8932309761779084e-05, "loss": 0.4452, "step": 10898 }, { "epoch": 0.2992586490939044, "grad_norm": 0.3618420660495758, "learning_rate": 1.8932115575006072e-05, "loss": 0.4546, "step": 10899 }, { "epoch": 0.299286106534871, "grad_norm": 0.4312288761138916, "learning_rate": 1.8931921371571814e-05, "loss": 0.545, "step": 10900 }, { "epoch": 0.29931356397583747, "grad_norm": 0.3678516149520874, "learning_rate": 1.893172715147667e-05, "loss": 0.5695, "step": 10901 }, { "epoch": 0.29934102141680397, "grad_norm": 0.38743850588798523, "learning_rate": 1.8931532914721008e-05, "loss": 0.4791, "step": 10902 }, { "epoch": 0.29936847885777046, "grad_norm": 0.35151833295822144, "learning_rate": 1.893133866130518e-05, "loss": 0.6062, "step": 10903 }, { "epoch": 0.29939593629873695, "grad_norm": 0.34097206592559814, "learning_rate": 1.8931144391229563e-05, "loss": 0.5439, "step": 10904 }, { "epoch": 0.29942339373970345, "grad_norm": 0.342652827501297, "learning_rate": 1.8930950104494506e-05, "loss": 0.4525, "step": 10905 }, { "epoch": 0.29945085118066994, "grad_norm": 0.4296620786190033, "learning_rate": 1.893075580110038e-05, "loss": 0.5906, "step": 10906 }, { "epoch": 0.2994783086216365, "grad_norm": 0.3718644082546234, "learning_rate": 1.893056148104754e-05, "loss": 0.5376, "step": 10907 }, { "epoch": 0.299505766062603, "grad_norm": 0.36686626076698303, "learning_rate": 1.8930367144336356e-05, "loss": 0.4849, "step": 10908 }, { "epoch": 0.2995332235035695, "grad_norm": 0.3789626657962799, "learning_rate": 1.8930172790967183e-05, "loss": 0.5524, "step": 10909 }, { "epoch": 0.299560680944536, "grad_norm": 0.39235207438468933, "learning_rate": 1.8929978420940392e-05, "loss": 0.4888, "step": 10910 }, { "epoch": 0.29958813838550247, "grad_norm": 0.42099934816360474, "learning_rate": 1.892978403425634e-05, "loss": 0.5967, "step": 10911 }, { "epoch": 0.29961559582646896, "grad_norm": 0.3725258708000183, "learning_rate": 1.8929589630915387e-05, "loss": 0.6167, "step": 10912 }, { "epoch": 0.29964305326743546, "grad_norm": 0.3896276652812958, "learning_rate": 1.8929395210917904e-05, "loss": 0.4957, "step": 10913 }, { "epoch": 0.29967051070840195, "grad_norm": 0.41210320591926575, "learning_rate": 1.8929200774264247e-05, "loss": 0.6769, "step": 10914 }, { "epoch": 0.2996979681493685, "grad_norm": 0.40179261565208435, "learning_rate": 1.8929006320954778e-05, "loss": 0.4715, "step": 10915 }, { "epoch": 0.299725425590335, "grad_norm": 0.38657140731811523, "learning_rate": 1.892881185098987e-05, "loss": 0.5082, "step": 10916 }, { "epoch": 0.2997528830313015, "grad_norm": 0.48375290632247925, "learning_rate": 1.892861736436987e-05, "loss": 0.5634, "step": 10917 }, { "epoch": 0.299780340472268, "grad_norm": 0.36648663878440857, "learning_rate": 1.892842286109515e-05, "loss": 0.5542, "step": 10918 }, { "epoch": 0.2998077979132345, "grad_norm": 0.32879289984703064, "learning_rate": 1.8928228341166077e-05, "loss": 0.5671, "step": 10919 }, { "epoch": 0.29983525535420097, "grad_norm": 0.41883763670921326, "learning_rate": 1.892803380458301e-05, "loss": 0.5689, "step": 10920 }, { "epoch": 0.29986271279516746, "grad_norm": 0.3617636561393738, "learning_rate": 1.8927839251346302e-05, "loss": 0.5446, "step": 10921 }, { "epoch": 0.299890170236134, "grad_norm": 0.358537882566452, "learning_rate": 1.892764468145633e-05, "loss": 0.5016, "step": 10922 }, { "epoch": 0.2999176276771005, "grad_norm": 0.33985936641693115, "learning_rate": 1.8927450094913448e-05, "loss": 0.4849, "step": 10923 }, { "epoch": 0.299945085118067, "grad_norm": 0.35180002450942993, "learning_rate": 1.8927255491718023e-05, "loss": 0.5006, "step": 10924 }, { "epoch": 0.2999725425590335, "grad_norm": 0.3203871548175812, "learning_rate": 1.892706087187042e-05, "loss": 0.4593, "step": 10925 }, { "epoch": 0.3, "grad_norm": 0.3373394310474396, "learning_rate": 1.8926866235371e-05, "loss": 0.4657, "step": 10926 }, { "epoch": 0.3000274574409665, "grad_norm": 0.3684391975402832, "learning_rate": 1.892667158222012e-05, "loss": 0.5571, "step": 10927 }, { "epoch": 0.300054914881933, "grad_norm": 0.3817688226699829, "learning_rate": 1.892647691241815e-05, "loss": 0.5024, "step": 10928 }, { "epoch": 0.3000823723228995, "grad_norm": 0.34217390418052673, "learning_rate": 1.8926282225965456e-05, "loss": 0.5153, "step": 10929 }, { "epoch": 0.300109829763866, "grad_norm": 0.3489822745323181, "learning_rate": 1.8926087522862392e-05, "loss": 0.5126, "step": 10930 }, { "epoch": 0.3001372872048325, "grad_norm": 0.30839526653289795, "learning_rate": 1.8925892803109328e-05, "loss": 0.4316, "step": 10931 }, { "epoch": 0.300164744645799, "grad_norm": 0.38616976141929626, "learning_rate": 1.8925698066706625e-05, "loss": 0.4847, "step": 10932 }, { "epoch": 0.3001922020867655, "grad_norm": 0.3926428258419037, "learning_rate": 1.892550331365465e-05, "loss": 0.515, "step": 10933 }, { "epoch": 0.300219659527732, "grad_norm": 0.3826693594455719, "learning_rate": 1.8925308543953756e-05, "loss": 0.5745, "step": 10934 }, { "epoch": 0.3002471169686985, "grad_norm": 0.455049991607666, "learning_rate": 1.8925113757604315e-05, "loss": 0.5463, "step": 10935 }, { "epoch": 0.30027457440966504, "grad_norm": 0.4945071041584015, "learning_rate": 1.892491895460669e-05, "loss": 0.4937, "step": 10936 }, { "epoch": 0.30030203185063153, "grad_norm": 0.36703863739967346, "learning_rate": 1.892472413496124e-05, "loss": 0.5464, "step": 10937 }, { "epoch": 0.30032948929159803, "grad_norm": 0.33624207973480225, "learning_rate": 1.8924529298668334e-05, "loss": 0.4928, "step": 10938 }, { "epoch": 0.3003569467325645, "grad_norm": 0.3720129430294037, "learning_rate": 1.892433444572833e-05, "loss": 0.526, "step": 10939 }, { "epoch": 0.300384404173531, "grad_norm": 0.3663005828857422, "learning_rate": 1.8924139576141596e-05, "loss": 0.5699, "step": 10940 }, { "epoch": 0.3004118616144975, "grad_norm": 0.4051869809627533, "learning_rate": 1.8923944689908492e-05, "loss": 0.6255, "step": 10941 }, { "epoch": 0.300439319055464, "grad_norm": 0.3736560046672821, "learning_rate": 1.8923749787029384e-05, "loss": 0.5133, "step": 10942 }, { "epoch": 0.30046677649643055, "grad_norm": 0.39923352003097534, "learning_rate": 1.8923554867504633e-05, "loss": 0.4813, "step": 10943 }, { "epoch": 0.30049423393739705, "grad_norm": 0.37185561656951904, "learning_rate": 1.8923359931334606e-05, "loss": 0.4301, "step": 10944 }, { "epoch": 0.30052169137836354, "grad_norm": 0.37370091676712036, "learning_rate": 1.8923164978519664e-05, "loss": 0.5423, "step": 10945 }, { "epoch": 0.30054914881933004, "grad_norm": 0.3686252236366272, "learning_rate": 1.8922970009060172e-05, "loss": 0.5039, "step": 10946 }, { "epoch": 0.30057660626029653, "grad_norm": 0.43140631914138794, "learning_rate": 1.8922775022956493e-05, "loss": 0.527, "step": 10947 }, { "epoch": 0.300604063701263, "grad_norm": 0.4005013406276703, "learning_rate": 1.8922580020208986e-05, "loss": 0.48, "step": 10948 }, { "epoch": 0.3006315211422295, "grad_norm": 0.3619464337825775, "learning_rate": 1.8922385000818026e-05, "loss": 0.525, "step": 10949 }, { "epoch": 0.30065897858319607, "grad_norm": 0.39588168263435364, "learning_rate": 1.8922189964783967e-05, "loss": 0.5758, "step": 10950 }, { "epoch": 0.30068643602416256, "grad_norm": 0.3503384292125702, "learning_rate": 1.8921994912107178e-05, "loss": 0.4646, "step": 10951 }, { "epoch": 0.30071389346512906, "grad_norm": 0.4395134150981903, "learning_rate": 1.8921799842788018e-05, "loss": 0.5409, "step": 10952 }, { "epoch": 0.30074135090609555, "grad_norm": 0.49381551146507263, "learning_rate": 1.8921604756826855e-05, "loss": 0.5247, "step": 10953 }, { "epoch": 0.30076880834706204, "grad_norm": 0.33386942744255066, "learning_rate": 1.8921409654224053e-05, "loss": 0.3821, "step": 10954 }, { "epoch": 0.30079626578802854, "grad_norm": 0.39377403259277344, "learning_rate": 1.8921214534979975e-05, "loss": 0.5262, "step": 10955 }, { "epoch": 0.30082372322899503, "grad_norm": 0.38531485199928284, "learning_rate": 1.892101939909498e-05, "loss": 0.5776, "step": 10956 }, { "epoch": 0.3008511806699616, "grad_norm": 0.34894055128097534, "learning_rate": 1.8920824246569444e-05, "loss": 0.6013, "step": 10957 }, { "epoch": 0.3008786381109281, "grad_norm": 0.321336954832077, "learning_rate": 1.892062907740372e-05, "loss": 0.4718, "step": 10958 }, { "epoch": 0.30090609555189457, "grad_norm": 0.41395363211631775, "learning_rate": 1.8920433891598172e-05, "loss": 0.5943, "step": 10959 }, { "epoch": 0.30093355299286106, "grad_norm": 0.3755747973918915, "learning_rate": 1.892023868915317e-05, "loss": 0.4931, "step": 10960 }, { "epoch": 0.30096101043382756, "grad_norm": 0.37217792868614197, "learning_rate": 1.892004347006908e-05, "loss": 0.4486, "step": 10961 }, { "epoch": 0.30098846787479405, "grad_norm": 0.3231324851512909, "learning_rate": 1.891984823434626e-05, "loss": 0.4737, "step": 10962 }, { "epoch": 0.30101592531576055, "grad_norm": 0.3688288629055023, "learning_rate": 1.8919652981985072e-05, "loss": 0.5913, "step": 10963 }, { "epoch": 0.3010433827567271, "grad_norm": 0.34187984466552734, "learning_rate": 1.891945771298589e-05, "loss": 0.5196, "step": 10964 }, { "epoch": 0.3010708401976936, "grad_norm": 0.3866761326789856, "learning_rate": 1.8919262427349067e-05, "loss": 0.5297, "step": 10965 }, { "epoch": 0.3010982976386601, "grad_norm": 0.3917812705039978, "learning_rate": 1.891906712507498e-05, "loss": 0.6087, "step": 10966 }, { "epoch": 0.3011257550796266, "grad_norm": 0.41688334941864014, "learning_rate": 1.891887180616398e-05, "loss": 0.4813, "step": 10967 }, { "epoch": 0.30115321252059307, "grad_norm": 0.3556533753871918, "learning_rate": 1.891867647061644e-05, "loss": 0.4935, "step": 10968 }, { "epoch": 0.30118066996155957, "grad_norm": 0.5756800770759583, "learning_rate": 1.891848111843272e-05, "loss": 0.4915, "step": 10969 }, { "epoch": 0.30120812740252606, "grad_norm": 0.3702562153339386, "learning_rate": 1.8918285749613185e-05, "loss": 0.5295, "step": 10970 }, { "epoch": 0.3012355848434926, "grad_norm": 0.3238142430782318, "learning_rate": 1.8918090364158208e-05, "loss": 0.4898, "step": 10971 }, { "epoch": 0.3012630422844591, "grad_norm": 0.37529510259628296, "learning_rate": 1.8917894962068137e-05, "loss": 0.4808, "step": 10972 }, { "epoch": 0.3012904997254256, "grad_norm": 0.39626359939575195, "learning_rate": 1.8917699543343352e-05, "loss": 0.5674, "step": 10973 }, { "epoch": 0.3013179571663921, "grad_norm": 0.3661440908908844, "learning_rate": 1.891750410798421e-05, "loss": 0.4872, "step": 10974 }, { "epoch": 0.3013454146073586, "grad_norm": 0.35855865478515625, "learning_rate": 1.8917308655991074e-05, "loss": 0.5679, "step": 10975 }, { "epoch": 0.3013728720483251, "grad_norm": 0.3775087296962738, "learning_rate": 1.8917113187364312e-05, "loss": 0.5289, "step": 10976 }, { "epoch": 0.3014003294892916, "grad_norm": 0.3446570932865143, "learning_rate": 1.891691770210429e-05, "loss": 0.5188, "step": 10977 }, { "epoch": 0.3014277869302581, "grad_norm": 0.370281457901001, "learning_rate": 1.891672220021137e-05, "loss": 0.4836, "step": 10978 }, { "epoch": 0.3014552443712246, "grad_norm": 0.354131281375885, "learning_rate": 1.8916526681685918e-05, "loss": 0.525, "step": 10979 }, { "epoch": 0.3014827018121911, "grad_norm": 0.4400595724582672, "learning_rate": 1.8916331146528296e-05, "loss": 0.5939, "step": 10980 }, { "epoch": 0.3015101592531576, "grad_norm": 0.36991164088249207, "learning_rate": 1.891613559473887e-05, "loss": 0.5561, "step": 10981 }, { "epoch": 0.3015376166941241, "grad_norm": 0.4268181324005127, "learning_rate": 1.8915940026318007e-05, "loss": 0.533, "step": 10982 }, { "epoch": 0.3015650741350906, "grad_norm": 0.37012046575546265, "learning_rate": 1.8915744441266072e-05, "loss": 0.5706, "step": 10983 }, { "epoch": 0.3015925315760571, "grad_norm": 0.5497652888298035, "learning_rate": 1.8915548839583425e-05, "loss": 0.604, "step": 10984 }, { "epoch": 0.30161998901702364, "grad_norm": 0.39667990803718567, "learning_rate": 1.8915353221270437e-05, "loss": 0.5641, "step": 10985 }, { "epoch": 0.30164744645799013, "grad_norm": 0.37480154633522034, "learning_rate": 1.8915157586327465e-05, "loss": 0.4555, "step": 10986 }, { "epoch": 0.3016749038989566, "grad_norm": 0.4517703056335449, "learning_rate": 1.8914961934754882e-05, "loss": 0.5646, "step": 10987 }, { "epoch": 0.3017023613399231, "grad_norm": 0.36712831258773804, "learning_rate": 1.891476626655305e-05, "loss": 0.4609, "step": 10988 }, { "epoch": 0.3017298187808896, "grad_norm": 0.38053327798843384, "learning_rate": 1.8914570581722334e-05, "loss": 0.5727, "step": 10989 }, { "epoch": 0.3017572762218561, "grad_norm": 0.358246386051178, "learning_rate": 1.8914374880263095e-05, "loss": 0.4547, "step": 10990 }, { "epoch": 0.3017847336628226, "grad_norm": 0.37945061922073364, "learning_rate": 1.8914179162175705e-05, "loss": 0.5111, "step": 10991 }, { "epoch": 0.30181219110378915, "grad_norm": 0.44828668236732483, "learning_rate": 1.8913983427460522e-05, "loss": 0.5382, "step": 10992 }, { "epoch": 0.30183964854475565, "grad_norm": 0.44445618987083435, "learning_rate": 1.891378767611792e-05, "loss": 0.5636, "step": 10993 }, { "epoch": 0.30186710598572214, "grad_norm": 0.4053727984428406, "learning_rate": 1.8913591908148258e-05, "loss": 0.5417, "step": 10994 }, { "epoch": 0.30189456342668863, "grad_norm": 0.3573754131793976, "learning_rate": 1.8913396123551902e-05, "loss": 0.5857, "step": 10995 }, { "epoch": 0.3019220208676551, "grad_norm": 0.35638779401779175, "learning_rate": 1.8913200322329213e-05, "loss": 0.4659, "step": 10996 }, { "epoch": 0.3019494783086216, "grad_norm": 0.3810458481311798, "learning_rate": 1.8913004504480566e-05, "loss": 0.5382, "step": 10997 }, { "epoch": 0.3019769357495881, "grad_norm": 0.3639042377471924, "learning_rate": 1.891280867000632e-05, "loss": 0.533, "step": 10998 }, { "epoch": 0.30200439319055467, "grad_norm": 0.3233224153518677, "learning_rate": 1.891261281890684e-05, "loss": 0.4893, "step": 10999 }, { "epoch": 0.30203185063152116, "grad_norm": 0.581595242023468, "learning_rate": 1.891241695118249e-05, "loss": 0.526, "step": 11000 }, { "epoch": 0.30205930807248765, "grad_norm": 0.4037179946899414, "learning_rate": 1.891222106683364e-05, "loss": 0.5573, "step": 11001 }, { "epoch": 0.30208676551345415, "grad_norm": 0.3376685082912445, "learning_rate": 1.8912025165860655e-05, "loss": 0.547, "step": 11002 }, { "epoch": 0.30211422295442064, "grad_norm": 0.4141163229942322, "learning_rate": 1.8911829248263897e-05, "loss": 0.658, "step": 11003 }, { "epoch": 0.30214168039538714, "grad_norm": 0.34919866919517517, "learning_rate": 1.8911633314043737e-05, "loss": 0.486, "step": 11004 }, { "epoch": 0.30216913783635363, "grad_norm": 0.3765205442905426, "learning_rate": 1.8911437363200533e-05, "loss": 0.5295, "step": 11005 }, { "epoch": 0.3021965952773202, "grad_norm": 0.3734496831893921, "learning_rate": 1.8911241395734652e-05, "loss": 0.4452, "step": 11006 }, { "epoch": 0.3022240527182867, "grad_norm": 0.36797529458999634, "learning_rate": 1.8911045411646464e-05, "loss": 0.5721, "step": 11007 }, { "epoch": 0.30225151015925317, "grad_norm": 0.38844358921051025, "learning_rate": 1.8910849410936333e-05, "loss": 0.4141, "step": 11008 }, { "epoch": 0.30227896760021966, "grad_norm": 0.3836982846260071, "learning_rate": 1.8910653393604623e-05, "loss": 0.539, "step": 11009 }, { "epoch": 0.30230642504118616, "grad_norm": 0.3667266070842743, "learning_rate": 1.89104573596517e-05, "loss": 0.4171, "step": 11010 }, { "epoch": 0.30233388248215265, "grad_norm": 0.3465175926685333, "learning_rate": 1.8910261309077932e-05, "loss": 0.5992, "step": 11011 }, { "epoch": 0.30236133992311914, "grad_norm": 0.36270904541015625, "learning_rate": 1.891006524188368e-05, "loss": 0.5199, "step": 11012 }, { "epoch": 0.3023887973640857, "grad_norm": 0.36580193042755127, "learning_rate": 1.8909869158069313e-05, "loss": 0.4885, "step": 11013 }, { "epoch": 0.3024162548050522, "grad_norm": 0.37753745913505554, "learning_rate": 1.8909673057635197e-05, "loss": 0.515, "step": 11014 }, { "epoch": 0.3024437122460187, "grad_norm": 0.3852120041847229, "learning_rate": 1.89094769405817e-05, "loss": 0.5866, "step": 11015 }, { "epoch": 0.3024711696869852, "grad_norm": 0.3899472653865814, "learning_rate": 1.8909280806909184e-05, "loss": 0.5152, "step": 11016 }, { "epoch": 0.30249862712795167, "grad_norm": 0.38010329008102417, "learning_rate": 1.8909084656618014e-05, "loss": 0.4494, "step": 11017 }, { "epoch": 0.30252608456891816, "grad_norm": 0.3977871835231781, "learning_rate": 1.8908888489708556e-05, "loss": 0.5496, "step": 11018 }, { "epoch": 0.30255354200988466, "grad_norm": 0.35185250639915466, "learning_rate": 1.890869230618118e-05, "loss": 0.5602, "step": 11019 }, { "epoch": 0.3025809994508512, "grad_norm": 0.38584157824516296, "learning_rate": 1.890849610603625e-05, "loss": 0.5779, "step": 11020 }, { "epoch": 0.3026084568918177, "grad_norm": 0.39722099900245667, "learning_rate": 1.890829988927413e-05, "loss": 0.6081, "step": 11021 }, { "epoch": 0.3026359143327842, "grad_norm": 0.3683856725692749, "learning_rate": 1.8908103655895186e-05, "loss": 0.5024, "step": 11022 }, { "epoch": 0.3026633717737507, "grad_norm": 0.4113459885120392, "learning_rate": 1.890790740589979e-05, "loss": 0.548, "step": 11023 }, { "epoch": 0.3026908292147172, "grad_norm": 0.5018664002418518, "learning_rate": 1.8907711139288302e-05, "loss": 0.5726, "step": 11024 }, { "epoch": 0.3027182866556837, "grad_norm": 0.37239977717399597, "learning_rate": 1.8907514856061085e-05, "loss": 0.5864, "step": 11025 }, { "epoch": 0.30274574409665017, "grad_norm": 0.3443738520145416, "learning_rate": 1.8907318556218514e-05, "loss": 0.4307, "step": 11026 }, { "epoch": 0.3027732015376167, "grad_norm": 0.4101811647415161, "learning_rate": 1.890712223976095e-05, "loss": 0.5526, "step": 11027 }, { "epoch": 0.3028006589785832, "grad_norm": 0.3058456778526306, "learning_rate": 1.890692590668876e-05, "loss": 0.4442, "step": 11028 }, { "epoch": 0.3028281164195497, "grad_norm": 0.33990174531936646, "learning_rate": 1.8906729557002316e-05, "loss": 0.4946, "step": 11029 }, { "epoch": 0.3028555738605162, "grad_norm": 0.3908669352531433, "learning_rate": 1.8906533190701973e-05, "loss": 0.4889, "step": 11030 }, { "epoch": 0.3028830313014827, "grad_norm": 0.37746283411979675, "learning_rate": 1.8906336807788102e-05, "loss": 0.5259, "step": 11031 }, { "epoch": 0.3029104887424492, "grad_norm": 0.3542831242084503, "learning_rate": 1.8906140408261073e-05, "loss": 0.5853, "step": 11032 }, { "epoch": 0.3029379461834157, "grad_norm": 0.35915061831474304, "learning_rate": 1.890594399212125e-05, "loss": 0.5319, "step": 11033 }, { "epoch": 0.30296540362438223, "grad_norm": 0.449190229177475, "learning_rate": 1.8905747559368998e-05, "loss": 0.5418, "step": 11034 }, { "epoch": 0.30299286106534873, "grad_norm": 0.3490431606769562, "learning_rate": 1.8905551110004684e-05, "loss": 0.512, "step": 11035 }, { "epoch": 0.3030203185063152, "grad_norm": 0.35249394178390503, "learning_rate": 1.8905354644028672e-05, "loss": 0.4811, "step": 11036 }, { "epoch": 0.3030477759472817, "grad_norm": 0.3732965588569641, "learning_rate": 1.8905158161441337e-05, "loss": 0.5558, "step": 11037 }, { "epoch": 0.3030752333882482, "grad_norm": 0.4014623761177063, "learning_rate": 1.8904961662243036e-05, "loss": 0.5062, "step": 11038 }, { "epoch": 0.3031026908292147, "grad_norm": 0.4227347671985626, "learning_rate": 1.8904765146434144e-05, "loss": 0.5547, "step": 11039 }, { "epoch": 0.3031301482701812, "grad_norm": 0.35590022802352905, "learning_rate": 1.8904568614015018e-05, "loss": 0.5176, "step": 11040 }, { "epoch": 0.30315760571114775, "grad_norm": 0.3649507761001587, "learning_rate": 1.8904372064986033e-05, "loss": 0.4598, "step": 11041 }, { "epoch": 0.30318506315211424, "grad_norm": 0.4455070495605469, "learning_rate": 1.8904175499347548e-05, "loss": 0.5999, "step": 11042 }, { "epoch": 0.30321252059308074, "grad_norm": 0.3774770498275757, "learning_rate": 1.8903978917099937e-05, "loss": 0.488, "step": 11043 }, { "epoch": 0.30323997803404723, "grad_norm": 0.46598726511001587, "learning_rate": 1.890378231824356e-05, "loss": 0.5827, "step": 11044 }, { "epoch": 0.3032674354750137, "grad_norm": 0.4246653616428375, "learning_rate": 1.8903585702778793e-05, "loss": 0.5708, "step": 11045 }, { "epoch": 0.3032948929159802, "grad_norm": 0.34432244300842285, "learning_rate": 1.890338907070599e-05, "loss": 0.4961, "step": 11046 }, { "epoch": 0.3033223503569467, "grad_norm": 0.4035293459892273, "learning_rate": 1.890319242202553e-05, "loss": 0.4107, "step": 11047 }, { "epoch": 0.3033498077979132, "grad_norm": 0.3767447769641876, "learning_rate": 1.8902995756737773e-05, "loss": 0.594, "step": 11048 }, { "epoch": 0.30337726523887976, "grad_norm": 0.3230535387992859, "learning_rate": 1.8902799074843088e-05, "loss": 0.5147, "step": 11049 }, { "epoch": 0.30340472267984625, "grad_norm": 0.39285826683044434, "learning_rate": 1.890260237634184e-05, "loss": 0.5682, "step": 11050 }, { "epoch": 0.30343218012081274, "grad_norm": 0.6553149223327637, "learning_rate": 1.89024056612344e-05, "loss": 0.5417, "step": 11051 }, { "epoch": 0.30345963756177924, "grad_norm": 0.3421376049518585, "learning_rate": 1.890220892952113e-05, "loss": 0.5252, "step": 11052 }, { "epoch": 0.30348709500274573, "grad_norm": 0.353819340467453, "learning_rate": 1.89020121812024e-05, "loss": 0.5146, "step": 11053 }, { "epoch": 0.3035145524437122, "grad_norm": 0.3898753821849823, "learning_rate": 1.8901815416278574e-05, "loss": 0.5325, "step": 11054 }, { "epoch": 0.3035420098846787, "grad_norm": 0.4109979569911957, "learning_rate": 1.8901618634750022e-05, "loss": 0.5444, "step": 11055 }, { "epoch": 0.30356946732564527, "grad_norm": 0.3525926470756531, "learning_rate": 1.890142183661711e-05, "loss": 0.555, "step": 11056 }, { "epoch": 0.30359692476661176, "grad_norm": 0.3611142337322235, "learning_rate": 1.8901225021880206e-05, "loss": 0.5166, "step": 11057 }, { "epoch": 0.30362438220757826, "grad_norm": 0.33160778880119324, "learning_rate": 1.8901028190539676e-05, "loss": 0.5397, "step": 11058 }, { "epoch": 0.30365183964854475, "grad_norm": 0.398023784160614, "learning_rate": 1.8900831342595888e-05, "loss": 0.6044, "step": 11059 }, { "epoch": 0.30367929708951125, "grad_norm": 0.38794273138046265, "learning_rate": 1.890063447804921e-05, "loss": 0.6014, "step": 11060 }, { "epoch": 0.30370675453047774, "grad_norm": 0.3740580677986145, "learning_rate": 1.890043759690001e-05, "loss": 0.4537, "step": 11061 }, { "epoch": 0.30373421197144423, "grad_norm": 0.4302629828453064, "learning_rate": 1.8900240699148647e-05, "loss": 0.5907, "step": 11062 }, { "epoch": 0.3037616694124108, "grad_norm": 0.34816381335258484, "learning_rate": 1.8900043784795493e-05, "loss": 0.4908, "step": 11063 }, { "epoch": 0.3037891268533773, "grad_norm": 0.34268495440483093, "learning_rate": 1.8899846853840924e-05, "loss": 0.5366, "step": 11064 }, { "epoch": 0.30381658429434377, "grad_norm": 0.39991295337677, "learning_rate": 1.8899649906285296e-05, "loss": 0.5373, "step": 11065 }, { "epoch": 0.30384404173531027, "grad_norm": 0.38865283131599426, "learning_rate": 1.8899452942128983e-05, "loss": 0.5386, "step": 11066 }, { "epoch": 0.30387149917627676, "grad_norm": 0.3438073992729187, "learning_rate": 1.8899255961372347e-05, "loss": 0.5009, "step": 11067 }, { "epoch": 0.30389895661724325, "grad_norm": 0.410197913646698, "learning_rate": 1.8899058964015758e-05, "loss": 0.5667, "step": 11068 }, { "epoch": 0.30392641405820975, "grad_norm": 0.37694212794303894, "learning_rate": 1.8898861950059587e-05, "loss": 0.5719, "step": 11069 }, { "epoch": 0.3039538714991763, "grad_norm": 0.37923651933670044, "learning_rate": 1.8898664919504196e-05, "loss": 0.4865, "step": 11070 }, { "epoch": 0.3039813289401428, "grad_norm": 0.3996421992778778, "learning_rate": 1.8898467872349957e-05, "loss": 0.5514, "step": 11071 }, { "epoch": 0.3040087863811093, "grad_norm": 0.35641753673553467, "learning_rate": 1.8898270808597234e-05, "loss": 0.4709, "step": 11072 }, { "epoch": 0.3040362438220758, "grad_norm": 0.4131092131137848, "learning_rate": 1.8898073728246396e-05, "loss": 0.5852, "step": 11073 }, { "epoch": 0.3040637012630423, "grad_norm": 0.39850255846977234, "learning_rate": 1.889787663129781e-05, "loss": 0.548, "step": 11074 }, { "epoch": 0.30409115870400877, "grad_norm": 0.37634116411209106, "learning_rate": 1.8897679517751846e-05, "loss": 0.5494, "step": 11075 }, { "epoch": 0.30411861614497526, "grad_norm": 0.3412899971008301, "learning_rate": 1.8897482387608867e-05, "loss": 0.5105, "step": 11076 }, { "epoch": 0.3041460735859418, "grad_norm": 0.5330919623374939, "learning_rate": 1.8897285240869247e-05, "loss": 0.4889, "step": 11077 }, { "epoch": 0.3041735310269083, "grad_norm": 0.5215820670127869, "learning_rate": 1.8897088077533347e-05, "loss": 0.622, "step": 11078 }, { "epoch": 0.3042009884678748, "grad_norm": 0.39739474654197693, "learning_rate": 1.8896890897601537e-05, "loss": 0.5734, "step": 11079 }, { "epoch": 0.3042284459088413, "grad_norm": 0.35337233543395996, "learning_rate": 1.889669370107419e-05, "loss": 0.4498, "step": 11080 }, { "epoch": 0.3042559033498078, "grad_norm": 0.3757399320602417, "learning_rate": 1.889649648795167e-05, "loss": 0.5759, "step": 11081 }, { "epoch": 0.3042833607907743, "grad_norm": 0.3337714672088623, "learning_rate": 1.8896299258234343e-05, "loss": 0.5123, "step": 11082 }, { "epoch": 0.3043108182317408, "grad_norm": 0.33044418692588806, "learning_rate": 1.889610201192258e-05, "loss": 0.4714, "step": 11083 }, { "epoch": 0.3043382756727073, "grad_norm": 0.541510820388794, "learning_rate": 1.8895904749016745e-05, "loss": 0.5748, "step": 11084 }, { "epoch": 0.3043657331136738, "grad_norm": 0.35813772678375244, "learning_rate": 1.889570746951721e-05, "loss": 0.4871, "step": 11085 }, { "epoch": 0.3043931905546403, "grad_norm": 0.39763695001602173, "learning_rate": 1.8895510173424337e-05, "loss": 0.5201, "step": 11086 }, { "epoch": 0.3044206479956068, "grad_norm": 0.35128843784332275, "learning_rate": 1.8895312860738504e-05, "loss": 0.4265, "step": 11087 }, { "epoch": 0.3044481054365733, "grad_norm": 0.3703915476799011, "learning_rate": 1.889511553146007e-05, "loss": 0.5842, "step": 11088 }, { "epoch": 0.3044755628775398, "grad_norm": 0.41767123341560364, "learning_rate": 1.8894918185589408e-05, "loss": 0.527, "step": 11089 }, { "epoch": 0.3045030203185063, "grad_norm": 0.460068017244339, "learning_rate": 1.8894720823126888e-05, "loss": 0.4644, "step": 11090 }, { "epoch": 0.30453047775947284, "grad_norm": 0.37693119049072266, "learning_rate": 1.889452344407287e-05, "loss": 0.4477, "step": 11091 }, { "epoch": 0.30455793520043933, "grad_norm": 0.32962101697921753, "learning_rate": 1.889432604842773e-05, "loss": 0.4901, "step": 11092 }, { "epoch": 0.3045853926414058, "grad_norm": 1.7100961208343506, "learning_rate": 1.889412863619183e-05, "loss": 0.4982, "step": 11093 }, { "epoch": 0.3046128500823723, "grad_norm": 0.3305343985557556, "learning_rate": 1.8893931207365545e-05, "loss": 0.4847, "step": 11094 }, { "epoch": 0.3046403075233388, "grad_norm": 0.4166306257247925, "learning_rate": 1.8893733761949235e-05, "loss": 0.6074, "step": 11095 }, { "epoch": 0.3046677649643053, "grad_norm": 0.4021270275115967, "learning_rate": 1.8893536299943277e-05, "loss": 0.5879, "step": 11096 }, { "epoch": 0.3046952224052718, "grad_norm": 0.3765951097011566, "learning_rate": 1.8893338821348033e-05, "loss": 0.5089, "step": 11097 }, { "epoch": 0.30472267984623835, "grad_norm": 0.315895140171051, "learning_rate": 1.8893141326163876e-05, "loss": 0.4404, "step": 11098 }, { "epoch": 0.30475013728720485, "grad_norm": 0.36457934975624084, "learning_rate": 1.8892943814391168e-05, "loss": 0.4795, "step": 11099 }, { "epoch": 0.30477759472817134, "grad_norm": 0.36460238695144653, "learning_rate": 1.889274628603029e-05, "loss": 0.58, "step": 11100 }, { "epoch": 0.30480505216913784, "grad_norm": 0.3470749855041504, "learning_rate": 1.889254874108159e-05, "loss": 0.4698, "step": 11101 }, { "epoch": 0.30483250961010433, "grad_norm": 0.35653284192085266, "learning_rate": 1.8892351179545457e-05, "loss": 0.5692, "step": 11102 }, { "epoch": 0.3048599670510708, "grad_norm": 0.33791258931159973, "learning_rate": 1.889215360142225e-05, "loss": 0.5446, "step": 11103 }, { "epoch": 0.3048874244920373, "grad_norm": 0.34571146965026855, "learning_rate": 1.8891956006712337e-05, "loss": 0.5253, "step": 11104 }, { "epoch": 0.30491488193300387, "grad_norm": 0.37521734833717346, "learning_rate": 1.8891758395416087e-05, "loss": 0.5717, "step": 11105 }, { "epoch": 0.30494233937397036, "grad_norm": 0.39167213439941406, "learning_rate": 1.889156076753387e-05, "loss": 0.4981, "step": 11106 }, { "epoch": 0.30496979681493686, "grad_norm": 0.41422390937805176, "learning_rate": 1.8891363123066056e-05, "loss": 0.5136, "step": 11107 }, { "epoch": 0.30499725425590335, "grad_norm": 0.3901185393333435, "learning_rate": 1.8891165462013014e-05, "loss": 0.5542, "step": 11108 }, { "epoch": 0.30502471169686984, "grad_norm": 0.38954511284828186, "learning_rate": 1.8890967784375104e-05, "loss": 0.5531, "step": 11109 }, { "epoch": 0.30505216913783634, "grad_norm": 0.3441867232322693, "learning_rate": 1.8890770090152705e-05, "loss": 0.5321, "step": 11110 }, { "epoch": 0.30507962657880283, "grad_norm": 0.39199697971343994, "learning_rate": 1.8890572379346184e-05, "loss": 0.3829, "step": 11111 }, { "epoch": 0.3051070840197694, "grad_norm": 1.3695906400680542, "learning_rate": 1.8890374651955906e-05, "loss": 0.4986, "step": 11112 }, { "epoch": 0.3051345414607359, "grad_norm": 0.3585394322872162, "learning_rate": 1.889017690798224e-05, "loss": 0.5481, "step": 11113 }, { "epoch": 0.30516199890170237, "grad_norm": 0.3653968870639801, "learning_rate": 1.8889979147425562e-05, "loss": 0.5329, "step": 11114 }, { "epoch": 0.30518945634266886, "grad_norm": 0.33642831444740295, "learning_rate": 1.888978137028623e-05, "loss": 0.4556, "step": 11115 }, { "epoch": 0.30521691378363536, "grad_norm": 0.48646461963653564, "learning_rate": 1.888958357656462e-05, "loss": 0.4926, "step": 11116 }, { "epoch": 0.30524437122460185, "grad_norm": 0.3697434067726135, "learning_rate": 1.88893857662611e-05, "loss": 0.4893, "step": 11117 }, { "epoch": 0.30527182866556835, "grad_norm": 0.3651670217514038, "learning_rate": 1.8889187939376042e-05, "loss": 0.5702, "step": 11118 }, { "epoch": 0.3052992861065349, "grad_norm": 0.40896478295326233, "learning_rate": 1.8888990095909806e-05, "loss": 0.5925, "step": 11119 }, { "epoch": 0.3053267435475014, "grad_norm": 0.3947643041610718, "learning_rate": 1.888879223586277e-05, "loss": 0.5668, "step": 11120 }, { "epoch": 0.3053542009884679, "grad_norm": 0.37182289361953735, "learning_rate": 1.8888594359235297e-05, "loss": 0.4636, "step": 11121 }, { "epoch": 0.3053816584294344, "grad_norm": 0.3330807685852051, "learning_rate": 1.888839646602776e-05, "loss": 0.4599, "step": 11122 }, { "epoch": 0.30540911587040087, "grad_norm": 0.34839746356010437, "learning_rate": 1.8888198556240527e-05, "loss": 0.5, "step": 11123 }, { "epoch": 0.30543657331136737, "grad_norm": 0.39751002192497253, "learning_rate": 1.8888000629873967e-05, "loss": 0.5283, "step": 11124 }, { "epoch": 0.30546403075233386, "grad_norm": 0.8811100125312805, "learning_rate": 1.8887802686928448e-05, "loss": 0.5671, "step": 11125 }, { "epoch": 0.3054914881933004, "grad_norm": 0.3810620605945587, "learning_rate": 1.888760472740434e-05, "loss": 0.4879, "step": 11126 }, { "epoch": 0.3055189456342669, "grad_norm": 0.32690519094467163, "learning_rate": 1.8887406751302013e-05, "loss": 0.4237, "step": 11127 }, { "epoch": 0.3055464030752334, "grad_norm": 0.3620862662792206, "learning_rate": 1.8887208758621837e-05, "loss": 0.6174, "step": 11128 }, { "epoch": 0.3055738605161999, "grad_norm": 0.42098695039749146, "learning_rate": 1.8887010749364178e-05, "loss": 0.5735, "step": 11129 }, { "epoch": 0.3056013179571664, "grad_norm": 0.33709877729415894, "learning_rate": 1.8886812723529407e-05, "loss": 0.4568, "step": 11130 }, { "epoch": 0.3056287753981329, "grad_norm": 0.3997587561607361, "learning_rate": 1.88866146811179e-05, "loss": 0.6112, "step": 11131 }, { "epoch": 0.3056562328390994, "grad_norm": 0.3535623550415039, "learning_rate": 1.8886416622130012e-05, "loss": 0.5472, "step": 11132 }, { "epoch": 0.3056836902800659, "grad_norm": 0.343872994184494, "learning_rate": 1.8886218546566127e-05, "loss": 0.4998, "step": 11133 }, { "epoch": 0.3057111477210324, "grad_norm": 0.35378050804138184, "learning_rate": 1.8886020454426606e-05, "loss": 0.4997, "step": 11134 }, { "epoch": 0.3057386051619989, "grad_norm": 0.36605924367904663, "learning_rate": 1.8885822345711823e-05, "loss": 0.5724, "step": 11135 }, { "epoch": 0.3057660626029654, "grad_norm": 0.3024701476097107, "learning_rate": 1.888562422042214e-05, "loss": 0.441, "step": 11136 }, { "epoch": 0.3057935200439319, "grad_norm": 0.338835746049881, "learning_rate": 1.8885426078557937e-05, "loss": 0.5089, "step": 11137 }, { "epoch": 0.3058209774848984, "grad_norm": 0.342124879360199, "learning_rate": 1.8885227920119575e-05, "loss": 0.487, "step": 11138 }, { "epoch": 0.3058484349258649, "grad_norm": 0.3488984704017639, "learning_rate": 1.888502974510743e-05, "loss": 0.5064, "step": 11139 }, { "epoch": 0.30587589236683144, "grad_norm": 0.34910279512405396, "learning_rate": 1.8884831553521866e-05, "loss": 0.3722, "step": 11140 }, { "epoch": 0.30590334980779793, "grad_norm": 0.36274996399879456, "learning_rate": 1.8884633345363257e-05, "loss": 0.4789, "step": 11141 }, { "epoch": 0.3059308072487644, "grad_norm": 0.4109959304332733, "learning_rate": 1.888443512063197e-05, "loss": 0.5024, "step": 11142 }, { "epoch": 0.3059582646897309, "grad_norm": 0.4690670073032379, "learning_rate": 1.888423687932838e-05, "loss": 0.5341, "step": 11143 }, { "epoch": 0.3059857221306974, "grad_norm": 0.3583936393260956, "learning_rate": 1.888403862145285e-05, "loss": 0.4822, "step": 11144 }, { "epoch": 0.3060131795716639, "grad_norm": 0.4393978416919708, "learning_rate": 1.888384034700575e-05, "loss": 0.4841, "step": 11145 }, { "epoch": 0.3060406370126304, "grad_norm": 0.3791782855987549, "learning_rate": 1.8883642055987453e-05, "loss": 0.5396, "step": 11146 }, { "epoch": 0.30606809445359695, "grad_norm": 0.385439932346344, "learning_rate": 1.8883443748398333e-05, "loss": 0.5263, "step": 11147 }, { "epoch": 0.30609555189456344, "grad_norm": 0.39708036184310913, "learning_rate": 1.888324542423875e-05, "loss": 0.5413, "step": 11148 }, { "epoch": 0.30612300933552994, "grad_norm": 0.43442943692207336, "learning_rate": 1.888304708350908e-05, "loss": 0.5313, "step": 11149 }, { "epoch": 0.30615046677649643, "grad_norm": 0.43197867274284363, "learning_rate": 1.8882848726209694e-05, "loss": 0.5251, "step": 11150 }, { "epoch": 0.3061779242174629, "grad_norm": 0.35540616512298584, "learning_rate": 1.888265035234096e-05, "loss": 0.5658, "step": 11151 }, { "epoch": 0.3062053816584294, "grad_norm": 0.4087935984134674, "learning_rate": 1.8882451961903246e-05, "loss": 0.5364, "step": 11152 }, { "epoch": 0.3062328390993959, "grad_norm": 0.3958073556423187, "learning_rate": 1.8882253554896927e-05, "loss": 0.5635, "step": 11153 }, { "epoch": 0.30626029654036246, "grad_norm": 0.35070890188217163, "learning_rate": 1.8882055131322365e-05, "loss": 0.5101, "step": 11154 }, { "epoch": 0.30628775398132896, "grad_norm": 0.39489981532096863, "learning_rate": 1.888185669117994e-05, "loss": 0.5703, "step": 11155 }, { "epoch": 0.30631521142229545, "grad_norm": 0.38177090883255005, "learning_rate": 1.8881658234470017e-05, "loss": 0.483, "step": 11156 }, { "epoch": 0.30634266886326195, "grad_norm": 0.5147358775138855, "learning_rate": 1.8881459761192965e-05, "loss": 0.5852, "step": 11157 }, { "epoch": 0.30637012630422844, "grad_norm": 0.40596964955329895, "learning_rate": 1.8881261271349158e-05, "loss": 0.5576, "step": 11158 }, { "epoch": 0.30639758374519493, "grad_norm": 0.3992031514644623, "learning_rate": 1.8881062764938965e-05, "loss": 0.5265, "step": 11159 }, { "epoch": 0.30642504118616143, "grad_norm": 1.1233546733856201, "learning_rate": 1.888086424196275e-05, "loss": 0.4586, "step": 11160 }, { "epoch": 0.306452498627128, "grad_norm": 0.3356042504310608, "learning_rate": 1.8880665702420894e-05, "loss": 0.4778, "step": 11161 }, { "epoch": 0.3064799560680945, "grad_norm": 0.3586128056049347, "learning_rate": 1.888046714631376e-05, "loss": 0.5609, "step": 11162 }, { "epoch": 0.30650741350906097, "grad_norm": 0.4246090352535248, "learning_rate": 1.8880268573641722e-05, "loss": 0.5529, "step": 11163 }, { "epoch": 0.30653487095002746, "grad_norm": 0.35237622261047363, "learning_rate": 1.8880069984405148e-05, "loss": 0.4341, "step": 11164 }, { "epoch": 0.30656232839099395, "grad_norm": 0.41939783096313477, "learning_rate": 1.8879871378604408e-05, "loss": 0.6026, "step": 11165 }, { "epoch": 0.30658978583196045, "grad_norm": 0.39945438504219055, "learning_rate": 1.8879672756239873e-05, "loss": 0.5128, "step": 11166 }, { "epoch": 0.30661724327292694, "grad_norm": 0.411832720041275, "learning_rate": 1.887947411731192e-05, "loss": 0.5703, "step": 11167 }, { "epoch": 0.3066447007138935, "grad_norm": 0.3555002808570862, "learning_rate": 1.887927546182091e-05, "loss": 0.5469, "step": 11168 }, { "epoch": 0.30667215815486, "grad_norm": 0.39220160245895386, "learning_rate": 1.8879076789767216e-05, "loss": 0.547, "step": 11169 }, { "epoch": 0.3066996155958265, "grad_norm": 0.3668062686920166, "learning_rate": 1.887887810115121e-05, "loss": 0.4923, "step": 11170 }, { "epoch": 0.306727073036793, "grad_norm": 0.3531438112258911, "learning_rate": 1.887867939597326e-05, "loss": 0.4498, "step": 11171 }, { "epoch": 0.30675453047775947, "grad_norm": 0.3432660400867462, "learning_rate": 1.8878480674233744e-05, "loss": 0.459, "step": 11172 }, { "epoch": 0.30678198791872596, "grad_norm": 0.3698910176753998, "learning_rate": 1.887828193593303e-05, "loss": 0.509, "step": 11173 }, { "epoch": 0.30680944535969246, "grad_norm": 0.39957156777381897, "learning_rate": 1.887808318107148e-05, "loss": 0.5989, "step": 11174 }, { "epoch": 0.306836902800659, "grad_norm": 0.3847670555114746, "learning_rate": 1.8877884409649478e-05, "loss": 0.5319, "step": 11175 }, { "epoch": 0.3068643602416255, "grad_norm": 0.41158926486968994, "learning_rate": 1.8877685621667383e-05, "loss": 0.621, "step": 11176 }, { "epoch": 0.306891817682592, "grad_norm": 0.3625026345252991, "learning_rate": 1.8877486817125574e-05, "loss": 0.5486, "step": 11177 }, { "epoch": 0.3069192751235585, "grad_norm": 0.3941892087459564, "learning_rate": 1.8877287996024417e-05, "loss": 0.6189, "step": 11178 }, { "epoch": 0.306946732564525, "grad_norm": 0.3573720157146454, "learning_rate": 1.8877089158364284e-05, "loss": 0.5053, "step": 11179 }, { "epoch": 0.3069741900054915, "grad_norm": 0.51397705078125, "learning_rate": 1.8876890304145548e-05, "loss": 0.4986, "step": 11180 }, { "epoch": 0.30700164744645797, "grad_norm": 0.3304443359375, "learning_rate": 1.887669143336858e-05, "loss": 0.4895, "step": 11181 }, { "epoch": 0.30702910488742446, "grad_norm": 0.40529879927635193, "learning_rate": 1.887649254603374e-05, "loss": 0.5615, "step": 11182 }, { "epoch": 0.307056562328391, "grad_norm": 0.45701003074645996, "learning_rate": 1.887629364214142e-05, "loss": 0.5666, "step": 11183 }, { "epoch": 0.3070840197693575, "grad_norm": 0.385613352060318, "learning_rate": 1.8876094721691974e-05, "loss": 0.5635, "step": 11184 }, { "epoch": 0.307111477210324, "grad_norm": 0.3333829939365387, "learning_rate": 1.887589578468578e-05, "loss": 0.5487, "step": 11185 }, { "epoch": 0.3071389346512905, "grad_norm": 0.3847090005874634, "learning_rate": 1.8875696831123207e-05, "loss": 0.5398, "step": 11186 }, { "epoch": 0.307166392092257, "grad_norm": 0.37340474128723145, "learning_rate": 1.8875497861004625e-05, "loss": 0.4973, "step": 11187 }, { "epoch": 0.3071938495332235, "grad_norm": 0.36711013317108154, "learning_rate": 1.8875298874330407e-05, "loss": 0.5712, "step": 11188 }, { "epoch": 0.30722130697419, "grad_norm": 0.36542782187461853, "learning_rate": 1.8875099871100926e-05, "loss": 0.4953, "step": 11189 }, { "epoch": 0.30724876441515653, "grad_norm": 0.3626367449760437, "learning_rate": 1.887490085131655e-05, "loss": 0.578, "step": 11190 }, { "epoch": 0.307276221856123, "grad_norm": 0.45987555384635925, "learning_rate": 1.8874701814977653e-05, "loss": 0.5178, "step": 11191 }, { "epoch": 0.3073036792970895, "grad_norm": 0.3902856409549713, "learning_rate": 1.88745027620846e-05, "loss": 0.5393, "step": 11192 }, { "epoch": 0.307331136738056, "grad_norm": 0.42272672057151794, "learning_rate": 1.8874303692637772e-05, "loss": 0.5869, "step": 11193 }, { "epoch": 0.3073585941790225, "grad_norm": 0.3236834704875946, "learning_rate": 1.8874104606637533e-05, "loss": 0.4965, "step": 11194 }, { "epoch": 0.307386051619989, "grad_norm": 0.3777744174003601, "learning_rate": 1.8873905504084255e-05, "loss": 0.5244, "step": 11195 }, { "epoch": 0.3074135090609555, "grad_norm": 0.42487698793411255, "learning_rate": 1.8873706384978313e-05, "loss": 0.5869, "step": 11196 }, { "epoch": 0.30744096650192204, "grad_norm": 0.3544144630432129, "learning_rate": 1.887350724932008e-05, "loss": 0.467, "step": 11197 }, { "epoch": 0.30746842394288854, "grad_norm": 0.33189520239830017, "learning_rate": 1.8873308097109918e-05, "loss": 0.5268, "step": 11198 }, { "epoch": 0.30749588138385503, "grad_norm": 0.36669474840164185, "learning_rate": 1.8873108928348208e-05, "loss": 0.5442, "step": 11199 }, { "epoch": 0.3075233388248215, "grad_norm": 0.413105309009552, "learning_rate": 1.8872909743035314e-05, "loss": 0.4661, "step": 11200 }, { "epoch": 0.307550796265788, "grad_norm": 0.37067943811416626, "learning_rate": 1.8872710541171614e-05, "loss": 0.5275, "step": 11201 }, { "epoch": 0.3075782537067545, "grad_norm": 0.38456183671951294, "learning_rate": 1.887251132275748e-05, "loss": 0.5076, "step": 11202 }, { "epoch": 0.307605711147721, "grad_norm": 0.34157684445381165, "learning_rate": 1.8872312087793276e-05, "loss": 0.5025, "step": 11203 }, { "epoch": 0.30763316858868756, "grad_norm": 0.3774910569190979, "learning_rate": 1.887211283627938e-05, "loss": 0.5331, "step": 11204 }, { "epoch": 0.30766062602965405, "grad_norm": 0.37343159317970276, "learning_rate": 1.887191356821616e-05, "loss": 0.5279, "step": 11205 }, { "epoch": 0.30768808347062054, "grad_norm": 0.3930940628051758, "learning_rate": 1.887171428360399e-05, "loss": 0.5325, "step": 11206 }, { "epoch": 0.30771554091158704, "grad_norm": 0.5612218976020813, "learning_rate": 1.8871514982443245e-05, "loss": 0.5521, "step": 11207 }, { "epoch": 0.30774299835255353, "grad_norm": 0.35316982865333557, "learning_rate": 1.887131566473429e-05, "loss": 0.5041, "step": 11208 }, { "epoch": 0.30777045579352, "grad_norm": 0.38150864839553833, "learning_rate": 1.8871116330477504e-05, "loss": 0.4917, "step": 11209 }, { "epoch": 0.3077979132344865, "grad_norm": 0.38072529435157776, "learning_rate": 1.8870916979673248e-05, "loss": 0.5502, "step": 11210 }, { "epoch": 0.30782537067545307, "grad_norm": 0.45102939009666443, "learning_rate": 1.8870717612321905e-05, "loss": 0.5282, "step": 11211 }, { "epoch": 0.30785282811641956, "grad_norm": 0.337655246257782, "learning_rate": 1.887051822842384e-05, "loss": 0.5143, "step": 11212 }, { "epoch": 0.30788028555738606, "grad_norm": 0.3924558460712433, "learning_rate": 1.887031882797943e-05, "loss": 0.582, "step": 11213 }, { "epoch": 0.30790774299835255, "grad_norm": 0.41195330023765564, "learning_rate": 1.8870119410989046e-05, "loss": 0.5792, "step": 11214 }, { "epoch": 0.30793520043931905, "grad_norm": 0.4164257049560547, "learning_rate": 1.8869919977453052e-05, "loss": 0.5024, "step": 11215 }, { "epoch": 0.30796265788028554, "grad_norm": 0.42760002613067627, "learning_rate": 1.8869720527371832e-05, "loss": 0.4942, "step": 11216 }, { "epoch": 0.30799011532125203, "grad_norm": 0.3968799412250519, "learning_rate": 1.886952106074575e-05, "loss": 0.5398, "step": 11217 }, { "epoch": 0.3080175727622186, "grad_norm": 0.4029574990272522, "learning_rate": 1.8869321577575178e-05, "loss": 0.4926, "step": 11218 }, { "epoch": 0.3080450302031851, "grad_norm": 0.31256774067878723, "learning_rate": 1.8869122077860497e-05, "loss": 0.5423, "step": 11219 }, { "epoch": 0.30807248764415157, "grad_norm": 0.3690100908279419, "learning_rate": 1.8868922561602067e-05, "loss": 0.5483, "step": 11220 }, { "epoch": 0.30809994508511807, "grad_norm": 0.37646380066871643, "learning_rate": 1.8868723028800263e-05, "loss": 0.5305, "step": 11221 }, { "epoch": 0.30812740252608456, "grad_norm": 0.3178396224975586, "learning_rate": 1.8868523479455468e-05, "loss": 0.5369, "step": 11222 }, { "epoch": 0.30815485996705105, "grad_norm": 0.3572700321674347, "learning_rate": 1.8868323913568042e-05, "loss": 0.5059, "step": 11223 }, { "epoch": 0.30818231740801755, "grad_norm": 0.3217736780643463, "learning_rate": 1.886812433113836e-05, "loss": 0.4716, "step": 11224 }, { "epoch": 0.3082097748489841, "grad_norm": 0.36267292499542236, "learning_rate": 1.88679247321668e-05, "loss": 0.4975, "step": 11225 }, { "epoch": 0.3082372322899506, "grad_norm": 0.40382349491119385, "learning_rate": 1.8867725116653727e-05, "loss": 0.5417, "step": 11226 }, { "epoch": 0.3082646897309171, "grad_norm": 0.3571723699569702, "learning_rate": 1.886752548459952e-05, "loss": 0.5127, "step": 11227 }, { "epoch": 0.3082921471718836, "grad_norm": 0.41572263836860657, "learning_rate": 1.886732583600454e-05, "loss": 0.472, "step": 11228 }, { "epoch": 0.3083196046128501, "grad_norm": 0.40711382031440735, "learning_rate": 1.8867126170869174e-05, "loss": 0.5332, "step": 11229 }, { "epoch": 0.30834706205381657, "grad_norm": 0.36496198177337646, "learning_rate": 1.8866926489193784e-05, "loss": 0.5124, "step": 11230 }, { "epoch": 0.30837451949478306, "grad_norm": 0.326542466878891, "learning_rate": 1.8866726790978748e-05, "loss": 0.4243, "step": 11231 }, { "epoch": 0.3084019769357496, "grad_norm": 0.3426162302494049, "learning_rate": 1.8866527076224438e-05, "loss": 0.4704, "step": 11232 }, { "epoch": 0.3084294343767161, "grad_norm": 0.35072818398475647, "learning_rate": 1.8866327344931223e-05, "loss": 0.4827, "step": 11233 }, { "epoch": 0.3084568918176826, "grad_norm": 0.4028296172618866, "learning_rate": 1.886612759709948e-05, "loss": 0.5892, "step": 11234 }, { "epoch": 0.3084843492586491, "grad_norm": 0.3248579502105713, "learning_rate": 1.8865927832729575e-05, "loss": 0.5464, "step": 11235 }, { "epoch": 0.3085118066996156, "grad_norm": 0.40313732624053955, "learning_rate": 1.8865728051821887e-05, "loss": 0.5848, "step": 11236 }, { "epoch": 0.3085392641405821, "grad_norm": 0.3550894260406494, "learning_rate": 1.886552825437679e-05, "loss": 0.5547, "step": 11237 }, { "epoch": 0.3085667215815486, "grad_norm": 0.345862478017807, "learning_rate": 1.886532844039465e-05, "loss": 0.4932, "step": 11238 }, { "epoch": 0.3085941790225151, "grad_norm": 0.33761218190193176, "learning_rate": 1.886512860987584e-05, "loss": 0.5217, "step": 11239 }, { "epoch": 0.3086216364634816, "grad_norm": 0.34675318002700806, "learning_rate": 1.886492876282074e-05, "loss": 0.5321, "step": 11240 }, { "epoch": 0.3086490939044481, "grad_norm": 0.36701714992523193, "learning_rate": 1.886472889922972e-05, "loss": 0.4906, "step": 11241 }, { "epoch": 0.3086765513454146, "grad_norm": 0.3599221706390381, "learning_rate": 1.886452901910315e-05, "loss": 0.455, "step": 11242 }, { "epoch": 0.3087040087863811, "grad_norm": 0.37764397263526917, "learning_rate": 1.8864329122441402e-05, "loss": 0.5288, "step": 11243 }, { "epoch": 0.3087314662273476, "grad_norm": 0.35613590478897095, "learning_rate": 1.8864129209244853e-05, "loss": 0.5536, "step": 11244 }, { "epoch": 0.3087589236683141, "grad_norm": 0.4068291485309601, "learning_rate": 1.886392927951387e-05, "loss": 0.4589, "step": 11245 }, { "epoch": 0.30878638110928064, "grad_norm": 0.3485753536224365, "learning_rate": 1.8863729333248836e-05, "loss": 0.5517, "step": 11246 }, { "epoch": 0.30881383855024713, "grad_norm": 0.3644098937511444, "learning_rate": 1.886352937045011e-05, "loss": 0.4773, "step": 11247 }, { "epoch": 0.3088412959912136, "grad_norm": 0.38053247332572937, "learning_rate": 1.886332939111808e-05, "loss": 0.498, "step": 11248 }, { "epoch": 0.3088687534321801, "grad_norm": 0.3748336136341095, "learning_rate": 1.8863129395253107e-05, "loss": 0.5071, "step": 11249 }, { "epoch": 0.3088962108731466, "grad_norm": 0.4131118953227997, "learning_rate": 1.8862929382855573e-05, "loss": 0.5111, "step": 11250 }, { "epoch": 0.3089236683141131, "grad_norm": 0.3669385612010956, "learning_rate": 1.8862729353925847e-05, "loss": 0.5315, "step": 11251 }, { "epoch": 0.3089511257550796, "grad_norm": 0.3807041645050049, "learning_rate": 1.88625293084643e-05, "loss": 0.5202, "step": 11252 }, { "epoch": 0.30897858319604615, "grad_norm": 0.4254024624824524, "learning_rate": 1.8862329246471307e-05, "loss": 0.4682, "step": 11253 }, { "epoch": 0.30900604063701265, "grad_norm": 0.38423192501068115, "learning_rate": 1.886212916794724e-05, "loss": 0.5197, "step": 11254 }, { "epoch": 0.30903349807797914, "grad_norm": 0.34807705879211426, "learning_rate": 1.8861929072892476e-05, "loss": 0.5323, "step": 11255 }, { "epoch": 0.30906095551894563, "grad_norm": 0.3598158657550812, "learning_rate": 1.8861728961307385e-05, "loss": 0.5579, "step": 11256 }, { "epoch": 0.30908841295991213, "grad_norm": 0.3874618709087372, "learning_rate": 1.8861528833192343e-05, "loss": 0.5748, "step": 11257 }, { "epoch": 0.3091158704008786, "grad_norm": 0.37065351009368896, "learning_rate": 1.886132868854772e-05, "loss": 0.5575, "step": 11258 }, { "epoch": 0.3091433278418451, "grad_norm": 0.4391917288303375, "learning_rate": 1.8861128527373892e-05, "loss": 0.5372, "step": 11259 }, { "epoch": 0.30917078528281167, "grad_norm": 0.3859999179840088, "learning_rate": 1.8860928349671227e-05, "loss": 0.5407, "step": 11260 }, { "epoch": 0.30919824272377816, "grad_norm": 0.35364970564842224, "learning_rate": 1.8860728155440108e-05, "loss": 0.5285, "step": 11261 }, { "epoch": 0.30922570016474465, "grad_norm": 0.35805779695510864, "learning_rate": 1.8860527944680898e-05, "loss": 0.5052, "step": 11262 }, { "epoch": 0.30925315760571115, "grad_norm": 0.3966748118400574, "learning_rate": 1.886032771739398e-05, "loss": 0.5184, "step": 11263 }, { "epoch": 0.30928061504667764, "grad_norm": 0.38174790143966675, "learning_rate": 1.8860127473579722e-05, "loss": 0.5191, "step": 11264 }, { "epoch": 0.30930807248764414, "grad_norm": 0.44568976759910583, "learning_rate": 1.8859927213238497e-05, "loss": 0.4984, "step": 11265 }, { "epoch": 0.30933552992861063, "grad_norm": 0.36715659499168396, "learning_rate": 1.885972693637068e-05, "loss": 0.5263, "step": 11266 }, { "epoch": 0.3093629873695772, "grad_norm": 0.5756145715713501, "learning_rate": 1.885952664297664e-05, "loss": 0.5074, "step": 11267 }, { "epoch": 0.3093904448105437, "grad_norm": 0.3872334063053131, "learning_rate": 1.8859326333056765e-05, "loss": 0.5283, "step": 11268 }, { "epoch": 0.30941790225151017, "grad_norm": 0.3908112943172455, "learning_rate": 1.8859126006611412e-05, "loss": 0.5216, "step": 11269 }, { "epoch": 0.30944535969247666, "grad_norm": 0.3831113576889038, "learning_rate": 1.8858925663640965e-05, "loss": 0.5663, "step": 11270 }, { "epoch": 0.30947281713344316, "grad_norm": 0.5934312343597412, "learning_rate": 1.8858725304145792e-05, "loss": 0.5449, "step": 11271 }, { "epoch": 0.30950027457440965, "grad_norm": 0.3202603757381439, "learning_rate": 1.8858524928126267e-05, "loss": 0.451, "step": 11272 }, { "epoch": 0.30952773201537614, "grad_norm": 0.3631031811237335, "learning_rate": 1.885832453558277e-05, "loss": 0.5633, "step": 11273 }, { "epoch": 0.3095551894563427, "grad_norm": 0.32394886016845703, "learning_rate": 1.8858124126515667e-05, "loss": 0.4832, "step": 11274 }, { "epoch": 0.3095826468973092, "grad_norm": 0.37036362290382385, "learning_rate": 1.885792370092534e-05, "loss": 0.5431, "step": 11275 }, { "epoch": 0.3096101043382757, "grad_norm": 0.3637445569038391, "learning_rate": 1.8857723258812154e-05, "loss": 0.5992, "step": 11276 }, { "epoch": 0.3096375617792422, "grad_norm": 0.3149133622646332, "learning_rate": 1.8857522800176488e-05, "loss": 0.5549, "step": 11277 }, { "epoch": 0.30966501922020867, "grad_norm": 0.3982038199901581, "learning_rate": 1.8857322325018715e-05, "loss": 0.4847, "step": 11278 }, { "epoch": 0.30969247666117516, "grad_norm": 0.41919785737991333, "learning_rate": 1.885712183333921e-05, "loss": 0.5599, "step": 11279 }, { "epoch": 0.30971993410214166, "grad_norm": 0.37685608863830566, "learning_rate": 1.8856921325138343e-05, "loss": 0.5277, "step": 11280 }, { "epoch": 0.3097473915431082, "grad_norm": 0.4007309377193451, "learning_rate": 1.8856720800416496e-05, "loss": 0.5105, "step": 11281 }, { "epoch": 0.3097748489840747, "grad_norm": 0.36570093035697937, "learning_rate": 1.8856520259174033e-05, "loss": 0.5713, "step": 11282 }, { "epoch": 0.3098023064250412, "grad_norm": 0.38156914710998535, "learning_rate": 1.8856319701411334e-05, "loss": 0.5557, "step": 11283 }, { "epoch": 0.3098297638660077, "grad_norm": 0.3536161482334137, "learning_rate": 1.8856119127128772e-05, "loss": 0.4971, "step": 11284 }, { "epoch": 0.3098572213069742, "grad_norm": 0.34560626745224, "learning_rate": 1.8855918536326727e-05, "loss": 0.5117, "step": 11285 }, { "epoch": 0.3098846787479407, "grad_norm": 0.4105583727359772, "learning_rate": 1.885571792900556e-05, "loss": 0.6173, "step": 11286 }, { "epoch": 0.3099121361889072, "grad_norm": 0.3999376893043518, "learning_rate": 1.8855517305165655e-05, "loss": 0.5745, "step": 11287 }, { "epoch": 0.3099395936298737, "grad_norm": 0.3814598321914673, "learning_rate": 1.8855316664807388e-05, "loss": 0.53, "step": 11288 }, { "epoch": 0.3099670510708402, "grad_norm": 0.37982991337776184, "learning_rate": 1.8855116007931124e-05, "loss": 0.6337, "step": 11289 }, { "epoch": 0.3099945085118067, "grad_norm": 0.3733052611351013, "learning_rate": 1.8854915334537244e-05, "loss": 0.6006, "step": 11290 }, { "epoch": 0.3100219659527732, "grad_norm": 0.32917851209640503, "learning_rate": 1.885471464462612e-05, "loss": 0.4561, "step": 11291 }, { "epoch": 0.3100494233937397, "grad_norm": 0.3965912461280823, "learning_rate": 1.8854513938198127e-05, "loss": 0.546, "step": 11292 }, { "epoch": 0.3100768808347062, "grad_norm": 0.4211497902870178, "learning_rate": 1.8854313215253643e-05, "loss": 0.6284, "step": 11293 }, { "epoch": 0.3101043382756727, "grad_norm": 0.3517448902130127, "learning_rate": 1.8854112475793036e-05, "loss": 0.4975, "step": 11294 }, { "epoch": 0.31013179571663924, "grad_norm": 0.357208251953125, "learning_rate": 1.8853911719816682e-05, "loss": 0.603, "step": 11295 }, { "epoch": 0.31015925315760573, "grad_norm": 0.40067464113235474, "learning_rate": 1.8853710947324958e-05, "loss": 0.6465, "step": 11296 }, { "epoch": 0.3101867105985722, "grad_norm": 0.34183788299560547, "learning_rate": 1.8853510158318236e-05, "loss": 0.4746, "step": 11297 }, { "epoch": 0.3102141680395387, "grad_norm": 0.36647069454193115, "learning_rate": 1.8853309352796896e-05, "loss": 0.538, "step": 11298 }, { "epoch": 0.3102416254805052, "grad_norm": 0.3190484046936035, "learning_rate": 1.8853108530761303e-05, "loss": 0.4935, "step": 11299 }, { "epoch": 0.3102690829214717, "grad_norm": 0.32953938841819763, "learning_rate": 1.885290769221184e-05, "loss": 0.5298, "step": 11300 }, { "epoch": 0.3102965403624382, "grad_norm": 0.3689562678337097, "learning_rate": 1.885270683714888e-05, "loss": 0.5438, "step": 11301 }, { "epoch": 0.31032399780340475, "grad_norm": 0.3849407136440277, "learning_rate": 1.8852505965572792e-05, "loss": 0.5533, "step": 11302 }, { "epoch": 0.31035145524437124, "grad_norm": 0.41696494817733765, "learning_rate": 1.8852305077483958e-05, "loss": 0.5171, "step": 11303 }, { "epoch": 0.31037891268533774, "grad_norm": 0.42613956332206726, "learning_rate": 1.885210417288275e-05, "loss": 0.6309, "step": 11304 }, { "epoch": 0.31040637012630423, "grad_norm": 0.3716728687286377, "learning_rate": 1.8851903251769542e-05, "loss": 0.5493, "step": 11305 }, { "epoch": 0.3104338275672707, "grad_norm": 0.39843443036079407, "learning_rate": 1.885170231414471e-05, "loss": 0.525, "step": 11306 }, { "epoch": 0.3104612850082372, "grad_norm": 0.3518000543117523, "learning_rate": 1.8851501360008627e-05, "loss": 0.5194, "step": 11307 }, { "epoch": 0.3104887424492037, "grad_norm": 0.31618738174438477, "learning_rate": 1.885130038936167e-05, "loss": 0.4513, "step": 11308 }, { "epoch": 0.31051619989017026, "grad_norm": 0.39830586314201355, "learning_rate": 1.885109940220421e-05, "loss": 0.5385, "step": 11309 }, { "epoch": 0.31054365733113676, "grad_norm": 0.39882344007492065, "learning_rate": 1.885089839853663e-05, "loss": 0.611, "step": 11310 }, { "epoch": 0.31057111477210325, "grad_norm": 0.47808486223220825, "learning_rate": 1.8850697378359295e-05, "loss": 0.5672, "step": 11311 }, { "epoch": 0.31059857221306975, "grad_norm": 0.34714749455451965, "learning_rate": 1.8850496341672583e-05, "loss": 0.5815, "step": 11312 }, { "epoch": 0.31062602965403624, "grad_norm": 0.3725500702857971, "learning_rate": 1.8850295288476875e-05, "loss": 0.5125, "step": 11313 }, { "epoch": 0.31065348709500273, "grad_norm": 0.36777591705322266, "learning_rate": 1.8850094218772538e-05, "loss": 0.5715, "step": 11314 }, { "epoch": 0.31068094453596923, "grad_norm": 0.36089763045310974, "learning_rate": 1.8849893132559956e-05, "loss": 0.5491, "step": 11315 }, { "epoch": 0.3107084019769357, "grad_norm": 0.3788428008556366, "learning_rate": 1.8849692029839492e-05, "loss": 0.6035, "step": 11316 }, { "epoch": 0.31073585941790227, "grad_norm": 0.36409837007522583, "learning_rate": 1.884949091061153e-05, "loss": 0.5395, "step": 11317 }, { "epoch": 0.31076331685886877, "grad_norm": 0.4296551048755646, "learning_rate": 1.8849289774876446e-05, "loss": 0.5723, "step": 11318 }, { "epoch": 0.31079077429983526, "grad_norm": 0.35167181491851807, "learning_rate": 1.884908862263461e-05, "loss": 0.4639, "step": 11319 }, { "epoch": 0.31081823174080175, "grad_norm": 0.6806081533432007, "learning_rate": 1.88488874538864e-05, "loss": 0.4543, "step": 11320 }, { "epoch": 0.31084568918176825, "grad_norm": 0.3366139829158783, "learning_rate": 1.8848686268632193e-05, "loss": 0.5027, "step": 11321 }, { "epoch": 0.31087314662273474, "grad_norm": 0.3681115210056305, "learning_rate": 1.8848485066872357e-05, "loss": 0.5811, "step": 11322 }, { "epoch": 0.31090060406370124, "grad_norm": 0.3778879642486572, "learning_rate": 1.8848283848607277e-05, "loss": 0.5513, "step": 11323 }, { "epoch": 0.3109280615046678, "grad_norm": 0.33004075288772583, "learning_rate": 1.884808261383732e-05, "loss": 0.5232, "step": 11324 }, { "epoch": 0.3109555189456343, "grad_norm": 0.3816484808921814, "learning_rate": 1.884788136256287e-05, "loss": 0.5295, "step": 11325 }, { "epoch": 0.3109829763866008, "grad_norm": 0.3728027939796448, "learning_rate": 1.8847680094784292e-05, "loss": 0.5481, "step": 11326 }, { "epoch": 0.31101043382756727, "grad_norm": 0.3555505871772766, "learning_rate": 1.8847478810501963e-05, "loss": 0.5436, "step": 11327 }, { "epoch": 0.31103789126853376, "grad_norm": 0.331566721200943, "learning_rate": 1.884727750971627e-05, "loss": 0.4482, "step": 11328 }, { "epoch": 0.31106534870950026, "grad_norm": 0.40008875727653503, "learning_rate": 1.884707619242758e-05, "loss": 0.5492, "step": 11329 }, { "epoch": 0.31109280615046675, "grad_norm": 0.4424271583557129, "learning_rate": 1.8846874858636265e-05, "loss": 0.5452, "step": 11330 }, { "epoch": 0.3111202635914333, "grad_norm": 0.35575130581855774, "learning_rate": 1.884667350834271e-05, "loss": 0.5009, "step": 11331 }, { "epoch": 0.3111477210323998, "grad_norm": 0.3749183416366577, "learning_rate": 1.884647214154728e-05, "loss": 0.4858, "step": 11332 }, { "epoch": 0.3111751784733663, "grad_norm": 0.40534406900405884, "learning_rate": 1.884627075825036e-05, "loss": 0.5064, "step": 11333 }, { "epoch": 0.3112026359143328, "grad_norm": 0.326568603515625, "learning_rate": 1.884606935845232e-05, "loss": 0.4694, "step": 11334 }, { "epoch": 0.3112300933552993, "grad_norm": 0.40226808190345764, "learning_rate": 1.8845867942153536e-05, "loss": 0.58, "step": 11335 }, { "epoch": 0.31125755079626577, "grad_norm": 0.351814329624176, "learning_rate": 1.8845666509354388e-05, "loss": 0.5317, "step": 11336 }, { "epoch": 0.31128500823723226, "grad_norm": 0.3486435115337372, "learning_rate": 1.8845465060055244e-05, "loss": 0.5216, "step": 11337 }, { "epoch": 0.3113124656781988, "grad_norm": 0.34122416377067566, "learning_rate": 1.8845263594256492e-05, "loss": 0.5282, "step": 11338 }, { "epoch": 0.3113399231191653, "grad_norm": 0.36832940578460693, "learning_rate": 1.8845062111958494e-05, "loss": 0.5048, "step": 11339 }, { "epoch": 0.3113673805601318, "grad_norm": 0.38114285469055176, "learning_rate": 1.8844860613161635e-05, "loss": 0.5293, "step": 11340 }, { "epoch": 0.3113948380010983, "grad_norm": 0.3649303913116455, "learning_rate": 1.884465909786629e-05, "loss": 0.4974, "step": 11341 }, { "epoch": 0.3114222954420648, "grad_norm": 0.36032944917678833, "learning_rate": 1.8844457566072828e-05, "loss": 0.5473, "step": 11342 }, { "epoch": 0.3114497528830313, "grad_norm": 0.3570484519004822, "learning_rate": 1.8844256017781632e-05, "loss": 0.5896, "step": 11343 }, { "epoch": 0.3114772103239978, "grad_norm": 0.4493335783481598, "learning_rate": 1.8844054452993073e-05, "loss": 0.4676, "step": 11344 }, { "epoch": 0.3115046677649643, "grad_norm": 0.37627431750297546, "learning_rate": 1.8843852871707535e-05, "loss": 0.5348, "step": 11345 }, { "epoch": 0.3115321252059308, "grad_norm": 0.3497869074344635, "learning_rate": 1.8843651273925386e-05, "loss": 0.4898, "step": 11346 }, { "epoch": 0.3115595826468973, "grad_norm": 0.4168960452079773, "learning_rate": 1.8843449659647003e-05, "loss": 0.6214, "step": 11347 }, { "epoch": 0.3115870400878638, "grad_norm": 0.3855307698249817, "learning_rate": 1.884324802887277e-05, "loss": 0.5214, "step": 11348 }, { "epoch": 0.3116144975288303, "grad_norm": 0.37278813123703003, "learning_rate": 1.8843046381603052e-05, "loss": 0.5448, "step": 11349 }, { "epoch": 0.3116419549697968, "grad_norm": 0.3956211507320404, "learning_rate": 1.8842844717838233e-05, "loss": 0.5961, "step": 11350 }, { "epoch": 0.3116694124107633, "grad_norm": 0.3878605365753174, "learning_rate": 1.8842643037578684e-05, "loss": 0.5669, "step": 11351 }, { "epoch": 0.31169686985172984, "grad_norm": 0.3865380883216858, "learning_rate": 1.884244134082478e-05, "loss": 0.6094, "step": 11352 }, { "epoch": 0.31172432729269633, "grad_norm": 0.3722621500492096, "learning_rate": 1.8842239627576906e-05, "loss": 0.4518, "step": 11353 }, { "epoch": 0.31175178473366283, "grad_norm": 0.40808913111686707, "learning_rate": 1.8842037897835434e-05, "loss": 0.5379, "step": 11354 }, { "epoch": 0.3117792421746293, "grad_norm": 0.4044474959373474, "learning_rate": 1.8841836151600735e-05, "loss": 0.4585, "step": 11355 }, { "epoch": 0.3118066996155958, "grad_norm": 0.3958859145641327, "learning_rate": 1.884163438887319e-05, "loss": 0.4874, "step": 11356 }, { "epoch": 0.3118341570565623, "grad_norm": 0.39973965287208557, "learning_rate": 1.884143260965318e-05, "loss": 0.6342, "step": 11357 }, { "epoch": 0.3118616144975288, "grad_norm": 0.32400909066200256, "learning_rate": 1.8841230813941068e-05, "loss": 0.4811, "step": 11358 }, { "epoch": 0.31188907193849535, "grad_norm": 0.36785465478897095, "learning_rate": 1.8841029001737245e-05, "loss": 0.5556, "step": 11359 }, { "epoch": 0.31191652937946185, "grad_norm": 0.39711031317710876, "learning_rate": 1.884082717304208e-05, "loss": 0.5149, "step": 11360 }, { "epoch": 0.31194398682042834, "grad_norm": 0.3844723105430603, "learning_rate": 1.884062532785595e-05, "loss": 0.529, "step": 11361 }, { "epoch": 0.31197144426139484, "grad_norm": 0.6045128703117371, "learning_rate": 1.8840423466179232e-05, "loss": 0.5163, "step": 11362 }, { "epoch": 0.31199890170236133, "grad_norm": 0.41016659140586853, "learning_rate": 1.8840221588012305e-05, "loss": 0.551, "step": 11363 }, { "epoch": 0.3120263591433278, "grad_norm": 0.37000808119773865, "learning_rate": 1.8840019693355544e-05, "loss": 0.4864, "step": 11364 }, { "epoch": 0.3120538165842943, "grad_norm": 0.36425498127937317, "learning_rate": 1.8839817782209318e-05, "loss": 0.5363, "step": 11365 }, { "epoch": 0.31208127402526087, "grad_norm": 0.4120878577232361, "learning_rate": 1.8839615854574017e-05, "loss": 0.5579, "step": 11366 }, { "epoch": 0.31210873146622736, "grad_norm": 0.5323432683944702, "learning_rate": 1.883941391045001e-05, "loss": 0.5898, "step": 11367 }, { "epoch": 0.31213618890719386, "grad_norm": 0.46722346544265747, "learning_rate": 1.883921194983767e-05, "loss": 0.4565, "step": 11368 }, { "epoch": 0.31216364634816035, "grad_norm": 0.3680703639984131, "learning_rate": 1.8839009972737385e-05, "loss": 0.5351, "step": 11369 }, { "epoch": 0.31219110378912684, "grad_norm": 0.40063047409057617, "learning_rate": 1.883880797914952e-05, "loss": 0.5632, "step": 11370 }, { "epoch": 0.31221856123009334, "grad_norm": 0.3955143094062805, "learning_rate": 1.883860596907446e-05, "loss": 0.52, "step": 11371 }, { "epoch": 0.31224601867105983, "grad_norm": 0.3987671434879303, "learning_rate": 1.883840394251258e-05, "loss": 0.5044, "step": 11372 }, { "epoch": 0.3122734761120264, "grad_norm": 0.38556650280952454, "learning_rate": 1.8838201899464254e-05, "loss": 0.5977, "step": 11373 }, { "epoch": 0.3123009335529929, "grad_norm": 0.3640457093715668, "learning_rate": 1.883799983992986e-05, "loss": 0.5737, "step": 11374 }, { "epoch": 0.31232839099395937, "grad_norm": 0.3726956248283386, "learning_rate": 1.883779776390978e-05, "loss": 0.5028, "step": 11375 }, { "epoch": 0.31235584843492586, "grad_norm": 0.44557851552963257, "learning_rate": 1.8837595671404383e-05, "loss": 0.4355, "step": 11376 }, { "epoch": 0.31238330587589236, "grad_norm": 0.40030235052108765, "learning_rate": 1.883739356241405e-05, "loss": 0.5378, "step": 11377 }, { "epoch": 0.31241076331685885, "grad_norm": 0.3414601981639862, "learning_rate": 1.8837191436939156e-05, "loss": 0.524, "step": 11378 }, { "epoch": 0.31243822075782535, "grad_norm": 0.34316831827163696, "learning_rate": 1.883698929498008e-05, "loss": 0.5561, "step": 11379 }, { "epoch": 0.3124656781987919, "grad_norm": 0.3822277784347534, "learning_rate": 1.8836787136537198e-05, "loss": 0.4425, "step": 11380 }, { "epoch": 0.3124931356397584, "grad_norm": 0.3973458707332611, "learning_rate": 1.883658496161089e-05, "loss": 0.4705, "step": 11381 }, { "epoch": 0.3125205930807249, "grad_norm": 0.3669137954711914, "learning_rate": 1.883638277020153e-05, "loss": 0.4851, "step": 11382 }, { "epoch": 0.3125480505216914, "grad_norm": 0.3308241367340088, "learning_rate": 1.8836180562309493e-05, "loss": 0.5117, "step": 11383 }, { "epoch": 0.3125755079626579, "grad_norm": 0.32334885001182556, "learning_rate": 1.8835978337935162e-05, "loss": 0.5222, "step": 11384 }, { "epoch": 0.31260296540362437, "grad_norm": 0.4367953836917877, "learning_rate": 1.8835776097078908e-05, "loss": 0.5632, "step": 11385 }, { "epoch": 0.31263042284459086, "grad_norm": 0.37910184264183044, "learning_rate": 1.8835573839741114e-05, "loss": 0.567, "step": 11386 }, { "epoch": 0.3126578802855574, "grad_norm": 0.35174256563186646, "learning_rate": 1.8835371565922156e-05, "loss": 0.5443, "step": 11387 }, { "epoch": 0.3126853377265239, "grad_norm": 0.3539165258407593, "learning_rate": 1.883516927562241e-05, "loss": 0.5147, "step": 11388 }, { "epoch": 0.3127127951674904, "grad_norm": 0.3502107858657837, "learning_rate": 1.8834966968842254e-05, "loss": 0.5224, "step": 11389 }, { "epoch": 0.3127402526084569, "grad_norm": 0.3474768400192261, "learning_rate": 1.883476464558206e-05, "loss": 0.5394, "step": 11390 }, { "epoch": 0.3127677100494234, "grad_norm": 0.36897218227386475, "learning_rate": 1.8834562305842215e-05, "loss": 0.5068, "step": 11391 }, { "epoch": 0.3127951674903899, "grad_norm": 0.3595658540725708, "learning_rate": 1.8834359949623093e-05, "loss": 0.5523, "step": 11392 }, { "epoch": 0.3128226249313564, "grad_norm": 0.3529592454433441, "learning_rate": 1.8834157576925066e-05, "loss": 0.5351, "step": 11393 }, { "epoch": 0.3128500823723229, "grad_norm": 0.40112701058387756, "learning_rate": 1.8833955187748515e-05, "loss": 0.5219, "step": 11394 }, { "epoch": 0.3128775398132894, "grad_norm": 0.41050034761428833, "learning_rate": 1.883375278209382e-05, "loss": 0.5125, "step": 11395 }, { "epoch": 0.3129049972542559, "grad_norm": 0.42023107409477234, "learning_rate": 1.8833550359961354e-05, "loss": 0.5497, "step": 11396 }, { "epoch": 0.3129324546952224, "grad_norm": 0.3478147089481354, "learning_rate": 1.8833347921351503e-05, "loss": 0.4126, "step": 11397 }, { "epoch": 0.3129599121361889, "grad_norm": 0.410381555557251, "learning_rate": 1.883314546626463e-05, "loss": 0.5235, "step": 11398 }, { "epoch": 0.3129873695771554, "grad_norm": 0.39227548241615295, "learning_rate": 1.883294299470113e-05, "loss": 0.5471, "step": 11399 }, { "epoch": 0.3130148270181219, "grad_norm": 0.42469561100006104, "learning_rate": 1.883274050666137e-05, "loss": 0.5337, "step": 11400 }, { "epoch": 0.31304228445908844, "grad_norm": 0.4026385545730591, "learning_rate": 1.8832538002145728e-05, "loss": 0.4831, "step": 11401 }, { "epoch": 0.31306974190005493, "grad_norm": 0.3808736801147461, "learning_rate": 1.8832335481154587e-05, "loss": 0.5374, "step": 11402 }, { "epoch": 0.3130971993410214, "grad_norm": 0.35645681619644165, "learning_rate": 1.8832132943688317e-05, "loss": 0.5334, "step": 11403 }, { "epoch": 0.3131246567819879, "grad_norm": 0.6723828911781311, "learning_rate": 1.8831930389747303e-05, "loss": 0.4953, "step": 11404 }, { "epoch": 0.3131521142229544, "grad_norm": 0.33795520663261414, "learning_rate": 1.883172781933192e-05, "loss": 0.3935, "step": 11405 }, { "epoch": 0.3131795716639209, "grad_norm": 0.3502257168292999, "learning_rate": 1.8831525232442547e-05, "loss": 0.5126, "step": 11406 }, { "epoch": 0.3132070291048874, "grad_norm": 0.3785855174064636, "learning_rate": 1.883132262907956e-05, "loss": 0.5388, "step": 11407 }, { "epoch": 0.31323448654585395, "grad_norm": 0.3328665494918823, "learning_rate": 1.8831120009243333e-05, "loss": 0.4335, "step": 11408 }, { "epoch": 0.31326194398682045, "grad_norm": 0.46332070231437683, "learning_rate": 1.883091737293425e-05, "loss": 0.5587, "step": 11409 }, { "epoch": 0.31328940142778694, "grad_norm": 0.3294747769832611, "learning_rate": 1.8830714720152695e-05, "loss": 0.4678, "step": 11410 }, { "epoch": 0.31331685886875343, "grad_norm": 0.3508578836917877, "learning_rate": 1.883051205089903e-05, "loss": 0.5501, "step": 11411 }, { "epoch": 0.31334431630971993, "grad_norm": 0.38605642318725586, "learning_rate": 1.8830309365173643e-05, "loss": 0.4804, "step": 11412 }, { "epoch": 0.3133717737506864, "grad_norm": 0.3724783658981323, "learning_rate": 1.8830106662976914e-05, "loss": 0.5251, "step": 11413 }, { "epoch": 0.3133992311916529, "grad_norm": 0.36035382747650146, "learning_rate": 1.882990394430922e-05, "loss": 0.5377, "step": 11414 }, { "epoch": 0.31342668863261947, "grad_norm": 0.3796662986278534, "learning_rate": 1.882970120917093e-05, "loss": 0.4843, "step": 11415 }, { "epoch": 0.31345414607358596, "grad_norm": 0.35763317346572876, "learning_rate": 1.8829498457562435e-05, "loss": 0.4254, "step": 11416 }, { "epoch": 0.31348160351455245, "grad_norm": 0.36443617939949036, "learning_rate": 1.8829295689484103e-05, "loss": 0.5082, "step": 11417 }, { "epoch": 0.31350906095551895, "grad_norm": 0.42130762338638306, "learning_rate": 1.882909290493632e-05, "loss": 0.5953, "step": 11418 }, { "epoch": 0.31353651839648544, "grad_norm": 0.487576425075531, "learning_rate": 1.8828890103919458e-05, "loss": 0.601, "step": 11419 }, { "epoch": 0.31356397583745194, "grad_norm": 0.3987968862056732, "learning_rate": 1.88286872864339e-05, "loss": 0.5735, "step": 11420 }, { "epoch": 0.31359143327841843, "grad_norm": 0.4013846814632416, "learning_rate": 1.8828484452480024e-05, "loss": 0.5936, "step": 11421 }, { "epoch": 0.313618890719385, "grad_norm": 0.41978880763053894, "learning_rate": 1.8828281602058204e-05, "loss": 0.5468, "step": 11422 }, { "epoch": 0.3136463481603515, "grad_norm": 0.3428100645542145, "learning_rate": 1.8828078735168823e-05, "loss": 0.5743, "step": 11423 }, { "epoch": 0.31367380560131797, "grad_norm": 0.36299219727516174, "learning_rate": 1.8827875851812254e-05, "loss": 0.4606, "step": 11424 }, { "epoch": 0.31370126304228446, "grad_norm": 0.4280633330345154, "learning_rate": 1.882767295198888e-05, "loss": 0.608, "step": 11425 }, { "epoch": 0.31372872048325096, "grad_norm": 0.32518595457077026, "learning_rate": 1.882747003569908e-05, "loss": 0.472, "step": 11426 }, { "epoch": 0.31375617792421745, "grad_norm": 0.4326891005039215, "learning_rate": 1.8827267102943233e-05, "loss": 0.5997, "step": 11427 }, { "epoch": 0.31378363536518394, "grad_norm": 0.3928942382335663, "learning_rate": 1.8827064153721714e-05, "loss": 0.5238, "step": 11428 }, { "epoch": 0.3138110928061505, "grad_norm": 0.3610036373138428, "learning_rate": 1.8826861188034898e-05, "loss": 0.4697, "step": 11429 }, { "epoch": 0.313838550247117, "grad_norm": 0.37714314460754395, "learning_rate": 1.8826658205883178e-05, "loss": 0.539, "step": 11430 }, { "epoch": 0.3138660076880835, "grad_norm": 0.4490905702114105, "learning_rate": 1.8826455207266916e-05, "loss": 0.578, "step": 11431 }, { "epoch": 0.31389346512905, "grad_norm": 0.38836225867271423, "learning_rate": 1.8826252192186502e-05, "loss": 0.6084, "step": 11432 }, { "epoch": 0.31392092257001647, "grad_norm": 0.36520203948020935, "learning_rate": 1.882604916064231e-05, "loss": 0.5578, "step": 11433 }, { "epoch": 0.31394838001098296, "grad_norm": 0.36134952306747437, "learning_rate": 1.8825846112634715e-05, "loss": 0.4851, "step": 11434 }, { "epoch": 0.31397583745194946, "grad_norm": 0.35547468066215515, "learning_rate": 1.8825643048164105e-05, "loss": 0.6206, "step": 11435 }, { "epoch": 0.314003294892916, "grad_norm": 0.3826424479484558, "learning_rate": 1.882543996723085e-05, "loss": 0.5012, "step": 11436 }, { "epoch": 0.3140307523338825, "grad_norm": 0.4151184856891632, "learning_rate": 1.8825236869835338e-05, "loss": 0.5395, "step": 11437 }, { "epoch": 0.314058209774849, "grad_norm": 0.3388717472553253, "learning_rate": 1.8825033755977936e-05, "loss": 0.4409, "step": 11438 }, { "epoch": 0.3140856672158155, "grad_norm": 0.4017639756202698, "learning_rate": 1.8824830625659035e-05, "loss": 0.5724, "step": 11439 }, { "epoch": 0.314113124656782, "grad_norm": 0.33554476499557495, "learning_rate": 1.8824627478879008e-05, "loss": 0.5083, "step": 11440 }, { "epoch": 0.3141405820977485, "grad_norm": 0.35722264647483826, "learning_rate": 1.8824424315638233e-05, "loss": 0.5388, "step": 11441 }, { "epoch": 0.31416803953871497, "grad_norm": 0.39653512835502625, "learning_rate": 1.8824221135937088e-05, "loss": 0.475, "step": 11442 }, { "epoch": 0.3141954969796815, "grad_norm": 0.36152908205986023, "learning_rate": 1.8824017939775955e-05, "loss": 0.524, "step": 11443 }, { "epoch": 0.314222954420648, "grad_norm": 0.3253719210624695, "learning_rate": 1.8823814727155216e-05, "loss": 0.492, "step": 11444 }, { "epoch": 0.3142504118616145, "grad_norm": 0.39274415373802185, "learning_rate": 1.882361149807524e-05, "loss": 0.5564, "step": 11445 }, { "epoch": 0.314277869302581, "grad_norm": 0.39100995659828186, "learning_rate": 1.882340825253642e-05, "loss": 0.5995, "step": 11446 }, { "epoch": 0.3143053267435475, "grad_norm": 0.38169682025909424, "learning_rate": 1.882320499053912e-05, "loss": 0.5554, "step": 11447 }, { "epoch": 0.314332784184514, "grad_norm": 0.5330796837806702, "learning_rate": 1.882300171208373e-05, "loss": 0.5365, "step": 11448 }, { "epoch": 0.3143602416254805, "grad_norm": 0.34954431653022766, "learning_rate": 1.8822798417170628e-05, "loss": 0.4791, "step": 11449 }, { "epoch": 0.314387699066447, "grad_norm": 0.34413471817970276, "learning_rate": 1.882259510580019e-05, "loss": 0.4972, "step": 11450 }, { "epoch": 0.31441515650741353, "grad_norm": 0.3539583683013916, "learning_rate": 1.8822391777972796e-05, "loss": 0.5313, "step": 11451 }, { "epoch": 0.31444261394838, "grad_norm": 0.42896172404289246, "learning_rate": 1.8822188433688822e-05, "loss": 0.5486, "step": 11452 }, { "epoch": 0.3144700713893465, "grad_norm": 0.44661253690719604, "learning_rate": 1.8821985072948655e-05, "loss": 0.5733, "step": 11453 }, { "epoch": 0.314497528830313, "grad_norm": 0.40140625834465027, "learning_rate": 1.882178169575267e-05, "loss": 0.5521, "step": 11454 }, { "epoch": 0.3145249862712795, "grad_norm": 0.42406731843948364, "learning_rate": 1.8821578302101246e-05, "loss": 0.5596, "step": 11455 }, { "epoch": 0.314552443712246, "grad_norm": 0.3538399338722229, "learning_rate": 1.8821374891994762e-05, "loss": 0.4983, "step": 11456 }, { "epoch": 0.3145799011532125, "grad_norm": 0.37343060970306396, "learning_rate": 1.88211714654336e-05, "loss": 0.5813, "step": 11457 }, { "epoch": 0.31460735859417904, "grad_norm": 0.314078688621521, "learning_rate": 1.8820968022418134e-05, "loss": 0.4336, "step": 11458 }, { "epoch": 0.31463481603514554, "grad_norm": 0.3489234149456024, "learning_rate": 1.8820764562948752e-05, "loss": 0.4489, "step": 11459 }, { "epoch": 0.31466227347611203, "grad_norm": 0.3500722050666809, "learning_rate": 1.8820561087025826e-05, "loss": 0.4884, "step": 11460 }, { "epoch": 0.3146897309170785, "grad_norm": 0.39647892117500305, "learning_rate": 1.882035759464974e-05, "loss": 0.5098, "step": 11461 }, { "epoch": 0.314717188358045, "grad_norm": 0.48070645332336426, "learning_rate": 1.882015408582087e-05, "loss": 0.53, "step": 11462 }, { "epoch": 0.3147446457990115, "grad_norm": 0.3557566702365875, "learning_rate": 1.8819950560539597e-05, "loss": 0.5094, "step": 11463 }, { "epoch": 0.314772103239978, "grad_norm": 0.40291810035705566, "learning_rate": 1.8819747018806304e-05, "loss": 0.5257, "step": 11464 }, { "epoch": 0.31479956068094456, "grad_norm": 0.4701206386089325, "learning_rate": 1.8819543460621368e-05, "loss": 0.5041, "step": 11465 }, { "epoch": 0.31482701812191105, "grad_norm": 0.4614526629447937, "learning_rate": 1.8819339885985165e-05, "loss": 0.5473, "step": 11466 }, { "epoch": 0.31485447556287754, "grad_norm": 0.3847768008708954, "learning_rate": 1.881913629489808e-05, "loss": 0.4405, "step": 11467 }, { "epoch": 0.31488193300384404, "grad_norm": 0.45060089230537415, "learning_rate": 1.8818932687360492e-05, "loss": 0.4809, "step": 11468 }, { "epoch": 0.31490939044481053, "grad_norm": 0.3610547184944153, "learning_rate": 1.8818729063372782e-05, "loss": 0.5213, "step": 11469 }, { "epoch": 0.314936847885777, "grad_norm": 0.36548951268196106, "learning_rate": 1.8818525422935322e-05, "loss": 0.4946, "step": 11470 }, { "epoch": 0.3149643053267435, "grad_norm": 0.3619285225868225, "learning_rate": 1.8818321766048503e-05, "loss": 0.4987, "step": 11471 }, { "epoch": 0.31499176276771007, "grad_norm": 0.3898758888244629, "learning_rate": 1.8818118092712697e-05, "loss": 0.4597, "step": 11472 }, { "epoch": 0.31501922020867656, "grad_norm": 0.3421955406665802, "learning_rate": 1.8817914402928287e-05, "loss": 0.5643, "step": 11473 }, { "epoch": 0.31504667764964306, "grad_norm": 0.3998892903327942, "learning_rate": 1.881771069669565e-05, "loss": 0.5054, "step": 11474 }, { "epoch": 0.31507413509060955, "grad_norm": 0.348342627286911, "learning_rate": 1.881750697401517e-05, "loss": 0.4562, "step": 11475 }, { "epoch": 0.31510159253157605, "grad_norm": 0.3780558705329895, "learning_rate": 1.881730323488723e-05, "loss": 0.4743, "step": 11476 }, { "epoch": 0.31512904997254254, "grad_norm": 0.3481757640838623, "learning_rate": 1.8817099479312198e-05, "loss": 0.4952, "step": 11477 }, { "epoch": 0.31515650741350903, "grad_norm": 0.36746710538864136, "learning_rate": 1.8816895707290464e-05, "loss": 0.5358, "step": 11478 }, { "epoch": 0.3151839648544756, "grad_norm": 0.45495277643203735, "learning_rate": 1.8816691918822406e-05, "loss": 0.5057, "step": 11479 }, { "epoch": 0.3152114222954421, "grad_norm": 0.3488655984401703, "learning_rate": 1.8816488113908404e-05, "loss": 0.5295, "step": 11480 }, { "epoch": 0.3152388797364086, "grad_norm": 0.38944074511528015, "learning_rate": 1.8816284292548833e-05, "loss": 0.5073, "step": 11481 }, { "epoch": 0.31526633717737507, "grad_norm": 0.3641659915447235, "learning_rate": 1.8816080454744084e-05, "loss": 0.545, "step": 11482 }, { "epoch": 0.31529379461834156, "grad_norm": 0.3780931830406189, "learning_rate": 1.8815876600494526e-05, "loss": 0.4525, "step": 11483 }, { "epoch": 0.31532125205930805, "grad_norm": 0.3898509740829468, "learning_rate": 1.8815672729800552e-05, "loss": 0.535, "step": 11484 }, { "epoch": 0.31534870950027455, "grad_norm": 0.35204508900642395, "learning_rate": 1.881546884266253e-05, "loss": 0.5053, "step": 11485 }, { "epoch": 0.3153761669412411, "grad_norm": 0.3814624547958374, "learning_rate": 1.8815264939080845e-05, "loss": 0.5473, "step": 11486 }, { "epoch": 0.3154036243822076, "grad_norm": 0.3794768452644348, "learning_rate": 1.8815061019055875e-05, "loss": 0.5184, "step": 11487 }, { "epoch": 0.3154310818231741, "grad_norm": 0.32853177189826965, "learning_rate": 1.8814857082588007e-05, "loss": 0.5757, "step": 11488 }, { "epoch": 0.3154585392641406, "grad_norm": 0.35091933608055115, "learning_rate": 1.8814653129677616e-05, "loss": 0.5508, "step": 11489 }, { "epoch": 0.3154859967051071, "grad_norm": 0.4936169385910034, "learning_rate": 1.8814449160325083e-05, "loss": 0.6039, "step": 11490 }, { "epoch": 0.31551345414607357, "grad_norm": 0.34139588475227356, "learning_rate": 1.8814245174530786e-05, "loss": 0.5111, "step": 11491 }, { "epoch": 0.31554091158704006, "grad_norm": 0.40885162353515625, "learning_rate": 1.8814041172295116e-05, "loss": 0.5957, "step": 11492 }, { "epoch": 0.3155683690280066, "grad_norm": 0.42207106947898865, "learning_rate": 1.881383715361844e-05, "loss": 0.5928, "step": 11493 }, { "epoch": 0.3155958264689731, "grad_norm": 0.36567479372024536, "learning_rate": 1.8813633118501145e-05, "loss": 0.5001, "step": 11494 }, { "epoch": 0.3156232839099396, "grad_norm": 0.43504583835601807, "learning_rate": 1.8813429066943617e-05, "loss": 0.5177, "step": 11495 }, { "epoch": 0.3156507413509061, "grad_norm": 0.3886643648147583, "learning_rate": 1.8813224998946223e-05, "loss": 0.5948, "step": 11496 }, { "epoch": 0.3156781987918726, "grad_norm": 0.37390103936195374, "learning_rate": 1.8813020914509356e-05, "loss": 0.5057, "step": 11497 }, { "epoch": 0.3157056562328391, "grad_norm": 0.370434433221817, "learning_rate": 1.8812816813633392e-05, "loss": 0.5095, "step": 11498 }, { "epoch": 0.3157331136738056, "grad_norm": 0.3845430314540863, "learning_rate": 1.881261269631871e-05, "loss": 0.5049, "step": 11499 }, { "epoch": 0.3157605711147721, "grad_norm": 0.4968569278717041, "learning_rate": 1.8812408562565695e-05, "loss": 0.5537, "step": 11500 }, { "epoch": 0.3157880285557386, "grad_norm": 0.413463830947876, "learning_rate": 1.8812204412374724e-05, "loss": 0.4184, "step": 11501 }, { "epoch": 0.3158154859967051, "grad_norm": 0.435461163520813, "learning_rate": 1.881200024574618e-05, "loss": 0.5037, "step": 11502 }, { "epoch": 0.3158429434376716, "grad_norm": 0.35665401816368103, "learning_rate": 1.8811796062680442e-05, "loss": 0.4741, "step": 11503 }, { "epoch": 0.3158704008786381, "grad_norm": 0.3172353208065033, "learning_rate": 1.8811591863177893e-05, "loss": 0.4597, "step": 11504 }, { "epoch": 0.3158978583196046, "grad_norm": 0.34494349360466003, "learning_rate": 1.8811387647238914e-05, "loss": 0.4451, "step": 11505 }, { "epoch": 0.3159253157605711, "grad_norm": 0.37418729066848755, "learning_rate": 1.8811183414863884e-05, "loss": 0.5216, "step": 11506 }, { "epoch": 0.31595277320153764, "grad_norm": 0.43794745206832886, "learning_rate": 1.8810979166053182e-05, "loss": 0.5769, "step": 11507 }, { "epoch": 0.31598023064250413, "grad_norm": 0.3706580102443695, "learning_rate": 1.8810774900807195e-05, "loss": 0.4966, "step": 11508 }, { "epoch": 0.31600768808347063, "grad_norm": 0.4037052094936371, "learning_rate": 1.8810570619126302e-05, "loss": 0.4976, "step": 11509 }, { "epoch": 0.3160351455244371, "grad_norm": 0.3261788487434387, "learning_rate": 1.8810366321010876e-05, "loss": 0.4989, "step": 11510 }, { "epoch": 0.3160626029654036, "grad_norm": 0.4436168968677521, "learning_rate": 1.881016200646131e-05, "loss": 0.5856, "step": 11511 }, { "epoch": 0.3160900604063701, "grad_norm": 0.35024094581604004, "learning_rate": 1.8809957675477978e-05, "loss": 0.4636, "step": 11512 }, { "epoch": 0.3161175178473366, "grad_norm": 0.43110066652297974, "learning_rate": 1.8809753328061267e-05, "loss": 0.5881, "step": 11513 }, { "epoch": 0.31614497528830315, "grad_norm": 0.37327733635902405, "learning_rate": 1.880954896421155e-05, "loss": 0.5206, "step": 11514 }, { "epoch": 0.31617243272926965, "grad_norm": 0.3956168591976166, "learning_rate": 1.8809344583929213e-05, "loss": 0.4882, "step": 11515 }, { "epoch": 0.31619989017023614, "grad_norm": 0.3827648460865021, "learning_rate": 1.880914018721464e-05, "loss": 0.5975, "step": 11516 }, { "epoch": 0.31622734761120264, "grad_norm": 0.37007343769073486, "learning_rate": 1.8808935774068206e-05, "loss": 0.4352, "step": 11517 }, { "epoch": 0.31625480505216913, "grad_norm": 0.360985666513443, "learning_rate": 1.8808731344490295e-05, "loss": 0.5879, "step": 11518 }, { "epoch": 0.3162822624931356, "grad_norm": 0.3572729229927063, "learning_rate": 1.880852689848129e-05, "loss": 0.5203, "step": 11519 }, { "epoch": 0.3163097199341021, "grad_norm": 0.43787163496017456, "learning_rate": 1.880832243604157e-05, "loss": 0.4907, "step": 11520 }, { "epoch": 0.31633717737506867, "grad_norm": 0.3678734004497528, "learning_rate": 1.880811795717152e-05, "loss": 0.5082, "step": 11521 }, { "epoch": 0.31636463481603516, "grad_norm": 0.3624853491783142, "learning_rate": 1.8807913461871513e-05, "loss": 0.5246, "step": 11522 }, { "epoch": 0.31639209225700166, "grad_norm": 0.3597498834133148, "learning_rate": 1.8807708950141944e-05, "loss": 0.5574, "step": 11523 }, { "epoch": 0.31641954969796815, "grad_norm": 0.351953387260437, "learning_rate": 1.880750442198318e-05, "loss": 0.4898, "step": 11524 }, { "epoch": 0.31644700713893464, "grad_norm": 0.3388998508453369, "learning_rate": 1.8807299877395615e-05, "loss": 0.4327, "step": 11525 }, { "epoch": 0.31647446457990114, "grad_norm": 0.3764038383960724, "learning_rate": 1.880709531637962e-05, "loss": 0.5096, "step": 11526 }, { "epoch": 0.31650192202086763, "grad_norm": 0.34394365549087524, "learning_rate": 1.8806890738935582e-05, "loss": 0.6114, "step": 11527 }, { "epoch": 0.3165293794618342, "grad_norm": 0.3825882375240326, "learning_rate": 1.8806686145063885e-05, "loss": 0.5053, "step": 11528 }, { "epoch": 0.3165568369028007, "grad_norm": 0.4009203016757965, "learning_rate": 1.8806481534764905e-05, "loss": 0.5629, "step": 11529 }, { "epoch": 0.31658429434376717, "grad_norm": 0.416076123714447, "learning_rate": 1.8806276908039026e-05, "loss": 0.545, "step": 11530 }, { "epoch": 0.31661175178473366, "grad_norm": 0.3119119703769684, "learning_rate": 1.880607226488663e-05, "loss": 0.3959, "step": 11531 }, { "epoch": 0.31663920922570016, "grad_norm": 0.5025073885917664, "learning_rate": 1.88058676053081e-05, "loss": 0.4718, "step": 11532 }, { "epoch": 0.31666666666666665, "grad_norm": 0.3827705383300781, "learning_rate": 1.8805662929303815e-05, "loss": 0.5281, "step": 11533 }, { "epoch": 0.31669412410763315, "grad_norm": 0.3446609377861023, "learning_rate": 1.880545823687416e-05, "loss": 0.5309, "step": 11534 }, { "epoch": 0.3167215815485997, "grad_norm": 0.3336891233921051, "learning_rate": 1.8805253528019515e-05, "loss": 0.5179, "step": 11535 }, { "epoch": 0.3167490389895662, "grad_norm": 0.5082288980484009, "learning_rate": 1.880504880274026e-05, "loss": 0.5328, "step": 11536 }, { "epoch": 0.3167764964305327, "grad_norm": 0.3335157036781311, "learning_rate": 1.880484406103678e-05, "loss": 0.4538, "step": 11537 }, { "epoch": 0.3168039538714992, "grad_norm": 0.38271403312683105, "learning_rate": 1.8804639302909454e-05, "loss": 0.541, "step": 11538 }, { "epoch": 0.31683141131246567, "grad_norm": 0.36313244700431824, "learning_rate": 1.8804434528358667e-05, "loss": 0.5155, "step": 11539 }, { "epoch": 0.31685886875343217, "grad_norm": 0.3532748520374298, "learning_rate": 1.88042297373848e-05, "loss": 0.5145, "step": 11540 }, { "epoch": 0.31688632619439866, "grad_norm": 0.35819903016090393, "learning_rate": 1.8804024929988234e-05, "loss": 0.4696, "step": 11541 }, { "epoch": 0.3169137836353652, "grad_norm": 0.35440605878829956, "learning_rate": 1.880382010616935e-05, "loss": 0.5826, "step": 11542 }, { "epoch": 0.3169412410763317, "grad_norm": 0.35829195380210876, "learning_rate": 1.8803615265928537e-05, "loss": 0.5371, "step": 11543 }, { "epoch": 0.3169686985172982, "grad_norm": 0.3524274528026581, "learning_rate": 1.8803410409266165e-05, "loss": 0.5204, "step": 11544 }, { "epoch": 0.3169961559582647, "grad_norm": 0.3680247366428375, "learning_rate": 1.8803205536182627e-05, "loss": 0.4507, "step": 11545 }, { "epoch": 0.3170236133992312, "grad_norm": 0.3740106225013733, "learning_rate": 1.88030006466783e-05, "loss": 0.5717, "step": 11546 }, { "epoch": 0.3170510708401977, "grad_norm": 0.3944126069545746, "learning_rate": 1.8802795740753567e-05, "loss": 0.5568, "step": 11547 }, { "epoch": 0.3170785282811642, "grad_norm": 0.49929237365722656, "learning_rate": 1.880259081840881e-05, "loss": 0.5953, "step": 11548 }, { "epoch": 0.3171059857221307, "grad_norm": 0.3294670879840851, "learning_rate": 1.880238587964441e-05, "loss": 0.4465, "step": 11549 }, { "epoch": 0.3171334431630972, "grad_norm": 0.40135473012924194, "learning_rate": 1.8802180924460757e-05, "loss": 0.6064, "step": 11550 }, { "epoch": 0.3171609006040637, "grad_norm": 0.37009915709495544, "learning_rate": 1.880197595285822e-05, "loss": 0.5726, "step": 11551 }, { "epoch": 0.3171883580450302, "grad_norm": 0.33187779784202576, "learning_rate": 1.8801770964837193e-05, "loss": 0.4795, "step": 11552 }, { "epoch": 0.3172158154859967, "grad_norm": 0.44523945450782776, "learning_rate": 1.8801565960398055e-05, "loss": 0.4923, "step": 11553 }, { "epoch": 0.3172432729269632, "grad_norm": 0.446218341588974, "learning_rate": 1.8801360939541183e-05, "loss": 0.5082, "step": 11554 }, { "epoch": 0.3172707303679297, "grad_norm": 0.34654587507247925, "learning_rate": 1.8801155902266965e-05, "loss": 0.5511, "step": 11555 }, { "epoch": 0.31729818780889624, "grad_norm": 0.39677271246910095, "learning_rate": 1.8800950848575786e-05, "loss": 0.5524, "step": 11556 }, { "epoch": 0.31732564524986273, "grad_norm": 0.4771231710910797, "learning_rate": 1.8800745778468023e-05, "loss": 0.5297, "step": 11557 }, { "epoch": 0.3173531026908292, "grad_norm": 0.3777872622013092, "learning_rate": 1.8800540691944057e-05, "loss": 0.561, "step": 11558 }, { "epoch": 0.3173805601317957, "grad_norm": 0.37661466002464294, "learning_rate": 1.8800335589004275e-05, "loss": 0.5606, "step": 11559 }, { "epoch": 0.3174080175727622, "grad_norm": 0.34776484966278076, "learning_rate": 1.880013046964906e-05, "loss": 0.5157, "step": 11560 }, { "epoch": 0.3174354750137287, "grad_norm": 0.35297444462776184, "learning_rate": 1.8799925333878793e-05, "loss": 0.5539, "step": 11561 }, { "epoch": 0.3174629324546952, "grad_norm": 0.384945273399353, "learning_rate": 1.8799720181693856e-05, "loss": 0.5465, "step": 11562 }, { "epoch": 0.31749038989566175, "grad_norm": 0.38219723105430603, "learning_rate": 1.879951501309463e-05, "loss": 0.604, "step": 11563 }, { "epoch": 0.31751784733662825, "grad_norm": 0.35860779881477356, "learning_rate": 1.8799309828081506e-05, "loss": 0.511, "step": 11564 }, { "epoch": 0.31754530477759474, "grad_norm": 0.39727258682250977, "learning_rate": 1.8799104626654854e-05, "loss": 0.5395, "step": 11565 }, { "epoch": 0.31757276221856123, "grad_norm": 0.40390148758888245, "learning_rate": 1.879889940881507e-05, "loss": 0.524, "step": 11566 }, { "epoch": 0.3176002196595277, "grad_norm": 0.648410439491272, "learning_rate": 1.8798694174562527e-05, "loss": 0.5025, "step": 11567 }, { "epoch": 0.3176276771004942, "grad_norm": 0.3706580698490143, "learning_rate": 1.879848892389761e-05, "loss": 0.5135, "step": 11568 }, { "epoch": 0.3176551345414607, "grad_norm": 0.3756009340286255, "learning_rate": 1.8798283656820704e-05, "loss": 0.5526, "step": 11569 }, { "epoch": 0.31768259198242726, "grad_norm": 0.4069195091724396, "learning_rate": 1.879807837333219e-05, "loss": 0.5022, "step": 11570 }, { "epoch": 0.31771004942339376, "grad_norm": 0.4003545641899109, "learning_rate": 1.8797873073432452e-05, "loss": 0.5489, "step": 11571 }, { "epoch": 0.31773750686436025, "grad_norm": 0.34933412075042725, "learning_rate": 1.8797667757121875e-05, "loss": 0.5158, "step": 11572 }, { "epoch": 0.31776496430532675, "grad_norm": 0.38408082723617554, "learning_rate": 1.8797462424400837e-05, "loss": 0.5253, "step": 11573 }, { "epoch": 0.31779242174629324, "grad_norm": 0.3721802234649658, "learning_rate": 1.8797257075269726e-05, "loss": 0.5437, "step": 11574 }, { "epoch": 0.31781987918725974, "grad_norm": 0.32017526030540466, "learning_rate": 1.8797051709728923e-05, "loss": 0.4925, "step": 11575 }, { "epoch": 0.31784733662822623, "grad_norm": 0.41804009675979614, "learning_rate": 1.8796846327778808e-05, "loss": 0.5814, "step": 11576 }, { "epoch": 0.3178747940691928, "grad_norm": 0.4707590937614441, "learning_rate": 1.8796640929419768e-05, "loss": 0.5919, "step": 11577 }, { "epoch": 0.3179022515101593, "grad_norm": 0.4131036698818207, "learning_rate": 1.8796435514652188e-05, "loss": 0.5845, "step": 11578 }, { "epoch": 0.31792970895112577, "grad_norm": 0.3870420455932617, "learning_rate": 1.8796230083476446e-05, "loss": 0.5995, "step": 11579 }, { "epoch": 0.31795716639209226, "grad_norm": 0.4472878873348236, "learning_rate": 1.8796024635892926e-05, "loss": 0.5573, "step": 11580 }, { "epoch": 0.31798462383305875, "grad_norm": 0.35519537329673767, "learning_rate": 1.8795819171902015e-05, "loss": 0.5528, "step": 11581 }, { "epoch": 0.31801208127402525, "grad_norm": 0.3316037952899933, "learning_rate": 1.8795613691504094e-05, "loss": 0.5657, "step": 11582 }, { "epoch": 0.31803953871499174, "grad_norm": 0.4174230396747589, "learning_rate": 1.8795408194699544e-05, "loss": 0.5228, "step": 11583 }, { "epoch": 0.31806699615595824, "grad_norm": 0.38040396571159363, "learning_rate": 1.879520268148875e-05, "loss": 0.4614, "step": 11584 }, { "epoch": 0.3180944535969248, "grad_norm": 0.3828701972961426, "learning_rate": 1.87949971518721e-05, "loss": 0.5074, "step": 11585 }, { "epoch": 0.3181219110378913, "grad_norm": 0.3946191370487213, "learning_rate": 1.8794791605849972e-05, "loss": 0.4755, "step": 11586 }, { "epoch": 0.3181493684788578, "grad_norm": 0.38268402218818665, "learning_rate": 1.8794586043422747e-05, "loss": 0.6457, "step": 11587 }, { "epoch": 0.31817682591982427, "grad_norm": 0.46169114112854004, "learning_rate": 1.8794380464590815e-05, "loss": 0.6266, "step": 11588 }, { "epoch": 0.31820428336079076, "grad_norm": 0.45835524797439575, "learning_rate": 1.879417486935456e-05, "loss": 0.5278, "step": 11589 }, { "epoch": 0.31823174080175726, "grad_norm": 0.3857206404209137, "learning_rate": 1.8793969257714357e-05, "loss": 0.5272, "step": 11590 }, { "epoch": 0.31825919824272375, "grad_norm": 0.3728189766407013, "learning_rate": 1.8793763629670596e-05, "loss": 0.5061, "step": 11591 }, { "epoch": 0.3182866556836903, "grad_norm": 0.4018442630767822, "learning_rate": 1.879355798522366e-05, "loss": 0.5898, "step": 11592 }, { "epoch": 0.3183141131246568, "grad_norm": 0.37019404768943787, "learning_rate": 1.879335232437393e-05, "loss": 0.5345, "step": 11593 }, { "epoch": 0.3183415705656233, "grad_norm": 0.3224369287490845, "learning_rate": 1.8793146647121795e-05, "loss": 0.476, "step": 11594 }, { "epoch": 0.3183690280065898, "grad_norm": 0.3348217010498047, "learning_rate": 1.879294095346763e-05, "loss": 0.4793, "step": 11595 }, { "epoch": 0.3183964854475563, "grad_norm": 0.3426550626754761, "learning_rate": 1.8792735243411827e-05, "loss": 0.5172, "step": 11596 }, { "epoch": 0.31842394288852277, "grad_norm": 0.4189211130142212, "learning_rate": 1.8792529516954768e-05, "loss": 0.5364, "step": 11597 }, { "epoch": 0.31845140032948926, "grad_norm": 0.36945801973342896, "learning_rate": 1.8792323774096833e-05, "loss": 0.4743, "step": 11598 }, { "epoch": 0.3184788577704558, "grad_norm": 0.3782860040664673, "learning_rate": 1.8792118014838406e-05, "loss": 0.5186, "step": 11599 }, { "epoch": 0.3185063152114223, "grad_norm": 0.4049023985862732, "learning_rate": 1.8791912239179876e-05, "loss": 0.5905, "step": 11600 }, { "epoch": 0.3185337726523888, "grad_norm": 0.3841196894645691, "learning_rate": 1.8791706447121623e-05, "loss": 0.4422, "step": 11601 }, { "epoch": 0.3185612300933553, "grad_norm": 0.38670873641967773, "learning_rate": 1.879150063866403e-05, "loss": 0.5418, "step": 11602 }, { "epoch": 0.3185886875343218, "grad_norm": 0.33993688225746155, "learning_rate": 1.8791294813807485e-05, "loss": 0.5401, "step": 11603 }, { "epoch": 0.3186161449752883, "grad_norm": 0.35934340953826904, "learning_rate": 1.8791088972552365e-05, "loss": 0.5623, "step": 11604 }, { "epoch": 0.3186436024162548, "grad_norm": 0.3227153420448303, "learning_rate": 1.8790883114899064e-05, "loss": 0.5313, "step": 11605 }, { "epoch": 0.31867105985722133, "grad_norm": 0.3852930963039398, "learning_rate": 1.8790677240847954e-05, "loss": 0.4862, "step": 11606 }, { "epoch": 0.3186985172981878, "grad_norm": 0.35360977053642273, "learning_rate": 1.8790471350399427e-05, "loss": 0.5084, "step": 11607 }, { "epoch": 0.3187259747391543, "grad_norm": 0.40991950035095215, "learning_rate": 1.8790265443553868e-05, "loss": 0.5857, "step": 11608 }, { "epoch": 0.3187534321801208, "grad_norm": 0.3776117265224457, "learning_rate": 1.8790059520311658e-05, "loss": 0.4601, "step": 11609 }, { "epoch": 0.3187808896210873, "grad_norm": 0.34307917952537537, "learning_rate": 1.878985358067318e-05, "loss": 0.472, "step": 11610 }, { "epoch": 0.3188083470620538, "grad_norm": 0.3723316788673401, "learning_rate": 1.878964762463882e-05, "loss": 0.5197, "step": 11611 }, { "epoch": 0.3188358045030203, "grad_norm": 0.41890260577201843, "learning_rate": 1.8789441652208965e-05, "loss": 0.5419, "step": 11612 }, { "epoch": 0.31886326194398684, "grad_norm": 0.3821306824684143, "learning_rate": 1.8789235663383992e-05, "loss": 0.5199, "step": 11613 }, { "epoch": 0.31889071938495334, "grad_norm": 0.3857809007167816, "learning_rate": 1.878902965816429e-05, "loss": 0.5224, "step": 11614 }, { "epoch": 0.31891817682591983, "grad_norm": 0.33963897824287415, "learning_rate": 1.8788823636550245e-05, "loss": 0.5248, "step": 11615 }, { "epoch": 0.3189456342668863, "grad_norm": 0.36402612924575806, "learning_rate": 1.8788617598542237e-05, "loss": 0.5388, "step": 11616 }, { "epoch": 0.3189730917078528, "grad_norm": 0.36881953477859497, "learning_rate": 1.878841154414065e-05, "loss": 0.6057, "step": 11617 }, { "epoch": 0.3190005491488193, "grad_norm": 0.38719192147254944, "learning_rate": 1.878820547334587e-05, "loss": 0.6447, "step": 11618 }, { "epoch": 0.3190280065897858, "grad_norm": 0.3778699040412903, "learning_rate": 1.878799938615829e-05, "loss": 0.5515, "step": 11619 }, { "epoch": 0.31905546403075236, "grad_norm": 0.3776033818721771, "learning_rate": 1.878779328257828e-05, "loss": 0.5269, "step": 11620 }, { "epoch": 0.31908292147171885, "grad_norm": 0.47917038202285767, "learning_rate": 1.878758716260623e-05, "loss": 0.548, "step": 11621 }, { "epoch": 0.31911037891268534, "grad_norm": 0.45578888058662415, "learning_rate": 1.8787381026242528e-05, "loss": 0.4945, "step": 11622 }, { "epoch": 0.31913783635365184, "grad_norm": 0.339183509349823, "learning_rate": 1.8787174873487556e-05, "loss": 0.4622, "step": 11623 }, { "epoch": 0.31916529379461833, "grad_norm": 0.3797185719013214, "learning_rate": 1.8786968704341696e-05, "loss": 0.5711, "step": 11624 }, { "epoch": 0.3191927512355848, "grad_norm": 0.3996993899345398, "learning_rate": 1.8786762518805336e-05, "loss": 0.618, "step": 11625 }, { "epoch": 0.3192202086765513, "grad_norm": 0.3783722519874573, "learning_rate": 1.8786556316878858e-05, "loss": 0.5544, "step": 11626 }, { "epoch": 0.31924766611751787, "grad_norm": 0.45522722601890564, "learning_rate": 1.8786350098562655e-05, "loss": 0.5249, "step": 11627 }, { "epoch": 0.31927512355848436, "grad_norm": 0.31489941477775574, "learning_rate": 1.8786143863857096e-05, "loss": 0.5011, "step": 11628 }, { "epoch": 0.31930258099945086, "grad_norm": 0.3268744945526123, "learning_rate": 1.8785937612762577e-05, "loss": 0.527, "step": 11629 }, { "epoch": 0.31933003844041735, "grad_norm": 0.4782208502292633, "learning_rate": 1.8785731345279483e-05, "loss": 0.5288, "step": 11630 }, { "epoch": 0.31935749588138385, "grad_norm": 0.39080944657325745, "learning_rate": 1.878552506140819e-05, "loss": 0.5086, "step": 11631 }, { "epoch": 0.31938495332235034, "grad_norm": 0.4034901261329651, "learning_rate": 1.8785318761149096e-05, "loss": 0.5457, "step": 11632 }, { "epoch": 0.31941241076331683, "grad_norm": 0.36330708861351013, "learning_rate": 1.8785112444502576e-05, "loss": 0.5211, "step": 11633 }, { "epoch": 0.3194398682042834, "grad_norm": 0.3466344177722931, "learning_rate": 1.8784906111469018e-05, "loss": 0.5494, "step": 11634 }, { "epoch": 0.3194673256452499, "grad_norm": 0.41117408871650696, "learning_rate": 1.8784699762048804e-05, "loss": 0.5572, "step": 11635 }, { "epoch": 0.31949478308621637, "grad_norm": 0.39266934990882874, "learning_rate": 1.878449339624232e-05, "loss": 0.4686, "step": 11636 }, { "epoch": 0.31952224052718287, "grad_norm": 0.3957371711730957, "learning_rate": 1.8784287014049956e-05, "loss": 0.4832, "step": 11637 }, { "epoch": 0.31954969796814936, "grad_norm": 0.37069690227508545, "learning_rate": 1.878408061547209e-05, "loss": 0.6524, "step": 11638 }, { "epoch": 0.31957715540911585, "grad_norm": 0.4338432550430298, "learning_rate": 1.8783874200509115e-05, "loss": 0.5688, "step": 11639 }, { "epoch": 0.31960461285008235, "grad_norm": 0.3654215633869171, "learning_rate": 1.8783667769161408e-05, "loss": 0.6487, "step": 11640 }, { "epoch": 0.3196320702910489, "grad_norm": 0.3625013530254364, "learning_rate": 1.8783461321429356e-05, "loss": 0.5534, "step": 11641 }, { "epoch": 0.3196595277320154, "grad_norm": 0.3790997564792633, "learning_rate": 1.8783254857313347e-05, "loss": 0.5592, "step": 11642 }, { "epoch": 0.3196869851729819, "grad_norm": 0.37769949436187744, "learning_rate": 1.878304837681376e-05, "loss": 0.5399, "step": 11643 }, { "epoch": 0.3197144426139484, "grad_norm": 0.3315916955471039, "learning_rate": 1.878284187993099e-05, "loss": 0.5816, "step": 11644 }, { "epoch": 0.3197419000549149, "grad_norm": 0.39127054810523987, "learning_rate": 1.8782635366665416e-05, "loss": 0.4574, "step": 11645 }, { "epoch": 0.31976935749588137, "grad_norm": 0.32830289006233215, "learning_rate": 1.8782428837017425e-05, "loss": 0.4049, "step": 11646 }, { "epoch": 0.31979681493684786, "grad_norm": 0.5111897587776184, "learning_rate": 1.8782222290987396e-05, "loss": 0.5081, "step": 11647 }, { "epoch": 0.3198242723778144, "grad_norm": 0.4389162063598633, "learning_rate": 1.8782015728575723e-05, "loss": 0.5853, "step": 11648 }, { "epoch": 0.3198517298187809, "grad_norm": 0.3880854547023773, "learning_rate": 1.8781809149782786e-05, "loss": 0.6067, "step": 11649 }, { "epoch": 0.3198791872597474, "grad_norm": 0.33045369386672974, "learning_rate": 1.8781602554608973e-05, "loss": 0.4819, "step": 11650 }, { "epoch": 0.3199066447007139, "grad_norm": 0.4233371317386627, "learning_rate": 1.878139594305467e-05, "loss": 0.4685, "step": 11651 }, { "epoch": 0.3199341021416804, "grad_norm": 0.4157913625240326, "learning_rate": 1.878118931512026e-05, "loss": 0.5381, "step": 11652 }, { "epoch": 0.3199615595826469, "grad_norm": 0.40798288583755493, "learning_rate": 1.878098267080613e-05, "loss": 0.5424, "step": 11653 }, { "epoch": 0.3199890170236134, "grad_norm": 0.44678348302841187, "learning_rate": 1.8780776010112665e-05, "loss": 0.6024, "step": 11654 }, { "epoch": 0.3200164744645799, "grad_norm": 0.526862382888794, "learning_rate": 1.878056933304025e-05, "loss": 0.4688, "step": 11655 }, { "epoch": 0.3200439319055464, "grad_norm": 0.33278653025627136, "learning_rate": 1.8780362639589266e-05, "loss": 0.4627, "step": 11656 }, { "epoch": 0.3200713893465129, "grad_norm": 0.36074602603912354, "learning_rate": 1.8780155929760108e-05, "loss": 0.4496, "step": 11657 }, { "epoch": 0.3200988467874794, "grad_norm": 0.3786594271659851, "learning_rate": 1.8779949203553158e-05, "loss": 0.5388, "step": 11658 }, { "epoch": 0.3201263042284459, "grad_norm": 0.5121511220932007, "learning_rate": 1.8779742460968795e-05, "loss": 0.4966, "step": 11659 }, { "epoch": 0.3201537616694124, "grad_norm": 0.38612762093544006, "learning_rate": 1.8779535702007416e-05, "loss": 0.5444, "step": 11660 }, { "epoch": 0.3201812191103789, "grad_norm": 0.39932361245155334, "learning_rate": 1.8779328926669397e-05, "loss": 0.4704, "step": 11661 }, { "epoch": 0.32020867655134544, "grad_norm": 0.3625341057777405, "learning_rate": 1.8779122134955128e-05, "loss": 0.4861, "step": 11662 }, { "epoch": 0.32023613399231193, "grad_norm": 0.35291585326194763, "learning_rate": 1.8778915326865e-05, "loss": 0.5571, "step": 11663 }, { "epoch": 0.3202635914332784, "grad_norm": 0.5690022706985474, "learning_rate": 1.8778708502399384e-05, "loss": 0.5642, "step": 11664 }, { "epoch": 0.3202910488742449, "grad_norm": 0.5463053584098816, "learning_rate": 1.8778501661558677e-05, "loss": 0.5693, "step": 11665 }, { "epoch": 0.3203185063152114, "grad_norm": 0.3813205063343048, "learning_rate": 1.877829480434327e-05, "loss": 0.6088, "step": 11666 }, { "epoch": 0.3203459637561779, "grad_norm": 0.5110883712768555, "learning_rate": 1.8778087930753535e-05, "loss": 0.5149, "step": 11667 }, { "epoch": 0.3203734211971444, "grad_norm": 0.4248523414134979, "learning_rate": 1.8777881040789864e-05, "loss": 0.5626, "step": 11668 }, { "epoch": 0.32040087863811095, "grad_norm": 0.34755319356918335, "learning_rate": 1.8777674134452647e-05, "loss": 0.479, "step": 11669 }, { "epoch": 0.32042833607907745, "grad_norm": 0.41231346130371094, "learning_rate": 1.8777467211742263e-05, "loss": 0.5798, "step": 11670 }, { "epoch": 0.32045579352004394, "grad_norm": 0.4012342393398285, "learning_rate": 1.87772602726591e-05, "loss": 0.5155, "step": 11671 }, { "epoch": 0.32048325096101044, "grad_norm": 0.3623591959476471, "learning_rate": 1.877705331720355e-05, "loss": 0.5237, "step": 11672 }, { "epoch": 0.32051070840197693, "grad_norm": 0.3641941249370575, "learning_rate": 1.877684634537599e-05, "loss": 0.5729, "step": 11673 }, { "epoch": 0.3205381658429434, "grad_norm": 0.36328041553497314, "learning_rate": 1.8776639357176815e-05, "loss": 0.4996, "step": 11674 }, { "epoch": 0.3205656232839099, "grad_norm": 0.4021020233631134, "learning_rate": 1.87764323526064e-05, "loss": 0.5388, "step": 11675 }, { "epoch": 0.32059308072487647, "grad_norm": 0.4047730565071106, "learning_rate": 1.8776225331665142e-05, "loss": 0.5238, "step": 11676 }, { "epoch": 0.32062053816584296, "grad_norm": 0.3573513925075531, "learning_rate": 1.8776018294353423e-05, "loss": 0.4819, "step": 11677 }, { "epoch": 0.32064799560680946, "grad_norm": 0.3823285400867462, "learning_rate": 1.8775811240671626e-05, "loss": 0.5398, "step": 11678 }, { "epoch": 0.32067545304777595, "grad_norm": 0.328141450881958, "learning_rate": 1.877560417062014e-05, "loss": 0.5145, "step": 11679 }, { "epoch": 0.32070291048874244, "grad_norm": 0.38665664196014404, "learning_rate": 1.8775397084199356e-05, "loss": 0.5575, "step": 11680 }, { "epoch": 0.32073036792970894, "grad_norm": 0.344605028629303, "learning_rate": 1.8775189981409655e-05, "loss": 0.4673, "step": 11681 }, { "epoch": 0.32075782537067543, "grad_norm": 0.4010832905769348, "learning_rate": 1.8774982862251422e-05, "loss": 0.5113, "step": 11682 }, { "epoch": 0.320785282811642, "grad_norm": 0.4564690887928009, "learning_rate": 1.877477572672504e-05, "loss": 0.6079, "step": 11683 }, { "epoch": 0.3208127402526085, "grad_norm": 0.33443665504455566, "learning_rate": 1.877456857483091e-05, "loss": 0.5245, "step": 11684 }, { "epoch": 0.32084019769357497, "grad_norm": 0.3897082507610321, "learning_rate": 1.8774361406569402e-05, "loss": 0.4983, "step": 11685 }, { "epoch": 0.32086765513454146, "grad_norm": 0.3381296694278717, "learning_rate": 1.8774154221940916e-05, "loss": 0.5212, "step": 11686 }, { "epoch": 0.32089511257550796, "grad_norm": 0.3756665885448456, "learning_rate": 1.8773947020945826e-05, "loss": 0.4724, "step": 11687 }, { "epoch": 0.32092257001647445, "grad_norm": 0.367234468460083, "learning_rate": 1.8773739803584527e-05, "loss": 0.4755, "step": 11688 }, { "epoch": 0.32095002745744095, "grad_norm": 0.3668709993362427, "learning_rate": 1.8773532569857404e-05, "loss": 0.5903, "step": 11689 }, { "epoch": 0.3209774848984075, "grad_norm": 0.3850155472755432, "learning_rate": 1.8773325319764838e-05, "loss": 0.5262, "step": 11690 }, { "epoch": 0.321004942339374, "grad_norm": 0.34990671277046204, "learning_rate": 1.8773118053307223e-05, "loss": 0.4575, "step": 11691 }, { "epoch": 0.3210323997803405, "grad_norm": 0.3775002062320709, "learning_rate": 1.8772910770484945e-05, "loss": 0.6011, "step": 11692 }, { "epoch": 0.321059857221307, "grad_norm": 0.39319416880607605, "learning_rate": 1.8772703471298387e-05, "loss": 0.5164, "step": 11693 }, { "epoch": 0.32108731466227347, "grad_norm": 0.729278564453125, "learning_rate": 1.877249615574794e-05, "loss": 0.4989, "step": 11694 }, { "epoch": 0.32111477210323996, "grad_norm": 0.37515923380851746, "learning_rate": 1.8772288823833982e-05, "loss": 0.4788, "step": 11695 }, { "epoch": 0.32114222954420646, "grad_norm": 0.36077016592025757, "learning_rate": 1.8772081475556908e-05, "loss": 0.4234, "step": 11696 }, { "epoch": 0.321169686985173, "grad_norm": 0.4090329110622406, "learning_rate": 1.8771874110917102e-05, "loss": 0.5138, "step": 11697 }, { "epoch": 0.3211971444261395, "grad_norm": 0.3452886939048767, "learning_rate": 1.8771666729914954e-05, "loss": 0.5458, "step": 11698 }, { "epoch": 0.321224601867106, "grad_norm": 0.36332178115844727, "learning_rate": 1.8771459332550847e-05, "loss": 0.5442, "step": 11699 }, { "epoch": 0.3212520593080725, "grad_norm": 0.40509194135665894, "learning_rate": 1.8771251918825166e-05, "loss": 0.474, "step": 11700 }, { "epoch": 0.321279516749039, "grad_norm": 0.39882466197013855, "learning_rate": 1.87710444887383e-05, "loss": 0.5298, "step": 11701 }, { "epoch": 0.3213069741900055, "grad_norm": 0.37578001618385315, "learning_rate": 1.877083704229064e-05, "loss": 0.5583, "step": 11702 }, { "epoch": 0.321334431630972, "grad_norm": 0.37791958451271057, "learning_rate": 1.877062957948257e-05, "loss": 0.5222, "step": 11703 }, { "epoch": 0.3213618890719385, "grad_norm": 0.4049600064754486, "learning_rate": 1.8770422100314474e-05, "loss": 0.426, "step": 11704 }, { "epoch": 0.321389346512905, "grad_norm": 0.35453349351882935, "learning_rate": 1.877021460478674e-05, "loss": 0.5294, "step": 11705 }, { "epoch": 0.3214168039538715, "grad_norm": 0.34919509291648865, "learning_rate": 1.877000709289976e-05, "loss": 0.472, "step": 11706 }, { "epoch": 0.321444261394838, "grad_norm": 0.3662647604942322, "learning_rate": 1.876979956465392e-05, "loss": 0.5639, "step": 11707 }, { "epoch": 0.3214717188358045, "grad_norm": 0.3434661328792572, "learning_rate": 1.8769592020049597e-05, "loss": 0.4983, "step": 11708 }, { "epoch": 0.321499176276771, "grad_norm": 0.33679434657096863, "learning_rate": 1.8769384459087195e-05, "loss": 0.4651, "step": 11709 }, { "epoch": 0.3215266337177375, "grad_norm": 0.3306517004966736, "learning_rate": 1.8769176881767086e-05, "loss": 0.4706, "step": 11710 }, { "epoch": 0.32155409115870404, "grad_norm": 0.3731515407562256, "learning_rate": 1.8768969288089666e-05, "loss": 0.5993, "step": 11711 }, { "epoch": 0.32158154859967053, "grad_norm": 0.3641342520713806, "learning_rate": 1.8768761678055318e-05, "loss": 0.5438, "step": 11712 }, { "epoch": 0.321609006040637, "grad_norm": 0.3975144326686859, "learning_rate": 1.8768554051664432e-05, "loss": 0.541, "step": 11713 }, { "epoch": 0.3216364634816035, "grad_norm": 0.35624611377716064, "learning_rate": 1.8768346408917394e-05, "loss": 0.5627, "step": 11714 }, { "epoch": 0.32166392092257, "grad_norm": 0.3859911262989044, "learning_rate": 1.876813874981459e-05, "loss": 0.5357, "step": 11715 }, { "epoch": 0.3216913783635365, "grad_norm": 0.5796484351158142, "learning_rate": 1.8767931074356413e-05, "loss": 0.4921, "step": 11716 }, { "epoch": 0.321718835804503, "grad_norm": 0.3390295207500458, "learning_rate": 1.876772338254324e-05, "loss": 0.447, "step": 11717 }, { "epoch": 0.3217462932454695, "grad_norm": 0.35643672943115234, "learning_rate": 1.8767515674375466e-05, "loss": 0.4376, "step": 11718 }, { "epoch": 0.32177375068643604, "grad_norm": 0.3699345886707306, "learning_rate": 1.876730794985348e-05, "loss": 0.4272, "step": 11719 }, { "epoch": 0.32180120812740254, "grad_norm": 0.36987611651420593, "learning_rate": 1.8767100208977663e-05, "loss": 0.5553, "step": 11720 }, { "epoch": 0.32182866556836903, "grad_norm": 0.3499537706375122, "learning_rate": 1.876689245174841e-05, "loss": 0.5239, "step": 11721 }, { "epoch": 0.3218561230093355, "grad_norm": 0.37930670380592346, "learning_rate": 1.87666846781661e-05, "loss": 0.5478, "step": 11722 }, { "epoch": 0.321883580450302, "grad_norm": 0.32309725880622864, "learning_rate": 1.876647688823113e-05, "loss": 0.4614, "step": 11723 }, { "epoch": 0.3219110378912685, "grad_norm": 0.3830820918083191, "learning_rate": 1.876626908194388e-05, "loss": 0.5165, "step": 11724 }, { "epoch": 0.321938495332235, "grad_norm": 0.4565393030643463, "learning_rate": 1.876606125930474e-05, "loss": 0.5548, "step": 11725 }, { "epoch": 0.32196595277320156, "grad_norm": 4.418843746185303, "learning_rate": 1.87658534203141e-05, "loss": 0.5428, "step": 11726 }, { "epoch": 0.32199341021416805, "grad_norm": 0.3529362976551056, "learning_rate": 1.8765645564972343e-05, "loss": 0.4695, "step": 11727 }, { "epoch": 0.32202086765513455, "grad_norm": 0.3233625888824463, "learning_rate": 1.8765437693279858e-05, "loss": 0.5133, "step": 11728 }, { "epoch": 0.32204832509610104, "grad_norm": 0.35758453607559204, "learning_rate": 1.876522980523704e-05, "loss": 0.4867, "step": 11729 }, { "epoch": 0.32207578253706753, "grad_norm": 0.47603800892829895, "learning_rate": 1.8765021900844264e-05, "loss": 0.4853, "step": 11730 }, { "epoch": 0.32210323997803403, "grad_norm": 0.41322100162506104, "learning_rate": 1.876481398010193e-05, "loss": 0.4746, "step": 11731 }, { "epoch": 0.3221306974190005, "grad_norm": 0.36421751976013184, "learning_rate": 1.876460604301042e-05, "loss": 0.5048, "step": 11732 }, { "epoch": 0.32215815485996707, "grad_norm": 0.35288381576538086, "learning_rate": 1.876439808957012e-05, "loss": 0.5416, "step": 11733 }, { "epoch": 0.32218561230093357, "grad_norm": 0.38899943232536316, "learning_rate": 1.876419011978142e-05, "loss": 0.5479, "step": 11734 }, { "epoch": 0.32221306974190006, "grad_norm": 0.4016796946525574, "learning_rate": 1.8763982133644712e-05, "loss": 0.5336, "step": 11735 }, { "epoch": 0.32224052718286655, "grad_norm": 0.35294830799102783, "learning_rate": 1.8763774131160376e-05, "loss": 0.5248, "step": 11736 }, { "epoch": 0.32226798462383305, "grad_norm": 0.3345772325992584, "learning_rate": 1.8763566112328805e-05, "loss": 0.5239, "step": 11737 }, { "epoch": 0.32229544206479954, "grad_norm": 0.3485015630722046, "learning_rate": 1.8763358077150386e-05, "loss": 0.4791, "step": 11738 }, { "epoch": 0.32232289950576604, "grad_norm": 0.3693860173225403, "learning_rate": 1.876315002562551e-05, "loss": 0.5407, "step": 11739 }, { "epoch": 0.3223503569467326, "grad_norm": 0.4738292396068573, "learning_rate": 1.876294195775456e-05, "loss": 0.5216, "step": 11740 }, { "epoch": 0.3223778143876991, "grad_norm": 0.37546899914741516, "learning_rate": 1.876273387353793e-05, "loss": 0.5201, "step": 11741 }, { "epoch": 0.3224052718286656, "grad_norm": 0.3609587252140045, "learning_rate": 1.8762525772976e-05, "loss": 0.5452, "step": 11742 }, { "epoch": 0.32243272926963207, "grad_norm": 0.4050866365432739, "learning_rate": 1.8762317656069164e-05, "loss": 0.5468, "step": 11743 }, { "epoch": 0.32246018671059856, "grad_norm": 0.34959083795547485, "learning_rate": 1.876210952281781e-05, "loss": 0.619, "step": 11744 }, { "epoch": 0.32248764415156506, "grad_norm": 0.35286134481430054, "learning_rate": 1.8761901373222324e-05, "loss": 0.4111, "step": 11745 }, { "epoch": 0.32251510159253155, "grad_norm": 0.535861611366272, "learning_rate": 1.8761693207283095e-05, "loss": 0.5336, "step": 11746 }, { "epoch": 0.3225425590334981, "grad_norm": 0.3382166028022766, "learning_rate": 1.8761485025000515e-05, "loss": 0.4143, "step": 11747 }, { "epoch": 0.3225700164744646, "grad_norm": 0.35565534234046936, "learning_rate": 1.8761276826374966e-05, "loss": 0.4747, "step": 11748 }, { "epoch": 0.3225974739154311, "grad_norm": 0.36466243863105774, "learning_rate": 1.8761068611406838e-05, "loss": 0.51, "step": 11749 }, { "epoch": 0.3226249313563976, "grad_norm": 0.38052818179130554, "learning_rate": 1.8760860380096524e-05, "loss": 0.4849, "step": 11750 }, { "epoch": 0.3226523887973641, "grad_norm": 0.39832523465156555, "learning_rate": 1.876065213244441e-05, "loss": 0.5659, "step": 11751 }, { "epoch": 0.32267984623833057, "grad_norm": 0.34363284707069397, "learning_rate": 1.8760443868450882e-05, "loss": 0.5698, "step": 11752 }, { "epoch": 0.32270730367929706, "grad_norm": 0.37747445702552795, "learning_rate": 1.876023558811633e-05, "loss": 0.4953, "step": 11753 }, { "epoch": 0.3227347611202636, "grad_norm": 0.376833438873291, "learning_rate": 1.8760027291441144e-05, "loss": 0.5668, "step": 11754 }, { "epoch": 0.3227622185612301, "grad_norm": 0.34841740131378174, "learning_rate": 1.875981897842571e-05, "loss": 0.4654, "step": 11755 }, { "epoch": 0.3227896760021966, "grad_norm": 0.3922888934612274, "learning_rate": 1.875961064907042e-05, "loss": 0.5388, "step": 11756 }, { "epoch": 0.3228171334431631, "grad_norm": 0.3895992040634155, "learning_rate": 1.8759402303375656e-05, "loss": 0.5774, "step": 11757 }, { "epoch": 0.3228445908841296, "grad_norm": 0.3481987416744232, "learning_rate": 1.8759193941341815e-05, "loss": 0.4363, "step": 11758 }, { "epoch": 0.3228720483250961, "grad_norm": 0.3576910197734833, "learning_rate": 1.8758985562969278e-05, "loss": 0.5135, "step": 11759 }, { "epoch": 0.3228995057660626, "grad_norm": 0.36013075709342957, "learning_rate": 1.875877716825844e-05, "loss": 0.4949, "step": 11760 }, { "epoch": 0.3229269632070291, "grad_norm": 0.35083386301994324, "learning_rate": 1.8758568757209687e-05, "loss": 0.5537, "step": 11761 }, { "epoch": 0.3229544206479956, "grad_norm": 0.386028915643692, "learning_rate": 1.8758360329823405e-05, "loss": 0.5753, "step": 11762 }, { "epoch": 0.3229818780889621, "grad_norm": 0.3761747479438782, "learning_rate": 1.8758151886099993e-05, "loss": 0.5196, "step": 11763 }, { "epoch": 0.3230093355299286, "grad_norm": 0.40966475009918213, "learning_rate": 1.8757943426039825e-05, "loss": 0.4777, "step": 11764 }, { "epoch": 0.3230367929708951, "grad_norm": 0.3564780354499817, "learning_rate": 1.87577349496433e-05, "loss": 0.5493, "step": 11765 }, { "epoch": 0.3230642504118616, "grad_norm": 0.35177674889564514, "learning_rate": 1.8757526456910804e-05, "loss": 0.5447, "step": 11766 }, { "epoch": 0.3230917078528281, "grad_norm": 0.41459929943084717, "learning_rate": 1.8757317947842727e-05, "loss": 0.5504, "step": 11767 }, { "epoch": 0.32311916529379464, "grad_norm": 0.33840495347976685, "learning_rate": 1.8757109422439456e-05, "loss": 0.5479, "step": 11768 }, { "epoch": 0.32314662273476114, "grad_norm": 0.3994670808315277, "learning_rate": 1.8756900880701382e-05, "loss": 0.5682, "step": 11769 }, { "epoch": 0.32317408017572763, "grad_norm": 0.387246310710907, "learning_rate": 1.8756692322628887e-05, "loss": 0.4904, "step": 11770 }, { "epoch": 0.3232015376166941, "grad_norm": 0.37535202503204346, "learning_rate": 1.8756483748222372e-05, "loss": 0.5555, "step": 11771 }, { "epoch": 0.3232289950576606, "grad_norm": 0.4275708496570587, "learning_rate": 1.8756275157482223e-05, "loss": 0.5589, "step": 11772 }, { "epoch": 0.3232564524986271, "grad_norm": 0.3694626986980438, "learning_rate": 1.875606655040882e-05, "loss": 0.627, "step": 11773 }, { "epoch": 0.3232839099395936, "grad_norm": 0.36067405343055725, "learning_rate": 1.875585792700256e-05, "loss": 0.5178, "step": 11774 }, { "epoch": 0.32331136738056016, "grad_norm": 0.4087122976779938, "learning_rate": 1.8755649287263832e-05, "loss": 0.5461, "step": 11775 }, { "epoch": 0.32333882482152665, "grad_norm": 0.3742195963859558, "learning_rate": 1.8755440631193024e-05, "loss": 0.4426, "step": 11776 }, { "epoch": 0.32336628226249314, "grad_norm": 0.3824004530906677, "learning_rate": 1.875523195879052e-05, "loss": 0.5832, "step": 11777 }, { "epoch": 0.32339373970345964, "grad_norm": 0.41236451268196106, "learning_rate": 1.875502327005672e-05, "loss": 0.5411, "step": 11778 }, { "epoch": 0.32342119714442613, "grad_norm": 0.3324199616909027, "learning_rate": 1.8754814564992006e-05, "loss": 0.4746, "step": 11779 }, { "epoch": 0.3234486545853926, "grad_norm": 0.36813125014305115, "learning_rate": 1.8754605843596767e-05, "loss": 0.4913, "step": 11780 }, { "epoch": 0.3234761120263591, "grad_norm": 0.4062739908695221, "learning_rate": 1.875439710587139e-05, "loss": 0.5379, "step": 11781 }, { "epoch": 0.32350356946732567, "grad_norm": 0.35170477628707886, "learning_rate": 1.8754188351816274e-05, "loss": 0.5287, "step": 11782 }, { "epoch": 0.32353102690829216, "grad_norm": 0.3509424030780792, "learning_rate": 1.8753979581431804e-05, "loss": 0.4769, "step": 11783 }, { "epoch": 0.32355848434925866, "grad_norm": 0.44032424688339233, "learning_rate": 1.8753770794718364e-05, "loss": 0.563, "step": 11784 }, { "epoch": 0.32358594179022515, "grad_norm": 0.8945039510726929, "learning_rate": 1.875356199167635e-05, "loss": 0.5847, "step": 11785 }, { "epoch": 0.32361339923119165, "grad_norm": 0.35138463973999023, "learning_rate": 1.8753353172306146e-05, "loss": 0.4875, "step": 11786 }, { "epoch": 0.32364085667215814, "grad_norm": 0.4001033306121826, "learning_rate": 1.8753144336608148e-05, "loss": 0.5208, "step": 11787 }, { "epoch": 0.32366831411312463, "grad_norm": 0.4111853241920471, "learning_rate": 1.8752935484582742e-05, "loss": 0.5946, "step": 11788 }, { "epoch": 0.3236957715540912, "grad_norm": 0.3526361584663391, "learning_rate": 1.8752726616230315e-05, "loss": 0.5281, "step": 11789 }, { "epoch": 0.3237232289950577, "grad_norm": 0.35789182782173157, "learning_rate": 1.8752517731551264e-05, "loss": 0.5391, "step": 11790 }, { "epoch": 0.32375068643602417, "grad_norm": 0.3532872796058655, "learning_rate": 1.8752308830545968e-05, "loss": 0.4878, "step": 11791 }, { "epoch": 0.32377814387699067, "grad_norm": 0.36151787638664246, "learning_rate": 1.8752099913214827e-05, "loss": 0.4764, "step": 11792 }, { "epoch": 0.32380560131795716, "grad_norm": 0.38197311758995056, "learning_rate": 1.8751890979558226e-05, "loss": 0.5121, "step": 11793 }, { "epoch": 0.32383305875892365, "grad_norm": 0.38917815685272217, "learning_rate": 1.8751682029576554e-05, "loss": 0.5811, "step": 11794 }, { "epoch": 0.32386051619989015, "grad_norm": 0.41152122616767883, "learning_rate": 1.87514730632702e-05, "loss": 0.5, "step": 11795 }, { "epoch": 0.3238879736408567, "grad_norm": 0.3710826337337494, "learning_rate": 1.875126408063956e-05, "loss": 0.5365, "step": 11796 }, { "epoch": 0.3239154310818232, "grad_norm": 0.4517432451248169, "learning_rate": 1.8751055081685012e-05, "loss": 0.4836, "step": 11797 }, { "epoch": 0.3239428885227897, "grad_norm": 0.43861591815948486, "learning_rate": 1.875084606640696e-05, "loss": 0.5146, "step": 11798 }, { "epoch": 0.3239703459637562, "grad_norm": 0.3344217836856842, "learning_rate": 1.8750637034805784e-05, "loss": 0.4734, "step": 11799 }, { "epoch": 0.3239978034047227, "grad_norm": 0.35802844166755676, "learning_rate": 1.8750427986881878e-05, "loss": 0.501, "step": 11800 }, { "epoch": 0.32402526084568917, "grad_norm": 0.39263108372688293, "learning_rate": 1.8750218922635633e-05, "loss": 0.6072, "step": 11801 }, { "epoch": 0.32405271828665566, "grad_norm": 0.3624120056629181, "learning_rate": 1.8750009842067433e-05, "loss": 0.4195, "step": 11802 }, { "epoch": 0.3240801757276222, "grad_norm": 0.3647159934043884, "learning_rate": 1.8749800745177674e-05, "loss": 0.5131, "step": 11803 }, { "epoch": 0.3241076331685887, "grad_norm": 0.4189095199108124, "learning_rate": 1.8749591631966745e-05, "loss": 0.5016, "step": 11804 }, { "epoch": 0.3241350906095552, "grad_norm": 0.354525089263916, "learning_rate": 1.8749382502435033e-05, "loss": 0.4989, "step": 11805 }, { "epoch": 0.3241625480505217, "grad_norm": 0.3763374984264374, "learning_rate": 1.8749173356582933e-05, "loss": 0.5879, "step": 11806 }, { "epoch": 0.3241900054914882, "grad_norm": 0.37494030594825745, "learning_rate": 1.874896419441083e-05, "loss": 0.4589, "step": 11807 }, { "epoch": 0.3242174629324547, "grad_norm": 0.3371298015117645, "learning_rate": 1.8748755015919116e-05, "loss": 0.5154, "step": 11808 }, { "epoch": 0.3242449203734212, "grad_norm": 0.36820024251937866, "learning_rate": 1.8748545821108184e-05, "loss": 0.4428, "step": 11809 }, { "epoch": 0.3242723778143877, "grad_norm": 0.3441532254219055, "learning_rate": 1.874833660997842e-05, "loss": 0.5158, "step": 11810 }, { "epoch": 0.3242998352553542, "grad_norm": 0.3371216952800751, "learning_rate": 1.8748127382530218e-05, "loss": 0.4932, "step": 11811 }, { "epoch": 0.3243272926963207, "grad_norm": 0.3727260231971741, "learning_rate": 1.8747918138763963e-05, "loss": 0.5257, "step": 11812 }, { "epoch": 0.3243547501372872, "grad_norm": 0.3732303977012634, "learning_rate": 1.8747708878680052e-05, "loss": 0.5162, "step": 11813 }, { "epoch": 0.3243822075782537, "grad_norm": 0.35399365425109863, "learning_rate": 1.8747499602278868e-05, "loss": 0.5895, "step": 11814 }, { "epoch": 0.3244096650192202, "grad_norm": 0.3818722367286682, "learning_rate": 1.874729030956081e-05, "loss": 0.5476, "step": 11815 }, { "epoch": 0.3244371224601867, "grad_norm": 0.32948315143585205, "learning_rate": 1.874708100052626e-05, "loss": 0.4715, "step": 11816 }, { "epoch": 0.32446457990115324, "grad_norm": 0.350591242313385, "learning_rate": 1.8746871675175616e-05, "loss": 0.5568, "step": 11817 }, { "epoch": 0.32449203734211973, "grad_norm": 0.36386141180992126, "learning_rate": 1.8746662333509263e-05, "loss": 0.4461, "step": 11818 }, { "epoch": 0.3245194947830862, "grad_norm": 0.3573114275932312, "learning_rate": 1.874645297552759e-05, "loss": 0.5324, "step": 11819 }, { "epoch": 0.3245469522240527, "grad_norm": 0.37623730301856995, "learning_rate": 1.8746243601230994e-05, "loss": 0.4968, "step": 11820 }, { "epoch": 0.3245744096650192, "grad_norm": 0.3793499171733856, "learning_rate": 1.874603421061986e-05, "loss": 0.5165, "step": 11821 }, { "epoch": 0.3246018671059857, "grad_norm": 0.3535768985748291, "learning_rate": 1.8745824803694583e-05, "loss": 0.5636, "step": 11822 }, { "epoch": 0.3246293245469522, "grad_norm": 0.4491533935070038, "learning_rate": 1.874561538045555e-05, "loss": 0.4895, "step": 11823 }, { "epoch": 0.32465678198791875, "grad_norm": 0.3881840705871582, "learning_rate": 1.8745405940903153e-05, "loss": 0.5451, "step": 11824 }, { "epoch": 0.32468423942888525, "grad_norm": 0.39851683378219604, "learning_rate": 1.8745196485037785e-05, "loss": 0.5568, "step": 11825 }, { "epoch": 0.32471169686985174, "grad_norm": 0.3721083998680115, "learning_rate": 1.8744987012859832e-05, "loss": 0.5492, "step": 11826 }, { "epoch": 0.32473915431081823, "grad_norm": 0.3399198055267334, "learning_rate": 1.874477752436969e-05, "loss": 0.5165, "step": 11827 }, { "epoch": 0.32476661175178473, "grad_norm": 0.32275038957595825, "learning_rate": 1.8744568019567744e-05, "loss": 0.5055, "step": 11828 }, { "epoch": 0.3247940691927512, "grad_norm": 0.387845903635025, "learning_rate": 1.8744358498454388e-05, "loss": 0.4623, "step": 11829 }, { "epoch": 0.3248215266337177, "grad_norm": 0.35860076546669006, "learning_rate": 1.8744148961030013e-05, "loss": 0.5092, "step": 11830 }, { "epoch": 0.32484898407468427, "grad_norm": 0.3902624249458313, "learning_rate": 1.8743939407295012e-05, "loss": 0.5582, "step": 11831 }, { "epoch": 0.32487644151565076, "grad_norm": 0.46027377247810364, "learning_rate": 1.874372983724977e-05, "loss": 0.5612, "step": 11832 }, { "epoch": 0.32490389895661725, "grad_norm": 0.38625526428222656, "learning_rate": 1.874352025089468e-05, "loss": 0.4977, "step": 11833 }, { "epoch": 0.32493135639758375, "grad_norm": 0.36495640873908997, "learning_rate": 1.8743310648230135e-05, "loss": 0.4983, "step": 11834 }, { "epoch": 0.32495881383855024, "grad_norm": 0.3611765503883362, "learning_rate": 1.8743101029256528e-05, "loss": 0.5061, "step": 11835 }, { "epoch": 0.32498627127951674, "grad_norm": 0.36867478489875793, "learning_rate": 1.8742891393974246e-05, "loss": 0.5109, "step": 11836 }, { "epoch": 0.32501372872048323, "grad_norm": 0.3954559862613678, "learning_rate": 1.874268174238368e-05, "loss": 0.555, "step": 11837 }, { "epoch": 0.3250411861614498, "grad_norm": 0.3238357603549957, "learning_rate": 1.874247207448522e-05, "loss": 0.4128, "step": 11838 }, { "epoch": 0.3250686436024163, "grad_norm": 0.4470655024051666, "learning_rate": 1.874226239027926e-05, "loss": 0.5528, "step": 11839 }, { "epoch": 0.32509610104338277, "grad_norm": 0.41073381900787354, "learning_rate": 1.8742052689766197e-05, "loss": 0.5409, "step": 11840 }, { "epoch": 0.32512355848434926, "grad_norm": 0.35816171765327454, "learning_rate": 1.874184297294641e-05, "loss": 0.575, "step": 11841 }, { "epoch": 0.32515101592531576, "grad_norm": 0.34618017077445984, "learning_rate": 1.8741633239820297e-05, "loss": 0.5031, "step": 11842 }, { "epoch": 0.32517847336628225, "grad_norm": 0.34049302339553833, "learning_rate": 1.8741423490388247e-05, "loss": 0.4425, "step": 11843 }, { "epoch": 0.32520593080724874, "grad_norm": 0.731560468673706, "learning_rate": 1.8741213724650654e-05, "loss": 0.4997, "step": 11844 }, { "epoch": 0.3252333882482153, "grad_norm": 0.3937719464302063, "learning_rate": 1.8741003942607907e-05, "loss": 0.5367, "step": 11845 }, { "epoch": 0.3252608456891818, "grad_norm": 0.43563374876976013, "learning_rate": 1.8740794144260396e-05, "loss": 0.4869, "step": 11846 }, { "epoch": 0.3252883031301483, "grad_norm": 0.3960205018520355, "learning_rate": 1.8740584329608517e-05, "loss": 0.6345, "step": 11847 }, { "epoch": 0.3253157605711148, "grad_norm": 0.4458274841308594, "learning_rate": 1.8740374498652656e-05, "loss": 0.4927, "step": 11848 }, { "epoch": 0.32534321801208127, "grad_norm": 0.3444967269897461, "learning_rate": 1.8740164651393207e-05, "loss": 0.5716, "step": 11849 }, { "epoch": 0.32537067545304776, "grad_norm": 0.4570190906524658, "learning_rate": 1.8739954787830563e-05, "loss": 0.5786, "step": 11850 }, { "epoch": 0.32539813289401426, "grad_norm": 0.41920405626296997, "learning_rate": 1.8739744907965113e-05, "loss": 0.5582, "step": 11851 }, { "epoch": 0.32542559033498075, "grad_norm": 0.4723065197467804, "learning_rate": 1.873953501179725e-05, "loss": 0.5044, "step": 11852 }, { "epoch": 0.3254530477759473, "grad_norm": 0.3658095598220825, "learning_rate": 1.8739325099327366e-05, "loss": 0.5598, "step": 11853 }, { "epoch": 0.3254805052169138, "grad_norm": 0.47806867957115173, "learning_rate": 1.873911517055585e-05, "loss": 0.5429, "step": 11854 }, { "epoch": 0.3255079626578803, "grad_norm": 0.3586364984512329, "learning_rate": 1.8738905225483093e-05, "loss": 0.5933, "step": 11855 }, { "epoch": 0.3255354200988468, "grad_norm": 0.3627058267593384, "learning_rate": 1.873869526410949e-05, "loss": 0.606, "step": 11856 }, { "epoch": 0.3255628775398133, "grad_norm": 0.45916688442230225, "learning_rate": 1.8738485286435432e-05, "loss": 0.5252, "step": 11857 }, { "epoch": 0.32559033498077977, "grad_norm": 0.49303504824638367, "learning_rate": 1.873827529246131e-05, "loss": 0.552, "step": 11858 }, { "epoch": 0.32561779242174627, "grad_norm": 0.5051195621490479, "learning_rate": 1.8738065282187516e-05, "loss": 0.6283, "step": 11859 }, { "epoch": 0.3256452498627128, "grad_norm": 0.4401436150074005, "learning_rate": 1.873785525561444e-05, "loss": 0.5461, "step": 11860 }, { "epoch": 0.3256727073036793, "grad_norm": 0.35721680521965027, "learning_rate": 1.8737645212742474e-05, "loss": 0.4383, "step": 11861 }, { "epoch": 0.3257001647446458, "grad_norm": 1.262752890586853, "learning_rate": 1.8737435153572017e-05, "loss": 0.3915, "step": 11862 }, { "epoch": 0.3257276221856123, "grad_norm": 0.3267965316772461, "learning_rate": 1.873722507810345e-05, "loss": 0.4543, "step": 11863 }, { "epoch": 0.3257550796265788, "grad_norm": 0.35942745208740234, "learning_rate": 1.8737014986337167e-05, "loss": 0.5768, "step": 11864 }, { "epoch": 0.3257825370675453, "grad_norm": 0.32210710644721985, "learning_rate": 1.8736804878273566e-05, "loss": 0.5179, "step": 11865 }, { "epoch": 0.3258099945085118, "grad_norm": 0.3873448669910431, "learning_rate": 1.8736594753913038e-05, "loss": 0.5931, "step": 11866 }, { "epoch": 0.32583745194947833, "grad_norm": 0.3410918712615967, "learning_rate": 1.873638461325597e-05, "loss": 0.5366, "step": 11867 }, { "epoch": 0.3258649093904448, "grad_norm": 0.3944343328475952, "learning_rate": 1.8736174456302755e-05, "loss": 0.4895, "step": 11868 }, { "epoch": 0.3258923668314113, "grad_norm": 0.3842236399650574, "learning_rate": 1.8735964283053786e-05, "loss": 0.5166, "step": 11869 }, { "epoch": 0.3259198242723778, "grad_norm": 0.4130324721336365, "learning_rate": 1.8735754093509458e-05, "loss": 0.5286, "step": 11870 }, { "epoch": 0.3259472817133443, "grad_norm": 0.562262237071991, "learning_rate": 1.873554388767016e-05, "loss": 0.6011, "step": 11871 }, { "epoch": 0.3259747391543108, "grad_norm": 0.3272240459918976, "learning_rate": 1.873533366553628e-05, "loss": 0.5056, "step": 11872 }, { "epoch": 0.3260021965952773, "grad_norm": 0.3584408760070801, "learning_rate": 1.873512342710822e-05, "loss": 0.5003, "step": 11873 }, { "epoch": 0.32602965403624384, "grad_norm": 0.355514794588089, "learning_rate": 1.8734913172386368e-05, "loss": 0.4823, "step": 11874 }, { "epoch": 0.32605711147721034, "grad_norm": 0.3622104823589325, "learning_rate": 1.873470290137111e-05, "loss": 0.5491, "step": 11875 }, { "epoch": 0.32608456891817683, "grad_norm": 0.45517709851264954, "learning_rate": 1.8734492614062847e-05, "loss": 0.6096, "step": 11876 }, { "epoch": 0.3261120263591433, "grad_norm": 0.40321797132492065, "learning_rate": 1.8734282310461965e-05, "loss": 0.6014, "step": 11877 }, { "epoch": 0.3261394838001098, "grad_norm": 0.35896438360214233, "learning_rate": 1.873407199056886e-05, "loss": 0.5423, "step": 11878 }, { "epoch": 0.3261669412410763, "grad_norm": 0.34002968668937683, "learning_rate": 1.8733861654383923e-05, "loss": 0.511, "step": 11879 }, { "epoch": 0.3261943986820428, "grad_norm": 0.4070585370063782, "learning_rate": 1.873365130190755e-05, "loss": 0.5394, "step": 11880 }, { "epoch": 0.32622185612300936, "grad_norm": 0.3630584180355072, "learning_rate": 1.8733440933140127e-05, "loss": 0.4681, "step": 11881 }, { "epoch": 0.32624931356397585, "grad_norm": 0.38701823353767395, "learning_rate": 1.8733230548082048e-05, "loss": 0.4497, "step": 11882 }, { "epoch": 0.32627677100494235, "grad_norm": 0.3601362407207489, "learning_rate": 1.8733020146733706e-05, "loss": 0.4424, "step": 11883 }, { "epoch": 0.32630422844590884, "grad_norm": 0.3628718852996826, "learning_rate": 1.8732809729095498e-05, "loss": 0.5318, "step": 11884 }, { "epoch": 0.32633168588687533, "grad_norm": 0.40647241473197937, "learning_rate": 1.873259929516781e-05, "loss": 0.5951, "step": 11885 }, { "epoch": 0.3263591433278418, "grad_norm": 0.3834950923919678, "learning_rate": 1.8732388844951036e-05, "loss": 0.5224, "step": 11886 }, { "epoch": 0.3263866007688083, "grad_norm": 0.35455188155174255, "learning_rate": 1.8732178378445572e-05, "loss": 0.5697, "step": 11887 }, { "epoch": 0.32641405820977487, "grad_norm": 0.36037057638168335, "learning_rate": 1.8731967895651808e-05, "loss": 0.5764, "step": 11888 }, { "epoch": 0.32644151565074137, "grad_norm": 0.4315629303455353, "learning_rate": 1.8731757396570138e-05, "loss": 0.5663, "step": 11889 }, { "epoch": 0.32646897309170786, "grad_norm": 0.3512505292892456, "learning_rate": 1.8731546881200953e-05, "loss": 0.4588, "step": 11890 }, { "epoch": 0.32649643053267435, "grad_norm": 0.3286362290382385, "learning_rate": 1.8731336349544646e-05, "loss": 0.5014, "step": 11891 }, { "epoch": 0.32652388797364085, "grad_norm": 0.3424447178840637, "learning_rate": 1.873112580160161e-05, "loss": 0.4551, "step": 11892 }, { "epoch": 0.32655134541460734, "grad_norm": 0.3483246862888336, "learning_rate": 1.8730915237372237e-05, "loss": 0.4615, "step": 11893 }, { "epoch": 0.32657880285557384, "grad_norm": 0.380545437335968, "learning_rate": 1.8730704656856918e-05, "loss": 0.5952, "step": 11894 }, { "epoch": 0.3266062602965404, "grad_norm": 0.3417699933052063, "learning_rate": 1.8730494060056052e-05, "loss": 0.491, "step": 11895 }, { "epoch": 0.3266337177375069, "grad_norm": 0.3761958181858063, "learning_rate": 1.873028344697003e-05, "loss": 0.5725, "step": 11896 }, { "epoch": 0.3266611751784734, "grad_norm": 0.3180446922779083, "learning_rate": 1.873007281759924e-05, "loss": 0.4656, "step": 11897 }, { "epoch": 0.32668863261943987, "grad_norm": 0.3801502287387848, "learning_rate": 1.8729862171944074e-05, "loss": 0.5118, "step": 11898 }, { "epoch": 0.32671609006040636, "grad_norm": 0.35072335600852966, "learning_rate": 1.8729651510004935e-05, "loss": 0.4917, "step": 11899 }, { "epoch": 0.32674354750137286, "grad_norm": 0.39461833238601685, "learning_rate": 1.8729440831782207e-05, "loss": 0.5104, "step": 11900 }, { "epoch": 0.32677100494233935, "grad_norm": 0.3451365530490875, "learning_rate": 1.8729230137276287e-05, "loss": 0.521, "step": 11901 }, { "epoch": 0.3267984623833059, "grad_norm": 0.3301972448825836, "learning_rate": 1.8729019426487565e-05, "loss": 0.406, "step": 11902 }, { "epoch": 0.3268259198242724, "grad_norm": 0.34601786732673645, "learning_rate": 1.8728808699416437e-05, "loss": 0.501, "step": 11903 }, { "epoch": 0.3268533772652389, "grad_norm": 0.35476481914520264, "learning_rate": 1.8728597956063293e-05, "loss": 0.4915, "step": 11904 }, { "epoch": 0.3268808347062054, "grad_norm": 0.35447824001312256, "learning_rate": 1.8728387196428532e-05, "loss": 0.5726, "step": 11905 }, { "epoch": 0.3269082921471719, "grad_norm": 0.3805021047592163, "learning_rate": 1.872817642051254e-05, "loss": 0.6128, "step": 11906 }, { "epoch": 0.32693574958813837, "grad_norm": 0.34017351269721985, "learning_rate": 1.8727965628315713e-05, "loss": 0.4998, "step": 11907 }, { "epoch": 0.32696320702910486, "grad_norm": 0.3979431986808777, "learning_rate": 1.8727754819838448e-05, "loss": 0.6182, "step": 11908 }, { "epoch": 0.3269906644700714, "grad_norm": 0.41271457076072693, "learning_rate": 1.872754399508113e-05, "loss": 0.6475, "step": 11909 }, { "epoch": 0.3270181219110379, "grad_norm": 0.8775897026062012, "learning_rate": 1.872733315404416e-05, "loss": 0.5523, "step": 11910 }, { "epoch": 0.3270455793520044, "grad_norm": 0.4202950596809387, "learning_rate": 1.872712229672793e-05, "loss": 0.5234, "step": 11911 }, { "epoch": 0.3270730367929709, "grad_norm": 0.38990646600723267, "learning_rate": 1.8726911423132827e-05, "loss": 0.5206, "step": 11912 }, { "epoch": 0.3271004942339374, "grad_norm": 0.4083561897277832, "learning_rate": 1.872670053325925e-05, "loss": 0.5644, "step": 11913 }, { "epoch": 0.3271279516749039, "grad_norm": 0.3420838415622711, "learning_rate": 1.8726489627107593e-05, "loss": 0.5072, "step": 11914 }, { "epoch": 0.3271554091158704, "grad_norm": 0.4254245162010193, "learning_rate": 1.8726278704678246e-05, "loss": 0.545, "step": 11915 }, { "epoch": 0.3271828665568369, "grad_norm": 0.37963324785232544, "learning_rate": 1.8726067765971606e-05, "loss": 0.5568, "step": 11916 }, { "epoch": 0.3272103239978034, "grad_norm": 0.377331405878067, "learning_rate": 1.8725856810988063e-05, "loss": 0.5651, "step": 11917 }, { "epoch": 0.3272377814387699, "grad_norm": 0.37602683901786804, "learning_rate": 1.8725645839728012e-05, "loss": 0.5085, "step": 11918 }, { "epoch": 0.3272652388797364, "grad_norm": 0.43218210339546204, "learning_rate": 1.8725434852191847e-05, "loss": 0.631, "step": 11919 }, { "epoch": 0.3272926963207029, "grad_norm": 0.3711969256401062, "learning_rate": 1.8725223848379965e-05, "loss": 0.5169, "step": 11920 }, { "epoch": 0.3273201537616694, "grad_norm": 0.3311774730682373, "learning_rate": 1.872501282829275e-05, "loss": 0.5882, "step": 11921 }, { "epoch": 0.3273476112026359, "grad_norm": 0.3809075355529785, "learning_rate": 1.8724801791930605e-05, "loss": 0.5045, "step": 11922 }, { "epoch": 0.32737506864360244, "grad_norm": 0.35884279012680054, "learning_rate": 1.8724590739293917e-05, "loss": 0.4942, "step": 11923 }, { "epoch": 0.32740252608456893, "grad_norm": 0.3947276771068573, "learning_rate": 1.8724379670383083e-05, "loss": 0.6038, "step": 11924 }, { "epoch": 0.32742998352553543, "grad_norm": 0.36902132630348206, "learning_rate": 1.87241685851985e-05, "loss": 0.4684, "step": 11925 }, { "epoch": 0.3274574409665019, "grad_norm": 0.3667244017124176, "learning_rate": 1.8723957483740555e-05, "loss": 0.5358, "step": 11926 }, { "epoch": 0.3274848984074684, "grad_norm": 0.3329610228538513, "learning_rate": 1.8723746366009646e-05, "loss": 0.4196, "step": 11927 }, { "epoch": 0.3275123558484349, "grad_norm": 0.3810889422893524, "learning_rate": 1.8723535232006163e-05, "loss": 0.5472, "step": 11928 }, { "epoch": 0.3275398132894014, "grad_norm": 0.31712639331817627, "learning_rate": 1.8723324081730507e-05, "loss": 0.396, "step": 11929 }, { "epoch": 0.32756727073036795, "grad_norm": 0.39449983835220337, "learning_rate": 1.8723112915183063e-05, "loss": 0.5248, "step": 11930 }, { "epoch": 0.32759472817133445, "grad_norm": 0.3842149078845978, "learning_rate": 1.8722901732364235e-05, "loss": 0.5218, "step": 11931 }, { "epoch": 0.32762218561230094, "grad_norm": 0.3618023097515106, "learning_rate": 1.8722690533274405e-05, "loss": 0.4748, "step": 11932 }, { "epoch": 0.32764964305326744, "grad_norm": 0.32393065094947815, "learning_rate": 1.8722479317913977e-05, "loss": 0.3915, "step": 11933 }, { "epoch": 0.32767710049423393, "grad_norm": 0.41031593084335327, "learning_rate": 1.872226808628334e-05, "loss": 0.5697, "step": 11934 }, { "epoch": 0.3277045579352004, "grad_norm": 0.3688621520996094, "learning_rate": 1.872205683838289e-05, "loss": 0.5438, "step": 11935 }, { "epoch": 0.3277320153761669, "grad_norm": 0.40309765934944153, "learning_rate": 1.872184557421302e-05, "loss": 0.4713, "step": 11936 }, { "epoch": 0.32775947281713347, "grad_norm": 0.3495140075683594, "learning_rate": 1.8721634293774123e-05, "loss": 0.546, "step": 11937 }, { "epoch": 0.32778693025809996, "grad_norm": 0.3573471009731293, "learning_rate": 1.87214229970666e-05, "loss": 0.4585, "step": 11938 }, { "epoch": 0.32781438769906646, "grad_norm": 0.3342423439025879, "learning_rate": 1.872121168409083e-05, "loss": 0.4648, "step": 11939 }, { "epoch": 0.32784184514003295, "grad_norm": 0.379768967628479, "learning_rate": 1.8721000354847224e-05, "loss": 0.4978, "step": 11940 }, { "epoch": 0.32786930258099944, "grad_norm": 0.36495450139045715, "learning_rate": 1.8720789009336165e-05, "loss": 0.5121, "step": 11941 }, { "epoch": 0.32789676002196594, "grad_norm": 0.37773025035858154, "learning_rate": 1.8720577647558052e-05, "loss": 0.5196, "step": 11942 }, { "epoch": 0.32792421746293243, "grad_norm": 0.35105395317077637, "learning_rate": 1.872036626951328e-05, "loss": 0.6151, "step": 11943 }, { "epoch": 0.327951674903899, "grad_norm": 0.3671186566352844, "learning_rate": 1.8720154875202242e-05, "loss": 0.5802, "step": 11944 }, { "epoch": 0.3279791323448655, "grad_norm": 0.36610904335975647, "learning_rate": 1.871994346462533e-05, "loss": 0.4996, "step": 11945 }, { "epoch": 0.32800658978583197, "grad_norm": 0.41155996918678284, "learning_rate": 1.8719732037782945e-05, "loss": 0.531, "step": 11946 }, { "epoch": 0.32803404722679846, "grad_norm": 0.3811686635017395, "learning_rate": 1.871952059467547e-05, "loss": 0.5636, "step": 11947 }, { "epoch": 0.32806150466776496, "grad_norm": 0.3475353717803955, "learning_rate": 1.871930913530331e-05, "loss": 0.4653, "step": 11948 }, { "epoch": 0.32808896210873145, "grad_norm": 0.3832700550556183, "learning_rate": 1.8719097659666854e-05, "loss": 0.5519, "step": 11949 }, { "epoch": 0.32811641954969795, "grad_norm": 0.40531888604164124, "learning_rate": 1.8718886167766503e-05, "loss": 0.6075, "step": 11950 }, { "epoch": 0.3281438769906645, "grad_norm": 0.36197394132614136, "learning_rate": 1.8718674659602638e-05, "loss": 0.5517, "step": 11951 }, { "epoch": 0.328171334431631, "grad_norm": 0.37411078810691833, "learning_rate": 1.871846313517567e-05, "loss": 0.4971, "step": 11952 }, { "epoch": 0.3281987918725975, "grad_norm": 0.34037643671035767, "learning_rate": 1.871825159448598e-05, "loss": 0.5419, "step": 11953 }, { "epoch": 0.328226249313564, "grad_norm": 0.3751624524593353, "learning_rate": 1.871804003753397e-05, "loss": 0.5588, "step": 11954 }, { "epoch": 0.32825370675453047, "grad_norm": 0.32654792070388794, "learning_rate": 1.8717828464320035e-05, "loss": 0.5553, "step": 11955 }, { "epoch": 0.32828116419549697, "grad_norm": 0.3332383930683136, "learning_rate": 1.8717616874844565e-05, "loss": 0.4837, "step": 11956 }, { "epoch": 0.32830862163646346, "grad_norm": 0.3685525953769684, "learning_rate": 1.8717405269107956e-05, "loss": 0.6043, "step": 11957 }, { "epoch": 0.32833607907743, "grad_norm": 0.4895397424697876, "learning_rate": 1.871719364711061e-05, "loss": 0.5197, "step": 11958 }, { "epoch": 0.3283635365183965, "grad_norm": 0.37867289781570435, "learning_rate": 1.871698200885291e-05, "loss": 0.5401, "step": 11959 }, { "epoch": 0.328390993959363, "grad_norm": 0.3331539034843445, "learning_rate": 1.8716770354335256e-05, "loss": 0.4719, "step": 11960 }, { "epoch": 0.3284184514003295, "grad_norm": 0.3324986398220062, "learning_rate": 1.8716558683558046e-05, "loss": 0.486, "step": 11961 }, { "epoch": 0.328445908841296, "grad_norm": 0.3692602217197418, "learning_rate": 1.871634699652167e-05, "loss": 0.5442, "step": 11962 }, { "epoch": 0.3284733662822625, "grad_norm": 0.3447672724723816, "learning_rate": 1.8716135293226524e-05, "loss": 0.5574, "step": 11963 }, { "epoch": 0.328500823723229, "grad_norm": 0.4328417181968689, "learning_rate": 1.871592357367301e-05, "loss": 0.567, "step": 11964 }, { "epoch": 0.3285282811641955, "grad_norm": 0.41026797890663147, "learning_rate": 1.8715711837861513e-05, "loss": 0.5316, "step": 11965 }, { "epoch": 0.328555738605162, "grad_norm": 0.32474908232688904, "learning_rate": 1.8715500085792427e-05, "loss": 0.4859, "step": 11966 }, { "epoch": 0.3285831960461285, "grad_norm": 0.4551509916782379, "learning_rate": 1.8715288317466157e-05, "loss": 0.5501, "step": 11967 }, { "epoch": 0.328610653487095, "grad_norm": 0.3482462763786316, "learning_rate": 1.8715076532883092e-05, "loss": 0.521, "step": 11968 }, { "epoch": 0.3286381109280615, "grad_norm": 0.39024749398231506, "learning_rate": 1.8714864732043628e-05, "loss": 0.4831, "step": 11969 }, { "epoch": 0.328665568369028, "grad_norm": 0.39094290137290955, "learning_rate": 1.871465291494816e-05, "loss": 0.4918, "step": 11970 }, { "epoch": 0.3286930258099945, "grad_norm": 0.3801063001155853, "learning_rate": 1.871444108159708e-05, "loss": 0.5443, "step": 11971 }, { "epoch": 0.32872048325096104, "grad_norm": 0.3376005291938782, "learning_rate": 1.871422923199079e-05, "loss": 0.5127, "step": 11972 }, { "epoch": 0.32874794069192753, "grad_norm": 0.36973005533218384, "learning_rate": 1.871401736612968e-05, "loss": 0.5915, "step": 11973 }, { "epoch": 0.328775398132894, "grad_norm": 0.3496617376804352, "learning_rate": 1.8713805484014146e-05, "loss": 0.4672, "step": 11974 }, { "epoch": 0.3288028555738605, "grad_norm": 0.4258404076099396, "learning_rate": 1.8713593585644584e-05, "loss": 0.5443, "step": 11975 }, { "epoch": 0.328830313014827, "grad_norm": 0.36212247610092163, "learning_rate": 1.8713381671021385e-05, "loss": 0.5174, "step": 11976 }, { "epoch": 0.3288577704557935, "grad_norm": 0.32974421977996826, "learning_rate": 1.8713169740144952e-05, "loss": 0.5087, "step": 11977 }, { "epoch": 0.32888522789676, "grad_norm": 0.3839033842086792, "learning_rate": 1.871295779301568e-05, "loss": 0.5308, "step": 11978 }, { "epoch": 0.32891268533772655, "grad_norm": 0.3841021656990051, "learning_rate": 1.8712745829633956e-05, "loss": 0.5909, "step": 11979 }, { "epoch": 0.32894014277869305, "grad_norm": 0.3926509618759155, "learning_rate": 1.871253385000018e-05, "loss": 0.4797, "step": 11980 }, { "epoch": 0.32896760021965954, "grad_norm": 0.371751606464386, "learning_rate": 1.8712321854114747e-05, "loss": 0.4602, "step": 11981 }, { "epoch": 0.32899505766062603, "grad_norm": 0.3857334554195404, "learning_rate": 1.8712109841978056e-05, "loss": 0.5749, "step": 11982 }, { "epoch": 0.3290225151015925, "grad_norm": 0.34016844630241394, "learning_rate": 1.87118978135905e-05, "loss": 0.5286, "step": 11983 }, { "epoch": 0.329049972542559, "grad_norm": 0.48504170775413513, "learning_rate": 1.8711685768952472e-05, "loss": 0.5168, "step": 11984 }, { "epoch": 0.3290774299835255, "grad_norm": 0.3634660243988037, "learning_rate": 1.871147370806437e-05, "loss": 0.4876, "step": 11985 }, { "epoch": 0.329104887424492, "grad_norm": 0.42379602789878845, "learning_rate": 1.871126163092659e-05, "loss": 0.4987, "step": 11986 }, { "epoch": 0.32913234486545856, "grad_norm": 0.32686731219291687, "learning_rate": 1.8711049537539524e-05, "loss": 0.4696, "step": 11987 }, { "epoch": 0.32915980230642505, "grad_norm": 0.4843846261501312, "learning_rate": 1.8710837427903574e-05, "loss": 0.5487, "step": 11988 }, { "epoch": 0.32918725974739155, "grad_norm": 0.3684459328651428, "learning_rate": 1.871062530201913e-05, "loss": 0.4861, "step": 11989 }, { "epoch": 0.32921471718835804, "grad_norm": 0.4207896292209625, "learning_rate": 1.8710413159886588e-05, "loss": 0.5476, "step": 11990 }, { "epoch": 0.32924217462932454, "grad_norm": 0.3507305085659027, "learning_rate": 1.871020100150635e-05, "loss": 0.5349, "step": 11991 }, { "epoch": 0.32926963207029103, "grad_norm": 0.3623514175415039, "learning_rate": 1.8709988826878804e-05, "loss": 0.445, "step": 11992 }, { "epoch": 0.3292970895112575, "grad_norm": 0.4062873423099518, "learning_rate": 1.870977663600435e-05, "loss": 0.5573, "step": 11993 }, { "epoch": 0.3293245469522241, "grad_norm": 0.37034663558006287, "learning_rate": 1.8709564428883382e-05, "loss": 0.5369, "step": 11994 }, { "epoch": 0.32935200439319057, "grad_norm": 0.6620703339576721, "learning_rate": 1.8709352205516298e-05, "loss": 0.5132, "step": 11995 }, { "epoch": 0.32937946183415706, "grad_norm": 0.3737325370311737, "learning_rate": 1.8709139965903488e-05, "loss": 0.4526, "step": 11996 }, { "epoch": 0.32940691927512356, "grad_norm": 0.3490918278694153, "learning_rate": 1.8708927710045358e-05, "loss": 0.4534, "step": 11997 }, { "epoch": 0.32943437671609005, "grad_norm": 0.33697646856307983, "learning_rate": 1.8708715437942295e-05, "loss": 0.5137, "step": 11998 }, { "epoch": 0.32946183415705654, "grad_norm": 0.3654334843158722, "learning_rate": 1.8708503149594703e-05, "loss": 0.519, "step": 11999 }, { "epoch": 0.32948929159802304, "grad_norm": 0.30000773072242737, "learning_rate": 1.8708290845002966e-05, "loss": 0.4206, "step": 12000 }, { "epoch": 0.3295167490389896, "grad_norm": 0.4063723683357239, "learning_rate": 1.870807852416749e-05, "loss": 0.5022, "step": 12001 }, { "epoch": 0.3295442064799561, "grad_norm": 0.41489601135253906, "learning_rate": 1.8707866187088668e-05, "loss": 0.4791, "step": 12002 }, { "epoch": 0.3295716639209226, "grad_norm": 0.3383142352104187, "learning_rate": 1.87076538337669e-05, "loss": 0.4896, "step": 12003 }, { "epoch": 0.32959912136188907, "grad_norm": 0.39272210001945496, "learning_rate": 1.8707441464202575e-05, "loss": 0.5029, "step": 12004 }, { "epoch": 0.32962657880285556, "grad_norm": 0.40507009625434875, "learning_rate": 1.8707229078396093e-05, "loss": 0.5156, "step": 12005 }, { "epoch": 0.32965403624382206, "grad_norm": 0.44115784764289856, "learning_rate": 1.870701667634785e-05, "loss": 0.6101, "step": 12006 }, { "epoch": 0.32968149368478855, "grad_norm": 0.41089463233947754, "learning_rate": 1.8706804258058243e-05, "loss": 0.5663, "step": 12007 }, { "epoch": 0.3297089511257551, "grad_norm": 0.44675686955451965, "learning_rate": 1.8706591823527665e-05, "loss": 0.4388, "step": 12008 }, { "epoch": 0.3297364085667216, "grad_norm": 0.3847343623638153, "learning_rate": 1.8706379372756513e-05, "loss": 0.5279, "step": 12009 }, { "epoch": 0.3297638660076881, "grad_norm": 0.3951273560523987, "learning_rate": 1.870616690574519e-05, "loss": 0.5042, "step": 12010 }, { "epoch": 0.3297913234486546, "grad_norm": 0.35910746455192566, "learning_rate": 1.8705954422494082e-05, "loss": 0.5204, "step": 12011 }, { "epoch": 0.3298187808896211, "grad_norm": 0.40818873047828674, "learning_rate": 1.870574192300359e-05, "loss": 0.4511, "step": 12012 }, { "epoch": 0.32984623833058757, "grad_norm": 0.3781450092792511, "learning_rate": 1.8705529407274117e-05, "loss": 0.5801, "step": 12013 }, { "epoch": 0.32987369577155407, "grad_norm": 0.34624141454696655, "learning_rate": 1.870531687530605e-05, "loss": 0.4921, "step": 12014 }, { "epoch": 0.3299011532125206, "grad_norm": 0.4332934319972992, "learning_rate": 1.8705104327099786e-05, "loss": 0.5839, "step": 12015 }, { "epoch": 0.3299286106534871, "grad_norm": 0.38911232352256775, "learning_rate": 1.870489176265573e-05, "loss": 0.5462, "step": 12016 }, { "epoch": 0.3299560680944536, "grad_norm": 0.39207619428634644, "learning_rate": 1.8704679181974268e-05, "loss": 0.5065, "step": 12017 }, { "epoch": 0.3299835255354201, "grad_norm": 0.4410933554172516, "learning_rate": 1.87044665850558e-05, "loss": 0.5662, "step": 12018 }, { "epoch": 0.3300109829763866, "grad_norm": 0.3646087944507599, "learning_rate": 1.8704253971900726e-05, "loss": 0.5274, "step": 12019 }, { "epoch": 0.3300384404173531, "grad_norm": 0.32533836364746094, "learning_rate": 1.870404134250944e-05, "loss": 0.431, "step": 12020 }, { "epoch": 0.3300658978583196, "grad_norm": 0.3370291590690613, "learning_rate": 1.870382869688234e-05, "loss": 0.4744, "step": 12021 }, { "epoch": 0.33009335529928613, "grad_norm": 0.4749324917793274, "learning_rate": 1.8703616035019817e-05, "loss": 0.5805, "step": 12022 }, { "epoch": 0.3301208127402526, "grad_norm": 0.3739639222621918, "learning_rate": 1.870340335692228e-05, "loss": 0.5696, "step": 12023 }, { "epoch": 0.3301482701812191, "grad_norm": 0.37467482686042786, "learning_rate": 1.870319066259011e-05, "loss": 0.523, "step": 12024 }, { "epoch": 0.3301757276221856, "grad_norm": 0.3902188539505005, "learning_rate": 1.8702977952023715e-05, "loss": 0.5692, "step": 12025 }, { "epoch": 0.3302031850631521, "grad_norm": 0.34801313281059265, "learning_rate": 1.870276522522349e-05, "loss": 0.5901, "step": 12026 }, { "epoch": 0.3302306425041186, "grad_norm": 0.5108718276023865, "learning_rate": 1.8702552482189827e-05, "loss": 0.5245, "step": 12027 }, { "epoch": 0.3302580999450851, "grad_norm": 0.4149084687232971, "learning_rate": 1.8702339722923127e-05, "loss": 0.5488, "step": 12028 }, { "epoch": 0.33028555738605164, "grad_norm": 0.3081459701061249, "learning_rate": 1.870212694742379e-05, "loss": 0.4508, "step": 12029 }, { "epoch": 0.33031301482701814, "grad_norm": 0.3462482690811157, "learning_rate": 1.8701914155692207e-05, "loss": 0.455, "step": 12030 }, { "epoch": 0.33034047226798463, "grad_norm": 0.3526279926300049, "learning_rate": 1.8701701347728774e-05, "loss": 0.5595, "step": 12031 }, { "epoch": 0.3303679297089511, "grad_norm": 0.3455398380756378, "learning_rate": 1.870148852353389e-05, "loss": 0.4702, "step": 12032 }, { "epoch": 0.3303953871499176, "grad_norm": 0.3110741972923279, "learning_rate": 1.8701275683107957e-05, "loss": 0.4577, "step": 12033 }, { "epoch": 0.3304228445908841, "grad_norm": 0.39964115619659424, "learning_rate": 1.8701062826451367e-05, "loss": 0.5103, "step": 12034 }, { "epoch": 0.3304503020318506, "grad_norm": 0.46287909150123596, "learning_rate": 1.8700849953564514e-05, "loss": 0.5369, "step": 12035 }, { "epoch": 0.33047775947281716, "grad_norm": 0.3270765244960785, "learning_rate": 1.8700637064447803e-05, "loss": 0.4546, "step": 12036 }, { "epoch": 0.33050521691378365, "grad_norm": 0.3289569318294525, "learning_rate": 1.8700424159101624e-05, "loss": 0.4793, "step": 12037 }, { "epoch": 0.33053267435475014, "grad_norm": 0.3470202088356018, "learning_rate": 1.870021123752638e-05, "loss": 0.543, "step": 12038 }, { "epoch": 0.33056013179571664, "grad_norm": 0.3927311301231384, "learning_rate": 1.869999829972246e-05, "loss": 0.4478, "step": 12039 }, { "epoch": 0.33058758923668313, "grad_norm": 0.39717915654182434, "learning_rate": 1.8699785345690272e-05, "loss": 0.5784, "step": 12040 }, { "epoch": 0.3306150466776496, "grad_norm": 0.32537826895713806, "learning_rate": 1.8699572375430206e-05, "loss": 0.4518, "step": 12041 }, { "epoch": 0.3306425041186161, "grad_norm": 0.3489329218864441, "learning_rate": 1.869935938894266e-05, "loss": 0.4384, "step": 12042 }, { "epoch": 0.33066996155958267, "grad_norm": 0.4078299403190613, "learning_rate": 1.8699146386228035e-05, "loss": 0.4905, "step": 12043 }, { "epoch": 0.33069741900054916, "grad_norm": 0.3919115364551544, "learning_rate": 1.8698933367286722e-05, "loss": 0.5269, "step": 12044 }, { "epoch": 0.33072487644151566, "grad_norm": 0.37842345237731934, "learning_rate": 1.8698720332119124e-05, "loss": 0.5223, "step": 12045 }, { "epoch": 0.33075233388248215, "grad_norm": 0.35697075724601746, "learning_rate": 1.8698507280725634e-05, "loss": 0.5165, "step": 12046 }, { "epoch": 0.33077979132344865, "grad_norm": 0.3557453155517578, "learning_rate": 1.8698294213106653e-05, "loss": 0.5171, "step": 12047 }, { "epoch": 0.33080724876441514, "grad_norm": 0.40448036789894104, "learning_rate": 1.869808112926258e-05, "loss": 0.5101, "step": 12048 }, { "epoch": 0.33083470620538163, "grad_norm": 0.3700740337371826, "learning_rate": 1.8697868029193805e-05, "loss": 0.4455, "step": 12049 }, { "epoch": 0.3308621636463482, "grad_norm": 0.3457902669906616, "learning_rate": 1.8697654912900733e-05, "loss": 0.4835, "step": 12050 }, { "epoch": 0.3308896210873147, "grad_norm": 0.402881383895874, "learning_rate": 1.8697441780383757e-05, "loss": 0.5527, "step": 12051 }, { "epoch": 0.33091707852828117, "grad_norm": 0.3516930341720581, "learning_rate": 1.8697228631643275e-05, "loss": 0.5179, "step": 12052 }, { "epoch": 0.33094453596924767, "grad_norm": 0.35406506061553955, "learning_rate": 1.869701546667969e-05, "loss": 0.5472, "step": 12053 }, { "epoch": 0.33097199341021416, "grad_norm": 0.4205666184425354, "learning_rate": 1.8696802285493392e-05, "loss": 0.6205, "step": 12054 }, { "epoch": 0.33099945085118065, "grad_norm": 0.3455674946308136, "learning_rate": 1.8696589088084786e-05, "loss": 0.5433, "step": 12055 }, { "epoch": 0.33102690829214715, "grad_norm": 0.3698217272758484, "learning_rate": 1.869637587445426e-05, "loss": 0.4609, "step": 12056 }, { "epoch": 0.3310543657331137, "grad_norm": 0.391926646232605, "learning_rate": 1.8696162644602222e-05, "loss": 0.5282, "step": 12057 }, { "epoch": 0.3310818231740802, "grad_norm": 0.37686607241630554, "learning_rate": 1.8695949398529062e-05, "loss": 0.5662, "step": 12058 }, { "epoch": 0.3311092806150467, "grad_norm": 0.3250347673892975, "learning_rate": 1.8695736136235183e-05, "loss": 0.5073, "step": 12059 }, { "epoch": 0.3311367380560132, "grad_norm": 0.3344913423061371, "learning_rate": 1.869552285772098e-05, "loss": 0.4769, "step": 12060 }, { "epoch": 0.3311641954969797, "grad_norm": 0.3916247487068176, "learning_rate": 1.869530956298685e-05, "loss": 0.5883, "step": 12061 }, { "epoch": 0.33119165293794617, "grad_norm": 0.38963887095451355, "learning_rate": 1.8695096252033196e-05, "loss": 0.557, "step": 12062 }, { "epoch": 0.33121911037891266, "grad_norm": 0.35012879967689514, "learning_rate": 1.8694882924860408e-05, "loss": 0.5076, "step": 12063 }, { "epoch": 0.3312465678198792, "grad_norm": 0.4036499559879303, "learning_rate": 1.869466958146889e-05, "loss": 0.5589, "step": 12064 }, { "epoch": 0.3312740252608457, "grad_norm": 0.4382277727127075, "learning_rate": 1.8694456221859042e-05, "loss": 0.6802, "step": 12065 }, { "epoch": 0.3313014827018122, "grad_norm": 0.593174934387207, "learning_rate": 1.8694242846031256e-05, "loss": 0.496, "step": 12066 }, { "epoch": 0.3313289401427787, "grad_norm": 0.3283044993877411, "learning_rate": 1.869402945398593e-05, "loss": 0.492, "step": 12067 }, { "epoch": 0.3313563975837452, "grad_norm": 0.4231805205345154, "learning_rate": 1.8693816045723466e-05, "loss": 0.5351, "step": 12068 }, { "epoch": 0.3313838550247117, "grad_norm": 0.37303122878074646, "learning_rate": 1.869360262124426e-05, "loss": 0.6117, "step": 12069 }, { "epoch": 0.3314113124656782, "grad_norm": 0.3678549528121948, "learning_rate": 1.869338918054871e-05, "loss": 0.5225, "step": 12070 }, { "epoch": 0.3314387699066447, "grad_norm": 0.40193289518356323, "learning_rate": 1.8693175723637215e-05, "loss": 0.4503, "step": 12071 }, { "epoch": 0.3314662273476112, "grad_norm": 0.5912889838218689, "learning_rate": 1.8692962250510175e-05, "loss": 0.5612, "step": 12072 }, { "epoch": 0.3314936847885777, "grad_norm": 0.33107373118400574, "learning_rate": 1.8692748761167984e-05, "loss": 0.5548, "step": 12073 }, { "epoch": 0.3315211422295442, "grad_norm": 0.39332205057144165, "learning_rate": 1.869253525561104e-05, "loss": 0.4967, "step": 12074 }, { "epoch": 0.3315485996705107, "grad_norm": 0.3109099268913269, "learning_rate": 1.8692321733839745e-05, "loss": 0.443, "step": 12075 }, { "epoch": 0.3315760571114772, "grad_norm": 0.3873143196105957, "learning_rate": 1.8692108195854497e-05, "loss": 0.4905, "step": 12076 }, { "epoch": 0.3316035145524437, "grad_norm": 0.3872961401939392, "learning_rate": 1.869189464165569e-05, "loss": 0.5468, "step": 12077 }, { "epoch": 0.33163097199341024, "grad_norm": 0.3680601418018341, "learning_rate": 1.869168107124373e-05, "loss": 0.4542, "step": 12078 }, { "epoch": 0.33165842943437673, "grad_norm": 0.349619060754776, "learning_rate": 1.8691467484619003e-05, "loss": 0.5134, "step": 12079 }, { "epoch": 0.3316858868753432, "grad_norm": 0.34198543429374695, "learning_rate": 1.8691253881781925e-05, "loss": 0.4896, "step": 12080 }, { "epoch": 0.3317133443163097, "grad_norm": 0.4209119379520416, "learning_rate": 1.8691040262732877e-05, "loss": 0.5358, "step": 12081 }, { "epoch": 0.3317408017572762, "grad_norm": 0.3897978961467743, "learning_rate": 1.8690826627472268e-05, "loss": 0.4977, "step": 12082 }, { "epoch": 0.3317682591982427, "grad_norm": 0.37333399057388306, "learning_rate": 1.8690612976000493e-05, "loss": 0.6173, "step": 12083 }, { "epoch": 0.3317957166392092, "grad_norm": 0.37462806701660156, "learning_rate": 1.8690399308317954e-05, "loss": 0.5824, "step": 12084 }, { "epoch": 0.33182317408017575, "grad_norm": 0.44185304641723633, "learning_rate": 1.869018562442504e-05, "loss": 0.5988, "step": 12085 }, { "epoch": 0.33185063152114225, "grad_norm": 0.29703062772750854, "learning_rate": 1.8689971924322162e-05, "loss": 0.4688, "step": 12086 }, { "epoch": 0.33187808896210874, "grad_norm": 0.4677489995956421, "learning_rate": 1.8689758208009713e-05, "loss": 0.5246, "step": 12087 }, { "epoch": 0.33190554640307524, "grad_norm": 0.370398610830307, "learning_rate": 1.868954447548809e-05, "loss": 0.5618, "step": 12088 }, { "epoch": 0.33193300384404173, "grad_norm": 0.3788541257381439, "learning_rate": 1.8689330726757687e-05, "loss": 0.5289, "step": 12089 }, { "epoch": 0.3319604612850082, "grad_norm": 0.35506099462509155, "learning_rate": 1.8689116961818916e-05, "loss": 0.5538, "step": 12090 }, { "epoch": 0.3319879187259747, "grad_norm": 0.33543774485588074, "learning_rate": 1.868890318067217e-05, "loss": 0.6239, "step": 12091 }, { "epoch": 0.33201537616694127, "grad_norm": 0.34431928396224976, "learning_rate": 1.8688689383317844e-05, "loss": 0.5264, "step": 12092 }, { "epoch": 0.33204283360790776, "grad_norm": 0.33699700236320496, "learning_rate": 1.868847556975634e-05, "loss": 0.5364, "step": 12093 }, { "epoch": 0.33207029104887426, "grad_norm": 0.36346331238746643, "learning_rate": 1.8688261739988053e-05, "loss": 0.5381, "step": 12094 }, { "epoch": 0.33209774848984075, "grad_norm": 0.3682243824005127, "learning_rate": 1.8688047894013386e-05, "loss": 0.503, "step": 12095 }, { "epoch": 0.33212520593080724, "grad_norm": 0.36588090658187866, "learning_rate": 1.8687834031832735e-05, "loss": 0.5189, "step": 12096 }, { "epoch": 0.33215266337177374, "grad_norm": 0.36555665731430054, "learning_rate": 1.8687620153446505e-05, "loss": 0.5454, "step": 12097 }, { "epoch": 0.33218012081274023, "grad_norm": 0.3560894727706909, "learning_rate": 1.868740625885509e-05, "loss": 0.546, "step": 12098 }, { "epoch": 0.3322075782537068, "grad_norm": 0.35019826889038086, "learning_rate": 1.8687192348058887e-05, "loss": 0.4706, "step": 12099 }, { "epoch": 0.3322350356946733, "grad_norm": 0.38681304454803467, "learning_rate": 1.86869784210583e-05, "loss": 0.5708, "step": 12100 }, { "epoch": 0.33226249313563977, "grad_norm": 0.4226163327693939, "learning_rate": 1.8686764477853726e-05, "loss": 0.5694, "step": 12101 }, { "epoch": 0.33228995057660626, "grad_norm": 0.33914461731910706, "learning_rate": 1.868655051844556e-05, "loss": 0.5094, "step": 12102 }, { "epoch": 0.33231740801757276, "grad_norm": 0.3543728291988373, "learning_rate": 1.868633654283421e-05, "loss": 0.5009, "step": 12103 }, { "epoch": 0.33234486545853925, "grad_norm": 0.45445477962493896, "learning_rate": 1.8686122551020066e-05, "loss": 0.5518, "step": 12104 }, { "epoch": 0.33237232289950575, "grad_norm": 0.34171491861343384, "learning_rate": 1.8685908543003534e-05, "loss": 0.4789, "step": 12105 }, { "epoch": 0.3323997803404723, "grad_norm": 0.3719686269760132, "learning_rate": 1.868569451878501e-05, "loss": 0.6077, "step": 12106 }, { "epoch": 0.3324272377814388, "grad_norm": 0.40135595202445984, "learning_rate": 1.8685480478364894e-05, "loss": 0.4451, "step": 12107 }, { "epoch": 0.3324546952224053, "grad_norm": 0.36429738998413086, "learning_rate": 1.868526642174358e-05, "loss": 0.5931, "step": 12108 }, { "epoch": 0.3324821526633718, "grad_norm": 0.3630122244358063, "learning_rate": 1.8685052348921474e-05, "loss": 0.5478, "step": 12109 }, { "epoch": 0.33250961010433827, "grad_norm": 0.36155635118484497, "learning_rate": 1.8684838259898977e-05, "loss": 0.4655, "step": 12110 }, { "epoch": 0.33253706754530477, "grad_norm": 0.41808179020881653, "learning_rate": 1.868462415467648e-05, "loss": 0.4833, "step": 12111 }, { "epoch": 0.33256452498627126, "grad_norm": 0.38781073689460754, "learning_rate": 1.8684410033254392e-05, "loss": 0.4461, "step": 12112 }, { "epoch": 0.3325919824272378, "grad_norm": 0.40093326568603516, "learning_rate": 1.8684195895633105e-05, "loss": 0.5435, "step": 12113 }, { "epoch": 0.3326194398682043, "grad_norm": 1.5812658071517944, "learning_rate": 1.868398174181302e-05, "loss": 0.5875, "step": 12114 }, { "epoch": 0.3326468973091708, "grad_norm": 0.351866215467453, "learning_rate": 1.868376757179454e-05, "loss": 0.4528, "step": 12115 }, { "epoch": 0.3326743547501373, "grad_norm": 0.42574647068977356, "learning_rate": 1.868355338557806e-05, "loss": 0.5771, "step": 12116 }, { "epoch": 0.3327018121911038, "grad_norm": 0.352555513381958, "learning_rate": 1.8683339183163985e-05, "loss": 0.5393, "step": 12117 }, { "epoch": 0.3327292696320703, "grad_norm": 0.2993614673614502, "learning_rate": 1.8683124964552707e-05, "loss": 0.4646, "step": 12118 }, { "epoch": 0.3327567270730368, "grad_norm": 0.36872532963752747, "learning_rate": 1.868291072974463e-05, "loss": 0.5603, "step": 12119 }, { "epoch": 0.33278418451400327, "grad_norm": 0.3609529733657837, "learning_rate": 1.8682696478740154e-05, "loss": 0.6538, "step": 12120 }, { "epoch": 0.3328116419549698, "grad_norm": 0.36905625462532043, "learning_rate": 1.8682482211539675e-05, "loss": 0.5579, "step": 12121 }, { "epoch": 0.3328390993959363, "grad_norm": 0.3709834814071655, "learning_rate": 1.86822679281436e-05, "loss": 0.4971, "step": 12122 }, { "epoch": 0.3328665568369028, "grad_norm": 0.3465105891227722, "learning_rate": 1.8682053628552325e-05, "loss": 0.5359, "step": 12123 }, { "epoch": 0.3328940142778693, "grad_norm": 0.3874542713165283, "learning_rate": 1.8681839312766246e-05, "loss": 0.5046, "step": 12124 }, { "epoch": 0.3329214717188358, "grad_norm": 0.3714902400970459, "learning_rate": 1.8681624980785765e-05, "loss": 0.4783, "step": 12125 }, { "epoch": 0.3329489291598023, "grad_norm": 0.3609793782234192, "learning_rate": 1.8681410632611284e-05, "loss": 0.5694, "step": 12126 }, { "epoch": 0.3329763866007688, "grad_norm": 0.3704984188079834, "learning_rate": 1.8681196268243204e-05, "loss": 0.561, "step": 12127 }, { "epoch": 0.33300384404173533, "grad_norm": 0.35756394267082214, "learning_rate": 1.8680981887681917e-05, "loss": 0.4623, "step": 12128 }, { "epoch": 0.3330313014827018, "grad_norm": 0.3672991096973419, "learning_rate": 1.868076749092783e-05, "loss": 0.4207, "step": 12129 }, { "epoch": 0.3330587589236683, "grad_norm": 0.37532728910446167, "learning_rate": 1.8680553077981343e-05, "loss": 0.5006, "step": 12130 }, { "epoch": 0.3330862163646348, "grad_norm": 0.42688310146331787, "learning_rate": 1.8680338648842852e-05, "loss": 0.5345, "step": 12131 }, { "epoch": 0.3331136738056013, "grad_norm": 0.346250057220459, "learning_rate": 1.868012420351276e-05, "loss": 0.4552, "step": 12132 }, { "epoch": 0.3331411312465678, "grad_norm": 0.44948047399520874, "learning_rate": 1.8679909741991465e-05, "loss": 0.5806, "step": 12133 }, { "epoch": 0.3331685886875343, "grad_norm": 0.36561450362205505, "learning_rate": 1.8679695264279368e-05, "loss": 0.5347, "step": 12134 }, { "epoch": 0.33319604612850084, "grad_norm": 0.4279904067516327, "learning_rate": 1.8679480770376874e-05, "loss": 0.5833, "step": 12135 }, { "epoch": 0.33322350356946734, "grad_norm": 0.3907735347747803, "learning_rate": 1.867926626028437e-05, "loss": 0.5113, "step": 12136 }, { "epoch": 0.33325096101043383, "grad_norm": 0.4309743046760559, "learning_rate": 1.867905173400227e-05, "loss": 0.5348, "step": 12137 }, { "epoch": 0.3332784184514003, "grad_norm": 0.4029633700847626, "learning_rate": 1.8678837191530967e-05, "loss": 0.6536, "step": 12138 }, { "epoch": 0.3333058758923668, "grad_norm": 0.3549163341522217, "learning_rate": 1.8678622632870863e-05, "loss": 0.5534, "step": 12139 }, { "epoch": 0.3333333333333333, "grad_norm": 0.5452627539634705, "learning_rate": 1.867840805802236e-05, "loss": 0.5361, "step": 12140 }, { "epoch": 0.3333607907742998, "grad_norm": 0.4094119966030121, "learning_rate": 1.8678193466985853e-05, "loss": 0.5425, "step": 12141 }, { "epoch": 0.33338824821526636, "grad_norm": 0.3872023820877075, "learning_rate": 1.8677978859761745e-05, "loss": 0.5013, "step": 12142 }, { "epoch": 0.33341570565623285, "grad_norm": 0.38617101311683655, "learning_rate": 1.8677764236350437e-05, "loss": 0.5312, "step": 12143 }, { "epoch": 0.33344316309719935, "grad_norm": 0.3354259133338928, "learning_rate": 1.867754959675233e-05, "loss": 0.5445, "step": 12144 }, { "epoch": 0.33347062053816584, "grad_norm": 0.6444119811058044, "learning_rate": 1.867733494096782e-05, "loss": 0.5703, "step": 12145 }, { "epoch": 0.33349807797913233, "grad_norm": 0.3976154923439026, "learning_rate": 1.8677120268997317e-05, "loss": 0.5792, "step": 12146 }, { "epoch": 0.33352553542009883, "grad_norm": 0.3712121546268463, "learning_rate": 1.8676905580841213e-05, "loss": 0.603, "step": 12147 }, { "epoch": 0.3335529928610653, "grad_norm": 1.1602619886398315, "learning_rate": 1.867669087649991e-05, "loss": 0.5124, "step": 12148 }, { "epoch": 0.3335804503020319, "grad_norm": 0.3670958876609802, "learning_rate": 1.867647615597381e-05, "loss": 0.5187, "step": 12149 }, { "epoch": 0.33360790774299837, "grad_norm": 0.3441571891307831, "learning_rate": 1.8676261419263314e-05, "loss": 0.4445, "step": 12150 }, { "epoch": 0.33363536518396486, "grad_norm": 0.3844088912010193, "learning_rate": 1.867604666636882e-05, "loss": 0.4717, "step": 12151 }, { "epoch": 0.33366282262493135, "grad_norm": 0.3463568389415741, "learning_rate": 1.867583189729073e-05, "loss": 0.5081, "step": 12152 }, { "epoch": 0.33369028006589785, "grad_norm": 0.37555140256881714, "learning_rate": 1.867561711202944e-05, "loss": 0.5815, "step": 12153 }, { "epoch": 0.33371773750686434, "grad_norm": 0.3874824047088623, "learning_rate": 1.8675402310585358e-05, "loss": 0.6192, "step": 12154 }, { "epoch": 0.33374519494783084, "grad_norm": 0.3401980400085449, "learning_rate": 1.8675187492958886e-05, "loss": 0.4827, "step": 12155 }, { "epoch": 0.3337726523887974, "grad_norm": 0.3529663681983948, "learning_rate": 1.8674972659150414e-05, "loss": 0.5637, "step": 12156 }, { "epoch": 0.3338001098297639, "grad_norm": 0.3410813510417938, "learning_rate": 1.8674757809160354e-05, "loss": 0.533, "step": 12157 }, { "epoch": 0.3338275672707304, "grad_norm": 0.39642900228500366, "learning_rate": 1.8674542942989102e-05, "loss": 0.5631, "step": 12158 }, { "epoch": 0.33385502471169687, "grad_norm": 0.5239543914794922, "learning_rate": 1.8674328060637058e-05, "loss": 0.5568, "step": 12159 }, { "epoch": 0.33388248215266336, "grad_norm": 0.34486326575279236, "learning_rate": 1.867411316210462e-05, "loss": 0.406, "step": 12160 }, { "epoch": 0.33390993959362986, "grad_norm": 0.3769332468509674, "learning_rate": 1.8673898247392197e-05, "loss": 0.5526, "step": 12161 }, { "epoch": 0.33393739703459635, "grad_norm": 0.3466017246246338, "learning_rate": 1.8673683316500184e-05, "loss": 0.5712, "step": 12162 }, { "epoch": 0.3339648544755629, "grad_norm": 0.4155208170413971, "learning_rate": 1.867346836942898e-05, "loss": 0.5157, "step": 12163 }, { "epoch": 0.3339923119165294, "grad_norm": 0.48610249161720276, "learning_rate": 1.8673253406178994e-05, "loss": 0.56, "step": 12164 }, { "epoch": 0.3340197693574959, "grad_norm": 0.3727554380893707, "learning_rate": 1.8673038426750625e-05, "loss": 0.5691, "step": 12165 }, { "epoch": 0.3340472267984624, "grad_norm": 0.39319634437561035, "learning_rate": 1.8672823431144264e-05, "loss": 0.5632, "step": 12166 }, { "epoch": 0.3340746842394289, "grad_norm": 0.4360782504081726, "learning_rate": 1.8672608419360323e-05, "loss": 0.566, "step": 12167 }, { "epoch": 0.33410214168039537, "grad_norm": 0.37296417355537415, "learning_rate": 1.86723933913992e-05, "loss": 0.5455, "step": 12168 }, { "epoch": 0.33412959912136186, "grad_norm": 0.35333916544914246, "learning_rate": 1.8672178347261293e-05, "loss": 0.4477, "step": 12169 }, { "epoch": 0.3341570565623284, "grad_norm": 0.3674434721469879, "learning_rate": 1.867196328694701e-05, "loss": 0.5058, "step": 12170 }, { "epoch": 0.3341845140032949, "grad_norm": 0.3275064527988434, "learning_rate": 1.867174821045674e-05, "loss": 0.474, "step": 12171 }, { "epoch": 0.3342119714442614, "grad_norm": 0.354155570268631, "learning_rate": 1.8671533117790898e-05, "loss": 0.529, "step": 12172 }, { "epoch": 0.3342394288852279, "grad_norm": 0.35658782720565796, "learning_rate": 1.8671318008949873e-05, "loss": 0.5338, "step": 12173 }, { "epoch": 0.3342668863261944, "grad_norm": 0.36811596155166626, "learning_rate": 1.867110288393408e-05, "loss": 0.6117, "step": 12174 }, { "epoch": 0.3342943437671609, "grad_norm": 0.37824440002441406, "learning_rate": 1.867088774274391e-05, "loss": 0.5102, "step": 12175 }, { "epoch": 0.3343218012081274, "grad_norm": 0.3602931797504425, "learning_rate": 1.8670672585379764e-05, "loss": 0.5587, "step": 12176 }, { "epoch": 0.33434925864909393, "grad_norm": 0.41337916254997253, "learning_rate": 1.8670457411842048e-05, "loss": 0.5076, "step": 12177 }, { "epoch": 0.3343767160900604, "grad_norm": 0.43571823835372925, "learning_rate": 1.8670242222131163e-05, "loss": 0.5164, "step": 12178 }, { "epoch": 0.3344041735310269, "grad_norm": 0.34395861625671387, "learning_rate": 1.8670027016247508e-05, "loss": 0.4734, "step": 12179 }, { "epoch": 0.3344316309719934, "grad_norm": 0.3572176396846771, "learning_rate": 1.8669811794191487e-05, "loss": 0.5064, "step": 12180 }, { "epoch": 0.3344590884129599, "grad_norm": 0.42073866724967957, "learning_rate": 1.86695965559635e-05, "loss": 0.5437, "step": 12181 }, { "epoch": 0.3344865458539264, "grad_norm": 0.35572758316993713, "learning_rate": 1.8669381301563945e-05, "loss": 0.5326, "step": 12182 }, { "epoch": 0.3345140032948929, "grad_norm": 0.3477490544319153, "learning_rate": 1.8669166030993228e-05, "loss": 0.5448, "step": 12183 }, { "epoch": 0.33454146073585944, "grad_norm": 0.3768146336078644, "learning_rate": 1.866895074425175e-05, "loss": 0.5274, "step": 12184 }, { "epoch": 0.33456891817682594, "grad_norm": 0.35373422503471375, "learning_rate": 1.8668735441339912e-05, "loss": 0.5015, "step": 12185 }, { "epoch": 0.33459637561779243, "grad_norm": 0.36402344703674316, "learning_rate": 1.8668520122258117e-05, "loss": 0.499, "step": 12186 }, { "epoch": 0.3346238330587589, "grad_norm": 0.37815454602241516, "learning_rate": 1.8668304787006765e-05, "loss": 0.4702, "step": 12187 }, { "epoch": 0.3346512904997254, "grad_norm": 0.35976994037628174, "learning_rate": 1.8668089435586254e-05, "loss": 0.5459, "step": 12188 }, { "epoch": 0.3346787479406919, "grad_norm": 0.4207213521003723, "learning_rate": 1.8667874067996993e-05, "loss": 0.6455, "step": 12189 }, { "epoch": 0.3347062053816584, "grad_norm": 0.36147817969322205, "learning_rate": 1.8667658684239384e-05, "loss": 0.5196, "step": 12190 }, { "epoch": 0.33473366282262496, "grad_norm": 0.4904122054576874, "learning_rate": 1.866744328431382e-05, "loss": 0.5213, "step": 12191 }, { "epoch": 0.33476112026359145, "grad_norm": 0.3992885649204254, "learning_rate": 1.866722786822071e-05, "loss": 0.5706, "step": 12192 }, { "epoch": 0.33478857770455794, "grad_norm": 0.374067485332489, "learning_rate": 1.8667012435960454e-05, "loss": 0.4957, "step": 12193 }, { "epoch": 0.33481603514552444, "grad_norm": 0.3733747601509094, "learning_rate": 1.866679698753345e-05, "loss": 0.4718, "step": 12194 }, { "epoch": 0.33484349258649093, "grad_norm": 0.37749797105789185, "learning_rate": 1.8666581522940106e-05, "loss": 0.483, "step": 12195 }, { "epoch": 0.3348709500274574, "grad_norm": 0.3245806097984314, "learning_rate": 1.8666366042180823e-05, "loss": 0.4459, "step": 12196 }, { "epoch": 0.3348984074684239, "grad_norm": 0.37576979398727417, "learning_rate": 1.8666150545256e-05, "loss": 0.4832, "step": 12197 }, { "epoch": 0.33492586490939047, "grad_norm": 0.3478068709373474, "learning_rate": 1.866593503216604e-05, "loss": 0.545, "step": 12198 }, { "epoch": 0.33495332235035696, "grad_norm": 0.3988746106624603, "learning_rate": 1.8665719502911347e-05, "loss": 0.4473, "step": 12199 }, { "epoch": 0.33498077979132346, "grad_norm": 0.3786168694496155, "learning_rate": 1.866550395749232e-05, "loss": 0.5159, "step": 12200 }, { "epoch": 0.33500823723228995, "grad_norm": 0.3746756613254547, "learning_rate": 1.8665288395909362e-05, "loss": 0.5113, "step": 12201 }, { "epoch": 0.33503569467325645, "grad_norm": 0.37161025404930115, "learning_rate": 1.8665072818162878e-05, "loss": 0.5639, "step": 12202 }, { "epoch": 0.33506315211422294, "grad_norm": 0.40593913197517395, "learning_rate": 1.8664857224253264e-05, "loss": 0.5011, "step": 12203 }, { "epoch": 0.33509060955518943, "grad_norm": 0.4121600091457367, "learning_rate": 1.8664641614180927e-05, "loss": 0.5775, "step": 12204 }, { "epoch": 0.335118066996156, "grad_norm": 0.38124480843544006, "learning_rate": 1.8664425987946265e-05, "loss": 0.4453, "step": 12205 }, { "epoch": 0.3351455244371225, "grad_norm": 0.45303475856781006, "learning_rate": 1.866421034554969e-05, "loss": 0.5189, "step": 12206 }, { "epoch": 0.33517298187808897, "grad_norm": 0.3391396701335907, "learning_rate": 1.8663994686991594e-05, "loss": 0.5195, "step": 12207 }, { "epoch": 0.33520043931905547, "grad_norm": 0.3407212495803833, "learning_rate": 1.8663779012272384e-05, "loss": 0.4256, "step": 12208 }, { "epoch": 0.33522789676002196, "grad_norm": 0.3795984089374542, "learning_rate": 1.8663563321392456e-05, "loss": 0.5101, "step": 12209 }, { "epoch": 0.33525535420098845, "grad_norm": 0.3443688154220581, "learning_rate": 1.8663347614352224e-05, "loss": 0.6034, "step": 12210 }, { "epoch": 0.33528281164195495, "grad_norm": 0.4039149582386017, "learning_rate": 1.866313189115208e-05, "loss": 0.5617, "step": 12211 }, { "epoch": 0.3353102690829215, "grad_norm": 0.32635635137557983, "learning_rate": 1.866291615179243e-05, "loss": 0.4994, "step": 12212 }, { "epoch": 0.335337726523888, "grad_norm": 0.35553935170173645, "learning_rate": 1.8662700396273678e-05, "loss": 0.5676, "step": 12213 }, { "epoch": 0.3353651839648545, "grad_norm": 0.3551388680934906, "learning_rate": 1.8662484624596223e-05, "loss": 0.5107, "step": 12214 }, { "epoch": 0.335392641405821, "grad_norm": 0.36790353059768677, "learning_rate": 1.866226883676047e-05, "loss": 0.4836, "step": 12215 }, { "epoch": 0.3354200988467875, "grad_norm": 0.40190771222114563, "learning_rate": 1.866205303276682e-05, "loss": 0.4613, "step": 12216 }, { "epoch": 0.33544755628775397, "grad_norm": 0.3645809292793274, "learning_rate": 1.8661837212615677e-05, "loss": 0.5539, "step": 12217 }, { "epoch": 0.33547501372872046, "grad_norm": 0.3380280137062073, "learning_rate": 1.866162137630745e-05, "loss": 0.5134, "step": 12218 }, { "epoch": 0.335502471169687, "grad_norm": 0.35894468426704407, "learning_rate": 1.8661405523842523e-05, "loss": 0.5062, "step": 12219 }, { "epoch": 0.3355299286106535, "grad_norm": 0.41869670152664185, "learning_rate": 1.866118965522132e-05, "loss": 0.5766, "step": 12220 }, { "epoch": 0.33555738605162, "grad_norm": 0.4016895294189453, "learning_rate": 1.8660973770444227e-05, "loss": 0.5291, "step": 12221 }, { "epoch": 0.3355848434925865, "grad_norm": 0.4200184941291809, "learning_rate": 1.866075786951166e-05, "loss": 0.5162, "step": 12222 }, { "epoch": 0.335612300933553, "grad_norm": 0.42865708470344543, "learning_rate": 1.8660541952424013e-05, "loss": 0.4723, "step": 12223 }, { "epoch": 0.3356397583745195, "grad_norm": 0.3986501097679138, "learning_rate": 1.8660326019181687e-05, "loss": 0.6406, "step": 12224 }, { "epoch": 0.335667215815486, "grad_norm": 0.4323972165584564, "learning_rate": 1.8660110069785095e-05, "loss": 0.5553, "step": 12225 }, { "epoch": 0.3356946732564525, "grad_norm": 0.36001917719841003, "learning_rate": 1.865989410423463e-05, "loss": 0.4732, "step": 12226 }, { "epoch": 0.335722130697419, "grad_norm": 0.34065404534339905, "learning_rate": 1.86596781225307e-05, "loss": 0.5765, "step": 12227 }, { "epoch": 0.3357495881383855, "grad_norm": 0.36439767479896545, "learning_rate": 1.86594621246737e-05, "loss": 0.5266, "step": 12228 }, { "epoch": 0.335777045579352, "grad_norm": 0.34761327505111694, "learning_rate": 1.865924611066405e-05, "loss": 0.408, "step": 12229 }, { "epoch": 0.3358045030203185, "grad_norm": 0.3375909626483917, "learning_rate": 1.8659030080502137e-05, "loss": 0.4868, "step": 12230 }, { "epoch": 0.335831960461285, "grad_norm": 0.35106220841407776, "learning_rate": 1.8658814034188367e-05, "loss": 0.5103, "step": 12231 }, { "epoch": 0.3358594179022515, "grad_norm": 0.371696412563324, "learning_rate": 1.865859797172315e-05, "loss": 0.5261, "step": 12232 }, { "epoch": 0.33588687534321804, "grad_norm": 0.40100982785224915, "learning_rate": 1.8658381893106883e-05, "loss": 0.5898, "step": 12233 }, { "epoch": 0.33591433278418453, "grad_norm": 0.3814913332462311, "learning_rate": 1.865816579833997e-05, "loss": 0.5103, "step": 12234 }, { "epoch": 0.335941790225151, "grad_norm": 0.551315426826477, "learning_rate": 1.8657949687422813e-05, "loss": 0.5139, "step": 12235 }, { "epoch": 0.3359692476661175, "grad_norm": 0.3415037989616394, "learning_rate": 1.8657733560355815e-05, "loss": 0.4809, "step": 12236 }, { "epoch": 0.335996705107084, "grad_norm": 0.3835240304470062, "learning_rate": 1.8657517417139385e-05, "loss": 0.5352, "step": 12237 }, { "epoch": 0.3360241625480505, "grad_norm": 0.4481523633003235, "learning_rate": 1.865730125777392e-05, "loss": 0.5939, "step": 12238 }, { "epoch": 0.336051619989017, "grad_norm": 0.38402310013771057, "learning_rate": 1.8657085082259825e-05, "loss": 0.5031, "step": 12239 }, { "epoch": 0.33607907742998355, "grad_norm": 0.41845640540122986, "learning_rate": 1.8656868890597505e-05, "loss": 0.6612, "step": 12240 }, { "epoch": 0.33610653487095005, "grad_norm": 0.45300304889678955, "learning_rate": 1.8656652682787356e-05, "loss": 0.5224, "step": 12241 }, { "epoch": 0.33613399231191654, "grad_norm": 0.3763873875141144, "learning_rate": 1.865643645882979e-05, "loss": 0.4357, "step": 12242 }, { "epoch": 0.33616144975288303, "grad_norm": 0.3630322813987732, "learning_rate": 1.8656220218725212e-05, "loss": 0.5369, "step": 12243 }, { "epoch": 0.33618890719384953, "grad_norm": 0.3758666515350342, "learning_rate": 1.8656003962474018e-05, "loss": 0.5102, "step": 12244 }, { "epoch": 0.336216364634816, "grad_norm": 0.41092216968536377, "learning_rate": 1.865578769007661e-05, "loss": 0.5519, "step": 12245 }, { "epoch": 0.3362438220757825, "grad_norm": 0.3627135753631592, "learning_rate": 1.86555714015334e-05, "loss": 0.5392, "step": 12246 }, { "epoch": 0.33627127951674907, "grad_norm": 0.41984620690345764, "learning_rate": 1.865535509684478e-05, "loss": 0.6242, "step": 12247 }, { "epoch": 0.33629873695771556, "grad_norm": 0.3515876531600952, "learning_rate": 1.8655138776011167e-05, "loss": 0.5045, "step": 12248 }, { "epoch": 0.33632619439868205, "grad_norm": 0.37769272923469543, "learning_rate": 1.8654922439032955e-05, "loss": 0.4963, "step": 12249 }, { "epoch": 0.33635365183964855, "grad_norm": 0.3715553283691406, "learning_rate": 1.8654706085910553e-05, "loss": 0.552, "step": 12250 }, { "epoch": 0.33638110928061504, "grad_norm": 0.3972318470478058, "learning_rate": 1.865448971664436e-05, "loss": 0.5045, "step": 12251 }, { "epoch": 0.33640856672158154, "grad_norm": 0.42951247096061707, "learning_rate": 1.8654273331234783e-05, "loss": 0.6115, "step": 12252 }, { "epoch": 0.33643602416254803, "grad_norm": 0.4076823890209198, "learning_rate": 1.8654056929682222e-05, "loss": 0.5329, "step": 12253 }, { "epoch": 0.3364634816035145, "grad_norm": 0.40803059935569763, "learning_rate": 1.8653840511987083e-05, "loss": 0.4908, "step": 12254 }, { "epoch": 0.3364909390444811, "grad_norm": 0.405326247215271, "learning_rate": 1.8653624078149766e-05, "loss": 0.6085, "step": 12255 }, { "epoch": 0.33651839648544757, "grad_norm": 0.40463972091674805, "learning_rate": 1.8653407628170685e-05, "loss": 0.5734, "step": 12256 }, { "epoch": 0.33654585392641406, "grad_norm": 0.39930757880210876, "learning_rate": 1.8653191162050235e-05, "loss": 0.5684, "step": 12257 }, { "epoch": 0.33657331136738056, "grad_norm": 0.37083762884140015, "learning_rate": 1.865297467978882e-05, "loss": 0.5889, "step": 12258 }, { "epoch": 0.33660076880834705, "grad_norm": 0.34814825654029846, "learning_rate": 1.8652758181386844e-05, "loss": 0.5698, "step": 12259 }, { "epoch": 0.33662822624931354, "grad_norm": 0.39724937081336975, "learning_rate": 1.8652541666844716e-05, "loss": 0.6186, "step": 12260 }, { "epoch": 0.33665568369028004, "grad_norm": 0.35423409938812256, "learning_rate": 1.8652325136162836e-05, "loss": 0.5007, "step": 12261 }, { "epoch": 0.3366831411312466, "grad_norm": 0.3681260645389557, "learning_rate": 1.8652108589341607e-05, "loss": 0.5694, "step": 12262 }, { "epoch": 0.3367105985722131, "grad_norm": 0.3532644212245941, "learning_rate": 1.8651892026381432e-05, "loss": 0.4338, "step": 12263 }, { "epoch": 0.3367380560131796, "grad_norm": 0.45802879333496094, "learning_rate": 1.865167544728272e-05, "loss": 0.4626, "step": 12264 }, { "epoch": 0.33676551345414607, "grad_norm": 0.4022749066352844, "learning_rate": 1.865145885204587e-05, "loss": 0.5096, "step": 12265 }, { "epoch": 0.33679297089511256, "grad_norm": 0.3634476065635681, "learning_rate": 1.8651242240671286e-05, "loss": 0.4978, "step": 12266 }, { "epoch": 0.33682042833607906, "grad_norm": 0.3663511574268341, "learning_rate": 1.865102561315938e-05, "loss": 0.5275, "step": 12267 }, { "epoch": 0.33684788577704555, "grad_norm": 0.37149372696876526, "learning_rate": 1.8650808969510547e-05, "loss": 0.4852, "step": 12268 }, { "epoch": 0.3368753432180121, "grad_norm": 0.36191242933273315, "learning_rate": 1.8650592309725195e-05, "loss": 0.5046, "step": 12269 }, { "epoch": 0.3369028006589786, "grad_norm": 0.4134601950645447, "learning_rate": 1.8650375633803724e-05, "loss": 0.608, "step": 12270 }, { "epoch": 0.3369302580999451, "grad_norm": 0.36362940073013306, "learning_rate": 1.8650158941746547e-05, "loss": 0.4153, "step": 12271 }, { "epoch": 0.3369577155409116, "grad_norm": 0.3547532856464386, "learning_rate": 1.864994223355406e-05, "loss": 0.5343, "step": 12272 }, { "epoch": 0.3369851729818781, "grad_norm": 0.3606550395488739, "learning_rate": 1.864972550922667e-05, "loss": 0.4196, "step": 12273 }, { "epoch": 0.3370126304228446, "grad_norm": 0.35373443365097046, "learning_rate": 1.8649508768764782e-05, "loss": 0.5243, "step": 12274 }, { "epoch": 0.33704008786381107, "grad_norm": 0.4609447121620178, "learning_rate": 1.86492920121688e-05, "loss": 0.5464, "step": 12275 }, { "epoch": 0.3370675453047776, "grad_norm": 0.3898613750934601, "learning_rate": 1.8649075239439123e-05, "loss": 0.5867, "step": 12276 }, { "epoch": 0.3370950027457441, "grad_norm": 0.34443727135658264, "learning_rate": 1.8648858450576163e-05, "loss": 0.5104, "step": 12277 }, { "epoch": 0.3371224601867106, "grad_norm": 0.3428122401237488, "learning_rate": 1.8648641645580325e-05, "loss": 0.5451, "step": 12278 }, { "epoch": 0.3371499176276771, "grad_norm": 0.35781967639923096, "learning_rate": 1.8648424824452006e-05, "loss": 0.477, "step": 12279 }, { "epoch": 0.3371773750686436, "grad_norm": 0.3872476816177368, "learning_rate": 1.8648207987191616e-05, "loss": 0.5087, "step": 12280 }, { "epoch": 0.3372048325096101, "grad_norm": 0.37101325392723083, "learning_rate": 1.8647991133799558e-05, "loss": 0.4322, "step": 12281 }, { "epoch": 0.3372322899505766, "grad_norm": 0.39960938692092896, "learning_rate": 1.8647774264276238e-05, "loss": 0.5194, "step": 12282 }, { "epoch": 0.33725974739154313, "grad_norm": 0.3782779276371002, "learning_rate": 1.8647557378622057e-05, "loss": 0.5814, "step": 12283 }, { "epoch": 0.3372872048325096, "grad_norm": 0.3230981230735779, "learning_rate": 1.864734047683742e-05, "loss": 0.4516, "step": 12284 }, { "epoch": 0.3373146622734761, "grad_norm": 0.39579570293426514, "learning_rate": 1.8647123558922736e-05, "loss": 0.577, "step": 12285 }, { "epoch": 0.3373421197144426, "grad_norm": 0.3730456233024597, "learning_rate": 1.8646906624878403e-05, "loss": 0.5419, "step": 12286 }, { "epoch": 0.3373695771554091, "grad_norm": 0.37980756163597107, "learning_rate": 1.8646689674704835e-05, "loss": 0.566, "step": 12287 }, { "epoch": 0.3373970345963756, "grad_norm": 0.3712944984436035, "learning_rate": 1.8646472708402424e-05, "loss": 0.4567, "step": 12288 }, { "epoch": 0.3374244920373421, "grad_norm": 0.37411099672317505, "learning_rate": 1.8646255725971588e-05, "loss": 0.5074, "step": 12289 }, { "epoch": 0.33745194947830864, "grad_norm": 0.3757662773132324, "learning_rate": 1.8646038727412726e-05, "loss": 0.534, "step": 12290 }, { "epoch": 0.33747940691927514, "grad_norm": 0.37685900926589966, "learning_rate": 1.864582171272624e-05, "loss": 0.5398, "step": 12291 }, { "epoch": 0.33750686436024163, "grad_norm": 0.3579704165458679, "learning_rate": 1.8645604681912535e-05, "loss": 0.4653, "step": 12292 }, { "epoch": 0.3375343218012081, "grad_norm": 0.44706645607948303, "learning_rate": 1.864538763497202e-05, "loss": 0.5697, "step": 12293 }, { "epoch": 0.3375617792421746, "grad_norm": 0.3792467713356018, "learning_rate": 1.8645170571905096e-05, "loss": 0.4414, "step": 12294 }, { "epoch": 0.3375892366831411, "grad_norm": 0.4254254698753357, "learning_rate": 1.8644953492712174e-05, "loss": 0.508, "step": 12295 }, { "epoch": 0.3376166941241076, "grad_norm": 0.44086042046546936, "learning_rate": 1.864473639739365e-05, "loss": 0.5728, "step": 12296 }, { "epoch": 0.33764415156507416, "grad_norm": 0.5534827709197998, "learning_rate": 1.8644519285949933e-05, "loss": 0.582, "step": 12297 }, { "epoch": 0.33767160900604065, "grad_norm": 0.45145976543426514, "learning_rate": 1.864430215838143e-05, "loss": 0.5287, "step": 12298 }, { "epoch": 0.33769906644700715, "grad_norm": 0.3428511321544647, "learning_rate": 1.8644085014688546e-05, "loss": 0.4833, "step": 12299 }, { "epoch": 0.33772652388797364, "grad_norm": 0.3595331609249115, "learning_rate": 1.8643867854871684e-05, "loss": 0.4736, "step": 12300 }, { "epoch": 0.33775398132894013, "grad_norm": 0.37817004323005676, "learning_rate": 1.8643650678931248e-05, "loss": 0.549, "step": 12301 }, { "epoch": 0.33778143876990663, "grad_norm": 0.38260671496391296, "learning_rate": 1.864343348686765e-05, "loss": 0.5196, "step": 12302 }, { "epoch": 0.3378088962108731, "grad_norm": 0.3613940477371216, "learning_rate": 1.8643216278681285e-05, "loss": 0.5373, "step": 12303 }, { "epoch": 0.33783635365183967, "grad_norm": 0.37521806359291077, "learning_rate": 1.8642999054372564e-05, "loss": 0.5921, "step": 12304 }, { "epoch": 0.33786381109280617, "grad_norm": 0.35872453451156616, "learning_rate": 1.864278181394189e-05, "loss": 0.4109, "step": 12305 }, { "epoch": 0.33789126853377266, "grad_norm": 0.37959444522857666, "learning_rate": 1.864256455738967e-05, "loss": 0.4749, "step": 12306 }, { "epoch": 0.33791872597473915, "grad_norm": 0.360113263130188, "learning_rate": 1.864234728471631e-05, "loss": 0.5281, "step": 12307 }, { "epoch": 0.33794618341570565, "grad_norm": 0.3299367129802704, "learning_rate": 1.8642129995922215e-05, "loss": 0.5673, "step": 12308 }, { "epoch": 0.33797364085667214, "grad_norm": 0.3171939551830292, "learning_rate": 1.8641912691007786e-05, "loss": 0.4786, "step": 12309 }, { "epoch": 0.33800109829763864, "grad_norm": 0.3738742172718048, "learning_rate": 1.8641695369973435e-05, "loss": 0.4371, "step": 12310 }, { "epoch": 0.3380285557386052, "grad_norm": 0.3594864308834076, "learning_rate": 1.8641478032819563e-05, "loss": 0.5408, "step": 12311 }, { "epoch": 0.3380560131795717, "grad_norm": 0.38559210300445557, "learning_rate": 1.8641260679546572e-05, "loss": 0.4735, "step": 12312 }, { "epoch": 0.3380834706205382, "grad_norm": 0.5643694996833801, "learning_rate": 1.8641043310154877e-05, "loss": 0.6501, "step": 12313 }, { "epoch": 0.33811092806150467, "grad_norm": 0.38152432441711426, "learning_rate": 1.864082592464488e-05, "loss": 0.5175, "step": 12314 }, { "epoch": 0.33813838550247116, "grad_norm": 0.3851596713066101, "learning_rate": 1.864060852301698e-05, "loss": 0.5719, "step": 12315 }, { "epoch": 0.33816584294343766, "grad_norm": 0.34640422463417053, "learning_rate": 1.864039110527159e-05, "loss": 0.5371, "step": 12316 }, { "epoch": 0.33819330038440415, "grad_norm": 0.37013259530067444, "learning_rate": 1.8640173671409113e-05, "loss": 0.5209, "step": 12317 }, { "epoch": 0.3382207578253707, "grad_norm": 0.4598497152328491, "learning_rate": 1.863995622142995e-05, "loss": 0.5944, "step": 12318 }, { "epoch": 0.3382482152663372, "grad_norm": 0.33150139451026917, "learning_rate": 1.8639738755334517e-05, "loss": 0.4611, "step": 12319 }, { "epoch": 0.3382756727073037, "grad_norm": 0.3093506991863251, "learning_rate": 1.863952127312321e-05, "loss": 0.4334, "step": 12320 }, { "epoch": 0.3383031301482702, "grad_norm": 0.35145682096481323, "learning_rate": 1.863930377479644e-05, "loss": 0.4214, "step": 12321 }, { "epoch": 0.3383305875892367, "grad_norm": 0.4030013680458069, "learning_rate": 1.8639086260354612e-05, "loss": 0.5167, "step": 12322 }, { "epoch": 0.33835804503020317, "grad_norm": 0.3948081433773041, "learning_rate": 1.863886872979813e-05, "loss": 0.4276, "step": 12323 }, { "epoch": 0.33838550247116966, "grad_norm": 0.43430545926094055, "learning_rate": 1.86386511831274e-05, "loss": 0.5692, "step": 12324 }, { "epoch": 0.3384129599121362, "grad_norm": 0.41613835096359253, "learning_rate": 1.863843362034283e-05, "loss": 0.5457, "step": 12325 }, { "epoch": 0.3384404173531027, "grad_norm": 0.36906808614730835, "learning_rate": 1.863821604144482e-05, "loss": 0.4701, "step": 12326 }, { "epoch": 0.3384678747940692, "grad_norm": 0.9441107511520386, "learning_rate": 1.8637998446433785e-05, "loss": 0.5439, "step": 12327 }, { "epoch": 0.3384953322350357, "grad_norm": 0.4258708655834198, "learning_rate": 1.8637780835310125e-05, "loss": 0.5619, "step": 12328 }, { "epoch": 0.3385227896760022, "grad_norm": 0.4126697778701782, "learning_rate": 1.8637563208074245e-05, "loss": 0.5644, "step": 12329 }, { "epoch": 0.3385502471169687, "grad_norm": 0.3614232838153839, "learning_rate": 1.8637345564726554e-05, "loss": 0.4439, "step": 12330 }, { "epoch": 0.3385777045579352, "grad_norm": 0.37252941727638245, "learning_rate": 1.8637127905267457e-05, "loss": 0.5506, "step": 12331 }, { "epoch": 0.3386051619989017, "grad_norm": 0.3750757873058319, "learning_rate": 1.863691022969736e-05, "loss": 0.5594, "step": 12332 }, { "epoch": 0.3386326194398682, "grad_norm": 0.4187791347503662, "learning_rate": 1.863669253801667e-05, "loss": 0.5271, "step": 12333 }, { "epoch": 0.3386600768808347, "grad_norm": 0.3930562138557434, "learning_rate": 1.8636474830225787e-05, "loss": 0.5001, "step": 12334 }, { "epoch": 0.3386875343218012, "grad_norm": 0.39050620794296265, "learning_rate": 1.8636257106325127e-05, "loss": 0.4167, "step": 12335 }, { "epoch": 0.3387149917627677, "grad_norm": 0.4423873722553253, "learning_rate": 1.8636039366315088e-05, "loss": 0.7007, "step": 12336 }, { "epoch": 0.3387424492037342, "grad_norm": 0.6910070180892944, "learning_rate": 1.863582161019608e-05, "loss": 0.5401, "step": 12337 }, { "epoch": 0.3387699066447007, "grad_norm": 0.36229830980300903, "learning_rate": 1.863560383796851e-05, "loss": 0.5225, "step": 12338 }, { "epoch": 0.33879736408566724, "grad_norm": 0.3740771412849426, "learning_rate": 1.8635386049632783e-05, "loss": 0.4709, "step": 12339 }, { "epoch": 0.33882482152663373, "grad_norm": 0.5569590926170349, "learning_rate": 1.8635168245189302e-05, "loss": 0.5815, "step": 12340 }, { "epoch": 0.33885227896760023, "grad_norm": 0.41412991285324097, "learning_rate": 1.8634950424638477e-05, "loss": 0.5606, "step": 12341 }, { "epoch": 0.3388797364085667, "grad_norm": 0.3704295754432678, "learning_rate": 1.8634732587980714e-05, "loss": 0.4886, "step": 12342 }, { "epoch": 0.3389071938495332, "grad_norm": 0.35827991366386414, "learning_rate": 1.863451473521642e-05, "loss": 0.4911, "step": 12343 }, { "epoch": 0.3389346512904997, "grad_norm": 0.3694479167461395, "learning_rate": 1.8634296866346e-05, "loss": 0.5076, "step": 12344 }, { "epoch": 0.3389621087314662, "grad_norm": 0.35337457060813904, "learning_rate": 1.863407898136986e-05, "loss": 0.4513, "step": 12345 }, { "epoch": 0.33898956617243275, "grad_norm": 0.43388286232948303, "learning_rate": 1.8633861080288408e-05, "loss": 0.4968, "step": 12346 }, { "epoch": 0.33901702361339925, "grad_norm": 0.3522838056087494, "learning_rate": 1.8633643163102047e-05, "loss": 0.5055, "step": 12347 }, { "epoch": 0.33904448105436574, "grad_norm": 0.3700820803642273, "learning_rate": 1.8633425229811187e-05, "loss": 0.581, "step": 12348 }, { "epoch": 0.33907193849533224, "grad_norm": 0.5878597497940063, "learning_rate": 1.8633207280416236e-05, "loss": 0.6278, "step": 12349 }, { "epoch": 0.33909939593629873, "grad_norm": 0.5447765588760376, "learning_rate": 1.8632989314917593e-05, "loss": 0.6206, "step": 12350 }, { "epoch": 0.3391268533772652, "grad_norm": 0.41337907314300537, "learning_rate": 1.8632771333315674e-05, "loss": 0.561, "step": 12351 }, { "epoch": 0.3391543108182317, "grad_norm": 0.401763379573822, "learning_rate": 1.863255333561088e-05, "loss": 0.561, "step": 12352 }, { "epoch": 0.33918176825919827, "grad_norm": 0.3655933737754822, "learning_rate": 1.863233532180362e-05, "loss": 0.5668, "step": 12353 }, { "epoch": 0.33920922570016476, "grad_norm": 0.3661268949508667, "learning_rate": 1.8632117291894297e-05, "loss": 0.5681, "step": 12354 }, { "epoch": 0.33923668314113126, "grad_norm": 0.3617664575576782, "learning_rate": 1.863189924588332e-05, "loss": 0.4502, "step": 12355 }, { "epoch": 0.33926414058209775, "grad_norm": 0.36743050813674927, "learning_rate": 1.86316811837711e-05, "loss": 0.5831, "step": 12356 }, { "epoch": 0.33929159802306424, "grad_norm": 0.3674524128437042, "learning_rate": 1.8631463105558033e-05, "loss": 0.5612, "step": 12357 }, { "epoch": 0.33931905546403074, "grad_norm": 0.5027694702148438, "learning_rate": 1.8631245011244537e-05, "loss": 0.5648, "step": 12358 }, { "epoch": 0.33934651290499723, "grad_norm": 0.4020113945007324, "learning_rate": 1.8631026900831014e-05, "loss": 0.5139, "step": 12359 }, { "epoch": 0.3393739703459638, "grad_norm": 0.3367515802383423, "learning_rate": 1.8630808774317872e-05, "loss": 0.456, "step": 12360 }, { "epoch": 0.3394014277869303, "grad_norm": 0.3334173262119293, "learning_rate": 1.8630590631705514e-05, "loss": 0.4403, "step": 12361 }, { "epoch": 0.33942888522789677, "grad_norm": 0.32617881894111633, "learning_rate": 1.8630372472994352e-05, "loss": 0.4942, "step": 12362 }, { "epoch": 0.33945634266886326, "grad_norm": 0.34045061469078064, "learning_rate": 1.863015429818479e-05, "loss": 0.4927, "step": 12363 }, { "epoch": 0.33948380010982976, "grad_norm": 0.34071052074432373, "learning_rate": 1.862993610727724e-05, "loss": 0.4918, "step": 12364 }, { "epoch": 0.33951125755079625, "grad_norm": 0.32686951756477356, "learning_rate": 1.86297179002721e-05, "loss": 0.4385, "step": 12365 }, { "epoch": 0.33953871499176275, "grad_norm": 0.38040047883987427, "learning_rate": 1.8629499677169782e-05, "loss": 0.5351, "step": 12366 }, { "epoch": 0.3395661724327293, "grad_norm": 0.41433972120285034, "learning_rate": 1.862928143797069e-05, "loss": 0.5592, "step": 12367 }, { "epoch": 0.3395936298736958, "grad_norm": 0.4037224054336548, "learning_rate": 1.862906318267524e-05, "loss": 0.5497, "step": 12368 }, { "epoch": 0.3396210873146623, "grad_norm": 0.36436253786087036, "learning_rate": 1.8628844911283832e-05, "loss": 0.5001, "step": 12369 }, { "epoch": 0.3396485447556288, "grad_norm": 0.3786572813987732, "learning_rate": 1.8628626623796873e-05, "loss": 0.5339, "step": 12370 }, { "epoch": 0.3396760021965953, "grad_norm": 0.39340558648109436, "learning_rate": 1.862840832021477e-05, "loss": 0.5385, "step": 12371 }, { "epoch": 0.33970345963756177, "grad_norm": 0.41884729266166687, "learning_rate": 1.8628190000537935e-05, "loss": 0.5175, "step": 12372 }, { "epoch": 0.33973091707852826, "grad_norm": 0.36940476298332214, "learning_rate": 1.862797166476677e-05, "loss": 0.5029, "step": 12373 }, { "epoch": 0.3397583745194948, "grad_norm": 0.38178202509880066, "learning_rate": 1.8627753312901685e-05, "loss": 0.4913, "step": 12374 }, { "epoch": 0.3397858319604613, "grad_norm": 0.39873433113098145, "learning_rate": 1.8627534944943085e-05, "loss": 0.4895, "step": 12375 }, { "epoch": 0.3398132894014278, "grad_norm": 0.3858374059200287, "learning_rate": 1.862731656089138e-05, "loss": 0.4912, "step": 12376 }, { "epoch": 0.3398407468423943, "grad_norm": 0.357807993888855, "learning_rate": 1.8627098160746976e-05, "loss": 0.556, "step": 12377 }, { "epoch": 0.3398682042833608, "grad_norm": 0.34629297256469727, "learning_rate": 1.8626879744510277e-05, "loss": 0.4069, "step": 12378 }, { "epoch": 0.3398956617243273, "grad_norm": 0.3755984902381897, "learning_rate": 1.8626661312181696e-05, "loss": 0.4824, "step": 12379 }, { "epoch": 0.3399231191652938, "grad_norm": 0.3901381492614746, "learning_rate": 1.862644286376164e-05, "loss": 0.5628, "step": 12380 }, { "epoch": 0.3399505766062603, "grad_norm": 0.37018489837646484, "learning_rate": 1.8626224399250515e-05, "loss": 0.5507, "step": 12381 }, { "epoch": 0.3399780340472268, "grad_norm": 0.33702513575553894, "learning_rate": 1.8626005918648727e-05, "loss": 0.4758, "step": 12382 }, { "epoch": 0.3400054914881933, "grad_norm": 0.36110442876815796, "learning_rate": 1.8625787421956684e-05, "loss": 0.5717, "step": 12383 }, { "epoch": 0.3400329489291598, "grad_norm": 0.37742021679878235, "learning_rate": 1.8625568909174797e-05, "loss": 0.4661, "step": 12384 }, { "epoch": 0.3400604063701263, "grad_norm": 0.4361168444156647, "learning_rate": 1.8625350380303468e-05, "loss": 0.5575, "step": 12385 }, { "epoch": 0.3400878638110928, "grad_norm": 0.3802225589752197, "learning_rate": 1.862513183534311e-05, "loss": 0.4849, "step": 12386 }, { "epoch": 0.3401153212520593, "grad_norm": 0.3888755440711975, "learning_rate": 1.8624913274294128e-05, "loss": 0.5363, "step": 12387 }, { "epoch": 0.3401427786930258, "grad_norm": 0.359723299741745, "learning_rate": 1.862469469715693e-05, "loss": 0.4921, "step": 12388 }, { "epoch": 0.34017023613399233, "grad_norm": 0.3222038149833679, "learning_rate": 1.862447610393192e-05, "loss": 0.4841, "step": 12389 }, { "epoch": 0.3401976935749588, "grad_norm": 0.3656991720199585, "learning_rate": 1.8624257494619512e-05, "loss": 0.5863, "step": 12390 }, { "epoch": 0.3402251510159253, "grad_norm": 0.36762839555740356, "learning_rate": 1.8624038869220115e-05, "loss": 0.5493, "step": 12391 }, { "epoch": 0.3402526084568918, "grad_norm": 0.4071088135242462, "learning_rate": 1.862382022773413e-05, "loss": 0.5881, "step": 12392 }, { "epoch": 0.3402800658978583, "grad_norm": 0.3734399378299713, "learning_rate": 1.8623601570161968e-05, "loss": 0.5637, "step": 12393 }, { "epoch": 0.3403075233388248, "grad_norm": 0.38561490178108215, "learning_rate": 1.8623382896504038e-05, "loss": 0.5483, "step": 12394 }, { "epoch": 0.3403349807797913, "grad_norm": 0.42822587490081787, "learning_rate": 1.8623164206760746e-05, "loss": 0.6326, "step": 12395 }, { "epoch": 0.34036243822075785, "grad_norm": 0.3804072141647339, "learning_rate": 1.8622945500932497e-05, "loss": 0.4989, "step": 12396 }, { "epoch": 0.34038989566172434, "grad_norm": 0.44036227464675903, "learning_rate": 1.8622726779019703e-05, "loss": 0.6435, "step": 12397 }, { "epoch": 0.34041735310269083, "grad_norm": 0.34180787205696106, "learning_rate": 1.8622508041022777e-05, "loss": 0.4462, "step": 12398 }, { "epoch": 0.34044481054365733, "grad_norm": 0.378200888633728, "learning_rate": 1.8622289286942117e-05, "loss": 0.4878, "step": 12399 }, { "epoch": 0.3404722679846238, "grad_norm": 0.41270050406455994, "learning_rate": 1.8622070516778135e-05, "loss": 0.5893, "step": 12400 }, { "epoch": 0.3404997254255903, "grad_norm": 0.3788231909275055, "learning_rate": 1.862185173053124e-05, "loss": 0.6649, "step": 12401 }, { "epoch": 0.3405271828665568, "grad_norm": 0.4233676493167877, "learning_rate": 1.8621632928201843e-05, "loss": 0.5745, "step": 12402 }, { "epoch": 0.34055464030752336, "grad_norm": 0.348463237285614, "learning_rate": 1.8621414109790346e-05, "loss": 0.545, "step": 12403 }, { "epoch": 0.34058209774848985, "grad_norm": 0.3803178369998932, "learning_rate": 1.8621195275297163e-05, "loss": 0.5165, "step": 12404 }, { "epoch": 0.34060955518945635, "grad_norm": 0.37968289852142334, "learning_rate": 1.8620976424722697e-05, "loss": 0.4565, "step": 12405 }, { "epoch": 0.34063701263042284, "grad_norm": 0.5169217586517334, "learning_rate": 1.8620757558067358e-05, "loss": 0.5259, "step": 12406 }, { "epoch": 0.34066447007138934, "grad_norm": 0.3714880049228668, "learning_rate": 1.8620538675331555e-05, "loss": 0.535, "step": 12407 }, { "epoch": 0.34069192751235583, "grad_norm": 0.3667689263820648, "learning_rate": 1.86203197765157e-05, "loss": 0.4902, "step": 12408 }, { "epoch": 0.3407193849533223, "grad_norm": 0.3574177026748657, "learning_rate": 1.862010086162019e-05, "loss": 0.4402, "step": 12409 }, { "epoch": 0.3407468423942889, "grad_norm": 0.3938351273536682, "learning_rate": 1.8619881930645446e-05, "loss": 0.5181, "step": 12410 }, { "epoch": 0.34077429983525537, "grad_norm": 0.5523144602775574, "learning_rate": 1.861966298359187e-05, "loss": 0.504, "step": 12411 }, { "epoch": 0.34080175727622186, "grad_norm": 0.36027419567108154, "learning_rate": 1.861944402045987e-05, "loss": 0.4619, "step": 12412 }, { "epoch": 0.34082921471718836, "grad_norm": 0.37237390875816345, "learning_rate": 1.861922504124986e-05, "loss": 0.538, "step": 12413 }, { "epoch": 0.34085667215815485, "grad_norm": 0.4055570662021637, "learning_rate": 1.861900604596224e-05, "loss": 0.6089, "step": 12414 }, { "epoch": 0.34088412959912134, "grad_norm": 0.34897580742836, "learning_rate": 1.8618787034597426e-05, "loss": 0.4285, "step": 12415 }, { "epoch": 0.34091158704008784, "grad_norm": 0.3791837692260742, "learning_rate": 1.861856800715582e-05, "loss": 0.4547, "step": 12416 }, { "epoch": 0.3409390444810544, "grad_norm": 0.4035971760749817, "learning_rate": 1.8618348963637836e-05, "loss": 0.4493, "step": 12417 }, { "epoch": 0.3409665019220209, "grad_norm": 0.41616013646125793, "learning_rate": 1.861812990404388e-05, "loss": 0.5377, "step": 12418 }, { "epoch": 0.3409939593629874, "grad_norm": 0.3754522204399109, "learning_rate": 1.861791082837436e-05, "loss": 0.5748, "step": 12419 }, { "epoch": 0.34102141680395387, "grad_norm": 0.46873170137405396, "learning_rate": 1.861769173662969e-05, "loss": 0.6039, "step": 12420 }, { "epoch": 0.34104887424492036, "grad_norm": 0.360714852809906, "learning_rate": 1.861747262881027e-05, "loss": 0.4089, "step": 12421 }, { "epoch": 0.34107633168588686, "grad_norm": 0.3650037348270416, "learning_rate": 1.8617253504916513e-05, "loss": 0.5147, "step": 12422 }, { "epoch": 0.34110378912685335, "grad_norm": 0.40146762132644653, "learning_rate": 1.861703436494883e-05, "loss": 0.5459, "step": 12423 }, { "epoch": 0.3411312465678199, "grad_norm": 0.3985220193862915, "learning_rate": 1.8616815208907627e-05, "loss": 0.546, "step": 12424 }, { "epoch": 0.3411587040087864, "grad_norm": 0.3417741060256958, "learning_rate": 1.8616596036793315e-05, "loss": 0.5084, "step": 12425 }, { "epoch": 0.3411861614497529, "grad_norm": 0.35618314146995544, "learning_rate": 1.86163768486063e-05, "loss": 0.5016, "step": 12426 }, { "epoch": 0.3412136188907194, "grad_norm": 0.37012559175491333, "learning_rate": 1.861615764434699e-05, "loss": 0.4429, "step": 12427 }, { "epoch": 0.3412410763316859, "grad_norm": 0.574319064617157, "learning_rate": 1.86159384240158e-05, "loss": 0.5153, "step": 12428 }, { "epoch": 0.34126853377265237, "grad_norm": 0.40714189410209656, "learning_rate": 1.8615719187613133e-05, "loss": 0.5487, "step": 12429 }, { "epoch": 0.34129599121361887, "grad_norm": 0.354127436876297, "learning_rate": 1.86154999351394e-05, "loss": 0.4633, "step": 12430 }, { "epoch": 0.3413234486545854, "grad_norm": 0.3709923326969147, "learning_rate": 1.861528066659501e-05, "loss": 0.552, "step": 12431 }, { "epoch": 0.3413509060955519, "grad_norm": 0.41628921031951904, "learning_rate": 1.861506138198037e-05, "loss": 0.5572, "step": 12432 }, { "epoch": 0.3413783635365184, "grad_norm": 0.37817472219467163, "learning_rate": 1.8614842081295893e-05, "loss": 0.5313, "step": 12433 }, { "epoch": 0.3414058209774849, "grad_norm": 0.3538553714752197, "learning_rate": 1.8614622764541987e-05, "loss": 0.5745, "step": 12434 }, { "epoch": 0.3414332784184514, "grad_norm": 0.3838284909725189, "learning_rate": 1.8614403431719057e-05, "loss": 0.5648, "step": 12435 }, { "epoch": 0.3414607358594179, "grad_norm": 0.4210561513900757, "learning_rate": 1.8614184082827518e-05, "loss": 0.5507, "step": 12436 }, { "epoch": 0.3414881933003844, "grad_norm": 0.33766961097717285, "learning_rate": 1.8613964717867775e-05, "loss": 0.4836, "step": 12437 }, { "epoch": 0.34151565074135093, "grad_norm": 0.3755749464035034, "learning_rate": 1.861374533684024e-05, "loss": 0.4847, "step": 12438 }, { "epoch": 0.3415431081823174, "grad_norm": 0.37769976258277893, "learning_rate": 1.8613525939745317e-05, "loss": 0.5452, "step": 12439 }, { "epoch": 0.3415705656232839, "grad_norm": 0.35986432433128357, "learning_rate": 1.8613306526583422e-05, "loss": 0.5138, "step": 12440 }, { "epoch": 0.3415980230642504, "grad_norm": 0.3905627131462097, "learning_rate": 1.861308709735496e-05, "loss": 0.5263, "step": 12441 }, { "epoch": 0.3416254805052169, "grad_norm": 0.39064085483551025, "learning_rate": 1.8612867652060344e-05, "loss": 0.6245, "step": 12442 }, { "epoch": 0.3416529379461834, "grad_norm": 0.40754708647727966, "learning_rate": 1.861264819069998e-05, "loss": 0.5301, "step": 12443 }, { "epoch": 0.3416803953871499, "grad_norm": 0.3976210355758667, "learning_rate": 1.8612428713274276e-05, "loss": 0.5108, "step": 12444 }, { "epoch": 0.34170785282811644, "grad_norm": 0.3640335202217102, "learning_rate": 1.8612209219783646e-05, "loss": 0.4789, "step": 12445 }, { "epoch": 0.34173531026908294, "grad_norm": 0.3840211033821106, "learning_rate": 1.8611989710228497e-05, "loss": 0.5509, "step": 12446 }, { "epoch": 0.34176276771004943, "grad_norm": 0.4026833474636078, "learning_rate": 1.8611770184609237e-05, "loss": 0.5243, "step": 12447 }, { "epoch": 0.3417902251510159, "grad_norm": 0.3280714750289917, "learning_rate": 1.861155064292628e-05, "loss": 0.5493, "step": 12448 }, { "epoch": 0.3418176825919824, "grad_norm": 0.3767845034599304, "learning_rate": 1.861133108518003e-05, "loss": 0.4415, "step": 12449 }, { "epoch": 0.3418451400329489, "grad_norm": 0.34884724020957947, "learning_rate": 1.86111115113709e-05, "loss": 0.4681, "step": 12450 }, { "epoch": 0.3418725974739154, "grad_norm": 0.3761449158191681, "learning_rate": 1.8610891921499297e-05, "loss": 0.5501, "step": 12451 }, { "epoch": 0.34190005491488196, "grad_norm": 0.5941891074180603, "learning_rate": 1.8610672315565633e-05, "loss": 0.4971, "step": 12452 }, { "epoch": 0.34192751235584845, "grad_norm": 0.37456271052360535, "learning_rate": 1.861045269357032e-05, "loss": 0.5319, "step": 12453 }, { "epoch": 0.34195496979681494, "grad_norm": 0.3723776340484619, "learning_rate": 1.861023305551376e-05, "loss": 0.5101, "step": 12454 }, { "epoch": 0.34198242723778144, "grad_norm": 0.3292977213859558, "learning_rate": 1.861001340139637e-05, "loss": 0.4383, "step": 12455 }, { "epoch": 0.34200988467874793, "grad_norm": 0.3781681954860687, "learning_rate": 1.8609793731218556e-05, "loss": 0.5332, "step": 12456 }, { "epoch": 0.3420373421197144, "grad_norm": 0.3383600115776062, "learning_rate": 1.860957404498073e-05, "loss": 0.4931, "step": 12457 }, { "epoch": 0.3420647995606809, "grad_norm": 0.34892529249191284, "learning_rate": 1.86093543426833e-05, "loss": 0.5087, "step": 12458 }, { "epoch": 0.34209225700164747, "grad_norm": 0.4160260856151581, "learning_rate": 1.8609134624326675e-05, "loss": 0.5562, "step": 12459 }, { "epoch": 0.34211971444261396, "grad_norm": 0.46445804834365845, "learning_rate": 1.860891488991127e-05, "loss": 0.4814, "step": 12460 }, { "epoch": 0.34214717188358046, "grad_norm": 0.3425804674625397, "learning_rate": 1.8608695139437486e-05, "loss": 0.5229, "step": 12461 }, { "epoch": 0.34217462932454695, "grad_norm": 0.45031970739364624, "learning_rate": 1.860847537290574e-05, "loss": 0.4581, "step": 12462 }, { "epoch": 0.34220208676551345, "grad_norm": 0.42683106660842896, "learning_rate": 1.860825559031644e-05, "loss": 0.531, "step": 12463 }, { "epoch": 0.34222954420647994, "grad_norm": 0.4106147885322571, "learning_rate": 1.8608035791669997e-05, "loss": 0.62, "step": 12464 }, { "epoch": 0.34225700164744643, "grad_norm": 0.3677841126918793, "learning_rate": 1.8607815976966818e-05, "loss": 0.4677, "step": 12465 }, { "epoch": 0.342284459088413, "grad_norm": 0.3894851505756378, "learning_rate": 1.8607596146207316e-05, "loss": 0.5785, "step": 12466 }, { "epoch": 0.3423119165293795, "grad_norm": 0.33146294951438904, "learning_rate": 1.86073762993919e-05, "loss": 0.5208, "step": 12467 }, { "epoch": 0.342339373970346, "grad_norm": 0.3290240168571472, "learning_rate": 1.860715643652098e-05, "loss": 0.5761, "step": 12468 }, { "epoch": 0.34236683141131247, "grad_norm": 0.3947576582431793, "learning_rate": 1.8606936557594967e-05, "loss": 0.5094, "step": 12469 }, { "epoch": 0.34239428885227896, "grad_norm": 0.3360365629196167, "learning_rate": 1.860671666261427e-05, "loss": 0.5083, "step": 12470 }, { "epoch": 0.34242174629324545, "grad_norm": 0.3637236952781677, "learning_rate": 1.8606496751579298e-05, "loss": 0.545, "step": 12471 }, { "epoch": 0.34244920373421195, "grad_norm": 0.3957521617412567, "learning_rate": 1.8606276824490462e-05, "loss": 0.5217, "step": 12472 }, { "epoch": 0.3424766611751785, "grad_norm": 0.34500351548194885, "learning_rate": 1.8606056881348177e-05, "loss": 0.4974, "step": 12473 }, { "epoch": 0.342504118616145, "grad_norm": 0.37129634618759155, "learning_rate": 1.8605836922152846e-05, "loss": 0.5369, "step": 12474 }, { "epoch": 0.3425315760571115, "grad_norm": 0.39873069524765015, "learning_rate": 1.8605616946904885e-05, "loss": 0.5652, "step": 12475 }, { "epoch": 0.342559033498078, "grad_norm": 0.34749987721443176, "learning_rate": 1.86053969556047e-05, "loss": 0.4899, "step": 12476 }, { "epoch": 0.3425864909390445, "grad_norm": 0.32393577694892883, "learning_rate": 1.86051769482527e-05, "loss": 0.4903, "step": 12477 }, { "epoch": 0.34261394838001097, "grad_norm": 0.38452261686325073, "learning_rate": 1.8604956924849304e-05, "loss": 0.5277, "step": 12478 }, { "epoch": 0.34264140582097746, "grad_norm": 0.3591618835926056, "learning_rate": 1.8604736885394917e-05, "loss": 0.4912, "step": 12479 }, { "epoch": 0.342668863261944, "grad_norm": 0.3449806272983551, "learning_rate": 1.8604516829889944e-05, "loss": 0.5345, "step": 12480 }, { "epoch": 0.3426963207029105, "grad_norm": 0.37548309564590454, "learning_rate": 1.8604296758334804e-05, "loss": 0.5328, "step": 12481 }, { "epoch": 0.342723778143877, "grad_norm": 0.36883246898651123, "learning_rate": 1.8604076670729905e-05, "loss": 0.5689, "step": 12482 }, { "epoch": 0.3427512355848435, "grad_norm": 0.35661500692367554, "learning_rate": 1.860385656707566e-05, "loss": 0.4678, "step": 12483 }, { "epoch": 0.34277869302581, "grad_norm": 0.3643106520175934, "learning_rate": 1.860363644737247e-05, "loss": 0.5079, "step": 12484 }, { "epoch": 0.3428061504667765, "grad_norm": 0.3650033473968506, "learning_rate": 1.860341631162075e-05, "loss": 0.5285, "step": 12485 }, { "epoch": 0.342833607907743, "grad_norm": 0.3510039448738098, "learning_rate": 1.860319615982092e-05, "loss": 0.5098, "step": 12486 }, { "epoch": 0.3428610653487095, "grad_norm": 0.36367693543434143, "learning_rate": 1.8602975991973383e-05, "loss": 0.5392, "step": 12487 }, { "epoch": 0.342888522789676, "grad_norm": 0.3612433969974518, "learning_rate": 1.8602755808078547e-05, "loss": 0.555, "step": 12488 }, { "epoch": 0.3429159802306425, "grad_norm": 0.3998146653175354, "learning_rate": 1.8602535608136828e-05, "loss": 0.5215, "step": 12489 }, { "epoch": 0.342943437671609, "grad_norm": 0.35060691833496094, "learning_rate": 1.8602315392148632e-05, "loss": 0.5331, "step": 12490 }, { "epoch": 0.3429708951125755, "grad_norm": 0.329326868057251, "learning_rate": 1.8602095160114373e-05, "loss": 0.453, "step": 12491 }, { "epoch": 0.342998352553542, "grad_norm": 0.351910799741745, "learning_rate": 1.860187491203446e-05, "loss": 0.4895, "step": 12492 }, { "epoch": 0.3430258099945085, "grad_norm": 0.36010316014289856, "learning_rate": 1.8601654647909307e-05, "loss": 0.4606, "step": 12493 }, { "epoch": 0.34305326743547504, "grad_norm": 2.198714256286621, "learning_rate": 1.860143436773932e-05, "loss": 0.6337, "step": 12494 }, { "epoch": 0.34308072487644153, "grad_norm": 1.2635620832443237, "learning_rate": 1.8601214071524918e-05, "loss": 0.5827, "step": 12495 }, { "epoch": 0.34310818231740803, "grad_norm": 0.43448880314826965, "learning_rate": 1.8600993759266505e-05, "loss": 0.4854, "step": 12496 }, { "epoch": 0.3431356397583745, "grad_norm": 0.36445188522338867, "learning_rate": 1.8600773430964488e-05, "loss": 0.5326, "step": 12497 }, { "epoch": 0.343163097199341, "grad_norm": 0.3614044785499573, "learning_rate": 1.8600553086619288e-05, "loss": 0.481, "step": 12498 }, { "epoch": 0.3431905546403075, "grad_norm": 0.37353554368019104, "learning_rate": 1.860033272623131e-05, "loss": 0.5469, "step": 12499 }, { "epoch": 0.343218012081274, "grad_norm": 0.3660149574279785, "learning_rate": 1.860011234980097e-05, "loss": 0.5934, "step": 12500 }, { "epoch": 0.34324546952224055, "grad_norm": 0.3388027250766754, "learning_rate": 1.859989195732867e-05, "loss": 0.5028, "step": 12501 }, { "epoch": 0.34327292696320705, "grad_norm": 0.37738460302352905, "learning_rate": 1.859967154881483e-05, "loss": 0.5669, "step": 12502 }, { "epoch": 0.34330038440417354, "grad_norm": 0.38425740599632263, "learning_rate": 1.859945112425986e-05, "loss": 0.6531, "step": 12503 }, { "epoch": 0.34332784184514004, "grad_norm": 0.3569954037666321, "learning_rate": 1.8599230683664163e-05, "loss": 0.5528, "step": 12504 }, { "epoch": 0.34335529928610653, "grad_norm": 0.3778884708881378, "learning_rate": 1.859901022702816e-05, "loss": 0.5437, "step": 12505 }, { "epoch": 0.343382756727073, "grad_norm": 0.3584970533847809, "learning_rate": 1.859878975435226e-05, "loss": 0.4754, "step": 12506 }, { "epoch": 0.3434102141680395, "grad_norm": 0.34502074122428894, "learning_rate": 1.859856926563687e-05, "loss": 0.5165, "step": 12507 }, { "epoch": 0.34343767160900607, "grad_norm": 0.44411519169807434, "learning_rate": 1.8598348760882404e-05, "loss": 0.5446, "step": 12508 }, { "epoch": 0.34346512904997256, "grad_norm": 0.3735094964504242, "learning_rate": 1.8598128240089277e-05, "loss": 0.5584, "step": 12509 }, { "epoch": 0.34349258649093906, "grad_norm": 0.38954228162765503, "learning_rate": 1.8597907703257893e-05, "loss": 0.5498, "step": 12510 }, { "epoch": 0.34352004393190555, "grad_norm": 0.3487035036087036, "learning_rate": 1.8597687150388668e-05, "loss": 0.5664, "step": 12511 }, { "epoch": 0.34354750137287204, "grad_norm": 0.39853784441947937, "learning_rate": 1.8597466581482013e-05, "loss": 0.6092, "step": 12512 }, { "epoch": 0.34357495881383854, "grad_norm": 0.4248814582824707, "learning_rate": 1.8597245996538337e-05, "loss": 0.5115, "step": 12513 }, { "epoch": 0.34360241625480503, "grad_norm": 0.3748270869255066, "learning_rate": 1.8597025395558054e-05, "loss": 0.4932, "step": 12514 }, { "epoch": 0.3436298736957716, "grad_norm": 0.37733325362205505, "learning_rate": 1.8596804778541574e-05, "loss": 0.5763, "step": 12515 }, { "epoch": 0.3436573311367381, "grad_norm": 0.4772658348083496, "learning_rate": 1.859658414548931e-05, "loss": 0.5957, "step": 12516 }, { "epoch": 0.34368478857770457, "grad_norm": 0.3823661804199219, "learning_rate": 1.8596363496401676e-05, "loss": 0.5669, "step": 12517 }, { "epoch": 0.34371224601867106, "grad_norm": 0.3263639211654663, "learning_rate": 1.8596142831279074e-05, "loss": 0.4735, "step": 12518 }, { "epoch": 0.34373970345963756, "grad_norm": 0.3318633437156677, "learning_rate": 1.859592215012193e-05, "loss": 0.3987, "step": 12519 }, { "epoch": 0.34376716090060405, "grad_norm": 0.3850191533565521, "learning_rate": 1.859570145293064e-05, "loss": 0.5533, "step": 12520 }, { "epoch": 0.34379461834157055, "grad_norm": 0.41838932037353516, "learning_rate": 1.859548073970563e-05, "loss": 0.5673, "step": 12521 }, { "epoch": 0.34382207578253704, "grad_norm": 0.38155174255371094, "learning_rate": 1.85952600104473e-05, "loss": 0.5472, "step": 12522 }, { "epoch": 0.3438495332235036, "grad_norm": 0.36532118916511536, "learning_rate": 1.859503926515607e-05, "loss": 0.6311, "step": 12523 }, { "epoch": 0.3438769906644701, "grad_norm": 1.0349308252334595, "learning_rate": 1.8594818503832347e-05, "loss": 0.4433, "step": 12524 }, { "epoch": 0.3439044481054366, "grad_norm": 0.3971216678619385, "learning_rate": 1.8594597726476542e-05, "loss": 0.5239, "step": 12525 }, { "epoch": 0.34393190554640307, "grad_norm": 0.3294568359851837, "learning_rate": 1.8594376933089073e-05, "loss": 0.5352, "step": 12526 }, { "epoch": 0.34395936298736957, "grad_norm": 0.3669932186603546, "learning_rate": 1.859415612367035e-05, "loss": 0.538, "step": 12527 }, { "epoch": 0.34398682042833606, "grad_norm": 0.34438735246658325, "learning_rate": 1.859393529822078e-05, "loss": 0.5185, "step": 12528 }, { "epoch": 0.34401427786930255, "grad_norm": 0.3622181713581085, "learning_rate": 1.859371445674077e-05, "loss": 0.499, "step": 12529 }, { "epoch": 0.3440417353102691, "grad_norm": 0.37381789088249207, "learning_rate": 1.859349359923075e-05, "loss": 0.5719, "step": 12530 }, { "epoch": 0.3440691927512356, "grad_norm": 0.35320615768432617, "learning_rate": 1.859327272569112e-05, "loss": 0.5209, "step": 12531 }, { "epoch": 0.3440966501922021, "grad_norm": 0.4062102138996124, "learning_rate": 1.859305183612229e-05, "loss": 0.4452, "step": 12532 }, { "epoch": 0.3441241076331686, "grad_norm": 0.38576367497444153, "learning_rate": 1.859283093052468e-05, "loss": 0.561, "step": 12533 }, { "epoch": 0.3441515650741351, "grad_norm": 0.37779340147972107, "learning_rate": 1.8592610008898697e-05, "loss": 0.5168, "step": 12534 }, { "epoch": 0.3441790225151016, "grad_norm": 0.3470847010612488, "learning_rate": 1.8592389071244753e-05, "loss": 0.4421, "step": 12535 }, { "epoch": 0.34420647995606807, "grad_norm": 0.3575572967529297, "learning_rate": 1.8592168117563264e-05, "loss": 0.5279, "step": 12536 }, { "epoch": 0.3442339373970346, "grad_norm": 0.3466982841491699, "learning_rate": 1.8591947147854635e-05, "loss": 0.4521, "step": 12537 }, { "epoch": 0.3442613948380011, "grad_norm": 0.3703071177005768, "learning_rate": 1.8591726162119285e-05, "loss": 0.4548, "step": 12538 }, { "epoch": 0.3442888522789676, "grad_norm": 0.35229015350341797, "learning_rate": 1.8591505160357618e-05, "loss": 0.5301, "step": 12539 }, { "epoch": 0.3443163097199341, "grad_norm": 0.3755365312099457, "learning_rate": 1.8591284142570057e-05, "loss": 0.5182, "step": 12540 }, { "epoch": 0.3443437671609006, "grad_norm": 0.347196102142334, "learning_rate": 1.859106310875701e-05, "loss": 0.4578, "step": 12541 }, { "epoch": 0.3443712246018671, "grad_norm": 0.4409518241882324, "learning_rate": 1.8590842058918883e-05, "loss": 0.4652, "step": 12542 }, { "epoch": 0.3443986820428336, "grad_norm": 0.45305538177490234, "learning_rate": 1.85906209930561e-05, "loss": 0.5599, "step": 12543 }, { "epoch": 0.34442613948380013, "grad_norm": 0.41491568088531494, "learning_rate": 1.859039991116906e-05, "loss": 0.5216, "step": 12544 }, { "epoch": 0.3444535969247666, "grad_norm": 0.36472949385643005, "learning_rate": 1.859017881325819e-05, "loss": 0.5467, "step": 12545 }, { "epoch": 0.3444810543657331, "grad_norm": 0.44581353664398193, "learning_rate": 1.858995769932389e-05, "loss": 0.5782, "step": 12546 }, { "epoch": 0.3445085118066996, "grad_norm": 0.43733033537864685, "learning_rate": 1.858973656936658e-05, "loss": 0.5851, "step": 12547 }, { "epoch": 0.3445359692476661, "grad_norm": 0.36269429326057434, "learning_rate": 1.8589515423386667e-05, "loss": 0.5391, "step": 12548 }, { "epoch": 0.3445634266886326, "grad_norm": 0.35754284262657166, "learning_rate": 1.8589294261384568e-05, "loss": 0.5828, "step": 12549 }, { "epoch": 0.3445908841295991, "grad_norm": 0.4186266362667084, "learning_rate": 1.858907308336069e-05, "loss": 0.5528, "step": 12550 }, { "epoch": 0.34461834157056564, "grad_norm": 0.36250320076942444, "learning_rate": 1.8588851889315457e-05, "loss": 0.5713, "step": 12551 }, { "epoch": 0.34464579901153214, "grad_norm": 0.32410547137260437, "learning_rate": 1.8588630679249265e-05, "loss": 0.5768, "step": 12552 }, { "epoch": 0.34467325645249863, "grad_norm": 0.3781469166278839, "learning_rate": 1.8588409453162543e-05, "loss": 0.5458, "step": 12553 }, { "epoch": 0.3447007138934651, "grad_norm": 0.36061641573905945, "learning_rate": 1.8588188211055694e-05, "loss": 0.4982, "step": 12554 }, { "epoch": 0.3447281713344316, "grad_norm": 0.33096417784690857, "learning_rate": 1.8587966952929134e-05, "loss": 0.5317, "step": 12555 }, { "epoch": 0.3447556287753981, "grad_norm": 0.3772202134132385, "learning_rate": 1.858774567878327e-05, "loss": 0.4823, "step": 12556 }, { "epoch": 0.3447830862163646, "grad_norm": 0.35657283663749695, "learning_rate": 1.8587524388618523e-05, "loss": 0.5557, "step": 12557 }, { "epoch": 0.34481054365733116, "grad_norm": 0.3761727809906006, "learning_rate": 1.8587303082435305e-05, "loss": 0.4941, "step": 12558 }, { "epoch": 0.34483800109829765, "grad_norm": 0.45941758155822754, "learning_rate": 1.8587081760234024e-05, "loss": 0.4645, "step": 12559 }, { "epoch": 0.34486545853926415, "grad_norm": 0.413360059261322, "learning_rate": 1.858686042201509e-05, "loss": 0.5752, "step": 12560 }, { "epoch": 0.34489291598023064, "grad_norm": 0.3964988589286804, "learning_rate": 1.8586639067778925e-05, "loss": 0.5321, "step": 12561 }, { "epoch": 0.34492037342119714, "grad_norm": 0.3585057556629181, "learning_rate": 1.858641769752594e-05, "loss": 0.5144, "step": 12562 }, { "epoch": 0.34494783086216363, "grad_norm": 0.3535102307796478, "learning_rate": 1.858619631125654e-05, "loss": 0.5496, "step": 12563 }, { "epoch": 0.3449752883031301, "grad_norm": 0.3658895194530487, "learning_rate": 1.8585974908971147e-05, "loss": 0.504, "step": 12564 }, { "epoch": 0.3450027457440967, "grad_norm": 0.35339754819869995, "learning_rate": 1.858575349067017e-05, "loss": 0.4541, "step": 12565 }, { "epoch": 0.34503020318506317, "grad_norm": 0.2955337464809418, "learning_rate": 1.858553205635402e-05, "loss": 0.3666, "step": 12566 }, { "epoch": 0.34505766062602966, "grad_norm": 0.36888229846954346, "learning_rate": 1.8585310606023116e-05, "loss": 0.5204, "step": 12567 }, { "epoch": 0.34508511806699615, "grad_norm": 0.33436739444732666, "learning_rate": 1.8585089139677867e-05, "loss": 0.5474, "step": 12568 }, { "epoch": 0.34511257550796265, "grad_norm": 0.4198274612426758, "learning_rate": 1.8584867657318684e-05, "loss": 0.5903, "step": 12569 }, { "epoch": 0.34514003294892914, "grad_norm": 0.3595098853111267, "learning_rate": 1.8584646158945984e-05, "loss": 0.5077, "step": 12570 }, { "epoch": 0.34516749038989564, "grad_norm": 0.3708937168121338, "learning_rate": 1.858442464456018e-05, "loss": 0.5575, "step": 12571 }, { "epoch": 0.3451949478308622, "grad_norm": 0.4717086851596832, "learning_rate": 1.8584203114161687e-05, "loss": 0.5941, "step": 12572 }, { "epoch": 0.3452224052718287, "grad_norm": 0.4951496720314026, "learning_rate": 1.858398156775091e-05, "loss": 0.5318, "step": 12573 }, { "epoch": 0.3452498627127952, "grad_norm": 0.3295329213142395, "learning_rate": 1.858376000532827e-05, "loss": 0.4509, "step": 12574 }, { "epoch": 0.34527732015376167, "grad_norm": 0.3636619746685028, "learning_rate": 1.8583538426894177e-05, "loss": 0.4703, "step": 12575 }, { "epoch": 0.34530477759472816, "grad_norm": 0.36063358187675476, "learning_rate": 1.8583316832449048e-05, "loss": 0.5193, "step": 12576 }, { "epoch": 0.34533223503569466, "grad_norm": 0.47096073627471924, "learning_rate": 1.858309522199329e-05, "loss": 0.5805, "step": 12577 }, { "epoch": 0.34535969247666115, "grad_norm": 0.34318244457244873, "learning_rate": 1.8582873595527324e-05, "loss": 0.5531, "step": 12578 }, { "epoch": 0.3453871499176277, "grad_norm": 0.3895787000656128, "learning_rate": 1.8582651953051555e-05, "loss": 0.5509, "step": 12579 }, { "epoch": 0.3454146073585942, "grad_norm": 0.36655959486961365, "learning_rate": 1.8582430294566406e-05, "loss": 0.5143, "step": 12580 }, { "epoch": 0.3454420647995607, "grad_norm": 0.4519481658935547, "learning_rate": 1.8582208620072283e-05, "loss": 0.5571, "step": 12581 }, { "epoch": 0.3454695222405272, "grad_norm": 0.37004977464675903, "learning_rate": 1.85819869295696e-05, "loss": 0.5283, "step": 12582 }, { "epoch": 0.3454969796814937, "grad_norm": 0.34681248664855957, "learning_rate": 1.8581765223058774e-05, "loss": 0.541, "step": 12583 }, { "epoch": 0.34552443712246017, "grad_norm": 0.3371754288673401, "learning_rate": 1.8581543500540214e-05, "loss": 0.4932, "step": 12584 }, { "epoch": 0.34555189456342666, "grad_norm": 0.44022807478904724, "learning_rate": 1.858132176201434e-05, "loss": 0.5804, "step": 12585 }, { "epoch": 0.3455793520043932, "grad_norm": 0.4288005530834198, "learning_rate": 1.858110000748156e-05, "loss": 0.6112, "step": 12586 }, { "epoch": 0.3456068094453597, "grad_norm": 0.3526349365711212, "learning_rate": 1.858087823694229e-05, "loss": 0.5009, "step": 12587 }, { "epoch": 0.3456342668863262, "grad_norm": 0.4124602973461151, "learning_rate": 1.8580656450396945e-05, "loss": 0.4929, "step": 12588 }, { "epoch": 0.3456617243272927, "grad_norm": 0.38749369978904724, "learning_rate": 1.858043464784594e-05, "loss": 0.5433, "step": 12589 }, { "epoch": 0.3456891817682592, "grad_norm": 0.41337698698043823, "learning_rate": 1.8580212829289682e-05, "loss": 0.516, "step": 12590 }, { "epoch": 0.3457166392092257, "grad_norm": 0.4181760549545288, "learning_rate": 1.8579990994728586e-05, "loss": 0.5273, "step": 12591 }, { "epoch": 0.3457440966501922, "grad_norm": 0.4541707932949066, "learning_rate": 1.8579769144163073e-05, "loss": 0.5728, "step": 12592 }, { "epoch": 0.34577155409115873, "grad_norm": 0.3735782504081726, "learning_rate": 1.857954727759355e-05, "loss": 0.5792, "step": 12593 }, { "epoch": 0.3457990115321252, "grad_norm": 0.43929624557495117, "learning_rate": 1.8579325395020434e-05, "loss": 0.5598, "step": 12594 }, { "epoch": 0.3458264689730917, "grad_norm": 0.3621836006641388, "learning_rate": 1.8579103496444135e-05, "loss": 0.4317, "step": 12595 }, { "epoch": 0.3458539264140582, "grad_norm": 0.3541490435600281, "learning_rate": 1.8578881581865072e-05, "loss": 0.4959, "step": 12596 }, { "epoch": 0.3458813838550247, "grad_norm": 0.37178272008895874, "learning_rate": 1.8578659651283657e-05, "loss": 0.5221, "step": 12597 }, { "epoch": 0.3459088412959912, "grad_norm": 0.3842722773551941, "learning_rate": 1.8578437704700305e-05, "loss": 0.6163, "step": 12598 }, { "epoch": 0.3459362987369577, "grad_norm": 0.40147021412849426, "learning_rate": 1.8578215742115427e-05, "loss": 0.5713, "step": 12599 }, { "epoch": 0.34596375617792424, "grad_norm": 0.360003799200058, "learning_rate": 1.857799376352944e-05, "loss": 0.4893, "step": 12600 }, { "epoch": 0.34599121361889074, "grad_norm": 0.3810230493545532, "learning_rate": 1.8577771768942755e-05, "loss": 0.5357, "step": 12601 }, { "epoch": 0.34601867105985723, "grad_norm": 0.7750182151794434, "learning_rate": 1.857754975835579e-05, "loss": 0.4874, "step": 12602 }, { "epoch": 0.3460461285008237, "grad_norm": 0.3628039062023163, "learning_rate": 1.8577327731768954e-05, "loss": 0.472, "step": 12603 }, { "epoch": 0.3460735859417902, "grad_norm": 0.3613610863685608, "learning_rate": 1.8577105689182668e-05, "loss": 0.5248, "step": 12604 }, { "epoch": 0.3461010433827567, "grad_norm": 0.386263906955719, "learning_rate": 1.857688363059734e-05, "loss": 0.5373, "step": 12605 }, { "epoch": 0.3461285008237232, "grad_norm": 7.524552822113037, "learning_rate": 1.8576661556013387e-05, "loss": 0.839, "step": 12606 }, { "epoch": 0.34615595826468976, "grad_norm": 0.4306505620479584, "learning_rate": 1.8576439465431225e-05, "loss": 0.5836, "step": 12607 }, { "epoch": 0.34618341570565625, "grad_norm": 0.32971298694610596, "learning_rate": 1.8576217358851264e-05, "loss": 0.429, "step": 12608 }, { "epoch": 0.34621087314662274, "grad_norm": 0.3432246446609497, "learning_rate": 1.8575995236273918e-05, "loss": 0.4706, "step": 12609 }, { "epoch": 0.34623833058758924, "grad_norm": 0.3342866003513336, "learning_rate": 1.8575773097699608e-05, "loss": 0.4977, "step": 12610 }, { "epoch": 0.34626578802855573, "grad_norm": 0.35850274562835693, "learning_rate": 1.8575550943128742e-05, "loss": 0.5337, "step": 12611 }, { "epoch": 0.3462932454695222, "grad_norm": 0.37367355823516846, "learning_rate": 1.8575328772561737e-05, "loss": 0.4298, "step": 12612 }, { "epoch": 0.3463207029104887, "grad_norm": 0.4273996353149414, "learning_rate": 1.8575106585999007e-05, "loss": 0.5516, "step": 12613 }, { "epoch": 0.34634816035145527, "grad_norm": 0.43801572918891907, "learning_rate": 1.8574884383440967e-05, "loss": 0.5033, "step": 12614 }, { "epoch": 0.34637561779242176, "grad_norm": 0.4426799416542053, "learning_rate": 1.857466216488803e-05, "loss": 0.4923, "step": 12615 }, { "epoch": 0.34640307523338826, "grad_norm": 0.34461185336112976, "learning_rate": 1.8574439930340612e-05, "loss": 0.5115, "step": 12616 }, { "epoch": 0.34643053267435475, "grad_norm": 0.36092299222946167, "learning_rate": 1.857421767979913e-05, "loss": 0.5669, "step": 12617 }, { "epoch": 0.34645799011532125, "grad_norm": 0.4030083417892456, "learning_rate": 1.857399541326399e-05, "loss": 0.5501, "step": 12618 }, { "epoch": 0.34648544755628774, "grad_norm": 0.3334687352180481, "learning_rate": 1.857377313073561e-05, "loss": 0.4838, "step": 12619 }, { "epoch": 0.34651290499725423, "grad_norm": 0.3064964711666107, "learning_rate": 1.8573550832214412e-05, "loss": 0.3982, "step": 12620 }, { "epoch": 0.3465403624382208, "grad_norm": 0.3856368958950043, "learning_rate": 1.8573328517700805e-05, "loss": 0.5618, "step": 12621 }, { "epoch": 0.3465678198791873, "grad_norm": 0.34063056111335754, "learning_rate": 1.8573106187195203e-05, "loss": 0.5213, "step": 12622 }, { "epoch": 0.34659527732015377, "grad_norm": 0.3651998043060303, "learning_rate": 1.8572883840698018e-05, "loss": 0.5476, "step": 12623 }, { "epoch": 0.34662273476112027, "grad_norm": 0.3949795663356781, "learning_rate": 1.8572661478209675e-05, "loss": 0.5731, "step": 12624 }, { "epoch": 0.34665019220208676, "grad_norm": 0.44128307700157166, "learning_rate": 1.857243909973058e-05, "loss": 0.4941, "step": 12625 }, { "epoch": 0.34667764964305325, "grad_norm": 0.5818378925323486, "learning_rate": 1.857221670526115e-05, "loss": 0.6558, "step": 12626 }, { "epoch": 0.34670510708401975, "grad_norm": 0.36324968934059143, "learning_rate": 1.8571994294801796e-05, "loss": 0.5412, "step": 12627 }, { "epoch": 0.3467325645249863, "grad_norm": 0.3635287582874298, "learning_rate": 1.857177186835294e-05, "loss": 0.5338, "step": 12628 }, { "epoch": 0.3467600219659528, "grad_norm": 0.37550461292266846, "learning_rate": 1.8571549425914997e-05, "loss": 0.4425, "step": 12629 }, { "epoch": 0.3467874794069193, "grad_norm": 0.3531869351863861, "learning_rate": 1.8571326967488375e-05, "loss": 0.5721, "step": 12630 }, { "epoch": 0.3468149368478858, "grad_norm": 0.468712717294693, "learning_rate": 1.8571104493073495e-05, "loss": 0.5234, "step": 12631 }, { "epoch": 0.3468423942888523, "grad_norm": 0.3820745348930359, "learning_rate": 1.8570882002670765e-05, "loss": 0.55, "step": 12632 }, { "epoch": 0.34686985172981877, "grad_norm": 0.41026949882507324, "learning_rate": 1.8570659496280608e-05, "loss": 0.5244, "step": 12633 }, { "epoch": 0.34689730917078526, "grad_norm": 0.32711026072502136, "learning_rate": 1.8570436973903435e-05, "loss": 0.4778, "step": 12634 }, { "epoch": 0.3469247666117518, "grad_norm": 0.34249842166900635, "learning_rate": 1.857021443553966e-05, "loss": 0.5649, "step": 12635 }, { "epoch": 0.3469522240527183, "grad_norm": 0.3711831867694855, "learning_rate": 1.85699918811897e-05, "loss": 0.5599, "step": 12636 }, { "epoch": 0.3469796814936848, "grad_norm": 0.35696038603782654, "learning_rate": 1.856976931085397e-05, "loss": 0.524, "step": 12637 }, { "epoch": 0.3470071389346513, "grad_norm": 0.4327448606491089, "learning_rate": 1.856954672453289e-05, "loss": 0.5927, "step": 12638 }, { "epoch": 0.3470345963756178, "grad_norm": 0.3969862759113312, "learning_rate": 1.8569324122226865e-05, "loss": 0.5845, "step": 12639 }, { "epoch": 0.3470620538165843, "grad_norm": 0.3609474003314972, "learning_rate": 1.8569101503936317e-05, "loss": 0.5648, "step": 12640 }, { "epoch": 0.3470895112575508, "grad_norm": 0.378280371427536, "learning_rate": 1.856887886966166e-05, "loss": 0.5533, "step": 12641 }, { "epoch": 0.3471169686985173, "grad_norm": 0.33960744738578796, "learning_rate": 1.8568656219403306e-05, "loss": 0.5316, "step": 12642 }, { "epoch": 0.3471444261394838, "grad_norm": 0.438093364238739, "learning_rate": 1.8568433553161677e-05, "loss": 0.5649, "step": 12643 }, { "epoch": 0.3471718835804503, "grad_norm": 0.3610592186450958, "learning_rate": 1.8568210870937183e-05, "loss": 0.5684, "step": 12644 }, { "epoch": 0.3471993410214168, "grad_norm": 0.41504061222076416, "learning_rate": 1.856798817273024e-05, "loss": 0.5887, "step": 12645 }, { "epoch": 0.3472267984623833, "grad_norm": 0.353227436542511, "learning_rate": 1.856776545854127e-05, "loss": 0.5031, "step": 12646 }, { "epoch": 0.3472542559033498, "grad_norm": 0.3714257478713989, "learning_rate": 1.8567542728370678e-05, "loss": 0.5387, "step": 12647 }, { "epoch": 0.3472817133443163, "grad_norm": 0.3157273232936859, "learning_rate": 1.8567319982218887e-05, "loss": 0.4931, "step": 12648 }, { "epoch": 0.34730917078528284, "grad_norm": 0.3749190866947174, "learning_rate": 1.8567097220086308e-05, "loss": 0.5485, "step": 12649 }, { "epoch": 0.34733662822624933, "grad_norm": 0.377169668674469, "learning_rate": 1.8566874441973358e-05, "loss": 0.47, "step": 12650 }, { "epoch": 0.3473640856672158, "grad_norm": 0.3514383137226105, "learning_rate": 1.8566651647880454e-05, "loss": 0.5512, "step": 12651 }, { "epoch": 0.3473915431081823, "grad_norm": 0.3752650022506714, "learning_rate": 1.8566428837808012e-05, "loss": 0.4709, "step": 12652 }, { "epoch": 0.3474190005491488, "grad_norm": 0.46758154034614563, "learning_rate": 1.8566206011756443e-05, "loss": 0.66, "step": 12653 }, { "epoch": 0.3474464579901153, "grad_norm": 0.3444725275039673, "learning_rate": 1.8565983169726166e-05, "loss": 0.4485, "step": 12654 }, { "epoch": 0.3474739154310818, "grad_norm": 0.39415305852890015, "learning_rate": 1.85657603117176e-05, "loss": 0.5179, "step": 12655 }, { "epoch": 0.3475013728720483, "grad_norm": 0.37960362434387207, "learning_rate": 1.8565537437731152e-05, "loss": 0.4972, "step": 12656 }, { "epoch": 0.34752883031301485, "grad_norm": 0.7715675830841064, "learning_rate": 1.8565314547767247e-05, "loss": 0.5379, "step": 12657 }, { "epoch": 0.34755628775398134, "grad_norm": 0.42669180035591125, "learning_rate": 1.8565091641826295e-05, "loss": 0.4943, "step": 12658 }, { "epoch": 0.34758374519494784, "grad_norm": 0.3571719229221344, "learning_rate": 1.8564868719908713e-05, "loss": 0.5319, "step": 12659 }, { "epoch": 0.34761120263591433, "grad_norm": 0.4460781216621399, "learning_rate": 1.856464578201492e-05, "loss": 0.476, "step": 12660 }, { "epoch": 0.3476386600768808, "grad_norm": 0.3776991665363312, "learning_rate": 1.8564422828145327e-05, "loss": 0.5341, "step": 12661 }, { "epoch": 0.3476661175178473, "grad_norm": 0.3844689428806305, "learning_rate": 1.856419985830035e-05, "loss": 0.5207, "step": 12662 }, { "epoch": 0.3476935749588138, "grad_norm": 0.457171767950058, "learning_rate": 1.856397687248041e-05, "loss": 0.5157, "step": 12663 }, { "epoch": 0.34772103239978036, "grad_norm": 0.5097456574440002, "learning_rate": 1.856375387068592e-05, "loss": 0.6521, "step": 12664 }, { "epoch": 0.34774848984074685, "grad_norm": 0.4551370441913605, "learning_rate": 1.8563530852917293e-05, "loss": 0.5223, "step": 12665 }, { "epoch": 0.34777594728171335, "grad_norm": 0.4334767758846283, "learning_rate": 1.856330781917495e-05, "loss": 0.5329, "step": 12666 }, { "epoch": 0.34780340472267984, "grad_norm": 0.39035770297050476, "learning_rate": 1.8563084769459303e-05, "loss": 0.6251, "step": 12667 }, { "epoch": 0.34783086216364634, "grad_norm": 0.3291419744491577, "learning_rate": 1.856286170377077e-05, "loss": 0.532, "step": 12668 }, { "epoch": 0.34785831960461283, "grad_norm": 0.4791546165943146, "learning_rate": 1.856263862210977e-05, "loss": 0.6054, "step": 12669 }, { "epoch": 0.3478857770455793, "grad_norm": 0.419318825006485, "learning_rate": 1.8562415524476714e-05, "loss": 0.5956, "step": 12670 }, { "epoch": 0.3479132344865459, "grad_norm": 0.3686564564704895, "learning_rate": 1.856219241087202e-05, "loss": 0.5119, "step": 12671 }, { "epoch": 0.34794069192751237, "grad_norm": 0.4044903516769409, "learning_rate": 1.8561969281296103e-05, "loss": 0.587, "step": 12672 }, { "epoch": 0.34796814936847886, "grad_norm": 0.35701268911361694, "learning_rate": 1.8561746135749384e-05, "loss": 0.5276, "step": 12673 }, { "epoch": 0.34799560680944536, "grad_norm": 0.31550541520118713, "learning_rate": 1.8561522974232273e-05, "loss": 0.4819, "step": 12674 }, { "epoch": 0.34802306425041185, "grad_norm": 0.4033799171447754, "learning_rate": 1.8561299796745193e-05, "loss": 0.5118, "step": 12675 }, { "epoch": 0.34805052169137835, "grad_norm": 0.38989681005477905, "learning_rate": 1.8561076603288553e-05, "loss": 0.6018, "step": 12676 }, { "epoch": 0.34807797913234484, "grad_norm": 0.3164607584476471, "learning_rate": 1.8560853393862774e-05, "loss": 0.4325, "step": 12677 }, { "epoch": 0.3481054365733114, "grad_norm": 0.36645808815956116, "learning_rate": 1.856063016846827e-05, "loss": 0.655, "step": 12678 }, { "epoch": 0.3481328940142779, "grad_norm": 0.3638545274734497, "learning_rate": 1.8560406927105458e-05, "loss": 0.5429, "step": 12679 }, { "epoch": 0.3481603514552444, "grad_norm": 0.3753422796726227, "learning_rate": 1.8560183669774758e-05, "loss": 0.5253, "step": 12680 }, { "epoch": 0.34818780889621087, "grad_norm": 0.3696102797985077, "learning_rate": 1.855996039647658e-05, "loss": 0.557, "step": 12681 }, { "epoch": 0.34821526633717736, "grad_norm": 0.39345821738243103, "learning_rate": 1.8559737107211348e-05, "loss": 0.5393, "step": 12682 }, { "epoch": 0.34824272377814386, "grad_norm": 0.3660038113594055, "learning_rate": 1.855951380197947e-05, "loss": 0.5734, "step": 12683 }, { "epoch": 0.34827018121911035, "grad_norm": 0.3815183639526367, "learning_rate": 1.855929048078137e-05, "loss": 0.5138, "step": 12684 }, { "epoch": 0.3482976386600769, "grad_norm": 0.34636929631233215, "learning_rate": 1.8559067143617458e-05, "loss": 0.4594, "step": 12685 }, { "epoch": 0.3483250961010434, "grad_norm": 0.36037972569465637, "learning_rate": 1.855884379048816e-05, "loss": 0.5037, "step": 12686 }, { "epoch": 0.3483525535420099, "grad_norm": 0.3991076648235321, "learning_rate": 1.8558620421393877e-05, "loss": 0.5144, "step": 12687 }, { "epoch": 0.3483800109829764, "grad_norm": 0.3529602587223053, "learning_rate": 1.8558397036335044e-05, "loss": 0.5001, "step": 12688 }, { "epoch": 0.3484074684239429, "grad_norm": 0.48140037059783936, "learning_rate": 1.8558173635312065e-05, "loss": 0.5983, "step": 12689 }, { "epoch": 0.3484349258649094, "grad_norm": 0.432038813829422, "learning_rate": 1.855795021832536e-05, "loss": 0.6027, "step": 12690 }, { "epoch": 0.34846238330587587, "grad_norm": 0.3885188400745392, "learning_rate": 1.855772678537535e-05, "loss": 0.4654, "step": 12691 }, { "epoch": 0.3484898407468424, "grad_norm": 0.3622991144657135, "learning_rate": 1.8557503336462447e-05, "loss": 0.4782, "step": 12692 }, { "epoch": 0.3485172981878089, "grad_norm": 0.41469234228134155, "learning_rate": 1.855727987158707e-05, "loss": 0.5633, "step": 12693 }, { "epoch": 0.3485447556287754, "grad_norm": 0.3547157347202301, "learning_rate": 1.8557056390749633e-05, "loss": 0.5342, "step": 12694 }, { "epoch": 0.3485722130697419, "grad_norm": 0.37723395228385925, "learning_rate": 1.8556832893950555e-05, "loss": 0.492, "step": 12695 }, { "epoch": 0.3485996705107084, "grad_norm": 0.40916070342063904, "learning_rate": 1.855660938119025e-05, "loss": 0.5964, "step": 12696 }, { "epoch": 0.3486271279516749, "grad_norm": 0.3483370542526245, "learning_rate": 1.855638585246914e-05, "loss": 0.4685, "step": 12697 }, { "epoch": 0.3486545853926414, "grad_norm": 0.3677574396133423, "learning_rate": 1.8556162307787642e-05, "loss": 0.5245, "step": 12698 }, { "epoch": 0.34868204283360793, "grad_norm": 0.37959524989128113, "learning_rate": 1.8555938747146167e-05, "loss": 0.5292, "step": 12699 }, { "epoch": 0.3487095002745744, "grad_norm": 0.4152543246746063, "learning_rate": 1.8555715170545138e-05, "loss": 0.6112, "step": 12700 }, { "epoch": 0.3487369577155409, "grad_norm": 0.3743566572666168, "learning_rate": 1.8555491577984967e-05, "loss": 0.4732, "step": 12701 }, { "epoch": 0.3487644151565074, "grad_norm": 0.33464205265045166, "learning_rate": 1.8555267969466076e-05, "loss": 0.5436, "step": 12702 }, { "epoch": 0.3487918725974739, "grad_norm": 0.4057473838329315, "learning_rate": 1.8555044344988877e-05, "loss": 0.4758, "step": 12703 }, { "epoch": 0.3488193300384404, "grad_norm": 0.35516592860221863, "learning_rate": 1.8554820704553795e-05, "loss": 0.6022, "step": 12704 }, { "epoch": 0.3488467874794069, "grad_norm": 0.4343249499797821, "learning_rate": 1.8554597048161235e-05, "loss": 0.5497, "step": 12705 }, { "epoch": 0.34887424492037344, "grad_norm": 0.3886490762233734, "learning_rate": 1.8554373375811625e-05, "loss": 0.5605, "step": 12706 }, { "epoch": 0.34890170236133994, "grad_norm": 0.6382454037666321, "learning_rate": 1.8554149687505377e-05, "loss": 0.4582, "step": 12707 }, { "epoch": 0.34892915980230643, "grad_norm": 0.37029969692230225, "learning_rate": 1.8553925983242913e-05, "loss": 0.4814, "step": 12708 }, { "epoch": 0.3489566172432729, "grad_norm": 0.4169829487800598, "learning_rate": 1.8553702263024645e-05, "loss": 0.6322, "step": 12709 }, { "epoch": 0.3489840746842394, "grad_norm": 0.36576756834983826, "learning_rate": 1.855347852685099e-05, "loss": 0.5533, "step": 12710 }, { "epoch": 0.3490115321252059, "grad_norm": 0.35884466767311096, "learning_rate": 1.8553254774722373e-05, "loss": 0.5604, "step": 12711 }, { "epoch": 0.3490389895661724, "grad_norm": 0.3826451301574707, "learning_rate": 1.8553031006639198e-05, "loss": 0.4887, "step": 12712 }, { "epoch": 0.34906644700713896, "grad_norm": 0.33862385153770447, "learning_rate": 1.8552807222601897e-05, "loss": 0.4632, "step": 12713 }, { "epoch": 0.34909390444810545, "grad_norm": 0.37098926305770874, "learning_rate": 1.855258342261088e-05, "loss": 0.5172, "step": 12714 }, { "epoch": 0.34912136188907195, "grad_norm": 0.353633314371109, "learning_rate": 1.8552359606666562e-05, "loss": 0.485, "step": 12715 }, { "epoch": 0.34914881933003844, "grad_norm": 1.1264735460281372, "learning_rate": 1.8552135774769365e-05, "loss": 0.5854, "step": 12716 }, { "epoch": 0.34917627677100493, "grad_norm": 0.34871238470077515, "learning_rate": 1.8551911926919705e-05, "loss": 0.5267, "step": 12717 }, { "epoch": 0.34920373421197143, "grad_norm": 0.3877100944519043, "learning_rate": 1.8551688063118003e-05, "loss": 0.5429, "step": 12718 }, { "epoch": 0.3492311916529379, "grad_norm": 0.35262084007263184, "learning_rate": 1.8551464183364668e-05, "loss": 0.4494, "step": 12719 }, { "epoch": 0.34925864909390447, "grad_norm": 0.4484689235687256, "learning_rate": 1.855124028766013e-05, "loss": 0.5421, "step": 12720 }, { "epoch": 0.34928610653487097, "grad_norm": 0.379679799079895, "learning_rate": 1.8551016376004796e-05, "loss": 0.508, "step": 12721 }, { "epoch": 0.34931356397583746, "grad_norm": 0.43114110827445984, "learning_rate": 1.8550792448399083e-05, "loss": 0.5378, "step": 12722 }, { "epoch": 0.34934102141680395, "grad_norm": 0.35555553436279297, "learning_rate": 1.855056850484342e-05, "loss": 0.478, "step": 12723 }, { "epoch": 0.34936847885777045, "grad_norm": 0.39386579394340515, "learning_rate": 1.8550344545338214e-05, "loss": 0.501, "step": 12724 }, { "epoch": 0.34939593629873694, "grad_norm": 0.3838627338409424, "learning_rate": 1.8550120569883887e-05, "loss": 0.4911, "step": 12725 }, { "epoch": 0.34942339373970344, "grad_norm": 0.36489996314048767, "learning_rate": 1.8549896578480856e-05, "loss": 0.4717, "step": 12726 }, { "epoch": 0.34945085118067, "grad_norm": 0.3553563058376312, "learning_rate": 1.854967257112954e-05, "loss": 0.4237, "step": 12727 }, { "epoch": 0.3494783086216365, "grad_norm": 0.3648727238178253, "learning_rate": 1.8549448547830356e-05, "loss": 0.5015, "step": 12728 }, { "epoch": 0.349505766062603, "grad_norm": 0.4029054045677185, "learning_rate": 1.8549224508583722e-05, "loss": 0.5437, "step": 12729 }, { "epoch": 0.34953322350356947, "grad_norm": 0.3872370421886444, "learning_rate": 1.8549000453390053e-05, "loss": 0.5273, "step": 12730 }, { "epoch": 0.34956068094453596, "grad_norm": 0.4240622818470001, "learning_rate": 1.8548776382249772e-05, "loss": 0.4971, "step": 12731 }, { "epoch": 0.34958813838550246, "grad_norm": 0.3131033182144165, "learning_rate": 1.8548552295163294e-05, "loss": 0.4216, "step": 12732 }, { "epoch": 0.34961559582646895, "grad_norm": 0.34314465522766113, "learning_rate": 1.8548328192131037e-05, "loss": 0.5537, "step": 12733 }, { "epoch": 0.3496430532674355, "grad_norm": 0.35740411281585693, "learning_rate": 1.854810407315342e-05, "loss": 0.4894, "step": 12734 }, { "epoch": 0.349670510708402, "grad_norm": 0.3857709765434265, "learning_rate": 1.8547879938230862e-05, "loss": 0.5537, "step": 12735 }, { "epoch": 0.3496979681493685, "grad_norm": 0.35877251625061035, "learning_rate": 1.8547655787363778e-05, "loss": 0.5082, "step": 12736 }, { "epoch": 0.349725425590335, "grad_norm": 0.3999338746070862, "learning_rate": 1.8547431620552586e-05, "loss": 0.593, "step": 12737 }, { "epoch": 0.3497528830313015, "grad_norm": 0.352750688791275, "learning_rate": 1.8547207437797713e-05, "loss": 0.506, "step": 12738 }, { "epoch": 0.34978034047226797, "grad_norm": 0.3492289185523987, "learning_rate": 1.8546983239099563e-05, "loss": 0.5164, "step": 12739 }, { "epoch": 0.34980779791323446, "grad_norm": 0.3752346336841583, "learning_rate": 1.8546759024458565e-05, "loss": 0.4911, "step": 12740 }, { "epoch": 0.349835255354201, "grad_norm": 0.40652841329574585, "learning_rate": 1.8546534793875134e-05, "loss": 0.5714, "step": 12741 }, { "epoch": 0.3498627127951675, "grad_norm": 0.31728947162628174, "learning_rate": 1.8546310547349685e-05, "loss": 0.5137, "step": 12742 }, { "epoch": 0.349890170236134, "grad_norm": 0.3243911564350128, "learning_rate": 1.8546086284882642e-05, "loss": 0.5372, "step": 12743 }, { "epoch": 0.3499176276771005, "grad_norm": 0.33510416746139526, "learning_rate": 1.854586200647442e-05, "loss": 0.4561, "step": 12744 }, { "epoch": 0.349945085118067, "grad_norm": 0.3367617428302765, "learning_rate": 1.8545637712125436e-05, "loss": 0.4979, "step": 12745 }, { "epoch": 0.3499725425590335, "grad_norm": 0.34926047921180725, "learning_rate": 1.854541340183611e-05, "loss": 0.4727, "step": 12746 }, { "epoch": 0.35, "grad_norm": 0.4349781274795532, "learning_rate": 1.8545189075606865e-05, "loss": 0.5567, "step": 12747 }, { "epoch": 0.3500274574409665, "grad_norm": 0.335662305355072, "learning_rate": 1.8544964733438112e-05, "loss": 0.4645, "step": 12748 }, { "epoch": 0.350054914881933, "grad_norm": 0.6128681302070618, "learning_rate": 1.8544740375330272e-05, "loss": 0.4064, "step": 12749 }, { "epoch": 0.3500823723228995, "grad_norm": 0.4202558696269989, "learning_rate": 1.8544516001283762e-05, "loss": 0.5582, "step": 12750 }, { "epoch": 0.350109829763866, "grad_norm": 0.33661505579948425, "learning_rate": 1.854429161129901e-05, "loss": 0.5238, "step": 12751 }, { "epoch": 0.3501372872048325, "grad_norm": 0.32788875699043274, "learning_rate": 1.8544067205376417e-05, "loss": 0.4553, "step": 12752 }, { "epoch": 0.350164744645799, "grad_norm": 0.42758116126060486, "learning_rate": 1.8543842783516418e-05, "loss": 0.4639, "step": 12753 }, { "epoch": 0.3501922020867655, "grad_norm": 0.3572043180465698, "learning_rate": 1.8543618345719424e-05, "loss": 0.5102, "step": 12754 }, { "epoch": 0.35021965952773204, "grad_norm": 0.3937124013900757, "learning_rate": 1.8543393891985853e-05, "loss": 0.5021, "step": 12755 }, { "epoch": 0.35024711696869854, "grad_norm": 0.4226818084716797, "learning_rate": 1.854316942231613e-05, "loss": 0.458, "step": 12756 }, { "epoch": 0.35027457440966503, "grad_norm": 0.36984461545944214, "learning_rate": 1.854294493671067e-05, "loss": 0.507, "step": 12757 }, { "epoch": 0.3503020318506315, "grad_norm": 0.3662862479686737, "learning_rate": 1.8542720435169888e-05, "loss": 0.5671, "step": 12758 }, { "epoch": 0.350329489291598, "grad_norm": 0.4066954553127289, "learning_rate": 1.8542495917694205e-05, "loss": 0.5575, "step": 12759 }, { "epoch": 0.3503569467325645, "grad_norm": 0.3352821171283722, "learning_rate": 1.8542271384284043e-05, "loss": 0.5312, "step": 12760 }, { "epoch": 0.350384404173531, "grad_norm": 0.3678243160247803, "learning_rate": 1.8542046834939816e-05, "loss": 0.5047, "step": 12761 }, { "epoch": 0.35041186161449756, "grad_norm": 0.3693774342536926, "learning_rate": 1.8541822269661947e-05, "loss": 0.5954, "step": 12762 }, { "epoch": 0.35043931905546405, "grad_norm": 0.4238046407699585, "learning_rate": 1.8541597688450853e-05, "loss": 0.5149, "step": 12763 }, { "epoch": 0.35046677649643054, "grad_norm": 0.3897447884082794, "learning_rate": 1.8541373091306955e-05, "loss": 0.5516, "step": 12764 }, { "epoch": 0.35049423393739704, "grad_norm": 0.35969260334968567, "learning_rate": 1.8541148478230668e-05, "loss": 0.4635, "step": 12765 }, { "epoch": 0.35052169137836353, "grad_norm": 0.3547324538230896, "learning_rate": 1.8540923849222412e-05, "loss": 0.4954, "step": 12766 }, { "epoch": 0.35054914881933, "grad_norm": 0.3481164574623108, "learning_rate": 1.854069920428261e-05, "loss": 0.4254, "step": 12767 }, { "epoch": 0.3505766062602965, "grad_norm": 0.35063424706459045, "learning_rate": 1.8540474543411675e-05, "loss": 0.5396, "step": 12768 }, { "epoch": 0.35060406370126307, "grad_norm": 0.3863530158996582, "learning_rate": 1.8540249866610033e-05, "loss": 0.5965, "step": 12769 }, { "epoch": 0.35063152114222956, "grad_norm": 0.3833959102630615, "learning_rate": 1.8540025173878097e-05, "loss": 0.5034, "step": 12770 }, { "epoch": 0.35065897858319606, "grad_norm": 0.34697434306144714, "learning_rate": 1.8539800465216287e-05, "loss": 0.539, "step": 12771 }, { "epoch": 0.35068643602416255, "grad_norm": 0.3505365550518036, "learning_rate": 1.8539575740625026e-05, "loss": 0.4604, "step": 12772 }, { "epoch": 0.35071389346512905, "grad_norm": 0.3836917579174042, "learning_rate": 1.8539351000104727e-05, "loss": 0.5126, "step": 12773 }, { "epoch": 0.35074135090609554, "grad_norm": 0.35812002420425415, "learning_rate": 1.8539126243655815e-05, "loss": 0.5647, "step": 12774 }, { "epoch": 0.35076880834706203, "grad_norm": 0.5677432417869568, "learning_rate": 1.853890147127871e-05, "loss": 0.4971, "step": 12775 }, { "epoch": 0.3507962657880286, "grad_norm": 0.3886649012565613, "learning_rate": 1.8538676682973826e-05, "loss": 0.4616, "step": 12776 }, { "epoch": 0.3508237232289951, "grad_norm": 0.38589757680892944, "learning_rate": 1.8538451878741583e-05, "loss": 0.5971, "step": 12777 }, { "epoch": 0.35085118066996157, "grad_norm": 0.41209521889686584, "learning_rate": 1.8538227058582407e-05, "loss": 0.531, "step": 12778 }, { "epoch": 0.35087863811092806, "grad_norm": 0.315022349357605, "learning_rate": 1.8538002222496707e-05, "loss": 0.4432, "step": 12779 }, { "epoch": 0.35090609555189456, "grad_norm": 0.3819486200809479, "learning_rate": 1.853777737048491e-05, "loss": 0.5419, "step": 12780 }, { "epoch": 0.35093355299286105, "grad_norm": 0.5199286937713623, "learning_rate": 1.8537552502547435e-05, "loss": 0.5752, "step": 12781 }, { "epoch": 0.35096101043382755, "grad_norm": 0.35057711601257324, "learning_rate": 1.8537327618684696e-05, "loss": 0.5092, "step": 12782 }, { "epoch": 0.3509884678747941, "grad_norm": 0.4272479712963104, "learning_rate": 1.853710271889712e-05, "loss": 0.515, "step": 12783 }, { "epoch": 0.3510159253157606, "grad_norm": 0.3476881682872772, "learning_rate": 1.853687780318512e-05, "loss": 0.4821, "step": 12784 }, { "epoch": 0.3510433827567271, "grad_norm": 0.3694227933883667, "learning_rate": 1.853665287154912e-05, "loss": 0.5293, "step": 12785 }, { "epoch": 0.3510708401976936, "grad_norm": 0.40199264883995056, "learning_rate": 1.853642792398954e-05, "loss": 0.5242, "step": 12786 }, { "epoch": 0.3510982976386601, "grad_norm": 0.36939477920532227, "learning_rate": 1.8536202960506793e-05, "loss": 0.4786, "step": 12787 }, { "epoch": 0.35112575507962657, "grad_norm": 0.4210872948169708, "learning_rate": 1.8535977981101306e-05, "loss": 0.5389, "step": 12788 }, { "epoch": 0.35115321252059306, "grad_norm": 0.4220708906650543, "learning_rate": 1.8535752985773493e-05, "loss": 0.558, "step": 12789 }, { "epoch": 0.35118066996155956, "grad_norm": 0.39541953802108765, "learning_rate": 1.8535527974523775e-05, "loss": 0.5173, "step": 12790 }, { "epoch": 0.3512081274025261, "grad_norm": 0.4044537842273712, "learning_rate": 1.853530294735258e-05, "loss": 0.5359, "step": 12791 }, { "epoch": 0.3512355848434926, "grad_norm": 0.3896861970424652, "learning_rate": 1.8535077904260315e-05, "loss": 0.5585, "step": 12792 }, { "epoch": 0.3512630422844591, "grad_norm": 0.3692465126514435, "learning_rate": 1.8534852845247408e-05, "loss": 0.5377, "step": 12793 }, { "epoch": 0.3512904997254256, "grad_norm": 0.3557482957839966, "learning_rate": 1.8534627770314277e-05, "loss": 0.5238, "step": 12794 }, { "epoch": 0.3513179571663921, "grad_norm": 0.36346128582954407, "learning_rate": 1.8534402679461338e-05, "loss": 0.4571, "step": 12795 }, { "epoch": 0.3513454146073586, "grad_norm": 0.3818471431732178, "learning_rate": 1.8534177572689018e-05, "loss": 0.5025, "step": 12796 }, { "epoch": 0.35137287204832507, "grad_norm": 0.38022857904434204, "learning_rate": 1.853395244999773e-05, "loss": 0.5901, "step": 12797 }, { "epoch": 0.3514003294892916, "grad_norm": 0.4028988480567932, "learning_rate": 1.8533727311387898e-05, "loss": 0.6092, "step": 12798 }, { "epoch": 0.3514277869302581, "grad_norm": 0.43498778343200684, "learning_rate": 1.853350215685994e-05, "loss": 0.5886, "step": 12799 }, { "epoch": 0.3514552443712246, "grad_norm": 0.375965416431427, "learning_rate": 1.853327698641428e-05, "loss": 0.506, "step": 12800 }, { "epoch": 0.3514827018121911, "grad_norm": 0.34415990114212036, "learning_rate": 1.8533051800051333e-05, "loss": 0.495, "step": 12801 }, { "epoch": 0.3515101592531576, "grad_norm": 0.3847366273403168, "learning_rate": 1.853282659777152e-05, "loss": 0.516, "step": 12802 }, { "epoch": 0.3515376166941241, "grad_norm": 0.6048359870910645, "learning_rate": 1.8532601379575263e-05, "loss": 0.5826, "step": 12803 }, { "epoch": 0.3515650741350906, "grad_norm": 0.40873315930366516, "learning_rate": 1.853237614546298e-05, "loss": 0.5244, "step": 12804 }, { "epoch": 0.35159253157605713, "grad_norm": 0.4180256128311157, "learning_rate": 1.8532150895435095e-05, "loss": 0.4362, "step": 12805 }, { "epoch": 0.3516199890170236, "grad_norm": 0.347024530172348, "learning_rate": 1.8531925629492026e-05, "loss": 0.5762, "step": 12806 }, { "epoch": 0.3516474464579901, "grad_norm": 0.3762400150299072, "learning_rate": 1.853170034763419e-05, "loss": 0.5328, "step": 12807 }, { "epoch": 0.3516749038989566, "grad_norm": 0.3446353077888489, "learning_rate": 1.853147504986201e-05, "loss": 0.4289, "step": 12808 }, { "epoch": 0.3517023613399231, "grad_norm": 0.40376850962638855, "learning_rate": 1.8531249736175905e-05, "loss": 0.6095, "step": 12809 }, { "epoch": 0.3517298187808896, "grad_norm": 0.3772500455379486, "learning_rate": 1.8531024406576297e-05, "loss": 0.5399, "step": 12810 }, { "epoch": 0.3517572762218561, "grad_norm": 0.3339478671550751, "learning_rate": 1.8530799061063607e-05, "loss": 0.4992, "step": 12811 }, { "epoch": 0.35178473366282265, "grad_norm": 0.36901745200157166, "learning_rate": 1.853057369963825e-05, "loss": 0.4759, "step": 12812 }, { "epoch": 0.35181219110378914, "grad_norm": 0.3863079249858856, "learning_rate": 1.8530348322300657e-05, "loss": 0.4362, "step": 12813 }, { "epoch": 0.35183964854475563, "grad_norm": 0.3718009889125824, "learning_rate": 1.853012292905124e-05, "loss": 0.5582, "step": 12814 }, { "epoch": 0.35186710598572213, "grad_norm": 0.3525647521018982, "learning_rate": 1.8529897519890417e-05, "loss": 0.5195, "step": 12815 }, { "epoch": 0.3518945634266886, "grad_norm": 0.40134939551353455, "learning_rate": 1.852967209481862e-05, "loss": 0.549, "step": 12816 }, { "epoch": 0.3519220208676551, "grad_norm": 0.43578633666038513, "learning_rate": 1.8529446653836257e-05, "loss": 0.5981, "step": 12817 }, { "epoch": 0.3519494783086216, "grad_norm": 0.3593917191028595, "learning_rate": 1.8529221196943755e-05, "loss": 0.5465, "step": 12818 }, { "epoch": 0.35197693574958816, "grad_norm": 0.35442444682121277, "learning_rate": 1.852899572414153e-05, "loss": 0.5559, "step": 12819 }, { "epoch": 0.35200439319055465, "grad_norm": 0.3310231864452362, "learning_rate": 1.8528770235430008e-05, "loss": 0.4823, "step": 12820 }, { "epoch": 0.35203185063152115, "grad_norm": 0.38789474964141846, "learning_rate": 1.852854473080961e-05, "loss": 0.4664, "step": 12821 }, { "epoch": 0.35205930807248764, "grad_norm": 0.38769039511680603, "learning_rate": 1.852831921028075e-05, "loss": 0.6271, "step": 12822 }, { "epoch": 0.35208676551345414, "grad_norm": 0.3682776093482971, "learning_rate": 1.8528093673843855e-05, "loss": 0.5802, "step": 12823 }, { "epoch": 0.35211422295442063, "grad_norm": 0.3962457478046417, "learning_rate": 1.8527868121499343e-05, "loss": 0.4792, "step": 12824 }, { "epoch": 0.3521416803953871, "grad_norm": 0.38238704204559326, "learning_rate": 1.8527642553247637e-05, "loss": 0.4732, "step": 12825 }, { "epoch": 0.3521691378363537, "grad_norm": 0.4394192099571228, "learning_rate": 1.8527416969089153e-05, "loss": 0.5149, "step": 12826 }, { "epoch": 0.35219659527732017, "grad_norm": 0.39303654432296753, "learning_rate": 1.8527191369024316e-05, "loss": 0.4709, "step": 12827 }, { "epoch": 0.35222405271828666, "grad_norm": 0.3971758782863617, "learning_rate": 1.8526965753053546e-05, "loss": 0.4593, "step": 12828 }, { "epoch": 0.35225151015925316, "grad_norm": 0.5130875110626221, "learning_rate": 1.8526740121177265e-05, "loss": 0.5363, "step": 12829 }, { "epoch": 0.35227896760021965, "grad_norm": 0.33994677662849426, "learning_rate": 1.852651447339589e-05, "loss": 0.5003, "step": 12830 }, { "epoch": 0.35230642504118614, "grad_norm": 0.32734236121177673, "learning_rate": 1.8526288809709846e-05, "loss": 0.5521, "step": 12831 }, { "epoch": 0.35233388248215264, "grad_norm": 0.4052627682685852, "learning_rate": 1.852606313011955e-05, "loss": 0.5121, "step": 12832 }, { "epoch": 0.3523613399231192, "grad_norm": 0.40140101313591003, "learning_rate": 1.8525837434625425e-05, "loss": 0.535, "step": 12833 }, { "epoch": 0.3523887973640857, "grad_norm": 0.3887263536453247, "learning_rate": 1.8525611723227892e-05, "loss": 0.486, "step": 12834 }, { "epoch": 0.3524162548050522, "grad_norm": 0.45914196968078613, "learning_rate": 1.8525385995927373e-05, "loss": 0.5076, "step": 12835 }, { "epoch": 0.35244371224601867, "grad_norm": 0.3840571939945221, "learning_rate": 1.8525160252724288e-05, "loss": 0.4862, "step": 12836 }, { "epoch": 0.35247116968698516, "grad_norm": 0.3851255178451538, "learning_rate": 1.8524934493619054e-05, "loss": 0.5704, "step": 12837 }, { "epoch": 0.35249862712795166, "grad_norm": 0.34848812222480774, "learning_rate": 1.8524708718612104e-05, "loss": 0.4727, "step": 12838 }, { "epoch": 0.35252608456891815, "grad_norm": 0.4177800118923187, "learning_rate": 1.852448292770385e-05, "loss": 0.517, "step": 12839 }, { "epoch": 0.3525535420098847, "grad_norm": 0.41203561425209045, "learning_rate": 1.852425712089471e-05, "loss": 0.5718, "step": 12840 }, { "epoch": 0.3525809994508512, "grad_norm": 0.37807831168174744, "learning_rate": 1.8524031298185114e-05, "loss": 0.573, "step": 12841 }, { "epoch": 0.3526084568918177, "grad_norm": 0.4216982424259186, "learning_rate": 1.8523805459575473e-05, "loss": 0.5767, "step": 12842 }, { "epoch": 0.3526359143327842, "grad_norm": 0.379564493894577, "learning_rate": 1.8523579605066218e-05, "loss": 0.5847, "step": 12843 }, { "epoch": 0.3526633717737507, "grad_norm": 0.38498130440711975, "learning_rate": 1.852335373465777e-05, "loss": 0.4862, "step": 12844 }, { "epoch": 0.35269082921471717, "grad_norm": 0.32204508781433105, "learning_rate": 1.8523127848350544e-05, "loss": 0.4844, "step": 12845 }, { "epoch": 0.35271828665568367, "grad_norm": 0.3881717920303345, "learning_rate": 1.8522901946144968e-05, "loss": 0.6119, "step": 12846 }, { "epoch": 0.3527457440966502, "grad_norm": 0.3525262176990509, "learning_rate": 1.852267602804145e-05, "loss": 0.5149, "step": 12847 }, { "epoch": 0.3527732015376167, "grad_norm": 0.3706952631473541, "learning_rate": 1.852245009404043e-05, "loss": 0.4492, "step": 12848 }, { "epoch": 0.3528006589785832, "grad_norm": 0.386439710855484, "learning_rate": 1.8522224144142317e-05, "loss": 0.4742, "step": 12849 }, { "epoch": 0.3528281164195497, "grad_norm": 0.35717129707336426, "learning_rate": 1.852199817834754e-05, "loss": 0.4295, "step": 12850 }, { "epoch": 0.3528555738605162, "grad_norm": 0.37986671924591064, "learning_rate": 1.8521772196656513e-05, "loss": 0.5157, "step": 12851 }, { "epoch": 0.3528830313014827, "grad_norm": 0.4034775495529175, "learning_rate": 1.852154619906966e-05, "loss": 0.532, "step": 12852 }, { "epoch": 0.3529104887424492, "grad_norm": 0.4948137700557709, "learning_rate": 1.8521320185587405e-05, "loss": 0.6043, "step": 12853 }, { "epoch": 0.35293794618341573, "grad_norm": 0.3808092474937439, "learning_rate": 1.852109415621017e-05, "loss": 0.6346, "step": 12854 }, { "epoch": 0.3529654036243822, "grad_norm": 0.3583548963069916, "learning_rate": 1.852086811093837e-05, "loss": 0.4636, "step": 12855 }, { "epoch": 0.3529928610653487, "grad_norm": 0.37770089507102966, "learning_rate": 1.8520642049772437e-05, "loss": 0.4505, "step": 12856 }, { "epoch": 0.3530203185063152, "grad_norm": 0.3642883598804474, "learning_rate": 1.8520415972712783e-05, "loss": 0.5099, "step": 12857 }, { "epoch": 0.3530477759472817, "grad_norm": 0.34953078627586365, "learning_rate": 1.8520189879759836e-05, "loss": 0.4943, "step": 12858 }, { "epoch": 0.3530752333882482, "grad_norm": 0.3851338028907776, "learning_rate": 1.8519963770914014e-05, "loss": 0.5536, "step": 12859 }, { "epoch": 0.3531026908292147, "grad_norm": 0.41256478428840637, "learning_rate": 1.8519737646175743e-05, "loss": 0.564, "step": 12860 }, { "epoch": 0.35313014827018124, "grad_norm": 0.4466272294521332, "learning_rate": 1.851951150554544e-05, "loss": 0.5406, "step": 12861 }, { "epoch": 0.35315760571114774, "grad_norm": 0.3466949462890625, "learning_rate": 1.8519285349023527e-05, "loss": 0.5324, "step": 12862 }, { "epoch": 0.35318506315211423, "grad_norm": 0.40668949484825134, "learning_rate": 1.8519059176610433e-05, "loss": 0.4992, "step": 12863 }, { "epoch": 0.3532125205930807, "grad_norm": 0.35628795623779297, "learning_rate": 1.851883298830657e-05, "loss": 0.502, "step": 12864 }, { "epoch": 0.3532399780340472, "grad_norm": 0.37629130482673645, "learning_rate": 1.8518606784112367e-05, "loss": 0.4996, "step": 12865 }, { "epoch": 0.3532674354750137, "grad_norm": 0.3676777184009552, "learning_rate": 1.8518380564028242e-05, "loss": 0.4634, "step": 12866 }, { "epoch": 0.3532948929159802, "grad_norm": 0.4142403304576874, "learning_rate": 1.851815432805462e-05, "loss": 0.5415, "step": 12867 }, { "epoch": 0.35332235035694676, "grad_norm": 0.33964458107948303, "learning_rate": 1.8517928076191922e-05, "loss": 0.5115, "step": 12868 }, { "epoch": 0.35334980779791325, "grad_norm": 0.41167959570884705, "learning_rate": 1.8517701808440568e-05, "loss": 0.5489, "step": 12869 }, { "epoch": 0.35337726523887975, "grad_norm": 0.3361583948135376, "learning_rate": 1.8517475524800982e-05, "loss": 0.5227, "step": 12870 }, { "epoch": 0.35340472267984624, "grad_norm": 0.37269118428230286, "learning_rate": 1.8517249225273583e-05, "loss": 0.5123, "step": 12871 }, { "epoch": 0.35343218012081273, "grad_norm": 0.36148127913475037, "learning_rate": 1.85170229098588e-05, "loss": 0.5627, "step": 12872 }, { "epoch": 0.3534596375617792, "grad_norm": 0.33478206396102905, "learning_rate": 1.851679657855705e-05, "loss": 0.4503, "step": 12873 }, { "epoch": 0.3534870950027457, "grad_norm": 0.34238138794898987, "learning_rate": 1.8516570231368757e-05, "loss": 0.54, "step": 12874 }, { "epoch": 0.35351455244371227, "grad_norm": 0.387583464384079, "learning_rate": 1.8516343868294337e-05, "loss": 0.5691, "step": 12875 }, { "epoch": 0.35354200988467877, "grad_norm": 0.43100106716156006, "learning_rate": 1.8516117489334224e-05, "loss": 0.4988, "step": 12876 }, { "epoch": 0.35356946732564526, "grad_norm": 0.36195090413093567, "learning_rate": 1.851589109448883e-05, "loss": 0.5612, "step": 12877 }, { "epoch": 0.35359692476661175, "grad_norm": 0.3720771074295044, "learning_rate": 1.8515664683758583e-05, "loss": 0.4335, "step": 12878 }, { "epoch": 0.35362438220757825, "grad_norm": 0.4265027642250061, "learning_rate": 1.85154382571439e-05, "loss": 0.5071, "step": 12879 }, { "epoch": 0.35365183964854474, "grad_norm": 0.37814003229141235, "learning_rate": 1.851521181464521e-05, "loss": 0.5772, "step": 12880 }, { "epoch": 0.35367929708951124, "grad_norm": 0.34996068477630615, "learning_rate": 1.8514985356262932e-05, "loss": 0.5582, "step": 12881 }, { "epoch": 0.3537067545304778, "grad_norm": 0.4252006709575653, "learning_rate": 1.851475888199749e-05, "loss": 0.616, "step": 12882 }, { "epoch": 0.3537342119714443, "grad_norm": 0.3613317012786865, "learning_rate": 1.8514532391849304e-05, "loss": 0.4737, "step": 12883 }, { "epoch": 0.3537616694124108, "grad_norm": 0.3640919029712677, "learning_rate": 1.8514305885818796e-05, "loss": 0.5329, "step": 12884 }, { "epoch": 0.35378912685337727, "grad_norm": 0.31983834505081177, "learning_rate": 1.8514079363906393e-05, "loss": 0.4673, "step": 12885 }, { "epoch": 0.35381658429434376, "grad_norm": 0.3426242172718048, "learning_rate": 1.8513852826112512e-05, "loss": 0.4935, "step": 12886 }, { "epoch": 0.35384404173531026, "grad_norm": 0.3996743857860565, "learning_rate": 1.851362627243758e-05, "loss": 0.5476, "step": 12887 }, { "epoch": 0.35387149917627675, "grad_norm": 0.3796069324016571, "learning_rate": 1.8513399702882016e-05, "loss": 0.4978, "step": 12888 }, { "epoch": 0.3538989566172433, "grad_norm": 0.3981380760669708, "learning_rate": 1.8513173117446245e-05, "loss": 0.5627, "step": 12889 }, { "epoch": 0.3539264140582098, "grad_norm": 0.3749481439590454, "learning_rate": 1.851294651613069e-05, "loss": 0.5093, "step": 12890 }, { "epoch": 0.3539538714991763, "grad_norm": 0.3840797543525696, "learning_rate": 1.851271989893577e-05, "loss": 0.5261, "step": 12891 }, { "epoch": 0.3539813289401428, "grad_norm": 0.3574707806110382, "learning_rate": 1.8512493265861914e-05, "loss": 0.4366, "step": 12892 }, { "epoch": 0.3540087863811093, "grad_norm": 0.3929075002670288, "learning_rate": 1.851226661690954e-05, "loss": 0.6172, "step": 12893 }, { "epoch": 0.35403624382207577, "grad_norm": 0.3466016352176666, "learning_rate": 1.851203995207907e-05, "loss": 0.5613, "step": 12894 }, { "epoch": 0.35406370126304226, "grad_norm": 0.3712824285030365, "learning_rate": 1.8511813271370932e-05, "loss": 0.5098, "step": 12895 }, { "epoch": 0.3540911587040088, "grad_norm": 0.39508551359176636, "learning_rate": 1.8511586574785542e-05, "loss": 0.5356, "step": 12896 }, { "epoch": 0.3541186161449753, "grad_norm": 0.3669546842575073, "learning_rate": 1.851135986232333e-05, "loss": 0.473, "step": 12897 }, { "epoch": 0.3541460735859418, "grad_norm": 0.36558735370635986, "learning_rate": 1.8511133133984712e-05, "loss": 0.4587, "step": 12898 }, { "epoch": 0.3541735310269083, "grad_norm": 0.6372311115264893, "learning_rate": 1.8510906389770118e-05, "loss": 0.4192, "step": 12899 }, { "epoch": 0.3542009884678748, "grad_norm": 0.38712602853775024, "learning_rate": 1.8510679629679968e-05, "loss": 0.5134, "step": 12900 }, { "epoch": 0.3542284459088413, "grad_norm": 0.37107235193252563, "learning_rate": 1.851045285371468e-05, "loss": 0.5634, "step": 12901 }, { "epoch": 0.3542559033498078, "grad_norm": 0.3554193377494812, "learning_rate": 1.851022606187468e-05, "loss": 0.498, "step": 12902 }, { "epoch": 0.3542833607907743, "grad_norm": 0.3444443345069885, "learning_rate": 1.8509999254160396e-05, "loss": 0.5057, "step": 12903 }, { "epoch": 0.3543108182317408, "grad_norm": 0.3791157305240631, "learning_rate": 1.8509772430572245e-05, "loss": 0.5177, "step": 12904 }, { "epoch": 0.3543382756727073, "grad_norm": 0.4048106074333191, "learning_rate": 1.8509545591110652e-05, "loss": 0.6075, "step": 12905 }, { "epoch": 0.3543657331136738, "grad_norm": 0.3326372504234314, "learning_rate": 1.8509318735776046e-05, "loss": 0.4806, "step": 12906 }, { "epoch": 0.3543931905546403, "grad_norm": 0.3435409665107727, "learning_rate": 1.850909186456884e-05, "loss": 0.523, "step": 12907 }, { "epoch": 0.3544206479956068, "grad_norm": 0.378873735666275, "learning_rate": 1.850886497748946e-05, "loss": 0.578, "step": 12908 }, { "epoch": 0.3544481054365733, "grad_norm": 0.39823320508003235, "learning_rate": 1.8508638074538335e-05, "loss": 0.5833, "step": 12909 }, { "epoch": 0.35447556287753984, "grad_norm": 0.3670293688774109, "learning_rate": 1.8508411155715885e-05, "loss": 0.5578, "step": 12910 }, { "epoch": 0.35450302031850633, "grad_norm": 0.3475448489189148, "learning_rate": 1.850818422102253e-05, "loss": 0.5059, "step": 12911 }, { "epoch": 0.35453047775947283, "grad_norm": 0.33383315801620483, "learning_rate": 1.8507957270458696e-05, "loss": 0.5205, "step": 12912 }, { "epoch": 0.3545579352004393, "grad_norm": 0.3599684238433838, "learning_rate": 1.850773030402481e-05, "loss": 0.5265, "step": 12913 }, { "epoch": 0.3545853926414058, "grad_norm": 0.3549947738647461, "learning_rate": 1.8507503321721287e-05, "loss": 0.5596, "step": 12914 }, { "epoch": 0.3546128500823723, "grad_norm": 0.34308668971061707, "learning_rate": 1.8507276323548557e-05, "loss": 0.5148, "step": 12915 }, { "epoch": 0.3546403075233388, "grad_norm": 0.3525458574295044, "learning_rate": 1.850704930950704e-05, "loss": 0.4756, "step": 12916 }, { "epoch": 0.35466776496430535, "grad_norm": 0.3469778597354889, "learning_rate": 1.850682227959716e-05, "loss": 0.531, "step": 12917 }, { "epoch": 0.35469522240527185, "grad_norm": 0.3996882736682892, "learning_rate": 1.8506595233819345e-05, "loss": 0.5354, "step": 12918 }, { "epoch": 0.35472267984623834, "grad_norm": 0.4604485034942627, "learning_rate": 1.8506368172174014e-05, "loss": 0.586, "step": 12919 }, { "epoch": 0.35475013728720484, "grad_norm": 0.36522454023361206, "learning_rate": 1.850614109466159e-05, "loss": 0.5225, "step": 12920 }, { "epoch": 0.35477759472817133, "grad_norm": 0.3789752125740051, "learning_rate": 1.85059140012825e-05, "loss": 0.5469, "step": 12921 }, { "epoch": 0.3548050521691378, "grad_norm": 0.38411080837249756, "learning_rate": 1.8505686892037166e-05, "loss": 0.5517, "step": 12922 }, { "epoch": 0.3548325096101043, "grad_norm": 0.3515183925628662, "learning_rate": 1.8505459766926004e-05, "loss": 0.5019, "step": 12923 }, { "epoch": 0.3548599670510708, "grad_norm": 0.4095139801502228, "learning_rate": 1.8505232625949455e-05, "loss": 0.5275, "step": 12924 }, { "epoch": 0.35488742449203736, "grad_norm": 0.3980804681777954, "learning_rate": 1.8505005469107927e-05, "loss": 0.5866, "step": 12925 }, { "epoch": 0.35491488193300386, "grad_norm": 0.3363350033760071, "learning_rate": 1.8504778296401852e-05, "loss": 0.4267, "step": 12926 }, { "epoch": 0.35494233937397035, "grad_norm": 0.3788478374481201, "learning_rate": 1.8504551107831646e-05, "loss": 0.5497, "step": 12927 }, { "epoch": 0.35496979681493684, "grad_norm": 0.39669445157051086, "learning_rate": 1.8504323903397743e-05, "loss": 0.5068, "step": 12928 }, { "epoch": 0.35499725425590334, "grad_norm": 0.3539443612098694, "learning_rate": 1.850409668310056e-05, "loss": 0.536, "step": 12929 }, { "epoch": 0.35502471169686983, "grad_norm": 0.35917991399765015, "learning_rate": 1.8503869446940522e-05, "loss": 0.468, "step": 12930 }, { "epoch": 0.3550521691378363, "grad_norm": 0.37359488010406494, "learning_rate": 1.8503642194918055e-05, "loss": 0.5642, "step": 12931 }, { "epoch": 0.3550796265788029, "grad_norm": 0.4151769280433655, "learning_rate": 1.850341492703358e-05, "loss": 0.6652, "step": 12932 }, { "epoch": 0.35510708401976937, "grad_norm": 0.37705880403518677, "learning_rate": 1.850318764328752e-05, "loss": 0.5299, "step": 12933 }, { "epoch": 0.35513454146073586, "grad_norm": 0.3238154649734497, "learning_rate": 1.8502960343680304e-05, "loss": 0.5032, "step": 12934 }, { "epoch": 0.35516199890170236, "grad_norm": 0.3359978497028351, "learning_rate": 1.8502733028212355e-05, "loss": 0.488, "step": 12935 }, { "epoch": 0.35518945634266885, "grad_norm": 0.35553961992263794, "learning_rate": 1.850250569688409e-05, "loss": 0.506, "step": 12936 }, { "epoch": 0.35521691378363535, "grad_norm": 0.5508958697319031, "learning_rate": 1.8502278349695943e-05, "loss": 0.5668, "step": 12937 }, { "epoch": 0.35524437122460184, "grad_norm": 0.38974401354789734, "learning_rate": 1.850205098664833e-05, "loss": 0.4646, "step": 12938 }, { "epoch": 0.3552718286655684, "grad_norm": 0.3833053708076477, "learning_rate": 1.850182360774168e-05, "loss": 0.5745, "step": 12939 }, { "epoch": 0.3552992861065349, "grad_norm": 0.375117689371109, "learning_rate": 1.8501596212976417e-05, "loss": 0.5945, "step": 12940 }, { "epoch": 0.3553267435475014, "grad_norm": 0.4733688235282898, "learning_rate": 1.850136880235296e-05, "loss": 0.4298, "step": 12941 }, { "epoch": 0.35535420098846787, "grad_norm": 0.4538436532020569, "learning_rate": 1.8501141375871738e-05, "loss": 0.4678, "step": 12942 }, { "epoch": 0.35538165842943437, "grad_norm": 0.36682039499282837, "learning_rate": 1.8500913933533175e-05, "loss": 0.5335, "step": 12943 }, { "epoch": 0.35540911587040086, "grad_norm": 0.4080280363559723, "learning_rate": 1.8500686475337694e-05, "loss": 0.5377, "step": 12944 }, { "epoch": 0.35543657331136735, "grad_norm": 0.3294958770275116, "learning_rate": 1.850045900128572e-05, "loss": 0.4046, "step": 12945 }, { "epoch": 0.3554640307523339, "grad_norm": 0.32070714235305786, "learning_rate": 1.8500231511377674e-05, "loss": 0.4308, "step": 12946 }, { "epoch": 0.3554914881933004, "grad_norm": 0.32708218693733215, "learning_rate": 1.850000400561399e-05, "loss": 0.4791, "step": 12947 }, { "epoch": 0.3555189456342669, "grad_norm": 0.3574189245700836, "learning_rate": 1.849977648399508e-05, "loss": 0.5955, "step": 12948 }, { "epoch": 0.3555464030752334, "grad_norm": 0.38079383969306946, "learning_rate": 1.8499548946521374e-05, "loss": 0.5889, "step": 12949 }, { "epoch": 0.3555738605161999, "grad_norm": 0.3683728277683258, "learning_rate": 1.84993213931933e-05, "loss": 0.5528, "step": 12950 }, { "epoch": 0.3556013179571664, "grad_norm": 0.36731061339378357, "learning_rate": 1.8499093824011277e-05, "loss": 0.6018, "step": 12951 }, { "epoch": 0.35562877539813287, "grad_norm": 0.4952526390552521, "learning_rate": 1.849886623897573e-05, "loss": 0.4918, "step": 12952 }, { "epoch": 0.3556562328390994, "grad_norm": 0.33449846506118774, "learning_rate": 1.8498638638087087e-05, "loss": 0.4079, "step": 12953 }, { "epoch": 0.3556836902800659, "grad_norm": 0.3640614449977875, "learning_rate": 1.849841102134577e-05, "loss": 0.5628, "step": 12954 }, { "epoch": 0.3557111477210324, "grad_norm": 0.37276479601860046, "learning_rate": 1.8498183388752204e-05, "loss": 0.5214, "step": 12955 }, { "epoch": 0.3557386051619989, "grad_norm": 0.35916051268577576, "learning_rate": 1.849795574030681e-05, "loss": 0.5291, "step": 12956 }, { "epoch": 0.3557660626029654, "grad_norm": 0.37045803666114807, "learning_rate": 1.8497728076010024e-05, "loss": 0.5691, "step": 12957 }, { "epoch": 0.3557935200439319, "grad_norm": 0.39132675528526306, "learning_rate": 1.8497500395862258e-05, "loss": 0.6215, "step": 12958 }, { "epoch": 0.3558209774848984, "grad_norm": 0.34931838512420654, "learning_rate": 1.849727269986394e-05, "loss": 0.5387, "step": 12959 }, { "epoch": 0.35584843492586493, "grad_norm": 0.37427324056625366, "learning_rate": 1.84970449880155e-05, "loss": 0.4876, "step": 12960 }, { "epoch": 0.3558758923668314, "grad_norm": 0.3529096841812134, "learning_rate": 1.849681726031736e-05, "loss": 0.5528, "step": 12961 }, { "epoch": 0.3559033498077979, "grad_norm": 0.35555729269981384, "learning_rate": 1.849658951676994e-05, "loss": 0.5054, "step": 12962 }, { "epoch": 0.3559308072487644, "grad_norm": 0.37021487951278687, "learning_rate": 1.8496361757373674e-05, "loss": 0.4592, "step": 12963 }, { "epoch": 0.3559582646897309, "grad_norm": 0.3301765024662018, "learning_rate": 1.8496133982128977e-05, "loss": 0.5248, "step": 12964 }, { "epoch": 0.3559857221306974, "grad_norm": 0.4845414161682129, "learning_rate": 1.8495906191036277e-05, "loss": 0.5785, "step": 12965 }, { "epoch": 0.3560131795716639, "grad_norm": 0.34625008702278137, "learning_rate": 1.8495678384096004e-05, "loss": 0.4992, "step": 12966 }, { "epoch": 0.35604063701263045, "grad_norm": 0.48607999086380005, "learning_rate": 1.8495450561308577e-05, "loss": 0.4906, "step": 12967 }, { "epoch": 0.35606809445359694, "grad_norm": 0.3235108554363251, "learning_rate": 1.849522272267443e-05, "loss": 0.5672, "step": 12968 }, { "epoch": 0.35609555189456343, "grad_norm": 0.3906828463077545, "learning_rate": 1.849499486819397e-05, "loss": 0.4886, "step": 12969 }, { "epoch": 0.3561230093355299, "grad_norm": 0.3271802067756653, "learning_rate": 1.8494766997867643e-05, "loss": 0.4656, "step": 12970 }, { "epoch": 0.3561504667764964, "grad_norm": 0.3509131669998169, "learning_rate": 1.849453911169586e-05, "loss": 0.5182, "step": 12971 }, { "epoch": 0.3561779242174629, "grad_norm": 0.3706603944301605, "learning_rate": 1.8494311209679052e-05, "loss": 0.4967, "step": 12972 }, { "epoch": 0.3562053816584294, "grad_norm": 0.39906302094459534, "learning_rate": 1.8494083291817643e-05, "loss": 0.4853, "step": 12973 }, { "epoch": 0.35623283909939596, "grad_norm": 0.39738091826438904, "learning_rate": 1.8493855358112055e-05, "loss": 0.5229, "step": 12974 }, { "epoch": 0.35626029654036245, "grad_norm": 0.43350231647491455, "learning_rate": 1.849362740856272e-05, "loss": 0.5823, "step": 12975 }, { "epoch": 0.35628775398132895, "grad_norm": 0.3906891345977783, "learning_rate": 1.8493399443170052e-05, "loss": 0.5572, "step": 12976 }, { "epoch": 0.35631521142229544, "grad_norm": 0.3743065893650055, "learning_rate": 1.8493171461934488e-05, "loss": 0.5273, "step": 12977 }, { "epoch": 0.35634266886326194, "grad_norm": 0.4400683343410492, "learning_rate": 1.8492943464856447e-05, "loss": 0.5607, "step": 12978 }, { "epoch": 0.35637012630422843, "grad_norm": 0.40293678641319275, "learning_rate": 1.8492715451936355e-05, "loss": 0.5352, "step": 12979 }, { "epoch": 0.3563975837451949, "grad_norm": 0.3455598056316376, "learning_rate": 1.8492487423174642e-05, "loss": 0.485, "step": 12980 }, { "epoch": 0.3564250411861615, "grad_norm": 0.37043312191963196, "learning_rate": 1.8492259378571727e-05, "loss": 0.4995, "step": 12981 }, { "epoch": 0.35645249862712797, "grad_norm": 0.3668033480644226, "learning_rate": 1.8492031318128038e-05, "loss": 0.4731, "step": 12982 }, { "epoch": 0.35647995606809446, "grad_norm": 0.40452343225479126, "learning_rate": 1.8491803241844e-05, "loss": 0.6119, "step": 12983 }, { "epoch": 0.35650741350906096, "grad_norm": 0.3592095375061035, "learning_rate": 1.849157514972004e-05, "loss": 0.5198, "step": 12984 }, { "epoch": 0.35653487095002745, "grad_norm": 0.4131249189376831, "learning_rate": 1.8491347041756582e-05, "loss": 0.5608, "step": 12985 }, { "epoch": 0.35656232839099394, "grad_norm": 0.3285183012485504, "learning_rate": 1.849111891795405e-05, "loss": 0.4859, "step": 12986 }, { "epoch": 0.35658978583196044, "grad_norm": 0.40792542695999146, "learning_rate": 1.849089077831287e-05, "loss": 0.411, "step": 12987 }, { "epoch": 0.356617243272927, "grad_norm": 0.38132575154304504, "learning_rate": 1.8490662622833472e-05, "loss": 0.5168, "step": 12988 }, { "epoch": 0.3566447007138935, "grad_norm": 0.43043336272239685, "learning_rate": 1.8490434451516277e-05, "loss": 0.446, "step": 12989 }, { "epoch": 0.35667215815486, "grad_norm": 0.34616991877555847, "learning_rate": 1.849020626436171e-05, "loss": 0.4123, "step": 12990 }, { "epoch": 0.35669961559582647, "grad_norm": 0.3501774072647095, "learning_rate": 1.84899780613702e-05, "loss": 0.5301, "step": 12991 }, { "epoch": 0.35672707303679296, "grad_norm": 0.3966620862483978, "learning_rate": 1.8489749842542176e-05, "loss": 0.5644, "step": 12992 }, { "epoch": 0.35675453047775946, "grad_norm": 0.3552907109260559, "learning_rate": 1.8489521607878054e-05, "loss": 0.5126, "step": 12993 }, { "epoch": 0.35678198791872595, "grad_norm": 0.36467838287353516, "learning_rate": 1.8489293357378268e-05, "loss": 0.4886, "step": 12994 }, { "epoch": 0.3568094453596925, "grad_norm": 0.369855672121048, "learning_rate": 1.8489065091043235e-05, "loss": 0.48, "step": 12995 }, { "epoch": 0.356836902800659, "grad_norm": 0.3316080570220947, "learning_rate": 1.8488836808873388e-05, "loss": 0.4843, "step": 12996 }, { "epoch": 0.3568643602416255, "grad_norm": 0.3672640323638916, "learning_rate": 1.848860851086915e-05, "loss": 0.524, "step": 12997 }, { "epoch": 0.356891817682592, "grad_norm": 0.3153313994407654, "learning_rate": 1.8488380197030952e-05, "loss": 0.3922, "step": 12998 }, { "epoch": 0.3569192751235585, "grad_norm": 0.3830428421497345, "learning_rate": 1.8488151867359213e-05, "loss": 0.5361, "step": 12999 }, { "epoch": 0.35694673256452497, "grad_norm": 0.3186010718345642, "learning_rate": 1.8487923521854362e-05, "loss": 0.4292, "step": 13000 }, { "epoch": 0.35697419000549147, "grad_norm": 0.418986976146698, "learning_rate": 1.8487695160516825e-05, "loss": 0.5912, "step": 13001 }, { "epoch": 0.357001647446458, "grad_norm": 0.29527774453163147, "learning_rate": 1.848746678334703e-05, "loss": 0.4461, "step": 13002 }, { "epoch": 0.3570291048874245, "grad_norm": 0.33104610443115234, "learning_rate": 1.84872383903454e-05, "loss": 0.4369, "step": 13003 }, { "epoch": 0.357056562328391, "grad_norm": 0.3309026062488556, "learning_rate": 1.8487009981512356e-05, "loss": 0.4262, "step": 13004 }, { "epoch": 0.3570840197693575, "grad_norm": 0.34110087156295776, "learning_rate": 1.848678155684833e-05, "loss": 0.4564, "step": 13005 }, { "epoch": 0.357111477210324, "grad_norm": 0.3449648916721344, "learning_rate": 1.8486553116353753e-05, "loss": 0.5038, "step": 13006 }, { "epoch": 0.3571389346512905, "grad_norm": 0.48508477210998535, "learning_rate": 1.8486324660029044e-05, "loss": 0.5809, "step": 13007 }, { "epoch": 0.357166392092257, "grad_norm": 0.3240523934364319, "learning_rate": 1.848609618787463e-05, "loss": 0.4684, "step": 13008 }, { "epoch": 0.35719384953322353, "grad_norm": 0.33209729194641113, "learning_rate": 1.848586769989094e-05, "loss": 0.4949, "step": 13009 }, { "epoch": 0.35722130697419, "grad_norm": 0.35862213373184204, "learning_rate": 1.8485639196078398e-05, "loss": 0.4779, "step": 13010 }, { "epoch": 0.3572487644151565, "grad_norm": 0.42602968215942383, "learning_rate": 1.848541067643743e-05, "loss": 0.4632, "step": 13011 }, { "epoch": 0.357276221856123, "grad_norm": 0.36546412110328674, "learning_rate": 1.8485182140968462e-05, "loss": 0.4212, "step": 13012 }, { "epoch": 0.3573036792970895, "grad_norm": 0.3444959223270416, "learning_rate": 1.8484953589671925e-05, "loss": 0.5027, "step": 13013 }, { "epoch": 0.357331136738056, "grad_norm": 0.3905417323112488, "learning_rate": 1.848472502254824e-05, "loss": 0.5873, "step": 13014 }, { "epoch": 0.3573585941790225, "grad_norm": 0.3522876501083374, "learning_rate": 1.848449643959783e-05, "loss": 0.511, "step": 13015 }, { "epoch": 0.35738605161998904, "grad_norm": 0.4264282286167145, "learning_rate": 1.8484267840821128e-05, "loss": 0.5367, "step": 13016 }, { "epoch": 0.35741350906095554, "grad_norm": 0.40064695477485657, "learning_rate": 1.848403922621856e-05, "loss": 0.5791, "step": 13017 }, { "epoch": 0.35744096650192203, "grad_norm": 0.33717551827430725, "learning_rate": 1.848381059579055e-05, "loss": 0.4977, "step": 13018 }, { "epoch": 0.3574684239428885, "grad_norm": 0.3351287841796875, "learning_rate": 1.848358194953753e-05, "loss": 0.4445, "step": 13019 }, { "epoch": 0.357495881383855, "grad_norm": 0.3363860845565796, "learning_rate": 1.8483353287459917e-05, "loss": 0.5623, "step": 13020 }, { "epoch": 0.3575233388248215, "grad_norm": 0.5071297883987427, "learning_rate": 1.8483124609558145e-05, "loss": 0.555, "step": 13021 }, { "epoch": 0.357550796265788, "grad_norm": 0.4095757305622101, "learning_rate": 1.8482895915832638e-05, "loss": 0.5408, "step": 13022 }, { "epoch": 0.35757825370675456, "grad_norm": 0.44816747307777405, "learning_rate": 1.848266720628382e-05, "loss": 0.518, "step": 13023 }, { "epoch": 0.35760571114772105, "grad_norm": 0.4228252172470093, "learning_rate": 1.8482438480912122e-05, "loss": 0.49, "step": 13024 }, { "epoch": 0.35763316858868754, "grad_norm": 0.3684860169887543, "learning_rate": 1.848220973971797e-05, "loss": 0.5092, "step": 13025 }, { "epoch": 0.35766062602965404, "grad_norm": 0.3365960419178009, "learning_rate": 1.848198098270179e-05, "loss": 0.5303, "step": 13026 }, { "epoch": 0.35768808347062053, "grad_norm": 0.368191123008728, "learning_rate": 1.8481752209864005e-05, "loss": 0.5781, "step": 13027 }, { "epoch": 0.357715540911587, "grad_norm": 0.36805853247642517, "learning_rate": 1.8481523421205048e-05, "loss": 0.5166, "step": 13028 }, { "epoch": 0.3577429983525535, "grad_norm": 0.3759543001651764, "learning_rate": 1.8481294616725342e-05, "loss": 0.5516, "step": 13029 }, { "epoch": 0.35777045579352007, "grad_norm": 0.35152962803840637, "learning_rate": 1.8481065796425316e-05, "loss": 0.5144, "step": 13030 }, { "epoch": 0.35779791323448656, "grad_norm": 0.358797550201416, "learning_rate": 1.8480836960305396e-05, "loss": 0.4505, "step": 13031 }, { "epoch": 0.35782537067545306, "grad_norm": 0.3807108402252197, "learning_rate": 1.8480608108366006e-05, "loss": 0.4824, "step": 13032 }, { "epoch": 0.35785282811641955, "grad_norm": 0.4163348376750946, "learning_rate": 1.8480379240607575e-05, "loss": 0.5701, "step": 13033 }, { "epoch": 0.35788028555738605, "grad_norm": 0.44013962149620056, "learning_rate": 1.848015035703053e-05, "loss": 0.507, "step": 13034 }, { "epoch": 0.35790774299835254, "grad_norm": 0.32004866003990173, "learning_rate": 1.84799214576353e-05, "loss": 0.4683, "step": 13035 }, { "epoch": 0.35793520043931903, "grad_norm": 0.4632068872451782, "learning_rate": 1.8479692542422308e-05, "loss": 0.5794, "step": 13036 }, { "epoch": 0.3579626578802856, "grad_norm": 0.40559110045433044, "learning_rate": 1.8479463611391983e-05, "loss": 0.6156, "step": 13037 }, { "epoch": 0.3579901153212521, "grad_norm": 0.3532363772392273, "learning_rate": 1.8479234664544754e-05, "loss": 0.4866, "step": 13038 }, { "epoch": 0.35801757276221857, "grad_norm": 0.32363638281822205, "learning_rate": 1.8479005701881042e-05, "loss": 0.4546, "step": 13039 }, { "epoch": 0.35804503020318507, "grad_norm": 0.5130186676979065, "learning_rate": 1.8478776723401283e-05, "loss": 0.5997, "step": 13040 }, { "epoch": 0.35807248764415156, "grad_norm": 0.4118053615093231, "learning_rate": 1.8478547729105898e-05, "loss": 0.6036, "step": 13041 }, { "epoch": 0.35809994508511805, "grad_norm": 0.3968612551689148, "learning_rate": 1.8478318718995313e-05, "loss": 0.555, "step": 13042 }, { "epoch": 0.35812740252608455, "grad_norm": 0.3946733772754669, "learning_rate": 1.847808969306996e-05, "loss": 0.5078, "step": 13043 }, { "epoch": 0.3581548599670511, "grad_norm": 0.3719615638256073, "learning_rate": 1.8477860651330263e-05, "loss": 0.5209, "step": 13044 }, { "epoch": 0.3581823174080176, "grad_norm": 0.380568265914917, "learning_rate": 1.847763159377665e-05, "loss": 0.4757, "step": 13045 }, { "epoch": 0.3582097748489841, "grad_norm": 0.3815455734729767, "learning_rate": 1.8477402520409547e-05, "loss": 0.541, "step": 13046 }, { "epoch": 0.3582372322899506, "grad_norm": 0.3085971772670746, "learning_rate": 1.8477173431229386e-05, "loss": 0.4161, "step": 13047 }, { "epoch": 0.3582646897309171, "grad_norm": 0.36803120374679565, "learning_rate": 1.847694432623659e-05, "loss": 0.4783, "step": 13048 }, { "epoch": 0.35829214717188357, "grad_norm": 0.359698086977005, "learning_rate": 1.8476715205431585e-05, "loss": 0.5494, "step": 13049 }, { "epoch": 0.35831960461285006, "grad_norm": 0.37097787857055664, "learning_rate": 1.8476486068814807e-05, "loss": 0.532, "step": 13050 }, { "epoch": 0.3583470620538166, "grad_norm": 0.3404868245124817, "learning_rate": 1.847625691638667e-05, "loss": 0.4956, "step": 13051 }, { "epoch": 0.3583745194947831, "grad_norm": 0.37275034189224243, "learning_rate": 1.847602774814761e-05, "loss": 0.5066, "step": 13052 }, { "epoch": 0.3584019769357496, "grad_norm": 0.3579447269439697, "learning_rate": 1.8475798564098056e-05, "loss": 0.4605, "step": 13053 }, { "epoch": 0.3584294343767161, "grad_norm": 0.36870473623275757, "learning_rate": 1.847556936423843e-05, "loss": 0.5008, "step": 13054 }, { "epoch": 0.3584568918176826, "grad_norm": 0.44797536730766296, "learning_rate": 1.8475340148569164e-05, "loss": 0.4853, "step": 13055 }, { "epoch": 0.3584843492586491, "grad_norm": 0.37473922967910767, "learning_rate": 1.847511091709068e-05, "loss": 0.5637, "step": 13056 }, { "epoch": 0.3585118066996156, "grad_norm": 0.3497028350830078, "learning_rate": 1.8474881669803412e-05, "loss": 0.5354, "step": 13057 }, { "epoch": 0.35853926414058207, "grad_norm": 0.41104856133461, "learning_rate": 1.8474652406707782e-05, "loss": 0.5502, "step": 13058 }, { "epoch": 0.3585667215815486, "grad_norm": 0.37893322110176086, "learning_rate": 1.8474423127804224e-05, "loss": 0.5007, "step": 13059 }, { "epoch": 0.3585941790225151, "grad_norm": 0.44994205236434937, "learning_rate": 1.8474193833093158e-05, "loss": 0.5665, "step": 13060 }, { "epoch": 0.3586216364634816, "grad_norm": 0.3552468419075012, "learning_rate": 1.847396452257502e-05, "loss": 0.5719, "step": 13061 }, { "epoch": 0.3586490939044481, "grad_norm": 0.3795653283596039, "learning_rate": 1.8473735196250234e-05, "loss": 0.4946, "step": 13062 }, { "epoch": 0.3586765513454146, "grad_norm": 0.43495243787765503, "learning_rate": 1.8473505854119227e-05, "loss": 0.5005, "step": 13063 }, { "epoch": 0.3587040087863811, "grad_norm": 0.3547394573688507, "learning_rate": 1.8473276496182423e-05, "loss": 0.5087, "step": 13064 }, { "epoch": 0.3587314662273476, "grad_norm": 0.3855637311935425, "learning_rate": 1.847304712244026e-05, "loss": 0.5825, "step": 13065 }, { "epoch": 0.35875892366831413, "grad_norm": 0.38034605979919434, "learning_rate": 1.8472817732893154e-05, "loss": 0.5195, "step": 13066 }, { "epoch": 0.3587863811092806, "grad_norm": 0.3455883860588074, "learning_rate": 1.847258832754154e-05, "loss": 0.4379, "step": 13067 }, { "epoch": 0.3588138385502471, "grad_norm": 0.4013471305370331, "learning_rate": 1.8472358906385848e-05, "loss": 0.4941, "step": 13068 }, { "epoch": 0.3588412959912136, "grad_norm": 0.3758194148540497, "learning_rate": 1.84721294694265e-05, "loss": 0.4903, "step": 13069 }, { "epoch": 0.3588687534321801, "grad_norm": 0.3752117455005646, "learning_rate": 1.8471900016663925e-05, "loss": 0.5051, "step": 13070 }, { "epoch": 0.3588962108731466, "grad_norm": 0.4019255042076111, "learning_rate": 1.8471670548098557e-05, "loss": 0.4613, "step": 13071 }, { "epoch": 0.3589236683141131, "grad_norm": 0.3693023920059204, "learning_rate": 1.8471441063730816e-05, "loss": 0.3838, "step": 13072 }, { "epoch": 0.35895112575507965, "grad_norm": 0.3698081374168396, "learning_rate": 1.8471211563561134e-05, "loss": 0.5156, "step": 13073 }, { "epoch": 0.35897858319604614, "grad_norm": 0.3611887991428375, "learning_rate": 1.847098204758994e-05, "loss": 0.4362, "step": 13074 }, { "epoch": 0.35900604063701264, "grad_norm": 0.38488391041755676, "learning_rate": 1.847075251581766e-05, "loss": 0.5033, "step": 13075 }, { "epoch": 0.35903349807797913, "grad_norm": 0.35095134377479553, "learning_rate": 1.8470522968244723e-05, "loss": 0.4971, "step": 13076 }, { "epoch": 0.3590609555189456, "grad_norm": 0.4106890559196472, "learning_rate": 1.847029340487156e-05, "loss": 0.5372, "step": 13077 }, { "epoch": 0.3590884129599121, "grad_norm": 0.3366299271583557, "learning_rate": 1.8470063825698593e-05, "loss": 0.5388, "step": 13078 }, { "epoch": 0.3591158704008786, "grad_norm": 0.4108119308948517, "learning_rate": 1.8469834230726252e-05, "loss": 0.4623, "step": 13079 }, { "epoch": 0.35914332784184516, "grad_norm": 0.331798255443573, "learning_rate": 1.8469604619954972e-05, "loss": 0.4875, "step": 13080 }, { "epoch": 0.35917078528281166, "grad_norm": 0.3658464550971985, "learning_rate": 1.8469374993385175e-05, "loss": 0.5501, "step": 13081 }, { "epoch": 0.35919824272377815, "grad_norm": 0.33240431547164917, "learning_rate": 1.846914535101729e-05, "loss": 0.4777, "step": 13082 }, { "epoch": 0.35922570016474464, "grad_norm": 0.4235239028930664, "learning_rate": 1.8468915692851744e-05, "loss": 0.6611, "step": 13083 }, { "epoch": 0.35925315760571114, "grad_norm": 0.37577369809150696, "learning_rate": 1.8468686018888967e-05, "loss": 0.5779, "step": 13084 }, { "epoch": 0.35928061504667763, "grad_norm": 0.36330413818359375, "learning_rate": 1.846845632912939e-05, "loss": 0.4816, "step": 13085 }, { "epoch": 0.3593080724876441, "grad_norm": 0.37065958976745605, "learning_rate": 1.8468226623573436e-05, "loss": 0.4527, "step": 13086 }, { "epoch": 0.3593355299286107, "grad_norm": 0.35448479652404785, "learning_rate": 1.846799690222154e-05, "loss": 0.5491, "step": 13087 }, { "epoch": 0.35936298736957717, "grad_norm": 0.3839220106601715, "learning_rate": 1.8467767165074127e-05, "loss": 0.5742, "step": 13088 }, { "epoch": 0.35939044481054366, "grad_norm": 0.37982094287872314, "learning_rate": 1.8467537412131623e-05, "loss": 0.5859, "step": 13089 }, { "epoch": 0.35941790225151016, "grad_norm": 0.3659600615501404, "learning_rate": 1.846730764339446e-05, "loss": 0.5112, "step": 13090 }, { "epoch": 0.35944535969247665, "grad_norm": 0.41604796051979065, "learning_rate": 1.846707785886307e-05, "loss": 0.5074, "step": 13091 }, { "epoch": 0.35947281713344315, "grad_norm": 0.31994569301605225, "learning_rate": 1.8466848058537873e-05, "loss": 0.4347, "step": 13092 }, { "epoch": 0.35950027457440964, "grad_norm": 0.393343448638916, "learning_rate": 1.84666182424193e-05, "loss": 0.5366, "step": 13093 }, { "epoch": 0.3595277320153762, "grad_norm": 0.3669256567955017, "learning_rate": 1.8466388410507786e-05, "loss": 0.4809, "step": 13094 }, { "epoch": 0.3595551894563427, "grad_norm": 0.3888470530509949, "learning_rate": 1.8466158562803755e-05, "loss": 0.4876, "step": 13095 }, { "epoch": 0.3595826468973092, "grad_norm": 0.34312117099761963, "learning_rate": 1.8465928699307635e-05, "loss": 0.4739, "step": 13096 }, { "epoch": 0.35961010433827567, "grad_norm": 0.3285973370075226, "learning_rate": 1.8465698820019857e-05, "loss": 0.4729, "step": 13097 }, { "epoch": 0.35963756177924217, "grad_norm": 0.38379350304603577, "learning_rate": 1.8465468924940846e-05, "loss": 0.5174, "step": 13098 }, { "epoch": 0.35966501922020866, "grad_norm": 0.3782350420951843, "learning_rate": 1.8465239014071037e-05, "loss": 0.4385, "step": 13099 }, { "epoch": 0.35969247666117515, "grad_norm": 0.38308411836624146, "learning_rate": 1.8465009087410854e-05, "loss": 0.5082, "step": 13100 }, { "epoch": 0.3597199341021417, "grad_norm": 0.734104335308075, "learning_rate": 1.8464779144960726e-05, "loss": 0.5013, "step": 13101 }, { "epoch": 0.3597473915431082, "grad_norm": 0.37240442633628845, "learning_rate": 1.8464549186721088e-05, "loss": 0.5436, "step": 13102 }, { "epoch": 0.3597748489840747, "grad_norm": 0.389226496219635, "learning_rate": 1.8464319212692362e-05, "loss": 0.5195, "step": 13103 }, { "epoch": 0.3598023064250412, "grad_norm": 0.41736090183258057, "learning_rate": 1.8464089222874973e-05, "loss": 0.5272, "step": 13104 }, { "epoch": 0.3598297638660077, "grad_norm": 0.3341962695121765, "learning_rate": 1.8463859217269364e-05, "loss": 0.4099, "step": 13105 }, { "epoch": 0.3598572213069742, "grad_norm": 0.3410189747810364, "learning_rate": 1.8463629195875952e-05, "loss": 0.5594, "step": 13106 }, { "epoch": 0.35988467874794067, "grad_norm": 0.3729605972766876, "learning_rate": 1.846339915869517e-05, "loss": 0.4815, "step": 13107 }, { "epoch": 0.3599121361889072, "grad_norm": 0.35947906970977783, "learning_rate": 1.8463169105727454e-05, "loss": 0.4737, "step": 13108 }, { "epoch": 0.3599395936298737, "grad_norm": 0.3681703805923462, "learning_rate": 1.846293903697322e-05, "loss": 0.4602, "step": 13109 }, { "epoch": 0.3599670510708402, "grad_norm": 0.40846383571624756, "learning_rate": 1.8462708952432903e-05, "loss": 0.6694, "step": 13110 }, { "epoch": 0.3599945085118067, "grad_norm": 0.3333899676799774, "learning_rate": 1.8462478852106937e-05, "loss": 0.5265, "step": 13111 }, { "epoch": 0.3600219659527732, "grad_norm": 0.3525049686431885, "learning_rate": 1.8462248735995746e-05, "loss": 0.4571, "step": 13112 }, { "epoch": 0.3600494233937397, "grad_norm": 0.32668542861938477, "learning_rate": 1.8462018604099757e-05, "loss": 0.4737, "step": 13113 }, { "epoch": 0.3600768808347062, "grad_norm": 0.36160656809806824, "learning_rate": 1.8461788456419408e-05, "loss": 0.5553, "step": 13114 }, { "epoch": 0.36010433827567273, "grad_norm": 0.3445206582546234, "learning_rate": 1.8461558292955118e-05, "loss": 0.466, "step": 13115 }, { "epoch": 0.3601317957166392, "grad_norm": 0.33575722575187683, "learning_rate": 1.8461328113707325e-05, "loss": 0.4661, "step": 13116 }, { "epoch": 0.3601592531576057, "grad_norm": 0.3560367226600647, "learning_rate": 1.846109791867645e-05, "loss": 0.5741, "step": 13117 }, { "epoch": 0.3601867105985722, "grad_norm": 0.40430423617362976, "learning_rate": 1.846086770786293e-05, "loss": 0.544, "step": 13118 }, { "epoch": 0.3602141680395387, "grad_norm": 0.37835022807121277, "learning_rate": 1.846063748126719e-05, "loss": 0.4992, "step": 13119 }, { "epoch": 0.3602416254805052, "grad_norm": 0.33275657892227173, "learning_rate": 1.846040723888966e-05, "loss": 0.4596, "step": 13120 }, { "epoch": 0.3602690829214717, "grad_norm": 0.41295212507247925, "learning_rate": 1.8460176980730777e-05, "loss": 0.448, "step": 13121 }, { "epoch": 0.36029654036243824, "grad_norm": 0.35496413707733154, "learning_rate": 1.8459946706790958e-05, "loss": 0.5629, "step": 13122 }, { "epoch": 0.36032399780340474, "grad_norm": 0.38105282187461853, "learning_rate": 1.8459716417070637e-05, "loss": 0.5751, "step": 13123 }, { "epoch": 0.36035145524437123, "grad_norm": 0.3538092076778412, "learning_rate": 1.8459486111570245e-05, "loss": 0.5372, "step": 13124 }, { "epoch": 0.3603789126853377, "grad_norm": 0.32643380761146545, "learning_rate": 1.8459255790290215e-05, "loss": 0.5287, "step": 13125 }, { "epoch": 0.3604063701263042, "grad_norm": 0.3678705394268036, "learning_rate": 1.845902545323097e-05, "loss": 0.4908, "step": 13126 }, { "epoch": 0.3604338275672707, "grad_norm": 0.4382529556751251, "learning_rate": 1.8458795100392947e-05, "loss": 0.5614, "step": 13127 }, { "epoch": 0.3604612850082372, "grad_norm": 0.3411422073841095, "learning_rate": 1.8458564731776566e-05, "loss": 0.475, "step": 13128 }, { "epoch": 0.36048874244920376, "grad_norm": 0.35569262504577637, "learning_rate": 1.8458334347382264e-05, "loss": 0.5252, "step": 13129 }, { "epoch": 0.36051619989017025, "grad_norm": 0.3134927749633789, "learning_rate": 1.8458103947210467e-05, "loss": 0.4775, "step": 13130 }, { "epoch": 0.36054365733113675, "grad_norm": 0.3469892740249634, "learning_rate": 1.845787353126161e-05, "loss": 0.4792, "step": 13131 }, { "epoch": 0.36057111477210324, "grad_norm": 0.3935772776603699, "learning_rate": 1.8457643099536117e-05, "loss": 0.5468, "step": 13132 }, { "epoch": 0.36059857221306973, "grad_norm": 0.4262789189815521, "learning_rate": 1.8457412652034423e-05, "loss": 0.5306, "step": 13133 }, { "epoch": 0.36062602965403623, "grad_norm": 0.42334407567977905, "learning_rate": 1.8457182188756956e-05, "loss": 0.5394, "step": 13134 }, { "epoch": 0.3606534870950027, "grad_norm": 0.4125106632709503, "learning_rate": 1.845695170970414e-05, "loss": 0.5264, "step": 13135 }, { "epoch": 0.3606809445359693, "grad_norm": 0.4267720878124237, "learning_rate": 1.8456721214876416e-05, "loss": 0.5849, "step": 13136 }, { "epoch": 0.36070840197693577, "grad_norm": 0.33667778968811035, "learning_rate": 1.8456490704274205e-05, "loss": 0.4892, "step": 13137 }, { "epoch": 0.36073585941790226, "grad_norm": 0.3911696672439575, "learning_rate": 1.8456260177897934e-05, "loss": 0.5899, "step": 13138 }, { "epoch": 0.36076331685886875, "grad_norm": 0.406293123960495, "learning_rate": 1.8456029635748046e-05, "loss": 0.5856, "step": 13139 }, { "epoch": 0.36079077429983525, "grad_norm": 0.35822010040283203, "learning_rate": 1.845579907782496e-05, "loss": 0.5028, "step": 13140 }, { "epoch": 0.36081823174080174, "grad_norm": 0.4146757125854492, "learning_rate": 1.8455568504129117e-05, "loss": 0.5706, "step": 13141 }, { "epoch": 0.36084568918176824, "grad_norm": 0.3982425332069397, "learning_rate": 1.8455337914660932e-05, "loss": 0.5577, "step": 13142 }, { "epoch": 0.3608731466227348, "grad_norm": 0.3931189775466919, "learning_rate": 1.845510730942085e-05, "loss": 0.6377, "step": 13143 }, { "epoch": 0.3609006040637013, "grad_norm": 0.419359028339386, "learning_rate": 1.845487668840929e-05, "loss": 0.6127, "step": 13144 }, { "epoch": 0.3609280615046678, "grad_norm": 0.34221982955932617, "learning_rate": 1.8454646051626682e-05, "loss": 0.52, "step": 13145 }, { "epoch": 0.36095551894563427, "grad_norm": 0.3729400336742401, "learning_rate": 1.8454415399073466e-05, "loss": 0.5021, "step": 13146 }, { "epoch": 0.36098297638660076, "grad_norm": 0.35129475593566895, "learning_rate": 1.845418473075007e-05, "loss": 0.5305, "step": 13147 }, { "epoch": 0.36101043382756726, "grad_norm": 0.3269643187522888, "learning_rate": 1.8453954046656917e-05, "loss": 0.5117, "step": 13148 }, { "epoch": 0.36103789126853375, "grad_norm": 0.5407090187072754, "learning_rate": 1.845372334679444e-05, "loss": 0.5464, "step": 13149 }, { "epoch": 0.3610653487095003, "grad_norm": 0.3360693156719208, "learning_rate": 1.8453492631163074e-05, "loss": 0.5566, "step": 13150 }, { "epoch": 0.3610928061504668, "grad_norm": 0.5453652739524841, "learning_rate": 1.8453261899763247e-05, "loss": 0.5698, "step": 13151 }, { "epoch": 0.3611202635914333, "grad_norm": 0.42272889614105225, "learning_rate": 1.8453031152595387e-05, "loss": 0.6524, "step": 13152 }, { "epoch": 0.3611477210323998, "grad_norm": 0.3863818049430847, "learning_rate": 1.8452800389659927e-05, "loss": 0.5219, "step": 13153 }, { "epoch": 0.3611751784733663, "grad_norm": 0.41362398862838745, "learning_rate": 1.8452569610957294e-05, "loss": 0.6084, "step": 13154 }, { "epoch": 0.36120263591433277, "grad_norm": 0.3814869523048401, "learning_rate": 1.8452338816487923e-05, "loss": 0.4526, "step": 13155 }, { "epoch": 0.36123009335529926, "grad_norm": 0.3521794080734253, "learning_rate": 1.845210800625224e-05, "loss": 0.5706, "step": 13156 }, { "epoch": 0.3612575507962658, "grad_norm": 0.34441110491752625, "learning_rate": 1.845187718025068e-05, "loss": 0.4927, "step": 13157 }, { "epoch": 0.3612850082372323, "grad_norm": 0.3489800989627838, "learning_rate": 1.8451646338483673e-05, "loss": 0.5497, "step": 13158 }, { "epoch": 0.3613124656781988, "grad_norm": 0.42725157737731934, "learning_rate": 1.8451415480951645e-05, "loss": 0.4967, "step": 13159 }, { "epoch": 0.3613399231191653, "grad_norm": 0.34447064995765686, "learning_rate": 1.845118460765503e-05, "loss": 0.418, "step": 13160 }, { "epoch": 0.3613673805601318, "grad_norm": 0.40395209193229675, "learning_rate": 1.8450953718594263e-05, "loss": 0.5611, "step": 13161 }, { "epoch": 0.3613948380010983, "grad_norm": 0.39303383231163025, "learning_rate": 1.8450722813769766e-05, "loss": 0.5256, "step": 13162 }, { "epoch": 0.3614222954420648, "grad_norm": 0.38242316246032715, "learning_rate": 1.8450491893181973e-05, "loss": 0.5202, "step": 13163 }, { "epoch": 0.36144975288303133, "grad_norm": 0.3126254379749298, "learning_rate": 1.8450260956831317e-05, "loss": 0.4693, "step": 13164 }, { "epoch": 0.3614772103239978, "grad_norm": 0.3535137176513672, "learning_rate": 1.8450030004718228e-05, "loss": 0.4772, "step": 13165 }, { "epoch": 0.3615046677649643, "grad_norm": 0.3869374990463257, "learning_rate": 1.8449799036843136e-05, "loss": 0.5096, "step": 13166 }, { "epoch": 0.3615321252059308, "grad_norm": 0.3483353555202484, "learning_rate": 1.844956805320647e-05, "loss": 0.5399, "step": 13167 }, { "epoch": 0.3615595826468973, "grad_norm": 0.3912252187728882, "learning_rate": 1.8449337053808663e-05, "loss": 0.4449, "step": 13168 }, { "epoch": 0.3615870400878638, "grad_norm": 0.40818560123443604, "learning_rate": 1.8449106038650146e-05, "loss": 0.5448, "step": 13169 }, { "epoch": 0.3616144975288303, "grad_norm": 0.43586814403533936, "learning_rate": 1.844887500773135e-05, "loss": 0.5357, "step": 13170 }, { "epoch": 0.36164195496979684, "grad_norm": 0.520947277545929, "learning_rate": 1.844864396105271e-05, "loss": 0.5185, "step": 13171 }, { "epoch": 0.36166941241076334, "grad_norm": 0.3438819646835327, "learning_rate": 1.8448412898614645e-05, "loss": 0.5169, "step": 13172 }, { "epoch": 0.36169686985172983, "grad_norm": 0.36924096941947937, "learning_rate": 1.8448181820417595e-05, "loss": 0.5146, "step": 13173 }, { "epoch": 0.3617243272926963, "grad_norm": 0.37733086943626404, "learning_rate": 1.8447950726461995e-05, "loss": 0.5214, "step": 13174 }, { "epoch": 0.3617517847336628, "grad_norm": 0.3620932102203369, "learning_rate": 1.844771961674827e-05, "loss": 0.5114, "step": 13175 }, { "epoch": 0.3617792421746293, "grad_norm": 0.3815581500530243, "learning_rate": 1.8447488491276846e-05, "loss": 0.4929, "step": 13176 }, { "epoch": 0.3618066996155958, "grad_norm": 0.3580508828163147, "learning_rate": 1.844725735004816e-05, "loss": 0.4502, "step": 13177 }, { "epoch": 0.36183415705656236, "grad_norm": 0.38225069642066956, "learning_rate": 1.844702619306265e-05, "loss": 0.5262, "step": 13178 }, { "epoch": 0.36186161449752885, "grad_norm": 0.42518308758735657, "learning_rate": 1.8446795020320734e-05, "loss": 0.5644, "step": 13179 }, { "epoch": 0.36188907193849534, "grad_norm": 0.37176260352134705, "learning_rate": 1.8446563831822854e-05, "loss": 0.5339, "step": 13180 }, { "epoch": 0.36191652937946184, "grad_norm": 0.3548765182495117, "learning_rate": 1.844633262756943e-05, "loss": 0.4857, "step": 13181 }, { "epoch": 0.36194398682042833, "grad_norm": 0.36172688007354736, "learning_rate": 1.8446101407560903e-05, "loss": 0.4875, "step": 13182 }, { "epoch": 0.3619714442613948, "grad_norm": 0.44124525785446167, "learning_rate": 1.8445870171797703e-05, "loss": 0.5258, "step": 13183 }, { "epoch": 0.3619989017023613, "grad_norm": 0.3759884536266327, "learning_rate": 1.844563892028026e-05, "loss": 0.5499, "step": 13184 }, { "epoch": 0.36202635914332787, "grad_norm": 0.3830901086330414, "learning_rate": 1.8445407653009006e-05, "loss": 0.5356, "step": 13185 }, { "epoch": 0.36205381658429436, "grad_norm": 0.44437599182128906, "learning_rate": 1.8445176369984366e-05, "loss": 0.5315, "step": 13186 }, { "epoch": 0.36208127402526086, "grad_norm": 0.3915611803531647, "learning_rate": 1.8444945071206777e-05, "loss": 0.6632, "step": 13187 }, { "epoch": 0.36210873146622735, "grad_norm": 0.35195913910865784, "learning_rate": 1.8444713756676672e-05, "loss": 0.4534, "step": 13188 }, { "epoch": 0.36213618890719385, "grad_norm": 0.3453114628791809, "learning_rate": 1.844448242639448e-05, "loss": 0.5676, "step": 13189 }, { "epoch": 0.36216364634816034, "grad_norm": 0.41373419761657715, "learning_rate": 1.8444251080360636e-05, "loss": 0.5032, "step": 13190 }, { "epoch": 0.36219110378912683, "grad_norm": 0.384966641664505, "learning_rate": 1.8444019718575566e-05, "loss": 0.5519, "step": 13191 }, { "epoch": 0.3622185612300933, "grad_norm": 0.3556605577468872, "learning_rate": 1.844378834103971e-05, "loss": 0.484, "step": 13192 }, { "epoch": 0.3622460186710599, "grad_norm": 0.35879799723625183, "learning_rate": 1.8443556947753486e-05, "loss": 0.4987, "step": 13193 }, { "epoch": 0.36227347611202637, "grad_norm": 0.35934722423553467, "learning_rate": 1.8443325538717337e-05, "loss": 0.5528, "step": 13194 }, { "epoch": 0.36230093355299287, "grad_norm": 0.34954097867012024, "learning_rate": 1.844309411393169e-05, "loss": 0.4357, "step": 13195 }, { "epoch": 0.36232839099395936, "grad_norm": 0.3962495028972626, "learning_rate": 1.844286267339698e-05, "loss": 0.5581, "step": 13196 }, { "epoch": 0.36235584843492585, "grad_norm": 0.36189860105514526, "learning_rate": 1.844263121711363e-05, "loss": 0.4055, "step": 13197 }, { "epoch": 0.36238330587589235, "grad_norm": 0.34917619824409485, "learning_rate": 1.8442399745082085e-05, "loss": 0.5146, "step": 13198 }, { "epoch": 0.36241076331685884, "grad_norm": 0.3552559018135071, "learning_rate": 1.8442168257302765e-05, "loss": 0.5284, "step": 13199 }, { "epoch": 0.3624382207578254, "grad_norm": 0.36629894375801086, "learning_rate": 1.844193675377611e-05, "loss": 0.5283, "step": 13200 }, { "epoch": 0.3624656781987919, "grad_norm": 0.40735283493995667, "learning_rate": 1.844170523450255e-05, "loss": 0.6192, "step": 13201 }, { "epoch": 0.3624931356397584, "grad_norm": 0.3476910889148712, "learning_rate": 1.8441473699482514e-05, "loss": 0.5274, "step": 13202 }, { "epoch": 0.3625205930807249, "grad_norm": 0.3860296905040741, "learning_rate": 1.8441242148716435e-05, "loss": 0.4959, "step": 13203 }, { "epoch": 0.36254805052169137, "grad_norm": 0.36859825253486633, "learning_rate": 1.8441010582204745e-05, "loss": 0.4489, "step": 13204 }, { "epoch": 0.36257550796265786, "grad_norm": 0.3629544675350189, "learning_rate": 1.8440778999947878e-05, "loss": 0.4756, "step": 13205 }, { "epoch": 0.36260296540362436, "grad_norm": 0.5369066596031189, "learning_rate": 1.844054740194626e-05, "loss": 0.4895, "step": 13206 }, { "epoch": 0.3626304228445909, "grad_norm": 0.3333066999912262, "learning_rate": 1.844031578820033e-05, "loss": 0.5562, "step": 13207 }, { "epoch": 0.3626578802855574, "grad_norm": 0.3899949789047241, "learning_rate": 1.8440084158710518e-05, "loss": 0.5443, "step": 13208 }, { "epoch": 0.3626853377265239, "grad_norm": 0.4164447486400604, "learning_rate": 1.8439852513477253e-05, "loss": 0.5486, "step": 13209 }, { "epoch": 0.3627127951674904, "grad_norm": 0.31232699751853943, "learning_rate": 1.843962085250097e-05, "loss": 0.4765, "step": 13210 }, { "epoch": 0.3627402526084569, "grad_norm": 0.3985652029514313, "learning_rate": 1.84393891757821e-05, "loss": 0.4922, "step": 13211 }, { "epoch": 0.3627677100494234, "grad_norm": 0.3238932490348816, "learning_rate": 1.8439157483321076e-05, "loss": 0.4735, "step": 13212 }, { "epoch": 0.36279516749038987, "grad_norm": 0.35253220796585083, "learning_rate": 1.843892577511833e-05, "loss": 0.5657, "step": 13213 }, { "epoch": 0.3628226249313564, "grad_norm": 0.35974276065826416, "learning_rate": 1.84386940511743e-05, "loss": 0.4492, "step": 13214 }, { "epoch": 0.3628500823723229, "grad_norm": 0.33148103952407837, "learning_rate": 1.8438462311489403e-05, "loss": 0.4519, "step": 13215 }, { "epoch": 0.3628775398132894, "grad_norm": 0.36951327323913574, "learning_rate": 1.8438230556064087e-05, "loss": 0.5158, "step": 13216 }, { "epoch": 0.3629049972542559, "grad_norm": 0.3457728922367096, "learning_rate": 1.843799878489877e-05, "loss": 0.5452, "step": 13217 }, { "epoch": 0.3629324546952224, "grad_norm": 0.35920214653015137, "learning_rate": 1.8437766997993902e-05, "loss": 0.454, "step": 13218 }, { "epoch": 0.3629599121361889, "grad_norm": 0.36417731642723083, "learning_rate": 1.8437535195349897e-05, "loss": 0.5292, "step": 13219 }, { "epoch": 0.3629873695771554, "grad_norm": 0.4050922691822052, "learning_rate": 1.8437303376967204e-05, "loss": 0.6041, "step": 13220 }, { "epoch": 0.36301482701812193, "grad_norm": 0.375888466835022, "learning_rate": 1.8437071542846242e-05, "loss": 0.455, "step": 13221 }, { "epoch": 0.3630422844590884, "grad_norm": 0.3890566825866699, "learning_rate": 1.843683969298745e-05, "loss": 0.4854, "step": 13222 }, { "epoch": 0.3630697419000549, "grad_norm": 0.42696088552474976, "learning_rate": 1.843660782739126e-05, "loss": 0.4708, "step": 13223 }, { "epoch": 0.3630971993410214, "grad_norm": 0.3846897780895233, "learning_rate": 1.8436375946058102e-05, "loss": 0.5251, "step": 13224 }, { "epoch": 0.3631246567819879, "grad_norm": 0.39463454484939575, "learning_rate": 1.8436144048988413e-05, "loss": 0.5094, "step": 13225 }, { "epoch": 0.3631521142229544, "grad_norm": 0.35220158100128174, "learning_rate": 1.843591213618262e-05, "loss": 0.4811, "step": 13226 }, { "epoch": 0.3631795716639209, "grad_norm": 0.3524731993675232, "learning_rate": 1.8435680207641158e-05, "loss": 0.5088, "step": 13227 }, { "epoch": 0.36320702910488745, "grad_norm": 0.3048100173473358, "learning_rate": 1.8435448263364462e-05, "loss": 0.4389, "step": 13228 }, { "epoch": 0.36323448654585394, "grad_norm": 0.3411879241466522, "learning_rate": 1.8435216303352964e-05, "loss": 0.4781, "step": 13229 }, { "epoch": 0.36326194398682043, "grad_norm": 0.49210768938064575, "learning_rate": 1.843498432760709e-05, "loss": 0.5579, "step": 13230 }, { "epoch": 0.36328940142778693, "grad_norm": 0.5711226463317871, "learning_rate": 1.8434752336127285e-05, "loss": 0.5607, "step": 13231 }, { "epoch": 0.3633168588687534, "grad_norm": 0.37616774439811707, "learning_rate": 1.843452032891397e-05, "loss": 0.5404, "step": 13232 }, { "epoch": 0.3633443163097199, "grad_norm": 0.4114203155040741, "learning_rate": 1.8434288305967584e-05, "loss": 0.5403, "step": 13233 }, { "epoch": 0.3633717737506864, "grad_norm": 0.37984347343444824, "learning_rate": 1.8434056267288558e-05, "loss": 0.5362, "step": 13234 }, { "epoch": 0.36339923119165296, "grad_norm": 0.3873461186885834, "learning_rate": 1.843382421287733e-05, "loss": 0.5504, "step": 13235 }, { "epoch": 0.36342668863261945, "grad_norm": 0.35314255952835083, "learning_rate": 1.843359214273432e-05, "loss": 0.4769, "step": 13236 }, { "epoch": 0.36345414607358595, "grad_norm": 0.36450108885765076, "learning_rate": 1.8433360056859976e-05, "loss": 0.4736, "step": 13237 }, { "epoch": 0.36348160351455244, "grad_norm": 0.44163474440574646, "learning_rate": 1.843312795525472e-05, "loss": 0.5611, "step": 13238 }, { "epoch": 0.36350906095551894, "grad_norm": 0.391765832901001, "learning_rate": 1.843289583791899e-05, "loss": 0.5211, "step": 13239 }, { "epoch": 0.36353651839648543, "grad_norm": 0.3340924382209778, "learning_rate": 1.8432663704853215e-05, "loss": 0.4928, "step": 13240 }, { "epoch": 0.3635639758374519, "grad_norm": 0.3424559533596039, "learning_rate": 1.8432431556057832e-05, "loss": 0.4978, "step": 13241 }, { "epoch": 0.3635914332784185, "grad_norm": 0.3453432619571686, "learning_rate": 1.8432199391533275e-05, "loss": 0.5094, "step": 13242 }, { "epoch": 0.36361889071938497, "grad_norm": 0.35446932911872864, "learning_rate": 1.8431967211279974e-05, "loss": 0.4666, "step": 13243 }, { "epoch": 0.36364634816035146, "grad_norm": 0.3816090524196625, "learning_rate": 1.843173501529836e-05, "loss": 0.5249, "step": 13244 }, { "epoch": 0.36367380560131796, "grad_norm": 0.3792325258255005, "learning_rate": 1.8431502803588875e-05, "loss": 0.488, "step": 13245 }, { "epoch": 0.36370126304228445, "grad_norm": 0.3273666203022003, "learning_rate": 1.843127057615194e-05, "loss": 0.4314, "step": 13246 }, { "epoch": 0.36372872048325094, "grad_norm": 0.42347052693367004, "learning_rate": 1.8431038332988002e-05, "loss": 0.4779, "step": 13247 }, { "epoch": 0.36375617792421744, "grad_norm": 0.3682761490345001, "learning_rate": 1.843080607409748e-05, "loss": 0.5004, "step": 13248 }, { "epoch": 0.363783635365184, "grad_norm": 0.37068304419517517, "learning_rate": 1.8430573799480818e-05, "loss": 0.5762, "step": 13249 }, { "epoch": 0.3638110928061505, "grad_norm": 0.4145568907260895, "learning_rate": 1.843034150913844e-05, "loss": 0.5163, "step": 13250 }, { "epoch": 0.363838550247117, "grad_norm": 0.48526614904403687, "learning_rate": 1.843010920307079e-05, "loss": 0.6048, "step": 13251 }, { "epoch": 0.36386600768808347, "grad_norm": 0.4196273982524872, "learning_rate": 1.8429876881278295e-05, "loss": 0.5088, "step": 13252 }, { "epoch": 0.36389346512904996, "grad_norm": 0.33530065417289734, "learning_rate": 1.8429644543761384e-05, "loss": 0.5107, "step": 13253 }, { "epoch": 0.36392092257001646, "grad_norm": 0.5058258175849915, "learning_rate": 1.84294121905205e-05, "loss": 0.4961, "step": 13254 }, { "epoch": 0.36394838001098295, "grad_norm": 0.3661845624446869, "learning_rate": 1.8429179821556072e-05, "loss": 0.5008, "step": 13255 }, { "epoch": 0.3639758374519495, "grad_norm": 0.3951188623905182, "learning_rate": 1.8428947436868533e-05, "loss": 0.6414, "step": 13256 }, { "epoch": 0.364003294892916, "grad_norm": 0.4396534562110901, "learning_rate": 1.8428715036458317e-05, "loss": 0.6328, "step": 13257 }, { "epoch": 0.3640307523338825, "grad_norm": 0.36103951930999756, "learning_rate": 1.8428482620325855e-05, "loss": 0.5646, "step": 13258 }, { "epoch": 0.364058209774849, "grad_norm": 0.3870995044708252, "learning_rate": 1.8428250188471584e-05, "loss": 0.4883, "step": 13259 }, { "epoch": 0.3640856672158155, "grad_norm": 0.3736732304096222, "learning_rate": 1.8428017740895935e-05, "loss": 0.5266, "step": 13260 }, { "epoch": 0.364113124656782, "grad_norm": 0.35943564772605896, "learning_rate": 1.8427785277599347e-05, "loss": 0.4844, "step": 13261 }, { "epoch": 0.36414058209774847, "grad_norm": 0.3676561713218689, "learning_rate": 1.8427552798582248e-05, "loss": 0.5371, "step": 13262 }, { "epoch": 0.364168039538715, "grad_norm": 0.3424157202243805, "learning_rate": 1.8427320303845074e-05, "loss": 0.473, "step": 13263 }, { "epoch": 0.3641954969796815, "grad_norm": 0.35734015703201294, "learning_rate": 1.8427087793388253e-05, "loss": 0.4592, "step": 13264 }, { "epoch": 0.364222954420648, "grad_norm": 0.4090169668197632, "learning_rate": 1.842685526721223e-05, "loss": 0.5029, "step": 13265 }, { "epoch": 0.3642504118616145, "grad_norm": 0.3673754036426544, "learning_rate": 1.8426622725317427e-05, "loss": 0.5285, "step": 13266 }, { "epoch": 0.364277869302581, "grad_norm": 0.3527148365974426, "learning_rate": 1.8426390167704287e-05, "loss": 0.5705, "step": 13267 }, { "epoch": 0.3643053267435475, "grad_norm": 0.37671226263046265, "learning_rate": 1.8426157594373237e-05, "loss": 0.5725, "step": 13268 }, { "epoch": 0.364332784184514, "grad_norm": 0.46316054463386536, "learning_rate": 1.8425925005324718e-05, "loss": 0.5113, "step": 13269 }, { "epoch": 0.36436024162548053, "grad_norm": 0.40641331672668457, "learning_rate": 1.8425692400559154e-05, "loss": 0.5243, "step": 13270 }, { "epoch": 0.364387699066447, "grad_norm": 0.3726562261581421, "learning_rate": 1.8425459780076988e-05, "loss": 0.4781, "step": 13271 }, { "epoch": 0.3644151565074135, "grad_norm": 0.37236955761909485, "learning_rate": 1.842522714387865e-05, "loss": 0.4593, "step": 13272 }, { "epoch": 0.36444261394838, "grad_norm": 0.3926211893558502, "learning_rate": 1.8424994491964577e-05, "loss": 0.5173, "step": 13273 }, { "epoch": 0.3644700713893465, "grad_norm": 0.46923619508743286, "learning_rate": 1.8424761824335195e-05, "loss": 0.5126, "step": 13274 }, { "epoch": 0.364497528830313, "grad_norm": 0.646196186542511, "learning_rate": 1.8424529140990947e-05, "loss": 0.5773, "step": 13275 }, { "epoch": 0.3645249862712795, "grad_norm": 0.3653760552406311, "learning_rate": 1.8424296441932262e-05, "loss": 0.6268, "step": 13276 }, { "epoch": 0.36455244371224604, "grad_norm": 0.3569766581058502, "learning_rate": 1.8424063727159578e-05, "loss": 0.4741, "step": 13277 }, { "epoch": 0.36457990115321254, "grad_norm": 0.3716336488723755, "learning_rate": 1.842383099667332e-05, "loss": 0.6097, "step": 13278 }, { "epoch": 0.36460735859417903, "grad_norm": 0.3834122121334076, "learning_rate": 1.8423598250473936e-05, "loss": 0.5966, "step": 13279 }, { "epoch": 0.3646348160351455, "grad_norm": 0.3711458146572113, "learning_rate": 1.842336548856185e-05, "loss": 0.4877, "step": 13280 }, { "epoch": 0.364662273476112, "grad_norm": 0.3967645764350891, "learning_rate": 1.8423132710937498e-05, "loss": 0.4957, "step": 13281 }, { "epoch": 0.3646897309170785, "grad_norm": 0.3778120279312134, "learning_rate": 1.8422899917601315e-05, "loss": 0.5248, "step": 13282 }, { "epoch": 0.364717188358045, "grad_norm": 0.40993818640708923, "learning_rate": 1.842266710855374e-05, "loss": 0.5725, "step": 13283 }, { "epoch": 0.36474464579901156, "grad_norm": 0.36683452129364014, "learning_rate": 1.8422434283795195e-05, "loss": 0.5839, "step": 13284 }, { "epoch": 0.36477210323997805, "grad_norm": 0.3821203112602234, "learning_rate": 1.8422201443326125e-05, "loss": 0.5705, "step": 13285 }, { "epoch": 0.36479956068094455, "grad_norm": 0.3230799734592438, "learning_rate": 1.8421968587146962e-05, "loss": 0.484, "step": 13286 }, { "epoch": 0.36482701812191104, "grad_norm": 0.3225272297859192, "learning_rate": 1.842173571525814e-05, "loss": 0.4209, "step": 13287 }, { "epoch": 0.36485447556287753, "grad_norm": 0.43021076917648315, "learning_rate": 1.8421502827660093e-05, "loss": 0.4809, "step": 13288 }, { "epoch": 0.36488193300384403, "grad_norm": 0.476222425699234, "learning_rate": 1.8421269924353257e-05, "loss": 0.4386, "step": 13289 }, { "epoch": 0.3649093904448105, "grad_norm": 0.34194478392601013, "learning_rate": 1.842103700533806e-05, "loss": 0.4293, "step": 13290 }, { "epoch": 0.36493684788577707, "grad_norm": 0.39638620615005493, "learning_rate": 1.8420804070614944e-05, "loss": 0.4768, "step": 13291 }, { "epoch": 0.36496430532674357, "grad_norm": 0.33885541558265686, "learning_rate": 1.842057112018434e-05, "loss": 0.4805, "step": 13292 }, { "epoch": 0.36499176276771006, "grad_norm": 0.3689919114112854, "learning_rate": 1.8420338154046685e-05, "loss": 0.494, "step": 13293 }, { "epoch": 0.36501922020867655, "grad_norm": 0.4467167258262634, "learning_rate": 1.8420105172202412e-05, "loss": 0.5304, "step": 13294 }, { "epoch": 0.36504667764964305, "grad_norm": 0.3628845810890198, "learning_rate": 1.8419872174651952e-05, "loss": 0.5268, "step": 13295 }, { "epoch": 0.36507413509060954, "grad_norm": 0.37182343006134033, "learning_rate": 1.8419639161395747e-05, "loss": 0.5787, "step": 13296 }, { "epoch": 0.36510159253157604, "grad_norm": 0.38182374835014343, "learning_rate": 1.8419406132434226e-05, "loss": 0.58, "step": 13297 }, { "epoch": 0.3651290499725426, "grad_norm": 0.3162412941455841, "learning_rate": 1.8419173087767827e-05, "loss": 0.4231, "step": 13298 }, { "epoch": 0.3651565074135091, "grad_norm": 0.3668927550315857, "learning_rate": 1.841894002739698e-05, "loss": 0.5457, "step": 13299 }, { "epoch": 0.3651839648544756, "grad_norm": 0.3886664807796478, "learning_rate": 1.8418706951322124e-05, "loss": 0.5313, "step": 13300 }, { "epoch": 0.36521142229544207, "grad_norm": 0.35190919041633606, "learning_rate": 1.8418473859543694e-05, "loss": 0.5529, "step": 13301 }, { "epoch": 0.36523887973640856, "grad_norm": 0.38471975922584534, "learning_rate": 1.8418240752062124e-05, "loss": 0.5425, "step": 13302 }, { "epoch": 0.36526633717737506, "grad_norm": 0.35083475708961487, "learning_rate": 1.8418007628877848e-05, "loss": 0.5016, "step": 13303 }, { "epoch": 0.36529379461834155, "grad_norm": 0.397013783454895, "learning_rate": 1.8417774489991298e-05, "loss": 0.5111, "step": 13304 }, { "epoch": 0.3653212520593081, "grad_norm": 0.34346768260002136, "learning_rate": 1.8417541335402912e-05, "loss": 0.518, "step": 13305 }, { "epoch": 0.3653487095002746, "grad_norm": 0.42549484968185425, "learning_rate": 1.841730816511313e-05, "loss": 0.5396, "step": 13306 }, { "epoch": 0.3653761669412411, "grad_norm": 0.332579106092453, "learning_rate": 1.8417074979122378e-05, "loss": 0.5163, "step": 13307 }, { "epoch": 0.3654036243822076, "grad_norm": 0.3657087981700897, "learning_rate": 1.8416841777431096e-05, "loss": 0.526, "step": 13308 }, { "epoch": 0.3654310818231741, "grad_norm": 0.35419946908950806, "learning_rate": 1.8416608560039714e-05, "loss": 0.5068, "step": 13309 }, { "epoch": 0.36545853926414057, "grad_norm": 0.35014206171035767, "learning_rate": 1.8416375326948674e-05, "loss": 0.532, "step": 13310 }, { "epoch": 0.36548599670510706, "grad_norm": 0.369619756937027, "learning_rate": 1.841614207815841e-05, "loss": 0.5229, "step": 13311 }, { "epoch": 0.3655134541460736, "grad_norm": 0.48485833406448364, "learning_rate": 1.8415908813669352e-05, "loss": 0.5445, "step": 13312 }, { "epoch": 0.3655409115870401, "grad_norm": 0.36239004135131836, "learning_rate": 1.841567553348194e-05, "loss": 0.5601, "step": 13313 }, { "epoch": 0.3655683690280066, "grad_norm": 0.559315025806427, "learning_rate": 1.8415442237596602e-05, "loss": 0.4434, "step": 13314 }, { "epoch": 0.3655958264689731, "grad_norm": 0.3629678785800934, "learning_rate": 1.8415208926013784e-05, "loss": 0.4974, "step": 13315 }, { "epoch": 0.3656232839099396, "grad_norm": 0.3580913543701172, "learning_rate": 1.8414975598733913e-05, "loss": 0.4208, "step": 13316 }, { "epoch": 0.3656507413509061, "grad_norm": 0.35370516777038574, "learning_rate": 1.8414742255757428e-05, "loss": 0.5659, "step": 13317 }, { "epoch": 0.3656781987918726, "grad_norm": 0.41372695565223694, "learning_rate": 1.841450889708476e-05, "loss": 0.5267, "step": 13318 }, { "epoch": 0.3657056562328391, "grad_norm": 0.4300137162208557, "learning_rate": 1.841427552271635e-05, "loss": 0.5192, "step": 13319 }, { "epoch": 0.3657331136738056, "grad_norm": 0.3510093688964844, "learning_rate": 1.8414042132652632e-05, "loss": 0.54, "step": 13320 }, { "epoch": 0.3657605711147721, "grad_norm": 0.38661229610443115, "learning_rate": 1.8413808726894038e-05, "loss": 0.5037, "step": 13321 }, { "epoch": 0.3657880285557386, "grad_norm": 0.36446958780288696, "learning_rate": 1.8413575305441003e-05, "loss": 0.479, "step": 13322 }, { "epoch": 0.3658154859967051, "grad_norm": 0.354449599981308, "learning_rate": 1.8413341868293966e-05, "loss": 0.5303, "step": 13323 }, { "epoch": 0.3658429434376716, "grad_norm": 0.36447587609291077, "learning_rate": 1.8413108415453367e-05, "loss": 0.4952, "step": 13324 }, { "epoch": 0.3658704008786381, "grad_norm": 0.3748722970485687, "learning_rate": 1.841287494691963e-05, "loss": 0.5207, "step": 13325 }, { "epoch": 0.3658978583196046, "grad_norm": 0.3768123388290405, "learning_rate": 1.8412641462693197e-05, "loss": 0.4826, "step": 13326 }, { "epoch": 0.36592531576057113, "grad_norm": 0.3471638262271881, "learning_rate": 1.8412407962774503e-05, "loss": 0.4881, "step": 13327 }, { "epoch": 0.36595277320153763, "grad_norm": 0.35733985900878906, "learning_rate": 1.8412174447163985e-05, "loss": 0.4858, "step": 13328 }, { "epoch": 0.3659802306425041, "grad_norm": 0.3693062365055084, "learning_rate": 1.8411940915862074e-05, "loss": 0.5779, "step": 13329 }, { "epoch": 0.3660076880834706, "grad_norm": 0.37515556812286377, "learning_rate": 1.841170736886921e-05, "loss": 0.4679, "step": 13330 }, { "epoch": 0.3660351455244371, "grad_norm": 0.4107637107372284, "learning_rate": 1.8411473806185825e-05, "loss": 0.4869, "step": 13331 }, { "epoch": 0.3660626029654036, "grad_norm": 0.3195800185203552, "learning_rate": 1.841124022781236e-05, "loss": 0.4851, "step": 13332 }, { "epoch": 0.3660900604063701, "grad_norm": 0.3781949579715729, "learning_rate": 1.8411006633749245e-05, "loss": 0.5283, "step": 13333 }, { "epoch": 0.36611751784733665, "grad_norm": 0.3918650448322296, "learning_rate": 1.8410773023996917e-05, "loss": 0.5671, "step": 13334 }, { "epoch": 0.36614497528830314, "grad_norm": 0.3476279675960541, "learning_rate": 1.8410539398555815e-05, "loss": 0.494, "step": 13335 }, { "epoch": 0.36617243272926964, "grad_norm": 0.3372535705566406, "learning_rate": 1.841030575742637e-05, "loss": 0.4992, "step": 13336 }, { "epoch": 0.36619989017023613, "grad_norm": 0.37522092461586, "learning_rate": 1.8410072100609024e-05, "loss": 0.5081, "step": 13337 }, { "epoch": 0.3662273476112026, "grad_norm": 0.37428897619247437, "learning_rate": 1.840983842810421e-05, "loss": 0.481, "step": 13338 }, { "epoch": 0.3662548050521691, "grad_norm": 0.34694284200668335, "learning_rate": 1.840960473991236e-05, "loss": 0.4469, "step": 13339 }, { "epoch": 0.3662822624931356, "grad_norm": 0.9279324412345886, "learning_rate": 1.8409371036033918e-05, "loss": 0.5331, "step": 13340 }, { "epoch": 0.36630971993410216, "grad_norm": 0.3372042775154114, "learning_rate": 1.8409137316469308e-05, "loss": 0.5046, "step": 13341 }, { "epoch": 0.36633717737506866, "grad_norm": 0.5034865140914917, "learning_rate": 1.8408903581218976e-05, "loss": 0.5451, "step": 13342 }, { "epoch": 0.36636463481603515, "grad_norm": 0.3319418430328369, "learning_rate": 1.8408669830283356e-05, "loss": 0.4404, "step": 13343 }, { "epoch": 0.36639209225700164, "grad_norm": 0.3865717649459839, "learning_rate": 1.8408436063662884e-05, "loss": 0.597, "step": 13344 }, { "epoch": 0.36641954969796814, "grad_norm": 0.38112810254096985, "learning_rate": 1.8408202281357992e-05, "loss": 0.5088, "step": 13345 }, { "epoch": 0.36644700713893463, "grad_norm": 0.42365604639053345, "learning_rate": 1.8407968483369122e-05, "loss": 0.5389, "step": 13346 }, { "epoch": 0.3664744645799011, "grad_norm": 0.37940099835395813, "learning_rate": 1.8407734669696706e-05, "loss": 0.5693, "step": 13347 }, { "epoch": 0.3665019220208677, "grad_norm": 0.4405479431152344, "learning_rate": 1.840750084034118e-05, "loss": 0.562, "step": 13348 }, { "epoch": 0.36652937946183417, "grad_norm": 0.34387168288230896, "learning_rate": 1.8407266995302984e-05, "loss": 0.5312, "step": 13349 }, { "epoch": 0.36655683690280066, "grad_norm": 0.5587919354438782, "learning_rate": 1.840703313458255e-05, "loss": 0.4429, "step": 13350 }, { "epoch": 0.36658429434376716, "grad_norm": 0.44959282875061035, "learning_rate": 1.840679925818032e-05, "loss": 0.564, "step": 13351 }, { "epoch": 0.36661175178473365, "grad_norm": 0.4271070957183838, "learning_rate": 1.8406565366096722e-05, "loss": 0.5375, "step": 13352 }, { "epoch": 0.36663920922570015, "grad_norm": 0.39512133598327637, "learning_rate": 1.8406331458332196e-05, "loss": 0.6326, "step": 13353 }, { "epoch": 0.36666666666666664, "grad_norm": 0.4771334230899811, "learning_rate": 1.840609753488718e-05, "loss": 0.6156, "step": 13354 }, { "epoch": 0.3666941241076332, "grad_norm": 0.36504268646240234, "learning_rate": 1.840586359576211e-05, "loss": 0.4946, "step": 13355 }, { "epoch": 0.3667215815485997, "grad_norm": 0.41533011198043823, "learning_rate": 1.8405629640957422e-05, "loss": 0.514, "step": 13356 }, { "epoch": 0.3667490389895662, "grad_norm": 0.41306278109550476, "learning_rate": 1.8405395670473552e-05, "loss": 0.6179, "step": 13357 }, { "epoch": 0.3667764964305327, "grad_norm": 0.34606435894966125, "learning_rate": 1.8405161684310936e-05, "loss": 0.4607, "step": 13358 }, { "epoch": 0.36680395387149917, "grad_norm": 0.36875876784324646, "learning_rate": 1.840492768247001e-05, "loss": 0.6177, "step": 13359 }, { "epoch": 0.36683141131246566, "grad_norm": 0.3480891287326813, "learning_rate": 1.8404693664951212e-05, "loss": 0.532, "step": 13360 }, { "epoch": 0.36685886875343215, "grad_norm": 0.4269990921020508, "learning_rate": 1.8404459631754978e-05, "loss": 0.5067, "step": 13361 }, { "epoch": 0.3668863261943987, "grad_norm": 0.3628652095794678, "learning_rate": 1.8404225582881742e-05, "loss": 0.537, "step": 13362 }, { "epoch": 0.3669137836353652, "grad_norm": 0.3758205473423004, "learning_rate": 1.8403991518331947e-05, "loss": 0.5416, "step": 13363 }, { "epoch": 0.3669412410763317, "grad_norm": 0.40794235467910767, "learning_rate": 1.840375743810602e-05, "loss": 0.5665, "step": 13364 }, { "epoch": 0.3669686985172982, "grad_norm": 0.37940531969070435, "learning_rate": 1.8403523342204408e-05, "loss": 0.5604, "step": 13365 }, { "epoch": 0.3669961559582647, "grad_norm": 0.41265398263931274, "learning_rate": 1.8403289230627543e-05, "loss": 0.4935, "step": 13366 }, { "epoch": 0.3670236133992312, "grad_norm": 0.35071253776550293, "learning_rate": 1.840305510337586e-05, "loss": 0.486, "step": 13367 }, { "epoch": 0.36705107084019767, "grad_norm": 0.32659846544265747, "learning_rate": 1.8402820960449796e-05, "loss": 0.5739, "step": 13368 }, { "epoch": 0.3670785282811642, "grad_norm": 0.3483704626560211, "learning_rate": 1.840258680184979e-05, "loss": 0.5128, "step": 13369 }, { "epoch": 0.3671059857221307, "grad_norm": 0.39068934321403503, "learning_rate": 1.840235262757628e-05, "loss": 0.4804, "step": 13370 }, { "epoch": 0.3671334431630972, "grad_norm": 0.44622352719306946, "learning_rate": 1.84021184376297e-05, "loss": 0.5305, "step": 13371 }, { "epoch": 0.3671609006040637, "grad_norm": 0.32348060607910156, "learning_rate": 1.8401884232010487e-05, "loss": 0.4738, "step": 13372 }, { "epoch": 0.3671883580450302, "grad_norm": 0.42738184332847595, "learning_rate": 1.8401650010719073e-05, "loss": 0.5034, "step": 13373 }, { "epoch": 0.3672158154859967, "grad_norm": 0.41680678725242615, "learning_rate": 1.8401415773755908e-05, "loss": 0.499, "step": 13374 }, { "epoch": 0.3672432729269632, "grad_norm": 0.37779971957206726, "learning_rate": 1.8401181521121416e-05, "loss": 0.4817, "step": 13375 }, { "epoch": 0.36727073036792973, "grad_norm": 0.38788700103759766, "learning_rate": 1.8400947252816042e-05, "loss": 0.5086, "step": 13376 }, { "epoch": 0.3672981878088962, "grad_norm": 0.38359692692756653, "learning_rate": 1.840071296884022e-05, "loss": 0.4914, "step": 13377 }, { "epoch": 0.3673256452498627, "grad_norm": 0.6101033687591553, "learning_rate": 1.8400478669194386e-05, "loss": 0.555, "step": 13378 }, { "epoch": 0.3673531026908292, "grad_norm": 0.8869770765304565, "learning_rate": 1.840024435387898e-05, "loss": 0.5054, "step": 13379 }, { "epoch": 0.3673805601317957, "grad_norm": 0.39183473587036133, "learning_rate": 1.8400010022894434e-05, "loss": 0.4972, "step": 13380 }, { "epoch": 0.3674080175727622, "grad_norm": 0.33186694979667664, "learning_rate": 1.8399775676241192e-05, "loss": 0.5564, "step": 13381 }, { "epoch": 0.3674354750137287, "grad_norm": 0.3348950445652008, "learning_rate": 1.8399541313919685e-05, "loss": 0.4643, "step": 13382 }, { "epoch": 0.36746293245469525, "grad_norm": 0.39207109808921814, "learning_rate": 1.839930693593035e-05, "loss": 0.5221, "step": 13383 }, { "epoch": 0.36749038989566174, "grad_norm": 0.37054672837257385, "learning_rate": 1.8399072542273632e-05, "loss": 0.4804, "step": 13384 }, { "epoch": 0.36751784733662823, "grad_norm": 0.38118672370910645, "learning_rate": 1.839883813294996e-05, "loss": 0.5, "step": 13385 }, { "epoch": 0.36754530477759473, "grad_norm": 0.3411831855773926, "learning_rate": 1.839860370795978e-05, "loss": 0.5216, "step": 13386 }, { "epoch": 0.3675727622185612, "grad_norm": 0.3712208867073059, "learning_rate": 1.839836926730352e-05, "loss": 0.4941, "step": 13387 }, { "epoch": 0.3676002196595277, "grad_norm": 0.42792174220085144, "learning_rate": 1.8398134810981615e-05, "loss": 0.5228, "step": 13388 }, { "epoch": 0.3676276771004942, "grad_norm": 0.4152697026729584, "learning_rate": 1.839790033899451e-05, "loss": 0.5454, "step": 13389 }, { "epoch": 0.36765513454146076, "grad_norm": 0.3684476912021637, "learning_rate": 1.8397665851342647e-05, "loss": 0.5698, "step": 13390 }, { "epoch": 0.36768259198242725, "grad_norm": 0.4602998197078705, "learning_rate": 1.8397431348026457e-05, "loss": 0.4696, "step": 13391 }, { "epoch": 0.36771004942339375, "grad_norm": 0.3246760666370392, "learning_rate": 1.8397196829046372e-05, "loss": 0.4816, "step": 13392 }, { "epoch": 0.36773750686436024, "grad_norm": 0.3695104420185089, "learning_rate": 1.8396962294402834e-05, "loss": 0.5457, "step": 13393 }, { "epoch": 0.36776496430532674, "grad_norm": 0.3247019648551941, "learning_rate": 1.8396727744096286e-05, "loss": 0.4431, "step": 13394 }, { "epoch": 0.36779242174629323, "grad_norm": 0.3578256666660309, "learning_rate": 1.839649317812716e-05, "loss": 0.5124, "step": 13395 }, { "epoch": 0.3678198791872597, "grad_norm": 0.35054466128349304, "learning_rate": 1.8396258596495893e-05, "loss": 0.5506, "step": 13396 }, { "epoch": 0.3678473366282263, "grad_norm": 0.456494003534317, "learning_rate": 1.8396023999202922e-05, "loss": 0.5143, "step": 13397 }, { "epoch": 0.36787479406919277, "grad_norm": 0.3311481475830078, "learning_rate": 1.8395789386248692e-05, "loss": 0.4286, "step": 13398 }, { "epoch": 0.36790225151015926, "grad_norm": 0.3279074728488922, "learning_rate": 1.839555475763363e-05, "loss": 0.5202, "step": 13399 }, { "epoch": 0.36792970895112576, "grad_norm": 0.3917251229286194, "learning_rate": 1.839532011335818e-05, "loss": 0.5842, "step": 13400 }, { "epoch": 0.36795716639209225, "grad_norm": 0.37432000041007996, "learning_rate": 1.839508545342278e-05, "loss": 0.5285, "step": 13401 }, { "epoch": 0.36798462383305874, "grad_norm": 0.3498668074607849, "learning_rate": 1.8394850777827867e-05, "loss": 0.5762, "step": 13402 }, { "epoch": 0.36801208127402524, "grad_norm": 0.3375849425792694, "learning_rate": 1.8394616086573874e-05, "loss": 0.467, "step": 13403 }, { "epoch": 0.3680395387149918, "grad_norm": 0.3397333323955536, "learning_rate": 1.8394381379661247e-05, "loss": 0.4955, "step": 13404 }, { "epoch": 0.3680669961559583, "grad_norm": 0.34500449895858765, "learning_rate": 1.839414665709042e-05, "loss": 0.4981, "step": 13405 }, { "epoch": 0.3680944535969248, "grad_norm": 0.3981265425682068, "learning_rate": 1.8393911918861826e-05, "loss": 0.5512, "step": 13406 }, { "epoch": 0.36812191103789127, "grad_norm": 0.3952910006046295, "learning_rate": 1.839367716497591e-05, "loss": 0.4454, "step": 13407 }, { "epoch": 0.36814936847885776, "grad_norm": 0.3936050832271576, "learning_rate": 1.8393442395433104e-05, "loss": 0.4764, "step": 13408 }, { "epoch": 0.36817682591982426, "grad_norm": 0.33462223410606384, "learning_rate": 1.8393207610233853e-05, "loss": 0.4174, "step": 13409 }, { "epoch": 0.36820428336079075, "grad_norm": 0.41745761036872864, "learning_rate": 1.839297280937859e-05, "loss": 0.4574, "step": 13410 }, { "epoch": 0.3682317408017573, "grad_norm": 0.37675222754478455, "learning_rate": 1.8392737992867754e-05, "loss": 0.5584, "step": 13411 }, { "epoch": 0.3682591982427238, "grad_norm": 0.3506487011909485, "learning_rate": 1.8392503160701783e-05, "loss": 0.5312, "step": 13412 }, { "epoch": 0.3682866556836903, "grad_norm": 0.3985677659511566, "learning_rate": 1.8392268312881115e-05, "loss": 0.4947, "step": 13413 }, { "epoch": 0.3683141131246568, "grad_norm": 0.3659696877002716, "learning_rate": 1.8392033449406188e-05, "loss": 0.551, "step": 13414 }, { "epoch": 0.3683415705656233, "grad_norm": 0.3907739818096161, "learning_rate": 1.839179857027744e-05, "loss": 0.5036, "step": 13415 }, { "epoch": 0.36836902800658977, "grad_norm": 0.358896940946579, "learning_rate": 1.8391563675495308e-05, "loss": 0.5042, "step": 13416 }, { "epoch": 0.36839648544755627, "grad_norm": 0.40342676639556885, "learning_rate": 1.8391328765060234e-05, "loss": 0.5728, "step": 13417 }, { "epoch": 0.3684239428885228, "grad_norm": 0.36156249046325684, "learning_rate": 1.839109383897265e-05, "loss": 0.4264, "step": 13418 }, { "epoch": 0.3684514003294893, "grad_norm": 0.37061867117881775, "learning_rate": 1.8390858897233e-05, "loss": 0.5392, "step": 13419 }, { "epoch": 0.3684788577704558, "grad_norm": 0.33382901549339294, "learning_rate": 1.8390623939841723e-05, "loss": 0.4712, "step": 13420 }, { "epoch": 0.3685063152114223, "grad_norm": 0.40484169125556946, "learning_rate": 1.8390388966799252e-05, "loss": 0.5934, "step": 13421 }, { "epoch": 0.3685337726523888, "grad_norm": 0.36332035064697266, "learning_rate": 1.8390153978106028e-05, "loss": 0.5676, "step": 13422 }, { "epoch": 0.3685612300933553, "grad_norm": 0.36634042859077454, "learning_rate": 1.8389918973762487e-05, "loss": 0.5966, "step": 13423 }, { "epoch": 0.3685886875343218, "grad_norm": 0.4249381422996521, "learning_rate": 1.8389683953769068e-05, "loss": 0.5258, "step": 13424 }, { "epoch": 0.36861614497528833, "grad_norm": 0.35385769605636597, "learning_rate": 1.8389448918126217e-05, "loss": 0.4605, "step": 13425 }, { "epoch": 0.3686436024162548, "grad_norm": 0.3607059121131897, "learning_rate": 1.838921386683436e-05, "loss": 0.5206, "step": 13426 }, { "epoch": 0.3686710598572213, "grad_norm": 0.40478041768074036, "learning_rate": 1.8388978799893947e-05, "loss": 0.4819, "step": 13427 }, { "epoch": 0.3686985172981878, "grad_norm": 0.3853946924209595, "learning_rate": 1.838874371730541e-05, "loss": 0.5599, "step": 13428 }, { "epoch": 0.3687259747391543, "grad_norm": 0.37743517756462097, "learning_rate": 1.838850861906918e-05, "loss": 0.4975, "step": 13429 }, { "epoch": 0.3687534321801208, "grad_norm": 0.35615721344947815, "learning_rate": 1.8388273505185714e-05, "loss": 0.5103, "step": 13430 }, { "epoch": 0.3687808896210873, "grad_norm": 0.3602025806903839, "learning_rate": 1.838803837565544e-05, "loss": 0.4604, "step": 13431 }, { "epoch": 0.36880834706205384, "grad_norm": 0.37747034430503845, "learning_rate": 1.8387803230478795e-05, "loss": 0.4539, "step": 13432 }, { "epoch": 0.36883580450302034, "grad_norm": 0.40808364748954773, "learning_rate": 1.8387568069656218e-05, "loss": 0.575, "step": 13433 }, { "epoch": 0.36886326194398683, "grad_norm": 0.3677160441875458, "learning_rate": 1.8387332893188152e-05, "loss": 0.4999, "step": 13434 }, { "epoch": 0.3688907193849533, "grad_norm": 0.3718004524707794, "learning_rate": 1.8387097701075032e-05, "loss": 0.5094, "step": 13435 }, { "epoch": 0.3689181768259198, "grad_norm": 0.41247034072875977, "learning_rate": 1.8386862493317302e-05, "loss": 0.5522, "step": 13436 }, { "epoch": 0.3689456342668863, "grad_norm": 0.36637231707572937, "learning_rate": 1.8386627269915392e-05, "loss": 0.5134, "step": 13437 }, { "epoch": 0.3689730917078528, "grad_norm": 0.3613288104534149, "learning_rate": 1.8386392030869745e-05, "loss": 0.4786, "step": 13438 }, { "epoch": 0.36900054914881936, "grad_norm": 0.43460720777511597, "learning_rate": 1.8386156776180803e-05, "loss": 0.5397, "step": 13439 }, { "epoch": 0.36902800658978585, "grad_norm": 0.5147268772125244, "learning_rate": 1.8385921505849e-05, "loss": 0.4, "step": 13440 }, { "epoch": 0.36905546403075234, "grad_norm": 0.9536510109901428, "learning_rate": 1.838568621987478e-05, "loss": 0.4629, "step": 13441 }, { "epoch": 0.36908292147171884, "grad_norm": 0.4116055965423584, "learning_rate": 1.8385450918258578e-05, "loss": 0.5894, "step": 13442 }, { "epoch": 0.36911037891268533, "grad_norm": 0.36125192046165466, "learning_rate": 1.838521560100083e-05, "loss": 0.5136, "step": 13443 }, { "epoch": 0.3691378363536518, "grad_norm": 0.5452654361724854, "learning_rate": 1.838498026810198e-05, "loss": 0.4699, "step": 13444 }, { "epoch": 0.3691652937946183, "grad_norm": 0.3885881304740906, "learning_rate": 1.838474491956247e-05, "loss": 0.5478, "step": 13445 }, { "epoch": 0.36919275123558487, "grad_norm": 0.4133382737636566, "learning_rate": 1.838450955538273e-05, "loss": 0.5612, "step": 13446 }, { "epoch": 0.36922020867655136, "grad_norm": 0.3920286297798157, "learning_rate": 1.8384274175563206e-05, "loss": 0.6014, "step": 13447 }, { "epoch": 0.36924766611751786, "grad_norm": 0.35896822810173035, "learning_rate": 1.8384038780104333e-05, "loss": 0.5208, "step": 13448 }, { "epoch": 0.36927512355848435, "grad_norm": 0.35007244348526, "learning_rate": 1.8383803369006553e-05, "loss": 0.4882, "step": 13449 }, { "epoch": 0.36930258099945085, "grad_norm": 0.297490656375885, "learning_rate": 1.83835679422703e-05, "loss": 0.4041, "step": 13450 }, { "epoch": 0.36933003844041734, "grad_norm": 0.5628771185874939, "learning_rate": 1.8383332499896025e-05, "loss": 0.4768, "step": 13451 }, { "epoch": 0.36935749588138383, "grad_norm": 0.3211683928966522, "learning_rate": 1.838309704188415e-05, "loss": 0.515, "step": 13452 }, { "epoch": 0.3693849533223504, "grad_norm": 0.3560747802257538, "learning_rate": 1.8382861568235132e-05, "loss": 0.4266, "step": 13453 }, { "epoch": 0.3694124107633169, "grad_norm": 0.40635713934898376, "learning_rate": 1.8382626078949397e-05, "loss": 0.4771, "step": 13454 }, { "epoch": 0.3694398682042834, "grad_norm": 0.731410562992096, "learning_rate": 1.838239057402739e-05, "loss": 0.5702, "step": 13455 }, { "epoch": 0.36946732564524987, "grad_norm": 0.3759952783584595, "learning_rate": 1.838215505346955e-05, "loss": 0.5052, "step": 13456 }, { "epoch": 0.36949478308621636, "grad_norm": 0.3417091965675354, "learning_rate": 1.8381919517276315e-05, "loss": 0.4519, "step": 13457 }, { "epoch": 0.36952224052718285, "grad_norm": 0.32542070746421814, "learning_rate": 1.8381683965448123e-05, "loss": 0.4789, "step": 13458 }, { "epoch": 0.36954969796814935, "grad_norm": 0.3838872015476227, "learning_rate": 1.8381448397985418e-05, "loss": 0.5371, "step": 13459 }, { "epoch": 0.36957715540911584, "grad_norm": 0.39199793338775635, "learning_rate": 1.8381212814888635e-05, "loss": 0.5497, "step": 13460 }, { "epoch": 0.3696046128500824, "grad_norm": 0.3589835464954376, "learning_rate": 1.8380977216158217e-05, "loss": 0.5077, "step": 13461 }, { "epoch": 0.3696320702910489, "grad_norm": 0.3949224352836609, "learning_rate": 1.83807416017946e-05, "loss": 0.5287, "step": 13462 }, { "epoch": 0.3696595277320154, "grad_norm": 0.35895246267318726, "learning_rate": 1.8380505971798222e-05, "loss": 0.4998, "step": 13463 }, { "epoch": 0.3696869851729819, "grad_norm": 0.35966747999191284, "learning_rate": 1.838027032616953e-05, "loss": 0.49, "step": 13464 }, { "epoch": 0.36971444261394837, "grad_norm": 0.3691622316837311, "learning_rate": 1.8380034664908958e-05, "loss": 0.483, "step": 13465 }, { "epoch": 0.36974190005491486, "grad_norm": 0.34271135926246643, "learning_rate": 1.8379798988016947e-05, "loss": 0.4742, "step": 13466 }, { "epoch": 0.36976935749588136, "grad_norm": 0.46871277689933777, "learning_rate": 1.8379563295493937e-05, "loss": 0.5121, "step": 13467 }, { "epoch": 0.3697968149368479, "grad_norm": 0.34897077083587646, "learning_rate": 1.8379327587340366e-05, "loss": 0.4713, "step": 13468 }, { "epoch": 0.3698242723778144, "grad_norm": 0.3653803765773773, "learning_rate": 1.8379091863556677e-05, "loss": 0.4804, "step": 13469 }, { "epoch": 0.3698517298187809, "grad_norm": 0.35771897435188293, "learning_rate": 1.8378856124143304e-05, "loss": 0.4895, "step": 13470 }, { "epoch": 0.3698791872597474, "grad_norm": 0.36172038316726685, "learning_rate": 1.8378620369100694e-05, "loss": 0.5288, "step": 13471 }, { "epoch": 0.3699066447007139, "grad_norm": 0.3492182791233063, "learning_rate": 1.837838459842928e-05, "loss": 0.544, "step": 13472 }, { "epoch": 0.3699341021416804, "grad_norm": 0.3304196000099182, "learning_rate": 1.8378148812129502e-05, "loss": 0.4002, "step": 13473 }, { "epoch": 0.36996155958264687, "grad_norm": 0.43680962920188904, "learning_rate": 1.8377913010201805e-05, "loss": 0.5018, "step": 13474 }, { "epoch": 0.3699890170236134, "grad_norm": 0.3614024817943573, "learning_rate": 1.8377677192646625e-05, "loss": 0.4945, "step": 13475 }, { "epoch": 0.3700164744645799, "grad_norm": 0.3626757562160492, "learning_rate": 1.8377441359464408e-05, "loss": 0.5217, "step": 13476 }, { "epoch": 0.3700439319055464, "grad_norm": 0.38755473494529724, "learning_rate": 1.8377205510655586e-05, "loss": 0.543, "step": 13477 }, { "epoch": 0.3700713893465129, "grad_norm": 0.3552868366241455, "learning_rate": 1.8376969646220598e-05, "loss": 0.4732, "step": 13478 }, { "epoch": 0.3700988467874794, "grad_norm": 0.3445885181427002, "learning_rate": 1.8376733766159895e-05, "loss": 0.4834, "step": 13479 }, { "epoch": 0.3701263042284459, "grad_norm": 0.3257645070552826, "learning_rate": 1.8376497870473908e-05, "loss": 0.4803, "step": 13480 }, { "epoch": 0.3701537616694124, "grad_norm": 0.5585044026374817, "learning_rate": 1.8376261959163076e-05, "loss": 0.5694, "step": 13481 }, { "epoch": 0.37018121911037893, "grad_norm": 0.40360313653945923, "learning_rate": 1.8376026032227845e-05, "loss": 0.5159, "step": 13482 }, { "epoch": 0.37020867655134543, "grad_norm": 0.3324277400970459, "learning_rate": 1.837579008966865e-05, "loss": 0.551, "step": 13483 }, { "epoch": 0.3702361339923119, "grad_norm": 0.489388108253479, "learning_rate": 1.837555413148594e-05, "loss": 0.519, "step": 13484 }, { "epoch": 0.3702635914332784, "grad_norm": 0.35615482926368713, "learning_rate": 1.837531815768014e-05, "loss": 0.4934, "step": 13485 }, { "epoch": 0.3702910488742449, "grad_norm": 0.42173638939857483, "learning_rate": 1.83750821682517e-05, "loss": 0.504, "step": 13486 }, { "epoch": 0.3703185063152114, "grad_norm": 0.3315696716308594, "learning_rate": 1.837484616320106e-05, "loss": 0.4793, "step": 13487 }, { "epoch": 0.3703459637561779, "grad_norm": 0.34786510467529297, "learning_rate": 1.8374610142528658e-05, "loss": 0.4626, "step": 13488 }, { "epoch": 0.37037342119714445, "grad_norm": 0.39985573291778564, "learning_rate": 1.8374374106234937e-05, "loss": 0.5236, "step": 13489 }, { "epoch": 0.37040087863811094, "grad_norm": 0.3681473135948181, "learning_rate": 1.8374138054320333e-05, "loss": 0.4869, "step": 13490 }, { "epoch": 0.37042833607907744, "grad_norm": 0.40194016695022583, "learning_rate": 1.8373901986785292e-05, "loss": 0.5491, "step": 13491 }, { "epoch": 0.37045579352004393, "grad_norm": 0.4003845155239105, "learning_rate": 1.837366590363025e-05, "loss": 0.6287, "step": 13492 }, { "epoch": 0.3704832509610104, "grad_norm": 0.3771802484989166, "learning_rate": 1.837342980485565e-05, "loss": 0.5069, "step": 13493 }, { "epoch": 0.3705107084019769, "grad_norm": 0.3755689263343811, "learning_rate": 1.837319369046193e-05, "loss": 0.51, "step": 13494 }, { "epoch": 0.3705381658429434, "grad_norm": 0.393771767616272, "learning_rate": 1.837295756044953e-05, "loss": 0.6057, "step": 13495 }, { "epoch": 0.37056562328390996, "grad_norm": 0.32857391238212585, "learning_rate": 1.837272141481889e-05, "loss": 0.5052, "step": 13496 }, { "epoch": 0.37059308072487646, "grad_norm": 0.38793322443962097, "learning_rate": 1.8372485253570456e-05, "loss": 0.4669, "step": 13497 }, { "epoch": 0.37062053816584295, "grad_norm": 0.3856356143951416, "learning_rate": 1.8372249076704665e-05, "loss": 0.6104, "step": 13498 }, { "epoch": 0.37064799560680944, "grad_norm": 0.3609086275100708, "learning_rate": 1.8372012884221955e-05, "loss": 0.5664, "step": 13499 }, { "epoch": 0.37067545304777594, "grad_norm": 0.35234951972961426, "learning_rate": 1.837177667612277e-05, "loss": 0.5655, "step": 13500 }, { "epoch": 0.37070291048874243, "grad_norm": 0.42199960350990295, "learning_rate": 1.837154045240755e-05, "loss": 0.453, "step": 13501 }, { "epoch": 0.3707303679297089, "grad_norm": 0.33890193700790405, "learning_rate": 1.8371304213076734e-05, "loss": 0.4537, "step": 13502 }, { "epoch": 0.3707578253706755, "grad_norm": 0.3710574805736542, "learning_rate": 1.8371067958130765e-05, "loss": 0.494, "step": 13503 }, { "epoch": 0.37078528281164197, "grad_norm": 0.3753916621208191, "learning_rate": 1.837083168757008e-05, "loss": 0.527, "step": 13504 }, { "epoch": 0.37081274025260846, "grad_norm": 0.39860716462135315, "learning_rate": 1.8370595401395124e-05, "loss": 0.5814, "step": 13505 }, { "epoch": 0.37084019769357496, "grad_norm": 0.32664522528648376, "learning_rate": 1.8370359099606335e-05, "loss": 0.4786, "step": 13506 }, { "epoch": 0.37086765513454145, "grad_norm": 0.33673277497291565, "learning_rate": 1.8370122782204158e-05, "loss": 0.5172, "step": 13507 }, { "epoch": 0.37089511257550795, "grad_norm": 0.4607031047344208, "learning_rate": 1.8369886449189026e-05, "loss": 0.5134, "step": 13508 }, { "epoch": 0.37092257001647444, "grad_norm": 0.31031668186187744, "learning_rate": 1.836965010056139e-05, "loss": 0.426, "step": 13509 }, { "epoch": 0.370950027457441, "grad_norm": 0.5600857734680176, "learning_rate": 1.8369413736321678e-05, "loss": 0.552, "step": 13510 }, { "epoch": 0.3709774848984075, "grad_norm": 0.4069803059101105, "learning_rate": 1.8369177356470344e-05, "loss": 0.537, "step": 13511 }, { "epoch": 0.371004942339374, "grad_norm": 0.3489322066307068, "learning_rate": 1.8368940961007823e-05, "loss": 0.4271, "step": 13512 }, { "epoch": 0.37103239978034047, "grad_norm": 0.41022416949272156, "learning_rate": 1.8368704549934552e-05, "loss": 0.5953, "step": 13513 }, { "epoch": 0.37105985722130697, "grad_norm": 0.43324166536331177, "learning_rate": 1.836846812325098e-05, "loss": 0.5222, "step": 13514 }, { "epoch": 0.37108731466227346, "grad_norm": 0.3590271472930908, "learning_rate": 1.836823168095754e-05, "loss": 0.5132, "step": 13515 }, { "epoch": 0.37111477210323995, "grad_norm": 0.33972927927970886, "learning_rate": 1.8367995223054682e-05, "loss": 0.4821, "step": 13516 }, { "epoch": 0.3711422295442065, "grad_norm": 0.4470710754394531, "learning_rate": 1.8367758749542842e-05, "loss": 0.5411, "step": 13517 }, { "epoch": 0.371169686985173, "grad_norm": 0.35545089840888977, "learning_rate": 1.8367522260422458e-05, "loss": 0.4309, "step": 13518 }, { "epoch": 0.3711971444261395, "grad_norm": 0.36455854773521423, "learning_rate": 1.8367285755693975e-05, "loss": 0.535, "step": 13519 }, { "epoch": 0.371224601867106, "grad_norm": 0.3926265835762024, "learning_rate": 1.836704923535783e-05, "loss": 0.4792, "step": 13520 }, { "epoch": 0.3712520593080725, "grad_norm": 0.3918059766292572, "learning_rate": 1.8366812699414476e-05, "loss": 0.4376, "step": 13521 }, { "epoch": 0.371279516749039, "grad_norm": 0.33682382106781006, "learning_rate": 1.836657614786434e-05, "loss": 0.4591, "step": 13522 }, { "epoch": 0.37130697419000547, "grad_norm": 0.6314610838890076, "learning_rate": 1.8366339580707873e-05, "loss": 0.4806, "step": 13523 }, { "epoch": 0.371334431630972, "grad_norm": 0.39139148592948914, "learning_rate": 1.836610299794551e-05, "loss": 0.4873, "step": 13524 }, { "epoch": 0.3713618890719385, "grad_norm": 0.3173617422580719, "learning_rate": 1.8365866399577693e-05, "loss": 0.5543, "step": 13525 }, { "epoch": 0.371389346512905, "grad_norm": 0.36914023756980896, "learning_rate": 1.836562978560487e-05, "loss": 0.5502, "step": 13526 }, { "epoch": 0.3714168039538715, "grad_norm": 0.4496302306652069, "learning_rate": 1.8365393156027473e-05, "loss": 0.5325, "step": 13527 }, { "epoch": 0.371444261394838, "grad_norm": 0.3897566795349121, "learning_rate": 1.8365156510845948e-05, "loss": 0.4833, "step": 13528 }, { "epoch": 0.3714717188358045, "grad_norm": 0.4587305188179016, "learning_rate": 1.8364919850060737e-05, "loss": 0.5332, "step": 13529 }, { "epoch": 0.371499176276771, "grad_norm": 0.37378132343292236, "learning_rate": 1.8364683173672282e-05, "loss": 0.4551, "step": 13530 }, { "epoch": 0.37152663371773753, "grad_norm": 0.3466116189956665, "learning_rate": 1.8364446481681022e-05, "loss": 0.4896, "step": 13531 }, { "epoch": 0.371554091158704, "grad_norm": 0.3967624604701996, "learning_rate": 1.8364209774087402e-05, "loss": 0.5253, "step": 13532 }, { "epoch": 0.3715815485996705, "grad_norm": 0.6458766460418701, "learning_rate": 1.836397305089186e-05, "loss": 0.5823, "step": 13533 }, { "epoch": 0.371609006040637, "grad_norm": 0.3637692332267761, "learning_rate": 1.8363736312094836e-05, "loss": 0.5276, "step": 13534 }, { "epoch": 0.3716364634816035, "grad_norm": 0.3686804175376892, "learning_rate": 1.8363499557696777e-05, "loss": 0.5979, "step": 13535 }, { "epoch": 0.37166392092257, "grad_norm": 0.5284117460250854, "learning_rate": 1.8363262787698124e-05, "loss": 0.5848, "step": 13536 }, { "epoch": 0.3716913783635365, "grad_norm": 0.365894079208374, "learning_rate": 1.8363026002099315e-05, "loss": 0.4886, "step": 13537 }, { "epoch": 0.37171883580450304, "grad_norm": 0.3758604824542999, "learning_rate": 1.836278920090079e-05, "loss": 0.5221, "step": 13538 }, { "epoch": 0.37174629324546954, "grad_norm": 0.36317819356918335, "learning_rate": 1.8362552384102995e-05, "loss": 0.5065, "step": 13539 }, { "epoch": 0.37177375068643603, "grad_norm": 0.365349680185318, "learning_rate": 1.8362315551706372e-05, "loss": 0.5459, "step": 13540 }, { "epoch": 0.3718012081274025, "grad_norm": 0.4305405616760254, "learning_rate": 1.8362078703711366e-05, "loss": 0.5543, "step": 13541 }, { "epoch": 0.371828665568369, "grad_norm": 0.37639522552490234, "learning_rate": 1.8361841840118407e-05, "loss": 0.5227, "step": 13542 }, { "epoch": 0.3718561230093355, "grad_norm": 0.4040382206439972, "learning_rate": 1.836160496092795e-05, "loss": 0.6356, "step": 13543 }, { "epoch": 0.371883580450302, "grad_norm": 0.3789028823375702, "learning_rate": 1.8361368066140428e-05, "loss": 0.5378, "step": 13544 }, { "epoch": 0.37191103789126856, "grad_norm": 0.3810126483440399, "learning_rate": 1.8361131155756285e-05, "loss": 0.4782, "step": 13545 }, { "epoch": 0.37193849533223505, "grad_norm": 0.5209364295005798, "learning_rate": 1.836089422977597e-05, "loss": 0.5124, "step": 13546 }, { "epoch": 0.37196595277320155, "grad_norm": 0.331025630235672, "learning_rate": 1.8360657288199912e-05, "loss": 0.5262, "step": 13547 }, { "epoch": 0.37199341021416804, "grad_norm": 0.5741205811500549, "learning_rate": 1.8360420331028563e-05, "loss": 0.5602, "step": 13548 }, { "epoch": 0.37202086765513454, "grad_norm": 0.36510753631591797, "learning_rate": 1.836018335826236e-05, "loss": 0.5406, "step": 13549 }, { "epoch": 0.37204832509610103, "grad_norm": 0.5135376453399658, "learning_rate": 1.8359946369901744e-05, "loss": 0.5116, "step": 13550 }, { "epoch": 0.3720757825370675, "grad_norm": 0.4119746685028076, "learning_rate": 1.8359709365947166e-05, "loss": 0.494, "step": 13551 }, { "epoch": 0.3721032399780341, "grad_norm": 0.3640202581882477, "learning_rate": 1.835947234639906e-05, "loss": 0.5645, "step": 13552 }, { "epoch": 0.37213069741900057, "grad_norm": 0.3495558202266693, "learning_rate": 1.8359235311257867e-05, "loss": 0.5596, "step": 13553 }, { "epoch": 0.37215815485996706, "grad_norm": 0.4443800449371338, "learning_rate": 1.8358998260524034e-05, "loss": 0.4834, "step": 13554 }, { "epoch": 0.37218561230093355, "grad_norm": 0.3818444013595581, "learning_rate": 1.8358761194198e-05, "loss": 0.4913, "step": 13555 }, { "epoch": 0.37221306974190005, "grad_norm": 0.344837486743927, "learning_rate": 1.835852411228021e-05, "loss": 0.5345, "step": 13556 }, { "epoch": 0.37224052718286654, "grad_norm": 0.3685751259326935, "learning_rate": 1.8358287014771107e-05, "loss": 0.5384, "step": 13557 }, { "epoch": 0.37226798462383304, "grad_norm": 0.33291080594062805, "learning_rate": 1.835804990167113e-05, "loss": 0.4715, "step": 13558 }, { "epoch": 0.3722954420647996, "grad_norm": 0.4233565032482147, "learning_rate": 1.835781277298072e-05, "loss": 0.5745, "step": 13559 }, { "epoch": 0.3723228995057661, "grad_norm": 0.37394723296165466, "learning_rate": 1.8357575628700325e-05, "loss": 0.5363, "step": 13560 }, { "epoch": 0.3723503569467326, "grad_norm": 0.3805095851421356, "learning_rate": 1.835733846883038e-05, "loss": 0.4837, "step": 13561 }, { "epoch": 0.37237781438769907, "grad_norm": 0.3747875392436981, "learning_rate": 1.8357101293371332e-05, "loss": 0.4738, "step": 13562 }, { "epoch": 0.37240527182866556, "grad_norm": 0.3979951739311218, "learning_rate": 1.835686410232363e-05, "loss": 0.4797, "step": 13563 }, { "epoch": 0.37243272926963206, "grad_norm": 0.35897505283355713, "learning_rate": 1.8356626895687703e-05, "loss": 0.4499, "step": 13564 }, { "epoch": 0.37246018671059855, "grad_norm": 0.32801464200019836, "learning_rate": 1.8356389673464e-05, "loss": 0.494, "step": 13565 }, { "epoch": 0.3724876441515651, "grad_norm": 0.3646252453327179, "learning_rate": 1.8356152435652962e-05, "loss": 0.503, "step": 13566 }, { "epoch": 0.3725151015925316, "grad_norm": 0.34575527906417847, "learning_rate": 1.8355915182255035e-05, "loss": 0.4632, "step": 13567 }, { "epoch": 0.3725425590334981, "grad_norm": 0.35846036672592163, "learning_rate": 1.8355677913270658e-05, "loss": 0.5131, "step": 13568 }, { "epoch": 0.3725700164744646, "grad_norm": 0.3653334975242615, "learning_rate": 1.8355440628700278e-05, "loss": 0.5606, "step": 13569 }, { "epoch": 0.3725974739154311, "grad_norm": 0.36575305461883545, "learning_rate": 1.835520332854433e-05, "loss": 0.4491, "step": 13570 }, { "epoch": 0.37262493135639757, "grad_norm": 0.3377826511859894, "learning_rate": 1.8354966012803262e-05, "loss": 0.453, "step": 13571 }, { "epoch": 0.37265238879736406, "grad_norm": 0.4060770273208618, "learning_rate": 1.8354728681477517e-05, "loss": 0.6031, "step": 13572 }, { "epoch": 0.3726798462383306, "grad_norm": 0.45115557312965393, "learning_rate": 1.8354491334567535e-05, "loss": 0.5014, "step": 13573 }, { "epoch": 0.3727073036792971, "grad_norm": 0.34351640939712524, "learning_rate": 1.8354253972073763e-05, "loss": 0.456, "step": 13574 }, { "epoch": 0.3727347611202636, "grad_norm": 0.3583424985408783, "learning_rate": 1.835401659399664e-05, "loss": 0.4888, "step": 13575 }, { "epoch": 0.3727622185612301, "grad_norm": 0.41385048627853394, "learning_rate": 1.8353779200336607e-05, "loss": 0.545, "step": 13576 }, { "epoch": 0.3727896760021966, "grad_norm": 0.44504550099372864, "learning_rate": 1.8353541791094114e-05, "loss": 0.5236, "step": 13577 }, { "epoch": 0.3728171334431631, "grad_norm": 0.3668714761734009, "learning_rate": 1.8353304366269596e-05, "loss": 0.4663, "step": 13578 }, { "epoch": 0.3728445908841296, "grad_norm": 0.3718351423740387, "learning_rate": 1.8353066925863504e-05, "loss": 0.5264, "step": 13579 }, { "epoch": 0.37287204832509613, "grad_norm": 0.3916780650615692, "learning_rate": 1.8352829469876268e-05, "loss": 0.4821, "step": 13580 }, { "epoch": 0.3728995057660626, "grad_norm": 0.36601608991622925, "learning_rate": 1.8352591998308346e-05, "loss": 0.5395, "step": 13581 }, { "epoch": 0.3729269632070291, "grad_norm": 0.3421332836151123, "learning_rate": 1.8352354511160174e-05, "loss": 0.458, "step": 13582 }, { "epoch": 0.3729544206479956, "grad_norm": 0.3327290713787079, "learning_rate": 1.8352117008432193e-05, "loss": 0.4733, "step": 13583 }, { "epoch": 0.3729818780889621, "grad_norm": 0.3519177734851837, "learning_rate": 1.8351879490124846e-05, "loss": 0.5193, "step": 13584 }, { "epoch": 0.3730093355299286, "grad_norm": 0.42011547088623047, "learning_rate": 1.835164195623858e-05, "loss": 0.6162, "step": 13585 }, { "epoch": 0.3730367929708951, "grad_norm": 0.35859423875808716, "learning_rate": 1.8351404406773837e-05, "loss": 0.5022, "step": 13586 }, { "epoch": 0.37306425041186164, "grad_norm": 0.38481977581977844, "learning_rate": 1.835116684173106e-05, "loss": 0.5267, "step": 13587 }, { "epoch": 0.37309170785282814, "grad_norm": 0.44922056794166565, "learning_rate": 1.835092926111069e-05, "loss": 0.5257, "step": 13588 }, { "epoch": 0.37311916529379463, "grad_norm": 0.43255722522735596, "learning_rate": 1.835069166491317e-05, "loss": 0.52, "step": 13589 }, { "epoch": 0.3731466227347611, "grad_norm": 0.3337383270263672, "learning_rate": 1.8350454053138945e-05, "loss": 0.4295, "step": 13590 }, { "epoch": 0.3731740801757276, "grad_norm": 0.4449613392353058, "learning_rate": 1.8350216425788463e-05, "loss": 0.5014, "step": 13591 }, { "epoch": 0.3732015376166941, "grad_norm": 0.38105249404907227, "learning_rate": 1.834997878286216e-05, "loss": 0.4788, "step": 13592 }, { "epoch": 0.3732289950576606, "grad_norm": 0.37735384702682495, "learning_rate": 1.834974112436048e-05, "loss": 0.5306, "step": 13593 }, { "epoch": 0.3732564524986271, "grad_norm": 0.3525785505771637, "learning_rate": 1.8349503450283866e-05, "loss": 0.506, "step": 13594 }, { "epoch": 0.37328390993959365, "grad_norm": 0.42406898736953735, "learning_rate": 1.834926576063277e-05, "loss": 0.5199, "step": 13595 }, { "epoch": 0.37331136738056014, "grad_norm": 0.34691059589385986, "learning_rate": 1.8349028055407623e-05, "loss": 0.4972, "step": 13596 }, { "epoch": 0.37333882482152664, "grad_norm": 0.3701878488063812, "learning_rate": 1.8348790334608876e-05, "loss": 0.5093, "step": 13597 }, { "epoch": 0.37336628226249313, "grad_norm": 0.3630932867527008, "learning_rate": 1.834855259823697e-05, "loss": 0.4475, "step": 13598 }, { "epoch": 0.3733937397034596, "grad_norm": 0.42347002029418945, "learning_rate": 1.8348314846292346e-05, "loss": 0.5745, "step": 13599 }, { "epoch": 0.3734211971444261, "grad_norm": 0.3519357442855835, "learning_rate": 1.834807707877545e-05, "loss": 0.4855, "step": 13600 }, { "epoch": 0.3734486545853926, "grad_norm": 0.34048527479171753, "learning_rate": 1.834783929568673e-05, "loss": 0.4465, "step": 13601 }, { "epoch": 0.37347611202635916, "grad_norm": 0.4002440273761749, "learning_rate": 1.8347601497026627e-05, "loss": 0.5746, "step": 13602 }, { "epoch": 0.37350356946732566, "grad_norm": 0.4054774045944214, "learning_rate": 1.834736368279558e-05, "loss": 0.4422, "step": 13603 }, { "epoch": 0.37353102690829215, "grad_norm": 0.3621819019317627, "learning_rate": 1.8347125852994034e-05, "loss": 0.4875, "step": 13604 }, { "epoch": 0.37355848434925865, "grad_norm": 0.3768141269683838, "learning_rate": 1.8346888007622435e-05, "loss": 0.5672, "step": 13605 }, { "epoch": 0.37358594179022514, "grad_norm": 0.3424219489097595, "learning_rate": 1.834665014668123e-05, "loss": 0.4903, "step": 13606 }, { "epoch": 0.37361339923119163, "grad_norm": 0.39286351203918457, "learning_rate": 1.8346412270170853e-05, "loss": 0.576, "step": 13607 }, { "epoch": 0.37364085667215813, "grad_norm": 0.4168302118778229, "learning_rate": 1.8346174378091756e-05, "loss": 0.5432, "step": 13608 }, { "epoch": 0.3736683141131247, "grad_norm": 0.38480257987976074, "learning_rate": 1.834593647044438e-05, "loss": 0.5046, "step": 13609 }, { "epoch": 0.37369577155409117, "grad_norm": 0.46783673763275146, "learning_rate": 1.8345698547229167e-05, "loss": 0.5493, "step": 13610 }, { "epoch": 0.37372322899505767, "grad_norm": 0.39057886600494385, "learning_rate": 1.8345460608446564e-05, "loss": 0.501, "step": 13611 }, { "epoch": 0.37375068643602416, "grad_norm": 0.3717291057109833, "learning_rate": 1.834522265409701e-05, "loss": 0.4769, "step": 13612 }, { "epoch": 0.37377814387699065, "grad_norm": 0.37747645378112793, "learning_rate": 1.834498468418096e-05, "loss": 0.5474, "step": 13613 }, { "epoch": 0.37380560131795715, "grad_norm": 0.33693552017211914, "learning_rate": 1.8344746698698843e-05, "loss": 0.5204, "step": 13614 }, { "epoch": 0.37383305875892364, "grad_norm": 0.40179744362831116, "learning_rate": 1.8344508697651117e-05, "loss": 0.5916, "step": 13615 }, { "epoch": 0.3738605161998902, "grad_norm": 0.3203463554382324, "learning_rate": 1.834427068103821e-05, "loss": 0.5132, "step": 13616 }, { "epoch": 0.3738879736408567, "grad_norm": 0.34138023853302, "learning_rate": 1.834403264886058e-05, "loss": 0.5146, "step": 13617 }, { "epoch": 0.3739154310818232, "grad_norm": 0.36731094121932983, "learning_rate": 1.8343794601118668e-05, "loss": 0.5548, "step": 13618 }, { "epoch": 0.3739428885227897, "grad_norm": 0.40143483877182007, "learning_rate": 1.8343556537812916e-05, "loss": 0.4933, "step": 13619 }, { "epoch": 0.37397034596375617, "grad_norm": 0.402612566947937, "learning_rate": 1.8343318458943762e-05, "loss": 0.6493, "step": 13620 }, { "epoch": 0.37399780340472266, "grad_norm": 0.35615062713623047, "learning_rate": 1.834308036451166e-05, "loss": 0.4543, "step": 13621 }, { "epoch": 0.37402526084568916, "grad_norm": 0.3366524279117584, "learning_rate": 1.834284225451705e-05, "loss": 0.4641, "step": 13622 }, { "epoch": 0.3740527182866557, "grad_norm": 0.3823081851005554, "learning_rate": 1.834260412896038e-05, "loss": 0.5218, "step": 13623 }, { "epoch": 0.3740801757276222, "grad_norm": 0.601913571357727, "learning_rate": 1.8342365987842085e-05, "loss": 0.5449, "step": 13624 }, { "epoch": 0.3741076331685887, "grad_norm": 0.5608243942260742, "learning_rate": 1.834212783116262e-05, "loss": 0.5353, "step": 13625 }, { "epoch": 0.3741350906095552, "grad_norm": 0.3408958613872528, "learning_rate": 1.834188965892242e-05, "loss": 0.5415, "step": 13626 }, { "epoch": 0.3741625480505217, "grad_norm": 0.600914478302002, "learning_rate": 1.8341651471121935e-05, "loss": 0.5485, "step": 13627 }, { "epoch": 0.3741900054914882, "grad_norm": 0.3909434974193573, "learning_rate": 1.8341413267761608e-05, "loss": 0.5724, "step": 13628 }, { "epoch": 0.37421746293245467, "grad_norm": 0.3877597451210022, "learning_rate": 1.8341175048841883e-05, "loss": 0.5566, "step": 13629 }, { "epoch": 0.3742449203734212, "grad_norm": 0.3789316415786743, "learning_rate": 1.8340936814363203e-05, "loss": 0.5659, "step": 13630 }, { "epoch": 0.3742723778143877, "grad_norm": 0.4058796465396881, "learning_rate": 1.8340698564326014e-05, "loss": 0.573, "step": 13631 }, { "epoch": 0.3742998352553542, "grad_norm": 0.3654789328575134, "learning_rate": 1.834046029873076e-05, "loss": 0.5153, "step": 13632 }, { "epoch": 0.3743272926963207, "grad_norm": 0.3973502218723297, "learning_rate": 1.8340222017577886e-05, "loss": 0.6001, "step": 13633 }, { "epoch": 0.3743547501372872, "grad_norm": 0.4984470009803772, "learning_rate": 1.8339983720867834e-05, "loss": 0.5084, "step": 13634 }, { "epoch": 0.3743822075782537, "grad_norm": 0.33285006880760193, "learning_rate": 1.8339745408601057e-05, "loss": 0.4862, "step": 13635 }, { "epoch": 0.3744096650192202, "grad_norm": 0.4968203008174896, "learning_rate": 1.8339507080777987e-05, "loss": 0.567, "step": 13636 }, { "epoch": 0.37443712246018673, "grad_norm": 0.3765037953853607, "learning_rate": 1.8339268737399076e-05, "loss": 0.5417, "step": 13637 }, { "epoch": 0.3744645799011532, "grad_norm": 0.4549657106399536, "learning_rate": 1.8339030378464767e-05, "loss": 0.5379, "step": 13638 }, { "epoch": 0.3744920373421197, "grad_norm": 0.3607883155345917, "learning_rate": 1.8338792003975508e-05, "loss": 0.428, "step": 13639 }, { "epoch": 0.3745194947830862, "grad_norm": 0.42920708656311035, "learning_rate": 1.833855361393174e-05, "loss": 0.5118, "step": 13640 }, { "epoch": 0.3745469522240527, "grad_norm": 0.3624459207057953, "learning_rate": 1.8338315208333904e-05, "loss": 0.4847, "step": 13641 }, { "epoch": 0.3745744096650192, "grad_norm": 0.351351261138916, "learning_rate": 1.833807678718245e-05, "loss": 0.5052, "step": 13642 }, { "epoch": 0.3746018671059857, "grad_norm": 0.3489241600036621, "learning_rate": 1.8337838350477822e-05, "loss": 0.4668, "step": 13643 }, { "epoch": 0.37462932454695225, "grad_norm": 0.35177093744277954, "learning_rate": 1.8337599898220466e-05, "loss": 0.4491, "step": 13644 }, { "epoch": 0.37465678198791874, "grad_norm": 0.42894965410232544, "learning_rate": 1.8337361430410822e-05, "loss": 0.5328, "step": 13645 }, { "epoch": 0.37468423942888524, "grad_norm": 1.0149849653244019, "learning_rate": 1.8337122947049342e-05, "loss": 0.5272, "step": 13646 }, { "epoch": 0.37471169686985173, "grad_norm": 1.2288599014282227, "learning_rate": 1.8336884448136466e-05, "loss": 0.5258, "step": 13647 }, { "epoch": 0.3747391543108182, "grad_norm": 0.3333112299442291, "learning_rate": 1.8336645933672637e-05, "loss": 0.5264, "step": 13648 }, { "epoch": 0.3747666117517847, "grad_norm": 0.35462716221809387, "learning_rate": 1.8336407403658307e-05, "loss": 0.5786, "step": 13649 }, { "epoch": 0.3747940691927512, "grad_norm": 0.35489848256111145, "learning_rate": 1.833616885809391e-05, "loss": 0.4705, "step": 13650 }, { "epoch": 0.37482152663371776, "grad_norm": 0.33283761143684387, "learning_rate": 1.8335930296979903e-05, "loss": 0.4588, "step": 13651 }, { "epoch": 0.37484898407468425, "grad_norm": 0.3648979067802429, "learning_rate": 1.8335691720316723e-05, "loss": 0.497, "step": 13652 }, { "epoch": 0.37487644151565075, "grad_norm": 0.4289059340953827, "learning_rate": 1.8335453128104818e-05, "loss": 0.5956, "step": 13653 }, { "epoch": 0.37490389895661724, "grad_norm": 0.40145841240882874, "learning_rate": 1.8335214520344634e-05, "loss": 0.6205, "step": 13654 }, { "epoch": 0.37493135639758374, "grad_norm": 0.3786064088344574, "learning_rate": 1.833497589703661e-05, "loss": 0.5545, "step": 13655 }, { "epoch": 0.37495881383855023, "grad_norm": 0.38580256700515747, "learning_rate": 1.8334737258181203e-05, "loss": 0.5043, "step": 13656 }, { "epoch": 0.3749862712795167, "grad_norm": 0.39813482761383057, "learning_rate": 1.8334498603778844e-05, "loss": 0.5518, "step": 13657 }, { "epoch": 0.3750137287204833, "grad_norm": 0.5074609518051147, "learning_rate": 1.8334259933829988e-05, "loss": 0.5085, "step": 13658 }, { "epoch": 0.37504118616144977, "grad_norm": 0.3537602424621582, "learning_rate": 1.8334021248335076e-05, "loss": 0.5276, "step": 13659 }, { "epoch": 0.37506864360241626, "grad_norm": 0.3575875461101532, "learning_rate": 1.8333782547294555e-05, "loss": 0.5754, "step": 13660 }, { "epoch": 0.37509610104338276, "grad_norm": 0.37960201501846313, "learning_rate": 1.833354383070887e-05, "loss": 0.4578, "step": 13661 }, { "epoch": 0.37512355848434925, "grad_norm": 0.35247349739074707, "learning_rate": 1.833330509857847e-05, "loss": 0.4847, "step": 13662 }, { "epoch": 0.37515101592531575, "grad_norm": 0.3333853483200073, "learning_rate": 1.833306635090379e-05, "loss": 0.4591, "step": 13663 }, { "epoch": 0.37517847336628224, "grad_norm": 0.3834823966026306, "learning_rate": 1.8332827587685288e-05, "loss": 0.4895, "step": 13664 }, { "epoch": 0.3752059308072488, "grad_norm": 0.4257274866104126, "learning_rate": 1.83325888089234e-05, "loss": 0.5602, "step": 13665 }, { "epoch": 0.3752333882482153, "grad_norm": 0.37803417444229126, "learning_rate": 1.8332350014618573e-05, "loss": 0.5855, "step": 13666 }, { "epoch": 0.3752608456891818, "grad_norm": 0.35939860343933105, "learning_rate": 1.8332111204771252e-05, "loss": 0.4967, "step": 13667 }, { "epoch": 0.37528830313014827, "grad_norm": 0.4147886633872986, "learning_rate": 1.8331872379381886e-05, "loss": 0.5377, "step": 13668 }, { "epoch": 0.37531576057111476, "grad_norm": 0.3812200129032135, "learning_rate": 1.8331633538450923e-05, "loss": 0.5529, "step": 13669 }, { "epoch": 0.37534321801208126, "grad_norm": 0.4146744906902313, "learning_rate": 1.83313946819788e-05, "loss": 0.5655, "step": 13670 }, { "epoch": 0.37537067545304775, "grad_norm": 0.34130722284317017, "learning_rate": 1.8331155809965967e-05, "loss": 0.5698, "step": 13671 }, { "epoch": 0.3753981328940143, "grad_norm": 0.33568617701530457, "learning_rate": 1.833091692241287e-05, "loss": 0.4654, "step": 13672 }, { "epoch": 0.3754255903349808, "grad_norm": 0.36410287022590637, "learning_rate": 1.8330678019319955e-05, "loss": 0.5194, "step": 13673 }, { "epoch": 0.3754530477759473, "grad_norm": 0.39468976855278015, "learning_rate": 1.8330439100687667e-05, "loss": 0.4904, "step": 13674 }, { "epoch": 0.3754805052169138, "grad_norm": 0.4069713354110718, "learning_rate": 1.8330200166516448e-05, "loss": 0.493, "step": 13675 }, { "epoch": 0.3755079626578803, "grad_norm": 0.3312029242515564, "learning_rate": 1.8329961216806752e-05, "loss": 0.4935, "step": 13676 }, { "epoch": 0.3755354200988468, "grad_norm": 0.3970401883125305, "learning_rate": 1.8329722251559016e-05, "loss": 0.5809, "step": 13677 }, { "epoch": 0.37556287753981327, "grad_norm": 0.3626049757003784, "learning_rate": 1.832948327077369e-05, "loss": 0.5469, "step": 13678 }, { "epoch": 0.3755903349807798, "grad_norm": 0.37325718998908997, "learning_rate": 1.832924427445122e-05, "loss": 0.4604, "step": 13679 }, { "epoch": 0.3756177924217463, "grad_norm": 0.39087650179862976, "learning_rate": 1.832900526259205e-05, "loss": 0.5751, "step": 13680 }, { "epoch": 0.3756452498627128, "grad_norm": 0.3618304431438446, "learning_rate": 1.8328766235196628e-05, "loss": 0.4677, "step": 13681 }, { "epoch": 0.3756727073036793, "grad_norm": 0.4601861536502838, "learning_rate": 1.8328527192265396e-05, "loss": 0.5857, "step": 13682 }, { "epoch": 0.3757001647446458, "grad_norm": 0.37404394149780273, "learning_rate": 1.8328288133798808e-05, "loss": 0.5684, "step": 13683 }, { "epoch": 0.3757276221856123, "grad_norm": 0.3630339205265045, "learning_rate": 1.83280490597973e-05, "loss": 0.5135, "step": 13684 }, { "epoch": 0.3757550796265788, "grad_norm": 0.5023232102394104, "learning_rate": 1.8327809970261325e-05, "loss": 0.5391, "step": 13685 }, { "epoch": 0.37578253706754533, "grad_norm": 0.3846082389354706, "learning_rate": 1.8327570865191323e-05, "loss": 0.5462, "step": 13686 }, { "epoch": 0.3758099945085118, "grad_norm": 0.3777088522911072, "learning_rate": 1.832733174458775e-05, "loss": 0.4621, "step": 13687 }, { "epoch": 0.3758374519494783, "grad_norm": 0.34557801485061646, "learning_rate": 1.8327092608451038e-05, "loss": 0.4571, "step": 13688 }, { "epoch": 0.3758649093904448, "grad_norm": 0.37498417496681213, "learning_rate": 1.8326853456781647e-05, "loss": 0.4586, "step": 13689 }, { "epoch": 0.3758923668314113, "grad_norm": 0.3447803854942322, "learning_rate": 1.8326614289580012e-05, "loss": 0.4999, "step": 13690 }, { "epoch": 0.3759198242723778, "grad_norm": 0.33163172006607056, "learning_rate": 1.8326375106846585e-05, "loss": 0.4342, "step": 13691 }, { "epoch": 0.3759472817133443, "grad_norm": 0.3422918915748596, "learning_rate": 1.832613590858181e-05, "loss": 0.4347, "step": 13692 }, { "epoch": 0.37597473915431084, "grad_norm": 0.375461220741272, "learning_rate": 1.832589669478614e-05, "loss": 0.5111, "step": 13693 }, { "epoch": 0.37600219659527734, "grad_norm": 0.3888751268386841, "learning_rate": 1.832565746546001e-05, "loss": 0.5852, "step": 13694 }, { "epoch": 0.37602965403624383, "grad_norm": 0.476036936044693, "learning_rate": 1.832541822060387e-05, "loss": 0.53, "step": 13695 }, { "epoch": 0.3760571114772103, "grad_norm": 0.35866236686706543, "learning_rate": 1.832517896021817e-05, "loss": 0.4801, "step": 13696 }, { "epoch": 0.3760845689181768, "grad_norm": 0.35138964653015137, "learning_rate": 1.8324939684303354e-05, "loss": 0.5017, "step": 13697 }, { "epoch": 0.3761120263591433, "grad_norm": 0.4006747305393219, "learning_rate": 1.8324700392859872e-05, "loss": 0.5287, "step": 13698 }, { "epoch": 0.3761394838001098, "grad_norm": 0.3433343172073364, "learning_rate": 1.8324461085888162e-05, "loss": 0.5652, "step": 13699 }, { "epoch": 0.37616694124107636, "grad_norm": 0.47992366552352905, "learning_rate": 1.8324221763388678e-05, "loss": 0.6073, "step": 13700 }, { "epoch": 0.37619439868204285, "grad_norm": 0.3678651750087738, "learning_rate": 1.8323982425361864e-05, "loss": 0.4974, "step": 13701 }, { "epoch": 0.37622185612300935, "grad_norm": 0.35094812512397766, "learning_rate": 1.8323743071808163e-05, "loss": 0.4943, "step": 13702 }, { "epoch": 0.37624931356397584, "grad_norm": 0.4463077485561371, "learning_rate": 1.8323503702728027e-05, "loss": 0.5303, "step": 13703 }, { "epoch": 0.37627677100494233, "grad_norm": 0.44660282135009766, "learning_rate": 1.8323264318121898e-05, "loss": 0.5485, "step": 13704 }, { "epoch": 0.37630422844590883, "grad_norm": 0.4668506681919098, "learning_rate": 1.832302491799023e-05, "loss": 0.4704, "step": 13705 }, { "epoch": 0.3763316858868753, "grad_norm": 0.3822377920150757, "learning_rate": 1.8322785502333458e-05, "loss": 0.5873, "step": 13706 }, { "epoch": 0.37635914332784187, "grad_norm": 0.3679632842540741, "learning_rate": 1.8322546071152037e-05, "loss": 0.5189, "step": 13707 }, { "epoch": 0.37638660076880837, "grad_norm": 0.37616217136383057, "learning_rate": 1.832230662444641e-05, "loss": 0.5226, "step": 13708 }, { "epoch": 0.37641405820977486, "grad_norm": 0.37874022126197815, "learning_rate": 1.8322067162217026e-05, "loss": 0.5314, "step": 13709 }, { "epoch": 0.37644151565074135, "grad_norm": 0.3716884255409241, "learning_rate": 1.832182768446433e-05, "loss": 0.5288, "step": 13710 }, { "epoch": 0.37646897309170785, "grad_norm": 0.43205004930496216, "learning_rate": 1.832158819118877e-05, "loss": 0.5817, "step": 13711 }, { "epoch": 0.37649643053267434, "grad_norm": 0.38665080070495605, "learning_rate": 1.8321348682390794e-05, "loss": 0.5458, "step": 13712 }, { "epoch": 0.37652388797364084, "grad_norm": 0.3597867488861084, "learning_rate": 1.8321109158070843e-05, "loss": 0.4999, "step": 13713 }, { "epoch": 0.3765513454146074, "grad_norm": 0.4103391468524933, "learning_rate": 1.832086961822937e-05, "loss": 0.5114, "step": 13714 }, { "epoch": 0.3765788028555739, "grad_norm": 0.3207416236400604, "learning_rate": 1.832063006286682e-05, "loss": 0.4862, "step": 13715 }, { "epoch": 0.3766062602965404, "grad_norm": 0.35100480914115906, "learning_rate": 1.832039049198364e-05, "loss": 0.5566, "step": 13716 }, { "epoch": 0.37663371773750687, "grad_norm": 0.34482520818710327, "learning_rate": 1.8320150905580272e-05, "loss": 0.524, "step": 13717 }, { "epoch": 0.37666117517847336, "grad_norm": 0.4472881555557251, "learning_rate": 1.831991130365717e-05, "loss": 0.5996, "step": 13718 }, { "epoch": 0.37668863261943986, "grad_norm": 0.37280508875846863, "learning_rate": 1.8319671686214778e-05, "loss": 0.5024, "step": 13719 }, { "epoch": 0.37671609006040635, "grad_norm": 0.3883019685745239, "learning_rate": 1.8319432053253545e-05, "loss": 0.5398, "step": 13720 }, { "epoch": 0.3767435475013729, "grad_norm": 0.4250096380710602, "learning_rate": 1.8319192404773912e-05, "loss": 0.538, "step": 13721 }, { "epoch": 0.3767710049423394, "grad_norm": 0.4090476632118225, "learning_rate": 1.831895274077633e-05, "loss": 0.5007, "step": 13722 }, { "epoch": 0.3767984623833059, "grad_norm": 0.4028523862361908, "learning_rate": 1.8318713061261248e-05, "loss": 0.5148, "step": 13723 }, { "epoch": 0.3768259198242724, "grad_norm": 0.36102262139320374, "learning_rate": 1.831847336622911e-05, "loss": 0.4788, "step": 13724 }, { "epoch": 0.3768533772652389, "grad_norm": 0.3572014570236206, "learning_rate": 1.8318233655680365e-05, "loss": 0.5058, "step": 13725 }, { "epoch": 0.37688083470620537, "grad_norm": 0.3868630528450012, "learning_rate": 1.8317993929615462e-05, "loss": 0.4662, "step": 13726 }, { "epoch": 0.37690829214717186, "grad_norm": 0.38985690474510193, "learning_rate": 1.831775418803484e-05, "loss": 0.4921, "step": 13727 }, { "epoch": 0.37693574958813836, "grad_norm": 0.36705881357192993, "learning_rate": 1.8317514430938956e-05, "loss": 0.4957, "step": 13728 }, { "epoch": 0.3769632070291049, "grad_norm": 0.4421990215778351, "learning_rate": 1.831727465832825e-05, "loss": 0.5236, "step": 13729 }, { "epoch": 0.3769906644700714, "grad_norm": 0.32195189595222473, "learning_rate": 1.8317034870203175e-05, "loss": 0.4051, "step": 13730 }, { "epoch": 0.3770181219110379, "grad_norm": 0.3715932369232178, "learning_rate": 1.8316795066564172e-05, "loss": 0.4398, "step": 13731 }, { "epoch": 0.3770455793520044, "grad_norm": 0.38214507699012756, "learning_rate": 1.8316555247411697e-05, "loss": 0.5176, "step": 13732 }, { "epoch": 0.3770730367929709, "grad_norm": 0.3481929302215576, "learning_rate": 1.831631541274619e-05, "loss": 0.4478, "step": 13733 }, { "epoch": 0.3771004942339374, "grad_norm": 0.4311850368976593, "learning_rate": 1.8316075562568096e-05, "loss": 0.5566, "step": 13734 }, { "epoch": 0.37712795167490387, "grad_norm": 0.42929187417030334, "learning_rate": 1.831583569687787e-05, "loss": 0.5601, "step": 13735 }, { "epoch": 0.3771554091158704, "grad_norm": 0.419512540102005, "learning_rate": 1.8315595815675958e-05, "loss": 0.5238, "step": 13736 }, { "epoch": 0.3771828665568369, "grad_norm": 0.37914663553237915, "learning_rate": 1.8315355918962803e-05, "loss": 0.5192, "step": 13737 }, { "epoch": 0.3772103239978034, "grad_norm": 0.4331400692462921, "learning_rate": 1.831511600673886e-05, "loss": 0.5302, "step": 13738 }, { "epoch": 0.3772377814387699, "grad_norm": 0.35491684079170227, "learning_rate": 1.831487607900457e-05, "loss": 0.4751, "step": 13739 }, { "epoch": 0.3772652388797364, "grad_norm": 0.3807246685028076, "learning_rate": 1.831463613576038e-05, "loss": 0.5499, "step": 13740 }, { "epoch": 0.3772926963207029, "grad_norm": 0.4057830274105072, "learning_rate": 1.831439617700674e-05, "loss": 0.5762, "step": 13741 }, { "epoch": 0.3773201537616694, "grad_norm": 0.3789764642715454, "learning_rate": 1.8314156202744096e-05, "loss": 0.4833, "step": 13742 }, { "epoch": 0.37734761120263594, "grad_norm": 0.4038592576980591, "learning_rate": 1.83139162129729e-05, "loss": 0.5502, "step": 13743 }, { "epoch": 0.37737506864360243, "grad_norm": 0.3741764724254608, "learning_rate": 1.8313676207693595e-05, "loss": 0.4961, "step": 13744 }, { "epoch": 0.3774025260845689, "grad_norm": 0.3956778049468994, "learning_rate": 1.8313436186906634e-05, "loss": 0.5162, "step": 13745 }, { "epoch": 0.3774299835255354, "grad_norm": 0.3458876311779022, "learning_rate": 1.8313196150612458e-05, "loss": 0.4656, "step": 13746 }, { "epoch": 0.3774574409665019, "grad_norm": 0.36063453555107117, "learning_rate": 1.8312956098811518e-05, "loss": 0.5755, "step": 13747 }, { "epoch": 0.3774848984074684, "grad_norm": 0.375021368265152, "learning_rate": 1.8312716031504264e-05, "loss": 0.5577, "step": 13748 }, { "epoch": 0.3775123558484349, "grad_norm": 0.3624420762062073, "learning_rate": 1.831247594869114e-05, "loss": 0.5517, "step": 13749 }, { "epoch": 0.37753981328940145, "grad_norm": 0.3998534083366394, "learning_rate": 1.831223585037259e-05, "loss": 0.5863, "step": 13750 }, { "epoch": 0.37756727073036794, "grad_norm": 0.4463280141353607, "learning_rate": 1.8311995736549076e-05, "loss": 0.5406, "step": 13751 }, { "epoch": 0.37759472817133444, "grad_norm": 0.4440414011478424, "learning_rate": 1.8311755607221033e-05, "loss": 0.5913, "step": 13752 }, { "epoch": 0.37762218561230093, "grad_norm": 0.3355647623538971, "learning_rate": 1.8311515462388913e-05, "loss": 0.46, "step": 13753 }, { "epoch": 0.3776496430532674, "grad_norm": 0.3777865767478943, "learning_rate": 1.8311275302053166e-05, "loss": 0.4793, "step": 13754 }, { "epoch": 0.3776771004942339, "grad_norm": 0.36069706082344055, "learning_rate": 1.8311035126214235e-05, "loss": 0.5107, "step": 13755 }, { "epoch": 0.3777045579352004, "grad_norm": 0.4785435199737549, "learning_rate": 1.8310794934872572e-05, "loss": 0.4713, "step": 13756 }, { "epoch": 0.37773201537616696, "grad_norm": 0.4115641415119171, "learning_rate": 1.8310554728028624e-05, "loss": 0.5505, "step": 13757 }, { "epoch": 0.37775947281713346, "grad_norm": 0.5262405276298523, "learning_rate": 1.8310314505682842e-05, "loss": 0.5945, "step": 13758 }, { "epoch": 0.37778693025809995, "grad_norm": 0.37769246101379395, "learning_rate": 1.831007426783567e-05, "loss": 0.5276, "step": 13759 }, { "epoch": 0.37781438769906645, "grad_norm": 0.37171003222465515, "learning_rate": 1.830983401448755e-05, "loss": 0.4441, "step": 13760 }, { "epoch": 0.37784184514003294, "grad_norm": 0.4143519699573517, "learning_rate": 1.8309593745638945e-05, "loss": 0.6698, "step": 13761 }, { "epoch": 0.37786930258099943, "grad_norm": 0.4104626774787903, "learning_rate": 1.8309353461290293e-05, "loss": 0.5241, "step": 13762 }, { "epoch": 0.3778967600219659, "grad_norm": 0.41879019141197205, "learning_rate": 1.8309113161442043e-05, "loss": 0.5456, "step": 13763 }, { "epoch": 0.3779242174629325, "grad_norm": 0.591071367263794, "learning_rate": 1.830887284609465e-05, "loss": 0.4948, "step": 13764 }, { "epoch": 0.37795167490389897, "grad_norm": 0.6019638180732727, "learning_rate": 1.8308632515248553e-05, "loss": 0.5714, "step": 13765 }, { "epoch": 0.37797913234486546, "grad_norm": 0.5351055860519409, "learning_rate": 1.8308392168904205e-05, "loss": 0.6151, "step": 13766 }, { "epoch": 0.37800658978583196, "grad_norm": 0.37898558378219604, "learning_rate": 1.8308151807062057e-05, "loss": 0.4675, "step": 13767 }, { "epoch": 0.37803404722679845, "grad_norm": 0.33782288432121277, "learning_rate": 1.830791142972255e-05, "loss": 0.5295, "step": 13768 }, { "epoch": 0.37806150466776495, "grad_norm": 0.34968432784080505, "learning_rate": 1.8307671036886142e-05, "loss": 0.5673, "step": 13769 }, { "epoch": 0.37808896210873144, "grad_norm": 0.3685080409049988, "learning_rate": 1.8307430628553273e-05, "loss": 0.4932, "step": 13770 }, { "epoch": 0.378116419549698, "grad_norm": 0.3498368561267853, "learning_rate": 1.8307190204724393e-05, "loss": 0.5004, "step": 13771 }, { "epoch": 0.3781438769906645, "grad_norm": 0.35040590167045593, "learning_rate": 1.830694976539995e-05, "loss": 0.5532, "step": 13772 }, { "epoch": 0.378171334431631, "grad_norm": 0.46094202995300293, "learning_rate": 1.83067093105804e-05, "loss": 0.5126, "step": 13773 }, { "epoch": 0.3781987918725975, "grad_norm": 0.35522404313087463, "learning_rate": 1.830646884026618e-05, "loss": 0.4716, "step": 13774 }, { "epoch": 0.37822624931356397, "grad_norm": 0.3499138653278351, "learning_rate": 1.830622835445775e-05, "loss": 0.5419, "step": 13775 }, { "epoch": 0.37825370675453046, "grad_norm": 0.3815102279186249, "learning_rate": 1.830598785315555e-05, "loss": 0.5275, "step": 13776 }, { "epoch": 0.37828116419549696, "grad_norm": 0.34564128518104553, "learning_rate": 1.8305747336360034e-05, "loss": 0.4466, "step": 13777 }, { "epoch": 0.3783086216364635, "grad_norm": 0.3671666085720062, "learning_rate": 1.8305506804071645e-05, "loss": 0.498, "step": 13778 }, { "epoch": 0.37833607907743, "grad_norm": 0.35796022415161133, "learning_rate": 1.8305266256290838e-05, "loss": 0.5484, "step": 13779 }, { "epoch": 0.3783635365183965, "grad_norm": 0.35656628012657166, "learning_rate": 1.8305025693018058e-05, "loss": 0.54, "step": 13780 }, { "epoch": 0.378390993959363, "grad_norm": 0.4042435884475708, "learning_rate": 1.8304785114253757e-05, "loss": 0.5974, "step": 13781 }, { "epoch": 0.3784184514003295, "grad_norm": 0.37099510431289673, "learning_rate": 1.8304544519998376e-05, "loss": 0.5037, "step": 13782 }, { "epoch": 0.378445908841296, "grad_norm": 0.44777950644493103, "learning_rate": 1.830430391025237e-05, "loss": 0.6092, "step": 13783 }, { "epoch": 0.37847336628226247, "grad_norm": 0.3814082145690918, "learning_rate": 1.830406328501619e-05, "loss": 0.5229, "step": 13784 }, { "epoch": 0.378500823723229, "grad_norm": 0.4603172540664673, "learning_rate": 1.8303822644290278e-05, "loss": 0.543, "step": 13785 }, { "epoch": 0.3785282811641955, "grad_norm": 0.3858430087566376, "learning_rate": 1.830358198807509e-05, "loss": 0.5969, "step": 13786 }, { "epoch": 0.378555738605162, "grad_norm": 0.38362088799476624, "learning_rate": 1.8303341316371068e-05, "loss": 0.5464, "step": 13787 }, { "epoch": 0.3785831960461285, "grad_norm": 0.4237290918827057, "learning_rate": 1.8303100629178666e-05, "loss": 0.4442, "step": 13788 }, { "epoch": 0.378610653487095, "grad_norm": 0.36064085364341736, "learning_rate": 1.830285992649833e-05, "loss": 0.524, "step": 13789 }, { "epoch": 0.3786381109280615, "grad_norm": 0.4329741299152374, "learning_rate": 1.8302619208330513e-05, "loss": 0.6093, "step": 13790 }, { "epoch": 0.378665568369028, "grad_norm": 0.37747669219970703, "learning_rate": 1.830237847467566e-05, "loss": 0.4857, "step": 13791 }, { "epoch": 0.37869302580999453, "grad_norm": 0.38027170300483704, "learning_rate": 1.8302137725534223e-05, "loss": 0.5112, "step": 13792 }, { "epoch": 0.378720483250961, "grad_norm": 0.4061524569988251, "learning_rate": 1.8301896960906648e-05, "loss": 0.5197, "step": 13793 }, { "epoch": 0.3787479406919275, "grad_norm": 0.38775455951690674, "learning_rate": 1.830165618079338e-05, "loss": 0.4315, "step": 13794 }, { "epoch": 0.378775398132894, "grad_norm": 0.48909252882003784, "learning_rate": 1.8301415385194882e-05, "loss": 0.5184, "step": 13795 }, { "epoch": 0.3788028555738605, "grad_norm": 1.6780592203140259, "learning_rate": 1.8301174574111592e-05, "loss": 0.5745, "step": 13796 }, { "epoch": 0.378830313014827, "grad_norm": 0.3711349368095398, "learning_rate": 1.8300933747543957e-05, "loss": 0.4913, "step": 13797 }, { "epoch": 0.3788577704557935, "grad_norm": 0.3502753674983978, "learning_rate": 1.8300692905492438e-05, "loss": 0.4641, "step": 13798 }, { "epoch": 0.37888522789676005, "grad_norm": 0.44958019256591797, "learning_rate": 1.8300452047957478e-05, "loss": 0.5263, "step": 13799 }, { "epoch": 0.37891268533772654, "grad_norm": 0.36767876148223877, "learning_rate": 1.830021117493952e-05, "loss": 0.4953, "step": 13800 }, { "epoch": 0.37894014277869303, "grad_norm": 0.37956705689430237, "learning_rate": 1.8299970286439023e-05, "loss": 0.692, "step": 13801 }, { "epoch": 0.37896760021965953, "grad_norm": 0.39017900824546814, "learning_rate": 1.829972938245643e-05, "loss": 0.5399, "step": 13802 }, { "epoch": 0.378995057660626, "grad_norm": 0.3339664936065674, "learning_rate": 1.8299488462992197e-05, "loss": 0.4303, "step": 13803 }, { "epoch": 0.3790225151015925, "grad_norm": 0.3564557731151581, "learning_rate": 1.8299247528046764e-05, "loss": 0.5023, "step": 13804 }, { "epoch": 0.379049972542559, "grad_norm": 0.32645806670188904, "learning_rate": 1.8299006577620585e-05, "loss": 0.555, "step": 13805 }, { "epoch": 0.37907742998352556, "grad_norm": 0.4376627206802368, "learning_rate": 1.8298765611714116e-05, "loss": 0.4829, "step": 13806 }, { "epoch": 0.37910488742449205, "grad_norm": 0.4172261357307434, "learning_rate": 1.8298524630327798e-05, "loss": 0.569, "step": 13807 }, { "epoch": 0.37913234486545855, "grad_norm": 0.3637852668762207, "learning_rate": 1.829828363346208e-05, "loss": 0.5444, "step": 13808 }, { "epoch": 0.37915980230642504, "grad_norm": 0.3990805447101593, "learning_rate": 1.8298042621117417e-05, "loss": 0.5135, "step": 13809 }, { "epoch": 0.37918725974739154, "grad_norm": 0.3700745403766632, "learning_rate": 1.8297801593294256e-05, "loss": 0.5305, "step": 13810 }, { "epoch": 0.37921471718835803, "grad_norm": 0.5437168478965759, "learning_rate": 1.8297560549993044e-05, "loss": 0.5261, "step": 13811 }, { "epoch": 0.3792421746293245, "grad_norm": 0.3400055766105652, "learning_rate": 1.8297319491214237e-05, "loss": 0.5335, "step": 13812 }, { "epoch": 0.3792696320702911, "grad_norm": 0.3455132246017456, "learning_rate": 1.829707841695828e-05, "loss": 0.5365, "step": 13813 }, { "epoch": 0.37929708951125757, "grad_norm": 0.32821378111839294, "learning_rate": 1.8296837327225624e-05, "loss": 0.4527, "step": 13814 }, { "epoch": 0.37932454695222406, "grad_norm": 0.3797248303890228, "learning_rate": 1.8296596222016717e-05, "loss": 0.521, "step": 13815 }, { "epoch": 0.37935200439319056, "grad_norm": 0.35051530599594116, "learning_rate": 1.8296355101332012e-05, "loss": 0.5434, "step": 13816 }, { "epoch": 0.37937946183415705, "grad_norm": 0.3556916415691376, "learning_rate": 1.8296113965171954e-05, "loss": 0.4998, "step": 13817 }, { "epoch": 0.37940691927512354, "grad_norm": 0.47228267788887024, "learning_rate": 1.8295872813536998e-05, "loss": 0.5363, "step": 13818 }, { "epoch": 0.37943437671609004, "grad_norm": 0.3994893431663513, "learning_rate": 1.829563164642759e-05, "loss": 0.496, "step": 13819 }, { "epoch": 0.3794618341570566, "grad_norm": 0.3593752682209015, "learning_rate": 1.8295390463844184e-05, "loss": 0.5136, "step": 13820 }, { "epoch": 0.3794892915980231, "grad_norm": 0.35013872385025024, "learning_rate": 1.8295149265787224e-05, "loss": 0.5363, "step": 13821 }, { "epoch": 0.3795167490389896, "grad_norm": 0.4385868310928345, "learning_rate": 1.8294908052257164e-05, "loss": 0.5925, "step": 13822 }, { "epoch": 0.37954420647995607, "grad_norm": 0.3450391888618469, "learning_rate": 1.8294666823254452e-05, "loss": 0.5212, "step": 13823 }, { "epoch": 0.37957166392092256, "grad_norm": 0.4294957220554352, "learning_rate": 1.829442557877954e-05, "loss": 0.5014, "step": 13824 }, { "epoch": 0.37959912136188906, "grad_norm": 0.317129522562027, "learning_rate": 1.829418431883288e-05, "loss": 0.3968, "step": 13825 }, { "epoch": 0.37962657880285555, "grad_norm": 0.4086359441280365, "learning_rate": 1.8293943043414913e-05, "loss": 0.5317, "step": 13826 }, { "epoch": 0.3796540362438221, "grad_norm": 0.325891375541687, "learning_rate": 1.82937017525261e-05, "loss": 0.4868, "step": 13827 }, { "epoch": 0.3796814936847886, "grad_norm": 0.3927406966686249, "learning_rate": 1.8293460446166884e-05, "loss": 0.5368, "step": 13828 }, { "epoch": 0.3797089511257551, "grad_norm": 0.533870279788971, "learning_rate": 1.829321912433772e-05, "loss": 0.4927, "step": 13829 }, { "epoch": 0.3797364085667216, "grad_norm": 0.3444902002811432, "learning_rate": 1.8292977787039053e-05, "loss": 0.508, "step": 13830 }, { "epoch": 0.3797638660076881, "grad_norm": 0.3405660092830658, "learning_rate": 1.8292736434271336e-05, "loss": 0.4414, "step": 13831 }, { "epoch": 0.37979132344865457, "grad_norm": 0.43637871742248535, "learning_rate": 1.829249506603502e-05, "loss": 0.5539, "step": 13832 }, { "epoch": 0.37981878088962107, "grad_norm": 1.0683155059814453, "learning_rate": 1.829225368233055e-05, "loss": 0.3956, "step": 13833 }, { "epoch": 0.3798462383305876, "grad_norm": 0.3641483187675476, "learning_rate": 1.8292012283158384e-05, "loss": 0.4792, "step": 13834 }, { "epoch": 0.3798736957715541, "grad_norm": 0.3743440508842468, "learning_rate": 1.829177086851897e-05, "loss": 0.5113, "step": 13835 }, { "epoch": 0.3799011532125206, "grad_norm": 0.37791576981544495, "learning_rate": 1.8291529438412753e-05, "loss": 0.5443, "step": 13836 }, { "epoch": 0.3799286106534871, "grad_norm": 0.3418792486190796, "learning_rate": 1.829128799284019e-05, "loss": 0.5972, "step": 13837 }, { "epoch": 0.3799560680944536, "grad_norm": 0.47639182209968567, "learning_rate": 1.829104653180173e-05, "loss": 0.4946, "step": 13838 }, { "epoch": 0.3799835255354201, "grad_norm": 0.3716491460800171, "learning_rate": 1.8290805055297816e-05, "loss": 0.4612, "step": 13839 }, { "epoch": 0.3800109829763866, "grad_norm": 0.4136045277118683, "learning_rate": 1.829056356332891e-05, "loss": 0.6134, "step": 13840 }, { "epoch": 0.38003844041735313, "grad_norm": 0.36192354559898376, "learning_rate": 1.8290322055895454e-05, "loss": 0.5571, "step": 13841 }, { "epoch": 0.3800658978583196, "grad_norm": 0.4953792691230774, "learning_rate": 1.82900805329979e-05, "loss": 0.6025, "step": 13842 }, { "epoch": 0.3800933552992861, "grad_norm": 0.3811550736427307, "learning_rate": 1.8289838994636705e-05, "loss": 0.5277, "step": 13843 }, { "epoch": 0.3801208127402526, "grad_norm": 0.3509882688522339, "learning_rate": 1.828959744081231e-05, "loss": 0.501, "step": 13844 }, { "epoch": 0.3801482701812191, "grad_norm": 0.36605462431907654, "learning_rate": 1.8289355871525174e-05, "loss": 0.5052, "step": 13845 }, { "epoch": 0.3801757276221856, "grad_norm": 0.48164623975753784, "learning_rate": 1.828911428677574e-05, "loss": 0.5333, "step": 13846 }, { "epoch": 0.3802031850631521, "grad_norm": 0.41701173782348633, "learning_rate": 1.8288872686564463e-05, "loss": 0.5299, "step": 13847 }, { "epoch": 0.38023064250411864, "grad_norm": 0.4387149512767792, "learning_rate": 1.8288631070891797e-05, "loss": 0.4693, "step": 13848 }, { "epoch": 0.38025809994508514, "grad_norm": 0.3895706832408905, "learning_rate": 1.8288389439758184e-05, "loss": 0.4672, "step": 13849 }, { "epoch": 0.38028555738605163, "grad_norm": 0.3437942564487457, "learning_rate": 1.8288147793164078e-05, "loss": 0.4924, "step": 13850 }, { "epoch": 0.3803130148270181, "grad_norm": 0.3893894851207733, "learning_rate": 1.8287906131109934e-05, "loss": 0.6084, "step": 13851 }, { "epoch": 0.3803404722679846, "grad_norm": 0.3889264166355133, "learning_rate": 1.8287664453596202e-05, "loss": 0.5275, "step": 13852 }, { "epoch": 0.3803679297089511, "grad_norm": 0.34500807523727417, "learning_rate": 1.828742276062333e-05, "loss": 0.4904, "step": 13853 }, { "epoch": 0.3803953871499176, "grad_norm": 0.45810258388519287, "learning_rate": 1.8287181052191766e-05, "loss": 0.6142, "step": 13854 }, { "epoch": 0.38042284459088416, "grad_norm": 0.3453022837638855, "learning_rate": 1.8286939328301967e-05, "loss": 0.4724, "step": 13855 }, { "epoch": 0.38045030203185065, "grad_norm": 0.3395344614982605, "learning_rate": 1.828669758895438e-05, "loss": 0.4971, "step": 13856 }, { "epoch": 0.38047775947281715, "grad_norm": 0.4008251130580902, "learning_rate": 1.8286455834149456e-05, "loss": 0.5027, "step": 13857 }, { "epoch": 0.38050521691378364, "grad_norm": 0.39034906029701233, "learning_rate": 1.828621406388765e-05, "loss": 0.6209, "step": 13858 }, { "epoch": 0.38053267435475013, "grad_norm": 0.41206830739974976, "learning_rate": 1.828597227816941e-05, "loss": 0.5874, "step": 13859 }, { "epoch": 0.3805601317957166, "grad_norm": 0.4211733341217041, "learning_rate": 1.828573047699518e-05, "loss": 0.5405, "step": 13860 }, { "epoch": 0.3805875892366831, "grad_norm": 0.4897020757198334, "learning_rate": 1.828548866036543e-05, "loss": 0.5716, "step": 13861 }, { "epoch": 0.3806150466776496, "grad_norm": 0.37768909335136414, "learning_rate": 1.828524682828059e-05, "loss": 0.5444, "step": 13862 }, { "epoch": 0.38064250411861617, "grad_norm": 0.3970952033996582, "learning_rate": 1.8285004980741126e-05, "loss": 0.5229, "step": 13863 }, { "epoch": 0.38066996155958266, "grad_norm": 0.3401690721511841, "learning_rate": 1.828476311774748e-05, "loss": 0.547, "step": 13864 }, { "epoch": 0.38069741900054915, "grad_norm": 0.38028448820114136, "learning_rate": 1.8284521239300108e-05, "loss": 0.5722, "step": 13865 }, { "epoch": 0.38072487644151565, "grad_norm": 0.4574624300003052, "learning_rate": 1.828427934539946e-05, "loss": 0.4947, "step": 13866 }, { "epoch": 0.38075233388248214, "grad_norm": 0.4043523371219635, "learning_rate": 1.8284037436045986e-05, "loss": 0.5845, "step": 13867 }, { "epoch": 0.38077979132344864, "grad_norm": 0.3510778248310089, "learning_rate": 1.828379551124014e-05, "loss": 0.4833, "step": 13868 }, { "epoch": 0.38080724876441513, "grad_norm": 0.384405255317688, "learning_rate": 1.8283553570982372e-05, "loss": 0.5664, "step": 13869 }, { "epoch": 0.3808347062053817, "grad_norm": 0.48133954405784607, "learning_rate": 1.828331161527313e-05, "loss": 0.6205, "step": 13870 }, { "epoch": 0.3808621636463482, "grad_norm": 0.387666255235672, "learning_rate": 1.828306964411287e-05, "loss": 0.4264, "step": 13871 }, { "epoch": 0.38088962108731467, "grad_norm": 0.37103021144866943, "learning_rate": 1.8282827657502042e-05, "loss": 0.4947, "step": 13872 }, { "epoch": 0.38091707852828116, "grad_norm": 0.36014223098754883, "learning_rate": 1.8282585655441094e-05, "loss": 0.5068, "step": 13873 }, { "epoch": 0.38094453596924766, "grad_norm": 0.40497809648513794, "learning_rate": 1.828234363793048e-05, "loss": 0.6456, "step": 13874 }, { "epoch": 0.38097199341021415, "grad_norm": 0.4754997789859772, "learning_rate": 1.8282101604970655e-05, "loss": 0.5316, "step": 13875 }, { "epoch": 0.38099945085118064, "grad_norm": 0.3719891309738159, "learning_rate": 1.8281859556562063e-05, "loss": 0.4907, "step": 13876 }, { "epoch": 0.3810269082921472, "grad_norm": 0.35601556301116943, "learning_rate": 1.828161749270516e-05, "loss": 0.4763, "step": 13877 }, { "epoch": 0.3810543657331137, "grad_norm": 0.33887535333633423, "learning_rate": 1.8281375413400406e-05, "loss": 0.5315, "step": 13878 }, { "epoch": 0.3810818231740802, "grad_norm": 0.40073615312576294, "learning_rate": 1.8281133318648235e-05, "loss": 0.5426, "step": 13879 }, { "epoch": 0.3811092806150467, "grad_norm": 0.35356661677360535, "learning_rate": 1.828089120844911e-05, "loss": 0.5358, "step": 13880 }, { "epoch": 0.38113673805601317, "grad_norm": 0.3895391523838043, "learning_rate": 1.8280649082803478e-05, "loss": 0.5153, "step": 13881 }, { "epoch": 0.38116419549697966, "grad_norm": 0.36865657567977905, "learning_rate": 1.8280406941711795e-05, "loss": 0.5019, "step": 13882 }, { "epoch": 0.38119165293794616, "grad_norm": 0.5565499067306519, "learning_rate": 1.828016478517451e-05, "loss": 0.4671, "step": 13883 }, { "epoch": 0.3812191103789127, "grad_norm": 0.3717564344406128, "learning_rate": 1.8279922613192075e-05, "loss": 0.5273, "step": 13884 }, { "epoch": 0.3812465678198792, "grad_norm": 0.3725970983505249, "learning_rate": 1.827968042576494e-05, "loss": 0.5265, "step": 13885 }, { "epoch": 0.3812740252608457, "grad_norm": 0.36449384689331055, "learning_rate": 1.8279438222893556e-05, "loss": 0.5094, "step": 13886 }, { "epoch": 0.3813014827018122, "grad_norm": 0.39664626121520996, "learning_rate": 1.8279196004578384e-05, "loss": 0.5491, "step": 13887 }, { "epoch": 0.3813289401427787, "grad_norm": 0.3828599154949188, "learning_rate": 1.8278953770819864e-05, "loss": 0.5368, "step": 13888 }, { "epoch": 0.3813563975837452, "grad_norm": 0.3700379729270935, "learning_rate": 1.8278711521618456e-05, "loss": 0.514, "step": 13889 }, { "epoch": 0.38138385502471167, "grad_norm": 0.3430037498474121, "learning_rate": 1.8278469256974607e-05, "loss": 0.5059, "step": 13890 }, { "epoch": 0.3814113124656782, "grad_norm": 0.35493800044059753, "learning_rate": 1.8278226976888768e-05, "loss": 0.4991, "step": 13891 }, { "epoch": 0.3814387699066447, "grad_norm": 0.3717474639415741, "learning_rate": 1.8277984681361397e-05, "loss": 0.5731, "step": 13892 }, { "epoch": 0.3814662273476112, "grad_norm": 0.35460329055786133, "learning_rate": 1.8277742370392943e-05, "loss": 0.5226, "step": 13893 }, { "epoch": 0.3814936847885777, "grad_norm": 0.3527994453907013, "learning_rate": 1.827750004398386e-05, "loss": 0.4094, "step": 13894 }, { "epoch": 0.3815211422295442, "grad_norm": 0.38936564326286316, "learning_rate": 1.827725770213459e-05, "loss": 0.4847, "step": 13895 }, { "epoch": 0.3815485996705107, "grad_norm": 0.33073586225509644, "learning_rate": 1.8277015344845597e-05, "loss": 0.5038, "step": 13896 }, { "epoch": 0.3815760571114772, "grad_norm": 0.36350539326667786, "learning_rate": 1.827677297211733e-05, "loss": 0.4847, "step": 13897 }, { "epoch": 0.38160351455244373, "grad_norm": 0.3488752245903015, "learning_rate": 1.827653058395024e-05, "loss": 0.477, "step": 13898 }, { "epoch": 0.38163097199341023, "grad_norm": 0.35129135847091675, "learning_rate": 1.8276288180344773e-05, "loss": 0.4928, "step": 13899 }, { "epoch": 0.3816584294343767, "grad_norm": 0.3267246186733246, "learning_rate": 1.8276045761301394e-05, "loss": 0.4782, "step": 13900 }, { "epoch": 0.3816858868753432, "grad_norm": 0.43150362372398376, "learning_rate": 1.8275803326820545e-05, "loss": 0.4833, "step": 13901 }, { "epoch": 0.3817133443163097, "grad_norm": 0.37292107939720154, "learning_rate": 1.8275560876902682e-05, "loss": 0.5014, "step": 13902 }, { "epoch": 0.3817408017572762, "grad_norm": 0.3498885929584503, "learning_rate": 1.8275318411548254e-05, "loss": 0.4885, "step": 13903 }, { "epoch": 0.3817682591982427, "grad_norm": 0.40131014585494995, "learning_rate": 1.827507593075772e-05, "loss": 0.4835, "step": 13904 }, { "epoch": 0.38179571663920925, "grad_norm": 0.37820082902908325, "learning_rate": 1.8274833434531527e-05, "loss": 0.5764, "step": 13905 }, { "epoch": 0.38182317408017574, "grad_norm": 0.3251263201236725, "learning_rate": 1.827459092287013e-05, "loss": 0.5124, "step": 13906 }, { "epoch": 0.38185063152114224, "grad_norm": 0.39515992999076843, "learning_rate": 1.8274348395773976e-05, "loss": 0.5622, "step": 13907 }, { "epoch": 0.38187808896210873, "grad_norm": 0.3760525584220886, "learning_rate": 1.8274105853243524e-05, "loss": 0.4205, "step": 13908 }, { "epoch": 0.3819055464030752, "grad_norm": 0.36672553420066833, "learning_rate": 1.8273863295279223e-05, "loss": 0.5298, "step": 13909 }, { "epoch": 0.3819330038440417, "grad_norm": 0.3371449112892151, "learning_rate": 1.8273620721881527e-05, "loss": 0.4396, "step": 13910 }, { "epoch": 0.3819604612850082, "grad_norm": 0.4214079976081848, "learning_rate": 1.827337813305089e-05, "loss": 0.5577, "step": 13911 }, { "epoch": 0.38198791872597476, "grad_norm": 0.3772503435611725, "learning_rate": 1.827313552878776e-05, "loss": 0.5273, "step": 13912 }, { "epoch": 0.38201537616694126, "grad_norm": 0.4505605697631836, "learning_rate": 1.827289290909259e-05, "loss": 0.5291, "step": 13913 }, { "epoch": 0.38204283360790775, "grad_norm": 0.41761326789855957, "learning_rate": 1.8272650273965836e-05, "loss": 0.5467, "step": 13914 }, { "epoch": 0.38207029104887424, "grad_norm": 0.3570132851600647, "learning_rate": 1.827240762340795e-05, "loss": 0.6023, "step": 13915 }, { "epoch": 0.38209774848984074, "grad_norm": 0.4497723877429962, "learning_rate": 1.827216495741938e-05, "loss": 0.5705, "step": 13916 }, { "epoch": 0.38212520593080723, "grad_norm": 0.36400407552719116, "learning_rate": 1.8271922276000587e-05, "loss": 0.4924, "step": 13917 }, { "epoch": 0.3821526633717737, "grad_norm": 0.3898507356643677, "learning_rate": 1.8271679579152014e-05, "loss": 0.498, "step": 13918 }, { "epoch": 0.3821801208127403, "grad_norm": 0.35017484426498413, "learning_rate": 1.8271436866874123e-05, "loss": 0.4883, "step": 13919 }, { "epoch": 0.38220757825370677, "grad_norm": 0.37291964888572693, "learning_rate": 1.827119413916736e-05, "loss": 0.5296, "step": 13920 }, { "epoch": 0.38223503569467326, "grad_norm": 0.3607848882675171, "learning_rate": 1.827095139603218e-05, "loss": 0.4777, "step": 13921 }, { "epoch": 0.38226249313563976, "grad_norm": 0.3584233522415161, "learning_rate": 1.8270708637469034e-05, "loss": 0.4104, "step": 13922 }, { "epoch": 0.38228995057660625, "grad_norm": 0.3595774173736572, "learning_rate": 1.827046586347838e-05, "loss": 0.4619, "step": 13923 }, { "epoch": 0.38231740801757275, "grad_norm": 0.3315858840942383, "learning_rate": 1.8270223074060668e-05, "loss": 0.3786, "step": 13924 }, { "epoch": 0.38234486545853924, "grad_norm": 0.30800914764404297, "learning_rate": 1.826998026921635e-05, "loss": 0.4011, "step": 13925 }, { "epoch": 0.3823723228995058, "grad_norm": 0.38222214579582214, "learning_rate": 1.8269737448945877e-05, "loss": 0.5894, "step": 13926 }, { "epoch": 0.3823997803404723, "grad_norm": 0.3308667838573456, "learning_rate": 1.8269494613249704e-05, "loss": 0.4619, "step": 13927 }, { "epoch": 0.3824272377814388, "grad_norm": 0.3982369303703308, "learning_rate": 1.8269251762128286e-05, "loss": 0.4722, "step": 13928 }, { "epoch": 0.38245469522240527, "grad_norm": 0.3653436303138733, "learning_rate": 1.8269008895582073e-05, "loss": 0.482, "step": 13929 }, { "epoch": 0.38248215266337177, "grad_norm": 0.31632134318351746, "learning_rate": 1.8268766013611522e-05, "loss": 0.4787, "step": 13930 }, { "epoch": 0.38250961010433826, "grad_norm": 0.3835001587867737, "learning_rate": 1.826852311621708e-05, "loss": 0.3859, "step": 13931 }, { "epoch": 0.38253706754530475, "grad_norm": 0.3605281412601471, "learning_rate": 1.8268280203399205e-05, "loss": 0.5087, "step": 13932 }, { "epoch": 0.3825645249862713, "grad_norm": 0.3690866529941559, "learning_rate": 1.8268037275158348e-05, "loss": 0.5236, "step": 13933 }, { "epoch": 0.3825919824272378, "grad_norm": 0.3921816051006317, "learning_rate": 1.8267794331494965e-05, "loss": 0.4789, "step": 13934 }, { "epoch": 0.3826194398682043, "grad_norm": 0.34866631031036377, "learning_rate": 1.82675513724095e-05, "loss": 0.5621, "step": 13935 }, { "epoch": 0.3826468973091708, "grad_norm": 0.3998859226703644, "learning_rate": 1.826730839790242e-05, "loss": 0.5026, "step": 13936 }, { "epoch": 0.3826743547501373, "grad_norm": 0.3465539515018463, "learning_rate": 1.826706540797417e-05, "loss": 0.445, "step": 13937 }, { "epoch": 0.3827018121911038, "grad_norm": 0.4993263781070709, "learning_rate": 1.8266822402625202e-05, "loss": 0.5743, "step": 13938 }, { "epoch": 0.38272926963207027, "grad_norm": 0.408836305141449, "learning_rate": 1.8266579381855973e-05, "loss": 0.5285, "step": 13939 }, { "epoch": 0.3827567270730368, "grad_norm": 0.33840101957321167, "learning_rate": 1.8266336345666936e-05, "loss": 0.4905, "step": 13940 }, { "epoch": 0.3827841845140033, "grad_norm": 0.39024287462234497, "learning_rate": 1.8266093294058542e-05, "loss": 0.4985, "step": 13941 }, { "epoch": 0.3828116419549698, "grad_norm": 0.4047720432281494, "learning_rate": 1.8265850227031248e-05, "loss": 0.5125, "step": 13942 }, { "epoch": 0.3828390993959363, "grad_norm": 0.37581437826156616, "learning_rate": 1.8265607144585504e-05, "loss": 0.5274, "step": 13943 }, { "epoch": 0.3828665568369028, "grad_norm": 0.3401198387145996, "learning_rate": 1.8265364046721763e-05, "loss": 0.5265, "step": 13944 }, { "epoch": 0.3828940142778693, "grad_norm": 0.4069547951221466, "learning_rate": 1.8265120933440483e-05, "loss": 0.4694, "step": 13945 }, { "epoch": 0.3829214717188358, "grad_norm": 0.3465431034564972, "learning_rate": 1.8264877804742112e-05, "loss": 0.5206, "step": 13946 }, { "epoch": 0.38294892915980233, "grad_norm": 0.3165905475616455, "learning_rate": 1.826463466062711e-05, "loss": 0.4064, "step": 13947 }, { "epoch": 0.3829763866007688, "grad_norm": 0.3559388518333435, "learning_rate": 1.826439150109592e-05, "loss": 0.585, "step": 13948 }, { "epoch": 0.3830038440417353, "grad_norm": 0.43184831738471985, "learning_rate": 1.8264148326149004e-05, "loss": 0.5302, "step": 13949 }, { "epoch": 0.3830313014827018, "grad_norm": 0.3486242890357971, "learning_rate": 1.826390513578682e-05, "loss": 0.56, "step": 13950 }, { "epoch": 0.3830587589236683, "grad_norm": 0.4300706386566162, "learning_rate": 1.826366193000981e-05, "loss": 0.4843, "step": 13951 }, { "epoch": 0.3830862163646348, "grad_norm": 0.39146852493286133, "learning_rate": 1.826341870881843e-05, "loss": 0.5198, "step": 13952 }, { "epoch": 0.3831136738056013, "grad_norm": 0.4085060954093933, "learning_rate": 1.8263175472213143e-05, "loss": 0.4467, "step": 13953 }, { "epoch": 0.38314113124656785, "grad_norm": 0.37857067584991455, "learning_rate": 1.8262932220194392e-05, "loss": 0.4888, "step": 13954 }, { "epoch": 0.38316858868753434, "grad_norm": 0.36319029331207275, "learning_rate": 1.8262688952762637e-05, "loss": 0.5147, "step": 13955 }, { "epoch": 0.38319604612850083, "grad_norm": 0.3573656380176544, "learning_rate": 1.826244566991833e-05, "loss": 0.4803, "step": 13956 }, { "epoch": 0.3832235035694673, "grad_norm": 0.3512285351753235, "learning_rate": 1.8262202371661925e-05, "loss": 0.5616, "step": 13957 }, { "epoch": 0.3832509610104338, "grad_norm": 0.42042943835258484, "learning_rate": 1.8261959057993876e-05, "loss": 0.6351, "step": 13958 }, { "epoch": 0.3832784184514003, "grad_norm": 0.4108961820602417, "learning_rate": 1.8261715728914636e-05, "loss": 0.5151, "step": 13959 }, { "epoch": 0.3833058758923668, "grad_norm": 0.3968958556652069, "learning_rate": 1.826147238442466e-05, "loss": 0.552, "step": 13960 }, { "epoch": 0.38333333333333336, "grad_norm": 0.39028382301330566, "learning_rate": 1.82612290245244e-05, "loss": 0.543, "step": 13961 }, { "epoch": 0.38336079077429985, "grad_norm": 0.3289088010787964, "learning_rate": 1.826098564921431e-05, "loss": 0.3926, "step": 13962 }, { "epoch": 0.38338824821526635, "grad_norm": 0.3756379187107086, "learning_rate": 1.826074225849485e-05, "loss": 0.5692, "step": 13963 }, { "epoch": 0.38341570565623284, "grad_norm": 0.3858208954334259, "learning_rate": 1.8260498852366467e-05, "loss": 0.5092, "step": 13964 }, { "epoch": 0.38344316309719934, "grad_norm": 0.41005489230155945, "learning_rate": 1.8260255430829613e-05, "loss": 0.4794, "step": 13965 }, { "epoch": 0.38347062053816583, "grad_norm": 0.43410614132881165, "learning_rate": 1.8260011993884748e-05, "loss": 0.5918, "step": 13966 }, { "epoch": 0.3834980779791323, "grad_norm": 0.4048735201358795, "learning_rate": 1.825976854153233e-05, "loss": 0.5153, "step": 13967 }, { "epoch": 0.3835255354200989, "grad_norm": 0.36828282475471497, "learning_rate": 1.8259525073772804e-05, "loss": 0.5631, "step": 13968 }, { "epoch": 0.38355299286106537, "grad_norm": 0.3681231737136841, "learning_rate": 1.8259281590606622e-05, "loss": 0.417, "step": 13969 }, { "epoch": 0.38358045030203186, "grad_norm": 0.3890265226364136, "learning_rate": 1.8259038092034253e-05, "loss": 0.4867, "step": 13970 }, { "epoch": 0.38360790774299836, "grad_norm": 0.360311359167099, "learning_rate": 1.8258794578056136e-05, "loss": 0.4867, "step": 13971 }, { "epoch": 0.38363536518396485, "grad_norm": 0.34107324481010437, "learning_rate": 1.8258551048672733e-05, "loss": 0.5005, "step": 13972 }, { "epoch": 0.38366282262493134, "grad_norm": 0.3347267806529999, "learning_rate": 1.8258307503884495e-05, "loss": 0.4645, "step": 13973 }, { "epoch": 0.38369028006589784, "grad_norm": 0.39427459239959717, "learning_rate": 1.825806394369188e-05, "loss": 0.5398, "step": 13974 }, { "epoch": 0.3837177375068644, "grad_norm": 0.38120797276496887, "learning_rate": 1.825782036809534e-05, "loss": 0.5797, "step": 13975 }, { "epoch": 0.3837451949478309, "grad_norm": 0.483855277299881, "learning_rate": 1.8257576777095328e-05, "loss": 0.4858, "step": 13976 }, { "epoch": 0.3837726523887974, "grad_norm": 0.38435935974121094, "learning_rate": 1.8257333170692303e-05, "loss": 0.4475, "step": 13977 }, { "epoch": 0.38380010982976387, "grad_norm": 0.3731548488140106, "learning_rate": 1.825708954888671e-05, "loss": 0.5471, "step": 13978 }, { "epoch": 0.38382756727073036, "grad_norm": 0.343902587890625, "learning_rate": 1.8256845911679014e-05, "loss": 0.4471, "step": 13979 }, { "epoch": 0.38385502471169686, "grad_norm": 0.31429341435432434, "learning_rate": 1.8256602259069665e-05, "loss": 0.4512, "step": 13980 }, { "epoch": 0.38388248215266335, "grad_norm": 0.36044129729270935, "learning_rate": 1.8256358591059115e-05, "loss": 0.4701, "step": 13981 }, { "epoch": 0.3839099395936299, "grad_norm": 0.3851945996284485, "learning_rate": 1.8256114907647826e-05, "loss": 0.5746, "step": 13982 }, { "epoch": 0.3839373970345964, "grad_norm": 0.4410184621810913, "learning_rate": 1.825587120883624e-05, "loss": 0.6042, "step": 13983 }, { "epoch": 0.3839648544755629, "grad_norm": 0.38871657848358154, "learning_rate": 1.8255627494624824e-05, "loss": 0.4139, "step": 13984 }, { "epoch": 0.3839923119165294, "grad_norm": 0.4199775159358978, "learning_rate": 1.825538376501403e-05, "loss": 0.4382, "step": 13985 }, { "epoch": 0.3840197693574959, "grad_norm": 0.36846691370010376, "learning_rate": 1.8255140020004306e-05, "loss": 0.5365, "step": 13986 }, { "epoch": 0.38404722679846237, "grad_norm": 0.40677642822265625, "learning_rate": 1.8254896259596116e-05, "loss": 0.5233, "step": 13987 }, { "epoch": 0.38407468423942887, "grad_norm": 0.38567695021629333, "learning_rate": 1.8254652483789907e-05, "loss": 0.5867, "step": 13988 }, { "epoch": 0.3841021416803954, "grad_norm": 0.3708449602127075, "learning_rate": 1.8254408692586136e-05, "loss": 0.5614, "step": 13989 }, { "epoch": 0.3841295991213619, "grad_norm": 0.418364018201828, "learning_rate": 1.825416488598526e-05, "loss": 0.4937, "step": 13990 }, { "epoch": 0.3841570565623284, "grad_norm": 0.41072723269462585, "learning_rate": 1.825392106398773e-05, "loss": 0.5583, "step": 13991 }, { "epoch": 0.3841845140032949, "grad_norm": 0.40427446365356445, "learning_rate": 1.8253677226594003e-05, "loss": 0.5418, "step": 13992 }, { "epoch": 0.3842119714442614, "grad_norm": 0.3257812261581421, "learning_rate": 1.8253433373804538e-05, "loss": 0.4247, "step": 13993 }, { "epoch": 0.3842394288852279, "grad_norm": 0.7855621576309204, "learning_rate": 1.825318950561978e-05, "loss": 0.4454, "step": 13994 }, { "epoch": 0.3842668863261944, "grad_norm": 0.35842397809028625, "learning_rate": 1.8252945622040192e-05, "loss": 0.4693, "step": 13995 }, { "epoch": 0.3842943437671609, "grad_norm": 0.43894949555397034, "learning_rate": 1.8252701723066225e-05, "loss": 0.5711, "step": 13996 }, { "epoch": 0.3843218012081274, "grad_norm": 0.3998887240886688, "learning_rate": 1.8252457808698338e-05, "loss": 0.609, "step": 13997 }, { "epoch": 0.3843492586490939, "grad_norm": 0.3589972257614136, "learning_rate": 1.8252213878936982e-05, "loss": 0.5141, "step": 13998 }, { "epoch": 0.3843767160900604, "grad_norm": 0.32889533042907715, "learning_rate": 1.8251969933782612e-05, "loss": 0.4457, "step": 13999 }, { "epoch": 0.3844041735310269, "grad_norm": 0.4249653220176697, "learning_rate": 1.8251725973235685e-05, "loss": 0.558, "step": 14000 }, { "epoch": 0.3844316309719934, "grad_norm": 0.3567918837070465, "learning_rate": 1.8251481997296654e-05, "loss": 0.4887, "step": 14001 }, { "epoch": 0.3844590884129599, "grad_norm": 0.3720819056034088, "learning_rate": 1.8251238005965978e-05, "loss": 0.5025, "step": 14002 }, { "epoch": 0.3844865458539264, "grad_norm": 0.39419111609458923, "learning_rate": 1.825099399924411e-05, "loss": 0.5973, "step": 14003 }, { "epoch": 0.38451400329489294, "grad_norm": 0.3126358389854431, "learning_rate": 1.8250749977131504e-05, "loss": 0.4178, "step": 14004 }, { "epoch": 0.38454146073585943, "grad_norm": 0.3296810984611511, "learning_rate": 1.8250505939628615e-05, "loss": 0.5636, "step": 14005 }, { "epoch": 0.3845689181768259, "grad_norm": 0.5792973637580872, "learning_rate": 1.82502618867359e-05, "loss": 0.4358, "step": 14006 }, { "epoch": 0.3845963756177924, "grad_norm": 0.3696569502353668, "learning_rate": 1.8250017818453815e-05, "loss": 0.4272, "step": 14007 }, { "epoch": 0.3846238330587589, "grad_norm": 0.39589837193489075, "learning_rate": 1.824977373478281e-05, "loss": 0.572, "step": 14008 }, { "epoch": 0.3846512904997254, "grad_norm": 0.3693767488002777, "learning_rate": 1.8249529635723348e-05, "loss": 0.4989, "step": 14009 }, { "epoch": 0.3846787479406919, "grad_norm": 0.3434186279773712, "learning_rate": 1.8249285521275877e-05, "loss": 0.401, "step": 14010 }, { "epoch": 0.38470620538165845, "grad_norm": 0.36127105355262756, "learning_rate": 1.8249041391440854e-05, "loss": 0.4969, "step": 14011 }, { "epoch": 0.38473366282262494, "grad_norm": 0.4253843426704407, "learning_rate": 1.8248797246218738e-05, "loss": 0.4871, "step": 14012 }, { "epoch": 0.38476112026359144, "grad_norm": 0.40651535987854004, "learning_rate": 1.8248553085609984e-05, "loss": 0.4897, "step": 14013 }, { "epoch": 0.38478857770455793, "grad_norm": 0.34978434443473816, "learning_rate": 1.8248308909615046e-05, "loss": 0.4496, "step": 14014 }, { "epoch": 0.3848160351455244, "grad_norm": 0.4662056565284729, "learning_rate": 1.8248064718234378e-05, "loss": 0.5444, "step": 14015 }, { "epoch": 0.3848434925864909, "grad_norm": 0.4043571352958679, "learning_rate": 1.8247820511468438e-05, "loss": 0.5548, "step": 14016 }, { "epoch": 0.3848709500274574, "grad_norm": 0.38807183504104614, "learning_rate": 1.8247576289317678e-05, "loss": 0.5701, "step": 14017 }, { "epoch": 0.38489840746842396, "grad_norm": 0.33410945534706116, "learning_rate": 1.8247332051782558e-05, "loss": 0.5059, "step": 14018 }, { "epoch": 0.38492586490939046, "grad_norm": 0.37202829122543335, "learning_rate": 1.8247087798863533e-05, "loss": 0.5459, "step": 14019 }, { "epoch": 0.38495332235035695, "grad_norm": 0.3552018702030182, "learning_rate": 1.8246843530561052e-05, "loss": 0.5432, "step": 14020 }, { "epoch": 0.38498077979132345, "grad_norm": 0.3733099400997162, "learning_rate": 1.824659924687558e-05, "loss": 0.5477, "step": 14021 }, { "epoch": 0.38500823723228994, "grad_norm": 0.3672376275062561, "learning_rate": 1.8246354947807567e-05, "loss": 0.5702, "step": 14022 }, { "epoch": 0.38503569467325643, "grad_norm": 0.3447495698928833, "learning_rate": 1.824611063335747e-05, "loss": 0.5636, "step": 14023 }, { "epoch": 0.38506315211422293, "grad_norm": 0.5106338858604431, "learning_rate": 1.8245866303525744e-05, "loss": 0.4314, "step": 14024 }, { "epoch": 0.3850906095551895, "grad_norm": 0.33148735761642456, "learning_rate": 1.824562195831285e-05, "loss": 0.4857, "step": 14025 }, { "epoch": 0.38511806699615597, "grad_norm": 0.6828351020812988, "learning_rate": 1.8245377597719235e-05, "loss": 0.4843, "step": 14026 }, { "epoch": 0.38514552443712247, "grad_norm": 0.3644402325153351, "learning_rate": 1.8245133221745358e-05, "loss": 0.5431, "step": 14027 }, { "epoch": 0.38517298187808896, "grad_norm": 0.3804607093334198, "learning_rate": 1.824488883039168e-05, "loss": 0.557, "step": 14028 }, { "epoch": 0.38520043931905545, "grad_norm": 0.39939379692077637, "learning_rate": 1.8244644423658648e-05, "loss": 0.5242, "step": 14029 }, { "epoch": 0.38522789676002195, "grad_norm": 0.40380755066871643, "learning_rate": 1.8244400001546727e-05, "loss": 0.5214, "step": 14030 }, { "epoch": 0.38525535420098844, "grad_norm": 0.4032784402370453, "learning_rate": 1.8244155564056367e-05, "loss": 0.5471, "step": 14031 }, { "epoch": 0.385282811641955, "grad_norm": 0.36109986901283264, "learning_rate": 1.8243911111188026e-05, "loss": 0.5066, "step": 14032 }, { "epoch": 0.3853102690829215, "grad_norm": 0.3474540412425995, "learning_rate": 1.824366664294216e-05, "loss": 0.5161, "step": 14033 }, { "epoch": 0.385337726523888, "grad_norm": 0.4094393253326416, "learning_rate": 1.8243422159319226e-05, "loss": 0.5935, "step": 14034 }, { "epoch": 0.3853651839648545, "grad_norm": 0.3866804838180542, "learning_rate": 1.8243177660319674e-05, "loss": 0.4964, "step": 14035 }, { "epoch": 0.38539264140582097, "grad_norm": 0.3495941460132599, "learning_rate": 1.8242933145943968e-05, "loss": 0.5254, "step": 14036 }, { "epoch": 0.38542009884678746, "grad_norm": 0.375085711479187, "learning_rate": 1.824268861619256e-05, "loss": 0.488, "step": 14037 }, { "epoch": 0.38544755628775396, "grad_norm": 0.381561815738678, "learning_rate": 1.824244407106591e-05, "loss": 0.5985, "step": 14038 }, { "epoch": 0.3854750137287205, "grad_norm": 0.33077648282051086, "learning_rate": 1.8242199510564468e-05, "loss": 0.3744, "step": 14039 }, { "epoch": 0.385502471169687, "grad_norm": 0.33609816431999207, "learning_rate": 1.8241954934688694e-05, "loss": 0.3931, "step": 14040 }, { "epoch": 0.3855299286106535, "grad_norm": 0.3833978474140167, "learning_rate": 1.8241710343439042e-05, "loss": 0.7034, "step": 14041 }, { "epoch": 0.38555738605162, "grad_norm": 0.39826369285583496, "learning_rate": 1.8241465736815972e-05, "loss": 0.4482, "step": 14042 }, { "epoch": 0.3855848434925865, "grad_norm": 0.3782382309436798, "learning_rate": 1.824122111481994e-05, "loss": 0.4914, "step": 14043 }, { "epoch": 0.385612300933553, "grad_norm": 0.3173300623893738, "learning_rate": 1.8240976477451396e-05, "loss": 0.4391, "step": 14044 }, { "epoch": 0.38563975837451947, "grad_norm": 0.39536771178245544, "learning_rate": 1.82407318247108e-05, "loss": 0.4865, "step": 14045 }, { "epoch": 0.385667215815486, "grad_norm": 0.353209525346756, "learning_rate": 1.824048715659861e-05, "loss": 0.514, "step": 14046 }, { "epoch": 0.3856946732564525, "grad_norm": 0.3811784088611603, "learning_rate": 1.8240242473115288e-05, "loss": 0.4977, "step": 14047 }, { "epoch": 0.385722130697419, "grad_norm": 0.36604616045951843, "learning_rate": 1.8239997774261277e-05, "loss": 0.5675, "step": 14048 }, { "epoch": 0.3857495881383855, "grad_norm": 0.33414775133132935, "learning_rate": 1.823975306003704e-05, "loss": 0.4361, "step": 14049 }, { "epoch": 0.385777045579352, "grad_norm": 0.5045976638793945, "learning_rate": 1.8239508330443035e-05, "loss": 0.4098, "step": 14050 }, { "epoch": 0.3858045030203185, "grad_norm": 0.390389084815979, "learning_rate": 1.823926358547972e-05, "loss": 0.5387, "step": 14051 }, { "epoch": 0.385831960461285, "grad_norm": 0.3621509373188019, "learning_rate": 1.8239018825147546e-05, "loss": 0.428, "step": 14052 }, { "epoch": 0.38585941790225153, "grad_norm": 0.37463808059692383, "learning_rate": 1.8238774049446972e-05, "loss": 0.52, "step": 14053 }, { "epoch": 0.385886875343218, "grad_norm": 0.41150549054145813, "learning_rate": 1.8238529258378457e-05, "loss": 0.4896, "step": 14054 }, { "epoch": 0.3859143327841845, "grad_norm": 0.339849054813385, "learning_rate": 1.8238284451942453e-05, "loss": 0.4447, "step": 14055 }, { "epoch": 0.385941790225151, "grad_norm": 0.7891032695770264, "learning_rate": 1.8238039630139418e-05, "loss": 0.572, "step": 14056 }, { "epoch": 0.3859692476661175, "grad_norm": 0.3172597885131836, "learning_rate": 1.8237794792969813e-05, "loss": 0.5291, "step": 14057 }, { "epoch": 0.385996705107084, "grad_norm": 0.36595073342323303, "learning_rate": 1.823754994043409e-05, "loss": 0.4992, "step": 14058 }, { "epoch": 0.3860241625480505, "grad_norm": 0.39051225781440735, "learning_rate": 1.8237305072532708e-05, "loss": 0.5249, "step": 14059 }, { "epoch": 0.38605161998901705, "grad_norm": 0.4220277965068817, "learning_rate": 1.8237060189266123e-05, "loss": 0.5976, "step": 14060 }, { "epoch": 0.38607907742998354, "grad_norm": 0.37225541472435, "learning_rate": 1.8236815290634794e-05, "loss": 0.4884, "step": 14061 }, { "epoch": 0.38610653487095004, "grad_norm": 0.3839455544948578, "learning_rate": 1.8236570376639172e-05, "loss": 0.5519, "step": 14062 }, { "epoch": 0.38613399231191653, "grad_norm": 0.3510797619819641, "learning_rate": 1.823632544727972e-05, "loss": 0.4345, "step": 14063 }, { "epoch": 0.386161449752883, "grad_norm": 0.4299132823944092, "learning_rate": 1.823608050255689e-05, "loss": 0.5302, "step": 14064 }, { "epoch": 0.3861889071938495, "grad_norm": 0.36782678961753845, "learning_rate": 1.8235835542471143e-05, "loss": 0.5627, "step": 14065 }, { "epoch": 0.386216364634816, "grad_norm": 0.39651867747306824, "learning_rate": 1.823559056702293e-05, "loss": 0.5497, "step": 14066 }, { "epoch": 0.38624382207578256, "grad_norm": 0.35798338055610657, "learning_rate": 1.823534557621272e-05, "loss": 0.5119, "step": 14067 }, { "epoch": 0.38627127951674906, "grad_norm": 0.36801934242248535, "learning_rate": 1.8235100570040957e-05, "loss": 0.5283, "step": 14068 }, { "epoch": 0.38629873695771555, "grad_norm": 0.40801045298576355, "learning_rate": 1.8234855548508102e-05, "loss": 0.5833, "step": 14069 }, { "epoch": 0.38632619439868204, "grad_norm": 0.405068963766098, "learning_rate": 1.8234610511614616e-05, "loss": 0.5606, "step": 14070 }, { "epoch": 0.38635365183964854, "grad_norm": 0.3675159513950348, "learning_rate": 1.8234365459360953e-05, "loss": 0.4817, "step": 14071 }, { "epoch": 0.38638110928061503, "grad_norm": 0.39243021607398987, "learning_rate": 1.8234120391747568e-05, "loss": 0.5133, "step": 14072 }, { "epoch": 0.3864085667215815, "grad_norm": 0.393812894821167, "learning_rate": 1.823387530877492e-05, "loss": 0.5517, "step": 14073 }, { "epoch": 0.3864360241625481, "grad_norm": 0.427926629781723, "learning_rate": 1.823363021044347e-05, "loss": 0.56, "step": 14074 }, { "epoch": 0.38646348160351457, "grad_norm": 0.3570818603038788, "learning_rate": 1.823338509675367e-05, "loss": 0.5352, "step": 14075 }, { "epoch": 0.38649093904448106, "grad_norm": 0.43916141986846924, "learning_rate": 1.823313996770598e-05, "loss": 0.5472, "step": 14076 }, { "epoch": 0.38651839648544756, "grad_norm": 0.3785596787929535, "learning_rate": 1.8232894823300852e-05, "loss": 0.4988, "step": 14077 }, { "epoch": 0.38654585392641405, "grad_norm": 0.40699413418769836, "learning_rate": 1.8232649663538753e-05, "loss": 0.5369, "step": 14078 }, { "epoch": 0.38657331136738055, "grad_norm": 0.3654366135597229, "learning_rate": 1.8232404488420132e-05, "loss": 0.4853, "step": 14079 }, { "epoch": 0.38660076880834704, "grad_norm": 0.3845691382884979, "learning_rate": 1.8232159297945448e-05, "loss": 0.5628, "step": 14080 }, { "epoch": 0.3866282262493136, "grad_norm": 0.4137239456176758, "learning_rate": 1.8231914092115164e-05, "loss": 0.4861, "step": 14081 }, { "epoch": 0.3866556836902801, "grad_norm": 0.3792054355144501, "learning_rate": 1.8231668870929728e-05, "loss": 0.5647, "step": 14082 }, { "epoch": 0.3866831411312466, "grad_norm": 0.3637066185474396, "learning_rate": 1.8231423634389608e-05, "loss": 0.4797, "step": 14083 }, { "epoch": 0.38671059857221307, "grad_norm": 0.35625526309013367, "learning_rate": 1.823117838249525e-05, "loss": 0.4935, "step": 14084 }, { "epoch": 0.38673805601317957, "grad_norm": 0.3656194508075714, "learning_rate": 1.823093311524712e-05, "loss": 0.5389, "step": 14085 }, { "epoch": 0.38676551345414606, "grad_norm": 0.44352519512176514, "learning_rate": 1.8230687832645672e-05, "loss": 0.5604, "step": 14086 }, { "epoch": 0.38679297089511255, "grad_norm": 0.44022423028945923, "learning_rate": 1.8230442534691363e-05, "loss": 0.4034, "step": 14087 }, { "epoch": 0.3868204283360791, "grad_norm": 0.3629774749279022, "learning_rate": 1.8230197221384657e-05, "loss": 0.4299, "step": 14088 }, { "epoch": 0.3868478857770456, "grad_norm": 0.31954848766326904, "learning_rate": 1.8229951892726e-05, "loss": 0.4094, "step": 14089 }, { "epoch": 0.3868753432180121, "grad_norm": 0.3402932584285736, "learning_rate": 1.8229706548715857e-05, "loss": 0.5206, "step": 14090 }, { "epoch": 0.3869028006589786, "grad_norm": 0.3694913387298584, "learning_rate": 1.822946118935469e-05, "loss": 0.5409, "step": 14091 }, { "epoch": 0.3869302580999451, "grad_norm": 0.40915629267692566, "learning_rate": 1.8229215814642945e-05, "loss": 0.5873, "step": 14092 }, { "epoch": 0.3869577155409116, "grad_norm": 0.3574916124343872, "learning_rate": 1.8228970424581088e-05, "loss": 0.4856, "step": 14093 }, { "epoch": 0.38698517298187807, "grad_norm": 0.42402222752571106, "learning_rate": 1.8228725019169578e-05, "loss": 0.4935, "step": 14094 }, { "epoch": 0.3870126304228446, "grad_norm": 0.34526070952415466, "learning_rate": 1.8228479598408868e-05, "loss": 0.5298, "step": 14095 }, { "epoch": 0.3870400878638111, "grad_norm": 0.41104555130004883, "learning_rate": 1.8228234162299413e-05, "loss": 0.5465, "step": 14096 }, { "epoch": 0.3870675453047776, "grad_norm": 0.3349936306476593, "learning_rate": 1.8227988710841684e-05, "loss": 0.493, "step": 14097 }, { "epoch": 0.3870950027457441, "grad_norm": 0.3800337314605713, "learning_rate": 1.822774324403612e-05, "loss": 0.5017, "step": 14098 }, { "epoch": 0.3871224601867106, "grad_norm": 0.4985032379627228, "learning_rate": 1.8227497761883195e-05, "loss": 0.4953, "step": 14099 }, { "epoch": 0.3871499176276771, "grad_norm": 8.311792373657227, "learning_rate": 1.822725226438336e-05, "loss": 0.4752, "step": 14100 }, { "epoch": 0.3871773750686436, "grad_norm": 0.35975801944732666, "learning_rate": 1.822700675153707e-05, "loss": 0.4602, "step": 14101 }, { "epoch": 0.38720483250961013, "grad_norm": 0.38360512256622314, "learning_rate": 1.822676122334479e-05, "loss": 0.4896, "step": 14102 }, { "epoch": 0.3872322899505766, "grad_norm": 0.3947080969810486, "learning_rate": 1.8226515679806976e-05, "loss": 0.6012, "step": 14103 }, { "epoch": 0.3872597473915431, "grad_norm": 0.4513695538043976, "learning_rate": 1.8226270120924084e-05, "loss": 0.5103, "step": 14104 }, { "epoch": 0.3872872048325096, "grad_norm": 0.3797403872013092, "learning_rate": 1.822602454669657e-05, "loss": 0.5841, "step": 14105 }, { "epoch": 0.3873146622734761, "grad_norm": 0.37784725427627563, "learning_rate": 1.8225778957124897e-05, "loss": 0.5177, "step": 14106 }, { "epoch": 0.3873421197144426, "grad_norm": 0.44778233766555786, "learning_rate": 1.822553335220952e-05, "loss": 0.5326, "step": 14107 }, { "epoch": 0.3873695771554091, "grad_norm": 0.588991641998291, "learning_rate": 1.8225287731950898e-05, "loss": 0.5185, "step": 14108 }, { "epoch": 0.38739703459637564, "grad_norm": 0.4092925190925598, "learning_rate": 1.822504209634949e-05, "loss": 0.5627, "step": 14109 }, { "epoch": 0.38742449203734214, "grad_norm": 0.8498042821884155, "learning_rate": 1.822479644540575e-05, "loss": 0.5154, "step": 14110 }, { "epoch": 0.38745194947830863, "grad_norm": 0.36591655015945435, "learning_rate": 1.8224550779120144e-05, "loss": 0.5236, "step": 14111 }, { "epoch": 0.3874794069192751, "grad_norm": 0.3656807541847229, "learning_rate": 1.8224305097493124e-05, "loss": 0.5845, "step": 14112 }, { "epoch": 0.3875068643602416, "grad_norm": 0.423927903175354, "learning_rate": 1.822405940052515e-05, "loss": 0.6141, "step": 14113 }, { "epoch": 0.3875343218012081, "grad_norm": 0.4219048321247101, "learning_rate": 1.822381368821668e-05, "loss": 0.5482, "step": 14114 }, { "epoch": 0.3875617792421746, "grad_norm": 0.3731614053249359, "learning_rate": 1.8223567960568175e-05, "loss": 0.4444, "step": 14115 }, { "epoch": 0.38758923668314116, "grad_norm": 0.362409770488739, "learning_rate": 1.822332221758009e-05, "loss": 0.5831, "step": 14116 }, { "epoch": 0.38761669412410765, "grad_norm": 0.3474118709564209, "learning_rate": 1.8223076459252885e-05, "loss": 0.4717, "step": 14117 }, { "epoch": 0.38764415156507415, "grad_norm": 0.44740843772888184, "learning_rate": 1.8222830685587017e-05, "loss": 0.4837, "step": 14118 }, { "epoch": 0.38767160900604064, "grad_norm": 0.3600844740867615, "learning_rate": 1.8222584896582942e-05, "loss": 0.5199, "step": 14119 }, { "epoch": 0.38769906644700713, "grad_norm": 0.37647369503974915, "learning_rate": 1.8222339092241126e-05, "loss": 0.6043, "step": 14120 }, { "epoch": 0.38772652388797363, "grad_norm": 0.5159639716148376, "learning_rate": 1.8222093272562023e-05, "loss": 0.5815, "step": 14121 }, { "epoch": 0.3877539813289401, "grad_norm": 0.35462674498558044, "learning_rate": 1.822184743754609e-05, "loss": 0.4952, "step": 14122 }, { "epoch": 0.3877814387699067, "grad_norm": 0.3894962966442108, "learning_rate": 1.8221601587193788e-05, "loss": 0.4966, "step": 14123 }, { "epoch": 0.38780889621087317, "grad_norm": 0.36336007714271545, "learning_rate": 1.8221355721505577e-05, "loss": 0.568, "step": 14124 }, { "epoch": 0.38783635365183966, "grad_norm": 0.36869803071022034, "learning_rate": 1.8221109840481912e-05, "loss": 0.556, "step": 14125 }, { "epoch": 0.38786381109280615, "grad_norm": 0.3675497770309448, "learning_rate": 1.8220863944123254e-05, "loss": 0.5077, "step": 14126 }, { "epoch": 0.38789126853377265, "grad_norm": 0.3234081268310547, "learning_rate": 1.822061803243006e-05, "loss": 0.5265, "step": 14127 }, { "epoch": 0.38791872597473914, "grad_norm": 0.3902517855167389, "learning_rate": 1.822037210540279e-05, "loss": 0.5284, "step": 14128 }, { "epoch": 0.38794618341570564, "grad_norm": 0.3330402374267578, "learning_rate": 1.8220126163041906e-05, "loss": 0.5158, "step": 14129 }, { "epoch": 0.38797364085667213, "grad_norm": 0.379728227853775, "learning_rate": 1.8219880205347858e-05, "loss": 0.4775, "step": 14130 }, { "epoch": 0.3880010982976387, "grad_norm": 0.3282727003097534, "learning_rate": 1.821963423232111e-05, "loss": 0.5201, "step": 14131 }, { "epoch": 0.3880285557386052, "grad_norm": 0.40717265009880066, "learning_rate": 1.8219388243962122e-05, "loss": 0.5493, "step": 14132 }, { "epoch": 0.38805601317957167, "grad_norm": 0.39037635922431946, "learning_rate": 1.8219142240271353e-05, "loss": 0.5555, "step": 14133 }, { "epoch": 0.38808347062053816, "grad_norm": 0.3914187252521515, "learning_rate": 1.821889622124926e-05, "loss": 0.5109, "step": 14134 }, { "epoch": 0.38811092806150466, "grad_norm": 0.36696669459342957, "learning_rate": 1.82186501868963e-05, "loss": 0.5048, "step": 14135 }, { "epoch": 0.38813838550247115, "grad_norm": 0.36073067784309387, "learning_rate": 1.8218404137212936e-05, "loss": 0.544, "step": 14136 }, { "epoch": 0.38816584294343764, "grad_norm": 0.35066846013069153, "learning_rate": 1.8218158072199624e-05, "loss": 0.4626, "step": 14137 }, { "epoch": 0.3881933003844042, "grad_norm": 0.37658631801605225, "learning_rate": 1.821791199185683e-05, "loss": 0.5338, "step": 14138 }, { "epoch": 0.3882207578253707, "grad_norm": 0.36394035816192627, "learning_rate": 1.8217665896185003e-05, "loss": 0.5462, "step": 14139 }, { "epoch": 0.3882482152663372, "grad_norm": 0.38717687129974365, "learning_rate": 1.8217419785184605e-05, "loss": 0.6125, "step": 14140 }, { "epoch": 0.3882756727073037, "grad_norm": 0.3724403381347656, "learning_rate": 1.82171736588561e-05, "loss": 0.5848, "step": 14141 }, { "epoch": 0.38830313014827017, "grad_norm": 0.38526830077171326, "learning_rate": 1.821692751719994e-05, "loss": 0.5591, "step": 14142 }, { "epoch": 0.38833058758923666, "grad_norm": 0.3456844389438629, "learning_rate": 1.821668136021659e-05, "loss": 0.5172, "step": 14143 }, { "epoch": 0.38835804503020316, "grad_norm": 0.3599540591239929, "learning_rate": 1.8216435187906504e-05, "loss": 0.4683, "step": 14144 }, { "epoch": 0.3883855024711697, "grad_norm": 0.3907967805862427, "learning_rate": 1.8216189000270143e-05, "loss": 0.5158, "step": 14145 }, { "epoch": 0.3884129599121362, "grad_norm": 0.4056791663169861, "learning_rate": 1.821594279730797e-05, "loss": 0.5442, "step": 14146 }, { "epoch": 0.3884404173531027, "grad_norm": 0.359477698802948, "learning_rate": 1.8215696579020442e-05, "loss": 0.4188, "step": 14147 }, { "epoch": 0.3884678747940692, "grad_norm": 0.3789295256137848, "learning_rate": 1.821545034540802e-05, "loss": 0.5662, "step": 14148 }, { "epoch": 0.3884953322350357, "grad_norm": 0.3442433774471283, "learning_rate": 1.8215204096471156e-05, "loss": 0.4989, "step": 14149 }, { "epoch": 0.3885227896760022, "grad_norm": 0.40632107853889465, "learning_rate": 1.8214957832210318e-05, "loss": 0.5195, "step": 14150 }, { "epoch": 0.38855024711696867, "grad_norm": 0.3592703938484192, "learning_rate": 1.821471155262596e-05, "loss": 0.5224, "step": 14151 }, { "epoch": 0.3885777045579352, "grad_norm": 0.3774462640285492, "learning_rate": 1.821446525771854e-05, "loss": 0.4692, "step": 14152 }, { "epoch": 0.3886051619989017, "grad_norm": 0.8145847916603088, "learning_rate": 1.8214218947488526e-05, "loss": 0.5129, "step": 14153 }, { "epoch": 0.3886326194398682, "grad_norm": 0.6568920612335205, "learning_rate": 1.821397262193637e-05, "loss": 0.5257, "step": 14154 }, { "epoch": 0.3886600768808347, "grad_norm": 0.3524383306503296, "learning_rate": 1.8213726281062533e-05, "loss": 0.4985, "step": 14155 }, { "epoch": 0.3886875343218012, "grad_norm": 0.3843335211277008, "learning_rate": 1.8213479924867476e-05, "loss": 0.488, "step": 14156 }, { "epoch": 0.3887149917627677, "grad_norm": 0.3518824577331543, "learning_rate": 1.8213233553351656e-05, "loss": 0.4793, "step": 14157 }, { "epoch": 0.3887424492037342, "grad_norm": 0.33451294898986816, "learning_rate": 1.8212987166515535e-05, "loss": 0.5018, "step": 14158 }, { "epoch": 0.38876990664470074, "grad_norm": 0.4087715446949005, "learning_rate": 1.8212740764359572e-05, "loss": 0.5195, "step": 14159 }, { "epoch": 0.38879736408566723, "grad_norm": 0.4526727497577667, "learning_rate": 1.8212494346884226e-05, "loss": 0.5279, "step": 14160 }, { "epoch": 0.3888248215266337, "grad_norm": 0.40942421555519104, "learning_rate": 1.8212247914089954e-05, "loss": 0.4722, "step": 14161 }, { "epoch": 0.3888522789676002, "grad_norm": 0.35815927386283875, "learning_rate": 1.8212001465977223e-05, "loss": 0.4689, "step": 14162 }, { "epoch": 0.3888797364085667, "grad_norm": 0.36330166459083557, "learning_rate": 1.8211755002546487e-05, "loss": 0.5281, "step": 14163 }, { "epoch": 0.3889071938495332, "grad_norm": 0.3240397870540619, "learning_rate": 1.8211508523798207e-05, "loss": 0.4056, "step": 14164 }, { "epoch": 0.3889346512904997, "grad_norm": 0.36651650071144104, "learning_rate": 1.821126202973284e-05, "loss": 0.5555, "step": 14165 }, { "epoch": 0.38896210873146625, "grad_norm": 0.4208066165447235, "learning_rate": 1.8211015520350856e-05, "loss": 0.5504, "step": 14166 }, { "epoch": 0.38898956617243274, "grad_norm": 0.3900386691093445, "learning_rate": 1.8210768995652702e-05, "loss": 0.5147, "step": 14167 }, { "epoch": 0.38901702361339924, "grad_norm": 0.4119853079319, "learning_rate": 1.821052245563884e-05, "loss": 0.5198, "step": 14168 }, { "epoch": 0.38904448105436573, "grad_norm": 0.31523963809013367, "learning_rate": 1.8210275900309736e-05, "loss": 0.4919, "step": 14169 }, { "epoch": 0.3890719384953322, "grad_norm": 0.3643524944782257, "learning_rate": 1.821002932966585e-05, "loss": 0.4924, "step": 14170 }, { "epoch": 0.3890993959362987, "grad_norm": 0.3701658248901367, "learning_rate": 1.8209782743707637e-05, "loss": 0.5746, "step": 14171 }, { "epoch": 0.3891268533772652, "grad_norm": 0.3582409620285034, "learning_rate": 1.820953614243556e-05, "loss": 0.4729, "step": 14172 }, { "epoch": 0.38915431081823176, "grad_norm": 0.38151058554649353, "learning_rate": 1.8209289525850073e-05, "loss": 0.5201, "step": 14173 }, { "epoch": 0.38918176825919826, "grad_norm": 0.4196951389312744, "learning_rate": 1.8209042893951647e-05, "loss": 0.6073, "step": 14174 }, { "epoch": 0.38920922570016475, "grad_norm": 0.3833915889263153, "learning_rate": 1.820879624674073e-05, "loss": 0.5417, "step": 14175 }, { "epoch": 0.38923668314113125, "grad_norm": 0.3679640591144562, "learning_rate": 1.8208549584217795e-05, "loss": 0.4772, "step": 14176 }, { "epoch": 0.38926414058209774, "grad_norm": 0.6714296936988831, "learning_rate": 1.820830290638329e-05, "loss": 0.5586, "step": 14177 }, { "epoch": 0.38929159802306423, "grad_norm": 0.3289254903793335, "learning_rate": 1.820805621323768e-05, "loss": 0.4315, "step": 14178 }, { "epoch": 0.3893190554640307, "grad_norm": 0.4000771641731262, "learning_rate": 1.820780950478143e-05, "loss": 0.4719, "step": 14179 }, { "epoch": 0.3893465129049973, "grad_norm": 0.38257360458374023, "learning_rate": 1.8207562781014997e-05, "loss": 0.4819, "step": 14180 }, { "epoch": 0.38937397034596377, "grad_norm": 0.4019245505332947, "learning_rate": 1.8207316041938835e-05, "loss": 0.4197, "step": 14181 }, { "epoch": 0.38940142778693027, "grad_norm": 0.44085603952407837, "learning_rate": 1.8207069287553407e-05, "loss": 0.6315, "step": 14182 }, { "epoch": 0.38942888522789676, "grad_norm": 0.3328128755092621, "learning_rate": 1.820682251785918e-05, "loss": 0.4699, "step": 14183 }, { "epoch": 0.38945634266886325, "grad_norm": 0.3373732268810272, "learning_rate": 1.8206575732856607e-05, "loss": 0.5531, "step": 14184 }, { "epoch": 0.38948380010982975, "grad_norm": 0.39614737033843994, "learning_rate": 1.8206328932546153e-05, "loss": 0.4916, "step": 14185 }, { "epoch": 0.38951125755079624, "grad_norm": 0.3504573702812195, "learning_rate": 1.8206082116928273e-05, "loss": 0.4965, "step": 14186 }, { "epoch": 0.3895387149917628, "grad_norm": 0.4065588414669037, "learning_rate": 1.8205835286003436e-05, "loss": 0.5832, "step": 14187 }, { "epoch": 0.3895661724327293, "grad_norm": 0.373182088136673, "learning_rate": 1.8205588439772094e-05, "loss": 0.574, "step": 14188 }, { "epoch": 0.3895936298736958, "grad_norm": 0.3479503393173218, "learning_rate": 1.820534157823471e-05, "loss": 0.4707, "step": 14189 }, { "epoch": 0.3896210873146623, "grad_norm": 0.37380242347717285, "learning_rate": 1.8205094701391747e-05, "loss": 0.4957, "step": 14190 }, { "epoch": 0.38964854475562877, "grad_norm": 0.34749823808670044, "learning_rate": 1.8204847809243664e-05, "loss": 0.4963, "step": 14191 }, { "epoch": 0.38967600219659526, "grad_norm": 0.38759130239486694, "learning_rate": 1.8204600901790922e-05, "loss": 0.4887, "step": 14192 }, { "epoch": 0.38970345963756176, "grad_norm": 0.3784238398075104, "learning_rate": 1.820435397903398e-05, "loss": 0.5379, "step": 14193 }, { "epoch": 0.3897309170785283, "grad_norm": 0.35628509521484375, "learning_rate": 1.8204107040973294e-05, "loss": 0.468, "step": 14194 }, { "epoch": 0.3897583745194948, "grad_norm": 0.3369622826576233, "learning_rate": 1.8203860087609334e-05, "loss": 0.4564, "step": 14195 }, { "epoch": 0.3897858319604613, "grad_norm": 0.3466031849384308, "learning_rate": 1.8203613118942557e-05, "loss": 0.4104, "step": 14196 }, { "epoch": 0.3898132894014278, "grad_norm": 0.341572642326355, "learning_rate": 1.8203366134973422e-05, "loss": 0.4953, "step": 14197 }, { "epoch": 0.3898407468423943, "grad_norm": 0.34590843319892883, "learning_rate": 1.8203119135702393e-05, "loss": 0.4402, "step": 14198 }, { "epoch": 0.3898682042833608, "grad_norm": 0.43109604716300964, "learning_rate": 1.8202872121129928e-05, "loss": 0.5447, "step": 14199 }, { "epoch": 0.38989566172432727, "grad_norm": 0.3146997392177582, "learning_rate": 1.8202625091256486e-05, "loss": 0.463, "step": 14200 }, { "epoch": 0.3899231191652938, "grad_norm": 0.35704031586647034, "learning_rate": 1.8202378046082533e-05, "loss": 0.4974, "step": 14201 }, { "epoch": 0.3899505766062603, "grad_norm": 0.37320858240127563, "learning_rate": 1.8202130985608525e-05, "loss": 0.5793, "step": 14202 }, { "epoch": 0.3899780340472268, "grad_norm": 0.36564984917640686, "learning_rate": 1.8201883909834927e-05, "loss": 0.5987, "step": 14203 }, { "epoch": 0.3900054914881933, "grad_norm": 0.3436274528503418, "learning_rate": 1.8201636818762197e-05, "loss": 0.5451, "step": 14204 }, { "epoch": 0.3900329489291598, "grad_norm": 0.4244338572025299, "learning_rate": 1.8201389712390797e-05, "loss": 0.6275, "step": 14205 }, { "epoch": 0.3900604063701263, "grad_norm": 0.36793252825737, "learning_rate": 1.8201142590721187e-05, "loss": 0.5598, "step": 14206 }, { "epoch": 0.3900878638110928, "grad_norm": 0.3964211642742157, "learning_rate": 1.820089545375383e-05, "loss": 0.4938, "step": 14207 }, { "epoch": 0.39011532125205933, "grad_norm": 0.41055482625961304, "learning_rate": 1.8200648301489185e-05, "loss": 0.5165, "step": 14208 }, { "epoch": 0.3901427786930258, "grad_norm": 0.4416419565677643, "learning_rate": 1.820040113392771e-05, "loss": 0.5274, "step": 14209 }, { "epoch": 0.3901702361339923, "grad_norm": 0.38212892413139343, "learning_rate": 1.8200153951069874e-05, "loss": 0.5191, "step": 14210 }, { "epoch": 0.3901976935749588, "grad_norm": 0.3509543240070343, "learning_rate": 1.8199906752916133e-05, "loss": 0.4577, "step": 14211 }, { "epoch": 0.3902251510159253, "grad_norm": 0.3406324088573456, "learning_rate": 1.8199659539466945e-05, "loss": 0.4757, "step": 14212 }, { "epoch": 0.3902526084568918, "grad_norm": 0.3732936978340149, "learning_rate": 1.8199412310722778e-05, "loss": 0.5589, "step": 14213 }, { "epoch": 0.3902800658978583, "grad_norm": 0.3693386912345886, "learning_rate": 1.819916506668409e-05, "loss": 0.5525, "step": 14214 }, { "epoch": 0.39030752333882485, "grad_norm": 0.33651870489120483, "learning_rate": 1.819891780735134e-05, "loss": 0.5016, "step": 14215 }, { "epoch": 0.39033498077979134, "grad_norm": 0.36061784625053406, "learning_rate": 1.8198670532724998e-05, "loss": 0.4646, "step": 14216 }, { "epoch": 0.39036243822075783, "grad_norm": 0.33554673194885254, "learning_rate": 1.8198423242805513e-05, "loss": 0.5279, "step": 14217 }, { "epoch": 0.39038989566172433, "grad_norm": 0.34567615389823914, "learning_rate": 1.8198175937593356e-05, "loss": 0.5831, "step": 14218 }, { "epoch": 0.3904173531026908, "grad_norm": 0.4101155698299408, "learning_rate": 1.8197928617088978e-05, "loss": 0.5026, "step": 14219 }, { "epoch": 0.3904448105436573, "grad_norm": 0.3647191822528839, "learning_rate": 1.8197681281292854e-05, "loss": 0.6059, "step": 14220 }, { "epoch": 0.3904722679846238, "grad_norm": 0.3935253322124481, "learning_rate": 1.8197433930205433e-05, "loss": 0.5325, "step": 14221 }, { "epoch": 0.39049972542559036, "grad_norm": 0.35056453943252563, "learning_rate": 1.8197186563827184e-05, "loss": 0.5034, "step": 14222 }, { "epoch": 0.39052718286655685, "grad_norm": 0.3785375654697418, "learning_rate": 1.8196939182158564e-05, "loss": 0.4699, "step": 14223 }, { "epoch": 0.39055464030752335, "grad_norm": 0.4354499876499176, "learning_rate": 1.8196691785200037e-05, "loss": 0.616, "step": 14224 }, { "epoch": 0.39058209774848984, "grad_norm": 0.34960412979125977, "learning_rate": 1.8196444372952064e-05, "loss": 0.4675, "step": 14225 }, { "epoch": 0.39060955518945634, "grad_norm": 0.3778553605079651, "learning_rate": 1.8196196945415105e-05, "loss": 0.5421, "step": 14226 }, { "epoch": 0.39063701263042283, "grad_norm": 0.396759569644928, "learning_rate": 1.8195949502589622e-05, "loss": 0.5779, "step": 14227 }, { "epoch": 0.3906644700713893, "grad_norm": 0.3644416928291321, "learning_rate": 1.819570204447608e-05, "loss": 0.497, "step": 14228 }, { "epoch": 0.3906919275123559, "grad_norm": 0.49630051851272583, "learning_rate": 1.819545457107494e-05, "loss": 0.5339, "step": 14229 }, { "epoch": 0.39071938495332237, "grad_norm": 0.3265737295150757, "learning_rate": 1.8195207082386657e-05, "loss": 0.4796, "step": 14230 }, { "epoch": 0.39074684239428886, "grad_norm": 0.3725236654281616, "learning_rate": 1.81949595784117e-05, "loss": 0.4888, "step": 14231 }, { "epoch": 0.39077429983525536, "grad_norm": 0.4259030222892761, "learning_rate": 1.8194712059150523e-05, "loss": 0.5132, "step": 14232 }, { "epoch": 0.39080175727622185, "grad_norm": 0.33178651332855225, "learning_rate": 1.8194464524603596e-05, "loss": 0.3965, "step": 14233 }, { "epoch": 0.39082921471718834, "grad_norm": 0.38900455832481384, "learning_rate": 1.819421697477138e-05, "loss": 0.5611, "step": 14234 }, { "epoch": 0.39085667215815484, "grad_norm": 0.4552185535430908, "learning_rate": 1.819396940965433e-05, "loss": 0.4966, "step": 14235 }, { "epoch": 0.3908841295991214, "grad_norm": 0.5107800960540771, "learning_rate": 1.8193721829252915e-05, "loss": 0.5086, "step": 14236 }, { "epoch": 0.3909115870400879, "grad_norm": 0.4319400489330292, "learning_rate": 1.819347423356759e-05, "loss": 0.5333, "step": 14237 }, { "epoch": 0.3909390444810544, "grad_norm": 0.337201863527298, "learning_rate": 1.8193226622598823e-05, "loss": 0.4275, "step": 14238 }, { "epoch": 0.39096650192202087, "grad_norm": 0.4444050192832947, "learning_rate": 1.8192978996347075e-05, "loss": 0.6, "step": 14239 }, { "epoch": 0.39099395936298736, "grad_norm": 0.351843923330307, "learning_rate": 1.8192731354812802e-05, "loss": 0.4171, "step": 14240 }, { "epoch": 0.39102141680395386, "grad_norm": 0.43709656596183777, "learning_rate": 1.8192483697996473e-05, "loss": 0.5885, "step": 14241 }, { "epoch": 0.39104887424492035, "grad_norm": 0.36837807297706604, "learning_rate": 1.8192236025898548e-05, "loss": 0.4684, "step": 14242 }, { "epoch": 0.3910763316858869, "grad_norm": 0.389503538608551, "learning_rate": 1.8191988338519487e-05, "loss": 0.532, "step": 14243 }, { "epoch": 0.3911037891268534, "grad_norm": 0.4001576602458954, "learning_rate": 1.8191740635859753e-05, "loss": 0.5561, "step": 14244 }, { "epoch": 0.3911312465678199, "grad_norm": 0.39867258071899414, "learning_rate": 1.8191492917919805e-05, "loss": 0.4813, "step": 14245 }, { "epoch": 0.3911587040087864, "grad_norm": 0.3352884352207184, "learning_rate": 1.8191245184700114e-05, "loss": 0.4904, "step": 14246 }, { "epoch": 0.3911861614497529, "grad_norm": 0.36604851484298706, "learning_rate": 1.8190997436201132e-05, "loss": 0.552, "step": 14247 }, { "epoch": 0.3912136188907194, "grad_norm": 0.40263667702674866, "learning_rate": 1.819074967242333e-05, "loss": 0.5472, "step": 14248 }, { "epoch": 0.39124107633168587, "grad_norm": 0.37671059370040894, "learning_rate": 1.819050189336716e-05, "loss": 0.4621, "step": 14249 }, { "epoch": 0.3912685337726524, "grad_norm": 0.4031587243080139, "learning_rate": 1.8190254099033095e-05, "loss": 0.5587, "step": 14250 }, { "epoch": 0.3912959912136189, "grad_norm": 0.3886023163795471, "learning_rate": 1.819000628942159e-05, "loss": 0.4756, "step": 14251 }, { "epoch": 0.3913234486545854, "grad_norm": 0.41531938314437866, "learning_rate": 1.8189758464533107e-05, "loss": 0.4642, "step": 14252 }, { "epoch": 0.3913509060955519, "grad_norm": 0.34660154581069946, "learning_rate": 1.818951062436811e-05, "loss": 0.5333, "step": 14253 }, { "epoch": 0.3913783635365184, "grad_norm": 0.3744528591632843, "learning_rate": 1.818926276892707e-05, "loss": 0.5384, "step": 14254 }, { "epoch": 0.3914058209774849, "grad_norm": 0.4529491066932678, "learning_rate": 1.8189014898210434e-05, "loss": 0.6175, "step": 14255 }, { "epoch": 0.3914332784184514, "grad_norm": 0.40804523229599, "learning_rate": 1.8188767012218675e-05, "loss": 0.6277, "step": 14256 }, { "epoch": 0.39146073585941793, "grad_norm": 0.40339669585227966, "learning_rate": 1.818851911095225e-05, "loss": 0.5888, "step": 14257 }, { "epoch": 0.3914881933003844, "grad_norm": 0.34468919038772583, "learning_rate": 1.8188271194411623e-05, "loss": 0.5356, "step": 14258 }, { "epoch": 0.3915156507413509, "grad_norm": 0.3805680572986603, "learning_rate": 1.8188023262597255e-05, "loss": 0.5675, "step": 14259 }, { "epoch": 0.3915431081823174, "grad_norm": 0.3082553446292877, "learning_rate": 1.8187775315509616e-05, "loss": 0.4374, "step": 14260 }, { "epoch": 0.3915705656232839, "grad_norm": 0.3613046705722809, "learning_rate": 1.818752735314916e-05, "loss": 0.5064, "step": 14261 }, { "epoch": 0.3915980230642504, "grad_norm": 0.33887192606925964, "learning_rate": 1.818727937551635e-05, "loss": 0.5095, "step": 14262 }, { "epoch": 0.3916254805052169, "grad_norm": 0.43501752614974976, "learning_rate": 1.8187031382611655e-05, "loss": 0.5368, "step": 14263 }, { "epoch": 0.3916529379461834, "grad_norm": 0.3109287619590759, "learning_rate": 1.818678337443553e-05, "loss": 0.5208, "step": 14264 }, { "epoch": 0.39168039538714994, "grad_norm": 0.34201422333717346, "learning_rate": 1.8186535350988443e-05, "loss": 0.4768, "step": 14265 }, { "epoch": 0.39170785282811643, "grad_norm": 0.355509877204895, "learning_rate": 1.8186287312270853e-05, "loss": 0.5513, "step": 14266 }, { "epoch": 0.3917353102690829, "grad_norm": 0.3723170757293701, "learning_rate": 1.8186039258283224e-05, "loss": 0.4946, "step": 14267 }, { "epoch": 0.3917627677100494, "grad_norm": 0.39129167795181274, "learning_rate": 1.8185791189026024e-05, "loss": 0.4845, "step": 14268 }, { "epoch": 0.3917902251510159, "grad_norm": 0.3755074143409729, "learning_rate": 1.8185543104499707e-05, "loss": 0.4643, "step": 14269 }, { "epoch": 0.3918176825919824, "grad_norm": 0.4093831777572632, "learning_rate": 1.818529500470474e-05, "loss": 0.6231, "step": 14270 }, { "epoch": 0.3918451400329489, "grad_norm": 0.396809458732605, "learning_rate": 1.8185046889641586e-05, "loss": 0.5317, "step": 14271 }, { "epoch": 0.39187259747391545, "grad_norm": 0.35425686836242676, "learning_rate": 1.8184798759310705e-05, "loss": 0.5118, "step": 14272 }, { "epoch": 0.39190005491488195, "grad_norm": 0.36968156695365906, "learning_rate": 1.8184550613712567e-05, "loss": 0.5982, "step": 14273 }, { "epoch": 0.39192751235584844, "grad_norm": 0.5451480150222778, "learning_rate": 1.8184302452847626e-05, "loss": 0.5875, "step": 14274 }, { "epoch": 0.39195496979681493, "grad_norm": 0.5284992456436157, "learning_rate": 1.8184054276716348e-05, "loss": 0.5075, "step": 14275 }, { "epoch": 0.39198242723778143, "grad_norm": 0.44914931058883667, "learning_rate": 1.81838060853192e-05, "loss": 0.5543, "step": 14276 }, { "epoch": 0.3920098846787479, "grad_norm": 0.4895966947078705, "learning_rate": 1.818355787865664e-05, "loss": 0.5727, "step": 14277 }, { "epoch": 0.3920373421197144, "grad_norm": 0.38273563981056213, "learning_rate": 1.8183309656729134e-05, "loss": 0.5476, "step": 14278 }, { "epoch": 0.39206479956068097, "grad_norm": 0.4048324525356293, "learning_rate": 1.818306141953714e-05, "loss": 0.5421, "step": 14279 }, { "epoch": 0.39209225700164746, "grad_norm": 0.40851259231567383, "learning_rate": 1.8182813167081125e-05, "loss": 0.5588, "step": 14280 }, { "epoch": 0.39211971444261395, "grad_norm": 0.39047983288764954, "learning_rate": 1.8182564899361558e-05, "loss": 0.529, "step": 14281 }, { "epoch": 0.39214717188358045, "grad_norm": 0.34100568294525146, "learning_rate": 1.818231661637889e-05, "loss": 0.5665, "step": 14282 }, { "epoch": 0.39217462932454694, "grad_norm": 0.3686695098876953, "learning_rate": 1.8182068318133594e-05, "loss": 0.5028, "step": 14283 }, { "epoch": 0.39220208676551344, "grad_norm": 0.3856965899467468, "learning_rate": 1.8181820004626125e-05, "loss": 0.6032, "step": 14284 }, { "epoch": 0.39222954420647993, "grad_norm": 0.38735201954841614, "learning_rate": 1.818157167585695e-05, "loss": 0.4909, "step": 14285 }, { "epoch": 0.3922570016474465, "grad_norm": 0.38801878690719604, "learning_rate": 1.8181323331826534e-05, "loss": 0.4859, "step": 14286 }, { "epoch": 0.392284459088413, "grad_norm": 0.33168232440948486, "learning_rate": 1.818107497253534e-05, "loss": 0.4551, "step": 14287 }, { "epoch": 0.39231191652937947, "grad_norm": 0.3752933740615845, "learning_rate": 1.8180826597983832e-05, "loss": 0.4787, "step": 14288 }, { "epoch": 0.39233937397034596, "grad_norm": 0.36348283290863037, "learning_rate": 1.818057820817247e-05, "loss": 0.4885, "step": 14289 }, { "epoch": 0.39236683141131246, "grad_norm": 0.41049525141716003, "learning_rate": 1.8180329803101714e-05, "loss": 0.4348, "step": 14290 }, { "epoch": 0.39239428885227895, "grad_norm": 0.3690605163574219, "learning_rate": 1.8180081382772038e-05, "loss": 0.488, "step": 14291 }, { "epoch": 0.39242174629324544, "grad_norm": 0.371503084897995, "learning_rate": 1.8179832947183896e-05, "loss": 0.5223, "step": 14292 }, { "epoch": 0.392449203734212, "grad_norm": 0.38965025544166565, "learning_rate": 1.8179584496337757e-05, "loss": 0.6257, "step": 14293 }, { "epoch": 0.3924766611751785, "grad_norm": 0.40255874395370483, "learning_rate": 1.817933603023408e-05, "loss": 0.6039, "step": 14294 }, { "epoch": 0.392504118616145, "grad_norm": 3.228938102722168, "learning_rate": 1.8179087548873333e-05, "loss": 0.6341, "step": 14295 }, { "epoch": 0.3925315760571115, "grad_norm": 0.34678196907043457, "learning_rate": 1.817883905225598e-05, "loss": 0.5029, "step": 14296 }, { "epoch": 0.39255903349807797, "grad_norm": 0.40346604585647583, "learning_rate": 1.8178590540382475e-05, "loss": 0.5979, "step": 14297 }, { "epoch": 0.39258649093904446, "grad_norm": 0.33120250701904297, "learning_rate": 1.817834201325329e-05, "loss": 0.496, "step": 14298 }, { "epoch": 0.39261394838001096, "grad_norm": 0.4096700847148895, "learning_rate": 1.817809347086889e-05, "loss": 0.5772, "step": 14299 }, { "epoch": 0.3926414058209775, "grad_norm": 0.4056386947631836, "learning_rate": 1.8177844913229735e-05, "loss": 0.5148, "step": 14300 }, { "epoch": 0.392668863261944, "grad_norm": 0.41495776176452637, "learning_rate": 1.817759634033629e-05, "loss": 0.4818, "step": 14301 }, { "epoch": 0.3926963207029105, "grad_norm": 0.3829416036605835, "learning_rate": 1.8177347752189013e-05, "loss": 0.4766, "step": 14302 }, { "epoch": 0.392723778143877, "grad_norm": 0.36880138516426086, "learning_rate": 1.8177099148788374e-05, "loss": 0.4803, "step": 14303 }, { "epoch": 0.3927512355848435, "grad_norm": 0.3927137553691864, "learning_rate": 1.817685053013484e-05, "loss": 0.461, "step": 14304 }, { "epoch": 0.39277869302581, "grad_norm": 0.3460618257522583, "learning_rate": 1.8176601896228866e-05, "loss": 0.4733, "step": 14305 }, { "epoch": 0.39280615046677647, "grad_norm": 0.3692206144332886, "learning_rate": 1.817635324707092e-05, "loss": 0.4661, "step": 14306 }, { "epoch": 0.392833607907743, "grad_norm": 0.3588316738605499, "learning_rate": 1.8176104582661467e-05, "loss": 0.5325, "step": 14307 }, { "epoch": 0.3928610653487095, "grad_norm": 0.3750323951244354, "learning_rate": 1.8175855903000968e-05, "loss": 0.5282, "step": 14308 }, { "epoch": 0.392888522789676, "grad_norm": 0.3777378499507904, "learning_rate": 1.8175607208089888e-05, "loss": 0.5075, "step": 14309 }, { "epoch": 0.3929159802306425, "grad_norm": 0.35878852009773254, "learning_rate": 1.8175358497928695e-05, "loss": 0.5083, "step": 14310 }, { "epoch": 0.392943437671609, "grad_norm": 0.4226461350917816, "learning_rate": 1.8175109772517846e-05, "loss": 0.5201, "step": 14311 }, { "epoch": 0.3929708951125755, "grad_norm": 0.3282892107963562, "learning_rate": 1.8174861031857808e-05, "loss": 0.396, "step": 14312 }, { "epoch": 0.392998352553542, "grad_norm": 0.3686719536781311, "learning_rate": 1.8174612275949046e-05, "loss": 0.4847, "step": 14313 }, { "epoch": 0.39302580999450853, "grad_norm": 1.2552671432495117, "learning_rate": 1.817436350479202e-05, "loss": 0.5559, "step": 14314 }, { "epoch": 0.39305326743547503, "grad_norm": 0.38067808747291565, "learning_rate": 1.81741147183872e-05, "loss": 0.5671, "step": 14315 }, { "epoch": 0.3930807248764415, "grad_norm": 0.4018470346927643, "learning_rate": 1.8173865916735048e-05, "loss": 0.5639, "step": 14316 }, { "epoch": 0.393108182317408, "grad_norm": 0.33744317293167114, "learning_rate": 1.8173617099836024e-05, "loss": 0.461, "step": 14317 }, { "epoch": 0.3931356397583745, "grad_norm": 0.3522387146949768, "learning_rate": 1.81733682676906e-05, "loss": 0.4593, "step": 14318 }, { "epoch": 0.393163097199341, "grad_norm": 0.33220893144607544, "learning_rate": 1.817311942029923e-05, "loss": 0.4607, "step": 14319 }, { "epoch": 0.3931905546403075, "grad_norm": 0.3800939917564392, "learning_rate": 1.8172870557662387e-05, "loss": 0.5297, "step": 14320 }, { "epoch": 0.39321801208127405, "grad_norm": 0.3762393295764923, "learning_rate": 1.8172621679780532e-05, "loss": 0.5406, "step": 14321 }, { "epoch": 0.39324546952224054, "grad_norm": 0.7611905336380005, "learning_rate": 1.817237278665413e-05, "loss": 0.4957, "step": 14322 }, { "epoch": 0.39327292696320704, "grad_norm": 0.36765310168266296, "learning_rate": 1.8172123878283643e-05, "loss": 0.5097, "step": 14323 }, { "epoch": 0.39330038440417353, "grad_norm": 0.3736571669578552, "learning_rate": 1.817187495466954e-05, "loss": 0.517, "step": 14324 }, { "epoch": 0.39332784184514, "grad_norm": 0.40405210852622986, "learning_rate": 1.8171626015812277e-05, "loss": 0.513, "step": 14325 }, { "epoch": 0.3933552992861065, "grad_norm": 0.40092921257019043, "learning_rate": 1.8171377061712324e-05, "loss": 0.5402, "step": 14326 }, { "epoch": 0.393382756727073, "grad_norm": 0.3487306833267212, "learning_rate": 1.817112809237015e-05, "loss": 0.4908, "step": 14327 }, { "epoch": 0.39341021416803956, "grad_norm": 0.4664003551006317, "learning_rate": 1.8170879107786208e-05, "loss": 0.5174, "step": 14328 }, { "epoch": 0.39343767160900606, "grad_norm": 0.3467338979244232, "learning_rate": 1.817063010796097e-05, "loss": 0.4567, "step": 14329 }, { "epoch": 0.39346512904997255, "grad_norm": 0.36842775344848633, "learning_rate": 1.8170381092894902e-05, "loss": 0.5144, "step": 14330 }, { "epoch": 0.39349258649093904, "grad_norm": 0.36066094040870667, "learning_rate": 1.8170132062588462e-05, "loss": 0.4883, "step": 14331 }, { "epoch": 0.39352004393190554, "grad_norm": 0.528595507144928, "learning_rate": 1.8169883017042124e-05, "loss": 0.6018, "step": 14332 }, { "epoch": 0.39354750137287203, "grad_norm": 0.3317814767360687, "learning_rate": 1.816963395625634e-05, "loss": 0.5174, "step": 14333 }, { "epoch": 0.3935749588138385, "grad_norm": 0.3409140706062317, "learning_rate": 1.8169384880231586e-05, "loss": 0.4609, "step": 14334 }, { "epoch": 0.3936024162548051, "grad_norm": 0.44564637541770935, "learning_rate": 1.8169135788968318e-05, "loss": 0.6392, "step": 14335 }, { "epoch": 0.39362987369577157, "grad_norm": 0.3388068675994873, "learning_rate": 1.8168886682467005e-05, "loss": 0.473, "step": 14336 }, { "epoch": 0.39365733113673806, "grad_norm": 0.38963058590888977, "learning_rate": 1.8168637560728112e-05, "loss": 0.5162, "step": 14337 }, { "epoch": 0.39368478857770456, "grad_norm": 0.4068855345249176, "learning_rate": 1.8168388423752102e-05, "loss": 0.5184, "step": 14338 }, { "epoch": 0.39371224601867105, "grad_norm": 0.3677029311656952, "learning_rate": 1.8168139271539444e-05, "loss": 0.5206, "step": 14339 }, { "epoch": 0.39373970345963755, "grad_norm": 0.3854289948940277, "learning_rate": 1.8167890104090597e-05, "loss": 0.5086, "step": 14340 }, { "epoch": 0.39376716090060404, "grad_norm": 0.41467520594596863, "learning_rate": 1.8167640921406026e-05, "loss": 0.6086, "step": 14341 }, { "epoch": 0.3937946183415706, "grad_norm": 0.3660565912723541, "learning_rate": 1.81673917234862e-05, "loss": 0.4695, "step": 14342 }, { "epoch": 0.3938220757825371, "grad_norm": 0.3693685531616211, "learning_rate": 1.816714251033158e-05, "loss": 0.4544, "step": 14343 }, { "epoch": 0.3938495332235036, "grad_norm": 0.3218265771865845, "learning_rate": 1.8166893281942636e-05, "loss": 0.5674, "step": 14344 }, { "epoch": 0.3938769906644701, "grad_norm": 0.3856382668018341, "learning_rate": 1.816664403831983e-05, "loss": 0.5532, "step": 14345 }, { "epoch": 0.39390444810543657, "grad_norm": 0.3584349751472473, "learning_rate": 1.816639477946362e-05, "loss": 0.5516, "step": 14346 }, { "epoch": 0.39393190554640306, "grad_norm": 0.3933478593826294, "learning_rate": 1.816614550537448e-05, "loss": 0.4937, "step": 14347 }, { "epoch": 0.39395936298736955, "grad_norm": 0.4056166708469391, "learning_rate": 1.8165896216052874e-05, "loss": 0.5102, "step": 14348 }, { "epoch": 0.3939868204283361, "grad_norm": 0.40892094373703003, "learning_rate": 1.8165646911499266e-05, "loss": 0.5459, "step": 14349 }, { "epoch": 0.3940142778693026, "grad_norm": 0.3326300084590912, "learning_rate": 1.816539759171412e-05, "loss": 0.4805, "step": 14350 }, { "epoch": 0.3940417353102691, "grad_norm": 0.3471738398075104, "learning_rate": 1.81651482566979e-05, "loss": 0.5342, "step": 14351 }, { "epoch": 0.3940691927512356, "grad_norm": 0.3591236472129822, "learning_rate": 1.8164898906451073e-05, "loss": 0.5799, "step": 14352 }, { "epoch": 0.3940966501922021, "grad_norm": 0.7422052025794983, "learning_rate": 1.8164649540974105e-05, "loss": 0.6046, "step": 14353 }, { "epoch": 0.3941241076331686, "grad_norm": 0.33611729741096497, "learning_rate": 1.8164400160267458e-05, "loss": 0.4617, "step": 14354 }, { "epoch": 0.39415156507413507, "grad_norm": 0.359722375869751, "learning_rate": 1.8164150764331596e-05, "loss": 0.5793, "step": 14355 }, { "epoch": 0.3941790225151016, "grad_norm": 0.35249119997024536, "learning_rate": 1.816390135316699e-05, "loss": 0.4781, "step": 14356 }, { "epoch": 0.3942064799560681, "grad_norm": 0.3764329254627228, "learning_rate": 1.8163651926774106e-05, "loss": 0.453, "step": 14357 }, { "epoch": 0.3942339373970346, "grad_norm": 0.36782407760620117, "learning_rate": 1.8163402485153403e-05, "loss": 0.5797, "step": 14358 }, { "epoch": 0.3942613948380011, "grad_norm": 0.3680437505245209, "learning_rate": 1.8163153028305348e-05, "loss": 0.4941, "step": 14359 }, { "epoch": 0.3942888522789676, "grad_norm": 0.34286239743232727, "learning_rate": 1.8162903556230406e-05, "loss": 0.4518, "step": 14360 }, { "epoch": 0.3943163097199341, "grad_norm": 0.35229504108428955, "learning_rate": 1.8162654068929043e-05, "loss": 0.6032, "step": 14361 }, { "epoch": 0.3943437671609006, "grad_norm": 0.37516653537750244, "learning_rate": 1.816240456640173e-05, "loss": 0.5732, "step": 14362 }, { "epoch": 0.39437122460186713, "grad_norm": 0.3398737907409668, "learning_rate": 1.8162155048648925e-05, "loss": 0.5044, "step": 14363 }, { "epoch": 0.3943986820428336, "grad_norm": 0.378580242395401, "learning_rate": 1.8161905515671094e-05, "loss": 0.497, "step": 14364 }, { "epoch": 0.3944261394838001, "grad_norm": 0.37471821904182434, "learning_rate": 1.8161655967468706e-05, "loss": 0.5287, "step": 14365 }, { "epoch": 0.3944535969247666, "grad_norm": 0.43900078535079956, "learning_rate": 1.8161406404042226e-05, "loss": 0.5746, "step": 14366 }, { "epoch": 0.3944810543657331, "grad_norm": 0.336305171251297, "learning_rate": 1.8161156825392114e-05, "loss": 0.5473, "step": 14367 }, { "epoch": 0.3945085118066996, "grad_norm": 0.3502452075481415, "learning_rate": 1.816090723151884e-05, "loss": 0.4754, "step": 14368 }, { "epoch": 0.3945359692476661, "grad_norm": 0.4074419438838959, "learning_rate": 1.8160657622422872e-05, "loss": 0.5292, "step": 14369 }, { "epoch": 0.39456342668863265, "grad_norm": 0.376528263092041, "learning_rate": 1.8160407998104674e-05, "loss": 0.5427, "step": 14370 }, { "epoch": 0.39459088412959914, "grad_norm": 0.32999691367149353, "learning_rate": 1.816015835856471e-05, "loss": 0.4294, "step": 14371 }, { "epoch": 0.39461834157056563, "grad_norm": 0.37979549169540405, "learning_rate": 1.8159908703803447e-05, "loss": 0.5248, "step": 14372 }, { "epoch": 0.39464579901153213, "grad_norm": 0.37842562794685364, "learning_rate": 1.8159659033821344e-05, "loss": 0.508, "step": 14373 }, { "epoch": 0.3946732564524986, "grad_norm": 0.39701932668685913, "learning_rate": 1.815940934861888e-05, "loss": 0.6142, "step": 14374 }, { "epoch": 0.3947007138934651, "grad_norm": 0.3511054515838623, "learning_rate": 1.8159159648196508e-05, "loss": 0.5042, "step": 14375 }, { "epoch": 0.3947281713344316, "grad_norm": 0.48574700951576233, "learning_rate": 1.8158909932554704e-05, "loss": 0.5188, "step": 14376 }, { "epoch": 0.39475562877539816, "grad_norm": 0.3460776209831238, "learning_rate": 1.8158660201693924e-05, "loss": 0.4909, "step": 14377 }, { "epoch": 0.39478308621636465, "grad_norm": 0.3414953351020813, "learning_rate": 1.815841045561464e-05, "loss": 0.4974, "step": 14378 }, { "epoch": 0.39481054365733115, "grad_norm": 0.3545130491256714, "learning_rate": 1.815816069431732e-05, "loss": 0.5413, "step": 14379 }, { "epoch": 0.39483800109829764, "grad_norm": 0.3637526333332062, "learning_rate": 1.8157910917802423e-05, "loss": 0.4043, "step": 14380 }, { "epoch": 0.39486545853926414, "grad_norm": 0.4308595359325409, "learning_rate": 1.815766112607042e-05, "loss": 0.5694, "step": 14381 }, { "epoch": 0.39489291598023063, "grad_norm": 0.3780059218406677, "learning_rate": 1.8157411319121774e-05, "loss": 0.5452, "step": 14382 }, { "epoch": 0.3949203734211971, "grad_norm": 0.3979055881500244, "learning_rate": 1.8157161496956954e-05, "loss": 0.5977, "step": 14383 }, { "epoch": 0.3949478308621637, "grad_norm": 0.3748335540294647, "learning_rate": 1.8156911659576424e-05, "loss": 0.4832, "step": 14384 }, { "epoch": 0.39497528830313017, "grad_norm": 0.3996511697769165, "learning_rate": 1.8156661806980647e-05, "loss": 0.5695, "step": 14385 }, { "epoch": 0.39500274574409666, "grad_norm": 0.3419221341609955, "learning_rate": 1.81564119391701e-05, "loss": 0.3724, "step": 14386 }, { "epoch": 0.39503020318506316, "grad_norm": 0.3960464894771576, "learning_rate": 1.8156162056145237e-05, "loss": 0.5139, "step": 14387 }, { "epoch": 0.39505766062602965, "grad_norm": 0.3650602400302887, "learning_rate": 1.8155912157906528e-05, "loss": 0.4956, "step": 14388 }, { "epoch": 0.39508511806699614, "grad_norm": 0.3952792286872864, "learning_rate": 1.8155662244454443e-05, "loss": 0.5511, "step": 14389 }, { "epoch": 0.39511257550796264, "grad_norm": 0.4352703094482422, "learning_rate": 1.815541231578944e-05, "loss": 0.5206, "step": 14390 }, { "epoch": 0.3951400329489292, "grad_norm": 0.9478187561035156, "learning_rate": 1.8155162371911992e-05, "loss": 0.4569, "step": 14391 }, { "epoch": 0.3951674903898957, "grad_norm": 0.4123595654964447, "learning_rate": 1.815491241282256e-05, "loss": 0.5227, "step": 14392 }, { "epoch": 0.3951949478308622, "grad_norm": 0.44079041481018066, "learning_rate": 1.8154662438521622e-05, "loss": 0.4743, "step": 14393 }, { "epoch": 0.39522240527182867, "grad_norm": 0.488781213760376, "learning_rate": 1.815441244900963e-05, "loss": 0.6081, "step": 14394 }, { "epoch": 0.39524986271279516, "grad_norm": 0.3972320556640625, "learning_rate": 1.815416244428706e-05, "loss": 0.4727, "step": 14395 }, { "epoch": 0.39527732015376166, "grad_norm": 0.4054119884967804, "learning_rate": 1.815391242435437e-05, "loss": 0.4616, "step": 14396 }, { "epoch": 0.39530477759472815, "grad_norm": 0.36461690068244934, "learning_rate": 1.8153662389212034e-05, "loss": 0.5851, "step": 14397 }, { "epoch": 0.39533223503569465, "grad_norm": 0.3512401282787323, "learning_rate": 1.8153412338860515e-05, "loss": 0.4927, "step": 14398 }, { "epoch": 0.3953596924766612, "grad_norm": 0.3708093762397766, "learning_rate": 1.8153162273300277e-05, "loss": 0.5878, "step": 14399 }, { "epoch": 0.3953871499176277, "grad_norm": 0.3932078182697296, "learning_rate": 1.815291219253179e-05, "loss": 0.4735, "step": 14400 }, { "epoch": 0.3954146073585942, "grad_norm": 0.34062668681144714, "learning_rate": 1.815266209655552e-05, "loss": 0.5021, "step": 14401 }, { "epoch": 0.3954420647995607, "grad_norm": 0.36861753463745117, "learning_rate": 1.815241198537194e-05, "loss": 0.5499, "step": 14402 }, { "epoch": 0.39546952224052717, "grad_norm": 0.3486413359642029, "learning_rate": 1.81521618589815e-05, "loss": 0.5062, "step": 14403 }, { "epoch": 0.39549697968149367, "grad_norm": 0.3656655251979828, "learning_rate": 1.815191171738468e-05, "loss": 0.5098, "step": 14404 }, { "epoch": 0.39552443712246016, "grad_norm": 0.3634564280509949, "learning_rate": 1.8151661560581944e-05, "loss": 0.5226, "step": 14405 }, { "epoch": 0.3955518945634267, "grad_norm": 0.47357064485549927, "learning_rate": 1.8151411388573756e-05, "loss": 0.5656, "step": 14406 }, { "epoch": 0.3955793520043932, "grad_norm": 0.4105850160121918, "learning_rate": 1.8151161201360582e-05, "loss": 0.5759, "step": 14407 }, { "epoch": 0.3956068094453597, "grad_norm": 0.3494517505168915, "learning_rate": 1.8150910998942895e-05, "loss": 0.5045, "step": 14408 }, { "epoch": 0.3956342668863262, "grad_norm": 0.4125193655490875, "learning_rate": 1.8150660781321157e-05, "loss": 0.513, "step": 14409 }, { "epoch": 0.3956617243272927, "grad_norm": 0.3544021248817444, "learning_rate": 1.815041054849583e-05, "loss": 0.4412, "step": 14410 }, { "epoch": 0.3956891817682592, "grad_norm": 0.4214726686477661, "learning_rate": 1.8150160300467393e-05, "loss": 0.6312, "step": 14411 }, { "epoch": 0.3957166392092257, "grad_norm": 0.3551655411720276, "learning_rate": 1.8149910037236305e-05, "loss": 0.5684, "step": 14412 }, { "epoch": 0.3957440966501922, "grad_norm": 0.33052247762680054, "learning_rate": 1.814965975880303e-05, "loss": 0.4319, "step": 14413 }, { "epoch": 0.3957715540911587, "grad_norm": 0.37991657853126526, "learning_rate": 1.814940946516804e-05, "loss": 0.4395, "step": 14414 }, { "epoch": 0.3957990115321252, "grad_norm": 0.3741739094257355, "learning_rate": 1.8149159156331803e-05, "loss": 0.5446, "step": 14415 }, { "epoch": 0.3958264689730917, "grad_norm": 0.3910101652145386, "learning_rate": 1.8148908832294777e-05, "loss": 0.5051, "step": 14416 }, { "epoch": 0.3958539264140582, "grad_norm": 0.35212814807891846, "learning_rate": 1.8148658493057438e-05, "loss": 0.5077, "step": 14417 }, { "epoch": 0.3958813838550247, "grad_norm": 0.38807716965675354, "learning_rate": 1.814840813862025e-05, "loss": 0.5274, "step": 14418 }, { "epoch": 0.3959088412959912, "grad_norm": 0.35686561465263367, "learning_rate": 1.814815776898368e-05, "loss": 0.4945, "step": 14419 }, { "epoch": 0.39593629873695774, "grad_norm": 0.3364298641681671, "learning_rate": 1.81479073841482e-05, "loss": 0.5735, "step": 14420 }, { "epoch": 0.39596375617792423, "grad_norm": 0.3943302035331726, "learning_rate": 1.8147656984114268e-05, "loss": 0.5547, "step": 14421 }, { "epoch": 0.3959912136188907, "grad_norm": 0.3697696626186371, "learning_rate": 1.8147406568882355e-05, "loss": 0.5593, "step": 14422 }, { "epoch": 0.3960186710598572, "grad_norm": 0.48685356974601746, "learning_rate": 1.814715613845293e-05, "loss": 0.4469, "step": 14423 }, { "epoch": 0.3960461285008237, "grad_norm": 3.049593687057495, "learning_rate": 1.8146905692826457e-05, "loss": 0.6475, "step": 14424 }, { "epoch": 0.3960735859417902, "grad_norm": 0.3993189334869385, "learning_rate": 1.814665523200341e-05, "loss": 0.5517, "step": 14425 }, { "epoch": 0.3961010433827567, "grad_norm": 0.39891138672828674, "learning_rate": 1.814640475598424e-05, "loss": 0.4937, "step": 14426 }, { "epoch": 0.39612850082372325, "grad_norm": 0.3261817395687103, "learning_rate": 1.8146154264769435e-05, "loss": 0.478, "step": 14427 }, { "epoch": 0.39615595826468974, "grad_norm": 0.3889504671096802, "learning_rate": 1.8145903758359447e-05, "loss": 0.5594, "step": 14428 }, { "epoch": 0.39618341570565624, "grad_norm": 0.4257969856262207, "learning_rate": 1.814565323675475e-05, "loss": 0.499, "step": 14429 }, { "epoch": 0.39621087314662273, "grad_norm": 0.38312384486198425, "learning_rate": 1.8145402699955813e-05, "loss": 0.4914, "step": 14430 }, { "epoch": 0.3962383305875892, "grad_norm": 0.3869914710521698, "learning_rate": 1.8145152147963096e-05, "loss": 0.5198, "step": 14431 }, { "epoch": 0.3962657880285557, "grad_norm": 0.4019356071949005, "learning_rate": 1.8144901580777074e-05, "loss": 0.5216, "step": 14432 }, { "epoch": 0.3962932454695222, "grad_norm": 0.3708321750164032, "learning_rate": 1.814465099839821e-05, "loss": 0.5015, "step": 14433 }, { "epoch": 0.39632070291048876, "grad_norm": 0.3907548785209656, "learning_rate": 1.814440040082697e-05, "loss": 0.4888, "step": 14434 }, { "epoch": 0.39634816035145526, "grad_norm": 0.3585502803325653, "learning_rate": 1.8144149788063827e-05, "loss": 0.4868, "step": 14435 }, { "epoch": 0.39637561779242175, "grad_norm": 0.34310612082481384, "learning_rate": 1.814389916010924e-05, "loss": 0.4488, "step": 14436 }, { "epoch": 0.39640307523338825, "grad_norm": 0.38141995668411255, "learning_rate": 1.8143648516963684e-05, "loss": 0.5288, "step": 14437 }, { "epoch": 0.39643053267435474, "grad_norm": 0.3732544183731079, "learning_rate": 1.814339785862763e-05, "loss": 0.5142, "step": 14438 }, { "epoch": 0.39645799011532123, "grad_norm": 0.3870595693588257, "learning_rate": 1.814314718510153e-05, "loss": 0.5375, "step": 14439 }, { "epoch": 0.39648544755628773, "grad_norm": 0.3490780293941498, "learning_rate": 1.8142896496385872e-05, "loss": 0.5515, "step": 14440 }, { "epoch": 0.3965129049972543, "grad_norm": 0.38211989402770996, "learning_rate": 1.814264579248111e-05, "loss": 0.5556, "step": 14441 }, { "epoch": 0.3965403624382208, "grad_norm": 0.3723224103450775, "learning_rate": 1.8142395073387714e-05, "loss": 0.5053, "step": 14442 }, { "epoch": 0.39656781987918727, "grad_norm": 0.37324294447898865, "learning_rate": 1.8142144339106148e-05, "loss": 0.5669, "step": 14443 }, { "epoch": 0.39659527732015376, "grad_norm": 0.4912918210029602, "learning_rate": 1.814189358963689e-05, "loss": 0.597, "step": 14444 }, { "epoch": 0.39662273476112025, "grad_norm": 0.34916189312934875, "learning_rate": 1.8141642824980398e-05, "loss": 0.4783, "step": 14445 }, { "epoch": 0.39665019220208675, "grad_norm": 0.4050712287425995, "learning_rate": 1.8141392045137146e-05, "loss": 0.6448, "step": 14446 }, { "epoch": 0.39667764964305324, "grad_norm": 0.37061649560928345, "learning_rate": 1.81411412501076e-05, "loss": 0.5585, "step": 14447 }, { "epoch": 0.3967051070840198, "grad_norm": 0.3359757959842682, "learning_rate": 1.8140890439892226e-05, "loss": 0.4351, "step": 14448 }, { "epoch": 0.3967325645249863, "grad_norm": 0.4002884328365326, "learning_rate": 1.8140639614491494e-05, "loss": 0.5637, "step": 14449 }, { "epoch": 0.3967600219659528, "grad_norm": 0.36259621381759644, "learning_rate": 1.814038877390587e-05, "loss": 0.5238, "step": 14450 }, { "epoch": 0.3967874794069193, "grad_norm": 0.33976393938064575, "learning_rate": 1.814013791813582e-05, "loss": 0.4088, "step": 14451 }, { "epoch": 0.39681493684788577, "grad_norm": 0.38816940784454346, "learning_rate": 1.813988704718182e-05, "loss": 0.5231, "step": 14452 }, { "epoch": 0.39684239428885226, "grad_norm": 0.3644917607307434, "learning_rate": 1.8139636161044328e-05, "loss": 0.5434, "step": 14453 }, { "epoch": 0.39686985172981876, "grad_norm": 0.3634960949420929, "learning_rate": 1.8139385259723822e-05, "loss": 0.4427, "step": 14454 }, { "epoch": 0.3968973091707853, "grad_norm": 0.3813188672065735, "learning_rate": 1.813913434322076e-05, "loss": 0.5732, "step": 14455 }, { "epoch": 0.3969247666117518, "grad_norm": 0.43568918108940125, "learning_rate": 1.8138883411535616e-05, "loss": 0.5248, "step": 14456 }, { "epoch": 0.3969522240527183, "grad_norm": 0.37514665722846985, "learning_rate": 1.8138632464668858e-05, "loss": 0.5785, "step": 14457 }, { "epoch": 0.3969796814936848, "grad_norm": 0.3837616443634033, "learning_rate": 1.813838150262095e-05, "loss": 0.5016, "step": 14458 }, { "epoch": 0.3970071389346513, "grad_norm": 0.3434537351131439, "learning_rate": 1.8138130525392366e-05, "loss": 0.5079, "step": 14459 }, { "epoch": 0.3970345963756178, "grad_norm": 0.35390210151672363, "learning_rate": 1.813787953298357e-05, "loss": 0.4906, "step": 14460 }, { "epoch": 0.39706205381658427, "grad_norm": 0.3842248022556305, "learning_rate": 1.8137628525395032e-05, "loss": 0.4683, "step": 14461 }, { "epoch": 0.3970895112575508, "grad_norm": 0.3654112219810486, "learning_rate": 1.813737750262722e-05, "loss": 0.5776, "step": 14462 }, { "epoch": 0.3971169686985173, "grad_norm": 0.3322075307369232, "learning_rate": 1.81371264646806e-05, "loss": 0.469, "step": 14463 }, { "epoch": 0.3971444261394838, "grad_norm": 0.36738666892051697, "learning_rate": 1.8136875411555644e-05, "loss": 0.515, "step": 14464 }, { "epoch": 0.3971718835804503, "grad_norm": 0.34361276030540466, "learning_rate": 1.8136624343252815e-05, "loss": 0.4605, "step": 14465 }, { "epoch": 0.3971993410214168, "grad_norm": 0.3995765149593353, "learning_rate": 1.8136373259772588e-05, "loss": 0.5117, "step": 14466 }, { "epoch": 0.3972267984623833, "grad_norm": 0.4049709141254425, "learning_rate": 1.8136122161115425e-05, "loss": 0.5081, "step": 14467 }, { "epoch": 0.3972542559033498, "grad_norm": 0.3173080086708069, "learning_rate": 1.81358710472818e-05, "loss": 0.5112, "step": 14468 }, { "epoch": 0.39728171334431633, "grad_norm": 0.36215972900390625, "learning_rate": 1.8135619918272184e-05, "loss": 0.5326, "step": 14469 }, { "epoch": 0.39730917078528283, "grad_norm": 0.3517303466796875, "learning_rate": 1.813536877408703e-05, "loss": 0.5509, "step": 14470 }, { "epoch": 0.3973366282262493, "grad_norm": 0.45490527153015137, "learning_rate": 1.813511761472682e-05, "loss": 0.5419, "step": 14471 }, { "epoch": 0.3973640856672158, "grad_norm": 0.41371825337409973, "learning_rate": 1.8134866440192023e-05, "loss": 0.5035, "step": 14472 }, { "epoch": 0.3973915431081823, "grad_norm": 0.4454978108406067, "learning_rate": 1.81346152504831e-05, "loss": 0.5101, "step": 14473 }, { "epoch": 0.3974190005491488, "grad_norm": 1.0419477224349976, "learning_rate": 1.8134364045600525e-05, "loss": 0.5199, "step": 14474 }, { "epoch": 0.3974464579901153, "grad_norm": 0.368676096200943, "learning_rate": 1.8134112825544768e-05, "loss": 0.5482, "step": 14475 }, { "epoch": 0.39747391543108185, "grad_norm": 0.3917219340801239, "learning_rate": 1.8133861590316288e-05, "loss": 0.5852, "step": 14476 }, { "epoch": 0.39750137287204834, "grad_norm": 0.4231390357017517, "learning_rate": 1.8133610339915565e-05, "loss": 0.5029, "step": 14477 }, { "epoch": 0.39752883031301484, "grad_norm": 0.374780535697937, "learning_rate": 1.813335907434306e-05, "loss": 0.4725, "step": 14478 }, { "epoch": 0.39755628775398133, "grad_norm": 0.3917389512062073, "learning_rate": 1.8133107793599246e-05, "loss": 0.5371, "step": 14479 }, { "epoch": 0.3975837451949478, "grad_norm": 0.345502108335495, "learning_rate": 1.8132856497684586e-05, "loss": 0.4588, "step": 14480 }, { "epoch": 0.3976112026359143, "grad_norm": 0.4269584119319916, "learning_rate": 1.813260518659956e-05, "loss": 0.3843, "step": 14481 }, { "epoch": 0.3976386600768808, "grad_norm": 0.35465359687805176, "learning_rate": 1.8132353860344623e-05, "loss": 0.5306, "step": 14482 }, { "epoch": 0.39766611751784736, "grad_norm": 0.38518092036247253, "learning_rate": 1.8132102518920255e-05, "loss": 0.5243, "step": 14483 }, { "epoch": 0.39769357495881386, "grad_norm": 0.3390117585659027, "learning_rate": 1.813185116232692e-05, "loss": 0.5051, "step": 14484 }, { "epoch": 0.39772103239978035, "grad_norm": 0.35954269766807556, "learning_rate": 1.8131599790565087e-05, "loss": 0.491, "step": 14485 }, { "epoch": 0.39774848984074684, "grad_norm": 0.3492797315120697, "learning_rate": 1.8131348403635223e-05, "loss": 0.5076, "step": 14486 }, { "epoch": 0.39777594728171334, "grad_norm": 0.424371600151062, "learning_rate": 1.81310970015378e-05, "loss": 0.5436, "step": 14487 }, { "epoch": 0.39780340472267983, "grad_norm": 0.3581925332546234, "learning_rate": 1.8130845584273287e-05, "loss": 0.4728, "step": 14488 }, { "epoch": 0.3978308621636463, "grad_norm": 0.3906252384185791, "learning_rate": 1.813059415184215e-05, "loss": 0.5833, "step": 14489 }, { "epoch": 0.3978583196046129, "grad_norm": 0.3868602216243744, "learning_rate": 1.8130342704244863e-05, "loss": 0.5131, "step": 14490 }, { "epoch": 0.39788577704557937, "grad_norm": 0.4076305627822876, "learning_rate": 1.813009124148189e-05, "loss": 0.5598, "step": 14491 }, { "epoch": 0.39791323448654586, "grad_norm": 0.40997737646102905, "learning_rate": 1.81298397635537e-05, "loss": 0.5484, "step": 14492 }, { "epoch": 0.39794069192751236, "grad_norm": 0.40395835041999817, "learning_rate": 1.8129588270460768e-05, "loss": 0.4636, "step": 14493 }, { "epoch": 0.39796814936847885, "grad_norm": 0.4241991639137268, "learning_rate": 1.8129336762203554e-05, "loss": 0.5185, "step": 14494 }, { "epoch": 0.39799560680944535, "grad_norm": 0.37198787927627563, "learning_rate": 1.812908523878254e-05, "loss": 0.5204, "step": 14495 }, { "epoch": 0.39802306425041184, "grad_norm": 0.3696286678314209, "learning_rate": 1.8128833700198182e-05, "loss": 0.5049, "step": 14496 }, { "epoch": 0.3980505216913784, "grad_norm": 0.39488378167152405, "learning_rate": 1.8128582146450955e-05, "loss": 0.5172, "step": 14497 }, { "epoch": 0.3980779791323449, "grad_norm": 0.48609480261802673, "learning_rate": 1.812833057754133e-05, "loss": 0.6212, "step": 14498 }, { "epoch": 0.3981054365733114, "grad_norm": 0.3461854159832001, "learning_rate": 1.8128078993469772e-05, "loss": 0.4556, "step": 14499 }, { "epoch": 0.39813289401427787, "grad_norm": 0.5373040437698364, "learning_rate": 1.8127827394236754e-05, "loss": 0.6253, "step": 14500 }, { "epoch": 0.39816035145524437, "grad_norm": 0.3424187898635864, "learning_rate": 1.8127575779842744e-05, "loss": 0.4815, "step": 14501 }, { "epoch": 0.39818780889621086, "grad_norm": 0.3860345482826233, "learning_rate": 1.812732415028821e-05, "loss": 0.5505, "step": 14502 }, { "epoch": 0.39821526633717735, "grad_norm": 0.387165904045105, "learning_rate": 1.8127072505573622e-05, "loss": 0.4657, "step": 14503 }, { "epoch": 0.3982427237781439, "grad_norm": 0.43072494864463806, "learning_rate": 1.8126820845699452e-05, "loss": 0.5798, "step": 14504 }, { "epoch": 0.3982701812191104, "grad_norm": 0.4204075336456299, "learning_rate": 1.8126569170666167e-05, "loss": 0.5313, "step": 14505 }, { "epoch": 0.3982976386600769, "grad_norm": 0.40721285343170166, "learning_rate": 1.8126317480474235e-05, "loss": 0.5391, "step": 14506 }, { "epoch": 0.3983250961010434, "grad_norm": 0.3725564777851105, "learning_rate": 1.812606577512413e-05, "loss": 0.515, "step": 14507 }, { "epoch": 0.3983525535420099, "grad_norm": 0.3631134331226349, "learning_rate": 1.812581405461632e-05, "loss": 0.4796, "step": 14508 }, { "epoch": 0.3983800109829764, "grad_norm": 0.35960710048675537, "learning_rate": 1.8125562318951267e-05, "loss": 0.4827, "step": 14509 }, { "epoch": 0.39840746842394287, "grad_norm": 0.3299707770347595, "learning_rate": 1.812531056812945e-05, "loss": 0.4821, "step": 14510 }, { "epoch": 0.3984349258649094, "grad_norm": 0.3916800320148468, "learning_rate": 1.8125058802151337e-05, "loss": 0.5145, "step": 14511 }, { "epoch": 0.3984623833058759, "grad_norm": 0.37003663182258606, "learning_rate": 1.8124807021017393e-05, "loss": 0.5541, "step": 14512 }, { "epoch": 0.3984898407468424, "grad_norm": 0.35234832763671875, "learning_rate": 1.8124555224728096e-05, "loss": 0.5012, "step": 14513 }, { "epoch": 0.3985172981878089, "grad_norm": 0.3942378759384155, "learning_rate": 1.8124303413283906e-05, "loss": 0.4889, "step": 14514 }, { "epoch": 0.3985447556287754, "grad_norm": 0.36064383387565613, "learning_rate": 1.81240515866853e-05, "loss": 0.597, "step": 14515 }, { "epoch": 0.3985722130697419, "grad_norm": 0.37178730964660645, "learning_rate": 1.8123799744932742e-05, "loss": 0.5357, "step": 14516 }, { "epoch": 0.3985996705107084, "grad_norm": 0.33967429399490356, "learning_rate": 1.8123547888026705e-05, "loss": 0.5376, "step": 14517 }, { "epoch": 0.39862712795167493, "grad_norm": 0.37728002667427063, "learning_rate": 1.812329601596766e-05, "loss": 0.5087, "step": 14518 }, { "epoch": 0.3986545853926414, "grad_norm": 0.36005493998527527, "learning_rate": 1.8123044128756072e-05, "loss": 0.5387, "step": 14519 }, { "epoch": 0.3986820428336079, "grad_norm": 0.3660268485546112, "learning_rate": 1.8122792226392422e-05, "loss": 0.473, "step": 14520 }, { "epoch": 0.3987095002745744, "grad_norm": 0.3849450349807739, "learning_rate": 1.8122540308877165e-05, "loss": 0.5186, "step": 14521 }, { "epoch": 0.3987369577155409, "grad_norm": 0.39646607637405396, "learning_rate": 1.812228837621078e-05, "loss": 0.5681, "step": 14522 }, { "epoch": 0.3987644151565074, "grad_norm": 0.4173043370246887, "learning_rate": 1.8122036428393732e-05, "loss": 0.5668, "step": 14523 }, { "epoch": 0.3987918725974739, "grad_norm": 0.3815686106681824, "learning_rate": 1.81217844654265e-05, "loss": 0.5754, "step": 14524 }, { "epoch": 0.39881933003844044, "grad_norm": 0.3157976269721985, "learning_rate": 1.812153248730954e-05, "loss": 0.4429, "step": 14525 }, { "epoch": 0.39884678747940694, "grad_norm": 0.36779212951660156, "learning_rate": 1.8121280494043338e-05, "loss": 0.4494, "step": 14526 }, { "epoch": 0.39887424492037343, "grad_norm": 0.42627009749412537, "learning_rate": 1.812102848562835e-05, "loss": 0.5596, "step": 14527 }, { "epoch": 0.3989017023613399, "grad_norm": 0.3311494290828705, "learning_rate": 1.8120776462065054e-05, "loss": 0.5265, "step": 14528 }, { "epoch": 0.3989291598023064, "grad_norm": 0.364096462726593, "learning_rate": 1.8120524423353917e-05, "loss": 0.5542, "step": 14529 }, { "epoch": 0.3989566172432729, "grad_norm": 0.3950422704219818, "learning_rate": 1.812027236949541e-05, "loss": 0.6113, "step": 14530 }, { "epoch": 0.3989840746842394, "grad_norm": 0.36956584453582764, "learning_rate": 1.8120020300490007e-05, "loss": 0.532, "step": 14531 }, { "epoch": 0.3990115321252059, "grad_norm": 0.3593474328517914, "learning_rate": 1.8119768216338172e-05, "loss": 0.4592, "step": 14532 }, { "epoch": 0.39903898956617245, "grad_norm": 0.3933591842651367, "learning_rate": 1.8119516117040375e-05, "loss": 0.4273, "step": 14533 }, { "epoch": 0.39906644700713895, "grad_norm": 0.37855952978134155, "learning_rate": 1.8119264002597094e-05, "loss": 0.5175, "step": 14534 }, { "epoch": 0.39909390444810544, "grad_norm": 0.3626176416873932, "learning_rate": 1.8119011873008792e-05, "loss": 0.5515, "step": 14535 }, { "epoch": 0.39912136188907194, "grad_norm": 0.3726052939891815, "learning_rate": 1.811875972827594e-05, "loss": 0.6072, "step": 14536 }, { "epoch": 0.39914881933003843, "grad_norm": 0.4318467974662781, "learning_rate": 1.811850756839901e-05, "loss": 0.5074, "step": 14537 }, { "epoch": 0.3991762767710049, "grad_norm": 0.3993469476699829, "learning_rate": 1.8118255393378478e-05, "loss": 0.5401, "step": 14538 }, { "epoch": 0.3992037342119714, "grad_norm": 0.41841697692871094, "learning_rate": 1.8118003203214805e-05, "loss": 0.5782, "step": 14539 }, { "epoch": 0.39923119165293797, "grad_norm": 0.39724504947662354, "learning_rate": 1.8117750997908463e-05, "loss": 0.5093, "step": 14540 }, { "epoch": 0.39925864909390446, "grad_norm": 0.3400951325893402, "learning_rate": 1.8117498777459926e-05, "loss": 0.548, "step": 14541 }, { "epoch": 0.39928610653487095, "grad_norm": 0.4158440828323364, "learning_rate": 1.8117246541869662e-05, "loss": 0.5076, "step": 14542 }, { "epoch": 0.39931356397583745, "grad_norm": 0.3467126786708832, "learning_rate": 1.8116994291138143e-05, "loss": 0.5161, "step": 14543 }, { "epoch": 0.39934102141680394, "grad_norm": 0.34650006890296936, "learning_rate": 1.811674202526584e-05, "loss": 0.5382, "step": 14544 }, { "epoch": 0.39936847885777044, "grad_norm": 0.35564664006233215, "learning_rate": 1.8116489744253222e-05, "loss": 0.5223, "step": 14545 }, { "epoch": 0.39939593629873693, "grad_norm": 0.38355106115341187, "learning_rate": 1.811623744810076e-05, "loss": 0.5258, "step": 14546 }, { "epoch": 0.3994233937397035, "grad_norm": 0.41693946719169617, "learning_rate": 1.8115985136808923e-05, "loss": 0.5383, "step": 14547 }, { "epoch": 0.39945085118067, "grad_norm": 0.35699573159217834, "learning_rate": 1.8115732810378187e-05, "loss": 0.5691, "step": 14548 }, { "epoch": 0.39947830862163647, "grad_norm": 0.3869537115097046, "learning_rate": 1.8115480468809018e-05, "loss": 0.522, "step": 14549 }, { "epoch": 0.39950576606260296, "grad_norm": 0.3860298991203308, "learning_rate": 1.8115228112101885e-05, "loss": 0.5552, "step": 14550 }, { "epoch": 0.39953322350356946, "grad_norm": 0.34042471647262573, "learning_rate": 1.8114975740257265e-05, "loss": 0.4784, "step": 14551 }, { "epoch": 0.39956068094453595, "grad_norm": 0.5583789944648743, "learning_rate": 1.8114723353275624e-05, "loss": 0.5335, "step": 14552 }, { "epoch": 0.39958813838550244, "grad_norm": 0.33001068234443665, "learning_rate": 1.8114470951157432e-05, "loss": 0.5381, "step": 14553 }, { "epoch": 0.399615595826469, "grad_norm": 0.3628605604171753, "learning_rate": 1.8114218533903165e-05, "loss": 0.5554, "step": 14554 }, { "epoch": 0.3996430532674355, "grad_norm": 0.3614124655723572, "learning_rate": 1.811396610151329e-05, "loss": 0.5385, "step": 14555 }, { "epoch": 0.399670510708402, "grad_norm": 0.753084123134613, "learning_rate": 1.8113713653988275e-05, "loss": 0.5529, "step": 14556 }, { "epoch": 0.3996979681493685, "grad_norm": 0.4159970283508301, "learning_rate": 1.8113461191328597e-05, "loss": 0.5547, "step": 14557 }, { "epoch": 0.39972542559033497, "grad_norm": 0.347696453332901, "learning_rate": 1.8113208713534727e-05, "loss": 0.4588, "step": 14558 }, { "epoch": 0.39975288303130146, "grad_norm": 0.32873183488845825, "learning_rate": 1.8112956220607133e-05, "loss": 0.4147, "step": 14559 }, { "epoch": 0.39978034047226796, "grad_norm": 0.4027496576309204, "learning_rate": 1.811270371254628e-05, "loss": 0.5228, "step": 14560 }, { "epoch": 0.3998077979132345, "grad_norm": 0.37907877564430237, "learning_rate": 1.8112451189352653e-05, "loss": 0.4891, "step": 14561 }, { "epoch": 0.399835255354201, "grad_norm": 0.3620721399784088, "learning_rate": 1.8112198651026708e-05, "loss": 0.4832, "step": 14562 }, { "epoch": 0.3998627127951675, "grad_norm": 0.4649926424026489, "learning_rate": 1.811194609756893e-05, "loss": 0.5398, "step": 14563 }, { "epoch": 0.399890170236134, "grad_norm": 0.3576048016548157, "learning_rate": 1.8111693528979782e-05, "loss": 0.5137, "step": 14564 }, { "epoch": 0.3999176276771005, "grad_norm": 0.33361539244651794, "learning_rate": 1.8111440945259733e-05, "loss": 0.4386, "step": 14565 }, { "epoch": 0.399945085118067, "grad_norm": 0.3763444423675537, "learning_rate": 1.811118834640926e-05, "loss": 0.538, "step": 14566 }, { "epoch": 0.3999725425590335, "grad_norm": 0.4196290969848633, "learning_rate": 1.8110935732428833e-05, "loss": 0.5631, "step": 14567 }, { "epoch": 0.4, "grad_norm": 0.39718329906463623, "learning_rate": 1.811068310331892e-05, "loss": 0.5706, "step": 14568 }, { "epoch": 0.4000274574409665, "grad_norm": 0.40439608693122864, "learning_rate": 1.8110430459079996e-05, "loss": 0.5172, "step": 14569 }, { "epoch": 0.400054914881933, "grad_norm": 0.5045031309127808, "learning_rate": 1.8110177799712532e-05, "loss": 0.5214, "step": 14570 }, { "epoch": 0.4000823723228995, "grad_norm": 0.32855746150016785, "learning_rate": 1.8109925125216995e-05, "loss": 0.5805, "step": 14571 }, { "epoch": 0.400109829763866, "grad_norm": 0.36896461248397827, "learning_rate": 1.810967243559386e-05, "loss": 0.5011, "step": 14572 }, { "epoch": 0.4001372872048325, "grad_norm": 0.4506534934043884, "learning_rate": 1.8109419730843597e-05, "loss": 0.5419, "step": 14573 }, { "epoch": 0.400164744645799, "grad_norm": 0.3578657805919647, "learning_rate": 1.810916701096668e-05, "loss": 0.5084, "step": 14574 }, { "epoch": 0.40019220208676554, "grad_norm": 0.351916640996933, "learning_rate": 1.810891427596358e-05, "loss": 0.55, "step": 14575 }, { "epoch": 0.40021965952773203, "grad_norm": 0.3611028790473938, "learning_rate": 1.8108661525834762e-05, "loss": 0.4616, "step": 14576 }, { "epoch": 0.4002471169686985, "grad_norm": 0.4008693993091583, "learning_rate": 1.8108408760580705e-05, "loss": 0.5085, "step": 14577 }, { "epoch": 0.400274574409665, "grad_norm": 0.33283573389053345, "learning_rate": 1.8108155980201876e-05, "loss": 0.455, "step": 14578 }, { "epoch": 0.4003020318506315, "grad_norm": 0.44301578402519226, "learning_rate": 1.810790318469875e-05, "loss": 0.58, "step": 14579 }, { "epoch": 0.400329489291598, "grad_norm": 0.3604641854763031, "learning_rate": 1.8107650374071795e-05, "loss": 0.5088, "step": 14580 }, { "epoch": 0.4003569467325645, "grad_norm": 0.3848716616630554, "learning_rate": 1.8107397548321487e-05, "loss": 0.5678, "step": 14581 }, { "epoch": 0.40038440417353105, "grad_norm": 0.32435905933380127, "learning_rate": 1.8107144707448296e-05, "loss": 0.4134, "step": 14582 }, { "epoch": 0.40041186161449754, "grad_norm": 0.36878836154937744, "learning_rate": 1.8106891851452687e-05, "loss": 0.5509, "step": 14583 }, { "epoch": 0.40043931905546404, "grad_norm": 0.38231074810028076, "learning_rate": 1.810663898033514e-05, "loss": 0.5863, "step": 14584 }, { "epoch": 0.40046677649643053, "grad_norm": 0.4346892535686493, "learning_rate": 1.8106386094096126e-05, "loss": 0.6265, "step": 14585 }, { "epoch": 0.400494233937397, "grad_norm": 0.3891448974609375, "learning_rate": 1.8106133192736113e-05, "loss": 0.5226, "step": 14586 }, { "epoch": 0.4005216913783635, "grad_norm": 0.32054242491722107, "learning_rate": 1.8105880276255575e-05, "loss": 0.428, "step": 14587 }, { "epoch": 0.40054914881933, "grad_norm": 0.34748271107673645, "learning_rate": 1.8105627344654984e-05, "loss": 0.4694, "step": 14588 }, { "epoch": 0.40057660626029656, "grad_norm": 0.3230980932712555, "learning_rate": 1.8105374397934808e-05, "loss": 0.5682, "step": 14589 }, { "epoch": 0.40060406370126306, "grad_norm": 0.38302499055862427, "learning_rate": 1.8105121436095524e-05, "loss": 0.5033, "step": 14590 }, { "epoch": 0.40063152114222955, "grad_norm": 0.3759732246398926, "learning_rate": 1.81048684591376e-05, "loss": 0.557, "step": 14591 }, { "epoch": 0.40065897858319605, "grad_norm": 0.3411097526550293, "learning_rate": 1.8104615467061513e-05, "loss": 0.5231, "step": 14592 }, { "epoch": 0.40068643602416254, "grad_norm": 0.3845718502998352, "learning_rate": 1.810436245986773e-05, "loss": 0.5494, "step": 14593 }, { "epoch": 0.40071389346512903, "grad_norm": 0.4285094141960144, "learning_rate": 1.8104109437556725e-05, "loss": 0.5376, "step": 14594 }, { "epoch": 0.40074135090609553, "grad_norm": 0.4668791592121124, "learning_rate": 1.8103856400128967e-05, "loss": 0.6033, "step": 14595 }, { "epoch": 0.4007688083470621, "grad_norm": 0.362150102853775, "learning_rate": 1.810360334758493e-05, "loss": 0.5198, "step": 14596 }, { "epoch": 0.40079626578802857, "grad_norm": 0.35687899589538574, "learning_rate": 1.810335027992509e-05, "loss": 0.5229, "step": 14597 }, { "epoch": 0.40082372322899507, "grad_norm": 0.4366203844547272, "learning_rate": 1.810309719714991e-05, "loss": 0.5117, "step": 14598 }, { "epoch": 0.40085118066996156, "grad_norm": 0.47704675793647766, "learning_rate": 1.8102844099259874e-05, "loss": 0.5685, "step": 14599 }, { "epoch": 0.40087863811092805, "grad_norm": 0.3518785238265991, "learning_rate": 1.810259098625544e-05, "loss": 0.551, "step": 14600 }, { "epoch": 0.40090609555189455, "grad_norm": 0.36356934905052185, "learning_rate": 1.8102337858137094e-05, "loss": 0.4711, "step": 14601 }, { "epoch": 0.40093355299286104, "grad_norm": 0.3679013252258301, "learning_rate": 1.81020847149053e-05, "loss": 0.5702, "step": 14602 }, { "epoch": 0.4009610104338276, "grad_norm": 0.38238468766212463, "learning_rate": 1.8101831556560533e-05, "loss": 0.5566, "step": 14603 }, { "epoch": 0.4009884678747941, "grad_norm": 0.38162165880203247, "learning_rate": 1.8101578383103264e-05, "loss": 0.5227, "step": 14604 }, { "epoch": 0.4010159253157606, "grad_norm": 0.38608041405677795, "learning_rate": 1.8101325194533965e-05, "loss": 0.5085, "step": 14605 }, { "epoch": 0.4010433827567271, "grad_norm": 0.389178991317749, "learning_rate": 1.810107199085311e-05, "loss": 0.49, "step": 14606 }, { "epoch": 0.40107084019769357, "grad_norm": 0.3352128267288208, "learning_rate": 1.810081877206117e-05, "loss": 0.4114, "step": 14607 }, { "epoch": 0.40109829763866006, "grad_norm": 0.35472002625465393, "learning_rate": 1.8100565538158617e-05, "loss": 0.5123, "step": 14608 }, { "epoch": 0.40112575507962656, "grad_norm": 0.4181106686592102, "learning_rate": 1.810031228914592e-05, "loss": 0.5225, "step": 14609 }, { "epoch": 0.4011532125205931, "grad_norm": 0.3696853518486023, "learning_rate": 1.8100059025023563e-05, "loss": 0.5066, "step": 14610 }, { "epoch": 0.4011806699615596, "grad_norm": 0.36421290040016174, "learning_rate": 1.8099805745792004e-05, "loss": 0.5232, "step": 14611 }, { "epoch": 0.4012081274025261, "grad_norm": 0.42319098114967346, "learning_rate": 1.8099552451451726e-05, "loss": 0.5266, "step": 14612 }, { "epoch": 0.4012355848434926, "grad_norm": 0.37401083111763, "learning_rate": 1.8099299142003195e-05, "loss": 0.5191, "step": 14613 }, { "epoch": 0.4012630422844591, "grad_norm": 0.4107027053833008, "learning_rate": 1.809904581744689e-05, "loss": 0.575, "step": 14614 }, { "epoch": 0.4012904997254256, "grad_norm": 0.38722971081733704, "learning_rate": 1.8098792477783274e-05, "loss": 0.5443, "step": 14615 }, { "epoch": 0.40131795716639207, "grad_norm": 0.3746570646762848, "learning_rate": 1.809853912301283e-05, "loss": 0.5467, "step": 14616 }, { "epoch": 0.4013454146073586, "grad_norm": 0.3936925530433655, "learning_rate": 1.8098285753136023e-05, "loss": 0.5053, "step": 14617 }, { "epoch": 0.4013728720483251, "grad_norm": 0.3695125877857208, "learning_rate": 1.809803236815333e-05, "loss": 0.5375, "step": 14618 }, { "epoch": 0.4014003294892916, "grad_norm": 0.3335988223552704, "learning_rate": 1.8097778968065222e-05, "loss": 0.5268, "step": 14619 }, { "epoch": 0.4014277869302581, "grad_norm": 0.3471565842628479, "learning_rate": 1.809752555287217e-05, "loss": 0.5487, "step": 14620 }, { "epoch": 0.4014552443712246, "grad_norm": 0.35443001985549927, "learning_rate": 1.8097272122574653e-05, "loss": 0.4857, "step": 14621 }, { "epoch": 0.4014827018121911, "grad_norm": 0.3892938494682312, "learning_rate": 1.8097018677173133e-05, "loss": 0.5787, "step": 14622 }, { "epoch": 0.4015101592531576, "grad_norm": 0.3583002984523773, "learning_rate": 1.809676521666809e-05, "loss": 0.4904, "step": 14623 }, { "epoch": 0.40153761669412413, "grad_norm": 0.35209596157073975, "learning_rate": 1.809651174106e-05, "loss": 0.5368, "step": 14624 }, { "epoch": 0.4015650741350906, "grad_norm": 0.37248530983924866, "learning_rate": 1.809625825034933e-05, "loss": 0.5593, "step": 14625 }, { "epoch": 0.4015925315760571, "grad_norm": 0.3603897988796234, "learning_rate": 1.809600474453655e-05, "loss": 0.4755, "step": 14626 }, { "epoch": 0.4016199890170236, "grad_norm": 0.35966944694519043, "learning_rate": 1.809575122362214e-05, "loss": 0.4805, "step": 14627 }, { "epoch": 0.4016474464579901, "grad_norm": 0.3559662997722626, "learning_rate": 1.8095497687606573e-05, "loss": 0.5058, "step": 14628 }, { "epoch": 0.4016749038989566, "grad_norm": 0.37940093874931335, "learning_rate": 1.8095244136490316e-05, "loss": 0.4732, "step": 14629 }, { "epoch": 0.4017023613399231, "grad_norm": 0.3827895522117615, "learning_rate": 1.8094990570273844e-05, "loss": 0.5145, "step": 14630 }, { "epoch": 0.40172981878088965, "grad_norm": 0.40670597553253174, "learning_rate": 1.8094736988957634e-05, "loss": 0.4797, "step": 14631 }, { "epoch": 0.40175727622185614, "grad_norm": 0.39492595195770264, "learning_rate": 1.8094483392542154e-05, "loss": 0.5621, "step": 14632 }, { "epoch": 0.40178473366282264, "grad_norm": 0.4619510769844055, "learning_rate": 1.8094229781027877e-05, "loss": 0.4798, "step": 14633 }, { "epoch": 0.40181219110378913, "grad_norm": 0.3305439054965973, "learning_rate": 1.8093976154415283e-05, "loss": 0.4672, "step": 14634 }, { "epoch": 0.4018396485447556, "grad_norm": 0.41355642676353455, "learning_rate": 1.8093722512704838e-05, "loss": 0.5364, "step": 14635 }, { "epoch": 0.4018671059857221, "grad_norm": 0.43116313219070435, "learning_rate": 1.8093468855897016e-05, "loss": 0.4914, "step": 14636 }, { "epoch": 0.4018945634266886, "grad_norm": 0.3630656898021698, "learning_rate": 1.8093215183992293e-05, "loss": 0.4595, "step": 14637 }, { "epoch": 0.40192202086765516, "grad_norm": 0.3815681040287018, "learning_rate": 1.8092961496991138e-05, "loss": 0.5282, "step": 14638 }, { "epoch": 0.40194947830862165, "grad_norm": 0.35625553131103516, "learning_rate": 1.809270779489403e-05, "loss": 0.4567, "step": 14639 }, { "epoch": 0.40197693574958815, "grad_norm": 0.37330207228660583, "learning_rate": 1.8092454077701437e-05, "loss": 0.5838, "step": 14640 }, { "epoch": 0.40200439319055464, "grad_norm": 0.37651076912879944, "learning_rate": 1.809220034541384e-05, "loss": 0.5914, "step": 14641 }, { "epoch": 0.40203185063152114, "grad_norm": 0.40282702445983887, "learning_rate": 1.8091946598031696e-05, "loss": 0.5327, "step": 14642 }, { "epoch": 0.40205930807248763, "grad_norm": 0.33519160747528076, "learning_rate": 1.8091692835555498e-05, "loss": 0.4848, "step": 14643 }, { "epoch": 0.4020867655134541, "grad_norm": 0.3642624616622925, "learning_rate": 1.8091439057985708e-05, "loss": 0.5929, "step": 14644 }, { "epoch": 0.4021142229544207, "grad_norm": 0.3314264416694641, "learning_rate": 1.80911852653228e-05, "loss": 0.5036, "step": 14645 }, { "epoch": 0.40214168039538717, "grad_norm": 0.4152487814426422, "learning_rate": 1.809093145756725e-05, "loss": 0.5889, "step": 14646 }, { "epoch": 0.40216913783635366, "grad_norm": 0.3866230845451355, "learning_rate": 1.809067763471953e-05, "loss": 0.4619, "step": 14647 }, { "epoch": 0.40219659527732016, "grad_norm": 0.3414499759674072, "learning_rate": 1.8090423796780114e-05, "loss": 0.4552, "step": 14648 }, { "epoch": 0.40222405271828665, "grad_norm": 0.4598245620727539, "learning_rate": 1.8090169943749477e-05, "loss": 0.5363, "step": 14649 }, { "epoch": 0.40225151015925315, "grad_norm": 0.42448121309280396, "learning_rate": 1.808991607562809e-05, "loss": 0.6011, "step": 14650 }, { "epoch": 0.40227896760021964, "grad_norm": 0.38276731967926025, "learning_rate": 1.8089662192416426e-05, "loss": 0.4952, "step": 14651 }, { "epoch": 0.4023064250411862, "grad_norm": 0.34084275364875793, "learning_rate": 1.8089408294114964e-05, "loss": 0.5046, "step": 14652 }, { "epoch": 0.4023338824821527, "grad_norm": 0.3555589020252228, "learning_rate": 1.808915438072417e-05, "loss": 0.6407, "step": 14653 }, { "epoch": 0.4023613399231192, "grad_norm": 0.37718692421913147, "learning_rate": 1.8088900452244523e-05, "loss": 0.5538, "step": 14654 }, { "epoch": 0.40238879736408567, "grad_norm": 0.40285661816596985, "learning_rate": 1.808864650867649e-05, "loss": 0.4983, "step": 14655 }, { "epoch": 0.40241625480505216, "grad_norm": 0.357095330953598, "learning_rate": 1.8088392550020556e-05, "loss": 0.4466, "step": 14656 }, { "epoch": 0.40244371224601866, "grad_norm": 0.350110799074173, "learning_rate": 1.8088138576277186e-05, "loss": 0.5806, "step": 14657 }, { "epoch": 0.40247116968698515, "grad_norm": 0.39888471364974976, "learning_rate": 1.808788458744686e-05, "loss": 0.5911, "step": 14658 }, { "epoch": 0.4024986271279517, "grad_norm": 0.40994754433631897, "learning_rate": 1.8087630583530042e-05, "loss": 0.6132, "step": 14659 }, { "epoch": 0.4025260845689182, "grad_norm": 0.3647705316543579, "learning_rate": 1.8087376564527215e-05, "loss": 0.5736, "step": 14660 }, { "epoch": 0.4025535420098847, "grad_norm": 0.3608563542366028, "learning_rate": 1.8087122530438847e-05, "loss": 0.5674, "step": 14661 }, { "epoch": 0.4025809994508512, "grad_norm": 0.42957037687301636, "learning_rate": 1.808686848126542e-05, "loss": 0.5049, "step": 14662 }, { "epoch": 0.4026084568918177, "grad_norm": 0.40946075320243835, "learning_rate": 1.8086614417007397e-05, "loss": 0.5047, "step": 14663 }, { "epoch": 0.4026359143327842, "grad_norm": 0.352499395608902, "learning_rate": 1.8086360337665257e-05, "loss": 0.4078, "step": 14664 }, { "epoch": 0.40266337177375067, "grad_norm": 0.32900241017341614, "learning_rate": 1.8086106243239473e-05, "loss": 0.475, "step": 14665 }, { "epoch": 0.40269082921471716, "grad_norm": 0.3819458484649658, "learning_rate": 1.8085852133730525e-05, "loss": 0.5212, "step": 14666 }, { "epoch": 0.4027182866556837, "grad_norm": 0.3217844069004059, "learning_rate": 1.8085598009138882e-05, "loss": 0.4655, "step": 14667 }, { "epoch": 0.4027457440966502, "grad_norm": 0.37932631373405457, "learning_rate": 1.8085343869465014e-05, "loss": 0.5388, "step": 14668 }, { "epoch": 0.4027732015376167, "grad_norm": 0.5394917726516724, "learning_rate": 1.80850897147094e-05, "loss": 0.5952, "step": 14669 }, { "epoch": 0.4028006589785832, "grad_norm": 0.4047355055809021, "learning_rate": 1.8084835544872513e-05, "loss": 0.557, "step": 14670 }, { "epoch": 0.4028281164195497, "grad_norm": 0.37440744042396545, "learning_rate": 1.808458135995483e-05, "loss": 0.5222, "step": 14671 }, { "epoch": 0.4028555738605162, "grad_norm": 0.4280283451080322, "learning_rate": 1.8084327159956822e-05, "loss": 0.5065, "step": 14672 }, { "epoch": 0.4028830313014827, "grad_norm": 0.41443192958831787, "learning_rate": 1.808407294487896e-05, "loss": 0.5886, "step": 14673 }, { "epoch": 0.4029104887424492, "grad_norm": 0.34922876954078674, "learning_rate": 1.8083818714721726e-05, "loss": 0.529, "step": 14674 }, { "epoch": 0.4029379461834157, "grad_norm": 0.37352287769317627, "learning_rate": 1.8083564469485588e-05, "loss": 0.5442, "step": 14675 }, { "epoch": 0.4029654036243822, "grad_norm": 0.36386463046073914, "learning_rate": 1.8083310209171023e-05, "loss": 0.4846, "step": 14676 }, { "epoch": 0.4029928610653487, "grad_norm": 0.39302533864974976, "learning_rate": 1.8083055933778503e-05, "loss": 0.6006, "step": 14677 }, { "epoch": 0.4030203185063152, "grad_norm": 0.36087071895599365, "learning_rate": 1.8082801643308505e-05, "loss": 0.4786, "step": 14678 }, { "epoch": 0.4030477759472817, "grad_norm": 0.3479931652545929, "learning_rate": 1.8082547337761503e-05, "loss": 0.5022, "step": 14679 }, { "epoch": 0.4030752333882482, "grad_norm": 0.37048014998435974, "learning_rate": 1.808229301713797e-05, "loss": 0.4879, "step": 14680 }, { "epoch": 0.40310269082921474, "grad_norm": 0.39467135071754456, "learning_rate": 1.808203868143838e-05, "loss": 0.5516, "step": 14681 }, { "epoch": 0.40313014827018123, "grad_norm": 0.35756564140319824, "learning_rate": 1.808178433066321e-05, "loss": 0.4935, "step": 14682 }, { "epoch": 0.4031576057111477, "grad_norm": 0.42653781175613403, "learning_rate": 1.808152996481293e-05, "loss": 0.5046, "step": 14683 }, { "epoch": 0.4031850631521142, "grad_norm": 0.49832984805107117, "learning_rate": 1.8081275583888024e-05, "loss": 0.5692, "step": 14684 }, { "epoch": 0.4032125205930807, "grad_norm": 0.37209516763687134, "learning_rate": 1.8081021187888953e-05, "loss": 0.6149, "step": 14685 }, { "epoch": 0.4032399780340472, "grad_norm": 0.3182792365550995, "learning_rate": 1.80807667768162e-05, "loss": 0.43, "step": 14686 }, { "epoch": 0.4032674354750137, "grad_norm": 0.3850499987602234, "learning_rate": 1.8080512350670238e-05, "loss": 0.5579, "step": 14687 }, { "epoch": 0.40329489291598025, "grad_norm": 0.5896596312522888, "learning_rate": 1.8080257909451543e-05, "loss": 0.5545, "step": 14688 }, { "epoch": 0.40332235035694675, "grad_norm": 0.3459538221359253, "learning_rate": 1.8080003453160585e-05, "loss": 0.5328, "step": 14689 }, { "epoch": 0.40334980779791324, "grad_norm": 0.35975804924964905, "learning_rate": 1.8079748981797844e-05, "loss": 0.4849, "step": 14690 }, { "epoch": 0.40337726523887973, "grad_norm": 0.40735894441604614, "learning_rate": 1.8079494495363793e-05, "loss": 0.4933, "step": 14691 }, { "epoch": 0.40340472267984623, "grad_norm": 0.34842443466186523, "learning_rate": 1.8079239993858903e-05, "loss": 0.5368, "step": 14692 }, { "epoch": 0.4034321801208127, "grad_norm": 0.35095924139022827, "learning_rate": 1.807898547728366e-05, "loss": 0.4492, "step": 14693 }, { "epoch": 0.4034596375617792, "grad_norm": 0.38816022872924805, "learning_rate": 1.807873094563852e-05, "loss": 0.527, "step": 14694 }, { "epoch": 0.40348709500274577, "grad_norm": 0.3750323951244354, "learning_rate": 1.807847639892397e-05, "loss": 0.5398, "step": 14695 }, { "epoch": 0.40351455244371226, "grad_norm": 0.3734166920185089, "learning_rate": 1.8078221837140487e-05, "loss": 0.5275, "step": 14696 }, { "epoch": 0.40354200988467875, "grad_norm": 0.4601304531097412, "learning_rate": 1.8077967260288544e-05, "loss": 0.5641, "step": 14697 }, { "epoch": 0.40356946732564525, "grad_norm": 0.41426676511764526, "learning_rate": 1.807771266836861e-05, "loss": 0.5358, "step": 14698 }, { "epoch": 0.40359692476661174, "grad_norm": 0.3694614768028259, "learning_rate": 1.8077458061381167e-05, "loss": 0.4691, "step": 14699 }, { "epoch": 0.40362438220757824, "grad_norm": 0.37172043323516846, "learning_rate": 1.807720343932668e-05, "loss": 0.5042, "step": 14700 }, { "epoch": 0.40365183964854473, "grad_norm": 0.38693997263908386, "learning_rate": 1.8076948802205637e-05, "loss": 0.5139, "step": 14701 }, { "epoch": 0.4036792970895113, "grad_norm": 0.38801929354667664, "learning_rate": 1.8076694150018506e-05, "loss": 0.5034, "step": 14702 }, { "epoch": 0.4037067545304778, "grad_norm": 0.3789730668067932, "learning_rate": 1.807643948276576e-05, "loss": 0.5463, "step": 14703 }, { "epoch": 0.40373421197144427, "grad_norm": 0.3823707103729248, "learning_rate": 1.8076184800447878e-05, "loss": 0.5941, "step": 14704 }, { "epoch": 0.40376166941241076, "grad_norm": 0.3918725550174713, "learning_rate": 1.8075930103065332e-05, "loss": 0.5635, "step": 14705 }, { "epoch": 0.40378912685337726, "grad_norm": 0.5018916726112366, "learning_rate": 1.80756753906186e-05, "loss": 0.5068, "step": 14706 }, { "epoch": 0.40381658429434375, "grad_norm": 0.3257279098033905, "learning_rate": 1.807542066310816e-05, "loss": 0.487, "step": 14707 }, { "epoch": 0.40384404173531024, "grad_norm": 0.34055647253990173, "learning_rate": 1.807516592053448e-05, "loss": 0.4276, "step": 14708 }, { "epoch": 0.4038714991762768, "grad_norm": 0.3674917221069336, "learning_rate": 1.8074911162898034e-05, "loss": 0.5285, "step": 14709 }, { "epoch": 0.4038989566172433, "grad_norm": 0.4070485830307007, "learning_rate": 1.8074656390199305e-05, "loss": 0.5518, "step": 14710 }, { "epoch": 0.4039264140582098, "grad_norm": 0.4247615933418274, "learning_rate": 1.8074401602438767e-05, "loss": 0.6084, "step": 14711 }, { "epoch": 0.4039538714991763, "grad_norm": 0.36032289266586304, "learning_rate": 1.807414679961689e-05, "loss": 0.4712, "step": 14712 }, { "epoch": 0.40398132894014277, "grad_norm": 0.39505571126937866, "learning_rate": 1.8073891981734153e-05, "loss": 0.4806, "step": 14713 }, { "epoch": 0.40400878638110926, "grad_norm": 0.3583564758300781, "learning_rate": 1.807363714879103e-05, "loss": 0.4543, "step": 14714 }, { "epoch": 0.40403624382207576, "grad_norm": 0.37262532114982605, "learning_rate": 1.8073382300788e-05, "loss": 0.5515, "step": 14715 }, { "epoch": 0.4040637012630423, "grad_norm": 0.3515414297580719, "learning_rate": 1.807312743772553e-05, "loss": 0.5656, "step": 14716 }, { "epoch": 0.4040911587040088, "grad_norm": 0.3217518627643585, "learning_rate": 1.8072872559604105e-05, "loss": 0.5052, "step": 14717 }, { "epoch": 0.4041186161449753, "grad_norm": 0.38194459676742554, "learning_rate": 1.8072617666424195e-05, "loss": 0.5457, "step": 14718 }, { "epoch": 0.4041460735859418, "grad_norm": 0.35004082322120667, "learning_rate": 1.807236275818628e-05, "loss": 0.4648, "step": 14719 }, { "epoch": 0.4041735310269083, "grad_norm": 0.3920309543609619, "learning_rate": 1.8072107834890823e-05, "loss": 0.5016, "step": 14720 }, { "epoch": 0.4042009884678748, "grad_norm": 0.4213228225708008, "learning_rate": 1.8071852896538314e-05, "loss": 0.5765, "step": 14721 }, { "epoch": 0.40422844590884127, "grad_norm": 0.5486566424369812, "learning_rate": 1.8071597943129225e-05, "loss": 0.5685, "step": 14722 }, { "epoch": 0.4042559033498078, "grad_norm": 0.4181772470474243, "learning_rate": 1.8071342974664027e-05, "loss": 0.4338, "step": 14723 }, { "epoch": 0.4042833607907743, "grad_norm": 0.34769001603126526, "learning_rate": 1.8071087991143198e-05, "loss": 0.4945, "step": 14724 }, { "epoch": 0.4043108182317408, "grad_norm": 0.33215898275375366, "learning_rate": 1.8070832992567215e-05, "loss": 0.5034, "step": 14725 }, { "epoch": 0.4043382756727073, "grad_norm": 0.38020437955856323, "learning_rate": 1.8070577978936553e-05, "loss": 0.5375, "step": 14726 }, { "epoch": 0.4043657331136738, "grad_norm": 0.3811856508255005, "learning_rate": 1.8070322950251687e-05, "loss": 0.5456, "step": 14727 }, { "epoch": 0.4043931905546403, "grad_norm": 12.372997283935547, "learning_rate": 1.8070067906513093e-05, "loss": 0.7174, "step": 14728 }, { "epoch": 0.4044206479956068, "grad_norm": 0.35053667426109314, "learning_rate": 1.8069812847721247e-05, "loss": 0.4778, "step": 14729 }, { "epoch": 0.40444810543657334, "grad_norm": 0.31888505816459656, "learning_rate": 1.8069557773876622e-05, "loss": 0.4348, "step": 14730 }, { "epoch": 0.40447556287753983, "grad_norm": 0.3730185627937317, "learning_rate": 1.8069302684979697e-05, "loss": 0.5085, "step": 14731 }, { "epoch": 0.4045030203185063, "grad_norm": 0.350248247385025, "learning_rate": 1.806904758103095e-05, "loss": 0.493, "step": 14732 }, { "epoch": 0.4045304777594728, "grad_norm": 0.33271321654319763, "learning_rate": 1.8068792462030852e-05, "loss": 0.4662, "step": 14733 }, { "epoch": 0.4045579352004393, "grad_norm": 0.4063245356082916, "learning_rate": 1.806853732797988e-05, "loss": 0.5703, "step": 14734 }, { "epoch": 0.4045853926414058, "grad_norm": 0.3604525625705719, "learning_rate": 1.8068282178878514e-05, "loss": 0.4553, "step": 14735 }, { "epoch": 0.4046128500823723, "grad_norm": 0.43405142426490784, "learning_rate": 1.806802701472722e-05, "loss": 0.4396, "step": 14736 }, { "epoch": 0.40464030752333885, "grad_norm": 0.37002667784690857, "learning_rate": 1.8067771835526487e-05, "loss": 0.5008, "step": 14737 }, { "epoch": 0.40466776496430534, "grad_norm": 0.3984539806842804, "learning_rate": 1.8067516641276782e-05, "loss": 0.513, "step": 14738 }, { "epoch": 0.40469522240527184, "grad_norm": 0.4028794765472412, "learning_rate": 1.8067261431978586e-05, "loss": 0.6076, "step": 14739 }, { "epoch": 0.40472267984623833, "grad_norm": 0.45551371574401855, "learning_rate": 1.8067006207632368e-05, "loss": 0.5745, "step": 14740 }, { "epoch": 0.4047501372872048, "grad_norm": 0.36363714933395386, "learning_rate": 1.806675096823861e-05, "loss": 0.4649, "step": 14741 }, { "epoch": 0.4047775947281713, "grad_norm": 0.6082791686058044, "learning_rate": 1.806649571379779e-05, "loss": 0.5841, "step": 14742 }, { "epoch": 0.4048050521691378, "grad_norm": 0.37922120094299316, "learning_rate": 1.806624044431038e-05, "loss": 0.5155, "step": 14743 }, { "epoch": 0.40483250961010436, "grad_norm": 0.32244646549224854, "learning_rate": 1.8065985159776857e-05, "loss": 0.4229, "step": 14744 }, { "epoch": 0.40485996705107086, "grad_norm": 0.3943827748298645, "learning_rate": 1.8065729860197697e-05, "loss": 0.4726, "step": 14745 }, { "epoch": 0.40488742449203735, "grad_norm": 0.4028409421443939, "learning_rate": 1.8065474545573373e-05, "loss": 0.5841, "step": 14746 }, { "epoch": 0.40491488193300385, "grad_norm": 0.3933042585849762, "learning_rate": 1.8065219215904368e-05, "loss": 0.6072, "step": 14747 }, { "epoch": 0.40494233937397034, "grad_norm": 0.3126187324523926, "learning_rate": 1.8064963871191156e-05, "loss": 0.5227, "step": 14748 }, { "epoch": 0.40496979681493683, "grad_norm": 0.3280591666698456, "learning_rate": 1.8064708511434213e-05, "loss": 0.517, "step": 14749 }, { "epoch": 0.4049972542559033, "grad_norm": 0.33515962958335876, "learning_rate": 1.806445313663401e-05, "loss": 0.5742, "step": 14750 }, { "epoch": 0.4050247116968699, "grad_norm": 0.39739400148391724, "learning_rate": 1.8064197746791034e-05, "loss": 0.6028, "step": 14751 }, { "epoch": 0.40505216913783637, "grad_norm": 0.368132621049881, "learning_rate": 1.806394234190575e-05, "loss": 0.5248, "step": 14752 }, { "epoch": 0.40507962657880286, "grad_norm": 0.3222810924053192, "learning_rate": 1.806368692197864e-05, "loss": 0.3576, "step": 14753 }, { "epoch": 0.40510708401976936, "grad_norm": 0.38772234320640564, "learning_rate": 1.8063431487010184e-05, "loss": 0.5263, "step": 14754 }, { "epoch": 0.40513454146073585, "grad_norm": 0.4019448459148407, "learning_rate": 1.8063176037000853e-05, "loss": 0.5117, "step": 14755 }, { "epoch": 0.40516199890170235, "grad_norm": 0.34009474515914917, "learning_rate": 1.8062920571951124e-05, "loss": 0.4982, "step": 14756 }, { "epoch": 0.40518945634266884, "grad_norm": 0.37580329179763794, "learning_rate": 1.806266509186148e-05, "loss": 0.4839, "step": 14757 }, { "epoch": 0.4052169137836354, "grad_norm": 0.4985986053943634, "learning_rate": 1.8062409596732387e-05, "loss": 0.5585, "step": 14758 }, { "epoch": 0.4052443712246019, "grad_norm": 0.353579044342041, "learning_rate": 1.8062154086564327e-05, "loss": 0.4769, "step": 14759 }, { "epoch": 0.4052718286655684, "grad_norm": 0.4060033857822418, "learning_rate": 1.806189856135778e-05, "loss": 0.571, "step": 14760 }, { "epoch": 0.4052992861065349, "grad_norm": 0.38128459453582764, "learning_rate": 1.8061643021113215e-05, "loss": 0.5477, "step": 14761 }, { "epoch": 0.40532674354750137, "grad_norm": 0.366176038980484, "learning_rate": 1.8061387465831117e-05, "loss": 0.5598, "step": 14762 }, { "epoch": 0.40535420098846786, "grad_norm": 0.3576662540435791, "learning_rate": 1.8061131895511953e-05, "loss": 0.5325, "step": 14763 }, { "epoch": 0.40538165842943436, "grad_norm": 0.3693085014820099, "learning_rate": 1.806087631015621e-05, "loss": 0.5494, "step": 14764 }, { "epoch": 0.4054091158704009, "grad_norm": 0.3811831474304199, "learning_rate": 1.806062070976436e-05, "loss": 0.4419, "step": 14765 }, { "epoch": 0.4054365733113674, "grad_norm": 0.5193513035774231, "learning_rate": 1.8060365094336877e-05, "loss": 0.515, "step": 14766 }, { "epoch": 0.4054640307523339, "grad_norm": 0.38256219029426575, "learning_rate": 1.806010946387424e-05, "loss": 0.4551, "step": 14767 }, { "epoch": 0.4054914881933004, "grad_norm": 0.3178194761276245, "learning_rate": 1.805985381837693e-05, "loss": 0.3917, "step": 14768 }, { "epoch": 0.4055189456342669, "grad_norm": 0.3867782652378082, "learning_rate": 1.8059598157845413e-05, "loss": 0.5637, "step": 14769 }, { "epoch": 0.4055464030752334, "grad_norm": 0.362291157245636, "learning_rate": 1.805934248228018e-05, "loss": 0.5107, "step": 14770 }, { "epoch": 0.40557386051619987, "grad_norm": 0.37956178188323975, "learning_rate": 1.8059086791681702e-05, "loss": 0.506, "step": 14771 }, { "epoch": 0.4056013179571664, "grad_norm": 0.3656371533870697, "learning_rate": 1.8058831086050452e-05, "loss": 0.4423, "step": 14772 }, { "epoch": 0.4056287753981329, "grad_norm": 0.39335089921951294, "learning_rate": 1.805857536538691e-05, "loss": 0.5167, "step": 14773 }, { "epoch": 0.4056562328390994, "grad_norm": 0.4061412513256073, "learning_rate": 1.8058319629691552e-05, "loss": 0.5637, "step": 14774 }, { "epoch": 0.4056836902800659, "grad_norm": 0.387127161026001, "learning_rate": 1.8058063878964857e-05, "loss": 0.4868, "step": 14775 }, { "epoch": 0.4057111477210324, "grad_norm": 0.33942294120788574, "learning_rate": 1.8057808113207302e-05, "loss": 0.5166, "step": 14776 }, { "epoch": 0.4057386051619989, "grad_norm": 0.38170599937438965, "learning_rate": 1.805755233241936e-05, "loss": 0.4661, "step": 14777 }, { "epoch": 0.4057660626029654, "grad_norm": 0.37924861907958984, "learning_rate": 1.8057296536601518e-05, "loss": 0.4775, "step": 14778 }, { "epoch": 0.40579352004393193, "grad_norm": 0.368163526058197, "learning_rate": 1.805704072575424e-05, "loss": 0.4737, "step": 14779 }, { "epoch": 0.4058209774848984, "grad_norm": 0.3912263512611389, "learning_rate": 1.805678489987801e-05, "loss": 0.5155, "step": 14780 }, { "epoch": 0.4058484349258649, "grad_norm": 0.36123356223106384, "learning_rate": 1.805652905897331e-05, "loss": 0.4784, "step": 14781 }, { "epoch": 0.4058758923668314, "grad_norm": 0.3870141804218292, "learning_rate": 1.8056273203040606e-05, "loss": 0.6058, "step": 14782 }, { "epoch": 0.4059033498077979, "grad_norm": 0.45444825291633606, "learning_rate": 1.8056017332080383e-05, "loss": 0.6091, "step": 14783 }, { "epoch": 0.4059308072487644, "grad_norm": 0.33832305669784546, "learning_rate": 1.8055761446093118e-05, "loss": 0.4856, "step": 14784 }, { "epoch": 0.4059582646897309, "grad_norm": 0.3368736505508423, "learning_rate": 1.8055505545079285e-05, "loss": 0.5075, "step": 14785 }, { "epoch": 0.40598572213069745, "grad_norm": 0.35209953784942627, "learning_rate": 1.8055249629039366e-05, "loss": 0.4638, "step": 14786 }, { "epoch": 0.40601317957166394, "grad_norm": 0.3681807219982147, "learning_rate": 1.805499369797383e-05, "loss": 0.4728, "step": 14787 }, { "epoch": 0.40604063701263043, "grad_norm": 0.3845827877521515, "learning_rate": 1.8054737751883164e-05, "loss": 0.4864, "step": 14788 }, { "epoch": 0.40606809445359693, "grad_norm": 0.39308351278305054, "learning_rate": 1.8054481790767838e-05, "loss": 0.4898, "step": 14789 }, { "epoch": 0.4060955518945634, "grad_norm": 0.3448549211025238, "learning_rate": 1.8054225814628336e-05, "loss": 0.5768, "step": 14790 }, { "epoch": 0.4061230093355299, "grad_norm": 0.35632315278053284, "learning_rate": 1.8053969823465132e-05, "loss": 0.5138, "step": 14791 }, { "epoch": 0.4061504667764964, "grad_norm": 0.4095469117164612, "learning_rate": 1.8053713817278704e-05, "loss": 0.5001, "step": 14792 }, { "epoch": 0.40617792421746296, "grad_norm": 0.5304561257362366, "learning_rate": 1.8053457796069528e-05, "loss": 0.5707, "step": 14793 }, { "epoch": 0.40620538165842945, "grad_norm": 0.35167619585990906, "learning_rate": 1.8053201759838083e-05, "loss": 0.5274, "step": 14794 }, { "epoch": 0.40623283909939595, "grad_norm": 0.37087327241897583, "learning_rate": 1.8052945708584847e-05, "loss": 0.5586, "step": 14795 }, { "epoch": 0.40626029654036244, "grad_norm": 0.3480444550514221, "learning_rate": 1.8052689642310296e-05, "loss": 0.4627, "step": 14796 }, { "epoch": 0.40628775398132894, "grad_norm": 0.36207038164138794, "learning_rate": 1.805243356101491e-05, "loss": 0.4812, "step": 14797 }, { "epoch": 0.40631521142229543, "grad_norm": 0.34522387385368347, "learning_rate": 1.8052177464699165e-05, "loss": 0.5525, "step": 14798 }, { "epoch": 0.4063426688632619, "grad_norm": 0.4199666976928711, "learning_rate": 1.805192135336354e-05, "loss": 0.5525, "step": 14799 }, { "epoch": 0.4063701263042284, "grad_norm": 0.3506101667881012, "learning_rate": 1.805166522700851e-05, "loss": 0.5734, "step": 14800 }, { "epoch": 0.40639758374519497, "grad_norm": 0.34345313906669617, "learning_rate": 1.8051409085634556e-05, "loss": 0.4338, "step": 14801 }, { "epoch": 0.40642504118616146, "grad_norm": 0.46493247151374817, "learning_rate": 1.8051152929242156e-05, "loss": 0.592, "step": 14802 }, { "epoch": 0.40645249862712796, "grad_norm": 0.3640405535697937, "learning_rate": 1.805089675783178e-05, "loss": 0.5784, "step": 14803 }, { "epoch": 0.40647995606809445, "grad_norm": 0.42128583788871765, "learning_rate": 1.805064057140392e-05, "loss": 0.4946, "step": 14804 }, { "epoch": 0.40650741350906094, "grad_norm": 0.39340364933013916, "learning_rate": 1.805038436995904e-05, "loss": 0.5172, "step": 14805 }, { "epoch": 0.40653487095002744, "grad_norm": 0.4195333421230316, "learning_rate": 1.8050128153497625e-05, "loss": 0.5529, "step": 14806 }, { "epoch": 0.40656232839099393, "grad_norm": 0.36465057730674744, "learning_rate": 1.8049871922020153e-05, "loss": 0.5356, "step": 14807 }, { "epoch": 0.4065897858319605, "grad_norm": 0.38446545600891113, "learning_rate": 1.80496156755271e-05, "loss": 0.4939, "step": 14808 }, { "epoch": 0.406617243272927, "grad_norm": 0.3240787386894226, "learning_rate": 1.8049359414018942e-05, "loss": 0.4618, "step": 14809 }, { "epoch": 0.40664470071389347, "grad_norm": 0.523323655128479, "learning_rate": 1.8049103137496166e-05, "loss": 0.5408, "step": 14810 }, { "epoch": 0.40667215815485996, "grad_norm": 0.3925125300884247, "learning_rate": 1.804884684595924e-05, "loss": 0.4855, "step": 14811 }, { "epoch": 0.40669961559582646, "grad_norm": 0.35637539625167847, "learning_rate": 1.8048590539408644e-05, "loss": 0.4949, "step": 14812 }, { "epoch": 0.40672707303679295, "grad_norm": 0.3661719858646393, "learning_rate": 1.8048334217844857e-05, "loss": 0.5132, "step": 14813 }, { "epoch": 0.40675453047775945, "grad_norm": 0.31979912519454956, "learning_rate": 1.8048077881268364e-05, "loss": 0.4578, "step": 14814 }, { "epoch": 0.406781987918726, "grad_norm": 0.47598007321357727, "learning_rate": 1.8047821529679634e-05, "loss": 0.5236, "step": 14815 }, { "epoch": 0.4068094453596925, "grad_norm": 0.3691399395465851, "learning_rate": 1.8047565163079146e-05, "loss": 0.4885, "step": 14816 }, { "epoch": 0.406836902800659, "grad_norm": 0.3480229675769806, "learning_rate": 1.804730878146738e-05, "loss": 0.5166, "step": 14817 }, { "epoch": 0.4068643602416255, "grad_norm": 0.39590421319007874, "learning_rate": 1.804705238484482e-05, "loss": 0.4298, "step": 14818 }, { "epoch": 0.40689181768259197, "grad_norm": 0.41145452857017517, "learning_rate": 1.8046795973211933e-05, "loss": 0.5628, "step": 14819 }, { "epoch": 0.40691927512355847, "grad_norm": 0.3855990469455719, "learning_rate": 1.804653954656921e-05, "loss": 0.5482, "step": 14820 }, { "epoch": 0.40694673256452496, "grad_norm": 0.7361854910850525, "learning_rate": 1.8046283104917116e-05, "loss": 0.4865, "step": 14821 }, { "epoch": 0.4069741900054915, "grad_norm": 0.3213246464729309, "learning_rate": 1.804602664825614e-05, "loss": 0.4456, "step": 14822 }, { "epoch": 0.407001647446458, "grad_norm": 0.381661593914032, "learning_rate": 1.8045770176586753e-05, "loss": 0.5176, "step": 14823 }, { "epoch": 0.4070291048874245, "grad_norm": 0.40129345655441284, "learning_rate": 1.804551368990944e-05, "loss": 0.5538, "step": 14824 }, { "epoch": 0.407056562328391, "grad_norm": 0.3722538948059082, "learning_rate": 1.8045257188224674e-05, "loss": 0.5262, "step": 14825 }, { "epoch": 0.4070840197693575, "grad_norm": 0.36713552474975586, "learning_rate": 1.8045000671532936e-05, "loss": 0.5366, "step": 14826 }, { "epoch": 0.407111477210324, "grad_norm": 0.349770724773407, "learning_rate": 1.8044744139834708e-05, "loss": 0.4978, "step": 14827 }, { "epoch": 0.4071389346512905, "grad_norm": 0.3892853856086731, "learning_rate": 1.8044487593130458e-05, "loss": 0.4774, "step": 14828 }, { "epoch": 0.407166392092257, "grad_norm": 0.38004061579704285, "learning_rate": 1.8044231031420674e-05, "loss": 0.4579, "step": 14829 }, { "epoch": 0.4071938495332235, "grad_norm": 0.41190171241760254, "learning_rate": 1.804397445470583e-05, "loss": 0.5797, "step": 14830 }, { "epoch": 0.40722130697419, "grad_norm": 0.3893198072910309, "learning_rate": 1.804371786298641e-05, "loss": 0.589, "step": 14831 }, { "epoch": 0.4072487644151565, "grad_norm": 0.3601135015487671, "learning_rate": 1.8043461256262887e-05, "loss": 0.5285, "step": 14832 }, { "epoch": 0.407276221856123, "grad_norm": 0.3108764886856079, "learning_rate": 1.8043204634535743e-05, "loss": 0.4146, "step": 14833 }, { "epoch": 0.4073036792970895, "grad_norm": 0.38985463976860046, "learning_rate": 1.8042947997805454e-05, "loss": 0.5501, "step": 14834 }, { "epoch": 0.407331136738056, "grad_norm": 0.34739187359809875, "learning_rate": 1.80426913460725e-05, "loss": 0.4597, "step": 14835 }, { "epoch": 0.40735859417902254, "grad_norm": 0.39019495248794556, "learning_rate": 1.804243467933736e-05, "loss": 0.4936, "step": 14836 }, { "epoch": 0.40738605161998903, "grad_norm": 0.3715498745441437, "learning_rate": 1.8042177997600513e-05, "loss": 0.4967, "step": 14837 }, { "epoch": 0.4074135090609555, "grad_norm": 0.3712135851383209, "learning_rate": 1.8041921300862435e-05, "loss": 0.6033, "step": 14838 }, { "epoch": 0.407440966501922, "grad_norm": 0.5873866081237793, "learning_rate": 1.8041664589123607e-05, "loss": 0.457, "step": 14839 }, { "epoch": 0.4074684239428885, "grad_norm": 0.3611989915370941, "learning_rate": 1.804140786238451e-05, "loss": 0.4988, "step": 14840 }, { "epoch": 0.407495881383855, "grad_norm": 0.4438962936401367, "learning_rate": 1.8041151120645622e-05, "loss": 0.5422, "step": 14841 }, { "epoch": 0.4075233388248215, "grad_norm": 0.7328953146934509, "learning_rate": 1.8040894363907417e-05, "loss": 0.4726, "step": 14842 }, { "epoch": 0.40755079626578805, "grad_norm": 0.40454646944999695, "learning_rate": 1.804063759217038e-05, "loss": 0.5508, "step": 14843 }, { "epoch": 0.40757825370675455, "grad_norm": 0.42692846059799194, "learning_rate": 1.8040380805434986e-05, "loss": 0.5214, "step": 14844 }, { "epoch": 0.40760571114772104, "grad_norm": 0.369985967874527, "learning_rate": 1.8040124003701716e-05, "loss": 0.4173, "step": 14845 }, { "epoch": 0.40763316858868753, "grad_norm": 0.36094173789024353, "learning_rate": 1.803986718697105e-05, "loss": 0.4513, "step": 14846 }, { "epoch": 0.407660626029654, "grad_norm": 0.37603121995925903, "learning_rate": 1.8039610355243467e-05, "loss": 0.4698, "step": 14847 }, { "epoch": 0.4076880834706205, "grad_norm": 0.3871299922466278, "learning_rate": 1.8039353508519442e-05, "loss": 0.4944, "step": 14848 }, { "epoch": 0.407715540911587, "grad_norm": 0.4270170331001282, "learning_rate": 1.8039096646799458e-05, "loss": 0.5012, "step": 14849 }, { "epoch": 0.40774299835255357, "grad_norm": 0.3768533170223236, "learning_rate": 1.8038839770083993e-05, "loss": 0.6236, "step": 14850 }, { "epoch": 0.40777045579352006, "grad_norm": 0.36142006516456604, "learning_rate": 1.8038582878373527e-05, "loss": 0.4272, "step": 14851 }, { "epoch": 0.40779791323448655, "grad_norm": 0.641895055770874, "learning_rate": 1.8038325971668534e-05, "loss": 0.5417, "step": 14852 }, { "epoch": 0.40782537067545305, "grad_norm": 0.3605983853340149, "learning_rate": 1.8038069049969504e-05, "loss": 0.55, "step": 14853 }, { "epoch": 0.40785282811641954, "grad_norm": 0.5123260617256165, "learning_rate": 1.8037812113276906e-05, "loss": 0.5381, "step": 14854 }, { "epoch": 0.40788028555738604, "grad_norm": 0.3892963230609894, "learning_rate": 1.803755516159122e-05, "loss": 0.4099, "step": 14855 }, { "epoch": 0.40790774299835253, "grad_norm": 0.36296361684799194, "learning_rate": 1.8037298194912932e-05, "loss": 0.6043, "step": 14856 }, { "epoch": 0.4079352004393191, "grad_norm": 0.40396344661712646, "learning_rate": 1.803704121324252e-05, "loss": 0.5029, "step": 14857 }, { "epoch": 0.4079626578802856, "grad_norm": 0.3551046848297119, "learning_rate": 1.803678421658046e-05, "loss": 0.4832, "step": 14858 }, { "epoch": 0.40799011532125207, "grad_norm": 0.432334303855896, "learning_rate": 1.803652720492723e-05, "loss": 0.577, "step": 14859 }, { "epoch": 0.40801757276221856, "grad_norm": 0.355660080909729, "learning_rate": 1.8036270178283312e-05, "loss": 0.4906, "step": 14860 }, { "epoch": 0.40804503020318506, "grad_norm": 0.40323567390441895, "learning_rate": 1.8036013136649186e-05, "loss": 0.5956, "step": 14861 }, { "epoch": 0.40807248764415155, "grad_norm": 0.3822386562824249, "learning_rate": 1.8035756080025332e-05, "loss": 0.5398, "step": 14862 }, { "epoch": 0.40809994508511804, "grad_norm": 0.4175046682357788, "learning_rate": 1.8035499008412224e-05, "loss": 0.558, "step": 14863 }, { "epoch": 0.4081274025260846, "grad_norm": 0.39199790358543396, "learning_rate": 1.8035241921810353e-05, "loss": 0.515, "step": 14864 }, { "epoch": 0.4081548599670511, "grad_norm": 0.3819971978664398, "learning_rate": 1.8034984820220183e-05, "loss": 0.5637, "step": 14865 }, { "epoch": 0.4081823174080176, "grad_norm": 0.43272218108177185, "learning_rate": 1.8034727703642208e-05, "loss": 0.597, "step": 14866 }, { "epoch": 0.4082097748489841, "grad_norm": 0.38267287611961365, "learning_rate": 1.8034470572076897e-05, "loss": 0.4929, "step": 14867 }, { "epoch": 0.40823723228995057, "grad_norm": 0.36657601594924927, "learning_rate": 1.8034213425524738e-05, "loss": 0.5495, "step": 14868 }, { "epoch": 0.40826468973091706, "grad_norm": 0.3921767473220825, "learning_rate": 1.8033956263986204e-05, "loss": 0.5001, "step": 14869 }, { "epoch": 0.40829214717188356, "grad_norm": 0.4294326603412628, "learning_rate": 1.8033699087461777e-05, "loss": 0.5217, "step": 14870 }, { "epoch": 0.4083196046128501, "grad_norm": 0.3309621512889862, "learning_rate": 1.803344189595194e-05, "loss": 0.4226, "step": 14871 }, { "epoch": 0.4083470620538166, "grad_norm": 0.3394681513309479, "learning_rate": 1.8033184689457167e-05, "loss": 0.4917, "step": 14872 }, { "epoch": 0.4083745194947831, "grad_norm": 0.3920499086380005, "learning_rate": 1.8032927467977943e-05, "loss": 0.5162, "step": 14873 }, { "epoch": 0.4084019769357496, "grad_norm": 0.3825346827507019, "learning_rate": 1.8032670231514747e-05, "loss": 0.5342, "step": 14874 }, { "epoch": 0.4084294343767161, "grad_norm": 0.3564504384994507, "learning_rate": 1.8032412980068054e-05, "loss": 0.492, "step": 14875 }, { "epoch": 0.4084568918176826, "grad_norm": 0.3759433329105377, "learning_rate": 1.8032155713638347e-05, "loss": 0.5656, "step": 14876 }, { "epoch": 0.40848434925864907, "grad_norm": 0.4075092375278473, "learning_rate": 1.8031898432226107e-05, "loss": 0.5701, "step": 14877 }, { "epoch": 0.4085118066996156, "grad_norm": 0.4592485725879669, "learning_rate": 1.8031641135831814e-05, "loss": 0.5478, "step": 14878 }, { "epoch": 0.4085392641405821, "grad_norm": 0.39568856358528137, "learning_rate": 1.8031383824455943e-05, "loss": 0.6091, "step": 14879 }, { "epoch": 0.4085667215815486, "grad_norm": 0.3633610010147095, "learning_rate": 1.803112649809898e-05, "loss": 0.5463, "step": 14880 }, { "epoch": 0.4085941790225151, "grad_norm": 0.369634211063385, "learning_rate": 1.8030869156761403e-05, "loss": 0.4715, "step": 14881 }, { "epoch": 0.4086216364634816, "grad_norm": 0.4010128676891327, "learning_rate": 1.8030611800443692e-05, "loss": 0.5392, "step": 14882 }, { "epoch": 0.4086490939044481, "grad_norm": 0.42297226190567017, "learning_rate": 1.803035442914633e-05, "loss": 0.5633, "step": 14883 }, { "epoch": 0.4086765513454146, "grad_norm": 0.370386004447937, "learning_rate": 1.8030097042869788e-05, "loss": 0.5202, "step": 14884 }, { "epoch": 0.40870400878638113, "grad_norm": 0.34938517212867737, "learning_rate": 1.802983964161456e-05, "loss": 0.5666, "step": 14885 }, { "epoch": 0.40873146622734763, "grad_norm": 0.355739563703537, "learning_rate": 1.802958222538111e-05, "loss": 0.4501, "step": 14886 }, { "epoch": 0.4087589236683141, "grad_norm": 0.35048630833625793, "learning_rate": 1.8029324794169932e-05, "loss": 0.4975, "step": 14887 }, { "epoch": 0.4087863811092806, "grad_norm": 0.35882246494293213, "learning_rate": 1.8029067347981497e-05, "loss": 0.5137, "step": 14888 }, { "epoch": 0.4088138385502471, "grad_norm": 0.39398396015167236, "learning_rate": 1.8028809886816293e-05, "loss": 0.5594, "step": 14889 }, { "epoch": 0.4088412959912136, "grad_norm": 0.35467544198036194, "learning_rate": 1.802855241067479e-05, "loss": 0.5525, "step": 14890 }, { "epoch": 0.4088687534321801, "grad_norm": 0.3748033046722412, "learning_rate": 1.802829491955748e-05, "loss": 0.5159, "step": 14891 }, { "epoch": 0.40889621087314665, "grad_norm": 0.3947317600250244, "learning_rate": 1.8028037413464834e-05, "loss": 0.5118, "step": 14892 }, { "epoch": 0.40892366831411314, "grad_norm": 0.41923508048057556, "learning_rate": 1.8027779892397338e-05, "loss": 0.6106, "step": 14893 }, { "epoch": 0.40895112575507964, "grad_norm": 0.40595853328704834, "learning_rate": 1.8027522356355472e-05, "loss": 0.5796, "step": 14894 }, { "epoch": 0.40897858319604613, "grad_norm": 0.7132221460342407, "learning_rate": 1.8027264805339713e-05, "loss": 0.4802, "step": 14895 }, { "epoch": 0.4090060406370126, "grad_norm": 0.3711858093738556, "learning_rate": 1.802700723935054e-05, "loss": 0.5467, "step": 14896 }, { "epoch": 0.4090334980779791, "grad_norm": 0.4466070532798767, "learning_rate": 1.802674965838844e-05, "loss": 0.494, "step": 14897 }, { "epoch": 0.4090609555189456, "grad_norm": 0.381904661655426, "learning_rate": 1.802649206245389e-05, "loss": 0.4789, "step": 14898 }, { "epoch": 0.40908841295991216, "grad_norm": 0.4134085476398468, "learning_rate": 1.802623445154737e-05, "loss": 0.5378, "step": 14899 }, { "epoch": 0.40911587040087866, "grad_norm": 0.36785486340522766, "learning_rate": 1.802597682566936e-05, "loss": 0.5536, "step": 14900 }, { "epoch": 0.40914332784184515, "grad_norm": 0.37199467420578003, "learning_rate": 1.802571918482034e-05, "loss": 0.5189, "step": 14901 }, { "epoch": 0.40917078528281164, "grad_norm": 0.38149160146713257, "learning_rate": 1.8025461529000796e-05, "loss": 0.4629, "step": 14902 }, { "epoch": 0.40919824272377814, "grad_norm": 0.3939107358455658, "learning_rate": 1.8025203858211203e-05, "loss": 0.53, "step": 14903 }, { "epoch": 0.40922570016474463, "grad_norm": 0.3472651243209839, "learning_rate": 1.8024946172452046e-05, "loss": 0.531, "step": 14904 }, { "epoch": 0.4092531576057111, "grad_norm": 0.39366745948791504, "learning_rate": 1.8024688471723803e-05, "loss": 0.5713, "step": 14905 }, { "epoch": 0.4092806150466777, "grad_norm": 0.3902493715286255, "learning_rate": 1.802443075602695e-05, "loss": 0.4766, "step": 14906 }, { "epoch": 0.40930807248764417, "grad_norm": 0.3360219895839691, "learning_rate": 1.8024173025361978e-05, "loss": 0.4521, "step": 14907 }, { "epoch": 0.40933552992861066, "grad_norm": 0.34063348174095154, "learning_rate": 1.8023915279729358e-05, "loss": 0.4791, "step": 14908 }, { "epoch": 0.40936298736957716, "grad_norm": 0.37399619817733765, "learning_rate": 1.8023657519129577e-05, "loss": 0.5386, "step": 14909 }, { "epoch": 0.40939044481054365, "grad_norm": 0.37100523710250854, "learning_rate": 1.8023399743563114e-05, "loss": 0.5286, "step": 14910 }, { "epoch": 0.40941790225151015, "grad_norm": 0.31943148374557495, "learning_rate": 1.802314195303045e-05, "loss": 0.4888, "step": 14911 }, { "epoch": 0.40944535969247664, "grad_norm": 0.36010581254959106, "learning_rate": 1.8022884147532065e-05, "loss": 0.5405, "step": 14912 }, { "epoch": 0.4094728171334432, "grad_norm": 0.39181894063949585, "learning_rate": 1.8022626327068437e-05, "loss": 0.4569, "step": 14913 }, { "epoch": 0.4095002745744097, "grad_norm": 0.3485989272594452, "learning_rate": 1.8022368491640055e-05, "loss": 0.4924, "step": 14914 }, { "epoch": 0.4095277320153762, "grad_norm": 0.36405041813850403, "learning_rate": 1.8022110641247396e-05, "loss": 0.4914, "step": 14915 }, { "epoch": 0.40955518945634267, "grad_norm": 0.4030599892139435, "learning_rate": 1.8021852775890938e-05, "loss": 0.5319, "step": 14916 }, { "epoch": 0.40958264689730917, "grad_norm": 0.33321407437324524, "learning_rate": 1.8021594895571165e-05, "loss": 0.491, "step": 14917 }, { "epoch": 0.40961010433827566, "grad_norm": 0.37069597840309143, "learning_rate": 1.8021337000288555e-05, "loss": 0.5186, "step": 14918 }, { "epoch": 0.40963756177924215, "grad_norm": 1.017444372177124, "learning_rate": 1.8021079090043595e-05, "loss": 0.557, "step": 14919 }, { "epoch": 0.4096650192202087, "grad_norm": 0.3768616318702698, "learning_rate": 1.802082116483676e-05, "loss": 0.4982, "step": 14920 }, { "epoch": 0.4096924766611752, "grad_norm": 0.38224297761917114, "learning_rate": 1.8020563224668534e-05, "loss": 0.5001, "step": 14921 }, { "epoch": 0.4097199341021417, "grad_norm": 0.391998291015625, "learning_rate": 1.8020305269539397e-05, "loss": 0.5254, "step": 14922 }, { "epoch": 0.4097473915431082, "grad_norm": 0.3757077753543854, "learning_rate": 1.8020047299449835e-05, "loss": 0.5295, "step": 14923 }, { "epoch": 0.4097748489840747, "grad_norm": 0.4474014937877655, "learning_rate": 1.801978931440032e-05, "loss": 0.5046, "step": 14924 }, { "epoch": 0.4098023064250412, "grad_norm": 0.39377737045288086, "learning_rate": 1.801953131439134e-05, "loss": 0.5348, "step": 14925 }, { "epoch": 0.40982976386600767, "grad_norm": 0.3966543972492218, "learning_rate": 1.8019273299423377e-05, "loss": 0.5605, "step": 14926 }, { "epoch": 0.4098572213069742, "grad_norm": 0.36608457565307617, "learning_rate": 1.8019015269496908e-05, "loss": 0.4768, "step": 14927 }, { "epoch": 0.4098846787479407, "grad_norm": 0.3948429226875305, "learning_rate": 1.8018757224612416e-05, "loss": 0.4694, "step": 14928 }, { "epoch": 0.4099121361889072, "grad_norm": 0.36695829033851624, "learning_rate": 1.8018499164770383e-05, "loss": 0.5066, "step": 14929 }, { "epoch": 0.4099395936298737, "grad_norm": 0.3620828092098236, "learning_rate": 1.8018241089971286e-05, "loss": 0.4607, "step": 14930 }, { "epoch": 0.4099670510708402, "grad_norm": 0.35110461711883545, "learning_rate": 1.8017983000215617e-05, "loss": 0.4851, "step": 14931 }, { "epoch": 0.4099945085118067, "grad_norm": 0.3691723942756653, "learning_rate": 1.8017724895503846e-05, "loss": 0.3953, "step": 14932 }, { "epoch": 0.4100219659527732, "grad_norm": 0.31375938653945923, "learning_rate": 1.801746677583646e-05, "loss": 0.431, "step": 14933 }, { "epoch": 0.4100494233937397, "grad_norm": 0.35002169013023376, "learning_rate": 1.801720864121394e-05, "loss": 0.4849, "step": 14934 }, { "epoch": 0.4100768808347062, "grad_norm": 0.4523935914039612, "learning_rate": 1.8016950491636764e-05, "loss": 0.5401, "step": 14935 }, { "epoch": 0.4101043382756727, "grad_norm": 0.3678473234176636, "learning_rate": 1.801669232710542e-05, "loss": 0.6036, "step": 14936 }, { "epoch": 0.4101317957166392, "grad_norm": 0.35240787267684937, "learning_rate": 1.8016434147620386e-05, "loss": 0.5077, "step": 14937 }, { "epoch": 0.4101592531576057, "grad_norm": 0.332116037607193, "learning_rate": 1.8016175953182144e-05, "loss": 0.4532, "step": 14938 }, { "epoch": 0.4101867105985722, "grad_norm": 0.3356744945049286, "learning_rate": 1.8015917743791176e-05, "loss": 0.4575, "step": 14939 }, { "epoch": 0.4102141680395387, "grad_norm": 0.342543363571167, "learning_rate": 1.801565951944796e-05, "loss": 0.5761, "step": 14940 }, { "epoch": 0.4102416254805052, "grad_norm": 0.40437307953834534, "learning_rate": 1.8015401280152986e-05, "loss": 0.4899, "step": 14941 }, { "epoch": 0.41026908292147174, "grad_norm": 0.40330877900123596, "learning_rate": 1.8015143025906724e-05, "loss": 0.4823, "step": 14942 }, { "epoch": 0.41029654036243823, "grad_norm": 0.38051944971084595, "learning_rate": 1.801488475670967e-05, "loss": 0.5441, "step": 14943 }, { "epoch": 0.4103239978034047, "grad_norm": 0.4266375005245209, "learning_rate": 1.801462647256229e-05, "loss": 0.5115, "step": 14944 }, { "epoch": 0.4103514552443712, "grad_norm": 0.35357171297073364, "learning_rate": 1.8014368173465074e-05, "loss": 0.4545, "step": 14945 }, { "epoch": 0.4103789126853377, "grad_norm": 0.33500656485557556, "learning_rate": 1.8014109859418506e-05, "loss": 0.4705, "step": 14946 }, { "epoch": 0.4104063701263042, "grad_norm": 0.353322297334671, "learning_rate": 1.8013851530423066e-05, "loss": 0.5872, "step": 14947 }, { "epoch": 0.4104338275672707, "grad_norm": 0.35315221548080444, "learning_rate": 1.8013593186479237e-05, "loss": 0.5184, "step": 14948 }, { "epoch": 0.41046128500823725, "grad_norm": 0.3813753128051758, "learning_rate": 1.8013334827587495e-05, "loss": 0.5124, "step": 14949 }, { "epoch": 0.41048874244920375, "grad_norm": 0.3620849847793579, "learning_rate": 1.801307645374833e-05, "loss": 0.4822, "step": 14950 }, { "epoch": 0.41051619989017024, "grad_norm": 0.4730587899684906, "learning_rate": 1.8012818064962217e-05, "loss": 0.4484, "step": 14951 }, { "epoch": 0.41054365733113674, "grad_norm": 0.3624492287635803, "learning_rate": 1.8012559661229642e-05, "loss": 0.4477, "step": 14952 }, { "epoch": 0.41057111477210323, "grad_norm": 0.3689165413379669, "learning_rate": 1.8012301242551086e-05, "loss": 0.4524, "step": 14953 }, { "epoch": 0.4105985722130697, "grad_norm": 0.38393163681030273, "learning_rate": 1.8012042808927027e-05, "loss": 0.4552, "step": 14954 }, { "epoch": 0.4106260296540362, "grad_norm": 0.3271533250808716, "learning_rate": 1.8011784360357955e-05, "loss": 0.475, "step": 14955 }, { "epoch": 0.41065348709500277, "grad_norm": 0.3902164399623871, "learning_rate": 1.801152589684435e-05, "loss": 0.4658, "step": 14956 }, { "epoch": 0.41068094453596926, "grad_norm": 0.3856469392776489, "learning_rate": 1.8011267418386687e-05, "loss": 0.5716, "step": 14957 }, { "epoch": 0.41070840197693576, "grad_norm": 0.38177722692489624, "learning_rate": 1.8011008924985457e-05, "loss": 0.4535, "step": 14958 }, { "epoch": 0.41073585941790225, "grad_norm": 0.3581541180610657, "learning_rate": 1.8010750416641138e-05, "loss": 0.5152, "step": 14959 }, { "epoch": 0.41076331685886874, "grad_norm": 0.379116952419281, "learning_rate": 1.8010491893354213e-05, "loss": 0.4961, "step": 14960 }, { "epoch": 0.41079077429983524, "grad_norm": 0.37521880865097046, "learning_rate": 1.8010233355125163e-05, "loss": 0.4764, "step": 14961 }, { "epoch": 0.41081823174080173, "grad_norm": 0.3584449887275696, "learning_rate": 1.800997480195447e-05, "loss": 0.5392, "step": 14962 }, { "epoch": 0.4108456891817683, "grad_norm": 0.34422430396080017, "learning_rate": 1.800971623384262e-05, "loss": 0.4945, "step": 14963 }, { "epoch": 0.4108731466227348, "grad_norm": 0.40402641892433167, "learning_rate": 1.800945765079009e-05, "loss": 0.5783, "step": 14964 }, { "epoch": 0.41090060406370127, "grad_norm": 0.4081943929195404, "learning_rate": 1.800919905279737e-05, "loss": 0.5113, "step": 14965 }, { "epoch": 0.41092806150466776, "grad_norm": 0.49681979417800903, "learning_rate": 1.8008940439864934e-05, "loss": 0.5715, "step": 14966 }, { "epoch": 0.41095551894563426, "grad_norm": 0.9535386562347412, "learning_rate": 1.800868181199327e-05, "loss": 0.6052, "step": 14967 }, { "epoch": 0.41098297638660075, "grad_norm": 0.3501814603805542, "learning_rate": 1.8008423169182856e-05, "loss": 0.4313, "step": 14968 }, { "epoch": 0.41101043382756725, "grad_norm": 0.38390710949897766, "learning_rate": 1.8008164511434177e-05, "loss": 0.6066, "step": 14969 }, { "epoch": 0.4110378912685338, "grad_norm": 0.3468882441520691, "learning_rate": 1.8007905838747716e-05, "loss": 0.5032, "step": 14970 }, { "epoch": 0.4110653487095003, "grad_norm": 0.34227868914604187, "learning_rate": 1.8007647151123955e-05, "loss": 0.5229, "step": 14971 }, { "epoch": 0.4110928061504668, "grad_norm": 0.344760000705719, "learning_rate": 1.8007388448563374e-05, "loss": 0.4993, "step": 14972 }, { "epoch": 0.4111202635914333, "grad_norm": 0.3503035306930542, "learning_rate": 1.8007129731066464e-05, "loss": 0.4702, "step": 14973 }, { "epoch": 0.41114772103239977, "grad_norm": 0.37122756242752075, "learning_rate": 1.8006870998633698e-05, "loss": 0.4972, "step": 14974 }, { "epoch": 0.41117517847336627, "grad_norm": 0.41246047616004944, "learning_rate": 1.800661225126556e-05, "loss": 0.5563, "step": 14975 }, { "epoch": 0.41120263591433276, "grad_norm": 0.35467013716697693, "learning_rate": 1.8006353488962534e-05, "loss": 0.4985, "step": 14976 }, { "epoch": 0.4112300933552993, "grad_norm": 0.3738156259059906, "learning_rate": 1.8006094711725106e-05, "loss": 0.5118, "step": 14977 }, { "epoch": 0.4112575507962658, "grad_norm": 0.39219215512275696, "learning_rate": 1.8005835919553755e-05, "loss": 0.5322, "step": 14978 }, { "epoch": 0.4112850082372323, "grad_norm": 0.36336153745651245, "learning_rate": 1.800557711244897e-05, "loss": 0.5171, "step": 14979 }, { "epoch": 0.4113124656781988, "grad_norm": 0.3850153088569641, "learning_rate": 1.800531829041122e-05, "loss": 0.518, "step": 14980 }, { "epoch": 0.4113399231191653, "grad_norm": 0.36123451590538025, "learning_rate": 1.8005059453441002e-05, "loss": 0.6024, "step": 14981 }, { "epoch": 0.4113673805601318, "grad_norm": 0.4330444931983948, "learning_rate": 1.800480060153879e-05, "loss": 0.5002, "step": 14982 }, { "epoch": 0.4113948380010983, "grad_norm": 2.0935049057006836, "learning_rate": 1.800454173470507e-05, "loss": 0.5635, "step": 14983 }, { "epoch": 0.4114222954420648, "grad_norm": 0.3442435562610626, "learning_rate": 1.8004282852940332e-05, "loss": 0.4739, "step": 14984 }, { "epoch": 0.4114497528830313, "grad_norm": 0.359579473733902, "learning_rate": 1.8004023956245044e-05, "loss": 0.4641, "step": 14985 }, { "epoch": 0.4114772103239978, "grad_norm": 0.44316166639328003, "learning_rate": 1.80037650446197e-05, "loss": 0.4923, "step": 14986 }, { "epoch": 0.4115046677649643, "grad_norm": 0.34696292877197266, "learning_rate": 1.800350611806478e-05, "loss": 0.5147, "step": 14987 }, { "epoch": 0.4115321252059308, "grad_norm": 0.37955164909362793, "learning_rate": 1.8003247176580765e-05, "loss": 0.4411, "step": 14988 }, { "epoch": 0.4115595826468973, "grad_norm": 0.339478075504303, "learning_rate": 1.800298822016814e-05, "loss": 0.5398, "step": 14989 }, { "epoch": 0.4115870400878638, "grad_norm": 0.3913586735725403, "learning_rate": 1.8002729248827386e-05, "loss": 0.554, "step": 14990 }, { "epoch": 0.41161449752883034, "grad_norm": 0.411346971988678, "learning_rate": 1.800247026255899e-05, "loss": 0.5509, "step": 14991 }, { "epoch": 0.41164195496979683, "grad_norm": 0.38574495911598206, "learning_rate": 1.8002211261363432e-05, "loss": 0.5025, "step": 14992 }, { "epoch": 0.4116694124107633, "grad_norm": 0.39728978276252747, "learning_rate": 1.80019522452412e-05, "loss": 0.4718, "step": 14993 }, { "epoch": 0.4116968698517298, "grad_norm": 0.34998294711112976, "learning_rate": 1.800169321419277e-05, "loss": 0.4728, "step": 14994 }, { "epoch": 0.4117243272926963, "grad_norm": 0.377285897731781, "learning_rate": 1.800143416821863e-05, "loss": 0.5281, "step": 14995 }, { "epoch": 0.4117517847336628, "grad_norm": 0.3662301003932953, "learning_rate": 1.8001175107319255e-05, "loss": 0.5106, "step": 14996 }, { "epoch": 0.4117792421746293, "grad_norm": 0.40407758951187134, "learning_rate": 1.800091603149514e-05, "loss": 0.5803, "step": 14997 }, { "epoch": 0.41180669961559585, "grad_norm": 0.4000510573387146, "learning_rate": 1.8000656940746763e-05, "loss": 0.6477, "step": 14998 }, { "epoch": 0.41183415705656234, "grad_norm": 0.3954298198223114, "learning_rate": 1.8000397835074607e-05, "loss": 0.5729, "step": 14999 }, { "epoch": 0.41186161449752884, "grad_norm": 0.3695518970489502, "learning_rate": 1.8000138714479157e-05, "loss": 0.5006, "step": 15000 }, { "epoch": 0.41188907193849533, "grad_norm": 0.4085278809070587, "learning_rate": 1.799987957896089e-05, "loss": 0.5519, "step": 15001 }, { "epoch": 0.4119165293794618, "grad_norm": 0.35721802711486816, "learning_rate": 1.7999620428520298e-05, "loss": 0.5789, "step": 15002 }, { "epoch": 0.4119439868204283, "grad_norm": 0.3643236756324768, "learning_rate": 1.7999361263157862e-05, "loss": 0.4836, "step": 15003 }, { "epoch": 0.4119714442613948, "grad_norm": 0.36840301752090454, "learning_rate": 1.7999102082874063e-05, "loss": 0.5299, "step": 15004 }, { "epoch": 0.41199890170236136, "grad_norm": 0.36411091685295105, "learning_rate": 1.7998842887669383e-05, "loss": 0.4696, "step": 15005 }, { "epoch": 0.41202635914332786, "grad_norm": 0.4620400667190552, "learning_rate": 1.7998583677544312e-05, "loss": 0.5299, "step": 15006 }, { "epoch": 0.41205381658429435, "grad_norm": 0.3748687505722046, "learning_rate": 1.7998324452499325e-05, "loss": 0.572, "step": 15007 }, { "epoch": 0.41208127402526085, "grad_norm": 0.36257603764533997, "learning_rate": 1.7998065212534916e-05, "loss": 0.5519, "step": 15008 }, { "epoch": 0.41210873146622734, "grad_norm": 0.34694191813468933, "learning_rate": 1.7997805957651557e-05, "loss": 0.4771, "step": 15009 }, { "epoch": 0.41213618890719383, "grad_norm": 0.37165603041648865, "learning_rate": 1.799754668784974e-05, "loss": 0.462, "step": 15010 }, { "epoch": 0.41216364634816033, "grad_norm": 0.40199095010757446, "learning_rate": 1.7997287403129945e-05, "loss": 0.4449, "step": 15011 }, { "epoch": 0.4121911037891269, "grad_norm": 0.3486410975456238, "learning_rate": 1.799702810349266e-05, "loss": 0.4606, "step": 15012 }, { "epoch": 0.41221856123009337, "grad_norm": 0.4198271930217743, "learning_rate": 1.7996768788938362e-05, "loss": 0.5556, "step": 15013 }, { "epoch": 0.41224601867105987, "grad_norm": 0.7209258675575256, "learning_rate": 1.7996509459467538e-05, "loss": 0.5187, "step": 15014 }, { "epoch": 0.41227347611202636, "grad_norm": 0.38599756360054016, "learning_rate": 1.799625011508067e-05, "loss": 0.4754, "step": 15015 }, { "epoch": 0.41230093355299285, "grad_norm": 0.38290077447891235, "learning_rate": 1.7995990755778247e-05, "loss": 0.5558, "step": 15016 }, { "epoch": 0.41232839099395935, "grad_norm": 0.42361727356910706, "learning_rate": 1.799573138156075e-05, "loss": 0.5006, "step": 15017 }, { "epoch": 0.41235584843492584, "grad_norm": 0.36995482444763184, "learning_rate": 1.7995471992428658e-05, "loss": 0.5467, "step": 15018 }, { "epoch": 0.4123833058758924, "grad_norm": 0.3529278337955475, "learning_rate": 1.799521258838246e-05, "loss": 0.5272, "step": 15019 }, { "epoch": 0.4124107633168589, "grad_norm": 0.3259970247745514, "learning_rate": 1.799495316942264e-05, "loss": 0.4275, "step": 15020 }, { "epoch": 0.4124382207578254, "grad_norm": 0.3615996837615967, "learning_rate": 1.799469373554968e-05, "loss": 0.4703, "step": 15021 }, { "epoch": 0.4124656781987919, "grad_norm": 0.33804556727409363, "learning_rate": 1.7994434286764063e-05, "loss": 0.5011, "step": 15022 }, { "epoch": 0.41249313563975837, "grad_norm": 0.3930164873600006, "learning_rate": 1.7994174823066276e-05, "loss": 0.5447, "step": 15023 }, { "epoch": 0.41252059308072486, "grad_norm": 0.3955042064189911, "learning_rate": 1.7993915344456802e-05, "loss": 0.4748, "step": 15024 }, { "epoch": 0.41254805052169136, "grad_norm": 0.3855774998664856, "learning_rate": 1.7993655850936126e-05, "loss": 0.484, "step": 15025 }, { "epoch": 0.4125755079626579, "grad_norm": 0.41312840580940247, "learning_rate": 1.7993396342504727e-05, "loss": 0.5481, "step": 15026 }, { "epoch": 0.4126029654036244, "grad_norm": 0.3739473223686218, "learning_rate": 1.7993136819163094e-05, "loss": 0.5009, "step": 15027 }, { "epoch": 0.4126304228445909, "grad_norm": 0.446560800075531, "learning_rate": 1.799287728091171e-05, "loss": 0.5383, "step": 15028 }, { "epoch": 0.4126578802855574, "grad_norm": 0.35191380977630615, "learning_rate": 1.7992617727751058e-05, "loss": 0.5401, "step": 15029 }, { "epoch": 0.4126853377265239, "grad_norm": 0.34725046157836914, "learning_rate": 1.7992358159681624e-05, "loss": 0.5707, "step": 15030 }, { "epoch": 0.4127127951674904, "grad_norm": 0.3396152853965759, "learning_rate": 1.7992098576703895e-05, "loss": 0.4215, "step": 15031 }, { "epoch": 0.41274025260845687, "grad_norm": 0.40499258041381836, "learning_rate": 1.7991838978818345e-05, "loss": 0.5106, "step": 15032 }, { "epoch": 0.4127677100494234, "grad_norm": 0.422358900308609, "learning_rate": 1.7991579366025466e-05, "loss": 0.6443, "step": 15033 }, { "epoch": 0.4127951674903899, "grad_norm": 0.36296772956848145, "learning_rate": 1.7991319738325742e-05, "loss": 0.4889, "step": 15034 }, { "epoch": 0.4128226249313564, "grad_norm": 0.3764576017856598, "learning_rate": 1.7991060095719658e-05, "loss": 0.5543, "step": 15035 }, { "epoch": 0.4128500823723229, "grad_norm": 0.3656993508338928, "learning_rate": 1.7990800438207695e-05, "loss": 0.5061, "step": 15036 }, { "epoch": 0.4128775398132894, "grad_norm": 0.35426440834999084, "learning_rate": 1.7990540765790337e-05, "loss": 0.4909, "step": 15037 }, { "epoch": 0.4129049972542559, "grad_norm": 0.36149582266807556, "learning_rate": 1.799028107846807e-05, "loss": 0.4946, "step": 15038 }, { "epoch": 0.4129324546952224, "grad_norm": 0.3987847566604614, "learning_rate": 1.799002137624138e-05, "loss": 0.508, "step": 15039 }, { "epoch": 0.41295991213618893, "grad_norm": 0.3373507559299469, "learning_rate": 1.798976165911075e-05, "loss": 0.535, "step": 15040 }, { "epoch": 0.4129873695771554, "grad_norm": 0.3186223804950714, "learning_rate": 1.7989501927076663e-05, "loss": 0.4513, "step": 15041 }, { "epoch": 0.4130148270181219, "grad_norm": 0.38160842657089233, "learning_rate": 1.7989242180139607e-05, "loss": 0.5278, "step": 15042 }, { "epoch": 0.4130422844590884, "grad_norm": 0.3726808726787567, "learning_rate": 1.7988982418300066e-05, "loss": 0.435, "step": 15043 }, { "epoch": 0.4130697419000549, "grad_norm": 0.40342631936073303, "learning_rate": 1.7988722641558522e-05, "loss": 0.5356, "step": 15044 }, { "epoch": 0.4130971993410214, "grad_norm": 0.36245793104171753, "learning_rate": 1.7988462849915456e-05, "loss": 0.5342, "step": 15045 }, { "epoch": 0.4131246567819879, "grad_norm": 0.3491043746471405, "learning_rate": 1.7988203043371363e-05, "loss": 0.5066, "step": 15046 }, { "epoch": 0.41315211422295445, "grad_norm": 0.37365448474884033, "learning_rate": 1.798794322192672e-05, "loss": 0.5032, "step": 15047 }, { "epoch": 0.41317957166392094, "grad_norm": 0.33374086022377014, "learning_rate": 1.7987683385582012e-05, "loss": 0.4711, "step": 15048 }, { "epoch": 0.41320702910488744, "grad_norm": 0.37682196497917175, "learning_rate": 1.7987423534337727e-05, "loss": 0.5006, "step": 15049 }, { "epoch": 0.41323448654585393, "grad_norm": 0.42380237579345703, "learning_rate": 1.7987163668194346e-05, "loss": 0.4874, "step": 15050 }, { "epoch": 0.4132619439868204, "grad_norm": 0.38004785776138306, "learning_rate": 1.798690378715236e-05, "loss": 0.4429, "step": 15051 }, { "epoch": 0.4132894014277869, "grad_norm": 0.3915775716304779, "learning_rate": 1.7986643891212244e-05, "loss": 0.6003, "step": 15052 }, { "epoch": 0.4133168588687534, "grad_norm": 0.4075359106063843, "learning_rate": 1.7986383980374488e-05, "loss": 0.4597, "step": 15053 }, { "epoch": 0.41334431630971996, "grad_norm": 0.32012638449668884, "learning_rate": 1.798612405463958e-05, "loss": 0.4796, "step": 15054 }, { "epoch": 0.41337177375068646, "grad_norm": 0.4112090766429901, "learning_rate": 1.7985864114008e-05, "loss": 0.593, "step": 15055 }, { "epoch": 0.41339923119165295, "grad_norm": 0.3807457387447357, "learning_rate": 1.7985604158480234e-05, "loss": 0.5002, "step": 15056 }, { "epoch": 0.41342668863261944, "grad_norm": 0.36276352405548096, "learning_rate": 1.798534418805677e-05, "loss": 0.6136, "step": 15057 }, { "epoch": 0.41345414607358594, "grad_norm": 0.30379223823547363, "learning_rate": 1.798508420273809e-05, "loss": 0.3847, "step": 15058 }, { "epoch": 0.41348160351455243, "grad_norm": 0.37754058837890625, "learning_rate": 1.7984824202524676e-05, "loss": 0.5658, "step": 15059 }, { "epoch": 0.4135090609555189, "grad_norm": 0.4127773940563202, "learning_rate": 1.7984564187417023e-05, "loss": 0.5506, "step": 15060 }, { "epoch": 0.4135365183964855, "grad_norm": 0.4121566712856293, "learning_rate": 1.7984304157415602e-05, "loss": 0.5488, "step": 15061 }, { "epoch": 0.41356397583745197, "grad_norm": 0.3987113833427429, "learning_rate": 1.798404411252091e-05, "loss": 0.5066, "step": 15062 }, { "epoch": 0.41359143327841846, "grad_norm": 0.38142672181129456, "learning_rate": 1.7983784052733427e-05, "loss": 0.5759, "step": 15063 }, { "epoch": 0.41361889071938496, "grad_norm": 0.42130959033966064, "learning_rate": 1.7983523978053638e-05, "loss": 0.5886, "step": 15064 }, { "epoch": 0.41364634816035145, "grad_norm": 0.4252500534057617, "learning_rate": 1.798326388848203e-05, "loss": 0.3582, "step": 15065 }, { "epoch": 0.41367380560131795, "grad_norm": 0.3746863007545471, "learning_rate": 1.7983003784019085e-05, "loss": 0.5456, "step": 15066 }, { "epoch": 0.41370126304228444, "grad_norm": 0.3952763080596924, "learning_rate": 1.798274366466529e-05, "loss": 0.5155, "step": 15067 }, { "epoch": 0.41372872048325093, "grad_norm": 0.4074193239212036, "learning_rate": 1.798248353042113e-05, "loss": 0.5189, "step": 15068 }, { "epoch": 0.4137561779242175, "grad_norm": 0.4592621326446533, "learning_rate": 1.7982223381287092e-05, "loss": 0.4905, "step": 15069 }, { "epoch": 0.413783635365184, "grad_norm": 0.3510452210903168, "learning_rate": 1.798196321726366e-05, "loss": 0.469, "step": 15070 }, { "epoch": 0.41381109280615047, "grad_norm": 0.4013993442058563, "learning_rate": 1.7981703038351317e-05, "loss": 0.5862, "step": 15071 }, { "epoch": 0.41383855024711697, "grad_norm": 0.3407565951347351, "learning_rate": 1.7981442844550553e-05, "loss": 0.516, "step": 15072 }, { "epoch": 0.41386600768808346, "grad_norm": 0.3507640063762665, "learning_rate": 1.798118263586185e-05, "loss": 0.4762, "step": 15073 }, { "epoch": 0.41389346512904995, "grad_norm": 0.39844363927841187, "learning_rate": 1.7980922412285694e-05, "loss": 0.5272, "step": 15074 }, { "epoch": 0.41392092257001645, "grad_norm": 0.3646073341369629, "learning_rate": 1.7980662173822567e-05, "loss": 0.4746, "step": 15075 }, { "epoch": 0.413948380010983, "grad_norm": 0.6399762630462646, "learning_rate": 1.7980401920472965e-05, "loss": 0.5321, "step": 15076 }, { "epoch": 0.4139758374519495, "grad_norm": 0.37406185269355774, "learning_rate": 1.7980141652237358e-05, "loss": 0.5278, "step": 15077 }, { "epoch": 0.414003294892916, "grad_norm": 0.33969220519065857, "learning_rate": 1.7979881369116245e-05, "loss": 0.4594, "step": 15078 }, { "epoch": 0.4140307523338825, "grad_norm": 0.4435085654258728, "learning_rate": 1.7979621071110104e-05, "loss": 0.7447, "step": 15079 }, { "epoch": 0.414058209774849, "grad_norm": 0.3593395948410034, "learning_rate": 1.7979360758219425e-05, "loss": 0.4995, "step": 15080 }, { "epoch": 0.41408566721581547, "grad_norm": 0.39058804512023926, "learning_rate": 1.797910043044469e-05, "loss": 0.6328, "step": 15081 }, { "epoch": 0.41411312465678196, "grad_norm": 0.4026370346546173, "learning_rate": 1.797884008778639e-05, "loss": 0.5121, "step": 15082 }, { "epoch": 0.4141405820977485, "grad_norm": 0.37211495637893677, "learning_rate": 1.7978579730245004e-05, "loss": 0.462, "step": 15083 }, { "epoch": 0.414168039538715, "grad_norm": 0.3663313388824463, "learning_rate": 1.7978319357821017e-05, "loss": 0.462, "step": 15084 }, { "epoch": 0.4141954969796815, "grad_norm": 0.3696273863315582, "learning_rate": 1.7978058970514923e-05, "loss": 0.5022, "step": 15085 }, { "epoch": 0.414222954420648, "grad_norm": 0.37834489345550537, "learning_rate": 1.79777985683272e-05, "loss": 0.4825, "step": 15086 }, { "epoch": 0.4142504118616145, "grad_norm": 0.36541667580604553, "learning_rate": 1.797753815125834e-05, "loss": 0.5099, "step": 15087 }, { "epoch": 0.414277869302581, "grad_norm": 0.37876564264297485, "learning_rate": 1.7977277719308817e-05, "loss": 0.4972, "step": 15088 }, { "epoch": 0.4143053267435475, "grad_norm": 0.3967914879322052, "learning_rate": 1.7977017272479133e-05, "loss": 0.4504, "step": 15089 }, { "epoch": 0.414332784184514, "grad_norm": 0.3591483533382416, "learning_rate": 1.7976756810769763e-05, "loss": 0.4715, "step": 15090 }, { "epoch": 0.4143602416254805, "grad_norm": 0.37334051728248596, "learning_rate": 1.7976496334181195e-05, "loss": 0.6164, "step": 15091 }, { "epoch": 0.414387699066447, "grad_norm": 0.42060375213623047, "learning_rate": 1.7976235842713918e-05, "loss": 0.5063, "step": 15092 }, { "epoch": 0.4144151565074135, "grad_norm": 0.3622444272041321, "learning_rate": 1.7975975336368412e-05, "loss": 0.4853, "step": 15093 }, { "epoch": 0.41444261394838, "grad_norm": 0.4200735092163086, "learning_rate": 1.797571481514517e-05, "loss": 0.5449, "step": 15094 }, { "epoch": 0.4144700713893465, "grad_norm": 0.3511117398738861, "learning_rate": 1.797545427904467e-05, "loss": 0.5939, "step": 15095 }, { "epoch": 0.414497528830313, "grad_norm": 0.4344049096107483, "learning_rate": 1.7975193728067405e-05, "loss": 0.5597, "step": 15096 }, { "epoch": 0.41452498627127954, "grad_norm": 0.32005149126052856, "learning_rate": 1.797493316221386e-05, "loss": 0.5572, "step": 15097 }, { "epoch": 0.41455244371224603, "grad_norm": 0.3578105568885803, "learning_rate": 1.7974672581484516e-05, "loss": 0.4952, "step": 15098 }, { "epoch": 0.4145799011532125, "grad_norm": 0.3380603790283203, "learning_rate": 1.7974411985879864e-05, "loss": 0.5176, "step": 15099 }, { "epoch": 0.414607358594179, "grad_norm": 0.32448604702949524, "learning_rate": 1.797415137540039e-05, "loss": 0.4692, "step": 15100 }, { "epoch": 0.4146348160351455, "grad_norm": 0.37053239345550537, "learning_rate": 1.7973890750046574e-05, "loss": 0.5067, "step": 15101 }, { "epoch": 0.414662273476112, "grad_norm": 0.41565102338790894, "learning_rate": 1.7973630109818908e-05, "loss": 0.5645, "step": 15102 }, { "epoch": 0.4146897309170785, "grad_norm": 0.40012621879577637, "learning_rate": 1.7973369454717883e-05, "loss": 0.5731, "step": 15103 }, { "epoch": 0.41471718835804505, "grad_norm": 0.33103978633880615, "learning_rate": 1.7973108784743975e-05, "loss": 0.4633, "step": 15104 }, { "epoch": 0.41474464579901155, "grad_norm": 0.3613058030605316, "learning_rate": 1.7972848099897673e-05, "loss": 0.5286, "step": 15105 }, { "epoch": 0.41477210323997804, "grad_norm": 0.35736632347106934, "learning_rate": 1.7972587400179465e-05, "loss": 0.4935, "step": 15106 }, { "epoch": 0.41479956068094453, "grad_norm": 0.38830623030662537, "learning_rate": 1.7972326685589838e-05, "loss": 0.4799, "step": 15107 }, { "epoch": 0.41482701812191103, "grad_norm": 0.4238798916339874, "learning_rate": 1.7972065956129278e-05, "loss": 0.4638, "step": 15108 }, { "epoch": 0.4148544755628775, "grad_norm": 0.4025736153125763, "learning_rate": 1.797180521179827e-05, "loss": 0.468, "step": 15109 }, { "epoch": 0.414881933003844, "grad_norm": 0.3550979793071747, "learning_rate": 1.79715444525973e-05, "loss": 0.5031, "step": 15110 }, { "epoch": 0.41490939044481057, "grad_norm": 0.3712541460990906, "learning_rate": 1.7971283678526858e-05, "loss": 0.4998, "step": 15111 }, { "epoch": 0.41493684788577706, "grad_norm": 0.40286174416542053, "learning_rate": 1.7971022889587423e-05, "loss": 0.5007, "step": 15112 }, { "epoch": 0.41496430532674355, "grad_norm": 0.332398921251297, "learning_rate": 1.797076208577949e-05, "loss": 0.4015, "step": 15113 }, { "epoch": 0.41499176276771005, "grad_norm": 0.3508703410625458, "learning_rate": 1.797050126710354e-05, "loss": 0.4766, "step": 15114 }, { "epoch": 0.41501922020867654, "grad_norm": 0.38119110465049744, "learning_rate": 1.797024043356006e-05, "loss": 0.5674, "step": 15115 }, { "epoch": 0.41504667764964304, "grad_norm": 0.3987010717391968, "learning_rate": 1.7969979585149542e-05, "loss": 0.4906, "step": 15116 }, { "epoch": 0.41507413509060953, "grad_norm": 0.3541288673877716, "learning_rate": 1.7969718721872467e-05, "loss": 0.4735, "step": 15117 }, { "epoch": 0.4151015925315761, "grad_norm": 0.39629483222961426, "learning_rate": 1.7969457843729323e-05, "loss": 0.5043, "step": 15118 }, { "epoch": 0.4151290499725426, "grad_norm": 0.39659783244132996, "learning_rate": 1.7969196950720595e-05, "loss": 0.4362, "step": 15119 }, { "epoch": 0.41515650741350907, "grad_norm": 0.3451426029205322, "learning_rate": 1.796893604284677e-05, "loss": 0.4712, "step": 15120 }, { "epoch": 0.41518396485447556, "grad_norm": 0.3320528268814087, "learning_rate": 1.7968675120108338e-05, "loss": 0.4179, "step": 15121 }, { "epoch": 0.41521142229544206, "grad_norm": 0.3702262043952942, "learning_rate": 1.7968414182505785e-05, "loss": 0.5016, "step": 15122 }, { "epoch": 0.41523887973640855, "grad_norm": 0.38519778847694397, "learning_rate": 1.7968153230039593e-05, "loss": 0.5651, "step": 15123 }, { "epoch": 0.41526633717737504, "grad_norm": 0.3897005021572113, "learning_rate": 1.7967892262710256e-05, "loss": 0.4803, "step": 15124 }, { "epoch": 0.4152937946183416, "grad_norm": 0.36023688316345215, "learning_rate": 1.7967631280518253e-05, "loss": 0.6076, "step": 15125 }, { "epoch": 0.4153212520593081, "grad_norm": 0.35873493552207947, "learning_rate": 1.7967370283464077e-05, "loss": 0.4671, "step": 15126 }, { "epoch": 0.4153487095002746, "grad_norm": 0.39464670419692993, "learning_rate": 1.796710927154821e-05, "loss": 0.5261, "step": 15127 }, { "epoch": 0.4153761669412411, "grad_norm": 0.37927767634391785, "learning_rate": 1.7966848244771146e-05, "loss": 0.4719, "step": 15128 }, { "epoch": 0.41540362438220757, "grad_norm": 0.40275925397872925, "learning_rate": 1.7966587203133364e-05, "loss": 0.5557, "step": 15129 }, { "epoch": 0.41543108182317406, "grad_norm": 0.4543324112892151, "learning_rate": 1.7966326146635354e-05, "loss": 0.5647, "step": 15130 }, { "epoch": 0.41545853926414056, "grad_norm": 0.4163845479488373, "learning_rate": 1.7966065075277605e-05, "loss": 0.6178, "step": 15131 }, { "epoch": 0.4154859967051071, "grad_norm": 0.35727325081825256, "learning_rate": 1.79658039890606e-05, "loss": 0.4937, "step": 15132 }, { "epoch": 0.4155134541460736, "grad_norm": 0.32168424129486084, "learning_rate": 1.796554288798483e-05, "loss": 0.3873, "step": 15133 }, { "epoch": 0.4155409115870401, "grad_norm": 0.389689177274704, "learning_rate": 1.796528177205078e-05, "loss": 0.579, "step": 15134 }, { "epoch": 0.4155683690280066, "grad_norm": 0.3675808310508728, "learning_rate": 1.7965020641258937e-05, "loss": 0.544, "step": 15135 }, { "epoch": 0.4155958264689731, "grad_norm": 0.3441985547542572, "learning_rate": 1.7964759495609788e-05, "loss": 0.5373, "step": 15136 }, { "epoch": 0.4156232839099396, "grad_norm": 0.3805503845214844, "learning_rate": 1.796449833510382e-05, "loss": 0.4976, "step": 15137 }, { "epoch": 0.41565074135090607, "grad_norm": 0.3442896604537964, "learning_rate": 1.796423715974152e-05, "loss": 0.5546, "step": 15138 }, { "epoch": 0.4156781987918726, "grad_norm": 0.3551958203315735, "learning_rate": 1.796397596952338e-05, "loss": 0.5508, "step": 15139 }, { "epoch": 0.4157056562328391, "grad_norm": 0.35242903232574463, "learning_rate": 1.7963714764449878e-05, "loss": 0.4451, "step": 15140 }, { "epoch": 0.4157331136738056, "grad_norm": 0.43241074681282043, "learning_rate": 1.796345354452151e-05, "loss": 0.611, "step": 15141 }, { "epoch": 0.4157605711147721, "grad_norm": 0.43856722116470337, "learning_rate": 1.796319230973876e-05, "loss": 0.5704, "step": 15142 }, { "epoch": 0.4157880285557386, "grad_norm": 0.3902216851711273, "learning_rate": 1.796293106010211e-05, "loss": 0.532, "step": 15143 }, { "epoch": 0.4158154859967051, "grad_norm": 0.3251062333583832, "learning_rate": 1.7962669795612056e-05, "loss": 0.4372, "step": 15144 }, { "epoch": 0.4158429434376716, "grad_norm": 0.3818252682685852, "learning_rate": 1.796240851626908e-05, "loss": 0.5255, "step": 15145 }, { "epoch": 0.41587040087863814, "grad_norm": 0.3514412045478821, "learning_rate": 1.796214722207367e-05, "loss": 0.4287, "step": 15146 }, { "epoch": 0.41589785831960463, "grad_norm": 0.35643914341926575, "learning_rate": 1.7961885913026315e-05, "loss": 0.5597, "step": 15147 }, { "epoch": 0.4159253157605711, "grad_norm": 0.3706134259700775, "learning_rate": 1.79616245891275e-05, "loss": 0.4681, "step": 15148 }, { "epoch": 0.4159527732015376, "grad_norm": 0.345575213432312, "learning_rate": 1.7961363250377717e-05, "loss": 0.4781, "step": 15149 }, { "epoch": 0.4159802306425041, "grad_norm": 0.3620861768722534, "learning_rate": 1.796110189677745e-05, "loss": 0.5826, "step": 15150 }, { "epoch": 0.4160076880834706, "grad_norm": 0.38967716693878174, "learning_rate": 1.7960840528327187e-05, "loss": 0.5056, "step": 15151 }, { "epoch": 0.4160351455244371, "grad_norm": 0.346583753824234, "learning_rate": 1.7960579145027414e-05, "loss": 0.5446, "step": 15152 }, { "epoch": 0.41606260296540365, "grad_norm": 0.392593652009964, "learning_rate": 1.796031774687862e-05, "loss": 0.5156, "step": 15153 }, { "epoch": 0.41609006040637014, "grad_norm": 0.3864503800868988, "learning_rate": 1.7960056333881296e-05, "loss": 0.5244, "step": 15154 }, { "epoch": 0.41611751784733664, "grad_norm": 0.4476706087589264, "learning_rate": 1.7959794906035925e-05, "loss": 0.4858, "step": 15155 }, { "epoch": 0.41614497528830313, "grad_norm": 0.37372276186943054, "learning_rate": 1.7959533463342994e-05, "loss": 0.5523, "step": 15156 }, { "epoch": 0.4161724327292696, "grad_norm": 0.3652246594429016, "learning_rate": 1.7959272005803e-05, "loss": 0.4661, "step": 15157 }, { "epoch": 0.4161998901702361, "grad_norm": 0.3733341693878174, "learning_rate": 1.7959010533416413e-05, "loss": 0.5243, "step": 15158 }, { "epoch": 0.4162273476112026, "grad_norm": 0.407382071018219, "learning_rate": 1.7958749046183735e-05, "loss": 0.5717, "step": 15159 }, { "epoch": 0.41625480505216916, "grad_norm": 0.44936129450798035, "learning_rate": 1.7958487544105453e-05, "loss": 0.4908, "step": 15160 }, { "epoch": 0.41628226249313566, "grad_norm": 0.42208752036094666, "learning_rate": 1.795822602718205e-05, "loss": 0.4351, "step": 15161 }, { "epoch": 0.41630971993410215, "grad_norm": 0.37937131524086, "learning_rate": 1.7957964495414014e-05, "loss": 0.5183, "step": 15162 }, { "epoch": 0.41633717737506865, "grad_norm": 0.31449344754219055, "learning_rate": 1.7957702948801836e-05, "loss": 0.461, "step": 15163 }, { "epoch": 0.41636463481603514, "grad_norm": 0.3396669328212738, "learning_rate": 1.7957441387346e-05, "loss": 0.479, "step": 15164 }, { "epoch": 0.41639209225700163, "grad_norm": 0.4005783498287201, "learning_rate": 1.7957179811046997e-05, "loss": 0.5654, "step": 15165 }, { "epoch": 0.4164195496979681, "grad_norm": 0.32353830337524414, "learning_rate": 1.7956918219905315e-05, "loss": 0.4461, "step": 15166 }, { "epoch": 0.4164470071389347, "grad_norm": 0.35457631945610046, "learning_rate": 1.795665661392144e-05, "loss": 0.5446, "step": 15167 }, { "epoch": 0.41647446457990117, "grad_norm": 0.3853802978992462, "learning_rate": 1.795639499309586e-05, "loss": 0.5339, "step": 15168 }, { "epoch": 0.41650192202086767, "grad_norm": 0.3242754340171814, "learning_rate": 1.795613335742907e-05, "loss": 0.4759, "step": 15169 }, { "epoch": 0.41652937946183416, "grad_norm": 0.3706827759742737, "learning_rate": 1.7955871706921547e-05, "loss": 0.5446, "step": 15170 }, { "epoch": 0.41655683690280065, "grad_norm": 0.37988412380218506, "learning_rate": 1.7955610041573782e-05, "loss": 0.4575, "step": 15171 }, { "epoch": 0.41658429434376715, "grad_norm": 0.3997329771518707, "learning_rate": 1.795534836138627e-05, "loss": 0.5489, "step": 15172 }, { "epoch": 0.41661175178473364, "grad_norm": 0.35064855217933655, "learning_rate": 1.795508666635949e-05, "loss": 0.4891, "step": 15173 }, { "epoch": 0.4166392092257002, "grad_norm": 0.39843207597732544, "learning_rate": 1.7954824956493936e-05, "loss": 0.4961, "step": 15174 }, { "epoch": 0.4166666666666667, "grad_norm": 0.39532947540283203, "learning_rate": 1.7954563231790096e-05, "loss": 0.5738, "step": 15175 }, { "epoch": 0.4166941241076332, "grad_norm": 0.4294738471508026, "learning_rate": 1.7954301492248455e-05, "loss": 0.5237, "step": 15176 }, { "epoch": 0.4167215815485997, "grad_norm": 0.4093337655067444, "learning_rate": 1.79540397378695e-05, "loss": 0.5161, "step": 15177 }, { "epoch": 0.41674903898956617, "grad_norm": 0.8251845836639404, "learning_rate": 1.795377796865373e-05, "loss": 0.5037, "step": 15178 }, { "epoch": 0.41677649643053266, "grad_norm": 0.375183641910553, "learning_rate": 1.795351618460162e-05, "loss": 0.44, "step": 15179 }, { "epoch": 0.41680395387149916, "grad_norm": 0.5054689049720764, "learning_rate": 1.7953254385713665e-05, "loss": 0.559, "step": 15180 }, { "epoch": 0.4168314113124657, "grad_norm": 0.34898144006729126, "learning_rate": 1.7952992571990352e-05, "loss": 0.4531, "step": 15181 }, { "epoch": 0.4168588687534322, "grad_norm": 0.3627343475818634, "learning_rate": 1.795273074343217e-05, "loss": 0.4641, "step": 15182 }, { "epoch": 0.4168863261943987, "grad_norm": 0.40318411588668823, "learning_rate": 1.795246890003961e-05, "loss": 0.4618, "step": 15183 }, { "epoch": 0.4169137836353652, "grad_norm": 0.37886038422584534, "learning_rate": 1.795220704181315e-05, "loss": 0.527, "step": 15184 }, { "epoch": 0.4169412410763317, "grad_norm": 0.3279079496860504, "learning_rate": 1.795194516875329e-05, "loss": 0.5095, "step": 15185 }, { "epoch": 0.4169686985172982, "grad_norm": 0.3722805380821228, "learning_rate": 1.7951683280860516e-05, "loss": 0.5185, "step": 15186 }, { "epoch": 0.41699615595826467, "grad_norm": 0.35024121403694153, "learning_rate": 1.795142137813531e-05, "loss": 0.5393, "step": 15187 }, { "epoch": 0.4170236133992312, "grad_norm": 0.3796120882034302, "learning_rate": 1.7951159460578173e-05, "loss": 0.4707, "step": 15188 }, { "epoch": 0.4170510708401977, "grad_norm": 0.32420870661735535, "learning_rate": 1.795089752818958e-05, "loss": 0.3998, "step": 15189 }, { "epoch": 0.4170785282811642, "grad_norm": 0.35028010606765747, "learning_rate": 1.7950635580970025e-05, "loss": 0.4372, "step": 15190 }, { "epoch": 0.4171059857221307, "grad_norm": 0.3392666280269623, "learning_rate": 1.795037361892e-05, "loss": 0.4134, "step": 15191 }, { "epoch": 0.4171334431630972, "grad_norm": 0.3706549406051636, "learning_rate": 1.7950111642039993e-05, "loss": 0.5169, "step": 15192 }, { "epoch": 0.4171609006040637, "grad_norm": 0.3842490017414093, "learning_rate": 1.7949849650330483e-05, "loss": 0.5704, "step": 15193 }, { "epoch": 0.4171883580450302, "grad_norm": 0.346832275390625, "learning_rate": 1.7949587643791972e-05, "loss": 0.5316, "step": 15194 }, { "epoch": 0.41721581548599673, "grad_norm": 0.4100469648838043, "learning_rate": 1.7949325622424942e-05, "loss": 0.6389, "step": 15195 }, { "epoch": 0.4172432729269632, "grad_norm": 0.40443432331085205, "learning_rate": 1.794906358622988e-05, "loss": 0.4804, "step": 15196 }, { "epoch": 0.4172707303679297, "grad_norm": 0.4335372745990753, "learning_rate": 1.7948801535207278e-05, "loss": 0.4891, "step": 15197 }, { "epoch": 0.4172981878088962, "grad_norm": 0.4176918864250183, "learning_rate": 1.7948539469357625e-05, "loss": 0.6109, "step": 15198 }, { "epoch": 0.4173256452498627, "grad_norm": 0.3630468249320984, "learning_rate": 1.794827738868141e-05, "loss": 0.4856, "step": 15199 }, { "epoch": 0.4173531026908292, "grad_norm": 0.3530628979206085, "learning_rate": 1.794801529317912e-05, "loss": 0.4325, "step": 15200 }, { "epoch": 0.4173805601317957, "grad_norm": 0.4143669903278351, "learning_rate": 1.7947753182851248e-05, "loss": 0.5734, "step": 15201 }, { "epoch": 0.4174080175727622, "grad_norm": 0.3700553774833679, "learning_rate": 1.794749105769827e-05, "loss": 0.5355, "step": 15202 }, { "epoch": 0.41743547501372874, "grad_norm": 0.38821202516555786, "learning_rate": 1.7947228917720693e-05, "loss": 0.4863, "step": 15203 }, { "epoch": 0.41746293245469523, "grad_norm": 0.4122658967971802, "learning_rate": 1.7946966762918997e-05, "loss": 0.5516, "step": 15204 }, { "epoch": 0.41749038989566173, "grad_norm": 0.49512141942977905, "learning_rate": 1.794670459329367e-05, "loss": 0.6327, "step": 15205 }, { "epoch": 0.4175178473366282, "grad_norm": 0.2973226010799408, "learning_rate": 1.7946442408845205e-05, "loss": 0.4343, "step": 15206 }, { "epoch": 0.4175453047775947, "grad_norm": 0.38532623648643494, "learning_rate": 1.7946180209574084e-05, "loss": 0.4863, "step": 15207 }, { "epoch": 0.4175727622185612, "grad_norm": 0.31764155626296997, "learning_rate": 1.7945917995480804e-05, "loss": 0.4439, "step": 15208 }, { "epoch": 0.4176002196595277, "grad_norm": 0.34586647152900696, "learning_rate": 1.794565576656585e-05, "loss": 0.457, "step": 15209 }, { "epoch": 0.41762767710049425, "grad_norm": 0.3692816495895386, "learning_rate": 1.794539352282971e-05, "loss": 0.4893, "step": 15210 }, { "epoch": 0.41765513454146075, "grad_norm": 0.41298091411590576, "learning_rate": 1.794513126427288e-05, "loss": 0.5939, "step": 15211 }, { "epoch": 0.41768259198242724, "grad_norm": 0.37909144163131714, "learning_rate": 1.7944868990895843e-05, "loss": 0.5114, "step": 15212 }, { "epoch": 0.41771004942339374, "grad_norm": 0.37444770336151123, "learning_rate": 1.794460670269909e-05, "loss": 0.5066, "step": 15213 }, { "epoch": 0.41773750686436023, "grad_norm": 0.3567681908607483, "learning_rate": 1.794434439968311e-05, "loss": 0.4792, "step": 15214 }, { "epoch": 0.4177649643053267, "grad_norm": 0.3916453719139099, "learning_rate": 1.7944082081848388e-05, "loss": 0.578, "step": 15215 }, { "epoch": 0.4177924217462932, "grad_norm": 0.3343442380428314, "learning_rate": 1.794381974919542e-05, "loss": 0.4567, "step": 15216 }, { "epoch": 0.41781987918725977, "grad_norm": 0.33129844069480896, "learning_rate": 1.7943557401724692e-05, "loss": 0.4132, "step": 15217 }, { "epoch": 0.41784733662822626, "grad_norm": 0.3678651750087738, "learning_rate": 1.7943295039436693e-05, "loss": 0.574, "step": 15218 }, { "epoch": 0.41787479406919276, "grad_norm": 0.36911168694496155, "learning_rate": 1.794303266233192e-05, "loss": 0.4939, "step": 15219 }, { "epoch": 0.41790225151015925, "grad_norm": 0.38076600432395935, "learning_rate": 1.794277027041085e-05, "loss": 0.5287, "step": 15220 }, { "epoch": 0.41792970895112574, "grad_norm": 0.3355116844177246, "learning_rate": 1.794250786367398e-05, "loss": 0.4739, "step": 15221 }, { "epoch": 0.41795716639209224, "grad_norm": 0.3640083372592926, "learning_rate": 1.79422454421218e-05, "loss": 0.5097, "step": 15222 }, { "epoch": 0.41798462383305873, "grad_norm": 0.3776264190673828, "learning_rate": 1.794198300575479e-05, "loss": 0.6441, "step": 15223 }, { "epoch": 0.4180120812740253, "grad_norm": 0.3572663962841034, "learning_rate": 1.7941720554573453e-05, "loss": 0.4427, "step": 15224 }, { "epoch": 0.4180395387149918, "grad_norm": 0.31871452927589417, "learning_rate": 1.7941458088578273e-05, "loss": 0.5184, "step": 15225 }, { "epoch": 0.41806699615595827, "grad_norm": 0.3544771075248718, "learning_rate": 1.7941195607769738e-05, "loss": 0.5256, "step": 15226 }, { "epoch": 0.41809445359692476, "grad_norm": 0.505176305770874, "learning_rate": 1.7940933112148338e-05, "loss": 0.443, "step": 15227 }, { "epoch": 0.41812191103789126, "grad_norm": 0.33331599831581116, "learning_rate": 1.7940670601714564e-05, "loss": 0.4649, "step": 15228 }, { "epoch": 0.41814936847885775, "grad_norm": 0.4302249550819397, "learning_rate": 1.79404080764689e-05, "loss": 0.5332, "step": 15229 }, { "epoch": 0.41817682591982425, "grad_norm": 0.35126766562461853, "learning_rate": 1.7940145536411848e-05, "loss": 0.5168, "step": 15230 }, { "epoch": 0.4182042833607908, "grad_norm": 0.3519653379917145, "learning_rate": 1.7939882981543888e-05, "loss": 0.4752, "step": 15231 }, { "epoch": 0.4182317408017573, "grad_norm": 0.43141913414001465, "learning_rate": 1.7939620411865513e-05, "loss": 0.5442, "step": 15232 }, { "epoch": 0.4182591982427238, "grad_norm": 0.3695811629295349, "learning_rate": 1.7939357827377208e-05, "loss": 0.4541, "step": 15233 }, { "epoch": 0.4182866556836903, "grad_norm": 0.5064337253570557, "learning_rate": 1.793909522807947e-05, "loss": 0.5272, "step": 15234 }, { "epoch": 0.4183141131246568, "grad_norm": 0.5421488881111145, "learning_rate": 1.7938832613972787e-05, "loss": 0.4435, "step": 15235 }, { "epoch": 0.41834157056562327, "grad_norm": 0.3841310739517212, "learning_rate": 1.7938569985057643e-05, "loss": 0.5321, "step": 15236 }, { "epoch": 0.41836902800658976, "grad_norm": 0.38323256373405457, "learning_rate": 1.7938307341334535e-05, "loss": 0.484, "step": 15237 }, { "epoch": 0.4183964854475563, "grad_norm": 0.436128705739975, "learning_rate": 1.793804468280395e-05, "loss": 0.4946, "step": 15238 }, { "epoch": 0.4184239428885228, "grad_norm": 0.34347254037857056, "learning_rate": 1.793778200946638e-05, "loss": 0.4633, "step": 15239 }, { "epoch": 0.4184514003294893, "grad_norm": 0.4188711643218994, "learning_rate": 1.793751932132231e-05, "loss": 0.5629, "step": 15240 }, { "epoch": 0.4184788577704558, "grad_norm": 0.4226456880569458, "learning_rate": 1.793725661837223e-05, "loss": 0.5321, "step": 15241 }, { "epoch": 0.4185063152114223, "grad_norm": 0.37490054965019226, "learning_rate": 1.793699390061664e-05, "loss": 0.5362, "step": 15242 }, { "epoch": 0.4185337726523888, "grad_norm": 0.4089394211769104, "learning_rate": 1.7936731168056018e-05, "loss": 0.4764, "step": 15243 }, { "epoch": 0.4185612300933553, "grad_norm": 0.3971954882144928, "learning_rate": 1.7936468420690866e-05, "loss": 0.6009, "step": 15244 }, { "epoch": 0.4185886875343218, "grad_norm": 0.4038858413696289, "learning_rate": 1.7936205658521658e-05, "loss": 0.4817, "step": 15245 }, { "epoch": 0.4186161449752883, "grad_norm": 1.6984504461288452, "learning_rate": 1.79359428815489e-05, "loss": 0.5065, "step": 15246 }, { "epoch": 0.4186436024162548, "grad_norm": 0.34362056851387024, "learning_rate": 1.7935680089773075e-05, "loss": 0.4995, "step": 15247 }, { "epoch": 0.4186710598572213, "grad_norm": 0.39992356300354004, "learning_rate": 1.7935417283194672e-05, "loss": 0.5487, "step": 15248 }, { "epoch": 0.4186985172981878, "grad_norm": 0.3988901674747467, "learning_rate": 1.7935154461814186e-05, "loss": 0.5087, "step": 15249 }, { "epoch": 0.4187259747391543, "grad_norm": 0.41481512784957886, "learning_rate": 1.7934891625632103e-05, "loss": 0.5348, "step": 15250 }, { "epoch": 0.4187534321801208, "grad_norm": 0.40403103828430176, "learning_rate": 1.7934628774648913e-05, "loss": 0.4787, "step": 15251 }, { "epoch": 0.41878088962108734, "grad_norm": 0.4502447545528412, "learning_rate": 1.793436590886511e-05, "loss": 0.4711, "step": 15252 }, { "epoch": 0.41880834706205383, "grad_norm": 0.4025229215621948, "learning_rate": 1.793410302828118e-05, "loss": 0.6034, "step": 15253 }, { "epoch": 0.4188358045030203, "grad_norm": 0.33284130692481995, "learning_rate": 1.7933840132897617e-05, "loss": 0.472, "step": 15254 }, { "epoch": 0.4188632619439868, "grad_norm": 0.37839657068252563, "learning_rate": 1.7933577222714907e-05, "loss": 0.479, "step": 15255 }, { "epoch": 0.4188907193849533, "grad_norm": 0.35313835740089417, "learning_rate": 1.793331429773355e-05, "loss": 0.5265, "step": 15256 }, { "epoch": 0.4189181768259198, "grad_norm": 0.42182794213294983, "learning_rate": 1.7933051357954025e-05, "loss": 0.5942, "step": 15257 }, { "epoch": 0.4189456342668863, "grad_norm": 0.4152265191078186, "learning_rate": 1.7932788403376825e-05, "loss": 0.5921, "step": 15258 }, { "epoch": 0.41897309170785285, "grad_norm": 0.32745838165283203, "learning_rate": 1.7932525434002447e-05, "loss": 0.4748, "step": 15259 }, { "epoch": 0.41900054914881935, "grad_norm": 0.38718685507774353, "learning_rate": 1.7932262449831375e-05, "loss": 0.4928, "step": 15260 }, { "epoch": 0.41902800658978584, "grad_norm": 0.37362417578697205, "learning_rate": 1.7931999450864106e-05, "loss": 0.5379, "step": 15261 }, { "epoch": 0.41905546403075233, "grad_norm": 0.3261251151561737, "learning_rate": 1.7931736437101125e-05, "loss": 0.4674, "step": 15262 }, { "epoch": 0.41908292147171883, "grad_norm": 0.37144577503204346, "learning_rate": 1.793147340854292e-05, "loss": 0.5437, "step": 15263 }, { "epoch": 0.4191103789126853, "grad_norm": 0.3454795181751251, "learning_rate": 1.7931210365189988e-05, "loss": 0.4516, "step": 15264 }, { "epoch": 0.4191378363536518, "grad_norm": 0.3380047082901001, "learning_rate": 1.7930947307042816e-05, "loss": 0.4582, "step": 15265 }, { "epoch": 0.41916529379461837, "grad_norm": 0.36208897829055786, "learning_rate": 1.79306842341019e-05, "loss": 0.5467, "step": 15266 }, { "epoch": 0.41919275123558486, "grad_norm": 0.40777358412742615, "learning_rate": 1.7930421146367725e-05, "loss": 0.4365, "step": 15267 }, { "epoch": 0.41922020867655135, "grad_norm": 0.36325693130493164, "learning_rate": 1.793015804384078e-05, "loss": 0.5694, "step": 15268 }, { "epoch": 0.41924766611751785, "grad_norm": 0.4155294895172119, "learning_rate": 1.792989492652156e-05, "loss": 0.6737, "step": 15269 }, { "epoch": 0.41927512355848434, "grad_norm": 0.3594723641872406, "learning_rate": 1.792963179441056e-05, "loss": 0.5172, "step": 15270 }, { "epoch": 0.41930258099945084, "grad_norm": 0.35906028747558594, "learning_rate": 1.7929368647508262e-05, "loss": 0.5083, "step": 15271 }, { "epoch": 0.41933003844041733, "grad_norm": 0.35050269961357117, "learning_rate": 1.7929105485815162e-05, "loss": 0.5137, "step": 15272 }, { "epoch": 0.4193574958813839, "grad_norm": 0.32556068897247314, "learning_rate": 1.792884230933175e-05, "loss": 0.4946, "step": 15273 }, { "epoch": 0.4193849533223504, "grad_norm": 0.34760379791259766, "learning_rate": 1.7928579118058516e-05, "loss": 0.4721, "step": 15274 }, { "epoch": 0.41941241076331687, "grad_norm": 0.3533351421356201, "learning_rate": 1.792831591199595e-05, "loss": 0.4822, "step": 15275 }, { "epoch": 0.41943986820428336, "grad_norm": 0.39869123697280884, "learning_rate": 1.7928052691144544e-05, "loss": 0.528, "step": 15276 }, { "epoch": 0.41946732564524986, "grad_norm": 0.40949746966362, "learning_rate": 1.7927789455504793e-05, "loss": 0.4982, "step": 15277 }, { "epoch": 0.41949478308621635, "grad_norm": 0.36607569456100464, "learning_rate": 1.792752620507718e-05, "loss": 0.4927, "step": 15278 }, { "epoch": 0.41952224052718284, "grad_norm": 0.5324241518974304, "learning_rate": 1.7927262939862205e-05, "loss": 0.5616, "step": 15279 }, { "epoch": 0.4195496979681494, "grad_norm": 0.3106774687767029, "learning_rate": 1.792699965986035e-05, "loss": 0.4386, "step": 15280 }, { "epoch": 0.4195771554091159, "grad_norm": 0.5006164908409119, "learning_rate": 1.7926736365072116e-05, "loss": 0.5248, "step": 15281 }, { "epoch": 0.4196046128500824, "grad_norm": 0.45002618432044983, "learning_rate": 1.7926473055497983e-05, "loss": 0.5205, "step": 15282 }, { "epoch": 0.4196320702910489, "grad_norm": 0.41641536355018616, "learning_rate": 1.792620973113845e-05, "loss": 0.5341, "step": 15283 }, { "epoch": 0.41965952773201537, "grad_norm": 0.35450318455696106, "learning_rate": 1.792594639199401e-05, "loss": 0.5257, "step": 15284 }, { "epoch": 0.41968698517298186, "grad_norm": 0.34146779775619507, "learning_rate": 1.7925683038065146e-05, "loss": 0.4896, "step": 15285 }, { "epoch": 0.41971444261394836, "grad_norm": 0.39708206057548523, "learning_rate": 1.7925419669352352e-05, "loss": 0.5104, "step": 15286 }, { "epoch": 0.4197419000549149, "grad_norm": 0.5250860452651978, "learning_rate": 1.7925156285856125e-05, "loss": 0.4735, "step": 15287 }, { "epoch": 0.4197693574958814, "grad_norm": 0.40989360213279724, "learning_rate": 1.792489288757695e-05, "loss": 0.55, "step": 15288 }, { "epoch": 0.4197968149368479, "grad_norm": 0.37427303194999695, "learning_rate": 1.792462947451532e-05, "loss": 0.4791, "step": 15289 }, { "epoch": 0.4198242723778144, "grad_norm": 0.4145068824291229, "learning_rate": 1.792436604667173e-05, "loss": 0.572, "step": 15290 }, { "epoch": 0.4198517298187809, "grad_norm": 0.33887937664985657, "learning_rate": 1.7924102604046664e-05, "loss": 0.4886, "step": 15291 }, { "epoch": 0.4198791872597474, "grad_norm": 0.3504195511341095, "learning_rate": 1.792383914664062e-05, "loss": 0.5077, "step": 15292 }, { "epoch": 0.41990664470071387, "grad_norm": 0.3542267680168152, "learning_rate": 1.7923575674454084e-05, "loss": 0.453, "step": 15293 }, { "epoch": 0.4199341021416804, "grad_norm": 0.35250815749168396, "learning_rate": 1.7923312187487556e-05, "loss": 0.4122, "step": 15294 }, { "epoch": 0.4199615595826469, "grad_norm": 0.3564620018005371, "learning_rate": 1.792304868574152e-05, "loss": 0.4968, "step": 15295 }, { "epoch": 0.4199890170236134, "grad_norm": 0.3526124358177185, "learning_rate": 1.7922785169216464e-05, "loss": 0.4621, "step": 15296 }, { "epoch": 0.4200164744645799, "grad_norm": 0.41022413969039917, "learning_rate": 1.792252163791289e-05, "loss": 0.5179, "step": 15297 }, { "epoch": 0.4200439319055464, "grad_norm": 0.417562335729599, "learning_rate": 1.7922258091831285e-05, "loss": 0.4451, "step": 15298 }, { "epoch": 0.4200713893465129, "grad_norm": 0.3519323170185089, "learning_rate": 1.7921994530972136e-05, "loss": 0.4995, "step": 15299 }, { "epoch": 0.4200988467874794, "grad_norm": 0.3477555513381958, "learning_rate": 1.7921730955335946e-05, "loss": 0.474, "step": 15300 }, { "epoch": 0.42012630422844593, "grad_norm": 0.32916495203971863, "learning_rate": 1.7921467364923194e-05, "loss": 0.405, "step": 15301 }, { "epoch": 0.42015376166941243, "grad_norm": 0.3810907304286957, "learning_rate": 1.792120375973438e-05, "loss": 0.5376, "step": 15302 }, { "epoch": 0.4201812191103789, "grad_norm": 0.40240195393562317, "learning_rate": 1.792094013976999e-05, "loss": 0.5426, "step": 15303 }, { "epoch": 0.4202086765513454, "grad_norm": 0.41851550340652466, "learning_rate": 1.792067650503052e-05, "loss": 0.5311, "step": 15304 }, { "epoch": 0.4202361339923119, "grad_norm": 0.3339415490627289, "learning_rate": 1.7920412855516458e-05, "loss": 0.4778, "step": 15305 }, { "epoch": 0.4202635914332784, "grad_norm": 0.38420525193214417, "learning_rate": 1.7920149191228298e-05, "loss": 0.4601, "step": 15306 }, { "epoch": 0.4202910488742449, "grad_norm": 0.383918434381485, "learning_rate": 1.791988551216654e-05, "loss": 0.5169, "step": 15307 }, { "epoch": 0.42031850631521145, "grad_norm": 0.376121461391449, "learning_rate": 1.7919621818331657e-05, "loss": 0.5187, "step": 15308 }, { "epoch": 0.42034596375617794, "grad_norm": 0.46004611253738403, "learning_rate": 1.7919358109724155e-05, "loss": 0.548, "step": 15309 }, { "epoch": 0.42037342119714444, "grad_norm": 0.3626701235771179, "learning_rate": 1.7919094386344527e-05, "loss": 0.5422, "step": 15310 }, { "epoch": 0.42040087863811093, "grad_norm": 0.4068359136581421, "learning_rate": 1.7918830648193258e-05, "loss": 0.5432, "step": 15311 }, { "epoch": 0.4204283360790774, "grad_norm": 0.4663786292076111, "learning_rate": 1.7918566895270842e-05, "loss": 0.5329, "step": 15312 }, { "epoch": 0.4204557935200439, "grad_norm": 0.38895300030708313, "learning_rate": 1.791830312757777e-05, "loss": 0.4918, "step": 15313 }, { "epoch": 0.4204832509610104, "grad_norm": 0.3841363489627838, "learning_rate": 1.791803934511454e-05, "loss": 0.5713, "step": 15314 }, { "epoch": 0.42051070840197696, "grad_norm": 0.5920829772949219, "learning_rate": 1.7917775547881634e-05, "loss": 0.554, "step": 15315 }, { "epoch": 0.42053816584294346, "grad_norm": 0.34538766741752625, "learning_rate": 1.7917511735879554e-05, "loss": 0.5213, "step": 15316 }, { "epoch": 0.42056562328390995, "grad_norm": 0.6428372859954834, "learning_rate": 1.7917247909108784e-05, "loss": 0.5185, "step": 15317 }, { "epoch": 0.42059308072487644, "grad_norm": 0.39903077483177185, "learning_rate": 1.791698406756982e-05, "loss": 0.4288, "step": 15318 }, { "epoch": 0.42062053816584294, "grad_norm": 0.3894887864589691, "learning_rate": 1.7916720211263155e-05, "loss": 0.5335, "step": 15319 }, { "epoch": 0.42064799560680943, "grad_norm": 0.3702535331249237, "learning_rate": 1.7916456340189282e-05, "loss": 0.5531, "step": 15320 }, { "epoch": 0.4206754530477759, "grad_norm": 0.434081107378006, "learning_rate": 1.7916192454348687e-05, "loss": 0.4826, "step": 15321 }, { "epoch": 0.4207029104887425, "grad_norm": 0.4079924523830414, "learning_rate": 1.7915928553741873e-05, "loss": 0.5603, "step": 15322 }, { "epoch": 0.42073036792970897, "grad_norm": 0.38439348340034485, "learning_rate": 1.7915664638369322e-05, "loss": 0.5684, "step": 15323 }, { "epoch": 0.42075782537067546, "grad_norm": 0.350631982088089, "learning_rate": 1.7915400708231526e-05, "loss": 0.4964, "step": 15324 }, { "epoch": 0.42078528281164196, "grad_norm": 0.3776085376739502, "learning_rate": 1.7915136763328988e-05, "loss": 0.5277, "step": 15325 }, { "epoch": 0.42081274025260845, "grad_norm": 0.3424610197544098, "learning_rate": 1.7914872803662187e-05, "loss": 0.5034, "step": 15326 }, { "epoch": 0.42084019769357495, "grad_norm": 0.3800703287124634, "learning_rate": 1.791460882923163e-05, "loss": 0.475, "step": 15327 }, { "epoch": 0.42086765513454144, "grad_norm": 0.36962002515792847, "learning_rate": 1.7914344840037796e-05, "loss": 0.5914, "step": 15328 }, { "epoch": 0.420895112575508, "grad_norm": 0.4176870882511139, "learning_rate": 1.7914080836081184e-05, "loss": 0.4664, "step": 15329 }, { "epoch": 0.4209225700164745, "grad_norm": 0.6321670413017273, "learning_rate": 1.791381681736229e-05, "loss": 0.5973, "step": 15330 }, { "epoch": 0.420950027457441, "grad_norm": 0.38666069507598877, "learning_rate": 1.7913552783881596e-05, "loss": 0.5608, "step": 15331 }, { "epoch": 0.4209774848984075, "grad_norm": 0.3821503221988678, "learning_rate": 1.79132887356396e-05, "loss": 0.5837, "step": 15332 }, { "epoch": 0.42100494233937397, "grad_norm": 0.3400202691555023, "learning_rate": 1.7913024672636797e-05, "loss": 0.4959, "step": 15333 }, { "epoch": 0.42103239978034046, "grad_norm": 0.4021459221839905, "learning_rate": 1.7912760594873678e-05, "loss": 0.6059, "step": 15334 }, { "epoch": 0.42105985722130695, "grad_norm": 0.3987073302268982, "learning_rate": 1.7912496502350735e-05, "loss": 0.5928, "step": 15335 }, { "epoch": 0.42108731466227345, "grad_norm": 0.3892855644226074, "learning_rate": 1.791223239506846e-05, "loss": 0.558, "step": 15336 }, { "epoch": 0.42111477210324, "grad_norm": 0.4224264323711395, "learning_rate": 1.791196827302735e-05, "loss": 0.5957, "step": 15337 }, { "epoch": 0.4211422295442065, "grad_norm": 0.35344892740249634, "learning_rate": 1.7911704136227886e-05, "loss": 0.5151, "step": 15338 }, { "epoch": 0.421169686985173, "grad_norm": 0.3787522614002228, "learning_rate": 1.7911439984670573e-05, "loss": 0.5328, "step": 15339 }, { "epoch": 0.4211971444261395, "grad_norm": 0.3843385577201843, "learning_rate": 1.79111758183559e-05, "loss": 0.4545, "step": 15340 }, { "epoch": 0.421224601867106, "grad_norm": 0.3930096924304962, "learning_rate": 1.791091163728436e-05, "loss": 0.5094, "step": 15341 }, { "epoch": 0.42125205930807247, "grad_norm": 0.3682202696800232, "learning_rate": 1.7910647441456445e-05, "loss": 0.4227, "step": 15342 }, { "epoch": 0.42127951674903896, "grad_norm": 0.323743611574173, "learning_rate": 1.7910383230872644e-05, "loss": 0.4881, "step": 15343 }, { "epoch": 0.4213069741900055, "grad_norm": 0.4034160077571869, "learning_rate": 1.7910119005533455e-05, "loss": 0.4424, "step": 15344 }, { "epoch": 0.421334431630972, "grad_norm": 0.37191087007522583, "learning_rate": 1.7909854765439368e-05, "loss": 0.5566, "step": 15345 }, { "epoch": 0.4213618890719385, "grad_norm": 0.39881840348243713, "learning_rate": 1.7909590510590883e-05, "loss": 0.5544, "step": 15346 }, { "epoch": 0.421389346512905, "grad_norm": 0.37077686190605164, "learning_rate": 1.7909326240988482e-05, "loss": 0.5477, "step": 15347 }, { "epoch": 0.4214168039538715, "grad_norm": 0.3782655596733093, "learning_rate": 1.7909061956632666e-05, "loss": 0.5419, "step": 15348 }, { "epoch": 0.421444261394838, "grad_norm": 0.36843228340148926, "learning_rate": 1.7908797657523924e-05, "loss": 0.5064, "step": 15349 }, { "epoch": 0.4214717188358045, "grad_norm": 0.40659844875335693, "learning_rate": 1.790853334366275e-05, "loss": 0.5379, "step": 15350 }, { "epoch": 0.421499176276771, "grad_norm": 0.47023528814315796, "learning_rate": 1.790826901504964e-05, "loss": 0.4699, "step": 15351 }, { "epoch": 0.4215266337177375, "grad_norm": 0.4117908775806427, "learning_rate": 1.790800467168508e-05, "loss": 0.4703, "step": 15352 }, { "epoch": 0.421554091158704, "grad_norm": 0.36200663447380066, "learning_rate": 1.790774031356957e-05, "loss": 0.5392, "step": 15353 }, { "epoch": 0.4215815485996705, "grad_norm": 0.38265615701675415, "learning_rate": 1.79074759407036e-05, "loss": 0.4552, "step": 15354 }, { "epoch": 0.421609006040637, "grad_norm": 0.42888346314430237, "learning_rate": 1.790721155308766e-05, "loss": 0.566, "step": 15355 }, { "epoch": 0.4216364634816035, "grad_norm": 0.4274563789367676, "learning_rate": 1.790694715072225e-05, "loss": 0.4981, "step": 15356 }, { "epoch": 0.42166392092257, "grad_norm": 0.40366658568382263, "learning_rate": 1.790668273360786e-05, "loss": 0.5369, "step": 15357 }, { "epoch": 0.42169137836353654, "grad_norm": 0.32469499111175537, "learning_rate": 1.7906418301744982e-05, "loss": 0.4649, "step": 15358 }, { "epoch": 0.42171883580450303, "grad_norm": 0.42720773816108704, "learning_rate": 1.7906153855134116e-05, "loss": 0.5071, "step": 15359 }, { "epoch": 0.42174629324546953, "grad_norm": 0.3534640669822693, "learning_rate": 1.790588939377574e-05, "loss": 0.4536, "step": 15360 }, { "epoch": 0.421773750686436, "grad_norm": 0.38092005252838135, "learning_rate": 1.7905624917670365e-05, "loss": 0.4192, "step": 15361 }, { "epoch": 0.4218012081274025, "grad_norm": 0.38518914580345154, "learning_rate": 1.7905360426818476e-05, "loss": 0.5292, "step": 15362 }, { "epoch": 0.421828665568369, "grad_norm": 0.3618248999118805, "learning_rate": 1.790509592122056e-05, "loss": 0.4919, "step": 15363 }, { "epoch": 0.4218561230093355, "grad_norm": 0.36720922589302063, "learning_rate": 1.7904831400877123e-05, "loss": 0.4811, "step": 15364 }, { "epoch": 0.42188358045030205, "grad_norm": 0.3925595283508301, "learning_rate": 1.790456686578865e-05, "loss": 0.5223, "step": 15365 }, { "epoch": 0.42191103789126855, "grad_norm": 0.35651329159736633, "learning_rate": 1.790430231595564e-05, "loss": 0.4053, "step": 15366 }, { "epoch": 0.42193849533223504, "grad_norm": 0.42494189739227295, "learning_rate": 1.790403775137858e-05, "loss": 0.6033, "step": 15367 }, { "epoch": 0.42196595277320154, "grad_norm": 0.4613994359970093, "learning_rate": 1.7903773172057968e-05, "loss": 0.485, "step": 15368 }, { "epoch": 0.42199341021416803, "grad_norm": 0.45521706342697144, "learning_rate": 1.7903508577994295e-05, "loss": 0.4903, "step": 15369 }, { "epoch": 0.4220208676551345, "grad_norm": 0.3662005662918091, "learning_rate": 1.790324396918806e-05, "loss": 0.5266, "step": 15370 }, { "epoch": 0.422048325096101, "grad_norm": 0.3900262117385864, "learning_rate": 1.790297934563975e-05, "loss": 0.5122, "step": 15371 }, { "epoch": 0.42207578253706757, "grad_norm": 0.38697460293769836, "learning_rate": 1.790271470734986e-05, "loss": 0.4911, "step": 15372 }, { "epoch": 0.42210323997803406, "grad_norm": 0.35743212699890137, "learning_rate": 1.7902450054318888e-05, "loss": 0.4832, "step": 15373 }, { "epoch": 0.42213069741900056, "grad_norm": 0.34955212473869324, "learning_rate": 1.7902185386547323e-05, "loss": 0.5833, "step": 15374 }, { "epoch": 0.42215815485996705, "grad_norm": 0.37907874584198, "learning_rate": 1.790192070403566e-05, "loss": 0.5617, "step": 15375 }, { "epoch": 0.42218561230093354, "grad_norm": 0.32797345519065857, "learning_rate": 1.7901656006784393e-05, "loss": 0.5617, "step": 15376 }, { "epoch": 0.42221306974190004, "grad_norm": 0.3716447055339813, "learning_rate": 1.7901391294794015e-05, "loss": 0.6241, "step": 15377 }, { "epoch": 0.42224052718286653, "grad_norm": 0.3724316954612732, "learning_rate": 1.790112656806502e-05, "loss": 0.5477, "step": 15378 }, { "epoch": 0.4222679846238331, "grad_norm": 0.3421581983566284, "learning_rate": 1.79008618265979e-05, "loss": 0.4287, "step": 15379 }, { "epoch": 0.4222954420647996, "grad_norm": 0.43447670340538025, "learning_rate": 1.7900597070393155e-05, "loss": 0.5452, "step": 15380 }, { "epoch": 0.42232289950576607, "grad_norm": 0.4265252351760864, "learning_rate": 1.7900332299451276e-05, "loss": 0.5786, "step": 15381 }, { "epoch": 0.42235035694673256, "grad_norm": 0.33777397871017456, "learning_rate": 1.790006751377275e-05, "loss": 0.504, "step": 15382 }, { "epoch": 0.42237781438769906, "grad_norm": 0.35328271985054016, "learning_rate": 1.7899802713358083e-05, "loss": 0.5487, "step": 15383 }, { "epoch": 0.42240527182866555, "grad_norm": 0.4235985279083252, "learning_rate": 1.789953789820776e-05, "loss": 0.4682, "step": 15384 }, { "epoch": 0.42243272926963205, "grad_norm": 0.3709211051464081, "learning_rate": 1.7899273068322277e-05, "loss": 0.5537, "step": 15385 }, { "epoch": 0.4224601867105986, "grad_norm": 0.3916857838630676, "learning_rate": 1.7899008223702128e-05, "loss": 0.5705, "step": 15386 }, { "epoch": 0.4224876441515651, "grad_norm": 0.29270896315574646, "learning_rate": 1.789874336434781e-05, "loss": 0.4497, "step": 15387 }, { "epoch": 0.4225151015925316, "grad_norm": 0.35953375697135925, "learning_rate": 1.789847849025981e-05, "loss": 0.5651, "step": 15388 }, { "epoch": 0.4225425590334981, "grad_norm": 0.3887077271938324, "learning_rate": 1.789821360143863e-05, "loss": 0.5316, "step": 15389 }, { "epoch": 0.42257001647446457, "grad_norm": 0.33636271953582764, "learning_rate": 1.7897948697884765e-05, "loss": 0.4817, "step": 15390 }, { "epoch": 0.42259747391543107, "grad_norm": 0.3465147316455841, "learning_rate": 1.78976837795987e-05, "loss": 0.5053, "step": 15391 }, { "epoch": 0.42262493135639756, "grad_norm": 0.3718602657318115, "learning_rate": 1.7897418846580935e-05, "loss": 0.5897, "step": 15392 }, { "epoch": 0.4226523887973641, "grad_norm": 0.3765242397785187, "learning_rate": 1.7897153898831963e-05, "loss": 0.5821, "step": 15393 }, { "epoch": 0.4226798462383306, "grad_norm": 0.43361595273017883, "learning_rate": 1.7896888936352275e-05, "loss": 0.5992, "step": 15394 }, { "epoch": 0.4227073036792971, "grad_norm": 0.3090726137161255, "learning_rate": 1.7896623959142373e-05, "loss": 0.4644, "step": 15395 }, { "epoch": 0.4227347611202636, "grad_norm": 0.3916003406047821, "learning_rate": 1.789635896720275e-05, "loss": 0.5432, "step": 15396 }, { "epoch": 0.4227622185612301, "grad_norm": 0.38945290446281433, "learning_rate": 1.7896093960533892e-05, "loss": 0.5448, "step": 15397 }, { "epoch": 0.4227896760021966, "grad_norm": 0.3650817275047302, "learning_rate": 1.78958289391363e-05, "loss": 0.483, "step": 15398 }, { "epoch": 0.4228171334431631, "grad_norm": 0.4364616274833679, "learning_rate": 1.789556390301047e-05, "loss": 0.4326, "step": 15399 }, { "epoch": 0.4228445908841296, "grad_norm": 0.3795132040977478, "learning_rate": 1.789529885215689e-05, "loss": 0.4673, "step": 15400 }, { "epoch": 0.4228720483250961, "grad_norm": 0.3504631817340851, "learning_rate": 1.7895033786576056e-05, "loss": 0.5546, "step": 15401 }, { "epoch": 0.4228995057660626, "grad_norm": 0.4262174367904663, "learning_rate": 1.7894768706268468e-05, "loss": 0.5341, "step": 15402 }, { "epoch": 0.4229269632070291, "grad_norm": 0.38668495416641235, "learning_rate": 1.7894503611234615e-05, "loss": 0.5562, "step": 15403 }, { "epoch": 0.4229544206479956, "grad_norm": 0.3437403738498688, "learning_rate": 1.7894238501474994e-05, "loss": 0.4314, "step": 15404 }, { "epoch": 0.4229818780889621, "grad_norm": 0.37566953897476196, "learning_rate": 1.78939733769901e-05, "loss": 0.4944, "step": 15405 }, { "epoch": 0.4230093355299286, "grad_norm": 0.368822306394577, "learning_rate": 1.7893708237780424e-05, "loss": 0.5117, "step": 15406 }, { "epoch": 0.42303679297089514, "grad_norm": 0.34816229343414307, "learning_rate": 1.789344308384646e-05, "loss": 0.5311, "step": 15407 }, { "epoch": 0.42306425041186163, "grad_norm": 0.34994837641716003, "learning_rate": 1.7893177915188712e-05, "loss": 0.5649, "step": 15408 }, { "epoch": 0.4230917078528281, "grad_norm": 0.3711960017681122, "learning_rate": 1.7892912731807663e-05, "loss": 0.5147, "step": 15409 }, { "epoch": 0.4231191652937946, "grad_norm": 0.3749971091747284, "learning_rate": 1.7892647533703812e-05, "loss": 0.4766, "step": 15410 }, { "epoch": 0.4231466227347611, "grad_norm": 0.3443308472633362, "learning_rate": 1.789238232087766e-05, "loss": 0.4547, "step": 15411 }, { "epoch": 0.4231740801757276, "grad_norm": 0.3372957706451416, "learning_rate": 1.789211709332969e-05, "loss": 0.4915, "step": 15412 }, { "epoch": 0.4232015376166941, "grad_norm": 0.33935970067977905, "learning_rate": 1.7891851851060406e-05, "loss": 0.4741, "step": 15413 }, { "epoch": 0.42322899505766065, "grad_norm": 0.3685728907585144, "learning_rate": 1.7891586594070296e-05, "loss": 0.5237, "step": 15414 }, { "epoch": 0.42325645249862714, "grad_norm": 0.48300641775131226, "learning_rate": 1.7891321322359862e-05, "loss": 0.4438, "step": 15415 }, { "epoch": 0.42328390993959364, "grad_norm": 0.3664702773094177, "learning_rate": 1.789105603592959e-05, "loss": 0.5254, "step": 15416 }, { "epoch": 0.42331136738056013, "grad_norm": 0.34579116106033325, "learning_rate": 1.7890790734779985e-05, "loss": 0.4631, "step": 15417 }, { "epoch": 0.4233388248215266, "grad_norm": 0.3677965998649597, "learning_rate": 1.7890525418911538e-05, "loss": 0.477, "step": 15418 }, { "epoch": 0.4233662822624931, "grad_norm": 0.3905978202819824, "learning_rate": 1.789026008832474e-05, "loss": 0.5296, "step": 15419 }, { "epoch": 0.4233937397034596, "grad_norm": 0.44650667905807495, "learning_rate": 1.788999474302009e-05, "loss": 0.5284, "step": 15420 }, { "epoch": 0.42342119714442616, "grad_norm": 0.4412221610546112, "learning_rate": 1.788972938299808e-05, "loss": 0.5425, "step": 15421 }, { "epoch": 0.42344865458539266, "grad_norm": 0.3906874358654022, "learning_rate": 1.788946400825921e-05, "loss": 0.5561, "step": 15422 }, { "epoch": 0.42347611202635915, "grad_norm": 0.34105947613716125, "learning_rate": 1.7889198618803964e-05, "loss": 0.5347, "step": 15423 }, { "epoch": 0.42350356946732565, "grad_norm": 0.3608778715133667, "learning_rate": 1.7888933214632854e-05, "loss": 0.5923, "step": 15424 }, { "epoch": 0.42353102690829214, "grad_norm": 0.3894387483596802, "learning_rate": 1.7888667795746356e-05, "loss": 0.4861, "step": 15425 }, { "epoch": 0.42355848434925863, "grad_norm": 0.34145066142082214, "learning_rate": 1.7888402362144985e-05, "loss": 0.4628, "step": 15426 }, { "epoch": 0.42358594179022513, "grad_norm": 0.3708588778972626, "learning_rate": 1.788813691382922e-05, "loss": 0.5084, "step": 15427 }, { "epoch": 0.4236133992311917, "grad_norm": 0.334555059671402, "learning_rate": 1.7887871450799563e-05, "loss": 0.4958, "step": 15428 }, { "epoch": 0.4236408566721582, "grad_norm": 0.37389659881591797, "learning_rate": 1.7887605973056506e-05, "loss": 0.4481, "step": 15429 }, { "epoch": 0.42366831411312467, "grad_norm": 0.3574800193309784, "learning_rate": 1.788734048060055e-05, "loss": 0.5238, "step": 15430 }, { "epoch": 0.42369577155409116, "grad_norm": 0.4371650218963623, "learning_rate": 1.7887074973432187e-05, "loss": 0.452, "step": 15431 }, { "epoch": 0.42372322899505765, "grad_norm": 0.4858699440956116, "learning_rate": 1.788680945155191e-05, "loss": 0.5582, "step": 15432 }, { "epoch": 0.42375068643602415, "grad_norm": 0.35705533623695374, "learning_rate": 1.7886543914960216e-05, "loss": 0.4727, "step": 15433 }, { "epoch": 0.42377814387699064, "grad_norm": 0.5150899291038513, "learning_rate": 1.78862783636576e-05, "loss": 0.5286, "step": 15434 }, { "epoch": 0.4238056013179572, "grad_norm": 0.37268877029418945, "learning_rate": 1.7886012797644557e-05, "loss": 0.5306, "step": 15435 }, { "epoch": 0.4238330587589237, "grad_norm": 0.38457512855529785, "learning_rate": 1.7885747216921586e-05, "loss": 0.5432, "step": 15436 }, { "epoch": 0.4238605161998902, "grad_norm": 0.3875664472579956, "learning_rate": 1.788548162148918e-05, "loss": 0.5192, "step": 15437 }, { "epoch": 0.4238879736408567, "grad_norm": 0.3989471197128296, "learning_rate": 1.7885216011347832e-05, "loss": 0.5489, "step": 15438 }, { "epoch": 0.42391543108182317, "grad_norm": 0.34630337357521057, "learning_rate": 1.7884950386498043e-05, "loss": 0.5238, "step": 15439 }, { "epoch": 0.42394288852278966, "grad_norm": 0.39135169982910156, "learning_rate": 1.78846847469403e-05, "loss": 0.5607, "step": 15440 }, { "epoch": 0.42397034596375616, "grad_norm": 0.41511270403862, "learning_rate": 1.7884419092675104e-05, "loss": 0.5035, "step": 15441 }, { "epoch": 0.4239978034047227, "grad_norm": 0.38577836751937866, "learning_rate": 1.7884153423702952e-05, "loss": 0.5471, "step": 15442 }, { "epoch": 0.4240252608456892, "grad_norm": 0.3693135678768158, "learning_rate": 1.788388774002434e-05, "loss": 0.5565, "step": 15443 }, { "epoch": 0.4240527182866557, "grad_norm": 0.3750303387641907, "learning_rate": 1.7883622041639753e-05, "loss": 0.5957, "step": 15444 }, { "epoch": 0.4240801757276222, "grad_norm": 0.3833070695400238, "learning_rate": 1.7883356328549703e-05, "loss": 0.5274, "step": 15445 }, { "epoch": 0.4241076331685887, "grad_norm": 0.3558944761753082, "learning_rate": 1.7883090600754673e-05, "loss": 0.5873, "step": 15446 }, { "epoch": 0.4241350906095552, "grad_norm": 0.3661985397338867, "learning_rate": 1.7882824858255165e-05, "loss": 0.5233, "step": 15447 }, { "epoch": 0.42416254805052167, "grad_norm": 0.34880223870277405, "learning_rate": 1.788255910105167e-05, "loss": 0.5122, "step": 15448 }, { "epoch": 0.4241900054914882, "grad_norm": 0.34481850266456604, "learning_rate": 1.7882293329144688e-05, "loss": 0.5121, "step": 15449 }, { "epoch": 0.4242174629324547, "grad_norm": 0.511839747428894, "learning_rate": 1.7882027542534716e-05, "loss": 0.5459, "step": 15450 }, { "epoch": 0.4242449203734212, "grad_norm": 0.3557758033275604, "learning_rate": 1.7881761741222244e-05, "loss": 0.4409, "step": 15451 }, { "epoch": 0.4242723778143877, "grad_norm": 0.3952370882034302, "learning_rate": 1.7881495925207774e-05, "loss": 0.4947, "step": 15452 }, { "epoch": 0.4242998352553542, "grad_norm": 0.3689156174659729, "learning_rate": 1.7881230094491793e-05, "loss": 0.4936, "step": 15453 }, { "epoch": 0.4243272926963207, "grad_norm": 0.3925451636314392, "learning_rate": 1.7880964249074807e-05, "loss": 0.5144, "step": 15454 }, { "epoch": 0.4243547501372872, "grad_norm": 0.42030656337738037, "learning_rate": 1.7880698388957303e-05, "loss": 0.5426, "step": 15455 }, { "epoch": 0.42438220757825373, "grad_norm": 0.4391491115093231, "learning_rate": 1.7880432514139787e-05, "loss": 0.5934, "step": 15456 }, { "epoch": 0.42440966501922023, "grad_norm": 0.34903261065483093, "learning_rate": 1.7880166624622745e-05, "loss": 0.5526, "step": 15457 }, { "epoch": 0.4244371224601867, "grad_norm": 0.4167464077472687, "learning_rate": 1.787990072040668e-05, "loss": 0.5524, "step": 15458 }, { "epoch": 0.4244645799011532, "grad_norm": 0.39602577686309814, "learning_rate": 1.7879634801492082e-05, "loss": 0.5683, "step": 15459 }, { "epoch": 0.4244920373421197, "grad_norm": 0.39758631587028503, "learning_rate": 1.787936886787945e-05, "loss": 0.5902, "step": 15460 }, { "epoch": 0.4245194947830862, "grad_norm": 0.3783600926399231, "learning_rate": 1.7879102919569283e-05, "loss": 0.5631, "step": 15461 }, { "epoch": 0.4245469522240527, "grad_norm": 0.3531072735786438, "learning_rate": 1.7878836956562077e-05, "loss": 0.4457, "step": 15462 }, { "epoch": 0.42457440966501925, "grad_norm": 0.37754878401756287, "learning_rate": 1.787857097885832e-05, "loss": 0.6168, "step": 15463 }, { "epoch": 0.42460186710598574, "grad_norm": 0.41106197237968445, "learning_rate": 1.7878304986458513e-05, "loss": 0.3917, "step": 15464 }, { "epoch": 0.42462932454695224, "grad_norm": 0.37918969988822937, "learning_rate": 1.7878038979363158e-05, "loss": 0.4776, "step": 15465 }, { "epoch": 0.42465678198791873, "grad_norm": 0.38154786825180054, "learning_rate": 1.7877772957572743e-05, "loss": 0.4492, "step": 15466 }, { "epoch": 0.4246842394288852, "grad_norm": 0.365256130695343, "learning_rate": 1.7877506921087767e-05, "loss": 0.4682, "step": 15467 }, { "epoch": 0.4247116968698517, "grad_norm": 0.3869714140892029, "learning_rate": 1.7877240869908727e-05, "loss": 0.5545, "step": 15468 }, { "epoch": 0.4247391543108182, "grad_norm": 0.3317021429538727, "learning_rate": 1.787697480403612e-05, "loss": 0.5378, "step": 15469 }, { "epoch": 0.4247666117517847, "grad_norm": 0.3870697617530823, "learning_rate": 1.787670872347044e-05, "loss": 0.4851, "step": 15470 }, { "epoch": 0.42479406919275126, "grad_norm": 0.39599868655204773, "learning_rate": 1.787644262821218e-05, "loss": 0.4983, "step": 15471 }, { "epoch": 0.42482152663371775, "grad_norm": 0.3547077476978302, "learning_rate": 1.7876176518261846e-05, "loss": 0.4915, "step": 15472 }, { "epoch": 0.42484898407468424, "grad_norm": 0.41535645723342896, "learning_rate": 1.7875910393619927e-05, "loss": 0.5171, "step": 15473 }, { "epoch": 0.42487644151565074, "grad_norm": 0.3791908025741577, "learning_rate": 1.787564425428692e-05, "loss": 0.5711, "step": 15474 }, { "epoch": 0.42490389895661723, "grad_norm": 0.413862943649292, "learning_rate": 1.787537810026333e-05, "loss": 0.4639, "step": 15475 }, { "epoch": 0.4249313563975837, "grad_norm": 0.9719947576522827, "learning_rate": 1.7875111931549635e-05, "loss": 0.5075, "step": 15476 }, { "epoch": 0.4249588138385502, "grad_norm": 0.39916470646858215, "learning_rate": 1.7874845748146354e-05, "loss": 0.5914, "step": 15477 }, { "epoch": 0.42498627127951677, "grad_norm": 0.35332438349723816, "learning_rate": 1.7874579550053962e-05, "loss": 0.4748, "step": 15478 }, { "epoch": 0.42501372872048326, "grad_norm": 0.31516095995903015, "learning_rate": 1.7874313337272973e-05, "loss": 0.47, "step": 15479 }, { "epoch": 0.42504118616144976, "grad_norm": 0.34321728348731995, "learning_rate": 1.7874047109803873e-05, "loss": 0.4548, "step": 15480 }, { "epoch": 0.42506864360241625, "grad_norm": 0.38090986013412476, "learning_rate": 1.7873780867647165e-05, "loss": 0.59, "step": 15481 }, { "epoch": 0.42509610104338275, "grad_norm": 0.36997613310813904, "learning_rate": 1.7873514610803338e-05, "loss": 0.5494, "step": 15482 }, { "epoch": 0.42512355848434924, "grad_norm": 0.6323895454406738, "learning_rate": 1.78732483392729e-05, "loss": 0.6484, "step": 15483 }, { "epoch": 0.42515101592531573, "grad_norm": 0.36001527309417725, "learning_rate": 1.7872982053056335e-05, "loss": 0.4752, "step": 15484 }, { "epoch": 0.4251784733662823, "grad_norm": 0.35151728987693787, "learning_rate": 1.787271575215415e-05, "loss": 0.5044, "step": 15485 }, { "epoch": 0.4252059308072488, "grad_norm": 0.38568857312202454, "learning_rate": 1.7872449436566835e-05, "loss": 0.5656, "step": 15486 }, { "epoch": 0.42523338824821527, "grad_norm": 0.37312862277030945, "learning_rate": 1.7872183106294888e-05, "loss": 0.4892, "step": 15487 }, { "epoch": 0.42526084568918177, "grad_norm": 0.3548831343650818, "learning_rate": 1.787191676133881e-05, "loss": 0.5054, "step": 15488 }, { "epoch": 0.42528830313014826, "grad_norm": 0.34941452741622925, "learning_rate": 1.787165040169909e-05, "loss": 0.5537, "step": 15489 }, { "epoch": 0.42531576057111475, "grad_norm": 0.363538920879364, "learning_rate": 1.7871384027376236e-05, "loss": 0.5248, "step": 15490 }, { "epoch": 0.42534321801208125, "grad_norm": 0.3919290602207184, "learning_rate": 1.7871117638370737e-05, "loss": 0.4593, "step": 15491 }, { "epoch": 0.4253706754530478, "grad_norm": 0.4524669945240021, "learning_rate": 1.787085123468309e-05, "loss": 0.5695, "step": 15492 }, { "epoch": 0.4253981328940143, "grad_norm": 0.641028642654419, "learning_rate": 1.787058481631379e-05, "loss": 0.5674, "step": 15493 }, { "epoch": 0.4254255903349808, "grad_norm": 0.370451956987381, "learning_rate": 1.787031838326334e-05, "loss": 0.5705, "step": 15494 }, { "epoch": 0.4254530477759473, "grad_norm": 0.38285014033317566, "learning_rate": 1.7870051935532237e-05, "loss": 0.4993, "step": 15495 }, { "epoch": 0.4254805052169138, "grad_norm": 0.35659530758857727, "learning_rate": 1.7869785473120975e-05, "loss": 0.4735, "step": 15496 }, { "epoch": 0.42550796265788027, "grad_norm": 0.30287298560142517, "learning_rate": 1.786951899603005e-05, "loss": 0.4148, "step": 15497 }, { "epoch": 0.42553542009884676, "grad_norm": 0.30575618147850037, "learning_rate": 1.7869252504259958e-05, "loss": 0.4075, "step": 15498 }, { "epoch": 0.4255628775398133, "grad_norm": 0.3632853031158447, "learning_rate": 1.7868985997811202e-05, "loss": 0.5725, "step": 15499 }, { "epoch": 0.4255903349807798, "grad_norm": 0.338909387588501, "learning_rate": 1.7868719476684273e-05, "loss": 0.499, "step": 15500 }, { "epoch": 0.4256177924217463, "grad_norm": 0.38204896450042725, "learning_rate": 1.7868452940879673e-05, "loss": 0.583, "step": 15501 }, { "epoch": 0.4256452498627128, "grad_norm": 0.358659952878952, "learning_rate": 1.7868186390397893e-05, "loss": 0.5472, "step": 15502 }, { "epoch": 0.4256727073036793, "grad_norm": 0.37160035967826843, "learning_rate": 1.786791982523944e-05, "loss": 0.5612, "step": 15503 }, { "epoch": 0.4257001647446458, "grad_norm": 0.3632577955722809, "learning_rate": 1.7867653245404804e-05, "loss": 0.4379, "step": 15504 }, { "epoch": 0.4257276221856123, "grad_norm": 0.3386332094669342, "learning_rate": 1.7867386650894482e-05, "loss": 0.5031, "step": 15505 }, { "epoch": 0.4257550796265788, "grad_norm": 0.4070073664188385, "learning_rate": 1.786712004170897e-05, "loss": 0.4057, "step": 15506 }, { "epoch": 0.4257825370675453, "grad_norm": 0.5327192544937134, "learning_rate": 1.7866853417848774e-05, "loss": 0.5336, "step": 15507 }, { "epoch": 0.4258099945085118, "grad_norm": 0.37817007303237915, "learning_rate": 1.7866586779314382e-05, "loss": 0.4843, "step": 15508 }, { "epoch": 0.4258374519494783, "grad_norm": 0.39681151509284973, "learning_rate": 1.7866320126106296e-05, "loss": 0.4492, "step": 15509 }, { "epoch": 0.4258649093904448, "grad_norm": 0.3777090311050415, "learning_rate": 1.7866053458225014e-05, "loss": 0.5529, "step": 15510 }, { "epoch": 0.4258923668314113, "grad_norm": 0.3981437683105469, "learning_rate": 1.7865786775671027e-05, "loss": 0.5088, "step": 15511 }, { "epoch": 0.4259198242723778, "grad_norm": 0.3897170126438141, "learning_rate": 1.786552007844484e-05, "loss": 0.5161, "step": 15512 }, { "epoch": 0.42594728171334434, "grad_norm": 0.3950134515762329, "learning_rate": 1.786525336654695e-05, "loss": 0.6138, "step": 15513 }, { "epoch": 0.42597473915431083, "grad_norm": 0.35477912425994873, "learning_rate": 1.7864986639977848e-05, "loss": 0.5591, "step": 15514 }, { "epoch": 0.4260021965952773, "grad_norm": 0.3761753737926483, "learning_rate": 1.7864719898738038e-05, "loss": 0.4484, "step": 15515 }, { "epoch": 0.4260296540362438, "grad_norm": 0.3998357355594635, "learning_rate": 1.7864453142828017e-05, "loss": 0.5599, "step": 15516 }, { "epoch": 0.4260571114772103, "grad_norm": 0.4130476713180542, "learning_rate": 1.7864186372248276e-05, "loss": 0.5833, "step": 15517 }, { "epoch": 0.4260845689181768, "grad_norm": 0.3810631334781647, "learning_rate": 1.786391958699932e-05, "loss": 0.5305, "step": 15518 }, { "epoch": 0.4261120263591433, "grad_norm": 0.35093677043914795, "learning_rate": 1.7863652787081648e-05, "loss": 0.516, "step": 15519 }, { "epoch": 0.42613948380010985, "grad_norm": 0.4066230058670044, "learning_rate": 1.7863385972495747e-05, "loss": 0.589, "step": 15520 }, { "epoch": 0.42616694124107635, "grad_norm": 0.33948323130607605, "learning_rate": 1.7863119143242125e-05, "loss": 0.4913, "step": 15521 }, { "epoch": 0.42619439868204284, "grad_norm": 0.4075924754142761, "learning_rate": 1.7862852299321277e-05, "loss": 0.5275, "step": 15522 }, { "epoch": 0.42622185612300933, "grad_norm": 0.3457423150539398, "learning_rate": 1.78625854407337e-05, "loss": 0.4625, "step": 15523 }, { "epoch": 0.42624931356397583, "grad_norm": 23.693344116210938, "learning_rate": 1.7862318567479887e-05, "loss": 1.7582, "step": 15524 }, { "epoch": 0.4262767710049423, "grad_norm": 0.3927377462387085, "learning_rate": 1.7862051679560343e-05, "loss": 0.6889, "step": 15525 }, { "epoch": 0.4263042284459088, "grad_norm": 0.4061179459095001, "learning_rate": 1.7861784776975562e-05, "loss": 0.5745, "step": 15526 }, { "epoch": 0.42633168588687537, "grad_norm": 0.4117131531238556, "learning_rate": 1.7861517859726046e-05, "loss": 0.4809, "step": 15527 }, { "epoch": 0.42635914332784186, "grad_norm": 0.5080301761627197, "learning_rate": 1.7861250927812292e-05, "loss": 0.3935, "step": 15528 }, { "epoch": 0.42638660076880835, "grad_norm": 0.3831770718097687, "learning_rate": 1.7860983981234795e-05, "loss": 0.5158, "step": 15529 }, { "epoch": 0.42641405820977485, "grad_norm": 0.4029556214809418, "learning_rate": 1.7860717019994053e-05, "loss": 0.4868, "step": 15530 }, { "epoch": 0.42644151565074134, "grad_norm": 0.3776521384716034, "learning_rate": 1.786045004409056e-05, "loss": 0.4885, "step": 15531 }, { "epoch": 0.42646897309170784, "grad_norm": 0.3726821541786194, "learning_rate": 1.7860183053524824e-05, "loss": 0.542, "step": 15532 }, { "epoch": 0.42649643053267433, "grad_norm": 0.3104356527328491, "learning_rate": 1.785991604829734e-05, "loss": 0.4587, "step": 15533 }, { "epoch": 0.4265238879736409, "grad_norm": 0.38669925928115845, "learning_rate": 1.78596490284086e-05, "loss": 0.515, "step": 15534 }, { "epoch": 0.4265513454146074, "grad_norm": 0.3674103617668152, "learning_rate": 1.7859381993859107e-05, "loss": 0.5832, "step": 15535 }, { "epoch": 0.42657880285557387, "grad_norm": 0.46413689851760864, "learning_rate": 1.785911494464936e-05, "loss": 0.5874, "step": 15536 }, { "epoch": 0.42660626029654036, "grad_norm": 0.37777215242385864, "learning_rate": 1.7858847880779852e-05, "loss": 0.537, "step": 15537 }, { "epoch": 0.42663371773750686, "grad_norm": 0.40580570697784424, "learning_rate": 1.7858580802251085e-05, "loss": 0.5329, "step": 15538 }, { "epoch": 0.42666117517847335, "grad_norm": 0.39893338084220886, "learning_rate": 1.785831370906356e-05, "loss": 0.7214, "step": 15539 }, { "epoch": 0.42668863261943984, "grad_norm": 0.3465839624404907, "learning_rate": 1.7858046601217765e-05, "loss": 0.5092, "step": 15540 }, { "epoch": 0.4267160900604064, "grad_norm": 0.3527071177959442, "learning_rate": 1.7857779478714215e-05, "loss": 0.5211, "step": 15541 }, { "epoch": 0.4267435475013729, "grad_norm": 0.3720036447048187, "learning_rate": 1.785751234155339e-05, "loss": 0.5585, "step": 15542 }, { "epoch": 0.4267710049423394, "grad_norm": 0.3364792466163635, "learning_rate": 1.7857245189735803e-05, "loss": 0.4701, "step": 15543 }, { "epoch": 0.4267984623833059, "grad_norm": 0.4178813397884369, "learning_rate": 1.785697802326194e-05, "loss": 0.548, "step": 15544 }, { "epoch": 0.42682591982427237, "grad_norm": 0.35511258244514465, "learning_rate": 1.785671084213231e-05, "loss": 0.5645, "step": 15545 }, { "epoch": 0.42685337726523886, "grad_norm": 0.33630019426345825, "learning_rate": 1.7856443646347405e-05, "loss": 0.4947, "step": 15546 }, { "epoch": 0.42688083470620536, "grad_norm": 0.33837372064590454, "learning_rate": 1.7856176435907723e-05, "loss": 0.4678, "step": 15547 }, { "epoch": 0.4269082921471719, "grad_norm": 0.38518592715263367, "learning_rate": 1.7855909210813765e-05, "loss": 0.4959, "step": 15548 }, { "epoch": 0.4269357495881384, "grad_norm": 0.4091208279132843, "learning_rate": 1.7855641971066033e-05, "loss": 0.5571, "step": 15549 }, { "epoch": 0.4269632070291049, "grad_norm": 0.329557329416275, "learning_rate": 1.7855374716665016e-05, "loss": 0.4602, "step": 15550 }, { "epoch": 0.4269906644700714, "grad_norm": 0.3863663971424103, "learning_rate": 1.785510744761122e-05, "loss": 0.5546, "step": 15551 }, { "epoch": 0.4270181219110379, "grad_norm": 0.4092973470687866, "learning_rate": 1.7854840163905144e-05, "loss": 0.5138, "step": 15552 }, { "epoch": 0.4270455793520044, "grad_norm": 0.3504412770271301, "learning_rate": 1.7854572865547282e-05, "loss": 0.4939, "step": 15553 }, { "epoch": 0.4270730367929709, "grad_norm": 0.3976462185382843, "learning_rate": 1.7854305552538135e-05, "loss": 0.5249, "step": 15554 }, { "epoch": 0.4271004942339374, "grad_norm": 0.41781318187713623, "learning_rate": 1.7854038224878204e-05, "loss": 0.5382, "step": 15555 }, { "epoch": 0.4271279516749039, "grad_norm": 0.390360563993454, "learning_rate": 1.785377088256798e-05, "loss": 0.4549, "step": 15556 }, { "epoch": 0.4271554091158704, "grad_norm": 0.3617633283138275, "learning_rate": 1.785350352560797e-05, "loss": 0.5615, "step": 15557 }, { "epoch": 0.4271828665568369, "grad_norm": 0.37597525119781494, "learning_rate": 1.785323615399867e-05, "loss": 0.5104, "step": 15558 }, { "epoch": 0.4272103239978034, "grad_norm": 0.3892720341682434, "learning_rate": 1.7852968767740576e-05, "loss": 0.5601, "step": 15559 }, { "epoch": 0.4272377814387699, "grad_norm": 0.35516464710235596, "learning_rate": 1.785270136683419e-05, "loss": 0.5918, "step": 15560 }, { "epoch": 0.4272652388797364, "grad_norm": 0.39645999670028687, "learning_rate": 1.7852433951280012e-05, "loss": 0.4935, "step": 15561 }, { "epoch": 0.42729269632070294, "grad_norm": 0.35321134328842163, "learning_rate": 1.7852166521078533e-05, "loss": 0.5262, "step": 15562 }, { "epoch": 0.42732015376166943, "grad_norm": 0.3558225929737091, "learning_rate": 1.785189907623026e-05, "loss": 0.5069, "step": 15563 }, { "epoch": 0.4273476112026359, "grad_norm": 0.3179214894771576, "learning_rate": 1.785163161673569e-05, "loss": 0.4557, "step": 15564 }, { "epoch": 0.4273750686436024, "grad_norm": 0.40835005044937134, "learning_rate": 1.785136414259532e-05, "loss": 0.4868, "step": 15565 }, { "epoch": 0.4274025260845689, "grad_norm": 0.3481159806251526, "learning_rate": 1.785109665380965e-05, "loss": 0.5348, "step": 15566 }, { "epoch": 0.4274299835255354, "grad_norm": 0.3252103626728058, "learning_rate": 1.785082915037918e-05, "loss": 0.4683, "step": 15567 }, { "epoch": 0.4274574409665019, "grad_norm": 0.4563353955745697, "learning_rate": 1.7850561632304405e-05, "loss": 0.5595, "step": 15568 }, { "epoch": 0.42748489840746845, "grad_norm": 0.3681945204734802, "learning_rate": 1.785029409958583e-05, "loss": 0.5617, "step": 15569 }, { "epoch": 0.42751235584843494, "grad_norm": 0.4106462001800537, "learning_rate": 1.785002655222395e-05, "loss": 0.5231, "step": 15570 }, { "epoch": 0.42753981328940144, "grad_norm": 0.3568187355995178, "learning_rate": 1.7849758990219267e-05, "loss": 0.4958, "step": 15571 }, { "epoch": 0.42756727073036793, "grad_norm": 0.4265635013580322, "learning_rate": 1.7849491413572274e-05, "loss": 0.6026, "step": 15572 }, { "epoch": 0.4275947281713344, "grad_norm": 0.3427497148513794, "learning_rate": 1.7849223822283476e-05, "loss": 0.4668, "step": 15573 }, { "epoch": 0.4276221856123009, "grad_norm": 0.39538654685020447, "learning_rate": 1.784895621635337e-05, "loss": 0.4363, "step": 15574 }, { "epoch": 0.4276496430532674, "grad_norm": 0.34898945689201355, "learning_rate": 1.784868859578246e-05, "loss": 0.5734, "step": 15575 }, { "epoch": 0.42767710049423396, "grad_norm": 0.37825992703437805, "learning_rate": 1.7848420960571237e-05, "loss": 0.5426, "step": 15576 }, { "epoch": 0.42770455793520046, "grad_norm": 0.358203649520874, "learning_rate": 1.78481533107202e-05, "loss": 0.4538, "step": 15577 }, { "epoch": 0.42773201537616695, "grad_norm": 0.43523499369621277, "learning_rate": 1.784788564622986e-05, "loss": 0.5345, "step": 15578 }, { "epoch": 0.42775947281713345, "grad_norm": 0.3684293031692505, "learning_rate": 1.7847617967100706e-05, "loss": 0.5107, "step": 15579 }, { "epoch": 0.42778693025809994, "grad_norm": 0.3635784387588501, "learning_rate": 1.7847350273333237e-05, "loss": 0.5002, "step": 15580 }, { "epoch": 0.42781438769906643, "grad_norm": 0.351823091506958, "learning_rate": 1.7847082564927958e-05, "loss": 0.5423, "step": 15581 }, { "epoch": 0.42784184514003293, "grad_norm": 0.3754054009914398, "learning_rate": 1.7846814841885364e-05, "loss": 0.5409, "step": 15582 }, { "epoch": 0.4278693025809995, "grad_norm": 0.3931616544723511, "learning_rate": 1.7846547104205955e-05, "loss": 0.5188, "step": 15583 }, { "epoch": 0.42789676002196597, "grad_norm": 0.32828494906425476, "learning_rate": 1.7846279351890234e-05, "loss": 0.4684, "step": 15584 }, { "epoch": 0.42792421746293247, "grad_norm": 0.4146119952201843, "learning_rate": 1.7846011584938695e-05, "loss": 0.5103, "step": 15585 }, { "epoch": 0.42795167490389896, "grad_norm": 0.37571293115615845, "learning_rate": 1.7845743803351846e-05, "loss": 0.5181, "step": 15586 }, { "epoch": 0.42797913234486545, "grad_norm": 0.36555659770965576, "learning_rate": 1.7845476007130176e-05, "loss": 0.4412, "step": 15587 }, { "epoch": 0.42800658978583195, "grad_norm": 0.45493435859680176, "learning_rate": 1.7845208196274188e-05, "loss": 0.5366, "step": 15588 }, { "epoch": 0.42803404722679844, "grad_norm": 0.40772348642349243, "learning_rate": 1.7844940370784385e-05, "loss": 0.5096, "step": 15589 }, { "epoch": 0.428061504667765, "grad_norm": 0.338485449552536, "learning_rate": 1.7844672530661265e-05, "loss": 0.49, "step": 15590 }, { "epoch": 0.4280889621087315, "grad_norm": 0.3944374620914459, "learning_rate": 1.7844404675905325e-05, "loss": 0.433, "step": 15591 }, { "epoch": 0.428116419549698, "grad_norm": 0.380371630191803, "learning_rate": 1.7844136806517067e-05, "loss": 0.5572, "step": 15592 }, { "epoch": 0.4281438769906645, "grad_norm": 0.35681915283203125, "learning_rate": 1.7843868922496992e-05, "loss": 0.5443, "step": 15593 }, { "epoch": 0.42817133443163097, "grad_norm": 0.35569679737091064, "learning_rate": 1.7843601023845596e-05, "loss": 0.5328, "step": 15594 }, { "epoch": 0.42819879187259746, "grad_norm": 0.34454721212387085, "learning_rate": 1.7843333110563383e-05, "loss": 0.524, "step": 15595 }, { "epoch": 0.42822624931356396, "grad_norm": 0.3646050691604614, "learning_rate": 1.7843065182650847e-05, "loss": 0.5225, "step": 15596 }, { "epoch": 0.4282537067545305, "grad_norm": 0.36839067935943604, "learning_rate": 1.7842797240108494e-05, "loss": 0.4887, "step": 15597 }, { "epoch": 0.428281164195497, "grad_norm": 0.37263861298561096, "learning_rate": 1.784252928293682e-05, "loss": 0.4894, "step": 15598 }, { "epoch": 0.4283086216364635, "grad_norm": 0.363040953874588, "learning_rate": 1.7842261311136327e-05, "loss": 0.4799, "step": 15599 }, { "epoch": 0.42833607907743, "grad_norm": 0.4820467233657837, "learning_rate": 1.7841993324707513e-05, "loss": 0.5081, "step": 15600 }, { "epoch": 0.4283635365183965, "grad_norm": 0.33750930428504944, "learning_rate": 1.7841725323650876e-05, "loss": 0.4539, "step": 15601 }, { "epoch": 0.428390993959363, "grad_norm": 0.3770262897014618, "learning_rate": 1.7841457307966923e-05, "loss": 0.5165, "step": 15602 }, { "epoch": 0.42841845140032947, "grad_norm": 0.405598908662796, "learning_rate": 1.7841189277656144e-05, "loss": 0.5542, "step": 15603 }, { "epoch": 0.42844590884129596, "grad_norm": 0.3619769215583801, "learning_rate": 1.784092123271905e-05, "loss": 0.5529, "step": 15604 }, { "epoch": 0.4284733662822625, "grad_norm": 0.3692227303981781, "learning_rate": 1.7840653173156135e-05, "loss": 0.4853, "step": 15605 }, { "epoch": 0.428500823723229, "grad_norm": 0.3580462634563446, "learning_rate": 1.7840385098967897e-05, "loss": 0.5003, "step": 15606 }, { "epoch": 0.4285282811641955, "grad_norm": 0.3320994973182678, "learning_rate": 1.784011701015484e-05, "loss": 0.5899, "step": 15607 }, { "epoch": 0.428555738605162, "grad_norm": 0.401951402425766, "learning_rate": 1.783984890671746e-05, "loss": 0.526, "step": 15608 }, { "epoch": 0.4285831960461285, "grad_norm": 0.3477945923805237, "learning_rate": 1.783958078865626e-05, "loss": 0.5004, "step": 15609 }, { "epoch": 0.428610653487095, "grad_norm": 0.3977945148944855, "learning_rate": 1.783931265597174e-05, "loss": 0.5186, "step": 15610 }, { "epoch": 0.4286381109280615, "grad_norm": 0.5047829747200012, "learning_rate": 1.78390445086644e-05, "loss": 0.5397, "step": 15611 }, { "epoch": 0.428665568369028, "grad_norm": 0.3687898814678192, "learning_rate": 1.7838776346734743e-05, "loss": 0.5076, "step": 15612 }, { "epoch": 0.4286930258099945, "grad_norm": 0.36475813388824463, "learning_rate": 1.783850817018326e-05, "loss": 0.4385, "step": 15613 }, { "epoch": 0.428720483250961, "grad_norm": 0.42715758085250854, "learning_rate": 1.7838239979010463e-05, "loss": 0.5348, "step": 15614 }, { "epoch": 0.4287479406919275, "grad_norm": 0.3834131062030792, "learning_rate": 1.7837971773216846e-05, "loss": 0.4934, "step": 15615 }, { "epoch": 0.428775398132894, "grad_norm": 0.4326578378677368, "learning_rate": 1.783770355280291e-05, "loss": 0.4872, "step": 15616 }, { "epoch": 0.4288028555738605, "grad_norm": 0.36710941791534424, "learning_rate": 1.7837435317769152e-05, "loss": 0.5424, "step": 15617 }, { "epoch": 0.428830313014827, "grad_norm": 0.42682603001594543, "learning_rate": 1.783716706811608e-05, "loss": 0.5265, "step": 15618 }, { "epoch": 0.42885777045579354, "grad_norm": 0.39651522040367126, "learning_rate": 1.783689880384419e-05, "loss": 0.5145, "step": 15619 }, { "epoch": 0.42888522789676004, "grad_norm": 0.40778475999832153, "learning_rate": 1.7836630524953978e-05, "loss": 0.4944, "step": 15620 }, { "epoch": 0.42891268533772653, "grad_norm": 0.3755621314048767, "learning_rate": 1.7836362231445953e-05, "loss": 0.5253, "step": 15621 }, { "epoch": 0.428940142778693, "grad_norm": 0.35571184754371643, "learning_rate": 1.7836093923320608e-05, "loss": 0.517, "step": 15622 }, { "epoch": 0.4289676002196595, "grad_norm": 0.3492415249347687, "learning_rate": 1.783582560057845e-05, "loss": 0.4927, "step": 15623 }, { "epoch": 0.428995057660626, "grad_norm": 0.3760034739971161, "learning_rate": 1.7835557263219976e-05, "loss": 0.5559, "step": 15624 }, { "epoch": 0.4290225151015925, "grad_norm": 0.41185203194618225, "learning_rate": 1.7835288911245687e-05, "loss": 0.5259, "step": 15625 }, { "epoch": 0.42904997254255905, "grad_norm": 0.4010952413082123, "learning_rate": 1.7835020544656082e-05, "loss": 0.5477, "step": 15626 }, { "epoch": 0.42907742998352555, "grad_norm": 0.37991026043891907, "learning_rate": 1.7834752163451666e-05, "loss": 0.5669, "step": 15627 }, { "epoch": 0.42910488742449204, "grad_norm": 0.39822471141815186, "learning_rate": 1.7834483767632933e-05, "loss": 0.4211, "step": 15628 }, { "epoch": 0.42913234486545854, "grad_norm": 0.4352516233921051, "learning_rate": 1.7834215357200388e-05, "loss": 0.5102, "step": 15629 }, { "epoch": 0.42915980230642503, "grad_norm": 0.3892457187175751, "learning_rate": 1.7833946932154532e-05, "loss": 0.5131, "step": 15630 }, { "epoch": 0.4291872597473915, "grad_norm": 0.3551495671272278, "learning_rate": 1.7833678492495864e-05, "loss": 0.4878, "step": 15631 }, { "epoch": 0.429214717188358, "grad_norm": 0.3664412498474121, "learning_rate": 1.7833410038224888e-05, "loss": 0.5307, "step": 15632 }, { "epoch": 0.42924217462932457, "grad_norm": 0.38042640686035156, "learning_rate": 1.7833141569342095e-05, "loss": 0.4434, "step": 15633 }, { "epoch": 0.42926963207029106, "grad_norm": 0.34326887130737305, "learning_rate": 1.7832873085848e-05, "loss": 0.4484, "step": 15634 }, { "epoch": 0.42929708951125756, "grad_norm": 0.42982205748558044, "learning_rate": 1.7832604587743098e-05, "loss": 0.4555, "step": 15635 }, { "epoch": 0.42932454695222405, "grad_norm": 0.40658050775527954, "learning_rate": 1.7832336075027882e-05, "loss": 0.5462, "step": 15636 }, { "epoch": 0.42935200439319054, "grad_norm": 0.39834654331207275, "learning_rate": 1.7832067547702862e-05, "loss": 0.6259, "step": 15637 }, { "epoch": 0.42937946183415704, "grad_norm": 0.397270530462265, "learning_rate": 1.7831799005768536e-05, "loss": 0.5267, "step": 15638 }, { "epoch": 0.42940691927512353, "grad_norm": 0.3909752666950226, "learning_rate": 1.783153044922541e-05, "loss": 0.6079, "step": 15639 }, { "epoch": 0.4294343767160901, "grad_norm": 0.3312550187110901, "learning_rate": 1.7831261878073974e-05, "loss": 0.5022, "step": 15640 }, { "epoch": 0.4294618341570566, "grad_norm": 0.4100955128669739, "learning_rate": 1.7830993292314736e-05, "loss": 0.4716, "step": 15641 }, { "epoch": 0.42948929159802307, "grad_norm": 0.41584843397140503, "learning_rate": 1.78307246919482e-05, "loss": 0.4369, "step": 15642 }, { "epoch": 0.42951674903898956, "grad_norm": 0.40486595034599304, "learning_rate": 1.783045607697486e-05, "loss": 0.5571, "step": 15643 }, { "epoch": 0.42954420647995606, "grad_norm": 0.34573739767074585, "learning_rate": 1.783018744739522e-05, "loss": 0.5268, "step": 15644 }, { "epoch": 0.42957166392092255, "grad_norm": 0.3542966842651367, "learning_rate": 1.7829918803209783e-05, "loss": 0.4583, "step": 15645 }, { "epoch": 0.42959912136188905, "grad_norm": 0.33474448323249817, "learning_rate": 1.7829650144419046e-05, "loss": 0.5061, "step": 15646 }, { "epoch": 0.4296265788028556, "grad_norm": 0.3493189513683319, "learning_rate": 1.7829381471023516e-05, "loss": 0.4287, "step": 15647 }, { "epoch": 0.4296540362438221, "grad_norm": 0.3732379674911499, "learning_rate": 1.782911278302369e-05, "loss": 0.4615, "step": 15648 }, { "epoch": 0.4296814936847886, "grad_norm": 0.361259400844574, "learning_rate": 1.7828844080420067e-05, "loss": 0.4837, "step": 15649 }, { "epoch": 0.4297089511257551, "grad_norm": 0.36637619137763977, "learning_rate": 1.782857536321315e-05, "loss": 0.4921, "step": 15650 }, { "epoch": 0.4297364085667216, "grad_norm": 0.37268027663230896, "learning_rate": 1.7828306631403448e-05, "loss": 0.5531, "step": 15651 }, { "epoch": 0.42976386600768807, "grad_norm": 0.43603235483169556, "learning_rate": 1.7828037884991452e-05, "loss": 0.5553, "step": 15652 }, { "epoch": 0.42979132344865456, "grad_norm": 0.372663289308548, "learning_rate": 1.7827769123977665e-05, "loss": 0.4901, "step": 15653 }, { "epoch": 0.4298187808896211, "grad_norm": 0.3810310363769531, "learning_rate": 1.7827500348362593e-05, "loss": 0.5234, "step": 15654 }, { "epoch": 0.4298462383305876, "grad_norm": 0.352780818939209, "learning_rate": 1.7827231558146732e-05, "loss": 0.5417, "step": 15655 }, { "epoch": 0.4298736957715541, "grad_norm": 0.4100407660007477, "learning_rate": 1.7826962753330588e-05, "loss": 0.5434, "step": 15656 }, { "epoch": 0.4299011532125206, "grad_norm": 0.38979974389076233, "learning_rate": 1.782669393391466e-05, "loss": 0.5606, "step": 15657 }, { "epoch": 0.4299286106534871, "grad_norm": 0.363499253988266, "learning_rate": 1.7826425099899453e-05, "loss": 0.496, "step": 15658 }, { "epoch": 0.4299560680944536, "grad_norm": 0.3653847277164459, "learning_rate": 1.7826156251285458e-05, "loss": 0.5111, "step": 15659 }, { "epoch": 0.4299835255354201, "grad_norm": 0.37519705295562744, "learning_rate": 1.782588738807319e-05, "loss": 0.5995, "step": 15660 }, { "epoch": 0.4300109829763866, "grad_norm": 0.3726786971092224, "learning_rate": 1.7825618510263144e-05, "loss": 0.5448, "step": 15661 }, { "epoch": 0.4300384404173531, "grad_norm": 0.3909270763397217, "learning_rate": 1.7825349617855818e-05, "loss": 0.5069, "step": 15662 }, { "epoch": 0.4300658978583196, "grad_norm": 0.37019577622413635, "learning_rate": 1.7825080710851718e-05, "loss": 0.6331, "step": 15663 }, { "epoch": 0.4300933552992861, "grad_norm": 0.3984910249710083, "learning_rate": 1.7824811789251347e-05, "loss": 0.537, "step": 15664 }, { "epoch": 0.4301208127402526, "grad_norm": 0.3523688316345215, "learning_rate": 1.7824542853055203e-05, "loss": 0.4739, "step": 15665 }, { "epoch": 0.4301482701812191, "grad_norm": 0.3890755772590637, "learning_rate": 1.7824273902263792e-05, "loss": 0.5956, "step": 15666 }, { "epoch": 0.4301757276221856, "grad_norm": 0.38757261633872986, "learning_rate": 1.7824004936877612e-05, "loss": 0.4991, "step": 15667 }, { "epoch": 0.43020318506315214, "grad_norm": 0.3760043978691101, "learning_rate": 1.7823735956897165e-05, "loss": 0.4655, "step": 15668 }, { "epoch": 0.43023064250411863, "grad_norm": 0.3279905617237091, "learning_rate": 1.782346696232295e-05, "loss": 0.4742, "step": 15669 }, { "epoch": 0.4302580999450851, "grad_norm": 0.5020060539245605, "learning_rate": 1.7823197953155477e-05, "loss": 0.5759, "step": 15670 }, { "epoch": 0.4302855573860516, "grad_norm": 0.3679308593273163, "learning_rate": 1.782292892939524e-05, "loss": 0.5334, "step": 15671 }, { "epoch": 0.4303130148270181, "grad_norm": 0.3719729781150818, "learning_rate": 1.782265989104275e-05, "loss": 0.4177, "step": 15672 }, { "epoch": 0.4303404722679846, "grad_norm": 0.3461589217185974, "learning_rate": 1.7822390838098495e-05, "loss": 0.5041, "step": 15673 }, { "epoch": 0.4303679297089511, "grad_norm": 0.33373895287513733, "learning_rate": 1.7822121770562986e-05, "loss": 0.5332, "step": 15674 }, { "epoch": 0.43039538714991765, "grad_norm": 0.3466026782989502, "learning_rate": 1.7821852688436726e-05, "loss": 0.5046, "step": 15675 }, { "epoch": 0.43042284459088415, "grad_norm": 0.3929496705532074, "learning_rate": 1.782158359172021e-05, "loss": 0.5747, "step": 15676 }, { "epoch": 0.43045030203185064, "grad_norm": 0.603589653968811, "learning_rate": 1.782131448041395e-05, "loss": 0.4462, "step": 15677 }, { "epoch": 0.43047775947281713, "grad_norm": 0.4259941279888153, "learning_rate": 1.7821045354518438e-05, "loss": 0.4838, "step": 15678 }, { "epoch": 0.43050521691378363, "grad_norm": 0.35709646344184875, "learning_rate": 1.7820776214034182e-05, "loss": 0.5316, "step": 15679 }, { "epoch": 0.4305326743547501, "grad_norm": 0.41204413771629333, "learning_rate": 1.782050705896168e-05, "loss": 0.6177, "step": 15680 }, { "epoch": 0.4305601317957166, "grad_norm": 0.38293275237083435, "learning_rate": 1.782023788930144e-05, "loss": 0.5499, "step": 15681 }, { "epoch": 0.43058758923668317, "grad_norm": 0.4198997914791107, "learning_rate": 1.7819968705053957e-05, "loss": 0.4566, "step": 15682 }, { "epoch": 0.43061504667764966, "grad_norm": 0.3612712025642395, "learning_rate": 1.781969950621974e-05, "loss": 0.5657, "step": 15683 }, { "epoch": 0.43064250411861615, "grad_norm": 0.3902244567871094, "learning_rate": 1.7819430292799286e-05, "loss": 0.555, "step": 15684 }, { "epoch": 0.43066996155958265, "grad_norm": 0.3809455931186676, "learning_rate": 1.7819161064793098e-05, "loss": 0.5291, "step": 15685 }, { "epoch": 0.43069741900054914, "grad_norm": 0.46708962321281433, "learning_rate": 1.781889182220168e-05, "loss": 0.5517, "step": 15686 }, { "epoch": 0.43072487644151564, "grad_norm": 0.3488490879535675, "learning_rate": 1.7818622565025534e-05, "loss": 0.5213, "step": 15687 }, { "epoch": 0.43075233388248213, "grad_norm": 0.41209137439727783, "learning_rate": 1.7818353293265158e-05, "loss": 0.4224, "step": 15688 }, { "epoch": 0.4307797913234487, "grad_norm": 0.45707350969314575, "learning_rate": 1.7818084006921063e-05, "loss": 0.6717, "step": 15689 }, { "epoch": 0.4308072487644152, "grad_norm": 0.3382502794265747, "learning_rate": 1.7817814705993738e-05, "loss": 0.4591, "step": 15690 }, { "epoch": 0.43083470620538167, "grad_norm": 0.3718818426132202, "learning_rate": 1.7817545390483703e-05, "loss": 0.5394, "step": 15691 }, { "epoch": 0.43086216364634816, "grad_norm": 0.33813679218292236, "learning_rate": 1.7817276060391444e-05, "loss": 0.5399, "step": 15692 }, { "epoch": 0.43088962108731466, "grad_norm": 0.45246621966362, "learning_rate": 1.7817006715717472e-05, "loss": 0.5457, "step": 15693 }, { "epoch": 0.43091707852828115, "grad_norm": 0.38487163186073303, "learning_rate": 1.781673735646229e-05, "loss": 0.4991, "step": 15694 }, { "epoch": 0.43094453596924764, "grad_norm": 0.41845324635505676, "learning_rate": 1.7816467982626396e-05, "loss": 0.5384, "step": 15695 }, { "epoch": 0.4309719934102142, "grad_norm": 0.34164661169052124, "learning_rate": 1.7816198594210294e-05, "loss": 0.5523, "step": 15696 }, { "epoch": 0.4309994508511807, "grad_norm": 0.37607812881469727, "learning_rate": 1.781592919121449e-05, "loss": 0.4913, "step": 15697 }, { "epoch": 0.4310269082921472, "grad_norm": 0.3684045374393463, "learning_rate": 1.7815659773639478e-05, "loss": 0.5275, "step": 15698 }, { "epoch": 0.4310543657331137, "grad_norm": 0.44977059960365295, "learning_rate": 1.781539034148577e-05, "loss": 0.515, "step": 15699 }, { "epoch": 0.43108182317408017, "grad_norm": 0.3925391435623169, "learning_rate": 1.7815120894753865e-05, "loss": 0.5727, "step": 15700 }, { "epoch": 0.43110928061504666, "grad_norm": 0.4505382478237152, "learning_rate": 1.7814851433444263e-05, "loss": 0.5379, "step": 15701 }, { "epoch": 0.43113673805601316, "grad_norm": 0.3827342092990875, "learning_rate": 1.781458195755747e-05, "loss": 0.4273, "step": 15702 }, { "epoch": 0.4311641954969797, "grad_norm": 0.39571887254714966, "learning_rate": 1.7814312467093987e-05, "loss": 0.491, "step": 15703 }, { "epoch": 0.4311916529379462, "grad_norm": 0.3637155592441559, "learning_rate": 1.7814042962054317e-05, "loss": 0.5778, "step": 15704 }, { "epoch": 0.4312191103789127, "grad_norm": 0.34734776616096497, "learning_rate": 1.7813773442438968e-05, "loss": 0.4339, "step": 15705 }, { "epoch": 0.4312465678198792, "grad_norm": 0.39142775535583496, "learning_rate": 1.781350390824843e-05, "loss": 0.5682, "step": 15706 }, { "epoch": 0.4312740252608457, "grad_norm": 0.4007240831851959, "learning_rate": 1.7813234359483216e-05, "loss": 0.5592, "step": 15707 }, { "epoch": 0.4313014827018122, "grad_norm": 0.378548800945282, "learning_rate": 1.7812964796143826e-05, "loss": 0.5796, "step": 15708 }, { "epoch": 0.43132894014277867, "grad_norm": 0.3980883061885834, "learning_rate": 1.7812695218230764e-05, "loss": 0.4332, "step": 15709 }, { "epoch": 0.4313563975837452, "grad_norm": 0.3982264995574951, "learning_rate": 1.781242562574453e-05, "loss": 0.5949, "step": 15710 }, { "epoch": 0.4313838550247117, "grad_norm": 0.3674687147140503, "learning_rate": 1.7812156018685633e-05, "loss": 0.5322, "step": 15711 }, { "epoch": 0.4314113124656782, "grad_norm": 0.35201963782310486, "learning_rate": 1.781188639705457e-05, "loss": 0.5392, "step": 15712 }, { "epoch": 0.4314387699066447, "grad_norm": 0.33073943853378296, "learning_rate": 1.7811616760851845e-05, "loss": 0.5223, "step": 15713 }, { "epoch": 0.4314662273476112, "grad_norm": 0.35256484150886536, "learning_rate": 1.781134711007796e-05, "loss": 0.4886, "step": 15714 }, { "epoch": 0.4314936847885777, "grad_norm": 0.39341309666633606, "learning_rate": 1.7811077444733422e-05, "loss": 0.5896, "step": 15715 }, { "epoch": 0.4315211422295442, "grad_norm": 0.33381661772727966, "learning_rate": 1.7810807764818728e-05, "loss": 0.4581, "step": 15716 }, { "epoch": 0.43154859967051074, "grad_norm": 0.3790580928325653, "learning_rate": 1.781053807033439e-05, "loss": 0.5242, "step": 15717 }, { "epoch": 0.43157605711147723, "grad_norm": 0.3390536904335022, "learning_rate": 1.78102683612809e-05, "loss": 0.4294, "step": 15718 }, { "epoch": 0.4316035145524437, "grad_norm": 0.3827509582042694, "learning_rate": 1.780999863765877e-05, "loss": 0.5301, "step": 15719 }, { "epoch": 0.4316309719934102, "grad_norm": 0.41508498787879944, "learning_rate": 1.78097288994685e-05, "loss": 0.5467, "step": 15720 }, { "epoch": 0.4316584294343767, "grad_norm": 0.3679489493370056, "learning_rate": 1.7809459146710596e-05, "loss": 0.4673, "step": 15721 }, { "epoch": 0.4316858868753432, "grad_norm": 0.3585484027862549, "learning_rate": 1.780918937938555e-05, "loss": 0.5203, "step": 15722 }, { "epoch": 0.4317133443163097, "grad_norm": 0.3760419189929962, "learning_rate": 1.780891959749388e-05, "loss": 0.5436, "step": 15723 }, { "epoch": 0.43174080175727625, "grad_norm": 0.3591921031475067, "learning_rate": 1.780864980103608e-05, "loss": 0.4432, "step": 15724 }, { "epoch": 0.43176825919824274, "grad_norm": 0.3487963080406189, "learning_rate": 1.7808379990012657e-05, "loss": 0.4889, "step": 15725 }, { "epoch": 0.43179571663920924, "grad_norm": 0.3573169708251953, "learning_rate": 1.7808110164424115e-05, "loss": 0.4482, "step": 15726 }, { "epoch": 0.43182317408017573, "grad_norm": 0.4218449294567108, "learning_rate": 1.7807840324270953e-05, "loss": 0.4779, "step": 15727 }, { "epoch": 0.4318506315211422, "grad_norm": 0.4103866219520569, "learning_rate": 1.7807570469553677e-05, "loss": 0.58, "step": 15728 }, { "epoch": 0.4318780889621087, "grad_norm": 1.0443987846374512, "learning_rate": 1.780730060027279e-05, "loss": 0.5899, "step": 15729 }, { "epoch": 0.4319055464030752, "grad_norm": 0.43774178624153137, "learning_rate": 1.7807030716428794e-05, "loss": 0.5366, "step": 15730 }, { "epoch": 0.43193300384404176, "grad_norm": 0.3651607036590576, "learning_rate": 1.78067608180222e-05, "loss": 0.56, "step": 15731 }, { "epoch": 0.43196046128500826, "grad_norm": 0.37602555751800537, "learning_rate": 1.78064909050535e-05, "loss": 0.5628, "step": 15732 }, { "epoch": 0.43198791872597475, "grad_norm": 0.3664279282093048, "learning_rate": 1.7806220977523205e-05, "loss": 0.5677, "step": 15733 }, { "epoch": 0.43201537616694125, "grad_norm": 0.36824747920036316, "learning_rate": 1.7805951035431813e-05, "loss": 0.4528, "step": 15734 }, { "epoch": 0.43204283360790774, "grad_norm": 0.5187387466430664, "learning_rate": 1.7805681078779833e-05, "loss": 0.4766, "step": 15735 }, { "epoch": 0.43207029104887423, "grad_norm": 0.348093718290329, "learning_rate": 1.7805411107567767e-05, "loss": 0.5126, "step": 15736 }, { "epoch": 0.4320977484898407, "grad_norm": 0.3847460448741913, "learning_rate": 1.780514112179612e-05, "loss": 0.5325, "step": 15737 }, { "epoch": 0.4321252059308072, "grad_norm": 0.40005144476890564, "learning_rate": 1.7804871121465392e-05, "loss": 0.4937, "step": 15738 }, { "epoch": 0.43215266337177377, "grad_norm": 0.37596601247787476, "learning_rate": 1.7804601106576086e-05, "loss": 0.4866, "step": 15739 }, { "epoch": 0.43218012081274026, "grad_norm": 0.33173397183418274, "learning_rate": 1.7804331077128712e-05, "loss": 0.4568, "step": 15740 }, { "epoch": 0.43220757825370676, "grad_norm": 0.376644492149353, "learning_rate": 1.7804061033123767e-05, "loss": 0.5061, "step": 15741 }, { "epoch": 0.43223503569467325, "grad_norm": 0.3960675597190857, "learning_rate": 1.780379097456176e-05, "loss": 0.5436, "step": 15742 }, { "epoch": 0.43226249313563975, "grad_norm": 0.39394718408584595, "learning_rate": 1.780352090144319e-05, "loss": 0.5307, "step": 15743 }, { "epoch": 0.43228995057660624, "grad_norm": 0.4712206721305847, "learning_rate": 1.7803250813768563e-05, "loss": 0.5065, "step": 15744 }, { "epoch": 0.43231740801757274, "grad_norm": 0.44236811995506287, "learning_rate": 1.7802980711538383e-05, "loss": 0.6437, "step": 15745 }, { "epoch": 0.4323448654585393, "grad_norm": 0.4044110178947449, "learning_rate": 1.7802710594753155e-05, "loss": 0.5749, "step": 15746 }, { "epoch": 0.4323723228995058, "grad_norm": 0.3664276599884033, "learning_rate": 1.7802440463413376e-05, "loss": 0.4981, "step": 15747 }, { "epoch": 0.4323997803404723, "grad_norm": 0.4061742424964905, "learning_rate": 1.7802170317519562e-05, "loss": 0.5459, "step": 15748 }, { "epoch": 0.43242723778143877, "grad_norm": 0.4023115038871765, "learning_rate": 1.7801900157072207e-05, "loss": 0.5721, "step": 15749 }, { "epoch": 0.43245469522240526, "grad_norm": 0.4223284423351288, "learning_rate": 1.7801629982071817e-05, "loss": 0.4946, "step": 15750 }, { "epoch": 0.43248215266337175, "grad_norm": 0.40260612964630127, "learning_rate": 1.78013597925189e-05, "loss": 0.5203, "step": 15751 }, { "epoch": 0.43250961010433825, "grad_norm": 0.38852378726005554, "learning_rate": 1.7801089588413956e-05, "loss": 0.5835, "step": 15752 }, { "epoch": 0.4325370675453048, "grad_norm": 0.3788832128047943, "learning_rate": 1.780081936975749e-05, "loss": 0.5258, "step": 15753 }, { "epoch": 0.4325645249862713, "grad_norm": 0.39573419094085693, "learning_rate": 1.7800549136550006e-05, "loss": 0.4873, "step": 15754 }, { "epoch": 0.4325919824272378, "grad_norm": 0.35904091596603394, "learning_rate": 1.780027888879201e-05, "loss": 0.5215, "step": 15755 }, { "epoch": 0.4326194398682043, "grad_norm": 0.40415456891059875, "learning_rate": 1.7800008626484004e-05, "loss": 0.5075, "step": 15756 }, { "epoch": 0.4326468973091708, "grad_norm": 0.4173988699913025, "learning_rate": 1.779973834962649e-05, "loss": 0.6431, "step": 15757 }, { "epoch": 0.43267435475013727, "grad_norm": 0.4011816084384918, "learning_rate": 1.7799468058219973e-05, "loss": 0.4507, "step": 15758 }, { "epoch": 0.43270181219110376, "grad_norm": 0.36061280965805054, "learning_rate": 1.7799197752264962e-05, "loss": 0.4138, "step": 15759 }, { "epoch": 0.4327292696320703, "grad_norm": 0.4089283347129822, "learning_rate": 1.7798927431761957e-05, "loss": 0.4733, "step": 15760 }, { "epoch": 0.4327567270730368, "grad_norm": 0.3595152795314789, "learning_rate": 1.7798657096711466e-05, "loss": 0.5674, "step": 15761 }, { "epoch": 0.4327841845140033, "grad_norm": 0.3656414747238159, "learning_rate": 1.7798386747113987e-05, "loss": 0.5041, "step": 15762 }, { "epoch": 0.4328116419549698, "grad_norm": 0.3366455137729645, "learning_rate": 1.7798116382970028e-05, "loss": 0.4678, "step": 15763 }, { "epoch": 0.4328390993959363, "grad_norm": 0.37512555718421936, "learning_rate": 1.779784600428009e-05, "loss": 0.5195, "step": 15764 }, { "epoch": 0.4328665568369028, "grad_norm": 0.3332734704017639, "learning_rate": 1.7797575611044688e-05, "loss": 0.5018, "step": 15765 }, { "epoch": 0.4328940142778693, "grad_norm": 0.31853818893432617, "learning_rate": 1.7797305203264316e-05, "loss": 0.4782, "step": 15766 }, { "epoch": 0.4329214717188358, "grad_norm": 0.4094507098197937, "learning_rate": 1.7797034780939477e-05, "loss": 0.5251, "step": 15767 }, { "epoch": 0.4329489291598023, "grad_norm": 0.37258896231651306, "learning_rate": 1.7796764344070686e-05, "loss": 0.4758, "step": 15768 }, { "epoch": 0.4329763866007688, "grad_norm": 0.3203381299972534, "learning_rate": 1.7796493892658437e-05, "loss": 0.4703, "step": 15769 }, { "epoch": 0.4330038440417353, "grad_norm": 0.38322409987449646, "learning_rate": 1.779622342670324e-05, "loss": 0.4961, "step": 15770 }, { "epoch": 0.4330313014827018, "grad_norm": 0.36165711283683777, "learning_rate": 1.7795952946205597e-05, "loss": 0.486, "step": 15771 }, { "epoch": 0.4330587589236683, "grad_norm": 0.41877514123916626, "learning_rate": 1.779568245116601e-05, "loss": 0.4868, "step": 15772 }, { "epoch": 0.4330862163646348, "grad_norm": 0.3353801965713501, "learning_rate": 1.7795411941584995e-05, "loss": 0.4253, "step": 15773 }, { "epoch": 0.43311367380560134, "grad_norm": 0.3681931495666504, "learning_rate": 1.7795141417463045e-05, "loss": 0.4948, "step": 15774 }, { "epoch": 0.43314113124656783, "grad_norm": 0.38289928436279297, "learning_rate": 1.7794870878800666e-05, "loss": 0.5024, "step": 15775 }, { "epoch": 0.43316858868753433, "grad_norm": 0.3968794047832489, "learning_rate": 1.779460032559837e-05, "loss": 0.4581, "step": 15776 }, { "epoch": 0.4331960461285008, "grad_norm": 0.360524445772171, "learning_rate": 1.779432975785665e-05, "loss": 0.4894, "step": 15777 }, { "epoch": 0.4332235035694673, "grad_norm": 0.4136369824409485, "learning_rate": 1.7794059175576022e-05, "loss": 0.5057, "step": 15778 }, { "epoch": 0.4332509610104338, "grad_norm": 0.34836655855178833, "learning_rate": 1.779378857875699e-05, "loss": 0.5076, "step": 15779 }, { "epoch": 0.4332784184514003, "grad_norm": 0.537964940071106, "learning_rate": 1.779351796740005e-05, "loss": 0.4526, "step": 15780 }, { "epoch": 0.43330587589236685, "grad_norm": 0.4050541818141937, "learning_rate": 1.779324734150571e-05, "loss": 0.5703, "step": 15781 }, { "epoch": 0.43333333333333335, "grad_norm": 0.41144153475761414, "learning_rate": 1.7792976701074478e-05, "loss": 0.6084, "step": 15782 }, { "epoch": 0.43336079077429984, "grad_norm": 0.3459450900554657, "learning_rate": 1.7792706046106856e-05, "loss": 0.4806, "step": 15783 }, { "epoch": 0.43338824821526634, "grad_norm": 0.4050538241863251, "learning_rate": 1.7792435376603352e-05, "loss": 0.4685, "step": 15784 }, { "epoch": 0.43341570565623283, "grad_norm": 0.3708988428115845, "learning_rate": 1.779216469256447e-05, "loss": 0.5081, "step": 15785 }, { "epoch": 0.4334431630971993, "grad_norm": 0.38712260127067566, "learning_rate": 1.779189399399071e-05, "loss": 0.5654, "step": 15786 }, { "epoch": 0.4334706205381658, "grad_norm": 0.37014782428741455, "learning_rate": 1.7791623280882584e-05, "loss": 0.4849, "step": 15787 }, { "epoch": 0.43349807797913237, "grad_norm": 0.34224873781204224, "learning_rate": 1.7791352553240592e-05, "loss": 0.5033, "step": 15788 }, { "epoch": 0.43352553542009886, "grad_norm": 0.36535584926605225, "learning_rate": 1.7791081811065243e-05, "loss": 0.5386, "step": 15789 }, { "epoch": 0.43355299286106536, "grad_norm": 0.36339256167411804, "learning_rate": 1.7790811054357038e-05, "loss": 0.5052, "step": 15790 }, { "epoch": 0.43358045030203185, "grad_norm": 0.366231232881546, "learning_rate": 1.7790540283116485e-05, "loss": 0.437, "step": 15791 }, { "epoch": 0.43360790774299834, "grad_norm": 0.3615324795246124, "learning_rate": 1.7790269497344085e-05, "loss": 0.494, "step": 15792 }, { "epoch": 0.43363536518396484, "grad_norm": 0.43290185928344727, "learning_rate": 1.7789998697040348e-05, "loss": 0.5271, "step": 15793 }, { "epoch": 0.43366282262493133, "grad_norm": 0.39158308506011963, "learning_rate": 1.7789727882205778e-05, "loss": 0.5931, "step": 15794 }, { "epoch": 0.4336902800658979, "grad_norm": 0.3939250111579895, "learning_rate": 1.778945705284088e-05, "loss": 0.5306, "step": 15795 }, { "epoch": 0.4337177375068644, "grad_norm": 0.3552763760089874, "learning_rate": 1.7789186208946156e-05, "loss": 0.5332, "step": 15796 }, { "epoch": 0.43374519494783087, "grad_norm": 0.42389288544654846, "learning_rate": 1.7788915350522115e-05, "loss": 0.5166, "step": 15797 }, { "epoch": 0.43377265238879736, "grad_norm": 0.34257790446281433, "learning_rate": 1.7788644477569263e-05, "loss": 0.532, "step": 15798 }, { "epoch": 0.43380010982976386, "grad_norm": 0.4072161018848419, "learning_rate": 1.7788373590088098e-05, "loss": 0.58, "step": 15799 }, { "epoch": 0.43382756727073035, "grad_norm": 0.3685208261013031, "learning_rate": 1.7788102688079133e-05, "loss": 0.5027, "step": 15800 }, { "epoch": 0.43385502471169685, "grad_norm": 0.3383324444293976, "learning_rate": 1.778783177154287e-05, "loss": 0.5142, "step": 15801 }, { "epoch": 0.4338824821526634, "grad_norm": 0.3743751347064972, "learning_rate": 1.7787560840479822e-05, "loss": 0.5313, "step": 15802 }, { "epoch": 0.4339099395936299, "grad_norm": 0.3608703911304474, "learning_rate": 1.778728989489048e-05, "loss": 0.5412, "step": 15803 }, { "epoch": 0.4339373970345964, "grad_norm": 0.4003990590572357, "learning_rate": 1.778701893477536e-05, "loss": 0.4596, "step": 15804 }, { "epoch": 0.4339648544755629, "grad_norm": 0.34891417622566223, "learning_rate": 1.7786747960134962e-05, "loss": 0.5037, "step": 15805 }, { "epoch": 0.43399231191652937, "grad_norm": 0.3308875858783722, "learning_rate": 1.7786476970969796e-05, "loss": 0.5438, "step": 15806 }, { "epoch": 0.43401976935749587, "grad_norm": 0.4000769555568695, "learning_rate": 1.7786205967280363e-05, "loss": 0.5221, "step": 15807 }, { "epoch": 0.43404722679846236, "grad_norm": 0.3394167721271515, "learning_rate": 1.7785934949067175e-05, "loss": 0.4068, "step": 15808 }, { "epoch": 0.4340746842394289, "grad_norm": 0.42152225971221924, "learning_rate": 1.778566391633073e-05, "loss": 0.483, "step": 15809 }, { "epoch": 0.4341021416803954, "grad_norm": 0.3226282000541687, "learning_rate": 1.7785392869071537e-05, "loss": 0.4935, "step": 15810 }, { "epoch": 0.4341295991213619, "grad_norm": 0.41348370909690857, "learning_rate": 1.77851218072901e-05, "loss": 0.656, "step": 15811 }, { "epoch": 0.4341570565623284, "grad_norm": 0.4105367660522461, "learning_rate": 1.778485073098693e-05, "loss": 0.499, "step": 15812 }, { "epoch": 0.4341845140032949, "grad_norm": 0.35402336716651917, "learning_rate": 1.7784579640162526e-05, "loss": 0.4048, "step": 15813 }, { "epoch": 0.4342119714442614, "grad_norm": 0.3282882571220398, "learning_rate": 1.7784308534817396e-05, "loss": 0.5468, "step": 15814 }, { "epoch": 0.4342394288852279, "grad_norm": 0.37348800897598267, "learning_rate": 1.7784037414952048e-05, "loss": 0.5173, "step": 15815 }, { "epoch": 0.4342668863261944, "grad_norm": 0.3790699541568756, "learning_rate": 1.7783766280566986e-05, "loss": 0.5527, "step": 15816 }, { "epoch": 0.4342943437671609, "grad_norm": 0.4281144142150879, "learning_rate": 1.7783495131662713e-05, "loss": 0.6128, "step": 15817 }, { "epoch": 0.4343218012081274, "grad_norm": 0.3457939624786377, "learning_rate": 1.778322396823974e-05, "loss": 0.497, "step": 15818 }, { "epoch": 0.4343492586490939, "grad_norm": 0.3558986783027649, "learning_rate": 1.778295279029857e-05, "loss": 0.5087, "step": 15819 }, { "epoch": 0.4343767160900604, "grad_norm": 0.3898905813694, "learning_rate": 1.778268159783971e-05, "loss": 0.5652, "step": 15820 }, { "epoch": 0.4344041735310269, "grad_norm": 0.4207228422164917, "learning_rate": 1.7782410390863664e-05, "loss": 0.542, "step": 15821 }, { "epoch": 0.4344316309719934, "grad_norm": 0.3639105558395386, "learning_rate": 1.7782139169370937e-05, "loss": 0.5587, "step": 15822 }, { "epoch": 0.43445908841295994, "grad_norm": 0.3443910777568817, "learning_rate": 1.7781867933362037e-05, "loss": 0.4582, "step": 15823 }, { "epoch": 0.43448654585392643, "grad_norm": 0.3996908366680145, "learning_rate": 1.778159668283747e-05, "loss": 0.5949, "step": 15824 }, { "epoch": 0.4345140032948929, "grad_norm": 0.3983134329319, "learning_rate": 1.778132541779774e-05, "loss": 0.4831, "step": 15825 }, { "epoch": 0.4345414607358594, "grad_norm": 0.5080505013465881, "learning_rate": 1.778105413824336e-05, "loss": 0.5284, "step": 15826 }, { "epoch": 0.4345689181768259, "grad_norm": 0.3386547267436981, "learning_rate": 1.7780782844174823e-05, "loss": 0.4783, "step": 15827 }, { "epoch": 0.4345963756177924, "grad_norm": 0.3632126748561859, "learning_rate": 1.7780511535592648e-05, "loss": 0.5375, "step": 15828 }, { "epoch": 0.4346238330587589, "grad_norm": 0.42568525671958923, "learning_rate": 1.7780240212497336e-05, "loss": 0.4829, "step": 15829 }, { "epoch": 0.43465129049972545, "grad_norm": 0.3632119297981262, "learning_rate": 1.777996887488939e-05, "loss": 0.4436, "step": 15830 }, { "epoch": 0.43467874794069195, "grad_norm": 0.3579495847225189, "learning_rate": 1.777969752276932e-05, "loss": 0.5076, "step": 15831 }, { "epoch": 0.43470620538165844, "grad_norm": 0.4616347849369049, "learning_rate": 1.777942615613763e-05, "loss": 0.4859, "step": 15832 }, { "epoch": 0.43473366282262493, "grad_norm": 0.3953580856323242, "learning_rate": 1.777915477499483e-05, "loss": 0.4798, "step": 15833 }, { "epoch": 0.4347611202635914, "grad_norm": 0.4218102693557739, "learning_rate": 1.7778883379341422e-05, "loss": 0.5308, "step": 15834 }, { "epoch": 0.4347885777045579, "grad_norm": 0.43501290678977966, "learning_rate": 1.7778611969177913e-05, "loss": 0.5512, "step": 15835 }, { "epoch": 0.4348160351455244, "grad_norm": 0.35984867811203003, "learning_rate": 1.777834054450481e-05, "loss": 0.5118, "step": 15836 }, { "epoch": 0.43484349258649097, "grad_norm": 0.3327392339706421, "learning_rate": 1.777806910532262e-05, "loss": 0.507, "step": 15837 }, { "epoch": 0.43487095002745746, "grad_norm": 0.36942344903945923, "learning_rate": 1.777779765163185e-05, "loss": 0.5255, "step": 15838 }, { "epoch": 0.43489840746842395, "grad_norm": 0.3470667600631714, "learning_rate": 1.7777526183433006e-05, "loss": 0.534, "step": 15839 }, { "epoch": 0.43492586490939045, "grad_norm": 0.3204260468482971, "learning_rate": 1.777725470072659e-05, "loss": 0.3954, "step": 15840 }, { "epoch": 0.43495332235035694, "grad_norm": 0.382302850484848, "learning_rate": 1.7776983203513113e-05, "loss": 0.5311, "step": 15841 }, { "epoch": 0.43498077979132344, "grad_norm": 0.37130507826805115, "learning_rate": 1.7776711691793084e-05, "loss": 0.594, "step": 15842 }, { "epoch": 0.43500823723228993, "grad_norm": 0.38654929399490356, "learning_rate": 1.7776440165567e-05, "loss": 0.5264, "step": 15843 }, { "epoch": 0.4350356946732565, "grad_norm": 0.3921875059604645, "learning_rate": 1.7776168624835375e-05, "loss": 0.454, "step": 15844 }, { "epoch": 0.435063152114223, "grad_norm": 0.35898104310035706, "learning_rate": 1.7775897069598714e-05, "loss": 0.5018, "step": 15845 }, { "epoch": 0.43509060955518947, "grad_norm": 0.39647793769836426, "learning_rate": 1.7775625499857522e-05, "loss": 0.5282, "step": 15846 }, { "epoch": 0.43511806699615596, "grad_norm": 0.41702693700790405, "learning_rate": 1.7775353915612306e-05, "loss": 0.6002, "step": 15847 }, { "epoch": 0.43514552443712246, "grad_norm": 0.3622141182422638, "learning_rate": 1.777508231686358e-05, "loss": 0.5222, "step": 15848 }, { "epoch": 0.43517298187808895, "grad_norm": 0.3850437104701996, "learning_rate": 1.7774810703611836e-05, "loss": 0.5773, "step": 15849 }, { "epoch": 0.43520043931905544, "grad_norm": 0.34381839632987976, "learning_rate": 1.777453907585759e-05, "loss": 0.478, "step": 15850 }, { "epoch": 0.435227896760022, "grad_norm": 0.3511781394481659, "learning_rate": 1.777426743360135e-05, "loss": 0.5042, "step": 15851 }, { "epoch": 0.4352553542009885, "grad_norm": 0.3984534740447998, "learning_rate": 1.777399577684362e-05, "loss": 0.5013, "step": 15852 }, { "epoch": 0.435282811641955, "grad_norm": 0.4096635580062866, "learning_rate": 1.7773724105584908e-05, "loss": 0.5068, "step": 15853 }, { "epoch": 0.4353102690829215, "grad_norm": 0.4613991677761078, "learning_rate": 1.7773452419825714e-05, "loss": 0.5379, "step": 15854 }, { "epoch": 0.43533772652388797, "grad_norm": 0.4078718423843384, "learning_rate": 1.7773180719566554e-05, "loss": 0.5974, "step": 15855 }, { "epoch": 0.43536518396485446, "grad_norm": 0.3718605041503906, "learning_rate": 1.777290900480793e-05, "loss": 0.4965, "step": 15856 }, { "epoch": 0.43539264140582096, "grad_norm": 0.3643260896205902, "learning_rate": 1.777263727555035e-05, "loss": 0.5187, "step": 15857 }, { "epoch": 0.4354200988467875, "grad_norm": 0.36632901430130005, "learning_rate": 1.777236553179432e-05, "loss": 0.5217, "step": 15858 }, { "epoch": 0.435447556287754, "grad_norm": 0.3820452094078064, "learning_rate": 1.777209377354035e-05, "loss": 0.4462, "step": 15859 }, { "epoch": 0.4354750137287205, "grad_norm": 0.42883503437042236, "learning_rate": 1.7771822000788942e-05, "loss": 0.4884, "step": 15860 }, { "epoch": 0.435502471169687, "grad_norm": 0.42689090967178345, "learning_rate": 1.777155021354061e-05, "loss": 0.4985, "step": 15861 }, { "epoch": 0.4355299286106535, "grad_norm": 0.34809309244155884, "learning_rate": 1.777127841179585e-05, "loss": 0.4945, "step": 15862 }, { "epoch": 0.43555738605162, "grad_norm": 0.3639553189277649, "learning_rate": 1.7771006595555177e-05, "loss": 0.4359, "step": 15863 }, { "epoch": 0.43558484349258647, "grad_norm": 0.3971739113330841, "learning_rate": 1.77707347648191e-05, "loss": 0.6052, "step": 15864 }, { "epoch": 0.435612300933553, "grad_norm": 0.39819633960723877, "learning_rate": 1.777046291958812e-05, "loss": 0.5129, "step": 15865 }, { "epoch": 0.4356397583745195, "grad_norm": 0.3583029508590698, "learning_rate": 1.7770191059862746e-05, "loss": 0.4252, "step": 15866 }, { "epoch": 0.435667215815486, "grad_norm": 0.32921040058135986, "learning_rate": 1.776991918564349e-05, "loss": 0.4538, "step": 15867 }, { "epoch": 0.4356946732564525, "grad_norm": 0.4123268723487854, "learning_rate": 1.776964729693085e-05, "loss": 0.5082, "step": 15868 }, { "epoch": 0.435722130697419, "grad_norm": 0.37465164065361023, "learning_rate": 1.776937539372534e-05, "loss": 0.5876, "step": 15869 }, { "epoch": 0.4357495881383855, "grad_norm": 0.3566734194755554, "learning_rate": 1.7769103476027465e-05, "loss": 0.487, "step": 15870 }, { "epoch": 0.435777045579352, "grad_norm": 0.35565823316574097, "learning_rate": 1.7768831543837734e-05, "loss": 0.5222, "step": 15871 }, { "epoch": 0.4358045030203185, "grad_norm": 0.3792523145675659, "learning_rate": 1.7768559597156648e-05, "loss": 0.4595, "step": 15872 }, { "epoch": 0.43583196046128503, "grad_norm": 0.33798009157180786, "learning_rate": 1.7768287635984722e-05, "loss": 0.5186, "step": 15873 }, { "epoch": 0.4358594179022515, "grad_norm": 0.38798460364341736, "learning_rate": 1.776801566032246e-05, "loss": 0.5179, "step": 15874 }, { "epoch": 0.435886875343218, "grad_norm": 0.4192604124546051, "learning_rate": 1.776774367017037e-05, "loss": 0.5442, "step": 15875 }, { "epoch": 0.4359143327841845, "grad_norm": 0.4031027853488922, "learning_rate": 1.7767471665528958e-05, "loss": 0.4791, "step": 15876 }, { "epoch": 0.435941790225151, "grad_norm": 0.3764681816101074, "learning_rate": 1.7767199646398735e-05, "loss": 0.5617, "step": 15877 }, { "epoch": 0.4359692476661175, "grad_norm": 0.5068613886833191, "learning_rate": 1.7766927612780204e-05, "loss": 0.5039, "step": 15878 }, { "epoch": 0.435996705107084, "grad_norm": 0.34902673959732056, "learning_rate": 1.7766655564673875e-05, "loss": 0.5218, "step": 15879 }, { "epoch": 0.43602416254805054, "grad_norm": 0.3677148222923279, "learning_rate": 1.776638350208025e-05, "loss": 0.533, "step": 15880 }, { "epoch": 0.43605161998901704, "grad_norm": 0.3835904598236084, "learning_rate": 1.7766111424999844e-05, "loss": 0.4955, "step": 15881 }, { "epoch": 0.43607907742998353, "grad_norm": 0.3737671673297882, "learning_rate": 1.7765839333433163e-05, "loss": 0.4889, "step": 15882 }, { "epoch": 0.43610653487095, "grad_norm": 0.379873663187027, "learning_rate": 1.776556722738071e-05, "loss": 0.5123, "step": 15883 }, { "epoch": 0.4361339923119165, "grad_norm": 0.3733636736869812, "learning_rate": 1.7765295106843e-05, "loss": 0.6575, "step": 15884 }, { "epoch": 0.436161449752883, "grad_norm": 0.3931286633014679, "learning_rate": 1.776502297182053e-05, "loss": 0.4399, "step": 15885 }, { "epoch": 0.4361889071938495, "grad_norm": 0.3562195897102356, "learning_rate": 1.776475082231382e-05, "loss": 0.5434, "step": 15886 }, { "epoch": 0.43621636463481606, "grad_norm": 0.37582406401634216, "learning_rate": 1.7764478658323367e-05, "loss": 0.48, "step": 15887 }, { "epoch": 0.43624382207578255, "grad_norm": 0.38222822546958923, "learning_rate": 1.776420647984969e-05, "loss": 0.448, "step": 15888 }, { "epoch": 0.43627127951674904, "grad_norm": 0.3018167018890381, "learning_rate": 1.7763934286893282e-05, "loss": 0.4426, "step": 15889 }, { "epoch": 0.43629873695771554, "grad_norm": 0.41410377621650696, "learning_rate": 1.7763662079454662e-05, "loss": 0.5309, "step": 15890 }, { "epoch": 0.43632619439868203, "grad_norm": 0.39254194498062134, "learning_rate": 1.7763389857534333e-05, "loss": 0.632, "step": 15891 }, { "epoch": 0.4363536518396485, "grad_norm": 0.35128727555274963, "learning_rate": 1.7763117621132803e-05, "loss": 0.6042, "step": 15892 }, { "epoch": 0.436381109280615, "grad_norm": 0.3649732172489166, "learning_rate": 1.7762845370250585e-05, "loss": 0.5337, "step": 15893 }, { "epoch": 0.43640856672158157, "grad_norm": 0.3686985373497009, "learning_rate": 1.7762573104888182e-05, "loss": 0.5088, "step": 15894 }, { "epoch": 0.43643602416254806, "grad_norm": 0.3927343487739563, "learning_rate": 1.77623008250461e-05, "loss": 0.5556, "step": 15895 }, { "epoch": 0.43646348160351456, "grad_norm": 0.4350680410861969, "learning_rate": 1.776202853072485e-05, "loss": 0.5117, "step": 15896 }, { "epoch": 0.43649093904448105, "grad_norm": 0.44280120730400085, "learning_rate": 1.7761756221924942e-05, "loss": 0.5189, "step": 15897 }, { "epoch": 0.43651839648544755, "grad_norm": 0.3498659133911133, "learning_rate": 1.776148389864688e-05, "loss": 0.4527, "step": 15898 }, { "epoch": 0.43654585392641404, "grad_norm": 0.4022500514984131, "learning_rate": 1.7761211560891173e-05, "loss": 0.5029, "step": 15899 }, { "epoch": 0.43657331136738053, "grad_norm": 0.35254722833633423, "learning_rate": 1.776093920865833e-05, "loss": 0.5989, "step": 15900 }, { "epoch": 0.4366007688083471, "grad_norm": 0.3519185483455658, "learning_rate": 1.7760666841948857e-05, "loss": 0.5858, "step": 15901 }, { "epoch": 0.4366282262493136, "grad_norm": 0.37374967336654663, "learning_rate": 1.7760394460763264e-05, "loss": 0.4929, "step": 15902 }, { "epoch": 0.43665568369028007, "grad_norm": 0.37551864981651306, "learning_rate": 1.7760122065102058e-05, "loss": 0.5283, "step": 15903 }, { "epoch": 0.43668314113124657, "grad_norm": 0.341462641954422, "learning_rate": 1.775984965496575e-05, "loss": 0.4672, "step": 15904 }, { "epoch": 0.43671059857221306, "grad_norm": 0.4056234359741211, "learning_rate": 1.7759577230354844e-05, "loss": 0.5502, "step": 15905 }, { "epoch": 0.43673805601317955, "grad_norm": 0.36891016364097595, "learning_rate": 1.775930479126985e-05, "loss": 0.4921, "step": 15906 }, { "epoch": 0.43676551345414605, "grad_norm": 0.36526918411254883, "learning_rate": 1.7759032337711275e-05, "loss": 0.519, "step": 15907 }, { "epoch": 0.4367929708951126, "grad_norm": 0.31434789299964905, "learning_rate": 1.7758759869679628e-05, "loss": 0.4556, "step": 15908 }, { "epoch": 0.4368204283360791, "grad_norm": 0.40769606828689575, "learning_rate": 1.775848738717542e-05, "loss": 0.5344, "step": 15909 }, { "epoch": 0.4368478857770456, "grad_norm": 0.3707984685897827, "learning_rate": 1.7758214890199156e-05, "loss": 0.5645, "step": 15910 }, { "epoch": 0.4368753432180121, "grad_norm": 0.3611975908279419, "learning_rate": 1.7757942378751345e-05, "loss": 0.5132, "step": 15911 }, { "epoch": 0.4369028006589786, "grad_norm": 0.3599609136581421, "learning_rate": 1.7757669852832493e-05, "loss": 0.5169, "step": 15912 }, { "epoch": 0.43693025809994507, "grad_norm": 0.33238449692726135, "learning_rate": 1.7757397312443118e-05, "loss": 0.4484, "step": 15913 }, { "epoch": 0.43695771554091156, "grad_norm": 0.37518590688705444, "learning_rate": 1.7757124757583712e-05, "loss": 0.5403, "step": 15914 }, { "epoch": 0.4369851729818781, "grad_norm": 0.5720561146736145, "learning_rate": 1.77568521882548e-05, "loss": 0.4525, "step": 15915 }, { "epoch": 0.4370126304228446, "grad_norm": 0.3296835124492645, "learning_rate": 1.7756579604456878e-05, "loss": 0.5188, "step": 15916 }, { "epoch": 0.4370400878638111, "grad_norm": 0.3669154644012451, "learning_rate": 1.775630700619046e-05, "loss": 0.5832, "step": 15917 }, { "epoch": 0.4370675453047776, "grad_norm": 0.4243949055671692, "learning_rate": 1.7756034393456057e-05, "loss": 0.5369, "step": 15918 }, { "epoch": 0.4370950027457441, "grad_norm": 0.3647055923938751, "learning_rate": 1.775576176625417e-05, "loss": 0.4694, "step": 15919 }, { "epoch": 0.4371224601867106, "grad_norm": 0.3840551972389221, "learning_rate": 1.7755489124585315e-05, "loss": 0.6116, "step": 15920 }, { "epoch": 0.4371499176276771, "grad_norm": 0.4129382371902466, "learning_rate": 1.7755216468449995e-05, "loss": 0.5035, "step": 15921 }, { "epoch": 0.4371773750686436, "grad_norm": 0.40011605620384216, "learning_rate": 1.7754943797848725e-05, "loss": 0.5375, "step": 15922 }, { "epoch": 0.4372048325096101, "grad_norm": 0.408597856760025, "learning_rate": 1.775467111278201e-05, "loss": 0.5139, "step": 15923 }, { "epoch": 0.4372322899505766, "grad_norm": 0.40908995270729065, "learning_rate": 1.7754398413250355e-05, "loss": 0.5248, "step": 15924 }, { "epoch": 0.4372597473915431, "grad_norm": 0.362725168466568, "learning_rate": 1.7754125699254272e-05, "loss": 0.4925, "step": 15925 }, { "epoch": 0.4372872048325096, "grad_norm": 0.3752734363079071, "learning_rate": 1.775385297079427e-05, "loss": 0.4853, "step": 15926 }, { "epoch": 0.4373146622734761, "grad_norm": 0.3731890916824341, "learning_rate": 1.7753580227870858e-05, "loss": 0.4801, "step": 15927 }, { "epoch": 0.4373421197144426, "grad_norm": 0.379780650138855, "learning_rate": 1.7753307470484543e-05, "loss": 0.5181, "step": 15928 }, { "epoch": 0.43736957715540914, "grad_norm": 0.3538089096546173, "learning_rate": 1.775303469863584e-05, "loss": 0.418, "step": 15929 }, { "epoch": 0.43739703459637563, "grad_norm": 0.35898011922836304, "learning_rate": 1.7752761912325246e-05, "loss": 0.5556, "step": 15930 }, { "epoch": 0.4374244920373421, "grad_norm": 0.4202660322189331, "learning_rate": 1.775248911155328e-05, "loss": 0.6093, "step": 15931 }, { "epoch": 0.4374519494783086, "grad_norm": 0.3578537702560425, "learning_rate": 1.775221629632045e-05, "loss": 0.4687, "step": 15932 }, { "epoch": 0.4374794069192751, "grad_norm": 0.34583160281181335, "learning_rate": 1.775194346662726e-05, "loss": 0.4549, "step": 15933 }, { "epoch": 0.4375068643602416, "grad_norm": 0.34837228059768677, "learning_rate": 1.775167062247422e-05, "loss": 0.4639, "step": 15934 }, { "epoch": 0.4375343218012081, "grad_norm": 0.37006428837776184, "learning_rate": 1.7751397763861843e-05, "loss": 0.571, "step": 15935 }, { "epoch": 0.43756177924217465, "grad_norm": 0.36324918270111084, "learning_rate": 1.775112489079063e-05, "loss": 0.5134, "step": 15936 }, { "epoch": 0.43758923668314115, "grad_norm": 0.3556821942329407, "learning_rate": 1.7750852003261102e-05, "loss": 0.4784, "step": 15937 }, { "epoch": 0.43761669412410764, "grad_norm": 0.7487929463386536, "learning_rate": 1.7750579101273758e-05, "loss": 0.5224, "step": 15938 }, { "epoch": 0.43764415156507414, "grad_norm": 0.36051034927368164, "learning_rate": 1.775030618482911e-05, "loss": 0.509, "step": 15939 }, { "epoch": 0.43767160900604063, "grad_norm": 0.3660929501056671, "learning_rate": 1.7750033253927665e-05, "loss": 0.5167, "step": 15940 }, { "epoch": 0.4376990664470071, "grad_norm": 0.3967372179031372, "learning_rate": 1.774976030856994e-05, "loss": 0.5214, "step": 15941 }, { "epoch": 0.4377265238879736, "grad_norm": 0.35566696524620056, "learning_rate": 1.7749487348756432e-05, "loss": 0.4693, "step": 15942 }, { "epoch": 0.43775398132894017, "grad_norm": 0.34724199771881104, "learning_rate": 1.7749214374487663e-05, "loss": 0.4744, "step": 15943 }, { "epoch": 0.43778143876990666, "grad_norm": 0.41620585322380066, "learning_rate": 1.7748941385764135e-05, "loss": 0.5457, "step": 15944 }, { "epoch": 0.43780889621087316, "grad_norm": 0.34973692893981934, "learning_rate": 1.7748668382586355e-05, "loss": 0.5638, "step": 15945 }, { "epoch": 0.43783635365183965, "grad_norm": 0.35186755657196045, "learning_rate": 1.7748395364954836e-05, "loss": 0.4097, "step": 15946 }, { "epoch": 0.43786381109280614, "grad_norm": 0.381716787815094, "learning_rate": 1.7748122332870088e-05, "loss": 0.5121, "step": 15947 }, { "epoch": 0.43789126853377264, "grad_norm": 0.3242526352405548, "learning_rate": 1.774784928633262e-05, "loss": 0.4819, "step": 15948 }, { "epoch": 0.43791872597473913, "grad_norm": 0.3681618869304657, "learning_rate": 1.7747576225342938e-05, "loss": 0.4791, "step": 15949 }, { "epoch": 0.4379461834157057, "grad_norm": 0.35470667481422424, "learning_rate": 1.7747303149901552e-05, "loss": 0.4978, "step": 15950 }, { "epoch": 0.4379736408566722, "grad_norm": 0.3981340825557709, "learning_rate": 1.7747030060008978e-05, "loss": 0.5466, "step": 15951 }, { "epoch": 0.43800109829763867, "grad_norm": 0.5080363750457764, "learning_rate": 1.7746756955665716e-05, "loss": 0.6029, "step": 15952 }, { "epoch": 0.43802855573860516, "grad_norm": 0.40614375472068787, "learning_rate": 1.7746483836872283e-05, "loss": 0.6181, "step": 15953 }, { "epoch": 0.43805601317957166, "grad_norm": 0.32994258403778076, "learning_rate": 1.7746210703629182e-05, "loss": 0.4073, "step": 15954 }, { "epoch": 0.43808347062053815, "grad_norm": 0.36442553997039795, "learning_rate": 1.7745937555936926e-05, "loss": 0.4854, "step": 15955 }, { "epoch": 0.43811092806150465, "grad_norm": 0.375436395406723, "learning_rate": 1.7745664393796026e-05, "loss": 0.4908, "step": 15956 }, { "epoch": 0.4381383855024712, "grad_norm": 0.3867364227771759, "learning_rate": 1.774539121720699e-05, "loss": 0.5117, "step": 15957 }, { "epoch": 0.4381658429434377, "grad_norm": 0.36884868144989014, "learning_rate": 1.7745118026170326e-05, "loss": 0.4815, "step": 15958 }, { "epoch": 0.4381933003844042, "grad_norm": 0.36027681827545166, "learning_rate": 1.7744844820686547e-05, "loss": 0.4714, "step": 15959 }, { "epoch": 0.4382207578253707, "grad_norm": 0.3996996581554413, "learning_rate": 1.774457160075616e-05, "loss": 0.6128, "step": 15960 }, { "epoch": 0.43824821526633717, "grad_norm": 0.35728779435157776, "learning_rate": 1.7744298366379673e-05, "loss": 0.5347, "step": 15961 }, { "epoch": 0.43827567270730367, "grad_norm": 0.363992840051651, "learning_rate": 1.77440251175576e-05, "loss": 0.5158, "step": 15962 }, { "epoch": 0.43830313014827016, "grad_norm": 0.3694213926792145, "learning_rate": 1.7743751854290448e-05, "loss": 0.5596, "step": 15963 }, { "epoch": 0.4383305875892367, "grad_norm": 0.4645237326622009, "learning_rate": 1.774347857657873e-05, "loss": 0.6765, "step": 15964 }, { "epoch": 0.4383580450302032, "grad_norm": 0.4396706223487854, "learning_rate": 1.7743205284422947e-05, "loss": 0.5916, "step": 15965 }, { "epoch": 0.4383855024711697, "grad_norm": 0.4184701144695282, "learning_rate": 1.7742931977823617e-05, "loss": 0.5544, "step": 15966 }, { "epoch": 0.4384129599121362, "grad_norm": 0.34663859009742737, "learning_rate": 1.774265865678125e-05, "loss": 0.4803, "step": 15967 }, { "epoch": 0.4384404173531027, "grad_norm": 0.4119214117527008, "learning_rate": 1.774238532129635e-05, "loss": 0.5347, "step": 15968 }, { "epoch": 0.4384678747940692, "grad_norm": 0.3629450798034668, "learning_rate": 1.7742111971369436e-05, "loss": 0.4938, "step": 15969 }, { "epoch": 0.4384953322350357, "grad_norm": 0.354852557182312, "learning_rate": 1.7741838607001006e-05, "loss": 0.4066, "step": 15970 }, { "epoch": 0.4385227896760022, "grad_norm": 0.3971484899520874, "learning_rate": 1.774156522819158e-05, "loss": 0.498, "step": 15971 }, { "epoch": 0.4385502471169687, "grad_norm": 0.405958354473114, "learning_rate": 1.774129183494166e-05, "loss": 0.5127, "step": 15972 }, { "epoch": 0.4385777045579352, "grad_norm": 0.34912818670272827, "learning_rate": 1.7741018427251765e-05, "loss": 0.5524, "step": 15973 }, { "epoch": 0.4386051619989017, "grad_norm": 0.35008862614631653, "learning_rate": 1.7740745005122395e-05, "loss": 0.4608, "step": 15974 }, { "epoch": 0.4386326194398682, "grad_norm": 0.35633668303489685, "learning_rate": 1.774047156855407e-05, "loss": 0.4098, "step": 15975 }, { "epoch": 0.4386600768808347, "grad_norm": 0.36366939544677734, "learning_rate": 1.7740198117547293e-05, "loss": 0.4346, "step": 15976 }, { "epoch": 0.4386875343218012, "grad_norm": 0.3880147337913513, "learning_rate": 1.7739924652102573e-05, "loss": 0.5519, "step": 15977 }, { "epoch": 0.43871499176276774, "grad_norm": 0.38407576084136963, "learning_rate": 1.7739651172220428e-05, "loss": 0.496, "step": 15978 }, { "epoch": 0.43874244920373423, "grad_norm": 0.3575894832611084, "learning_rate": 1.773937767790136e-05, "loss": 0.5439, "step": 15979 }, { "epoch": 0.4387699066447007, "grad_norm": 0.44234228134155273, "learning_rate": 1.7739104169145888e-05, "loss": 0.5525, "step": 15980 }, { "epoch": 0.4387973640856672, "grad_norm": 0.4053306579589844, "learning_rate": 1.773883064595451e-05, "loss": 0.5838, "step": 15981 }, { "epoch": 0.4388248215266337, "grad_norm": 0.37242254614830017, "learning_rate": 1.7738557108327744e-05, "loss": 0.4766, "step": 15982 }, { "epoch": 0.4388522789676002, "grad_norm": 0.4313550293445587, "learning_rate": 1.77382835562661e-05, "loss": 0.5848, "step": 15983 }, { "epoch": 0.4388797364085667, "grad_norm": 0.37778764963150024, "learning_rate": 1.7738009989770088e-05, "loss": 0.5486, "step": 15984 }, { "epoch": 0.43890719384953325, "grad_norm": 0.3821971118450165, "learning_rate": 1.7737736408840218e-05, "loss": 0.5471, "step": 15985 }, { "epoch": 0.43893465129049974, "grad_norm": 0.3401161730289459, "learning_rate": 1.7737462813477e-05, "loss": 0.513, "step": 15986 }, { "epoch": 0.43896210873146624, "grad_norm": 0.46170490980148315, "learning_rate": 1.7737189203680944e-05, "loss": 0.5863, "step": 15987 }, { "epoch": 0.43898956617243273, "grad_norm": 0.42784059047698975, "learning_rate": 1.7736915579452563e-05, "loss": 0.5263, "step": 15988 }, { "epoch": 0.4390170236133992, "grad_norm": 0.3759987950325012, "learning_rate": 1.773664194079236e-05, "loss": 0.4963, "step": 15989 }, { "epoch": 0.4390444810543657, "grad_norm": 0.3591609001159668, "learning_rate": 1.7736368287700854e-05, "loss": 0.5078, "step": 15990 }, { "epoch": 0.4390719384953322, "grad_norm": 0.3699112832546234, "learning_rate": 1.7736094620178552e-05, "loss": 0.5625, "step": 15991 }, { "epoch": 0.43909939593629876, "grad_norm": 0.374315083026886, "learning_rate": 1.7735820938225963e-05, "loss": 0.4263, "step": 15992 }, { "epoch": 0.43912685337726526, "grad_norm": 0.37277668714523315, "learning_rate": 1.77355472418436e-05, "loss": 0.5318, "step": 15993 }, { "epoch": 0.43915431081823175, "grad_norm": 0.34824442863464355, "learning_rate": 1.773527353103197e-05, "loss": 0.4746, "step": 15994 }, { "epoch": 0.43918176825919825, "grad_norm": 0.33013543486595154, "learning_rate": 1.773499980579159e-05, "loss": 0.4901, "step": 15995 }, { "epoch": 0.43920922570016474, "grad_norm": 0.41612404584884644, "learning_rate": 1.7734726066122966e-05, "loss": 0.5448, "step": 15996 }, { "epoch": 0.43923668314113123, "grad_norm": 0.3917672336101532, "learning_rate": 1.773445231202661e-05, "loss": 0.521, "step": 15997 }, { "epoch": 0.43926414058209773, "grad_norm": 0.39520263671875, "learning_rate": 1.7734178543503028e-05, "loss": 0.5653, "step": 15998 }, { "epoch": 0.4392915980230643, "grad_norm": 0.3708845376968384, "learning_rate": 1.7733904760552737e-05, "loss": 0.5165, "step": 15999 }, { "epoch": 0.43931905546403077, "grad_norm": 0.3643205463886261, "learning_rate": 1.7733630963176246e-05, "loss": 0.5219, "step": 16000 }, { "epoch": 0.43934651290499727, "grad_norm": 0.4000820815563202, "learning_rate": 1.7733357151374062e-05, "loss": 0.5394, "step": 16001 }, { "epoch": 0.43937397034596376, "grad_norm": 0.3798181414604187, "learning_rate": 1.7733083325146704e-05, "loss": 0.5084, "step": 16002 }, { "epoch": 0.43940142778693025, "grad_norm": 0.41463640332221985, "learning_rate": 1.7732809484494672e-05, "loss": 0.5425, "step": 16003 }, { "epoch": 0.43942888522789675, "grad_norm": 0.4158388078212738, "learning_rate": 1.7732535629418483e-05, "loss": 0.5312, "step": 16004 }, { "epoch": 0.43945634266886324, "grad_norm": 0.40611496567726135, "learning_rate": 1.773226175991865e-05, "loss": 0.5462, "step": 16005 }, { "epoch": 0.43948380010982974, "grad_norm": 0.3649519085884094, "learning_rate": 1.7731987875995677e-05, "loss": 0.5693, "step": 16006 }, { "epoch": 0.4395112575507963, "grad_norm": 0.37972137331962585, "learning_rate": 1.7731713977650084e-05, "loss": 0.506, "step": 16007 }, { "epoch": 0.4395387149917628, "grad_norm": 0.34527742862701416, "learning_rate": 1.7731440064882372e-05, "loss": 0.5338, "step": 16008 }, { "epoch": 0.4395661724327293, "grad_norm": 0.3539807200431824, "learning_rate": 1.773116613769306e-05, "loss": 0.4615, "step": 16009 }, { "epoch": 0.43959362987369577, "grad_norm": 0.621793270111084, "learning_rate": 1.7730892196082653e-05, "loss": 0.564, "step": 16010 }, { "epoch": 0.43962108731466226, "grad_norm": 0.4323682487010956, "learning_rate": 1.7730618240051667e-05, "loss": 0.4779, "step": 16011 }, { "epoch": 0.43964854475562876, "grad_norm": 0.3663260042667389, "learning_rate": 1.7730344269600608e-05, "loss": 0.5115, "step": 16012 }, { "epoch": 0.43967600219659525, "grad_norm": 0.33200037479400635, "learning_rate": 1.7730070284729993e-05, "loss": 0.5014, "step": 16013 }, { "epoch": 0.4397034596375618, "grad_norm": 0.3666228950023651, "learning_rate": 1.772979628544033e-05, "loss": 0.4506, "step": 16014 }, { "epoch": 0.4397309170785283, "grad_norm": 0.37638646364212036, "learning_rate": 1.7729522271732124e-05, "loss": 0.4647, "step": 16015 }, { "epoch": 0.4397583745194948, "grad_norm": 0.33108824491500854, "learning_rate": 1.7729248243605897e-05, "loss": 0.5169, "step": 16016 }, { "epoch": 0.4397858319604613, "grad_norm": 0.393625408411026, "learning_rate": 1.7728974201062153e-05, "loss": 0.4905, "step": 16017 }, { "epoch": 0.4398132894014278, "grad_norm": 0.9037589430809021, "learning_rate": 1.7728700144101407e-05, "loss": 0.5634, "step": 16018 }, { "epoch": 0.43984074684239427, "grad_norm": 0.36234351992607117, "learning_rate": 1.7728426072724168e-05, "loss": 0.4974, "step": 16019 }, { "epoch": 0.43986820428336076, "grad_norm": 0.35893815755844116, "learning_rate": 1.772815198693095e-05, "loss": 0.5594, "step": 16020 }, { "epoch": 0.4398956617243273, "grad_norm": 0.3755025565624237, "learning_rate": 1.772787788672226e-05, "loss": 0.5783, "step": 16021 }, { "epoch": 0.4399231191652938, "grad_norm": 0.3486843705177307, "learning_rate": 1.7727603772098607e-05, "loss": 0.5039, "step": 16022 }, { "epoch": 0.4399505766062603, "grad_norm": 0.4275088608264923, "learning_rate": 1.772732964306051e-05, "loss": 0.5082, "step": 16023 }, { "epoch": 0.4399780340472268, "grad_norm": 0.35277193784713745, "learning_rate": 1.7727055499608478e-05, "loss": 0.5262, "step": 16024 }, { "epoch": 0.4400054914881933, "grad_norm": 0.34900686144828796, "learning_rate": 1.772678134174302e-05, "loss": 0.5394, "step": 16025 }, { "epoch": 0.4400329489291598, "grad_norm": 0.9462416172027588, "learning_rate": 1.772650716946465e-05, "loss": 0.5213, "step": 16026 }, { "epoch": 0.4400604063701263, "grad_norm": 0.39672985672950745, "learning_rate": 1.7726232982773877e-05, "loss": 0.5738, "step": 16027 }, { "epoch": 0.4400878638110928, "grad_norm": 0.4004582464694977, "learning_rate": 1.7725958781671217e-05, "loss": 0.5891, "step": 16028 }, { "epoch": 0.4401153212520593, "grad_norm": 0.3605201244354248, "learning_rate": 1.7725684566157176e-05, "loss": 0.5586, "step": 16029 }, { "epoch": 0.4401427786930258, "grad_norm": 0.3654342293739319, "learning_rate": 1.7725410336232265e-05, "loss": 0.5247, "step": 16030 }, { "epoch": 0.4401702361339923, "grad_norm": 0.3668719530105591, "learning_rate": 1.7725136091897002e-05, "loss": 0.4921, "step": 16031 }, { "epoch": 0.4401976935749588, "grad_norm": 0.40508851408958435, "learning_rate": 1.772486183315189e-05, "loss": 0.5492, "step": 16032 }, { "epoch": 0.4402251510159253, "grad_norm": 0.40457937121391296, "learning_rate": 1.772458755999745e-05, "loss": 0.598, "step": 16033 }, { "epoch": 0.4402526084568918, "grad_norm": 0.3663124740123749, "learning_rate": 1.7724313272434183e-05, "loss": 0.5028, "step": 16034 }, { "epoch": 0.44028006589785834, "grad_norm": 0.3289175033569336, "learning_rate": 1.772403897046261e-05, "loss": 0.4817, "step": 16035 }, { "epoch": 0.44030752333882484, "grad_norm": 0.38116955757141113, "learning_rate": 1.772376465408324e-05, "loss": 0.5333, "step": 16036 }, { "epoch": 0.44033498077979133, "grad_norm": 0.5627240538597107, "learning_rate": 1.7723490323296582e-05, "loss": 0.5056, "step": 16037 }, { "epoch": 0.4403624382207578, "grad_norm": 0.346038818359375, "learning_rate": 1.772321597810315e-05, "loss": 0.5113, "step": 16038 }, { "epoch": 0.4403898956617243, "grad_norm": 0.35738298296928406, "learning_rate": 1.7722941618503456e-05, "loss": 0.5896, "step": 16039 }, { "epoch": 0.4404173531026908, "grad_norm": 0.3827212154865265, "learning_rate": 1.772266724449801e-05, "loss": 0.4865, "step": 16040 }, { "epoch": 0.4404448105436573, "grad_norm": 0.45579320192337036, "learning_rate": 1.7722392856087326e-05, "loss": 0.5915, "step": 16041 }, { "epoch": 0.44047226798462386, "grad_norm": 0.3681230843067169, "learning_rate": 1.7722118453271915e-05, "loss": 0.5357, "step": 16042 }, { "epoch": 0.44049972542559035, "grad_norm": 0.36544346809387207, "learning_rate": 1.7721844036052288e-05, "loss": 0.4919, "step": 16043 }, { "epoch": 0.44052718286655684, "grad_norm": 0.4000301659107208, "learning_rate": 1.7721569604428954e-05, "loss": 0.5949, "step": 16044 }, { "epoch": 0.44055464030752334, "grad_norm": 0.38843491673469543, "learning_rate": 1.772129515840243e-05, "loss": 0.4496, "step": 16045 }, { "epoch": 0.44058209774848983, "grad_norm": 0.38684719800949097, "learning_rate": 1.7721020697973228e-05, "loss": 0.5477, "step": 16046 }, { "epoch": 0.4406095551894563, "grad_norm": 0.416536420583725, "learning_rate": 1.772074622314186e-05, "loss": 0.5375, "step": 16047 }, { "epoch": 0.4406370126304228, "grad_norm": 0.35261958837509155, "learning_rate": 1.772047173390883e-05, "loss": 0.5088, "step": 16048 }, { "epoch": 0.44066447007138937, "grad_norm": 0.3693728744983673, "learning_rate": 1.772019723027466e-05, "loss": 0.5793, "step": 16049 }, { "epoch": 0.44069192751235586, "grad_norm": 0.35269519686698914, "learning_rate": 1.7719922712239857e-05, "loss": 0.4535, "step": 16050 }, { "epoch": 0.44071938495332236, "grad_norm": 0.44733014702796936, "learning_rate": 1.7719648179804936e-05, "loss": 0.5316, "step": 16051 }, { "epoch": 0.44074684239428885, "grad_norm": 0.395713746547699, "learning_rate": 1.7719373632970406e-05, "loss": 0.5874, "step": 16052 }, { "epoch": 0.44077429983525535, "grad_norm": 0.5107450485229492, "learning_rate": 1.771909907173678e-05, "loss": 0.6614, "step": 16053 }, { "epoch": 0.44080175727622184, "grad_norm": 0.36749371886253357, "learning_rate": 1.771882449610457e-05, "loss": 0.4905, "step": 16054 }, { "epoch": 0.44082921471718833, "grad_norm": 0.4176290035247803, "learning_rate": 1.7718549906074292e-05, "loss": 0.5163, "step": 16055 }, { "epoch": 0.4408566721581549, "grad_norm": 0.3652890622615814, "learning_rate": 1.771827530164645e-05, "loss": 0.5173, "step": 16056 }, { "epoch": 0.4408841295991214, "grad_norm": 0.3827417492866516, "learning_rate": 1.771800068282157e-05, "loss": 0.5292, "step": 16057 }, { "epoch": 0.44091158704008787, "grad_norm": 0.3801558315753937, "learning_rate": 1.7717726049600145e-05, "loss": 0.5208, "step": 16058 }, { "epoch": 0.44093904448105437, "grad_norm": 0.4223015308380127, "learning_rate": 1.7717451401982704e-05, "loss": 0.5899, "step": 16059 }, { "epoch": 0.44096650192202086, "grad_norm": 0.3582773208618164, "learning_rate": 1.771717673996975e-05, "loss": 0.3916, "step": 16060 }, { "epoch": 0.44099395936298735, "grad_norm": 0.4195508658885956, "learning_rate": 1.7716902063561798e-05, "loss": 0.4862, "step": 16061 }, { "epoch": 0.44102141680395385, "grad_norm": 0.33842170238494873, "learning_rate": 1.7716627372759366e-05, "loss": 0.49, "step": 16062 }, { "epoch": 0.4410488742449204, "grad_norm": 0.46936091780662537, "learning_rate": 1.7716352667562957e-05, "loss": 0.5023, "step": 16063 }, { "epoch": 0.4410763316858869, "grad_norm": 0.331992506980896, "learning_rate": 1.7716077947973086e-05, "loss": 0.4759, "step": 16064 }, { "epoch": 0.4411037891268534, "grad_norm": 0.3947388231754303, "learning_rate": 1.771580321399027e-05, "loss": 0.5671, "step": 16065 }, { "epoch": 0.4411312465678199, "grad_norm": 0.4215451776981354, "learning_rate": 1.7715528465615017e-05, "loss": 0.4746, "step": 16066 }, { "epoch": 0.4411587040087864, "grad_norm": 0.3842114806175232, "learning_rate": 1.771525370284784e-05, "loss": 0.5236, "step": 16067 }, { "epoch": 0.44118616144975287, "grad_norm": 0.36023566126823425, "learning_rate": 1.7714978925689256e-05, "loss": 0.4682, "step": 16068 }, { "epoch": 0.44121361889071936, "grad_norm": 0.3463267982006073, "learning_rate": 1.771470413413977e-05, "loss": 0.5249, "step": 16069 }, { "epoch": 0.4412410763316859, "grad_norm": 0.355824738740921, "learning_rate": 1.7714429328199897e-05, "loss": 0.5355, "step": 16070 }, { "epoch": 0.4412685337726524, "grad_norm": 0.43725666403770447, "learning_rate": 1.7714154507870155e-05, "loss": 0.5354, "step": 16071 }, { "epoch": 0.4412959912136189, "grad_norm": 0.3928440809249878, "learning_rate": 1.771387967315105e-05, "loss": 0.4906, "step": 16072 }, { "epoch": 0.4413234486545854, "grad_norm": 0.40828660130500793, "learning_rate": 1.7713604824043104e-05, "loss": 0.4607, "step": 16073 }, { "epoch": 0.4413509060955519, "grad_norm": 0.3941815197467804, "learning_rate": 1.7713329960546815e-05, "loss": 0.5703, "step": 16074 }, { "epoch": 0.4413783635365184, "grad_norm": 0.4187332093715668, "learning_rate": 1.771305508266271e-05, "loss": 0.4597, "step": 16075 }, { "epoch": 0.4414058209774849, "grad_norm": 0.34143298864364624, "learning_rate": 1.771278019039129e-05, "loss": 0.4331, "step": 16076 }, { "epoch": 0.4414332784184514, "grad_norm": 0.36700600385665894, "learning_rate": 1.771250528373308e-05, "loss": 0.553, "step": 16077 }, { "epoch": 0.4414607358594179, "grad_norm": 0.41106295585632324, "learning_rate": 1.771223036268858e-05, "loss": 0.4853, "step": 16078 }, { "epoch": 0.4414881933003844, "grad_norm": 0.39432722330093384, "learning_rate": 1.7711955427258312e-05, "loss": 0.5155, "step": 16079 }, { "epoch": 0.4415156507413509, "grad_norm": 0.3491683900356293, "learning_rate": 1.7711680477442783e-05, "loss": 0.5097, "step": 16080 }, { "epoch": 0.4415431081823174, "grad_norm": 0.3833048641681671, "learning_rate": 1.7711405513242513e-05, "loss": 0.5377, "step": 16081 }, { "epoch": 0.4415705656232839, "grad_norm": 0.6803582310676575, "learning_rate": 1.7711130534658007e-05, "loss": 0.663, "step": 16082 }, { "epoch": 0.4415980230642504, "grad_norm": 0.3410532474517822, "learning_rate": 1.771085554168978e-05, "loss": 0.4344, "step": 16083 }, { "epoch": 0.44162548050521694, "grad_norm": 0.3486756682395935, "learning_rate": 1.7710580534338353e-05, "loss": 0.4403, "step": 16084 }, { "epoch": 0.44165293794618343, "grad_norm": 0.34327831864356995, "learning_rate": 1.7710305512604227e-05, "loss": 0.4837, "step": 16085 }, { "epoch": 0.4416803953871499, "grad_norm": 0.39238032698631287, "learning_rate": 1.7710030476487922e-05, "loss": 0.4501, "step": 16086 }, { "epoch": 0.4417078528281164, "grad_norm": 0.4031986594200134, "learning_rate": 1.7709755425989947e-05, "loss": 0.51, "step": 16087 }, { "epoch": 0.4417353102690829, "grad_norm": 0.36213165521621704, "learning_rate": 1.770948036111082e-05, "loss": 0.4201, "step": 16088 }, { "epoch": 0.4417627677100494, "grad_norm": 0.38540422916412354, "learning_rate": 1.770920528185105e-05, "loss": 0.4802, "step": 16089 }, { "epoch": 0.4417902251510159, "grad_norm": 0.3580580949783325, "learning_rate": 1.7708930188211156e-05, "loss": 0.5284, "step": 16090 }, { "epoch": 0.44181768259198245, "grad_norm": 0.4043821096420288, "learning_rate": 1.7708655080191644e-05, "loss": 0.5318, "step": 16091 }, { "epoch": 0.44184514003294895, "grad_norm": 0.3658545911312103, "learning_rate": 1.770837995779303e-05, "loss": 0.5321, "step": 16092 }, { "epoch": 0.44187259747391544, "grad_norm": 0.36862120032310486, "learning_rate": 1.7708104821015824e-05, "loss": 0.4958, "step": 16093 }, { "epoch": 0.44190005491488193, "grad_norm": 0.36862418055534363, "learning_rate": 1.7707829669860546e-05, "loss": 0.5338, "step": 16094 }, { "epoch": 0.44192751235584843, "grad_norm": 0.36742526292800903, "learning_rate": 1.7707554504327703e-05, "loss": 0.4951, "step": 16095 }, { "epoch": 0.4419549697968149, "grad_norm": 0.4655337333679199, "learning_rate": 1.7707279324417815e-05, "loss": 0.5614, "step": 16096 }, { "epoch": 0.4419824272377814, "grad_norm": 0.41464704275131226, "learning_rate": 1.7707004130131385e-05, "loss": 0.5716, "step": 16097 }, { "epoch": 0.44200988467874797, "grad_norm": 0.3828180134296417, "learning_rate": 1.770672892146894e-05, "loss": 0.539, "step": 16098 }, { "epoch": 0.44203734211971446, "grad_norm": 0.33615949749946594, "learning_rate": 1.770645369843098e-05, "loss": 0.4822, "step": 16099 }, { "epoch": 0.44206479956068095, "grad_norm": 0.3751169741153717, "learning_rate": 1.7706178461018027e-05, "loss": 0.576, "step": 16100 }, { "epoch": 0.44209225700164745, "grad_norm": 0.37492918968200684, "learning_rate": 1.770590320923059e-05, "loss": 0.5691, "step": 16101 }, { "epoch": 0.44211971444261394, "grad_norm": 0.37807121872901917, "learning_rate": 1.7705627943069186e-05, "loss": 0.6017, "step": 16102 }, { "epoch": 0.44214717188358044, "grad_norm": 0.34820759296417236, "learning_rate": 1.7705352662534325e-05, "loss": 0.4913, "step": 16103 }, { "epoch": 0.44217462932454693, "grad_norm": 0.3840310573577881, "learning_rate": 1.770507736762652e-05, "loss": 0.4882, "step": 16104 }, { "epoch": 0.4422020867655135, "grad_norm": 0.3873975872993469, "learning_rate": 1.770480205834629e-05, "loss": 0.5834, "step": 16105 }, { "epoch": 0.44222954420648, "grad_norm": 0.35591810941696167, "learning_rate": 1.7704526734694146e-05, "loss": 0.5236, "step": 16106 }, { "epoch": 0.44225700164744647, "grad_norm": 0.4110228419303894, "learning_rate": 1.7704251396670596e-05, "loss": 0.5723, "step": 16107 }, { "epoch": 0.44228445908841296, "grad_norm": 0.3800068497657776, "learning_rate": 1.7703976044276167e-05, "loss": 0.5783, "step": 16108 }, { "epoch": 0.44231191652937946, "grad_norm": 0.4323784112930298, "learning_rate": 1.7703700677511356e-05, "loss": 0.5162, "step": 16109 }, { "epoch": 0.44233937397034595, "grad_norm": 0.3532456159591675, "learning_rate": 1.7703425296376685e-05, "loss": 0.5206, "step": 16110 }, { "epoch": 0.44236683141131244, "grad_norm": 0.3582749664783478, "learning_rate": 1.770314990087267e-05, "loss": 0.4802, "step": 16111 }, { "epoch": 0.442394288852279, "grad_norm": 0.4507274627685547, "learning_rate": 1.770287449099982e-05, "loss": 0.5662, "step": 16112 }, { "epoch": 0.4424217462932455, "grad_norm": 0.39153197407722473, "learning_rate": 1.770259906675865e-05, "loss": 0.6087, "step": 16113 }, { "epoch": 0.442449203734212, "grad_norm": 0.36976945400238037, "learning_rate": 1.7702323628149677e-05, "loss": 0.4115, "step": 16114 }, { "epoch": 0.4424766611751785, "grad_norm": 0.3431207537651062, "learning_rate": 1.770204817517341e-05, "loss": 0.5089, "step": 16115 }, { "epoch": 0.44250411861614497, "grad_norm": 0.3672163188457489, "learning_rate": 1.7701772707830364e-05, "loss": 0.4928, "step": 16116 }, { "epoch": 0.44253157605711146, "grad_norm": 0.7632066607475281, "learning_rate": 1.7701497226121058e-05, "loss": 0.6196, "step": 16117 }, { "epoch": 0.44255903349807796, "grad_norm": 0.4191618859767914, "learning_rate": 1.7701221730045998e-05, "loss": 0.4831, "step": 16118 }, { "epoch": 0.4425864909390445, "grad_norm": 0.36553719639778137, "learning_rate": 1.77009462196057e-05, "loss": 0.4359, "step": 16119 }, { "epoch": 0.442613948380011, "grad_norm": 0.40466609597206116, "learning_rate": 1.7700670694800682e-05, "loss": 0.5705, "step": 16120 }, { "epoch": 0.4426414058209775, "grad_norm": 0.42002734541893005, "learning_rate": 1.7700395155631456e-05, "loss": 0.4483, "step": 16121 }, { "epoch": 0.442668863261944, "grad_norm": 0.4822810888290405, "learning_rate": 1.7700119602098536e-05, "loss": 0.5338, "step": 16122 }, { "epoch": 0.4426963207029105, "grad_norm": 0.36181890964508057, "learning_rate": 1.769984403420243e-05, "loss": 0.5017, "step": 16123 }, { "epoch": 0.442723778143877, "grad_norm": 0.35915908217430115, "learning_rate": 1.7699568451943663e-05, "loss": 0.436, "step": 16124 }, { "epoch": 0.44275123558484347, "grad_norm": 0.3797287940979004, "learning_rate": 1.7699292855322743e-05, "loss": 0.4695, "step": 16125 }, { "epoch": 0.44277869302581, "grad_norm": 0.3674972653388977, "learning_rate": 1.7699017244340183e-05, "loss": 0.5474, "step": 16126 }, { "epoch": 0.4428061504667765, "grad_norm": 0.31156811118125916, "learning_rate": 1.7698741618996498e-05, "loss": 0.4717, "step": 16127 }, { "epoch": 0.442833607907743, "grad_norm": 0.3621383607387543, "learning_rate": 1.7698465979292203e-05, "loss": 0.5196, "step": 16128 }, { "epoch": 0.4428610653487095, "grad_norm": 0.3658513128757477, "learning_rate": 1.7698190325227816e-05, "loss": 0.5065, "step": 16129 }, { "epoch": 0.442888522789676, "grad_norm": 0.39195987582206726, "learning_rate": 1.7697914656803842e-05, "loss": 0.5713, "step": 16130 }, { "epoch": 0.4429159802306425, "grad_norm": 0.3768465220928192, "learning_rate": 1.76976389740208e-05, "loss": 0.506, "step": 16131 }, { "epoch": 0.442943437671609, "grad_norm": 0.4017414152622223, "learning_rate": 1.7697363276879207e-05, "loss": 0.4919, "step": 16132 }, { "epoch": 0.44297089511257554, "grad_norm": 0.3649878203868866, "learning_rate": 1.7697087565379572e-05, "loss": 0.4927, "step": 16133 }, { "epoch": 0.44299835255354203, "grad_norm": 0.3686313331127167, "learning_rate": 1.7696811839522418e-05, "loss": 0.5682, "step": 16134 }, { "epoch": 0.4430258099945085, "grad_norm": 0.350781112909317, "learning_rate": 1.7696536099308248e-05, "loss": 0.6492, "step": 16135 }, { "epoch": 0.443053267435475, "grad_norm": 0.35362258553504944, "learning_rate": 1.7696260344737584e-05, "loss": 0.4782, "step": 16136 }, { "epoch": 0.4430807248764415, "grad_norm": 0.3760995864868164, "learning_rate": 1.7695984575810935e-05, "loss": 0.5567, "step": 16137 }, { "epoch": 0.443108182317408, "grad_norm": 0.3840887248516083, "learning_rate": 1.769570879252882e-05, "loss": 0.5861, "step": 16138 }, { "epoch": 0.4431356397583745, "grad_norm": 1.1924145221710205, "learning_rate": 1.7695432994891755e-05, "loss": 0.6142, "step": 16139 }, { "epoch": 0.443163097199341, "grad_norm": 0.34570831060409546, "learning_rate": 1.769515718290025e-05, "loss": 0.531, "step": 16140 }, { "epoch": 0.44319055464030754, "grad_norm": 0.3442162573337555, "learning_rate": 1.769488135655482e-05, "loss": 0.5362, "step": 16141 }, { "epoch": 0.44321801208127404, "grad_norm": 0.3471300005912781, "learning_rate": 1.769460551585598e-05, "loss": 0.5045, "step": 16142 }, { "epoch": 0.44324546952224053, "grad_norm": 0.3458541929721832, "learning_rate": 1.7694329660804243e-05, "loss": 0.4815, "step": 16143 }, { "epoch": 0.443272926963207, "grad_norm": 0.39499083161354065, "learning_rate": 1.7694053791400127e-05, "loss": 0.5356, "step": 16144 }, { "epoch": 0.4433003844041735, "grad_norm": 0.3441265821456909, "learning_rate": 1.769377790764415e-05, "loss": 0.4906, "step": 16145 }, { "epoch": 0.44332784184514, "grad_norm": 0.3962032198905945, "learning_rate": 1.7693502009536817e-05, "loss": 0.5878, "step": 16146 }, { "epoch": 0.4433552992861065, "grad_norm": 0.37307220697402954, "learning_rate": 1.7693226097078646e-05, "loss": 0.5527, "step": 16147 }, { "epoch": 0.44338275672707306, "grad_norm": 0.3970467150211334, "learning_rate": 1.7692950170270155e-05, "loss": 0.4801, "step": 16148 }, { "epoch": 0.44341021416803955, "grad_norm": 0.4585181474685669, "learning_rate": 1.7692674229111855e-05, "loss": 0.5949, "step": 16149 }, { "epoch": 0.44343767160900605, "grad_norm": 0.3821594715118408, "learning_rate": 1.7692398273604264e-05, "loss": 0.5545, "step": 16150 }, { "epoch": 0.44346512904997254, "grad_norm": 0.37809842824935913, "learning_rate": 1.7692122303747895e-05, "loss": 0.5014, "step": 16151 }, { "epoch": 0.44349258649093903, "grad_norm": 0.42849352955818176, "learning_rate": 1.7691846319543262e-05, "loss": 0.5843, "step": 16152 }, { "epoch": 0.4435200439319055, "grad_norm": 0.34685197472572327, "learning_rate": 1.769157032099088e-05, "loss": 0.5099, "step": 16153 }, { "epoch": 0.443547501372872, "grad_norm": 0.5483766794204712, "learning_rate": 1.7691294308091268e-05, "loss": 0.4879, "step": 16154 }, { "epoch": 0.44357495881383857, "grad_norm": 0.3368704915046692, "learning_rate": 1.769101828084493e-05, "loss": 0.4746, "step": 16155 }, { "epoch": 0.44360241625480507, "grad_norm": 0.3965184688568115, "learning_rate": 1.7690742239252396e-05, "loss": 0.4848, "step": 16156 }, { "epoch": 0.44362987369577156, "grad_norm": 0.45751336216926575, "learning_rate": 1.7690466183314172e-05, "loss": 0.5505, "step": 16157 }, { "epoch": 0.44365733113673805, "grad_norm": 0.3538562059402466, "learning_rate": 1.7690190113030768e-05, "loss": 0.4348, "step": 16158 }, { "epoch": 0.44368478857770455, "grad_norm": 0.3939152657985687, "learning_rate": 1.7689914028402712e-05, "loss": 0.5203, "step": 16159 }, { "epoch": 0.44371224601867104, "grad_norm": 0.38007253408432007, "learning_rate": 1.768963792943051e-05, "loss": 0.4956, "step": 16160 }, { "epoch": 0.44373970345963754, "grad_norm": 0.3676116168498993, "learning_rate": 1.768936181611468e-05, "loss": 0.4464, "step": 16161 }, { "epoch": 0.4437671609006041, "grad_norm": 0.44770869612693787, "learning_rate": 1.768908568845573e-05, "loss": 0.5426, "step": 16162 }, { "epoch": 0.4437946183415706, "grad_norm": 0.3889642059803009, "learning_rate": 1.7688809546454185e-05, "loss": 0.5029, "step": 16163 }, { "epoch": 0.4438220757825371, "grad_norm": 0.4090629816055298, "learning_rate": 1.7688533390110557e-05, "loss": 0.6285, "step": 16164 }, { "epoch": 0.44384953322350357, "grad_norm": 0.41927003860473633, "learning_rate": 1.7688257219425363e-05, "loss": 0.5276, "step": 16165 }, { "epoch": 0.44387699066447006, "grad_norm": 0.38014259934425354, "learning_rate": 1.7687981034399115e-05, "loss": 0.4902, "step": 16166 }, { "epoch": 0.44390444810543656, "grad_norm": 0.41481706500053406, "learning_rate": 1.768770483503232e-05, "loss": 0.5298, "step": 16167 }, { "epoch": 0.44393190554640305, "grad_norm": 0.36542627215385437, "learning_rate": 1.768742862132551e-05, "loss": 0.4866, "step": 16168 }, { "epoch": 0.4439593629873696, "grad_norm": 0.38311904668807983, "learning_rate": 1.768715239327919e-05, "loss": 0.5188, "step": 16169 }, { "epoch": 0.4439868204283361, "grad_norm": 0.35593292117118835, "learning_rate": 1.768687615089388e-05, "loss": 0.5022, "step": 16170 }, { "epoch": 0.4440142778693026, "grad_norm": 0.39767372608184814, "learning_rate": 1.768659989417009e-05, "loss": 0.5877, "step": 16171 }, { "epoch": 0.4440417353102691, "grad_norm": 0.4220234751701355, "learning_rate": 1.7686323623108336e-05, "loss": 0.5661, "step": 16172 }, { "epoch": 0.4440691927512356, "grad_norm": 0.400147408246994, "learning_rate": 1.7686047337709137e-05, "loss": 0.5115, "step": 16173 }, { "epoch": 0.44409665019220207, "grad_norm": 0.36150652170181274, "learning_rate": 1.768577103797301e-05, "loss": 0.5075, "step": 16174 }, { "epoch": 0.44412410763316856, "grad_norm": 0.38955724239349365, "learning_rate": 1.768549472390046e-05, "loss": 0.4574, "step": 16175 }, { "epoch": 0.4441515650741351, "grad_norm": 0.43840545415878296, "learning_rate": 1.7685218395492016e-05, "loss": 0.6293, "step": 16176 }, { "epoch": 0.4441790225151016, "grad_norm": 0.3827981948852539, "learning_rate": 1.7684942052748183e-05, "loss": 0.5715, "step": 16177 }, { "epoch": 0.4442064799560681, "grad_norm": 0.3449585735797882, "learning_rate": 1.7684665695669482e-05, "loss": 0.5107, "step": 16178 }, { "epoch": 0.4442339373970346, "grad_norm": 0.343605637550354, "learning_rate": 1.7684389324256426e-05, "loss": 0.4787, "step": 16179 }, { "epoch": 0.4442613948380011, "grad_norm": 0.3180278241634369, "learning_rate": 1.768411293850953e-05, "loss": 0.4046, "step": 16180 }, { "epoch": 0.4442888522789676, "grad_norm": 0.361375629901886, "learning_rate": 1.7683836538429314e-05, "loss": 0.5961, "step": 16181 }, { "epoch": 0.4443163097199341, "grad_norm": 0.4050460457801819, "learning_rate": 1.768356012401629e-05, "loss": 0.4457, "step": 16182 }, { "epoch": 0.4443437671609006, "grad_norm": 0.33976760506629944, "learning_rate": 1.7683283695270973e-05, "loss": 0.4215, "step": 16183 }, { "epoch": 0.4443712246018671, "grad_norm": 0.3804028332233429, "learning_rate": 1.7683007252193878e-05, "loss": 0.5192, "step": 16184 }, { "epoch": 0.4443986820428336, "grad_norm": 0.38831809163093567, "learning_rate": 1.7682730794785524e-05, "loss": 0.5415, "step": 16185 }, { "epoch": 0.4444261394838001, "grad_norm": 0.3560028076171875, "learning_rate": 1.7682454323046426e-05, "loss": 0.5036, "step": 16186 }, { "epoch": 0.4444535969247666, "grad_norm": 0.49475836753845215, "learning_rate": 1.7682177836977096e-05, "loss": 0.5972, "step": 16187 }, { "epoch": 0.4444810543657331, "grad_norm": 0.3918502926826477, "learning_rate": 1.7681901336578056e-05, "loss": 0.4851, "step": 16188 }, { "epoch": 0.4445085118066996, "grad_norm": 0.33554813265800476, "learning_rate": 1.7681624821849817e-05, "loss": 0.4604, "step": 16189 }, { "epoch": 0.44453596924766614, "grad_norm": 0.3732165992259979, "learning_rate": 1.7681348292792894e-05, "loss": 0.5247, "step": 16190 }, { "epoch": 0.44456342668863263, "grad_norm": 0.37881898880004883, "learning_rate": 1.768107174940781e-05, "loss": 0.5832, "step": 16191 }, { "epoch": 0.44459088412959913, "grad_norm": 0.34582021832466125, "learning_rate": 1.7680795191695067e-05, "loss": 0.4623, "step": 16192 }, { "epoch": 0.4446183415705656, "grad_norm": 0.35325780510902405, "learning_rate": 1.76805186196552e-05, "loss": 0.4293, "step": 16193 }, { "epoch": 0.4446457990115321, "grad_norm": 0.39461180567741394, "learning_rate": 1.7680242033288705e-05, "loss": 0.6124, "step": 16194 }, { "epoch": 0.4446732564524986, "grad_norm": 0.43209999799728394, "learning_rate": 1.7679965432596112e-05, "loss": 0.5031, "step": 16195 }, { "epoch": 0.4447007138934651, "grad_norm": 0.36966854333877563, "learning_rate": 1.7679688817577932e-05, "loss": 0.439, "step": 16196 }, { "epoch": 0.44472817133443165, "grad_norm": 0.3481385409832001, "learning_rate": 1.767941218823468e-05, "loss": 0.5069, "step": 16197 }, { "epoch": 0.44475562877539815, "grad_norm": 0.3758752942085266, "learning_rate": 1.7679135544566874e-05, "loss": 0.4603, "step": 16198 }, { "epoch": 0.44478308621636464, "grad_norm": 0.3594307005405426, "learning_rate": 1.767885888657503e-05, "loss": 0.5164, "step": 16199 }, { "epoch": 0.44481054365733114, "grad_norm": 0.37687569856643677, "learning_rate": 1.7678582214259662e-05, "loss": 0.474, "step": 16200 }, { "epoch": 0.44483800109829763, "grad_norm": 0.3937789499759674, "learning_rate": 1.7678305527621292e-05, "loss": 0.5442, "step": 16201 }, { "epoch": 0.4448654585392641, "grad_norm": 0.42549073696136475, "learning_rate": 1.767802882666043e-05, "loss": 0.5092, "step": 16202 }, { "epoch": 0.4448929159802306, "grad_norm": 0.3591281771659851, "learning_rate": 1.767775211137759e-05, "loss": 0.4268, "step": 16203 }, { "epoch": 0.44492037342119717, "grad_norm": 0.32215529680252075, "learning_rate": 1.767747538177329e-05, "loss": 0.4439, "step": 16204 }, { "epoch": 0.44494783086216366, "grad_norm": 0.41082727909088135, "learning_rate": 1.7677198637848053e-05, "loss": 0.5418, "step": 16205 }, { "epoch": 0.44497528830313016, "grad_norm": 0.44698724150657654, "learning_rate": 1.7676921879602388e-05, "loss": 0.4789, "step": 16206 }, { "epoch": 0.44500274574409665, "grad_norm": 0.38168463110923767, "learning_rate": 1.7676645107036815e-05, "loss": 0.6087, "step": 16207 }, { "epoch": 0.44503020318506314, "grad_norm": 0.369469553232193, "learning_rate": 1.767636832015185e-05, "loss": 0.5152, "step": 16208 }, { "epoch": 0.44505766062602964, "grad_norm": 0.3668217957019806, "learning_rate": 1.7676091518948003e-05, "loss": 0.4783, "step": 16209 }, { "epoch": 0.44508511806699613, "grad_norm": 0.32934874296188354, "learning_rate": 1.7675814703425798e-05, "loss": 0.5009, "step": 16210 }, { "epoch": 0.4451125755079627, "grad_norm": 0.40517526865005493, "learning_rate": 1.767553787358575e-05, "loss": 0.5101, "step": 16211 }, { "epoch": 0.4451400329489292, "grad_norm": 0.41313445568084717, "learning_rate": 1.767526102942837e-05, "loss": 0.5107, "step": 16212 }, { "epoch": 0.44516749038989567, "grad_norm": 0.4035915732383728, "learning_rate": 1.7674984170954185e-05, "loss": 0.5529, "step": 16213 }, { "epoch": 0.44519494783086216, "grad_norm": 0.35228946805000305, "learning_rate": 1.7674707298163703e-05, "loss": 0.5109, "step": 16214 }, { "epoch": 0.44522240527182866, "grad_norm": 0.44823500514030457, "learning_rate": 1.767443041105744e-05, "loss": 0.5309, "step": 16215 }, { "epoch": 0.44524986271279515, "grad_norm": 0.33598530292510986, "learning_rate": 1.767415350963592e-05, "loss": 0.5263, "step": 16216 }, { "epoch": 0.44527732015376165, "grad_norm": 0.3530315160751343, "learning_rate": 1.767387659389965e-05, "loss": 0.467, "step": 16217 }, { "epoch": 0.4453047775947282, "grad_norm": 3.568052291870117, "learning_rate": 1.7673599663849147e-05, "loss": 0.5451, "step": 16218 }, { "epoch": 0.4453322350356947, "grad_norm": 0.3755253553390503, "learning_rate": 1.7673322719484936e-05, "loss": 0.4366, "step": 16219 }, { "epoch": 0.4453596924766612, "grad_norm": 0.4278234839439392, "learning_rate": 1.767304576080753e-05, "loss": 0.4535, "step": 16220 }, { "epoch": 0.4453871499176277, "grad_norm": 0.3449489176273346, "learning_rate": 1.7672768787817444e-05, "loss": 0.4796, "step": 16221 }, { "epoch": 0.4454146073585942, "grad_norm": 0.38805052638053894, "learning_rate": 1.7672491800515198e-05, "loss": 0.5628, "step": 16222 }, { "epoch": 0.44544206479956067, "grad_norm": 0.34644490480422974, "learning_rate": 1.76722147989013e-05, "loss": 0.4583, "step": 16223 }, { "epoch": 0.44546952224052716, "grad_norm": 0.3869039714336395, "learning_rate": 1.7671937782976278e-05, "loss": 0.507, "step": 16224 }, { "epoch": 0.4454969796814937, "grad_norm": 0.4172741770744324, "learning_rate": 1.767166075274064e-05, "loss": 0.4705, "step": 16225 }, { "epoch": 0.4455244371224602, "grad_norm": 0.3480086326599121, "learning_rate": 1.7671383708194908e-05, "loss": 0.5207, "step": 16226 }, { "epoch": 0.4455518945634267, "grad_norm": 0.3607298731803894, "learning_rate": 1.7671106649339595e-05, "loss": 0.6085, "step": 16227 }, { "epoch": 0.4455793520043932, "grad_norm": 0.5689302682876587, "learning_rate": 1.7670829576175222e-05, "loss": 0.5208, "step": 16228 }, { "epoch": 0.4456068094453597, "grad_norm": 0.34528982639312744, "learning_rate": 1.7670552488702305e-05, "loss": 0.4352, "step": 16229 }, { "epoch": 0.4456342668863262, "grad_norm": 0.34856030344963074, "learning_rate": 1.7670275386921354e-05, "loss": 0.5256, "step": 16230 }, { "epoch": 0.4456617243272927, "grad_norm": 0.35272538661956787, "learning_rate": 1.7669998270832896e-05, "loss": 0.4831, "step": 16231 }, { "epoch": 0.4456891817682592, "grad_norm": 0.3707428574562073, "learning_rate": 1.766972114043744e-05, "loss": 0.5278, "step": 16232 }, { "epoch": 0.4457166392092257, "grad_norm": 0.3768356442451477, "learning_rate": 1.766944399573551e-05, "loss": 0.5009, "step": 16233 }, { "epoch": 0.4457440966501922, "grad_norm": 0.3978661596775055, "learning_rate": 1.7669166836727616e-05, "loss": 0.4154, "step": 16234 }, { "epoch": 0.4457715540911587, "grad_norm": 0.3842734694480896, "learning_rate": 1.766888966341428e-05, "loss": 0.5915, "step": 16235 }, { "epoch": 0.4457990115321252, "grad_norm": 0.3481404781341553, "learning_rate": 1.7668612475796013e-05, "loss": 0.561, "step": 16236 }, { "epoch": 0.4458264689730917, "grad_norm": 0.40750834345817566, "learning_rate": 1.7668335273873342e-05, "loss": 0.4909, "step": 16237 }, { "epoch": 0.4458539264140582, "grad_norm": 0.40168800950050354, "learning_rate": 1.7668058057646775e-05, "loss": 0.4593, "step": 16238 }, { "epoch": 0.44588138385502474, "grad_norm": 0.3796750009059906, "learning_rate": 1.7667780827116833e-05, "loss": 0.5066, "step": 16239 }, { "epoch": 0.44590884129599123, "grad_norm": 0.42957910895347595, "learning_rate": 1.7667503582284036e-05, "loss": 0.5139, "step": 16240 }, { "epoch": 0.4459362987369577, "grad_norm": 0.38503462076187134, "learning_rate": 1.7667226323148894e-05, "loss": 0.468, "step": 16241 }, { "epoch": 0.4459637561779242, "grad_norm": 0.47551414370536804, "learning_rate": 1.766694904971193e-05, "loss": 0.5744, "step": 16242 }, { "epoch": 0.4459912136188907, "grad_norm": 0.3701879680156708, "learning_rate": 1.7666671761973654e-05, "loss": 0.4785, "step": 16243 }, { "epoch": 0.4460186710598572, "grad_norm": 0.37971368432044983, "learning_rate": 1.7666394459934592e-05, "loss": 0.4618, "step": 16244 }, { "epoch": 0.4460461285008237, "grad_norm": 0.40801090002059937, "learning_rate": 1.7666117143595256e-05, "loss": 0.4549, "step": 16245 }, { "epoch": 0.44607358594179025, "grad_norm": 0.3757406771183014, "learning_rate": 1.7665839812956168e-05, "loss": 0.4706, "step": 16246 }, { "epoch": 0.44610104338275675, "grad_norm": 0.3197157680988312, "learning_rate": 1.7665562468017838e-05, "loss": 0.4485, "step": 16247 }, { "epoch": 0.44612850082372324, "grad_norm": 0.3615194857120514, "learning_rate": 1.766528510878079e-05, "loss": 0.4598, "step": 16248 }, { "epoch": 0.44615595826468973, "grad_norm": 0.3849776089191437, "learning_rate": 1.7665007735245537e-05, "loss": 0.4806, "step": 16249 }, { "epoch": 0.44618341570565623, "grad_norm": 0.39970749616622925, "learning_rate": 1.7664730347412602e-05, "loss": 0.5201, "step": 16250 }, { "epoch": 0.4462108731466227, "grad_norm": 0.37025195360183716, "learning_rate": 1.7664452945282496e-05, "loss": 0.5385, "step": 16251 }, { "epoch": 0.4462383305875892, "grad_norm": 0.38511526584625244, "learning_rate": 1.7664175528855735e-05, "loss": 0.5565, "step": 16252 }, { "epoch": 0.44626578802855577, "grad_norm": 0.36795148253440857, "learning_rate": 1.7663898098132846e-05, "loss": 0.5326, "step": 16253 }, { "epoch": 0.44629324546952226, "grad_norm": 0.4097501337528229, "learning_rate": 1.7663620653114342e-05, "loss": 0.5579, "step": 16254 }, { "epoch": 0.44632070291048875, "grad_norm": 0.35103651881217957, "learning_rate": 1.7663343193800734e-05, "loss": 0.532, "step": 16255 }, { "epoch": 0.44634816035145525, "grad_norm": 0.5811704397201538, "learning_rate": 1.766306572019255e-05, "loss": 0.5144, "step": 16256 }, { "epoch": 0.44637561779242174, "grad_norm": 0.3651686906814575, "learning_rate": 1.76627882322903e-05, "loss": 0.5173, "step": 16257 }, { "epoch": 0.44640307523338824, "grad_norm": 0.35018861293792725, "learning_rate": 1.7662510730094506e-05, "loss": 0.5519, "step": 16258 }, { "epoch": 0.44643053267435473, "grad_norm": 0.3284205496311188, "learning_rate": 1.7662233213605682e-05, "loss": 0.4564, "step": 16259 }, { "epoch": 0.4464579901153213, "grad_norm": 0.38832947611808777, "learning_rate": 1.7661955682824346e-05, "loss": 0.5222, "step": 16260 }, { "epoch": 0.4464854475562878, "grad_norm": 0.3543276786804199, "learning_rate": 1.766167813775102e-05, "loss": 0.5202, "step": 16261 }, { "epoch": 0.44651290499725427, "grad_norm": 0.3370468020439148, "learning_rate": 1.766140057838622e-05, "loss": 0.4582, "step": 16262 }, { "epoch": 0.44654036243822076, "grad_norm": 0.3501153588294983, "learning_rate": 1.766112300473046e-05, "loss": 0.4786, "step": 16263 }, { "epoch": 0.44656781987918726, "grad_norm": 0.44563642144203186, "learning_rate": 1.7660845416784264e-05, "loss": 0.6021, "step": 16264 }, { "epoch": 0.44659527732015375, "grad_norm": 0.5630598664283752, "learning_rate": 1.7660567814548144e-05, "loss": 0.568, "step": 16265 }, { "epoch": 0.44662273476112024, "grad_norm": 0.5015395283699036, "learning_rate": 1.766029019802262e-05, "loss": 0.6243, "step": 16266 }, { "epoch": 0.4466501922020868, "grad_norm": 0.4601952135562897, "learning_rate": 1.766001256720821e-05, "loss": 0.6387, "step": 16267 }, { "epoch": 0.4466776496430533, "grad_norm": 0.378411203622818, "learning_rate": 1.7659734922105433e-05, "loss": 0.4411, "step": 16268 }, { "epoch": 0.4467051070840198, "grad_norm": 0.35130318999290466, "learning_rate": 1.7659457262714802e-05, "loss": 0.4549, "step": 16269 }, { "epoch": 0.4467325645249863, "grad_norm": 0.37039053440093994, "learning_rate": 1.765917958903684e-05, "loss": 0.5502, "step": 16270 }, { "epoch": 0.44676002196595277, "grad_norm": 0.30707189440727234, "learning_rate": 1.7658901901072068e-05, "loss": 0.3914, "step": 16271 }, { "epoch": 0.44678747940691926, "grad_norm": 0.331959068775177, "learning_rate": 1.7658624198820994e-05, "loss": 0.553, "step": 16272 }, { "epoch": 0.44681493684788576, "grad_norm": 0.3585236668586731, "learning_rate": 1.7658346482284144e-05, "loss": 0.4691, "step": 16273 }, { "epoch": 0.44684239428885225, "grad_norm": 0.3942684233188629, "learning_rate": 1.7658068751462032e-05, "loss": 0.4986, "step": 16274 }, { "epoch": 0.4468698517298188, "grad_norm": 0.46478286385536194, "learning_rate": 1.765779100635518e-05, "loss": 0.6143, "step": 16275 }, { "epoch": 0.4468973091707853, "grad_norm": 0.3873024880886078, "learning_rate": 1.76575132469641e-05, "loss": 0.5387, "step": 16276 }, { "epoch": 0.4469247666117518, "grad_norm": 0.3873789310455322, "learning_rate": 1.765723547328932e-05, "loss": 0.5833, "step": 16277 }, { "epoch": 0.4469522240527183, "grad_norm": 0.31765511631965637, "learning_rate": 1.7656957685331345e-05, "loss": 0.4492, "step": 16278 }, { "epoch": 0.4469796814936848, "grad_norm": 0.3743450343608856, "learning_rate": 1.7656679883090705e-05, "loss": 0.5575, "step": 16279 }, { "epoch": 0.44700713893465127, "grad_norm": 0.37586691975593567, "learning_rate": 1.7656402066567914e-05, "loss": 0.5527, "step": 16280 }, { "epoch": 0.44703459637561777, "grad_norm": 0.4324176609516144, "learning_rate": 1.7656124235763487e-05, "loss": 0.5686, "step": 16281 }, { "epoch": 0.4470620538165843, "grad_norm": 0.6613055467605591, "learning_rate": 1.7655846390677945e-05, "loss": 0.5365, "step": 16282 }, { "epoch": 0.4470895112575508, "grad_norm": 0.3653600215911865, "learning_rate": 1.7655568531311804e-05, "loss": 0.5201, "step": 16283 }, { "epoch": 0.4471169686985173, "grad_norm": 0.5308765172958374, "learning_rate": 1.7655290657665585e-05, "loss": 0.5979, "step": 16284 }, { "epoch": 0.4471444261394838, "grad_norm": 0.4483291506767273, "learning_rate": 1.7655012769739807e-05, "loss": 0.615, "step": 16285 }, { "epoch": 0.4471718835804503, "grad_norm": 0.40119820833206177, "learning_rate": 1.765473486753499e-05, "loss": 0.5195, "step": 16286 }, { "epoch": 0.4471993410214168, "grad_norm": 0.38622960448265076, "learning_rate": 1.7654456951051644e-05, "loss": 0.515, "step": 16287 }, { "epoch": 0.4472267984623833, "grad_norm": 0.39907050132751465, "learning_rate": 1.7654179020290295e-05, "loss": 0.5029, "step": 16288 }, { "epoch": 0.44725425590334983, "grad_norm": 0.46260976791381836, "learning_rate": 1.7653901075251457e-05, "loss": 0.4661, "step": 16289 }, { "epoch": 0.4472817133443163, "grad_norm": 0.34621086716651917, "learning_rate": 1.7653623115935655e-05, "loss": 0.4999, "step": 16290 }, { "epoch": 0.4473091707852828, "grad_norm": 0.3318263292312622, "learning_rate": 1.76533451423434e-05, "loss": 0.505, "step": 16291 }, { "epoch": 0.4473366282262493, "grad_norm": 0.47858351469039917, "learning_rate": 1.7653067154475216e-05, "loss": 0.6846, "step": 16292 }, { "epoch": 0.4473640856672158, "grad_norm": 0.4143407642841339, "learning_rate": 1.7652789152331616e-05, "loss": 0.5102, "step": 16293 }, { "epoch": 0.4473915431081823, "grad_norm": 0.3776983320713043, "learning_rate": 1.7652511135913125e-05, "loss": 0.6003, "step": 16294 }, { "epoch": 0.4474190005491488, "grad_norm": 0.38930270075798035, "learning_rate": 1.765223310522026e-05, "loss": 0.5305, "step": 16295 }, { "epoch": 0.44744645799011534, "grad_norm": 0.35578814148902893, "learning_rate": 1.7651955060253533e-05, "loss": 0.4775, "step": 16296 }, { "epoch": 0.44747391543108184, "grad_norm": 0.3677821457386017, "learning_rate": 1.7651677001013468e-05, "loss": 0.5214, "step": 16297 }, { "epoch": 0.44750137287204833, "grad_norm": 0.37774255871772766, "learning_rate": 1.7651398927500586e-05, "loss": 0.4977, "step": 16298 }, { "epoch": 0.4475288303130148, "grad_norm": 1.4527891874313354, "learning_rate": 1.7651120839715398e-05, "loss": 0.4528, "step": 16299 }, { "epoch": 0.4475562877539813, "grad_norm": 0.3749931752681732, "learning_rate": 1.7650842737658434e-05, "loss": 0.5544, "step": 16300 }, { "epoch": 0.4475837451949478, "grad_norm": 0.3598018288612366, "learning_rate": 1.7650564621330202e-05, "loss": 0.475, "step": 16301 }, { "epoch": 0.4476112026359143, "grad_norm": 0.3201737403869629, "learning_rate": 1.7650286490731226e-05, "loss": 0.441, "step": 16302 }, { "epoch": 0.44763866007688086, "grad_norm": 0.3528090715408325, "learning_rate": 1.7650008345862025e-05, "loss": 0.4561, "step": 16303 }, { "epoch": 0.44766611751784735, "grad_norm": 0.3508578836917877, "learning_rate": 1.7649730186723118e-05, "loss": 0.4942, "step": 16304 }, { "epoch": 0.44769357495881384, "grad_norm": 0.3582995533943176, "learning_rate": 1.7649452013315022e-05, "loss": 0.4798, "step": 16305 }, { "epoch": 0.44772103239978034, "grad_norm": 0.3848623037338257, "learning_rate": 1.7649173825638253e-05, "loss": 0.4964, "step": 16306 }, { "epoch": 0.44774848984074683, "grad_norm": 0.40599629282951355, "learning_rate": 1.764889562369334e-05, "loss": 0.4903, "step": 16307 }, { "epoch": 0.4477759472817133, "grad_norm": 0.3705848455429077, "learning_rate": 1.764861740748079e-05, "loss": 0.4842, "step": 16308 }, { "epoch": 0.4478034047226798, "grad_norm": 0.4040386974811554, "learning_rate": 1.764833917700113e-05, "loss": 0.6046, "step": 16309 }, { "epoch": 0.44783086216364637, "grad_norm": 0.3588997423648834, "learning_rate": 1.7648060932254875e-05, "loss": 0.4552, "step": 16310 }, { "epoch": 0.44785831960461286, "grad_norm": 0.45170530676841736, "learning_rate": 1.7647782673242547e-05, "loss": 0.5861, "step": 16311 }, { "epoch": 0.44788577704557936, "grad_norm": 0.41263341903686523, "learning_rate": 1.7647504399964663e-05, "loss": 0.4992, "step": 16312 }, { "epoch": 0.44791323448654585, "grad_norm": 0.581620991230011, "learning_rate": 1.764722611242174e-05, "loss": 0.5214, "step": 16313 }, { "epoch": 0.44794069192751235, "grad_norm": 0.351747989654541, "learning_rate": 1.7646947810614302e-05, "loss": 0.5327, "step": 16314 }, { "epoch": 0.44796814936847884, "grad_norm": 0.35103926062583923, "learning_rate": 1.7646669494542866e-05, "loss": 0.5819, "step": 16315 }, { "epoch": 0.44799560680944533, "grad_norm": 0.35802045464515686, "learning_rate": 1.7646391164207948e-05, "loss": 0.4569, "step": 16316 }, { "epoch": 0.4480230642504119, "grad_norm": 0.3239123523235321, "learning_rate": 1.7646112819610072e-05, "loss": 0.4572, "step": 16317 }, { "epoch": 0.4480505216913784, "grad_norm": 0.4031168222427368, "learning_rate": 1.7645834460749758e-05, "loss": 0.5162, "step": 16318 }, { "epoch": 0.4480779791323449, "grad_norm": 0.3830220699310303, "learning_rate": 1.764555608762752e-05, "loss": 0.6239, "step": 16319 }, { "epoch": 0.44810543657331137, "grad_norm": 0.6425554156303406, "learning_rate": 1.7645277700243878e-05, "loss": 0.5061, "step": 16320 }, { "epoch": 0.44813289401427786, "grad_norm": 0.3940119743347168, "learning_rate": 1.7644999298599353e-05, "loss": 0.5286, "step": 16321 }, { "epoch": 0.44816035145524435, "grad_norm": 0.4258232116699219, "learning_rate": 1.7644720882694466e-05, "loss": 0.569, "step": 16322 }, { "epoch": 0.44818780889621085, "grad_norm": 0.36462703347206116, "learning_rate": 1.7644442452529736e-05, "loss": 0.4462, "step": 16323 }, { "epoch": 0.4482152663371774, "grad_norm": 0.37443259358406067, "learning_rate": 1.7644164008105676e-05, "loss": 0.576, "step": 16324 }, { "epoch": 0.4482427237781439, "grad_norm": 0.4350542426109314, "learning_rate": 1.764388554942282e-05, "loss": 0.4763, "step": 16325 }, { "epoch": 0.4482701812191104, "grad_norm": 0.3826397955417633, "learning_rate": 1.764360707648167e-05, "loss": 0.5288, "step": 16326 }, { "epoch": 0.4482976386600769, "grad_norm": 0.38793954253196716, "learning_rate": 1.7643328589282754e-05, "loss": 0.4606, "step": 16327 }, { "epoch": 0.4483250961010434, "grad_norm": 0.38546791672706604, "learning_rate": 1.764305008782659e-05, "loss": 0.5234, "step": 16328 }, { "epoch": 0.44835255354200987, "grad_norm": 0.35235854983329773, "learning_rate": 1.76427715721137e-05, "loss": 0.5573, "step": 16329 }, { "epoch": 0.44838001098297636, "grad_norm": 0.3640965223312378, "learning_rate": 1.7642493042144605e-05, "loss": 0.5417, "step": 16330 }, { "epoch": 0.4484074684239429, "grad_norm": 0.36048293113708496, "learning_rate": 1.7642214497919817e-05, "loss": 0.5115, "step": 16331 }, { "epoch": 0.4484349258649094, "grad_norm": 0.4432981610298157, "learning_rate": 1.7641935939439858e-05, "loss": 0.5573, "step": 16332 }, { "epoch": 0.4484623833058759, "grad_norm": 0.40263521671295166, "learning_rate": 1.7641657366705252e-05, "loss": 0.46, "step": 16333 }, { "epoch": 0.4484898407468424, "grad_norm": 0.3744945228099823, "learning_rate": 1.7641378779716518e-05, "loss": 0.5329, "step": 16334 }, { "epoch": 0.4485172981878089, "grad_norm": 0.4188179075717926, "learning_rate": 1.764110017847417e-05, "loss": 0.484, "step": 16335 }, { "epoch": 0.4485447556287754, "grad_norm": 0.38507163524627686, "learning_rate": 1.7640821562978737e-05, "loss": 0.4475, "step": 16336 }, { "epoch": 0.4485722130697419, "grad_norm": 0.42068207263946533, "learning_rate": 1.764054293323073e-05, "loss": 0.4505, "step": 16337 }, { "epoch": 0.4485996705107084, "grad_norm": 0.4947984516620636, "learning_rate": 1.7640264289230668e-05, "loss": 0.5289, "step": 16338 }, { "epoch": 0.4486271279516749, "grad_norm": 0.3511262834072113, "learning_rate": 1.763998563097908e-05, "loss": 0.4485, "step": 16339 }, { "epoch": 0.4486545853926414, "grad_norm": 0.3525216281414032, "learning_rate": 1.7639706958476475e-05, "loss": 0.525, "step": 16340 }, { "epoch": 0.4486820428336079, "grad_norm": 0.42244017124176025, "learning_rate": 1.7639428271723385e-05, "loss": 0.5467, "step": 16341 }, { "epoch": 0.4487095002745744, "grad_norm": 0.4891397953033447, "learning_rate": 1.7639149570720316e-05, "loss": 0.4652, "step": 16342 }, { "epoch": 0.4487369577155409, "grad_norm": 0.3815208971500397, "learning_rate": 1.76388708554678e-05, "loss": 0.5407, "step": 16343 }, { "epoch": 0.4487644151565074, "grad_norm": 0.43269941210746765, "learning_rate": 1.763859212596635e-05, "loss": 0.5679, "step": 16344 }, { "epoch": 0.44879187259747394, "grad_norm": 0.3986944556236267, "learning_rate": 1.763831338221649e-05, "loss": 0.5428, "step": 16345 }, { "epoch": 0.44881933003844043, "grad_norm": 0.4186573624610901, "learning_rate": 1.7638034624218738e-05, "loss": 0.4772, "step": 16346 }, { "epoch": 0.44884678747940693, "grad_norm": 0.3791615664958954, "learning_rate": 1.763775585197361e-05, "loss": 0.6196, "step": 16347 }, { "epoch": 0.4488742449203734, "grad_norm": 0.38311004638671875, "learning_rate": 1.763747706548163e-05, "loss": 0.4991, "step": 16348 }, { "epoch": 0.4489017023613399, "grad_norm": 0.3693498969078064, "learning_rate": 1.7637198264743318e-05, "loss": 0.5746, "step": 16349 }, { "epoch": 0.4489291598023064, "grad_norm": 0.45124709606170654, "learning_rate": 1.7636919449759197e-05, "loss": 0.5092, "step": 16350 }, { "epoch": 0.4489566172432729, "grad_norm": 0.35207122564315796, "learning_rate": 1.763664062052978e-05, "loss": 0.4291, "step": 16351 }, { "epoch": 0.44898407468423945, "grad_norm": 0.3303559720516205, "learning_rate": 1.7636361777055594e-05, "loss": 0.4881, "step": 16352 }, { "epoch": 0.44901153212520595, "grad_norm": 0.35786306858062744, "learning_rate": 1.7636082919337153e-05, "loss": 0.5943, "step": 16353 }, { "epoch": 0.44903898956617244, "grad_norm": 0.3418938219547272, "learning_rate": 1.763580404737498e-05, "loss": 0.4484, "step": 16354 }, { "epoch": 0.44906644700713894, "grad_norm": 0.3868178129196167, "learning_rate": 1.7635525161169596e-05, "loss": 0.5722, "step": 16355 }, { "epoch": 0.44909390444810543, "grad_norm": 0.4371788799762726, "learning_rate": 1.7635246260721525e-05, "loss": 0.462, "step": 16356 }, { "epoch": 0.4491213618890719, "grad_norm": 0.36801877617836, "learning_rate": 1.7634967346031278e-05, "loss": 0.5276, "step": 16357 }, { "epoch": 0.4491488193300384, "grad_norm": 0.3286571502685547, "learning_rate": 1.763468841709938e-05, "loss": 0.5339, "step": 16358 }, { "epoch": 0.44917627677100497, "grad_norm": 0.3956201672554016, "learning_rate": 1.7634409473926352e-05, "loss": 0.5911, "step": 16359 }, { "epoch": 0.44920373421197146, "grad_norm": 0.3090289533138275, "learning_rate": 1.7634130516512715e-05, "loss": 0.484, "step": 16360 }, { "epoch": 0.44923119165293796, "grad_norm": 0.4280528724193573, "learning_rate": 1.7633851544858988e-05, "loss": 0.6085, "step": 16361 }, { "epoch": 0.44925864909390445, "grad_norm": 1.4353727102279663, "learning_rate": 1.7633572558965692e-05, "loss": 0.4605, "step": 16362 }, { "epoch": 0.44928610653487094, "grad_norm": 0.3505908250808716, "learning_rate": 1.7633293558833345e-05, "loss": 0.4974, "step": 16363 }, { "epoch": 0.44931356397583744, "grad_norm": 0.36935925483703613, "learning_rate": 1.763301454446247e-05, "loss": 0.5498, "step": 16364 }, { "epoch": 0.44934102141680393, "grad_norm": 0.41407841444015503, "learning_rate": 1.763273551585359e-05, "loss": 0.4967, "step": 16365 }, { "epoch": 0.4493684788577705, "grad_norm": 0.3528873026371002, "learning_rate": 1.7632456473007217e-05, "loss": 0.4289, "step": 16366 }, { "epoch": 0.449395936298737, "grad_norm": 0.41084054112434387, "learning_rate": 1.7632177415923878e-05, "loss": 0.5812, "step": 16367 }, { "epoch": 0.44942339373970347, "grad_norm": 0.3799944221973419, "learning_rate": 1.763189834460409e-05, "loss": 0.5829, "step": 16368 }, { "epoch": 0.44945085118066996, "grad_norm": 0.3394142687320709, "learning_rate": 1.763161925904838e-05, "loss": 0.5174, "step": 16369 }, { "epoch": 0.44947830862163646, "grad_norm": 0.3679940104484558, "learning_rate": 1.7631340159257262e-05, "loss": 0.559, "step": 16370 }, { "epoch": 0.44950576606260295, "grad_norm": 0.360752671957016, "learning_rate": 1.763106104523126e-05, "loss": 0.5027, "step": 16371 }, { "epoch": 0.44953322350356945, "grad_norm": 0.4499969482421875, "learning_rate": 1.7630781916970894e-05, "loss": 0.4975, "step": 16372 }, { "epoch": 0.449560680944536, "grad_norm": 0.41656696796417236, "learning_rate": 1.7630502774476683e-05, "loss": 0.5876, "step": 16373 }, { "epoch": 0.4495881383855025, "grad_norm": 0.3844637870788574, "learning_rate": 1.763022361774915e-05, "loss": 0.5468, "step": 16374 }, { "epoch": 0.449615595826469, "grad_norm": 0.37302976846694946, "learning_rate": 1.7629944446788815e-05, "loss": 0.4634, "step": 16375 }, { "epoch": 0.4496430532674355, "grad_norm": 0.33911624550819397, "learning_rate": 1.7629665261596198e-05, "loss": 0.4856, "step": 16376 }, { "epoch": 0.44967051070840197, "grad_norm": 0.3796495795249939, "learning_rate": 1.762938606217182e-05, "loss": 0.516, "step": 16377 }, { "epoch": 0.44969796814936847, "grad_norm": 0.40785104036331177, "learning_rate": 1.7629106848516204e-05, "loss": 0.5102, "step": 16378 }, { "epoch": 0.44972542559033496, "grad_norm": 0.3765966296195984, "learning_rate": 1.7628827620629863e-05, "loss": 0.5434, "step": 16379 }, { "epoch": 0.4497528830313015, "grad_norm": 0.4066828489303589, "learning_rate": 1.762854837851333e-05, "loss": 0.5474, "step": 16380 }, { "epoch": 0.449780340472268, "grad_norm": 0.44063472747802734, "learning_rate": 1.7628269122167115e-05, "loss": 0.4619, "step": 16381 }, { "epoch": 0.4498077979132345, "grad_norm": 0.4751841723918915, "learning_rate": 1.7627989851591744e-05, "loss": 0.4975, "step": 16382 }, { "epoch": 0.449835255354201, "grad_norm": 0.38785117864608765, "learning_rate": 1.762771056678774e-05, "loss": 0.5455, "step": 16383 }, { "epoch": 0.4498627127951675, "grad_norm": 0.404065877199173, "learning_rate": 1.762743126775562e-05, "loss": 0.5912, "step": 16384 }, { "epoch": 0.449890170236134, "grad_norm": 0.372230589389801, "learning_rate": 1.7627151954495904e-05, "loss": 0.4384, "step": 16385 }, { "epoch": 0.4499176276771005, "grad_norm": 0.42826223373413086, "learning_rate": 1.7626872627009118e-05, "loss": 0.5441, "step": 16386 }, { "epoch": 0.449945085118067, "grad_norm": 0.35649630427360535, "learning_rate": 1.762659328529578e-05, "loss": 0.5179, "step": 16387 }, { "epoch": 0.4499725425590335, "grad_norm": 0.31712231040000916, "learning_rate": 1.7626313929356415e-05, "loss": 0.4624, "step": 16388 }, { "epoch": 0.45, "grad_norm": 0.3887706995010376, "learning_rate": 1.7626034559191534e-05, "loss": 0.5044, "step": 16389 }, { "epoch": 0.4500274574409665, "grad_norm": 0.34697288274765015, "learning_rate": 1.7625755174801668e-05, "loss": 0.5219, "step": 16390 }, { "epoch": 0.450054914881933, "grad_norm": 0.37770694494247437, "learning_rate": 1.7625475776187333e-05, "loss": 0.5232, "step": 16391 }, { "epoch": 0.4500823723228995, "grad_norm": 0.4143514931201935, "learning_rate": 1.7625196363349055e-05, "loss": 0.5276, "step": 16392 }, { "epoch": 0.450109829763866, "grad_norm": 0.3799136281013489, "learning_rate": 1.762491693628735e-05, "loss": 0.4993, "step": 16393 }, { "epoch": 0.45013728720483254, "grad_norm": 0.41576096415519714, "learning_rate": 1.762463749500274e-05, "loss": 0.5654, "step": 16394 }, { "epoch": 0.45016474464579903, "grad_norm": 0.4133335053920746, "learning_rate": 1.762435803949575e-05, "loss": 0.5191, "step": 16395 }, { "epoch": 0.4501922020867655, "grad_norm": 0.34034594893455505, "learning_rate": 1.76240785697669e-05, "loss": 0.5233, "step": 16396 }, { "epoch": 0.450219659527732, "grad_norm": 0.414831280708313, "learning_rate": 1.7623799085816705e-05, "loss": 0.6038, "step": 16397 }, { "epoch": 0.4502471169686985, "grad_norm": 0.36482420563697815, "learning_rate": 1.7623519587645696e-05, "loss": 0.5843, "step": 16398 }, { "epoch": 0.450274574409665, "grad_norm": 0.3563075661659241, "learning_rate": 1.7623240075254385e-05, "loss": 0.5435, "step": 16399 }, { "epoch": 0.4503020318506315, "grad_norm": 0.359744668006897, "learning_rate": 1.7622960548643302e-05, "loss": 0.5668, "step": 16400 }, { "epoch": 0.45032948929159805, "grad_norm": 0.4468838572502136, "learning_rate": 1.7622681007812965e-05, "loss": 0.4241, "step": 16401 }, { "epoch": 0.45035694673256454, "grad_norm": 0.461291640996933, "learning_rate": 1.7622401452763896e-05, "loss": 0.5636, "step": 16402 }, { "epoch": 0.45038440417353104, "grad_norm": 0.38259997963905334, "learning_rate": 1.7622121883496614e-05, "loss": 0.5486, "step": 16403 }, { "epoch": 0.45041186161449753, "grad_norm": 0.3538258671760559, "learning_rate": 1.762184230001164e-05, "loss": 0.5426, "step": 16404 }, { "epoch": 0.450439319055464, "grad_norm": 0.3682671785354614, "learning_rate": 1.7621562702309498e-05, "loss": 0.5223, "step": 16405 }, { "epoch": 0.4504667764964305, "grad_norm": 0.38607680797576904, "learning_rate": 1.762128309039071e-05, "loss": 0.5431, "step": 16406 }, { "epoch": 0.450494233937397, "grad_norm": 0.34689515829086304, "learning_rate": 1.7621003464255798e-05, "loss": 0.4496, "step": 16407 }, { "epoch": 0.4505216913783635, "grad_norm": 0.38456472754478455, "learning_rate": 1.762072382390528e-05, "loss": 0.5704, "step": 16408 }, { "epoch": 0.45054914881933006, "grad_norm": 0.3719666004180908, "learning_rate": 1.7620444169339683e-05, "loss": 0.5503, "step": 16409 }, { "epoch": 0.45057660626029655, "grad_norm": 0.3527688682079315, "learning_rate": 1.7620164500559524e-05, "loss": 0.5336, "step": 16410 }, { "epoch": 0.45060406370126305, "grad_norm": 0.3233591318130493, "learning_rate": 1.761988481756532e-05, "loss": 0.4437, "step": 16411 }, { "epoch": 0.45063152114222954, "grad_norm": 0.37388744950294495, "learning_rate": 1.761960512035761e-05, "loss": 0.5335, "step": 16412 }, { "epoch": 0.45065897858319603, "grad_norm": 0.7181567549705505, "learning_rate": 1.7619325408936896e-05, "loss": 0.5282, "step": 16413 }, { "epoch": 0.45068643602416253, "grad_norm": 0.46058544516563416, "learning_rate": 1.761904568330371e-05, "loss": 0.5384, "step": 16414 }, { "epoch": 0.450713893465129, "grad_norm": 0.4352286159992218, "learning_rate": 1.7618765943458577e-05, "loss": 0.5966, "step": 16415 }, { "epoch": 0.4507413509060956, "grad_norm": 0.42245474457740784, "learning_rate": 1.7618486189402007e-05, "loss": 0.5201, "step": 16416 }, { "epoch": 0.45076880834706207, "grad_norm": 0.36213329434394836, "learning_rate": 1.7618206421134532e-05, "loss": 0.5418, "step": 16417 }, { "epoch": 0.45079626578802856, "grad_norm": 0.43807539343833923, "learning_rate": 1.761792663865667e-05, "loss": 0.4667, "step": 16418 }, { "epoch": 0.45082372322899505, "grad_norm": 0.3651374578475952, "learning_rate": 1.7617646841968942e-05, "loss": 0.513, "step": 16419 }, { "epoch": 0.45085118066996155, "grad_norm": 0.3257370591163635, "learning_rate": 1.7617367031071874e-05, "loss": 0.5543, "step": 16420 }, { "epoch": 0.45087863811092804, "grad_norm": 0.39248254895210266, "learning_rate": 1.7617087205965987e-05, "loss": 0.5061, "step": 16421 }, { "epoch": 0.45090609555189454, "grad_norm": 0.34287458658218384, "learning_rate": 1.7616807366651797e-05, "loss": 0.5245, "step": 16422 }, { "epoch": 0.4509335529928611, "grad_norm": 0.41643720865249634, "learning_rate": 1.7616527513129832e-05, "loss": 0.5041, "step": 16423 }, { "epoch": 0.4509610104338276, "grad_norm": 0.4319380819797516, "learning_rate": 1.761624764540061e-05, "loss": 0.5755, "step": 16424 }, { "epoch": 0.4509884678747941, "grad_norm": 0.39449480175971985, "learning_rate": 1.7615967763464658e-05, "loss": 0.5994, "step": 16425 }, { "epoch": 0.45101592531576057, "grad_norm": 0.47950389981269836, "learning_rate": 1.7615687867322496e-05, "loss": 0.4804, "step": 16426 }, { "epoch": 0.45104338275672706, "grad_norm": 0.37265294790267944, "learning_rate": 1.7615407956974646e-05, "loss": 0.5347, "step": 16427 }, { "epoch": 0.45107084019769356, "grad_norm": 0.3874069154262543, "learning_rate": 1.761512803242163e-05, "loss": 0.5433, "step": 16428 }, { "epoch": 0.45109829763866005, "grad_norm": 0.3805796205997467, "learning_rate": 1.7614848093663966e-05, "loss": 0.5563, "step": 16429 }, { "epoch": 0.4511257550796266, "grad_norm": 0.43778395652770996, "learning_rate": 1.761456814070218e-05, "loss": 0.5776, "step": 16430 }, { "epoch": 0.4511532125205931, "grad_norm": 0.4236373007297516, "learning_rate": 1.7614288173536796e-05, "loss": 0.5399, "step": 16431 }, { "epoch": 0.4511806699615596, "grad_norm": 0.3807057738304138, "learning_rate": 1.7614008192168336e-05, "loss": 0.5802, "step": 16432 }, { "epoch": 0.4512081274025261, "grad_norm": 0.38247713446617126, "learning_rate": 1.761372819659732e-05, "loss": 0.4717, "step": 16433 }, { "epoch": 0.4512355848434926, "grad_norm": 0.40510234236717224, "learning_rate": 1.7613448186824268e-05, "loss": 0.5246, "step": 16434 }, { "epoch": 0.45126304228445907, "grad_norm": 0.3878079950809479, "learning_rate": 1.7613168162849706e-05, "loss": 0.484, "step": 16435 }, { "epoch": 0.45129049972542556, "grad_norm": 0.4531189203262329, "learning_rate": 1.7612888124674155e-05, "loss": 0.4557, "step": 16436 }, { "epoch": 0.4513179571663921, "grad_norm": 0.4005299508571625, "learning_rate": 1.761260807229814e-05, "loss": 0.5528, "step": 16437 }, { "epoch": 0.4513454146073586, "grad_norm": 0.3891911804676056, "learning_rate": 1.7612328005722183e-05, "loss": 0.5397, "step": 16438 }, { "epoch": 0.4513728720483251, "grad_norm": 0.3791523873806, "learning_rate": 1.76120479249468e-05, "loss": 0.5335, "step": 16439 }, { "epoch": 0.4514003294892916, "grad_norm": 0.3671433925628662, "learning_rate": 1.761176782997252e-05, "loss": 0.5041, "step": 16440 }, { "epoch": 0.4514277869302581, "grad_norm": 0.36536648869514465, "learning_rate": 1.7611487720799868e-05, "loss": 0.4742, "step": 16441 }, { "epoch": 0.4514552443712246, "grad_norm": 0.40077856183052063, "learning_rate": 1.7611207597429357e-05, "loss": 0.5251, "step": 16442 }, { "epoch": 0.4514827018121911, "grad_norm": 0.4081714451313019, "learning_rate": 1.7610927459861514e-05, "loss": 0.5526, "step": 16443 }, { "epoch": 0.45151015925315763, "grad_norm": 0.3407379388809204, "learning_rate": 1.7610647308096863e-05, "loss": 0.4684, "step": 16444 }, { "epoch": 0.4515376166941241, "grad_norm": 0.336406409740448, "learning_rate": 1.761036714213593e-05, "loss": 0.4811, "step": 16445 }, { "epoch": 0.4515650741350906, "grad_norm": 0.34787511825561523, "learning_rate": 1.761008696197923e-05, "loss": 0.4534, "step": 16446 }, { "epoch": 0.4515925315760571, "grad_norm": 0.35643380880355835, "learning_rate": 1.7609806767627286e-05, "loss": 0.4762, "step": 16447 }, { "epoch": 0.4516199890170236, "grad_norm": 0.39195793867111206, "learning_rate": 1.7609526559080627e-05, "loss": 0.4229, "step": 16448 }, { "epoch": 0.4516474464579901, "grad_norm": 0.3406163454055786, "learning_rate": 1.7609246336339775e-05, "loss": 0.4972, "step": 16449 }, { "epoch": 0.4516749038989566, "grad_norm": 0.43508389592170715, "learning_rate": 1.7608966099405246e-05, "loss": 0.5158, "step": 16450 }, { "epoch": 0.45170236133992314, "grad_norm": 0.3196220397949219, "learning_rate": 1.7608685848277566e-05, "loss": 0.4222, "step": 16451 }, { "epoch": 0.45172981878088964, "grad_norm": 0.3754172921180725, "learning_rate": 1.7608405582957262e-05, "loss": 0.5944, "step": 16452 }, { "epoch": 0.45175727622185613, "grad_norm": 0.4495798647403717, "learning_rate": 1.760812530344485e-05, "loss": 0.5269, "step": 16453 }, { "epoch": 0.4517847336628226, "grad_norm": 0.3724677264690399, "learning_rate": 1.760784500974086e-05, "loss": 0.5034, "step": 16454 }, { "epoch": 0.4518121911037891, "grad_norm": 0.3524262607097626, "learning_rate": 1.7607564701845807e-05, "loss": 0.4793, "step": 16455 }, { "epoch": 0.4518396485447556, "grad_norm": 0.3692380487918854, "learning_rate": 1.7607284379760218e-05, "loss": 0.6116, "step": 16456 }, { "epoch": 0.4518671059857221, "grad_norm": 0.40111252665519714, "learning_rate": 1.7607004043484615e-05, "loss": 0.4631, "step": 16457 }, { "epoch": 0.45189456342668866, "grad_norm": 0.38076698780059814, "learning_rate": 1.7606723693019526e-05, "loss": 0.5746, "step": 16458 }, { "epoch": 0.45192202086765515, "grad_norm": 0.39562347531318665, "learning_rate": 1.7606443328365467e-05, "loss": 0.5699, "step": 16459 }, { "epoch": 0.45194947830862164, "grad_norm": 0.32691484689712524, "learning_rate": 1.7606162949522962e-05, "loss": 0.5291, "step": 16460 }, { "epoch": 0.45197693574958814, "grad_norm": 0.3756413757801056, "learning_rate": 1.7605882556492536e-05, "loss": 0.5387, "step": 16461 }, { "epoch": 0.45200439319055463, "grad_norm": 0.42511433362960815, "learning_rate": 1.7605602149274715e-05, "loss": 0.5201, "step": 16462 }, { "epoch": 0.4520318506315211, "grad_norm": 0.3948748707771301, "learning_rate": 1.7605321727870014e-05, "loss": 0.4637, "step": 16463 }, { "epoch": 0.4520593080724876, "grad_norm": 0.3371366560459137, "learning_rate": 1.7605041292278965e-05, "loss": 0.5317, "step": 16464 }, { "epoch": 0.45208676551345417, "grad_norm": 0.4076662063598633, "learning_rate": 1.7604760842502082e-05, "loss": 0.5597, "step": 16465 }, { "epoch": 0.45211422295442066, "grad_norm": 0.33447667956352234, "learning_rate": 1.7604480378539897e-05, "loss": 0.396, "step": 16466 }, { "epoch": 0.45214168039538716, "grad_norm": 0.4558180868625641, "learning_rate": 1.7604199900392927e-05, "loss": 0.5404, "step": 16467 }, { "epoch": 0.45216913783635365, "grad_norm": 0.45525142550468445, "learning_rate": 1.7603919408061698e-05, "loss": 0.4513, "step": 16468 }, { "epoch": 0.45219659527732015, "grad_norm": 0.36419957876205444, "learning_rate": 1.760363890154673e-05, "loss": 0.508, "step": 16469 }, { "epoch": 0.45222405271828664, "grad_norm": 0.3514192998409271, "learning_rate": 1.760335838084855e-05, "loss": 0.5151, "step": 16470 }, { "epoch": 0.45225151015925313, "grad_norm": 0.3583473265171051, "learning_rate": 1.7603077845967683e-05, "loss": 0.4706, "step": 16471 }, { "epoch": 0.4522789676002197, "grad_norm": 0.36676880717277527, "learning_rate": 1.7602797296904647e-05, "loss": 0.4781, "step": 16472 }, { "epoch": 0.4523064250411862, "grad_norm": 0.41546207666397095, "learning_rate": 1.7602516733659966e-05, "loss": 0.5038, "step": 16473 }, { "epoch": 0.45233388248215267, "grad_norm": 0.4245785176753998, "learning_rate": 1.7602236156234164e-05, "loss": 0.5652, "step": 16474 }, { "epoch": 0.45236133992311917, "grad_norm": 0.3646193742752075, "learning_rate": 1.7601955564627767e-05, "loss": 0.6295, "step": 16475 }, { "epoch": 0.45238879736408566, "grad_norm": 0.3531426787376404, "learning_rate": 1.76016749588413e-05, "loss": 0.5023, "step": 16476 }, { "epoch": 0.45241625480505215, "grad_norm": 0.41452303528785706, "learning_rate": 1.760139433887528e-05, "loss": 0.5779, "step": 16477 }, { "epoch": 0.45244371224601865, "grad_norm": 0.39359986782073975, "learning_rate": 1.7601113704730232e-05, "loss": 0.5835, "step": 16478 }, { "epoch": 0.4524711696869852, "grad_norm": 0.4056164622306824, "learning_rate": 1.760083305640668e-05, "loss": 0.5205, "step": 16479 }, { "epoch": 0.4524986271279517, "grad_norm": 0.39417484402656555, "learning_rate": 1.760055239390515e-05, "loss": 0.5561, "step": 16480 }, { "epoch": 0.4525260845689182, "grad_norm": 0.3654676377773285, "learning_rate": 1.7600271717226167e-05, "loss": 0.5203, "step": 16481 }, { "epoch": 0.4525535420098847, "grad_norm": 0.3931303322315216, "learning_rate": 1.759999102637025e-05, "loss": 0.4846, "step": 16482 }, { "epoch": 0.4525809994508512, "grad_norm": 0.37459105253219604, "learning_rate": 1.7599710321337925e-05, "loss": 0.5199, "step": 16483 }, { "epoch": 0.45260845689181767, "grad_norm": 0.7336270809173584, "learning_rate": 1.759942960212971e-05, "loss": 0.596, "step": 16484 }, { "epoch": 0.45263591433278416, "grad_norm": 0.33560794591903687, "learning_rate": 1.7599148868746136e-05, "loss": 0.5198, "step": 16485 }, { "epoch": 0.4526633717737507, "grad_norm": 0.40384554862976074, "learning_rate": 1.7598868121187725e-05, "loss": 0.5007, "step": 16486 }, { "epoch": 0.4526908292147172, "grad_norm": 0.40007176995277405, "learning_rate": 1.7598587359454998e-05, "loss": 0.571, "step": 16487 }, { "epoch": 0.4527182866556837, "grad_norm": 0.36748895049095154, "learning_rate": 1.7598306583548482e-05, "loss": 0.5234, "step": 16488 }, { "epoch": 0.4527457440966502, "grad_norm": 0.35370126366615295, "learning_rate": 1.75980257934687e-05, "loss": 0.4468, "step": 16489 }, { "epoch": 0.4527732015376167, "grad_norm": 0.32824206352233887, "learning_rate": 1.7597744989216172e-05, "loss": 0.4697, "step": 16490 }, { "epoch": 0.4528006589785832, "grad_norm": 0.4980722665786743, "learning_rate": 1.7597464170791428e-05, "loss": 0.5322, "step": 16491 }, { "epoch": 0.4528281164195497, "grad_norm": 0.3525618612766266, "learning_rate": 1.7597183338194985e-05, "loss": 0.471, "step": 16492 }, { "epoch": 0.4528555738605162, "grad_norm": 0.34973886609077454, "learning_rate": 1.7596902491427374e-05, "loss": 0.4754, "step": 16493 }, { "epoch": 0.4528830313014827, "grad_norm": 0.38278862833976746, "learning_rate": 1.759662163048911e-05, "loss": 0.5587, "step": 16494 }, { "epoch": 0.4529104887424492, "grad_norm": 0.34794360399246216, "learning_rate": 1.7596340755380725e-05, "loss": 0.5122, "step": 16495 }, { "epoch": 0.4529379461834157, "grad_norm": 0.3971276879310608, "learning_rate": 1.7596059866102742e-05, "loss": 0.4915, "step": 16496 }, { "epoch": 0.4529654036243822, "grad_norm": 0.34436362981796265, "learning_rate": 1.759577896265568e-05, "loss": 0.5522, "step": 16497 }, { "epoch": 0.4529928610653487, "grad_norm": 0.39027896523475647, "learning_rate": 1.7595498045040068e-05, "loss": 0.5039, "step": 16498 }, { "epoch": 0.4530203185063152, "grad_norm": 0.446169912815094, "learning_rate": 1.7595217113256427e-05, "loss": 0.5018, "step": 16499 }, { "epoch": 0.45304777594728174, "grad_norm": 0.41163384914398193, "learning_rate": 1.7594936167305282e-05, "loss": 0.5874, "step": 16500 }, { "epoch": 0.45307523338824823, "grad_norm": 0.4378408193588257, "learning_rate": 1.7594655207187155e-05, "loss": 0.6295, "step": 16501 }, { "epoch": 0.4531026908292147, "grad_norm": 0.3744196593761444, "learning_rate": 1.7594374232902576e-05, "loss": 0.5828, "step": 16502 }, { "epoch": 0.4531301482701812, "grad_norm": 0.38281193375587463, "learning_rate": 1.759409324445206e-05, "loss": 0.5031, "step": 16503 }, { "epoch": 0.4531576057111477, "grad_norm": 0.47974494099617004, "learning_rate": 1.7593812241836143e-05, "loss": 0.6065, "step": 16504 }, { "epoch": 0.4531850631521142, "grad_norm": 0.3786256015300751, "learning_rate": 1.7593531225055338e-05, "loss": 0.5128, "step": 16505 }, { "epoch": 0.4532125205930807, "grad_norm": 0.352970689535141, "learning_rate": 1.7593250194110173e-05, "loss": 0.5215, "step": 16506 }, { "epoch": 0.45323997803404725, "grad_norm": 0.3822009861469269, "learning_rate": 1.7592969149001177e-05, "loss": 0.5164, "step": 16507 }, { "epoch": 0.45326743547501375, "grad_norm": 0.35719236731529236, "learning_rate": 1.759268808972887e-05, "loss": 0.5537, "step": 16508 }, { "epoch": 0.45329489291598024, "grad_norm": 0.348567396402359, "learning_rate": 1.7592407016293773e-05, "loss": 0.5002, "step": 16509 }, { "epoch": 0.45332235035694673, "grad_norm": 0.40390071272850037, "learning_rate": 1.7592125928696416e-05, "loss": 0.4902, "step": 16510 }, { "epoch": 0.45334980779791323, "grad_norm": 0.38037532567977905, "learning_rate": 1.759184482693732e-05, "loss": 0.4783, "step": 16511 }, { "epoch": 0.4533772652388797, "grad_norm": 0.3663862645626068, "learning_rate": 1.759156371101701e-05, "loss": 0.4507, "step": 16512 }, { "epoch": 0.4534047226798462, "grad_norm": 0.4120646119117737, "learning_rate": 1.759128258093601e-05, "loss": 0.4852, "step": 16513 }, { "epoch": 0.45343218012081277, "grad_norm": 0.3911692798137665, "learning_rate": 1.7591001436694847e-05, "loss": 0.57, "step": 16514 }, { "epoch": 0.45345963756177926, "grad_norm": 0.34604448080062866, "learning_rate": 1.7590720278294046e-05, "loss": 0.5579, "step": 16515 }, { "epoch": 0.45348709500274575, "grad_norm": 0.4464185833930969, "learning_rate": 1.7590439105734124e-05, "loss": 0.4766, "step": 16516 }, { "epoch": 0.45351455244371225, "grad_norm": 0.37113016843795776, "learning_rate": 1.7590157919015612e-05, "loss": 0.6062, "step": 16517 }, { "epoch": 0.45354200988467874, "grad_norm": 0.374452143907547, "learning_rate": 1.7589876718139032e-05, "loss": 0.5737, "step": 16518 }, { "epoch": 0.45356946732564524, "grad_norm": 0.40791723132133484, "learning_rate": 1.758959550310491e-05, "loss": 0.4911, "step": 16519 }, { "epoch": 0.45359692476661173, "grad_norm": 0.8731909394264221, "learning_rate": 1.7589314273913775e-05, "loss": 0.4884, "step": 16520 }, { "epoch": 0.4536243822075783, "grad_norm": 0.47034579515457153, "learning_rate": 1.758903303056614e-05, "loss": 0.6221, "step": 16521 }, { "epoch": 0.4536518396485448, "grad_norm": 0.33338698744773865, "learning_rate": 1.758875177306254e-05, "loss": 0.5129, "step": 16522 }, { "epoch": 0.45367929708951127, "grad_norm": 0.3691624402999878, "learning_rate": 1.7588470501403495e-05, "loss": 0.501, "step": 16523 }, { "epoch": 0.45370675453047776, "grad_norm": 0.3896225392818451, "learning_rate": 1.758818921558953e-05, "loss": 0.5427, "step": 16524 }, { "epoch": 0.45373421197144426, "grad_norm": 0.3702406585216522, "learning_rate": 1.7587907915621173e-05, "loss": 0.5763, "step": 16525 }, { "epoch": 0.45376166941241075, "grad_norm": 0.36843329668045044, "learning_rate": 1.7587626601498938e-05, "loss": 0.5177, "step": 16526 }, { "epoch": 0.45378912685337724, "grad_norm": 0.3660006821155548, "learning_rate": 1.7587345273223366e-05, "loss": 0.5179, "step": 16527 }, { "epoch": 0.4538165842943438, "grad_norm": 0.3529936373233795, "learning_rate": 1.758706393079497e-05, "loss": 0.506, "step": 16528 }, { "epoch": 0.4538440417353103, "grad_norm": 0.43203458189964294, "learning_rate": 1.758678257421428e-05, "loss": 0.4945, "step": 16529 }, { "epoch": 0.4538714991762768, "grad_norm": 0.36017531156539917, "learning_rate": 1.7586501203481815e-05, "loss": 0.5482, "step": 16530 }, { "epoch": 0.4538989566172433, "grad_norm": 0.36587652564048767, "learning_rate": 1.758621981859811e-05, "loss": 0.425, "step": 16531 }, { "epoch": 0.45392641405820977, "grad_norm": 0.3845268785953522, "learning_rate": 1.758593841956368e-05, "loss": 0.5048, "step": 16532 }, { "epoch": 0.45395387149917626, "grad_norm": 0.3753984868526459, "learning_rate": 1.7585657006379055e-05, "loss": 0.4884, "step": 16533 }, { "epoch": 0.45398132894014276, "grad_norm": 0.4043693244457245, "learning_rate": 1.7585375579044758e-05, "loss": 0.5196, "step": 16534 }, { "epoch": 0.4540087863811093, "grad_norm": 0.3651009500026703, "learning_rate": 1.7585094137561314e-05, "loss": 0.4542, "step": 16535 }, { "epoch": 0.4540362438220758, "grad_norm": 0.3863637447357178, "learning_rate": 1.7584812681929252e-05, "loss": 0.5231, "step": 16536 }, { "epoch": 0.4540637012630423, "grad_norm": 0.3809734582901001, "learning_rate": 1.7584531212149087e-05, "loss": 0.5055, "step": 16537 }, { "epoch": 0.4540911587040088, "grad_norm": 0.37339386343955994, "learning_rate": 1.7584249728221358e-05, "loss": 0.5225, "step": 16538 }, { "epoch": 0.4541186161449753, "grad_norm": 0.3471454679965973, "learning_rate": 1.758396823014658e-05, "loss": 0.4743, "step": 16539 }, { "epoch": 0.4541460735859418, "grad_norm": 0.38942334055900574, "learning_rate": 1.7583686717925276e-05, "loss": 0.5299, "step": 16540 }, { "epoch": 0.4541735310269083, "grad_norm": 0.34102708101272583, "learning_rate": 1.758340519155798e-05, "loss": 0.4184, "step": 16541 }, { "epoch": 0.45420098846787477, "grad_norm": 0.3690069317817688, "learning_rate": 1.7583123651045214e-05, "loss": 0.454, "step": 16542 }, { "epoch": 0.4542284459088413, "grad_norm": 0.3641393482685089, "learning_rate": 1.75828420963875e-05, "loss": 0.4798, "step": 16543 }, { "epoch": 0.4542559033498078, "grad_norm": 0.3586338758468628, "learning_rate": 1.7582560527585365e-05, "loss": 0.5407, "step": 16544 }, { "epoch": 0.4542833607907743, "grad_norm": 0.37158146500587463, "learning_rate": 1.7582278944639335e-05, "loss": 0.5927, "step": 16545 }, { "epoch": 0.4543108182317408, "grad_norm": 0.42106330394744873, "learning_rate": 1.7581997347549935e-05, "loss": 0.5533, "step": 16546 }, { "epoch": 0.4543382756727073, "grad_norm": 0.40784454345703125, "learning_rate": 1.7581715736317693e-05, "loss": 0.5134, "step": 16547 }, { "epoch": 0.4543657331136738, "grad_norm": 0.3454005718231201, "learning_rate": 1.758143411094313e-05, "loss": 0.4547, "step": 16548 }, { "epoch": 0.4543931905546403, "grad_norm": 0.3688507080078125, "learning_rate": 1.7581152471426767e-05, "loss": 0.4753, "step": 16549 }, { "epoch": 0.45442064799560683, "grad_norm": 0.3741554021835327, "learning_rate": 1.758087081776914e-05, "loss": 0.5003, "step": 16550 }, { "epoch": 0.4544481054365733, "grad_norm": 0.43804216384887695, "learning_rate": 1.758058914997077e-05, "loss": 0.5459, "step": 16551 }, { "epoch": 0.4544755628775398, "grad_norm": 0.3778623044490814, "learning_rate": 1.758030746803218e-05, "loss": 0.5833, "step": 16552 }, { "epoch": 0.4545030203185063, "grad_norm": 0.3499540090560913, "learning_rate": 1.7580025771953898e-05, "loss": 0.481, "step": 16553 }, { "epoch": 0.4545304777594728, "grad_norm": 0.31239765882492065, "learning_rate": 1.7579744061736448e-05, "loss": 0.4267, "step": 16554 }, { "epoch": 0.4545579352004393, "grad_norm": 0.3770048916339874, "learning_rate": 1.7579462337380358e-05, "loss": 0.5068, "step": 16555 }, { "epoch": 0.4545853926414058, "grad_norm": 0.43326085805892944, "learning_rate": 1.757918059888615e-05, "loss": 0.5871, "step": 16556 }, { "epoch": 0.45461285008237234, "grad_norm": 0.40523549914360046, "learning_rate": 1.757889884625435e-05, "loss": 0.5912, "step": 16557 }, { "epoch": 0.45464030752333884, "grad_norm": 0.38553765416145325, "learning_rate": 1.757861707948549e-05, "loss": 0.611, "step": 16558 }, { "epoch": 0.45466776496430533, "grad_norm": 0.34674856066703796, "learning_rate": 1.7578335298580086e-05, "loss": 0.5222, "step": 16559 }, { "epoch": 0.4546952224052718, "grad_norm": 0.4021919369697571, "learning_rate": 1.7578053503538666e-05, "loss": 0.5343, "step": 16560 }, { "epoch": 0.4547226798462383, "grad_norm": 0.466279000043869, "learning_rate": 1.757777169436176e-05, "loss": 0.557, "step": 16561 }, { "epoch": 0.4547501372872048, "grad_norm": 0.498821884393692, "learning_rate": 1.757748987104989e-05, "loss": 0.4803, "step": 16562 }, { "epoch": 0.4547775947281713, "grad_norm": 0.3816353380680084, "learning_rate": 1.7577208033603586e-05, "loss": 0.4876, "step": 16563 }, { "epoch": 0.45480505216913786, "grad_norm": 0.3880798816680908, "learning_rate": 1.7576926182023368e-05, "loss": 0.4954, "step": 16564 }, { "epoch": 0.45483250961010435, "grad_norm": 0.4021984934806824, "learning_rate": 1.7576644316309764e-05, "loss": 0.5995, "step": 16565 }, { "epoch": 0.45485996705107085, "grad_norm": 0.42004838585853577, "learning_rate": 1.7576362436463304e-05, "loss": 0.5183, "step": 16566 }, { "epoch": 0.45488742449203734, "grad_norm": 0.4238894283771515, "learning_rate": 1.7576080542484505e-05, "loss": 0.5577, "step": 16567 }, { "epoch": 0.45491488193300383, "grad_norm": 0.4555741250514984, "learning_rate": 1.75757986343739e-05, "loss": 0.6249, "step": 16568 }, { "epoch": 0.45494233937397033, "grad_norm": 0.5429349541664124, "learning_rate": 1.7575516712132015e-05, "loss": 0.4369, "step": 16569 }, { "epoch": 0.4549697968149368, "grad_norm": 0.3127163350582123, "learning_rate": 1.757523477575937e-05, "loss": 0.427, "step": 16570 }, { "epoch": 0.45499725425590337, "grad_norm": 0.39038074016571045, "learning_rate": 1.7574952825256498e-05, "loss": 0.5468, "step": 16571 }, { "epoch": 0.45502471169686987, "grad_norm": 0.5138571858406067, "learning_rate": 1.7574670860623917e-05, "loss": 0.5059, "step": 16572 }, { "epoch": 0.45505216913783636, "grad_norm": 0.41994452476501465, "learning_rate": 1.7574388881862162e-05, "loss": 0.4652, "step": 16573 }, { "epoch": 0.45507962657880285, "grad_norm": 0.44227197766304016, "learning_rate": 1.7574106888971752e-05, "loss": 0.5353, "step": 16574 }, { "epoch": 0.45510708401976935, "grad_norm": 0.345552921295166, "learning_rate": 1.7573824881953215e-05, "loss": 0.5109, "step": 16575 }, { "epoch": 0.45513454146073584, "grad_norm": 0.3799777626991272, "learning_rate": 1.7573542860807077e-05, "loss": 0.5686, "step": 16576 }, { "epoch": 0.45516199890170234, "grad_norm": 0.40512946248054504, "learning_rate": 1.7573260825533866e-05, "loss": 0.4583, "step": 16577 }, { "epoch": 0.4551894563426689, "grad_norm": 0.38737088441848755, "learning_rate": 1.7572978776134103e-05, "loss": 0.4859, "step": 16578 }, { "epoch": 0.4552169137836354, "grad_norm": 0.46383780241012573, "learning_rate": 1.7572696712608318e-05, "loss": 0.54, "step": 16579 }, { "epoch": 0.4552443712246019, "grad_norm": 0.3988005816936493, "learning_rate": 1.757241463495704e-05, "loss": 0.516, "step": 16580 }, { "epoch": 0.45527182866556837, "grad_norm": 0.35547006130218506, "learning_rate": 1.7572132543180787e-05, "loss": 0.5222, "step": 16581 }, { "epoch": 0.45529928610653486, "grad_norm": 0.35235831141471863, "learning_rate": 1.7571850437280095e-05, "loss": 0.3366, "step": 16582 }, { "epoch": 0.45532674354750136, "grad_norm": 0.3560246527194977, "learning_rate": 1.7571568317255484e-05, "loss": 0.5256, "step": 16583 }, { "epoch": 0.45535420098846785, "grad_norm": 0.36116281151771545, "learning_rate": 1.757128618310748e-05, "loss": 0.4665, "step": 16584 }, { "epoch": 0.4553816584294344, "grad_norm": 0.3748455047607422, "learning_rate": 1.757100403483661e-05, "loss": 0.6031, "step": 16585 }, { "epoch": 0.4554091158704009, "grad_norm": 0.39700865745544434, "learning_rate": 1.7570721872443406e-05, "loss": 0.4209, "step": 16586 }, { "epoch": 0.4554365733113674, "grad_norm": 0.5671699047088623, "learning_rate": 1.7570439695928385e-05, "loss": 0.5576, "step": 16587 }, { "epoch": 0.4554640307523339, "grad_norm": 0.3827188014984131, "learning_rate": 1.757015750529208e-05, "loss": 0.5963, "step": 16588 }, { "epoch": 0.4554914881933004, "grad_norm": 0.3311350345611572, "learning_rate": 1.7569875300535016e-05, "loss": 0.5518, "step": 16589 }, { "epoch": 0.45551894563426687, "grad_norm": 0.3538949191570282, "learning_rate": 1.7569593081657713e-05, "loss": 0.5305, "step": 16590 }, { "epoch": 0.45554640307523336, "grad_norm": 0.4465828537940979, "learning_rate": 1.756931084866071e-05, "loss": 0.487, "step": 16591 }, { "epoch": 0.4555738605161999, "grad_norm": 0.42497649788856506, "learning_rate": 1.756902860154452e-05, "loss": 0.5926, "step": 16592 }, { "epoch": 0.4556013179571664, "grad_norm": 0.46656814217567444, "learning_rate": 1.7568746340309682e-05, "loss": 0.5881, "step": 16593 }, { "epoch": 0.4556287753981329, "grad_norm": 0.33339646458625793, "learning_rate": 1.7568464064956715e-05, "loss": 0.5144, "step": 16594 }, { "epoch": 0.4556562328390994, "grad_norm": 0.3568938374519348, "learning_rate": 1.7568181775486145e-05, "loss": 0.5803, "step": 16595 }, { "epoch": 0.4556836902800659, "grad_norm": 0.38381120562553406, "learning_rate": 1.75678994718985e-05, "loss": 0.6159, "step": 16596 }, { "epoch": 0.4557111477210324, "grad_norm": 0.41803234815597534, "learning_rate": 1.756761715419431e-05, "loss": 0.5217, "step": 16597 }, { "epoch": 0.4557386051619989, "grad_norm": 0.39402201771736145, "learning_rate": 1.7567334822374094e-05, "loss": 0.5001, "step": 16598 }, { "epoch": 0.4557660626029654, "grad_norm": 0.36497536301612854, "learning_rate": 1.7567052476438386e-05, "loss": 0.4662, "step": 16599 }, { "epoch": 0.4557935200439319, "grad_norm": 0.48700135946273804, "learning_rate": 1.756677011638771e-05, "loss": 0.5115, "step": 16600 }, { "epoch": 0.4558209774848984, "grad_norm": 0.4429991543292999, "learning_rate": 1.7566487742222594e-05, "loss": 0.4987, "step": 16601 }, { "epoch": 0.4558484349258649, "grad_norm": 0.34190091490745544, "learning_rate": 1.756620535394356e-05, "loss": 0.509, "step": 16602 }, { "epoch": 0.4558758923668314, "grad_norm": 0.3770783245563507, "learning_rate": 1.7565922951551142e-05, "loss": 0.5336, "step": 16603 }, { "epoch": 0.4559033498077979, "grad_norm": 0.37652865052223206, "learning_rate": 1.7565640535045864e-05, "loss": 0.4614, "step": 16604 }, { "epoch": 0.4559308072487644, "grad_norm": 0.3874792158603668, "learning_rate": 1.7565358104428247e-05, "loss": 0.4834, "step": 16605 }, { "epoch": 0.45595826468973094, "grad_norm": 0.3723720908164978, "learning_rate": 1.7565075659698825e-05, "loss": 0.4479, "step": 16606 }, { "epoch": 0.45598572213069744, "grad_norm": 0.3963593542575836, "learning_rate": 1.756479320085812e-05, "loss": 0.6025, "step": 16607 }, { "epoch": 0.45601317957166393, "grad_norm": 0.48961296677589417, "learning_rate": 1.7564510727906666e-05, "loss": 0.5453, "step": 16608 }, { "epoch": 0.4560406370126304, "grad_norm": 0.3994031548500061, "learning_rate": 1.7564228240844983e-05, "loss": 0.5269, "step": 16609 }, { "epoch": 0.4560680944535969, "grad_norm": 0.3720681369304657, "learning_rate": 1.7563945739673598e-05, "loss": 0.509, "step": 16610 }, { "epoch": 0.4560955518945634, "grad_norm": 0.4138161540031433, "learning_rate": 1.7563663224393042e-05, "loss": 0.5282, "step": 16611 }, { "epoch": 0.4561230093355299, "grad_norm": 0.3703373670578003, "learning_rate": 1.756338069500384e-05, "loss": 0.4826, "step": 16612 }, { "epoch": 0.45615046677649645, "grad_norm": 0.4052475094795227, "learning_rate": 1.756309815150652e-05, "loss": 0.4918, "step": 16613 }, { "epoch": 0.45617792421746295, "grad_norm": 0.36041557788848877, "learning_rate": 1.7562815593901605e-05, "loss": 0.5902, "step": 16614 }, { "epoch": 0.45620538165842944, "grad_norm": 0.35031044483184814, "learning_rate": 1.7562533022189628e-05, "loss": 0.4645, "step": 16615 }, { "epoch": 0.45623283909939594, "grad_norm": 0.3535565733909607, "learning_rate": 1.756225043637111e-05, "loss": 0.5314, "step": 16616 }, { "epoch": 0.45626029654036243, "grad_norm": 0.3753267824649811, "learning_rate": 1.7561967836446586e-05, "loss": 0.5159, "step": 16617 }, { "epoch": 0.4562877539813289, "grad_norm": 0.34340569376945496, "learning_rate": 1.7561685222416575e-05, "loss": 0.4581, "step": 16618 }, { "epoch": 0.4563152114222954, "grad_norm": 0.3571034371852875, "learning_rate": 1.756140259428161e-05, "loss": 0.5259, "step": 16619 }, { "epoch": 0.45634266886326197, "grad_norm": 0.3868033289909363, "learning_rate": 1.7561119952042213e-05, "loss": 0.5184, "step": 16620 }, { "epoch": 0.45637012630422846, "grad_norm": 0.3761453926563263, "learning_rate": 1.7560837295698915e-05, "loss": 0.5033, "step": 16621 }, { "epoch": 0.45639758374519496, "grad_norm": 0.3405040502548218, "learning_rate": 1.7560554625252244e-05, "loss": 0.4424, "step": 16622 }, { "epoch": 0.45642504118616145, "grad_norm": 0.35442841053009033, "learning_rate": 1.7560271940702725e-05, "loss": 0.524, "step": 16623 }, { "epoch": 0.45645249862712794, "grad_norm": 0.344390869140625, "learning_rate": 1.7559989242050884e-05, "loss": 0.4655, "step": 16624 }, { "epoch": 0.45647995606809444, "grad_norm": 0.47831660509109497, "learning_rate": 1.7559706529297252e-05, "loss": 0.5965, "step": 16625 }, { "epoch": 0.45650741350906093, "grad_norm": 0.37052714824676514, "learning_rate": 1.7559423802442354e-05, "loss": 0.6006, "step": 16626 }, { "epoch": 0.4565348709500275, "grad_norm": 0.38625040650367737, "learning_rate": 1.755914106148672e-05, "loss": 0.5618, "step": 16627 }, { "epoch": 0.456562328390994, "grad_norm": 0.35605087876319885, "learning_rate": 1.7558858306430873e-05, "loss": 0.446, "step": 16628 }, { "epoch": 0.45658978583196047, "grad_norm": 0.4145413637161255, "learning_rate": 1.7558575537275343e-05, "loss": 0.5476, "step": 16629 }, { "epoch": 0.45661724327292696, "grad_norm": 0.37305736541748047, "learning_rate": 1.7558292754020655e-05, "loss": 0.6043, "step": 16630 }, { "epoch": 0.45664470071389346, "grad_norm": 0.3537141978740692, "learning_rate": 1.7558009956667344e-05, "loss": 0.5077, "step": 16631 }, { "epoch": 0.45667215815485995, "grad_norm": 0.36483174562454224, "learning_rate": 1.755772714521593e-05, "loss": 0.5514, "step": 16632 }, { "epoch": 0.45669961559582645, "grad_norm": 0.35622337460517883, "learning_rate": 1.755744431966694e-05, "loss": 0.6007, "step": 16633 }, { "epoch": 0.456727073036793, "grad_norm": 0.8003289699554443, "learning_rate": 1.7557161480020905e-05, "loss": 0.4634, "step": 16634 }, { "epoch": 0.4567545304777595, "grad_norm": 0.4081186354160309, "learning_rate": 1.7556878626278354e-05, "loss": 0.526, "step": 16635 }, { "epoch": 0.456781987918726, "grad_norm": 0.5972845554351807, "learning_rate": 1.7556595758439813e-05, "loss": 0.5012, "step": 16636 }, { "epoch": 0.4568094453596925, "grad_norm": 0.35675859451293945, "learning_rate": 1.7556312876505807e-05, "loss": 0.4866, "step": 16637 }, { "epoch": 0.456836902800659, "grad_norm": 0.3735601007938385, "learning_rate": 1.7556029980476864e-05, "loss": 0.5557, "step": 16638 }, { "epoch": 0.45686436024162547, "grad_norm": 0.353425532579422, "learning_rate": 1.7555747070353517e-05, "loss": 0.5167, "step": 16639 }, { "epoch": 0.45689181768259196, "grad_norm": 0.4468083381652832, "learning_rate": 1.755546414613629e-05, "loss": 0.5285, "step": 16640 }, { "epoch": 0.4569192751235585, "grad_norm": 0.3692243695259094, "learning_rate": 1.7555181207825712e-05, "loss": 0.5675, "step": 16641 }, { "epoch": 0.456946732564525, "grad_norm": 0.33781954646110535, "learning_rate": 1.7554898255422306e-05, "loss": 0.4514, "step": 16642 }, { "epoch": 0.4569741900054915, "grad_norm": 0.38140323758125305, "learning_rate": 1.7554615288926605e-05, "loss": 0.4385, "step": 16643 }, { "epoch": 0.457001647446458, "grad_norm": 0.35141217708587646, "learning_rate": 1.7554332308339136e-05, "loss": 0.4929, "step": 16644 }, { "epoch": 0.4570291048874245, "grad_norm": 0.3962617516517639, "learning_rate": 1.7554049313660426e-05, "loss": 0.478, "step": 16645 }, { "epoch": 0.457056562328391, "grad_norm": 0.3716096878051758, "learning_rate": 1.7553766304891002e-05, "loss": 0.5303, "step": 16646 }, { "epoch": 0.4570840197693575, "grad_norm": 0.41047346591949463, "learning_rate": 1.7553483282031398e-05, "loss": 0.4643, "step": 16647 }, { "epoch": 0.457111477210324, "grad_norm": 0.3691604733467102, "learning_rate": 1.755320024508213e-05, "loss": 0.5111, "step": 16648 }, { "epoch": 0.4571389346512905, "grad_norm": 0.4332333505153656, "learning_rate": 1.7552917194043734e-05, "loss": 0.6241, "step": 16649 }, { "epoch": 0.457166392092257, "grad_norm": 0.38561490178108215, "learning_rate": 1.755263412891674e-05, "loss": 0.4974, "step": 16650 }, { "epoch": 0.4571938495332235, "grad_norm": 0.343030720949173, "learning_rate": 1.755235104970167e-05, "loss": 0.5167, "step": 16651 }, { "epoch": 0.45722130697419, "grad_norm": 0.3809076249599457, "learning_rate": 1.7552067956399057e-05, "loss": 0.4581, "step": 16652 }, { "epoch": 0.4572487644151565, "grad_norm": 0.40163013339042664, "learning_rate": 1.7551784849009424e-05, "loss": 0.5214, "step": 16653 }, { "epoch": 0.457276221856123, "grad_norm": 1.0973079204559326, "learning_rate": 1.7551501727533304e-05, "loss": 0.4283, "step": 16654 }, { "epoch": 0.45730367929708954, "grad_norm": 0.39215803146362305, "learning_rate": 1.7551218591971223e-05, "loss": 0.4807, "step": 16655 }, { "epoch": 0.45733113673805603, "grad_norm": 0.37104201316833496, "learning_rate": 1.7550935442323713e-05, "loss": 0.5496, "step": 16656 }, { "epoch": 0.4573585941790225, "grad_norm": 0.32840776443481445, "learning_rate": 1.755065227859129e-05, "loss": 0.421, "step": 16657 }, { "epoch": 0.457386051619989, "grad_norm": 0.30561700463294983, "learning_rate": 1.75503691007745e-05, "loss": 0.472, "step": 16658 }, { "epoch": 0.4574135090609555, "grad_norm": 0.37705928087234497, "learning_rate": 1.7550085908873856e-05, "loss": 0.5483, "step": 16659 }, { "epoch": 0.457440966501922, "grad_norm": 0.3470211625099182, "learning_rate": 1.7549802702889893e-05, "loss": 0.4864, "step": 16660 }, { "epoch": 0.4574684239428885, "grad_norm": 0.3672449588775635, "learning_rate": 1.754951948282314e-05, "loss": 0.5152, "step": 16661 }, { "epoch": 0.45749588138385505, "grad_norm": 0.3696678578853607, "learning_rate": 1.754923624867412e-05, "loss": 0.4842, "step": 16662 }, { "epoch": 0.45752333882482155, "grad_norm": 0.3849492073059082, "learning_rate": 1.754895300044337e-05, "loss": 0.5216, "step": 16663 }, { "epoch": 0.45755079626578804, "grad_norm": 0.4267268478870392, "learning_rate": 1.754866973813141e-05, "loss": 0.5594, "step": 16664 }, { "epoch": 0.45757825370675453, "grad_norm": 0.3731401562690735, "learning_rate": 1.7548386461738772e-05, "loss": 0.5424, "step": 16665 }, { "epoch": 0.45760571114772103, "grad_norm": 0.4468056559562683, "learning_rate": 1.7548103171265983e-05, "loss": 0.4999, "step": 16666 }, { "epoch": 0.4576331685886875, "grad_norm": 0.37811264395713806, "learning_rate": 1.7547819866713575e-05, "loss": 0.4929, "step": 16667 }, { "epoch": 0.457660626029654, "grad_norm": 0.4378097951412201, "learning_rate": 1.7547536548082073e-05, "loss": 0.6317, "step": 16668 }, { "epoch": 0.45768808347062057, "grad_norm": 0.3596460819244385, "learning_rate": 1.7547253215372007e-05, "loss": 0.5302, "step": 16669 }, { "epoch": 0.45771554091158706, "grad_norm": 0.39432626962661743, "learning_rate": 1.7546969868583903e-05, "loss": 0.5539, "step": 16670 }, { "epoch": 0.45774299835255355, "grad_norm": 0.38308092951774597, "learning_rate": 1.7546686507718292e-05, "loss": 0.486, "step": 16671 }, { "epoch": 0.45777045579352005, "grad_norm": 0.38907116651535034, "learning_rate": 1.7546403132775704e-05, "loss": 0.4817, "step": 16672 }, { "epoch": 0.45779791323448654, "grad_norm": 0.3749037981033325, "learning_rate": 1.7546119743756663e-05, "loss": 0.5016, "step": 16673 }, { "epoch": 0.45782537067545304, "grad_norm": 0.4085845351219177, "learning_rate": 1.7545836340661702e-05, "loss": 0.5157, "step": 16674 }, { "epoch": 0.45785282811641953, "grad_norm": 0.4375322163105011, "learning_rate": 1.7545552923491348e-05, "loss": 0.5177, "step": 16675 }, { "epoch": 0.457880285557386, "grad_norm": 0.38231489062309265, "learning_rate": 1.7545269492246128e-05, "loss": 0.5856, "step": 16676 }, { "epoch": 0.4579077429983526, "grad_norm": 0.3738904595375061, "learning_rate": 1.7544986046926575e-05, "loss": 0.4724, "step": 16677 }, { "epoch": 0.45793520043931907, "grad_norm": 0.5630139708518982, "learning_rate": 1.754470258753321e-05, "loss": 0.5109, "step": 16678 }, { "epoch": 0.45796265788028556, "grad_norm": 0.34499862790107727, "learning_rate": 1.7544419114066572e-05, "loss": 0.4394, "step": 16679 }, { "epoch": 0.45799011532125206, "grad_norm": 0.35807371139526367, "learning_rate": 1.7544135626527184e-05, "loss": 0.438, "step": 16680 }, { "epoch": 0.45801757276221855, "grad_norm": 0.37431856989860535, "learning_rate": 1.754385212491557e-05, "loss": 0.5276, "step": 16681 }, { "epoch": 0.45804503020318504, "grad_norm": 0.4551090598106384, "learning_rate": 1.7543568609232267e-05, "loss": 0.5146, "step": 16682 }, { "epoch": 0.45807248764415154, "grad_norm": 0.371967077255249, "learning_rate": 1.7543285079477804e-05, "loss": 0.4934, "step": 16683 }, { "epoch": 0.4580999450851181, "grad_norm": 0.3527933955192566, "learning_rate": 1.7543001535652706e-05, "loss": 0.4255, "step": 16684 }, { "epoch": 0.4581274025260846, "grad_norm": 0.3506869971752167, "learning_rate": 1.75427179777575e-05, "loss": 0.5534, "step": 16685 }, { "epoch": 0.4581548599670511, "grad_norm": 0.41786202788352966, "learning_rate": 1.7542434405792715e-05, "loss": 0.5485, "step": 16686 }, { "epoch": 0.45818231740801757, "grad_norm": 0.3686217963695526, "learning_rate": 1.754215081975889e-05, "loss": 0.488, "step": 16687 }, { "epoch": 0.45820977484898406, "grad_norm": 0.33872222900390625, "learning_rate": 1.7541867219656543e-05, "loss": 0.5612, "step": 16688 }, { "epoch": 0.45823723228995056, "grad_norm": 0.3853532671928406, "learning_rate": 1.7541583605486207e-05, "loss": 0.5288, "step": 16689 }, { "epoch": 0.45826468973091705, "grad_norm": 0.905916154384613, "learning_rate": 1.7541299977248408e-05, "loss": 0.5022, "step": 16690 }, { "epoch": 0.4582921471718836, "grad_norm": 0.31432631611824036, "learning_rate": 1.754101633494368e-05, "loss": 0.4364, "step": 16691 }, { "epoch": 0.4583196046128501, "grad_norm": 0.37017613649368286, "learning_rate": 1.754073267857255e-05, "loss": 0.5579, "step": 16692 }, { "epoch": 0.4583470620538166, "grad_norm": 0.4064299464225769, "learning_rate": 1.7540449008135544e-05, "loss": 0.5189, "step": 16693 }, { "epoch": 0.4583745194947831, "grad_norm": 0.34623581171035767, "learning_rate": 1.75401653236332e-05, "loss": 0.5296, "step": 16694 }, { "epoch": 0.4584019769357496, "grad_norm": 0.4108019471168518, "learning_rate": 1.7539881625066035e-05, "loss": 0.5246, "step": 16695 }, { "epoch": 0.45842943437671607, "grad_norm": 0.3541775941848755, "learning_rate": 1.7539597912434586e-05, "loss": 0.5397, "step": 16696 }, { "epoch": 0.45845689181768257, "grad_norm": 0.41107672452926636, "learning_rate": 1.753931418573938e-05, "loss": 0.5178, "step": 16697 }, { "epoch": 0.4584843492586491, "grad_norm": 0.3717249929904938, "learning_rate": 1.753903044498095e-05, "loss": 0.5166, "step": 16698 }, { "epoch": 0.4585118066996156, "grad_norm": 0.4021792709827423, "learning_rate": 1.753874669015982e-05, "loss": 0.5156, "step": 16699 }, { "epoch": 0.4585392641405821, "grad_norm": 0.41659197211265564, "learning_rate": 1.753846292127652e-05, "loss": 0.5074, "step": 16700 }, { "epoch": 0.4585667215815486, "grad_norm": 0.3796684145927429, "learning_rate": 1.753817913833158e-05, "loss": 0.5701, "step": 16701 }, { "epoch": 0.4585941790225151, "grad_norm": 0.3828224241733551, "learning_rate": 1.7537895341325533e-05, "loss": 0.5344, "step": 16702 }, { "epoch": 0.4586216364634816, "grad_norm": 0.3642633855342865, "learning_rate": 1.7537611530258905e-05, "loss": 0.4574, "step": 16703 }, { "epoch": 0.4586490939044481, "grad_norm": 0.3427603840827942, "learning_rate": 1.7537327705132224e-05, "loss": 0.4947, "step": 16704 }, { "epoch": 0.45867655134541463, "grad_norm": 0.4845469892024994, "learning_rate": 1.7537043865946023e-05, "loss": 0.5604, "step": 16705 }, { "epoch": 0.4587040087863811, "grad_norm": 0.3538109064102173, "learning_rate": 1.7536760012700827e-05, "loss": 0.4682, "step": 16706 }, { "epoch": 0.4587314662273476, "grad_norm": 0.3690710663795471, "learning_rate": 1.753647614539717e-05, "loss": 0.5767, "step": 16707 }, { "epoch": 0.4587589236683141, "grad_norm": 0.3553994596004486, "learning_rate": 1.753619226403558e-05, "loss": 0.4549, "step": 16708 }, { "epoch": 0.4587863811092806, "grad_norm": 0.3812497854232788, "learning_rate": 1.7535908368616587e-05, "loss": 0.5672, "step": 16709 }, { "epoch": 0.4588138385502471, "grad_norm": 0.354568213224411, "learning_rate": 1.7535624459140715e-05, "loss": 0.551, "step": 16710 }, { "epoch": 0.4588412959912136, "grad_norm": 0.3369874656200409, "learning_rate": 1.7535340535608505e-05, "loss": 0.463, "step": 16711 }, { "epoch": 0.45886875343218014, "grad_norm": 0.36775216460227966, "learning_rate": 1.7535056598020477e-05, "loss": 0.5142, "step": 16712 }, { "epoch": 0.45889621087314664, "grad_norm": 0.3682059645652771, "learning_rate": 1.753477264637716e-05, "loss": 0.3859, "step": 16713 }, { "epoch": 0.45892366831411313, "grad_norm": 0.34382978081703186, "learning_rate": 1.7534488680679095e-05, "loss": 0.4977, "step": 16714 }, { "epoch": 0.4589511257550796, "grad_norm": 0.6165159344673157, "learning_rate": 1.7534204700926797e-05, "loss": 0.5074, "step": 16715 }, { "epoch": 0.4589785831960461, "grad_norm": 0.3677917718887329, "learning_rate": 1.7533920707120807e-05, "loss": 0.5544, "step": 16716 }, { "epoch": 0.4590060406370126, "grad_norm": 0.4692487418651581, "learning_rate": 1.7533636699261645e-05, "loss": 0.4814, "step": 16717 }, { "epoch": 0.4590334980779791, "grad_norm": 0.52354896068573, "learning_rate": 1.753335267734985e-05, "loss": 0.5397, "step": 16718 }, { "epoch": 0.45906095551894566, "grad_norm": 0.3549933433532715, "learning_rate": 1.7533068641385947e-05, "loss": 0.4893, "step": 16719 }, { "epoch": 0.45908841295991215, "grad_norm": 0.3826676309108734, "learning_rate": 1.7532784591370467e-05, "loss": 0.5759, "step": 16720 }, { "epoch": 0.45911587040087865, "grad_norm": 0.4681667685508728, "learning_rate": 1.753250052730394e-05, "loss": 0.5063, "step": 16721 }, { "epoch": 0.45914332784184514, "grad_norm": 0.40760254859924316, "learning_rate": 1.7532216449186896e-05, "loss": 0.5342, "step": 16722 }, { "epoch": 0.45917078528281163, "grad_norm": 0.3598197102546692, "learning_rate": 1.7531932357019862e-05, "loss": 0.4858, "step": 16723 }, { "epoch": 0.4591982427237781, "grad_norm": 0.3490156829357147, "learning_rate": 1.753164825080337e-05, "loss": 0.4262, "step": 16724 }, { "epoch": 0.4592257001647446, "grad_norm": 0.3888266086578369, "learning_rate": 1.7531364130537953e-05, "loss": 0.6043, "step": 16725 }, { "epoch": 0.45925315760571117, "grad_norm": 0.4135594666004181, "learning_rate": 1.7531079996224138e-05, "loss": 0.5756, "step": 16726 }, { "epoch": 0.45928061504667766, "grad_norm": 0.33224770426750183, "learning_rate": 1.7530795847862455e-05, "loss": 0.4661, "step": 16727 }, { "epoch": 0.45930807248764416, "grad_norm": 0.3955075442790985, "learning_rate": 1.753051168545343e-05, "loss": 0.5325, "step": 16728 }, { "epoch": 0.45933552992861065, "grad_norm": 0.3918803930282593, "learning_rate": 1.7530227508997603e-05, "loss": 0.4975, "step": 16729 }, { "epoch": 0.45936298736957715, "grad_norm": 0.4016367495059967, "learning_rate": 1.7529943318495495e-05, "loss": 0.4978, "step": 16730 }, { "epoch": 0.45939044481054364, "grad_norm": 0.808336615562439, "learning_rate": 1.752965911394764e-05, "loss": 0.4729, "step": 16731 }, { "epoch": 0.45941790225151014, "grad_norm": 0.40157440304756165, "learning_rate": 1.7529374895354565e-05, "loss": 0.5586, "step": 16732 }, { "epoch": 0.4594453596924767, "grad_norm": 0.40272584557533264, "learning_rate": 1.7529090662716807e-05, "loss": 0.5211, "step": 16733 }, { "epoch": 0.4594728171334432, "grad_norm": 0.33085769414901733, "learning_rate": 1.752880641603489e-05, "loss": 0.5189, "step": 16734 }, { "epoch": 0.4595002745744097, "grad_norm": 0.38534781336784363, "learning_rate": 1.7528522155309346e-05, "loss": 0.4922, "step": 16735 }, { "epoch": 0.45952773201537617, "grad_norm": 0.38339370489120483, "learning_rate": 1.7528237880540707e-05, "loss": 0.5197, "step": 16736 }, { "epoch": 0.45955518945634266, "grad_norm": 0.48465147614479065, "learning_rate": 1.75279535917295e-05, "loss": 0.5936, "step": 16737 }, { "epoch": 0.45958264689730915, "grad_norm": 0.4105777144432068, "learning_rate": 1.752766928887626e-05, "loss": 0.5193, "step": 16738 }, { "epoch": 0.45961010433827565, "grad_norm": 0.36207860708236694, "learning_rate": 1.752738497198151e-05, "loss": 0.5441, "step": 16739 }, { "epoch": 0.4596375617792422, "grad_norm": 0.3734719753265381, "learning_rate": 1.7527100641045783e-05, "loss": 0.5642, "step": 16740 }, { "epoch": 0.4596650192202087, "grad_norm": 0.354739785194397, "learning_rate": 1.7526816296069616e-05, "loss": 0.4664, "step": 16741 }, { "epoch": 0.4596924766611752, "grad_norm": 0.37582337856292725, "learning_rate": 1.7526531937053532e-05, "loss": 0.4562, "step": 16742 }, { "epoch": 0.4597199341021417, "grad_norm": 0.3974027931690216, "learning_rate": 1.7526247563998064e-05, "loss": 0.5608, "step": 16743 }, { "epoch": 0.4597473915431082, "grad_norm": 0.37814322113990784, "learning_rate": 1.7525963176903742e-05, "loss": 0.4445, "step": 16744 }, { "epoch": 0.45977484898407467, "grad_norm": 0.38701900839805603, "learning_rate": 1.7525678775771097e-05, "loss": 0.5904, "step": 16745 }, { "epoch": 0.45980230642504116, "grad_norm": 0.38682299852371216, "learning_rate": 1.752539436060066e-05, "loss": 0.5112, "step": 16746 }, { "epoch": 0.4598297638660077, "grad_norm": 0.3622158169746399, "learning_rate": 1.7525109931392957e-05, "loss": 0.5587, "step": 16747 }, { "epoch": 0.4598572213069742, "grad_norm": 0.3827058672904968, "learning_rate": 1.752482548814853e-05, "loss": 0.4842, "step": 16748 }, { "epoch": 0.4598846787479407, "grad_norm": 0.5879247188568115, "learning_rate": 1.7524541030867893e-05, "loss": 0.5222, "step": 16749 }, { "epoch": 0.4599121361889072, "grad_norm": 0.36257269978523254, "learning_rate": 1.752425655955159e-05, "loss": 0.5449, "step": 16750 }, { "epoch": 0.4599395936298737, "grad_norm": 0.37266722321510315, "learning_rate": 1.7523972074200148e-05, "loss": 0.496, "step": 16751 }, { "epoch": 0.4599670510708402, "grad_norm": 0.35239923000335693, "learning_rate": 1.7523687574814096e-05, "loss": 0.563, "step": 16752 }, { "epoch": 0.4599945085118067, "grad_norm": 0.3448857367038727, "learning_rate": 1.752340306139396e-05, "loss": 0.4717, "step": 16753 }, { "epoch": 0.4600219659527732, "grad_norm": 0.3562699258327484, "learning_rate": 1.752311853394028e-05, "loss": 0.5533, "step": 16754 }, { "epoch": 0.4600494233937397, "grad_norm": 0.42588892579078674, "learning_rate": 1.7522833992453588e-05, "loss": 0.5401, "step": 16755 }, { "epoch": 0.4600768808347062, "grad_norm": 0.3418267071247101, "learning_rate": 1.7522549436934405e-05, "loss": 0.4017, "step": 16756 }, { "epoch": 0.4601043382756727, "grad_norm": 0.3489398658275604, "learning_rate": 1.7522264867383264e-05, "loss": 0.5726, "step": 16757 }, { "epoch": 0.4601317957166392, "grad_norm": 0.43054139614105225, "learning_rate": 1.75219802838007e-05, "loss": 0.537, "step": 16758 }, { "epoch": 0.4601592531576057, "grad_norm": 0.31304270029067993, "learning_rate": 1.7521695686187244e-05, "loss": 0.5054, "step": 16759 }, { "epoch": 0.4601867105985722, "grad_norm": 0.3848211169242859, "learning_rate": 1.7521411074543424e-05, "loss": 0.5323, "step": 16760 }, { "epoch": 0.46021416803953874, "grad_norm": 0.4069370627403259, "learning_rate": 1.752112644886977e-05, "loss": 0.51, "step": 16761 }, { "epoch": 0.46024162548050523, "grad_norm": 0.35784202814102173, "learning_rate": 1.7520841809166816e-05, "loss": 0.4229, "step": 16762 }, { "epoch": 0.46026908292147173, "grad_norm": 0.38886719942092896, "learning_rate": 1.752055715543509e-05, "loss": 0.5118, "step": 16763 }, { "epoch": 0.4602965403624382, "grad_norm": 0.40310996770858765, "learning_rate": 1.752027248767513e-05, "loss": 0.5681, "step": 16764 }, { "epoch": 0.4603239978034047, "grad_norm": 0.3980056345462799, "learning_rate": 1.7519987805887457e-05, "loss": 0.4813, "step": 16765 }, { "epoch": 0.4603514552443712, "grad_norm": 0.38054293394088745, "learning_rate": 1.7519703110072605e-05, "loss": 0.4898, "step": 16766 }, { "epoch": 0.4603789126853377, "grad_norm": 0.3632095754146576, "learning_rate": 1.751941840023111e-05, "loss": 0.516, "step": 16767 }, { "epoch": 0.46040637012630425, "grad_norm": 0.36414584517478943, "learning_rate": 1.75191336763635e-05, "loss": 0.46, "step": 16768 }, { "epoch": 0.46043382756727075, "grad_norm": 0.3761109709739685, "learning_rate": 1.7518848938470303e-05, "loss": 0.555, "step": 16769 }, { "epoch": 0.46046128500823724, "grad_norm": 0.3865005373954773, "learning_rate": 1.7518564186552058e-05, "loss": 0.4395, "step": 16770 }, { "epoch": 0.46048874244920374, "grad_norm": 0.4563003182411194, "learning_rate": 1.7518279420609284e-05, "loss": 0.5633, "step": 16771 }, { "epoch": 0.46051619989017023, "grad_norm": 0.36994031071662903, "learning_rate": 1.7517994640642525e-05, "loss": 0.5266, "step": 16772 }, { "epoch": 0.4605436573311367, "grad_norm": 0.45069026947021484, "learning_rate": 1.7517709846652306e-05, "loss": 0.5147, "step": 16773 }, { "epoch": 0.4605711147721032, "grad_norm": 0.3383224308490753, "learning_rate": 1.7517425038639157e-05, "loss": 0.5143, "step": 16774 }, { "epoch": 0.46059857221306977, "grad_norm": 0.3929903507232666, "learning_rate": 1.751714021660361e-05, "loss": 0.548, "step": 16775 }, { "epoch": 0.46062602965403626, "grad_norm": 0.3944860100746155, "learning_rate": 1.75168553805462e-05, "loss": 0.4374, "step": 16776 }, { "epoch": 0.46065348709500276, "grad_norm": 0.3671780526638031, "learning_rate": 1.7516570530467457e-05, "loss": 0.5177, "step": 16777 }, { "epoch": 0.46068094453596925, "grad_norm": 0.3906111419200897, "learning_rate": 1.7516285666367907e-05, "loss": 0.5124, "step": 16778 }, { "epoch": 0.46070840197693574, "grad_norm": 0.49627116322517395, "learning_rate": 1.7516000788248088e-05, "loss": 0.4591, "step": 16779 }, { "epoch": 0.46073585941790224, "grad_norm": 0.37780576944351196, "learning_rate": 1.7515715896108525e-05, "loss": 0.536, "step": 16780 }, { "epoch": 0.46076331685886873, "grad_norm": 0.39213982224464417, "learning_rate": 1.7515430989949754e-05, "loss": 0.548, "step": 16781 }, { "epoch": 0.4607907742998353, "grad_norm": 0.34261542558670044, "learning_rate": 1.751514606977231e-05, "loss": 0.4236, "step": 16782 }, { "epoch": 0.4608182317408018, "grad_norm": 0.3767632246017456, "learning_rate": 1.7514861135576718e-05, "loss": 0.5187, "step": 16783 }, { "epoch": 0.46084568918176827, "grad_norm": 0.410241037607193, "learning_rate": 1.751457618736351e-05, "loss": 0.468, "step": 16784 }, { "epoch": 0.46087314662273476, "grad_norm": 0.35502946376800537, "learning_rate": 1.7514291225133217e-05, "loss": 0.4548, "step": 16785 }, { "epoch": 0.46090060406370126, "grad_norm": 0.3606296181678772, "learning_rate": 1.7514006248886376e-05, "loss": 0.4148, "step": 16786 }, { "epoch": 0.46092806150466775, "grad_norm": 0.3438888192176819, "learning_rate": 1.7513721258623518e-05, "loss": 0.4475, "step": 16787 }, { "epoch": 0.46095551894563425, "grad_norm": 0.40374937653541565, "learning_rate": 1.7513436254345165e-05, "loss": 0.5647, "step": 16788 }, { "epoch": 0.4609829763866008, "grad_norm": 0.4112730324268341, "learning_rate": 1.751315123605186e-05, "loss": 0.558, "step": 16789 }, { "epoch": 0.4610104338275673, "grad_norm": 0.3497674763202667, "learning_rate": 1.7512866203744126e-05, "loss": 0.4261, "step": 16790 }, { "epoch": 0.4610378912685338, "grad_norm": 0.3575612008571625, "learning_rate": 1.7512581157422503e-05, "loss": 0.5114, "step": 16791 }, { "epoch": 0.4610653487095003, "grad_norm": 0.40974360704421997, "learning_rate": 1.7512296097087517e-05, "loss": 0.5366, "step": 16792 }, { "epoch": 0.46109280615046677, "grad_norm": 0.36033526062965393, "learning_rate": 1.7512011022739702e-05, "loss": 0.5395, "step": 16793 }, { "epoch": 0.46112026359143327, "grad_norm": 0.3374375104904175, "learning_rate": 1.7511725934379587e-05, "loss": 0.4715, "step": 16794 }, { "epoch": 0.46114772103239976, "grad_norm": 0.36099907755851746, "learning_rate": 1.7511440832007707e-05, "loss": 0.5124, "step": 16795 }, { "epoch": 0.4611751784733663, "grad_norm": 0.35933640599250793, "learning_rate": 1.7511155715624594e-05, "loss": 0.5333, "step": 16796 }, { "epoch": 0.4612026359143328, "grad_norm": 0.4124352037906647, "learning_rate": 1.7510870585230774e-05, "loss": 0.4312, "step": 16797 }, { "epoch": 0.4612300933552993, "grad_norm": 0.3248565196990967, "learning_rate": 1.7510585440826785e-05, "loss": 0.4607, "step": 16798 }, { "epoch": 0.4612575507962658, "grad_norm": 0.400428831577301, "learning_rate": 1.751030028241316e-05, "loss": 0.5708, "step": 16799 }, { "epoch": 0.4612850082372323, "grad_norm": 0.3571847081184387, "learning_rate": 1.7510015109990424e-05, "loss": 0.5049, "step": 16800 }, { "epoch": 0.4613124656781988, "grad_norm": 0.36050134897232056, "learning_rate": 1.7509729923559113e-05, "loss": 0.485, "step": 16801 }, { "epoch": 0.4613399231191653, "grad_norm": 0.42100104689598083, "learning_rate": 1.7509444723119758e-05, "loss": 0.4477, "step": 16802 }, { "epoch": 0.4613673805601318, "grad_norm": 0.34857067465782166, "learning_rate": 1.7509159508672897e-05, "loss": 0.6103, "step": 16803 }, { "epoch": 0.4613948380010983, "grad_norm": 0.9561535716056824, "learning_rate": 1.750887428021905e-05, "loss": 0.506, "step": 16804 }, { "epoch": 0.4614222954420648, "grad_norm": 0.41193318367004395, "learning_rate": 1.750858903775876e-05, "loss": 0.472, "step": 16805 }, { "epoch": 0.4614497528830313, "grad_norm": 0.398209810256958, "learning_rate": 1.7508303781292556e-05, "loss": 0.5108, "step": 16806 }, { "epoch": 0.4614772103239978, "grad_norm": 0.3357483148574829, "learning_rate": 1.750801851082097e-05, "loss": 0.4024, "step": 16807 }, { "epoch": 0.4615046677649643, "grad_norm": 0.3751375079154968, "learning_rate": 1.750773322634453e-05, "loss": 0.4384, "step": 16808 }, { "epoch": 0.4615321252059308, "grad_norm": 0.36944738030433655, "learning_rate": 1.7507447927863773e-05, "loss": 0.5253, "step": 16809 }, { "epoch": 0.4615595826468973, "grad_norm": 0.3787977993488312, "learning_rate": 1.7507162615379228e-05, "loss": 0.5318, "step": 16810 }, { "epoch": 0.46158704008786383, "grad_norm": 0.3563523590564728, "learning_rate": 1.750687728889143e-05, "loss": 0.5303, "step": 16811 }, { "epoch": 0.4616144975288303, "grad_norm": 0.37688148021698, "learning_rate": 1.7506591948400912e-05, "loss": 0.4775, "step": 16812 }, { "epoch": 0.4616419549697968, "grad_norm": 0.4257352948188782, "learning_rate": 1.75063065939082e-05, "loss": 0.5303, "step": 16813 }, { "epoch": 0.4616694124107633, "grad_norm": 0.41676396131515503, "learning_rate": 1.7506021225413833e-05, "loss": 0.4791, "step": 16814 }, { "epoch": 0.4616968698517298, "grad_norm": 0.3048132061958313, "learning_rate": 1.750573584291834e-05, "loss": 0.4278, "step": 16815 }, { "epoch": 0.4617243272926963, "grad_norm": 0.36794623732566833, "learning_rate": 1.750545044642225e-05, "loss": 0.5332, "step": 16816 }, { "epoch": 0.4617517847336628, "grad_norm": 0.3846539855003357, "learning_rate": 1.7505165035926107e-05, "loss": 0.5538, "step": 16817 }, { "epoch": 0.46177924217462935, "grad_norm": 0.3650066554546356, "learning_rate": 1.7504879611430433e-05, "loss": 0.4931, "step": 16818 }, { "epoch": 0.46180669961559584, "grad_norm": 0.38273653388023376, "learning_rate": 1.750459417293576e-05, "loss": 0.4998, "step": 16819 }, { "epoch": 0.46183415705656233, "grad_norm": 0.39580708742141724, "learning_rate": 1.7504308720442626e-05, "loss": 0.4672, "step": 16820 }, { "epoch": 0.4618616144975288, "grad_norm": 0.3511931002140045, "learning_rate": 1.750402325395156e-05, "loss": 0.4962, "step": 16821 }, { "epoch": 0.4618890719384953, "grad_norm": 0.3633881211280823, "learning_rate": 1.75037377734631e-05, "loss": 0.4512, "step": 16822 }, { "epoch": 0.4619165293794618, "grad_norm": 0.3536127209663391, "learning_rate": 1.750345227897777e-05, "loss": 0.4467, "step": 16823 }, { "epoch": 0.4619439868204283, "grad_norm": 0.35829076170921326, "learning_rate": 1.750316677049611e-05, "loss": 0.5636, "step": 16824 }, { "epoch": 0.46197144426139486, "grad_norm": 0.39390814304351807, "learning_rate": 1.7502881248018643e-05, "loss": 0.5137, "step": 16825 }, { "epoch": 0.46199890170236135, "grad_norm": 0.3566391170024872, "learning_rate": 1.7502595711545915e-05, "loss": 0.4717, "step": 16826 }, { "epoch": 0.46202635914332785, "grad_norm": 0.37286216020584106, "learning_rate": 1.7502310161078447e-05, "loss": 0.4786, "step": 16827 }, { "epoch": 0.46205381658429434, "grad_norm": 0.35542032122612, "learning_rate": 1.750202459661678e-05, "loss": 0.4233, "step": 16828 }, { "epoch": 0.46208127402526084, "grad_norm": 0.3472912907600403, "learning_rate": 1.750173901816144e-05, "loss": 0.4868, "step": 16829 }, { "epoch": 0.46210873146622733, "grad_norm": 0.3682062327861786, "learning_rate": 1.7501453425712962e-05, "loss": 0.5481, "step": 16830 }, { "epoch": 0.4621361889071938, "grad_norm": 0.3861781060695648, "learning_rate": 1.7501167819271877e-05, "loss": 0.5119, "step": 16831 }, { "epoch": 0.4621636463481604, "grad_norm": 0.3630113899707794, "learning_rate": 1.7500882198838726e-05, "loss": 0.449, "step": 16832 }, { "epoch": 0.46219110378912687, "grad_norm": 0.37886691093444824, "learning_rate": 1.7500596564414034e-05, "loss": 0.517, "step": 16833 }, { "epoch": 0.46221856123009336, "grad_norm": 0.4012962281703949, "learning_rate": 1.7500310915998332e-05, "loss": 0.5408, "step": 16834 }, { "epoch": 0.46224601867105986, "grad_norm": 0.3696017861366272, "learning_rate": 1.7500025253592158e-05, "loss": 0.4565, "step": 16835 }, { "epoch": 0.46227347611202635, "grad_norm": 0.34557557106018066, "learning_rate": 1.7499739577196044e-05, "loss": 0.5124, "step": 16836 }, { "epoch": 0.46230093355299284, "grad_norm": 0.3859192430973053, "learning_rate": 1.749945388681052e-05, "loss": 0.5362, "step": 16837 }, { "epoch": 0.46232839099395934, "grad_norm": 0.35960614681243896, "learning_rate": 1.7499168182436124e-05, "loss": 0.5242, "step": 16838 }, { "epoch": 0.4623558484349259, "grad_norm": 0.36309146881103516, "learning_rate": 1.7498882464073384e-05, "loss": 0.5479, "step": 16839 }, { "epoch": 0.4623833058758924, "grad_norm": 0.37063369154930115, "learning_rate": 1.7498596731722833e-05, "loss": 0.5123, "step": 16840 }, { "epoch": 0.4624107633168589, "grad_norm": 0.34388142824172974, "learning_rate": 1.749831098538501e-05, "loss": 0.4754, "step": 16841 }, { "epoch": 0.46243822075782537, "grad_norm": 0.4301467835903168, "learning_rate": 1.749802522506044e-05, "loss": 0.5208, "step": 16842 }, { "epoch": 0.46246567819879186, "grad_norm": 0.4151974022388458, "learning_rate": 1.7497739450749664e-05, "loss": 0.5694, "step": 16843 }, { "epoch": 0.46249313563975836, "grad_norm": 0.3697217106819153, "learning_rate": 1.7497453662453207e-05, "loss": 0.4461, "step": 16844 }, { "epoch": 0.46252059308072485, "grad_norm": 0.3955533802509308, "learning_rate": 1.749716786017161e-05, "loss": 0.4794, "step": 16845 }, { "epoch": 0.4625480505216914, "grad_norm": 0.3472885489463806, "learning_rate": 1.74968820439054e-05, "loss": 0.4487, "step": 16846 }, { "epoch": 0.4625755079626579, "grad_norm": 0.36240869760513306, "learning_rate": 1.749659621365511e-05, "loss": 0.5615, "step": 16847 }, { "epoch": 0.4626029654036244, "grad_norm": 0.3584919571876526, "learning_rate": 1.7496310369421278e-05, "loss": 0.539, "step": 16848 }, { "epoch": 0.4626304228445909, "grad_norm": 0.38911494612693787, "learning_rate": 1.7496024511204433e-05, "loss": 0.5602, "step": 16849 }, { "epoch": 0.4626578802855574, "grad_norm": 0.4066694378852844, "learning_rate": 1.749573863900511e-05, "loss": 0.5408, "step": 16850 }, { "epoch": 0.46268533772652387, "grad_norm": 0.43434837460517883, "learning_rate": 1.7495452752823846e-05, "loss": 0.56, "step": 16851 }, { "epoch": 0.46271279516749036, "grad_norm": 0.3808327317237854, "learning_rate": 1.7495166852661168e-05, "loss": 0.5396, "step": 16852 }, { "epoch": 0.4627402526084569, "grad_norm": 0.416063129901886, "learning_rate": 1.7494880938517608e-05, "loss": 0.5674, "step": 16853 }, { "epoch": 0.4627677100494234, "grad_norm": 0.37894684076309204, "learning_rate": 1.7494595010393707e-05, "loss": 0.5168, "step": 16854 }, { "epoch": 0.4627951674903899, "grad_norm": 0.34506964683532715, "learning_rate": 1.7494309068289993e-05, "loss": 0.4749, "step": 16855 }, { "epoch": 0.4628226249313564, "grad_norm": 0.42959165573120117, "learning_rate": 1.7494023112207e-05, "loss": 0.5954, "step": 16856 }, { "epoch": 0.4628500823723229, "grad_norm": 0.3894118070602417, "learning_rate": 1.7493737142145264e-05, "loss": 0.5206, "step": 16857 }, { "epoch": 0.4628775398132894, "grad_norm": 0.3563203811645508, "learning_rate": 1.7493451158105314e-05, "loss": 0.4956, "step": 16858 }, { "epoch": 0.4629049972542559, "grad_norm": 0.3687094449996948, "learning_rate": 1.7493165160087687e-05, "loss": 0.5392, "step": 16859 }, { "epoch": 0.46293245469522243, "grad_norm": 0.4027034342288971, "learning_rate": 1.749287914809292e-05, "loss": 0.4506, "step": 16860 }, { "epoch": 0.4629599121361889, "grad_norm": 0.4208536744117737, "learning_rate": 1.7492593122121534e-05, "loss": 0.5461, "step": 16861 }, { "epoch": 0.4629873695771554, "grad_norm": 0.39534875750541687, "learning_rate": 1.7492307082174076e-05, "loss": 0.5556, "step": 16862 }, { "epoch": 0.4630148270181219, "grad_norm": 0.42969900369644165, "learning_rate": 1.749202102825107e-05, "loss": 0.5124, "step": 16863 }, { "epoch": 0.4630422844590884, "grad_norm": 0.3934958875179291, "learning_rate": 1.7491734960353057e-05, "loss": 0.563, "step": 16864 }, { "epoch": 0.4630697419000549, "grad_norm": 0.32671916484832764, "learning_rate": 1.7491448878480568e-05, "loss": 0.4948, "step": 16865 }, { "epoch": 0.4630971993410214, "grad_norm": 0.347296804189682, "learning_rate": 1.7491162782634132e-05, "loss": 0.4806, "step": 16866 }, { "epoch": 0.46312465678198794, "grad_norm": 0.3363370895385742, "learning_rate": 1.749087667281429e-05, "loss": 0.554, "step": 16867 }, { "epoch": 0.46315211422295444, "grad_norm": 0.37550386786460876, "learning_rate": 1.749059054902157e-05, "loss": 0.5572, "step": 16868 }, { "epoch": 0.46317957166392093, "grad_norm": 0.3542129099369049, "learning_rate": 1.7490304411256507e-05, "loss": 0.4829, "step": 16869 }, { "epoch": 0.4632070291048874, "grad_norm": 0.39087581634521484, "learning_rate": 1.749001825951964e-05, "loss": 0.5121, "step": 16870 }, { "epoch": 0.4632344865458539, "grad_norm": 0.3955361545085907, "learning_rate": 1.7489732093811495e-05, "loss": 0.5387, "step": 16871 }, { "epoch": 0.4632619439868204, "grad_norm": 0.3747192621231079, "learning_rate": 1.748944591413261e-05, "loss": 0.5457, "step": 16872 }, { "epoch": 0.4632894014277869, "grad_norm": 0.387796014547348, "learning_rate": 1.7489159720483518e-05, "loss": 0.4478, "step": 16873 }, { "epoch": 0.46331685886875346, "grad_norm": 0.4140513837337494, "learning_rate": 1.748887351286475e-05, "loss": 0.5409, "step": 16874 }, { "epoch": 0.46334431630971995, "grad_norm": 0.3534620702266693, "learning_rate": 1.7488587291276848e-05, "loss": 0.5053, "step": 16875 }, { "epoch": 0.46337177375068644, "grad_norm": 0.369500070810318, "learning_rate": 1.7488301055720337e-05, "loss": 0.579, "step": 16876 }, { "epoch": 0.46339923119165294, "grad_norm": 0.3907381594181061, "learning_rate": 1.7488014806195755e-05, "loss": 0.4658, "step": 16877 }, { "epoch": 0.46342668863261943, "grad_norm": 0.40527015924453735, "learning_rate": 1.7487728542703637e-05, "loss": 0.5886, "step": 16878 }, { "epoch": 0.4634541460735859, "grad_norm": 0.3770305812358856, "learning_rate": 1.7487442265244518e-05, "loss": 0.4667, "step": 16879 }, { "epoch": 0.4634816035145524, "grad_norm": 0.33930259943008423, "learning_rate": 1.7487155973818924e-05, "loss": 0.5368, "step": 16880 }, { "epoch": 0.46350906095551897, "grad_norm": 0.3857060372829437, "learning_rate": 1.7486869668427396e-05, "loss": 0.4756, "step": 16881 }, { "epoch": 0.46353651839648546, "grad_norm": 0.36525505781173706, "learning_rate": 1.7486583349070466e-05, "loss": 0.5236, "step": 16882 }, { "epoch": 0.46356397583745196, "grad_norm": 0.4278118908405304, "learning_rate": 1.748629701574867e-05, "loss": 0.5805, "step": 16883 }, { "epoch": 0.46359143327841845, "grad_norm": 0.42358213663101196, "learning_rate": 1.748601066846254e-05, "loss": 0.454, "step": 16884 }, { "epoch": 0.46361889071938495, "grad_norm": 0.5455300807952881, "learning_rate": 1.7485724307212612e-05, "loss": 0.5106, "step": 16885 }, { "epoch": 0.46364634816035144, "grad_norm": 0.35954588651657104, "learning_rate": 1.7485437931999417e-05, "loss": 0.4567, "step": 16886 }, { "epoch": 0.46367380560131793, "grad_norm": 0.4351702332496643, "learning_rate": 1.7485151542823493e-05, "loss": 0.5292, "step": 16887 }, { "epoch": 0.4637012630422845, "grad_norm": 0.39526185393333435, "learning_rate": 1.7484865139685372e-05, "loss": 0.5565, "step": 16888 }, { "epoch": 0.463728720483251, "grad_norm": 0.32998332381248474, "learning_rate": 1.748457872258559e-05, "loss": 0.4119, "step": 16889 }, { "epoch": 0.46375617792421747, "grad_norm": 0.3972926437854767, "learning_rate": 1.7484292291524677e-05, "loss": 0.5431, "step": 16890 }, { "epoch": 0.46378363536518397, "grad_norm": 0.5110490918159485, "learning_rate": 1.748400584650317e-05, "loss": 0.3907, "step": 16891 }, { "epoch": 0.46381109280615046, "grad_norm": 0.4238927662372589, "learning_rate": 1.7483719387521607e-05, "loss": 0.4693, "step": 16892 }, { "epoch": 0.46383855024711695, "grad_norm": 0.3512466549873352, "learning_rate": 1.7483432914580515e-05, "loss": 0.5358, "step": 16893 }, { "epoch": 0.46386600768808345, "grad_norm": 0.34300950169563293, "learning_rate": 1.7483146427680435e-05, "loss": 0.5605, "step": 16894 }, { "epoch": 0.46389346512905, "grad_norm": 0.37212440371513367, "learning_rate": 1.7482859926821895e-05, "loss": 0.4435, "step": 16895 }, { "epoch": 0.4639209225700165, "grad_norm": 0.41316771507263184, "learning_rate": 1.7482573412005435e-05, "loss": 0.5916, "step": 16896 }, { "epoch": 0.463948380010983, "grad_norm": 0.36366701126098633, "learning_rate": 1.748228688323159e-05, "loss": 0.5128, "step": 16897 }, { "epoch": 0.4639758374519495, "grad_norm": 0.36917686462402344, "learning_rate": 1.7482000340500892e-05, "loss": 0.5045, "step": 16898 }, { "epoch": 0.464003294892916, "grad_norm": 0.3510574400424957, "learning_rate": 1.7481713783813872e-05, "loss": 0.5248, "step": 16899 }, { "epoch": 0.46403075233388247, "grad_norm": 0.38627496361732483, "learning_rate": 1.748142721317107e-05, "loss": 0.5203, "step": 16900 }, { "epoch": 0.46405820977484896, "grad_norm": 0.3708861172199249, "learning_rate": 1.7481140628573017e-05, "loss": 0.5187, "step": 16901 }, { "epoch": 0.4640856672158155, "grad_norm": 0.4005656838417053, "learning_rate": 1.748085403002025e-05, "loss": 0.548, "step": 16902 }, { "epoch": 0.464113124656782, "grad_norm": 0.3799499571323395, "learning_rate": 1.74805674175133e-05, "loss": 0.5023, "step": 16903 }, { "epoch": 0.4641405820977485, "grad_norm": 0.32577183842658997, "learning_rate": 1.7480280791052707e-05, "loss": 0.4986, "step": 16904 }, { "epoch": 0.464168039538715, "grad_norm": 0.3593858778476715, "learning_rate": 1.7479994150639005e-05, "loss": 0.5637, "step": 16905 }, { "epoch": 0.4641954969796815, "grad_norm": 0.36932694911956787, "learning_rate": 1.747970749627272e-05, "loss": 0.5814, "step": 16906 }, { "epoch": 0.464222954420648, "grad_norm": 0.35311880707740784, "learning_rate": 1.74794208279544e-05, "loss": 0.5058, "step": 16907 }, { "epoch": 0.4642504118616145, "grad_norm": 0.38506314158439636, "learning_rate": 1.747913414568457e-05, "loss": 0.4921, "step": 16908 }, { "epoch": 0.464277869302581, "grad_norm": 0.33022457361221313, "learning_rate": 1.7478847449463767e-05, "loss": 0.46, "step": 16909 }, { "epoch": 0.4643053267435475, "grad_norm": 0.3791036903858185, "learning_rate": 1.7478560739292528e-05, "loss": 0.5254, "step": 16910 }, { "epoch": 0.464332784184514, "grad_norm": 0.36364808678627014, "learning_rate": 1.7478274015171387e-05, "loss": 0.4532, "step": 16911 }, { "epoch": 0.4643602416254805, "grad_norm": 0.3688960671424866, "learning_rate": 1.7477987277100878e-05, "loss": 0.4948, "step": 16912 }, { "epoch": 0.464387699066447, "grad_norm": 0.3501456379890442, "learning_rate": 1.7477700525081535e-05, "loss": 0.4685, "step": 16913 }, { "epoch": 0.4644151565074135, "grad_norm": 0.37293192744255066, "learning_rate": 1.7477413759113894e-05, "loss": 0.5603, "step": 16914 }, { "epoch": 0.46444261394838, "grad_norm": 0.3926141560077667, "learning_rate": 1.7477126979198487e-05, "loss": 0.5315, "step": 16915 }, { "epoch": 0.46447007138934654, "grad_norm": 0.3564329445362091, "learning_rate": 1.7476840185335855e-05, "loss": 0.5437, "step": 16916 }, { "epoch": 0.46449752883031303, "grad_norm": 0.35250774025917053, "learning_rate": 1.747655337752653e-05, "loss": 0.4839, "step": 16917 }, { "epoch": 0.4645249862712795, "grad_norm": 0.3680199980735779, "learning_rate": 1.7476266555771047e-05, "loss": 0.5269, "step": 16918 }, { "epoch": 0.464552443712246, "grad_norm": 0.311867892742157, "learning_rate": 1.747597972006994e-05, "loss": 0.4694, "step": 16919 }, { "epoch": 0.4645799011532125, "grad_norm": 0.3477252423763275, "learning_rate": 1.7475692870423743e-05, "loss": 0.4901, "step": 16920 }, { "epoch": 0.464607358594179, "grad_norm": 0.45639994740486145, "learning_rate": 1.7475406006832996e-05, "loss": 0.5554, "step": 16921 }, { "epoch": 0.4646348160351455, "grad_norm": 0.4128105640411377, "learning_rate": 1.7475119129298228e-05, "loss": 0.5161, "step": 16922 }, { "epoch": 0.46466227347611205, "grad_norm": 0.3596931993961334, "learning_rate": 1.747483223781998e-05, "loss": 0.4753, "step": 16923 }, { "epoch": 0.46468973091707855, "grad_norm": 0.6810435056686401, "learning_rate": 1.747454533239878e-05, "loss": 0.5572, "step": 16924 }, { "epoch": 0.46471718835804504, "grad_norm": 0.33691033720970154, "learning_rate": 1.747425841303517e-05, "loss": 0.4882, "step": 16925 }, { "epoch": 0.46474464579901154, "grad_norm": 0.3709375560283661, "learning_rate": 1.7473971479729683e-05, "loss": 0.4873, "step": 16926 }, { "epoch": 0.46477210323997803, "grad_norm": 0.407829225063324, "learning_rate": 1.7473684532482852e-05, "loss": 0.5208, "step": 16927 }, { "epoch": 0.4647995606809445, "grad_norm": 0.4466405212879181, "learning_rate": 1.7473397571295215e-05, "loss": 0.4939, "step": 16928 }, { "epoch": 0.464827018121911, "grad_norm": 0.35260090231895447, "learning_rate": 1.7473110596167305e-05, "loss": 0.3674, "step": 16929 }, { "epoch": 0.46485447556287757, "grad_norm": 0.3948887884616852, "learning_rate": 1.7472823607099663e-05, "loss": 0.4387, "step": 16930 }, { "epoch": 0.46488193300384406, "grad_norm": 0.41016730666160583, "learning_rate": 1.7472536604092816e-05, "loss": 0.5005, "step": 16931 }, { "epoch": 0.46490939044481056, "grad_norm": 0.4022945463657379, "learning_rate": 1.7472249587147304e-05, "loss": 0.5541, "step": 16932 }, { "epoch": 0.46493684788577705, "grad_norm": 0.3817179799079895, "learning_rate": 1.747196255626366e-05, "loss": 0.5131, "step": 16933 }, { "epoch": 0.46496430532674354, "grad_norm": 0.34984561800956726, "learning_rate": 1.7471675511442426e-05, "loss": 0.5223, "step": 16934 }, { "epoch": 0.46499176276771004, "grad_norm": 0.3803809583187103, "learning_rate": 1.747138845268413e-05, "loss": 0.4469, "step": 16935 }, { "epoch": 0.46501922020867653, "grad_norm": 0.3400759696960449, "learning_rate": 1.747110137998931e-05, "loss": 0.3834, "step": 16936 }, { "epoch": 0.4650466776496431, "grad_norm": 0.370835542678833, "learning_rate": 1.74708142933585e-05, "loss": 0.5243, "step": 16937 }, { "epoch": 0.4650741350906096, "grad_norm": 0.3897378146648407, "learning_rate": 1.7470527192792236e-05, "loss": 0.5223, "step": 16938 }, { "epoch": 0.46510159253157607, "grad_norm": 0.3241879940032959, "learning_rate": 1.7470240078291056e-05, "loss": 0.4534, "step": 16939 }, { "epoch": 0.46512904997254256, "grad_norm": 0.3836115002632141, "learning_rate": 1.7469952949855496e-05, "loss": 0.4817, "step": 16940 }, { "epoch": 0.46515650741350906, "grad_norm": 0.35602566599845886, "learning_rate": 1.7469665807486088e-05, "loss": 0.416, "step": 16941 }, { "epoch": 0.46518396485447555, "grad_norm": 0.3718385100364685, "learning_rate": 1.7469378651183367e-05, "loss": 0.57, "step": 16942 }, { "epoch": 0.46521142229544205, "grad_norm": 0.3564950227737427, "learning_rate": 1.7469091480947876e-05, "loss": 0.4983, "step": 16943 }, { "epoch": 0.46523887973640854, "grad_norm": 0.39068007469177246, "learning_rate": 1.7468804296780143e-05, "loss": 0.4702, "step": 16944 }, { "epoch": 0.4652663371773751, "grad_norm": 0.4072388708591461, "learning_rate": 1.7468517098680704e-05, "loss": 0.4962, "step": 16945 }, { "epoch": 0.4652937946183416, "grad_norm": 0.36905354261398315, "learning_rate": 1.74682298866501e-05, "loss": 0.4121, "step": 16946 }, { "epoch": 0.4653212520593081, "grad_norm": 0.35261309146881104, "learning_rate": 1.746794266068886e-05, "loss": 0.5765, "step": 16947 }, { "epoch": 0.46534870950027457, "grad_norm": 0.3956209719181061, "learning_rate": 1.7467655420797527e-05, "loss": 0.4802, "step": 16948 }, { "epoch": 0.46537616694124107, "grad_norm": 0.4002109467983246, "learning_rate": 1.7467368166976632e-05, "loss": 0.5069, "step": 16949 }, { "epoch": 0.46540362438220756, "grad_norm": 0.40009215474128723, "learning_rate": 1.746708089922671e-05, "loss": 0.4971, "step": 16950 }, { "epoch": 0.46543108182317405, "grad_norm": 0.4305175244808197, "learning_rate": 1.74667936175483e-05, "loss": 0.4911, "step": 16951 }, { "epoch": 0.4654585392641406, "grad_norm": 0.3981086015701294, "learning_rate": 1.7466506321941942e-05, "loss": 0.4692, "step": 16952 }, { "epoch": 0.4654859967051071, "grad_norm": 0.3695143759250641, "learning_rate": 1.746621901240816e-05, "loss": 0.5526, "step": 16953 }, { "epoch": 0.4655134541460736, "grad_norm": 0.36620327830314636, "learning_rate": 1.7465931688947502e-05, "loss": 0.5559, "step": 16954 }, { "epoch": 0.4655409115870401, "grad_norm": 0.3817724883556366, "learning_rate": 1.7465644351560494e-05, "loss": 0.6128, "step": 16955 }, { "epoch": 0.4655683690280066, "grad_norm": 0.3779057562351227, "learning_rate": 1.746535700024768e-05, "loss": 0.5136, "step": 16956 }, { "epoch": 0.4655958264689731, "grad_norm": 0.38283276557922363, "learning_rate": 1.7465069635009588e-05, "loss": 0.5056, "step": 16957 }, { "epoch": 0.46562328390993957, "grad_norm": 0.3471296429634094, "learning_rate": 1.7464782255846762e-05, "loss": 0.4745, "step": 16958 }, { "epoch": 0.4656507413509061, "grad_norm": 0.3841201961040497, "learning_rate": 1.7464494862759732e-05, "loss": 0.5886, "step": 16959 }, { "epoch": 0.4656781987918726, "grad_norm": 0.35044562816619873, "learning_rate": 1.746420745574904e-05, "loss": 0.5026, "step": 16960 }, { "epoch": 0.4657056562328391, "grad_norm": 0.4019415080547333, "learning_rate": 1.7463920034815214e-05, "loss": 0.5566, "step": 16961 }, { "epoch": 0.4657331136738056, "grad_norm": 0.34497979283332825, "learning_rate": 1.74636325999588e-05, "loss": 0.4795, "step": 16962 }, { "epoch": 0.4657605711147721, "grad_norm": 1.4787840843200684, "learning_rate": 1.746334515118033e-05, "loss": 0.5336, "step": 16963 }, { "epoch": 0.4657880285557386, "grad_norm": 0.37970170378685, "learning_rate": 1.7463057688480338e-05, "loss": 0.5058, "step": 16964 }, { "epoch": 0.4658154859967051, "grad_norm": 0.3907909393310547, "learning_rate": 1.746277021185936e-05, "loss": 0.5677, "step": 16965 }, { "epoch": 0.46584294343767163, "grad_norm": 0.3656117022037506, "learning_rate": 1.746248272131793e-05, "loss": 0.4848, "step": 16966 }, { "epoch": 0.4658704008786381, "grad_norm": 0.39899101853370667, "learning_rate": 1.7462195216856592e-05, "loss": 0.5361, "step": 16967 }, { "epoch": 0.4658978583196046, "grad_norm": 0.3936983048915863, "learning_rate": 1.746190769847588e-05, "loss": 0.552, "step": 16968 }, { "epoch": 0.4659253157605711, "grad_norm": 0.37411943078041077, "learning_rate": 1.7461620166176328e-05, "loss": 0.5189, "step": 16969 }, { "epoch": 0.4659527732015376, "grad_norm": 0.3574541509151459, "learning_rate": 1.746133261995847e-05, "loss": 0.4944, "step": 16970 }, { "epoch": 0.4659802306425041, "grad_norm": 0.3404518961906433, "learning_rate": 1.7461045059822845e-05, "loss": 0.4202, "step": 16971 }, { "epoch": 0.4660076880834706, "grad_norm": 0.37138357758522034, "learning_rate": 1.7460757485769993e-05, "loss": 0.5168, "step": 16972 }, { "epoch": 0.46603514552443714, "grad_norm": 0.3662682771682739, "learning_rate": 1.7460469897800445e-05, "loss": 0.5283, "step": 16973 }, { "epoch": 0.46606260296540364, "grad_norm": 0.36791056394577026, "learning_rate": 1.746018229591474e-05, "loss": 0.5588, "step": 16974 }, { "epoch": 0.46609006040637013, "grad_norm": 0.38002684712409973, "learning_rate": 1.7459894680113415e-05, "loss": 0.5575, "step": 16975 }, { "epoch": 0.4661175178473366, "grad_norm": 0.3701854944229126, "learning_rate": 1.7459607050397004e-05, "loss": 0.504, "step": 16976 }, { "epoch": 0.4661449752883031, "grad_norm": 0.37627002596855164, "learning_rate": 1.7459319406766047e-05, "loss": 0.5113, "step": 16977 }, { "epoch": 0.4661724327292696, "grad_norm": 0.3704990744590759, "learning_rate": 1.7459031749221077e-05, "loss": 0.5687, "step": 16978 }, { "epoch": 0.4661998901702361, "grad_norm": 0.4129607677459717, "learning_rate": 1.7458744077762632e-05, "loss": 0.5296, "step": 16979 }, { "epoch": 0.46622734761120266, "grad_norm": 0.3615747094154358, "learning_rate": 1.745845639239125e-05, "loss": 0.5095, "step": 16980 }, { "epoch": 0.46625480505216915, "grad_norm": 0.5112725496292114, "learning_rate": 1.7458168693107465e-05, "loss": 0.5105, "step": 16981 }, { "epoch": 0.46628226249313565, "grad_norm": 0.3190946877002716, "learning_rate": 1.7457880979911816e-05, "loss": 0.4477, "step": 16982 }, { "epoch": 0.46630971993410214, "grad_norm": 0.354484885931015, "learning_rate": 1.7457593252804838e-05, "loss": 0.4583, "step": 16983 }, { "epoch": 0.46633717737506863, "grad_norm": 0.3711962401866913, "learning_rate": 1.7457305511787068e-05, "loss": 0.537, "step": 16984 }, { "epoch": 0.46636463481603513, "grad_norm": 0.37025657296180725, "learning_rate": 1.745701775685904e-05, "loss": 0.5478, "step": 16985 }, { "epoch": 0.4663920922570016, "grad_norm": 0.3695226311683655, "learning_rate": 1.74567299880213e-05, "loss": 0.5184, "step": 16986 }, { "epoch": 0.46641954969796817, "grad_norm": 0.34491240978240967, "learning_rate": 1.745644220527438e-05, "loss": 0.4835, "step": 16987 }, { "epoch": 0.46644700713893467, "grad_norm": 0.4063764214515686, "learning_rate": 1.745615440861881e-05, "loss": 0.5507, "step": 16988 }, { "epoch": 0.46647446457990116, "grad_norm": 0.36633872985839844, "learning_rate": 1.7455866598055134e-05, "loss": 0.491, "step": 16989 }, { "epoch": 0.46650192202086765, "grad_norm": 0.3731537163257599, "learning_rate": 1.7455578773583887e-05, "loss": 0.5034, "step": 16990 }, { "epoch": 0.46652937946183415, "grad_norm": 0.3913518488407135, "learning_rate": 1.7455290935205606e-05, "loss": 0.5587, "step": 16991 }, { "epoch": 0.46655683690280064, "grad_norm": 0.326567679643631, "learning_rate": 1.745500308292083e-05, "loss": 0.5078, "step": 16992 }, { "epoch": 0.46658429434376714, "grad_norm": 0.3894326984882355, "learning_rate": 1.7454715216730088e-05, "loss": 0.5606, "step": 16993 }, { "epoch": 0.4666117517847337, "grad_norm": 0.4274847209453583, "learning_rate": 1.7454427336633926e-05, "loss": 0.6441, "step": 16994 }, { "epoch": 0.4666392092257002, "grad_norm": 0.3944520652294159, "learning_rate": 1.7454139442632882e-05, "loss": 0.5051, "step": 16995 }, { "epoch": 0.4666666666666667, "grad_norm": 0.38332846760749817, "learning_rate": 1.7453851534727487e-05, "loss": 0.573, "step": 16996 }, { "epoch": 0.46669412410763317, "grad_norm": 0.4687936007976532, "learning_rate": 1.7453563612918276e-05, "loss": 0.5567, "step": 16997 }, { "epoch": 0.46672158154859966, "grad_norm": 0.35550495982170105, "learning_rate": 1.7453275677205792e-05, "loss": 0.5016, "step": 16998 }, { "epoch": 0.46674903898956616, "grad_norm": 0.34160637855529785, "learning_rate": 1.745298772759057e-05, "loss": 0.4392, "step": 16999 }, { "epoch": 0.46677649643053265, "grad_norm": 0.3520333766937256, "learning_rate": 1.7452699764073148e-05, "loss": 0.4602, "step": 17000 }, { "epoch": 0.4668039538714992, "grad_norm": 0.39728832244873047, "learning_rate": 1.7452411786654063e-05, "loss": 0.4297, "step": 17001 }, { "epoch": 0.4668314113124657, "grad_norm": 0.3837659955024719, "learning_rate": 1.7452123795333852e-05, "loss": 0.4175, "step": 17002 }, { "epoch": 0.4668588687534322, "grad_norm": 0.3627690374851227, "learning_rate": 1.745183579011305e-05, "loss": 0.5607, "step": 17003 }, { "epoch": 0.4668863261943987, "grad_norm": 0.3546083867549896, "learning_rate": 1.7451547770992193e-05, "loss": 0.4899, "step": 17004 }, { "epoch": 0.4669137836353652, "grad_norm": 0.4754144251346588, "learning_rate": 1.7451259737971826e-05, "loss": 0.4895, "step": 17005 }, { "epoch": 0.46694124107633167, "grad_norm": 0.4153731167316437, "learning_rate": 1.745097169105248e-05, "loss": 0.512, "step": 17006 }, { "epoch": 0.46696869851729816, "grad_norm": 0.35857516527175903, "learning_rate": 1.7450683630234694e-05, "loss": 0.4971, "step": 17007 }, { "epoch": 0.4669961559582647, "grad_norm": 0.4314795732498169, "learning_rate": 1.7450395555519004e-05, "loss": 0.5285, "step": 17008 }, { "epoch": 0.4670236133992312, "grad_norm": 0.3575865626335144, "learning_rate": 1.7450107466905946e-05, "loss": 0.402, "step": 17009 }, { "epoch": 0.4670510708401977, "grad_norm": 0.4053703248500824, "learning_rate": 1.7449819364396066e-05, "loss": 0.4522, "step": 17010 }, { "epoch": 0.4670785282811642, "grad_norm": 0.3791302740573883, "learning_rate": 1.744953124798989e-05, "loss": 0.5497, "step": 17011 }, { "epoch": 0.4671059857221307, "grad_norm": 0.37876513600349426, "learning_rate": 1.7449243117687964e-05, "loss": 0.532, "step": 17012 }, { "epoch": 0.4671334431630972, "grad_norm": 0.3893001079559326, "learning_rate": 1.744895497349082e-05, "loss": 0.5496, "step": 17013 }, { "epoch": 0.4671609006040637, "grad_norm": 0.3640681803226471, "learning_rate": 1.7448666815399e-05, "loss": 0.499, "step": 17014 }, { "epoch": 0.4671883580450302, "grad_norm": 0.5210261940956116, "learning_rate": 1.7448378643413034e-05, "loss": 0.5961, "step": 17015 }, { "epoch": 0.4672158154859967, "grad_norm": 0.4215487241744995, "learning_rate": 1.7448090457533465e-05, "loss": 0.4838, "step": 17016 }, { "epoch": 0.4672432729269632, "grad_norm": 0.3975925147533417, "learning_rate": 1.7447802257760835e-05, "loss": 0.5274, "step": 17017 }, { "epoch": 0.4672707303679297, "grad_norm": 0.44974949955940247, "learning_rate": 1.7447514044095675e-05, "loss": 0.4845, "step": 17018 }, { "epoch": 0.4672981878088962, "grad_norm": 0.43210074305534363, "learning_rate": 1.744722581653852e-05, "loss": 0.588, "step": 17019 }, { "epoch": 0.4673256452498627, "grad_norm": 0.37265270948410034, "learning_rate": 1.7446937575089918e-05, "loss": 0.5303, "step": 17020 }, { "epoch": 0.4673531026908292, "grad_norm": 0.38527217507362366, "learning_rate": 1.74466493197504e-05, "loss": 0.5507, "step": 17021 }, { "epoch": 0.46738056013179574, "grad_norm": 0.3656613826751709, "learning_rate": 1.74463610505205e-05, "loss": 0.5041, "step": 17022 }, { "epoch": 0.46740801757276224, "grad_norm": 0.3706596791744232, "learning_rate": 1.7446072767400767e-05, "loss": 0.5841, "step": 17023 }, { "epoch": 0.46743547501372873, "grad_norm": 0.4103105366230011, "learning_rate": 1.7445784470391725e-05, "loss": 0.4717, "step": 17024 }, { "epoch": 0.4674629324546952, "grad_norm": 0.31466102600097656, "learning_rate": 1.7445496159493922e-05, "loss": 0.4463, "step": 17025 }, { "epoch": 0.4674903898956617, "grad_norm": 0.35096168518066406, "learning_rate": 1.7445207834707892e-05, "loss": 0.4729, "step": 17026 }, { "epoch": 0.4675178473366282, "grad_norm": 0.42214953899383545, "learning_rate": 1.7444919496034174e-05, "loss": 0.5363, "step": 17027 }, { "epoch": 0.4675453047775947, "grad_norm": 0.37205764651298523, "learning_rate": 1.74446311434733e-05, "loss": 0.5603, "step": 17028 }, { "epoch": 0.46757276221856126, "grad_norm": 0.5457680821418762, "learning_rate": 1.7444342777025816e-05, "loss": 0.4764, "step": 17029 }, { "epoch": 0.46760021965952775, "grad_norm": 0.3835238814353943, "learning_rate": 1.744405439669226e-05, "loss": 0.5535, "step": 17030 }, { "epoch": 0.46762767710049424, "grad_norm": 0.35217994451522827, "learning_rate": 1.7443766002473164e-05, "loss": 0.5784, "step": 17031 }, { "epoch": 0.46765513454146074, "grad_norm": 0.36157482862472534, "learning_rate": 1.7443477594369067e-05, "loss": 0.6066, "step": 17032 }, { "epoch": 0.46768259198242723, "grad_norm": 0.38532060384750366, "learning_rate": 1.744318917238051e-05, "loss": 0.5051, "step": 17033 }, { "epoch": 0.4677100494233937, "grad_norm": 0.35546156764030457, "learning_rate": 1.744290073650803e-05, "loss": 0.5136, "step": 17034 }, { "epoch": 0.4677375068643602, "grad_norm": 0.39217129349708557, "learning_rate": 1.7442612286752166e-05, "loss": 0.5312, "step": 17035 }, { "epoch": 0.46776496430532677, "grad_norm": 0.4183287024497986, "learning_rate": 1.7442323823113452e-05, "loss": 0.6398, "step": 17036 }, { "epoch": 0.46779242174629326, "grad_norm": 0.3687008321285248, "learning_rate": 1.744203534559243e-05, "loss": 0.5007, "step": 17037 }, { "epoch": 0.46781987918725976, "grad_norm": 0.3718061149120331, "learning_rate": 1.7441746854189636e-05, "loss": 0.4448, "step": 17038 }, { "epoch": 0.46784733662822625, "grad_norm": 0.3340950608253479, "learning_rate": 1.744145834890561e-05, "loss": 0.4408, "step": 17039 }, { "epoch": 0.46787479406919275, "grad_norm": 0.40376371145248413, "learning_rate": 1.744116982974089e-05, "loss": 0.5291, "step": 17040 }, { "epoch": 0.46790225151015924, "grad_norm": 0.3274785578250885, "learning_rate": 1.744088129669601e-05, "loss": 0.5535, "step": 17041 }, { "epoch": 0.46792970895112573, "grad_norm": 0.4073832631111145, "learning_rate": 1.7440592749771513e-05, "loss": 0.5216, "step": 17042 }, { "epoch": 0.4679571663920923, "grad_norm": 0.3420123755931854, "learning_rate": 1.744030418896794e-05, "loss": 0.6042, "step": 17043 }, { "epoch": 0.4679846238330588, "grad_norm": 0.3958752155303955, "learning_rate": 1.744001561428582e-05, "loss": 0.5853, "step": 17044 }, { "epoch": 0.46801208127402527, "grad_norm": 0.3438078761100769, "learning_rate": 1.7439727025725697e-05, "loss": 0.537, "step": 17045 }, { "epoch": 0.46803953871499177, "grad_norm": 0.38933736085891724, "learning_rate": 1.7439438423288112e-05, "loss": 0.5139, "step": 17046 }, { "epoch": 0.46806699615595826, "grad_norm": 0.6319993734359741, "learning_rate": 1.7439149806973596e-05, "loss": 0.5205, "step": 17047 }, { "epoch": 0.46809445359692475, "grad_norm": 0.33319899439811707, "learning_rate": 1.743886117678269e-05, "loss": 0.5282, "step": 17048 }, { "epoch": 0.46812191103789125, "grad_norm": 0.4043929874897003, "learning_rate": 1.7438572532715937e-05, "loss": 0.4846, "step": 17049 }, { "epoch": 0.4681493684788578, "grad_norm": 0.35247987508773804, "learning_rate": 1.743828387477387e-05, "loss": 0.4346, "step": 17050 }, { "epoch": 0.4681768259198243, "grad_norm": 0.40560659766197205, "learning_rate": 1.7437995202957034e-05, "loss": 0.5689, "step": 17051 }, { "epoch": 0.4682042833607908, "grad_norm": 0.3724038600921631, "learning_rate": 1.743770651726596e-05, "loss": 0.524, "step": 17052 }, { "epoch": 0.4682317408017573, "grad_norm": 0.4546937644481659, "learning_rate": 1.743741781770119e-05, "loss": 0.4643, "step": 17053 }, { "epoch": 0.4682591982427238, "grad_norm": 0.40008124709129333, "learning_rate": 1.743712910426326e-05, "loss": 0.5553, "step": 17054 }, { "epoch": 0.46828665568369027, "grad_norm": 0.3586139976978302, "learning_rate": 1.743684037695271e-05, "loss": 0.416, "step": 17055 }, { "epoch": 0.46831411312465676, "grad_norm": 0.35657480359077454, "learning_rate": 1.7436551635770083e-05, "loss": 0.5133, "step": 17056 }, { "epoch": 0.4683415705656233, "grad_norm": 0.3596322536468506, "learning_rate": 1.743626288071591e-05, "loss": 0.4517, "step": 17057 }, { "epoch": 0.4683690280065898, "grad_norm": 0.3723606467247009, "learning_rate": 1.743597411179074e-05, "loss": 0.4819, "step": 17058 }, { "epoch": 0.4683964854475563, "grad_norm": 0.37674784660339355, "learning_rate": 1.7435685328995097e-05, "loss": 0.5572, "step": 17059 }, { "epoch": 0.4684239428885228, "grad_norm": 0.3661544620990753, "learning_rate": 1.7435396532329533e-05, "loss": 0.5054, "step": 17060 }, { "epoch": 0.4684514003294893, "grad_norm": 0.3502786457538605, "learning_rate": 1.7435107721794577e-05, "loss": 0.4988, "step": 17061 }, { "epoch": 0.4684788577704558, "grad_norm": 0.33850374817848206, "learning_rate": 1.7434818897390774e-05, "loss": 0.5173, "step": 17062 }, { "epoch": 0.4685063152114223, "grad_norm": 0.36681485176086426, "learning_rate": 1.743453005911866e-05, "loss": 0.4708, "step": 17063 }, { "epoch": 0.4685337726523888, "grad_norm": 0.3799437880516052, "learning_rate": 1.7434241206978778e-05, "loss": 0.4842, "step": 17064 }, { "epoch": 0.4685612300933553, "grad_norm": 0.3844027817249298, "learning_rate": 1.7433952340971664e-05, "loss": 0.5357, "step": 17065 }, { "epoch": 0.4685886875343218, "grad_norm": 0.34639549255371094, "learning_rate": 1.743366346109785e-05, "loss": 0.5118, "step": 17066 }, { "epoch": 0.4686161449752883, "grad_norm": 0.36602044105529785, "learning_rate": 1.7433374567357885e-05, "loss": 0.5176, "step": 17067 }, { "epoch": 0.4686436024162548, "grad_norm": 0.39535290002822876, "learning_rate": 1.7433085659752302e-05, "loss": 0.5714, "step": 17068 }, { "epoch": 0.4686710598572213, "grad_norm": 0.36868980526924133, "learning_rate": 1.7432796738281644e-05, "loss": 0.48, "step": 17069 }, { "epoch": 0.4686985172981878, "grad_norm": 0.3926275074481964, "learning_rate": 1.7432507802946446e-05, "loss": 0.4301, "step": 17070 }, { "epoch": 0.46872597473915434, "grad_norm": 0.4059324264526367, "learning_rate": 1.7432218853747254e-05, "loss": 0.5655, "step": 17071 }, { "epoch": 0.46875343218012083, "grad_norm": 0.35474029183387756, "learning_rate": 1.7431929890684595e-05, "loss": 0.5338, "step": 17072 }, { "epoch": 0.4687808896210873, "grad_norm": 0.33851268887519836, "learning_rate": 1.743164091375902e-05, "loss": 0.5497, "step": 17073 }, { "epoch": 0.4688083470620538, "grad_norm": 0.41660940647125244, "learning_rate": 1.743135192297106e-05, "loss": 0.5281, "step": 17074 }, { "epoch": 0.4688358045030203, "grad_norm": 0.36567676067352295, "learning_rate": 1.743106291832126e-05, "loss": 0.4765, "step": 17075 }, { "epoch": 0.4688632619439868, "grad_norm": 0.3957929015159607, "learning_rate": 1.743077389981015e-05, "loss": 0.5363, "step": 17076 }, { "epoch": 0.4688907193849533, "grad_norm": 0.3438754975795746, "learning_rate": 1.7430484867438277e-05, "loss": 0.5228, "step": 17077 }, { "epoch": 0.4689181768259198, "grad_norm": 0.39070719480514526, "learning_rate": 1.743019582120618e-05, "loss": 0.5657, "step": 17078 }, { "epoch": 0.46894563426688635, "grad_norm": 0.4292851984500885, "learning_rate": 1.7429906761114398e-05, "loss": 0.5352, "step": 17079 }, { "epoch": 0.46897309170785284, "grad_norm": 0.3695099353790283, "learning_rate": 1.7429617687163464e-05, "loss": 0.5028, "step": 17080 }, { "epoch": 0.46900054914881933, "grad_norm": 0.35412099957466125, "learning_rate": 1.7429328599353926e-05, "loss": 0.4933, "step": 17081 }, { "epoch": 0.46902800658978583, "grad_norm": 0.33713799715042114, "learning_rate": 1.7429039497686316e-05, "loss": 0.4789, "step": 17082 }, { "epoch": 0.4690554640307523, "grad_norm": 0.36816883087158203, "learning_rate": 1.7428750382161176e-05, "loss": 0.5654, "step": 17083 }, { "epoch": 0.4690829214717188, "grad_norm": 0.34501931071281433, "learning_rate": 1.7428461252779047e-05, "loss": 0.4471, "step": 17084 }, { "epoch": 0.4691103789126853, "grad_norm": 0.3594520688056946, "learning_rate": 1.7428172109540466e-05, "loss": 0.4965, "step": 17085 }, { "epoch": 0.46913783635365186, "grad_norm": 0.35713744163513184, "learning_rate": 1.7427882952445974e-05, "loss": 0.4592, "step": 17086 }, { "epoch": 0.46916529379461835, "grad_norm": 0.40955662727355957, "learning_rate": 1.7427593781496107e-05, "loss": 0.6714, "step": 17087 }, { "epoch": 0.46919275123558485, "grad_norm": 0.39261043071746826, "learning_rate": 1.7427304596691414e-05, "loss": 0.5391, "step": 17088 }, { "epoch": 0.46922020867655134, "grad_norm": 0.3557354509830475, "learning_rate": 1.7427015398032422e-05, "loss": 0.4875, "step": 17089 }, { "epoch": 0.46924766611751784, "grad_norm": 0.35868677496910095, "learning_rate": 1.7426726185519675e-05, "loss": 0.5847, "step": 17090 }, { "epoch": 0.46927512355848433, "grad_norm": 0.37853068113327026, "learning_rate": 1.7426436959153714e-05, "loss": 0.5713, "step": 17091 }, { "epoch": 0.4693025809994508, "grad_norm": 0.36245396733283997, "learning_rate": 1.742614771893508e-05, "loss": 0.4608, "step": 17092 }, { "epoch": 0.4693300384404174, "grad_norm": 0.37499722838401794, "learning_rate": 1.742585846486431e-05, "loss": 0.5165, "step": 17093 }, { "epoch": 0.46935749588138387, "grad_norm": 0.3808550238609314, "learning_rate": 1.7425569196941943e-05, "loss": 0.5054, "step": 17094 }, { "epoch": 0.46938495332235036, "grad_norm": 0.3840847313404083, "learning_rate": 1.7425279915168518e-05, "loss": 0.5066, "step": 17095 }, { "epoch": 0.46941241076331686, "grad_norm": 0.38980793952941895, "learning_rate": 1.742499061954458e-05, "loss": 0.4451, "step": 17096 }, { "epoch": 0.46943986820428335, "grad_norm": 0.3526729345321655, "learning_rate": 1.7424701310070663e-05, "loss": 0.4987, "step": 17097 }, { "epoch": 0.46946732564524984, "grad_norm": 0.43383216857910156, "learning_rate": 1.7424411986747304e-05, "loss": 0.5346, "step": 17098 }, { "epoch": 0.46949478308621634, "grad_norm": 0.38595640659332275, "learning_rate": 1.7424122649575054e-05, "loss": 0.4737, "step": 17099 }, { "epoch": 0.4695222405271829, "grad_norm": 0.36706170439720154, "learning_rate": 1.7423833298554443e-05, "loss": 0.5451, "step": 17100 }, { "epoch": 0.4695496979681494, "grad_norm": 0.3900626003742218, "learning_rate": 1.742354393368601e-05, "loss": 0.5214, "step": 17101 }, { "epoch": 0.4695771554091159, "grad_norm": 0.3775525689125061, "learning_rate": 1.7423254554970302e-05, "loss": 0.4867, "step": 17102 }, { "epoch": 0.46960461285008237, "grad_norm": 0.38418447971343994, "learning_rate": 1.7422965162407854e-05, "loss": 0.4668, "step": 17103 }, { "epoch": 0.46963207029104886, "grad_norm": 0.530857503414154, "learning_rate": 1.7422675755999206e-05, "loss": 0.5614, "step": 17104 }, { "epoch": 0.46965952773201536, "grad_norm": 0.41304031014442444, "learning_rate": 1.7422386335744903e-05, "loss": 0.475, "step": 17105 }, { "epoch": 0.46968698517298185, "grad_norm": 0.39012306928634644, "learning_rate": 1.7422096901645477e-05, "loss": 0.4957, "step": 17106 }, { "epoch": 0.4697144426139484, "grad_norm": 0.3557257652282715, "learning_rate": 1.742180745370147e-05, "loss": 0.4211, "step": 17107 }, { "epoch": 0.4697419000549149, "grad_norm": 0.39167526364326477, "learning_rate": 1.742151799191343e-05, "loss": 0.5109, "step": 17108 }, { "epoch": 0.4697693574958814, "grad_norm": 0.3547302484512329, "learning_rate": 1.7421228516281884e-05, "loss": 0.4138, "step": 17109 }, { "epoch": 0.4697968149368479, "grad_norm": 0.37484145164489746, "learning_rate": 1.742093902680738e-05, "loss": 0.5001, "step": 17110 }, { "epoch": 0.4698242723778144, "grad_norm": 0.3967234194278717, "learning_rate": 1.7420649523490452e-05, "loss": 0.4571, "step": 17111 }, { "epoch": 0.46985172981878087, "grad_norm": 0.3972424268722534, "learning_rate": 1.742036000633165e-05, "loss": 0.4868, "step": 17112 }, { "epoch": 0.46987918725974737, "grad_norm": 0.37020254135131836, "learning_rate": 1.7420070475331506e-05, "loss": 0.53, "step": 17113 }, { "epoch": 0.4699066447007139, "grad_norm": 0.36313048005104065, "learning_rate": 1.7419780930490562e-05, "loss": 0.4602, "step": 17114 }, { "epoch": 0.4699341021416804, "grad_norm": 0.39895719289779663, "learning_rate": 1.741949137180936e-05, "loss": 0.4637, "step": 17115 }, { "epoch": 0.4699615595826469, "grad_norm": 0.4086248278617859, "learning_rate": 1.7419201799288438e-05, "loss": 0.5815, "step": 17116 }, { "epoch": 0.4699890170236134, "grad_norm": 0.386262983083725, "learning_rate": 1.7418912212928335e-05, "loss": 0.5744, "step": 17117 }, { "epoch": 0.4700164744645799, "grad_norm": 0.38011878728866577, "learning_rate": 1.7418622612729597e-05, "loss": 0.5795, "step": 17118 }, { "epoch": 0.4700439319055464, "grad_norm": 0.3548242449760437, "learning_rate": 1.7418332998692756e-05, "loss": 0.4951, "step": 17119 }, { "epoch": 0.4700713893465129, "grad_norm": 0.4745754301548004, "learning_rate": 1.741804337081836e-05, "loss": 0.5378, "step": 17120 }, { "epoch": 0.47009884678747943, "grad_norm": 0.6391884684562683, "learning_rate": 1.741775372910694e-05, "loss": 0.4535, "step": 17121 }, { "epoch": 0.4701263042284459, "grad_norm": 0.43966150283813477, "learning_rate": 1.7417464073559047e-05, "loss": 0.4634, "step": 17122 }, { "epoch": 0.4701537616694124, "grad_norm": 0.36903369426727295, "learning_rate": 1.7417174404175214e-05, "loss": 0.4907, "step": 17123 }, { "epoch": 0.4701812191103789, "grad_norm": 0.35011449456214905, "learning_rate": 1.741688472095598e-05, "loss": 0.4668, "step": 17124 }, { "epoch": 0.4702086765513454, "grad_norm": 0.36873772740364075, "learning_rate": 1.7416595023901896e-05, "loss": 0.4938, "step": 17125 }, { "epoch": 0.4702361339923119, "grad_norm": 0.40376386046409607, "learning_rate": 1.7416305313013492e-05, "loss": 0.5829, "step": 17126 }, { "epoch": 0.4702635914332784, "grad_norm": 0.35475465655326843, "learning_rate": 1.741601558829131e-05, "loss": 0.524, "step": 17127 }, { "epoch": 0.47029104887424494, "grad_norm": 0.3583265542984009, "learning_rate": 1.7415725849735895e-05, "loss": 0.4652, "step": 17128 }, { "epoch": 0.47031850631521144, "grad_norm": 0.3792871832847595, "learning_rate": 1.741543609734778e-05, "loss": 0.4661, "step": 17129 }, { "epoch": 0.47034596375617793, "grad_norm": 0.4408613443374634, "learning_rate": 1.741514633112751e-05, "loss": 0.5244, "step": 17130 }, { "epoch": 0.4703734211971444, "grad_norm": 0.41571420431137085, "learning_rate": 1.7414856551075632e-05, "loss": 0.4589, "step": 17131 }, { "epoch": 0.4704008786381109, "grad_norm": 0.3950871527194977, "learning_rate": 1.7414566757192677e-05, "loss": 0.5385, "step": 17132 }, { "epoch": 0.4704283360790774, "grad_norm": 0.5954047441482544, "learning_rate": 1.7414276949479188e-05, "loss": 0.5433, "step": 17133 }, { "epoch": 0.4704557935200439, "grad_norm": 0.3341102600097656, "learning_rate": 1.7413987127935705e-05, "loss": 0.4754, "step": 17134 }, { "epoch": 0.47048325096101046, "grad_norm": 0.355400025844574, "learning_rate": 1.741369729256277e-05, "loss": 0.4684, "step": 17135 }, { "epoch": 0.47051070840197695, "grad_norm": 0.3946392238140106, "learning_rate": 1.7413407443360925e-05, "loss": 0.5206, "step": 17136 }, { "epoch": 0.47053816584294345, "grad_norm": 0.44840216636657715, "learning_rate": 1.7413117580330706e-05, "loss": 0.5188, "step": 17137 }, { "epoch": 0.47056562328390994, "grad_norm": 0.3565295934677124, "learning_rate": 1.7412827703472658e-05, "loss": 0.5274, "step": 17138 }, { "epoch": 0.47059308072487643, "grad_norm": 0.4233197867870331, "learning_rate": 1.741253781278732e-05, "loss": 0.4912, "step": 17139 }, { "epoch": 0.4706205381658429, "grad_norm": 0.37272974848747253, "learning_rate": 1.7412247908275236e-05, "loss": 0.4704, "step": 17140 }, { "epoch": 0.4706479956068094, "grad_norm": 0.4280697703361511, "learning_rate": 1.7411957989936944e-05, "loss": 0.4893, "step": 17141 }, { "epoch": 0.47067545304777597, "grad_norm": 0.3766244351863861, "learning_rate": 1.741166805777298e-05, "loss": 0.4785, "step": 17142 }, { "epoch": 0.47070291048874247, "grad_norm": 0.4235098659992218, "learning_rate": 1.741137811178389e-05, "loss": 0.5092, "step": 17143 }, { "epoch": 0.47073036792970896, "grad_norm": 0.3231426477432251, "learning_rate": 1.741108815197022e-05, "loss": 0.4401, "step": 17144 }, { "epoch": 0.47075782537067545, "grad_norm": 0.42804139852523804, "learning_rate": 1.74107981783325e-05, "loss": 0.5619, "step": 17145 }, { "epoch": 0.47078528281164195, "grad_norm": 0.3764135539531708, "learning_rate": 1.741050819087128e-05, "loss": 0.5209, "step": 17146 }, { "epoch": 0.47081274025260844, "grad_norm": 0.44187411665916443, "learning_rate": 1.7410218189587096e-05, "loss": 0.5341, "step": 17147 }, { "epoch": 0.47084019769357494, "grad_norm": 0.42096319794654846, "learning_rate": 1.7409928174480487e-05, "loss": 0.4891, "step": 17148 }, { "epoch": 0.4708676551345415, "grad_norm": 0.4597965478897095, "learning_rate": 1.7409638145552e-05, "loss": 0.5254, "step": 17149 }, { "epoch": 0.470895112575508, "grad_norm": 0.38269561529159546, "learning_rate": 1.7409348102802174e-05, "loss": 0.5232, "step": 17150 }, { "epoch": 0.4709225700164745, "grad_norm": 0.34176668524742126, "learning_rate": 1.7409058046231547e-05, "loss": 0.4408, "step": 17151 }, { "epoch": 0.47095002745744097, "grad_norm": 0.3873199224472046, "learning_rate": 1.7408767975840663e-05, "loss": 0.5479, "step": 17152 }, { "epoch": 0.47097748489840746, "grad_norm": 0.364918977022171, "learning_rate": 1.740847789163006e-05, "loss": 0.4832, "step": 17153 }, { "epoch": 0.47100494233937396, "grad_norm": 0.4034755527973175, "learning_rate": 1.740818779360028e-05, "loss": 0.4591, "step": 17154 }, { "epoch": 0.47103239978034045, "grad_norm": 0.36387234926223755, "learning_rate": 1.740789768175187e-05, "loss": 0.5125, "step": 17155 }, { "epoch": 0.471059857221307, "grad_norm": 0.40485408902168274, "learning_rate": 1.7407607556085365e-05, "loss": 0.5518, "step": 17156 }, { "epoch": 0.4710873146622735, "grad_norm": 0.3466642200946808, "learning_rate": 1.7407317416601305e-05, "loss": 0.533, "step": 17157 }, { "epoch": 0.47111477210324, "grad_norm": 0.4467039406299591, "learning_rate": 1.7407027263300237e-05, "loss": 0.4533, "step": 17158 }, { "epoch": 0.4711422295442065, "grad_norm": 0.3722216486930847, "learning_rate": 1.74067370961827e-05, "loss": 0.5154, "step": 17159 }, { "epoch": 0.471169686985173, "grad_norm": 0.38394734263420105, "learning_rate": 1.7406446915249233e-05, "loss": 0.4602, "step": 17160 }, { "epoch": 0.47119714442613947, "grad_norm": 0.3970642685890198, "learning_rate": 1.7406156720500376e-05, "loss": 0.5399, "step": 17161 }, { "epoch": 0.47122460186710596, "grad_norm": 0.3529106378555298, "learning_rate": 1.7405866511936677e-05, "loss": 0.5054, "step": 17162 }, { "epoch": 0.4712520593080725, "grad_norm": 0.3469769060611725, "learning_rate": 1.740557628955867e-05, "loss": 0.4584, "step": 17163 }, { "epoch": 0.471279516749039, "grad_norm": 0.3683535158634186, "learning_rate": 1.74052860533669e-05, "loss": 0.5395, "step": 17164 }, { "epoch": 0.4713069741900055, "grad_norm": 0.43552398681640625, "learning_rate": 1.740499580336191e-05, "loss": 0.5815, "step": 17165 }, { "epoch": 0.471334431630972, "grad_norm": 0.4493972361087799, "learning_rate": 1.740470553954424e-05, "loss": 0.5978, "step": 17166 }, { "epoch": 0.4713618890719385, "grad_norm": 0.35988086462020874, "learning_rate": 1.7404415261914425e-05, "loss": 0.5215, "step": 17167 }, { "epoch": 0.471389346512905, "grad_norm": 0.31829819083213806, "learning_rate": 1.740412497047302e-05, "loss": 0.3859, "step": 17168 }, { "epoch": 0.4714168039538715, "grad_norm": 0.44183000922203064, "learning_rate": 1.7403834665220555e-05, "loss": 0.5121, "step": 17169 }, { "epoch": 0.471444261394838, "grad_norm": 0.3646385967731476, "learning_rate": 1.740354434615758e-05, "loss": 0.4621, "step": 17170 }, { "epoch": 0.4714717188358045, "grad_norm": 0.39171311259269714, "learning_rate": 1.740325401328462e-05, "loss": 0.5741, "step": 17171 }, { "epoch": 0.471499176276771, "grad_norm": 0.362976610660553, "learning_rate": 1.740296366660224e-05, "loss": 0.4865, "step": 17172 }, { "epoch": 0.4715266337177375, "grad_norm": 0.3351900577545166, "learning_rate": 1.7402673306110966e-05, "loss": 0.4377, "step": 17173 }, { "epoch": 0.471554091158704, "grad_norm": 0.36703336238861084, "learning_rate": 1.7402382931811342e-05, "loss": 0.5317, "step": 17174 }, { "epoch": 0.4715815485996705, "grad_norm": 0.3941352367401123, "learning_rate": 1.7402092543703912e-05, "loss": 0.5881, "step": 17175 }, { "epoch": 0.471609006040637, "grad_norm": 0.33394235372543335, "learning_rate": 1.7401802141789218e-05, "loss": 0.5129, "step": 17176 }, { "epoch": 0.47163646348160354, "grad_norm": 0.32967448234558105, "learning_rate": 1.7401511726067802e-05, "loss": 0.39, "step": 17177 }, { "epoch": 0.47166392092257003, "grad_norm": 0.47130271792411804, "learning_rate": 1.7401221296540203e-05, "loss": 0.413, "step": 17178 }, { "epoch": 0.47169137836353653, "grad_norm": 0.34721434116363525, "learning_rate": 1.7400930853206964e-05, "loss": 0.4424, "step": 17179 }, { "epoch": 0.471718835804503, "grad_norm": 0.3731633424758911, "learning_rate": 1.7400640396068625e-05, "loss": 0.5283, "step": 17180 }, { "epoch": 0.4717462932454695, "grad_norm": 0.36002546548843384, "learning_rate": 1.7400349925125733e-05, "loss": 0.4912, "step": 17181 }, { "epoch": 0.471773750686436, "grad_norm": 0.37416812777519226, "learning_rate": 1.7400059440378824e-05, "loss": 0.509, "step": 17182 }, { "epoch": 0.4718012081274025, "grad_norm": 0.36918312311172485, "learning_rate": 1.7399768941828445e-05, "loss": 0.5735, "step": 17183 }, { "epoch": 0.47182866556836905, "grad_norm": 0.3621938228607178, "learning_rate": 1.7399478429475132e-05, "loss": 0.5517, "step": 17184 }, { "epoch": 0.47185612300933555, "grad_norm": 0.37911492586135864, "learning_rate": 1.739918790331943e-05, "loss": 0.4181, "step": 17185 }, { "epoch": 0.47188358045030204, "grad_norm": 0.3568498194217682, "learning_rate": 1.7398897363361883e-05, "loss": 0.4099, "step": 17186 }, { "epoch": 0.47191103789126854, "grad_norm": 0.3792688548564911, "learning_rate": 1.7398606809603032e-05, "loss": 0.5262, "step": 17187 }, { "epoch": 0.47193849533223503, "grad_norm": 0.3800898790359497, "learning_rate": 1.7398316242043415e-05, "loss": 0.5531, "step": 17188 }, { "epoch": 0.4719659527732015, "grad_norm": 0.34380286931991577, "learning_rate": 1.7398025660683578e-05, "loss": 0.4272, "step": 17189 }, { "epoch": 0.471993410214168, "grad_norm": 0.37594109773635864, "learning_rate": 1.7397735065524063e-05, "loss": 0.5275, "step": 17190 }, { "epoch": 0.47202086765513457, "grad_norm": 0.6572182178497314, "learning_rate": 1.739744445656541e-05, "loss": 0.5887, "step": 17191 }, { "epoch": 0.47204832509610106, "grad_norm": 0.37441498041152954, "learning_rate": 1.7397153833808166e-05, "loss": 0.536, "step": 17192 }, { "epoch": 0.47207578253706756, "grad_norm": 0.3591223657131195, "learning_rate": 1.7396863197252864e-05, "loss": 0.4204, "step": 17193 }, { "epoch": 0.47210323997803405, "grad_norm": 0.3884643018245697, "learning_rate": 1.7396572546900052e-05, "loss": 0.5997, "step": 17194 }, { "epoch": 0.47213069741900054, "grad_norm": 0.367172509431839, "learning_rate": 1.7396281882750273e-05, "loss": 0.584, "step": 17195 }, { "epoch": 0.47215815485996704, "grad_norm": 0.36013105511665344, "learning_rate": 1.739599120480407e-05, "loss": 0.4929, "step": 17196 }, { "epoch": 0.47218561230093353, "grad_norm": 0.3591015338897705, "learning_rate": 1.7395700513061978e-05, "loss": 0.5317, "step": 17197 }, { "epoch": 0.4722130697419001, "grad_norm": 0.35685884952545166, "learning_rate": 1.7395409807524543e-05, "loss": 0.4849, "step": 17198 }, { "epoch": 0.4722405271828666, "grad_norm": 0.3693915903568268, "learning_rate": 1.7395119088192312e-05, "loss": 0.4857, "step": 17199 }, { "epoch": 0.47226798462383307, "grad_norm": 0.41792190074920654, "learning_rate": 1.7394828355065825e-05, "loss": 0.4891, "step": 17200 }, { "epoch": 0.47229544206479956, "grad_norm": 0.33272621035575867, "learning_rate": 1.739453760814562e-05, "loss": 0.4955, "step": 17201 }, { "epoch": 0.47232289950576606, "grad_norm": 0.48918411135673523, "learning_rate": 1.7394246847432245e-05, "loss": 0.5484, "step": 17202 }, { "epoch": 0.47235035694673255, "grad_norm": 0.34855470061302185, "learning_rate": 1.7393956072926236e-05, "loss": 0.4117, "step": 17203 }, { "epoch": 0.47237781438769905, "grad_norm": 0.3952394425868988, "learning_rate": 1.739366528462814e-05, "loss": 0.5747, "step": 17204 }, { "epoch": 0.4724052718286656, "grad_norm": 0.4538654685020447, "learning_rate": 1.73933744825385e-05, "loss": 0.6068, "step": 17205 }, { "epoch": 0.4724327292696321, "grad_norm": 0.37930038571357727, "learning_rate": 1.739308366665786e-05, "loss": 0.6068, "step": 17206 }, { "epoch": 0.4724601867105986, "grad_norm": 0.3464859426021576, "learning_rate": 1.7392792836986752e-05, "loss": 0.4583, "step": 17207 }, { "epoch": 0.4724876441515651, "grad_norm": 0.3632785677909851, "learning_rate": 1.739250199352573e-05, "loss": 0.5256, "step": 17208 }, { "epoch": 0.4725151015925316, "grad_norm": 0.3598712384700775, "learning_rate": 1.7392211136275336e-05, "loss": 0.429, "step": 17209 }, { "epoch": 0.47254255903349807, "grad_norm": 0.40367990732192993, "learning_rate": 1.7391920265236102e-05, "loss": 0.5902, "step": 17210 }, { "epoch": 0.47257001647446456, "grad_norm": 0.4128073453903198, "learning_rate": 1.739162938040858e-05, "loss": 0.481, "step": 17211 }, { "epoch": 0.47259747391543105, "grad_norm": 0.3733974099159241, "learning_rate": 1.739133848179331e-05, "loss": 0.5262, "step": 17212 }, { "epoch": 0.4726249313563976, "grad_norm": 0.42053601145744324, "learning_rate": 1.7391047569390836e-05, "loss": 0.5679, "step": 17213 }, { "epoch": 0.4726523887973641, "grad_norm": 0.3420772850513458, "learning_rate": 1.7390756643201698e-05, "loss": 0.4654, "step": 17214 }, { "epoch": 0.4726798462383306, "grad_norm": 0.37414565682411194, "learning_rate": 1.739046570322644e-05, "loss": 0.4965, "step": 17215 }, { "epoch": 0.4727073036792971, "grad_norm": 0.3983024060726166, "learning_rate": 1.7390174749465608e-05, "loss": 0.6005, "step": 17216 }, { "epoch": 0.4727347611202636, "grad_norm": 0.41155120730400085, "learning_rate": 1.738988378191974e-05, "loss": 0.4524, "step": 17217 }, { "epoch": 0.4727622185612301, "grad_norm": 0.38649871945381165, "learning_rate": 1.7389592800589374e-05, "loss": 0.5498, "step": 17218 }, { "epoch": 0.47278967600219657, "grad_norm": 0.36010658740997314, "learning_rate": 1.7389301805475064e-05, "loss": 0.5136, "step": 17219 }, { "epoch": 0.4728171334431631, "grad_norm": 0.3788629472255707, "learning_rate": 1.738901079657735e-05, "loss": 0.6178, "step": 17220 }, { "epoch": 0.4728445908841296, "grad_norm": 0.37550729513168335, "learning_rate": 1.738871977389677e-05, "loss": 0.5969, "step": 17221 }, { "epoch": 0.4728720483250961, "grad_norm": 0.3857876658439636, "learning_rate": 1.7388428737433868e-05, "loss": 0.4896, "step": 17222 }, { "epoch": 0.4728995057660626, "grad_norm": 0.3998032212257385, "learning_rate": 1.7388137687189192e-05, "loss": 0.5497, "step": 17223 }, { "epoch": 0.4729269632070291, "grad_norm": 0.31837695837020874, "learning_rate": 1.7387846623163277e-05, "loss": 0.4098, "step": 17224 }, { "epoch": 0.4729544206479956, "grad_norm": 0.43702268600463867, "learning_rate": 1.738755554535667e-05, "loss": 0.6094, "step": 17225 }, { "epoch": 0.4729818780889621, "grad_norm": 0.3653499484062195, "learning_rate": 1.738726445376992e-05, "loss": 0.4394, "step": 17226 }, { "epoch": 0.47300933552992863, "grad_norm": 0.4202346205711365, "learning_rate": 1.7386973348403558e-05, "loss": 0.4675, "step": 17227 }, { "epoch": 0.4730367929708951, "grad_norm": 0.34099262952804565, "learning_rate": 1.7386682229258136e-05, "loss": 0.4607, "step": 17228 }, { "epoch": 0.4730642504118616, "grad_norm": 0.3644240200519562, "learning_rate": 1.7386391096334194e-05, "loss": 0.5676, "step": 17229 }, { "epoch": 0.4730917078528281, "grad_norm": 0.35538753867149353, "learning_rate": 1.7386099949632273e-05, "loss": 0.496, "step": 17230 }, { "epoch": 0.4731191652937946, "grad_norm": 0.3731416165828705, "learning_rate": 1.738580878915292e-05, "loss": 0.4976, "step": 17231 }, { "epoch": 0.4731466227347611, "grad_norm": 0.44499915838241577, "learning_rate": 1.7385517614896678e-05, "loss": 0.5091, "step": 17232 }, { "epoch": 0.4731740801757276, "grad_norm": 0.3652336895465851, "learning_rate": 1.7385226426864082e-05, "loss": 0.475, "step": 17233 }, { "epoch": 0.47320153761669415, "grad_norm": 0.3302925229072571, "learning_rate": 1.7384935225055688e-05, "loss": 0.4976, "step": 17234 }, { "epoch": 0.47322899505766064, "grad_norm": 0.3312305808067322, "learning_rate": 1.738464400947203e-05, "loss": 0.458, "step": 17235 }, { "epoch": 0.47325645249862713, "grad_norm": 0.4262823164463043, "learning_rate": 1.7384352780113656e-05, "loss": 0.5381, "step": 17236 }, { "epoch": 0.47328390993959363, "grad_norm": 0.3880760669708252, "learning_rate": 1.7384061536981106e-05, "loss": 0.5175, "step": 17237 }, { "epoch": 0.4733113673805601, "grad_norm": 0.35953396558761597, "learning_rate": 1.7383770280074926e-05, "loss": 0.5672, "step": 17238 }, { "epoch": 0.4733388248215266, "grad_norm": 0.3842301368713379, "learning_rate": 1.7383479009395655e-05, "loss": 0.5227, "step": 17239 }, { "epoch": 0.4733662822624931, "grad_norm": 0.332232266664505, "learning_rate": 1.7383187724943842e-05, "loss": 0.4854, "step": 17240 }, { "epoch": 0.47339373970345966, "grad_norm": 0.4168449640274048, "learning_rate": 1.7382896426720025e-05, "loss": 0.5288, "step": 17241 }, { "epoch": 0.47342119714442615, "grad_norm": 0.4474450647830963, "learning_rate": 1.738260511472475e-05, "loss": 0.503, "step": 17242 }, { "epoch": 0.47344865458539265, "grad_norm": 0.404805064201355, "learning_rate": 1.7382313788958563e-05, "loss": 0.4929, "step": 17243 }, { "epoch": 0.47347611202635914, "grad_norm": 0.4483509361743927, "learning_rate": 1.7382022449422e-05, "loss": 0.5267, "step": 17244 }, { "epoch": 0.47350356946732564, "grad_norm": 0.4038238525390625, "learning_rate": 1.7381731096115615e-05, "loss": 0.5427, "step": 17245 }, { "epoch": 0.47353102690829213, "grad_norm": 0.4074070155620575, "learning_rate": 1.7381439729039942e-05, "loss": 0.4967, "step": 17246 }, { "epoch": 0.4735584843492586, "grad_norm": 0.3859017789363861, "learning_rate": 1.7381148348195526e-05, "loss": 0.5314, "step": 17247 }, { "epoch": 0.4735859417902252, "grad_norm": 0.5953295826911926, "learning_rate": 1.7380856953582917e-05, "loss": 0.517, "step": 17248 }, { "epoch": 0.47361339923119167, "grad_norm": 0.3646122217178345, "learning_rate": 1.7380565545202652e-05, "loss": 0.5108, "step": 17249 }, { "epoch": 0.47364085667215816, "grad_norm": 0.4172845482826233, "learning_rate": 1.7380274123055275e-05, "loss": 0.5473, "step": 17250 }, { "epoch": 0.47366831411312466, "grad_norm": 0.3587459325790405, "learning_rate": 1.7379982687141333e-05, "loss": 0.5128, "step": 17251 }, { "epoch": 0.47369577155409115, "grad_norm": 0.35335248708724976, "learning_rate": 1.7379691237461368e-05, "loss": 0.5197, "step": 17252 }, { "epoch": 0.47372322899505764, "grad_norm": 0.36500540375709534, "learning_rate": 1.7379399774015924e-05, "loss": 0.5641, "step": 17253 }, { "epoch": 0.47375068643602414, "grad_norm": 0.35103997588157654, "learning_rate": 1.7379108296805545e-05, "loss": 0.55, "step": 17254 }, { "epoch": 0.4737781438769907, "grad_norm": 0.40318673849105835, "learning_rate": 1.737881680583077e-05, "loss": 0.5044, "step": 17255 }, { "epoch": 0.4738056013179572, "grad_norm": 0.4028772711753845, "learning_rate": 1.737852530109215e-05, "loss": 0.5394, "step": 17256 }, { "epoch": 0.4738330587589237, "grad_norm": 0.4003382623195648, "learning_rate": 1.7378233782590226e-05, "loss": 0.5069, "step": 17257 }, { "epoch": 0.47386051619989017, "grad_norm": 0.38466304540634155, "learning_rate": 1.7377942250325537e-05, "loss": 0.5049, "step": 17258 }, { "epoch": 0.47388797364085666, "grad_norm": 0.40776029229164124, "learning_rate": 1.7377650704298634e-05, "loss": 0.5028, "step": 17259 }, { "epoch": 0.47391543108182316, "grad_norm": 0.43944212794303894, "learning_rate": 1.7377359144510057e-05, "loss": 0.5289, "step": 17260 }, { "epoch": 0.47394288852278965, "grad_norm": 0.4240504503250122, "learning_rate": 1.7377067570960352e-05, "loss": 0.4963, "step": 17261 }, { "epoch": 0.4739703459637562, "grad_norm": 0.3660351037979126, "learning_rate": 1.737677598365006e-05, "loss": 0.4969, "step": 17262 }, { "epoch": 0.4739978034047227, "grad_norm": 0.37510454654693604, "learning_rate": 1.7376484382579725e-05, "loss": 0.5447, "step": 17263 }, { "epoch": 0.4740252608456892, "grad_norm": 0.38862836360931396, "learning_rate": 1.7376192767749894e-05, "loss": 0.4905, "step": 17264 }, { "epoch": 0.4740527182866557, "grad_norm": 0.35776373744010925, "learning_rate": 1.737590113916111e-05, "loss": 0.4723, "step": 17265 }, { "epoch": 0.4740801757276222, "grad_norm": 0.4024008512496948, "learning_rate": 1.7375609496813916e-05, "loss": 0.5367, "step": 17266 }, { "epoch": 0.47410763316858867, "grad_norm": 0.3687393069267273, "learning_rate": 1.7375317840708853e-05, "loss": 0.4479, "step": 17267 }, { "epoch": 0.47413509060955517, "grad_norm": 0.3861641585826874, "learning_rate": 1.7375026170846474e-05, "loss": 0.528, "step": 17268 }, { "epoch": 0.4741625480505217, "grad_norm": 0.38521334528923035, "learning_rate": 1.7374734487227315e-05, "loss": 0.5967, "step": 17269 }, { "epoch": 0.4741900054914882, "grad_norm": 0.4410783648490906, "learning_rate": 1.737444278985192e-05, "loss": 0.5497, "step": 17270 }, { "epoch": 0.4742174629324547, "grad_norm": 0.42397835850715637, "learning_rate": 1.7374151078720836e-05, "loss": 0.5411, "step": 17271 }, { "epoch": 0.4742449203734212, "grad_norm": 0.3946220874786377, "learning_rate": 1.737385935383461e-05, "loss": 0.5628, "step": 17272 }, { "epoch": 0.4742723778143877, "grad_norm": 0.37670767307281494, "learning_rate": 1.737356761519378e-05, "loss": 0.5462, "step": 17273 }, { "epoch": 0.4742998352553542, "grad_norm": 0.455321341753006, "learning_rate": 1.7373275862798897e-05, "loss": 0.588, "step": 17274 }, { "epoch": 0.4743272926963207, "grad_norm": 0.38799625635147095, "learning_rate": 1.7372984096650495e-05, "loss": 0.5018, "step": 17275 }, { "epoch": 0.47435475013728723, "grad_norm": 0.4008844196796417, "learning_rate": 1.737269231674913e-05, "loss": 0.542, "step": 17276 }, { "epoch": 0.4743822075782537, "grad_norm": 0.4597949683666229, "learning_rate": 1.7372400523095337e-05, "loss": 0.6019, "step": 17277 }, { "epoch": 0.4744096650192202, "grad_norm": 0.31671902537345886, "learning_rate": 1.7372108715689665e-05, "loss": 0.427, "step": 17278 }, { "epoch": 0.4744371224601867, "grad_norm": 0.3999205231666565, "learning_rate": 1.7371816894532657e-05, "loss": 0.5337, "step": 17279 }, { "epoch": 0.4744645799011532, "grad_norm": 0.3543127477169037, "learning_rate": 1.737152505962486e-05, "loss": 0.4671, "step": 17280 }, { "epoch": 0.4744920373421197, "grad_norm": 0.36503762006759644, "learning_rate": 1.7371233210966814e-05, "loss": 0.4329, "step": 17281 }, { "epoch": 0.4745194947830862, "grad_norm": 0.3354511857032776, "learning_rate": 1.7370941348559068e-05, "loss": 0.5202, "step": 17282 }, { "epoch": 0.47454695222405274, "grad_norm": 0.3819139897823334, "learning_rate": 1.737064947240216e-05, "loss": 0.5134, "step": 17283 }, { "epoch": 0.47457440966501924, "grad_norm": 0.3565080761909485, "learning_rate": 1.737035758249664e-05, "loss": 0.482, "step": 17284 }, { "epoch": 0.47460186710598573, "grad_norm": 0.39283961057662964, "learning_rate": 1.7370065678843052e-05, "loss": 0.4679, "step": 17285 }, { "epoch": 0.4746293245469522, "grad_norm": 0.35014277696609497, "learning_rate": 1.736977376144194e-05, "loss": 0.4961, "step": 17286 }, { "epoch": 0.4746567819879187, "grad_norm": 0.37778040766716003, "learning_rate": 1.7369481830293847e-05, "loss": 0.516, "step": 17287 }, { "epoch": 0.4746842394288852, "grad_norm": 0.3363319933414459, "learning_rate": 1.7369189885399314e-05, "loss": 0.4738, "step": 17288 }, { "epoch": 0.4747116968698517, "grad_norm": 0.3324213922023773, "learning_rate": 1.7368897926758894e-05, "loss": 0.5213, "step": 17289 }, { "epoch": 0.47473915431081826, "grad_norm": 0.4516817033290863, "learning_rate": 1.7368605954373125e-05, "loss": 0.6487, "step": 17290 }, { "epoch": 0.47476661175178475, "grad_norm": 0.6064024567604065, "learning_rate": 1.736831396824256e-05, "loss": 0.472, "step": 17291 }, { "epoch": 0.47479406919275124, "grad_norm": 0.3879409432411194, "learning_rate": 1.736802196836773e-05, "loss": 0.5094, "step": 17292 }, { "epoch": 0.47482152663371774, "grad_norm": 0.3599814176559448, "learning_rate": 1.7367729954749194e-05, "loss": 0.4996, "step": 17293 }, { "epoch": 0.47484898407468423, "grad_norm": 0.383217453956604, "learning_rate": 1.7367437927387484e-05, "loss": 0.4346, "step": 17294 }, { "epoch": 0.4748764415156507, "grad_norm": 0.3607107102870941, "learning_rate": 1.7367145886283155e-05, "loss": 0.4242, "step": 17295 }, { "epoch": 0.4749038989566172, "grad_norm": 0.3496772348880768, "learning_rate": 1.7366853831436746e-05, "loss": 0.4394, "step": 17296 }, { "epoch": 0.47493135639758377, "grad_norm": 0.38314926624298096, "learning_rate": 1.7366561762848803e-05, "loss": 0.4963, "step": 17297 }, { "epoch": 0.47495881383855026, "grad_norm": 0.3727840185165405, "learning_rate": 1.7366269680519875e-05, "loss": 0.5107, "step": 17298 }, { "epoch": 0.47498627127951676, "grad_norm": 0.39647218585014343, "learning_rate": 1.7365977584450497e-05, "loss": 0.5597, "step": 17299 }, { "epoch": 0.47501372872048325, "grad_norm": 0.6762796640396118, "learning_rate": 1.7365685474641222e-05, "loss": 0.465, "step": 17300 }, { "epoch": 0.47504118616144975, "grad_norm": 0.4140743315219879, "learning_rate": 1.7365393351092598e-05, "loss": 0.5154, "step": 17301 }, { "epoch": 0.47506864360241624, "grad_norm": 0.420852929353714, "learning_rate": 1.7365101213805157e-05, "loss": 0.5467, "step": 17302 }, { "epoch": 0.47509610104338273, "grad_norm": 0.6223469972610474, "learning_rate": 1.7364809062779454e-05, "loss": 0.5541, "step": 17303 }, { "epoch": 0.4751235584843493, "grad_norm": 0.4063449203968048, "learning_rate": 1.7364516898016033e-05, "loss": 0.4877, "step": 17304 }, { "epoch": 0.4751510159253158, "grad_norm": 0.5202169418334961, "learning_rate": 1.7364224719515438e-05, "loss": 0.501, "step": 17305 }, { "epoch": 0.4751784733662823, "grad_norm": 0.4030936658382416, "learning_rate": 1.7363932527278212e-05, "loss": 0.4689, "step": 17306 }, { "epoch": 0.47520593080724877, "grad_norm": 0.3792634904384613, "learning_rate": 1.7363640321304903e-05, "loss": 0.5148, "step": 17307 }, { "epoch": 0.47523338824821526, "grad_norm": 0.37605535984039307, "learning_rate": 1.736334810159605e-05, "loss": 0.4869, "step": 17308 }, { "epoch": 0.47526084568918175, "grad_norm": 0.40993282198905945, "learning_rate": 1.736305586815221e-05, "loss": 0.5332, "step": 17309 }, { "epoch": 0.47528830313014825, "grad_norm": 0.3890884518623352, "learning_rate": 1.7362763620973916e-05, "loss": 0.4945, "step": 17310 }, { "epoch": 0.4753157605711148, "grad_norm": 0.34221163392066956, "learning_rate": 1.7362471360061718e-05, "loss": 0.5349, "step": 17311 }, { "epoch": 0.4753432180120813, "grad_norm": 0.523823082447052, "learning_rate": 1.736217908541616e-05, "loss": 0.4289, "step": 17312 }, { "epoch": 0.4753706754530478, "grad_norm": 0.355613112449646, "learning_rate": 1.7361886797037795e-05, "loss": 0.4693, "step": 17313 }, { "epoch": 0.4753981328940143, "grad_norm": 0.42061594128608704, "learning_rate": 1.7361594494927155e-05, "loss": 0.6207, "step": 17314 }, { "epoch": 0.4754255903349808, "grad_norm": 0.37371620535850525, "learning_rate": 1.7361302179084796e-05, "loss": 0.4454, "step": 17315 }, { "epoch": 0.47545304777594727, "grad_norm": 0.39288410544395447, "learning_rate": 1.7361009849511254e-05, "loss": 0.5075, "step": 17316 }, { "epoch": 0.47548050521691376, "grad_norm": 0.4103913903236389, "learning_rate": 1.7360717506207084e-05, "loss": 0.5251, "step": 17317 }, { "epoch": 0.4755079626578803, "grad_norm": 0.3623792231082916, "learning_rate": 1.736042514917282e-05, "loss": 0.4756, "step": 17318 }, { "epoch": 0.4755354200988468, "grad_norm": 0.340503454208374, "learning_rate": 1.736013277840902e-05, "loss": 0.4593, "step": 17319 }, { "epoch": 0.4755628775398133, "grad_norm": 0.38084450364112854, "learning_rate": 1.735984039391622e-05, "loss": 0.5572, "step": 17320 }, { "epoch": 0.4755903349807798, "grad_norm": 0.42625728249549866, "learning_rate": 1.7359547995694975e-05, "loss": 0.62, "step": 17321 }, { "epoch": 0.4756177924217463, "grad_norm": 0.3708992600440979, "learning_rate": 1.735925558374582e-05, "loss": 0.5233, "step": 17322 }, { "epoch": 0.4756452498627128, "grad_norm": 0.4071897566318512, "learning_rate": 1.73589631580693e-05, "loss": 0.5477, "step": 17323 }, { "epoch": 0.4756727073036793, "grad_norm": 0.35251614451408386, "learning_rate": 1.735867071866597e-05, "loss": 0.4574, "step": 17324 }, { "epoch": 0.4757001647446458, "grad_norm": 0.5738990306854248, "learning_rate": 1.7358378265536368e-05, "loss": 0.5683, "step": 17325 }, { "epoch": 0.4757276221856123, "grad_norm": 0.43464571237564087, "learning_rate": 1.7358085798681045e-05, "loss": 0.4777, "step": 17326 }, { "epoch": 0.4757550796265788, "grad_norm": 0.33663681149482727, "learning_rate": 1.7357793318100543e-05, "loss": 0.4766, "step": 17327 }, { "epoch": 0.4757825370675453, "grad_norm": 0.4021676182746887, "learning_rate": 1.7357500823795406e-05, "loss": 0.5029, "step": 17328 }, { "epoch": 0.4758099945085118, "grad_norm": 0.37662041187286377, "learning_rate": 1.735720831576618e-05, "loss": 0.4684, "step": 17329 }, { "epoch": 0.4758374519494783, "grad_norm": 0.38047337532043457, "learning_rate": 1.7356915794013415e-05, "loss": 0.5406, "step": 17330 }, { "epoch": 0.4758649093904448, "grad_norm": 0.4084271490573883, "learning_rate": 1.7356623258537656e-05, "loss": 0.5619, "step": 17331 }, { "epoch": 0.47589236683141134, "grad_norm": 0.3907347023487091, "learning_rate": 1.7356330709339445e-05, "loss": 0.5044, "step": 17332 }, { "epoch": 0.47591982427237783, "grad_norm": 0.3495807647705078, "learning_rate": 1.735603814641933e-05, "loss": 0.4672, "step": 17333 }, { "epoch": 0.47594728171334433, "grad_norm": 0.35436978936195374, "learning_rate": 1.7355745569777854e-05, "loss": 0.4731, "step": 17334 }, { "epoch": 0.4759747391543108, "grad_norm": 0.4180624783039093, "learning_rate": 1.7355452979415566e-05, "loss": 0.5568, "step": 17335 }, { "epoch": 0.4760021965952773, "grad_norm": 0.3657536506652832, "learning_rate": 1.735516037533301e-05, "loss": 0.5173, "step": 17336 }, { "epoch": 0.4760296540362438, "grad_norm": 0.4378049969673157, "learning_rate": 1.7354867757530735e-05, "loss": 0.6014, "step": 17337 }, { "epoch": 0.4760571114772103, "grad_norm": 0.43598273396492004, "learning_rate": 1.735457512600928e-05, "loss": 0.5198, "step": 17338 }, { "epoch": 0.47608456891817685, "grad_norm": 0.38626915216445923, "learning_rate": 1.7354282480769197e-05, "loss": 0.4536, "step": 17339 }, { "epoch": 0.47611202635914335, "grad_norm": 0.36826831102371216, "learning_rate": 1.7353989821811034e-05, "loss": 0.4051, "step": 17340 }, { "epoch": 0.47613948380010984, "grad_norm": 0.3723454177379608, "learning_rate": 1.7353697149135327e-05, "loss": 0.5006, "step": 17341 }, { "epoch": 0.47616694124107634, "grad_norm": 0.3407902717590332, "learning_rate": 1.735340446274263e-05, "loss": 0.472, "step": 17342 }, { "epoch": 0.47619439868204283, "grad_norm": 0.5272567868232727, "learning_rate": 1.735311176263349e-05, "loss": 0.5114, "step": 17343 }, { "epoch": 0.4762218561230093, "grad_norm": 0.37096962332725525, "learning_rate": 1.7352819048808444e-05, "loss": 0.4979, "step": 17344 }, { "epoch": 0.4762493135639758, "grad_norm": 0.4156540334224701, "learning_rate": 1.7352526321268045e-05, "loss": 0.5121, "step": 17345 }, { "epoch": 0.4762767710049423, "grad_norm": 0.39967218041419983, "learning_rate": 1.7352233580012838e-05, "loss": 0.6195, "step": 17346 }, { "epoch": 0.47630422844590886, "grad_norm": 0.37623417377471924, "learning_rate": 1.735194082504337e-05, "loss": 0.5032, "step": 17347 }, { "epoch": 0.47633168588687536, "grad_norm": 0.3679143488407135, "learning_rate": 1.7351648056360186e-05, "loss": 0.5125, "step": 17348 }, { "epoch": 0.47635914332784185, "grad_norm": 0.4195724427700043, "learning_rate": 1.7351355273963833e-05, "loss": 0.5978, "step": 17349 }, { "epoch": 0.47638660076880834, "grad_norm": 0.3589080274105072, "learning_rate": 1.7351062477854854e-05, "loss": 0.3483, "step": 17350 }, { "epoch": 0.47641405820977484, "grad_norm": 0.3734091520309448, "learning_rate": 1.7350769668033796e-05, "loss": 0.5217, "step": 17351 }, { "epoch": 0.47644151565074133, "grad_norm": 0.34398144483566284, "learning_rate": 1.735047684450121e-05, "loss": 0.4923, "step": 17352 }, { "epoch": 0.4764689730917078, "grad_norm": 0.3816189765930176, "learning_rate": 1.7350184007257634e-05, "loss": 0.4929, "step": 17353 }, { "epoch": 0.4764964305326744, "grad_norm": 0.38596463203430176, "learning_rate": 1.7349891156303623e-05, "loss": 0.5035, "step": 17354 }, { "epoch": 0.47652388797364087, "grad_norm": 0.3599514067173004, "learning_rate": 1.734959829163972e-05, "loss": 0.481, "step": 17355 }, { "epoch": 0.47655134541460736, "grad_norm": 0.3589027225971222, "learning_rate": 1.734930541326647e-05, "loss": 0.5475, "step": 17356 }, { "epoch": 0.47657880285557386, "grad_norm": 0.3633432984352112, "learning_rate": 1.7349012521184416e-05, "loss": 0.4721, "step": 17357 }, { "epoch": 0.47660626029654035, "grad_norm": 0.37459900975227356, "learning_rate": 1.7348719615394113e-05, "loss": 0.5462, "step": 17358 }, { "epoch": 0.47663371773750685, "grad_norm": 0.3886943459510803, "learning_rate": 1.7348426695896102e-05, "loss": 0.5796, "step": 17359 }, { "epoch": 0.47666117517847334, "grad_norm": 0.49584949016571045, "learning_rate": 1.7348133762690927e-05, "loss": 0.5096, "step": 17360 }, { "epoch": 0.4766886326194399, "grad_norm": 0.3323688507080078, "learning_rate": 1.734784081577914e-05, "loss": 0.4871, "step": 17361 }, { "epoch": 0.4767160900604064, "grad_norm": 0.37866467237472534, "learning_rate": 1.734754785516128e-05, "loss": 0.5645, "step": 17362 }, { "epoch": 0.4767435475013729, "grad_norm": 0.34868693351745605, "learning_rate": 1.73472548808379e-05, "loss": 0.4782, "step": 17363 }, { "epoch": 0.47677100494233937, "grad_norm": 0.40447738766670227, "learning_rate": 1.7346961892809547e-05, "loss": 0.5112, "step": 17364 }, { "epoch": 0.47679846238330587, "grad_norm": 0.357263445854187, "learning_rate": 1.7346668891076763e-05, "loss": 0.5571, "step": 17365 }, { "epoch": 0.47682591982427236, "grad_norm": 0.34988224506378174, "learning_rate": 1.7346375875640098e-05, "loss": 0.4864, "step": 17366 }, { "epoch": 0.47685337726523885, "grad_norm": 0.3967995047569275, "learning_rate": 1.73460828465001e-05, "loss": 0.5511, "step": 17367 }, { "epoch": 0.4768808347062054, "grad_norm": 0.4132901132106781, "learning_rate": 1.734578980365731e-05, "loss": 0.6095, "step": 17368 }, { "epoch": 0.4769082921471719, "grad_norm": 0.33256980776786804, "learning_rate": 1.7345496747112276e-05, "loss": 0.4923, "step": 17369 }, { "epoch": 0.4769357495881384, "grad_norm": 0.40854552388191223, "learning_rate": 1.734520367686555e-05, "loss": 0.5656, "step": 17370 }, { "epoch": 0.4769632070291049, "grad_norm": 0.44924187660217285, "learning_rate": 1.734491059291767e-05, "loss": 0.4898, "step": 17371 }, { "epoch": 0.4769906644700714, "grad_norm": 0.42592760920524597, "learning_rate": 1.734461749526919e-05, "loss": 0.5275, "step": 17372 }, { "epoch": 0.4770181219110379, "grad_norm": 0.5115145444869995, "learning_rate": 1.7344324383920653e-05, "loss": 0.4352, "step": 17373 }, { "epoch": 0.47704557935200437, "grad_norm": 0.40516889095306396, "learning_rate": 1.7344031258872608e-05, "loss": 0.4788, "step": 17374 }, { "epoch": 0.4770730367929709, "grad_norm": 0.41437679529190063, "learning_rate": 1.73437381201256e-05, "loss": 0.477, "step": 17375 }, { "epoch": 0.4771004942339374, "grad_norm": 0.3685512840747833, "learning_rate": 1.734344496768018e-05, "loss": 0.4909, "step": 17376 }, { "epoch": 0.4771279516749039, "grad_norm": 0.3972471058368683, "learning_rate": 1.734315180153689e-05, "loss": 0.5639, "step": 17377 }, { "epoch": 0.4771554091158704, "grad_norm": 0.416051983833313, "learning_rate": 1.7342858621696276e-05, "loss": 0.6111, "step": 17378 }, { "epoch": 0.4771828665568369, "grad_norm": 0.3596571683883667, "learning_rate": 1.7342565428158888e-05, "loss": 0.5325, "step": 17379 }, { "epoch": 0.4772103239978034, "grad_norm": 0.38166409730911255, "learning_rate": 1.7342272220925273e-05, "loss": 0.5376, "step": 17380 }, { "epoch": 0.4772377814387699, "grad_norm": 0.3678920567035675, "learning_rate": 1.7341978999995975e-05, "loss": 0.4626, "step": 17381 }, { "epoch": 0.47726523887973643, "grad_norm": 0.35474836826324463, "learning_rate": 1.7341685765371547e-05, "loss": 0.513, "step": 17382 }, { "epoch": 0.4772926963207029, "grad_norm": 0.38975921273231506, "learning_rate": 1.7341392517052527e-05, "loss": 0.4393, "step": 17383 }, { "epoch": 0.4773201537616694, "grad_norm": 0.3774014413356781, "learning_rate": 1.734109925503947e-05, "loss": 0.496, "step": 17384 }, { "epoch": 0.4773476112026359, "grad_norm": 0.319079726934433, "learning_rate": 1.7340805979332916e-05, "loss": 0.393, "step": 17385 }, { "epoch": 0.4773750686436024, "grad_norm": 0.45210450887680054, "learning_rate": 1.734051268993342e-05, "loss": 0.4461, "step": 17386 }, { "epoch": 0.4774025260845689, "grad_norm": 0.49747586250305176, "learning_rate": 1.7340219386841525e-05, "loss": 0.5088, "step": 17387 }, { "epoch": 0.4774299835255354, "grad_norm": 0.35782289505004883, "learning_rate": 1.7339926070057774e-05, "loss": 0.5117, "step": 17388 }, { "epoch": 0.47745744096650194, "grad_norm": 0.3840772211551666, "learning_rate": 1.7339632739582725e-05, "loss": 0.5206, "step": 17389 }, { "epoch": 0.47748489840746844, "grad_norm": 0.4035729765892029, "learning_rate": 1.7339339395416913e-05, "loss": 0.4382, "step": 17390 }, { "epoch": 0.47751235584843493, "grad_norm": 0.41780993342399597, "learning_rate": 1.733904603756089e-05, "loss": 0.5717, "step": 17391 }, { "epoch": 0.4775398132894014, "grad_norm": 0.35625511407852173, "learning_rate": 1.7338752666015206e-05, "loss": 0.5464, "step": 17392 }, { "epoch": 0.4775672707303679, "grad_norm": 0.37332481145858765, "learning_rate": 1.7338459280780405e-05, "loss": 0.5484, "step": 17393 }, { "epoch": 0.4775947281713344, "grad_norm": 0.3746476471424103, "learning_rate": 1.733816588185704e-05, "loss": 0.447, "step": 17394 }, { "epoch": 0.4776221856123009, "grad_norm": 0.35769957304000854, "learning_rate": 1.7337872469245647e-05, "loss": 0.4575, "step": 17395 }, { "epoch": 0.47764964305326746, "grad_norm": 0.4278643727302551, "learning_rate": 1.733757904294678e-05, "loss": 0.5866, "step": 17396 }, { "epoch": 0.47767710049423395, "grad_norm": 0.382748007774353, "learning_rate": 1.7337285602960993e-05, "loss": 0.5526, "step": 17397 }, { "epoch": 0.47770455793520045, "grad_norm": 0.37162765860557556, "learning_rate": 1.733699214928882e-05, "loss": 0.5788, "step": 17398 }, { "epoch": 0.47773201537616694, "grad_norm": 0.3568292260169983, "learning_rate": 1.7336698681930817e-05, "loss": 0.5862, "step": 17399 }, { "epoch": 0.47775947281713343, "grad_norm": 0.6592757105827332, "learning_rate": 1.733640520088753e-05, "loss": 0.5302, "step": 17400 }, { "epoch": 0.47778693025809993, "grad_norm": 0.3465222418308258, "learning_rate": 1.7336111706159508e-05, "loss": 0.4362, "step": 17401 }, { "epoch": 0.4778143876990664, "grad_norm": 0.3710457384586334, "learning_rate": 1.7335818197747292e-05, "loss": 0.4709, "step": 17402 }, { "epoch": 0.477841845140033, "grad_norm": 0.33137112855911255, "learning_rate": 1.7335524675651435e-05, "loss": 0.4214, "step": 17403 }, { "epoch": 0.47786930258099947, "grad_norm": 0.35789304971694946, "learning_rate": 1.7335231139872483e-05, "loss": 0.5367, "step": 17404 }, { "epoch": 0.47789676002196596, "grad_norm": 0.36171191930770874, "learning_rate": 1.7334937590410984e-05, "loss": 0.5059, "step": 17405 }, { "epoch": 0.47792421746293245, "grad_norm": 0.36463484168052673, "learning_rate": 1.7334644027267487e-05, "loss": 0.433, "step": 17406 }, { "epoch": 0.47795167490389895, "grad_norm": 0.3833870589733124, "learning_rate": 1.7334350450442534e-05, "loss": 0.4867, "step": 17407 }, { "epoch": 0.47797913234486544, "grad_norm": 0.4165036976337433, "learning_rate": 1.733405685993668e-05, "loss": 0.5567, "step": 17408 }, { "epoch": 0.47800658978583194, "grad_norm": 0.35925546288490295, "learning_rate": 1.7333763255750467e-05, "loss": 0.4963, "step": 17409 }, { "epoch": 0.4780340472267985, "grad_norm": 0.3920578956604004, "learning_rate": 1.7333469637884447e-05, "loss": 0.5691, "step": 17410 }, { "epoch": 0.478061504667765, "grad_norm": 0.4162949323654175, "learning_rate": 1.7333176006339164e-05, "loss": 0.5596, "step": 17411 }, { "epoch": 0.4780889621087315, "grad_norm": 0.3206687271595001, "learning_rate": 1.7332882361115172e-05, "loss": 0.3999, "step": 17412 }, { "epoch": 0.47811641954969797, "grad_norm": 0.4134502112865448, "learning_rate": 1.7332588702213008e-05, "loss": 0.5225, "step": 17413 }, { "epoch": 0.47814387699066446, "grad_norm": 0.32541438937187195, "learning_rate": 1.7332295029633227e-05, "loss": 0.4886, "step": 17414 }, { "epoch": 0.47817133443163096, "grad_norm": 0.7432602047920227, "learning_rate": 1.7332001343376378e-05, "loss": 0.3887, "step": 17415 }, { "epoch": 0.47819879187259745, "grad_norm": 0.3295065462589264, "learning_rate": 1.7331707643443003e-05, "loss": 0.4282, "step": 17416 }, { "epoch": 0.478226249313564, "grad_norm": 0.3790801465511322, "learning_rate": 1.7331413929833657e-05, "loss": 0.5246, "step": 17417 }, { "epoch": 0.4782537067545305, "grad_norm": 0.40568724274635315, "learning_rate": 1.733112020254888e-05, "loss": 0.5278, "step": 17418 }, { "epoch": 0.478281164195497, "grad_norm": 0.3514539301395416, "learning_rate": 1.7330826461589227e-05, "loss": 0.474, "step": 17419 }, { "epoch": 0.4783086216364635, "grad_norm": 0.4348537027835846, "learning_rate": 1.7330532706955243e-05, "loss": 0.508, "step": 17420 }, { "epoch": 0.47833607907743, "grad_norm": 0.48672258853912354, "learning_rate": 1.7330238938647475e-05, "loss": 0.5473, "step": 17421 }, { "epoch": 0.47836353651839647, "grad_norm": 0.357158899307251, "learning_rate": 1.732994515666647e-05, "loss": 0.4893, "step": 17422 }, { "epoch": 0.47839099395936296, "grad_norm": 0.3885211646556854, "learning_rate": 1.732965136101278e-05, "loss": 0.5622, "step": 17423 }, { "epoch": 0.4784184514003295, "grad_norm": 0.3971414268016815, "learning_rate": 1.732935755168695e-05, "loss": 0.4967, "step": 17424 }, { "epoch": 0.478445908841296, "grad_norm": 0.4435804784297943, "learning_rate": 1.732906372868953e-05, "loss": 0.5426, "step": 17425 }, { "epoch": 0.4784733662822625, "grad_norm": 0.34197506308555603, "learning_rate": 1.7328769892021067e-05, "loss": 0.4665, "step": 17426 }, { "epoch": 0.478500823723229, "grad_norm": 0.4920865297317505, "learning_rate": 1.7328476041682108e-05, "loss": 0.5275, "step": 17427 }, { "epoch": 0.4785282811641955, "grad_norm": 0.3378271460533142, "learning_rate": 1.7328182177673203e-05, "loss": 0.4022, "step": 17428 }, { "epoch": 0.478555738605162, "grad_norm": 0.37281525135040283, "learning_rate": 1.73278882999949e-05, "loss": 0.5112, "step": 17429 }, { "epoch": 0.4785831960461285, "grad_norm": 0.3964836001396179, "learning_rate": 1.7327594408647746e-05, "loss": 0.4992, "step": 17430 }, { "epoch": 0.47861065348709503, "grad_norm": 0.4037831127643585, "learning_rate": 1.7327300503632288e-05, "loss": 0.5196, "step": 17431 }, { "epoch": 0.4786381109280615, "grad_norm": 0.35960671305656433, "learning_rate": 1.7327006584949076e-05, "loss": 0.5348, "step": 17432 }, { "epoch": 0.478665568369028, "grad_norm": 0.4321916401386261, "learning_rate": 1.732671265259866e-05, "loss": 0.4439, "step": 17433 }, { "epoch": 0.4786930258099945, "grad_norm": 0.4099774658679962, "learning_rate": 1.7326418706581586e-05, "loss": 0.5167, "step": 17434 }, { "epoch": 0.478720483250961, "grad_norm": 0.3795802593231201, "learning_rate": 1.7326124746898405e-05, "loss": 0.6486, "step": 17435 }, { "epoch": 0.4787479406919275, "grad_norm": 0.3593728244304657, "learning_rate": 1.732583077354966e-05, "loss": 0.4988, "step": 17436 }, { "epoch": 0.478775398132894, "grad_norm": 0.40003809332847595, "learning_rate": 1.73255367865359e-05, "loss": 0.5522, "step": 17437 }, { "epoch": 0.47880285557386054, "grad_norm": 0.34275415539741516, "learning_rate": 1.732524278585768e-05, "loss": 0.4467, "step": 17438 }, { "epoch": 0.47883031301482704, "grad_norm": 0.37368786334991455, "learning_rate": 1.7324948771515547e-05, "loss": 0.4802, "step": 17439 }, { "epoch": 0.47885777045579353, "grad_norm": 0.3622078001499176, "learning_rate": 1.7324654743510042e-05, "loss": 0.5243, "step": 17440 }, { "epoch": 0.47888522789676, "grad_norm": 0.5485128164291382, "learning_rate": 1.732436070184172e-05, "loss": 0.5257, "step": 17441 }, { "epoch": 0.4789126853377265, "grad_norm": 0.36230897903442383, "learning_rate": 1.7324066646511127e-05, "loss": 0.4803, "step": 17442 }, { "epoch": 0.478940142778693, "grad_norm": 0.39385026693344116, "learning_rate": 1.7323772577518812e-05, "loss": 0.5651, "step": 17443 }, { "epoch": 0.4789676002196595, "grad_norm": 0.3838667571544647, "learning_rate": 1.7323478494865322e-05, "loss": 0.4886, "step": 17444 }, { "epoch": 0.47899505766062606, "grad_norm": 0.39377760887145996, "learning_rate": 1.732318439855121e-05, "loss": 0.552, "step": 17445 }, { "epoch": 0.47902251510159255, "grad_norm": 0.33924782276153564, "learning_rate": 1.732289028857702e-05, "loss": 0.4368, "step": 17446 }, { "epoch": 0.47904997254255904, "grad_norm": 0.3385504186153412, "learning_rate": 1.7322596164943303e-05, "loss": 0.5336, "step": 17447 }, { "epoch": 0.47907742998352554, "grad_norm": 0.39383769035339355, "learning_rate": 1.732230202765061e-05, "loss": 0.5068, "step": 17448 }, { "epoch": 0.47910488742449203, "grad_norm": 0.37099388241767883, "learning_rate": 1.7322007876699484e-05, "loss": 0.5869, "step": 17449 }, { "epoch": 0.4791323448654585, "grad_norm": 0.3543555438518524, "learning_rate": 1.7321713712090474e-05, "loss": 0.4848, "step": 17450 }, { "epoch": 0.479159802306425, "grad_norm": 0.35517990589141846, "learning_rate": 1.7321419533824136e-05, "loss": 0.4658, "step": 17451 }, { "epoch": 0.47918725974739157, "grad_norm": 0.4681800305843353, "learning_rate": 1.7321125341901008e-05, "loss": 0.5598, "step": 17452 }, { "epoch": 0.47921471718835806, "grad_norm": 0.379406601190567, "learning_rate": 1.732083113632165e-05, "loss": 0.4897, "step": 17453 }, { "epoch": 0.47924217462932456, "grad_norm": 0.39931923151016235, "learning_rate": 1.73205369170866e-05, "loss": 0.4873, "step": 17454 }, { "epoch": 0.47926963207029105, "grad_norm": 0.4038063585758209, "learning_rate": 1.732024268419642e-05, "loss": 0.5414, "step": 17455 }, { "epoch": 0.47929708951125755, "grad_norm": 0.39987099170684814, "learning_rate": 1.7319948437651642e-05, "loss": 0.6193, "step": 17456 }, { "epoch": 0.47932454695222404, "grad_norm": 0.4186311364173889, "learning_rate": 1.731965417745283e-05, "loss": 0.467, "step": 17457 }, { "epoch": 0.47935200439319053, "grad_norm": 0.41336333751678467, "learning_rate": 1.7319359903600525e-05, "loss": 0.5531, "step": 17458 }, { "epoch": 0.4793794618341571, "grad_norm": 0.34689077734947205, "learning_rate": 1.7319065616095278e-05, "loss": 0.452, "step": 17459 }, { "epoch": 0.4794069192751236, "grad_norm": 0.40383651852607727, "learning_rate": 1.731877131493764e-05, "loss": 0.5256, "step": 17460 }, { "epoch": 0.47943437671609007, "grad_norm": 0.3858388364315033, "learning_rate": 1.7318477000128153e-05, "loss": 0.5357, "step": 17461 }, { "epoch": 0.47946183415705657, "grad_norm": 0.35135722160339355, "learning_rate": 1.7318182671667373e-05, "loss": 0.5068, "step": 17462 }, { "epoch": 0.47948929159802306, "grad_norm": 0.3773167133331299, "learning_rate": 1.7317888329555847e-05, "loss": 0.5784, "step": 17463 }, { "epoch": 0.47951674903898955, "grad_norm": 0.416190505027771, "learning_rate": 1.7317593973794124e-05, "loss": 0.5415, "step": 17464 }, { "epoch": 0.47954420647995605, "grad_norm": 0.3321760892868042, "learning_rate": 1.731729960438275e-05, "loss": 0.4449, "step": 17465 }, { "epoch": 0.4795716639209226, "grad_norm": 0.4396505653858185, "learning_rate": 1.731700522132228e-05, "loss": 0.5076, "step": 17466 }, { "epoch": 0.4795991213618891, "grad_norm": 0.4887462556362152, "learning_rate": 1.731671082461326e-05, "loss": 0.4939, "step": 17467 }, { "epoch": 0.4796265788028556, "grad_norm": 0.5533541440963745, "learning_rate": 1.7316416414256238e-05, "loss": 0.5278, "step": 17468 }, { "epoch": 0.4796540362438221, "grad_norm": 0.376334547996521, "learning_rate": 1.731612199025176e-05, "loss": 0.5495, "step": 17469 }, { "epoch": 0.4796814936847886, "grad_norm": 0.3600611090660095, "learning_rate": 1.7315827552600387e-05, "loss": 0.4328, "step": 17470 }, { "epoch": 0.47970895112575507, "grad_norm": 0.4096277356147766, "learning_rate": 1.7315533101302654e-05, "loss": 0.5295, "step": 17471 }, { "epoch": 0.47973640856672156, "grad_norm": 0.37855756282806396, "learning_rate": 1.731523863635912e-05, "loss": 0.526, "step": 17472 }, { "epoch": 0.4797638660076881, "grad_norm": 0.33261558413505554, "learning_rate": 1.731494415777033e-05, "loss": 0.4864, "step": 17473 }, { "epoch": 0.4797913234486546, "grad_norm": 0.452090322971344, "learning_rate": 1.731464966553684e-05, "loss": 0.56, "step": 17474 }, { "epoch": 0.4798187808896211, "grad_norm": 0.39700597524642944, "learning_rate": 1.731435515965919e-05, "loss": 0.5507, "step": 17475 }, { "epoch": 0.4798462383305876, "grad_norm": 0.440921813249588, "learning_rate": 1.7314060640137928e-05, "loss": 0.6252, "step": 17476 }, { "epoch": 0.4798736957715541, "grad_norm": 0.3513808250427246, "learning_rate": 1.7313766106973614e-05, "loss": 0.488, "step": 17477 }, { "epoch": 0.4799011532125206, "grad_norm": 0.3945593535900116, "learning_rate": 1.731347156016679e-05, "loss": 0.5702, "step": 17478 }, { "epoch": 0.4799286106534871, "grad_norm": 0.3711896240711212, "learning_rate": 1.731317699971801e-05, "loss": 0.4465, "step": 17479 }, { "epoch": 0.47995606809445357, "grad_norm": 1.2150377035140991, "learning_rate": 1.731288242562782e-05, "loss": 0.3888, "step": 17480 }, { "epoch": 0.4799835255354201, "grad_norm": 0.3237757682800293, "learning_rate": 1.7312587837896767e-05, "loss": 0.5429, "step": 17481 }, { "epoch": 0.4800109829763866, "grad_norm": 0.3663346767425537, "learning_rate": 1.7312293236525407e-05, "loss": 0.5199, "step": 17482 }, { "epoch": 0.4800384404173531, "grad_norm": 0.4141944646835327, "learning_rate": 1.731199862151429e-05, "loss": 0.502, "step": 17483 }, { "epoch": 0.4800658978583196, "grad_norm": 0.34636250138282776, "learning_rate": 1.7311703992863954e-05, "loss": 0.4012, "step": 17484 }, { "epoch": 0.4800933552992861, "grad_norm": 0.378449022769928, "learning_rate": 1.731140935057496e-05, "loss": 0.5304, "step": 17485 }, { "epoch": 0.4801208127402526, "grad_norm": 0.35368916392326355, "learning_rate": 1.7311114694647853e-05, "loss": 0.4437, "step": 17486 }, { "epoch": 0.4801482701812191, "grad_norm": 0.3583977222442627, "learning_rate": 1.7310820025083185e-05, "loss": 0.5821, "step": 17487 }, { "epoch": 0.48017572762218563, "grad_norm": 0.3363000452518463, "learning_rate": 1.7310525341881503e-05, "loss": 0.4685, "step": 17488 }, { "epoch": 0.4802031850631521, "grad_norm": 0.3884904086589813, "learning_rate": 1.731023064504336e-05, "loss": 0.4743, "step": 17489 }, { "epoch": 0.4802306425041186, "grad_norm": 0.40167930722236633, "learning_rate": 1.73099359345693e-05, "loss": 0.5354, "step": 17490 }, { "epoch": 0.4802580999450851, "grad_norm": 0.3394714593887329, "learning_rate": 1.730964121045988e-05, "loss": 0.4821, "step": 17491 }, { "epoch": 0.4802855573860516, "grad_norm": 0.4138287603855133, "learning_rate": 1.7309346472715647e-05, "loss": 0.5194, "step": 17492 }, { "epoch": 0.4803130148270181, "grad_norm": 0.37954211235046387, "learning_rate": 1.730905172133715e-05, "loss": 0.4895, "step": 17493 }, { "epoch": 0.4803404722679846, "grad_norm": 0.4558545649051666, "learning_rate": 1.7308756956324933e-05, "loss": 0.5496, "step": 17494 }, { "epoch": 0.48036792970895115, "grad_norm": 0.35365408658981323, "learning_rate": 1.730846217767956e-05, "loss": 0.461, "step": 17495 }, { "epoch": 0.48039538714991764, "grad_norm": 0.38356998562812805, "learning_rate": 1.7308167385401568e-05, "loss": 0.5622, "step": 17496 }, { "epoch": 0.48042284459088413, "grad_norm": 0.4530021846294403, "learning_rate": 1.730787257949151e-05, "loss": 0.5308, "step": 17497 }, { "epoch": 0.48045030203185063, "grad_norm": 0.38855478167533875, "learning_rate": 1.7307577759949938e-05, "loss": 0.5576, "step": 17498 }, { "epoch": 0.4804777594728171, "grad_norm": 0.3981868624687195, "learning_rate": 1.73072829267774e-05, "loss": 0.5289, "step": 17499 }, { "epoch": 0.4805052169137836, "grad_norm": 0.3578651547431946, "learning_rate": 1.7306988079974448e-05, "loss": 0.5111, "step": 17500 }, { "epoch": 0.4805326743547501, "grad_norm": 0.7941781282424927, "learning_rate": 1.7306693219541632e-05, "loss": 0.5024, "step": 17501 }, { "epoch": 0.48056013179571666, "grad_norm": 0.3841637670993805, "learning_rate": 1.7306398345479503e-05, "loss": 0.617, "step": 17502 }, { "epoch": 0.48058758923668315, "grad_norm": 0.37433546781539917, "learning_rate": 1.7306103457788608e-05, "loss": 0.5372, "step": 17503 }, { "epoch": 0.48061504667764965, "grad_norm": 0.3689453601837158, "learning_rate": 1.7305808556469497e-05, "loss": 0.5285, "step": 17504 }, { "epoch": 0.48064250411861614, "grad_norm": 0.3881574869155884, "learning_rate": 1.730551364152272e-05, "loss": 0.5139, "step": 17505 }, { "epoch": 0.48066996155958264, "grad_norm": 0.349735289812088, "learning_rate": 1.7305218712948832e-05, "loss": 0.4557, "step": 17506 }, { "epoch": 0.48069741900054913, "grad_norm": 0.40166714787483215, "learning_rate": 1.7304923770748378e-05, "loss": 0.6314, "step": 17507 }, { "epoch": 0.4807248764415156, "grad_norm": 0.3618466258049011, "learning_rate": 1.7304628814921908e-05, "loss": 0.5358, "step": 17508 }, { "epoch": 0.4807523338824822, "grad_norm": 0.36432695388793945, "learning_rate": 1.7304333845469975e-05, "loss": 0.4864, "step": 17509 }, { "epoch": 0.48077979132344867, "grad_norm": 0.34232115745544434, "learning_rate": 1.7304038862393127e-05, "loss": 0.4741, "step": 17510 }, { "epoch": 0.48080724876441516, "grad_norm": 0.4414617717266083, "learning_rate": 1.7303743865691917e-05, "loss": 0.5352, "step": 17511 }, { "epoch": 0.48083470620538166, "grad_norm": 0.41822654008865356, "learning_rate": 1.7303448855366897e-05, "loss": 0.4749, "step": 17512 }, { "epoch": 0.48086216364634815, "grad_norm": 0.32531991600990295, "learning_rate": 1.7303153831418605e-05, "loss": 0.5071, "step": 17513 }, { "epoch": 0.48088962108731464, "grad_norm": 0.3722848892211914, "learning_rate": 1.7302858793847608e-05, "loss": 0.4415, "step": 17514 }, { "epoch": 0.48091707852828114, "grad_norm": 0.3323245048522949, "learning_rate": 1.7302563742654444e-05, "loss": 0.4227, "step": 17515 }, { "epoch": 0.4809445359692477, "grad_norm": 0.36743345856666565, "learning_rate": 1.7302268677839672e-05, "loss": 0.4705, "step": 17516 }, { "epoch": 0.4809719934102142, "grad_norm": 0.3477756083011627, "learning_rate": 1.7301973599403835e-05, "loss": 0.4857, "step": 17517 }, { "epoch": 0.4809994508511807, "grad_norm": 0.42857837677001953, "learning_rate": 1.7301678507347488e-05, "loss": 0.4849, "step": 17518 }, { "epoch": 0.48102690829214717, "grad_norm": 0.37254664301872253, "learning_rate": 1.730138340167118e-05, "loss": 0.5707, "step": 17519 }, { "epoch": 0.48105436573311366, "grad_norm": 0.3649071753025055, "learning_rate": 1.730108828237546e-05, "loss": 0.4952, "step": 17520 }, { "epoch": 0.48108182317408016, "grad_norm": 0.40212082862854004, "learning_rate": 1.7300793149460884e-05, "loss": 0.5023, "step": 17521 }, { "epoch": 0.48110928061504665, "grad_norm": 0.39454036951065063, "learning_rate": 1.7300498002927996e-05, "loss": 0.5574, "step": 17522 }, { "epoch": 0.4811367380560132, "grad_norm": 0.37781229615211487, "learning_rate": 1.7300202842777348e-05, "loss": 0.5258, "step": 17523 }, { "epoch": 0.4811641954969797, "grad_norm": 0.3983345031738281, "learning_rate": 1.7299907669009494e-05, "loss": 0.4984, "step": 17524 }, { "epoch": 0.4811916529379462, "grad_norm": 8.726655960083008, "learning_rate": 1.7299612481624983e-05, "loss": 0.5938, "step": 17525 }, { "epoch": 0.4812191103789127, "grad_norm": 0.36584287881851196, "learning_rate": 1.7299317280624362e-05, "loss": 0.5196, "step": 17526 }, { "epoch": 0.4812465678198792, "grad_norm": 0.36755555868148804, "learning_rate": 1.729902206600819e-05, "loss": 0.5884, "step": 17527 }, { "epoch": 0.4812740252608457, "grad_norm": 0.45232316851615906, "learning_rate": 1.7298726837777007e-05, "loss": 0.5614, "step": 17528 }, { "epoch": 0.48130148270181217, "grad_norm": 0.40691834688186646, "learning_rate": 1.729843159593137e-05, "loss": 0.5021, "step": 17529 }, { "epoch": 0.4813289401427787, "grad_norm": 0.395847886800766, "learning_rate": 1.7298136340471827e-05, "loss": 0.5716, "step": 17530 }, { "epoch": 0.4813563975837452, "grad_norm": 0.37636908888816833, "learning_rate": 1.7297841071398936e-05, "loss": 0.5358, "step": 17531 }, { "epoch": 0.4813838550247117, "grad_norm": 0.378365159034729, "learning_rate": 1.7297545788713236e-05, "loss": 0.5223, "step": 17532 }, { "epoch": 0.4814113124656782, "grad_norm": 0.32043442130088806, "learning_rate": 1.7297250492415287e-05, "loss": 0.4096, "step": 17533 }, { "epoch": 0.4814387699066447, "grad_norm": 0.48982542753219604, "learning_rate": 1.7296955182505636e-05, "loss": 0.5413, "step": 17534 }, { "epoch": 0.4814662273476112, "grad_norm": 0.42933884263038635, "learning_rate": 1.7296659858984837e-05, "loss": 0.5423, "step": 17535 }, { "epoch": 0.4814936847885777, "grad_norm": 0.3843173086643219, "learning_rate": 1.729636452185344e-05, "loss": 0.495, "step": 17536 }, { "epoch": 0.48152114222954423, "grad_norm": 0.37789106369018555, "learning_rate": 1.7296069171111986e-05, "loss": 0.4728, "step": 17537 }, { "epoch": 0.4815485996705107, "grad_norm": 0.34102383255958557, "learning_rate": 1.729577380676104e-05, "loss": 0.4965, "step": 17538 }, { "epoch": 0.4815760571114772, "grad_norm": 0.3841340243816376, "learning_rate": 1.7295478428801147e-05, "loss": 0.5327, "step": 17539 }, { "epoch": 0.4816035145524437, "grad_norm": 0.39339905977249146, "learning_rate": 1.7295183037232858e-05, "loss": 0.5008, "step": 17540 }, { "epoch": 0.4816309719934102, "grad_norm": 0.37303659319877625, "learning_rate": 1.7294887632056724e-05, "loss": 0.4647, "step": 17541 }, { "epoch": 0.4816584294343767, "grad_norm": 0.38660871982574463, "learning_rate": 1.7294592213273298e-05, "loss": 0.5364, "step": 17542 }, { "epoch": 0.4816858868753432, "grad_norm": 0.34620922803878784, "learning_rate": 1.7294296780883127e-05, "loss": 0.5384, "step": 17543 }, { "epoch": 0.48171334431630974, "grad_norm": 0.35966548323631287, "learning_rate": 1.7294001334886768e-05, "loss": 0.5636, "step": 17544 }, { "epoch": 0.48174080175727624, "grad_norm": 0.36454638838768005, "learning_rate": 1.7293705875284766e-05, "loss": 0.4932, "step": 17545 }, { "epoch": 0.48176825919824273, "grad_norm": 0.33886945247650146, "learning_rate": 1.7293410402077673e-05, "loss": 0.5102, "step": 17546 }, { "epoch": 0.4817957166392092, "grad_norm": 0.3629171848297119, "learning_rate": 1.7293114915266046e-05, "loss": 0.4913, "step": 17547 }, { "epoch": 0.4818231740801757, "grad_norm": 0.35926923155784607, "learning_rate": 1.729281941485043e-05, "loss": 0.4538, "step": 17548 }, { "epoch": 0.4818506315211422, "grad_norm": 0.4834381937980652, "learning_rate": 1.7292523900831375e-05, "loss": 0.5516, "step": 17549 }, { "epoch": 0.4818780889621087, "grad_norm": 0.3769710063934326, "learning_rate": 1.729222837320944e-05, "loss": 0.5632, "step": 17550 }, { "epoch": 0.48190554640307526, "grad_norm": 0.37136945128440857, "learning_rate": 1.7291932831985167e-05, "loss": 0.5031, "step": 17551 }, { "epoch": 0.48193300384404175, "grad_norm": 0.4483974575996399, "learning_rate": 1.7291637277159114e-05, "loss": 0.518, "step": 17552 }, { "epoch": 0.48196046128500825, "grad_norm": 0.3857955038547516, "learning_rate": 1.7291341708731834e-05, "loss": 0.4514, "step": 17553 }, { "epoch": 0.48198791872597474, "grad_norm": 0.4026235044002533, "learning_rate": 1.729104612670387e-05, "loss": 0.4607, "step": 17554 }, { "epoch": 0.48201537616694123, "grad_norm": 0.3762950599193573, "learning_rate": 1.729075053107578e-05, "loss": 0.5142, "step": 17555 }, { "epoch": 0.48204283360790773, "grad_norm": 0.3551711142063141, "learning_rate": 1.7290454921848116e-05, "loss": 0.5151, "step": 17556 }, { "epoch": 0.4820702910488742, "grad_norm": 0.47367215156555176, "learning_rate": 1.729015929902142e-05, "loss": 0.5051, "step": 17557 }, { "epoch": 0.48209774848984077, "grad_norm": 0.36164391040802, "learning_rate": 1.7289863662596254e-05, "loss": 0.5294, "step": 17558 }, { "epoch": 0.48212520593080727, "grad_norm": 0.3566454350948334, "learning_rate": 1.7289568012573164e-05, "loss": 0.4389, "step": 17559 }, { "epoch": 0.48215266337177376, "grad_norm": 0.39334964752197266, "learning_rate": 1.7289272348952706e-05, "loss": 0.4705, "step": 17560 }, { "epoch": 0.48218012081274025, "grad_norm": 0.39020371437072754, "learning_rate": 1.7288976671735428e-05, "loss": 0.4926, "step": 17561 }, { "epoch": 0.48220757825370675, "grad_norm": 0.685217022895813, "learning_rate": 1.728868098092188e-05, "loss": 0.486, "step": 17562 }, { "epoch": 0.48223503569467324, "grad_norm": 0.3806471526622772, "learning_rate": 1.728838527651262e-05, "loss": 0.5608, "step": 17563 }, { "epoch": 0.48226249313563974, "grad_norm": 0.4207320213317871, "learning_rate": 1.728808955850819e-05, "loss": 0.5379, "step": 17564 }, { "epoch": 0.4822899505766063, "grad_norm": 0.49942415952682495, "learning_rate": 1.728779382690915e-05, "loss": 0.5297, "step": 17565 }, { "epoch": 0.4823174080175728, "grad_norm": 0.35278117656707764, "learning_rate": 1.7287498081716048e-05, "loss": 0.4864, "step": 17566 }, { "epoch": 0.4823448654585393, "grad_norm": 0.3763061761856079, "learning_rate": 1.7287202322929436e-05, "loss": 0.4749, "step": 17567 }, { "epoch": 0.48237232289950577, "grad_norm": 0.4010665714740753, "learning_rate": 1.7286906550549866e-05, "loss": 0.5812, "step": 17568 }, { "epoch": 0.48239978034047226, "grad_norm": 0.38473519682884216, "learning_rate": 1.7286610764577887e-05, "loss": 0.5492, "step": 17569 }, { "epoch": 0.48242723778143876, "grad_norm": 0.34626853466033936, "learning_rate": 1.7286314965014055e-05, "loss": 0.4982, "step": 17570 }, { "epoch": 0.48245469522240525, "grad_norm": 0.34542596340179443, "learning_rate": 1.7286019151858922e-05, "loss": 0.4617, "step": 17571 }, { "epoch": 0.4824821526633718, "grad_norm": 1.4327795505523682, "learning_rate": 1.7285723325113037e-05, "loss": 0.6346, "step": 17572 }, { "epoch": 0.4825096101043383, "grad_norm": 0.3287796974182129, "learning_rate": 1.7285427484776952e-05, "loss": 0.4203, "step": 17573 }, { "epoch": 0.4825370675453048, "grad_norm": 0.3998962640762329, "learning_rate": 1.728513163085122e-05, "loss": 0.5498, "step": 17574 }, { "epoch": 0.4825645249862713, "grad_norm": 0.3500562906265259, "learning_rate": 1.7284835763336392e-05, "loss": 0.5026, "step": 17575 }, { "epoch": 0.4825919824272378, "grad_norm": 0.366131991147995, "learning_rate": 1.728453988223302e-05, "loss": 0.4747, "step": 17576 }, { "epoch": 0.48261943986820427, "grad_norm": 0.39842483401298523, "learning_rate": 1.7284243987541656e-05, "loss": 0.5342, "step": 17577 }, { "epoch": 0.48264689730917076, "grad_norm": 0.43158096075057983, "learning_rate": 1.7283948079262853e-05, "loss": 0.4946, "step": 17578 }, { "epoch": 0.4826743547501373, "grad_norm": 0.3594059348106384, "learning_rate": 1.728365215739716e-05, "loss": 0.5607, "step": 17579 }, { "epoch": 0.4827018121911038, "grad_norm": 0.3587777614593506, "learning_rate": 1.7283356221945134e-05, "loss": 0.546, "step": 17580 }, { "epoch": 0.4827292696320703, "grad_norm": 0.3895912766456604, "learning_rate": 1.7283060272907322e-05, "loss": 0.5415, "step": 17581 }, { "epoch": 0.4827567270730368, "grad_norm": 0.39170390367507935, "learning_rate": 1.728276431028428e-05, "loss": 0.5401, "step": 17582 }, { "epoch": 0.4827841845140033, "grad_norm": 0.3051326274871826, "learning_rate": 1.7282468334076556e-05, "loss": 0.4546, "step": 17583 }, { "epoch": 0.4828116419549698, "grad_norm": 0.32393914461135864, "learning_rate": 1.7282172344284706e-05, "loss": 0.5166, "step": 17584 }, { "epoch": 0.4828390993959363, "grad_norm": 0.38260024785995483, "learning_rate": 1.728187634090928e-05, "loss": 0.4789, "step": 17585 }, { "epoch": 0.4828665568369028, "grad_norm": 0.3701643645763397, "learning_rate": 1.728158032395083e-05, "loss": 0.529, "step": 17586 }, { "epoch": 0.4828940142778693, "grad_norm": 0.4043022692203522, "learning_rate": 1.728128429340991e-05, "loss": 0.5084, "step": 17587 }, { "epoch": 0.4829214717188358, "grad_norm": 0.39740023016929626, "learning_rate": 1.7280988249287068e-05, "loss": 0.5231, "step": 17588 }, { "epoch": 0.4829489291598023, "grad_norm": 0.3731479346752167, "learning_rate": 1.728069219158286e-05, "loss": 0.5639, "step": 17589 }, { "epoch": 0.4829763866007688, "grad_norm": 0.31423524022102356, "learning_rate": 1.728039612029784e-05, "loss": 0.3872, "step": 17590 }, { "epoch": 0.4830038440417353, "grad_norm": 0.35348615050315857, "learning_rate": 1.7280100035432554e-05, "loss": 0.4287, "step": 17591 }, { "epoch": 0.4830313014827018, "grad_norm": 0.3880104422569275, "learning_rate": 1.727980393698756e-05, "loss": 0.5239, "step": 17592 }, { "epoch": 0.48305875892366834, "grad_norm": 0.3838599622249603, "learning_rate": 1.7279507824963408e-05, "loss": 0.4896, "step": 17593 }, { "epoch": 0.48308621636463484, "grad_norm": 0.3518800437450409, "learning_rate": 1.727921169936065e-05, "loss": 0.4628, "step": 17594 }, { "epoch": 0.48311367380560133, "grad_norm": 0.3722374141216278, "learning_rate": 1.7278915560179838e-05, "loss": 0.5902, "step": 17595 }, { "epoch": 0.4831411312465678, "grad_norm": 0.41177743673324585, "learning_rate": 1.7278619407421526e-05, "loss": 0.5467, "step": 17596 }, { "epoch": 0.4831685886875343, "grad_norm": 0.4365810751914978, "learning_rate": 1.7278323241086268e-05, "loss": 0.5168, "step": 17597 }, { "epoch": 0.4831960461285008, "grad_norm": 0.363593727350235, "learning_rate": 1.727802706117461e-05, "loss": 0.469, "step": 17598 }, { "epoch": 0.4832235035694673, "grad_norm": 0.5995293259620667, "learning_rate": 1.7277730867687115e-05, "loss": 0.4623, "step": 17599 }, { "epoch": 0.48325096101043385, "grad_norm": 0.35891056060791016, "learning_rate": 1.7277434660624324e-05, "loss": 0.5226, "step": 17600 }, { "epoch": 0.48327841845140035, "grad_norm": 0.3373907804489136, "learning_rate": 1.7277138439986797e-05, "loss": 0.4773, "step": 17601 }, { "epoch": 0.48330587589236684, "grad_norm": 0.38734203577041626, "learning_rate": 1.7276842205775083e-05, "loss": 0.4817, "step": 17602 }, { "epoch": 0.48333333333333334, "grad_norm": 0.369619220495224, "learning_rate": 1.7276545957989736e-05, "loss": 0.504, "step": 17603 }, { "epoch": 0.48336079077429983, "grad_norm": 0.4390208423137665, "learning_rate": 1.727624969663131e-05, "loss": 0.4843, "step": 17604 }, { "epoch": 0.4833882482152663, "grad_norm": 0.35928797721862793, "learning_rate": 1.727595342170035e-05, "loss": 0.5454, "step": 17605 }, { "epoch": 0.4834157056562328, "grad_norm": 0.4214595854282379, "learning_rate": 1.7275657133197422e-05, "loss": 0.4239, "step": 17606 }, { "epoch": 0.48344316309719937, "grad_norm": 0.33992287516593933, "learning_rate": 1.7275360831123068e-05, "loss": 0.4196, "step": 17607 }, { "epoch": 0.48347062053816586, "grad_norm": 0.399882972240448, "learning_rate": 1.7275064515477843e-05, "loss": 0.4567, "step": 17608 }, { "epoch": 0.48349807797913236, "grad_norm": 0.38239866495132446, "learning_rate": 1.7274768186262303e-05, "loss": 0.5698, "step": 17609 }, { "epoch": 0.48352553542009885, "grad_norm": 0.3959523141384125, "learning_rate": 1.7274471843476997e-05, "loss": 0.5248, "step": 17610 }, { "epoch": 0.48355299286106534, "grad_norm": 0.7146145105361938, "learning_rate": 1.727417548712248e-05, "loss": 0.5416, "step": 17611 }, { "epoch": 0.48358045030203184, "grad_norm": 0.3821927309036255, "learning_rate": 1.7273879117199307e-05, "loss": 0.442, "step": 17612 }, { "epoch": 0.48360790774299833, "grad_norm": 0.30212587118148804, "learning_rate": 1.7273582733708024e-05, "loss": 0.4278, "step": 17613 }, { "epoch": 0.4836353651839648, "grad_norm": 0.4030287265777588, "learning_rate": 1.7273286336649187e-05, "loss": 0.4813, "step": 17614 }, { "epoch": 0.4836628226249314, "grad_norm": 0.35988548398017883, "learning_rate": 1.7272989926023352e-05, "loss": 0.5829, "step": 17615 }, { "epoch": 0.48369028006589787, "grad_norm": 0.3900493085384369, "learning_rate": 1.727269350183107e-05, "loss": 0.5691, "step": 17616 }, { "epoch": 0.48371773750686436, "grad_norm": 0.4267294108867645, "learning_rate": 1.7272397064072892e-05, "loss": 0.5188, "step": 17617 }, { "epoch": 0.48374519494783086, "grad_norm": 0.369585782289505, "learning_rate": 1.727210061274937e-05, "loss": 0.484, "step": 17618 }, { "epoch": 0.48377265238879735, "grad_norm": 0.36233434081077576, "learning_rate": 1.7271804147861063e-05, "loss": 0.5087, "step": 17619 }, { "epoch": 0.48380010982976385, "grad_norm": 0.3509266972541809, "learning_rate": 1.727150766940852e-05, "loss": 0.4535, "step": 17620 }, { "epoch": 0.48382756727073034, "grad_norm": 0.38026779890060425, "learning_rate": 1.7271211177392296e-05, "loss": 0.4999, "step": 17621 }, { "epoch": 0.4838550247116969, "grad_norm": 0.36419108510017395, "learning_rate": 1.7270914671812936e-05, "loss": 0.5126, "step": 17622 }, { "epoch": 0.4838824821526634, "grad_norm": 0.3929356634616852, "learning_rate": 1.7270618152671007e-05, "loss": 0.4982, "step": 17623 }, { "epoch": 0.4839099395936299, "grad_norm": 0.3975479304790497, "learning_rate": 1.727032161996705e-05, "loss": 0.5078, "step": 17624 }, { "epoch": 0.4839373970345964, "grad_norm": 0.3420872688293457, "learning_rate": 1.7270025073701625e-05, "loss": 0.4021, "step": 17625 }, { "epoch": 0.48396485447556287, "grad_norm": 0.37143760919570923, "learning_rate": 1.726972851387528e-05, "loss": 0.4966, "step": 17626 }, { "epoch": 0.48399231191652936, "grad_norm": 0.38254067301750183, "learning_rate": 1.7269431940488575e-05, "loss": 0.5179, "step": 17627 }, { "epoch": 0.48401976935749585, "grad_norm": 0.461773157119751, "learning_rate": 1.7269135353542058e-05, "loss": 0.4837, "step": 17628 }, { "epoch": 0.4840472267984624, "grad_norm": 0.3826406002044678, "learning_rate": 1.7268838753036282e-05, "loss": 0.628, "step": 17629 }, { "epoch": 0.4840746842394289, "grad_norm": 0.3479669690132141, "learning_rate": 1.7268542138971802e-05, "loss": 0.4842, "step": 17630 }, { "epoch": 0.4841021416803954, "grad_norm": 0.4209999740123749, "learning_rate": 1.726824551134917e-05, "loss": 0.4249, "step": 17631 }, { "epoch": 0.4841295991213619, "grad_norm": 0.41264402866363525, "learning_rate": 1.7267948870168943e-05, "loss": 0.5495, "step": 17632 }, { "epoch": 0.4841570565623284, "grad_norm": 0.5419906973838806, "learning_rate": 1.7267652215431672e-05, "loss": 0.6256, "step": 17633 }, { "epoch": 0.4841845140032949, "grad_norm": 0.4007457196712494, "learning_rate": 1.726735554713791e-05, "loss": 0.5186, "step": 17634 }, { "epoch": 0.48421197144426137, "grad_norm": 0.4351632297039032, "learning_rate": 1.726705886528821e-05, "loss": 0.5843, "step": 17635 }, { "epoch": 0.4842394288852279, "grad_norm": 0.3462987542152405, "learning_rate": 1.7266762169883125e-05, "loss": 0.5435, "step": 17636 }, { "epoch": 0.4842668863261944, "grad_norm": 0.3828832507133484, "learning_rate": 1.726646546092321e-05, "loss": 0.5138, "step": 17637 }, { "epoch": 0.4842943437671609, "grad_norm": 0.36050868034362793, "learning_rate": 1.7266168738409015e-05, "loss": 0.4904, "step": 17638 }, { "epoch": 0.4843218012081274, "grad_norm": 0.3506377041339874, "learning_rate": 1.72658720023411e-05, "loss": 0.6007, "step": 17639 }, { "epoch": 0.4843492586490939, "grad_norm": 0.3758707642555237, "learning_rate": 1.7265575252720013e-05, "loss": 0.4562, "step": 17640 }, { "epoch": 0.4843767160900604, "grad_norm": 0.3868147134780884, "learning_rate": 1.726527848954631e-05, "loss": 0.5278, "step": 17641 }, { "epoch": 0.4844041735310269, "grad_norm": 0.38815340399742126, "learning_rate": 1.7264981712820542e-05, "loss": 0.4971, "step": 17642 }, { "epoch": 0.48443163097199343, "grad_norm": 0.39664438366889954, "learning_rate": 1.7264684922543265e-05, "loss": 0.5281, "step": 17643 }, { "epoch": 0.4844590884129599, "grad_norm": 0.35527917742729187, "learning_rate": 1.7264388118715033e-05, "loss": 0.4465, "step": 17644 }, { "epoch": 0.4844865458539264, "grad_norm": 0.35456177592277527, "learning_rate": 1.7264091301336398e-05, "loss": 0.5876, "step": 17645 }, { "epoch": 0.4845140032948929, "grad_norm": 0.417082816362381, "learning_rate": 1.7263794470407915e-05, "loss": 0.5318, "step": 17646 }, { "epoch": 0.4845414607358594, "grad_norm": 0.36527732014656067, "learning_rate": 1.7263497625930138e-05, "loss": 0.4935, "step": 17647 }, { "epoch": 0.4845689181768259, "grad_norm": 0.33819037675857544, "learning_rate": 1.7263200767903615e-05, "loss": 0.447, "step": 17648 }, { "epoch": 0.4845963756177924, "grad_norm": 0.3693745732307434, "learning_rate": 1.726290389632891e-05, "loss": 0.5062, "step": 17649 }, { "epoch": 0.48462383305875895, "grad_norm": 0.3844183087348938, "learning_rate": 1.726260701120657e-05, "loss": 0.5564, "step": 17650 }, { "epoch": 0.48465129049972544, "grad_norm": 0.505126416683197, "learning_rate": 1.726231011253715e-05, "loss": 0.4717, "step": 17651 }, { "epoch": 0.48467874794069193, "grad_norm": 0.4464876353740692, "learning_rate": 1.7262013200321202e-05, "loss": 0.4816, "step": 17652 }, { "epoch": 0.48470620538165843, "grad_norm": 0.3749581277370453, "learning_rate": 1.726171627455928e-05, "loss": 0.4694, "step": 17653 }, { "epoch": 0.4847336628226249, "grad_norm": 0.3258917033672333, "learning_rate": 1.726141933525194e-05, "loss": 0.4424, "step": 17654 }, { "epoch": 0.4847611202635914, "grad_norm": 0.38427838683128357, "learning_rate": 1.7261122382399736e-05, "loss": 0.5642, "step": 17655 }, { "epoch": 0.4847885777045579, "grad_norm": 0.39062902331352234, "learning_rate": 1.7260825416003225e-05, "loss": 0.5155, "step": 17656 }, { "epoch": 0.48481603514552446, "grad_norm": 0.3522874414920807, "learning_rate": 1.7260528436062954e-05, "loss": 0.4645, "step": 17657 }, { "epoch": 0.48484349258649095, "grad_norm": 0.3333744704723358, "learning_rate": 1.726023144257948e-05, "loss": 0.4059, "step": 17658 }, { "epoch": 0.48487095002745745, "grad_norm": 0.35040608048439026, "learning_rate": 1.725993443555336e-05, "loss": 0.4422, "step": 17659 }, { "epoch": 0.48489840746842394, "grad_norm": 0.38689297437667847, "learning_rate": 1.7259637414985142e-05, "loss": 0.4881, "step": 17660 }, { "epoch": 0.48492586490939044, "grad_norm": 0.28546157479286194, "learning_rate": 1.7259340380875384e-05, "loss": 0.4163, "step": 17661 }, { "epoch": 0.48495332235035693, "grad_norm": 0.37033697962760925, "learning_rate": 1.7259043333224638e-05, "loss": 0.4612, "step": 17662 }, { "epoch": 0.4849807797913234, "grad_norm": 0.36841344833374023, "learning_rate": 1.725874627203346e-05, "loss": 0.4873, "step": 17663 }, { "epoch": 0.48500823723229, "grad_norm": 0.3662201762199402, "learning_rate": 1.7258449197302408e-05, "loss": 0.4551, "step": 17664 }, { "epoch": 0.48503569467325647, "grad_norm": 0.3655455708503723, "learning_rate": 1.7258152109032028e-05, "loss": 0.4261, "step": 17665 }, { "epoch": 0.48506315211422296, "grad_norm": 0.3785841166973114, "learning_rate": 1.7257855007222877e-05, "loss": 0.4984, "step": 17666 }, { "epoch": 0.48509060955518946, "grad_norm": 0.38041505217552185, "learning_rate": 1.725755789187551e-05, "loss": 0.5377, "step": 17667 }, { "epoch": 0.48511806699615595, "grad_norm": 0.46540001034736633, "learning_rate": 1.7257260762990483e-05, "loss": 0.4513, "step": 17668 }, { "epoch": 0.48514552443712244, "grad_norm": 0.4044991433620453, "learning_rate": 1.7256963620568345e-05, "loss": 0.5229, "step": 17669 }, { "epoch": 0.48517298187808894, "grad_norm": 0.361749529838562, "learning_rate": 1.7256666464609658e-05, "loss": 0.5523, "step": 17670 }, { "epoch": 0.4852004393190555, "grad_norm": 0.35886600613594055, "learning_rate": 1.725636929511497e-05, "loss": 0.4604, "step": 17671 }, { "epoch": 0.485227896760022, "grad_norm": 0.37631893157958984, "learning_rate": 1.725607211208484e-05, "loss": 0.5914, "step": 17672 }, { "epoch": 0.4852553542009885, "grad_norm": 0.4514726996421814, "learning_rate": 1.7255774915519814e-05, "loss": 0.6165, "step": 17673 }, { "epoch": 0.48528281164195497, "grad_norm": 0.3857898414134979, "learning_rate": 1.7255477705420455e-05, "loss": 0.508, "step": 17674 }, { "epoch": 0.48531026908292146, "grad_norm": 0.3787430226802826, "learning_rate": 1.7255180481787315e-05, "loss": 0.5334, "step": 17675 }, { "epoch": 0.48533772652388796, "grad_norm": 0.37013018131256104, "learning_rate": 1.7254883244620947e-05, "loss": 0.4507, "step": 17676 }, { "epoch": 0.48536518396485445, "grad_norm": 0.3900471329689026, "learning_rate": 1.7254585993921906e-05, "loss": 0.5595, "step": 17677 }, { "epoch": 0.485392641405821, "grad_norm": 0.4384017884731293, "learning_rate": 1.7254288729690748e-05, "loss": 0.5558, "step": 17678 }, { "epoch": 0.4854200988467875, "grad_norm": 0.39514613151550293, "learning_rate": 1.7253991451928025e-05, "loss": 0.5791, "step": 17679 }, { "epoch": 0.485447556287754, "grad_norm": 0.41059666872024536, "learning_rate": 1.7253694160634293e-05, "loss": 0.5374, "step": 17680 }, { "epoch": 0.4854750137287205, "grad_norm": 0.3740375339984894, "learning_rate": 1.7253396855810108e-05, "loss": 0.5159, "step": 17681 }, { "epoch": 0.485502471169687, "grad_norm": 0.38929611444473267, "learning_rate": 1.725309953745602e-05, "loss": 0.5914, "step": 17682 }, { "epoch": 0.48552992861065347, "grad_norm": 0.3967209458351135, "learning_rate": 1.7252802205572586e-05, "loss": 0.4576, "step": 17683 }, { "epoch": 0.48555738605161997, "grad_norm": 0.36684665083885193, "learning_rate": 1.7252504860160364e-05, "loss": 0.4431, "step": 17684 }, { "epoch": 0.4855848434925865, "grad_norm": 0.3648223280906677, "learning_rate": 1.7252207501219905e-05, "loss": 0.4704, "step": 17685 }, { "epoch": 0.485612300933553, "grad_norm": 0.37341010570526123, "learning_rate": 1.7251910128751763e-05, "loss": 0.5593, "step": 17686 }, { "epoch": 0.4856397583745195, "grad_norm": 0.3693113625049591, "learning_rate": 1.7251612742756495e-05, "loss": 0.4869, "step": 17687 }, { "epoch": 0.485667215815486, "grad_norm": 0.4018973112106323, "learning_rate": 1.7251315343234653e-05, "loss": 0.5608, "step": 17688 }, { "epoch": 0.4856946732564525, "grad_norm": 0.3755534291267395, "learning_rate": 1.7251017930186796e-05, "loss": 0.5178, "step": 17689 }, { "epoch": 0.485722130697419, "grad_norm": 0.368085116147995, "learning_rate": 1.7250720503613477e-05, "loss": 0.544, "step": 17690 }, { "epoch": 0.4857495881383855, "grad_norm": 0.3669165372848511, "learning_rate": 1.725042306351525e-05, "loss": 0.5612, "step": 17691 }, { "epoch": 0.48577704557935203, "grad_norm": 0.4460585117340088, "learning_rate": 1.7250125609892668e-05, "loss": 0.4415, "step": 17692 }, { "epoch": 0.4858045030203185, "grad_norm": 0.32796958088874817, "learning_rate": 1.7249828142746287e-05, "loss": 0.4523, "step": 17693 }, { "epoch": 0.485831960461285, "grad_norm": 0.37613967061042786, "learning_rate": 1.7249530662076663e-05, "loss": 0.4694, "step": 17694 }, { "epoch": 0.4858594179022515, "grad_norm": 0.3745920956134796, "learning_rate": 1.724923316788435e-05, "loss": 0.5097, "step": 17695 }, { "epoch": 0.485886875343218, "grad_norm": 0.4276891350746155, "learning_rate": 1.724893566016991e-05, "loss": 0.5917, "step": 17696 }, { "epoch": 0.4859143327841845, "grad_norm": 0.46024858951568604, "learning_rate": 1.7248638138933885e-05, "loss": 0.6246, "step": 17697 }, { "epoch": 0.485941790225151, "grad_norm": 0.4026954770088196, "learning_rate": 1.7248340604176836e-05, "loss": 0.5479, "step": 17698 }, { "epoch": 0.48596924766611754, "grad_norm": 0.3717985153198242, "learning_rate": 1.724804305589932e-05, "loss": 0.5054, "step": 17699 }, { "epoch": 0.48599670510708404, "grad_norm": 0.39193832874298096, "learning_rate": 1.724774549410189e-05, "loss": 0.5072, "step": 17700 }, { "epoch": 0.48602416254805053, "grad_norm": 0.39475783705711365, "learning_rate": 1.7247447918785104e-05, "loss": 0.5128, "step": 17701 }, { "epoch": 0.486051619989017, "grad_norm": 0.6941933631896973, "learning_rate": 1.724715032994951e-05, "loss": 0.5091, "step": 17702 }, { "epoch": 0.4860790774299835, "grad_norm": 0.6703605651855469, "learning_rate": 1.7246852727595672e-05, "loss": 0.5039, "step": 17703 }, { "epoch": 0.48610653487095, "grad_norm": 0.37660545110702515, "learning_rate": 1.7246555111724136e-05, "loss": 0.4944, "step": 17704 }, { "epoch": 0.4861339923119165, "grad_norm": 0.4162742495536804, "learning_rate": 1.724625748233547e-05, "loss": 0.5702, "step": 17705 }, { "epoch": 0.48616144975288306, "grad_norm": 0.4582599997520447, "learning_rate": 1.724595983943021e-05, "loss": 0.5816, "step": 17706 }, { "epoch": 0.48618890719384955, "grad_norm": 0.4030217230319977, "learning_rate": 1.7245662183008928e-05, "loss": 0.4908, "step": 17707 }, { "epoch": 0.48621636463481605, "grad_norm": 0.4112563133239746, "learning_rate": 1.7245364513072177e-05, "loss": 0.433, "step": 17708 }, { "epoch": 0.48624382207578254, "grad_norm": 0.3883416950702667, "learning_rate": 1.7245066829620503e-05, "loss": 0.541, "step": 17709 }, { "epoch": 0.48627127951674903, "grad_norm": 0.3631395995616913, "learning_rate": 1.724476913265447e-05, "loss": 0.3886, "step": 17710 }, { "epoch": 0.4862987369577155, "grad_norm": 0.3502767086029053, "learning_rate": 1.724447142217463e-05, "loss": 0.5055, "step": 17711 }, { "epoch": 0.486326194398682, "grad_norm": 0.42143526673316956, "learning_rate": 1.724417369818154e-05, "loss": 0.5261, "step": 17712 }, { "epoch": 0.48635365183964857, "grad_norm": 0.4726541042327881, "learning_rate": 1.724387596067575e-05, "loss": 0.5536, "step": 17713 }, { "epoch": 0.48638110928061506, "grad_norm": 0.4135623574256897, "learning_rate": 1.724357820965782e-05, "loss": 0.5136, "step": 17714 }, { "epoch": 0.48640856672158156, "grad_norm": 0.3986741006374359, "learning_rate": 1.724328044512831e-05, "loss": 0.5075, "step": 17715 }, { "epoch": 0.48643602416254805, "grad_norm": 0.40754055976867676, "learning_rate": 1.7242982667087765e-05, "loss": 0.5188, "step": 17716 }, { "epoch": 0.48646348160351455, "grad_norm": 0.35497453808784485, "learning_rate": 1.7242684875536747e-05, "loss": 0.4892, "step": 17717 }, { "epoch": 0.48649093904448104, "grad_norm": 0.3958967328071594, "learning_rate": 1.724238707047581e-05, "loss": 0.5378, "step": 17718 }, { "epoch": 0.48651839648544754, "grad_norm": 0.386508971452713, "learning_rate": 1.724208925190551e-05, "loss": 0.4982, "step": 17719 }, { "epoch": 0.4865458539264141, "grad_norm": 0.3687201142311096, "learning_rate": 1.7241791419826402e-05, "loss": 0.5619, "step": 17720 }, { "epoch": 0.4865733113673806, "grad_norm": 0.3781968653202057, "learning_rate": 1.7241493574239043e-05, "loss": 0.5327, "step": 17721 }, { "epoch": 0.4866007688083471, "grad_norm": 0.4008883535861969, "learning_rate": 1.7241195715143985e-05, "loss": 0.5526, "step": 17722 }, { "epoch": 0.48662822624931357, "grad_norm": 0.3475266098976135, "learning_rate": 1.7240897842541787e-05, "loss": 0.4823, "step": 17723 }, { "epoch": 0.48665568369028006, "grad_norm": 0.43884342908859253, "learning_rate": 1.7240599956433e-05, "loss": 0.5722, "step": 17724 }, { "epoch": 0.48668314113124655, "grad_norm": 0.36870115995407104, "learning_rate": 1.7240302056818188e-05, "loss": 0.554, "step": 17725 }, { "epoch": 0.48671059857221305, "grad_norm": 0.3334406912326813, "learning_rate": 1.72400041436979e-05, "loss": 0.5224, "step": 17726 }, { "epoch": 0.4867380560131796, "grad_norm": 0.38367241621017456, "learning_rate": 1.7239706217072694e-05, "loss": 0.564, "step": 17727 }, { "epoch": 0.4867655134541461, "grad_norm": 0.43796539306640625, "learning_rate": 1.7239408276943126e-05, "loss": 0.6198, "step": 17728 }, { "epoch": 0.4867929708951126, "grad_norm": 0.3972180485725403, "learning_rate": 1.7239110323309748e-05, "loss": 0.5096, "step": 17729 }, { "epoch": 0.4868204283360791, "grad_norm": 0.38561907410621643, "learning_rate": 1.7238812356173124e-05, "loss": 0.5126, "step": 17730 }, { "epoch": 0.4868478857770456, "grad_norm": 0.4878607988357544, "learning_rate": 1.72385143755338e-05, "loss": 0.5495, "step": 17731 }, { "epoch": 0.48687534321801207, "grad_norm": 0.39124777913093567, "learning_rate": 1.7238216381392337e-05, "loss": 0.4609, "step": 17732 }, { "epoch": 0.48690280065897856, "grad_norm": 0.4016071856021881, "learning_rate": 1.7237918373749292e-05, "loss": 0.5538, "step": 17733 }, { "epoch": 0.4869302580999451, "grad_norm": 0.39538928866386414, "learning_rate": 1.7237620352605217e-05, "loss": 0.4511, "step": 17734 }, { "epoch": 0.4869577155409116, "grad_norm": 0.3696497976779938, "learning_rate": 1.7237322317960673e-05, "loss": 0.5199, "step": 17735 }, { "epoch": 0.4869851729818781, "grad_norm": 0.39079752564430237, "learning_rate": 1.7237024269816213e-05, "loss": 0.4762, "step": 17736 }, { "epoch": 0.4870126304228446, "grad_norm": 0.3570830523967743, "learning_rate": 1.723672620817239e-05, "loss": 0.4736, "step": 17737 }, { "epoch": 0.4870400878638111, "grad_norm": 0.3696151077747345, "learning_rate": 1.7236428133029763e-05, "loss": 0.5347, "step": 17738 }, { "epoch": 0.4870675453047776, "grad_norm": 0.3928174376487732, "learning_rate": 1.723613004438889e-05, "loss": 0.534, "step": 17739 }, { "epoch": 0.4870950027457441, "grad_norm": 0.364764541387558, "learning_rate": 1.7235831942250323e-05, "loss": 0.5242, "step": 17740 }, { "epoch": 0.4871224601867106, "grad_norm": 0.3651586174964905, "learning_rate": 1.723553382661462e-05, "loss": 0.4969, "step": 17741 }, { "epoch": 0.4871499176276771, "grad_norm": 0.34638652205467224, "learning_rate": 1.723523569748234e-05, "loss": 0.4894, "step": 17742 }, { "epoch": 0.4871773750686436, "grad_norm": 0.42067965865135193, "learning_rate": 1.7234937554854036e-05, "loss": 0.5934, "step": 17743 }, { "epoch": 0.4872048325096101, "grad_norm": 0.4183940589427948, "learning_rate": 1.7234639398730263e-05, "loss": 0.4686, "step": 17744 }, { "epoch": 0.4872322899505766, "grad_norm": 0.3619309365749359, "learning_rate": 1.7234341229111574e-05, "loss": 0.5376, "step": 17745 }, { "epoch": 0.4872597473915431, "grad_norm": 0.3527815043926239, "learning_rate": 1.7234043045998536e-05, "loss": 0.5539, "step": 17746 }, { "epoch": 0.4872872048325096, "grad_norm": 0.41010531783103943, "learning_rate": 1.7233744849391695e-05, "loss": 0.4477, "step": 17747 }, { "epoch": 0.4873146622734761, "grad_norm": 0.391961932182312, "learning_rate": 1.7233446639291614e-05, "loss": 0.5731, "step": 17748 }, { "epoch": 0.48734211971444263, "grad_norm": 0.4832632839679718, "learning_rate": 1.7233148415698845e-05, "loss": 0.5361, "step": 17749 }, { "epoch": 0.48736957715540913, "grad_norm": 0.3956652581691742, "learning_rate": 1.7232850178613948e-05, "loss": 0.5125, "step": 17750 }, { "epoch": 0.4873970345963756, "grad_norm": 0.3980465531349182, "learning_rate": 1.7232551928037474e-05, "loss": 0.6334, "step": 17751 }, { "epoch": 0.4874244920373421, "grad_norm": 0.35887694358825684, "learning_rate": 1.7232253663969983e-05, "loss": 0.5906, "step": 17752 }, { "epoch": 0.4874519494783086, "grad_norm": 0.3828239142894745, "learning_rate": 1.723195538641203e-05, "loss": 0.5064, "step": 17753 }, { "epoch": 0.4874794069192751, "grad_norm": 0.3923206627368927, "learning_rate": 1.7231657095364172e-05, "loss": 0.5904, "step": 17754 }, { "epoch": 0.4875068643602416, "grad_norm": 0.39551278948783875, "learning_rate": 1.723135879082697e-05, "loss": 0.5041, "step": 17755 }, { "epoch": 0.48753432180120815, "grad_norm": 0.379682332277298, "learning_rate": 1.723106047280097e-05, "loss": 0.5614, "step": 17756 }, { "epoch": 0.48756177924217464, "grad_norm": 0.4454176127910614, "learning_rate": 1.723076214128674e-05, "loss": 0.534, "step": 17757 }, { "epoch": 0.48758923668314114, "grad_norm": 0.3913172483444214, "learning_rate": 1.7230463796284824e-05, "loss": 0.5007, "step": 17758 }, { "epoch": 0.48761669412410763, "grad_norm": 0.34683042764663696, "learning_rate": 1.723016543779579e-05, "loss": 0.5721, "step": 17759 }, { "epoch": 0.4876441515650741, "grad_norm": 0.39055755734443665, "learning_rate": 1.7229867065820188e-05, "loss": 0.5999, "step": 17760 }, { "epoch": 0.4876716090060406, "grad_norm": 0.40304601192474365, "learning_rate": 1.722956868035858e-05, "loss": 0.4563, "step": 17761 }, { "epoch": 0.4876990664470071, "grad_norm": 0.3663020730018616, "learning_rate": 1.7229270281411514e-05, "loss": 0.4746, "step": 17762 }, { "epoch": 0.48772652388797366, "grad_norm": 0.3913702964782715, "learning_rate": 1.7228971868979552e-05, "loss": 0.551, "step": 17763 }, { "epoch": 0.48775398132894016, "grad_norm": 0.4230533242225647, "learning_rate": 1.7228673443063252e-05, "loss": 0.5509, "step": 17764 }, { "epoch": 0.48778143876990665, "grad_norm": 0.37927526235580444, "learning_rate": 1.7228375003663168e-05, "loss": 0.5323, "step": 17765 }, { "epoch": 0.48780889621087314, "grad_norm": 0.581646740436554, "learning_rate": 1.7228076550779857e-05, "loss": 0.4903, "step": 17766 }, { "epoch": 0.48783635365183964, "grad_norm": 0.4110003411769867, "learning_rate": 1.7227778084413878e-05, "loss": 0.5128, "step": 17767 }, { "epoch": 0.48786381109280613, "grad_norm": 0.38262462615966797, "learning_rate": 1.7227479604565788e-05, "loss": 0.5139, "step": 17768 }, { "epoch": 0.4878912685337726, "grad_norm": 0.399765282869339, "learning_rate": 1.722718111123614e-05, "loss": 0.4737, "step": 17769 }, { "epoch": 0.4879187259747392, "grad_norm": 0.3757775127887726, "learning_rate": 1.7226882604425493e-05, "loss": 0.5441, "step": 17770 }, { "epoch": 0.48794618341570567, "grad_norm": 0.41653019189834595, "learning_rate": 1.7226584084134402e-05, "loss": 0.5535, "step": 17771 }, { "epoch": 0.48797364085667216, "grad_norm": 0.3651380240917206, "learning_rate": 1.7226285550363422e-05, "loss": 0.4327, "step": 17772 }, { "epoch": 0.48800109829763866, "grad_norm": 0.442548930644989, "learning_rate": 1.722598700311312e-05, "loss": 0.5168, "step": 17773 }, { "epoch": 0.48802855573860515, "grad_norm": 0.41931644082069397, "learning_rate": 1.722568844238404e-05, "loss": 0.5024, "step": 17774 }, { "epoch": 0.48805601317957165, "grad_norm": 0.4078995883464813, "learning_rate": 1.722538986817675e-05, "loss": 0.4495, "step": 17775 }, { "epoch": 0.48808347062053814, "grad_norm": 0.44596463441848755, "learning_rate": 1.72250912804918e-05, "loss": 0.5204, "step": 17776 }, { "epoch": 0.4881109280615047, "grad_norm": 0.37142249941825867, "learning_rate": 1.7224792679329744e-05, "loss": 0.4793, "step": 17777 }, { "epoch": 0.4881383855024712, "grad_norm": 0.87308669090271, "learning_rate": 1.722449406469115e-05, "loss": 0.5331, "step": 17778 }, { "epoch": 0.4881658429434377, "grad_norm": 0.4292074143886566, "learning_rate": 1.7224195436576566e-05, "loss": 0.4822, "step": 17779 }, { "epoch": 0.48819330038440417, "grad_norm": 0.3964207172393799, "learning_rate": 1.7223896794986552e-05, "loss": 0.5059, "step": 17780 }, { "epoch": 0.48822075782537067, "grad_norm": 0.3670021891593933, "learning_rate": 1.7223598139921667e-05, "loss": 0.5503, "step": 17781 }, { "epoch": 0.48824821526633716, "grad_norm": 0.3594217002391815, "learning_rate": 1.7223299471382464e-05, "loss": 0.5641, "step": 17782 }, { "epoch": 0.48827567270730365, "grad_norm": 0.33374544978141785, "learning_rate": 1.7223000789369503e-05, "loss": 0.46, "step": 17783 }, { "epoch": 0.4883031301482702, "grad_norm": 0.4914587140083313, "learning_rate": 1.722270209388334e-05, "loss": 0.5049, "step": 17784 }, { "epoch": 0.4883305875892367, "grad_norm": 0.4681309163570404, "learning_rate": 1.7222403384924532e-05, "loss": 0.5703, "step": 17785 }, { "epoch": 0.4883580450302032, "grad_norm": 0.39005160331726074, "learning_rate": 1.7222104662493633e-05, "loss": 0.5462, "step": 17786 }, { "epoch": 0.4883855024711697, "grad_norm": 0.34874671697616577, "learning_rate": 1.722180592659121e-05, "loss": 0.5274, "step": 17787 }, { "epoch": 0.4884129599121362, "grad_norm": 0.3604627847671509, "learning_rate": 1.722150717721781e-05, "loss": 0.4946, "step": 17788 }, { "epoch": 0.4884404173531027, "grad_norm": 0.36221233010292053, "learning_rate": 1.7221208414373995e-05, "loss": 0.4904, "step": 17789 }, { "epoch": 0.48846787479406917, "grad_norm": 0.392921507358551, "learning_rate": 1.722090963806032e-05, "loss": 0.5234, "step": 17790 }, { "epoch": 0.4884953322350357, "grad_norm": 0.3834966719150543, "learning_rate": 1.7220610848277347e-05, "loss": 0.5297, "step": 17791 }, { "epoch": 0.4885227896760022, "grad_norm": 0.3801875114440918, "learning_rate": 1.7220312045025633e-05, "loss": 0.5291, "step": 17792 }, { "epoch": 0.4885502471169687, "grad_norm": 0.36013099551200867, "learning_rate": 1.7220013228305727e-05, "loss": 0.4818, "step": 17793 }, { "epoch": 0.4885777045579352, "grad_norm": 0.40967345237731934, "learning_rate": 1.7219714398118196e-05, "loss": 0.5286, "step": 17794 }, { "epoch": 0.4886051619989017, "grad_norm": 0.3858114778995514, "learning_rate": 1.721941555446359e-05, "loss": 0.5689, "step": 17795 }, { "epoch": 0.4886326194398682, "grad_norm": 0.3556840717792511, "learning_rate": 1.721911669734247e-05, "loss": 0.4983, "step": 17796 }, { "epoch": 0.4886600768808347, "grad_norm": 0.40683645009994507, "learning_rate": 1.7218817826755394e-05, "loss": 0.5373, "step": 17797 }, { "epoch": 0.48868753432180123, "grad_norm": 0.3931632339954376, "learning_rate": 1.721851894270292e-05, "loss": 0.5447, "step": 17798 }, { "epoch": 0.4887149917627677, "grad_norm": 0.3568495512008667, "learning_rate": 1.72182200451856e-05, "loss": 0.4652, "step": 17799 }, { "epoch": 0.4887424492037342, "grad_norm": 0.47889214754104614, "learning_rate": 1.7217921134204003e-05, "loss": 0.5162, "step": 17800 }, { "epoch": 0.4887699066447007, "grad_norm": 0.37562096118927, "learning_rate": 1.7217622209758676e-05, "loss": 0.559, "step": 17801 }, { "epoch": 0.4887973640856672, "grad_norm": 0.41337838768959045, "learning_rate": 1.7217323271850178e-05, "loss": 0.523, "step": 17802 }, { "epoch": 0.4888248215266337, "grad_norm": 0.3839432895183563, "learning_rate": 1.721702432047907e-05, "loss": 0.52, "step": 17803 }, { "epoch": 0.4888522789676002, "grad_norm": 0.41358956694602966, "learning_rate": 1.7216725355645908e-05, "loss": 0.4747, "step": 17804 }, { "epoch": 0.48887973640856675, "grad_norm": 0.34398898482322693, "learning_rate": 1.721642637735125e-05, "loss": 0.4782, "step": 17805 }, { "epoch": 0.48890719384953324, "grad_norm": 0.339236319065094, "learning_rate": 1.7216127385595656e-05, "loss": 0.4916, "step": 17806 }, { "epoch": 0.48893465129049973, "grad_norm": 0.3330595791339874, "learning_rate": 1.7215828380379676e-05, "loss": 0.4974, "step": 17807 }, { "epoch": 0.4889621087314662, "grad_norm": 0.35836082696914673, "learning_rate": 1.721552936170388e-05, "loss": 0.5627, "step": 17808 }, { "epoch": 0.4889895661724327, "grad_norm": 0.39471641182899475, "learning_rate": 1.7215230329568813e-05, "loss": 0.5509, "step": 17809 }, { "epoch": 0.4890170236133992, "grad_norm": 0.36818867921829224, "learning_rate": 1.721493128397504e-05, "loss": 0.4794, "step": 17810 }, { "epoch": 0.4890444810543657, "grad_norm": 0.40194636583328247, "learning_rate": 1.721463222492312e-05, "loss": 0.505, "step": 17811 }, { "epoch": 0.48907193849533226, "grad_norm": 0.3780379295349121, "learning_rate": 1.7214333152413603e-05, "loss": 0.531, "step": 17812 }, { "epoch": 0.48909939593629875, "grad_norm": 0.35710835456848145, "learning_rate": 1.721403406644706e-05, "loss": 0.4576, "step": 17813 }, { "epoch": 0.48912685337726525, "grad_norm": 0.4048587679862976, "learning_rate": 1.7213734967024035e-05, "loss": 0.5566, "step": 17814 }, { "epoch": 0.48915431081823174, "grad_norm": 0.367737352848053, "learning_rate": 1.7213435854145096e-05, "loss": 0.4334, "step": 17815 }, { "epoch": 0.48918176825919824, "grad_norm": 0.3767624795436859, "learning_rate": 1.7213136727810795e-05, "loss": 0.5709, "step": 17816 }, { "epoch": 0.48920922570016473, "grad_norm": 0.38018324971199036, "learning_rate": 1.721283758802169e-05, "loss": 0.5281, "step": 17817 }, { "epoch": 0.4892366831411312, "grad_norm": 0.3547159433364868, "learning_rate": 1.7212538434778338e-05, "loss": 0.5004, "step": 17818 }, { "epoch": 0.4892641405820978, "grad_norm": 0.4002813994884491, "learning_rate": 1.7212239268081304e-05, "loss": 0.5828, "step": 17819 }, { "epoch": 0.48929159802306427, "grad_norm": 0.3951385021209717, "learning_rate": 1.7211940087931144e-05, "loss": 0.5007, "step": 17820 }, { "epoch": 0.48931905546403076, "grad_norm": 0.45461228489875793, "learning_rate": 1.7211640894328413e-05, "loss": 0.4812, "step": 17821 }, { "epoch": 0.48934651290499726, "grad_norm": 0.7183622717857361, "learning_rate": 1.7211341687273664e-05, "loss": 0.6024, "step": 17822 }, { "epoch": 0.48937397034596375, "grad_norm": 0.3656640946865082, "learning_rate": 1.721104246676747e-05, "loss": 0.4904, "step": 17823 }, { "epoch": 0.48940142778693024, "grad_norm": 0.3793434202671051, "learning_rate": 1.7210743232810375e-05, "loss": 0.4961, "step": 17824 }, { "epoch": 0.48942888522789674, "grad_norm": 0.434174120426178, "learning_rate": 1.7210443985402943e-05, "loss": 0.5813, "step": 17825 }, { "epoch": 0.4894563426688633, "grad_norm": 0.4670186936855316, "learning_rate": 1.7210144724545732e-05, "loss": 0.525, "step": 17826 }, { "epoch": 0.4894838001098298, "grad_norm": 0.37180399894714355, "learning_rate": 1.72098454502393e-05, "loss": 0.5165, "step": 17827 }, { "epoch": 0.4895112575507963, "grad_norm": 0.48265594244003296, "learning_rate": 1.7209546162484207e-05, "loss": 0.5835, "step": 17828 }, { "epoch": 0.48953871499176277, "grad_norm": 0.37286481261253357, "learning_rate": 1.7209246861281007e-05, "loss": 0.4576, "step": 17829 }, { "epoch": 0.48956617243272926, "grad_norm": 0.37007829546928406, "learning_rate": 1.7208947546630262e-05, "loss": 0.4671, "step": 17830 }, { "epoch": 0.48959362987369576, "grad_norm": 0.3685915172100067, "learning_rate": 1.720864821853253e-05, "loss": 0.5054, "step": 17831 }, { "epoch": 0.48962108731466225, "grad_norm": 0.3932049870491028, "learning_rate": 1.7208348876988363e-05, "loss": 0.5398, "step": 17832 }, { "epoch": 0.4896485447556288, "grad_norm": 0.38906607031822205, "learning_rate": 1.720804952199833e-05, "loss": 0.5248, "step": 17833 }, { "epoch": 0.4896760021965953, "grad_norm": 0.6179790496826172, "learning_rate": 1.7207750153562982e-05, "loss": 0.4539, "step": 17834 }, { "epoch": 0.4897034596375618, "grad_norm": 0.4705316424369812, "learning_rate": 1.720745077168288e-05, "loss": 0.573, "step": 17835 }, { "epoch": 0.4897309170785283, "grad_norm": 0.3870733082294464, "learning_rate": 1.720715137635858e-05, "loss": 0.4338, "step": 17836 }, { "epoch": 0.4897583745194948, "grad_norm": 0.36236003041267395, "learning_rate": 1.7206851967590647e-05, "loss": 0.5013, "step": 17837 }, { "epoch": 0.48978583196046127, "grad_norm": 0.37478938698768616, "learning_rate": 1.720655254537963e-05, "loss": 0.5495, "step": 17838 }, { "epoch": 0.48981328940142776, "grad_norm": 0.40118634700775146, "learning_rate": 1.7206253109726098e-05, "loss": 0.5129, "step": 17839 }, { "epoch": 0.4898407468423943, "grad_norm": 0.39819326996803284, "learning_rate": 1.72059536606306e-05, "loss": 0.5018, "step": 17840 }, { "epoch": 0.4898682042833608, "grad_norm": 0.3462105691432953, "learning_rate": 1.72056541980937e-05, "loss": 0.4351, "step": 17841 }, { "epoch": 0.4898956617243273, "grad_norm": 0.4187363386154175, "learning_rate": 1.720535472211595e-05, "loss": 0.6056, "step": 17842 }, { "epoch": 0.4899231191652938, "grad_norm": 0.3979317545890808, "learning_rate": 1.720505523269792e-05, "loss": 0.5157, "step": 17843 }, { "epoch": 0.4899505766062603, "grad_norm": 0.3539043664932251, "learning_rate": 1.720475572984016e-05, "loss": 0.4649, "step": 17844 }, { "epoch": 0.4899780340472268, "grad_norm": 0.354744553565979, "learning_rate": 1.7204456213543234e-05, "loss": 0.5292, "step": 17845 }, { "epoch": 0.4900054914881933, "grad_norm": 0.390675812959671, "learning_rate": 1.7204156683807696e-05, "loss": 0.4385, "step": 17846 }, { "epoch": 0.49003294892915983, "grad_norm": 0.37188923358917236, "learning_rate": 1.7203857140634105e-05, "loss": 0.5212, "step": 17847 }, { "epoch": 0.4900604063701263, "grad_norm": 0.36838841438293457, "learning_rate": 1.7203557584023022e-05, "loss": 0.5346, "step": 17848 }, { "epoch": 0.4900878638110928, "grad_norm": 0.38904353976249695, "learning_rate": 1.7203258013975004e-05, "loss": 0.5247, "step": 17849 }, { "epoch": 0.4901153212520593, "grad_norm": 0.3701168894767761, "learning_rate": 1.7202958430490614e-05, "loss": 0.5009, "step": 17850 }, { "epoch": 0.4901427786930258, "grad_norm": 0.36962389945983887, "learning_rate": 1.7202658833570404e-05, "loss": 0.4566, "step": 17851 }, { "epoch": 0.4901702361339923, "grad_norm": 0.3574495315551758, "learning_rate": 1.7202359223214938e-05, "loss": 0.4716, "step": 17852 }, { "epoch": 0.4901976935749588, "grad_norm": 0.38047119975090027, "learning_rate": 1.7202059599424773e-05, "loss": 0.4642, "step": 17853 }, { "epoch": 0.49022515101592534, "grad_norm": 0.4187179207801819, "learning_rate": 1.720175996220047e-05, "loss": 0.4832, "step": 17854 }, { "epoch": 0.49025260845689184, "grad_norm": 0.4672851860523224, "learning_rate": 1.7201460311542582e-05, "loss": 0.554, "step": 17855 }, { "epoch": 0.49028006589785833, "grad_norm": 0.3704807758331299, "learning_rate": 1.7201160647451674e-05, "loss": 0.4841, "step": 17856 }, { "epoch": 0.4903075233388248, "grad_norm": 0.39375290274620056, "learning_rate": 1.7200860969928304e-05, "loss": 0.5586, "step": 17857 }, { "epoch": 0.4903349807797913, "grad_norm": 0.37536078691482544, "learning_rate": 1.720056127897303e-05, "loss": 0.514, "step": 17858 }, { "epoch": 0.4903624382207578, "grad_norm": 0.4565924108028412, "learning_rate": 1.720026157458641e-05, "loss": 0.5844, "step": 17859 }, { "epoch": 0.4903898956617243, "grad_norm": 0.3822723925113678, "learning_rate": 1.7199961856769006e-05, "loss": 0.5155, "step": 17860 }, { "epoch": 0.49041735310269086, "grad_norm": 0.394534170627594, "learning_rate": 1.7199662125521375e-05, "loss": 0.5347, "step": 17861 }, { "epoch": 0.49044481054365735, "grad_norm": 0.352857768535614, "learning_rate": 1.7199362380844075e-05, "loss": 0.54, "step": 17862 }, { "epoch": 0.49047226798462384, "grad_norm": 0.36084800958633423, "learning_rate": 1.7199062622737666e-05, "loss": 0.5633, "step": 17863 }, { "epoch": 0.49049972542559034, "grad_norm": 0.4460386335849762, "learning_rate": 1.719876285120271e-05, "loss": 0.5463, "step": 17864 }, { "epoch": 0.49052718286655683, "grad_norm": 0.3726047873497009, "learning_rate": 1.7198463066239762e-05, "loss": 0.4529, "step": 17865 }, { "epoch": 0.4905546403075233, "grad_norm": 0.3607197403907776, "learning_rate": 1.7198163267849385e-05, "loss": 0.4782, "step": 17866 }, { "epoch": 0.4905820977484898, "grad_norm": 0.36153388023376465, "learning_rate": 1.7197863456032133e-05, "loss": 0.5919, "step": 17867 }, { "epoch": 0.49060955518945637, "grad_norm": 0.34177306294441223, "learning_rate": 1.7197563630788568e-05, "loss": 0.4944, "step": 17868 }, { "epoch": 0.49063701263042286, "grad_norm": 0.357936829328537, "learning_rate": 1.7197263792119253e-05, "loss": 0.5592, "step": 17869 }, { "epoch": 0.49066447007138936, "grad_norm": 0.3403005599975586, "learning_rate": 1.719696394002474e-05, "loss": 0.4252, "step": 17870 }, { "epoch": 0.49069192751235585, "grad_norm": 0.35397663712501526, "learning_rate": 1.7196664074505596e-05, "loss": 0.5081, "step": 17871 }, { "epoch": 0.49071938495332235, "grad_norm": 0.3690434396266937, "learning_rate": 1.7196364195562372e-05, "loss": 0.5202, "step": 17872 }, { "epoch": 0.49074684239428884, "grad_norm": 0.41324275732040405, "learning_rate": 1.7196064303195638e-05, "loss": 0.518, "step": 17873 }, { "epoch": 0.49077429983525533, "grad_norm": 0.3777472972869873, "learning_rate": 1.719576439740594e-05, "loss": 0.6197, "step": 17874 }, { "epoch": 0.4908017572762219, "grad_norm": 0.3294455111026764, "learning_rate": 1.719546447819385e-05, "loss": 0.4153, "step": 17875 }, { "epoch": 0.4908292147171884, "grad_norm": 0.33800798654556274, "learning_rate": 1.719516454555992e-05, "loss": 0.4814, "step": 17876 }, { "epoch": 0.49085667215815487, "grad_norm": 0.3439382314682007, "learning_rate": 1.7194864599504714e-05, "loss": 0.5159, "step": 17877 }, { "epoch": 0.49088412959912137, "grad_norm": 0.6582164168357849, "learning_rate": 1.719456464002879e-05, "loss": 0.5819, "step": 17878 }, { "epoch": 0.49091158704008786, "grad_norm": 0.3737907111644745, "learning_rate": 1.7194264667132704e-05, "loss": 0.5806, "step": 17879 }, { "epoch": 0.49093904448105435, "grad_norm": 0.41890832781791687, "learning_rate": 1.7193964680817022e-05, "loss": 0.4302, "step": 17880 }, { "epoch": 0.49096650192202085, "grad_norm": 0.48428452014923096, "learning_rate": 1.7193664681082296e-05, "loss": 0.5703, "step": 17881 }, { "epoch": 0.49099395936298734, "grad_norm": 0.4254513680934906, "learning_rate": 1.719336466792909e-05, "loss": 0.5109, "step": 17882 }, { "epoch": 0.4910214168039539, "grad_norm": 0.3602256178855896, "learning_rate": 1.719306464135796e-05, "loss": 0.5252, "step": 17883 }, { "epoch": 0.4910488742449204, "grad_norm": 0.3605472147464752, "learning_rate": 1.7192764601369473e-05, "loss": 0.5492, "step": 17884 }, { "epoch": 0.4910763316858869, "grad_norm": 0.37658411264419556, "learning_rate": 1.7192464547964184e-05, "loss": 0.51, "step": 17885 }, { "epoch": 0.4911037891268534, "grad_norm": 0.42590972781181335, "learning_rate": 1.7192164481142652e-05, "loss": 0.534, "step": 17886 }, { "epoch": 0.49113124656781987, "grad_norm": 0.42022639513015747, "learning_rate": 1.719186440090544e-05, "loss": 0.6338, "step": 17887 }, { "epoch": 0.49115870400878636, "grad_norm": 0.3934406340122223, "learning_rate": 1.7191564307253102e-05, "loss": 0.5449, "step": 17888 }, { "epoch": 0.49118616144975286, "grad_norm": 0.6173372268676758, "learning_rate": 1.7191264200186202e-05, "loss": 0.6196, "step": 17889 }, { "epoch": 0.4912136188907194, "grad_norm": 0.40943050384521484, "learning_rate": 1.7190964079705302e-05, "loss": 0.5122, "step": 17890 }, { "epoch": 0.4912410763316859, "grad_norm": 0.37756669521331787, "learning_rate": 1.7190663945810958e-05, "loss": 0.5149, "step": 17891 }, { "epoch": 0.4912685337726524, "grad_norm": 0.4090964198112488, "learning_rate": 1.7190363798503725e-05, "loss": 0.5179, "step": 17892 }, { "epoch": 0.4912959912136189, "grad_norm": 0.3988071084022522, "learning_rate": 1.7190063637784176e-05, "loss": 0.578, "step": 17893 }, { "epoch": 0.4913234486545854, "grad_norm": 0.391082763671875, "learning_rate": 1.718976346365286e-05, "loss": 0.5947, "step": 17894 }, { "epoch": 0.4913509060955519, "grad_norm": 0.3932049870491028, "learning_rate": 1.7189463276110343e-05, "loss": 0.562, "step": 17895 }, { "epoch": 0.49137836353651837, "grad_norm": 0.38366004824638367, "learning_rate": 1.718916307515718e-05, "loss": 0.5369, "step": 17896 }, { "epoch": 0.4914058209774849, "grad_norm": 0.36642587184906006, "learning_rate": 1.7188862860793933e-05, "loss": 0.5546, "step": 17897 }, { "epoch": 0.4914332784184514, "grad_norm": 0.498079776763916, "learning_rate": 1.7188562633021165e-05, "loss": 0.5736, "step": 17898 }, { "epoch": 0.4914607358594179, "grad_norm": 0.3934246003627777, "learning_rate": 1.718826239183943e-05, "loss": 0.584, "step": 17899 }, { "epoch": 0.4914881933003844, "grad_norm": 0.5527629852294922, "learning_rate": 1.7187962137249293e-05, "loss": 0.5108, "step": 17900 }, { "epoch": 0.4915156507413509, "grad_norm": 0.39081281423568726, "learning_rate": 1.7187661869251314e-05, "loss": 0.5853, "step": 17901 }, { "epoch": 0.4915431081823174, "grad_norm": 0.363167405128479, "learning_rate": 1.718736158784605e-05, "loss": 0.4909, "step": 17902 }, { "epoch": 0.4915705656232839, "grad_norm": 0.3828846216201782, "learning_rate": 1.7187061293034064e-05, "loss": 0.526, "step": 17903 }, { "epoch": 0.49159802306425043, "grad_norm": 0.3723444938659668, "learning_rate": 1.7186760984815914e-05, "loss": 0.5524, "step": 17904 }, { "epoch": 0.4916254805052169, "grad_norm": 0.386844664812088, "learning_rate": 1.718646066319216e-05, "loss": 0.5125, "step": 17905 }, { "epoch": 0.4916529379461834, "grad_norm": 0.37585577368736267, "learning_rate": 1.7186160328163365e-05, "loss": 0.5492, "step": 17906 }, { "epoch": 0.4916803953871499, "grad_norm": 0.4624541699886322, "learning_rate": 1.7185859979730083e-05, "loss": 0.5042, "step": 17907 }, { "epoch": 0.4917078528281164, "grad_norm": 0.3886857032775879, "learning_rate": 1.718555961789288e-05, "loss": 0.5182, "step": 17908 }, { "epoch": 0.4917353102690829, "grad_norm": 0.3442637324333191, "learning_rate": 1.718525924265232e-05, "loss": 0.4573, "step": 17909 }, { "epoch": 0.4917627677100494, "grad_norm": 0.39258092641830444, "learning_rate": 1.7184958854008956e-05, "loss": 0.5201, "step": 17910 }, { "epoch": 0.49179022515101595, "grad_norm": 0.3832319974899292, "learning_rate": 1.718465845196335e-05, "loss": 0.4977, "step": 17911 }, { "epoch": 0.49181768259198244, "grad_norm": 0.4401923418045044, "learning_rate": 1.718435803651606e-05, "loss": 0.5025, "step": 17912 }, { "epoch": 0.49184514003294894, "grad_norm": 0.3987842798233032, "learning_rate": 1.7184057607667653e-05, "loss": 0.5542, "step": 17913 }, { "epoch": 0.49187259747391543, "grad_norm": 0.5196675062179565, "learning_rate": 1.7183757165418686e-05, "loss": 0.4658, "step": 17914 }, { "epoch": 0.4919000549148819, "grad_norm": 0.3787429928779602, "learning_rate": 1.7183456709769715e-05, "loss": 0.5184, "step": 17915 }, { "epoch": 0.4919275123558484, "grad_norm": 0.3541223406791687, "learning_rate": 1.718315624072131e-05, "loss": 0.4816, "step": 17916 }, { "epoch": 0.4919549697968149, "grad_norm": 0.35931575298309326, "learning_rate": 1.7182855758274023e-05, "loss": 0.5089, "step": 17917 }, { "epoch": 0.49198242723778146, "grad_norm": 0.4177033603191376, "learning_rate": 1.7182555262428416e-05, "loss": 0.5114, "step": 17918 }, { "epoch": 0.49200988467874796, "grad_norm": 0.41591504216194153, "learning_rate": 1.7182254753185055e-05, "loss": 0.4676, "step": 17919 }, { "epoch": 0.49203734211971445, "grad_norm": 0.32278892397880554, "learning_rate": 1.7181954230544496e-05, "loss": 0.3803, "step": 17920 }, { "epoch": 0.49206479956068094, "grad_norm": 0.44925805926322937, "learning_rate": 1.7181653694507297e-05, "loss": 0.5494, "step": 17921 }, { "epoch": 0.49209225700164744, "grad_norm": 0.3613086938858032, "learning_rate": 1.7181353145074022e-05, "loss": 0.4524, "step": 17922 }, { "epoch": 0.49211971444261393, "grad_norm": 0.37967050075531006, "learning_rate": 1.7181052582245233e-05, "loss": 0.5439, "step": 17923 }, { "epoch": 0.4921471718835804, "grad_norm": 0.38003477454185486, "learning_rate": 1.718075200602149e-05, "loss": 0.5241, "step": 17924 }, { "epoch": 0.492174629324547, "grad_norm": 0.37857815623283386, "learning_rate": 1.718045141640335e-05, "loss": 0.4687, "step": 17925 }, { "epoch": 0.49220208676551347, "grad_norm": 0.38082700967788696, "learning_rate": 1.718015081339138e-05, "loss": 0.5949, "step": 17926 }, { "epoch": 0.49222954420647996, "grad_norm": 0.4298068583011627, "learning_rate": 1.7179850196986132e-05, "loss": 0.5038, "step": 17927 }, { "epoch": 0.49225700164744646, "grad_norm": 0.36844637989997864, "learning_rate": 1.7179549567188177e-05, "loss": 0.4615, "step": 17928 }, { "epoch": 0.49228445908841295, "grad_norm": 0.3754223883152008, "learning_rate": 1.7179248923998065e-05, "loss": 0.4936, "step": 17929 }, { "epoch": 0.49231191652937945, "grad_norm": 0.36460793018341064, "learning_rate": 1.717894826741637e-05, "loss": 0.532, "step": 17930 }, { "epoch": 0.49233937397034594, "grad_norm": 0.36904463171958923, "learning_rate": 1.717864759744364e-05, "loss": 0.5395, "step": 17931 }, { "epoch": 0.4923668314113125, "grad_norm": 0.4602816700935364, "learning_rate": 1.717834691408044e-05, "loss": 0.5378, "step": 17932 }, { "epoch": 0.492394288852279, "grad_norm": 0.3438127040863037, "learning_rate": 1.7178046217327336e-05, "loss": 0.4993, "step": 17933 }, { "epoch": 0.4924217462932455, "grad_norm": 0.41845324635505676, "learning_rate": 1.7177745507184883e-05, "loss": 0.4536, "step": 17934 }, { "epoch": 0.49244920373421197, "grad_norm": 0.3562749922275543, "learning_rate": 1.7177444783653642e-05, "loss": 0.4707, "step": 17935 }, { "epoch": 0.49247666117517847, "grad_norm": 0.38490015268325806, "learning_rate": 1.7177144046734178e-05, "loss": 0.5615, "step": 17936 }, { "epoch": 0.49250411861614496, "grad_norm": 0.3355584740638733, "learning_rate": 1.717684329642705e-05, "loss": 0.5449, "step": 17937 }, { "epoch": 0.49253157605711145, "grad_norm": 0.3565799295902252, "learning_rate": 1.7176542532732816e-05, "loss": 0.5075, "step": 17938 }, { "epoch": 0.492559033498078, "grad_norm": 0.4044046401977539, "learning_rate": 1.717624175565204e-05, "loss": 0.5568, "step": 17939 }, { "epoch": 0.4925864909390445, "grad_norm": 0.41660937666893005, "learning_rate": 1.7175940965185284e-05, "loss": 0.586, "step": 17940 }, { "epoch": 0.492613948380011, "grad_norm": 0.3532661199569702, "learning_rate": 1.717564016133311e-05, "loss": 0.4902, "step": 17941 }, { "epoch": 0.4926414058209775, "grad_norm": 0.4710400104522705, "learning_rate": 1.7175339344096074e-05, "loss": 0.4162, "step": 17942 }, { "epoch": 0.492668863261944, "grad_norm": 0.40276315808296204, "learning_rate": 1.7175038513474742e-05, "loss": 0.5906, "step": 17943 }, { "epoch": 0.4926963207029105, "grad_norm": 0.43990251421928406, "learning_rate": 1.717473766946967e-05, "loss": 0.5918, "step": 17944 }, { "epoch": 0.49272377814387697, "grad_norm": 0.3555658757686615, "learning_rate": 1.717443681208142e-05, "loss": 0.5193, "step": 17945 }, { "epoch": 0.4927512355848435, "grad_norm": 0.7733058333396912, "learning_rate": 1.7174135941310565e-05, "loss": 0.562, "step": 17946 }, { "epoch": 0.49277869302581, "grad_norm": 0.40725457668304443, "learning_rate": 1.717383505715765e-05, "loss": 0.5227, "step": 17947 }, { "epoch": 0.4928061504667765, "grad_norm": 0.33708226680755615, "learning_rate": 1.7173534159623248e-05, "loss": 0.5235, "step": 17948 }, { "epoch": 0.492833607907743, "grad_norm": 0.3559558689594269, "learning_rate": 1.7173233248707908e-05, "loss": 0.4507, "step": 17949 }, { "epoch": 0.4928610653487095, "grad_norm": 0.3661240041255951, "learning_rate": 1.7172932324412203e-05, "loss": 0.5816, "step": 17950 }, { "epoch": 0.492888522789676, "grad_norm": 0.34928640723228455, "learning_rate": 1.717263138673669e-05, "loss": 0.6052, "step": 17951 }, { "epoch": 0.4929159802306425, "grad_norm": 0.4056544303894043, "learning_rate": 1.717233043568193e-05, "loss": 0.521, "step": 17952 }, { "epoch": 0.49294343767160903, "grad_norm": 0.35231298208236694, "learning_rate": 1.7172029471248485e-05, "loss": 0.5052, "step": 17953 }, { "epoch": 0.4929708951125755, "grad_norm": 0.36980488896369934, "learning_rate": 1.7171728493436912e-05, "loss": 0.4648, "step": 17954 }, { "epoch": 0.492998352553542, "grad_norm": 0.35177719593048096, "learning_rate": 1.717142750224778e-05, "loss": 0.4914, "step": 17955 }, { "epoch": 0.4930258099945085, "grad_norm": 0.391419380903244, "learning_rate": 1.717112649768165e-05, "loss": 0.5274, "step": 17956 }, { "epoch": 0.493053267435475, "grad_norm": 0.4985537528991699, "learning_rate": 1.717082547973907e-05, "loss": 0.508, "step": 17957 }, { "epoch": 0.4930807248764415, "grad_norm": 0.40397703647613525, "learning_rate": 1.717052444842062e-05, "loss": 0.471, "step": 17958 }, { "epoch": 0.493108182317408, "grad_norm": 0.3993542790412903, "learning_rate": 1.7170223403726853e-05, "loss": 0.4813, "step": 17959 }, { "epoch": 0.49313563975837454, "grad_norm": 0.3657904863357544, "learning_rate": 1.716992234565833e-05, "loss": 0.5423, "step": 17960 }, { "epoch": 0.49316309719934104, "grad_norm": 0.3577600419521332, "learning_rate": 1.7169621274215614e-05, "loss": 0.4443, "step": 17961 }, { "epoch": 0.49319055464030753, "grad_norm": 0.37999626994132996, "learning_rate": 1.7169320189399267e-05, "loss": 0.5793, "step": 17962 }, { "epoch": 0.493218012081274, "grad_norm": 0.3497711420059204, "learning_rate": 1.7169019091209846e-05, "loss": 0.4359, "step": 17963 }, { "epoch": 0.4932454695222405, "grad_norm": 0.3446301221847534, "learning_rate": 1.716871797964792e-05, "loss": 0.4769, "step": 17964 }, { "epoch": 0.493272926963207, "grad_norm": 0.7012301683425903, "learning_rate": 1.7168416854714042e-05, "loss": 0.4696, "step": 17965 }, { "epoch": 0.4933003844041735, "grad_norm": 0.4118887782096863, "learning_rate": 1.716811571640878e-05, "loss": 0.5541, "step": 17966 }, { "epoch": 0.49332784184514006, "grad_norm": 0.40469327569007874, "learning_rate": 1.71678145647327e-05, "loss": 0.6024, "step": 17967 }, { "epoch": 0.49335529928610655, "grad_norm": 0.40736010670661926, "learning_rate": 1.7167513399686354e-05, "loss": 0.5511, "step": 17968 }, { "epoch": 0.49338275672707305, "grad_norm": 0.3747415542602539, "learning_rate": 1.716721222127031e-05, "loss": 0.6216, "step": 17969 }, { "epoch": 0.49341021416803954, "grad_norm": 0.3698968291282654, "learning_rate": 1.7166911029485122e-05, "loss": 0.5185, "step": 17970 }, { "epoch": 0.49343767160900603, "grad_norm": 0.38291239738464355, "learning_rate": 1.716660982433136e-05, "loss": 0.5424, "step": 17971 }, { "epoch": 0.49346512904997253, "grad_norm": 0.3592331111431122, "learning_rate": 1.7166308605809586e-05, "loss": 0.5174, "step": 17972 }, { "epoch": 0.493492586490939, "grad_norm": 0.5112324953079224, "learning_rate": 1.716600737392036e-05, "loss": 0.4069, "step": 17973 }, { "epoch": 0.49352004393190557, "grad_norm": 0.5105748772621155, "learning_rate": 1.7165706128664237e-05, "loss": 0.5715, "step": 17974 }, { "epoch": 0.49354750137287207, "grad_norm": 0.5488637089729309, "learning_rate": 1.7165404870041792e-05, "loss": 0.5079, "step": 17975 }, { "epoch": 0.49357495881383856, "grad_norm": 0.3652249872684479, "learning_rate": 1.7165103598053576e-05, "loss": 0.5069, "step": 17976 }, { "epoch": 0.49360241625480505, "grad_norm": 0.3680688440799713, "learning_rate": 1.7164802312700157e-05, "loss": 0.4701, "step": 17977 }, { "epoch": 0.49362987369577155, "grad_norm": 0.35927268862724304, "learning_rate": 1.7164501013982096e-05, "loss": 0.4324, "step": 17978 }, { "epoch": 0.49365733113673804, "grad_norm": 0.3858721852302551, "learning_rate": 1.7164199701899952e-05, "loss": 0.5049, "step": 17979 }, { "epoch": 0.49368478857770454, "grad_norm": 0.32894790172576904, "learning_rate": 1.7163898376454288e-05, "loss": 0.4862, "step": 17980 }, { "epoch": 0.4937122460186711, "grad_norm": 0.4254773259162903, "learning_rate": 1.7163597037645666e-05, "loss": 0.4804, "step": 17981 }, { "epoch": 0.4937397034596376, "grad_norm": 0.4106791615486145, "learning_rate": 1.7163295685474652e-05, "loss": 0.4613, "step": 17982 }, { "epoch": 0.4937671609006041, "grad_norm": 0.5800666809082031, "learning_rate": 1.7162994319941805e-05, "loss": 0.5556, "step": 17983 }, { "epoch": 0.49379461834157057, "grad_norm": 0.4093939960002899, "learning_rate": 1.7162692941047688e-05, "loss": 0.5835, "step": 17984 }, { "epoch": 0.49382207578253706, "grad_norm": 0.4323198199272156, "learning_rate": 1.7162391548792862e-05, "loss": 0.5259, "step": 17985 }, { "epoch": 0.49384953322350356, "grad_norm": 0.34572163224220276, "learning_rate": 1.716209014317789e-05, "loss": 0.4967, "step": 17986 }, { "epoch": 0.49387699066447005, "grad_norm": 0.39716726541519165, "learning_rate": 1.7161788724203334e-05, "loss": 0.5343, "step": 17987 }, { "epoch": 0.4939044481054366, "grad_norm": 0.3746080696582794, "learning_rate": 1.7161487291869755e-05, "loss": 0.501, "step": 17988 }, { "epoch": 0.4939319055464031, "grad_norm": 0.3894653022289276, "learning_rate": 1.7161185846177715e-05, "loss": 0.4788, "step": 17989 }, { "epoch": 0.4939593629873696, "grad_norm": 0.33428505063056946, "learning_rate": 1.7160884387127782e-05, "loss": 0.4437, "step": 17990 }, { "epoch": 0.4939868204283361, "grad_norm": 0.35403043031692505, "learning_rate": 1.716058291472051e-05, "loss": 0.4946, "step": 17991 }, { "epoch": 0.4940142778693026, "grad_norm": 0.4354105591773987, "learning_rate": 1.716028142895647e-05, "loss": 0.4935, "step": 17992 }, { "epoch": 0.49404173531026907, "grad_norm": 0.3614565134048462, "learning_rate": 1.7159979929836216e-05, "loss": 0.5861, "step": 17993 }, { "epoch": 0.49406919275123556, "grad_norm": 0.45869335532188416, "learning_rate": 1.7159678417360317e-05, "loss": 0.4983, "step": 17994 }, { "epoch": 0.4940966501922021, "grad_norm": 0.3388749957084656, "learning_rate": 1.7159376891529328e-05, "loss": 0.4993, "step": 17995 }, { "epoch": 0.4941241076331686, "grad_norm": 0.32819220423698425, "learning_rate": 1.7159075352343824e-05, "loss": 0.4836, "step": 17996 }, { "epoch": 0.4941515650741351, "grad_norm": 0.34097370505332947, "learning_rate": 1.715877379980435e-05, "loss": 0.4151, "step": 17997 }, { "epoch": 0.4941790225151016, "grad_norm": 0.39025625586509705, "learning_rate": 1.7158472233911484e-05, "loss": 0.5564, "step": 17998 }, { "epoch": 0.4942064799560681, "grad_norm": 0.42084693908691406, "learning_rate": 1.7158170654665784e-05, "loss": 0.4823, "step": 17999 }, { "epoch": 0.4942339373970346, "grad_norm": 0.3919021189212799, "learning_rate": 1.715786906206781e-05, "loss": 0.4491, "step": 18000 }, { "epoch": 0.4942613948380011, "grad_norm": 0.4029986560344696, "learning_rate": 1.7157567456118124e-05, "loss": 0.6284, "step": 18001 }, { "epoch": 0.4942888522789676, "grad_norm": 0.48697495460510254, "learning_rate": 1.715726583681729e-05, "loss": 0.5612, "step": 18002 }, { "epoch": 0.4943163097199341, "grad_norm": 0.44692298769950867, "learning_rate": 1.715696420416587e-05, "loss": 0.5549, "step": 18003 }, { "epoch": 0.4943437671609006, "grad_norm": 0.42928484082221985, "learning_rate": 1.715666255816443e-05, "loss": 0.4872, "step": 18004 }, { "epoch": 0.4943712246018671, "grad_norm": 0.3850124478340149, "learning_rate": 1.715636089881353e-05, "loss": 0.4985, "step": 18005 }, { "epoch": 0.4943986820428336, "grad_norm": 0.33896538615226746, "learning_rate": 1.7156059226113732e-05, "loss": 0.5162, "step": 18006 }, { "epoch": 0.4944261394838001, "grad_norm": 0.3595883250236511, "learning_rate": 1.71557575400656e-05, "loss": 0.4516, "step": 18007 }, { "epoch": 0.4944535969247666, "grad_norm": 0.37072330713272095, "learning_rate": 1.7155455840669696e-05, "loss": 0.4496, "step": 18008 }, { "epoch": 0.49448105436573314, "grad_norm": 0.8181400299072266, "learning_rate": 1.7155154127926582e-05, "loss": 0.4257, "step": 18009 }, { "epoch": 0.49450851180669964, "grad_norm": 0.41068658232688904, "learning_rate": 1.715485240183682e-05, "loss": 0.4534, "step": 18010 }, { "epoch": 0.49453596924766613, "grad_norm": 0.5140672922134399, "learning_rate": 1.7154550662400978e-05, "loss": 0.5455, "step": 18011 }, { "epoch": 0.4945634266886326, "grad_norm": 0.3912648558616638, "learning_rate": 1.7154248909619616e-05, "loss": 0.568, "step": 18012 }, { "epoch": 0.4945908841295991, "grad_norm": 0.3619721233844757, "learning_rate": 1.7153947143493297e-05, "loss": 0.5626, "step": 18013 }, { "epoch": 0.4946183415705656, "grad_norm": 0.3864416182041168, "learning_rate": 1.715364536402258e-05, "loss": 0.4842, "step": 18014 }, { "epoch": 0.4946457990115321, "grad_norm": 0.3768928647041321, "learning_rate": 1.7153343571208037e-05, "loss": 0.5151, "step": 18015 }, { "epoch": 0.4946732564524986, "grad_norm": 0.3891160488128662, "learning_rate": 1.715304176505022e-05, "loss": 0.5483, "step": 18016 }, { "epoch": 0.49470071389346515, "grad_norm": 0.3617023527622223, "learning_rate": 1.71527399455497e-05, "loss": 0.4773, "step": 18017 }, { "epoch": 0.49472817133443164, "grad_norm": 0.3538110852241516, "learning_rate": 1.7152438112707033e-05, "loss": 0.4997, "step": 18018 }, { "epoch": 0.49475562877539814, "grad_norm": 0.34884747862815857, "learning_rate": 1.715213626652279e-05, "loss": 0.4793, "step": 18019 }, { "epoch": 0.49478308621636463, "grad_norm": 0.3991345763206482, "learning_rate": 1.715183440699753e-05, "loss": 0.4694, "step": 18020 }, { "epoch": 0.4948105436573311, "grad_norm": 0.3554777204990387, "learning_rate": 1.715153253413181e-05, "loss": 0.5, "step": 18021 }, { "epoch": 0.4948380010982976, "grad_norm": 0.37285226583480835, "learning_rate": 1.715123064792621e-05, "loss": 0.5002, "step": 18022 }, { "epoch": 0.4948654585392641, "grad_norm": 0.3851296007633209, "learning_rate": 1.7150928748381275e-05, "loss": 0.5208, "step": 18023 }, { "epoch": 0.49489291598023066, "grad_norm": 0.3638288676738739, "learning_rate": 1.7150626835497577e-05, "loss": 0.4052, "step": 18024 }, { "epoch": 0.49492037342119716, "grad_norm": 0.33369502425193787, "learning_rate": 1.715032490927568e-05, "loss": 0.4928, "step": 18025 }, { "epoch": 0.49494783086216365, "grad_norm": 0.3929636478424072, "learning_rate": 1.7150022969716143e-05, "loss": 0.4684, "step": 18026 }, { "epoch": 0.49497528830313015, "grad_norm": 0.36121219396591187, "learning_rate": 1.714972101681953e-05, "loss": 0.5708, "step": 18027 }, { "epoch": 0.49500274574409664, "grad_norm": 0.36061331629753113, "learning_rate": 1.7149419050586408e-05, "loss": 0.4898, "step": 18028 }, { "epoch": 0.49503020318506313, "grad_norm": 0.3356732428073883, "learning_rate": 1.7149117071017338e-05, "loss": 0.3656, "step": 18029 }, { "epoch": 0.4950576606260296, "grad_norm": 0.3659035861492157, "learning_rate": 1.714881507811288e-05, "loss": 0.5126, "step": 18030 }, { "epoch": 0.4950851180669962, "grad_norm": 0.39169132709503174, "learning_rate": 1.71485130718736e-05, "loss": 0.5724, "step": 18031 }, { "epoch": 0.49511257550796267, "grad_norm": 0.3460921347141266, "learning_rate": 1.7148211052300063e-05, "loss": 0.4246, "step": 18032 }, { "epoch": 0.49514003294892917, "grad_norm": 0.35078978538513184, "learning_rate": 1.714790901939283e-05, "loss": 0.5396, "step": 18033 }, { "epoch": 0.49516749038989566, "grad_norm": 0.34466609358787537, "learning_rate": 1.7147606973152465e-05, "loss": 0.4826, "step": 18034 }, { "epoch": 0.49519494783086215, "grad_norm": 0.3673054575920105, "learning_rate": 1.7147304913579534e-05, "loss": 0.5652, "step": 18035 }, { "epoch": 0.49522240527182865, "grad_norm": 0.3689931035041809, "learning_rate": 1.71470028406746e-05, "loss": 0.5129, "step": 18036 }, { "epoch": 0.49524986271279514, "grad_norm": 0.35773035883903503, "learning_rate": 1.714670075443822e-05, "loss": 0.4482, "step": 18037 }, { "epoch": 0.4952773201537617, "grad_norm": 0.35809704661369324, "learning_rate": 1.7146398654870964e-05, "loss": 0.4814, "step": 18038 }, { "epoch": 0.4953047775947282, "grad_norm": 0.38772737979888916, "learning_rate": 1.7146096541973395e-05, "loss": 0.5132, "step": 18039 }, { "epoch": 0.4953322350356947, "grad_norm": 0.3958050310611725, "learning_rate": 1.7145794415746073e-05, "loss": 0.5531, "step": 18040 }, { "epoch": 0.4953596924766612, "grad_norm": 0.3745381236076355, "learning_rate": 1.7145492276189565e-05, "loss": 0.4381, "step": 18041 }, { "epoch": 0.49538714991762767, "grad_norm": 0.3577905297279358, "learning_rate": 1.7145190123304432e-05, "loss": 0.4597, "step": 18042 }, { "epoch": 0.49541460735859416, "grad_norm": 0.42759400606155396, "learning_rate": 1.714488795709124e-05, "loss": 0.579, "step": 18043 }, { "epoch": 0.49544206479956066, "grad_norm": 0.3459891378879547, "learning_rate": 1.714458577755055e-05, "loss": 0.5199, "step": 18044 }, { "epoch": 0.4954695222405272, "grad_norm": 0.36716020107269287, "learning_rate": 1.7144283584682928e-05, "loss": 0.4999, "step": 18045 }, { "epoch": 0.4954969796814937, "grad_norm": 0.4022444188594818, "learning_rate": 1.714398137848894e-05, "loss": 0.4998, "step": 18046 }, { "epoch": 0.4955244371224602, "grad_norm": 0.34617486596107483, "learning_rate": 1.7143679158969144e-05, "loss": 0.4736, "step": 18047 }, { "epoch": 0.4955518945634267, "grad_norm": 0.3869198262691498, "learning_rate": 1.7143376926124107e-05, "loss": 0.499, "step": 18048 }, { "epoch": 0.4955793520043932, "grad_norm": 0.37870630621910095, "learning_rate": 1.714307467995439e-05, "loss": 0.4988, "step": 18049 }, { "epoch": 0.4956068094453597, "grad_norm": 0.3864934742450714, "learning_rate": 1.714277242046056e-05, "loss": 0.4675, "step": 18050 }, { "epoch": 0.49563426688632617, "grad_norm": 0.35694417357444763, "learning_rate": 1.714247014764318e-05, "loss": 0.4959, "step": 18051 }, { "epoch": 0.4956617243272927, "grad_norm": 0.3551012873649597, "learning_rate": 1.7142167861502814e-05, "loss": 0.5176, "step": 18052 }, { "epoch": 0.4956891817682592, "grad_norm": 0.3825741112232208, "learning_rate": 1.714186556204003e-05, "loss": 0.4916, "step": 18053 }, { "epoch": 0.4957166392092257, "grad_norm": 0.37723904848098755, "learning_rate": 1.7141563249255377e-05, "loss": 0.4727, "step": 18054 }, { "epoch": 0.4957440966501922, "grad_norm": 0.3839629590511322, "learning_rate": 1.7141260923149437e-05, "loss": 0.4692, "step": 18055 }, { "epoch": 0.4957715540911587, "grad_norm": 0.4407203495502472, "learning_rate": 1.7140958583722763e-05, "loss": 0.4978, "step": 18056 }, { "epoch": 0.4957990115321252, "grad_norm": 0.6031894683837891, "learning_rate": 1.7140656230975927e-05, "loss": 0.4283, "step": 18057 }, { "epoch": 0.4958264689730917, "grad_norm": 0.334059476852417, "learning_rate": 1.714035386490948e-05, "loss": 0.4806, "step": 18058 }, { "epoch": 0.49585392641405823, "grad_norm": 0.38559871912002563, "learning_rate": 1.7140051485524e-05, "loss": 0.5556, "step": 18059 }, { "epoch": 0.4958813838550247, "grad_norm": 0.44070833921432495, "learning_rate": 1.713974909282004e-05, "loss": 0.5922, "step": 18060 }, { "epoch": 0.4959088412959912, "grad_norm": 0.39782440662384033, "learning_rate": 1.7139446686798176e-05, "loss": 0.4515, "step": 18061 }, { "epoch": 0.4959362987369577, "grad_norm": 0.39064180850982666, "learning_rate": 1.713914426745896e-05, "loss": 0.5619, "step": 18062 }, { "epoch": 0.4959637561779242, "grad_norm": 0.4381251931190491, "learning_rate": 1.7138841834802965e-05, "loss": 0.4434, "step": 18063 }, { "epoch": 0.4959912136188907, "grad_norm": 0.321460485458374, "learning_rate": 1.7138539388830748e-05, "loss": 0.4483, "step": 18064 }, { "epoch": 0.4960186710598572, "grad_norm": 0.36805370450019836, "learning_rate": 1.713823692954288e-05, "loss": 0.5595, "step": 18065 }, { "epoch": 0.49604612850082375, "grad_norm": 0.358269602060318, "learning_rate": 1.713793445693992e-05, "loss": 0.4391, "step": 18066 }, { "epoch": 0.49607358594179024, "grad_norm": 0.35984277725219727, "learning_rate": 1.7137631971022435e-05, "loss": 0.4822, "step": 18067 }, { "epoch": 0.49610104338275673, "grad_norm": 0.350449800491333, "learning_rate": 1.7137329471790987e-05, "loss": 0.4933, "step": 18068 }, { "epoch": 0.49612850082372323, "grad_norm": 0.40611281991004944, "learning_rate": 1.713702695924614e-05, "loss": 0.5037, "step": 18069 }, { "epoch": 0.4961559582646897, "grad_norm": 0.3518425226211548, "learning_rate": 1.7136724433388467e-05, "loss": 0.4511, "step": 18070 }, { "epoch": 0.4961834157056562, "grad_norm": 0.3978702127933502, "learning_rate": 1.713642189421852e-05, "loss": 0.4753, "step": 18071 }, { "epoch": 0.4962108731466227, "grad_norm": 0.3420800566673279, "learning_rate": 1.7136119341736868e-05, "loss": 0.414, "step": 18072 }, { "epoch": 0.49623833058758926, "grad_norm": 0.31877195835113525, "learning_rate": 1.7135816775944074e-05, "loss": 0.4297, "step": 18073 }, { "epoch": 0.49626578802855575, "grad_norm": 1.425424575805664, "learning_rate": 1.7135514196840706e-05, "loss": 0.4955, "step": 18074 }, { "epoch": 0.49629324546952225, "grad_norm": 0.3340473771095276, "learning_rate": 1.713521160442733e-05, "loss": 0.5199, "step": 18075 }, { "epoch": 0.49632070291048874, "grad_norm": 0.3660655915737152, "learning_rate": 1.7134908998704506e-05, "loss": 0.5354, "step": 18076 }, { "epoch": 0.49634816035145524, "grad_norm": 0.35943353176116943, "learning_rate": 1.7134606379672795e-05, "loss": 0.4962, "step": 18077 }, { "epoch": 0.49637561779242173, "grad_norm": 0.3563924729824066, "learning_rate": 1.713430374733277e-05, "loss": 0.5419, "step": 18078 }, { "epoch": 0.4964030752333882, "grad_norm": 0.3076569437980652, "learning_rate": 1.713400110168499e-05, "loss": 0.4523, "step": 18079 }, { "epoch": 0.4964305326743548, "grad_norm": 0.4074445962905884, "learning_rate": 1.7133698442730023e-05, "loss": 0.5238, "step": 18080 }, { "epoch": 0.49645799011532127, "grad_norm": 0.3895065486431122, "learning_rate": 1.713339577046843e-05, "loss": 0.5606, "step": 18081 }, { "epoch": 0.49648544755628776, "grad_norm": 0.3602030575275421, "learning_rate": 1.7133093084900777e-05, "loss": 0.5084, "step": 18082 }, { "epoch": 0.49651290499725426, "grad_norm": 0.40063247084617615, "learning_rate": 1.7132790386027628e-05, "loss": 0.4587, "step": 18083 }, { "epoch": 0.49654036243822075, "grad_norm": 0.4001257121562958, "learning_rate": 1.713248767384955e-05, "loss": 0.4913, "step": 18084 }, { "epoch": 0.49656781987918724, "grad_norm": 1.5800052881240845, "learning_rate": 1.713218494836711e-05, "loss": 0.5936, "step": 18085 }, { "epoch": 0.49659527732015374, "grad_norm": 0.35585376620292664, "learning_rate": 1.7131882209580863e-05, "loss": 0.5106, "step": 18086 }, { "epoch": 0.4966227347611203, "grad_norm": 0.3544674813747406, "learning_rate": 1.7131579457491378e-05, "loss": 0.559, "step": 18087 }, { "epoch": 0.4966501922020868, "grad_norm": 0.3898366689682007, "learning_rate": 1.7131276692099224e-05, "loss": 0.4706, "step": 18088 }, { "epoch": 0.4966776496430533, "grad_norm": 0.43339136242866516, "learning_rate": 1.7130973913404964e-05, "loss": 0.5251, "step": 18089 }, { "epoch": 0.49670510708401977, "grad_norm": 0.37210848927497864, "learning_rate": 1.713067112140916e-05, "loss": 0.5752, "step": 18090 }, { "epoch": 0.49673256452498626, "grad_norm": 0.3562559187412262, "learning_rate": 1.713036831611238e-05, "loss": 0.541, "step": 18091 }, { "epoch": 0.49676002196595276, "grad_norm": 0.3666997253894806, "learning_rate": 1.7130065497515188e-05, "loss": 0.5319, "step": 18092 }, { "epoch": 0.49678747940691925, "grad_norm": 0.3696627616882324, "learning_rate": 1.7129762665618144e-05, "loss": 0.5675, "step": 18093 }, { "epoch": 0.4968149368478858, "grad_norm": 0.36546790599823, "learning_rate": 1.7129459820421822e-05, "loss": 0.4403, "step": 18094 }, { "epoch": 0.4968423942888523, "grad_norm": 0.4002673029899597, "learning_rate": 1.7129156961926778e-05, "loss": 0.4591, "step": 18095 }, { "epoch": 0.4968698517298188, "grad_norm": 0.38140302896499634, "learning_rate": 1.7128854090133584e-05, "loss": 0.6107, "step": 18096 }, { "epoch": 0.4968973091707853, "grad_norm": 0.43380630016326904, "learning_rate": 1.7128551205042797e-05, "loss": 0.6419, "step": 18097 }, { "epoch": 0.4969247666117518, "grad_norm": 0.3346729576587677, "learning_rate": 1.7128248306654995e-05, "loss": 0.5199, "step": 18098 }, { "epoch": 0.49695222405271827, "grad_norm": 0.47727271914482117, "learning_rate": 1.7127945394970727e-05, "loss": 0.564, "step": 18099 }, { "epoch": 0.49697968149368477, "grad_norm": 0.39138948917388916, "learning_rate": 1.712764246999057e-05, "loss": 0.6505, "step": 18100 }, { "epoch": 0.4970071389346513, "grad_norm": 0.36027175188064575, "learning_rate": 1.7127339531715085e-05, "loss": 0.5194, "step": 18101 }, { "epoch": 0.4970345963756178, "grad_norm": 0.3931269645690918, "learning_rate": 1.7127036580144835e-05, "loss": 0.5443, "step": 18102 }, { "epoch": 0.4970620538165843, "grad_norm": 0.4084882438182831, "learning_rate": 1.712673361528039e-05, "loss": 0.405, "step": 18103 }, { "epoch": 0.4970895112575508, "grad_norm": 0.45017755031585693, "learning_rate": 1.7126430637122307e-05, "loss": 0.467, "step": 18104 }, { "epoch": 0.4971169686985173, "grad_norm": 0.4806169867515564, "learning_rate": 1.712612764567116e-05, "loss": 0.5294, "step": 18105 }, { "epoch": 0.4971444261394838, "grad_norm": 0.9122720956802368, "learning_rate": 1.712582464092751e-05, "loss": 0.5502, "step": 18106 }, { "epoch": 0.4971718835804503, "grad_norm": 0.35052505135536194, "learning_rate": 1.7125521622891924e-05, "loss": 0.4327, "step": 18107 }, { "epoch": 0.49719934102141683, "grad_norm": 0.3698379695415497, "learning_rate": 1.7125218591564964e-05, "loss": 0.5529, "step": 18108 }, { "epoch": 0.4972267984623833, "grad_norm": 0.4450494349002838, "learning_rate": 1.71249155469472e-05, "loss": 0.4653, "step": 18109 }, { "epoch": 0.4972542559033498, "grad_norm": 0.3529316782951355, "learning_rate": 1.7124612489039193e-05, "loss": 0.4957, "step": 18110 }, { "epoch": 0.4972817133443163, "grad_norm": 0.3796810805797577, "learning_rate": 1.7124309417841508e-05, "loss": 0.5098, "step": 18111 }, { "epoch": 0.4973091707852828, "grad_norm": 0.3867679238319397, "learning_rate": 1.7124006333354716e-05, "loss": 0.4491, "step": 18112 }, { "epoch": 0.4973366282262493, "grad_norm": 0.3879712224006653, "learning_rate": 1.7123703235579373e-05, "loss": 0.4744, "step": 18113 }, { "epoch": 0.4973640856672158, "grad_norm": 0.4261866807937622, "learning_rate": 1.7123400124516055e-05, "loss": 0.565, "step": 18114 }, { "epoch": 0.49739154310818234, "grad_norm": 0.3776656687259674, "learning_rate": 1.7123097000165318e-05, "loss": 0.4791, "step": 18115 }, { "epoch": 0.49741900054914884, "grad_norm": 0.392581969499588, "learning_rate": 1.7122793862527737e-05, "loss": 0.408, "step": 18116 }, { "epoch": 0.49744645799011533, "grad_norm": 0.35223355889320374, "learning_rate": 1.7122490711603865e-05, "loss": 0.5375, "step": 18117 }, { "epoch": 0.4974739154310818, "grad_norm": 0.42664703726768494, "learning_rate": 1.712218754739428e-05, "loss": 0.5554, "step": 18118 }, { "epoch": 0.4975013728720483, "grad_norm": 0.3635009527206421, "learning_rate": 1.712188436989954e-05, "loss": 0.437, "step": 18119 }, { "epoch": 0.4975288303130148, "grad_norm": 0.36371028423309326, "learning_rate": 1.712158117912021e-05, "loss": 0.467, "step": 18120 }, { "epoch": 0.4975562877539813, "grad_norm": 0.41507041454315186, "learning_rate": 1.7121277975056863e-05, "loss": 0.3786, "step": 18121 }, { "epoch": 0.49758374519494786, "grad_norm": 0.3716070055961609, "learning_rate": 1.7120974757710057e-05, "loss": 0.5227, "step": 18122 }, { "epoch": 0.49761120263591435, "grad_norm": 0.442789763212204, "learning_rate": 1.7120671527080364e-05, "loss": 0.4972, "step": 18123 }, { "epoch": 0.49763866007688085, "grad_norm": 0.3918672204017639, "learning_rate": 1.7120368283168344e-05, "loss": 0.5105, "step": 18124 }, { "epoch": 0.49766611751784734, "grad_norm": 0.3890630006790161, "learning_rate": 1.7120065025974563e-05, "loss": 0.4685, "step": 18125 }, { "epoch": 0.49769357495881383, "grad_norm": 0.3442296087741852, "learning_rate": 1.711976175549959e-05, "loss": 0.402, "step": 18126 }, { "epoch": 0.4977210323997803, "grad_norm": 0.4134598672389984, "learning_rate": 1.7119458471743988e-05, "loss": 0.4537, "step": 18127 }, { "epoch": 0.4977484898407468, "grad_norm": 0.39220383763313293, "learning_rate": 1.7119155174708325e-05, "loss": 0.4202, "step": 18128 }, { "epoch": 0.49777594728171337, "grad_norm": 0.4110613763332367, "learning_rate": 1.711885186439316e-05, "loss": 0.5621, "step": 18129 }, { "epoch": 0.49780340472267987, "grad_norm": 0.383037805557251, "learning_rate": 1.711854854079907e-05, "loss": 0.5798, "step": 18130 }, { "epoch": 0.49783086216364636, "grad_norm": 0.3736554980278015, "learning_rate": 1.7118245203926616e-05, "loss": 0.5163, "step": 18131 }, { "epoch": 0.49785831960461285, "grad_norm": 0.36420005559921265, "learning_rate": 1.711794185377636e-05, "loss": 0.4981, "step": 18132 }, { "epoch": 0.49788577704557935, "grad_norm": 0.38444778323173523, "learning_rate": 1.7117638490348868e-05, "loss": 0.533, "step": 18133 }, { "epoch": 0.49791323448654584, "grad_norm": 0.33724531531333923, "learning_rate": 1.7117335113644712e-05, "loss": 0.508, "step": 18134 }, { "epoch": 0.49794069192751234, "grad_norm": 0.4041788876056671, "learning_rate": 1.7117031723664456e-05, "loss": 0.6482, "step": 18135 }, { "epoch": 0.4979681493684789, "grad_norm": 0.38526666164398193, "learning_rate": 1.7116728320408662e-05, "loss": 0.5896, "step": 18136 }, { "epoch": 0.4979956068094454, "grad_norm": 0.35115107893943787, "learning_rate": 1.7116424903877898e-05, "loss": 0.5894, "step": 18137 }, { "epoch": 0.4980230642504119, "grad_norm": 0.3842831552028656, "learning_rate": 1.7116121474072727e-05, "loss": 0.4847, "step": 18138 }, { "epoch": 0.49805052169137837, "grad_norm": 0.4195261299610138, "learning_rate": 1.7115818030993724e-05, "loss": 0.4895, "step": 18139 }, { "epoch": 0.49807797913234486, "grad_norm": 0.3534254729747772, "learning_rate": 1.711551457464145e-05, "loss": 0.4726, "step": 18140 }, { "epoch": 0.49810543657331136, "grad_norm": 0.38802364468574524, "learning_rate": 1.7115211105016465e-05, "loss": 0.4639, "step": 18141 }, { "epoch": 0.49813289401427785, "grad_norm": 0.34438204765319824, "learning_rate": 1.7114907622119344e-05, "loss": 0.4647, "step": 18142 }, { "epoch": 0.4981603514552444, "grad_norm": 0.4019126296043396, "learning_rate": 1.7114604125950646e-05, "loss": 0.5288, "step": 18143 }, { "epoch": 0.4981878088962109, "grad_norm": 0.35834503173828125, "learning_rate": 1.7114300616510945e-05, "loss": 0.561, "step": 18144 }, { "epoch": 0.4982152663371774, "grad_norm": 0.3469700515270233, "learning_rate": 1.71139970938008e-05, "loss": 0.4863, "step": 18145 }, { "epoch": 0.4982427237781439, "grad_norm": 0.38737839460372925, "learning_rate": 1.711369355782078e-05, "loss": 0.5523, "step": 18146 }, { "epoch": 0.4982701812191104, "grad_norm": 0.36940619349479675, "learning_rate": 1.7113390008571452e-05, "loss": 0.4204, "step": 18147 }, { "epoch": 0.49829763866007687, "grad_norm": 0.38066819310188293, "learning_rate": 1.7113086446053383e-05, "loss": 0.4734, "step": 18148 }, { "epoch": 0.49832509610104336, "grad_norm": 0.3531523048877716, "learning_rate": 1.7112782870267136e-05, "loss": 0.5487, "step": 18149 }, { "epoch": 0.49835255354200986, "grad_norm": 0.3583870530128479, "learning_rate": 1.7112479281213277e-05, "loss": 0.524, "step": 18150 }, { "epoch": 0.4983800109829764, "grad_norm": 0.38869017362594604, "learning_rate": 1.7112175678892374e-05, "loss": 0.4794, "step": 18151 }, { "epoch": 0.4984074684239429, "grad_norm": 0.3680994510650635, "learning_rate": 1.7111872063304998e-05, "loss": 0.5377, "step": 18152 }, { "epoch": 0.4984349258649094, "grad_norm": 0.3784719407558441, "learning_rate": 1.7111568434451706e-05, "loss": 0.5343, "step": 18153 }, { "epoch": 0.4984623833058759, "grad_norm": 0.4397129714488983, "learning_rate": 1.7111264792333072e-05, "loss": 0.48, "step": 18154 }, { "epoch": 0.4984898407468424, "grad_norm": 0.45577865839004517, "learning_rate": 1.7110961136949655e-05, "loss": 0.6058, "step": 18155 }, { "epoch": 0.4985172981878089, "grad_norm": 1.1771347522735596, "learning_rate": 1.711065746830203e-05, "loss": 0.4763, "step": 18156 }, { "epoch": 0.49854475562877537, "grad_norm": 2.076005220413208, "learning_rate": 1.7110353786390758e-05, "loss": 0.566, "step": 18157 }, { "epoch": 0.4985722130697419, "grad_norm": 0.3940882682800293, "learning_rate": 1.711005009121641e-05, "loss": 0.5302, "step": 18158 }, { "epoch": 0.4985996705107084, "grad_norm": 0.4110874831676483, "learning_rate": 1.7109746382779545e-05, "loss": 0.6214, "step": 18159 }, { "epoch": 0.4986271279516749, "grad_norm": 0.3741133511066437, "learning_rate": 1.7109442661080735e-05, "loss": 0.5446, "step": 18160 }, { "epoch": 0.4986545853926414, "grad_norm": 0.3794558048248291, "learning_rate": 1.7109138926120548e-05, "loss": 0.5918, "step": 18161 }, { "epoch": 0.4986820428336079, "grad_norm": 0.39069801568984985, "learning_rate": 1.7108835177899545e-05, "loss": 0.4758, "step": 18162 }, { "epoch": 0.4987095002745744, "grad_norm": 0.3896941840648651, "learning_rate": 1.7108531416418296e-05, "loss": 0.5588, "step": 18163 }, { "epoch": 0.4987369577155409, "grad_norm": 0.35241642594337463, "learning_rate": 1.7108227641677365e-05, "loss": 0.4834, "step": 18164 }, { "epoch": 0.49876441515650743, "grad_norm": 0.41385412216186523, "learning_rate": 1.7107923853677327e-05, "loss": 0.4554, "step": 18165 }, { "epoch": 0.49879187259747393, "grad_norm": 0.4130232334136963, "learning_rate": 1.7107620052418736e-05, "loss": 0.4958, "step": 18166 }, { "epoch": 0.4988193300384404, "grad_norm": 0.3209391236305237, "learning_rate": 1.7107316237902168e-05, "loss": 0.3982, "step": 18167 }, { "epoch": 0.4988467874794069, "grad_norm": 0.39276057481765747, "learning_rate": 1.7107012410128186e-05, "loss": 0.5627, "step": 18168 }, { "epoch": 0.4988742449203734, "grad_norm": 0.3911188244819641, "learning_rate": 1.710670856909736e-05, "loss": 0.5331, "step": 18169 }, { "epoch": 0.4989017023613399, "grad_norm": 0.3907928168773651, "learning_rate": 1.710640471481025e-05, "loss": 0.5347, "step": 18170 }, { "epoch": 0.4989291598023064, "grad_norm": 0.35195717215538025, "learning_rate": 1.7106100847267432e-05, "loss": 0.4662, "step": 18171 }, { "epoch": 0.49895661724327295, "grad_norm": 0.5661927461624146, "learning_rate": 1.7105796966469465e-05, "loss": 0.572, "step": 18172 }, { "epoch": 0.49898407468423944, "grad_norm": 0.3568142056465149, "learning_rate": 1.710549307241692e-05, "loss": 0.549, "step": 18173 }, { "epoch": 0.49901153212520594, "grad_norm": 0.40312129259109497, "learning_rate": 1.710518916511036e-05, "loss": 0.6038, "step": 18174 }, { "epoch": 0.49903898956617243, "grad_norm": 0.4097234308719635, "learning_rate": 1.7104885244550357e-05, "loss": 0.4919, "step": 18175 }, { "epoch": 0.4990664470071389, "grad_norm": 0.4440852105617523, "learning_rate": 1.7104581310737476e-05, "loss": 0.57, "step": 18176 }, { "epoch": 0.4990939044481054, "grad_norm": 0.38012751936912537, "learning_rate": 1.710427736367228e-05, "loss": 0.6219, "step": 18177 }, { "epoch": 0.4991213618890719, "grad_norm": 0.34073367714881897, "learning_rate": 1.710397340335534e-05, "loss": 0.4911, "step": 18178 }, { "epoch": 0.49914881933003846, "grad_norm": 0.36202433705329895, "learning_rate": 1.7103669429787227e-05, "loss": 0.532, "step": 18179 }, { "epoch": 0.49917627677100496, "grad_norm": 0.3895537853240967, "learning_rate": 1.7103365442968498e-05, "loss": 0.5897, "step": 18180 }, { "epoch": 0.49920373421197145, "grad_norm": 0.3531620502471924, "learning_rate": 1.7103061442899727e-05, "loss": 0.5032, "step": 18181 }, { "epoch": 0.49923119165293794, "grad_norm": 0.4048424959182739, "learning_rate": 1.710275742958148e-05, "loss": 0.5477, "step": 18182 }, { "epoch": 0.49925864909390444, "grad_norm": 0.3806428015232086, "learning_rate": 1.7102453403014325e-05, "loss": 0.4504, "step": 18183 }, { "epoch": 0.49928610653487093, "grad_norm": 0.3631454408168793, "learning_rate": 1.7102149363198825e-05, "loss": 0.4073, "step": 18184 }, { "epoch": 0.4993135639758374, "grad_norm": 0.39270731806755066, "learning_rate": 1.710184531013555e-05, "loss": 0.5698, "step": 18185 }, { "epoch": 0.499341021416804, "grad_norm": 0.36737629771232605, "learning_rate": 1.710154124382507e-05, "loss": 0.4557, "step": 18186 }, { "epoch": 0.49936847885777047, "grad_norm": 0.40107691287994385, "learning_rate": 1.710123716426794e-05, "loss": 0.5576, "step": 18187 }, { "epoch": 0.49939593629873696, "grad_norm": 0.3575264513492584, "learning_rate": 1.7100933071464747e-05, "loss": 0.5534, "step": 18188 }, { "epoch": 0.49942339373970346, "grad_norm": 0.4202025234699249, "learning_rate": 1.710062896541604e-05, "loss": 0.5612, "step": 18189 }, { "epoch": 0.49945085118066995, "grad_norm": 0.3865579068660736, "learning_rate": 1.71003248461224e-05, "loss": 0.5457, "step": 18190 }, { "epoch": 0.49947830862163645, "grad_norm": 0.49026867747306824, "learning_rate": 1.7100020713584384e-05, "loss": 0.5435, "step": 18191 }, { "epoch": 0.49950576606260294, "grad_norm": 0.3526456952095032, "learning_rate": 1.7099716567802565e-05, "loss": 0.3906, "step": 18192 }, { "epoch": 0.4995332235035695, "grad_norm": 0.3963441252708435, "learning_rate": 1.7099412408777504e-05, "loss": 0.525, "step": 18193 }, { "epoch": 0.499560680944536, "grad_norm": 0.34657251834869385, "learning_rate": 1.709910823650978e-05, "loss": 0.5679, "step": 18194 }, { "epoch": 0.4995881383855025, "grad_norm": 0.4281478226184845, "learning_rate": 1.709880405099995e-05, "loss": 0.4361, "step": 18195 }, { "epoch": 0.499615595826469, "grad_norm": 0.3741914629936218, "learning_rate": 1.7098499852248586e-05, "loss": 0.4941, "step": 18196 }, { "epoch": 0.49964305326743547, "grad_norm": 0.40962764620780945, "learning_rate": 1.709819564025625e-05, "loss": 0.5098, "step": 18197 }, { "epoch": 0.49967051070840196, "grad_norm": 0.37838900089263916, "learning_rate": 1.7097891415023515e-05, "loss": 0.4555, "step": 18198 }, { "epoch": 0.49969796814936845, "grad_norm": 0.3677780032157898, "learning_rate": 1.7097587176550948e-05, "loss": 0.4762, "step": 18199 }, { "epoch": 0.499725425590335, "grad_norm": 0.3437698483467102, "learning_rate": 1.709728292483912e-05, "loss": 0.5154, "step": 18200 }, { "epoch": 0.4997528830313015, "grad_norm": 0.4435120224952698, "learning_rate": 1.709697865988859e-05, "loss": 0.4561, "step": 18201 }, { "epoch": 0.499780340472268, "grad_norm": 0.3807353377342224, "learning_rate": 1.7096674381699928e-05, "loss": 0.5293, "step": 18202 }, { "epoch": 0.4998077979132345, "grad_norm": 0.37894707918167114, "learning_rate": 1.7096370090273708e-05, "loss": 0.5655, "step": 18203 }, { "epoch": 0.499835255354201, "grad_norm": 0.3475729823112488, "learning_rate": 1.709606578561049e-05, "loss": 0.4888, "step": 18204 }, { "epoch": 0.4998627127951675, "grad_norm": 0.3682712912559509, "learning_rate": 1.7095761467710844e-05, "loss": 0.5781, "step": 18205 }, { "epoch": 0.49989017023613397, "grad_norm": 0.36894920468330383, "learning_rate": 1.709545713657534e-05, "loss": 0.4365, "step": 18206 }, { "epoch": 0.4999176276771005, "grad_norm": 0.5045278668403625, "learning_rate": 1.7095152792204542e-05, "loss": 0.4748, "step": 18207 }, { "epoch": 0.499945085118067, "grad_norm": 0.357883483171463, "learning_rate": 1.709484843459902e-05, "loss": 0.5728, "step": 18208 }, { "epoch": 0.4999725425590335, "grad_norm": 0.3584842085838318, "learning_rate": 1.7094544063759342e-05, "loss": 0.5232, "step": 18209 }, { "epoch": 0.5, "grad_norm": 0.45562970638275146, "learning_rate": 1.7094239679686074e-05, "loss": 0.596, "step": 18210 }, { "epoch": 0.5000274574409665, "grad_norm": 0.41492700576782227, "learning_rate": 1.7093935282379787e-05, "loss": 0.4968, "step": 18211 }, { "epoch": 0.500054914881933, "grad_norm": 0.3725951015949249, "learning_rate": 1.7093630871841042e-05, "loss": 0.4976, "step": 18212 }, { "epoch": 0.5000823723228995, "grad_norm": 0.3786039650440216, "learning_rate": 1.7093326448070416e-05, "loss": 0.4984, "step": 18213 }, { "epoch": 0.500109829763866, "grad_norm": 0.43422070145606995, "learning_rate": 1.709302201106847e-05, "loss": 0.4695, "step": 18214 }, { "epoch": 0.5001372872048325, "grad_norm": 0.3747687339782715, "learning_rate": 1.7092717560835778e-05, "loss": 0.4471, "step": 18215 }, { "epoch": 0.500164744645799, "grad_norm": 0.3790091276168823, "learning_rate": 1.7092413097372897e-05, "loss": 0.4797, "step": 18216 }, { "epoch": 0.5001922020867655, "grad_norm": 0.4182693362236023, "learning_rate": 1.7092108620680408e-05, "loss": 0.4999, "step": 18217 }, { "epoch": 0.5002196595277321, "grad_norm": 0.42537251114845276, "learning_rate": 1.709180413075887e-05, "loss": 0.536, "step": 18218 }, { "epoch": 0.5002471169686985, "grad_norm": 0.3446579575538635, "learning_rate": 1.7091499627608853e-05, "loss": 0.4541, "step": 18219 }, { "epoch": 0.500274574409665, "grad_norm": 0.37186503410339355, "learning_rate": 1.709119511123093e-05, "loss": 0.5694, "step": 18220 }, { "epoch": 0.5003020318506315, "grad_norm": 0.49211129546165466, "learning_rate": 1.709089058162566e-05, "loss": 0.5075, "step": 18221 }, { "epoch": 0.500329489291598, "grad_norm": 0.3689965009689331, "learning_rate": 1.7090586038793618e-05, "loss": 0.5325, "step": 18222 }, { "epoch": 0.5003569467325645, "grad_norm": 0.3700341284275055, "learning_rate": 1.7090281482735372e-05, "loss": 0.5386, "step": 18223 }, { "epoch": 0.500384404173531, "grad_norm": 0.5286343097686768, "learning_rate": 1.7089976913451486e-05, "loss": 0.5047, "step": 18224 }, { "epoch": 0.5004118616144976, "grad_norm": 0.43405434489250183, "learning_rate": 1.708967233094253e-05, "loss": 0.6526, "step": 18225 }, { "epoch": 0.500439319055464, "grad_norm": 0.37092649936676025, "learning_rate": 1.7089367735209073e-05, "loss": 0.5747, "step": 18226 }, { "epoch": 0.5004667764964306, "grad_norm": 0.6884369254112244, "learning_rate": 1.708906312625168e-05, "loss": 0.495, "step": 18227 }, { "epoch": 0.500494233937397, "grad_norm": 0.34716156125068665, "learning_rate": 1.7088758504070926e-05, "loss": 0.4412, "step": 18228 }, { "epoch": 0.5005216913783636, "grad_norm": 0.35005030035972595, "learning_rate": 1.708845386866737e-05, "loss": 0.501, "step": 18229 }, { "epoch": 0.50054914881933, "grad_norm": 0.3413712680339813, "learning_rate": 1.708814922004159e-05, "loss": 0.502, "step": 18230 }, { "epoch": 0.5005766062602965, "grad_norm": 0.3684580326080322, "learning_rate": 1.708784455819415e-05, "loss": 0.4705, "step": 18231 }, { "epoch": 0.5006040637012631, "grad_norm": 0.38489511609077454, "learning_rate": 1.7087539883125616e-05, "loss": 0.5279, "step": 18232 }, { "epoch": 0.5006315211422295, "grad_norm": 0.4198662340641022, "learning_rate": 1.7087235194836552e-05, "loss": 0.4914, "step": 18233 }, { "epoch": 0.5006589785831961, "grad_norm": 0.3855777382850647, "learning_rate": 1.708693049332754e-05, "loss": 0.5283, "step": 18234 }, { "epoch": 0.5006864360241625, "grad_norm": 0.40187713503837585, "learning_rate": 1.7086625778599138e-05, "loss": 0.627, "step": 18235 }, { "epoch": 0.5007138934651291, "grad_norm": 0.34507349133491516, "learning_rate": 1.7086321050651916e-05, "loss": 0.5656, "step": 18236 }, { "epoch": 0.5007413509060955, "grad_norm": 0.3510937988758087, "learning_rate": 1.7086016309486448e-05, "loss": 0.4213, "step": 18237 }, { "epoch": 0.500768808347062, "grad_norm": 0.3694456219673157, "learning_rate": 1.7085711555103294e-05, "loss": 0.4606, "step": 18238 }, { "epoch": 0.5007962657880286, "grad_norm": 0.37695667147636414, "learning_rate": 1.708540678750303e-05, "loss": 0.4652, "step": 18239 }, { "epoch": 0.500823723228995, "grad_norm": 0.3377726972103119, "learning_rate": 1.7085102006686213e-05, "loss": 0.4584, "step": 18240 }, { "epoch": 0.5008511806699616, "grad_norm": 0.3316347002983093, "learning_rate": 1.7084797212653427e-05, "loss": 0.411, "step": 18241 }, { "epoch": 0.500878638110928, "grad_norm": 0.39750754833221436, "learning_rate": 1.7084492405405232e-05, "loss": 0.5506, "step": 18242 }, { "epoch": 0.5009060955518946, "grad_norm": 0.40273407101631165, "learning_rate": 1.7084187584942196e-05, "loss": 0.4896, "step": 18243 }, { "epoch": 0.500933552992861, "grad_norm": 0.36837077140808105, "learning_rate": 1.708388275126489e-05, "loss": 0.5987, "step": 18244 }, { "epoch": 0.5009610104338276, "grad_norm": 0.41695207357406616, "learning_rate": 1.708357790437388e-05, "loss": 0.5609, "step": 18245 }, { "epoch": 0.5009884678747941, "grad_norm": 0.36511173844337463, "learning_rate": 1.7083273044269736e-05, "loss": 0.5273, "step": 18246 }, { "epoch": 0.5010159253157606, "grad_norm": 0.351592093706131, "learning_rate": 1.708296817095303e-05, "loss": 0.5429, "step": 18247 }, { "epoch": 0.5010433827567271, "grad_norm": 0.4118198752403259, "learning_rate": 1.7082663284424326e-05, "loss": 0.519, "step": 18248 }, { "epoch": 0.5010708401976935, "grad_norm": 0.42447248101234436, "learning_rate": 1.7082358384684193e-05, "loss": 0.5345, "step": 18249 }, { "epoch": 0.5010982976386601, "grad_norm": 0.39835605025291443, "learning_rate": 1.7082053471733206e-05, "loss": 0.5517, "step": 18250 }, { "epoch": 0.5011257550796265, "grad_norm": 0.3643490970134735, "learning_rate": 1.7081748545571926e-05, "loss": 0.5309, "step": 18251 }, { "epoch": 0.5011532125205931, "grad_norm": 0.39660945534706116, "learning_rate": 1.7081443606200924e-05, "loss": 0.5927, "step": 18252 }, { "epoch": 0.5011806699615596, "grad_norm": 0.42283761501312256, "learning_rate": 1.7081138653620772e-05, "loss": 0.6286, "step": 18253 }, { "epoch": 0.5012081274025261, "grad_norm": 0.3496248722076416, "learning_rate": 1.7080833687832035e-05, "loss": 0.5675, "step": 18254 }, { "epoch": 0.5012355848434926, "grad_norm": 0.4316891133785248, "learning_rate": 1.708052870883528e-05, "loss": 0.5737, "step": 18255 }, { "epoch": 0.5012630422844591, "grad_norm": 0.3601992428302765, "learning_rate": 1.7080223716631082e-05, "loss": 0.5624, "step": 18256 }, { "epoch": 0.5012904997254256, "grad_norm": 0.375296413898468, "learning_rate": 1.7079918711220008e-05, "loss": 0.6066, "step": 18257 }, { "epoch": 0.501317957166392, "grad_norm": 0.3431360423564911, "learning_rate": 1.7079613692602626e-05, "loss": 0.5197, "step": 18258 }, { "epoch": 0.5013454146073586, "grad_norm": 0.3618892431259155, "learning_rate": 1.7079308660779502e-05, "loss": 0.4989, "step": 18259 }, { "epoch": 0.5013728720483251, "grad_norm": 0.3886778652667999, "learning_rate": 1.707900361575121e-05, "loss": 0.5228, "step": 18260 }, { "epoch": 0.5014003294892916, "grad_norm": 0.34657061100006104, "learning_rate": 1.707869855751832e-05, "loss": 0.4668, "step": 18261 }, { "epoch": 0.5014277869302581, "grad_norm": 0.3365839123725891, "learning_rate": 1.7078393486081393e-05, "loss": 0.5278, "step": 18262 }, { "epoch": 0.5014552443712246, "grad_norm": 0.4252789616584778, "learning_rate": 1.7078088401441005e-05, "loss": 0.5295, "step": 18263 }, { "epoch": 0.5014827018121911, "grad_norm": 0.379203200340271, "learning_rate": 1.707778330359772e-05, "loss": 0.5286, "step": 18264 }, { "epoch": 0.5015101592531576, "grad_norm": 0.34421226382255554, "learning_rate": 1.7077478192552114e-05, "loss": 0.4326, "step": 18265 }, { "epoch": 0.5015376166941241, "grad_norm": 0.3814091086387634, "learning_rate": 1.707717306830475e-05, "loss": 0.5532, "step": 18266 }, { "epoch": 0.5015650741350907, "grad_norm": 0.3631158173084259, "learning_rate": 1.7076867930856204e-05, "loss": 0.5211, "step": 18267 }, { "epoch": 0.5015925315760571, "grad_norm": 0.3957575559616089, "learning_rate": 1.7076562780207036e-05, "loss": 0.5986, "step": 18268 }, { "epoch": 0.5016199890170236, "grad_norm": 0.37048327922821045, "learning_rate": 1.7076257616357822e-05, "loss": 0.4908, "step": 18269 }, { "epoch": 0.5016474464579901, "grad_norm": 0.3629724085330963, "learning_rate": 1.7075952439309128e-05, "loss": 0.4554, "step": 18270 }, { "epoch": 0.5016749038989566, "grad_norm": 0.35151106119155884, "learning_rate": 1.7075647249061525e-05, "loss": 0.5277, "step": 18271 }, { "epoch": 0.5017023613399231, "grad_norm": 0.35424408316612244, "learning_rate": 1.707534204561558e-05, "loss": 0.4205, "step": 18272 }, { "epoch": 0.5017298187808896, "grad_norm": 0.3324435353279114, "learning_rate": 1.7075036828971864e-05, "loss": 0.4818, "step": 18273 }, { "epoch": 0.5017572762218562, "grad_norm": 0.40016046166419983, "learning_rate": 1.7074731599130946e-05, "loss": 0.5589, "step": 18274 }, { "epoch": 0.5017847336628226, "grad_norm": 0.5063086748123169, "learning_rate": 1.7074426356093398e-05, "loss": 0.4383, "step": 18275 }, { "epoch": 0.5018121911037892, "grad_norm": 0.41734758019447327, "learning_rate": 1.7074121099859788e-05, "loss": 0.5431, "step": 18276 }, { "epoch": 0.5018396485447556, "grad_norm": 0.35300785303115845, "learning_rate": 1.707381583043068e-05, "loss": 0.4788, "step": 18277 }, { "epoch": 0.5018671059857222, "grad_norm": 0.3644106388092041, "learning_rate": 1.7073510547806652e-05, "loss": 0.4759, "step": 18278 }, { "epoch": 0.5018945634266886, "grad_norm": 0.40512365102767944, "learning_rate": 1.7073205251988265e-05, "loss": 0.5304, "step": 18279 }, { "epoch": 0.5019220208676551, "grad_norm": 0.401725709438324, "learning_rate": 1.7072899942976096e-05, "loss": 0.5472, "step": 18280 }, { "epoch": 0.5019494783086217, "grad_norm": 0.38179975748062134, "learning_rate": 1.707259462077071e-05, "loss": 0.5124, "step": 18281 }, { "epoch": 0.5019769357495881, "grad_norm": 0.39495307207107544, "learning_rate": 1.707228928537268e-05, "loss": 0.513, "step": 18282 }, { "epoch": 0.5020043931905547, "grad_norm": 0.3934388756752014, "learning_rate": 1.7071983936782568e-05, "loss": 0.5166, "step": 18283 }, { "epoch": 0.5020318506315211, "grad_norm": 0.40163472294807434, "learning_rate": 1.707167857500095e-05, "loss": 0.4805, "step": 18284 }, { "epoch": 0.5020593080724877, "grad_norm": 0.34462910890579224, "learning_rate": 1.70713732000284e-05, "loss": 0.475, "step": 18285 }, { "epoch": 0.5020867655134541, "grad_norm": 0.5391959547996521, "learning_rate": 1.7071067811865477e-05, "loss": 0.5429, "step": 18286 }, { "epoch": 0.5021142229544207, "grad_norm": 0.4408515393733978, "learning_rate": 1.7070762410512758e-05, "loss": 0.5543, "step": 18287 }, { "epoch": 0.5021416803953872, "grad_norm": 0.47167643904685974, "learning_rate": 1.7070456995970808e-05, "loss": 0.5634, "step": 18288 }, { "epoch": 0.5021691378363536, "grad_norm": 0.4184386134147644, "learning_rate": 1.7070151568240202e-05, "loss": 0.4756, "step": 18289 }, { "epoch": 0.5021965952773202, "grad_norm": 0.406840980052948, "learning_rate": 1.7069846127321503e-05, "loss": 0.5366, "step": 18290 }, { "epoch": 0.5022240527182866, "grad_norm": 0.39505401253700256, "learning_rate": 1.7069540673215287e-05, "loss": 0.5259, "step": 18291 }, { "epoch": 0.5022515101592532, "grad_norm": 0.3747074007987976, "learning_rate": 1.7069235205922124e-05, "loss": 0.4102, "step": 18292 }, { "epoch": 0.5022789676002196, "grad_norm": 0.45754820108413696, "learning_rate": 1.7068929725442575e-05, "loss": 0.5674, "step": 18293 }, { "epoch": 0.5023064250411862, "grad_norm": 0.4053472578525543, "learning_rate": 1.706862423177722e-05, "loss": 0.5061, "step": 18294 }, { "epoch": 0.5023338824821527, "grad_norm": 0.4034256637096405, "learning_rate": 1.7068318724926626e-05, "loss": 0.5094, "step": 18295 }, { "epoch": 0.5023613399231192, "grad_norm": 0.4249768555164337, "learning_rate": 1.706801320489136e-05, "loss": 0.5928, "step": 18296 }, { "epoch": 0.5023887973640857, "grad_norm": 0.42865055799484253, "learning_rate": 1.7067707671671992e-05, "loss": 0.4687, "step": 18297 }, { "epoch": 0.5024162548050521, "grad_norm": 0.374934583902359, "learning_rate": 1.7067402125269096e-05, "loss": 0.5065, "step": 18298 }, { "epoch": 0.5024437122460187, "grad_norm": 0.3550419509410858, "learning_rate": 1.7067096565683236e-05, "loss": 0.4872, "step": 18299 }, { "epoch": 0.5024711696869851, "grad_norm": 0.4121793210506439, "learning_rate": 1.706679099291499e-05, "loss": 0.5057, "step": 18300 }, { "epoch": 0.5024986271279517, "grad_norm": 0.4242143929004669, "learning_rate": 1.7066485406964917e-05, "loss": 0.5103, "step": 18301 }, { "epoch": 0.5025260845689182, "grad_norm": 0.3878735601902008, "learning_rate": 1.70661798078336e-05, "loss": 0.3969, "step": 18302 }, { "epoch": 0.5025535420098847, "grad_norm": 0.4393337666988373, "learning_rate": 1.7065874195521595e-05, "loss": 0.54, "step": 18303 }, { "epoch": 0.5025809994508512, "grad_norm": 0.4017086625099182, "learning_rate": 1.7065568570029488e-05, "loss": 0.4868, "step": 18304 }, { "epoch": 0.5026084568918177, "grad_norm": 0.42388954758644104, "learning_rate": 1.7065262931357835e-05, "loss": 0.5149, "step": 18305 }, { "epoch": 0.5026359143327842, "grad_norm": 0.44178521633148193, "learning_rate": 1.706495727950721e-05, "loss": 0.519, "step": 18306 }, { "epoch": 0.5026633717737506, "grad_norm": 0.3854296803474426, "learning_rate": 1.706465161447819e-05, "loss": 0.5029, "step": 18307 }, { "epoch": 0.5026908292147172, "grad_norm": 0.4020719826221466, "learning_rate": 1.7064345936271336e-05, "loss": 0.5849, "step": 18308 }, { "epoch": 0.5027182866556837, "grad_norm": 0.4111078381538391, "learning_rate": 1.7064040244887224e-05, "loss": 0.612, "step": 18309 }, { "epoch": 0.5027457440966502, "grad_norm": 0.5885664224624634, "learning_rate": 1.7063734540326424e-05, "loss": 0.5875, "step": 18310 }, { "epoch": 0.5027732015376167, "grad_norm": 0.4228686988353729, "learning_rate": 1.7063428822589504e-05, "loss": 0.5024, "step": 18311 }, { "epoch": 0.5028006589785832, "grad_norm": 0.3949965834617615, "learning_rate": 1.706312309167703e-05, "loss": 0.4586, "step": 18312 }, { "epoch": 0.5028281164195497, "grad_norm": 0.35988566279411316, "learning_rate": 1.7062817347589584e-05, "loss": 0.5248, "step": 18313 }, { "epoch": 0.5028555738605162, "grad_norm": 0.4265875816345215, "learning_rate": 1.7062511590327725e-05, "loss": 0.6016, "step": 18314 }, { "epoch": 0.5028830313014827, "grad_norm": 0.5422950387001038, "learning_rate": 1.706220581989203e-05, "loss": 0.5075, "step": 18315 }, { "epoch": 0.5029104887424493, "grad_norm": 0.4016532301902771, "learning_rate": 1.7061900036283066e-05, "loss": 0.5456, "step": 18316 }, { "epoch": 0.5029379461834157, "grad_norm": 0.35709452629089355, "learning_rate": 1.7061594239501405e-05, "loss": 0.5078, "step": 18317 }, { "epoch": 0.5029654036243822, "grad_norm": 0.32176473736763, "learning_rate": 1.706128842954762e-05, "loss": 0.4078, "step": 18318 }, { "epoch": 0.5029928610653487, "grad_norm": 0.380409300327301, "learning_rate": 1.706098260642227e-05, "loss": 0.5399, "step": 18319 }, { "epoch": 0.5030203185063152, "grad_norm": 0.4212191104888916, "learning_rate": 1.7060676770125942e-05, "loss": 0.5708, "step": 18320 }, { "epoch": 0.5030477759472817, "grad_norm": 0.36827605962753296, "learning_rate": 1.7060370920659194e-05, "loss": 0.4795, "step": 18321 }, { "epoch": 0.5030752333882482, "grad_norm": 0.37226402759552, "learning_rate": 1.70600650580226e-05, "loss": 0.4898, "step": 18322 }, { "epoch": 0.5031026908292148, "grad_norm": 0.38775354623794556, "learning_rate": 1.7059759182216736e-05, "loss": 0.5815, "step": 18323 }, { "epoch": 0.5031301482701812, "grad_norm": 0.37222155928611755, "learning_rate": 1.7059453293242166e-05, "loss": 0.5038, "step": 18324 }, { "epoch": 0.5031576057111478, "grad_norm": 0.3465757668018341, "learning_rate": 1.705914739109946e-05, "loss": 0.4827, "step": 18325 }, { "epoch": 0.5031850631521142, "grad_norm": 0.4027126729488373, "learning_rate": 1.7058841475789197e-05, "loss": 0.5028, "step": 18326 }, { "epoch": 0.5032125205930807, "grad_norm": 0.427123486995697, "learning_rate": 1.7058535547311933e-05, "loss": 0.5075, "step": 18327 }, { "epoch": 0.5032399780340472, "grad_norm": 0.4352464973926544, "learning_rate": 1.7058229605668254e-05, "loss": 0.4555, "step": 18328 }, { "epoch": 0.5032674354750137, "grad_norm": 0.33354371786117554, "learning_rate": 1.7057923650858722e-05, "loss": 0.4538, "step": 18329 }, { "epoch": 0.5032948929159803, "grad_norm": 0.34731248021125793, "learning_rate": 1.7057617682883912e-05, "loss": 0.4766, "step": 18330 }, { "epoch": 0.5033223503569467, "grad_norm": 0.35552549362182617, "learning_rate": 1.7057311701744388e-05, "loss": 0.5096, "step": 18331 }, { "epoch": 0.5033498077979133, "grad_norm": 0.3730519413948059, "learning_rate": 1.7057005707440728e-05, "loss": 0.5076, "step": 18332 }, { "epoch": 0.5033772652388797, "grad_norm": 0.3710536062717438, "learning_rate": 1.70566996999735e-05, "loss": 0.4792, "step": 18333 }, { "epoch": 0.5034047226798463, "grad_norm": 0.37105944752693176, "learning_rate": 1.7056393679343272e-05, "loss": 0.5302, "step": 18334 }, { "epoch": 0.5034321801208127, "grad_norm": 0.4189833402633667, "learning_rate": 1.705608764555062e-05, "loss": 0.5046, "step": 18335 }, { "epoch": 0.5034596375617792, "grad_norm": 0.43591082096099854, "learning_rate": 1.7055781598596115e-05, "loss": 0.5204, "step": 18336 }, { "epoch": 0.5034870950027458, "grad_norm": 0.4085991680622101, "learning_rate": 1.7055475538480323e-05, "loss": 0.4695, "step": 18337 }, { "epoch": 0.5035145524437122, "grad_norm": 0.34525325894355774, "learning_rate": 1.7055169465203818e-05, "loss": 0.5076, "step": 18338 }, { "epoch": 0.5035420098846788, "grad_norm": 0.3884754180908203, "learning_rate": 1.705486337876717e-05, "loss": 0.4782, "step": 18339 }, { "epoch": 0.5035694673256452, "grad_norm": 0.34470629692077637, "learning_rate": 1.705455727917095e-05, "loss": 0.5067, "step": 18340 }, { "epoch": 0.5035969247666118, "grad_norm": 0.33834901452064514, "learning_rate": 1.7054251166415726e-05, "loss": 0.4363, "step": 18341 }, { "epoch": 0.5036243822075782, "grad_norm": 0.806690514087677, "learning_rate": 1.7053945040502076e-05, "loss": 0.4645, "step": 18342 }, { "epoch": 0.5036518396485448, "grad_norm": 0.39665651321411133, "learning_rate": 1.7053638901430565e-05, "loss": 0.4778, "step": 18343 }, { "epoch": 0.5036792970895112, "grad_norm": 0.369426965713501, "learning_rate": 1.705333274920177e-05, "loss": 0.6007, "step": 18344 }, { "epoch": 0.5037067545304778, "grad_norm": 0.3898095488548279, "learning_rate": 1.7053026583816255e-05, "loss": 0.4429, "step": 18345 }, { "epoch": 0.5037342119714443, "grad_norm": 0.45592570304870605, "learning_rate": 1.7052720405274594e-05, "loss": 0.5001, "step": 18346 }, { "epoch": 0.5037616694124107, "grad_norm": 0.37848100066185, "learning_rate": 1.705241421357736e-05, "loss": 0.5036, "step": 18347 }, { "epoch": 0.5037891268533773, "grad_norm": 0.41575393080711365, "learning_rate": 1.705210800872512e-05, "loss": 0.5532, "step": 18348 }, { "epoch": 0.5038165842943437, "grad_norm": 0.4008714556694031, "learning_rate": 1.705180179071845e-05, "loss": 0.5605, "step": 18349 }, { "epoch": 0.5038440417353103, "grad_norm": 0.37351468205451965, "learning_rate": 1.7051495559557918e-05, "loss": 0.4615, "step": 18350 }, { "epoch": 0.5038714991762767, "grad_norm": 0.3717767298221588, "learning_rate": 1.7051189315244098e-05, "loss": 0.5199, "step": 18351 }, { "epoch": 0.5038989566172433, "grad_norm": 1.138818621635437, "learning_rate": 1.7050883057777557e-05, "loss": 0.4915, "step": 18352 }, { "epoch": 0.5039264140582098, "grad_norm": 0.41945356130599976, "learning_rate": 1.705057678715887e-05, "loss": 0.5253, "step": 18353 }, { "epoch": 0.5039538714991763, "grad_norm": 0.445121705532074, "learning_rate": 1.705027050338861e-05, "loss": 0.5167, "step": 18354 }, { "epoch": 0.5039813289401428, "grad_norm": 1.1888108253479004, "learning_rate": 1.704996420646734e-05, "loss": 0.5426, "step": 18355 }, { "epoch": 0.5040087863811092, "grad_norm": 0.35418960452079773, "learning_rate": 1.704965789639564e-05, "loss": 0.4427, "step": 18356 }, { "epoch": 0.5040362438220758, "grad_norm": 0.3944084942340851, "learning_rate": 1.704935157317408e-05, "loss": 0.5292, "step": 18357 }, { "epoch": 0.5040637012630422, "grad_norm": 0.416432648897171, "learning_rate": 1.7049045236803224e-05, "loss": 0.5317, "step": 18358 }, { "epoch": 0.5040911587040088, "grad_norm": 0.3697250485420227, "learning_rate": 1.7048738887283654e-05, "loss": 0.4639, "step": 18359 }, { "epoch": 0.5041186161449753, "grad_norm": 0.355241596698761, "learning_rate": 1.704843252461593e-05, "loss": 0.471, "step": 18360 }, { "epoch": 0.5041460735859418, "grad_norm": 0.37289518117904663, "learning_rate": 1.7048126148800634e-05, "loss": 0.485, "step": 18361 }, { "epoch": 0.5041735310269083, "grad_norm": 0.3860722780227661, "learning_rate": 1.7047819759838337e-05, "loss": 0.5578, "step": 18362 }, { "epoch": 0.5042009884678748, "grad_norm": 0.545289158821106, "learning_rate": 1.7047513357729598e-05, "loss": 0.5628, "step": 18363 }, { "epoch": 0.5042284459088413, "grad_norm": 0.5187903046607971, "learning_rate": 1.7047206942475e-05, "loss": 0.5131, "step": 18364 }, { "epoch": 0.5042559033498077, "grad_norm": 0.37178850173950195, "learning_rate": 1.7046900514075115e-05, "loss": 0.5683, "step": 18365 }, { "epoch": 0.5042833607907743, "grad_norm": 0.40599679946899414, "learning_rate": 1.704659407253051e-05, "loss": 0.5557, "step": 18366 }, { "epoch": 0.5043108182317408, "grad_norm": 0.39342787861824036, "learning_rate": 1.704628761784176e-05, "loss": 0.4974, "step": 18367 }, { "epoch": 0.5043382756727073, "grad_norm": 0.3889973759651184, "learning_rate": 1.704598115000943e-05, "loss": 0.5442, "step": 18368 }, { "epoch": 0.5043657331136738, "grad_norm": 0.38861945271492004, "learning_rate": 1.7045674669034104e-05, "loss": 0.4776, "step": 18369 }, { "epoch": 0.5043931905546403, "grad_norm": 0.35963380336761475, "learning_rate": 1.7045368174916336e-05, "loss": 0.494, "step": 18370 }, { "epoch": 0.5044206479956068, "grad_norm": 0.45133689045906067, "learning_rate": 1.7045061667656716e-05, "loss": 0.5189, "step": 18371 }, { "epoch": 0.5044481054365733, "grad_norm": 0.5000846982002258, "learning_rate": 1.7044755147255804e-05, "loss": 0.5723, "step": 18372 }, { "epoch": 0.5044755628775398, "grad_norm": 0.45064589381217957, "learning_rate": 1.7044448613714175e-05, "loss": 0.5694, "step": 18373 }, { "epoch": 0.5045030203185064, "grad_norm": 0.5103515982627869, "learning_rate": 1.7044142067032402e-05, "loss": 0.6114, "step": 18374 }, { "epoch": 0.5045304777594728, "grad_norm": 0.39816761016845703, "learning_rate": 1.7043835507211054e-05, "loss": 0.5825, "step": 18375 }, { "epoch": 0.5045579352004393, "grad_norm": 0.42666882276535034, "learning_rate": 1.7043528934250707e-05, "loss": 0.5932, "step": 18376 }, { "epoch": 0.5045853926414058, "grad_norm": 0.3621983230113983, "learning_rate": 1.7043222348151927e-05, "loss": 0.488, "step": 18377 }, { "epoch": 0.5046128500823723, "grad_norm": 0.37762001156806946, "learning_rate": 1.7042915748915294e-05, "loss": 0.4816, "step": 18378 }, { "epoch": 0.5046403075233388, "grad_norm": 0.35587337613105774, "learning_rate": 1.7042609136541372e-05, "loss": 0.5208, "step": 18379 }, { "epoch": 0.5046677649643053, "grad_norm": 0.37579256296157837, "learning_rate": 1.704230251103074e-05, "loss": 0.5565, "step": 18380 }, { "epoch": 0.5046952224052719, "grad_norm": 0.32276269793510437, "learning_rate": 1.704199587238396e-05, "loss": 0.4632, "step": 18381 }, { "epoch": 0.5047226798462383, "grad_norm": 0.35776588320732117, "learning_rate": 1.7041689220601613e-05, "loss": 0.5006, "step": 18382 }, { "epoch": 0.5047501372872049, "grad_norm": 0.3579261302947998, "learning_rate": 1.704138255568427e-05, "loss": 0.5576, "step": 18383 }, { "epoch": 0.5047775947281713, "grad_norm": 0.43764299154281616, "learning_rate": 1.70410758776325e-05, "loss": 0.4691, "step": 18384 }, { "epoch": 0.5048050521691378, "grad_norm": 0.39894604682922363, "learning_rate": 1.7040769186446876e-05, "loss": 0.5837, "step": 18385 }, { "epoch": 0.5048325096101043, "grad_norm": 0.3664305806159973, "learning_rate": 1.7040462482127972e-05, "loss": 0.4945, "step": 18386 }, { "epoch": 0.5048599670510708, "grad_norm": 0.37880370020866394, "learning_rate": 1.7040155764676357e-05, "loss": 0.4569, "step": 18387 }, { "epoch": 0.5048874244920374, "grad_norm": 0.37551209330558777, "learning_rate": 1.7039849034092605e-05, "loss": 0.4934, "step": 18388 }, { "epoch": 0.5049148819330038, "grad_norm": 0.3922435939311981, "learning_rate": 1.7039542290377284e-05, "loss": 0.5652, "step": 18389 }, { "epoch": 0.5049423393739704, "grad_norm": 0.37158751487731934, "learning_rate": 1.7039235533530976e-05, "loss": 0.5084, "step": 18390 }, { "epoch": 0.5049697968149368, "grad_norm": 0.38584575057029724, "learning_rate": 1.7038928763554242e-05, "loss": 0.4803, "step": 18391 }, { "epoch": 0.5049972542559034, "grad_norm": 0.4343508780002594, "learning_rate": 1.7038621980447664e-05, "loss": 0.4334, "step": 18392 }, { "epoch": 0.5050247116968698, "grad_norm": 0.35185033082962036, "learning_rate": 1.7038315184211807e-05, "loss": 0.4676, "step": 18393 }, { "epoch": 0.5050521691378363, "grad_norm": 0.30139675736427307, "learning_rate": 1.7038008374847245e-05, "loss": 0.4636, "step": 18394 }, { "epoch": 0.5050796265788029, "grad_norm": 0.5083524584770203, "learning_rate": 1.7037701552354553e-05, "loss": 0.4812, "step": 18395 }, { "epoch": 0.5051070840197693, "grad_norm": 0.35708630084991455, "learning_rate": 1.70373947167343e-05, "loss": 0.4813, "step": 18396 }, { "epoch": 0.5051345414607359, "grad_norm": 0.36972567439079285, "learning_rate": 1.703708786798706e-05, "loss": 0.5131, "step": 18397 }, { "epoch": 0.5051619989017023, "grad_norm": 0.4062800705432892, "learning_rate": 1.7036781006113407e-05, "loss": 0.5598, "step": 18398 }, { "epoch": 0.5051894563426689, "grad_norm": 0.41372150182724, "learning_rate": 1.7036474131113908e-05, "loss": 0.5145, "step": 18399 }, { "epoch": 0.5052169137836353, "grad_norm": 0.5446830987930298, "learning_rate": 1.7036167242989142e-05, "loss": 0.4838, "step": 18400 }, { "epoch": 0.5052443712246019, "grad_norm": 0.4011324346065521, "learning_rate": 1.7035860341739676e-05, "loss": 0.4942, "step": 18401 }, { "epoch": 0.5052718286655684, "grad_norm": 0.37036147713661194, "learning_rate": 1.7035553427366084e-05, "loss": 0.5387, "step": 18402 }, { "epoch": 0.5052992861065349, "grad_norm": 0.38475868105888367, "learning_rate": 1.7035246499868943e-05, "loss": 0.5602, "step": 18403 }, { "epoch": 0.5053267435475014, "grad_norm": 0.3593212962150574, "learning_rate": 1.703493955924882e-05, "loss": 0.5289, "step": 18404 }, { "epoch": 0.5053542009884678, "grad_norm": 0.3718862533569336, "learning_rate": 1.703463260550629e-05, "loss": 0.5301, "step": 18405 }, { "epoch": 0.5053816584294344, "grad_norm": 0.38726404309272766, "learning_rate": 1.7034325638641923e-05, "loss": 0.5967, "step": 18406 }, { "epoch": 0.5054091158704008, "grad_norm": 0.41226547956466675, "learning_rate": 1.7034018658656295e-05, "loss": 0.537, "step": 18407 }, { "epoch": 0.5054365733113674, "grad_norm": 0.3216029405593872, "learning_rate": 1.7033711665549978e-05, "loss": 0.4885, "step": 18408 }, { "epoch": 0.5054640307523339, "grad_norm": 0.37696218490600586, "learning_rate": 1.7033404659323544e-05, "loss": 0.5344, "step": 18409 }, { "epoch": 0.5054914881933004, "grad_norm": 0.4029962420463562, "learning_rate": 1.7033097639977566e-05, "loss": 0.5695, "step": 18410 }, { "epoch": 0.5055189456342669, "grad_norm": 0.4111739695072174, "learning_rate": 1.7032790607512613e-05, "loss": 0.4809, "step": 18411 }, { "epoch": 0.5055464030752334, "grad_norm": 0.4192233383655548, "learning_rate": 1.7032483561929264e-05, "loss": 0.4916, "step": 18412 }, { "epoch": 0.5055738605161999, "grad_norm": 0.3672683835029602, "learning_rate": 1.703217650322809e-05, "loss": 0.5369, "step": 18413 }, { "epoch": 0.5056013179571663, "grad_norm": 0.3909851908683777, "learning_rate": 1.703186943140966e-05, "loss": 0.4609, "step": 18414 }, { "epoch": 0.5056287753981329, "grad_norm": 0.41092073917388916, "learning_rate": 1.703156234647455e-05, "loss": 0.5245, "step": 18415 }, { "epoch": 0.5056562328390994, "grad_norm": 0.4065009355545044, "learning_rate": 1.703125524842333e-05, "loss": 0.5343, "step": 18416 }, { "epoch": 0.5056836902800659, "grad_norm": 0.3893324136734009, "learning_rate": 1.7030948137256574e-05, "loss": 0.5313, "step": 18417 }, { "epoch": 0.5057111477210324, "grad_norm": 0.4140928387641907, "learning_rate": 1.703064101297486e-05, "loss": 0.6168, "step": 18418 }, { "epoch": 0.5057386051619989, "grad_norm": 0.34070661664009094, "learning_rate": 1.7030333875578753e-05, "loss": 0.3746, "step": 18419 }, { "epoch": 0.5057660626029654, "grad_norm": 0.4070437550544739, "learning_rate": 1.7030026725068828e-05, "loss": 0.5295, "step": 18420 }, { "epoch": 0.5057935200439319, "grad_norm": 0.3943770229816437, "learning_rate": 1.7029719561445665e-05, "loss": 0.5746, "step": 18421 }, { "epoch": 0.5058209774848984, "grad_norm": 0.4280271530151367, "learning_rate": 1.7029412384709828e-05, "loss": 0.5067, "step": 18422 }, { "epoch": 0.505848434925865, "grad_norm": 0.34529566764831543, "learning_rate": 1.7029105194861894e-05, "loss": 0.5148, "step": 18423 }, { "epoch": 0.5058758923668314, "grad_norm": 0.373695969581604, "learning_rate": 1.7028797991902436e-05, "loss": 0.4919, "step": 18424 }, { "epoch": 0.5059033498077979, "grad_norm": 0.3645274043083191, "learning_rate": 1.7028490775832026e-05, "loss": 0.5248, "step": 18425 }, { "epoch": 0.5059308072487644, "grad_norm": 0.3779239356517792, "learning_rate": 1.7028183546651236e-05, "loss": 0.4929, "step": 18426 }, { "epoch": 0.5059582646897309, "grad_norm": 0.41468146443367004, "learning_rate": 1.7027876304360643e-05, "loss": 0.4947, "step": 18427 }, { "epoch": 0.5059857221306974, "grad_norm": 0.3308921456336975, "learning_rate": 1.7027569048960816e-05, "loss": 0.4178, "step": 18428 }, { "epoch": 0.5060131795716639, "grad_norm": 0.39613571763038635, "learning_rate": 1.702726178045233e-05, "loss": 0.514, "step": 18429 }, { "epoch": 0.5060406370126305, "grad_norm": 0.4320034086704254, "learning_rate": 1.702695449883576e-05, "loss": 0.535, "step": 18430 }, { "epoch": 0.5060680944535969, "grad_norm": 0.33359208703041077, "learning_rate": 1.7026647204111677e-05, "loss": 0.4323, "step": 18431 }, { "epoch": 0.5060955518945635, "grad_norm": 0.37847700715065, "learning_rate": 1.702633989628065e-05, "loss": 0.5516, "step": 18432 }, { "epoch": 0.5061230093355299, "grad_norm": 0.396894633769989, "learning_rate": 1.702603257534326e-05, "loss": 0.5429, "step": 18433 }, { "epoch": 0.5061504667764964, "grad_norm": 0.404427170753479, "learning_rate": 1.7025725241300077e-05, "loss": 0.5313, "step": 18434 }, { "epoch": 0.5061779242174629, "grad_norm": 0.42284753918647766, "learning_rate": 1.7025417894151674e-05, "loss": 0.4743, "step": 18435 }, { "epoch": 0.5062053816584294, "grad_norm": 0.41090935468673706, "learning_rate": 1.702511053389862e-05, "loss": 0.5158, "step": 18436 }, { "epoch": 0.506232839099396, "grad_norm": 0.36088189482688904, "learning_rate": 1.7024803160541497e-05, "loss": 0.4265, "step": 18437 }, { "epoch": 0.5062602965403624, "grad_norm": 0.3547121286392212, "learning_rate": 1.7024495774080875e-05, "loss": 0.528, "step": 18438 }, { "epoch": 0.506287753981329, "grad_norm": 0.38209617137908936, "learning_rate": 1.7024188374517324e-05, "loss": 0.5344, "step": 18439 }, { "epoch": 0.5063152114222954, "grad_norm": 0.3724437355995178, "learning_rate": 1.7023880961851418e-05, "loss": 0.493, "step": 18440 }, { "epoch": 0.506342668863262, "grad_norm": 0.35482069849967957, "learning_rate": 1.7023573536083734e-05, "loss": 0.5301, "step": 18441 }, { "epoch": 0.5063701263042284, "grad_norm": 0.33121055364608765, "learning_rate": 1.702326609721485e-05, "loss": 0.4844, "step": 18442 }, { "epoch": 0.506397583745195, "grad_norm": 0.33862969279289246, "learning_rate": 1.7022958645245323e-05, "loss": 0.4786, "step": 18443 }, { "epoch": 0.5064250411861615, "grad_norm": 0.37278664112091064, "learning_rate": 1.702265118017574e-05, "loss": 0.59, "step": 18444 }, { "epoch": 0.5064524986271279, "grad_norm": 0.363298237323761, "learning_rate": 1.7022343702006675e-05, "loss": 0.4775, "step": 18445 }, { "epoch": 0.5064799560680945, "grad_norm": 0.34477025270462036, "learning_rate": 1.7022036210738696e-05, "loss": 0.4828, "step": 18446 }, { "epoch": 0.5065074135090609, "grad_norm": 0.3330436050891876, "learning_rate": 1.7021728706372376e-05, "loss": 0.521, "step": 18447 }, { "epoch": 0.5065348709500275, "grad_norm": 0.372567355632782, "learning_rate": 1.702142118890829e-05, "loss": 0.4893, "step": 18448 }, { "epoch": 0.5065623283909939, "grad_norm": 0.3821532130241394, "learning_rate": 1.7021113658347018e-05, "loss": 0.4744, "step": 18449 }, { "epoch": 0.5065897858319605, "grad_norm": 0.4222080111503601, "learning_rate": 1.7020806114689126e-05, "loss": 0.4908, "step": 18450 }, { "epoch": 0.506617243272927, "grad_norm": 0.36788201332092285, "learning_rate": 1.7020498557935187e-05, "loss": 0.5006, "step": 18451 }, { "epoch": 0.5066447007138934, "grad_norm": 0.44815585017204285, "learning_rate": 1.702019098808578e-05, "loss": 0.5283, "step": 18452 }, { "epoch": 0.50667215815486, "grad_norm": 0.42253613471984863, "learning_rate": 1.7019883405141476e-05, "loss": 0.5097, "step": 18453 }, { "epoch": 0.5066996155958264, "grad_norm": 0.348308265209198, "learning_rate": 1.701957580910285e-05, "loss": 0.4941, "step": 18454 }, { "epoch": 0.506727073036793, "grad_norm": 0.37384557723999023, "learning_rate": 1.701926819997047e-05, "loss": 0.4002, "step": 18455 }, { "epoch": 0.5067545304777594, "grad_norm": 0.40464481711387634, "learning_rate": 1.701896057774492e-05, "loss": 0.5161, "step": 18456 }, { "epoch": 0.506781987918726, "grad_norm": 0.39037540555000305, "learning_rate": 1.7018652942426766e-05, "loss": 0.4508, "step": 18457 }, { "epoch": 0.5068094453596925, "grad_norm": 0.3811308741569519, "learning_rate": 1.7018345294016584e-05, "loss": 0.5884, "step": 18458 }, { "epoch": 0.506836902800659, "grad_norm": 0.38915011286735535, "learning_rate": 1.7018037632514948e-05, "loss": 0.544, "step": 18459 }, { "epoch": 0.5068643602416255, "grad_norm": 0.36638984084129333, "learning_rate": 1.7017729957922432e-05, "loss": 0.4872, "step": 18460 }, { "epoch": 0.506891817682592, "grad_norm": 0.35239800810813904, "learning_rate": 1.7017422270239608e-05, "loss": 0.4945, "step": 18461 }, { "epoch": 0.5069192751235585, "grad_norm": 0.3758838474750519, "learning_rate": 1.7017114569467056e-05, "loss": 0.4405, "step": 18462 }, { "epoch": 0.5069467325645249, "grad_norm": 0.40256282687187195, "learning_rate": 1.7016806855605343e-05, "loss": 0.6294, "step": 18463 }, { "epoch": 0.5069741900054915, "grad_norm": 0.3415452241897583, "learning_rate": 1.7016499128655048e-05, "loss": 0.5271, "step": 18464 }, { "epoch": 0.507001647446458, "grad_norm": 0.3873440623283386, "learning_rate": 1.7016191388616738e-05, "loss": 0.4829, "step": 18465 }, { "epoch": 0.5070291048874245, "grad_norm": 0.39236801862716675, "learning_rate": 1.7015883635490996e-05, "loss": 0.5772, "step": 18466 }, { "epoch": 0.507056562328391, "grad_norm": 0.39547935128211975, "learning_rate": 1.7015575869278392e-05, "loss": 0.5087, "step": 18467 }, { "epoch": 0.5070840197693575, "grad_norm": 0.41498100757598877, "learning_rate": 1.70152680899795e-05, "loss": 0.4721, "step": 18468 }, { "epoch": 0.507111477210324, "grad_norm": 0.3713070750236511, "learning_rate": 1.7014960297594888e-05, "loss": 0.5034, "step": 18469 }, { "epoch": 0.5071389346512905, "grad_norm": 0.3827979266643524, "learning_rate": 1.701465249212514e-05, "loss": 0.4533, "step": 18470 }, { "epoch": 0.507166392092257, "grad_norm": 0.37760066986083984, "learning_rate": 1.701434467357083e-05, "loss": 0.58, "step": 18471 }, { "epoch": 0.5071938495332236, "grad_norm": 0.4089290201663971, "learning_rate": 1.7014036841932523e-05, "loss": 0.4889, "step": 18472 }, { "epoch": 0.50722130697419, "grad_norm": 0.44893211126327515, "learning_rate": 1.70137289972108e-05, "loss": 0.4222, "step": 18473 }, { "epoch": 0.5072487644151565, "grad_norm": 0.3953304886817932, "learning_rate": 1.7013421139406237e-05, "loss": 0.4881, "step": 18474 }, { "epoch": 0.507276221856123, "grad_norm": 0.3444829285144806, "learning_rate": 1.7013113268519402e-05, "loss": 0.529, "step": 18475 }, { "epoch": 0.5073036792970895, "grad_norm": 0.3682324290275574, "learning_rate": 1.7012805384550876e-05, "loss": 0.4937, "step": 18476 }, { "epoch": 0.507331136738056, "grad_norm": 0.4004669487476349, "learning_rate": 1.7012497487501226e-05, "loss": 0.5631, "step": 18477 }, { "epoch": 0.5073585941790225, "grad_norm": 0.40080103278160095, "learning_rate": 1.701218957737103e-05, "loss": 0.5464, "step": 18478 }, { "epoch": 0.5073860516199891, "grad_norm": 0.36055049300193787, "learning_rate": 1.7011881654160865e-05, "loss": 0.4823, "step": 18479 }, { "epoch": 0.5074135090609555, "grad_norm": 0.34053465723991394, "learning_rate": 1.70115737178713e-05, "loss": 0.4418, "step": 18480 }, { "epoch": 0.507440966501922, "grad_norm": 0.4014171063899994, "learning_rate": 1.7011265768502912e-05, "loss": 0.6195, "step": 18481 }, { "epoch": 0.5074684239428885, "grad_norm": 0.37327343225479126, "learning_rate": 1.7010957806056278e-05, "loss": 0.5075, "step": 18482 }, { "epoch": 0.507495881383855, "grad_norm": 0.32317957282066345, "learning_rate": 1.701064983053197e-05, "loss": 0.4789, "step": 18483 }, { "epoch": 0.5075233388248215, "grad_norm": 0.35024353861808777, "learning_rate": 1.701034184193056e-05, "loss": 0.5356, "step": 18484 }, { "epoch": 0.507550796265788, "grad_norm": 0.36012688279151917, "learning_rate": 1.701003384025263e-05, "loss": 0.5482, "step": 18485 }, { "epoch": 0.5075782537067546, "grad_norm": 0.38247373700141907, "learning_rate": 1.7009725825498743e-05, "loss": 0.5114, "step": 18486 }, { "epoch": 0.507605711147721, "grad_norm": 0.37171828746795654, "learning_rate": 1.7009417797669487e-05, "loss": 0.5281, "step": 18487 }, { "epoch": 0.5076331685886876, "grad_norm": 0.3859644830226898, "learning_rate": 1.7009109756765426e-05, "loss": 0.475, "step": 18488 }, { "epoch": 0.507660626029654, "grad_norm": 0.4409497082233429, "learning_rate": 1.7008801702787138e-05, "loss": 0.5357, "step": 18489 }, { "epoch": 0.5076880834706206, "grad_norm": 0.4524669349193573, "learning_rate": 1.7008493635735197e-05, "loss": 0.4869, "step": 18490 }, { "epoch": 0.507715540911587, "grad_norm": 0.3758498430252075, "learning_rate": 1.700818555561018e-05, "loss": 0.6059, "step": 18491 }, { "epoch": 0.5077429983525535, "grad_norm": 0.3690436780452728, "learning_rate": 1.7007877462412658e-05, "loss": 0.4565, "step": 18492 }, { "epoch": 0.5077704557935201, "grad_norm": 0.3430847227573395, "learning_rate": 1.700756935614321e-05, "loss": 0.5392, "step": 18493 }, { "epoch": 0.5077979132344865, "grad_norm": 0.48653444647789, "learning_rate": 1.7007261236802408e-05, "loss": 0.5555, "step": 18494 }, { "epoch": 0.5078253706754531, "grad_norm": 0.3584933876991272, "learning_rate": 1.7006953104390827e-05, "loss": 0.449, "step": 18495 }, { "epoch": 0.5078528281164195, "grad_norm": 0.36981719732284546, "learning_rate": 1.7006644958909047e-05, "loss": 0.4526, "step": 18496 }, { "epoch": 0.5078802855573861, "grad_norm": 0.36832261085510254, "learning_rate": 1.7006336800357632e-05, "loss": 0.4799, "step": 18497 }, { "epoch": 0.5079077429983525, "grad_norm": 0.3313303589820862, "learning_rate": 1.7006028628737167e-05, "loss": 0.4966, "step": 18498 }, { "epoch": 0.5079352004393191, "grad_norm": 0.35143229365348816, "learning_rate": 1.7005720444048217e-05, "loss": 0.5179, "step": 18499 }, { "epoch": 0.5079626578802856, "grad_norm": 0.33716607093811035, "learning_rate": 1.7005412246291368e-05, "loss": 0.5125, "step": 18500 }, { "epoch": 0.507990115321252, "grad_norm": 0.4248391091823578, "learning_rate": 1.7005104035467182e-05, "loss": 0.5793, "step": 18501 }, { "epoch": 0.5080175727622186, "grad_norm": 0.34109556674957275, "learning_rate": 1.700479581157625e-05, "loss": 0.4814, "step": 18502 }, { "epoch": 0.508045030203185, "grad_norm": 0.3499755263328552, "learning_rate": 1.700448757461913e-05, "loss": 0.5376, "step": 18503 }, { "epoch": 0.5080724876441516, "grad_norm": 0.3302028775215149, "learning_rate": 1.700417932459641e-05, "loss": 0.3928, "step": 18504 }, { "epoch": 0.508099945085118, "grad_norm": 0.48318666219711304, "learning_rate": 1.7003871061508662e-05, "loss": 0.53, "step": 18505 }, { "epoch": 0.5081274025260846, "grad_norm": 0.3688989579677582, "learning_rate": 1.7003562785356457e-05, "loss": 0.5281, "step": 18506 }, { "epoch": 0.5081548599670511, "grad_norm": 0.35562682151794434, "learning_rate": 1.7003254496140368e-05, "loss": 0.4383, "step": 18507 }, { "epoch": 0.5081823174080176, "grad_norm": 0.3539496958255768, "learning_rate": 1.7002946193860978e-05, "loss": 0.4099, "step": 18508 }, { "epoch": 0.5082097748489841, "grad_norm": 0.3534518778324127, "learning_rate": 1.7002637878518856e-05, "loss": 0.4595, "step": 18509 }, { "epoch": 0.5082372322899505, "grad_norm": 0.3501948118209839, "learning_rate": 1.700232955011458e-05, "loss": 0.503, "step": 18510 }, { "epoch": 0.5082646897309171, "grad_norm": 0.41264107823371887, "learning_rate": 1.700202120864873e-05, "loss": 0.5417, "step": 18511 }, { "epoch": 0.5082921471718835, "grad_norm": 0.3893459737300873, "learning_rate": 1.700171285412187e-05, "loss": 0.5581, "step": 18512 }, { "epoch": 0.5083196046128501, "grad_norm": 0.37234675884246826, "learning_rate": 1.7001404486534584e-05, "loss": 0.5283, "step": 18513 }, { "epoch": 0.5083470620538166, "grad_norm": 0.33495548367500305, "learning_rate": 1.700109610588744e-05, "loss": 0.4895, "step": 18514 }, { "epoch": 0.5083745194947831, "grad_norm": 0.39584073424339294, "learning_rate": 1.700078771218102e-05, "loss": 0.4969, "step": 18515 }, { "epoch": 0.5084019769357496, "grad_norm": 0.32126128673553467, "learning_rate": 1.7000479305415896e-05, "loss": 0.3748, "step": 18516 }, { "epoch": 0.5084294343767161, "grad_norm": 0.3946602940559387, "learning_rate": 1.7000170885592643e-05, "loss": 0.5811, "step": 18517 }, { "epoch": 0.5084568918176826, "grad_norm": 0.37806057929992676, "learning_rate": 1.6999862452711838e-05, "loss": 0.539, "step": 18518 }, { "epoch": 0.508484349258649, "grad_norm": 0.3795132040977478, "learning_rate": 1.6999554006774058e-05, "loss": 0.5238, "step": 18519 }, { "epoch": 0.5085118066996156, "grad_norm": 0.40288108587265015, "learning_rate": 1.699924554777987e-05, "loss": 0.5716, "step": 18520 }, { "epoch": 0.5085392641405821, "grad_norm": 0.3266240060329437, "learning_rate": 1.699893707572986e-05, "loss": 0.4348, "step": 18521 }, { "epoch": 0.5085667215815486, "grad_norm": 0.4246068000793457, "learning_rate": 1.69986285906246e-05, "loss": 0.5689, "step": 18522 }, { "epoch": 0.5085941790225151, "grad_norm": 0.3663918077945709, "learning_rate": 1.699832009246466e-05, "loss": 0.4884, "step": 18523 }, { "epoch": 0.5086216364634816, "grad_norm": 0.4137879014015198, "learning_rate": 1.699801158125062e-05, "loss": 0.5469, "step": 18524 }, { "epoch": 0.5086490939044481, "grad_norm": 0.36332714557647705, "learning_rate": 1.6997703056983054e-05, "loss": 0.5094, "step": 18525 }, { "epoch": 0.5086765513454146, "grad_norm": 0.40160202980041504, "learning_rate": 1.699739451966254e-05, "loss": 0.5555, "step": 18526 }, { "epoch": 0.5087040087863811, "grad_norm": 0.4350310266017914, "learning_rate": 1.699708596928965e-05, "loss": 0.4567, "step": 18527 }, { "epoch": 0.5087314662273477, "grad_norm": 0.46098700165748596, "learning_rate": 1.699677740586496e-05, "loss": 0.46, "step": 18528 }, { "epoch": 0.5087589236683141, "grad_norm": 0.42546144127845764, "learning_rate": 1.6996468829389053e-05, "loss": 0.5681, "step": 18529 }, { "epoch": 0.5087863811092806, "grad_norm": 0.3744295835494995, "learning_rate": 1.6996160239862498e-05, "loss": 0.4578, "step": 18530 }, { "epoch": 0.5088138385502471, "grad_norm": 0.41190043091773987, "learning_rate": 1.699585163728587e-05, "loss": 0.5348, "step": 18531 }, { "epoch": 0.5088412959912136, "grad_norm": 0.34983187913894653, "learning_rate": 1.699554302165974e-05, "loss": 0.4719, "step": 18532 }, { "epoch": 0.5088687534321801, "grad_norm": 0.34879937767982483, "learning_rate": 1.6995234392984692e-05, "loss": 0.5471, "step": 18533 }, { "epoch": 0.5088962108731466, "grad_norm": 0.4051634967327118, "learning_rate": 1.6994925751261308e-05, "loss": 0.5294, "step": 18534 }, { "epoch": 0.5089236683141132, "grad_norm": 0.3325008749961853, "learning_rate": 1.6994617096490146e-05, "loss": 0.4345, "step": 18535 }, { "epoch": 0.5089511257550796, "grad_norm": 0.3721499741077423, "learning_rate": 1.6994308428671793e-05, "loss": 0.5197, "step": 18536 }, { "epoch": 0.5089785831960462, "grad_norm": 11.047492980957031, "learning_rate": 1.6993999747806823e-05, "loss": 0.4796, "step": 18537 }, { "epoch": 0.5090060406370126, "grad_norm": 0.3265029788017273, "learning_rate": 1.699369105389581e-05, "loss": 0.466, "step": 18538 }, { "epoch": 0.5090334980779792, "grad_norm": 0.4406310021877289, "learning_rate": 1.699338234693933e-05, "loss": 0.5537, "step": 18539 }, { "epoch": 0.5090609555189456, "grad_norm": 0.8421952724456787, "learning_rate": 1.6993073626937963e-05, "loss": 0.5562, "step": 18540 }, { "epoch": 0.5090884129599121, "grad_norm": 0.4519234895706177, "learning_rate": 1.6992764893892283e-05, "loss": 0.587, "step": 18541 }, { "epoch": 0.5091158704008787, "grad_norm": 0.6256911158561707, "learning_rate": 1.699245614780286e-05, "loss": 0.4674, "step": 18542 }, { "epoch": 0.5091433278418451, "grad_norm": 0.3460980951786041, "learning_rate": 1.6992147388670278e-05, "loss": 0.5032, "step": 18543 }, { "epoch": 0.5091707852828117, "grad_norm": 0.34802815318107605, "learning_rate": 1.6991838616495105e-05, "loss": 0.4186, "step": 18544 }, { "epoch": 0.5091982427237781, "grad_norm": 0.3901326060295105, "learning_rate": 1.699152983127793e-05, "loss": 0.5498, "step": 18545 }, { "epoch": 0.5092257001647447, "grad_norm": 0.37601596117019653, "learning_rate": 1.699122103301931e-05, "loss": 0.4724, "step": 18546 }, { "epoch": 0.5092531576057111, "grad_norm": 0.3541049361228943, "learning_rate": 1.699091222171984e-05, "loss": 0.4945, "step": 18547 }, { "epoch": 0.5092806150466777, "grad_norm": 0.39047351479530334, "learning_rate": 1.6990603397380084e-05, "loss": 0.4833, "step": 18548 }, { "epoch": 0.5093080724876442, "grad_norm": 0.39342573285102844, "learning_rate": 1.699029456000062e-05, "loss": 0.448, "step": 18549 }, { "epoch": 0.5093355299286106, "grad_norm": 0.3610215485095978, "learning_rate": 1.6989985709582025e-05, "loss": 0.5514, "step": 18550 }, { "epoch": 0.5093629873695772, "grad_norm": 0.36175137758255005, "learning_rate": 1.698967684612488e-05, "loss": 0.4391, "step": 18551 }, { "epoch": 0.5093904448105436, "grad_norm": 0.41140732169151306, "learning_rate": 1.6989367969629755e-05, "loss": 0.5873, "step": 18552 }, { "epoch": 0.5094179022515102, "grad_norm": 0.4560134708881378, "learning_rate": 1.698905908009723e-05, "loss": 0.4758, "step": 18553 }, { "epoch": 0.5094453596924766, "grad_norm": 0.4010449945926666, "learning_rate": 1.6988750177527876e-05, "loss": 0.5922, "step": 18554 }, { "epoch": 0.5094728171334432, "grad_norm": 0.39636436104774475, "learning_rate": 1.6988441261922273e-05, "loss": 0.6019, "step": 18555 }, { "epoch": 0.5095002745744097, "grad_norm": 0.360472172498703, "learning_rate": 1.6988132333280995e-05, "loss": 0.4303, "step": 18556 }, { "epoch": 0.5095277320153762, "grad_norm": 0.6750775575637817, "learning_rate": 1.6987823391604622e-05, "loss": 0.5403, "step": 18557 }, { "epoch": 0.5095551894563427, "grad_norm": 0.5308595895767212, "learning_rate": 1.6987514436893727e-05, "loss": 0.5554, "step": 18558 }, { "epoch": 0.5095826468973091, "grad_norm": 0.3575540781021118, "learning_rate": 1.698720546914889e-05, "loss": 0.5226, "step": 18559 }, { "epoch": 0.5096101043382757, "grad_norm": 0.41215983033180237, "learning_rate": 1.698689648837068e-05, "loss": 0.5227, "step": 18560 }, { "epoch": 0.5096375617792421, "grad_norm": 0.36694127321243286, "learning_rate": 1.6986587494559684e-05, "loss": 0.5915, "step": 18561 }, { "epoch": 0.5096650192202087, "grad_norm": 0.3662472367286682, "learning_rate": 1.698627848771647e-05, "loss": 0.5197, "step": 18562 }, { "epoch": 0.5096924766611752, "grad_norm": 0.3692583441734314, "learning_rate": 1.6985969467841614e-05, "loss": 0.5422, "step": 18563 }, { "epoch": 0.5097199341021417, "grad_norm": 0.38701361417770386, "learning_rate": 1.6985660434935697e-05, "loss": 0.5076, "step": 18564 }, { "epoch": 0.5097473915431082, "grad_norm": 0.4137991964817047, "learning_rate": 1.6985351388999296e-05, "loss": 0.5329, "step": 18565 }, { "epoch": 0.5097748489840747, "grad_norm": 0.38359352946281433, "learning_rate": 1.6985042330032983e-05, "loss": 0.5363, "step": 18566 }, { "epoch": 0.5098023064250412, "grad_norm": 0.43564051389694214, "learning_rate": 1.698473325803734e-05, "loss": 0.4878, "step": 18567 }, { "epoch": 0.5098297638660076, "grad_norm": 0.3857683539390564, "learning_rate": 1.6984424173012936e-05, "loss": 0.5011, "step": 18568 }, { "epoch": 0.5098572213069742, "grad_norm": 0.3547968566417694, "learning_rate": 1.6984115074960352e-05, "loss": 0.4722, "step": 18569 }, { "epoch": 0.5098846787479407, "grad_norm": 0.3517572283744812, "learning_rate": 1.6983805963880165e-05, "loss": 0.399, "step": 18570 }, { "epoch": 0.5099121361889072, "grad_norm": 0.40543392300605774, "learning_rate": 1.6983496839772953e-05, "loss": 0.5177, "step": 18571 }, { "epoch": 0.5099395936298737, "grad_norm": 0.3801492154598236, "learning_rate": 1.6983187702639286e-05, "loss": 0.5783, "step": 18572 }, { "epoch": 0.5099670510708402, "grad_norm": 0.38578012585639954, "learning_rate": 1.698287855247975e-05, "loss": 0.5654, "step": 18573 }, { "epoch": 0.5099945085118067, "grad_norm": 0.3702903389930725, "learning_rate": 1.6982569389294914e-05, "loss": 0.5424, "step": 18574 }, { "epoch": 0.5100219659527732, "grad_norm": 0.34418368339538574, "learning_rate": 1.698226021308536e-05, "loss": 0.5871, "step": 18575 }, { "epoch": 0.5100494233937397, "grad_norm": 0.40446576476097107, "learning_rate": 1.698195102385166e-05, "loss": 0.4726, "step": 18576 }, { "epoch": 0.5100768808347063, "grad_norm": 0.39791324734687805, "learning_rate": 1.6981641821594392e-05, "loss": 0.5136, "step": 18577 }, { "epoch": 0.5101043382756727, "grad_norm": 0.3416958153247833, "learning_rate": 1.6981332606314136e-05, "loss": 0.4682, "step": 18578 }, { "epoch": 0.5101317957166392, "grad_norm": 0.39136114716529846, "learning_rate": 1.6981023378011467e-05, "loss": 0.5478, "step": 18579 }, { "epoch": 0.5101592531576057, "grad_norm": 0.38259679079055786, "learning_rate": 1.698071413668696e-05, "loss": 0.561, "step": 18580 }, { "epoch": 0.5101867105985722, "grad_norm": 0.7645190954208374, "learning_rate": 1.6980404882341192e-05, "loss": 0.4556, "step": 18581 }, { "epoch": 0.5102141680395387, "grad_norm": 0.3744647204875946, "learning_rate": 1.6980095614974742e-05, "loss": 0.5442, "step": 18582 }, { "epoch": 0.5102416254805052, "grad_norm": 0.3610951602458954, "learning_rate": 1.6979786334588187e-05, "loss": 0.4813, "step": 18583 }, { "epoch": 0.5102690829214718, "grad_norm": 0.3937985599040985, "learning_rate": 1.69794770411821e-05, "loss": 0.5403, "step": 18584 }, { "epoch": 0.5102965403624382, "grad_norm": 0.40564388036727905, "learning_rate": 1.6979167734757065e-05, "loss": 0.5783, "step": 18585 }, { "epoch": 0.5103239978034048, "grad_norm": 0.4010675549507141, "learning_rate": 1.697885841531365e-05, "loss": 0.496, "step": 18586 }, { "epoch": 0.5103514552443712, "grad_norm": 0.34909769892692566, "learning_rate": 1.697854908285244e-05, "loss": 0.4958, "step": 18587 }, { "epoch": 0.5103789126853377, "grad_norm": 0.37876346707344055, "learning_rate": 1.6978239737374008e-05, "loss": 0.4921, "step": 18588 }, { "epoch": 0.5104063701263042, "grad_norm": 0.4419255256652832, "learning_rate": 1.6977930378878933e-05, "loss": 0.5654, "step": 18589 }, { "epoch": 0.5104338275672707, "grad_norm": 0.39342525601387024, "learning_rate": 1.6977621007367786e-05, "loss": 0.5281, "step": 18590 }, { "epoch": 0.5104612850082373, "grad_norm": 0.5626476407051086, "learning_rate": 1.6977311622841152e-05, "loss": 0.57, "step": 18591 }, { "epoch": 0.5104887424492037, "grad_norm": 0.3687600791454315, "learning_rate": 1.6977002225299606e-05, "loss": 0.4953, "step": 18592 }, { "epoch": 0.5105161998901703, "grad_norm": 0.42727363109588623, "learning_rate": 1.6976692814743724e-05, "loss": 0.4888, "step": 18593 }, { "epoch": 0.5105436573311367, "grad_norm": 0.3891254961490631, "learning_rate": 1.697638339117408e-05, "loss": 0.5175, "step": 18594 }, { "epoch": 0.5105711147721033, "grad_norm": 0.3617144525051117, "learning_rate": 1.6976073954591255e-05, "loss": 0.5267, "step": 18595 }, { "epoch": 0.5105985722130697, "grad_norm": 0.6117919683456421, "learning_rate": 1.697576450499583e-05, "loss": 0.5822, "step": 18596 }, { "epoch": 0.5106260296540363, "grad_norm": 1.0328830480575562, "learning_rate": 1.6975455042388372e-05, "loss": 0.6459, "step": 18597 }, { "epoch": 0.5106534870950028, "grad_norm": 0.40537548065185547, "learning_rate": 1.697514556676947e-05, "loss": 0.4756, "step": 18598 }, { "epoch": 0.5106809445359692, "grad_norm": 0.3591884970664978, "learning_rate": 1.697483607813969e-05, "loss": 0.5038, "step": 18599 }, { "epoch": 0.5107084019769358, "grad_norm": 0.3887330889701843, "learning_rate": 1.697452657649962e-05, "loss": 0.404, "step": 18600 }, { "epoch": 0.5107358594179022, "grad_norm": 0.3723224699497223, "learning_rate": 1.6974217061849828e-05, "loss": 0.515, "step": 18601 }, { "epoch": 0.5107633168588688, "grad_norm": 0.32704028487205505, "learning_rate": 1.6973907534190896e-05, "loss": 0.5061, "step": 18602 }, { "epoch": 0.5107907742998352, "grad_norm": 0.3994567394256592, "learning_rate": 1.69735979935234e-05, "loss": 0.4985, "step": 18603 }, { "epoch": 0.5108182317408018, "grad_norm": 0.3492930233478546, "learning_rate": 1.697328843984792e-05, "loss": 0.5188, "step": 18604 }, { "epoch": 0.5108456891817683, "grad_norm": 0.35215985774993896, "learning_rate": 1.697297887316503e-05, "loss": 0.5272, "step": 18605 }, { "epoch": 0.5108731466227348, "grad_norm": 0.40904736518859863, "learning_rate": 1.6972669293475306e-05, "loss": 0.5188, "step": 18606 }, { "epoch": 0.5109006040637013, "grad_norm": 0.3817182779312134, "learning_rate": 1.6972359700779334e-05, "loss": 0.4998, "step": 18607 }, { "epoch": 0.5109280615046677, "grad_norm": 0.38020649552345276, "learning_rate": 1.6972050095077683e-05, "loss": 0.5159, "step": 18608 }, { "epoch": 0.5109555189456343, "grad_norm": 0.3936541676521301, "learning_rate": 1.697174047637093e-05, "loss": 0.5203, "step": 18609 }, { "epoch": 0.5109829763866007, "grad_norm": 0.3356493413448334, "learning_rate": 1.6971430844659663e-05, "loss": 0.4879, "step": 18610 }, { "epoch": 0.5110104338275673, "grad_norm": 0.41974425315856934, "learning_rate": 1.697112119994445e-05, "loss": 0.4914, "step": 18611 }, { "epoch": 0.5110378912685337, "grad_norm": 0.47035714983940125, "learning_rate": 1.6970811542225868e-05, "loss": 0.6621, "step": 18612 }, { "epoch": 0.5110653487095003, "grad_norm": 0.5819621086120605, "learning_rate": 1.69705018715045e-05, "loss": 0.5546, "step": 18613 }, { "epoch": 0.5110928061504668, "grad_norm": 0.388677716255188, "learning_rate": 1.697019218778092e-05, "loss": 0.6283, "step": 18614 }, { "epoch": 0.5111202635914333, "grad_norm": 0.3554789125919342, "learning_rate": 1.696988249105571e-05, "loss": 0.5392, "step": 18615 }, { "epoch": 0.5111477210323998, "grad_norm": 0.3696957528591156, "learning_rate": 1.6969572781329443e-05, "loss": 0.4747, "step": 18616 }, { "epoch": 0.5111751784733662, "grad_norm": 0.4348759949207306, "learning_rate": 1.6969263058602695e-05, "loss": 0.5746, "step": 18617 }, { "epoch": 0.5112026359143328, "grad_norm": 0.36525896191596985, "learning_rate": 1.6968953322876052e-05, "loss": 0.4612, "step": 18618 }, { "epoch": 0.5112300933552992, "grad_norm": 0.38608282804489136, "learning_rate": 1.6968643574150082e-05, "loss": 0.5005, "step": 18619 }, { "epoch": 0.5112575507962658, "grad_norm": 0.42632946372032166, "learning_rate": 1.696833381242537e-05, "loss": 0.3586, "step": 18620 }, { "epoch": 0.5112850082372323, "grad_norm": 0.45274803042411804, "learning_rate": 1.6968024037702493e-05, "loss": 0.4786, "step": 18621 }, { "epoch": 0.5113124656781988, "grad_norm": 0.3850913345813751, "learning_rate": 1.6967714249982027e-05, "loss": 0.4923, "step": 18622 }, { "epoch": 0.5113399231191653, "grad_norm": 0.3569629192352295, "learning_rate": 1.6967404449264548e-05, "loss": 0.4786, "step": 18623 }, { "epoch": 0.5113673805601318, "grad_norm": 0.40551385283470154, "learning_rate": 1.6967094635550636e-05, "loss": 0.5194, "step": 18624 }, { "epoch": 0.5113948380010983, "grad_norm": 0.3647242486476898, "learning_rate": 1.6966784808840873e-05, "loss": 0.5596, "step": 18625 }, { "epoch": 0.5114222954420647, "grad_norm": 0.3421878218650818, "learning_rate": 1.6966474969135826e-05, "loss": 0.4712, "step": 18626 }, { "epoch": 0.5114497528830313, "grad_norm": 0.40208321809768677, "learning_rate": 1.6966165116436088e-05, "loss": 0.5151, "step": 18627 }, { "epoch": 0.5114772103239978, "grad_norm": 0.3541579246520996, "learning_rate": 1.6965855250742223e-05, "loss": 0.4928, "step": 18628 }, { "epoch": 0.5115046677649643, "grad_norm": 0.37649309635162354, "learning_rate": 1.6965545372054816e-05, "loss": 0.4658, "step": 18629 }, { "epoch": 0.5115321252059308, "grad_norm": 0.39454054832458496, "learning_rate": 1.696523548037444e-05, "loss": 0.526, "step": 18630 }, { "epoch": 0.5115595826468973, "grad_norm": 0.3725613057613373, "learning_rate": 1.696492557570168e-05, "loss": 0.5371, "step": 18631 }, { "epoch": 0.5115870400878638, "grad_norm": 0.4415969252586365, "learning_rate": 1.6964615658037113e-05, "loss": 0.545, "step": 18632 }, { "epoch": 0.5116144975288303, "grad_norm": 0.33665210008621216, "learning_rate": 1.696430572738131e-05, "loss": 0.4665, "step": 18633 }, { "epoch": 0.5116419549697968, "grad_norm": 0.3869999051094055, "learning_rate": 1.6963995783734858e-05, "loss": 0.5197, "step": 18634 }, { "epoch": 0.5116694124107634, "grad_norm": 0.46202272176742554, "learning_rate": 1.6963685827098332e-05, "loss": 0.5361, "step": 18635 }, { "epoch": 0.5116968698517298, "grad_norm": 0.3704226613044739, "learning_rate": 1.6963375857472306e-05, "loss": 0.4737, "step": 18636 }, { "epoch": 0.5117243272926963, "grad_norm": 0.39509662985801697, "learning_rate": 1.6963065874857364e-05, "loss": 0.5215, "step": 18637 }, { "epoch": 0.5117517847336628, "grad_norm": 0.3687683343887329, "learning_rate": 1.6962755879254084e-05, "loss": 0.5164, "step": 18638 }, { "epoch": 0.5117792421746293, "grad_norm": 0.45334988832473755, "learning_rate": 1.6962445870663037e-05, "loss": 0.5489, "step": 18639 }, { "epoch": 0.5118066996155958, "grad_norm": 0.3462483584880829, "learning_rate": 1.6962135849084807e-05, "loss": 0.464, "step": 18640 }, { "epoch": 0.5118341570565623, "grad_norm": 0.39044636487960815, "learning_rate": 1.6961825814519976e-05, "loss": 0.4817, "step": 18641 }, { "epoch": 0.5118616144975289, "grad_norm": 0.43549829721450806, "learning_rate": 1.696151576696911e-05, "loss": 0.5207, "step": 18642 }, { "epoch": 0.5118890719384953, "grad_norm": 0.4025256335735321, "learning_rate": 1.6961205706432803e-05, "loss": 0.4985, "step": 18643 }, { "epoch": 0.5119165293794619, "grad_norm": 0.3609309196472168, "learning_rate": 1.6960895632911626e-05, "loss": 0.5128, "step": 18644 }, { "epoch": 0.5119439868204283, "grad_norm": 0.35793304443359375, "learning_rate": 1.6960585546406152e-05, "loss": 0.5142, "step": 18645 }, { "epoch": 0.5119714442613948, "grad_norm": 0.3976730704307556, "learning_rate": 1.6960275446916968e-05, "loss": 0.4744, "step": 18646 }, { "epoch": 0.5119989017023613, "grad_norm": 0.3531462848186493, "learning_rate": 1.6959965334444645e-05, "loss": 0.5081, "step": 18647 }, { "epoch": 0.5120263591433278, "grad_norm": 0.40081000328063965, "learning_rate": 1.6959655208989767e-05, "loss": 0.5454, "step": 18648 }, { "epoch": 0.5120538165842944, "grad_norm": 0.4007335901260376, "learning_rate": 1.695934507055291e-05, "loss": 0.4822, "step": 18649 }, { "epoch": 0.5120812740252608, "grad_norm": 0.4033169746398926, "learning_rate": 1.6959034919134656e-05, "loss": 0.4781, "step": 18650 }, { "epoch": 0.5121087314662274, "grad_norm": 0.45100030303001404, "learning_rate": 1.6958724754735582e-05, "loss": 0.6013, "step": 18651 }, { "epoch": 0.5121361889071938, "grad_norm": 0.31972330808639526, "learning_rate": 1.695841457735626e-05, "loss": 0.3841, "step": 18652 }, { "epoch": 0.5121636463481604, "grad_norm": 0.4122544229030609, "learning_rate": 1.6958104386997275e-05, "loss": 0.5663, "step": 18653 }, { "epoch": 0.5121911037891268, "grad_norm": 0.38360199332237244, "learning_rate": 1.6957794183659205e-05, "loss": 0.4299, "step": 18654 }, { "epoch": 0.5122185612300933, "grad_norm": 0.3529968857765198, "learning_rate": 1.695748396734263e-05, "loss": 0.5183, "step": 18655 }, { "epoch": 0.5122460186710599, "grad_norm": 0.33444714546203613, "learning_rate": 1.695717373804813e-05, "loss": 0.4281, "step": 18656 }, { "epoch": 0.5122734761120263, "grad_norm": 0.740058183670044, "learning_rate": 1.6956863495776274e-05, "loss": 0.5563, "step": 18657 }, { "epoch": 0.5123009335529929, "grad_norm": 0.3847612738609314, "learning_rate": 1.6956553240527648e-05, "loss": 0.5064, "step": 18658 }, { "epoch": 0.5123283909939593, "grad_norm": 0.36713463068008423, "learning_rate": 1.6956242972302832e-05, "loss": 0.4453, "step": 18659 }, { "epoch": 0.5123558484349259, "grad_norm": 0.4261859059333801, "learning_rate": 1.69559326911024e-05, "loss": 0.579, "step": 18660 }, { "epoch": 0.5123833058758923, "grad_norm": 0.3419932723045349, "learning_rate": 1.695562239692694e-05, "loss": 0.5424, "step": 18661 }, { "epoch": 0.5124107633168589, "grad_norm": 0.4075855016708374, "learning_rate": 1.6955312089777018e-05, "loss": 0.5153, "step": 18662 }, { "epoch": 0.5124382207578254, "grad_norm": 0.3754998445510864, "learning_rate": 1.695500176965322e-05, "loss": 0.4877, "step": 18663 }, { "epoch": 0.5124656781987919, "grad_norm": 0.39077243208885193, "learning_rate": 1.6954691436556127e-05, "loss": 0.5372, "step": 18664 }, { "epoch": 0.5124931356397584, "grad_norm": 0.37289750576019287, "learning_rate": 1.6954381090486313e-05, "loss": 0.4619, "step": 18665 }, { "epoch": 0.5125205930807248, "grad_norm": 0.3652800917625427, "learning_rate": 1.695407073144436e-05, "loss": 0.5017, "step": 18666 }, { "epoch": 0.5125480505216914, "grad_norm": 0.3635663390159607, "learning_rate": 1.6953760359430843e-05, "loss": 0.5171, "step": 18667 }, { "epoch": 0.5125755079626578, "grad_norm": 0.3609192669391632, "learning_rate": 1.6953449974446347e-05, "loss": 0.4721, "step": 18668 }, { "epoch": 0.5126029654036244, "grad_norm": 0.3625755310058594, "learning_rate": 1.6953139576491443e-05, "loss": 0.5494, "step": 18669 }, { "epoch": 0.5126304228445909, "grad_norm": 0.36530402302742004, "learning_rate": 1.6952829165566715e-05, "loss": 0.5441, "step": 18670 }, { "epoch": 0.5126578802855574, "grad_norm": 0.3785923421382904, "learning_rate": 1.6952518741672745e-05, "loss": 0.604, "step": 18671 }, { "epoch": 0.5126853377265239, "grad_norm": 0.4319320023059845, "learning_rate": 1.6952208304810107e-05, "loss": 0.5503, "step": 18672 }, { "epoch": 0.5127127951674904, "grad_norm": 0.3328680396080017, "learning_rate": 1.695189785497938e-05, "loss": 0.503, "step": 18673 }, { "epoch": 0.5127402526084569, "grad_norm": 0.3545232117176056, "learning_rate": 1.6951587392181147e-05, "loss": 0.5577, "step": 18674 }, { "epoch": 0.5127677100494233, "grad_norm": 0.4864352345466614, "learning_rate": 1.6951276916415983e-05, "loss": 0.4282, "step": 18675 }, { "epoch": 0.5127951674903899, "grad_norm": 0.3901040554046631, "learning_rate": 1.695096642768447e-05, "loss": 0.617, "step": 18676 }, { "epoch": 0.5128226249313564, "grad_norm": 0.3679579794406891, "learning_rate": 1.6950655925987186e-05, "loss": 0.5204, "step": 18677 }, { "epoch": 0.5128500823723229, "grad_norm": 0.36346349120140076, "learning_rate": 1.695034541132471e-05, "loss": 0.5315, "step": 18678 }, { "epoch": 0.5128775398132894, "grad_norm": 0.39569810032844543, "learning_rate": 1.6950034883697624e-05, "loss": 0.5607, "step": 18679 }, { "epoch": 0.5129049972542559, "grad_norm": 0.38267233967781067, "learning_rate": 1.69497243431065e-05, "loss": 0.4949, "step": 18680 }, { "epoch": 0.5129324546952224, "grad_norm": 0.44071489572525024, "learning_rate": 1.6949413789551926e-05, "loss": 0.5923, "step": 18681 }, { "epoch": 0.5129599121361889, "grad_norm": 0.42524248361587524, "learning_rate": 1.6949103223034476e-05, "loss": 0.4822, "step": 18682 }, { "epoch": 0.5129873695771554, "grad_norm": 0.4341190755367279, "learning_rate": 1.694879264355473e-05, "loss": 0.5332, "step": 18683 }, { "epoch": 0.513014827018122, "grad_norm": 0.4226025640964508, "learning_rate": 1.6948482051113268e-05, "loss": 0.5348, "step": 18684 }, { "epoch": 0.5130422844590884, "grad_norm": 0.42955827713012695, "learning_rate": 1.694817144571067e-05, "loss": 0.5475, "step": 18685 }, { "epoch": 0.5130697419000549, "grad_norm": 0.347808837890625, "learning_rate": 1.6947860827347513e-05, "loss": 0.4666, "step": 18686 }, { "epoch": 0.5130971993410214, "grad_norm": 0.4013981223106384, "learning_rate": 1.6947550196024383e-05, "loss": 0.5247, "step": 18687 }, { "epoch": 0.5131246567819879, "grad_norm": 0.36478009819984436, "learning_rate": 1.694723955174185e-05, "loss": 0.5127, "step": 18688 }, { "epoch": 0.5131521142229544, "grad_norm": 0.4477667808532715, "learning_rate": 1.6946928894500496e-05, "loss": 0.4817, "step": 18689 }, { "epoch": 0.5131795716639209, "grad_norm": 0.4636624753475189, "learning_rate": 1.6946618224300908e-05, "loss": 0.4902, "step": 18690 }, { "epoch": 0.5132070291048875, "grad_norm": 0.3757869601249695, "learning_rate": 1.6946307541143654e-05, "loss": 0.5616, "step": 18691 }, { "epoch": 0.5132344865458539, "grad_norm": 0.3931494355201721, "learning_rate": 1.6945996845029324e-05, "loss": 0.6243, "step": 18692 }, { "epoch": 0.5132619439868205, "grad_norm": 0.40652647614479065, "learning_rate": 1.6945686135958493e-05, "loss": 0.5298, "step": 18693 }, { "epoch": 0.5132894014277869, "grad_norm": 0.33743950724601746, "learning_rate": 1.694537541393174e-05, "loss": 0.5316, "step": 18694 }, { "epoch": 0.5133168588687534, "grad_norm": 0.3642979860305786, "learning_rate": 1.6945064678949643e-05, "loss": 0.5043, "step": 18695 }, { "epoch": 0.5133443163097199, "grad_norm": 0.33725517988204956, "learning_rate": 1.6944753931012787e-05, "loss": 0.4582, "step": 18696 }, { "epoch": 0.5133717737506864, "grad_norm": 0.35771727561950684, "learning_rate": 1.6944443170121744e-05, "loss": 0.4816, "step": 18697 }, { "epoch": 0.513399231191653, "grad_norm": 0.34944266080856323, "learning_rate": 1.6944132396277097e-05, "loss": 0.4948, "step": 18698 }, { "epoch": 0.5134266886326194, "grad_norm": 0.41065287590026855, "learning_rate": 1.6943821609479433e-05, "loss": 0.6726, "step": 18699 }, { "epoch": 0.513454146073586, "grad_norm": 0.3977707326412201, "learning_rate": 1.6943510809729324e-05, "loss": 0.4571, "step": 18700 }, { "epoch": 0.5134816035145524, "grad_norm": 0.39695772528648376, "learning_rate": 1.694319999702735e-05, "loss": 0.4825, "step": 18701 }, { "epoch": 0.513509060955519, "grad_norm": 0.37188735604286194, "learning_rate": 1.694288917137409e-05, "loss": 0.5005, "step": 18702 }, { "epoch": 0.5135365183964854, "grad_norm": 0.3639078736305237, "learning_rate": 1.6942578332770127e-05, "loss": 0.5598, "step": 18703 }, { "epoch": 0.513563975837452, "grad_norm": 0.37298837304115295, "learning_rate": 1.694226748121604e-05, "loss": 0.4974, "step": 18704 }, { "epoch": 0.5135914332784185, "grad_norm": 0.37391629815101624, "learning_rate": 1.694195661671241e-05, "loss": 0.4718, "step": 18705 }, { "epoch": 0.5136188907193849, "grad_norm": 0.3683781027793884, "learning_rate": 1.6941645739259813e-05, "loss": 0.5232, "step": 18706 }, { "epoch": 0.5136463481603515, "grad_norm": 0.3985866606235504, "learning_rate": 1.6941334848858835e-05, "loss": 0.6029, "step": 18707 }, { "epoch": 0.5136738056013179, "grad_norm": 0.33321788907051086, "learning_rate": 1.694102394551005e-05, "loss": 0.4268, "step": 18708 }, { "epoch": 0.5137012630422845, "grad_norm": 0.3857799470424652, "learning_rate": 1.694071302921404e-05, "loss": 0.4973, "step": 18709 }, { "epoch": 0.5137287204832509, "grad_norm": 0.41173428297042847, "learning_rate": 1.6940402099971382e-05, "loss": 0.4399, "step": 18710 }, { "epoch": 0.5137561779242175, "grad_norm": 0.36251530051231384, "learning_rate": 1.694009115778266e-05, "loss": 0.4312, "step": 18711 }, { "epoch": 0.513783635365184, "grad_norm": 0.46105244755744934, "learning_rate": 1.6939780202648454e-05, "loss": 0.5213, "step": 18712 }, { "epoch": 0.5138110928061504, "grad_norm": 0.39996084570884705, "learning_rate": 1.6939469234569347e-05, "loss": 0.5372, "step": 18713 }, { "epoch": 0.513838550247117, "grad_norm": 0.34587258100509644, "learning_rate": 1.6939158253545906e-05, "loss": 0.4491, "step": 18714 }, { "epoch": 0.5138660076880834, "grad_norm": 0.4025329649448395, "learning_rate": 1.693884725957873e-05, "loss": 0.5304, "step": 18715 }, { "epoch": 0.51389346512905, "grad_norm": 0.39447852969169617, "learning_rate": 1.6938536252668382e-05, "loss": 0.4364, "step": 18716 }, { "epoch": 0.5139209225700164, "grad_norm": 0.3497292697429657, "learning_rate": 1.6938225232815454e-05, "loss": 0.5211, "step": 18717 }, { "epoch": 0.513948380010983, "grad_norm": 0.38576507568359375, "learning_rate": 1.693791420002052e-05, "loss": 0.5468, "step": 18718 }, { "epoch": 0.5139758374519495, "grad_norm": 0.5414435863494873, "learning_rate": 1.6937603154284158e-05, "loss": 0.4854, "step": 18719 }, { "epoch": 0.514003294892916, "grad_norm": 0.36633604764938354, "learning_rate": 1.6937292095606956e-05, "loss": 0.5951, "step": 18720 }, { "epoch": 0.5140307523338825, "grad_norm": 0.37443187832832336, "learning_rate": 1.693698102398949e-05, "loss": 0.4905, "step": 18721 }, { "epoch": 0.514058209774849, "grad_norm": 0.3995816111564636, "learning_rate": 1.693666993943234e-05, "loss": 0.4953, "step": 18722 }, { "epoch": 0.5140856672158155, "grad_norm": 0.370633989572525, "learning_rate": 1.6936358841936087e-05, "loss": 0.5643, "step": 18723 }, { "epoch": 0.5141131246567819, "grad_norm": 0.3958241641521454, "learning_rate": 1.693604773150131e-05, "loss": 0.5241, "step": 18724 }, { "epoch": 0.5141405820977485, "grad_norm": 0.39405784010887146, "learning_rate": 1.693573660812859e-05, "loss": 0.4746, "step": 18725 }, { "epoch": 0.514168039538715, "grad_norm": 0.3756093382835388, "learning_rate": 1.693542547181851e-05, "loss": 0.5182, "step": 18726 }, { "epoch": 0.5141954969796815, "grad_norm": 0.3983602523803711, "learning_rate": 1.6935114322571646e-05, "loss": 0.5274, "step": 18727 }, { "epoch": 0.514222954420648, "grad_norm": 0.47980302572250366, "learning_rate": 1.6934803160388576e-05, "loss": 0.514, "step": 18728 }, { "epoch": 0.5142504118616145, "grad_norm": 0.3720860779285431, "learning_rate": 1.693449198526989e-05, "loss": 0.5201, "step": 18729 }, { "epoch": 0.514277869302581, "grad_norm": 0.3469696640968323, "learning_rate": 1.6934180797216165e-05, "loss": 0.4985, "step": 18730 }, { "epoch": 0.5143053267435475, "grad_norm": 0.35542330145835876, "learning_rate": 1.6933869596227978e-05, "loss": 0.5535, "step": 18731 }, { "epoch": 0.514332784184514, "grad_norm": 0.3780272603034973, "learning_rate": 1.6933558382305907e-05, "loss": 0.5508, "step": 18732 }, { "epoch": 0.5143602416254806, "grad_norm": 0.377352237701416, "learning_rate": 1.693324715545054e-05, "loss": 0.472, "step": 18733 }, { "epoch": 0.514387699066447, "grad_norm": 0.3795408010482788, "learning_rate": 1.6932935915662458e-05, "loss": 0.591, "step": 18734 }, { "epoch": 0.5144151565074135, "grad_norm": 0.3489115238189697, "learning_rate": 1.6932624662942232e-05, "loss": 0.4992, "step": 18735 }, { "epoch": 0.51444261394838, "grad_norm": 0.30747392773628235, "learning_rate": 1.693231339729045e-05, "loss": 0.404, "step": 18736 }, { "epoch": 0.5144700713893465, "grad_norm": 0.4092087149620056, "learning_rate": 1.6932002118707692e-05, "loss": 0.4681, "step": 18737 }, { "epoch": 0.514497528830313, "grad_norm": 0.36396750807762146, "learning_rate": 1.6931690827194533e-05, "loss": 0.5207, "step": 18738 }, { "epoch": 0.5145249862712795, "grad_norm": 0.41024070978164673, "learning_rate": 1.693137952275156e-05, "loss": 0.5597, "step": 18739 }, { "epoch": 0.5145524437122461, "grad_norm": 0.4256361126899719, "learning_rate": 1.6931068205379357e-05, "loss": 0.5328, "step": 18740 }, { "epoch": 0.5145799011532125, "grad_norm": 0.4832627773284912, "learning_rate": 1.6930756875078496e-05, "loss": 0.6346, "step": 18741 }, { "epoch": 0.514607358594179, "grad_norm": 0.3839092254638672, "learning_rate": 1.693044553184956e-05, "loss": 0.4724, "step": 18742 }, { "epoch": 0.5146348160351455, "grad_norm": 0.39744871854782104, "learning_rate": 1.6930134175693135e-05, "loss": 0.4635, "step": 18743 }, { "epoch": 0.514662273476112, "grad_norm": 0.4211215376853943, "learning_rate": 1.69298228066098e-05, "loss": 0.4833, "step": 18744 }, { "epoch": 0.5146897309170785, "grad_norm": 1.0832326412200928, "learning_rate": 1.6929511424600128e-05, "loss": 0.4974, "step": 18745 }, { "epoch": 0.514717188358045, "grad_norm": 0.3572729825973511, "learning_rate": 1.6929200029664707e-05, "loss": 0.4772, "step": 18746 }, { "epoch": 0.5147446457990116, "grad_norm": 0.3841545879840851, "learning_rate": 1.692888862180412e-05, "loss": 0.5167, "step": 18747 }, { "epoch": 0.514772103239978, "grad_norm": 0.347608357667923, "learning_rate": 1.692857720101894e-05, "loss": 0.4893, "step": 18748 }, { "epoch": 0.5147995606809446, "grad_norm": 0.4141027331352234, "learning_rate": 1.692826576730975e-05, "loss": 0.4606, "step": 18749 }, { "epoch": 0.514827018121911, "grad_norm": 0.37261366844177246, "learning_rate": 1.692795432067714e-05, "loss": 0.5286, "step": 18750 }, { "epoch": 0.5148544755628776, "grad_norm": 0.38296252489089966, "learning_rate": 1.692764286112168e-05, "loss": 0.4677, "step": 18751 }, { "epoch": 0.514881933003844, "grad_norm": 0.5819352865219116, "learning_rate": 1.6927331388643956e-05, "loss": 0.4296, "step": 18752 }, { "epoch": 0.5149093904448105, "grad_norm": 0.3692910969257355, "learning_rate": 1.6927019903244545e-05, "loss": 0.5019, "step": 18753 }, { "epoch": 0.5149368478857771, "grad_norm": 0.37307360768318176, "learning_rate": 1.6926708404924036e-05, "loss": 0.4965, "step": 18754 }, { "epoch": 0.5149643053267435, "grad_norm": 0.3535226881504059, "learning_rate": 1.6926396893683002e-05, "loss": 0.4561, "step": 18755 }, { "epoch": 0.5149917627677101, "grad_norm": 0.4212149381637573, "learning_rate": 1.6926085369522026e-05, "loss": 0.5733, "step": 18756 }, { "epoch": 0.5150192202086765, "grad_norm": 0.3680340051651001, "learning_rate": 1.6925773832441693e-05, "loss": 0.5809, "step": 18757 }, { "epoch": 0.5150466776496431, "grad_norm": 0.36845651268959045, "learning_rate": 1.692546228244258e-05, "loss": 0.5125, "step": 18758 }, { "epoch": 0.5150741350906095, "grad_norm": 0.35343456268310547, "learning_rate": 1.692515071952527e-05, "loss": 0.4875, "step": 18759 }, { "epoch": 0.5151015925315761, "grad_norm": 0.3867940604686737, "learning_rate": 1.692483914369034e-05, "loss": 0.5542, "step": 18760 }, { "epoch": 0.5151290499725426, "grad_norm": 0.35180389881134033, "learning_rate": 1.6924527554938383e-05, "loss": 0.4377, "step": 18761 }, { "epoch": 0.515156507413509, "grad_norm": 0.40311694145202637, "learning_rate": 1.6924215953269968e-05, "loss": 0.5199, "step": 18762 }, { "epoch": 0.5151839648544756, "grad_norm": 0.4167497456073761, "learning_rate": 1.6923904338685677e-05, "loss": 0.4961, "step": 18763 }, { "epoch": 0.515211422295442, "grad_norm": 0.3496154844760895, "learning_rate": 1.6923592711186098e-05, "loss": 0.5143, "step": 18764 }, { "epoch": 0.5152388797364086, "grad_norm": 0.3706246018409729, "learning_rate": 1.6923281070771808e-05, "loss": 0.5876, "step": 18765 }, { "epoch": 0.515266337177375, "grad_norm": 0.839541494846344, "learning_rate": 1.692296941744339e-05, "loss": 0.4229, "step": 18766 }, { "epoch": 0.5152937946183416, "grad_norm": 0.4071706235408783, "learning_rate": 1.6922657751201424e-05, "loss": 0.5691, "step": 18767 }, { "epoch": 0.5153212520593081, "grad_norm": 0.368060439825058, "learning_rate": 1.692234607204649e-05, "loss": 0.4829, "step": 18768 }, { "epoch": 0.5153487095002746, "grad_norm": 0.44660070538520813, "learning_rate": 1.6922034379979175e-05, "loss": 0.5723, "step": 18769 }, { "epoch": 0.5153761669412411, "grad_norm": 0.3612874448299408, "learning_rate": 1.6921722675000052e-05, "loss": 0.5339, "step": 18770 }, { "epoch": 0.5154036243822075, "grad_norm": 0.3635653257369995, "learning_rate": 1.692141095710971e-05, "loss": 0.5775, "step": 18771 }, { "epoch": 0.5154310818231741, "grad_norm": 0.397318571805954, "learning_rate": 1.6921099226308725e-05, "loss": 0.5178, "step": 18772 }, { "epoch": 0.5154585392641405, "grad_norm": 0.5644494891166687, "learning_rate": 1.6920787482597686e-05, "loss": 0.4837, "step": 18773 }, { "epoch": 0.5154859967051071, "grad_norm": 0.3683586120605469, "learning_rate": 1.6920475725977167e-05, "loss": 0.5553, "step": 18774 }, { "epoch": 0.5155134541460736, "grad_norm": 0.30593615770339966, "learning_rate": 1.692016395644775e-05, "loss": 0.4244, "step": 18775 }, { "epoch": 0.5155409115870401, "grad_norm": 0.3539428412914276, "learning_rate": 1.691985217401002e-05, "loss": 0.4183, "step": 18776 }, { "epoch": 0.5155683690280066, "grad_norm": 0.4569878876209259, "learning_rate": 1.6919540378664557e-05, "loss": 0.6154, "step": 18777 }, { "epoch": 0.5155958264689731, "grad_norm": 0.4301442503929138, "learning_rate": 1.691922857041194e-05, "loss": 0.4793, "step": 18778 }, { "epoch": 0.5156232839099396, "grad_norm": 0.39175912737846375, "learning_rate": 1.6918916749252757e-05, "loss": 0.4643, "step": 18779 }, { "epoch": 0.515650741350906, "grad_norm": 0.387448787689209, "learning_rate": 1.6918604915187585e-05, "loss": 0.4342, "step": 18780 }, { "epoch": 0.5156781987918726, "grad_norm": 0.37361636757850647, "learning_rate": 1.691829306821701e-05, "loss": 0.4601, "step": 18781 }, { "epoch": 0.5157056562328391, "grad_norm": 0.44291234016418457, "learning_rate": 1.6917981208341605e-05, "loss": 0.5441, "step": 18782 }, { "epoch": 0.5157331136738056, "grad_norm": 0.3519397974014282, "learning_rate": 1.691766933556196e-05, "loss": 0.4934, "step": 18783 }, { "epoch": 0.5157605711147721, "grad_norm": 0.3506133258342743, "learning_rate": 1.6917357449878654e-05, "loss": 0.4645, "step": 18784 }, { "epoch": 0.5157880285557386, "grad_norm": 0.3506285846233368, "learning_rate": 1.6917045551292265e-05, "loss": 0.4973, "step": 18785 }, { "epoch": 0.5158154859967051, "grad_norm": 0.37170928716659546, "learning_rate": 1.6916733639803383e-05, "loss": 0.529, "step": 18786 }, { "epoch": 0.5158429434376716, "grad_norm": 0.4235914647579193, "learning_rate": 1.6916421715412583e-05, "loss": 0.5689, "step": 18787 }, { "epoch": 0.5158704008786381, "grad_norm": 0.3889610767364502, "learning_rate": 1.691610977812045e-05, "loss": 0.508, "step": 18788 }, { "epoch": 0.5158978583196047, "grad_norm": 0.3707149028778076, "learning_rate": 1.6915797827927563e-05, "loss": 0.4782, "step": 18789 }, { "epoch": 0.5159253157605711, "grad_norm": 0.3864535689353943, "learning_rate": 1.6915485864834507e-05, "loss": 0.5469, "step": 18790 }, { "epoch": 0.5159527732015377, "grad_norm": 0.4112805128097534, "learning_rate": 1.6915173888841863e-05, "loss": 0.5741, "step": 18791 }, { "epoch": 0.5159802306425041, "grad_norm": 0.4148314595222473, "learning_rate": 1.6914861899950212e-05, "loss": 0.5119, "step": 18792 }, { "epoch": 0.5160076880834706, "grad_norm": 0.371114581823349, "learning_rate": 1.6914549898160137e-05, "loss": 0.5253, "step": 18793 }, { "epoch": 0.5160351455244371, "grad_norm": 0.36905744671821594, "learning_rate": 1.691423788347222e-05, "loss": 0.5451, "step": 18794 }, { "epoch": 0.5160626029654036, "grad_norm": 0.41001689434051514, "learning_rate": 1.6913925855887043e-05, "loss": 0.5793, "step": 18795 }, { "epoch": 0.5160900604063702, "grad_norm": 0.40007156133651733, "learning_rate": 1.6913613815405187e-05, "loss": 0.5028, "step": 18796 }, { "epoch": 0.5161175178473366, "grad_norm": 0.3751251995563507, "learning_rate": 1.6913301762027234e-05, "loss": 0.5733, "step": 18797 }, { "epoch": 0.5161449752883032, "grad_norm": 0.40554285049438477, "learning_rate": 1.6912989695753766e-05, "loss": 0.5038, "step": 18798 }, { "epoch": 0.5161724327292696, "grad_norm": 0.3731381893157959, "learning_rate": 1.6912677616585368e-05, "loss": 0.5508, "step": 18799 }, { "epoch": 0.5161998901702362, "grad_norm": 0.36792001128196716, "learning_rate": 1.6912365524522617e-05, "loss": 0.5041, "step": 18800 }, { "epoch": 0.5162273476112026, "grad_norm": 0.3725389838218689, "learning_rate": 1.69120534195661e-05, "loss": 0.5288, "step": 18801 }, { "epoch": 0.5162548050521691, "grad_norm": 0.4181652367115021, "learning_rate": 1.6911741301716397e-05, "loss": 0.4711, "step": 18802 }, { "epoch": 0.5162822624931357, "grad_norm": 0.3564399480819702, "learning_rate": 1.6911429170974093e-05, "loss": 0.4633, "step": 18803 }, { "epoch": 0.5163097199341021, "grad_norm": 0.3559333086013794, "learning_rate": 1.691111702733976e-05, "loss": 0.4973, "step": 18804 }, { "epoch": 0.5163371773750687, "grad_norm": 0.4321770966053009, "learning_rate": 1.6910804870814e-05, "loss": 0.5367, "step": 18805 }, { "epoch": 0.5163646348160351, "grad_norm": 0.4373331665992737, "learning_rate": 1.6910492701397373e-05, "loss": 0.5032, "step": 18806 }, { "epoch": 0.5163920922570017, "grad_norm": 0.3789956867694855, "learning_rate": 1.6910180519090475e-05, "loss": 0.5715, "step": 18807 }, { "epoch": 0.5164195496979681, "grad_norm": 0.48342832922935486, "learning_rate": 1.6909868323893887e-05, "loss": 0.4966, "step": 18808 }, { "epoch": 0.5164470071389347, "grad_norm": 0.4300185441970825, "learning_rate": 1.690955611580819e-05, "loss": 0.4823, "step": 18809 }, { "epoch": 0.5164744645799012, "grad_norm": 0.4283064901828766, "learning_rate": 1.690924389483396e-05, "loss": 0.5611, "step": 18810 }, { "epoch": 0.5165019220208676, "grad_norm": 0.39149588346481323, "learning_rate": 1.6908931660971787e-05, "loss": 0.4987, "step": 18811 }, { "epoch": 0.5165293794618342, "grad_norm": 0.3893202245235443, "learning_rate": 1.6908619414222254e-05, "loss": 0.5691, "step": 18812 }, { "epoch": 0.5165568369028006, "grad_norm": 0.3993658423423767, "learning_rate": 1.690830715458594e-05, "loss": 0.4775, "step": 18813 }, { "epoch": 0.5165842943437672, "grad_norm": 0.4057522714138031, "learning_rate": 1.6907994882063426e-05, "loss": 0.5112, "step": 18814 }, { "epoch": 0.5166117517847336, "grad_norm": 0.3967551589012146, "learning_rate": 1.69076825966553e-05, "loss": 0.5309, "step": 18815 }, { "epoch": 0.5166392092257002, "grad_norm": 0.30361127853393555, "learning_rate": 1.690737029836214e-05, "loss": 0.4173, "step": 18816 }, { "epoch": 0.5166666666666667, "grad_norm": 0.5026010870933533, "learning_rate": 1.6907057987184528e-05, "loss": 0.5583, "step": 18817 }, { "epoch": 0.5166941241076332, "grad_norm": 0.3627612888813019, "learning_rate": 1.6906745663123048e-05, "loss": 0.4892, "step": 18818 }, { "epoch": 0.5167215815485997, "grad_norm": 0.35617539286613464, "learning_rate": 1.6906433326178288e-05, "loss": 0.4149, "step": 18819 }, { "epoch": 0.5167490389895661, "grad_norm": 0.39057257771492004, "learning_rate": 1.6906120976350818e-05, "loss": 0.4964, "step": 18820 }, { "epoch": 0.5167764964305327, "grad_norm": 0.4334745705127716, "learning_rate": 1.6905808613641233e-05, "loss": 0.4087, "step": 18821 }, { "epoch": 0.5168039538714991, "grad_norm": 0.3689413368701935, "learning_rate": 1.690549623805011e-05, "loss": 0.484, "step": 18822 }, { "epoch": 0.5168314113124657, "grad_norm": 0.4021020829677582, "learning_rate": 1.6905183849578036e-05, "loss": 0.5846, "step": 18823 }, { "epoch": 0.5168588687534322, "grad_norm": 0.4564671218395233, "learning_rate": 1.6904871448225586e-05, "loss": 0.5047, "step": 18824 }, { "epoch": 0.5168863261943987, "grad_norm": 0.41377317905426025, "learning_rate": 1.6904559033993345e-05, "loss": 0.4696, "step": 18825 }, { "epoch": 0.5169137836353652, "grad_norm": 0.37164074182510376, "learning_rate": 1.69042466068819e-05, "loss": 0.4517, "step": 18826 }, { "epoch": 0.5169412410763317, "grad_norm": 0.3550777733325958, "learning_rate": 1.6903934166891836e-05, "loss": 0.4554, "step": 18827 }, { "epoch": 0.5169686985172982, "grad_norm": 0.499445378780365, "learning_rate": 1.6903621714023725e-05, "loss": 0.4969, "step": 18828 }, { "epoch": 0.5169961559582646, "grad_norm": 0.3504363000392914, "learning_rate": 1.690330924827816e-05, "loss": 0.4984, "step": 18829 }, { "epoch": 0.5170236133992312, "grad_norm": 0.39997759461402893, "learning_rate": 1.6902996769655718e-05, "loss": 0.5287, "step": 18830 }, { "epoch": 0.5170510708401977, "grad_norm": 0.36369842290878296, "learning_rate": 1.6902684278156983e-05, "loss": 0.4719, "step": 18831 }, { "epoch": 0.5170785282811642, "grad_norm": 0.3649572730064392, "learning_rate": 1.690237177378254e-05, "loss": 0.4636, "step": 18832 }, { "epoch": 0.5171059857221307, "grad_norm": 0.31605830788612366, "learning_rate": 1.690205925653297e-05, "loss": 0.4612, "step": 18833 }, { "epoch": 0.5171334431630972, "grad_norm": 0.425552636384964, "learning_rate": 1.690174672640886e-05, "loss": 0.4878, "step": 18834 }, { "epoch": 0.5171609006040637, "grad_norm": 0.4156164824962616, "learning_rate": 1.690143418341079e-05, "loss": 0.5563, "step": 18835 }, { "epoch": 0.5171883580450302, "grad_norm": 0.3762718439102173, "learning_rate": 1.6901121627539335e-05, "loss": 0.4427, "step": 18836 }, { "epoch": 0.5172158154859967, "grad_norm": 0.38159283995628357, "learning_rate": 1.6900809058795095e-05, "loss": 0.4843, "step": 18837 }, { "epoch": 0.5172432729269633, "grad_norm": 0.391265869140625, "learning_rate": 1.6900496477178638e-05, "loss": 0.4648, "step": 18838 }, { "epoch": 0.5172707303679297, "grad_norm": 0.3617302477359772, "learning_rate": 1.6900183882690552e-05, "loss": 0.4665, "step": 18839 }, { "epoch": 0.5172981878088962, "grad_norm": 0.38383200764656067, "learning_rate": 1.6899871275331422e-05, "loss": 0.4323, "step": 18840 }, { "epoch": 0.5173256452498627, "grad_norm": 0.3692295253276825, "learning_rate": 1.6899558655101832e-05, "loss": 0.4835, "step": 18841 }, { "epoch": 0.5173531026908292, "grad_norm": 0.38920947909355164, "learning_rate": 1.689924602200236e-05, "loss": 0.4768, "step": 18842 }, { "epoch": 0.5173805601317957, "grad_norm": 0.3644101619720459, "learning_rate": 1.6898933376033594e-05, "loss": 0.4699, "step": 18843 }, { "epoch": 0.5174080175727622, "grad_norm": 0.41527339816093445, "learning_rate": 1.689862071719611e-05, "loss": 0.521, "step": 18844 }, { "epoch": 0.5174354750137288, "grad_norm": 0.375967413187027, "learning_rate": 1.6898308045490503e-05, "loss": 0.5251, "step": 18845 }, { "epoch": 0.5174629324546952, "grad_norm": 0.3504123091697693, "learning_rate": 1.689799536091735e-05, "loss": 0.5549, "step": 18846 }, { "epoch": 0.5174903898956618, "grad_norm": 0.4193456470966339, "learning_rate": 1.689768266347723e-05, "loss": 0.4644, "step": 18847 }, { "epoch": 0.5175178473366282, "grad_norm": 0.3487400710582733, "learning_rate": 1.6897369953170733e-05, "loss": 0.5096, "step": 18848 }, { "epoch": 0.5175453047775948, "grad_norm": 0.38032928109169006, "learning_rate": 1.689705722999844e-05, "loss": 0.4863, "step": 18849 }, { "epoch": 0.5175727622185612, "grad_norm": 0.32705259323120117, "learning_rate": 1.6896744493960935e-05, "loss": 0.465, "step": 18850 }, { "epoch": 0.5176002196595277, "grad_norm": 0.4102742075920105, "learning_rate": 1.6896431745058798e-05, "loss": 0.498, "step": 18851 }, { "epoch": 0.5176276771004943, "grad_norm": 0.37644630670547485, "learning_rate": 1.6896118983292615e-05, "loss": 0.5379, "step": 18852 }, { "epoch": 0.5176551345414607, "grad_norm": 0.41633620858192444, "learning_rate": 1.6895806208662966e-05, "loss": 0.5288, "step": 18853 }, { "epoch": 0.5176825919824273, "grad_norm": 0.36218395829200745, "learning_rate": 1.6895493421170445e-05, "loss": 0.4467, "step": 18854 }, { "epoch": 0.5177100494233937, "grad_norm": 0.857867956161499, "learning_rate": 1.6895180620815623e-05, "loss": 0.5101, "step": 18855 }, { "epoch": 0.5177375068643603, "grad_norm": 0.40396907925605774, "learning_rate": 1.689486780759909e-05, "loss": 0.452, "step": 18856 }, { "epoch": 0.5177649643053267, "grad_norm": 0.32300132513046265, "learning_rate": 1.6894554981521423e-05, "loss": 0.4841, "step": 18857 }, { "epoch": 0.5177924217462933, "grad_norm": 0.4168510437011719, "learning_rate": 1.6894242142583217e-05, "loss": 0.4724, "step": 18858 }, { "epoch": 0.5178198791872598, "grad_norm": 0.31230735778808594, "learning_rate": 1.6893929290785046e-05, "loss": 0.5072, "step": 18859 }, { "epoch": 0.5178473366282262, "grad_norm": 0.37159502506256104, "learning_rate": 1.6893616426127498e-05, "loss": 0.5002, "step": 18860 }, { "epoch": 0.5178747940691928, "grad_norm": 0.3580821454524994, "learning_rate": 1.6893303548611152e-05, "loss": 0.5423, "step": 18861 }, { "epoch": 0.5179022515101592, "grad_norm": 0.3922225832939148, "learning_rate": 1.68929906582366e-05, "loss": 0.5602, "step": 18862 }, { "epoch": 0.5179297089511258, "grad_norm": 0.37729525566101074, "learning_rate": 1.689267775500442e-05, "loss": 0.452, "step": 18863 }, { "epoch": 0.5179571663920922, "grad_norm": 0.3419925570487976, "learning_rate": 1.6892364838915193e-05, "loss": 0.5423, "step": 18864 }, { "epoch": 0.5179846238330588, "grad_norm": 0.4055200517177582, "learning_rate": 1.6892051909969508e-05, "loss": 0.5667, "step": 18865 }, { "epoch": 0.5180120812740253, "grad_norm": 0.3803432285785675, "learning_rate": 1.6891738968167942e-05, "loss": 0.4915, "step": 18866 }, { "epoch": 0.5180395387149918, "grad_norm": 0.4027034640312195, "learning_rate": 1.6891426013511084e-05, "loss": 0.5371, "step": 18867 }, { "epoch": 0.5180669961559583, "grad_norm": 0.39849868416786194, "learning_rate": 1.689111304599952e-05, "loss": 0.5944, "step": 18868 }, { "epoch": 0.5180944535969247, "grad_norm": 0.41467034816741943, "learning_rate": 1.689080006563383e-05, "loss": 0.6254, "step": 18869 }, { "epoch": 0.5181219110378913, "grad_norm": 0.36746782064437866, "learning_rate": 1.68904870724146e-05, "loss": 0.4657, "step": 18870 }, { "epoch": 0.5181493684788577, "grad_norm": 0.35142430663108826, "learning_rate": 1.689017406634241e-05, "loss": 0.447, "step": 18871 }, { "epoch": 0.5181768259198243, "grad_norm": 0.41278815269470215, "learning_rate": 1.6889861047417852e-05, "loss": 0.6125, "step": 18872 }, { "epoch": 0.5182042833607908, "grad_norm": 0.3636972904205322, "learning_rate": 1.6889548015641496e-05, "loss": 0.5045, "step": 18873 }, { "epoch": 0.5182317408017573, "grad_norm": 0.37217506766319275, "learning_rate": 1.688923497101394e-05, "loss": 0.4823, "step": 18874 }, { "epoch": 0.5182591982427238, "grad_norm": 0.41376161575317383, "learning_rate": 1.6888921913535758e-05, "loss": 0.4995, "step": 18875 }, { "epoch": 0.5182866556836903, "grad_norm": 0.3779754638671875, "learning_rate": 1.6888608843207538e-05, "loss": 0.554, "step": 18876 }, { "epoch": 0.5183141131246568, "grad_norm": 0.43648841977119446, "learning_rate": 1.688829576002987e-05, "loss": 0.517, "step": 18877 }, { "epoch": 0.5183415705656232, "grad_norm": 0.4185589551925659, "learning_rate": 1.6887982664003323e-05, "loss": 0.4881, "step": 18878 }, { "epoch": 0.5183690280065898, "grad_norm": 0.3509986102581024, "learning_rate": 1.6887669555128496e-05, "loss": 0.4606, "step": 18879 }, { "epoch": 0.5183964854475562, "grad_norm": 0.37593621015548706, "learning_rate": 1.6887356433405963e-05, "loss": 0.4485, "step": 18880 }, { "epoch": 0.5184239428885228, "grad_norm": 0.38500288128852844, "learning_rate": 1.6887043298836318e-05, "loss": 0.5719, "step": 18881 }, { "epoch": 0.5184514003294893, "grad_norm": 0.35354694724082947, "learning_rate": 1.6886730151420134e-05, "loss": 0.4029, "step": 18882 }, { "epoch": 0.5184788577704558, "grad_norm": 0.39938586950302124, "learning_rate": 1.6886416991158e-05, "loss": 0.4793, "step": 18883 }, { "epoch": 0.5185063152114223, "grad_norm": 0.34833166003227234, "learning_rate": 1.6886103818050504e-05, "loss": 0.4443, "step": 18884 }, { "epoch": 0.5185337726523888, "grad_norm": 0.5801127552986145, "learning_rate": 1.6885790632098225e-05, "loss": 0.519, "step": 18885 }, { "epoch": 0.5185612300933553, "grad_norm": 0.34382110834121704, "learning_rate": 1.6885477433301747e-05, "loss": 0.5122, "step": 18886 }, { "epoch": 0.5185886875343217, "grad_norm": 0.36740732192993164, "learning_rate": 1.688516422166166e-05, "loss": 0.4082, "step": 18887 }, { "epoch": 0.5186161449752883, "grad_norm": 0.3785931468009949, "learning_rate": 1.688485099717854e-05, "loss": 0.4239, "step": 18888 }, { "epoch": 0.5186436024162548, "grad_norm": 0.32962173223495483, "learning_rate": 1.6884537759852978e-05, "loss": 0.4656, "step": 18889 }, { "epoch": 0.5186710598572213, "grad_norm": 0.3573894798755646, "learning_rate": 1.6884224509685555e-05, "loss": 0.4986, "step": 18890 }, { "epoch": 0.5186985172981878, "grad_norm": 0.35526520013809204, "learning_rate": 1.6883911246676858e-05, "loss": 0.5408, "step": 18891 }, { "epoch": 0.5187259747391543, "grad_norm": 0.4247969388961792, "learning_rate": 1.6883597970827467e-05, "loss": 0.5214, "step": 18892 }, { "epoch": 0.5187534321801208, "grad_norm": 0.3954562544822693, "learning_rate": 1.6883284682137968e-05, "loss": 0.5536, "step": 18893 }, { "epoch": 0.5187808896210873, "grad_norm": 0.37607818841934204, "learning_rate": 1.6882971380608947e-05, "loss": 0.4758, "step": 18894 }, { "epoch": 0.5188083470620538, "grad_norm": 0.3653302490711212, "learning_rate": 1.688265806624099e-05, "loss": 0.4951, "step": 18895 }, { "epoch": 0.5188358045030204, "grad_norm": 0.3687068521976471, "learning_rate": 1.6882344739034677e-05, "loss": 0.4298, "step": 18896 }, { "epoch": 0.5188632619439868, "grad_norm": 0.35975098609924316, "learning_rate": 1.6882031398990594e-05, "loss": 0.5151, "step": 18897 }, { "epoch": 0.5188907193849533, "grad_norm": 0.40958842635154724, "learning_rate": 1.6881718046109327e-05, "loss": 0.4686, "step": 18898 }, { "epoch": 0.5189181768259198, "grad_norm": 0.3691691756248474, "learning_rate": 1.688140468039146e-05, "loss": 0.5172, "step": 18899 }, { "epoch": 0.5189456342668863, "grad_norm": 0.37004923820495605, "learning_rate": 1.6881091301837574e-05, "loss": 0.4887, "step": 18900 }, { "epoch": 0.5189730917078528, "grad_norm": 0.4172004461288452, "learning_rate": 1.688077791044826e-05, "loss": 0.5199, "step": 18901 }, { "epoch": 0.5190005491488193, "grad_norm": 0.4217553734779358, "learning_rate": 1.6880464506224098e-05, "loss": 0.535, "step": 18902 }, { "epoch": 0.5190280065897859, "grad_norm": 0.37289392948150635, "learning_rate": 1.6880151089165672e-05, "loss": 0.5035, "step": 18903 }, { "epoch": 0.5190554640307523, "grad_norm": 0.37046805024147034, "learning_rate": 1.687983765927357e-05, "loss": 0.45, "step": 18904 }, { "epoch": 0.5190829214717189, "grad_norm": 0.3984874188899994, "learning_rate": 1.6879524216548376e-05, "loss": 0.5783, "step": 18905 }, { "epoch": 0.5191103789126853, "grad_norm": 0.3827870488166809, "learning_rate": 1.6879210760990673e-05, "loss": 0.4769, "step": 18906 }, { "epoch": 0.5191378363536518, "grad_norm": 0.3532126843929291, "learning_rate": 1.6878897292601047e-05, "loss": 0.4995, "step": 18907 }, { "epoch": 0.5191652937946183, "grad_norm": 0.3421732187271118, "learning_rate": 1.687858381138008e-05, "loss": 0.5196, "step": 18908 }, { "epoch": 0.5191927512355848, "grad_norm": 0.40706828236579895, "learning_rate": 1.687827031732836e-05, "loss": 0.6051, "step": 18909 }, { "epoch": 0.5192202086765514, "grad_norm": 0.4036581814289093, "learning_rate": 1.6877956810446467e-05, "loss": 0.5454, "step": 18910 }, { "epoch": 0.5192476661175178, "grad_norm": 0.38375523686408997, "learning_rate": 1.6877643290734993e-05, "loss": 0.5424, "step": 18911 }, { "epoch": 0.5192751235584844, "grad_norm": 0.7249485850334167, "learning_rate": 1.687732975819452e-05, "loss": 0.5052, "step": 18912 }, { "epoch": 0.5193025809994508, "grad_norm": 0.41477179527282715, "learning_rate": 1.687701621282563e-05, "loss": 0.3901, "step": 18913 }, { "epoch": 0.5193300384404174, "grad_norm": 0.3768211603164673, "learning_rate": 1.687670265462891e-05, "loss": 0.5523, "step": 18914 }, { "epoch": 0.5193574958813838, "grad_norm": 0.3704836964607239, "learning_rate": 1.6876389083604947e-05, "loss": 0.4602, "step": 18915 }, { "epoch": 0.5193849533223504, "grad_norm": 0.362648606300354, "learning_rate": 1.6876075499754322e-05, "loss": 0.5352, "step": 18916 }, { "epoch": 0.5194124107633169, "grad_norm": 0.41108834743499756, "learning_rate": 1.687576190307762e-05, "loss": 0.4648, "step": 18917 }, { "epoch": 0.5194398682042833, "grad_norm": 0.33999669551849365, "learning_rate": 1.687544829357543e-05, "loss": 0.4661, "step": 18918 }, { "epoch": 0.5194673256452499, "grad_norm": 0.309731125831604, "learning_rate": 1.6875134671248335e-05, "loss": 0.492, "step": 18919 }, { "epoch": 0.5194947830862163, "grad_norm": 0.40000537037849426, "learning_rate": 1.687482103609692e-05, "loss": 0.4741, "step": 18920 }, { "epoch": 0.5195222405271829, "grad_norm": 0.3324780762195587, "learning_rate": 1.6874507388121767e-05, "loss": 0.4851, "step": 18921 }, { "epoch": 0.5195496979681493, "grad_norm": 0.3614453673362732, "learning_rate": 1.6874193727323464e-05, "loss": 0.5433, "step": 18922 }, { "epoch": 0.5195771554091159, "grad_norm": 0.36453866958618164, "learning_rate": 1.6873880053702593e-05, "loss": 0.4899, "step": 18923 }, { "epoch": 0.5196046128500824, "grad_norm": 0.36976873874664307, "learning_rate": 1.6873566367259745e-05, "loss": 0.4902, "step": 18924 }, { "epoch": 0.5196320702910489, "grad_norm": 0.3609448969364166, "learning_rate": 1.6873252667995507e-05, "loss": 0.4659, "step": 18925 }, { "epoch": 0.5196595277320154, "grad_norm": 0.3573462963104248, "learning_rate": 1.687293895591045e-05, "loss": 0.5346, "step": 18926 }, { "epoch": 0.5196869851729818, "grad_norm": 0.39950841665267944, "learning_rate": 1.6872625231005173e-05, "loss": 0.551, "step": 18927 }, { "epoch": 0.5197144426139484, "grad_norm": 0.39500850439071655, "learning_rate": 1.6872311493280256e-05, "loss": 0.47, "step": 18928 }, { "epoch": 0.5197419000549148, "grad_norm": 0.4183475971221924, "learning_rate": 1.6871997742736285e-05, "loss": 0.5135, "step": 18929 }, { "epoch": 0.5197693574958814, "grad_norm": 0.3440958559513092, "learning_rate": 1.6871683979373842e-05, "loss": 0.4997, "step": 18930 }, { "epoch": 0.5197968149368479, "grad_norm": 0.48556432127952576, "learning_rate": 1.687137020319352e-05, "loss": 0.5053, "step": 18931 }, { "epoch": 0.5198242723778144, "grad_norm": 0.37415406107902527, "learning_rate": 1.6871056414195897e-05, "loss": 0.4915, "step": 18932 }, { "epoch": 0.5198517298187809, "grad_norm": 0.38712629675865173, "learning_rate": 1.687074261238156e-05, "loss": 0.5993, "step": 18933 }, { "epoch": 0.5198791872597474, "grad_norm": 0.3410363495349884, "learning_rate": 1.6870428797751097e-05, "loss": 0.4679, "step": 18934 }, { "epoch": 0.5199066447007139, "grad_norm": 0.3978807330131531, "learning_rate": 1.687011497030509e-05, "loss": 0.4774, "step": 18935 }, { "epoch": 0.5199341021416803, "grad_norm": 0.39604371786117554, "learning_rate": 1.6869801130044127e-05, "loss": 0.4859, "step": 18936 }, { "epoch": 0.5199615595826469, "grad_norm": 0.38651683926582336, "learning_rate": 1.686948727696879e-05, "loss": 0.5124, "step": 18937 }, { "epoch": 0.5199890170236134, "grad_norm": 0.3792102634906769, "learning_rate": 1.6869173411079672e-05, "loss": 0.4449, "step": 18938 }, { "epoch": 0.5200164744645799, "grad_norm": 0.4417475759983063, "learning_rate": 1.6868859532377346e-05, "loss": 0.5169, "step": 18939 }, { "epoch": 0.5200439319055464, "grad_norm": 0.3277568221092224, "learning_rate": 1.686854564086241e-05, "loss": 0.4254, "step": 18940 }, { "epoch": 0.5200713893465129, "grad_norm": 0.3965734839439392, "learning_rate": 1.686823173653544e-05, "loss": 0.5064, "step": 18941 }, { "epoch": 0.5200988467874794, "grad_norm": 0.5173834562301636, "learning_rate": 1.6867917819397032e-05, "loss": 0.5693, "step": 18942 }, { "epoch": 0.5201263042284459, "grad_norm": 0.37954169511795044, "learning_rate": 1.6867603889447762e-05, "loss": 0.5235, "step": 18943 }, { "epoch": 0.5201537616694124, "grad_norm": 0.36921656131744385, "learning_rate": 1.6867289946688217e-05, "loss": 0.4392, "step": 18944 }, { "epoch": 0.520181219110379, "grad_norm": 0.6706227660179138, "learning_rate": 1.6866975991118986e-05, "loss": 0.5024, "step": 18945 }, { "epoch": 0.5202086765513454, "grad_norm": 0.338155061006546, "learning_rate": 1.686666202274065e-05, "loss": 0.4868, "step": 18946 }, { "epoch": 0.5202361339923119, "grad_norm": 0.3850488066673279, "learning_rate": 1.6866348041553803e-05, "loss": 0.4124, "step": 18947 }, { "epoch": 0.5202635914332784, "grad_norm": 0.3647080957889557, "learning_rate": 1.6866034047559023e-05, "loss": 0.515, "step": 18948 }, { "epoch": 0.5202910488742449, "grad_norm": 0.42123132944107056, "learning_rate": 1.6865720040756896e-05, "loss": 0.5809, "step": 18949 }, { "epoch": 0.5203185063152114, "grad_norm": 0.37151408195495605, "learning_rate": 1.6865406021148013e-05, "loss": 0.4974, "step": 18950 }, { "epoch": 0.5203459637561779, "grad_norm": 0.3892247676849365, "learning_rate": 1.6865091988732958e-05, "loss": 0.4593, "step": 18951 }, { "epoch": 0.5203734211971445, "grad_norm": 0.3638782799243927, "learning_rate": 1.6864777943512313e-05, "loss": 0.557, "step": 18952 }, { "epoch": 0.5204008786381109, "grad_norm": 0.33876314759254456, "learning_rate": 1.6864463885486664e-05, "loss": 0.5474, "step": 18953 }, { "epoch": 0.5204283360790775, "grad_norm": 0.36781045794487, "learning_rate": 1.68641498146566e-05, "loss": 0.4119, "step": 18954 }, { "epoch": 0.5204557935200439, "grad_norm": 0.4282263219356537, "learning_rate": 1.686383573102271e-05, "loss": 0.5223, "step": 18955 }, { "epoch": 0.5204832509610104, "grad_norm": 0.3488922417163849, "learning_rate": 1.6863521634585574e-05, "loss": 0.4718, "step": 18956 }, { "epoch": 0.5205107084019769, "grad_norm": 0.40156278014183044, "learning_rate": 1.6863207525345776e-05, "loss": 0.5981, "step": 18957 }, { "epoch": 0.5205381658429434, "grad_norm": 0.4275887906551361, "learning_rate": 1.6862893403303908e-05, "loss": 0.5561, "step": 18958 }, { "epoch": 0.52056562328391, "grad_norm": 0.42413780093193054, "learning_rate": 1.686257926846055e-05, "loss": 0.504, "step": 18959 }, { "epoch": 0.5205930807248764, "grad_norm": 0.40675750374794006, "learning_rate": 1.6862265120816298e-05, "loss": 0.5008, "step": 18960 }, { "epoch": 0.520620538165843, "grad_norm": 0.3755193054676056, "learning_rate": 1.6861950960371728e-05, "loss": 0.51, "step": 18961 }, { "epoch": 0.5206479956068094, "grad_norm": 0.3663368821144104, "learning_rate": 1.6861636787127426e-05, "loss": 0.5025, "step": 18962 }, { "epoch": 0.520675453047776, "grad_norm": 0.3582582175731659, "learning_rate": 1.6861322601083986e-05, "loss": 0.4836, "step": 18963 }, { "epoch": 0.5207029104887424, "grad_norm": 0.39167824387550354, "learning_rate": 1.686100840224199e-05, "loss": 0.4351, "step": 18964 }, { "epoch": 0.520730367929709, "grad_norm": 0.3699859082698822, "learning_rate": 1.686069419060202e-05, "loss": 0.5099, "step": 18965 }, { "epoch": 0.5207578253706755, "grad_norm": 0.3388902246952057, "learning_rate": 1.6860379966164667e-05, "loss": 0.4623, "step": 18966 }, { "epoch": 0.5207852828116419, "grad_norm": 0.4440940320491791, "learning_rate": 1.6860065728930516e-05, "loss": 0.4369, "step": 18967 }, { "epoch": 0.5208127402526085, "grad_norm": 0.4070604145526886, "learning_rate": 1.6859751478900153e-05, "loss": 0.5745, "step": 18968 }, { "epoch": 0.5208401976935749, "grad_norm": 0.375446081161499, "learning_rate": 1.6859437216074166e-05, "loss": 0.5097, "step": 18969 }, { "epoch": 0.5208676551345415, "grad_norm": 0.4081982970237732, "learning_rate": 1.6859122940453135e-05, "loss": 0.5803, "step": 18970 }, { "epoch": 0.5208951125755079, "grad_norm": 0.38314640522003174, "learning_rate": 1.6858808652037656e-05, "loss": 0.5522, "step": 18971 }, { "epoch": 0.5209225700164745, "grad_norm": 0.4085412621498108, "learning_rate": 1.6858494350828306e-05, "loss": 0.4966, "step": 18972 }, { "epoch": 0.520950027457441, "grad_norm": 0.38129785656929016, "learning_rate": 1.6858180036825675e-05, "loss": 0.5145, "step": 18973 }, { "epoch": 0.5209774848984075, "grad_norm": 0.3605664372444153, "learning_rate": 1.685786571003035e-05, "loss": 0.5401, "step": 18974 }, { "epoch": 0.521004942339374, "grad_norm": 0.38751456141471863, "learning_rate": 1.6857551370442916e-05, "loss": 0.5329, "step": 18975 }, { "epoch": 0.5210323997803404, "grad_norm": 0.33605173230171204, "learning_rate": 1.685723701806396e-05, "loss": 0.4425, "step": 18976 }, { "epoch": 0.521059857221307, "grad_norm": 0.4295981824398041, "learning_rate": 1.685692265289407e-05, "loss": 0.4915, "step": 18977 }, { "epoch": 0.5210873146622734, "grad_norm": 0.40637025237083435, "learning_rate": 1.6856608274933832e-05, "loss": 0.5908, "step": 18978 }, { "epoch": 0.52111477210324, "grad_norm": 0.41403141617774963, "learning_rate": 1.685629388418383e-05, "loss": 0.5373, "step": 18979 }, { "epoch": 0.5211422295442065, "grad_norm": 0.4050733745098114, "learning_rate": 1.685597948064465e-05, "loss": 0.4454, "step": 18980 }, { "epoch": 0.521169686985173, "grad_norm": 0.5830341577529907, "learning_rate": 1.6855665064316878e-05, "loss": 0.4889, "step": 18981 }, { "epoch": 0.5211971444261395, "grad_norm": 0.37284013628959656, "learning_rate": 1.6855350635201108e-05, "loss": 0.5457, "step": 18982 }, { "epoch": 0.521224601867106, "grad_norm": 0.4030427038669586, "learning_rate": 1.6855036193297918e-05, "loss": 0.467, "step": 18983 }, { "epoch": 0.5212520593080725, "grad_norm": 0.3743803799152374, "learning_rate": 1.68547217386079e-05, "loss": 0.4617, "step": 18984 }, { "epoch": 0.5212795167490389, "grad_norm": 0.3789041042327881, "learning_rate": 1.6854407271131633e-05, "loss": 0.5723, "step": 18985 }, { "epoch": 0.5213069741900055, "grad_norm": 0.3483228087425232, "learning_rate": 1.6854092790869713e-05, "loss": 0.5315, "step": 18986 }, { "epoch": 0.521334431630972, "grad_norm": 0.36210906505584717, "learning_rate": 1.685377829782272e-05, "loss": 0.4958, "step": 18987 }, { "epoch": 0.5213618890719385, "grad_norm": 0.3325515687465668, "learning_rate": 1.6853463791991248e-05, "loss": 0.4478, "step": 18988 }, { "epoch": 0.521389346512905, "grad_norm": 0.29631656408309937, "learning_rate": 1.6853149273375876e-05, "loss": 0.416, "step": 18989 }, { "epoch": 0.5214168039538715, "grad_norm": 0.4588885009288788, "learning_rate": 1.685283474197719e-05, "loss": 0.6048, "step": 18990 }, { "epoch": 0.521444261394838, "grad_norm": 0.35932573676109314, "learning_rate": 1.6852520197795783e-05, "loss": 0.4855, "step": 18991 }, { "epoch": 0.5214717188358045, "grad_norm": 0.381866991519928, "learning_rate": 1.685220564083224e-05, "loss": 0.4896, "step": 18992 }, { "epoch": 0.521499176276771, "grad_norm": 0.3347355127334595, "learning_rate": 1.6851891071087144e-05, "loss": 0.4967, "step": 18993 }, { "epoch": 0.5215266337177376, "grad_norm": 0.38889291882514954, "learning_rate": 1.6851576488561086e-05, "loss": 0.5557, "step": 18994 }, { "epoch": 0.521554091158704, "grad_norm": 0.40667206048965454, "learning_rate": 1.685126189325465e-05, "loss": 0.44, "step": 18995 }, { "epoch": 0.5215815485996705, "grad_norm": 0.39112624526023865, "learning_rate": 1.6850947285168424e-05, "loss": 0.478, "step": 18996 }, { "epoch": 0.521609006040637, "grad_norm": 0.3906094431877136, "learning_rate": 1.6850632664302996e-05, "loss": 0.5045, "step": 18997 }, { "epoch": 0.5216364634816035, "grad_norm": 0.4233337342739105, "learning_rate": 1.6850318030658947e-05, "loss": 0.4588, "step": 18998 }, { "epoch": 0.52166392092257, "grad_norm": 0.8310515880584717, "learning_rate": 1.6850003384236874e-05, "loss": 0.4558, "step": 18999 }, { "epoch": 0.5216913783635365, "grad_norm": 0.39209645986557007, "learning_rate": 1.6849688725037356e-05, "loss": 0.4938, "step": 19000 }, { "epoch": 0.5217188358045031, "grad_norm": 0.3315217196941376, "learning_rate": 1.6849374053060984e-05, "loss": 0.4768, "step": 19001 }, { "epoch": 0.5217462932454695, "grad_norm": 0.34605923295021057, "learning_rate": 1.6849059368308342e-05, "loss": 0.4888, "step": 19002 }, { "epoch": 0.521773750686436, "grad_norm": 0.34762102365493774, "learning_rate": 1.6848744670780018e-05, "loss": 0.4464, "step": 19003 }, { "epoch": 0.5218012081274025, "grad_norm": 0.3611046075820923, "learning_rate": 1.68484299604766e-05, "loss": 0.5133, "step": 19004 }, { "epoch": 0.521828665568369, "grad_norm": 0.3573717474937439, "learning_rate": 1.6848115237398676e-05, "loss": 0.5184, "step": 19005 }, { "epoch": 0.5218561230093355, "grad_norm": 0.3892163932323456, "learning_rate": 1.6847800501546827e-05, "loss": 0.5398, "step": 19006 }, { "epoch": 0.521883580450302, "grad_norm": 0.39087963104248047, "learning_rate": 1.6847485752921647e-05, "loss": 0.5102, "step": 19007 }, { "epoch": 0.5219110378912686, "grad_norm": 0.33482688665390015, "learning_rate": 1.6847170991523722e-05, "loss": 0.4235, "step": 19008 }, { "epoch": 0.521938495332235, "grad_norm": 0.4007732570171356, "learning_rate": 1.6846856217353634e-05, "loss": 0.5739, "step": 19009 }, { "epoch": 0.5219659527732016, "grad_norm": 0.36928123235702515, "learning_rate": 1.6846541430411977e-05, "loss": 0.5038, "step": 19010 }, { "epoch": 0.521993410214168, "grad_norm": 0.3360326290130615, "learning_rate": 1.6846226630699332e-05, "loss": 0.4954, "step": 19011 }, { "epoch": 0.5220208676551346, "grad_norm": 0.3612794876098633, "learning_rate": 1.6845911818216298e-05, "loss": 0.4892, "step": 19012 }, { "epoch": 0.522048325096101, "grad_norm": 0.3557289242744446, "learning_rate": 1.6845596992963445e-05, "loss": 0.419, "step": 19013 }, { "epoch": 0.5220757825370675, "grad_norm": 0.42474544048309326, "learning_rate": 1.6845282154941368e-05, "loss": 0.5192, "step": 19014 }, { "epoch": 0.5221032399780341, "grad_norm": 0.4863705635070801, "learning_rate": 1.684496730415066e-05, "loss": 0.638, "step": 19015 }, { "epoch": 0.5221306974190005, "grad_norm": 0.3901522159576416, "learning_rate": 1.6844652440591903e-05, "loss": 0.5138, "step": 19016 }, { "epoch": 0.5221581548599671, "grad_norm": 0.3456403911113739, "learning_rate": 1.684433756426568e-05, "loss": 0.4732, "step": 19017 }, { "epoch": 0.5221856123009335, "grad_norm": 0.36034902930259705, "learning_rate": 1.6844022675172588e-05, "loss": 0.4414, "step": 19018 }, { "epoch": 0.5222130697419001, "grad_norm": 0.6282547116279602, "learning_rate": 1.684370777331321e-05, "loss": 0.5038, "step": 19019 }, { "epoch": 0.5222405271828665, "grad_norm": 0.39684560894966125, "learning_rate": 1.6843392858688124e-05, "loss": 0.4987, "step": 19020 }, { "epoch": 0.5222679846238331, "grad_norm": 0.36534571647644043, "learning_rate": 1.6843077931297935e-05, "loss": 0.53, "step": 19021 }, { "epoch": 0.5222954420647996, "grad_norm": 0.3717254400253296, "learning_rate": 1.6842762991143216e-05, "loss": 0.5134, "step": 19022 }, { "epoch": 0.522322899505766, "grad_norm": 0.3810361921787262, "learning_rate": 1.6842448038224565e-05, "loss": 0.4903, "step": 19023 }, { "epoch": 0.5223503569467326, "grad_norm": 0.3518698215484619, "learning_rate": 1.684213307254256e-05, "loss": 0.5593, "step": 19024 }, { "epoch": 0.522377814387699, "grad_norm": 0.38406476378440857, "learning_rate": 1.6841818094097796e-05, "loss": 0.4837, "step": 19025 }, { "epoch": 0.5224052718286656, "grad_norm": 0.4197063744068146, "learning_rate": 1.6841503102890857e-05, "loss": 0.5328, "step": 19026 }, { "epoch": 0.522432729269632, "grad_norm": 0.4662550687789917, "learning_rate": 1.6841188098922333e-05, "loss": 0.6513, "step": 19027 }, { "epoch": 0.5224601867105986, "grad_norm": 0.3890591263771057, "learning_rate": 1.6840873082192807e-05, "loss": 0.5274, "step": 19028 }, { "epoch": 0.5224876441515651, "grad_norm": 0.37159547209739685, "learning_rate": 1.684055805270287e-05, "loss": 0.5208, "step": 19029 }, { "epoch": 0.5225151015925316, "grad_norm": 0.406217098236084, "learning_rate": 1.684024301045311e-05, "loss": 0.5012, "step": 19030 }, { "epoch": 0.5225425590334981, "grad_norm": 0.4032571315765381, "learning_rate": 1.6839927955444113e-05, "loss": 0.5541, "step": 19031 }, { "epoch": 0.5225700164744645, "grad_norm": 0.37870749831199646, "learning_rate": 1.6839612887676468e-05, "loss": 0.5808, "step": 19032 }, { "epoch": 0.5225974739154311, "grad_norm": 0.3680466115474701, "learning_rate": 1.683929780715076e-05, "loss": 0.5608, "step": 19033 }, { "epoch": 0.5226249313563975, "grad_norm": 0.3540470600128174, "learning_rate": 1.6838982713867577e-05, "loss": 0.4708, "step": 19034 }, { "epoch": 0.5226523887973641, "grad_norm": 0.392848402261734, "learning_rate": 1.6838667607827514e-05, "loss": 0.6004, "step": 19035 }, { "epoch": 0.5226798462383306, "grad_norm": 0.34266504645347595, "learning_rate": 1.683835248903115e-05, "loss": 0.4722, "step": 19036 }, { "epoch": 0.5227073036792971, "grad_norm": 0.36578235030174255, "learning_rate": 1.6838037357479076e-05, "loss": 0.5194, "step": 19037 }, { "epoch": 0.5227347611202636, "grad_norm": 0.3790718913078308, "learning_rate": 1.683772221317188e-05, "loss": 0.5469, "step": 19038 }, { "epoch": 0.5227622185612301, "grad_norm": 0.3754431903362274, "learning_rate": 1.683740705611015e-05, "loss": 0.4712, "step": 19039 }, { "epoch": 0.5227896760021966, "grad_norm": 0.34920555353164673, "learning_rate": 1.6837091886294477e-05, "loss": 0.4987, "step": 19040 }, { "epoch": 0.522817133443163, "grad_norm": 0.3569197654724121, "learning_rate": 1.683677670372544e-05, "loss": 0.5049, "step": 19041 }, { "epoch": 0.5228445908841296, "grad_norm": 0.38602152466773987, "learning_rate": 1.6836461508403633e-05, "loss": 0.4615, "step": 19042 }, { "epoch": 0.5228720483250962, "grad_norm": 0.37662768363952637, "learning_rate": 1.683614630032965e-05, "loss": 0.5781, "step": 19043 }, { "epoch": 0.5228995057660626, "grad_norm": 0.35551366209983826, "learning_rate": 1.6835831079504065e-05, "loss": 0.3967, "step": 19044 }, { "epoch": 0.5229269632070291, "grad_norm": 0.34908440709114075, "learning_rate": 1.6835515845927477e-05, "loss": 0.5556, "step": 19045 }, { "epoch": 0.5229544206479956, "grad_norm": 0.5056825280189514, "learning_rate": 1.683520059960047e-05, "loss": 0.5672, "step": 19046 }, { "epoch": 0.5229818780889621, "grad_norm": 0.4828684628009796, "learning_rate": 1.683488534052363e-05, "loss": 0.5068, "step": 19047 }, { "epoch": 0.5230093355299286, "grad_norm": 0.40769538283348083, "learning_rate": 1.6834570068697548e-05, "loss": 0.5259, "step": 19048 }, { "epoch": 0.5230367929708951, "grad_norm": 0.3575684428215027, "learning_rate": 1.683425478412281e-05, "loss": 0.4924, "step": 19049 }, { "epoch": 0.5230642504118617, "grad_norm": 0.3488028645515442, "learning_rate": 1.683393948680001e-05, "loss": 0.4974, "step": 19050 }, { "epoch": 0.5230917078528281, "grad_norm": 0.35067445039749146, "learning_rate": 1.683362417672973e-05, "loss": 0.5336, "step": 19051 }, { "epoch": 0.5231191652937947, "grad_norm": 0.3355090618133545, "learning_rate": 1.6833308853912558e-05, "loss": 0.5602, "step": 19052 }, { "epoch": 0.5231466227347611, "grad_norm": 0.34359169006347656, "learning_rate": 1.6832993518349087e-05, "loss": 0.5229, "step": 19053 }, { "epoch": 0.5231740801757276, "grad_norm": 0.3683093786239624, "learning_rate": 1.68326781700399e-05, "loss": 0.5799, "step": 19054 }, { "epoch": 0.5232015376166941, "grad_norm": 0.3590908944606781, "learning_rate": 1.6832362808985587e-05, "loss": 0.4762, "step": 19055 }, { "epoch": 0.5232289950576606, "grad_norm": 0.34998616576194763, "learning_rate": 1.683204743518674e-05, "loss": 0.5516, "step": 19056 }, { "epoch": 0.5232564524986272, "grad_norm": 0.4020542800426483, "learning_rate": 1.683173204864394e-05, "loss": 0.4969, "step": 19057 }, { "epoch": 0.5232839099395936, "grad_norm": 0.36883658170700073, "learning_rate": 1.683141664935778e-05, "loss": 0.4507, "step": 19058 }, { "epoch": 0.5233113673805602, "grad_norm": 0.4067384898662567, "learning_rate": 1.6831101237328852e-05, "loss": 0.5319, "step": 19059 }, { "epoch": 0.5233388248215266, "grad_norm": 0.41168221831321716, "learning_rate": 1.6830785812557734e-05, "loss": 0.4813, "step": 19060 }, { "epoch": 0.5233662822624932, "grad_norm": 0.4140578508377075, "learning_rate": 1.6830470375045026e-05, "loss": 0.4805, "step": 19061 }, { "epoch": 0.5233937397034596, "grad_norm": 0.3432563245296478, "learning_rate": 1.6830154924791308e-05, "loss": 0.4876, "step": 19062 }, { "epoch": 0.5234211971444261, "grad_norm": 0.3607037365436554, "learning_rate": 1.682983946179717e-05, "loss": 0.5461, "step": 19063 }, { "epoch": 0.5234486545853927, "grad_norm": 0.3550116717815399, "learning_rate": 1.6829523986063202e-05, "loss": 0.4348, "step": 19064 }, { "epoch": 0.5234761120263591, "grad_norm": 0.34829017519950867, "learning_rate": 1.6829208497589993e-05, "loss": 0.4782, "step": 19065 }, { "epoch": 0.5235035694673257, "grad_norm": 0.379910945892334, "learning_rate": 1.6828892996378133e-05, "loss": 0.5545, "step": 19066 }, { "epoch": 0.5235310269082921, "grad_norm": 0.3391853868961334, "learning_rate": 1.6828577482428202e-05, "loss": 0.5107, "step": 19067 }, { "epoch": 0.5235584843492587, "grad_norm": 0.36302855610847473, "learning_rate": 1.6828261955740796e-05, "loss": 0.4878, "step": 19068 }, { "epoch": 0.5235859417902251, "grad_norm": 0.38021528720855713, "learning_rate": 1.6827946416316505e-05, "loss": 0.5515, "step": 19069 }, { "epoch": 0.5236133992311917, "grad_norm": 0.38448575139045715, "learning_rate": 1.682763086415591e-05, "loss": 0.518, "step": 19070 }, { "epoch": 0.5236408566721582, "grad_norm": 0.4058588445186615, "learning_rate": 1.6827315299259612e-05, "loss": 0.5351, "step": 19071 }, { "epoch": 0.5236683141131246, "grad_norm": 0.35425302386283875, "learning_rate": 1.682699972162819e-05, "loss": 0.4809, "step": 19072 }, { "epoch": 0.5236957715540912, "grad_norm": 0.3681386113166809, "learning_rate": 1.6826684131262228e-05, "loss": 0.4666, "step": 19073 }, { "epoch": 0.5237232289950576, "grad_norm": 0.3552451431751251, "learning_rate": 1.682636852816233e-05, "loss": 0.4897, "step": 19074 }, { "epoch": 0.5237506864360242, "grad_norm": 0.3851637840270996, "learning_rate": 1.682605291232907e-05, "loss": 0.4846, "step": 19075 }, { "epoch": 0.5237781438769906, "grad_norm": 0.4089570939540863, "learning_rate": 1.6825737283763042e-05, "loss": 0.5516, "step": 19076 }, { "epoch": 0.5238056013179572, "grad_norm": 0.3768284320831299, "learning_rate": 1.682542164246484e-05, "loss": 0.4807, "step": 19077 }, { "epoch": 0.5238330587589237, "grad_norm": 0.4083001911640167, "learning_rate": 1.6825105988435045e-05, "loss": 0.5598, "step": 19078 }, { "epoch": 0.5238605161998902, "grad_norm": 0.3849581778049469, "learning_rate": 1.682479032167425e-05, "loss": 0.5283, "step": 19079 }, { "epoch": 0.5238879736408567, "grad_norm": 0.3595966696739197, "learning_rate": 1.6824474642183044e-05, "loss": 0.4643, "step": 19080 }, { "epoch": 0.5239154310818231, "grad_norm": 0.358089417219162, "learning_rate": 1.682415894996201e-05, "loss": 0.4943, "step": 19081 }, { "epoch": 0.5239428885227897, "grad_norm": 0.40837711095809937, "learning_rate": 1.6823843245011748e-05, "loss": 0.5168, "step": 19082 }, { "epoch": 0.5239703459637561, "grad_norm": 0.432856947183609, "learning_rate": 1.6823527527332836e-05, "loss": 0.5199, "step": 19083 }, { "epoch": 0.5239978034047227, "grad_norm": 0.3765791356563568, "learning_rate": 1.6823211796925868e-05, "loss": 0.523, "step": 19084 }, { "epoch": 0.5240252608456892, "grad_norm": 0.36400994658470154, "learning_rate": 1.6822896053791434e-05, "loss": 0.5365, "step": 19085 }, { "epoch": 0.5240527182866557, "grad_norm": 0.3268885612487793, "learning_rate": 1.682258029793012e-05, "loss": 0.4864, "step": 19086 }, { "epoch": 0.5240801757276222, "grad_norm": 0.362437903881073, "learning_rate": 1.682226452934252e-05, "loss": 0.5164, "step": 19087 }, { "epoch": 0.5241076331685887, "grad_norm": 0.5012758374214172, "learning_rate": 1.6821948748029213e-05, "loss": 0.5113, "step": 19088 }, { "epoch": 0.5241350906095552, "grad_norm": 0.38912877440452576, "learning_rate": 1.6821632953990795e-05, "loss": 0.4998, "step": 19089 }, { "epoch": 0.5241625480505216, "grad_norm": 0.39192768931388855, "learning_rate": 1.682131714722786e-05, "loss": 0.4712, "step": 19090 }, { "epoch": 0.5241900054914882, "grad_norm": 0.4605688452720642, "learning_rate": 1.6821001327740988e-05, "loss": 0.5488, "step": 19091 }, { "epoch": 0.5242174629324547, "grad_norm": 0.38520562648773193, "learning_rate": 1.682068549553077e-05, "loss": 0.5742, "step": 19092 }, { "epoch": 0.5242449203734212, "grad_norm": 0.3702854514122009, "learning_rate": 1.68203696505978e-05, "loss": 0.5099, "step": 19093 }, { "epoch": 0.5242723778143877, "grad_norm": 0.36112210154533386, "learning_rate": 1.682005379294266e-05, "loss": 0.4653, "step": 19094 }, { "epoch": 0.5242998352553542, "grad_norm": 0.4613388478755951, "learning_rate": 1.6819737922565944e-05, "loss": 0.4833, "step": 19095 }, { "epoch": 0.5243272926963207, "grad_norm": 0.5363832712173462, "learning_rate": 1.681942203946824e-05, "loss": 0.4851, "step": 19096 }, { "epoch": 0.5243547501372872, "grad_norm": 0.3826412558555603, "learning_rate": 1.681910614365014e-05, "loss": 0.5987, "step": 19097 }, { "epoch": 0.5243822075782537, "grad_norm": 0.3670894503593445, "learning_rate": 1.6818790235112228e-05, "loss": 0.5889, "step": 19098 }, { "epoch": 0.5244096650192203, "grad_norm": 0.3783116042613983, "learning_rate": 1.6818474313855096e-05, "loss": 0.5307, "step": 19099 }, { "epoch": 0.5244371224601867, "grad_norm": 0.39593833684921265, "learning_rate": 1.6818158379879338e-05, "loss": 0.425, "step": 19100 }, { "epoch": 0.5244645799011532, "grad_norm": 0.3575367331504822, "learning_rate": 1.681784243318553e-05, "loss": 0.5199, "step": 19101 }, { "epoch": 0.5244920373421197, "grad_norm": 0.43858593702316284, "learning_rate": 1.6817526473774276e-05, "loss": 0.4494, "step": 19102 }, { "epoch": 0.5245194947830862, "grad_norm": 0.3298320770263672, "learning_rate": 1.681721050164616e-05, "loss": 0.5206, "step": 19103 }, { "epoch": 0.5245469522240527, "grad_norm": 0.37781253457069397, "learning_rate": 1.6816894516801766e-05, "loss": 0.5744, "step": 19104 }, { "epoch": 0.5245744096650192, "grad_norm": 0.37422916293144226, "learning_rate": 1.681657851924169e-05, "loss": 0.5231, "step": 19105 }, { "epoch": 0.5246018671059858, "grad_norm": 0.40899068117141724, "learning_rate": 1.6816262508966522e-05, "loss": 0.5018, "step": 19106 }, { "epoch": 0.5246293245469522, "grad_norm": 0.3656761944293976, "learning_rate": 1.6815946485976844e-05, "loss": 0.4819, "step": 19107 }, { "epoch": 0.5246567819879188, "grad_norm": 0.34786659479141235, "learning_rate": 1.6815630450273253e-05, "loss": 0.5409, "step": 19108 }, { "epoch": 0.5246842394288852, "grad_norm": 0.4714853763580322, "learning_rate": 1.6815314401856337e-05, "loss": 0.4328, "step": 19109 }, { "epoch": 0.5247116968698518, "grad_norm": 0.4433494806289673, "learning_rate": 1.6814998340726683e-05, "loss": 0.5435, "step": 19110 }, { "epoch": 0.5247391543108182, "grad_norm": 0.3345203995704651, "learning_rate": 1.6814682266884883e-05, "loss": 0.4675, "step": 19111 }, { "epoch": 0.5247666117517847, "grad_norm": 0.37820133566856384, "learning_rate": 1.6814366180331525e-05, "loss": 0.408, "step": 19112 }, { "epoch": 0.5247940691927513, "grad_norm": 0.35987579822540283, "learning_rate": 1.6814050081067198e-05, "loss": 0.5361, "step": 19113 }, { "epoch": 0.5248215266337177, "grad_norm": 0.4102112650871277, "learning_rate": 1.6813733969092494e-05, "loss": 0.488, "step": 19114 }, { "epoch": 0.5248489840746843, "grad_norm": 0.36655667424201965, "learning_rate": 1.6813417844408003e-05, "loss": 0.5156, "step": 19115 }, { "epoch": 0.5248764415156507, "grad_norm": 0.3796371519565582, "learning_rate": 1.681310170701431e-05, "loss": 0.5377, "step": 19116 }, { "epoch": 0.5249038989566173, "grad_norm": 0.6902063488960266, "learning_rate": 1.681278555691201e-05, "loss": 0.6154, "step": 19117 }, { "epoch": 0.5249313563975837, "grad_norm": 0.3748316168785095, "learning_rate": 1.681246939410169e-05, "loss": 0.5481, "step": 19118 }, { "epoch": 0.5249588138385503, "grad_norm": 0.4381031394004822, "learning_rate": 1.6812153218583936e-05, "loss": 0.4791, "step": 19119 }, { "epoch": 0.5249862712795168, "grad_norm": 0.417222261428833, "learning_rate": 1.6811837030359347e-05, "loss": 0.5294, "step": 19120 }, { "epoch": 0.5250137287204832, "grad_norm": 0.36938661336898804, "learning_rate": 1.681152082942851e-05, "loss": 0.4965, "step": 19121 }, { "epoch": 0.5250411861614498, "grad_norm": 0.3393406271934509, "learning_rate": 1.681120461579201e-05, "loss": 0.454, "step": 19122 }, { "epoch": 0.5250686436024162, "grad_norm": 0.4087430238723755, "learning_rate": 1.681088838945044e-05, "loss": 0.5229, "step": 19123 }, { "epoch": 0.5250961010433828, "grad_norm": 0.3442004919052124, "learning_rate": 1.681057215040439e-05, "loss": 0.4991, "step": 19124 }, { "epoch": 0.5251235584843492, "grad_norm": 0.3997524082660675, "learning_rate": 1.6810255898654446e-05, "loss": 0.5104, "step": 19125 }, { "epoch": 0.5251510159253158, "grad_norm": 0.4781751036643982, "learning_rate": 1.68099396342012e-05, "loss": 0.4815, "step": 19126 }, { "epoch": 0.5251784733662823, "grad_norm": 0.4363936483860016, "learning_rate": 1.6809623357045247e-05, "loss": 0.5673, "step": 19127 }, { "epoch": 0.5252059308072488, "grad_norm": 0.4246797561645508, "learning_rate": 1.6809307067187176e-05, "loss": 0.4991, "step": 19128 }, { "epoch": 0.5252333882482153, "grad_norm": 0.3933110535144806, "learning_rate": 1.680899076462757e-05, "loss": 0.4935, "step": 19129 }, { "epoch": 0.5252608456891817, "grad_norm": 0.37479984760284424, "learning_rate": 1.680867444936702e-05, "loss": 0.5772, "step": 19130 }, { "epoch": 0.5252883031301483, "grad_norm": 0.3583928942680359, "learning_rate": 1.6808358121406122e-05, "loss": 0.5011, "step": 19131 }, { "epoch": 0.5253157605711147, "grad_norm": 0.5592814087867737, "learning_rate": 1.6808041780745465e-05, "loss": 0.6281, "step": 19132 }, { "epoch": 0.5253432180120813, "grad_norm": 0.37775158882141113, "learning_rate": 1.6807725427385636e-05, "loss": 0.6027, "step": 19133 }, { "epoch": 0.5253706754530478, "grad_norm": 0.3713689148426056, "learning_rate": 1.6807409061327227e-05, "loss": 0.4605, "step": 19134 }, { "epoch": 0.5253981328940143, "grad_norm": 0.4083419740200043, "learning_rate": 1.6807092682570826e-05, "loss": 0.4927, "step": 19135 }, { "epoch": 0.5254255903349808, "grad_norm": 0.6965022087097168, "learning_rate": 1.6806776291117025e-05, "loss": 0.482, "step": 19136 }, { "epoch": 0.5254530477759473, "grad_norm": 0.3704613149166107, "learning_rate": 1.6806459886966414e-05, "loss": 0.513, "step": 19137 }, { "epoch": 0.5254805052169138, "grad_norm": 0.3603249788284302, "learning_rate": 1.6806143470119582e-05, "loss": 0.5186, "step": 19138 }, { "epoch": 0.5255079626578802, "grad_norm": 0.5188543796539307, "learning_rate": 1.680582704057712e-05, "loss": 0.5246, "step": 19139 }, { "epoch": 0.5255354200988468, "grad_norm": 0.3727228045463562, "learning_rate": 1.680551059833962e-05, "loss": 0.4973, "step": 19140 }, { "epoch": 0.5255628775398133, "grad_norm": 0.3985389173030853, "learning_rate": 1.6805194143407672e-05, "loss": 0.4901, "step": 19141 }, { "epoch": 0.5255903349807798, "grad_norm": 0.352114737033844, "learning_rate": 1.680487767578186e-05, "loss": 0.5139, "step": 19142 }, { "epoch": 0.5256177924217463, "grad_norm": 0.36314526200294495, "learning_rate": 1.680456119546278e-05, "loss": 0.5115, "step": 19143 }, { "epoch": 0.5256452498627128, "grad_norm": 0.38099151849746704, "learning_rate": 1.6804244702451028e-05, "loss": 0.5594, "step": 19144 }, { "epoch": 0.5256727073036793, "grad_norm": 0.3631795048713684, "learning_rate": 1.6803928196747183e-05, "loss": 0.5411, "step": 19145 }, { "epoch": 0.5257001647446458, "grad_norm": 0.38922378420829773, "learning_rate": 1.680361167835184e-05, "loss": 0.5838, "step": 19146 }, { "epoch": 0.5257276221856123, "grad_norm": 0.40888550877571106, "learning_rate": 1.6803295147265594e-05, "loss": 0.5213, "step": 19147 }, { "epoch": 0.5257550796265787, "grad_norm": 0.37229689955711365, "learning_rate": 1.6802978603489028e-05, "loss": 0.4656, "step": 19148 }, { "epoch": 0.5257825370675453, "grad_norm": 0.3934831917285919, "learning_rate": 1.6802662047022736e-05, "loss": 0.611, "step": 19149 }, { "epoch": 0.5258099945085118, "grad_norm": 0.40785467624664307, "learning_rate": 1.6802345477867305e-05, "loss": 0.5479, "step": 19150 }, { "epoch": 0.5258374519494783, "grad_norm": 0.37173759937286377, "learning_rate": 1.6802028896023333e-05, "loss": 0.5218, "step": 19151 }, { "epoch": 0.5258649093904448, "grad_norm": 0.39577949047088623, "learning_rate": 1.6801712301491405e-05, "loss": 0.6018, "step": 19152 }, { "epoch": 0.5258923668314113, "grad_norm": 0.34941163659095764, "learning_rate": 1.680139569427211e-05, "loss": 0.4685, "step": 19153 }, { "epoch": 0.5259198242723778, "grad_norm": 0.3749145567417145, "learning_rate": 1.6801079074366044e-05, "loss": 0.5333, "step": 19154 }, { "epoch": 0.5259472817133443, "grad_norm": 0.38903799653053284, "learning_rate": 1.6800762441773794e-05, "loss": 0.51, "step": 19155 }, { "epoch": 0.5259747391543108, "grad_norm": 0.38631096482276917, "learning_rate": 1.6800445796495952e-05, "loss": 0.4882, "step": 19156 }, { "epoch": 0.5260021965952774, "grad_norm": 0.42841726541519165, "learning_rate": 1.680012913853311e-05, "loss": 0.5682, "step": 19157 }, { "epoch": 0.5260296540362438, "grad_norm": 0.4430391192436218, "learning_rate": 1.6799812467885856e-05, "loss": 0.5116, "step": 19158 }, { "epoch": 0.5260571114772103, "grad_norm": 0.3784981667995453, "learning_rate": 1.6799495784554778e-05, "loss": 0.4363, "step": 19159 }, { "epoch": 0.5260845689181768, "grad_norm": 0.3874782621860504, "learning_rate": 1.6799179088540475e-05, "loss": 0.4998, "step": 19160 }, { "epoch": 0.5261120263591433, "grad_norm": 0.3836885392665863, "learning_rate": 1.679886237984353e-05, "loss": 0.5111, "step": 19161 }, { "epoch": 0.5261394838001098, "grad_norm": 0.3838655352592468, "learning_rate": 1.6798545658464536e-05, "loss": 0.4947, "step": 19162 }, { "epoch": 0.5261669412410763, "grad_norm": 0.3336462378501892, "learning_rate": 1.6798228924404085e-05, "loss": 0.4848, "step": 19163 }, { "epoch": 0.5261943986820429, "grad_norm": 0.38034048676490784, "learning_rate": 1.679791217766277e-05, "loss": 0.4948, "step": 19164 }, { "epoch": 0.5262218561230093, "grad_norm": 0.40128204226493835, "learning_rate": 1.6797595418241176e-05, "loss": 0.5157, "step": 19165 }, { "epoch": 0.5262493135639759, "grad_norm": 0.41343289613723755, "learning_rate": 1.67972786461399e-05, "loss": 0.5459, "step": 19166 }, { "epoch": 0.5262767710049423, "grad_norm": 0.42055070400238037, "learning_rate": 1.6796961861359526e-05, "loss": 0.5641, "step": 19167 }, { "epoch": 0.5263042284459089, "grad_norm": 0.3727434277534485, "learning_rate": 1.679664506390065e-05, "loss": 0.5265, "step": 19168 }, { "epoch": 0.5263316858868753, "grad_norm": 0.36285459995269775, "learning_rate": 1.679632825376386e-05, "loss": 0.4876, "step": 19169 }, { "epoch": 0.5263591433278418, "grad_norm": 0.3798579275608063, "learning_rate": 1.6796011430949753e-05, "loss": 0.5014, "step": 19170 }, { "epoch": 0.5263866007688084, "grad_norm": 0.39925527572631836, "learning_rate": 1.6795694595458916e-05, "loss": 0.5523, "step": 19171 }, { "epoch": 0.5264140582097748, "grad_norm": 0.38314545154571533, "learning_rate": 1.6795377747291937e-05, "loss": 0.5554, "step": 19172 }, { "epoch": 0.5264415156507414, "grad_norm": 0.36303719878196716, "learning_rate": 1.6795060886449414e-05, "loss": 0.5354, "step": 19173 }, { "epoch": 0.5264689730917078, "grad_norm": 0.33871346712112427, "learning_rate": 1.6794744012931924e-05, "loss": 0.4674, "step": 19174 }, { "epoch": 0.5264964305326744, "grad_norm": 0.5686480402946472, "learning_rate": 1.679442712674008e-05, "loss": 0.4889, "step": 19175 }, { "epoch": 0.5265238879736408, "grad_norm": 0.35752493143081665, "learning_rate": 1.6794110227874448e-05, "loss": 0.4996, "step": 19176 }, { "epoch": 0.5265513454146074, "grad_norm": 0.3468191921710968, "learning_rate": 1.679379331633564e-05, "loss": 0.505, "step": 19177 }, { "epoch": 0.5265788028555739, "grad_norm": 0.35147663950920105, "learning_rate": 1.679347639212424e-05, "loss": 0.3926, "step": 19178 }, { "epoch": 0.5266062602965403, "grad_norm": 0.37651383876800537, "learning_rate": 1.6793159455240834e-05, "loss": 0.4702, "step": 19179 }, { "epoch": 0.5266337177375069, "grad_norm": 0.45632681250572205, "learning_rate": 1.6792842505686023e-05, "loss": 0.5899, "step": 19180 }, { "epoch": 0.5266611751784733, "grad_norm": 0.33764728903770447, "learning_rate": 1.679252554346039e-05, "loss": 0.4295, "step": 19181 }, { "epoch": 0.5266886326194399, "grad_norm": 0.35461992025375366, "learning_rate": 1.6792208568564527e-05, "loss": 0.51, "step": 19182 }, { "epoch": 0.5267160900604063, "grad_norm": 0.3508410155773163, "learning_rate": 1.6791891580999028e-05, "loss": 0.5155, "step": 19183 }, { "epoch": 0.5267435475013729, "grad_norm": 0.4389016032218933, "learning_rate": 1.6791574580764484e-05, "loss": 0.5051, "step": 19184 }, { "epoch": 0.5267710049423394, "grad_norm": 0.3596161901950836, "learning_rate": 1.6791257567861485e-05, "loss": 0.5367, "step": 19185 }, { "epoch": 0.5267984623833059, "grad_norm": 0.3945158123970032, "learning_rate": 1.6790940542290623e-05, "loss": 0.5702, "step": 19186 }, { "epoch": 0.5268259198242724, "grad_norm": 0.3644779622554779, "learning_rate": 1.679062350405249e-05, "loss": 0.4033, "step": 19187 }, { "epoch": 0.5268533772652388, "grad_norm": 0.32592862844467163, "learning_rate": 1.679030645314768e-05, "loss": 0.4927, "step": 19188 }, { "epoch": 0.5268808347062054, "grad_norm": 0.369797945022583, "learning_rate": 1.6789989389576775e-05, "loss": 0.5258, "step": 19189 }, { "epoch": 0.5269082921471718, "grad_norm": 0.3862202763557434, "learning_rate": 1.678967231334038e-05, "loss": 0.5778, "step": 19190 }, { "epoch": 0.5269357495881384, "grad_norm": 0.40515604615211487, "learning_rate": 1.6789355224439075e-05, "loss": 0.5871, "step": 19191 }, { "epoch": 0.5269632070291049, "grad_norm": 0.33311527967453003, "learning_rate": 1.6789038122873454e-05, "loss": 0.4278, "step": 19192 }, { "epoch": 0.5269906644700714, "grad_norm": 0.37520310282707214, "learning_rate": 1.678872100864411e-05, "loss": 0.5767, "step": 19193 }, { "epoch": 0.5270181219110379, "grad_norm": 0.38766831159591675, "learning_rate": 1.6788403881751637e-05, "loss": 0.4963, "step": 19194 }, { "epoch": 0.5270455793520044, "grad_norm": 0.34604915976524353, "learning_rate": 1.6788086742196626e-05, "loss": 0.5282, "step": 19195 }, { "epoch": 0.5270730367929709, "grad_norm": 0.4100956320762634, "learning_rate": 1.6787769589979665e-05, "loss": 0.4834, "step": 19196 }, { "epoch": 0.5271004942339373, "grad_norm": 0.3557393252849579, "learning_rate": 1.6787452425101344e-05, "loss": 0.4351, "step": 19197 }, { "epoch": 0.5271279516749039, "grad_norm": 0.43311917781829834, "learning_rate": 1.678713524756226e-05, "loss": 0.5406, "step": 19198 }, { "epoch": 0.5271554091158704, "grad_norm": 0.3942387104034424, "learning_rate": 1.6786818057363005e-05, "loss": 0.5457, "step": 19199 }, { "epoch": 0.5271828665568369, "grad_norm": 0.3914620876312256, "learning_rate": 1.6786500854504167e-05, "loss": 0.4688, "step": 19200 }, { "epoch": 0.5272103239978034, "grad_norm": 0.350506454706192, "learning_rate": 1.6786183638986336e-05, "loss": 0.5624, "step": 19201 }, { "epoch": 0.5272377814387699, "grad_norm": 0.410220205783844, "learning_rate": 1.678586641081011e-05, "loss": 0.494, "step": 19202 }, { "epoch": 0.5272652388797364, "grad_norm": 0.36749276518821716, "learning_rate": 1.6785549169976078e-05, "loss": 0.437, "step": 19203 }, { "epoch": 0.5272926963207029, "grad_norm": 0.4050450026988983, "learning_rate": 1.678523191648483e-05, "loss": 0.5465, "step": 19204 }, { "epoch": 0.5273201537616694, "grad_norm": 0.3726862072944641, "learning_rate": 1.6784914650336953e-05, "loss": 0.4705, "step": 19205 }, { "epoch": 0.527347611202636, "grad_norm": 0.3481220602989197, "learning_rate": 1.6784597371533052e-05, "loss": 0.3943, "step": 19206 }, { "epoch": 0.5273750686436024, "grad_norm": 0.34966224431991577, "learning_rate": 1.678428008007371e-05, "loss": 0.4859, "step": 19207 }, { "epoch": 0.527402526084569, "grad_norm": 0.3816086947917938, "learning_rate": 1.678396277595952e-05, "loss": 0.5367, "step": 19208 }, { "epoch": 0.5274299835255354, "grad_norm": 0.3312292993068695, "learning_rate": 1.6783645459191075e-05, "loss": 0.5053, "step": 19209 }, { "epoch": 0.5274574409665019, "grad_norm": 0.3624593913555145, "learning_rate": 1.6783328129768963e-05, "loss": 0.5113, "step": 19210 }, { "epoch": 0.5274848984074684, "grad_norm": 0.3641042113304138, "learning_rate": 1.6783010787693782e-05, "loss": 0.4552, "step": 19211 }, { "epoch": 0.5275123558484349, "grad_norm": 0.37600281834602356, "learning_rate": 1.678269343296612e-05, "loss": 0.4865, "step": 19212 }, { "epoch": 0.5275398132894015, "grad_norm": 0.3687096834182739, "learning_rate": 1.678237606558657e-05, "loss": 0.4042, "step": 19213 }, { "epoch": 0.5275672707303679, "grad_norm": 0.41955721378326416, "learning_rate": 1.6782058685555726e-05, "loss": 0.4385, "step": 19214 }, { "epoch": 0.5275947281713345, "grad_norm": 0.42253705859184265, "learning_rate": 1.6781741292874175e-05, "loss": 0.5351, "step": 19215 }, { "epoch": 0.5276221856123009, "grad_norm": 0.41899731755256653, "learning_rate": 1.6781423887542513e-05, "loss": 0.5544, "step": 19216 }, { "epoch": 0.5276496430532674, "grad_norm": 0.35970762372016907, "learning_rate": 1.678110646956133e-05, "loss": 0.417, "step": 19217 }, { "epoch": 0.5276771004942339, "grad_norm": 0.4153427183628082, "learning_rate": 1.678078903893122e-05, "loss": 0.5379, "step": 19218 }, { "epoch": 0.5277045579352004, "grad_norm": 0.33959513902664185, "learning_rate": 1.6780471595652776e-05, "loss": 0.4885, "step": 19219 }, { "epoch": 0.527732015376167, "grad_norm": 0.38064736127853394, "learning_rate": 1.678015413972659e-05, "loss": 0.5122, "step": 19220 }, { "epoch": 0.5277594728171334, "grad_norm": 0.366242378950119, "learning_rate": 1.6779836671153247e-05, "loss": 0.4187, "step": 19221 }, { "epoch": 0.5277869302581, "grad_norm": 0.4022712707519531, "learning_rate": 1.6779519189933345e-05, "loss": 0.5217, "step": 19222 }, { "epoch": 0.5278143876990664, "grad_norm": 0.3661412000656128, "learning_rate": 1.677920169606748e-05, "loss": 0.5254, "step": 19223 }, { "epoch": 0.527841845140033, "grad_norm": 0.5110522508621216, "learning_rate": 1.677888418955624e-05, "loss": 0.5004, "step": 19224 }, { "epoch": 0.5278693025809994, "grad_norm": 0.3969288468360901, "learning_rate": 1.6778566670400214e-05, "loss": 0.5056, "step": 19225 }, { "epoch": 0.527896760021966, "grad_norm": 0.38329246640205383, "learning_rate": 1.67782491386e-05, "loss": 0.4941, "step": 19226 }, { "epoch": 0.5279242174629325, "grad_norm": 0.3618462085723877, "learning_rate": 1.677793159415619e-05, "loss": 0.4618, "step": 19227 }, { "epoch": 0.5279516749038989, "grad_norm": 0.4099544882774353, "learning_rate": 1.677761403706937e-05, "loss": 0.5499, "step": 19228 }, { "epoch": 0.5279791323448655, "grad_norm": 0.4067249894142151, "learning_rate": 1.6777296467340137e-05, "loss": 0.4838, "step": 19229 }, { "epoch": 0.5280065897858319, "grad_norm": 0.36066070199012756, "learning_rate": 1.6776978884969085e-05, "loss": 0.4862, "step": 19230 }, { "epoch": 0.5280340472267985, "grad_norm": 0.4472638964653015, "learning_rate": 1.6776661289956805e-05, "loss": 0.4321, "step": 19231 }, { "epoch": 0.5280615046677649, "grad_norm": 0.44418203830718994, "learning_rate": 1.677634368230389e-05, "loss": 0.523, "step": 19232 }, { "epoch": 0.5280889621087315, "grad_norm": 0.35203954577445984, "learning_rate": 1.6776026062010924e-05, "loss": 0.4311, "step": 19233 }, { "epoch": 0.528116419549698, "grad_norm": 0.8560782074928284, "learning_rate": 1.6775708429078513e-05, "loss": 0.5344, "step": 19234 }, { "epoch": 0.5281438769906645, "grad_norm": 0.3702181577682495, "learning_rate": 1.6775390783507243e-05, "loss": 0.4686, "step": 19235 }, { "epoch": 0.528171334431631, "grad_norm": 0.37020552158355713, "learning_rate": 1.6775073125297705e-05, "loss": 0.466, "step": 19236 }, { "epoch": 0.5281987918725974, "grad_norm": 0.3982091248035431, "learning_rate": 1.6774755454450494e-05, "loss": 0.4897, "step": 19237 }, { "epoch": 0.528226249313564, "grad_norm": 0.37093690037727356, "learning_rate": 1.67744377709662e-05, "loss": 0.4723, "step": 19238 }, { "epoch": 0.5282537067545304, "grad_norm": 0.44469910860061646, "learning_rate": 1.677412007484542e-05, "loss": 0.6077, "step": 19239 }, { "epoch": 0.528281164195497, "grad_norm": 1.986665964126587, "learning_rate": 1.677380236608874e-05, "loss": 0.6419, "step": 19240 }, { "epoch": 0.5283086216364635, "grad_norm": 0.3858656883239746, "learning_rate": 1.6773484644696764e-05, "loss": 0.4817, "step": 19241 }, { "epoch": 0.52833607907743, "grad_norm": 0.33428171277046204, "learning_rate": 1.6773166910670073e-05, "loss": 0.4901, "step": 19242 }, { "epoch": 0.5283635365183965, "grad_norm": 0.36057883501052856, "learning_rate": 1.6772849164009265e-05, "loss": 0.4988, "step": 19243 }, { "epoch": 0.528390993959363, "grad_norm": 0.37854069471359253, "learning_rate": 1.677253140471493e-05, "loss": 0.5069, "step": 19244 }, { "epoch": 0.5284184514003295, "grad_norm": 0.3335108757019043, "learning_rate": 1.6772213632787663e-05, "loss": 0.4378, "step": 19245 }, { "epoch": 0.5284459088412959, "grad_norm": 0.3537657558917999, "learning_rate": 1.6771895848228057e-05, "loss": 0.4227, "step": 19246 }, { "epoch": 0.5284733662822625, "grad_norm": 0.37530162930488586, "learning_rate": 1.6771578051036703e-05, "loss": 0.5332, "step": 19247 }, { "epoch": 0.528500823723229, "grad_norm": 0.4401248097419739, "learning_rate": 1.6771260241214194e-05, "loss": 0.5919, "step": 19248 }, { "epoch": 0.5285282811641955, "grad_norm": 0.39247605204582214, "learning_rate": 1.6770942418761126e-05, "loss": 0.5619, "step": 19249 }, { "epoch": 0.528555738605162, "grad_norm": 0.3852739930152893, "learning_rate": 1.6770624583678085e-05, "loss": 0.5261, "step": 19250 }, { "epoch": 0.5285831960461285, "grad_norm": 0.3521665334701538, "learning_rate": 1.6770306735965675e-05, "loss": 0.5043, "step": 19251 }, { "epoch": 0.528610653487095, "grad_norm": 0.3663382828235626, "learning_rate": 1.6769988875624474e-05, "loss": 0.4656, "step": 19252 }, { "epoch": 0.5286381109280615, "grad_norm": 0.36031660437583923, "learning_rate": 1.676967100265509e-05, "loss": 0.4376, "step": 19253 }, { "epoch": 0.528665568369028, "grad_norm": 0.4829384684562683, "learning_rate": 1.6769353117058104e-05, "loss": 0.4404, "step": 19254 }, { "epoch": 0.5286930258099946, "grad_norm": 0.35596874356269836, "learning_rate": 1.6769035218834117e-05, "loss": 0.5053, "step": 19255 }, { "epoch": 0.528720483250961, "grad_norm": 0.41918402910232544, "learning_rate": 1.6768717307983718e-05, "loss": 0.5429, "step": 19256 }, { "epoch": 0.5287479406919275, "grad_norm": 0.3656597435474396, "learning_rate": 1.67683993845075e-05, "loss": 0.4672, "step": 19257 }, { "epoch": 0.528775398132894, "grad_norm": 0.37589871883392334, "learning_rate": 1.6768081448406057e-05, "loss": 0.5168, "step": 19258 }, { "epoch": 0.5288028555738605, "grad_norm": 0.41445696353912354, "learning_rate": 1.6767763499679987e-05, "loss": 0.6173, "step": 19259 }, { "epoch": 0.528830313014827, "grad_norm": 0.3878379166126251, "learning_rate": 1.676744553832987e-05, "loss": 0.5528, "step": 19260 }, { "epoch": 0.5288577704557935, "grad_norm": 0.38230928778648376, "learning_rate": 1.6767127564356312e-05, "loss": 0.5268, "step": 19261 }, { "epoch": 0.5288852278967601, "grad_norm": 0.35823333263397217, "learning_rate": 1.67668095777599e-05, "loss": 0.524, "step": 19262 }, { "epoch": 0.5289126853377265, "grad_norm": 0.4199070334434509, "learning_rate": 1.6766491578541228e-05, "loss": 0.5025, "step": 19263 }, { "epoch": 0.5289401427786931, "grad_norm": 0.3432691991329193, "learning_rate": 1.676617356670089e-05, "loss": 0.5571, "step": 19264 }, { "epoch": 0.5289676002196595, "grad_norm": 0.39311617612838745, "learning_rate": 1.676585554223948e-05, "loss": 0.4831, "step": 19265 }, { "epoch": 0.528995057660626, "grad_norm": 0.4017024338245392, "learning_rate": 1.6765537505157587e-05, "loss": 0.4741, "step": 19266 }, { "epoch": 0.5290225151015925, "grad_norm": 0.36922845244407654, "learning_rate": 1.676521945545581e-05, "loss": 0.4801, "step": 19267 }, { "epoch": 0.529049972542559, "grad_norm": 0.38912126421928406, "learning_rate": 1.676490139313474e-05, "loss": 0.4906, "step": 19268 }, { "epoch": 0.5290774299835256, "grad_norm": 0.3729131817817688, "learning_rate": 1.6764583318194966e-05, "loss": 0.545, "step": 19269 }, { "epoch": 0.529104887424492, "grad_norm": 0.3657056391239166, "learning_rate": 1.676426523063709e-05, "loss": 0.535, "step": 19270 }, { "epoch": 0.5291323448654586, "grad_norm": 0.4061165452003479, "learning_rate": 1.6763947130461698e-05, "loss": 0.5143, "step": 19271 }, { "epoch": 0.529159802306425, "grad_norm": 0.37780505418777466, "learning_rate": 1.6763629017669384e-05, "loss": 0.5414, "step": 19272 }, { "epoch": 0.5291872597473916, "grad_norm": 0.49078184366226196, "learning_rate": 1.6763310892260746e-05, "loss": 0.5486, "step": 19273 }, { "epoch": 0.529214717188358, "grad_norm": 0.3650389015674591, "learning_rate": 1.676299275423637e-05, "loss": 0.5328, "step": 19274 }, { "epoch": 0.5292421746293245, "grad_norm": 0.38523370027542114, "learning_rate": 1.676267460359686e-05, "loss": 0.45, "step": 19275 }, { "epoch": 0.5292696320702911, "grad_norm": 0.3226696848869324, "learning_rate": 1.6762356440342804e-05, "loss": 0.4151, "step": 19276 }, { "epoch": 0.5292970895112575, "grad_norm": 0.3528428077697754, "learning_rate": 1.6762038264474793e-05, "loss": 0.5442, "step": 19277 }, { "epoch": 0.5293245469522241, "grad_norm": 0.39372432231903076, "learning_rate": 1.6761720075993418e-05, "loss": 0.4767, "step": 19278 }, { "epoch": 0.5293520043931905, "grad_norm": 0.34649986028671265, "learning_rate": 1.6761401874899282e-05, "loss": 0.4394, "step": 19279 }, { "epoch": 0.5293794618341571, "grad_norm": 0.40644749999046326, "learning_rate": 1.676108366119297e-05, "loss": 0.4769, "step": 19280 }, { "epoch": 0.5294069192751235, "grad_norm": 0.3695274293422699, "learning_rate": 1.6760765434875082e-05, "loss": 0.5228, "step": 19281 }, { "epoch": 0.5294343767160901, "grad_norm": 0.3814099133014679, "learning_rate": 1.6760447195946207e-05, "loss": 0.5211, "step": 19282 }, { "epoch": 0.5294618341570566, "grad_norm": 0.3743430972099304, "learning_rate": 1.6760128944406942e-05, "loss": 0.555, "step": 19283 }, { "epoch": 0.529489291598023, "grad_norm": 0.4357761740684509, "learning_rate": 1.6759810680257878e-05, "loss": 0.5919, "step": 19284 }, { "epoch": 0.5295167490389896, "grad_norm": 0.4259355664253235, "learning_rate": 1.675949240349961e-05, "loss": 0.5835, "step": 19285 }, { "epoch": 0.529544206479956, "grad_norm": 0.3843569755554199, "learning_rate": 1.675917411413273e-05, "loss": 0.4336, "step": 19286 }, { "epoch": 0.5295716639209226, "grad_norm": 0.4091778099536896, "learning_rate": 1.6758855812157833e-05, "loss": 0.5874, "step": 19287 }, { "epoch": 0.529599121361889, "grad_norm": 0.35755568742752075, "learning_rate": 1.6758537497575515e-05, "loss": 0.5012, "step": 19288 }, { "epoch": 0.5296265788028556, "grad_norm": 0.35312119126319885, "learning_rate": 1.6758219170386366e-05, "loss": 0.4488, "step": 19289 }, { "epoch": 0.5296540362438221, "grad_norm": 0.7040501236915588, "learning_rate": 1.675790083059098e-05, "loss": 0.4751, "step": 19290 }, { "epoch": 0.5296814936847886, "grad_norm": 0.3672594726085663, "learning_rate": 1.6757582478189954e-05, "loss": 0.5421, "step": 19291 }, { "epoch": 0.5297089511257551, "grad_norm": 0.47420787811279297, "learning_rate": 1.675726411318388e-05, "loss": 0.588, "step": 19292 }, { "epoch": 0.5297364085667216, "grad_norm": 0.34615710377693176, "learning_rate": 1.675694573557335e-05, "loss": 0.4502, "step": 19293 }, { "epoch": 0.5297638660076881, "grad_norm": 0.3715755343437195, "learning_rate": 1.6756627345358963e-05, "loss": 0.5576, "step": 19294 }, { "epoch": 0.5297913234486545, "grad_norm": 0.37166696786880493, "learning_rate": 1.6756308942541305e-05, "loss": 0.6345, "step": 19295 }, { "epoch": 0.5298187808896211, "grad_norm": 0.38494718074798584, "learning_rate": 1.675599052712098e-05, "loss": 0.5423, "step": 19296 }, { "epoch": 0.5298462383305876, "grad_norm": 0.37315356731414795, "learning_rate": 1.6755672099098572e-05, "loss": 0.5271, "step": 19297 }, { "epoch": 0.5298736957715541, "grad_norm": 0.3780669867992401, "learning_rate": 1.675535365847468e-05, "loss": 0.5476, "step": 19298 }, { "epoch": 0.5299011532125206, "grad_norm": 0.3897204101085663, "learning_rate": 1.67550352052499e-05, "loss": 0.5864, "step": 19299 }, { "epoch": 0.5299286106534871, "grad_norm": 0.427480548620224, "learning_rate": 1.675471673942482e-05, "loss": 0.5317, "step": 19300 }, { "epoch": 0.5299560680944536, "grad_norm": 0.34892576932907104, "learning_rate": 1.6754398261000038e-05, "loss": 0.406, "step": 19301 }, { "epoch": 0.52998352553542, "grad_norm": 0.3658982515335083, "learning_rate": 1.675407976997615e-05, "loss": 0.4957, "step": 19302 }, { "epoch": 0.5300109829763866, "grad_norm": 0.38249024748802185, "learning_rate": 1.6753761266353744e-05, "loss": 0.5187, "step": 19303 }, { "epoch": 0.5300384404173532, "grad_norm": 0.3669368326663971, "learning_rate": 1.675344275013342e-05, "loss": 0.463, "step": 19304 }, { "epoch": 0.5300658978583196, "grad_norm": 0.3971758186817169, "learning_rate": 1.675312422131577e-05, "loss": 0.4702, "step": 19305 }, { "epoch": 0.5300933552992861, "grad_norm": 0.37217795848846436, "learning_rate": 1.6752805679901384e-05, "loss": 0.505, "step": 19306 }, { "epoch": 0.5301208127402526, "grad_norm": 0.34922534227371216, "learning_rate": 1.6752487125890865e-05, "loss": 0.4916, "step": 19307 }, { "epoch": 0.5301482701812191, "grad_norm": 0.35785776376724243, "learning_rate": 1.67521685592848e-05, "loss": 0.498, "step": 19308 }, { "epoch": 0.5301757276221856, "grad_norm": 0.4068968594074249, "learning_rate": 1.6751849980083787e-05, "loss": 0.5453, "step": 19309 }, { "epoch": 0.5302031850631521, "grad_norm": 0.41245296597480774, "learning_rate": 1.675153138828842e-05, "loss": 0.5195, "step": 19310 }, { "epoch": 0.5302306425041187, "grad_norm": 0.3719991147518158, "learning_rate": 1.6751212783899288e-05, "loss": 0.5692, "step": 19311 }, { "epoch": 0.5302580999450851, "grad_norm": 0.40111422538757324, "learning_rate": 1.6750894166916993e-05, "loss": 0.5016, "step": 19312 }, { "epoch": 0.5302855573860517, "grad_norm": 0.33629217743873596, "learning_rate": 1.6750575537342123e-05, "loss": 0.5545, "step": 19313 }, { "epoch": 0.5303130148270181, "grad_norm": 0.35935845971107483, "learning_rate": 1.675025689517528e-05, "loss": 0.574, "step": 19314 }, { "epoch": 0.5303404722679846, "grad_norm": 0.36656132340431213, "learning_rate": 1.674993824041705e-05, "loss": 0.4275, "step": 19315 }, { "epoch": 0.5303679297089511, "grad_norm": 0.44042009115219116, "learning_rate": 1.674961957306803e-05, "loss": 0.4919, "step": 19316 }, { "epoch": 0.5303953871499176, "grad_norm": 0.3526274561882019, "learning_rate": 1.6749300893128815e-05, "loss": 0.4881, "step": 19317 }, { "epoch": 0.5304228445908842, "grad_norm": 0.3255390226840973, "learning_rate": 1.67489822006e-05, "loss": 0.5277, "step": 19318 }, { "epoch": 0.5304503020318506, "grad_norm": 0.4094306230545044, "learning_rate": 1.674866349548218e-05, "loss": 0.494, "step": 19319 }, { "epoch": 0.5304777594728172, "grad_norm": 0.41542762517929077, "learning_rate": 1.6748344777775953e-05, "loss": 0.5518, "step": 19320 }, { "epoch": 0.5305052169137836, "grad_norm": 0.386592835187912, "learning_rate": 1.67480260474819e-05, "loss": 0.5107, "step": 19321 }, { "epoch": 0.5305326743547502, "grad_norm": 0.3939725160598755, "learning_rate": 1.6747707304600632e-05, "loss": 0.5195, "step": 19322 }, { "epoch": 0.5305601317957166, "grad_norm": 0.44558098912239075, "learning_rate": 1.6747388549132732e-05, "loss": 0.5514, "step": 19323 }, { "epoch": 0.5305875892366831, "grad_norm": 0.35958537459373474, "learning_rate": 1.67470697810788e-05, "loss": 0.4606, "step": 19324 }, { "epoch": 0.5306150466776497, "grad_norm": 0.3878563642501831, "learning_rate": 1.674675100043943e-05, "loss": 0.4868, "step": 19325 }, { "epoch": 0.5306425041186161, "grad_norm": 0.38232284784317017, "learning_rate": 1.6746432207215217e-05, "loss": 0.5182, "step": 19326 }, { "epoch": 0.5306699615595827, "grad_norm": 0.45993366837501526, "learning_rate": 1.6746113401406753e-05, "loss": 0.4371, "step": 19327 }, { "epoch": 0.5306974190005491, "grad_norm": 0.3306426703929901, "learning_rate": 1.6745794583014636e-05, "loss": 0.4711, "step": 19328 }, { "epoch": 0.5307248764415157, "grad_norm": 0.39526742696762085, "learning_rate": 1.674547575203946e-05, "loss": 0.4176, "step": 19329 }, { "epoch": 0.5307523338824821, "grad_norm": 0.4159373939037323, "learning_rate": 1.674515690848182e-05, "loss": 0.579, "step": 19330 }, { "epoch": 0.5307797913234487, "grad_norm": 0.351931095123291, "learning_rate": 1.6744838052342306e-05, "loss": 0.5115, "step": 19331 }, { "epoch": 0.5308072487644152, "grad_norm": 0.3727240264415741, "learning_rate": 1.6744519183621514e-05, "loss": 0.5881, "step": 19332 }, { "epoch": 0.5308347062053816, "grad_norm": 0.35540154576301575, "learning_rate": 1.6744200302320046e-05, "loss": 0.5236, "step": 19333 }, { "epoch": 0.5308621636463482, "grad_norm": 0.3660944700241089, "learning_rate": 1.674388140843849e-05, "loss": 0.5086, "step": 19334 }, { "epoch": 0.5308896210873146, "grad_norm": 0.5373280644416809, "learning_rate": 1.6743562501977443e-05, "loss": 0.5284, "step": 19335 }, { "epoch": 0.5309170785282812, "grad_norm": 0.36367037892341614, "learning_rate": 1.6743243582937497e-05, "loss": 0.4809, "step": 19336 }, { "epoch": 0.5309445359692476, "grad_norm": 0.36581721901893616, "learning_rate": 1.6742924651319253e-05, "loss": 0.5139, "step": 19337 }, { "epoch": 0.5309719934102142, "grad_norm": 0.43284475803375244, "learning_rate": 1.6742605707123303e-05, "loss": 0.5425, "step": 19338 }, { "epoch": 0.5309994508511807, "grad_norm": 0.3975452780723572, "learning_rate": 1.6742286750350237e-05, "loss": 0.5604, "step": 19339 }, { "epoch": 0.5310269082921472, "grad_norm": 0.4012303054332733, "learning_rate": 1.6741967781000657e-05, "loss": 0.5165, "step": 19340 }, { "epoch": 0.5310543657331137, "grad_norm": 0.46749037504196167, "learning_rate": 1.674164879907516e-05, "loss": 0.4705, "step": 19341 }, { "epoch": 0.5310818231740801, "grad_norm": 0.42247387766838074, "learning_rate": 1.6741329804574328e-05, "loss": 0.5521, "step": 19342 }, { "epoch": 0.5311092806150467, "grad_norm": 0.4112529158592224, "learning_rate": 1.674101079749877e-05, "loss": 0.5556, "step": 19343 }, { "epoch": 0.5311367380560131, "grad_norm": 0.42487868666648865, "learning_rate": 1.674069177784907e-05, "loss": 0.4801, "step": 19344 }, { "epoch": 0.5311641954969797, "grad_norm": 0.3428756594657898, "learning_rate": 1.6740372745625837e-05, "loss": 0.4262, "step": 19345 }, { "epoch": 0.5311916529379462, "grad_norm": 0.3145662546157837, "learning_rate": 1.674005370082965e-05, "loss": 0.37, "step": 19346 }, { "epoch": 0.5312191103789127, "grad_norm": 0.402011901140213, "learning_rate": 1.6739734643461116e-05, "loss": 0.5044, "step": 19347 }, { "epoch": 0.5312465678198792, "grad_norm": 0.3490923345088959, "learning_rate": 1.6739415573520823e-05, "loss": 0.5299, "step": 19348 }, { "epoch": 0.5312740252608457, "grad_norm": 0.38590601086616516, "learning_rate": 1.673909649100937e-05, "loss": 0.5258, "step": 19349 }, { "epoch": 0.5313014827018122, "grad_norm": 0.42892301082611084, "learning_rate": 1.6738777395927354e-05, "loss": 0.5804, "step": 19350 }, { "epoch": 0.5313289401427786, "grad_norm": 0.3846324682235718, "learning_rate": 1.6738458288275362e-05, "loss": 0.562, "step": 19351 }, { "epoch": 0.5313563975837452, "grad_norm": 0.3779344856739044, "learning_rate": 1.6738139168053998e-05, "loss": 0.5586, "step": 19352 }, { "epoch": 0.5313838550247117, "grad_norm": 0.4812759757041931, "learning_rate": 1.673782003526385e-05, "loss": 0.6317, "step": 19353 }, { "epoch": 0.5314113124656782, "grad_norm": 0.5619891285896301, "learning_rate": 1.6737500889905524e-05, "loss": 0.4823, "step": 19354 }, { "epoch": 0.5314387699066447, "grad_norm": 0.4811290204524994, "learning_rate": 1.6737181731979602e-05, "loss": 0.57, "step": 19355 }, { "epoch": 0.5314662273476112, "grad_norm": 0.4494136571884155, "learning_rate": 1.673686256148669e-05, "loss": 0.4852, "step": 19356 }, { "epoch": 0.5314936847885777, "grad_norm": 0.40900710225105286, "learning_rate": 1.6736543378427377e-05, "loss": 0.497, "step": 19357 }, { "epoch": 0.5315211422295442, "grad_norm": 0.3726123869419098, "learning_rate": 1.673622418280226e-05, "loss": 0.503, "step": 19358 }, { "epoch": 0.5315485996705107, "grad_norm": 0.36603936553001404, "learning_rate": 1.673590497461194e-05, "loss": 0.4778, "step": 19359 }, { "epoch": 0.5315760571114773, "grad_norm": 0.37542518973350525, "learning_rate": 1.6735585753857e-05, "loss": 0.457, "step": 19360 }, { "epoch": 0.5316035145524437, "grad_norm": 0.3842017948627472, "learning_rate": 1.6735266520538046e-05, "loss": 0.5336, "step": 19361 }, { "epoch": 0.5316309719934103, "grad_norm": 0.30286693572998047, "learning_rate": 1.6734947274655674e-05, "loss": 0.4092, "step": 19362 }, { "epoch": 0.5316584294343767, "grad_norm": 0.38227716088294983, "learning_rate": 1.6734628016210473e-05, "loss": 0.4975, "step": 19363 }, { "epoch": 0.5316858868753432, "grad_norm": 0.34778478741645813, "learning_rate": 1.673430874520304e-05, "loss": 0.4239, "step": 19364 }, { "epoch": 0.5317133443163097, "grad_norm": 0.358067125082016, "learning_rate": 1.6733989461633972e-05, "loss": 0.5301, "step": 19365 }, { "epoch": 0.5317408017572762, "grad_norm": 0.36643579602241516, "learning_rate": 1.6733670165503865e-05, "loss": 0.4524, "step": 19366 }, { "epoch": 0.5317682591982428, "grad_norm": 0.3559860587120056, "learning_rate": 1.6733350856813313e-05, "loss": 0.5266, "step": 19367 }, { "epoch": 0.5317957166392092, "grad_norm": 0.41223931312561035, "learning_rate": 1.6733031535562916e-05, "loss": 0.5905, "step": 19368 }, { "epoch": 0.5318231740801758, "grad_norm": 0.3269866406917572, "learning_rate": 1.6732712201753263e-05, "loss": 0.4526, "step": 19369 }, { "epoch": 0.5318506315211422, "grad_norm": 0.37068304419517517, "learning_rate": 1.6732392855384954e-05, "loss": 0.5972, "step": 19370 }, { "epoch": 0.5318780889621088, "grad_norm": 0.38224315643310547, "learning_rate": 1.6732073496458582e-05, "loss": 0.5475, "step": 19371 }, { "epoch": 0.5319055464030752, "grad_norm": 0.37334686517715454, "learning_rate": 1.6731754124974747e-05, "loss": 0.5157, "step": 19372 }, { "epoch": 0.5319330038440417, "grad_norm": 0.36014029383659363, "learning_rate": 1.673143474093404e-05, "loss": 0.4956, "step": 19373 }, { "epoch": 0.5319604612850083, "grad_norm": 0.39152592420578003, "learning_rate": 1.673111534433706e-05, "loss": 0.5721, "step": 19374 }, { "epoch": 0.5319879187259747, "grad_norm": 0.4061623811721802, "learning_rate": 1.67307959351844e-05, "loss": 0.5032, "step": 19375 }, { "epoch": 0.5320153761669413, "grad_norm": 0.3938007056713104, "learning_rate": 1.673047651347666e-05, "loss": 0.5489, "step": 19376 }, { "epoch": 0.5320428336079077, "grad_norm": 0.3850138187408447, "learning_rate": 1.6730157079214433e-05, "loss": 0.4767, "step": 19377 }, { "epoch": 0.5320702910488743, "grad_norm": 0.4848293364048004, "learning_rate": 1.6729837632398313e-05, "loss": 0.5507, "step": 19378 }, { "epoch": 0.5320977484898407, "grad_norm": 0.4155327379703522, "learning_rate": 1.6729518173028898e-05, "loss": 0.5454, "step": 19379 }, { "epoch": 0.5321252059308073, "grad_norm": 0.3578982651233673, "learning_rate": 1.6729198701106782e-05, "loss": 0.4317, "step": 19380 }, { "epoch": 0.5321526633717738, "grad_norm": 0.387445867061615, "learning_rate": 1.6728879216632567e-05, "loss": 0.535, "step": 19381 }, { "epoch": 0.5321801208127402, "grad_norm": 0.3569889962673187, "learning_rate": 1.6728559719606844e-05, "loss": 0.4938, "step": 19382 }, { "epoch": 0.5322075782537068, "grad_norm": 0.3981384038925171, "learning_rate": 1.6728240210030208e-05, "loss": 0.5419, "step": 19383 }, { "epoch": 0.5322350356946732, "grad_norm": 0.4326855540275574, "learning_rate": 1.6727920687903257e-05, "loss": 0.5005, "step": 19384 }, { "epoch": 0.5322624931356398, "grad_norm": 0.3628528416156769, "learning_rate": 1.6727601153226585e-05, "loss": 0.5246, "step": 19385 }, { "epoch": 0.5322899505766062, "grad_norm": 0.3546999394893646, "learning_rate": 1.6727281606000794e-05, "loss": 0.4722, "step": 19386 }, { "epoch": 0.5323174080175728, "grad_norm": 0.3851064145565033, "learning_rate": 1.672696204622647e-05, "loss": 0.5097, "step": 19387 }, { "epoch": 0.5323448654585393, "grad_norm": 0.4190930128097534, "learning_rate": 1.672664247390422e-05, "loss": 0.4946, "step": 19388 }, { "epoch": 0.5323723228995058, "grad_norm": 0.42522090673446655, "learning_rate": 1.672632288903463e-05, "loss": 0.5349, "step": 19389 }, { "epoch": 0.5323997803404723, "grad_norm": 0.36830562353134155, "learning_rate": 1.6726003291618306e-05, "loss": 0.4823, "step": 19390 }, { "epoch": 0.5324272377814387, "grad_norm": 0.40661484003067017, "learning_rate": 1.6725683681655834e-05, "loss": 0.5418, "step": 19391 }, { "epoch": 0.5324546952224053, "grad_norm": 0.40643957257270813, "learning_rate": 1.672536405914782e-05, "loss": 0.6065, "step": 19392 }, { "epoch": 0.5324821526633717, "grad_norm": 0.3483487069606781, "learning_rate": 1.6725044424094852e-05, "loss": 0.5358, "step": 19393 }, { "epoch": 0.5325096101043383, "grad_norm": 0.35646894574165344, "learning_rate": 1.672472477649753e-05, "loss": 0.4959, "step": 19394 }, { "epoch": 0.5325370675453048, "grad_norm": 0.3634638786315918, "learning_rate": 1.6724405116356454e-05, "loss": 0.4514, "step": 19395 }, { "epoch": 0.5325645249862713, "grad_norm": 0.389377236366272, "learning_rate": 1.6724085443672216e-05, "loss": 0.5676, "step": 19396 }, { "epoch": 0.5325919824272378, "grad_norm": 0.36246708035469055, "learning_rate": 1.672376575844541e-05, "loss": 0.4794, "step": 19397 }, { "epoch": 0.5326194398682043, "grad_norm": 0.3850906789302826, "learning_rate": 1.6723446060676635e-05, "loss": 0.4898, "step": 19398 }, { "epoch": 0.5326468973091708, "grad_norm": 0.3455125093460083, "learning_rate": 1.6723126350366485e-05, "loss": 0.4897, "step": 19399 }, { "epoch": 0.5326743547501372, "grad_norm": 0.3611743450164795, "learning_rate": 1.672280662751556e-05, "loss": 0.4593, "step": 19400 }, { "epoch": 0.5327018121911038, "grad_norm": 0.38606566190719604, "learning_rate": 1.6722486892124458e-05, "loss": 0.5685, "step": 19401 }, { "epoch": 0.5327292696320703, "grad_norm": 0.39980944991111755, "learning_rate": 1.6722167144193773e-05, "loss": 0.5342, "step": 19402 }, { "epoch": 0.5327567270730368, "grad_norm": 0.4139919579029083, "learning_rate": 1.6721847383724097e-05, "loss": 0.4493, "step": 19403 }, { "epoch": 0.5327841845140033, "grad_norm": 0.5198220014572144, "learning_rate": 1.672152761071603e-05, "loss": 0.4518, "step": 19404 }, { "epoch": 0.5328116419549698, "grad_norm": 0.3754253089427948, "learning_rate": 1.672120782517017e-05, "loss": 0.5559, "step": 19405 }, { "epoch": 0.5328390993959363, "grad_norm": 0.42607948184013367, "learning_rate": 1.6720888027087112e-05, "loss": 0.5164, "step": 19406 }, { "epoch": 0.5328665568369028, "grad_norm": 0.4902487099170685, "learning_rate": 1.6720568216467455e-05, "loss": 0.5572, "step": 19407 }, { "epoch": 0.5328940142778693, "grad_norm": 0.36007651686668396, "learning_rate": 1.6720248393311795e-05, "loss": 0.481, "step": 19408 }, { "epoch": 0.5329214717188359, "grad_norm": 0.4118850529193878, "learning_rate": 1.671992855762072e-05, "loss": 0.5233, "step": 19409 }, { "epoch": 0.5329489291598023, "grad_norm": 0.5909167528152466, "learning_rate": 1.6719608709394837e-05, "loss": 0.5569, "step": 19410 }, { "epoch": 0.5329763866007688, "grad_norm": 0.42809680104255676, "learning_rate": 1.6719288848634736e-05, "loss": 0.5395, "step": 19411 }, { "epoch": 0.5330038440417353, "grad_norm": 0.36552363634109497, "learning_rate": 1.671896897534102e-05, "loss": 0.4904, "step": 19412 }, { "epoch": 0.5330313014827018, "grad_norm": 0.4246695935726166, "learning_rate": 1.6718649089514286e-05, "loss": 0.4116, "step": 19413 }, { "epoch": 0.5330587589236683, "grad_norm": 0.353443443775177, "learning_rate": 1.671832919115512e-05, "loss": 0.5022, "step": 19414 }, { "epoch": 0.5330862163646348, "grad_norm": 0.4004979729652405, "learning_rate": 1.671800928026413e-05, "loss": 0.6082, "step": 19415 }, { "epoch": 0.5331136738056013, "grad_norm": 0.38645175099372864, "learning_rate": 1.671768935684191e-05, "loss": 0.5039, "step": 19416 }, { "epoch": 0.5331411312465678, "grad_norm": 0.3336629867553711, "learning_rate": 1.671736942088905e-05, "loss": 0.5085, "step": 19417 }, { "epoch": 0.5331685886875344, "grad_norm": 0.3915872275829315, "learning_rate": 1.6717049472406155e-05, "loss": 0.4709, "step": 19418 }, { "epoch": 0.5331960461285008, "grad_norm": 0.3691037893295288, "learning_rate": 1.6716729511393822e-05, "loss": 0.5088, "step": 19419 }, { "epoch": 0.5332235035694673, "grad_norm": 0.35532331466674805, "learning_rate": 1.671640953785264e-05, "loss": 0.4534, "step": 19420 }, { "epoch": 0.5332509610104338, "grad_norm": 0.3765157163143158, "learning_rate": 1.6716089551783212e-05, "loss": 0.6152, "step": 19421 }, { "epoch": 0.5332784184514003, "grad_norm": 0.3504530191421509, "learning_rate": 1.671576955318613e-05, "loss": 0.4223, "step": 19422 }, { "epoch": 0.5333058758923668, "grad_norm": 0.34128376841545105, "learning_rate": 1.6715449542062004e-05, "loss": 0.4331, "step": 19423 }, { "epoch": 0.5333333333333333, "grad_norm": 0.3494567275047302, "learning_rate": 1.6715129518411412e-05, "loss": 0.5329, "step": 19424 }, { "epoch": 0.5333607907742999, "grad_norm": 0.35481521487236023, "learning_rate": 1.6714809482234965e-05, "loss": 0.5924, "step": 19425 }, { "epoch": 0.5333882482152663, "grad_norm": 0.3485960066318512, "learning_rate": 1.6714489433533252e-05, "loss": 0.5248, "step": 19426 }, { "epoch": 0.5334157056562329, "grad_norm": 0.38447362184524536, "learning_rate": 1.6714169372306876e-05, "loss": 0.4684, "step": 19427 }, { "epoch": 0.5334431630971993, "grad_norm": 0.3922148644924164, "learning_rate": 1.671384929855643e-05, "loss": 0.514, "step": 19428 }, { "epoch": 0.5334706205381659, "grad_norm": 0.37937527894973755, "learning_rate": 1.6713529212282514e-05, "loss": 0.521, "step": 19429 }, { "epoch": 0.5334980779791323, "grad_norm": 0.37518739700317383, "learning_rate": 1.671320911348572e-05, "loss": 0.534, "step": 19430 }, { "epoch": 0.5335255354200988, "grad_norm": 0.40079787373542786, "learning_rate": 1.671288900216665e-05, "loss": 0.5135, "step": 19431 }, { "epoch": 0.5335529928610654, "grad_norm": 1.6351467370986938, "learning_rate": 1.6712568878325897e-05, "loss": 0.5528, "step": 19432 }, { "epoch": 0.5335804503020318, "grad_norm": 0.4146071970462799, "learning_rate": 1.6712248741964067e-05, "loss": 0.5319, "step": 19433 }, { "epoch": 0.5336079077429984, "grad_norm": 0.36332717537879944, "learning_rate": 1.6711928593081744e-05, "loss": 0.4655, "step": 19434 }, { "epoch": 0.5336353651839648, "grad_norm": 0.3975016474723816, "learning_rate": 1.6711608431679536e-05, "loss": 0.4954, "step": 19435 }, { "epoch": 0.5336628226249314, "grad_norm": 0.37246260046958923, "learning_rate": 1.671128825775804e-05, "loss": 0.5373, "step": 19436 }, { "epoch": 0.5336902800658978, "grad_norm": 0.5320195555686951, "learning_rate": 1.6710968071317842e-05, "loss": 0.5725, "step": 19437 }, { "epoch": 0.5337177375068644, "grad_norm": 0.3479880690574646, "learning_rate": 1.6710647872359548e-05, "loss": 0.5159, "step": 19438 }, { "epoch": 0.5337451949478309, "grad_norm": 0.37756481766700745, "learning_rate": 1.6710327660883758e-05, "loss": 0.5319, "step": 19439 }, { "epoch": 0.5337726523887973, "grad_norm": 0.3727855682373047, "learning_rate": 1.671000743689106e-05, "loss": 0.4487, "step": 19440 }, { "epoch": 0.5338001098297639, "grad_norm": 0.33395010232925415, "learning_rate": 1.6709687200382057e-05, "loss": 0.4525, "step": 19441 }, { "epoch": 0.5338275672707303, "grad_norm": 0.37841561436653137, "learning_rate": 1.670936695135735e-05, "loss": 0.4651, "step": 19442 }, { "epoch": 0.5338550247116969, "grad_norm": 0.4073631465435028, "learning_rate": 1.6709046689817528e-05, "loss": 0.4926, "step": 19443 }, { "epoch": 0.5338824821526633, "grad_norm": 0.36022499203681946, "learning_rate": 1.6708726415763197e-05, "loss": 0.5275, "step": 19444 }, { "epoch": 0.5339099395936299, "grad_norm": 0.4838637411594391, "learning_rate": 1.6708406129194948e-05, "loss": 0.5518, "step": 19445 }, { "epoch": 0.5339373970345964, "grad_norm": 0.4033936858177185, "learning_rate": 1.6708085830113382e-05, "loss": 0.5515, "step": 19446 }, { "epoch": 0.5339648544755629, "grad_norm": 0.44100967049598694, "learning_rate": 1.6707765518519093e-05, "loss": 0.5942, "step": 19447 }, { "epoch": 0.5339923119165294, "grad_norm": 0.46947649121284485, "learning_rate": 1.6707445194412678e-05, "loss": 0.5428, "step": 19448 }, { "epoch": 0.5340197693574958, "grad_norm": 0.35154369473457336, "learning_rate": 1.6707124857794742e-05, "loss": 0.45, "step": 19449 }, { "epoch": 0.5340472267984624, "grad_norm": 0.38932543992996216, "learning_rate": 1.6706804508665872e-05, "loss": 0.5022, "step": 19450 }, { "epoch": 0.5340746842394288, "grad_norm": 0.4461614191532135, "learning_rate": 1.6706484147026678e-05, "loss": 0.4794, "step": 19451 }, { "epoch": 0.5341021416803954, "grad_norm": 0.3579641878604889, "learning_rate": 1.6706163772877745e-05, "loss": 0.4962, "step": 19452 }, { "epoch": 0.5341295991213619, "grad_norm": 0.4525510370731354, "learning_rate": 1.6705843386219678e-05, "loss": 0.5302, "step": 19453 }, { "epoch": 0.5341570565623284, "grad_norm": 0.3582257330417633, "learning_rate": 1.6705522987053072e-05, "loss": 0.4275, "step": 19454 }, { "epoch": 0.5341845140032949, "grad_norm": 0.598111629486084, "learning_rate": 1.6705202575378527e-05, "loss": 0.6786, "step": 19455 }, { "epoch": 0.5342119714442614, "grad_norm": 0.34954091906547546, "learning_rate": 1.670488215119664e-05, "loss": 0.5039, "step": 19456 }, { "epoch": 0.5342394288852279, "grad_norm": 0.38412410020828247, "learning_rate": 1.6704561714508005e-05, "loss": 0.5333, "step": 19457 }, { "epoch": 0.5342668863261943, "grad_norm": 0.35423600673675537, "learning_rate": 1.670424126531322e-05, "loss": 0.4699, "step": 19458 }, { "epoch": 0.5342943437671609, "grad_norm": 0.3286384046077728, "learning_rate": 1.6703920803612892e-05, "loss": 0.4765, "step": 19459 }, { "epoch": 0.5343218012081274, "grad_norm": 0.39541956782341003, "learning_rate": 1.6703600329407607e-05, "loss": 0.5133, "step": 19460 }, { "epoch": 0.5343492586490939, "grad_norm": 0.41839295625686646, "learning_rate": 1.6703279842697974e-05, "loss": 0.5174, "step": 19461 }, { "epoch": 0.5343767160900604, "grad_norm": 0.33179572224617004, "learning_rate": 1.670295934348458e-05, "loss": 0.3967, "step": 19462 }, { "epoch": 0.5344041735310269, "grad_norm": 0.45312127470970154, "learning_rate": 1.6702638831768027e-05, "loss": 0.5373, "step": 19463 }, { "epoch": 0.5344316309719934, "grad_norm": 0.32612162828445435, "learning_rate": 1.6702318307548915e-05, "loss": 0.4661, "step": 19464 }, { "epoch": 0.5344590884129599, "grad_norm": 0.3838077187538147, "learning_rate": 1.6701997770827838e-05, "loss": 0.5123, "step": 19465 }, { "epoch": 0.5344865458539264, "grad_norm": 0.41694149374961853, "learning_rate": 1.6701677221605398e-05, "loss": 0.5021, "step": 19466 }, { "epoch": 0.534514003294893, "grad_norm": 0.3411122262477875, "learning_rate": 1.6701356659882192e-05, "loss": 0.4493, "step": 19467 }, { "epoch": 0.5345414607358594, "grad_norm": 0.373210072517395, "learning_rate": 1.6701036085658816e-05, "loss": 0.4359, "step": 19468 }, { "epoch": 0.534568918176826, "grad_norm": 0.3401247560977936, "learning_rate": 1.670071549893587e-05, "loss": 0.5228, "step": 19469 }, { "epoch": 0.5345963756177924, "grad_norm": 0.36342838406562805, "learning_rate": 1.670039489971395e-05, "loss": 0.478, "step": 19470 }, { "epoch": 0.5346238330587589, "grad_norm": 0.32962462306022644, "learning_rate": 1.6700074287993654e-05, "loss": 0.4443, "step": 19471 }, { "epoch": 0.5346512904997254, "grad_norm": 0.4055817127227783, "learning_rate": 1.669975366377558e-05, "loss": 0.5739, "step": 19472 }, { "epoch": 0.5346787479406919, "grad_norm": 0.3414079546928406, "learning_rate": 1.669943302706033e-05, "loss": 0.4437, "step": 19473 }, { "epoch": 0.5347062053816585, "grad_norm": 0.397477388381958, "learning_rate": 1.6699112377848496e-05, "loss": 0.6548, "step": 19474 }, { "epoch": 0.5347336628226249, "grad_norm": 0.3588618040084839, "learning_rate": 1.6698791716140684e-05, "loss": 0.4869, "step": 19475 }, { "epoch": 0.5347611202635915, "grad_norm": 0.39268118143081665, "learning_rate": 1.6698471041937482e-05, "loss": 0.485, "step": 19476 }, { "epoch": 0.5347885777045579, "grad_norm": 0.33474844694137573, "learning_rate": 1.66981503552395e-05, "loss": 0.462, "step": 19477 }, { "epoch": 0.5348160351455244, "grad_norm": 0.3634570240974426, "learning_rate": 1.6697829656047323e-05, "loss": 0.5039, "step": 19478 }, { "epoch": 0.5348434925864909, "grad_norm": 0.369048148393631, "learning_rate": 1.669750894436156e-05, "loss": 0.5159, "step": 19479 }, { "epoch": 0.5348709500274574, "grad_norm": 0.3656540811061859, "learning_rate": 1.6697188220182807e-05, "loss": 0.5153, "step": 19480 }, { "epoch": 0.534898407468424, "grad_norm": 0.35698211193084717, "learning_rate": 1.6696867483511657e-05, "loss": 0.5303, "step": 19481 }, { "epoch": 0.5349258649093904, "grad_norm": 0.37191689014434814, "learning_rate": 1.6696546734348713e-05, "loss": 0.4961, "step": 19482 }, { "epoch": 0.534953322350357, "grad_norm": 0.3809208273887634, "learning_rate": 1.6696225972694574e-05, "loss": 0.5134, "step": 19483 }, { "epoch": 0.5349807797913234, "grad_norm": 0.36249545216560364, "learning_rate": 1.669590519854983e-05, "loss": 0.5125, "step": 19484 }, { "epoch": 0.53500823723229, "grad_norm": 0.4077986776828766, "learning_rate": 1.669558441191509e-05, "loss": 0.493, "step": 19485 }, { "epoch": 0.5350356946732564, "grad_norm": 0.34413641691207886, "learning_rate": 1.669526361279095e-05, "loss": 0.4461, "step": 19486 }, { "epoch": 0.535063152114223, "grad_norm": 0.3963479995727539, "learning_rate": 1.6694942801178005e-05, "loss": 0.5726, "step": 19487 }, { "epoch": 0.5350906095551895, "grad_norm": 0.41516783833503723, "learning_rate": 1.6694621977076854e-05, "loss": 0.6221, "step": 19488 }, { "epoch": 0.5351180669961559, "grad_norm": 0.37626516819000244, "learning_rate": 1.6694301140488095e-05, "loss": 0.508, "step": 19489 }, { "epoch": 0.5351455244371225, "grad_norm": 0.5348260402679443, "learning_rate": 1.6693980291412333e-05, "loss": 0.4481, "step": 19490 }, { "epoch": 0.5351729818780889, "grad_norm": 0.4007846713066101, "learning_rate": 1.6693659429850156e-05, "loss": 0.5339, "step": 19491 }, { "epoch": 0.5352004393190555, "grad_norm": 0.3535490930080414, "learning_rate": 1.6693338555802168e-05, "loss": 0.4733, "step": 19492 }, { "epoch": 0.5352278967600219, "grad_norm": 0.35954803228378296, "learning_rate": 1.6693017669268972e-05, "loss": 0.4199, "step": 19493 }, { "epoch": 0.5352553542009885, "grad_norm": 0.4301975965499878, "learning_rate": 1.6692696770251155e-05, "loss": 0.537, "step": 19494 }, { "epoch": 0.535282811641955, "grad_norm": 0.3862122893333435, "learning_rate": 1.669237585874933e-05, "loss": 0.4806, "step": 19495 }, { "epoch": 0.5353102690829215, "grad_norm": 0.3595523238182068, "learning_rate": 1.6692054934764083e-05, "loss": 0.5402, "step": 19496 }, { "epoch": 0.535337726523888, "grad_norm": 0.36400356888771057, "learning_rate": 1.6691733998296018e-05, "loss": 0.4536, "step": 19497 }, { "epoch": 0.5353651839648544, "grad_norm": 0.4129052758216858, "learning_rate": 1.6691413049345734e-05, "loss": 0.4403, "step": 19498 }, { "epoch": 0.535392641405821, "grad_norm": 0.3922627568244934, "learning_rate": 1.669109208791383e-05, "loss": 0.5433, "step": 19499 }, { "epoch": 0.5354200988467874, "grad_norm": 0.41947463154792786, "learning_rate": 1.6690771114000904e-05, "loss": 0.4916, "step": 19500 }, { "epoch": 0.535447556287754, "grad_norm": 0.39780300855636597, "learning_rate": 1.6690450127607555e-05, "loss": 0.588, "step": 19501 }, { "epoch": 0.5354750137287205, "grad_norm": 0.3834473490715027, "learning_rate": 1.669012912873438e-05, "loss": 0.5678, "step": 19502 }, { "epoch": 0.535502471169687, "grad_norm": 0.36770862340927124, "learning_rate": 1.6689808117381978e-05, "loss": 0.4926, "step": 19503 }, { "epoch": 0.5355299286106535, "grad_norm": 0.35598456859588623, "learning_rate": 1.6689487093550948e-05, "loss": 0.4897, "step": 19504 }, { "epoch": 0.53555738605162, "grad_norm": 0.3710547089576721, "learning_rate": 1.668916605724189e-05, "loss": 0.5387, "step": 19505 }, { "epoch": 0.5355848434925865, "grad_norm": 0.33104440569877625, "learning_rate": 1.6688845008455406e-05, "loss": 0.4598, "step": 19506 }, { "epoch": 0.5356123009335529, "grad_norm": 0.3871075212955475, "learning_rate": 1.6688523947192087e-05, "loss": 0.5331, "step": 19507 }, { "epoch": 0.5356397583745195, "grad_norm": 0.3626008927822113, "learning_rate": 1.6688202873452538e-05, "loss": 0.4945, "step": 19508 }, { "epoch": 0.535667215815486, "grad_norm": 0.40361687541007996, "learning_rate": 1.6687881787237352e-05, "loss": 0.6209, "step": 19509 }, { "epoch": 0.5356946732564525, "grad_norm": 0.3698068857192993, "learning_rate": 1.6687560688547137e-05, "loss": 0.4905, "step": 19510 }, { "epoch": 0.535722130697419, "grad_norm": 0.39064499735832214, "learning_rate": 1.6687239577382487e-05, "loss": 0.5625, "step": 19511 }, { "epoch": 0.5357495881383855, "grad_norm": 0.3648398220539093, "learning_rate": 1.6686918453744e-05, "loss": 0.4794, "step": 19512 }, { "epoch": 0.535777045579352, "grad_norm": 0.3869546949863434, "learning_rate": 1.6686597317632275e-05, "loss": 0.4639, "step": 19513 }, { "epoch": 0.5358045030203185, "grad_norm": 0.3686214089393616, "learning_rate": 1.668627616904791e-05, "loss": 0.4809, "step": 19514 }, { "epoch": 0.535831960461285, "grad_norm": 0.47000452876091003, "learning_rate": 1.6685955007991508e-05, "loss": 0.6073, "step": 19515 }, { "epoch": 0.5358594179022516, "grad_norm": 0.4056825637817383, "learning_rate": 1.6685633834463665e-05, "loss": 0.5532, "step": 19516 }, { "epoch": 0.535886875343218, "grad_norm": 0.4026842713356018, "learning_rate": 1.6685312648464982e-05, "loss": 0.4995, "step": 19517 }, { "epoch": 0.5359143327841845, "grad_norm": 0.3878988027572632, "learning_rate": 1.668499144999606e-05, "loss": 0.4169, "step": 19518 }, { "epoch": 0.535941790225151, "grad_norm": 0.3538348972797394, "learning_rate": 1.6684670239057493e-05, "loss": 0.449, "step": 19519 }, { "epoch": 0.5359692476661175, "grad_norm": 1.2887301445007324, "learning_rate": 1.6684349015649882e-05, "loss": 0.5159, "step": 19520 }, { "epoch": 0.535996705107084, "grad_norm": 0.372111976146698, "learning_rate": 1.6684027779773827e-05, "loss": 0.4869, "step": 19521 }, { "epoch": 0.5360241625480505, "grad_norm": 0.3894801139831543, "learning_rate": 1.6683706531429925e-05, "loss": 0.4879, "step": 19522 }, { "epoch": 0.5360516199890171, "grad_norm": 0.38229602575302124, "learning_rate": 1.668338527061878e-05, "loss": 0.5506, "step": 19523 }, { "epoch": 0.5360790774299835, "grad_norm": 0.37080323696136475, "learning_rate": 1.668306399734099e-05, "loss": 0.5202, "step": 19524 }, { "epoch": 0.5361065348709501, "grad_norm": 0.4076196551322937, "learning_rate": 1.6682742711597146e-05, "loss": 0.4917, "step": 19525 }, { "epoch": 0.5361339923119165, "grad_norm": 0.501453697681427, "learning_rate": 1.6682421413387856e-05, "loss": 0.4904, "step": 19526 }, { "epoch": 0.536161449752883, "grad_norm": 0.39683592319488525, "learning_rate": 1.668210010271372e-05, "loss": 0.5252, "step": 19527 }, { "epoch": 0.5361889071938495, "grad_norm": 0.36515218019485474, "learning_rate": 1.6681778779575335e-05, "loss": 0.5096, "step": 19528 }, { "epoch": 0.536216364634816, "grad_norm": 0.36251187324523926, "learning_rate": 1.6681457443973297e-05, "loss": 0.5489, "step": 19529 }, { "epoch": 0.5362438220757826, "grad_norm": 0.34332624077796936, "learning_rate": 1.668113609590821e-05, "loss": 0.4719, "step": 19530 }, { "epoch": 0.536271279516749, "grad_norm": 0.35295575857162476, "learning_rate": 1.6680814735380672e-05, "loss": 0.4415, "step": 19531 }, { "epoch": 0.5362987369577156, "grad_norm": 0.3575795888900757, "learning_rate": 1.6680493362391284e-05, "loss": 0.5676, "step": 19532 }, { "epoch": 0.536326194398682, "grad_norm": 0.5017375946044922, "learning_rate": 1.668017197694064e-05, "loss": 0.4377, "step": 19533 }, { "epoch": 0.5363536518396486, "grad_norm": 0.37745586037635803, "learning_rate": 1.6679850579029347e-05, "loss": 0.4742, "step": 19534 }, { "epoch": 0.536381109280615, "grad_norm": 0.36235329508781433, "learning_rate": 1.6679529168657996e-05, "loss": 0.5173, "step": 19535 }, { "epoch": 0.5364085667215815, "grad_norm": 0.5407199263572693, "learning_rate": 1.6679207745827195e-05, "loss": 0.5166, "step": 19536 }, { "epoch": 0.5364360241625481, "grad_norm": 0.3663750886917114, "learning_rate": 1.6678886310537537e-05, "loss": 0.5077, "step": 19537 }, { "epoch": 0.5364634816035145, "grad_norm": 0.3933117985725403, "learning_rate": 1.6678564862789632e-05, "loss": 0.5631, "step": 19538 }, { "epoch": 0.5364909390444811, "grad_norm": 0.3892660439014435, "learning_rate": 1.6678243402584063e-05, "loss": 0.5211, "step": 19539 }, { "epoch": 0.5365183964854475, "grad_norm": 0.3741360604763031, "learning_rate": 1.6677921929921443e-05, "loss": 0.566, "step": 19540 }, { "epoch": 0.5365458539264141, "grad_norm": 0.358063668012619, "learning_rate": 1.6677600444802365e-05, "loss": 0.5631, "step": 19541 }, { "epoch": 0.5365733113673805, "grad_norm": 0.3932938277721405, "learning_rate": 1.6677278947227435e-05, "loss": 0.5006, "step": 19542 }, { "epoch": 0.5366007688083471, "grad_norm": 0.395263135433197, "learning_rate": 1.6676957437197244e-05, "loss": 0.5385, "step": 19543 }, { "epoch": 0.5366282262493136, "grad_norm": 0.3611341416835785, "learning_rate": 1.66766359147124e-05, "loss": 0.5269, "step": 19544 }, { "epoch": 0.53665568369028, "grad_norm": 0.46949702501296997, "learning_rate": 1.6676314379773497e-05, "loss": 0.5801, "step": 19545 }, { "epoch": 0.5366831411312466, "grad_norm": 0.38301512598991394, "learning_rate": 1.6675992832381133e-05, "loss": 0.5956, "step": 19546 }, { "epoch": 0.536710598572213, "grad_norm": 0.4478590488433838, "learning_rate": 1.6675671272535918e-05, "loss": 0.4952, "step": 19547 }, { "epoch": 0.5367380560131796, "grad_norm": 0.3638934791088104, "learning_rate": 1.6675349700238438e-05, "loss": 0.4479, "step": 19548 }, { "epoch": 0.536765513454146, "grad_norm": 0.38088804483413696, "learning_rate": 1.667502811548931e-05, "loss": 0.5368, "step": 19549 }, { "epoch": 0.5367929708951126, "grad_norm": 0.3511832654476166, "learning_rate": 1.6674706518289118e-05, "loss": 0.4467, "step": 19550 }, { "epoch": 0.5368204283360791, "grad_norm": 0.43620362877845764, "learning_rate": 1.6674384908638467e-05, "loss": 0.494, "step": 19551 }, { "epoch": 0.5368478857770456, "grad_norm": 0.4272417426109314, "learning_rate": 1.6674063286537964e-05, "loss": 0.5377, "step": 19552 }, { "epoch": 0.5368753432180121, "grad_norm": 0.4310001730918884, "learning_rate": 1.6673741651988197e-05, "loss": 0.553, "step": 19553 }, { "epoch": 0.5369028006589786, "grad_norm": 0.43435004353523254, "learning_rate": 1.6673420004989776e-05, "loss": 0.5623, "step": 19554 }, { "epoch": 0.5369302580999451, "grad_norm": 0.3597783148288727, "learning_rate": 1.6673098345543295e-05, "loss": 0.5213, "step": 19555 }, { "epoch": 0.5369577155409115, "grad_norm": 0.4145207703113556, "learning_rate": 1.6672776673649353e-05, "loss": 0.501, "step": 19556 }, { "epoch": 0.5369851729818781, "grad_norm": 0.40807145833969116, "learning_rate": 1.6672454989308557e-05, "loss": 0.5411, "step": 19557 }, { "epoch": 0.5370126304228446, "grad_norm": 0.3861338496208191, "learning_rate": 1.66721332925215e-05, "loss": 0.4544, "step": 19558 }, { "epoch": 0.5370400878638111, "grad_norm": 0.3859710991382599, "learning_rate": 1.6671811583288787e-05, "loss": 0.4696, "step": 19559 }, { "epoch": 0.5370675453047776, "grad_norm": 0.3972319960594177, "learning_rate": 1.6671489861611016e-05, "loss": 0.5156, "step": 19560 }, { "epoch": 0.5370950027457441, "grad_norm": 0.3845534324645996, "learning_rate": 1.6671168127488785e-05, "loss": 0.6021, "step": 19561 }, { "epoch": 0.5371224601867106, "grad_norm": 0.33772993087768555, "learning_rate": 1.66708463809227e-05, "loss": 0.4551, "step": 19562 }, { "epoch": 0.537149917627677, "grad_norm": 0.37826231122016907, "learning_rate": 1.6670524621913357e-05, "loss": 0.4793, "step": 19563 }, { "epoch": 0.5371773750686436, "grad_norm": 0.6146863698959351, "learning_rate": 1.667020285046135e-05, "loss": 0.4942, "step": 19564 }, { "epoch": 0.5372048325096102, "grad_norm": 0.37128037214279175, "learning_rate": 1.6669881066567292e-05, "loss": 0.4819, "step": 19565 }, { "epoch": 0.5372322899505766, "grad_norm": 0.3733628988265991, "learning_rate": 1.6669559270231776e-05, "loss": 0.4395, "step": 19566 }, { "epoch": 0.5372597473915431, "grad_norm": 0.5011045336723328, "learning_rate": 1.6669237461455402e-05, "loss": 0.5421, "step": 19567 }, { "epoch": 0.5372872048325096, "grad_norm": 0.35989755392074585, "learning_rate": 1.666891564023877e-05, "loss": 0.4673, "step": 19568 }, { "epoch": 0.5373146622734761, "grad_norm": 0.3490027189254761, "learning_rate": 1.6668593806582485e-05, "loss": 0.5623, "step": 19569 }, { "epoch": 0.5373421197144426, "grad_norm": 0.3739790618419647, "learning_rate": 1.6668271960487144e-05, "loss": 0.4394, "step": 19570 }, { "epoch": 0.5373695771554091, "grad_norm": 0.37620607018470764, "learning_rate": 1.6667950101953345e-05, "loss": 0.6586, "step": 19571 }, { "epoch": 0.5373970345963757, "grad_norm": 0.3925766050815582, "learning_rate": 1.6667628230981693e-05, "loss": 0.5538, "step": 19572 }, { "epoch": 0.5374244920373421, "grad_norm": 0.38071608543395996, "learning_rate": 1.6667306347572786e-05, "loss": 0.4397, "step": 19573 }, { "epoch": 0.5374519494783087, "grad_norm": 0.32436901330947876, "learning_rate": 1.6666984451727226e-05, "loss": 0.4069, "step": 19574 }, { "epoch": 0.5374794069192751, "grad_norm": 0.41464418172836304, "learning_rate": 1.6666662543445612e-05, "loss": 0.5224, "step": 19575 }, { "epoch": 0.5375068643602416, "grad_norm": 0.3798460066318512, "learning_rate": 1.6666340622728543e-05, "loss": 0.5785, "step": 19576 }, { "epoch": 0.5375343218012081, "grad_norm": 0.35782644152641296, "learning_rate": 1.666601868957662e-05, "loss": 0.5091, "step": 19577 }, { "epoch": 0.5375617792421746, "grad_norm": 0.3931209444999695, "learning_rate": 1.666569674399045e-05, "loss": 0.5409, "step": 19578 }, { "epoch": 0.5375892366831412, "grad_norm": 0.3354811668395996, "learning_rate": 1.666537478597062e-05, "loss": 0.4652, "step": 19579 }, { "epoch": 0.5376166941241076, "grad_norm": 0.4022499918937683, "learning_rate": 1.6665052815517744e-05, "loss": 0.5398, "step": 19580 }, { "epoch": 0.5376441515650742, "grad_norm": 0.3516586422920227, "learning_rate": 1.6664730832632417e-05, "loss": 0.4691, "step": 19581 }, { "epoch": 0.5376716090060406, "grad_norm": 0.4316345155239105, "learning_rate": 1.6664408837315238e-05, "loss": 0.56, "step": 19582 }, { "epoch": 0.5376990664470072, "grad_norm": 0.3638538420200348, "learning_rate": 1.6664086829566813e-05, "loss": 0.5327, "step": 19583 }, { "epoch": 0.5377265238879736, "grad_norm": 0.38836950063705444, "learning_rate": 1.6663764809387736e-05, "loss": 0.5134, "step": 19584 }, { "epoch": 0.5377539813289401, "grad_norm": 0.4173794686794281, "learning_rate": 1.666344277677861e-05, "loss": 0.5892, "step": 19585 }, { "epoch": 0.5377814387699067, "grad_norm": 0.37348833680152893, "learning_rate": 1.6663120731740038e-05, "loss": 0.56, "step": 19586 }, { "epoch": 0.5378088962108731, "grad_norm": 0.3790948987007141, "learning_rate": 1.666279867427262e-05, "loss": 0.4763, "step": 19587 }, { "epoch": 0.5378363536518397, "grad_norm": 0.37248486280441284, "learning_rate": 1.6662476604376957e-05, "loss": 0.4963, "step": 19588 }, { "epoch": 0.5378638110928061, "grad_norm": 0.34528523683547974, "learning_rate": 1.666215452205364e-05, "loss": 0.4889, "step": 19589 }, { "epoch": 0.5378912685337727, "grad_norm": 0.3619472086429596, "learning_rate": 1.666183242730329e-05, "loss": 0.5034, "step": 19590 }, { "epoch": 0.5379187259747391, "grad_norm": 0.45127251744270325, "learning_rate": 1.6661510320126494e-05, "loss": 0.5417, "step": 19591 }, { "epoch": 0.5379461834157057, "grad_norm": 0.34888535737991333, "learning_rate": 1.666118820052385e-05, "loss": 0.4825, "step": 19592 }, { "epoch": 0.5379736408566722, "grad_norm": 0.4056456685066223, "learning_rate": 1.6660866068495965e-05, "loss": 0.5644, "step": 19593 }, { "epoch": 0.5380010982976386, "grad_norm": 0.4055737555027008, "learning_rate": 1.6660543924043443e-05, "loss": 0.495, "step": 19594 }, { "epoch": 0.5380285557386052, "grad_norm": 0.3782793879508972, "learning_rate": 1.666022176716688e-05, "loss": 0.5025, "step": 19595 }, { "epoch": 0.5380560131795716, "grad_norm": 0.4030816853046417, "learning_rate": 1.6659899597866873e-05, "loss": 0.5524, "step": 19596 }, { "epoch": 0.5380834706205382, "grad_norm": 0.4220723807811737, "learning_rate": 1.665957741614403e-05, "loss": 0.5699, "step": 19597 }, { "epoch": 0.5381109280615046, "grad_norm": 0.3879261314868927, "learning_rate": 1.665925522199895e-05, "loss": 0.5417, "step": 19598 }, { "epoch": 0.5381383855024712, "grad_norm": 0.37236934900283813, "learning_rate": 1.6658933015432237e-05, "loss": 0.615, "step": 19599 }, { "epoch": 0.5381658429434377, "grad_norm": 0.4347868263721466, "learning_rate": 1.6658610796444485e-05, "loss": 0.5158, "step": 19600 }, { "epoch": 0.5381933003844042, "grad_norm": 0.4138695299625397, "learning_rate": 1.66582885650363e-05, "loss": 0.5549, "step": 19601 }, { "epoch": 0.5382207578253707, "grad_norm": 0.3784598112106323, "learning_rate": 1.6657966321208283e-05, "loss": 0.4752, "step": 19602 }, { "epoch": 0.5382482152663371, "grad_norm": 0.3576120138168335, "learning_rate": 1.665764406496103e-05, "loss": 0.485, "step": 19603 }, { "epoch": 0.5382756727073037, "grad_norm": 0.3706781268119812, "learning_rate": 1.6657321796295147e-05, "loss": 0.5242, "step": 19604 }, { "epoch": 0.5383031301482701, "grad_norm": 0.3756777048110962, "learning_rate": 1.6656999515211233e-05, "loss": 0.4762, "step": 19605 }, { "epoch": 0.5383305875892367, "grad_norm": 0.3773339092731476, "learning_rate": 1.6656677221709894e-05, "loss": 0.5483, "step": 19606 }, { "epoch": 0.5383580450302032, "grad_norm": 0.39330536127090454, "learning_rate": 1.6656354915791727e-05, "loss": 0.4778, "step": 19607 }, { "epoch": 0.5383855024711697, "grad_norm": 0.39864784479141235, "learning_rate": 1.665603259745733e-05, "loss": 0.4644, "step": 19608 }, { "epoch": 0.5384129599121362, "grad_norm": 0.3666151463985443, "learning_rate": 1.6655710266707312e-05, "loss": 0.554, "step": 19609 }, { "epoch": 0.5384404173531027, "grad_norm": 0.37031984329223633, "learning_rate": 1.6655387923542266e-05, "loss": 0.5727, "step": 19610 }, { "epoch": 0.5384678747940692, "grad_norm": 0.5167794823646545, "learning_rate": 1.66550655679628e-05, "loss": 0.5509, "step": 19611 }, { "epoch": 0.5384953322350357, "grad_norm": 0.3514653146266937, "learning_rate": 1.6654743199969513e-05, "loss": 0.3962, "step": 19612 }, { "epoch": 0.5385227896760022, "grad_norm": 0.4097999930381775, "learning_rate": 1.6654420819563007e-05, "loss": 0.5839, "step": 19613 }, { "epoch": 0.5385502471169687, "grad_norm": 0.34315863251686096, "learning_rate": 1.665409842674388e-05, "loss": 0.49, "step": 19614 }, { "epoch": 0.5385777045579352, "grad_norm": 0.3985462486743927, "learning_rate": 1.6653776021512737e-05, "loss": 0.3926, "step": 19615 }, { "epoch": 0.5386051619989017, "grad_norm": 0.4783534109592438, "learning_rate": 1.6653453603870175e-05, "loss": 0.5492, "step": 19616 }, { "epoch": 0.5386326194398682, "grad_norm": 0.4241456389427185, "learning_rate": 1.66531311738168e-05, "loss": 0.5457, "step": 19617 }, { "epoch": 0.5386600768808347, "grad_norm": 0.39854535460472107, "learning_rate": 1.6652808731353217e-05, "loss": 0.5544, "step": 19618 }, { "epoch": 0.5386875343218012, "grad_norm": 0.3660352826118469, "learning_rate": 1.6652486276480016e-05, "loss": 0.6189, "step": 19619 }, { "epoch": 0.5387149917627677, "grad_norm": 0.34752213954925537, "learning_rate": 1.6652163809197807e-05, "loss": 0.4629, "step": 19620 }, { "epoch": 0.5387424492037343, "grad_norm": 0.36711394786834717, "learning_rate": 1.665184132950719e-05, "loss": 0.5732, "step": 19621 }, { "epoch": 0.5387699066447007, "grad_norm": 0.37179097533226013, "learning_rate": 1.6651518837408763e-05, "loss": 0.4761, "step": 19622 }, { "epoch": 0.5387973640856673, "grad_norm": 0.36111170053482056, "learning_rate": 1.6651196332903135e-05, "loss": 0.489, "step": 19623 }, { "epoch": 0.5388248215266337, "grad_norm": 0.3930927813053131, "learning_rate": 1.66508738159909e-05, "loss": 0.5, "step": 19624 }, { "epoch": 0.5388522789676002, "grad_norm": 0.3669028580188751, "learning_rate": 1.6650551286672665e-05, "loss": 0.4623, "step": 19625 }, { "epoch": 0.5388797364085667, "grad_norm": 0.36012929677963257, "learning_rate": 1.6650228744949026e-05, "loss": 0.4774, "step": 19626 }, { "epoch": 0.5389071938495332, "grad_norm": 0.4288899004459381, "learning_rate": 1.664990619082059e-05, "loss": 0.5048, "step": 19627 }, { "epoch": 0.5389346512904998, "grad_norm": 0.37341082096099854, "learning_rate": 1.6649583624287955e-05, "loss": 0.5127, "step": 19628 }, { "epoch": 0.5389621087314662, "grad_norm": 0.3527744710445404, "learning_rate": 1.6649261045351726e-05, "loss": 0.4873, "step": 19629 }, { "epoch": 0.5389895661724328, "grad_norm": 0.40748733282089233, "learning_rate": 1.6648938454012502e-05, "loss": 0.4656, "step": 19630 }, { "epoch": 0.5390170236133992, "grad_norm": 0.3947871923446655, "learning_rate": 1.6648615850270886e-05, "loss": 0.5514, "step": 19631 }, { "epoch": 0.5390444810543658, "grad_norm": 0.374775230884552, "learning_rate": 1.6648293234127478e-05, "loss": 0.5077, "step": 19632 }, { "epoch": 0.5390719384953322, "grad_norm": 0.4691302180290222, "learning_rate": 1.6647970605582884e-05, "loss": 0.5262, "step": 19633 }, { "epoch": 0.5390993959362987, "grad_norm": 0.3694054186344147, "learning_rate": 1.66476479646377e-05, "loss": 0.5038, "step": 19634 }, { "epoch": 0.5391268533772653, "grad_norm": 0.4255426228046417, "learning_rate": 1.6647325311292534e-05, "loss": 0.5921, "step": 19635 }, { "epoch": 0.5391543108182317, "grad_norm": 0.36368417739868164, "learning_rate": 1.6647002645547984e-05, "loss": 0.5089, "step": 19636 }, { "epoch": 0.5391817682591983, "grad_norm": 0.3619469106197357, "learning_rate": 1.664667996740465e-05, "loss": 0.4919, "step": 19637 }, { "epoch": 0.5392092257001647, "grad_norm": 0.40337297320365906, "learning_rate": 1.664635727686314e-05, "loss": 0.5254, "step": 19638 }, { "epoch": 0.5392366831411313, "grad_norm": 0.3473639488220215, "learning_rate": 1.664603457392405e-05, "loss": 0.4542, "step": 19639 }, { "epoch": 0.5392641405820977, "grad_norm": 0.4249575734138489, "learning_rate": 1.6645711858587987e-05, "loss": 0.4889, "step": 19640 }, { "epoch": 0.5392915980230643, "grad_norm": 0.3889387845993042, "learning_rate": 1.6645389130855547e-05, "loss": 0.5033, "step": 19641 }, { "epoch": 0.5393190554640308, "grad_norm": 0.528272271156311, "learning_rate": 1.6645066390727338e-05, "loss": 0.415, "step": 19642 }, { "epoch": 0.5393465129049972, "grad_norm": 0.6863782405853271, "learning_rate": 1.6644743638203958e-05, "loss": 0.5139, "step": 19643 }, { "epoch": 0.5393739703459638, "grad_norm": 0.40019291639328003, "learning_rate": 1.664442087328601e-05, "loss": 0.5405, "step": 19644 }, { "epoch": 0.5394014277869302, "grad_norm": 0.423967182636261, "learning_rate": 1.6644098095974098e-05, "loss": 0.4415, "step": 19645 }, { "epoch": 0.5394288852278968, "grad_norm": 0.41218364238739014, "learning_rate": 1.6643775306268818e-05, "loss": 0.5463, "step": 19646 }, { "epoch": 0.5394563426688632, "grad_norm": 0.3414413630962372, "learning_rate": 1.6643452504170784e-05, "loss": 0.4776, "step": 19647 }, { "epoch": 0.5394838001098298, "grad_norm": 0.38970914483070374, "learning_rate": 1.6643129689680585e-05, "loss": 0.5706, "step": 19648 }, { "epoch": 0.5395112575507963, "grad_norm": 0.3647949993610382, "learning_rate": 1.664280686279883e-05, "loss": 0.5133, "step": 19649 }, { "epoch": 0.5395387149917628, "grad_norm": 0.41058462858200073, "learning_rate": 1.664248402352612e-05, "loss": 0.5389, "step": 19650 }, { "epoch": 0.5395661724327293, "grad_norm": 0.38013148307800293, "learning_rate": 1.6642161171863057e-05, "loss": 0.4251, "step": 19651 }, { "epoch": 0.5395936298736957, "grad_norm": 0.3205028176307678, "learning_rate": 1.6641838307810246e-05, "loss": 0.4335, "step": 19652 }, { "epoch": 0.5396210873146623, "grad_norm": 0.3801233768463135, "learning_rate": 1.6641515431368284e-05, "loss": 0.5292, "step": 19653 }, { "epoch": 0.5396485447556287, "grad_norm": 0.38884609937667847, "learning_rate": 1.6641192542537776e-05, "loss": 0.5239, "step": 19654 }, { "epoch": 0.5396760021965953, "grad_norm": 0.4128047823905945, "learning_rate": 1.6640869641319328e-05, "loss": 0.47, "step": 19655 }, { "epoch": 0.5397034596375618, "grad_norm": 0.38024958968162537, "learning_rate": 1.6640546727713537e-05, "loss": 0.5224, "step": 19656 }, { "epoch": 0.5397309170785283, "grad_norm": 0.3610941767692566, "learning_rate": 1.6640223801721004e-05, "loss": 0.4908, "step": 19657 }, { "epoch": 0.5397583745194948, "grad_norm": 0.4074316918849945, "learning_rate": 1.6639900863342336e-05, "loss": 0.5398, "step": 19658 }, { "epoch": 0.5397858319604613, "grad_norm": 0.3671301603317261, "learning_rate": 1.663957791257813e-05, "loss": 0.4529, "step": 19659 }, { "epoch": 0.5398132894014278, "grad_norm": 0.4052479565143585, "learning_rate": 1.6639254949429e-05, "loss": 0.4611, "step": 19660 }, { "epoch": 0.5398407468423942, "grad_norm": 0.369039922952652, "learning_rate": 1.6638931973895537e-05, "loss": 0.4418, "step": 19661 }, { "epoch": 0.5398682042833608, "grad_norm": 0.40478718280792236, "learning_rate": 1.6638608985978347e-05, "loss": 0.5244, "step": 19662 }, { "epoch": 0.5398956617243273, "grad_norm": 0.4216059744358063, "learning_rate": 1.663828598567803e-05, "loss": 0.4934, "step": 19663 }, { "epoch": 0.5399231191652938, "grad_norm": 0.416543573141098, "learning_rate": 1.6637962972995195e-05, "loss": 0.6032, "step": 19664 }, { "epoch": 0.5399505766062603, "grad_norm": 0.33994969725608826, "learning_rate": 1.6637639947930436e-05, "loss": 0.4535, "step": 19665 }, { "epoch": 0.5399780340472268, "grad_norm": 0.38874557614326477, "learning_rate": 1.6637316910484363e-05, "loss": 0.4717, "step": 19666 }, { "epoch": 0.5400054914881933, "grad_norm": 0.35424157977104187, "learning_rate": 1.6636993860657575e-05, "loss": 0.43, "step": 19667 }, { "epoch": 0.5400329489291598, "grad_norm": 0.4333404302597046, "learning_rate": 1.6636670798450675e-05, "loss": 0.5507, "step": 19668 }, { "epoch": 0.5400604063701263, "grad_norm": 0.3633587062358856, "learning_rate": 1.6636347723864264e-05, "loss": 0.4522, "step": 19669 }, { "epoch": 0.5400878638110929, "grad_norm": 0.4357368052005768, "learning_rate": 1.663602463689895e-05, "loss": 0.4461, "step": 19670 }, { "epoch": 0.5401153212520593, "grad_norm": 0.4932962656021118, "learning_rate": 1.663570153755533e-05, "loss": 0.4946, "step": 19671 }, { "epoch": 0.5401427786930258, "grad_norm": 0.40217944979667664, "learning_rate": 1.6635378425834006e-05, "loss": 0.5324, "step": 19672 }, { "epoch": 0.5401702361339923, "grad_norm": 0.4372687041759491, "learning_rate": 1.663505530173559e-05, "loss": 0.5732, "step": 19673 }, { "epoch": 0.5401976935749588, "grad_norm": 0.331733763217926, "learning_rate": 1.6634732165260678e-05, "loss": 0.4766, "step": 19674 }, { "epoch": 0.5402251510159253, "grad_norm": 0.41974011063575745, "learning_rate": 1.6634409016409863e-05, "loss": 0.604, "step": 19675 }, { "epoch": 0.5402526084568918, "grad_norm": 0.40951383113861084, "learning_rate": 1.663408585518377e-05, "loss": 0.474, "step": 19676 }, { "epoch": 0.5402800658978584, "grad_norm": 0.3722664415836334, "learning_rate": 1.663376268158298e-05, "loss": 0.4871, "step": 19677 }, { "epoch": 0.5403075233388248, "grad_norm": 0.4712080955505371, "learning_rate": 1.663343949560811e-05, "loss": 0.5512, "step": 19678 }, { "epoch": 0.5403349807797914, "grad_norm": 0.49468183517456055, "learning_rate": 1.663311629725976e-05, "loss": 0.4656, "step": 19679 }, { "epoch": 0.5403624382207578, "grad_norm": 0.39845213294029236, "learning_rate": 1.6632793086538526e-05, "loss": 0.5805, "step": 19680 }, { "epoch": 0.5403898956617244, "grad_norm": 0.8063944578170776, "learning_rate": 1.663246986344502e-05, "loss": 0.5527, "step": 19681 }, { "epoch": 0.5404173531026908, "grad_norm": 0.3747590184211731, "learning_rate": 1.6632146627979838e-05, "loss": 0.4817, "step": 19682 }, { "epoch": 0.5404448105436573, "grad_norm": 1.1819287538528442, "learning_rate": 1.6631823380143588e-05, "loss": 0.4655, "step": 19683 }, { "epoch": 0.5404722679846238, "grad_norm": 0.3925994634628296, "learning_rate": 1.663150011993687e-05, "loss": 0.503, "step": 19684 }, { "epoch": 0.5404997254255903, "grad_norm": 0.3898352384567261, "learning_rate": 1.6631176847360287e-05, "loss": 0.5026, "step": 19685 }, { "epoch": 0.5405271828665569, "grad_norm": 0.3709852397441864, "learning_rate": 1.6630853562414442e-05, "loss": 0.5266, "step": 19686 }, { "epoch": 0.5405546403075233, "grad_norm": 0.39966267347335815, "learning_rate": 1.6630530265099945e-05, "loss": 0.4963, "step": 19687 }, { "epoch": 0.5405820977484899, "grad_norm": 0.3299880027770996, "learning_rate": 1.6630206955417384e-05, "loss": 0.4508, "step": 19688 }, { "epoch": 0.5406095551894563, "grad_norm": 0.33317139744758606, "learning_rate": 1.6629883633367377e-05, "loss": 0.4805, "step": 19689 }, { "epoch": 0.5406370126304229, "grad_norm": 0.32179224491119385, "learning_rate": 1.662956029895052e-05, "loss": 0.4063, "step": 19690 }, { "epoch": 0.5406644700713893, "grad_norm": 0.4001958668231964, "learning_rate": 1.6629236952167414e-05, "loss": 0.5574, "step": 19691 }, { "epoch": 0.5406919275123558, "grad_norm": 0.3608749806880951, "learning_rate": 1.6628913593018668e-05, "loss": 0.5095, "step": 19692 }, { "epoch": 0.5407193849533224, "grad_norm": 0.4081554114818573, "learning_rate": 1.662859022150488e-05, "loss": 0.5511, "step": 19693 }, { "epoch": 0.5407468423942888, "grad_norm": 0.3965798318386078, "learning_rate": 1.662826683762666e-05, "loss": 0.4919, "step": 19694 }, { "epoch": 0.5407742998352554, "grad_norm": 0.3306994140148163, "learning_rate": 1.6627943441384605e-05, "loss": 0.4551, "step": 19695 }, { "epoch": 0.5408017572762218, "grad_norm": 0.37543752789497375, "learning_rate": 1.6627620032779316e-05, "loss": 0.4376, "step": 19696 }, { "epoch": 0.5408292147171884, "grad_norm": 0.37513861060142517, "learning_rate": 1.6627296611811405e-05, "loss": 0.5408, "step": 19697 }, { "epoch": 0.5408566721581548, "grad_norm": 0.4088978171348572, "learning_rate": 1.6626973178481468e-05, "loss": 0.536, "step": 19698 }, { "epoch": 0.5408841295991214, "grad_norm": 0.3924083113670349, "learning_rate": 1.6626649732790115e-05, "loss": 0.5389, "step": 19699 }, { "epoch": 0.5409115870400879, "grad_norm": 0.3386351764202118, "learning_rate": 1.6626326274737943e-05, "loss": 0.47, "step": 19700 }, { "epoch": 0.5409390444810543, "grad_norm": 0.3930121064186096, "learning_rate": 1.6626002804325555e-05, "loss": 0.4808, "step": 19701 }, { "epoch": 0.5409665019220209, "grad_norm": 0.368355929851532, "learning_rate": 1.662567932155356e-05, "loss": 0.5509, "step": 19702 }, { "epoch": 0.5409939593629873, "grad_norm": 0.368168443441391, "learning_rate": 1.6625355826422557e-05, "loss": 0.4559, "step": 19703 }, { "epoch": 0.5410214168039539, "grad_norm": 0.41686174273490906, "learning_rate": 1.662503231893315e-05, "loss": 0.5343, "step": 19704 }, { "epoch": 0.5410488742449203, "grad_norm": 0.49016883969306946, "learning_rate": 1.6624708799085948e-05, "loss": 0.4855, "step": 19705 }, { "epoch": 0.5410763316858869, "grad_norm": 0.36993491649627686, "learning_rate": 1.6624385266881544e-05, "loss": 0.5289, "step": 19706 }, { "epoch": 0.5411037891268534, "grad_norm": 0.347615122795105, "learning_rate": 1.662406172232055e-05, "loss": 0.4832, "step": 19707 }, { "epoch": 0.5411312465678199, "grad_norm": 0.4150081276893616, "learning_rate": 1.6623738165403568e-05, "loss": 0.4779, "step": 19708 }, { "epoch": 0.5411587040087864, "grad_norm": 0.3809777796268463, "learning_rate": 1.6623414596131196e-05, "loss": 0.5871, "step": 19709 }, { "epoch": 0.5411861614497528, "grad_norm": 0.41042959690093994, "learning_rate": 1.6623091014504046e-05, "loss": 0.5607, "step": 19710 }, { "epoch": 0.5412136188907194, "grad_norm": 0.3646673560142517, "learning_rate": 1.6622767420522716e-05, "loss": 0.439, "step": 19711 }, { "epoch": 0.5412410763316858, "grad_norm": 0.3686259984970093, "learning_rate": 1.662244381418781e-05, "loss": 0.497, "step": 19712 }, { "epoch": 0.5412685337726524, "grad_norm": 0.33834195137023926, "learning_rate": 1.6622120195499937e-05, "loss": 0.495, "step": 19713 }, { "epoch": 0.5412959912136189, "grad_norm": 0.3719194829463959, "learning_rate": 1.662179656445969e-05, "loss": 0.499, "step": 19714 }, { "epoch": 0.5413234486545854, "grad_norm": 0.33387500047683716, "learning_rate": 1.6621472921067683e-05, "loss": 0.513, "step": 19715 }, { "epoch": 0.5413509060955519, "grad_norm": 0.403230220079422, "learning_rate": 1.6621149265324512e-05, "loss": 0.5752, "step": 19716 }, { "epoch": 0.5413783635365184, "grad_norm": 0.41798949241638184, "learning_rate": 1.6620825597230788e-05, "loss": 0.5668, "step": 19717 }, { "epoch": 0.5414058209774849, "grad_norm": 0.4527246356010437, "learning_rate": 1.662050191678711e-05, "loss": 0.645, "step": 19718 }, { "epoch": 0.5414332784184513, "grad_norm": 0.3787775933742523, "learning_rate": 1.6620178223994082e-05, "loss": 0.5642, "step": 19719 }, { "epoch": 0.5414607358594179, "grad_norm": 0.34913957118988037, "learning_rate": 1.6619854518852313e-05, "loss": 0.525, "step": 19720 }, { "epoch": 0.5414881933003844, "grad_norm": 0.3984299898147583, "learning_rate": 1.6619530801362396e-05, "loss": 0.5623, "step": 19721 }, { "epoch": 0.5415156507413509, "grad_norm": 0.3409484326839447, "learning_rate": 1.6619207071524947e-05, "loss": 0.4561, "step": 19722 }, { "epoch": 0.5415431081823174, "grad_norm": 0.37484094500541687, "learning_rate": 1.661888332934056e-05, "loss": 0.4705, "step": 19723 }, { "epoch": 0.5415705656232839, "grad_norm": 0.33636876940727234, "learning_rate": 1.6618559574809845e-05, "loss": 0.4928, "step": 19724 }, { "epoch": 0.5415980230642504, "grad_norm": 0.39195311069488525, "learning_rate": 1.6618235807933404e-05, "loss": 0.456, "step": 19725 }, { "epoch": 0.5416254805052169, "grad_norm": 0.3852764070034027, "learning_rate": 1.661791202871184e-05, "loss": 0.5234, "step": 19726 }, { "epoch": 0.5416529379461834, "grad_norm": 0.3700140714645386, "learning_rate": 1.6617588237145758e-05, "loss": 0.444, "step": 19727 }, { "epoch": 0.54168039538715, "grad_norm": 0.4701383113861084, "learning_rate": 1.6617264433235766e-05, "loss": 0.4782, "step": 19728 }, { "epoch": 0.5417078528281164, "grad_norm": 0.3594917058944702, "learning_rate": 1.6616940616982454e-05, "loss": 0.488, "step": 19729 }, { "epoch": 0.541735310269083, "grad_norm": 0.521018922328949, "learning_rate": 1.6616616788386446e-05, "loss": 0.5972, "step": 19730 }, { "epoch": 0.5417627677100494, "grad_norm": 0.4083833396434784, "learning_rate": 1.661629294744833e-05, "loss": 0.4958, "step": 19731 }, { "epoch": 0.5417902251510159, "grad_norm": 0.33342042565345764, "learning_rate": 1.661596909416872e-05, "loss": 0.4315, "step": 19732 }, { "epoch": 0.5418176825919824, "grad_norm": 0.35604435205459595, "learning_rate": 1.6615645228548212e-05, "loss": 0.5215, "step": 19733 }, { "epoch": 0.5418451400329489, "grad_norm": 0.5902740359306335, "learning_rate": 1.6615321350587415e-05, "loss": 0.5676, "step": 19734 }, { "epoch": 0.5418725974739155, "grad_norm": 0.3753475248813629, "learning_rate": 1.6614997460286937e-05, "loss": 0.5914, "step": 19735 }, { "epoch": 0.5419000549148819, "grad_norm": 0.40451812744140625, "learning_rate": 1.6614673557647375e-05, "loss": 0.5193, "step": 19736 }, { "epoch": 0.5419275123558485, "grad_norm": 0.3802521526813507, "learning_rate": 1.6614349642669334e-05, "loss": 0.5441, "step": 19737 }, { "epoch": 0.5419549697968149, "grad_norm": 0.38647064566612244, "learning_rate": 1.661402571535342e-05, "loss": 0.5473, "step": 19738 }, { "epoch": 0.5419824272377815, "grad_norm": 0.34574073553085327, "learning_rate": 1.6613701775700236e-05, "loss": 0.5766, "step": 19739 }, { "epoch": 0.5420098846787479, "grad_norm": 0.37236279249191284, "learning_rate": 1.661337782371039e-05, "loss": 0.422, "step": 19740 }, { "epoch": 0.5420373421197144, "grad_norm": 0.46751081943511963, "learning_rate": 1.6613053859384483e-05, "loss": 0.5561, "step": 19741 }, { "epoch": 0.542064799560681, "grad_norm": 0.36395853757858276, "learning_rate": 1.661272988272312e-05, "loss": 0.4268, "step": 19742 }, { "epoch": 0.5420922570016474, "grad_norm": 0.33163565397262573, "learning_rate": 1.6612405893726903e-05, "loss": 0.5508, "step": 19743 }, { "epoch": 0.542119714442614, "grad_norm": 0.3685644865036011, "learning_rate": 1.661208189239644e-05, "loss": 0.5331, "step": 19744 }, { "epoch": 0.5421471718835804, "grad_norm": 0.3723931610584259, "learning_rate": 1.6611757878732337e-05, "loss": 0.4317, "step": 19745 }, { "epoch": 0.542174629324547, "grad_norm": 0.39240095019340515, "learning_rate": 1.6611433852735192e-05, "loss": 0.5263, "step": 19746 }, { "epoch": 0.5422020867655134, "grad_norm": 0.3801056444644928, "learning_rate": 1.6611109814405613e-05, "loss": 0.5278, "step": 19747 }, { "epoch": 0.54222954420648, "grad_norm": 0.3725067675113678, "learning_rate": 1.6610785763744204e-05, "loss": 0.5321, "step": 19748 }, { "epoch": 0.5422570016474465, "grad_norm": 0.325718492269516, "learning_rate": 1.661046170075157e-05, "loss": 0.5313, "step": 19749 }, { "epoch": 0.5422844590884129, "grad_norm": 0.4323998689651489, "learning_rate": 1.6610137625428315e-05, "loss": 0.4627, "step": 19750 }, { "epoch": 0.5423119165293795, "grad_norm": 0.32611972093582153, "learning_rate": 1.6609813537775042e-05, "loss": 0.5317, "step": 19751 }, { "epoch": 0.5423393739703459, "grad_norm": 0.38168543577194214, "learning_rate": 1.660948943779236e-05, "loss": 0.4942, "step": 19752 }, { "epoch": 0.5423668314113125, "grad_norm": 0.36407700181007385, "learning_rate": 1.660916532548087e-05, "loss": 0.5882, "step": 19753 }, { "epoch": 0.5423942888522789, "grad_norm": 0.4563869535923004, "learning_rate": 1.660884120084118e-05, "loss": 0.4929, "step": 19754 }, { "epoch": 0.5424217462932455, "grad_norm": 0.3369366526603699, "learning_rate": 1.660851706387389e-05, "loss": 0.4959, "step": 19755 }, { "epoch": 0.542449203734212, "grad_norm": 0.3881751596927643, "learning_rate": 1.6608192914579603e-05, "loss": 0.528, "step": 19756 }, { "epoch": 0.5424766611751785, "grad_norm": 0.3909081816673279, "learning_rate": 1.6607868752958927e-05, "loss": 0.5098, "step": 19757 }, { "epoch": 0.542504118616145, "grad_norm": 0.37460073828697205, "learning_rate": 1.660754457901247e-05, "loss": 0.5275, "step": 19758 }, { "epoch": 0.5425315760571114, "grad_norm": 0.3406619131565094, "learning_rate": 1.6607220392740836e-05, "loss": 0.3846, "step": 19759 }, { "epoch": 0.542559033498078, "grad_norm": 0.5270332098007202, "learning_rate": 1.660689619414462e-05, "loss": 0.7174, "step": 19760 }, { "epoch": 0.5425864909390444, "grad_norm": 0.35631123185157776, "learning_rate": 1.660657198322444e-05, "loss": 0.541, "step": 19761 }, { "epoch": 0.542613948380011, "grad_norm": 0.4103354513645172, "learning_rate": 1.6606247759980893e-05, "loss": 0.5174, "step": 19762 }, { "epoch": 0.5426414058209775, "grad_norm": 0.3479849100112915, "learning_rate": 1.6605923524414584e-05, "loss": 0.4681, "step": 19763 }, { "epoch": 0.542668863261944, "grad_norm": 0.4861186146736145, "learning_rate": 1.660559927652612e-05, "loss": 0.4675, "step": 19764 }, { "epoch": 0.5426963207029105, "grad_norm": 0.3949238359928131, "learning_rate": 1.6605275016316104e-05, "loss": 0.5294, "step": 19765 }, { "epoch": 0.542723778143877, "grad_norm": 0.384653776884079, "learning_rate": 1.6604950743785144e-05, "loss": 0.4792, "step": 19766 }, { "epoch": 0.5427512355848435, "grad_norm": 0.40103763341903687, "learning_rate": 1.6604626458933843e-05, "loss": 0.4875, "step": 19767 }, { "epoch": 0.5427786930258099, "grad_norm": 0.6704285144805908, "learning_rate": 1.6604302161762803e-05, "loss": 0.4883, "step": 19768 }, { "epoch": 0.5428061504667765, "grad_norm": 0.3627004325389862, "learning_rate": 1.6603977852272635e-05, "loss": 0.5116, "step": 19769 }, { "epoch": 0.542833607907743, "grad_norm": 0.37011951208114624, "learning_rate": 1.6603653530463937e-05, "loss": 0.4848, "step": 19770 }, { "epoch": 0.5428610653487095, "grad_norm": 0.3776332437992096, "learning_rate": 1.660332919633732e-05, "loss": 0.4752, "step": 19771 }, { "epoch": 0.542888522789676, "grad_norm": 0.36433079838752747, "learning_rate": 1.660300484989339e-05, "loss": 0.4772, "step": 19772 }, { "epoch": 0.5429159802306425, "grad_norm": 0.47110483050346375, "learning_rate": 1.660268049113274e-05, "loss": 0.5827, "step": 19773 }, { "epoch": 0.542943437671609, "grad_norm": 0.3398415148258209, "learning_rate": 1.660235612005599e-05, "loss": 0.482, "step": 19774 }, { "epoch": 0.5429708951125755, "grad_norm": 0.439911812543869, "learning_rate": 1.6602031736663734e-05, "loss": 0.5697, "step": 19775 }, { "epoch": 0.542998352553542, "grad_norm": 0.37539270520210266, "learning_rate": 1.6601707340956585e-05, "loss": 0.4586, "step": 19776 }, { "epoch": 0.5430258099945086, "grad_norm": 0.4149915277957916, "learning_rate": 1.6601382932935147e-05, "loss": 0.4996, "step": 19777 }, { "epoch": 0.543053267435475, "grad_norm": 0.4294966161251068, "learning_rate": 1.6601058512600017e-05, "loss": 0.5679, "step": 19778 }, { "epoch": 0.5430807248764415, "grad_norm": 0.6289628148078918, "learning_rate": 1.660073407995181e-05, "loss": 0.5011, "step": 19779 }, { "epoch": 0.543108182317408, "grad_norm": 0.39760518074035645, "learning_rate": 1.660040963499113e-05, "loss": 0.5833, "step": 19780 }, { "epoch": 0.5431356397583745, "grad_norm": 0.38614699244499207, "learning_rate": 1.660008517771857e-05, "loss": 0.4741, "step": 19781 }, { "epoch": 0.543163097199341, "grad_norm": 0.345188170671463, "learning_rate": 1.6599760708134754e-05, "loss": 0.464, "step": 19782 }, { "epoch": 0.5431905546403075, "grad_norm": 0.380639910697937, "learning_rate": 1.659943622624027e-05, "loss": 0.531, "step": 19783 }, { "epoch": 0.5432180120812741, "grad_norm": 0.35951054096221924, "learning_rate": 1.659911173203574e-05, "loss": 0.5059, "step": 19784 }, { "epoch": 0.5432454695222405, "grad_norm": 0.45215538144111633, "learning_rate": 1.6598787225521755e-05, "loss": 0.5197, "step": 19785 }, { "epoch": 0.5432729269632071, "grad_norm": 0.35310760140419006, "learning_rate": 1.6598462706698927e-05, "loss": 0.5281, "step": 19786 }, { "epoch": 0.5433003844041735, "grad_norm": 0.3925228416919708, "learning_rate": 1.659813817556786e-05, "loss": 0.5595, "step": 19787 }, { "epoch": 0.54332784184514, "grad_norm": 0.3976326584815979, "learning_rate": 1.6597813632129156e-05, "loss": 0.5611, "step": 19788 }, { "epoch": 0.5433552992861065, "grad_norm": 0.35029137134552, "learning_rate": 1.6597489076383427e-05, "loss": 0.4473, "step": 19789 }, { "epoch": 0.543382756727073, "grad_norm": 0.39030909538269043, "learning_rate": 1.6597164508331278e-05, "loss": 0.5677, "step": 19790 }, { "epoch": 0.5434102141680396, "grad_norm": 0.41844263672828674, "learning_rate": 1.659683992797331e-05, "loss": 0.5404, "step": 19791 }, { "epoch": 0.543437671609006, "grad_norm": 0.38570094108581543, "learning_rate": 1.6596515335310126e-05, "loss": 0.5107, "step": 19792 }, { "epoch": 0.5434651290499726, "grad_norm": 0.3631781041622162, "learning_rate": 1.659619073034234e-05, "loss": 0.5213, "step": 19793 }, { "epoch": 0.543492586490939, "grad_norm": 0.375789076089859, "learning_rate": 1.659586611307055e-05, "loss": 0.5676, "step": 19794 }, { "epoch": 0.5435200439319056, "grad_norm": 0.3611355125904083, "learning_rate": 1.6595541483495364e-05, "loss": 0.4821, "step": 19795 }, { "epoch": 0.543547501372872, "grad_norm": 0.35457876324653625, "learning_rate": 1.659521684161739e-05, "loss": 0.4821, "step": 19796 }, { "epoch": 0.5435749588138385, "grad_norm": 0.4038681387901306, "learning_rate": 1.6594892187437235e-05, "loss": 0.5316, "step": 19797 }, { "epoch": 0.5436024162548051, "grad_norm": 0.36495110392570496, "learning_rate": 1.6594567520955497e-05, "loss": 0.5212, "step": 19798 }, { "epoch": 0.5436298736957715, "grad_norm": 0.4144493639469147, "learning_rate": 1.6594242842172787e-05, "loss": 0.4697, "step": 19799 }, { "epoch": 0.5436573311367381, "grad_norm": 0.3487352728843689, "learning_rate": 1.659391815108971e-05, "loss": 0.4188, "step": 19800 }, { "epoch": 0.5436847885777045, "grad_norm": 0.32795536518096924, "learning_rate": 1.6593593447706865e-05, "loss": 0.493, "step": 19801 }, { "epoch": 0.5437122460186711, "grad_norm": 0.41444724798202515, "learning_rate": 1.6593268732024872e-05, "loss": 0.5163, "step": 19802 }, { "epoch": 0.5437397034596375, "grad_norm": 0.5474461317062378, "learning_rate": 1.6592944004044323e-05, "loss": 0.4267, "step": 19803 }, { "epoch": 0.5437671609006041, "grad_norm": 0.31163036823272705, "learning_rate": 1.6592619263765836e-05, "loss": 0.5019, "step": 19804 }, { "epoch": 0.5437946183415706, "grad_norm": 0.36536309123039246, "learning_rate": 1.6592294511190005e-05, "loss": 0.509, "step": 19805 }, { "epoch": 0.543822075782537, "grad_norm": 0.3765247166156769, "learning_rate": 1.659196974631744e-05, "loss": 0.4728, "step": 19806 }, { "epoch": 0.5438495332235036, "grad_norm": 0.41895371675491333, "learning_rate": 1.6591644969148748e-05, "loss": 0.5124, "step": 19807 }, { "epoch": 0.54387699066447, "grad_norm": 0.4211632013320923, "learning_rate": 1.6591320179684534e-05, "loss": 0.5398, "step": 19808 }, { "epoch": 0.5439044481054366, "grad_norm": 0.33502358198165894, "learning_rate": 1.6590995377925407e-05, "loss": 0.4524, "step": 19809 }, { "epoch": 0.543931905546403, "grad_norm": 0.3772011697292328, "learning_rate": 1.6590670563871966e-05, "loss": 0.5484, "step": 19810 }, { "epoch": 0.5439593629873696, "grad_norm": 0.3493700325489044, "learning_rate": 1.6590345737524827e-05, "loss": 0.4875, "step": 19811 }, { "epoch": 0.5439868204283361, "grad_norm": 0.3970179557800293, "learning_rate": 1.6590020898884584e-05, "loss": 0.5285, "step": 19812 }, { "epoch": 0.5440142778693026, "grad_norm": 0.4384833574295044, "learning_rate": 1.658969604795185e-05, "loss": 0.4729, "step": 19813 }, { "epoch": 0.5440417353102691, "grad_norm": 0.4911278784275055, "learning_rate": 1.658937118472723e-05, "loss": 0.4531, "step": 19814 }, { "epoch": 0.5440691927512356, "grad_norm": 0.3607952892780304, "learning_rate": 1.658904630921133e-05, "loss": 0.4949, "step": 19815 }, { "epoch": 0.5440966501922021, "grad_norm": 0.3903752863407135, "learning_rate": 1.658872142140476e-05, "loss": 0.4285, "step": 19816 }, { "epoch": 0.5441241076331685, "grad_norm": 0.38979509472846985, "learning_rate": 1.6588396521308116e-05, "loss": 0.5003, "step": 19817 }, { "epoch": 0.5441515650741351, "grad_norm": 0.43649065494537354, "learning_rate": 1.658807160892201e-05, "loss": 0.6059, "step": 19818 }, { "epoch": 0.5441790225151016, "grad_norm": 0.3632781207561493, "learning_rate": 1.6587746684247048e-05, "loss": 0.5108, "step": 19819 }, { "epoch": 0.5442064799560681, "grad_norm": 0.33911895751953125, "learning_rate": 1.6587421747283837e-05, "loss": 0.5107, "step": 19820 }, { "epoch": 0.5442339373970346, "grad_norm": 0.4169740378856659, "learning_rate": 1.6587096798032984e-05, "loss": 0.5783, "step": 19821 }, { "epoch": 0.5442613948380011, "grad_norm": 0.4253537654876709, "learning_rate": 1.658677183649509e-05, "loss": 0.5287, "step": 19822 }, { "epoch": 0.5442888522789676, "grad_norm": 0.4095418155193329, "learning_rate": 1.6586446862670765e-05, "loss": 0.5586, "step": 19823 }, { "epoch": 0.544316309719934, "grad_norm": 0.42518872022628784, "learning_rate": 1.658612187656062e-05, "loss": 0.5913, "step": 19824 }, { "epoch": 0.5443437671609006, "grad_norm": 0.38951805233955383, "learning_rate": 1.6585796878165246e-05, "loss": 0.4834, "step": 19825 }, { "epoch": 0.5443712246018672, "grad_norm": 0.41901078820228577, "learning_rate": 1.6585471867485262e-05, "loss": 0.5682, "step": 19826 }, { "epoch": 0.5443986820428336, "grad_norm": 0.3741600215435028, "learning_rate": 1.6585146844521275e-05, "loss": 0.4851, "step": 19827 }, { "epoch": 0.5444261394838001, "grad_norm": 0.4327826499938965, "learning_rate": 1.658482180927388e-05, "loss": 0.5348, "step": 19828 }, { "epoch": 0.5444535969247666, "grad_norm": 0.44957804679870605, "learning_rate": 1.65844967617437e-05, "loss": 0.5408, "step": 19829 }, { "epoch": 0.5444810543657331, "grad_norm": 0.37372153997421265, "learning_rate": 1.6584171701931328e-05, "loss": 0.5312, "step": 19830 }, { "epoch": 0.5445085118066996, "grad_norm": 0.4408765137195587, "learning_rate": 1.6583846629837373e-05, "loss": 0.5584, "step": 19831 }, { "epoch": 0.5445359692476661, "grad_norm": 0.3691295385360718, "learning_rate": 1.658352154546244e-05, "loss": 0.5633, "step": 19832 }, { "epoch": 0.5445634266886327, "grad_norm": 0.39765238761901855, "learning_rate": 1.6583196448807142e-05, "loss": 0.5416, "step": 19833 }, { "epoch": 0.5445908841295991, "grad_norm": 0.389516681432724, "learning_rate": 1.6582871339872083e-05, "loss": 0.5358, "step": 19834 }, { "epoch": 0.5446183415705657, "grad_norm": 0.35538914799690247, "learning_rate": 1.6582546218657864e-05, "loss": 0.4351, "step": 19835 }, { "epoch": 0.5446457990115321, "grad_norm": 0.4093911349773407, "learning_rate": 1.65822210851651e-05, "loss": 0.5751, "step": 19836 }, { "epoch": 0.5446732564524986, "grad_norm": 0.39062345027923584, "learning_rate": 1.6581895939394386e-05, "loss": 0.5054, "step": 19837 }, { "epoch": 0.5447007138934651, "grad_norm": 0.38360562920570374, "learning_rate": 1.6581570781346344e-05, "loss": 0.4828, "step": 19838 }, { "epoch": 0.5447281713344316, "grad_norm": 0.35508477687835693, "learning_rate": 1.6581245611021568e-05, "loss": 0.4784, "step": 19839 }, { "epoch": 0.5447556287753982, "grad_norm": 0.37319493293762207, "learning_rate": 1.6580920428420666e-05, "loss": 0.4168, "step": 19840 }, { "epoch": 0.5447830862163646, "grad_norm": 0.34628257155418396, "learning_rate": 1.658059523354425e-05, "loss": 0.5151, "step": 19841 }, { "epoch": 0.5448105436573312, "grad_norm": 0.35994282364845276, "learning_rate": 1.6580270026392922e-05, "loss": 0.5298, "step": 19842 }, { "epoch": 0.5448380010982976, "grad_norm": 0.37631887197494507, "learning_rate": 1.6579944806967293e-05, "loss": 0.4957, "step": 19843 }, { "epoch": 0.5448654585392642, "grad_norm": 0.3660814166069031, "learning_rate": 1.6579619575267964e-05, "loss": 0.497, "step": 19844 }, { "epoch": 0.5448929159802306, "grad_norm": 0.3706534802913666, "learning_rate": 1.6579294331295544e-05, "loss": 0.5189, "step": 19845 }, { "epoch": 0.5449203734211971, "grad_norm": 0.6812811493873596, "learning_rate": 1.6578969075050643e-05, "loss": 0.4195, "step": 19846 }, { "epoch": 0.5449478308621637, "grad_norm": 0.37600138783454895, "learning_rate": 1.6578643806533864e-05, "loss": 0.5122, "step": 19847 }, { "epoch": 0.5449752883031301, "grad_norm": 0.4015718996524811, "learning_rate": 1.657831852574581e-05, "loss": 0.5476, "step": 19848 }, { "epoch": 0.5450027457440967, "grad_norm": 0.3526046872138977, "learning_rate": 1.65779932326871e-05, "loss": 0.4018, "step": 19849 }, { "epoch": 0.5450302031850631, "grad_norm": 0.3703595995903015, "learning_rate": 1.6577667927358327e-05, "loss": 0.4676, "step": 19850 }, { "epoch": 0.5450576606260297, "grad_norm": 0.4097972810268402, "learning_rate": 1.657734260976011e-05, "loss": 0.5479, "step": 19851 }, { "epoch": 0.5450851180669961, "grad_norm": 0.35021325945854187, "learning_rate": 1.6577017279893042e-05, "loss": 0.445, "step": 19852 }, { "epoch": 0.5451125755079627, "grad_norm": 0.4068858325481415, "learning_rate": 1.6576691937757744e-05, "loss": 0.4878, "step": 19853 }, { "epoch": 0.5451400329489292, "grad_norm": 0.3431015908718109, "learning_rate": 1.6576366583354816e-05, "loss": 0.4881, "step": 19854 }, { "epoch": 0.5451674903898956, "grad_norm": 0.4202606678009033, "learning_rate": 1.6576041216684862e-05, "loss": 0.543, "step": 19855 }, { "epoch": 0.5451949478308622, "grad_norm": 0.37662434577941895, "learning_rate": 1.6575715837748493e-05, "loss": 0.5043, "step": 19856 }, { "epoch": 0.5452224052718286, "grad_norm": 0.34563884139060974, "learning_rate": 1.657539044654632e-05, "loss": 0.4677, "step": 19857 }, { "epoch": 0.5452498627127952, "grad_norm": 0.3965144753456116, "learning_rate": 1.6575065043078938e-05, "loss": 0.4758, "step": 19858 }, { "epoch": 0.5452773201537616, "grad_norm": 0.30080172419548035, "learning_rate": 1.6574739627346966e-05, "loss": 0.4499, "step": 19859 }, { "epoch": 0.5453047775947282, "grad_norm": 0.38530978560447693, "learning_rate": 1.6574414199351007e-05, "loss": 0.5337, "step": 19860 }, { "epoch": 0.5453322350356947, "grad_norm": 0.39120858907699585, "learning_rate": 1.6574088759091664e-05, "loss": 0.5913, "step": 19861 }, { "epoch": 0.5453596924766612, "grad_norm": 0.36986586451530457, "learning_rate": 1.657376330656955e-05, "loss": 0.5628, "step": 19862 }, { "epoch": 0.5453871499176277, "grad_norm": 0.3609204888343811, "learning_rate": 1.6573437841785264e-05, "loss": 0.4349, "step": 19863 }, { "epoch": 0.5454146073585942, "grad_norm": 0.41389545798301697, "learning_rate": 1.6573112364739423e-05, "loss": 0.4703, "step": 19864 }, { "epoch": 0.5454420647995607, "grad_norm": 0.42142918705940247, "learning_rate": 1.657278687543263e-05, "loss": 0.5401, "step": 19865 }, { "epoch": 0.5454695222405271, "grad_norm": 0.38261914253234863, "learning_rate": 1.657246137386549e-05, "loss": 0.4814, "step": 19866 }, { "epoch": 0.5454969796814937, "grad_norm": 0.33335936069488525, "learning_rate": 1.6572135860038612e-05, "loss": 0.5063, "step": 19867 }, { "epoch": 0.5455244371224602, "grad_norm": 0.35635629296302795, "learning_rate": 1.6571810333952606e-05, "loss": 0.5045, "step": 19868 }, { "epoch": 0.5455518945634267, "grad_norm": 0.38845014572143555, "learning_rate": 1.657148479560807e-05, "loss": 0.4981, "step": 19869 }, { "epoch": 0.5455793520043932, "grad_norm": 0.4398126006126404, "learning_rate": 1.6571159245005624e-05, "loss": 0.4352, "step": 19870 }, { "epoch": 0.5456068094453597, "grad_norm": 0.4743586778640747, "learning_rate": 1.6570833682145862e-05, "loss": 0.4885, "step": 19871 }, { "epoch": 0.5456342668863262, "grad_norm": 0.42740827798843384, "learning_rate": 1.6570508107029405e-05, "loss": 0.5417, "step": 19872 }, { "epoch": 0.5456617243272927, "grad_norm": 0.39774811267852783, "learning_rate": 1.657018251965685e-05, "loss": 0.5068, "step": 19873 }, { "epoch": 0.5456891817682592, "grad_norm": 0.3877210021018982, "learning_rate": 1.6569856920028805e-05, "loss": 0.4582, "step": 19874 }, { "epoch": 0.5457166392092258, "grad_norm": 0.39974072575569153, "learning_rate": 1.6569531308145886e-05, "loss": 0.5063, "step": 19875 }, { "epoch": 0.5457440966501922, "grad_norm": 0.392890065908432, "learning_rate": 1.6569205684008688e-05, "loss": 0.4573, "step": 19876 }, { "epoch": 0.5457715540911587, "grad_norm": 0.4114449620246887, "learning_rate": 1.656888004761783e-05, "loss": 0.5142, "step": 19877 }, { "epoch": 0.5457990115321252, "grad_norm": 0.38773471117019653, "learning_rate": 1.6568554398973914e-05, "loss": 0.5387, "step": 19878 }, { "epoch": 0.5458264689730917, "grad_norm": 0.39508503675460815, "learning_rate": 1.6568228738077542e-05, "loss": 0.5104, "step": 19879 }, { "epoch": 0.5458539264140582, "grad_norm": 0.37887847423553467, "learning_rate": 1.6567903064929334e-05, "loss": 0.4798, "step": 19880 }, { "epoch": 0.5458813838550247, "grad_norm": 0.3163958191871643, "learning_rate": 1.6567577379529884e-05, "loss": 0.4533, "step": 19881 }, { "epoch": 0.5459088412959913, "grad_norm": 0.4045231342315674, "learning_rate": 1.656725168187981e-05, "loss": 0.4632, "step": 19882 }, { "epoch": 0.5459362987369577, "grad_norm": 0.3875863254070282, "learning_rate": 1.6566925971979716e-05, "loss": 0.4604, "step": 19883 }, { "epoch": 0.5459637561779243, "grad_norm": 0.4426783621311188, "learning_rate": 1.6566600249830208e-05, "loss": 0.6009, "step": 19884 }, { "epoch": 0.5459912136188907, "grad_norm": 0.3932056725025177, "learning_rate": 1.6566274515431895e-05, "loss": 0.4196, "step": 19885 }, { "epoch": 0.5460186710598572, "grad_norm": 0.3497641980648041, "learning_rate": 1.6565948768785383e-05, "loss": 0.4668, "step": 19886 }, { "epoch": 0.5460461285008237, "grad_norm": 0.4184666574001312, "learning_rate": 1.6565623009891284e-05, "loss": 0.4272, "step": 19887 }, { "epoch": 0.5460735859417902, "grad_norm": 0.3688105642795563, "learning_rate": 1.65652972387502e-05, "loss": 0.4213, "step": 19888 }, { "epoch": 0.5461010433827568, "grad_norm": 0.3591783940792084, "learning_rate": 1.6564971455362743e-05, "loss": 0.4571, "step": 19889 }, { "epoch": 0.5461285008237232, "grad_norm": 0.4305286407470703, "learning_rate": 1.6564645659729514e-05, "loss": 0.5211, "step": 19890 }, { "epoch": 0.5461559582646898, "grad_norm": 0.36632540822029114, "learning_rate": 1.656431985185113e-05, "loss": 0.4425, "step": 19891 }, { "epoch": 0.5461834157056562, "grad_norm": 0.37213972210884094, "learning_rate": 1.6563994031728194e-05, "loss": 0.5397, "step": 19892 }, { "epoch": 0.5462108731466228, "grad_norm": 0.40355736017227173, "learning_rate": 1.6563668199361314e-05, "loss": 0.487, "step": 19893 }, { "epoch": 0.5462383305875892, "grad_norm": 0.3889090120792389, "learning_rate": 1.65633423547511e-05, "loss": 0.4844, "step": 19894 }, { "epoch": 0.5462657880285557, "grad_norm": 0.4028017222881317, "learning_rate": 1.6563016497898154e-05, "loss": 0.5599, "step": 19895 }, { "epoch": 0.5462932454695223, "grad_norm": 0.3708517253398895, "learning_rate": 1.656269062880309e-05, "loss": 0.4762, "step": 19896 }, { "epoch": 0.5463207029104887, "grad_norm": 0.4672447144985199, "learning_rate": 1.6562364747466512e-05, "loss": 0.5197, "step": 19897 }, { "epoch": 0.5463481603514553, "grad_norm": 0.3863880932331085, "learning_rate": 1.6562038853889027e-05, "loss": 0.4877, "step": 19898 }, { "epoch": 0.5463756177924217, "grad_norm": 0.3615390956401825, "learning_rate": 1.6561712948071253e-05, "loss": 0.5007, "step": 19899 }, { "epoch": 0.5464030752333883, "grad_norm": 0.3584060072898865, "learning_rate": 1.6561387030013784e-05, "loss": 0.5078, "step": 19900 }, { "epoch": 0.5464305326743547, "grad_norm": 0.3494945764541626, "learning_rate": 1.6561061099717235e-05, "loss": 0.5717, "step": 19901 }, { "epoch": 0.5464579901153213, "grad_norm": 0.4984592795372009, "learning_rate": 1.656073515718221e-05, "loss": 0.5021, "step": 19902 }, { "epoch": 0.5464854475562878, "grad_norm": 0.5854906439781189, "learning_rate": 1.6560409202409324e-05, "loss": 0.5717, "step": 19903 }, { "epoch": 0.5465129049972542, "grad_norm": 0.4218493402004242, "learning_rate": 1.656008323539918e-05, "loss": 0.5161, "step": 19904 }, { "epoch": 0.5465403624382208, "grad_norm": 0.3701759874820709, "learning_rate": 1.6559757256152388e-05, "loss": 0.4088, "step": 19905 }, { "epoch": 0.5465678198791872, "grad_norm": 0.35595473647117615, "learning_rate": 1.6559431264669553e-05, "loss": 0.4858, "step": 19906 }, { "epoch": 0.5465952773201538, "grad_norm": 0.4086250364780426, "learning_rate": 1.655910526095129e-05, "loss": 0.6159, "step": 19907 }, { "epoch": 0.5466227347611202, "grad_norm": 0.43254274129867554, "learning_rate": 1.65587792449982e-05, "loss": 0.4831, "step": 19908 }, { "epoch": 0.5466501922020868, "grad_norm": 0.3610321581363678, "learning_rate": 1.655845321681089e-05, "loss": 0.4254, "step": 19909 }, { "epoch": 0.5466776496430533, "grad_norm": 0.3576514720916748, "learning_rate": 1.6558127176389973e-05, "loss": 0.5133, "step": 19910 }, { "epoch": 0.5467051070840198, "grad_norm": 0.41317903995513916, "learning_rate": 1.6557801123736056e-05, "loss": 0.5717, "step": 19911 }, { "epoch": 0.5467325645249863, "grad_norm": 0.4436034560203552, "learning_rate": 1.655747505884975e-05, "loss": 0.5666, "step": 19912 }, { "epoch": 0.5467600219659527, "grad_norm": 0.35503655672073364, "learning_rate": 1.6557148981731656e-05, "loss": 0.4905, "step": 19913 }, { "epoch": 0.5467874794069193, "grad_norm": 0.38608771562576294, "learning_rate": 1.655682289238239e-05, "loss": 0.5559, "step": 19914 }, { "epoch": 0.5468149368478857, "grad_norm": 0.36867034435272217, "learning_rate": 1.6556496790802552e-05, "loss": 0.5028, "step": 19915 }, { "epoch": 0.5468423942888523, "grad_norm": 0.40870118141174316, "learning_rate": 1.6556170676992754e-05, "loss": 0.4378, "step": 19916 }, { "epoch": 0.5468698517298188, "grad_norm": 0.3659781813621521, "learning_rate": 1.655584455095361e-05, "loss": 0.5354, "step": 19917 }, { "epoch": 0.5468973091707853, "grad_norm": 0.30427059531211853, "learning_rate": 1.655551841268572e-05, "loss": 0.4317, "step": 19918 }, { "epoch": 0.5469247666117518, "grad_norm": 0.3560012876987457, "learning_rate": 1.65551922621897e-05, "loss": 0.5209, "step": 19919 }, { "epoch": 0.5469522240527183, "grad_norm": 0.3907695412635803, "learning_rate": 1.6554866099466154e-05, "loss": 0.5049, "step": 19920 }, { "epoch": 0.5469796814936848, "grad_norm": 0.41198354959487915, "learning_rate": 1.6554539924515688e-05, "loss": 0.5299, "step": 19921 }, { "epoch": 0.5470071389346512, "grad_norm": 0.3357371687889099, "learning_rate": 1.6554213737338915e-05, "loss": 0.5208, "step": 19922 }, { "epoch": 0.5470345963756178, "grad_norm": 0.3806905448436737, "learning_rate": 1.655388753793644e-05, "loss": 0.4939, "step": 19923 }, { "epoch": 0.5470620538165843, "grad_norm": 0.3641279339790344, "learning_rate": 1.6553561326308872e-05, "loss": 0.4343, "step": 19924 }, { "epoch": 0.5470895112575508, "grad_norm": 0.4182398021221161, "learning_rate": 1.6553235102456822e-05, "loss": 0.5325, "step": 19925 }, { "epoch": 0.5471169686985173, "grad_norm": 0.35697558522224426, "learning_rate": 1.6552908866380898e-05, "loss": 0.4123, "step": 19926 }, { "epoch": 0.5471444261394838, "grad_norm": 0.5132626891136169, "learning_rate": 1.6552582618081706e-05, "loss": 0.556, "step": 19927 }, { "epoch": 0.5471718835804503, "grad_norm": 0.4633185565471649, "learning_rate": 1.6552256357559855e-05, "loss": 0.5683, "step": 19928 }, { "epoch": 0.5471993410214168, "grad_norm": 0.3541932702064514, "learning_rate": 1.6551930084815955e-05, "loss": 0.5065, "step": 19929 }, { "epoch": 0.5472267984623833, "grad_norm": 0.42525404691696167, "learning_rate": 1.6551603799850618e-05, "loss": 0.5638, "step": 19930 }, { "epoch": 0.5472542559033499, "grad_norm": 0.3838592767715454, "learning_rate": 1.6551277502664444e-05, "loss": 0.5113, "step": 19931 }, { "epoch": 0.5472817133443163, "grad_norm": 0.3613794147968292, "learning_rate": 1.6550951193258052e-05, "loss": 0.458, "step": 19932 }, { "epoch": 0.5473091707852829, "grad_norm": 0.36043307185173035, "learning_rate": 1.655062487163204e-05, "loss": 0.7077, "step": 19933 }, { "epoch": 0.5473366282262493, "grad_norm": 0.3506811559200287, "learning_rate": 1.6550298537787023e-05, "loss": 0.506, "step": 19934 }, { "epoch": 0.5473640856672158, "grad_norm": 0.4453998804092407, "learning_rate": 1.654997219172361e-05, "loss": 0.5744, "step": 19935 }, { "epoch": 0.5473915431081823, "grad_norm": 0.3625525236129761, "learning_rate": 1.6549645833442407e-05, "loss": 0.435, "step": 19936 }, { "epoch": 0.5474190005491488, "grad_norm": 0.45043864846229553, "learning_rate": 1.6549319462944027e-05, "loss": 0.5754, "step": 19937 }, { "epoch": 0.5474464579901154, "grad_norm": 0.41232365369796753, "learning_rate": 1.654899308022907e-05, "loss": 0.4608, "step": 19938 }, { "epoch": 0.5474739154310818, "grad_norm": 0.3940925598144531, "learning_rate": 1.654866668529816e-05, "loss": 0.4864, "step": 19939 }, { "epoch": 0.5475013728720484, "grad_norm": 0.3694845736026764, "learning_rate": 1.654834027815189e-05, "loss": 0.4968, "step": 19940 }, { "epoch": 0.5475288303130148, "grad_norm": 0.32306110858917236, "learning_rate": 1.6548013858790873e-05, "loss": 0.547, "step": 19941 }, { "epoch": 0.5475562877539814, "grad_norm": 0.3939873278141022, "learning_rate": 1.6547687427215726e-05, "loss": 0.5109, "step": 19942 }, { "epoch": 0.5475837451949478, "grad_norm": 0.36899036169052124, "learning_rate": 1.654736098342705e-05, "loss": 0.5147, "step": 19943 }, { "epoch": 0.5476112026359143, "grad_norm": 0.36131832003593445, "learning_rate": 1.6547034527425453e-05, "loss": 0.4444, "step": 19944 }, { "epoch": 0.5476386600768809, "grad_norm": 0.3484152555465698, "learning_rate": 1.654670805921155e-05, "loss": 0.4486, "step": 19945 }, { "epoch": 0.5476661175178473, "grad_norm": 0.37589648365974426, "learning_rate": 1.6546381578785947e-05, "loss": 0.4855, "step": 19946 }, { "epoch": 0.5476935749588139, "grad_norm": 0.4037702679634094, "learning_rate": 1.6546055086149255e-05, "loss": 0.5966, "step": 19947 }, { "epoch": 0.5477210323997803, "grad_norm": 0.4214918613433838, "learning_rate": 1.654572858130208e-05, "loss": 0.6011, "step": 19948 }, { "epoch": 0.5477484898407469, "grad_norm": 0.42284688353538513, "learning_rate": 1.6545402064245028e-05, "loss": 0.541, "step": 19949 }, { "epoch": 0.5477759472817133, "grad_norm": 0.3538191616535187, "learning_rate": 1.654507553497872e-05, "loss": 0.4497, "step": 19950 }, { "epoch": 0.5478034047226799, "grad_norm": 0.33278578519821167, "learning_rate": 1.654474899350375e-05, "loss": 0.3818, "step": 19951 }, { "epoch": 0.5478308621636463, "grad_norm": 0.4145607650279999, "learning_rate": 1.6544422439820737e-05, "loss": 0.4852, "step": 19952 }, { "epoch": 0.5478583196046128, "grad_norm": 0.38194042444229126, "learning_rate": 1.6544095873930288e-05, "loss": 0.518, "step": 19953 }, { "epoch": 0.5478857770455794, "grad_norm": 0.49839431047439575, "learning_rate": 1.6543769295833007e-05, "loss": 0.5454, "step": 19954 }, { "epoch": 0.5479132344865458, "grad_norm": 0.36886119842529297, "learning_rate": 1.6543442705529513e-05, "loss": 0.4344, "step": 19955 }, { "epoch": 0.5479406919275124, "grad_norm": 0.37153902649879456, "learning_rate": 1.654311610302041e-05, "loss": 0.4863, "step": 19956 }, { "epoch": 0.5479681493684788, "grad_norm": 0.38288459181785583, "learning_rate": 1.6542789488306302e-05, "loss": 0.5403, "step": 19957 }, { "epoch": 0.5479956068094454, "grad_norm": 0.34385743737220764, "learning_rate": 1.654246286138781e-05, "loss": 0.4741, "step": 19958 }, { "epoch": 0.5480230642504118, "grad_norm": 0.3775956928730011, "learning_rate": 1.6542136222265532e-05, "loss": 0.4401, "step": 19959 }, { "epoch": 0.5480505216913784, "grad_norm": 0.4213370084762573, "learning_rate": 1.6541809570940084e-05, "loss": 0.5059, "step": 19960 }, { "epoch": 0.5480779791323449, "grad_norm": 0.37350255250930786, "learning_rate": 1.6541482907412073e-05, "loss": 0.4938, "step": 19961 }, { "epoch": 0.5481054365733113, "grad_norm": 0.3922489881515503, "learning_rate": 1.6541156231682108e-05, "loss": 0.5791, "step": 19962 }, { "epoch": 0.5481328940142779, "grad_norm": 0.3642899990081787, "learning_rate": 1.6540829543750797e-05, "loss": 0.5212, "step": 19963 }, { "epoch": 0.5481603514552443, "grad_norm": 0.46141111850738525, "learning_rate": 1.6540502843618755e-05, "loss": 0.5354, "step": 19964 }, { "epoch": 0.5481878088962109, "grad_norm": 0.4368208944797516, "learning_rate": 1.6540176131286586e-05, "loss": 0.5585, "step": 19965 }, { "epoch": 0.5482152663371773, "grad_norm": 0.3540128767490387, "learning_rate": 1.6539849406754902e-05, "loss": 0.4757, "step": 19966 }, { "epoch": 0.5482427237781439, "grad_norm": 0.3513127565383911, "learning_rate": 1.653952267002431e-05, "loss": 0.4945, "step": 19967 }, { "epoch": 0.5482701812191104, "grad_norm": 0.5294954776763916, "learning_rate": 1.6539195921095423e-05, "loss": 0.5971, "step": 19968 }, { "epoch": 0.5482976386600769, "grad_norm": 0.49086958169937134, "learning_rate": 1.6538869159968847e-05, "loss": 0.5841, "step": 19969 }, { "epoch": 0.5483250961010434, "grad_norm": 0.3714213967323303, "learning_rate": 1.6538542386645193e-05, "loss": 0.4981, "step": 19970 }, { "epoch": 0.5483525535420098, "grad_norm": 0.31804320216178894, "learning_rate": 1.653821560112507e-05, "loss": 0.4849, "step": 19971 }, { "epoch": 0.5483800109829764, "grad_norm": 0.5123619437217712, "learning_rate": 1.653788880340909e-05, "loss": 0.6265, "step": 19972 }, { "epoch": 0.5484074684239428, "grad_norm": 0.3769945800304413, "learning_rate": 1.6537561993497862e-05, "loss": 0.619, "step": 19973 }, { "epoch": 0.5484349258649094, "grad_norm": 0.4603307545185089, "learning_rate": 1.653723517139199e-05, "loss": 0.5796, "step": 19974 }, { "epoch": 0.5484623833058759, "grad_norm": 0.3400490880012512, "learning_rate": 1.653690833709209e-05, "loss": 0.4955, "step": 19975 }, { "epoch": 0.5484898407468424, "grad_norm": 0.36047425866127014, "learning_rate": 1.653658149059877e-05, "loss": 0.5009, "step": 19976 }, { "epoch": 0.5485172981878089, "grad_norm": 0.40989553928375244, "learning_rate": 1.653625463191264e-05, "loss": 0.4637, "step": 19977 }, { "epoch": 0.5485447556287754, "grad_norm": 0.4031884968280792, "learning_rate": 1.6535927761034308e-05, "loss": 0.4905, "step": 19978 }, { "epoch": 0.5485722130697419, "grad_norm": 0.38078421354293823, "learning_rate": 1.6535600877964383e-05, "loss": 0.4295, "step": 19979 }, { "epoch": 0.5485996705107083, "grad_norm": 0.3663323223590851, "learning_rate": 1.653527398270348e-05, "loss": 0.5086, "step": 19980 }, { "epoch": 0.5486271279516749, "grad_norm": 0.3929852843284607, "learning_rate": 1.6534947075252205e-05, "loss": 0.487, "step": 19981 }, { "epoch": 0.5486545853926414, "grad_norm": 0.4373931288719177, "learning_rate": 1.6534620155611164e-05, "loss": 0.5237, "step": 19982 }, { "epoch": 0.5486820428336079, "grad_norm": 0.4501955211162567, "learning_rate": 1.6534293223780974e-05, "loss": 0.4773, "step": 19983 }, { "epoch": 0.5487095002745744, "grad_norm": 0.4095143675804138, "learning_rate": 1.6533966279762245e-05, "loss": 0.4761, "step": 19984 }, { "epoch": 0.5487369577155409, "grad_norm": 0.38518980145454407, "learning_rate": 1.6533639323555575e-05, "loss": 0.4765, "step": 19985 }, { "epoch": 0.5487644151565074, "grad_norm": 0.9824503660202026, "learning_rate": 1.6533312355161588e-05, "loss": 0.5444, "step": 19986 }, { "epoch": 0.5487918725974739, "grad_norm": 0.3876451253890991, "learning_rate": 1.6532985374580888e-05, "loss": 0.5532, "step": 19987 }, { "epoch": 0.5488193300384404, "grad_norm": 0.3673728406429291, "learning_rate": 1.653265838181408e-05, "loss": 0.5136, "step": 19988 }, { "epoch": 0.548846787479407, "grad_norm": 0.3760165572166443, "learning_rate": 1.6532331376861786e-05, "loss": 0.4365, "step": 19989 }, { "epoch": 0.5488742449203734, "grad_norm": 0.3851488530635834, "learning_rate": 1.6532004359724608e-05, "loss": 0.4773, "step": 19990 }, { "epoch": 0.54890170236134, "grad_norm": 0.348590224981308, "learning_rate": 1.6531677330403155e-05, "loss": 0.4045, "step": 19991 }, { "epoch": 0.5489291598023064, "grad_norm": 0.3545083701610565, "learning_rate": 1.6531350288898038e-05, "loss": 0.5637, "step": 19992 }, { "epoch": 0.5489566172432729, "grad_norm": 0.6794771552085876, "learning_rate": 1.653102323520987e-05, "loss": 0.4612, "step": 19993 }, { "epoch": 0.5489840746842394, "grad_norm": 0.38179340958595276, "learning_rate": 1.653069616933926e-05, "loss": 0.5202, "step": 19994 }, { "epoch": 0.5490115321252059, "grad_norm": 0.4083690345287323, "learning_rate": 1.6530369091286816e-05, "loss": 0.6058, "step": 19995 }, { "epoch": 0.5490389895661725, "grad_norm": 0.3734842836856842, "learning_rate": 1.653004200105315e-05, "loss": 0.5235, "step": 19996 }, { "epoch": 0.5490664470071389, "grad_norm": 0.37014156579971313, "learning_rate": 1.6529714898638874e-05, "loss": 0.4794, "step": 19997 }, { "epoch": 0.5490939044481055, "grad_norm": 0.361056387424469, "learning_rate": 1.652938778404459e-05, "loss": 0.5904, "step": 19998 }, { "epoch": 0.5491213618890719, "grad_norm": 0.39034304022789, "learning_rate": 1.6529060657270917e-05, "loss": 0.4734, "step": 19999 }, { "epoch": 0.5491488193300385, "grad_norm": 0.36120620369911194, "learning_rate": 1.652873351831846e-05, "loss": 0.4637, "step": 20000 }, { "epoch": 0.5491762767710049, "grad_norm": 0.3380375802516937, "learning_rate": 1.6528406367187836e-05, "loss": 0.4947, "step": 20001 }, { "epoch": 0.5492037342119714, "grad_norm": 0.8314143419265747, "learning_rate": 1.652807920387965e-05, "loss": 0.4797, "step": 20002 }, { "epoch": 0.549231191652938, "grad_norm": 0.3772182762622833, "learning_rate": 1.6527752028394506e-05, "loss": 0.4366, "step": 20003 }, { "epoch": 0.5492586490939044, "grad_norm": 0.38172775506973267, "learning_rate": 1.6527424840733027e-05, "loss": 0.4718, "step": 20004 }, { "epoch": 0.549286106534871, "grad_norm": 0.3680098056793213, "learning_rate": 1.6527097640895812e-05, "loss": 0.5151, "step": 20005 }, { "epoch": 0.5493135639758374, "grad_norm": 0.34811854362487793, "learning_rate": 1.652677042888348e-05, "loss": 0.5155, "step": 20006 }, { "epoch": 0.549341021416804, "grad_norm": 0.3689127564430237, "learning_rate": 1.6526443204696644e-05, "loss": 0.5305, "step": 20007 }, { "epoch": 0.5493684788577704, "grad_norm": 0.43038809299468994, "learning_rate": 1.65261159683359e-05, "loss": 0.5025, "step": 20008 }, { "epoch": 0.549395936298737, "grad_norm": 0.4039490222930908, "learning_rate": 1.652578871980187e-05, "loss": 0.5353, "step": 20009 }, { "epoch": 0.5494233937397035, "grad_norm": 0.37857624888420105, "learning_rate": 1.652546145909516e-05, "loss": 0.4927, "step": 20010 }, { "epoch": 0.5494508511806699, "grad_norm": 0.38521790504455566, "learning_rate": 1.6525134186216384e-05, "loss": 0.4383, "step": 20011 }, { "epoch": 0.5494783086216365, "grad_norm": 0.3654613196849823, "learning_rate": 1.652480690116615e-05, "loss": 0.4851, "step": 20012 }, { "epoch": 0.5495057660626029, "grad_norm": 0.368190199136734, "learning_rate": 1.6524479603945066e-05, "loss": 0.5419, "step": 20013 }, { "epoch": 0.5495332235035695, "grad_norm": 0.34037917852401733, "learning_rate": 1.6524152294553748e-05, "loss": 0.5214, "step": 20014 }, { "epoch": 0.5495606809445359, "grad_norm": 0.3990129232406616, "learning_rate": 1.6523824972992803e-05, "loss": 0.54, "step": 20015 }, { "epoch": 0.5495881383855025, "grad_norm": 0.3980567157268524, "learning_rate": 1.6523497639262837e-05, "loss": 0.5447, "step": 20016 }, { "epoch": 0.549615595826469, "grad_norm": 0.3174089193344116, "learning_rate": 1.6523170293364473e-05, "loss": 0.4117, "step": 20017 }, { "epoch": 0.5496430532674355, "grad_norm": 0.39309269189834595, "learning_rate": 1.6522842935298312e-05, "loss": 0.5208, "step": 20018 }, { "epoch": 0.549670510708402, "grad_norm": 0.41756364703178406, "learning_rate": 1.6522515565064967e-05, "loss": 0.5971, "step": 20019 }, { "epoch": 0.5496979681493684, "grad_norm": 0.3815871775150299, "learning_rate": 1.652218818266505e-05, "loss": 0.5244, "step": 20020 }, { "epoch": 0.549725425590335, "grad_norm": 0.3868386149406433, "learning_rate": 1.6521860788099165e-05, "loss": 0.5916, "step": 20021 }, { "epoch": 0.5497528830313014, "grad_norm": 0.3638834059238434, "learning_rate": 1.6521533381367936e-05, "loss": 0.481, "step": 20022 }, { "epoch": 0.549780340472268, "grad_norm": 0.39774858951568604, "learning_rate": 1.652120596247196e-05, "loss": 0.5566, "step": 20023 }, { "epoch": 0.5498077979132345, "grad_norm": 0.3977053761482239, "learning_rate": 1.652087853141186e-05, "loss": 0.4616, "step": 20024 }, { "epoch": 0.549835255354201, "grad_norm": 0.380033940076828, "learning_rate": 1.6520551088188234e-05, "loss": 0.5125, "step": 20025 }, { "epoch": 0.5498627127951675, "grad_norm": 0.3897496461868286, "learning_rate": 1.6520223632801697e-05, "loss": 0.5308, "step": 20026 }, { "epoch": 0.549890170236134, "grad_norm": 0.500124454498291, "learning_rate": 1.651989616525287e-05, "loss": 0.5877, "step": 20027 }, { "epoch": 0.5499176276771005, "grad_norm": 0.3661353588104248, "learning_rate": 1.651956868554235e-05, "loss": 0.4939, "step": 20028 }, { "epoch": 0.549945085118067, "grad_norm": 0.39508748054504395, "learning_rate": 1.6519241193670754e-05, "loss": 0.6036, "step": 20029 }, { "epoch": 0.5499725425590335, "grad_norm": 0.3221937417984009, "learning_rate": 1.6518913689638692e-05, "loss": 0.4767, "step": 20030 }, { "epoch": 0.55, "grad_norm": 0.3603828549385071, "learning_rate": 1.651858617344678e-05, "loss": 0.5051, "step": 20031 }, { "epoch": 0.5500274574409665, "grad_norm": 0.3769824802875519, "learning_rate": 1.651825864509562e-05, "loss": 0.5751, "step": 20032 }, { "epoch": 0.550054914881933, "grad_norm": 0.3834088444709778, "learning_rate": 1.651793110458583e-05, "loss": 0.4872, "step": 20033 }, { "epoch": 0.5500823723228995, "grad_norm": 0.40044623613357544, "learning_rate": 1.6517603551918016e-05, "loss": 0.4601, "step": 20034 }, { "epoch": 0.550109829763866, "grad_norm": 0.5327764749526978, "learning_rate": 1.6517275987092793e-05, "loss": 0.5834, "step": 20035 }, { "epoch": 0.5501372872048325, "grad_norm": 0.37087032198905945, "learning_rate": 1.651694841011077e-05, "loss": 0.528, "step": 20036 }, { "epoch": 0.550164744645799, "grad_norm": 0.36727747321128845, "learning_rate": 1.6516620820972554e-05, "loss": 0.505, "step": 20037 }, { "epoch": 0.5501922020867656, "grad_norm": 0.3190910816192627, "learning_rate": 1.6516293219678768e-05, "loss": 0.5048, "step": 20038 }, { "epoch": 0.550219659527732, "grad_norm": 0.40944188833236694, "learning_rate": 1.6515965606230008e-05, "loss": 0.6021, "step": 20039 }, { "epoch": 0.5502471169686985, "grad_norm": 0.49952229857444763, "learning_rate": 1.65156379806269e-05, "loss": 0.6147, "step": 20040 }, { "epoch": 0.550274574409665, "grad_norm": 0.371192067861557, "learning_rate": 1.6515310342870038e-05, "loss": 0.6078, "step": 20041 }, { "epoch": 0.5503020318506315, "grad_norm": 0.34411999583244324, "learning_rate": 1.6514982692960047e-05, "loss": 0.479, "step": 20042 }, { "epoch": 0.550329489291598, "grad_norm": 0.33148688077926636, "learning_rate": 1.651465503089754e-05, "loss": 0.5092, "step": 20043 }, { "epoch": 0.5503569467325645, "grad_norm": 0.3717426061630249, "learning_rate": 1.6514327356683114e-05, "loss": 0.4716, "step": 20044 }, { "epoch": 0.5503844041735311, "grad_norm": 0.36683133244514465, "learning_rate": 1.6513999670317394e-05, "loss": 0.4692, "step": 20045 }, { "epoch": 0.5504118616144975, "grad_norm": 0.3692915737628937, "learning_rate": 1.6513671971800983e-05, "loss": 0.5044, "step": 20046 }, { "epoch": 0.5504393190554641, "grad_norm": 0.4241466224193573, "learning_rate": 1.6513344261134495e-05, "loss": 0.5323, "step": 20047 }, { "epoch": 0.5504667764964305, "grad_norm": 0.36769038438796997, "learning_rate": 1.651301653831854e-05, "loss": 0.4786, "step": 20048 }, { "epoch": 0.550494233937397, "grad_norm": 0.4476281702518463, "learning_rate": 1.651268880335373e-05, "loss": 0.5607, "step": 20049 }, { "epoch": 0.5505216913783635, "grad_norm": 0.37187573313713074, "learning_rate": 1.651236105624068e-05, "loss": 0.5287, "step": 20050 }, { "epoch": 0.55054914881933, "grad_norm": 0.3753018379211426, "learning_rate": 1.6512033296979994e-05, "loss": 0.4092, "step": 20051 }, { "epoch": 0.5505766062602966, "grad_norm": 0.39985421299934387, "learning_rate": 1.651170552557229e-05, "loss": 0.5441, "step": 20052 }, { "epoch": 0.550604063701263, "grad_norm": 0.36357244849205017, "learning_rate": 1.6511377742018178e-05, "loss": 0.5332, "step": 20053 }, { "epoch": 0.5506315211422296, "grad_norm": 0.3607281446456909, "learning_rate": 1.6511049946318266e-05, "loss": 0.5544, "step": 20054 }, { "epoch": 0.550658978583196, "grad_norm": 0.3391963243484497, "learning_rate": 1.651072213847317e-05, "loss": 0.4539, "step": 20055 }, { "epoch": 0.5506864360241626, "grad_norm": 0.43458035588264465, "learning_rate": 1.6510394318483498e-05, "loss": 0.4617, "step": 20056 }, { "epoch": 0.550713893465129, "grad_norm": 0.37135159969329834, "learning_rate": 1.6510066486349862e-05, "loss": 0.5056, "step": 20057 }, { "epoch": 0.5507413509060956, "grad_norm": 0.35332560539245605, "learning_rate": 1.6509738642072878e-05, "loss": 0.4339, "step": 20058 }, { "epoch": 0.5507688083470621, "grad_norm": 0.37952399253845215, "learning_rate": 1.6509410785653147e-05, "loss": 0.4894, "step": 20059 }, { "epoch": 0.5507962657880285, "grad_norm": 0.3433246910572052, "learning_rate": 1.650908291709129e-05, "loss": 0.4539, "step": 20060 }, { "epoch": 0.5508237232289951, "grad_norm": 0.3899567723274231, "learning_rate": 1.650875503638792e-05, "loss": 0.5184, "step": 20061 }, { "epoch": 0.5508511806699615, "grad_norm": 0.4263851046562195, "learning_rate": 1.650842714354364e-05, "loss": 0.5597, "step": 20062 }, { "epoch": 0.5508786381109281, "grad_norm": 0.3864534795284271, "learning_rate": 1.6508099238559067e-05, "loss": 0.4951, "step": 20063 }, { "epoch": 0.5509060955518945, "grad_norm": 0.5819922685623169, "learning_rate": 1.6507771321434813e-05, "loss": 0.533, "step": 20064 }, { "epoch": 0.5509335529928611, "grad_norm": 0.31596049666404724, "learning_rate": 1.6507443392171486e-05, "loss": 0.3735, "step": 20065 }, { "epoch": 0.5509610104338276, "grad_norm": 0.377443790435791, "learning_rate": 1.6507115450769705e-05, "loss": 0.486, "step": 20066 }, { "epoch": 0.550988467874794, "grad_norm": 0.46061384677886963, "learning_rate": 1.6506787497230075e-05, "loss": 0.5749, "step": 20067 }, { "epoch": 0.5510159253157606, "grad_norm": 0.38082119822502136, "learning_rate": 1.650645953155321e-05, "loss": 0.5199, "step": 20068 }, { "epoch": 0.551043382756727, "grad_norm": 0.40951216220855713, "learning_rate": 1.6506131553739718e-05, "loss": 0.5188, "step": 20069 }, { "epoch": 0.5510708401976936, "grad_norm": 0.3669876158237457, "learning_rate": 1.6505803563790214e-05, "loss": 0.4213, "step": 20070 }, { "epoch": 0.55109829763866, "grad_norm": 0.37700650095939636, "learning_rate": 1.6505475561705313e-05, "loss": 0.5001, "step": 20071 }, { "epoch": 0.5511257550796266, "grad_norm": 0.3806541860103607, "learning_rate": 1.650514754748562e-05, "loss": 0.633, "step": 20072 }, { "epoch": 0.5511532125205931, "grad_norm": 0.4037267863750458, "learning_rate": 1.6504819521131756e-05, "loss": 0.456, "step": 20073 }, { "epoch": 0.5511806699615596, "grad_norm": 0.41256874799728394, "learning_rate": 1.6504491482644326e-05, "loss": 0.4879, "step": 20074 }, { "epoch": 0.5512081274025261, "grad_norm": 0.3532904088497162, "learning_rate": 1.650416343202394e-05, "loss": 0.3871, "step": 20075 }, { "epoch": 0.5512355848434926, "grad_norm": 0.36304399371147156, "learning_rate": 1.6503835369271215e-05, "loss": 0.4851, "step": 20076 }, { "epoch": 0.5512630422844591, "grad_norm": 0.3868916928768158, "learning_rate": 1.6503507294386763e-05, "loss": 0.5403, "step": 20077 }, { "epoch": 0.5512904997254255, "grad_norm": 0.3792324662208557, "learning_rate": 1.6503179207371193e-05, "loss": 0.5279, "step": 20078 }, { "epoch": 0.5513179571663921, "grad_norm": 0.3800071179866791, "learning_rate": 1.650285110822512e-05, "loss": 0.5332, "step": 20079 }, { "epoch": 0.5513454146073586, "grad_norm": 0.402853399515152, "learning_rate": 1.650252299694915e-05, "loss": 0.503, "step": 20080 }, { "epoch": 0.5513728720483251, "grad_norm": 0.38800516724586487, "learning_rate": 1.6502194873543902e-05, "loss": 0.4937, "step": 20081 }, { "epoch": 0.5514003294892916, "grad_norm": 0.3792102038860321, "learning_rate": 1.6501866738009984e-05, "loss": 0.4799, "step": 20082 }, { "epoch": 0.5514277869302581, "grad_norm": 0.5356727242469788, "learning_rate": 1.6501538590348014e-05, "loss": 0.5811, "step": 20083 }, { "epoch": 0.5514552443712246, "grad_norm": 0.3773467540740967, "learning_rate": 1.650121043055859e-05, "loss": 0.4712, "step": 20084 }, { "epoch": 0.5514827018121911, "grad_norm": 0.3755953013896942, "learning_rate": 1.650088225864234e-05, "loss": 0.5593, "step": 20085 }, { "epoch": 0.5515101592531576, "grad_norm": 0.38026559352874756, "learning_rate": 1.650055407459987e-05, "loss": 0.5286, "step": 20086 }, { "epoch": 0.5515376166941242, "grad_norm": 0.393631249666214, "learning_rate": 1.650022587843179e-05, "loss": 0.496, "step": 20087 }, { "epoch": 0.5515650741350906, "grad_norm": 0.3675176203250885, "learning_rate": 1.6499897670138718e-05, "loss": 0.4561, "step": 20088 }, { "epoch": 0.5515925315760571, "grad_norm": 0.346792072057724, "learning_rate": 1.6499569449721256e-05, "loss": 0.4357, "step": 20089 }, { "epoch": 0.5516199890170236, "grad_norm": 0.3499426245689392, "learning_rate": 1.649924121718003e-05, "loss": 0.5146, "step": 20090 }, { "epoch": 0.5516474464579901, "grad_norm": 0.3559553027153015, "learning_rate": 1.649891297251564e-05, "loss": 0.4824, "step": 20091 }, { "epoch": 0.5516749038989566, "grad_norm": 0.3966233730316162, "learning_rate": 1.6498584715728703e-05, "loss": 0.4536, "step": 20092 }, { "epoch": 0.5517023613399231, "grad_norm": 0.3696562945842743, "learning_rate": 1.649825644681983e-05, "loss": 0.5782, "step": 20093 }, { "epoch": 0.5517298187808897, "grad_norm": 0.39399251341819763, "learning_rate": 1.6497928165789637e-05, "loss": 0.491, "step": 20094 }, { "epoch": 0.5517572762218561, "grad_norm": 0.3877287805080414, "learning_rate": 1.6497599872638736e-05, "loss": 0.5827, "step": 20095 }, { "epoch": 0.5517847336628227, "grad_norm": 0.3807818293571472, "learning_rate": 1.6497271567367733e-05, "loss": 0.5399, "step": 20096 }, { "epoch": 0.5518121911037891, "grad_norm": 0.3848816454410553, "learning_rate": 1.649694324997725e-05, "loss": 0.5023, "step": 20097 }, { "epoch": 0.5518396485447556, "grad_norm": 0.3904176354408264, "learning_rate": 1.6496614920467894e-05, "loss": 0.4987, "step": 20098 }, { "epoch": 0.5518671059857221, "grad_norm": 0.3425867259502411, "learning_rate": 1.649628657884027e-05, "loss": 0.4905, "step": 20099 }, { "epoch": 0.5518945634266886, "grad_norm": 0.40030649304389954, "learning_rate": 1.649595822509501e-05, "loss": 0.4511, "step": 20100 }, { "epoch": 0.5519220208676552, "grad_norm": 0.35986724495887756, "learning_rate": 1.6495629859232706e-05, "loss": 0.5015, "step": 20101 }, { "epoch": 0.5519494783086216, "grad_norm": 0.38332024216651917, "learning_rate": 1.649530148125398e-05, "loss": 0.5208, "step": 20102 }, { "epoch": 0.5519769357495882, "grad_norm": 0.45404988527297974, "learning_rate": 1.6494973091159444e-05, "loss": 0.4997, "step": 20103 }, { "epoch": 0.5520043931905546, "grad_norm": 0.38846924901008606, "learning_rate": 1.649464468894971e-05, "loss": 0.524, "step": 20104 }, { "epoch": 0.5520318506315212, "grad_norm": 0.35313335061073303, "learning_rate": 1.6494316274625394e-05, "loss": 0.464, "step": 20105 }, { "epoch": 0.5520593080724876, "grad_norm": 0.6483606696128845, "learning_rate": 1.6493987848187104e-05, "loss": 0.6209, "step": 20106 }, { "epoch": 0.5520867655134541, "grad_norm": 0.40844833850860596, "learning_rate": 1.6493659409635456e-05, "loss": 0.5482, "step": 20107 }, { "epoch": 0.5521142229544207, "grad_norm": 0.371170312166214, "learning_rate": 1.6493330958971054e-05, "loss": 0.5611, "step": 20108 }, { "epoch": 0.5521416803953871, "grad_norm": 0.41387030482292175, "learning_rate": 1.649300249619452e-05, "loss": 0.5135, "step": 20109 }, { "epoch": 0.5521691378363537, "grad_norm": 0.37586623430252075, "learning_rate": 1.6492674021306468e-05, "loss": 0.5031, "step": 20110 }, { "epoch": 0.5521965952773201, "grad_norm": 0.37025442719459534, "learning_rate": 1.6492345534307503e-05, "loss": 0.4481, "step": 20111 }, { "epoch": 0.5522240527182867, "grad_norm": 0.3399997353553772, "learning_rate": 1.6492017035198245e-05, "loss": 0.4974, "step": 20112 }, { "epoch": 0.5522515101592531, "grad_norm": 0.3896021544933319, "learning_rate": 1.64916885239793e-05, "loss": 0.5862, "step": 20113 }, { "epoch": 0.5522789676002197, "grad_norm": 0.3528329133987427, "learning_rate": 1.6491360000651284e-05, "loss": 0.5606, "step": 20114 }, { "epoch": 0.5523064250411862, "grad_norm": 0.39975109696388245, "learning_rate": 1.649103146521481e-05, "loss": 0.5135, "step": 20115 }, { "epoch": 0.5523338824821526, "grad_norm": 0.3727113604545593, "learning_rate": 1.649070291767049e-05, "loss": 0.4772, "step": 20116 }, { "epoch": 0.5523613399231192, "grad_norm": 0.3703918755054474, "learning_rate": 1.6490374358018942e-05, "loss": 0.5031, "step": 20117 }, { "epoch": 0.5523887973640856, "grad_norm": 0.3716111481189728, "learning_rate": 1.6490045786260772e-05, "loss": 0.4968, "step": 20118 }, { "epoch": 0.5524162548050522, "grad_norm": 0.3828426003456116, "learning_rate": 1.648971720239659e-05, "loss": 0.5466, "step": 20119 }, { "epoch": 0.5524437122460186, "grad_norm": 0.38084739446640015, "learning_rate": 1.648938860642702e-05, "loss": 0.5146, "step": 20120 }, { "epoch": 0.5524711696869852, "grad_norm": 0.38489416241645813, "learning_rate": 1.6489059998352668e-05, "loss": 0.4457, "step": 20121 }, { "epoch": 0.5524986271279517, "grad_norm": 0.4236896336078644, "learning_rate": 1.6488731378174148e-05, "loss": 0.4164, "step": 20122 }, { "epoch": 0.5525260845689182, "grad_norm": 0.38429537415504456, "learning_rate": 1.6488402745892075e-05, "loss": 0.5335, "step": 20123 }, { "epoch": 0.5525535420098847, "grad_norm": 0.37772315740585327, "learning_rate": 1.6488074101507054e-05, "loss": 0.5199, "step": 20124 }, { "epoch": 0.5525809994508512, "grad_norm": 0.32997050881385803, "learning_rate": 1.6487745445019707e-05, "loss": 0.4016, "step": 20125 }, { "epoch": 0.5526084568918177, "grad_norm": 0.3660522997379303, "learning_rate": 1.6487416776430648e-05, "loss": 0.5462, "step": 20126 }, { "epoch": 0.5526359143327841, "grad_norm": 0.39341914653778076, "learning_rate": 1.648708809574048e-05, "loss": 0.547, "step": 20127 }, { "epoch": 0.5526633717737507, "grad_norm": 0.4059383273124695, "learning_rate": 1.6486759402949827e-05, "loss": 0.4632, "step": 20128 }, { "epoch": 0.5526908292147172, "grad_norm": 0.399560809135437, "learning_rate": 1.6486430698059294e-05, "loss": 0.4935, "step": 20129 }, { "epoch": 0.5527182866556837, "grad_norm": 0.3889644742012024, "learning_rate": 1.64861019810695e-05, "loss": 0.521, "step": 20130 }, { "epoch": 0.5527457440966502, "grad_norm": 0.34877192974090576, "learning_rate": 1.6485773251981053e-05, "loss": 0.5288, "step": 20131 }, { "epoch": 0.5527732015376167, "grad_norm": 0.3701987862586975, "learning_rate": 1.6485444510794572e-05, "loss": 0.5434, "step": 20132 }, { "epoch": 0.5528006589785832, "grad_norm": 0.3864341080188751, "learning_rate": 1.6485115757510665e-05, "loss": 0.4969, "step": 20133 }, { "epoch": 0.5528281164195497, "grad_norm": 0.39359405636787415, "learning_rate": 1.648478699212995e-05, "loss": 0.4887, "step": 20134 }, { "epoch": 0.5528555738605162, "grad_norm": 0.37667250633239746, "learning_rate": 1.6484458214653032e-05, "loss": 0.4567, "step": 20135 }, { "epoch": 0.5528830313014828, "grad_norm": 0.3994331955909729, "learning_rate": 1.6484129425080537e-05, "loss": 0.5266, "step": 20136 }, { "epoch": 0.5529104887424492, "grad_norm": 0.3508080542087555, "learning_rate": 1.6483800623413064e-05, "loss": 0.496, "step": 20137 }, { "epoch": 0.5529379461834157, "grad_norm": 0.3907933533191681, "learning_rate": 1.6483471809651238e-05, "loss": 0.4908, "step": 20138 }, { "epoch": 0.5529654036243822, "grad_norm": 0.3922945559024811, "learning_rate": 1.648314298379567e-05, "loss": 0.4738, "step": 20139 }, { "epoch": 0.5529928610653487, "grad_norm": 0.4048019349575043, "learning_rate": 1.6482814145846967e-05, "loss": 0.516, "step": 20140 }, { "epoch": 0.5530203185063152, "grad_norm": 0.3539634346961975, "learning_rate": 1.6482485295805748e-05, "loss": 0.5106, "step": 20141 }, { "epoch": 0.5530477759472817, "grad_norm": 0.3490460515022278, "learning_rate": 1.648215643367262e-05, "loss": 0.5639, "step": 20142 }, { "epoch": 0.5530752333882483, "grad_norm": 0.8991808295249939, "learning_rate": 1.648182755944821e-05, "loss": 0.5663, "step": 20143 }, { "epoch": 0.5531026908292147, "grad_norm": 0.37436914443969727, "learning_rate": 1.6481498673133115e-05, "loss": 0.4932, "step": 20144 }, { "epoch": 0.5531301482701813, "grad_norm": 0.34102290868759155, "learning_rate": 1.648116977472796e-05, "loss": 0.4246, "step": 20145 }, { "epoch": 0.5531576057111477, "grad_norm": 0.38094717264175415, "learning_rate": 1.6480840864233357e-05, "loss": 0.5437, "step": 20146 }, { "epoch": 0.5531850631521142, "grad_norm": 0.36383137106895447, "learning_rate": 1.6480511941649915e-05, "loss": 0.4531, "step": 20147 }, { "epoch": 0.5532125205930807, "grad_norm": 0.3569903075695038, "learning_rate": 1.648018300697825e-05, "loss": 0.5086, "step": 20148 }, { "epoch": 0.5532399780340472, "grad_norm": 0.3627673387527466, "learning_rate": 1.6479854060218976e-05, "loss": 0.5018, "step": 20149 }, { "epoch": 0.5532674354750138, "grad_norm": 1.430594563484192, "learning_rate": 1.6479525101372705e-05, "loss": 0.4988, "step": 20150 }, { "epoch": 0.5532948929159802, "grad_norm": 0.34283581376075745, "learning_rate": 1.647919613044005e-05, "loss": 0.4482, "step": 20151 }, { "epoch": 0.5533223503569468, "grad_norm": 0.37608855962753296, "learning_rate": 1.647886714742163e-05, "loss": 0.457, "step": 20152 }, { "epoch": 0.5533498077979132, "grad_norm": 0.4209042489528656, "learning_rate": 1.6478538152318054e-05, "loss": 0.5435, "step": 20153 }, { "epoch": 0.5533772652388798, "grad_norm": 0.3934909999370575, "learning_rate": 1.6478209145129936e-05, "loss": 0.5019, "step": 20154 }, { "epoch": 0.5534047226798462, "grad_norm": 0.3750247061252594, "learning_rate": 1.647788012585789e-05, "loss": 0.5321, "step": 20155 }, { "epoch": 0.5534321801208127, "grad_norm": 0.4152064323425293, "learning_rate": 1.647755109450253e-05, "loss": 0.5454, "step": 20156 }, { "epoch": 0.5534596375617793, "grad_norm": 0.349907249212265, "learning_rate": 1.647722205106447e-05, "loss": 0.469, "step": 20157 }, { "epoch": 0.5534870950027457, "grad_norm": 0.5042437314987183, "learning_rate": 1.6476892995544324e-05, "loss": 0.6294, "step": 20158 }, { "epoch": 0.5535145524437123, "grad_norm": 0.4379086494445801, "learning_rate": 1.6476563927942706e-05, "loss": 0.442, "step": 20159 }, { "epoch": 0.5535420098846787, "grad_norm": 0.39005881547927856, "learning_rate": 1.647623484826023e-05, "loss": 0.5591, "step": 20160 }, { "epoch": 0.5535694673256453, "grad_norm": 0.45695826411247253, "learning_rate": 1.6475905756497505e-05, "loss": 0.5648, "step": 20161 }, { "epoch": 0.5535969247666117, "grad_norm": 0.38863369822502136, "learning_rate": 1.6475576652655152e-05, "loss": 0.4341, "step": 20162 }, { "epoch": 0.5536243822075783, "grad_norm": 0.4194428622722626, "learning_rate": 1.647524753673378e-05, "loss": 0.498, "step": 20163 }, { "epoch": 0.5536518396485448, "grad_norm": 0.36987099051475525, "learning_rate": 1.6474918408734007e-05, "loss": 0.4219, "step": 20164 }, { "epoch": 0.5536792970895112, "grad_norm": 0.4030095934867859, "learning_rate": 1.647458926865644e-05, "loss": 0.5006, "step": 20165 }, { "epoch": 0.5537067545304778, "grad_norm": 0.41510286927223206, "learning_rate": 1.6474260116501704e-05, "loss": 0.5771, "step": 20166 }, { "epoch": 0.5537342119714442, "grad_norm": 0.4058142900466919, "learning_rate": 1.64739309522704e-05, "loss": 0.5397, "step": 20167 }, { "epoch": 0.5537616694124108, "grad_norm": 0.41471534967422485, "learning_rate": 1.6473601775963156e-05, "loss": 0.529, "step": 20168 }, { "epoch": 0.5537891268533772, "grad_norm": 0.3378960192203522, "learning_rate": 1.6473272587580576e-05, "loss": 0.4891, "step": 20169 }, { "epoch": 0.5538165842943438, "grad_norm": 0.33718016743659973, "learning_rate": 1.6472943387123273e-05, "loss": 0.4271, "step": 20170 }, { "epoch": 0.5538440417353103, "grad_norm": 0.33630335330963135, "learning_rate": 1.6472614174591868e-05, "loss": 0.4647, "step": 20171 }, { "epoch": 0.5538714991762768, "grad_norm": 0.32663705945014954, "learning_rate": 1.647228494998697e-05, "loss": 0.3977, "step": 20172 }, { "epoch": 0.5538989566172433, "grad_norm": 0.385714054107666, "learning_rate": 1.6471955713309197e-05, "loss": 0.4813, "step": 20173 }, { "epoch": 0.5539264140582097, "grad_norm": 0.3425789773464203, "learning_rate": 1.6471626464559158e-05, "loss": 0.3523, "step": 20174 }, { "epoch": 0.5539538714991763, "grad_norm": 0.41745099425315857, "learning_rate": 1.647129720373747e-05, "loss": 0.4518, "step": 20175 }, { "epoch": 0.5539813289401427, "grad_norm": 0.37504270672798157, "learning_rate": 1.6470967930844752e-05, "loss": 0.4461, "step": 20176 }, { "epoch": 0.5540087863811093, "grad_norm": 0.3643954396247864, "learning_rate": 1.6470638645881608e-05, "loss": 0.4122, "step": 20177 }, { "epoch": 0.5540362438220758, "grad_norm": 0.4053237736225128, "learning_rate": 1.647030934884866e-05, "loss": 0.5481, "step": 20178 }, { "epoch": 0.5540637012630423, "grad_norm": 0.5372850298881531, "learning_rate": 1.646998003974652e-05, "loss": 0.4937, "step": 20179 }, { "epoch": 0.5540911587040088, "grad_norm": 0.3336838185787201, "learning_rate": 1.6469650718575802e-05, "loss": 0.4375, "step": 20180 }, { "epoch": 0.5541186161449753, "grad_norm": 0.3654647767543793, "learning_rate": 1.6469321385337123e-05, "loss": 0.4984, "step": 20181 }, { "epoch": 0.5541460735859418, "grad_norm": 0.4210224449634552, "learning_rate": 1.646899204003109e-05, "loss": 0.444, "step": 20182 }, { "epoch": 0.5541735310269083, "grad_norm": 0.3753102421760559, "learning_rate": 1.646866268265833e-05, "loss": 0.4679, "step": 20183 }, { "epoch": 0.5542009884678748, "grad_norm": 0.35331031680107117, "learning_rate": 1.646833331321944e-05, "loss": 0.4725, "step": 20184 }, { "epoch": 0.5542284459088413, "grad_norm": 0.3715324401855469, "learning_rate": 1.646800393171505e-05, "loss": 0.4497, "step": 20185 }, { "epoch": 0.5542559033498078, "grad_norm": 0.36299705505371094, "learning_rate": 1.6467674538145765e-05, "loss": 0.5811, "step": 20186 }, { "epoch": 0.5542833607907743, "grad_norm": 0.4170243442058563, "learning_rate": 1.6467345132512206e-05, "loss": 0.576, "step": 20187 }, { "epoch": 0.5543108182317408, "grad_norm": 0.36679068207740784, "learning_rate": 1.6467015714814985e-05, "loss": 0.4459, "step": 20188 }, { "epoch": 0.5543382756727073, "grad_norm": 0.38606953620910645, "learning_rate": 1.6466686285054713e-05, "loss": 0.5984, "step": 20189 }, { "epoch": 0.5543657331136738, "grad_norm": 0.3983590006828308, "learning_rate": 1.6466356843232007e-05, "loss": 0.5433, "step": 20190 }, { "epoch": 0.5543931905546403, "grad_norm": 0.3884018659591675, "learning_rate": 1.646602738934748e-05, "loss": 0.5426, "step": 20191 }, { "epoch": 0.5544206479956069, "grad_norm": 0.38308918476104736, "learning_rate": 1.6465697923401752e-05, "loss": 0.5367, "step": 20192 }, { "epoch": 0.5544481054365733, "grad_norm": 0.35512298345565796, "learning_rate": 1.6465368445395432e-05, "loss": 0.5146, "step": 20193 }, { "epoch": 0.5544755628775399, "grad_norm": 0.38658955693244934, "learning_rate": 1.6465038955329135e-05, "loss": 0.56, "step": 20194 }, { "epoch": 0.5545030203185063, "grad_norm": 0.4527896046638489, "learning_rate": 1.646470945320348e-05, "loss": 0.4865, "step": 20195 }, { "epoch": 0.5545304777594728, "grad_norm": 0.3851822018623352, "learning_rate": 1.6464379939019076e-05, "loss": 0.5476, "step": 20196 }, { "epoch": 0.5545579352004393, "grad_norm": 0.369793176651001, "learning_rate": 1.646405041277654e-05, "loss": 0.5347, "step": 20197 }, { "epoch": 0.5545853926414058, "grad_norm": 0.3561685085296631, "learning_rate": 1.646372087447649e-05, "loss": 0.5033, "step": 20198 }, { "epoch": 0.5546128500823724, "grad_norm": 0.4042803943157196, "learning_rate": 1.6463391324119537e-05, "loss": 0.5421, "step": 20199 }, { "epoch": 0.5546403075233388, "grad_norm": 0.3857647180557251, "learning_rate": 1.6463061761706292e-05, "loss": 0.5306, "step": 20200 }, { "epoch": 0.5546677649643054, "grad_norm": 0.5380737781524658, "learning_rate": 1.6462732187237377e-05, "loss": 0.4844, "step": 20201 }, { "epoch": 0.5546952224052718, "grad_norm": 0.4317311942577362, "learning_rate": 1.6462402600713404e-05, "loss": 0.5477, "step": 20202 }, { "epoch": 0.5547226798462384, "grad_norm": 0.40832772850990295, "learning_rate": 1.646207300213499e-05, "loss": 0.5288, "step": 20203 }, { "epoch": 0.5547501372872048, "grad_norm": 0.36950185894966125, "learning_rate": 1.646174339150274e-05, "loss": 0.4961, "step": 20204 }, { "epoch": 0.5547775947281713, "grad_norm": 0.465455561876297, "learning_rate": 1.6461413768817285e-05, "loss": 0.5052, "step": 20205 }, { "epoch": 0.5548050521691379, "grad_norm": 0.39981111884117126, "learning_rate": 1.6461084134079227e-05, "loss": 0.5161, "step": 20206 }, { "epoch": 0.5548325096101043, "grad_norm": 0.3676760792732239, "learning_rate": 1.6460754487289182e-05, "loss": 0.5319, "step": 20207 }, { "epoch": 0.5548599670510709, "grad_norm": 0.35794395208358765, "learning_rate": 1.6460424828447775e-05, "loss": 0.5074, "step": 20208 }, { "epoch": 0.5548874244920373, "grad_norm": 0.4003955125808716, "learning_rate": 1.6460095157555607e-05, "loss": 0.5108, "step": 20209 }, { "epoch": 0.5549148819330039, "grad_norm": 0.37843626737594604, "learning_rate": 1.64597654746133e-05, "loss": 0.5142, "step": 20210 }, { "epoch": 0.5549423393739703, "grad_norm": 0.3527695834636688, "learning_rate": 1.6459435779621474e-05, "loss": 0.4822, "step": 20211 }, { "epoch": 0.5549697968149369, "grad_norm": 0.37994199991226196, "learning_rate": 1.6459106072580735e-05, "loss": 0.4243, "step": 20212 }, { "epoch": 0.5549972542559034, "grad_norm": 0.45816242694854736, "learning_rate": 1.6458776353491704e-05, "loss": 0.5781, "step": 20213 }, { "epoch": 0.5550247116968698, "grad_norm": 0.3571593463420868, "learning_rate": 1.6458446622354992e-05, "loss": 0.4706, "step": 20214 }, { "epoch": 0.5550521691378364, "grad_norm": 0.34383267164230347, "learning_rate": 1.645811687917122e-05, "loss": 0.4708, "step": 20215 }, { "epoch": 0.5550796265788028, "grad_norm": 0.3769657611846924, "learning_rate": 1.6457787123940993e-05, "loss": 0.549, "step": 20216 }, { "epoch": 0.5551070840197694, "grad_norm": 0.333621621131897, "learning_rate": 1.645745735666493e-05, "loss": 0.5055, "step": 20217 }, { "epoch": 0.5551345414607358, "grad_norm": 0.41225188970565796, "learning_rate": 1.6457127577343653e-05, "loss": 0.5758, "step": 20218 }, { "epoch": 0.5551619989017024, "grad_norm": 0.3868938386440277, "learning_rate": 1.6456797785977772e-05, "loss": 0.5381, "step": 20219 }, { "epoch": 0.5551894563426688, "grad_norm": 0.39834484457969666, "learning_rate": 1.64564679825679e-05, "loss": 0.5125, "step": 20220 }, { "epoch": 0.5552169137836354, "grad_norm": 0.35582175850868225, "learning_rate": 1.6456138167114658e-05, "loss": 0.5039, "step": 20221 }, { "epoch": 0.5552443712246019, "grad_norm": 0.401955783367157, "learning_rate": 1.6455808339618654e-05, "loss": 0.5202, "step": 20222 }, { "epoch": 0.5552718286655683, "grad_norm": 0.40118566155433655, "learning_rate": 1.645547850008051e-05, "loss": 0.4946, "step": 20223 }, { "epoch": 0.5552992861065349, "grad_norm": 0.3765190541744232, "learning_rate": 1.6455148648500838e-05, "loss": 0.4676, "step": 20224 }, { "epoch": 0.5553267435475013, "grad_norm": 0.35392647981643677, "learning_rate": 1.6454818784880248e-05, "loss": 0.4823, "step": 20225 }, { "epoch": 0.5553542009884679, "grad_norm": 0.39039677381515503, "learning_rate": 1.645448890921937e-05, "loss": 0.4638, "step": 20226 }, { "epoch": 0.5553816584294343, "grad_norm": 0.40326356887817383, "learning_rate": 1.64541590215188e-05, "loss": 0.5153, "step": 20227 }, { "epoch": 0.5554091158704009, "grad_norm": 0.3760007917881012, "learning_rate": 1.645382912177917e-05, "loss": 0.5392, "step": 20228 }, { "epoch": 0.5554365733113674, "grad_norm": 0.4069371521472931, "learning_rate": 1.6453499210001085e-05, "loss": 0.5595, "step": 20229 }, { "epoch": 0.5554640307523339, "grad_norm": 0.41930606961250305, "learning_rate": 1.645316928618517e-05, "loss": 0.4984, "step": 20230 }, { "epoch": 0.5554914881933004, "grad_norm": 0.3605698347091675, "learning_rate": 1.645283935033203e-05, "loss": 0.4143, "step": 20231 }, { "epoch": 0.5555189456342668, "grad_norm": 0.39063334465026855, "learning_rate": 1.645250940244229e-05, "loss": 0.5276, "step": 20232 }, { "epoch": 0.5555464030752334, "grad_norm": 0.40996408462524414, "learning_rate": 1.6452179442516555e-05, "loss": 0.4733, "step": 20233 }, { "epoch": 0.5555738605161998, "grad_norm": 0.43693968653678894, "learning_rate": 1.6451849470555447e-05, "loss": 0.555, "step": 20234 }, { "epoch": 0.5556013179571664, "grad_norm": 0.36617717146873474, "learning_rate": 1.6451519486559583e-05, "loss": 0.5412, "step": 20235 }, { "epoch": 0.5556287753981329, "grad_norm": 0.3859819769859314, "learning_rate": 1.6451189490529576e-05, "loss": 0.5583, "step": 20236 }, { "epoch": 0.5556562328390994, "grad_norm": 0.34744104743003845, "learning_rate": 1.645085948246604e-05, "loss": 0.51, "step": 20237 }, { "epoch": 0.5556836902800659, "grad_norm": 0.39332884550094604, "learning_rate": 1.6450529462369594e-05, "loss": 0.608, "step": 20238 }, { "epoch": 0.5557111477210324, "grad_norm": 0.42024800181388855, "learning_rate": 1.645019943024085e-05, "loss": 0.5195, "step": 20239 }, { "epoch": 0.5557386051619989, "grad_norm": 0.3658624589443207, "learning_rate": 1.6449869386080424e-05, "loss": 0.5168, "step": 20240 }, { "epoch": 0.5557660626029653, "grad_norm": 0.38009509444236755, "learning_rate": 1.6449539329888934e-05, "loss": 0.4777, "step": 20241 }, { "epoch": 0.5557935200439319, "grad_norm": 0.3355199992656708, "learning_rate": 1.6449209261667e-05, "loss": 0.3961, "step": 20242 }, { "epoch": 0.5558209774848984, "grad_norm": 0.44806209206581116, "learning_rate": 1.6448879181415227e-05, "loss": 0.5083, "step": 20243 }, { "epoch": 0.5558484349258649, "grad_norm": 0.390697717666626, "learning_rate": 1.6448549089134237e-05, "loss": 0.5189, "step": 20244 }, { "epoch": 0.5558758923668314, "grad_norm": 0.5547188520431519, "learning_rate": 1.6448218984824644e-05, "loss": 0.5482, "step": 20245 }, { "epoch": 0.5559033498077979, "grad_norm": 0.35165131092071533, "learning_rate": 1.6447888868487065e-05, "loss": 0.6403, "step": 20246 }, { "epoch": 0.5559308072487644, "grad_norm": 0.38964730501174927, "learning_rate": 1.6447558740122116e-05, "loss": 0.4937, "step": 20247 }, { "epoch": 0.5559582646897309, "grad_norm": 0.36910247802734375, "learning_rate": 1.644722859973041e-05, "loss": 0.5111, "step": 20248 }, { "epoch": 0.5559857221306974, "grad_norm": 0.37214407324790955, "learning_rate": 1.6446898447312568e-05, "loss": 0.4828, "step": 20249 }, { "epoch": 0.556013179571664, "grad_norm": 0.3713202476501465, "learning_rate": 1.6446568282869202e-05, "loss": 0.481, "step": 20250 }, { "epoch": 0.5560406370126304, "grad_norm": 0.7811570167541504, "learning_rate": 1.6446238106400927e-05, "loss": 0.5893, "step": 20251 }, { "epoch": 0.556068094453597, "grad_norm": 0.39297011494636536, "learning_rate": 1.6445907917908365e-05, "loss": 0.5298, "step": 20252 }, { "epoch": 0.5560955518945634, "grad_norm": 0.4151470959186554, "learning_rate": 1.644557771739212e-05, "loss": 0.491, "step": 20253 }, { "epoch": 0.5561230093355299, "grad_norm": 0.4163072407245636, "learning_rate": 1.644524750485282e-05, "loss": 0.5641, "step": 20254 }, { "epoch": 0.5561504667764964, "grad_norm": 0.3912801444530487, "learning_rate": 1.644491728029108e-05, "loss": 0.505, "step": 20255 }, { "epoch": 0.5561779242174629, "grad_norm": 0.34802675247192383, "learning_rate": 1.6444587043707506e-05, "loss": 0.5029, "step": 20256 }, { "epoch": 0.5562053816584295, "grad_norm": 0.3699260652065277, "learning_rate": 1.6444256795102723e-05, "loss": 0.4956, "step": 20257 }, { "epoch": 0.5562328390993959, "grad_norm": 0.3644157946109772, "learning_rate": 1.644392653447734e-05, "loss": 0.557, "step": 20258 }, { "epoch": 0.5562602965403625, "grad_norm": 0.4045586585998535, "learning_rate": 1.6443596261831984e-05, "loss": 0.5679, "step": 20259 }, { "epoch": 0.5562877539813289, "grad_norm": 0.34530389308929443, "learning_rate": 1.644326597716726e-05, "loss": 0.4793, "step": 20260 }, { "epoch": 0.5563152114222955, "grad_norm": 0.38252878189086914, "learning_rate": 1.644293568048379e-05, "loss": 0.5495, "step": 20261 }, { "epoch": 0.5563426688632619, "grad_norm": 0.33399298787117004, "learning_rate": 1.644260537178219e-05, "loss": 0.5079, "step": 20262 }, { "epoch": 0.5563701263042284, "grad_norm": 0.3800945281982422, "learning_rate": 1.6442275051063075e-05, "loss": 0.5506, "step": 20263 }, { "epoch": 0.556397583745195, "grad_norm": 0.41455358266830444, "learning_rate": 1.644194471832706e-05, "loss": 0.5274, "step": 20264 }, { "epoch": 0.5564250411861614, "grad_norm": 0.38693058490753174, "learning_rate": 1.644161437357476e-05, "loss": 0.504, "step": 20265 }, { "epoch": 0.556452498627128, "grad_norm": 0.3884378671646118, "learning_rate": 1.6441284016806795e-05, "loss": 0.4561, "step": 20266 }, { "epoch": 0.5564799560680944, "grad_norm": 0.4349499046802521, "learning_rate": 1.6440953648023778e-05, "loss": 0.6189, "step": 20267 }, { "epoch": 0.556507413509061, "grad_norm": 0.4187443256378174, "learning_rate": 1.644062326722633e-05, "loss": 0.4806, "step": 20268 }, { "epoch": 0.5565348709500274, "grad_norm": 0.36575835943222046, "learning_rate": 1.644029287441506e-05, "loss": 0.4565, "step": 20269 }, { "epoch": 0.556562328390994, "grad_norm": 0.3781886398792267, "learning_rate": 1.643996246959059e-05, "loss": 0.4968, "step": 20270 }, { "epoch": 0.5565897858319605, "grad_norm": 0.3672904372215271, "learning_rate": 1.6439632052753536e-05, "loss": 0.4758, "step": 20271 }, { "epoch": 0.5566172432729269, "grad_norm": 0.372806578874588, "learning_rate": 1.6439301623904513e-05, "loss": 0.4556, "step": 20272 }, { "epoch": 0.5566447007138935, "grad_norm": 0.3715762794017792, "learning_rate": 1.6438971183044135e-05, "loss": 0.5142, "step": 20273 }, { "epoch": 0.5566721581548599, "grad_norm": 0.33996066451072693, "learning_rate": 1.6438640730173017e-05, "loss": 0.4307, "step": 20274 }, { "epoch": 0.5566996155958265, "grad_norm": 0.3362007141113281, "learning_rate": 1.6438310265291785e-05, "loss": 0.4394, "step": 20275 }, { "epoch": 0.5567270730367929, "grad_norm": 0.33020609617233276, "learning_rate": 1.6437979788401052e-05, "loss": 0.4969, "step": 20276 }, { "epoch": 0.5567545304777595, "grad_norm": 0.361072301864624, "learning_rate": 1.6437649299501425e-05, "loss": 0.487, "step": 20277 }, { "epoch": 0.556781987918726, "grad_norm": 0.4077190160751343, "learning_rate": 1.6437318798593534e-05, "loss": 0.5812, "step": 20278 }, { "epoch": 0.5568094453596925, "grad_norm": 0.4131031334400177, "learning_rate": 1.643698828567798e-05, "loss": 0.4492, "step": 20279 }, { "epoch": 0.556836902800659, "grad_norm": 0.3791561722755432, "learning_rate": 1.6436657760755398e-05, "loss": 0.4551, "step": 20280 }, { "epoch": 0.5568643602416254, "grad_norm": 0.49113011360168457, "learning_rate": 1.643632722382639e-05, "loss": 0.547, "step": 20281 }, { "epoch": 0.556891817682592, "grad_norm": 0.3744960129261017, "learning_rate": 1.643599667489158e-05, "loss": 0.4444, "step": 20282 }, { "epoch": 0.5569192751235584, "grad_norm": 0.41314318776130676, "learning_rate": 1.643566611395158e-05, "loss": 0.5765, "step": 20283 }, { "epoch": 0.556946732564525, "grad_norm": 0.39940470457077026, "learning_rate": 1.643533554100701e-05, "loss": 0.5775, "step": 20284 }, { "epoch": 0.5569741900054915, "grad_norm": 0.32721057534217834, "learning_rate": 1.643500495605848e-05, "loss": 0.4452, "step": 20285 }, { "epoch": 0.557001647446458, "grad_norm": 0.4730414152145386, "learning_rate": 1.6434674359106617e-05, "loss": 0.6058, "step": 20286 }, { "epoch": 0.5570291048874245, "grad_norm": 0.4019206166267395, "learning_rate": 1.643434375015203e-05, "loss": 0.5868, "step": 20287 }, { "epoch": 0.557056562328391, "grad_norm": 0.36188942193984985, "learning_rate": 1.6434013129195345e-05, "loss": 0.509, "step": 20288 }, { "epoch": 0.5570840197693575, "grad_norm": 0.4097791314125061, "learning_rate": 1.6433682496237165e-05, "loss": 0.594, "step": 20289 }, { "epoch": 0.557111477210324, "grad_norm": 0.3902775049209595, "learning_rate": 1.6433351851278114e-05, "loss": 0.4938, "step": 20290 }, { "epoch": 0.5571389346512905, "grad_norm": 0.3682023286819458, "learning_rate": 1.6433021194318812e-05, "loss": 0.5764, "step": 20291 }, { "epoch": 0.557166392092257, "grad_norm": 0.3933098316192627, "learning_rate": 1.643269052535987e-05, "loss": 0.5054, "step": 20292 }, { "epoch": 0.5571938495332235, "grad_norm": 0.3762798011302948, "learning_rate": 1.6432359844401907e-05, "loss": 0.4858, "step": 20293 }, { "epoch": 0.55722130697419, "grad_norm": 0.374263733625412, "learning_rate": 1.6432029151445544e-05, "loss": 0.4535, "step": 20294 }, { "epoch": 0.5572487644151565, "grad_norm": 0.42095598578453064, "learning_rate": 1.643169844649139e-05, "loss": 0.5233, "step": 20295 }, { "epoch": 0.557276221856123, "grad_norm": 0.4902118742465973, "learning_rate": 1.6431367729540065e-05, "loss": 0.4329, "step": 20296 }, { "epoch": 0.5573036792970895, "grad_norm": 0.3666732609272003, "learning_rate": 1.6431037000592188e-05, "loss": 0.4686, "step": 20297 }, { "epoch": 0.557331136738056, "grad_norm": 0.44683846831321716, "learning_rate": 1.6430706259648373e-05, "loss": 0.5253, "step": 20298 }, { "epoch": 0.5573585941790226, "grad_norm": 0.39229485392570496, "learning_rate": 1.643037550670924e-05, "loss": 0.4764, "step": 20299 }, { "epoch": 0.557386051619989, "grad_norm": 0.631496012210846, "learning_rate": 1.6430044741775403e-05, "loss": 0.5023, "step": 20300 }, { "epoch": 0.5574135090609555, "grad_norm": 0.40017467737197876, "learning_rate": 1.6429713964847483e-05, "loss": 0.4775, "step": 20301 }, { "epoch": 0.557440966501922, "grad_norm": 0.3605625629425049, "learning_rate": 1.642938317592609e-05, "loss": 0.522, "step": 20302 }, { "epoch": 0.5574684239428885, "grad_norm": 0.35034605860710144, "learning_rate": 1.642905237501185e-05, "loss": 0.4934, "step": 20303 }, { "epoch": 0.557495881383855, "grad_norm": 0.34260040521621704, "learning_rate": 1.6428721562105373e-05, "loss": 0.3538, "step": 20304 }, { "epoch": 0.5575233388248215, "grad_norm": 0.3356960117816925, "learning_rate": 1.6428390737207278e-05, "loss": 0.4125, "step": 20305 }, { "epoch": 0.5575507962657881, "grad_norm": 0.5235896110534668, "learning_rate": 1.6428059900318185e-05, "loss": 0.5063, "step": 20306 }, { "epoch": 0.5575782537067545, "grad_norm": 0.36490899324417114, "learning_rate": 1.6427729051438706e-05, "loss": 0.5124, "step": 20307 }, { "epoch": 0.5576057111477211, "grad_norm": 0.4243975281715393, "learning_rate": 1.6427398190569463e-05, "loss": 0.5073, "step": 20308 }, { "epoch": 0.5576331685886875, "grad_norm": 0.3476022481918335, "learning_rate": 1.642706731771107e-05, "loss": 0.5432, "step": 20309 }, { "epoch": 0.557660626029654, "grad_norm": 0.35392364859580994, "learning_rate": 1.6426736432864145e-05, "loss": 0.4314, "step": 20310 }, { "epoch": 0.5576880834706205, "grad_norm": 0.4082086980342865, "learning_rate": 1.6426405536029303e-05, "loss": 0.5363, "step": 20311 }, { "epoch": 0.557715540911587, "grad_norm": 0.4229736030101776, "learning_rate": 1.6426074627207166e-05, "loss": 0.4929, "step": 20312 }, { "epoch": 0.5577429983525536, "grad_norm": 0.41695255041122437, "learning_rate": 1.6425743706398348e-05, "loss": 0.4876, "step": 20313 }, { "epoch": 0.55777045579352, "grad_norm": 0.36287322640419006, "learning_rate": 1.6425412773603468e-05, "loss": 0.4064, "step": 20314 }, { "epoch": 0.5577979132344866, "grad_norm": 0.36662808060646057, "learning_rate": 1.6425081828823143e-05, "loss": 0.4678, "step": 20315 }, { "epoch": 0.557825370675453, "grad_norm": 0.4197006821632385, "learning_rate": 1.642475087205799e-05, "loss": 0.4678, "step": 20316 }, { "epoch": 0.5578528281164196, "grad_norm": 0.37479016184806824, "learning_rate": 1.6424419903308627e-05, "loss": 0.4857, "step": 20317 }, { "epoch": 0.557880285557386, "grad_norm": 0.4174877405166626, "learning_rate": 1.6424088922575667e-05, "loss": 0.4791, "step": 20318 }, { "epoch": 0.5579077429983526, "grad_norm": 0.3609885275363922, "learning_rate": 1.642375792985973e-05, "loss": 0.4123, "step": 20319 }, { "epoch": 0.5579352004393191, "grad_norm": 0.37020015716552734, "learning_rate": 1.6423426925161437e-05, "loss": 0.4968, "step": 20320 }, { "epoch": 0.5579626578802855, "grad_norm": 0.35799267888069153, "learning_rate": 1.6423095908481405e-05, "loss": 0.5312, "step": 20321 }, { "epoch": 0.5579901153212521, "grad_norm": 0.3939209282398224, "learning_rate": 1.6422764879820247e-05, "loss": 0.4989, "step": 20322 }, { "epoch": 0.5580175727622185, "grad_norm": 0.3827025890350342, "learning_rate": 1.642243383917858e-05, "loss": 0.5449, "step": 20323 }, { "epoch": 0.5580450302031851, "grad_norm": 0.37037405371665955, "learning_rate": 1.6422102786557026e-05, "loss": 0.4664, "step": 20324 }, { "epoch": 0.5580724876441515, "grad_norm": 0.3948405086994171, "learning_rate": 1.64217717219562e-05, "loss": 0.4899, "step": 20325 }, { "epoch": 0.5580999450851181, "grad_norm": 0.3822045922279358, "learning_rate": 1.642144064537672e-05, "loss": 0.5845, "step": 20326 }, { "epoch": 0.5581274025260846, "grad_norm": 0.46515029668807983, "learning_rate": 1.6421109556819204e-05, "loss": 0.4953, "step": 20327 }, { "epoch": 0.558154859967051, "grad_norm": 0.3494088053703308, "learning_rate": 1.6420778456284272e-05, "loss": 0.4731, "step": 20328 }, { "epoch": 0.5581823174080176, "grad_norm": 0.4271300137042999, "learning_rate": 1.6420447343772538e-05, "loss": 0.5994, "step": 20329 }, { "epoch": 0.558209774848984, "grad_norm": 0.3628305196762085, "learning_rate": 1.642011621928462e-05, "loss": 0.4477, "step": 20330 }, { "epoch": 0.5582372322899506, "grad_norm": 0.4301166534423828, "learning_rate": 1.6419785082821134e-05, "loss": 0.5865, "step": 20331 }, { "epoch": 0.558264689730917, "grad_norm": 0.3920251727104187, "learning_rate": 1.6419453934382704e-05, "loss": 0.4655, "step": 20332 }, { "epoch": 0.5582921471718836, "grad_norm": 0.38450363278388977, "learning_rate": 1.641912277396994e-05, "loss": 0.4461, "step": 20333 }, { "epoch": 0.5583196046128501, "grad_norm": 0.4028998613357544, "learning_rate": 1.641879160158346e-05, "loss": 0.5963, "step": 20334 }, { "epoch": 0.5583470620538166, "grad_norm": 0.3507046699523926, "learning_rate": 1.6418460417223892e-05, "loss": 0.4609, "step": 20335 }, { "epoch": 0.5583745194947831, "grad_norm": 0.36255598068237305, "learning_rate": 1.6418129220891844e-05, "loss": 0.5174, "step": 20336 }, { "epoch": 0.5584019769357496, "grad_norm": 0.31922098994255066, "learning_rate": 1.6417798012587938e-05, "loss": 0.3855, "step": 20337 }, { "epoch": 0.5584294343767161, "grad_norm": 0.40900370478630066, "learning_rate": 1.6417466792312786e-05, "loss": 0.5105, "step": 20338 }, { "epoch": 0.5584568918176825, "grad_norm": 0.4212241768836975, "learning_rate": 1.6417135560067016e-05, "loss": 0.5891, "step": 20339 }, { "epoch": 0.5584843492586491, "grad_norm": 0.39285406470298767, "learning_rate": 1.6416804315851237e-05, "loss": 0.4827, "step": 20340 }, { "epoch": 0.5585118066996156, "grad_norm": 0.42513421177864075, "learning_rate": 1.641647305966607e-05, "loss": 0.6229, "step": 20341 }, { "epoch": 0.5585392641405821, "grad_norm": 0.387540727853775, "learning_rate": 1.6416141791512132e-05, "loss": 0.5591, "step": 20342 }, { "epoch": 0.5585667215815486, "grad_norm": 0.40205785632133484, "learning_rate": 1.6415810511390044e-05, "loss": 0.5949, "step": 20343 }, { "epoch": 0.5585941790225151, "grad_norm": 0.4196454584598541, "learning_rate": 1.641547921930042e-05, "loss": 0.5344, "step": 20344 }, { "epoch": 0.5586216364634816, "grad_norm": 0.3594636023044586, "learning_rate": 1.641514791524388e-05, "loss": 0.5517, "step": 20345 }, { "epoch": 0.5586490939044481, "grad_norm": 0.3402223587036133, "learning_rate": 1.641481659922104e-05, "loss": 0.4633, "step": 20346 }, { "epoch": 0.5586765513454146, "grad_norm": 0.39276614785194397, "learning_rate": 1.6414485271232522e-05, "loss": 0.5008, "step": 20347 }, { "epoch": 0.5587040087863812, "grad_norm": 0.36474600434303284, "learning_rate": 1.6414153931278943e-05, "loss": 0.5357, "step": 20348 }, { "epoch": 0.5587314662273476, "grad_norm": 0.3599739968776703, "learning_rate": 1.6413822579360916e-05, "loss": 0.542, "step": 20349 }, { "epoch": 0.5587589236683141, "grad_norm": 0.33788546919822693, "learning_rate": 1.6413491215479066e-05, "loss": 0.542, "step": 20350 }, { "epoch": 0.5587863811092806, "grad_norm": 0.3851885199546814, "learning_rate": 1.6413159839634005e-05, "loss": 0.5092, "step": 20351 }, { "epoch": 0.5588138385502471, "grad_norm": 0.35423871874809265, "learning_rate": 1.6412828451826357e-05, "loss": 0.4999, "step": 20352 }, { "epoch": 0.5588412959912136, "grad_norm": 0.37766820192337036, "learning_rate": 1.641249705205674e-05, "loss": 0.5341, "step": 20353 }, { "epoch": 0.5588687534321801, "grad_norm": 0.3562803566455841, "learning_rate": 1.641216564032576e-05, "loss": 0.4646, "step": 20354 }, { "epoch": 0.5588962108731467, "grad_norm": 0.4079355001449585, "learning_rate": 1.641183421663405e-05, "loss": 0.4001, "step": 20355 }, { "epoch": 0.5589236683141131, "grad_norm": 0.4961546063423157, "learning_rate": 1.6411502780982225e-05, "loss": 0.4566, "step": 20356 }, { "epoch": 0.5589511257550797, "grad_norm": 0.41032278537750244, "learning_rate": 1.6411171333370898e-05, "loss": 0.5528, "step": 20357 }, { "epoch": 0.5589785831960461, "grad_norm": 0.38336676359176636, "learning_rate": 1.641083987380069e-05, "loss": 0.517, "step": 20358 }, { "epoch": 0.5590060406370126, "grad_norm": 0.4026595652103424, "learning_rate": 1.641050840227222e-05, "loss": 0.4975, "step": 20359 }, { "epoch": 0.5590334980779791, "grad_norm": 0.38539934158325195, "learning_rate": 1.6410176918786106e-05, "loss": 0.5285, "step": 20360 }, { "epoch": 0.5590609555189456, "grad_norm": 0.4016752541065216, "learning_rate": 1.6409845423342967e-05, "loss": 0.4864, "step": 20361 }, { "epoch": 0.5590884129599122, "grad_norm": 0.38341930508613586, "learning_rate": 1.640951391594342e-05, "loss": 0.3714, "step": 20362 }, { "epoch": 0.5591158704008786, "grad_norm": 0.39866358041763306, "learning_rate": 1.6409182396588086e-05, "loss": 0.4346, "step": 20363 }, { "epoch": 0.5591433278418452, "grad_norm": 0.3741065263748169, "learning_rate": 1.6408850865277577e-05, "loss": 0.5289, "step": 20364 }, { "epoch": 0.5591707852828116, "grad_norm": 0.36073973774909973, "learning_rate": 1.640851932201252e-05, "loss": 0.4967, "step": 20365 }, { "epoch": 0.5591982427237782, "grad_norm": 0.38460129499435425, "learning_rate": 1.6408187766793524e-05, "loss": 0.4745, "step": 20366 }, { "epoch": 0.5592257001647446, "grad_norm": 0.365278035402298, "learning_rate": 1.6407856199621216e-05, "loss": 0.4322, "step": 20367 }, { "epoch": 0.5592531576057111, "grad_norm": 0.3886480927467346, "learning_rate": 1.6407524620496212e-05, "loss": 0.533, "step": 20368 }, { "epoch": 0.5592806150466777, "grad_norm": 0.37326401472091675, "learning_rate": 1.640719302941913e-05, "loss": 0.4704, "step": 20369 }, { "epoch": 0.5593080724876441, "grad_norm": 0.44183996319770813, "learning_rate": 1.6406861426390582e-05, "loss": 0.5959, "step": 20370 }, { "epoch": 0.5593355299286107, "grad_norm": 0.3419206738471985, "learning_rate": 1.64065298114112e-05, "loss": 0.4639, "step": 20371 }, { "epoch": 0.5593629873695771, "grad_norm": 0.3730214238166809, "learning_rate": 1.6406198184481587e-05, "loss": 0.5031, "step": 20372 }, { "epoch": 0.5593904448105437, "grad_norm": 0.36851269006729126, "learning_rate": 1.6405866545602377e-05, "loss": 0.4117, "step": 20373 }, { "epoch": 0.5594179022515101, "grad_norm": 0.38713768124580383, "learning_rate": 1.640553489477418e-05, "loss": 0.4534, "step": 20374 }, { "epoch": 0.5594453596924767, "grad_norm": 0.34501412510871887, "learning_rate": 1.6405203231997613e-05, "loss": 0.5184, "step": 20375 }, { "epoch": 0.5594728171334432, "grad_norm": 0.42019110918045044, "learning_rate": 1.64048715572733e-05, "loss": 0.5626, "step": 20376 }, { "epoch": 0.5595002745744097, "grad_norm": 0.3870408535003662, "learning_rate": 1.6404539870601858e-05, "loss": 0.5051, "step": 20377 }, { "epoch": 0.5595277320153762, "grad_norm": 0.4209171533584595, "learning_rate": 1.6404208171983904e-05, "loss": 0.5551, "step": 20378 }, { "epoch": 0.5595551894563426, "grad_norm": 0.3529369831085205, "learning_rate": 1.6403876461420058e-05, "loss": 0.5361, "step": 20379 }, { "epoch": 0.5595826468973092, "grad_norm": 0.37445148825645447, "learning_rate": 1.6403544738910937e-05, "loss": 0.4731, "step": 20380 }, { "epoch": 0.5596101043382756, "grad_norm": 0.38545623421669006, "learning_rate": 1.6403213004457163e-05, "loss": 0.4258, "step": 20381 }, { "epoch": 0.5596375617792422, "grad_norm": 0.40043121576309204, "learning_rate": 1.640288125805935e-05, "loss": 0.5229, "step": 20382 }, { "epoch": 0.5596650192202087, "grad_norm": 0.406954824924469, "learning_rate": 1.6402549499718124e-05, "loss": 0.5272, "step": 20383 }, { "epoch": 0.5596924766611752, "grad_norm": 0.38899990916252136, "learning_rate": 1.64022177294341e-05, "loss": 0.4918, "step": 20384 }, { "epoch": 0.5597199341021417, "grad_norm": 0.3781847655773163, "learning_rate": 1.6401885947207894e-05, "loss": 0.6125, "step": 20385 }, { "epoch": 0.5597473915431082, "grad_norm": 0.35411155223846436, "learning_rate": 1.640155415304013e-05, "loss": 0.5248, "step": 20386 }, { "epoch": 0.5597748489840747, "grad_norm": 0.40730565786361694, "learning_rate": 1.6401222346931422e-05, "loss": 0.4913, "step": 20387 }, { "epoch": 0.5598023064250411, "grad_norm": 0.4449562132358551, "learning_rate": 1.6400890528882392e-05, "loss": 0.436, "step": 20388 }, { "epoch": 0.5598297638660077, "grad_norm": 0.34323009848594666, "learning_rate": 1.640055869889366e-05, "loss": 0.4688, "step": 20389 }, { "epoch": 0.5598572213069742, "grad_norm": 0.41725197434425354, "learning_rate": 1.640022685696584e-05, "loss": 0.4817, "step": 20390 }, { "epoch": 0.5598846787479407, "grad_norm": 0.36510708928108215, "learning_rate": 1.6399895003099556e-05, "loss": 0.5198, "step": 20391 }, { "epoch": 0.5599121361889072, "grad_norm": 0.471561998128891, "learning_rate": 1.6399563137295428e-05, "loss": 0.5221, "step": 20392 }, { "epoch": 0.5599395936298737, "grad_norm": 0.42003482580184937, "learning_rate": 1.639923125955407e-05, "loss": 0.423, "step": 20393 }, { "epoch": 0.5599670510708402, "grad_norm": 0.425125390291214, "learning_rate": 1.63988993698761e-05, "loss": 0.531, "step": 20394 }, { "epoch": 0.5599945085118067, "grad_norm": 0.4303893744945526, "learning_rate": 1.639856746826214e-05, "loss": 0.531, "step": 20395 }, { "epoch": 0.5600219659527732, "grad_norm": 0.40897682309150696, "learning_rate": 1.6398235554712815e-05, "loss": 0.6042, "step": 20396 }, { "epoch": 0.5600494233937398, "grad_norm": 0.3443061411380768, "learning_rate": 1.6397903629228734e-05, "loss": 0.5019, "step": 20397 }, { "epoch": 0.5600768808347062, "grad_norm": 0.4209725558757782, "learning_rate": 1.6397571691810522e-05, "loss": 0.5717, "step": 20398 }, { "epoch": 0.5601043382756727, "grad_norm": 0.42184874415397644, "learning_rate": 1.6397239742458798e-05, "loss": 0.5397, "step": 20399 }, { "epoch": 0.5601317957166392, "grad_norm": 0.3342571258544922, "learning_rate": 1.6396907781174182e-05, "loss": 0.4405, "step": 20400 }, { "epoch": 0.5601592531576057, "grad_norm": 0.41280466318130493, "learning_rate": 1.6396575807957288e-05, "loss": 0.5489, "step": 20401 }, { "epoch": 0.5601867105985722, "grad_norm": 0.37300026416778564, "learning_rate": 1.639624382280874e-05, "loss": 0.5055, "step": 20402 }, { "epoch": 0.5602141680395387, "grad_norm": 0.33588624000549316, "learning_rate": 1.6395911825729154e-05, "loss": 0.4558, "step": 20403 }, { "epoch": 0.5602416254805053, "grad_norm": 0.37404316663742065, "learning_rate": 1.6395579816719155e-05, "loss": 0.51, "step": 20404 }, { "epoch": 0.5602690829214717, "grad_norm": 0.36189815402030945, "learning_rate": 1.6395247795779353e-05, "loss": 0.541, "step": 20405 }, { "epoch": 0.5602965403624383, "grad_norm": 0.3943541347980499, "learning_rate": 1.6394915762910377e-05, "loss": 0.5908, "step": 20406 }, { "epoch": 0.5603239978034047, "grad_norm": 0.3556414246559143, "learning_rate": 1.639458371811284e-05, "loss": 0.5195, "step": 20407 }, { "epoch": 0.5603514552443712, "grad_norm": 0.41693490743637085, "learning_rate": 1.6394251661387363e-05, "loss": 0.6013, "step": 20408 }, { "epoch": 0.5603789126853377, "grad_norm": 0.3524847626686096, "learning_rate": 1.639391959273457e-05, "loss": 0.4729, "step": 20409 }, { "epoch": 0.5604063701263042, "grad_norm": 0.35882365703582764, "learning_rate": 1.6393587512155073e-05, "loss": 0.5655, "step": 20410 }, { "epoch": 0.5604338275672708, "grad_norm": 0.4013114869594574, "learning_rate": 1.6393255419649495e-05, "loss": 0.4661, "step": 20411 }, { "epoch": 0.5604612850082372, "grad_norm": 0.3949970006942749, "learning_rate": 1.6392923315218453e-05, "loss": 0.5321, "step": 20412 }, { "epoch": 0.5604887424492038, "grad_norm": 0.3918602764606476, "learning_rate": 1.6392591198862572e-05, "loss": 0.5157, "step": 20413 }, { "epoch": 0.5605161998901702, "grad_norm": 0.40095436573028564, "learning_rate": 1.6392259070582466e-05, "loss": 0.5455, "step": 20414 }, { "epoch": 0.5605436573311368, "grad_norm": 0.37093040347099304, "learning_rate": 1.6391926930378762e-05, "loss": 0.5755, "step": 20415 }, { "epoch": 0.5605711147721032, "grad_norm": 0.33476921916007996, "learning_rate": 1.6391594778252064e-05, "loss": 0.4887, "step": 20416 }, { "epoch": 0.5605985722130697, "grad_norm": 0.38206055760383606, "learning_rate": 1.639126261420301e-05, "loss": 0.5912, "step": 20417 }, { "epoch": 0.5606260296540363, "grad_norm": 0.35230541229248047, "learning_rate": 1.6390930438232212e-05, "loss": 0.4501, "step": 20418 }, { "epoch": 0.5606534870950027, "grad_norm": 0.3683376908302307, "learning_rate": 1.6390598250340282e-05, "loss": 0.4785, "step": 20419 }, { "epoch": 0.5606809445359693, "grad_norm": 0.3759908974170685, "learning_rate": 1.639026605052785e-05, "loss": 0.547, "step": 20420 }, { "epoch": 0.5607084019769357, "grad_norm": 0.36794212460517883, "learning_rate": 1.6389933838795532e-05, "loss": 0.5454, "step": 20421 }, { "epoch": 0.5607358594179023, "grad_norm": 0.34605079889297485, "learning_rate": 1.6389601615143952e-05, "loss": 0.4995, "step": 20422 }, { "epoch": 0.5607633168588687, "grad_norm": 0.39285945892333984, "learning_rate": 1.6389269379573723e-05, "loss": 0.5574, "step": 20423 }, { "epoch": 0.5607907742998353, "grad_norm": 0.3999477028846741, "learning_rate": 1.6388937132085463e-05, "loss": 0.5773, "step": 20424 }, { "epoch": 0.5608182317408018, "grad_norm": 0.3509948253631592, "learning_rate": 1.6388604872679802e-05, "loss": 0.4419, "step": 20425 }, { "epoch": 0.5608456891817682, "grad_norm": 0.36289724707603455, "learning_rate": 1.6388272601357353e-05, "loss": 0.4979, "step": 20426 }, { "epoch": 0.5608731466227348, "grad_norm": 0.32781678438186646, "learning_rate": 1.6387940318118736e-05, "loss": 0.4255, "step": 20427 }, { "epoch": 0.5609006040637012, "grad_norm": 0.42143720388412476, "learning_rate": 1.6387608022964568e-05, "loss": 0.5306, "step": 20428 }, { "epoch": 0.5609280615046678, "grad_norm": 0.4510056674480438, "learning_rate": 1.6387275715895475e-05, "loss": 0.503, "step": 20429 }, { "epoch": 0.5609555189456342, "grad_norm": 0.38523828983306885, "learning_rate": 1.638694339691208e-05, "loss": 0.5108, "step": 20430 }, { "epoch": 0.5609829763866008, "grad_norm": 0.37688902020454407, "learning_rate": 1.6386611066014987e-05, "loss": 0.5498, "step": 20431 }, { "epoch": 0.5610104338275673, "grad_norm": 0.39533963799476624, "learning_rate": 1.638627872320483e-05, "loss": 0.5535, "step": 20432 }, { "epoch": 0.5610378912685338, "grad_norm": 0.4607431888580322, "learning_rate": 1.638594636848223e-05, "loss": 0.6839, "step": 20433 }, { "epoch": 0.5610653487095003, "grad_norm": 0.3446064293384552, "learning_rate": 1.6385614001847795e-05, "loss": 0.4959, "step": 20434 }, { "epoch": 0.5610928061504667, "grad_norm": 0.38158342242240906, "learning_rate": 1.6385281623302156e-05, "loss": 0.4809, "step": 20435 }, { "epoch": 0.5611202635914333, "grad_norm": 0.38331338763237, "learning_rate": 1.6384949232845925e-05, "loss": 0.4966, "step": 20436 }, { "epoch": 0.5611477210323997, "grad_norm": 0.3598603904247284, "learning_rate": 1.638461683047973e-05, "loss": 0.5126, "step": 20437 }, { "epoch": 0.5611751784733663, "grad_norm": 0.4226660132408142, "learning_rate": 1.6384284416204185e-05, "loss": 0.6028, "step": 20438 }, { "epoch": 0.5612026359143328, "grad_norm": 0.5422347784042358, "learning_rate": 1.6383951990019912e-05, "loss": 0.5188, "step": 20439 }, { "epoch": 0.5612300933552993, "grad_norm": 0.3989599347114563, "learning_rate": 1.638361955192753e-05, "loss": 0.5339, "step": 20440 }, { "epoch": 0.5612575507962658, "grad_norm": 0.3992835283279419, "learning_rate": 1.638328710192766e-05, "loss": 0.4268, "step": 20441 }, { "epoch": 0.5612850082372323, "grad_norm": 0.3764842450618744, "learning_rate": 1.6382954640020924e-05, "loss": 0.5273, "step": 20442 }, { "epoch": 0.5613124656781988, "grad_norm": 0.5680180788040161, "learning_rate": 1.638262216620794e-05, "loss": 0.5577, "step": 20443 }, { "epoch": 0.5613399231191653, "grad_norm": 0.38193342089653015, "learning_rate": 1.6382289680489326e-05, "loss": 0.5073, "step": 20444 }, { "epoch": 0.5613673805601318, "grad_norm": 0.40487897396087646, "learning_rate": 1.6381957182865708e-05, "loss": 0.4969, "step": 20445 }, { "epoch": 0.5613948380010984, "grad_norm": 0.4708491861820221, "learning_rate": 1.6381624673337703e-05, "loss": 0.5174, "step": 20446 }, { "epoch": 0.5614222954420648, "grad_norm": 0.41322144865989685, "learning_rate": 1.638129215190593e-05, "loss": 0.5695, "step": 20447 }, { "epoch": 0.5614497528830313, "grad_norm": 0.37356120347976685, "learning_rate": 1.638095961857101e-05, "loss": 0.5248, "step": 20448 }, { "epoch": 0.5614772103239978, "grad_norm": 0.4235539436340332, "learning_rate": 1.6380627073333566e-05, "loss": 0.6227, "step": 20449 }, { "epoch": 0.5615046677649643, "grad_norm": 0.41826963424682617, "learning_rate": 1.6380294516194214e-05, "loss": 0.5614, "step": 20450 }, { "epoch": 0.5615321252059308, "grad_norm": 0.3944030702114105, "learning_rate": 1.6379961947153576e-05, "loss": 0.4832, "step": 20451 }, { "epoch": 0.5615595826468973, "grad_norm": 0.3871837258338928, "learning_rate": 1.6379629366212273e-05, "loss": 0.4851, "step": 20452 }, { "epoch": 0.5615870400878639, "grad_norm": 0.35583120584487915, "learning_rate": 1.6379296773370927e-05, "loss": 0.4568, "step": 20453 }, { "epoch": 0.5616144975288303, "grad_norm": 0.38624107837677, "learning_rate": 1.6378964168630153e-05, "loss": 0.524, "step": 20454 }, { "epoch": 0.5616419549697969, "grad_norm": 0.3432099223136902, "learning_rate": 1.637863155199058e-05, "loss": 0.5285, "step": 20455 }, { "epoch": 0.5616694124107633, "grad_norm": 0.40188169479370117, "learning_rate": 1.637829892345282e-05, "loss": 0.486, "step": 20456 }, { "epoch": 0.5616968698517298, "grad_norm": 0.3673308193683624, "learning_rate": 1.6377966283017497e-05, "loss": 0.4796, "step": 20457 }, { "epoch": 0.5617243272926963, "grad_norm": 0.3970005512237549, "learning_rate": 1.637763363068523e-05, "loss": 0.5812, "step": 20458 }, { "epoch": 0.5617517847336628, "grad_norm": 0.3507813513278961, "learning_rate": 1.637730096645664e-05, "loss": 0.5108, "step": 20459 }, { "epoch": 0.5617792421746294, "grad_norm": 0.3389035761356354, "learning_rate": 1.6376968290332352e-05, "loss": 0.4885, "step": 20460 }, { "epoch": 0.5618066996155958, "grad_norm": 0.4083121716976166, "learning_rate": 1.6376635602312983e-05, "loss": 0.5542, "step": 20461 }, { "epoch": 0.5618341570565624, "grad_norm": 0.4159267246723175, "learning_rate": 1.637630290239915e-05, "loss": 0.5982, "step": 20462 }, { "epoch": 0.5618616144975288, "grad_norm": 0.3484916090965271, "learning_rate": 1.6375970190591478e-05, "loss": 0.606, "step": 20463 }, { "epoch": 0.5618890719384954, "grad_norm": 0.4427848160266876, "learning_rate": 1.637563746689059e-05, "loss": 0.4523, "step": 20464 }, { "epoch": 0.5619165293794618, "grad_norm": 0.35420048236846924, "learning_rate": 1.6375304731297096e-05, "loss": 0.4767, "step": 20465 }, { "epoch": 0.5619439868204283, "grad_norm": 0.46661150455474854, "learning_rate": 1.637497198381163e-05, "loss": 0.5099, "step": 20466 }, { "epoch": 0.5619714442613949, "grad_norm": 0.33800482749938965, "learning_rate": 1.6374639224434807e-05, "loss": 0.5166, "step": 20467 }, { "epoch": 0.5619989017023613, "grad_norm": 0.3861345052719116, "learning_rate": 1.6374306453167245e-05, "loss": 0.4851, "step": 20468 }, { "epoch": 0.5620263591433279, "grad_norm": 0.3708644509315491, "learning_rate": 1.637397367000957e-05, "loss": 0.5617, "step": 20469 }, { "epoch": 0.5620538165842943, "grad_norm": 0.5480740666389465, "learning_rate": 1.6373640874962393e-05, "loss": 0.5286, "step": 20470 }, { "epoch": 0.5620812740252609, "grad_norm": 0.374271035194397, "learning_rate": 1.6373308068026344e-05, "loss": 0.5005, "step": 20471 }, { "epoch": 0.5621087314662273, "grad_norm": 0.41216185688972473, "learning_rate": 1.6372975249202045e-05, "loss": 0.5153, "step": 20472 }, { "epoch": 0.5621361889071939, "grad_norm": 0.3257453441619873, "learning_rate": 1.6372642418490114e-05, "loss": 0.4802, "step": 20473 }, { "epoch": 0.5621636463481604, "grad_norm": 0.3707584738731384, "learning_rate": 1.637230957589117e-05, "loss": 0.4928, "step": 20474 }, { "epoch": 0.5621911037891268, "grad_norm": 0.4390774667263031, "learning_rate": 1.6371976721405828e-05, "loss": 0.4771, "step": 20475 }, { "epoch": 0.5622185612300934, "grad_norm": 0.3385012745857239, "learning_rate": 1.6371643855034724e-05, "loss": 0.4805, "step": 20476 }, { "epoch": 0.5622460186710598, "grad_norm": 0.41389021277427673, "learning_rate": 1.6371310976778463e-05, "loss": 0.5064, "step": 20477 }, { "epoch": 0.5622734761120264, "grad_norm": 0.3531809449195862, "learning_rate": 1.637097808663768e-05, "loss": 0.4797, "step": 20478 }, { "epoch": 0.5623009335529928, "grad_norm": 0.43921464681625366, "learning_rate": 1.6370645184612988e-05, "loss": 0.4555, "step": 20479 }, { "epoch": 0.5623283909939594, "grad_norm": 0.380204439163208, "learning_rate": 1.637031227070501e-05, "loss": 0.464, "step": 20480 }, { "epoch": 0.5623558484349259, "grad_norm": 0.3961621820926666, "learning_rate": 1.6369979344914364e-05, "loss": 0.4871, "step": 20481 }, { "epoch": 0.5623833058758924, "grad_norm": 0.3721272647380829, "learning_rate": 1.6369646407241677e-05, "loss": 0.4711, "step": 20482 }, { "epoch": 0.5624107633168589, "grad_norm": 0.3873903155326843, "learning_rate": 1.636931345768756e-05, "loss": 0.4953, "step": 20483 }, { "epoch": 0.5624382207578253, "grad_norm": 0.38575196266174316, "learning_rate": 1.6368980496252647e-05, "loss": 0.4748, "step": 20484 }, { "epoch": 0.5624656781987919, "grad_norm": 0.351106196641922, "learning_rate": 1.636864752293755e-05, "loss": 0.4359, "step": 20485 }, { "epoch": 0.5624931356397583, "grad_norm": 0.36982810497283936, "learning_rate": 1.6368314537742894e-05, "loss": 0.4639, "step": 20486 }, { "epoch": 0.5625205930807249, "grad_norm": 0.391107976436615, "learning_rate": 1.6367981540669297e-05, "loss": 0.5383, "step": 20487 }, { "epoch": 0.5625480505216913, "grad_norm": 0.35814088582992554, "learning_rate": 1.636764853171738e-05, "loss": 0.565, "step": 20488 }, { "epoch": 0.5625755079626579, "grad_norm": 0.3530321419239044, "learning_rate": 1.6367315510887772e-05, "loss": 0.5116, "step": 20489 }, { "epoch": 0.5626029654036244, "grad_norm": 0.3638050854206085, "learning_rate": 1.6366982478181085e-05, "loss": 0.5173, "step": 20490 }, { "epoch": 0.5626304228445909, "grad_norm": 0.39184877276420593, "learning_rate": 1.6366649433597945e-05, "loss": 0.5285, "step": 20491 }, { "epoch": 0.5626578802855574, "grad_norm": 0.33228856325149536, "learning_rate": 1.636631637713897e-05, "loss": 0.502, "step": 20492 }, { "epoch": 0.5626853377265238, "grad_norm": 0.37851908802986145, "learning_rate": 1.6365983308804786e-05, "loss": 0.4957, "step": 20493 }, { "epoch": 0.5627127951674904, "grad_norm": 1.2311793565750122, "learning_rate": 1.6365650228596006e-05, "loss": 0.5018, "step": 20494 }, { "epoch": 0.5627402526084568, "grad_norm": 0.37898775935173035, "learning_rate": 1.6365317136513265e-05, "loss": 0.4589, "step": 20495 }, { "epoch": 0.5627677100494234, "grad_norm": 0.39820873737335205, "learning_rate": 1.6364984032557168e-05, "loss": 0.4892, "step": 20496 }, { "epoch": 0.5627951674903899, "grad_norm": 0.3844335973262787, "learning_rate": 1.6364650916728345e-05, "loss": 0.5247, "step": 20497 }, { "epoch": 0.5628226249313564, "grad_norm": 0.4531098008155823, "learning_rate": 1.636431778902742e-05, "loss": 0.4119, "step": 20498 }, { "epoch": 0.5628500823723229, "grad_norm": 0.37701529264450073, "learning_rate": 1.6363984649455012e-05, "loss": 0.4687, "step": 20499 }, { "epoch": 0.5628775398132894, "grad_norm": 0.36235129833221436, "learning_rate": 1.636365149801174e-05, "loss": 0.5374, "step": 20500 }, { "epoch": 0.5629049972542559, "grad_norm": 0.3634245693683624, "learning_rate": 1.6363318334698224e-05, "loss": 0.5438, "step": 20501 }, { "epoch": 0.5629324546952224, "grad_norm": 0.37933874130249023, "learning_rate": 1.6362985159515093e-05, "loss": 0.5677, "step": 20502 }, { "epoch": 0.5629599121361889, "grad_norm": 0.3296794295310974, "learning_rate": 1.6362651972462963e-05, "loss": 0.428, "step": 20503 }, { "epoch": 0.5629873695771554, "grad_norm": 0.36243385076522827, "learning_rate": 1.636231877354245e-05, "loss": 0.4724, "step": 20504 }, { "epoch": 0.5630148270181219, "grad_norm": 0.420883446931839, "learning_rate": 1.636198556275419e-05, "loss": 0.5682, "step": 20505 }, { "epoch": 0.5630422844590884, "grad_norm": 0.409927636384964, "learning_rate": 1.6361652340098794e-05, "loss": 0.5514, "step": 20506 }, { "epoch": 0.5630697419000549, "grad_norm": 0.3318100869655609, "learning_rate": 1.6361319105576884e-05, "loss": 0.4381, "step": 20507 }, { "epoch": 0.5630971993410214, "grad_norm": 0.40708494186401367, "learning_rate": 1.6360985859189083e-05, "loss": 0.556, "step": 20508 }, { "epoch": 0.5631246567819879, "grad_norm": 0.3521324098110199, "learning_rate": 1.6360652600936015e-05, "loss": 0.5071, "step": 20509 }, { "epoch": 0.5631521142229544, "grad_norm": 0.39441928267478943, "learning_rate": 1.63603193308183e-05, "loss": 0.5, "step": 20510 }, { "epoch": 0.563179571663921, "grad_norm": 0.38234609365463257, "learning_rate": 1.635998604883656e-05, "loss": 0.5001, "step": 20511 }, { "epoch": 0.5632070291048874, "grad_norm": 0.38792383670806885, "learning_rate": 1.6359652754991414e-05, "loss": 0.5311, "step": 20512 }, { "epoch": 0.563234486545854, "grad_norm": 0.38032466173171997, "learning_rate": 1.635931944928349e-05, "loss": 0.4997, "step": 20513 }, { "epoch": 0.5632619439868204, "grad_norm": 0.9085074663162231, "learning_rate": 1.6358986131713402e-05, "loss": 0.5253, "step": 20514 }, { "epoch": 0.5632894014277869, "grad_norm": 0.4365297257900238, "learning_rate": 1.6358652802281776e-05, "loss": 0.5287, "step": 20515 }, { "epoch": 0.5633168588687534, "grad_norm": 0.37197962403297424, "learning_rate": 1.6358319460989232e-05, "loss": 0.4992, "step": 20516 }, { "epoch": 0.5633443163097199, "grad_norm": 0.3430827856063843, "learning_rate": 1.6357986107836395e-05, "loss": 0.458, "step": 20517 }, { "epoch": 0.5633717737506865, "grad_norm": 0.4030255675315857, "learning_rate": 1.6357652742823883e-05, "loss": 0.5169, "step": 20518 }, { "epoch": 0.5633992311916529, "grad_norm": 0.36033737659454346, "learning_rate": 1.635731936595232e-05, "loss": 0.51, "step": 20519 }, { "epoch": 0.5634266886326195, "grad_norm": 0.4218926727771759, "learning_rate": 1.635698597722233e-05, "loss": 0.5234, "step": 20520 }, { "epoch": 0.5634541460735859, "grad_norm": 0.38989686965942383, "learning_rate": 1.635665257663453e-05, "loss": 0.5357, "step": 20521 }, { "epoch": 0.5634816035145525, "grad_norm": 1.6895956993103027, "learning_rate": 1.6356319164189544e-05, "loss": 0.5343, "step": 20522 }, { "epoch": 0.5635090609555189, "grad_norm": 0.3469582498073578, "learning_rate": 1.6355985739887992e-05, "loss": 0.4968, "step": 20523 }, { "epoch": 0.5635365183964854, "grad_norm": 0.3591817021369934, "learning_rate": 1.63556523037305e-05, "loss": 0.5546, "step": 20524 }, { "epoch": 0.563563975837452, "grad_norm": 0.35904863476753235, "learning_rate": 1.6355318855717686e-05, "loss": 0.4843, "step": 20525 }, { "epoch": 0.5635914332784184, "grad_norm": 0.422787606716156, "learning_rate": 1.635498539585018e-05, "loss": 0.484, "step": 20526 }, { "epoch": 0.563618890719385, "grad_norm": 0.33982449769973755, "learning_rate": 1.6354651924128592e-05, "loss": 0.5312, "step": 20527 }, { "epoch": 0.5636463481603514, "grad_norm": 0.3451935052871704, "learning_rate": 1.635431844055355e-05, "loss": 0.4159, "step": 20528 }, { "epoch": 0.563673805601318, "grad_norm": 0.3671390414237976, "learning_rate": 1.635398494512568e-05, "loss": 0.4797, "step": 20529 }, { "epoch": 0.5637012630422844, "grad_norm": 0.43958479166030884, "learning_rate": 1.6353651437845596e-05, "loss": 0.5209, "step": 20530 }, { "epoch": 0.563728720483251, "grad_norm": 0.42482295632362366, "learning_rate": 1.6353317918713928e-05, "loss": 0.6279, "step": 20531 }, { "epoch": 0.5637561779242175, "grad_norm": 0.32556870579719543, "learning_rate": 1.6352984387731294e-05, "loss": 0.4519, "step": 20532 }, { "epoch": 0.5637836353651839, "grad_norm": 0.41658976674079895, "learning_rate": 1.6352650844898316e-05, "loss": 0.5664, "step": 20533 }, { "epoch": 0.5638110928061505, "grad_norm": 0.7431604862213135, "learning_rate": 1.6352317290215615e-05, "loss": 0.5009, "step": 20534 }, { "epoch": 0.5638385502471169, "grad_norm": 0.40580061078071594, "learning_rate": 1.6351983723683814e-05, "loss": 0.5781, "step": 20535 }, { "epoch": 0.5638660076880835, "grad_norm": 0.43296632170677185, "learning_rate": 1.6351650145303536e-05, "loss": 0.5489, "step": 20536 }, { "epoch": 0.5638934651290499, "grad_norm": 0.4112752079963684, "learning_rate": 1.6351316555075406e-05, "loss": 0.5608, "step": 20537 }, { "epoch": 0.5639209225700165, "grad_norm": 0.38435736298561096, "learning_rate": 1.6350982953000042e-05, "loss": 0.4707, "step": 20538 }, { "epoch": 0.563948380010983, "grad_norm": 0.38297969102859497, "learning_rate": 1.635064933907807e-05, "loss": 0.4393, "step": 20539 }, { "epoch": 0.5639758374519495, "grad_norm": 0.3612883687019348, "learning_rate": 1.6350315713310107e-05, "loss": 0.5467, "step": 20540 }, { "epoch": 0.564003294892916, "grad_norm": 0.36423230171203613, "learning_rate": 1.6349982075696778e-05, "loss": 0.5446, "step": 20541 }, { "epoch": 0.5640307523338824, "grad_norm": 0.4499000906944275, "learning_rate": 1.634964842623871e-05, "loss": 0.4986, "step": 20542 }, { "epoch": 0.564058209774849, "grad_norm": 0.4161536395549774, "learning_rate": 1.6349314764936516e-05, "loss": 0.4904, "step": 20543 }, { "epoch": 0.5640856672158154, "grad_norm": 0.41302576661109924, "learning_rate": 1.634898109179083e-05, "loss": 0.5273, "step": 20544 }, { "epoch": 0.564113124656782, "grad_norm": 0.40110400319099426, "learning_rate": 1.6348647406802264e-05, "loss": 0.4771, "step": 20545 }, { "epoch": 0.5641405820977485, "grad_norm": 0.39073535799980164, "learning_rate": 1.6348313709971445e-05, "loss": 0.5179, "step": 20546 }, { "epoch": 0.564168039538715, "grad_norm": 0.34682658314704895, "learning_rate": 1.6347980001298995e-05, "loss": 0.4034, "step": 20547 }, { "epoch": 0.5641954969796815, "grad_norm": 0.39446476101875305, "learning_rate": 1.6347646280785532e-05, "loss": 0.5412, "step": 20548 }, { "epoch": 0.564222954420648, "grad_norm": 0.5643588900566101, "learning_rate": 1.6347312548431686e-05, "loss": 0.4564, "step": 20549 }, { "epoch": 0.5642504118616145, "grad_norm": 0.37165629863739014, "learning_rate": 1.6346978804238076e-05, "loss": 0.424, "step": 20550 }, { "epoch": 0.564277869302581, "grad_norm": 0.3623132109642029, "learning_rate": 1.6346645048205326e-05, "loss": 0.4298, "step": 20551 }, { "epoch": 0.5643053267435475, "grad_norm": 0.4554952383041382, "learning_rate": 1.6346311280334054e-05, "loss": 0.5396, "step": 20552 }, { "epoch": 0.564332784184514, "grad_norm": 0.36759448051452637, "learning_rate": 1.634597750062489e-05, "loss": 0.4815, "step": 20553 }, { "epoch": 0.5643602416254805, "grad_norm": 0.3791283071041107, "learning_rate": 1.6345643709078452e-05, "loss": 0.4883, "step": 20554 }, { "epoch": 0.564387699066447, "grad_norm": 0.3639301657676697, "learning_rate": 1.634530990569536e-05, "loss": 0.5113, "step": 20555 }, { "epoch": 0.5644151565074135, "grad_norm": 0.34587305784225464, "learning_rate": 1.6344976090476242e-05, "loss": 0.4742, "step": 20556 }, { "epoch": 0.56444261394838, "grad_norm": 0.3502821624279022, "learning_rate": 1.634464226342172e-05, "loss": 0.4956, "step": 20557 }, { "epoch": 0.5644700713893465, "grad_norm": 0.37109315395355225, "learning_rate": 1.6344308424532412e-05, "loss": 0.4559, "step": 20558 }, { "epoch": 0.564497528830313, "grad_norm": 0.3434556722640991, "learning_rate": 1.634397457380895e-05, "loss": 0.4525, "step": 20559 }, { "epoch": 0.5645249862712796, "grad_norm": 0.42299553751945496, "learning_rate": 1.6343640711251946e-05, "loss": 0.5383, "step": 20560 }, { "epoch": 0.564552443712246, "grad_norm": 0.39741137623786926, "learning_rate": 1.6343306836862026e-05, "loss": 0.5523, "step": 20561 }, { "epoch": 0.5645799011532125, "grad_norm": 0.3998224139213562, "learning_rate": 1.6342972950639818e-05, "loss": 0.5939, "step": 20562 }, { "epoch": 0.564607358594179, "grad_norm": 0.3534794747829437, "learning_rate": 1.6342639052585938e-05, "loss": 0.4206, "step": 20563 }, { "epoch": 0.5646348160351455, "grad_norm": 0.3768831491470337, "learning_rate": 1.634230514270101e-05, "loss": 0.4989, "step": 20564 }, { "epoch": 0.564662273476112, "grad_norm": 0.4396803677082062, "learning_rate": 1.6341971220985662e-05, "loss": 0.606, "step": 20565 }, { "epoch": 0.5646897309170785, "grad_norm": 0.3371696472167969, "learning_rate": 1.6341637287440516e-05, "loss": 0.5437, "step": 20566 }, { "epoch": 0.5647171883580451, "grad_norm": 0.34939059615135193, "learning_rate": 1.634130334206619e-05, "loss": 0.4362, "step": 20567 }, { "epoch": 0.5647446457990115, "grad_norm": 0.3888326585292816, "learning_rate": 1.6340969384863304e-05, "loss": 0.5749, "step": 20568 }, { "epoch": 0.5647721032399781, "grad_norm": 0.4547484219074249, "learning_rate": 1.6340635415832492e-05, "loss": 0.5706, "step": 20569 }, { "epoch": 0.5647995606809445, "grad_norm": 0.5122742652893066, "learning_rate": 1.6340301434974372e-05, "loss": 0.52, "step": 20570 }, { "epoch": 0.564827018121911, "grad_norm": 0.42701253294944763, "learning_rate": 1.6339967442289566e-05, "loss": 0.6288, "step": 20571 }, { "epoch": 0.5648544755628775, "grad_norm": 0.37062329053878784, "learning_rate": 1.6339633437778695e-05, "loss": 0.4396, "step": 20572 }, { "epoch": 0.564881933003844, "grad_norm": 0.3864344656467438, "learning_rate": 1.6339299421442387e-05, "loss": 0.536, "step": 20573 }, { "epoch": 0.5649093904448106, "grad_norm": 0.40329092741012573, "learning_rate": 1.6338965393281258e-05, "loss": 0.4858, "step": 20574 }, { "epoch": 0.564936847885777, "grad_norm": 0.4018467962741852, "learning_rate": 1.633863135329594e-05, "loss": 0.476, "step": 20575 }, { "epoch": 0.5649643053267436, "grad_norm": 0.3691905438899994, "learning_rate": 1.6338297301487047e-05, "loss": 0.4344, "step": 20576 }, { "epoch": 0.56499176276771, "grad_norm": 0.39165663719177246, "learning_rate": 1.633796323785521e-05, "loss": 0.575, "step": 20577 }, { "epoch": 0.5650192202086766, "grad_norm": 0.35369277000427246, "learning_rate": 1.6337629162401048e-05, "loss": 0.5044, "step": 20578 }, { "epoch": 0.565046677649643, "grad_norm": 0.40925437211990356, "learning_rate": 1.6337295075125185e-05, "loss": 0.4858, "step": 20579 }, { "epoch": 0.5650741350906096, "grad_norm": 0.3648555278778076, "learning_rate": 1.6336960976028242e-05, "loss": 0.5997, "step": 20580 }, { "epoch": 0.5651015925315761, "grad_norm": 0.33511003851890564, "learning_rate": 1.6336626865110846e-05, "loss": 0.4579, "step": 20581 }, { "epoch": 0.5651290499725425, "grad_norm": 0.32862919569015503, "learning_rate": 1.6336292742373617e-05, "loss": 0.4863, "step": 20582 }, { "epoch": 0.5651565074135091, "grad_norm": 0.3548717796802521, "learning_rate": 1.633595860781718e-05, "loss": 0.4828, "step": 20583 }, { "epoch": 0.5651839648544755, "grad_norm": 0.38445577025413513, "learning_rate": 1.6335624461442163e-05, "loss": 0.5373, "step": 20584 }, { "epoch": 0.5652114222954421, "grad_norm": 0.38352179527282715, "learning_rate": 1.633529030324918e-05, "loss": 0.4574, "step": 20585 }, { "epoch": 0.5652388797364085, "grad_norm": 0.3737546503543854, "learning_rate": 1.6334956133238857e-05, "loss": 0.4499, "step": 20586 }, { "epoch": 0.5652663371773751, "grad_norm": 0.3576468825340271, "learning_rate": 1.6334621951411823e-05, "loss": 0.5772, "step": 20587 }, { "epoch": 0.5652937946183416, "grad_norm": 0.4032418429851532, "learning_rate": 1.6334287757768693e-05, "loss": 0.4726, "step": 20588 }, { "epoch": 0.565321252059308, "grad_norm": 0.4289211630821228, "learning_rate": 1.6333953552310098e-05, "loss": 0.5093, "step": 20589 }, { "epoch": 0.5653487095002746, "grad_norm": 0.4060177505016327, "learning_rate": 1.6333619335036656e-05, "loss": 0.4829, "step": 20590 }, { "epoch": 0.565376166941241, "grad_norm": 0.38711047172546387, "learning_rate": 1.6333285105948996e-05, "loss": 0.5025, "step": 20591 }, { "epoch": 0.5654036243822076, "grad_norm": 0.38262107968330383, "learning_rate": 1.6332950865047733e-05, "loss": 0.519, "step": 20592 }, { "epoch": 0.565431081823174, "grad_norm": 0.3862617611885071, "learning_rate": 1.6332616612333494e-05, "loss": 0.4674, "step": 20593 }, { "epoch": 0.5654585392641406, "grad_norm": 0.3858075737953186, "learning_rate": 1.633228234780691e-05, "loss": 0.525, "step": 20594 }, { "epoch": 0.5654859967051071, "grad_norm": 0.34345167875289917, "learning_rate": 1.6331948071468598e-05, "loss": 0.4533, "step": 20595 }, { "epoch": 0.5655134541460736, "grad_norm": 0.42987656593322754, "learning_rate": 1.6331613783319177e-05, "loss": 0.5383, "step": 20596 }, { "epoch": 0.5655409115870401, "grad_norm": 0.4039032757282257, "learning_rate": 1.633127948335928e-05, "loss": 0.6023, "step": 20597 }, { "epoch": 0.5655683690280066, "grad_norm": 0.37783902883529663, "learning_rate": 1.6330945171589525e-05, "loss": 0.4647, "step": 20598 }, { "epoch": 0.5655958264689731, "grad_norm": 0.38612523674964905, "learning_rate": 1.6330610848010535e-05, "loss": 0.53, "step": 20599 }, { "epoch": 0.5656232839099395, "grad_norm": 0.4021594226360321, "learning_rate": 1.6330276512622936e-05, "loss": 0.5054, "step": 20600 }, { "epoch": 0.5656507413509061, "grad_norm": 0.38004571199417114, "learning_rate": 1.632994216542735e-05, "loss": 0.5199, "step": 20601 }, { "epoch": 0.5656781987918726, "grad_norm": 0.38056614995002747, "learning_rate": 1.6329607806424405e-05, "loss": 0.4945, "step": 20602 }, { "epoch": 0.5657056562328391, "grad_norm": 0.33992618322372437, "learning_rate": 1.632927343561472e-05, "loss": 0.5024, "step": 20603 }, { "epoch": 0.5657331136738056, "grad_norm": 0.38081398606300354, "learning_rate": 1.6328939052998918e-05, "loss": 0.4859, "step": 20604 }, { "epoch": 0.5657605711147721, "grad_norm": 0.3655245900154114, "learning_rate": 1.6328604658577626e-05, "loss": 0.5711, "step": 20605 }, { "epoch": 0.5657880285557386, "grad_norm": 0.35513079166412354, "learning_rate": 1.6328270252351466e-05, "loss": 0.5539, "step": 20606 }, { "epoch": 0.5658154859967051, "grad_norm": 0.44321513175964355, "learning_rate": 1.6327935834321062e-05, "loss": 0.5245, "step": 20607 }, { "epoch": 0.5658429434376716, "grad_norm": 0.5778223276138306, "learning_rate": 1.6327601404487037e-05, "loss": 0.6337, "step": 20608 }, { "epoch": 0.5658704008786382, "grad_norm": 0.37304192781448364, "learning_rate": 1.6327266962850018e-05, "loss": 0.528, "step": 20609 }, { "epoch": 0.5658978583196046, "grad_norm": 0.38240474462509155, "learning_rate": 1.6326932509410624e-05, "loss": 0.5773, "step": 20610 }, { "epoch": 0.5659253157605711, "grad_norm": 0.36785581707954407, "learning_rate": 1.6326598044169487e-05, "loss": 0.4724, "step": 20611 }, { "epoch": 0.5659527732015376, "grad_norm": 0.37862733006477356, "learning_rate": 1.632626356712722e-05, "loss": 0.5163, "step": 20612 }, { "epoch": 0.5659802306425041, "grad_norm": 0.3996167480945587, "learning_rate": 1.6325929078284454e-05, "loss": 0.5266, "step": 20613 }, { "epoch": 0.5660076880834706, "grad_norm": 0.3466898798942566, "learning_rate": 1.632559457764181e-05, "loss": 0.5301, "step": 20614 }, { "epoch": 0.5660351455244371, "grad_norm": 0.3221718668937683, "learning_rate": 1.6325260065199916e-05, "loss": 0.4801, "step": 20615 }, { "epoch": 0.5660626029654037, "grad_norm": 0.35972461104393005, "learning_rate": 1.6324925540959393e-05, "loss": 0.5777, "step": 20616 }, { "epoch": 0.5660900604063701, "grad_norm": 0.36867082118988037, "learning_rate": 1.6324591004920863e-05, "loss": 0.582, "step": 20617 }, { "epoch": 0.5661175178473367, "grad_norm": 0.39707890152931213, "learning_rate": 1.6324256457084954e-05, "loss": 0.4785, "step": 20618 }, { "epoch": 0.5661449752883031, "grad_norm": 0.37865668535232544, "learning_rate": 1.6323921897452287e-05, "loss": 0.504, "step": 20619 }, { "epoch": 0.5661724327292696, "grad_norm": 0.3887976109981537, "learning_rate": 1.6323587326023488e-05, "loss": 0.4716, "step": 20620 }, { "epoch": 0.5661998901702361, "grad_norm": 0.48873403668403625, "learning_rate": 1.6323252742799182e-05, "loss": 0.5692, "step": 20621 }, { "epoch": 0.5662273476112026, "grad_norm": 0.47238636016845703, "learning_rate": 1.632291814777999e-05, "loss": 0.4266, "step": 20622 }, { "epoch": 0.5662548050521692, "grad_norm": 0.40625491738319397, "learning_rate": 1.6322583540966535e-05, "loss": 0.505, "step": 20623 }, { "epoch": 0.5662822624931356, "grad_norm": 0.39686572551727295, "learning_rate": 1.6322248922359447e-05, "loss": 0.4815, "step": 20624 }, { "epoch": 0.5663097199341022, "grad_norm": 0.36998096108436584, "learning_rate": 1.6321914291959347e-05, "loss": 0.5339, "step": 20625 }, { "epoch": 0.5663371773750686, "grad_norm": 0.3614301085472107, "learning_rate": 1.6321579649766857e-05, "loss": 0.46, "step": 20626 }, { "epoch": 0.5663646348160352, "grad_norm": 0.45861056447029114, "learning_rate": 1.6321244995782605e-05, "loss": 0.5394, "step": 20627 }, { "epoch": 0.5663920922570016, "grad_norm": 0.4457368850708008, "learning_rate": 1.6320910330007213e-05, "loss": 0.5521, "step": 20628 }, { "epoch": 0.5664195496979681, "grad_norm": 0.3619052469730377, "learning_rate": 1.6320575652441303e-05, "loss": 0.4457, "step": 20629 }, { "epoch": 0.5664470071389347, "grad_norm": 0.36880192160606384, "learning_rate": 1.6320240963085507e-05, "loss": 0.4812, "step": 20630 }, { "epoch": 0.5664744645799011, "grad_norm": 0.35267242789268494, "learning_rate": 1.6319906261940442e-05, "loss": 0.5019, "step": 20631 }, { "epoch": 0.5665019220208677, "grad_norm": 0.3671497106552124, "learning_rate": 1.6319571549006735e-05, "loss": 0.494, "step": 20632 }, { "epoch": 0.5665293794618341, "grad_norm": 0.4104071259498596, "learning_rate": 1.631923682428501e-05, "loss": 0.5459, "step": 20633 }, { "epoch": 0.5665568369028007, "grad_norm": 0.408366322517395, "learning_rate": 1.6318902087775893e-05, "loss": 0.5498, "step": 20634 }, { "epoch": 0.5665842943437671, "grad_norm": 0.36114656925201416, "learning_rate": 1.6318567339480004e-05, "loss": 0.4774, "step": 20635 }, { "epoch": 0.5666117517847337, "grad_norm": 0.3958599269390106, "learning_rate": 1.6318232579397973e-05, "loss": 0.4726, "step": 20636 }, { "epoch": 0.5666392092257002, "grad_norm": 0.3740885853767395, "learning_rate": 1.631789780753042e-05, "loss": 0.4366, "step": 20637 }, { "epoch": 0.5666666666666667, "grad_norm": 0.4239836037158966, "learning_rate": 1.6317563023877974e-05, "loss": 0.5034, "step": 20638 }, { "epoch": 0.5666941241076332, "grad_norm": 0.3563210964202881, "learning_rate": 1.6317228228441252e-05, "loss": 0.5036, "step": 20639 }, { "epoch": 0.5667215815485996, "grad_norm": 0.3894434869289398, "learning_rate": 1.6316893421220886e-05, "loss": 0.4667, "step": 20640 }, { "epoch": 0.5667490389895662, "grad_norm": 0.35611388087272644, "learning_rate": 1.63165586022175e-05, "loss": 0.4945, "step": 20641 }, { "epoch": 0.5667764964305326, "grad_norm": 0.3770473003387451, "learning_rate": 1.631622377143171e-05, "loss": 0.4952, "step": 20642 }, { "epoch": 0.5668039538714992, "grad_norm": 0.3648149371147156, "learning_rate": 1.631588892886415e-05, "loss": 0.5729, "step": 20643 }, { "epoch": 0.5668314113124657, "grad_norm": 0.6053686141967773, "learning_rate": 1.6315554074515444e-05, "loss": 0.5167, "step": 20644 }, { "epoch": 0.5668588687534322, "grad_norm": 0.4376186430454254, "learning_rate": 1.6315219208386207e-05, "loss": 0.5963, "step": 20645 }, { "epoch": 0.5668863261943987, "grad_norm": 0.3602019250392914, "learning_rate": 1.6314884330477075e-05, "loss": 0.5479, "step": 20646 }, { "epoch": 0.5669137836353652, "grad_norm": 0.37741193175315857, "learning_rate": 1.631454944078867e-05, "loss": 0.4755, "step": 20647 }, { "epoch": 0.5669412410763317, "grad_norm": 0.40227916836738586, "learning_rate": 1.6314214539321614e-05, "loss": 0.4988, "step": 20648 }, { "epoch": 0.5669686985172981, "grad_norm": 0.4117346704006195, "learning_rate": 1.631387962607653e-05, "loss": 0.5559, "step": 20649 }, { "epoch": 0.5669961559582647, "grad_norm": 0.40767139196395874, "learning_rate": 1.631354470105405e-05, "loss": 0.5064, "step": 20650 }, { "epoch": 0.5670236133992312, "grad_norm": 0.3871402442455292, "learning_rate": 1.631320976425479e-05, "loss": 0.4596, "step": 20651 }, { "epoch": 0.5670510708401977, "grad_norm": 0.3619860112667084, "learning_rate": 1.631287481567938e-05, "loss": 0.4459, "step": 20652 }, { "epoch": 0.5670785282811642, "grad_norm": 0.44328808784484863, "learning_rate": 1.6312539855328446e-05, "loss": 0.4874, "step": 20653 }, { "epoch": 0.5671059857221307, "grad_norm": 0.4399421811103821, "learning_rate": 1.6312204883202606e-05, "loss": 0.5613, "step": 20654 }, { "epoch": 0.5671334431630972, "grad_norm": 0.3754312992095947, "learning_rate": 1.631186989930249e-05, "loss": 0.4939, "step": 20655 }, { "epoch": 0.5671609006040637, "grad_norm": 0.37809258699417114, "learning_rate": 1.6311534903628722e-05, "loss": 0.486, "step": 20656 }, { "epoch": 0.5671883580450302, "grad_norm": 0.3609181046485901, "learning_rate": 1.631119989618193e-05, "loss": 0.5496, "step": 20657 }, { "epoch": 0.5672158154859968, "grad_norm": 0.3719877600669861, "learning_rate": 1.6310864876962736e-05, "loss": 0.4589, "step": 20658 }, { "epoch": 0.5672432729269632, "grad_norm": 0.3936196267604828, "learning_rate": 1.6310529845971762e-05, "loss": 0.5409, "step": 20659 }, { "epoch": 0.5672707303679297, "grad_norm": 0.35478487610816956, "learning_rate": 1.6310194803209636e-05, "loss": 0.4424, "step": 20660 }, { "epoch": 0.5672981878088962, "grad_norm": 0.3888084590435028, "learning_rate": 1.6309859748676985e-05, "loss": 0.5723, "step": 20661 }, { "epoch": 0.5673256452498627, "grad_norm": 0.37210872769355774, "learning_rate": 1.630952468237443e-05, "loss": 0.4834, "step": 20662 }, { "epoch": 0.5673531026908292, "grad_norm": 0.3810447156429291, "learning_rate": 1.6309189604302598e-05, "loss": 0.6435, "step": 20663 }, { "epoch": 0.5673805601317957, "grad_norm": 0.36711788177490234, "learning_rate": 1.6308854514462113e-05, "loss": 0.5457, "step": 20664 }, { "epoch": 0.5674080175727623, "grad_norm": 0.3771422803401947, "learning_rate": 1.6308519412853603e-05, "loss": 0.4932, "step": 20665 }, { "epoch": 0.5674354750137287, "grad_norm": 0.384708970785141, "learning_rate": 1.6308184299477688e-05, "loss": 0.4517, "step": 20666 }, { "epoch": 0.5674629324546953, "grad_norm": 0.40480321645736694, "learning_rate": 1.6307849174334997e-05, "loss": 0.5561, "step": 20667 }, { "epoch": 0.5674903898956617, "grad_norm": 0.3800649344921112, "learning_rate": 1.6307514037426155e-05, "loss": 0.5578, "step": 20668 }, { "epoch": 0.5675178473366282, "grad_norm": 0.35724785923957825, "learning_rate": 1.6307178888751785e-05, "loss": 0.4581, "step": 20669 }, { "epoch": 0.5675453047775947, "grad_norm": 0.4085018038749695, "learning_rate": 1.6306843728312513e-05, "loss": 0.4831, "step": 20670 }, { "epoch": 0.5675727622185612, "grad_norm": 0.44947561621665955, "learning_rate": 1.6306508556108967e-05, "loss": 0.4686, "step": 20671 }, { "epoch": 0.5676002196595278, "grad_norm": 0.3525388538837433, "learning_rate": 1.6306173372141766e-05, "loss": 0.5209, "step": 20672 }, { "epoch": 0.5676276771004942, "grad_norm": 0.38501447439193726, "learning_rate": 1.6305838176411545e-05, "loss": 0.4661, "step": 20673 }, { "epoch": 0.5676551345414608, "grad_norm": 0.47890838980674744, "learning_rate": 1.6305502968918915e-05, "loss": 0.5151, "step": 20674 }, { "epoch": 0.5676825919824272, "grad_norm": 0.3509874641895294, "learning_rate": 1.6305167749664515e-05, "loss": 0.563, "step": 20675 }, { "epoch": 0.5677100494233938, "grad_norm": 0.4421960115432739, "learning_rate": 1.6304832518648964e-05, "loss": 0.5555, "step": 20676 }, { "epoch": 0.5677375068643602, "grad_norm": 0.3734467327594757, "learning_rate": 1.630449727587289e-05, "loss": 0.5124, "step": 20677 }, { "epoch": 0.5677649643053267, "grad_norm": 0.37376290559768677, "learning_rate": 1.6304162021336914e-05, "loss": 0.4483, "step": 20678 }, { "epoch": 0.5677924217462933, "grad_norm": 0.36558496952056885, "learning_rate": 1.630382675504166e-05, "loss": 0.4724, "step": 20679 }, { "epoch": 0.5678198791872597, "grad_norm": 0.3931330144405365, "learning_rate": 1.6303491476987762e-05, "loss": 0.535, "step": 20680 }, { "epoch": 0.5678473366282263, "grad_norm": 0.41127002239227295, "learning_rate": 1.6303156187175843e-05, "loss": 0.5436, "step": 20681 }, { "epoch": 0.5678747940691927, "grad_norm": 0.3633127212524414, "learning_rate": 1.630282088560652e-05, "loss": 0.5502, "step": 20682 }, { "epoch": 0.5679022515101593, "grad_norm": 0.3832589089870453, "learning_rate": 1.6302485572280427e-05, "loss": 0.5613, "step": 20683 }, { "epoch": 0.5679297089511257, "grad_norm": 0.4189336895942688, "learning_rate": 1.6302150247198188e-05, "loss": 0.5514, "step": 20684 }, { "epoch": 0.5679571663920923, "grad_norm": 0.3969650864601135, "learning_rate": 1.6301814910360426e-05, "loss": 0.5791, "step": 20685 }, { "epoch": 0.5679846238330588, "grad_norm": 0.32324519753456116, "learning_rate": 1.630147956176777e-05, "loss": 0.4231, "step": 20686 }, { "epoch": 0.5680120812740252, "grad_norm": 0.3660754859447479, "learning_rate": 1.630114420142084e-05, "loss": 0.4809, "step": 20687 }, { "epoch": 0.5680395387149918, "grad_norm": 0.3757556974887848, "learning_rate": 1.630080882932027e-05, "loss": 0.4609, "step": 20688 }, { "epoch": 0.5680669961559582, "grad_norm": 0.33011165261268616, "learning_rate": 1.6300473445466676e-05, "loss": 0.4868, "step": 20689 }, { "epoch": 0.5680944535969248, "grad_norm": 0.40938350558280945, "learning_rate": 1.6300138049860687e-05, "loss": 0.5726, "step": 20690 }, { "epoch": 0.5681219110378912, "grad_norm": 0.37226852774620056, "learning_rate": 1.6299802642502932e-05, "loss": 0.4965, "step": 20691 }, { "epoch": 0.5681493684788578, "grad_norm": 0.40996435284614563, "learning_rate": 1.6299467223394035e-05, "loss": 0.4976, "step": 20692 }, { "epoch": 0.5681768259198243, "grad_norm": 0.3443029522895813, "learning_rate": 1.6299131792534623e-05, "loss": 0.4405, "step": 20693 }, { "epoch": 0.5682042833607908, "grad_norm": 0.4025702476501465, "learning_rate": 1.6298796349925316e-05, "loss": 0.421, "step": 20694 }, { "epoch": 0.5682317408017573, "grad_norm": 0.467110812664032, "learning_rate": 1.6298460895566743e-05, "loss": 0.5649, "step": 20695 }, { "epoch": 0.5682591982427238, "grad_norm": 0.3692920207977295, "learning_rate": 1.6298125429459532e-05, "loss": 0.5803, "step": 20696 }, { "epoch": 0.5682866556836903, "grad_norm": 0.32728537917137146, "learning_rate": 1.6297789951604306e-05, "loss": 0.4064, "step": 20697 }, { "epoch": 0.5683141131246567, "grad_norm": 0.36716797947883606, "learning_rate": 1.6297454462001692e-05, "loss": 0.4541, "step": 20698 }, { "epoch": 0.5683415705656233, "grad_norm": 0.3563305735588074, "learning_rate": 1.629711896065232e-05, "loss": 0.5207, "step": 20699 }, { "epoch": 0.5683690280065898, "grad_norm": 0.4711093604564667, "learning_rate": 1.6296783447556803e-05, "loss": 0.5506, "step": 20700 }, { "epoch": 0.5683964854475563, "grad_norm": 0.364092081785202, "learning_rate": 1.6296447922715782e-05, "loss": 0.4402, "step": 20701 }, { "epoch": 0.5684239428885228, "grad_norm": 0.38028061389923096, "learning_rate": 1.6296112386129873e-05, "loss": 0.4157, "step": 20702 }, { "epoch": 0.5684514003294893, "grad_norm": 0.4465927183628082, "learning_rate": 1.6295776837799704e-05, "loss": 0.4349, "step": 20703 }, { "epoch": 0.5684788577704558, "grad_norm": 0.3695653975009918, "learning_rate": 1.6295441277725907e-05, "loss": 0.499, "step": 20704 }, { "epoch": 0.5685063152114223, "grad_norm": 0.3308243453502655, "learning_rate": 1.6295105705909096e-05, "loss": 0.4501, "step": 20705 }, { "epoch": 0.5685337726523888, "grad_norm": 0.47346052527427673, "learning_rate": 1.6294770122349908e-05, "loss": 0.5306, "step": 20706 }, { "epoch": 0.5685612300933554, "grad_norm": 0.4769842028617859, "learning_rate": 1.6294434527048963e-05, "loss": 0.597, "step": 20707 }, { "epoch": 0.5685886875343218, "grad_norm": 0.3582133650779724, "learning_rate": 1.629409892000689e-05, "loss": 0.4829, "step": 20708 }, { "epoch": 0.5686161449752883, "grad_norm": 0.37460020184516907, "learning_rate": 1.629376330122431e-05, "loss": 0.5115, "step": 20709 }, { "epoch": 0.5686436024162548, "grad_norm": 0.3501301407814026, "learning_rate": 1.6293427670701856e-05, "loss": 0.503, "step": 20710 }, { "epoch": 0.5686710598572213, "grad_norm": 0.395555704832077, "learning_rate": 1.629309202844015e-05, "loss": 0.5321, "step": 20711 }, { "epoch": 0.5686985172981878, "grad_norm": 0.3623402714729309, "learning_rate": 1.629275637443982e-05, "loss": 0.5655, "step": 20712 }, { "epoch": 0.5687259747391543, "grad_norm": 0.3653925061225891, "learning_rate": 1.629242070870149e-05, "loss": 0.4906, "step": 20713 }, { "epoch": 0.5687534321801209, "grad_norm": 0.3690163493156433, "learning_rate": 1.6292085031225787e-05, "loss": 0.5121, "step": 20714 }, { "epoch": 0.5687808896210873, "grad_norm": 0.45783352851867676, "learning_rate": 1.629174934201334e-05, "loss": 0.5668, "step": 20715 }, { "epoch": 0.5688083470620539, "grad_norm": 0.3524676561355591, "learning_rate": 1.6291413641064767e-05, "loss": 0.4856, "step": 20716 }, { "epoch": 0.5688358045030203, "grad_norm": 0.35023990273475647, "learning_rate": 1.62910779283807e-05, "loss": 0.442, "step": 20717 }, { "epoch": 0.5688632619439868, "grad_norm": 0.4144151508808136, "learning_rate": 1.629074220396177e-05, "loss": 0.4952, "step": 20718 }, { "epoch": 0.5688907193849533, "grad_norm": 0.3929734528064728, "learning_rate": 1.6290406467808594e-05, "loss": 0.5873, "step": 20719 }, { "epoch": 0.5689181768259198, "grad_norm": 0.32519757747650146, "learning_rate": 1.6290070719921806e-05, "loss": 0.4721, "step": 20720 }, { "epoch": 0.5689456342668864, "grad_norm": 0.36838969588279724, "learning_rate": 1.6289734960302026e-05, "loss": 0.5232, "step": 20721 }, { "epoch": 0.5689730917078528, "grad_norm": 0.3608323037624359, "learning_rate": 1.628939918894988e-05, "loss": 0.4537, "step": 20722 }, { "epoch": 0.5690005491488194, "grad_norm": 0.37272539734840393, "learning_rate": 1.6289063405866003e-05, "loss": 0.5036, "step": 20723 }, { "epoch": 0.5690280065897858, "grad_norm": 0.41393107175827026, "learning_rate": 1.628872761105101e-05, "loss": 0.5339, "step": 20724 }, { "epoch": 0.5690554640307524, "grad_norm": 0.4032852053642273, "learning_rate": 1.6288391804505537e-05, "loss": 0.5571, "step": 20725 }, { "epoch": 0.5690829214717188, "grad_norm": 0.36378344893455505, "learning_rate": 1.6288055986230206e-05, "loss": 0.4508, "step": 20726 }, { "epoch": 0.5691103789126853, "grad_norm": 0.44989144802093506, "learning_rate": 1.628772015622564e-05, "loss": 0.4847, "step": 20727 }, { "epoch": 0.5691378363536519, "grad_norm": 0.37476539611816406, "learning_rate": 1.628738431449247e-05, "loss": 0.6091, "step": 20728 }, { "epoch": 0.5691652937946183, "grad_norm": 0.44592922925949097, "learning_rate": 1.6287048461031325e-05, "loss": 0.5321, "step": 20729 }, { "epoch": 0.5691927512355849, "grad_norm": 0.37697380781173706, "learning_rate": 1.6286712595842824e-05, "loss": 0.4531, "step": 20730 }, { "epoch": 0.5692202086765513, "grad_norm": 0.35438278317451477, "learning_rate": 1.62863767189276e-05, "loss": 0.4983, "step": 20731 }, { "epoch": 0.5692476661175179, "grad_norm": 0.3275255560874939, "learning_rate": 1.6286040830286278e-05, "loss": 0.4352, "step": 20732 }, { "epoch": 0.5692751235584843, "grad_norm": 0.395128071308136, "learning_rate": 1.628570492991948e-05, "loss": 0.4879, "step": 20733 }, { "epoch": 0.5693025809994509, "grad_norm": 0.3747974634170532, "learning_rate": 1.6285369017827838e-05, "loss": 0.5018, "step": 20734 }, { "epoch": 0.5693300384404174, "grad_norm": 0.3588474690914154, "learning_rate": 1.6285033094011978e-05, "loss": 0.4793, "step": 20735 }, { "epoch": 0.5693574958813838, "grad_norm": 0.38819620013237, "learning_rate": 1.628469715847252e-05, "loss": 0.4896, "step": 20736 }, { "epoch": 0.5693849533223504, "grad_norm": 0.36193135380744934, "learning_rate": 1.6284361211210104e-05, "loss": 0.5608, "step": 20737 }, { "epoch": 0.5694124107633168, "grad_norm": 0.37300702929496765, "learning_rate": 1.6284025252225345e-05, "loss": 0.5027, "step": 20738 }, { "epoch": 0.5694398682042834, "grad_norm": 0.4326159358024597, "learning_rate": 1.6283689281518872e-05, "loss": 0.5242, "step": 20739 }, { "epoch": 0.5694673256452498, "grad_norm": 0.36351820826530457, "learning_rate": 1.6283353299091317e-05, "loss": 0.4641, "step": 20740 }, { "epoch": 0.5694947830862164, "grad_norm": 0.39974188804626465, "learning_rate": 1.6283017304943296e-05, "loss": 0.4745, "step": 20741 }, { "epoch": 0.5695222405271829, "grad_norm": 0.3838856816291809, "learning_rate": 1.6282681299075445e-05, "loss": 0.458, "step": 20742 }, { "epoch": 0.5695496979681494, "grad_norm": 0.3757867217063904, "learning_rate": 1.6282345281488393e-05, "loss": 0.484, "step": 20743 }, { "epoch": 0.5695771554091159, "grad_norm": 0.4008026719093323, "learning_rate": 1.6282009252182758e-05, "loss": 0.5413, "step": 20744 }, { "epoch": 0.5696046128500823, "grad_norm": 0.3786238133907318, "learning_rate": 1.628167321115917e-05, "loss": 0.5206, "step": 20745 }, { "epoch": 0.5696320702910489, "grad_norm": 0.4045695662498474, "learning_rate": 1.628133715841826e-05, "loss": 0.5099, "step": 20746 }, { "epoch": 0.5696595277320153, "grad_norm": 0.32261624932289124, "learning_rate": 1.6281001093960648e-05, "loss": 0.441, "step": 20747 }, { "epoch": 0.5696869851729819, "grad_norm": 0.3629243075847626, "learning_rate": 1.6280665017786966e-05, "loss": 0.5246, "step": 20748 }, { "epoch": 0.5697144426139484, "grad_norm": 0.4072001278400421, "learning_rate": 1.628032892989784e-05, "loss": 0.5547, "step": 20749 }, { "epoch": 0.5697419000549149, "grad_norm": 0.3848189413547516, "learning_rate": 1.6279992830293894e-05, "loss": 0.5154, "step": 20750 }, { "epoch": 0.5697693574958814, "grad_norm": 0.35627439618110657, "learning_rate": 1.6279656718975757e-05, "loss": 0.482, "step": 20751 }, { "epoch": 0.5697968149368479, "grad_norm": 0.34826377034187317, "learning_rate": 1.6279320595944053e-05, "loss": 0.4264, "step": 20752 }, { "epoch": 0.5698242723778144, "grad_norm": 0.3492867052555084, "learning_rate": 1.6278984461199422e-05, "loss": 0.5389, "step": 20753 }, { "epoch": 0.5698517298187808, "grad_norm": 0.34018373489379883, "learning_rate": 1.6278648314742473e-05, "loss": 0.4602, "step": 20754 }, { "epoch": 0.5698791872597474, "grad_norm": 0.4100825786590576, "learning_rate": 1.627831215657384e-05, "loss": 0.5717, "step": 20755 }, { "epoch": 0.5699066447007138, "grad_norm": 0.40944650769233704, "learning_rate": 1.6277975986694156e-05, "loss": 0.5656, "step": 20756 }, { "epoch": 0.5699341021416804, "grad_norm": 0.49932214617729187, "learning_rate": 1.627763980510404e-05, "loss": 0.4762, "step": 20757 }, { "epoch": 0.5699615595826469, "grad_norm": 0.39093250036239624, "learning_rate": 1.6277303611804124e-05, "loss": 0.4871, "step": 20758 }, { "epoch": 0.5699890170236134, "grad_norm": 0.3415631949901581, "learning_rate": 1.627696740679503e-05, "loss": 0.4606, "step": 20759 }, { "epoch": 0.5700164744645799, "grad_norm": 0.3604094088077545, "learning_rate": 1.627663119007739e-05, "loss": 0.4841, "step": 20760 }, { "epoch": 0.5700439319055464, "grad_norm": 0.39569517970085144, "learning_rate": 1.6276294961651832e-05, "loss": 0.5709, "step": 20761 }, { "epoch": 0.5700713893465129, "grad_norm": 0.38825440406799316, "learning_rate": 1.6275958721518978e-05, "loss": 0.5436, "step": 20762 }, { "epoch": 0.5700988467874794, "grad_norm": 0.45905476808547974, "learning_rate": 1.6275622469679458e-05, "loss": 0.4511, "step": 20763 }, { "epoch": 0.5701263042284459, "grad_norm": 0.6257897615432739, "learning_rate": 1.62752862061339e-05, "loss": 0.5942, "step": 20764 }, { "epoch": 0.5701537616694125, "grad_norm": 0.3969634175300598, "learning_rate": 1.627494993088293e-05, "loss": 0.5264, "step": 20765 }, { "epoch": 0.5701812191103789, "grad_norm": 0.35166501998901367, "learning_rate": 1.6274613643927176e-05, "loss": 0.4563, "step": 20766 }, { "epoch": 0.5702086765513454, "grad_norm": 0.47065263986587524, "learning_rate": 1.6274277345267266e-05, "loss": 0.5773, "step": 20767 }, { "epoch": 0.5702361339923119, "grad_norm": 0.3358026444911957, "learning_rate": 1.6273941034903825e-05, "loss": 0.4474, "step": 20768 }, { "epoch": 0.5702635914332784, "grad_norm": 0.3949466049671173, "learning_rate": 1.6273604712837484e-05, "loss": 0.4536, "step": 20769 }, { "epoch": 0.5702910488742449, "grad_norm": 0.3889349102973938, "learning_rate": 1.627326837906886e-05, "loss": 0.6163, "step": 20770 }, { "epoch": 0.5703185063152114, "grad_norm": 0.36021649837493896, "learning_rate": 1.6272932033598597e-05, "loss": 0.4744, "step": 20771 }, { "epoch": 0.570345963756178, "grad_norm": 0.34802865982055664, "learning_rate": 1.6272595676427312e-05, "loss": 0.5055, "step": 20772 }, { "epoch": 0.5703734211971444, "grad_norm": 0.6298422813415527, "learning_rate": 1.627225930755563e-05, "loss": 0.6408, "step": 20773 }, { "epoch": 0.570400878638111, "grad_norm": 0.4019562602043152, "learning_rate": 1.6271922926984187e-05, "loss": 0.5395, "step": 20774 }, { "epoch": 0.5704283360790774, "grad_norm": 0.3766404390335083, "learning_rate": 1.6271586534713604e-05, "loss": 0.6466, "step": 20775 }, { "epoch": 0.5704557935200439, "grad_norm": 0.3403806686401367, "learning_rate": 1.627125013074451e-05, "loss": 0.4684, "step": 20776 }, { "epoch": 0.5704832509610104, "grad_norm": 0.40492501854896545, "learning_rate": 1.6270913715077536e-05, "loss": 0.4909, "step": 20777 }, { "epoch": 0.5705107084019769, "grad_norm": 0.3950720429420471, "learning_rate": 1.6270577287713304e-05, "loss": 0.5314, "step": 20778 }, { "epoch": 0.5705381658429435, "grad_norm": 0.3490827679634094, "learning_rate": 1.6270240848652442e-05, "loss": 0.4769, "step": 20779 }, { "epoch": 0.5705656232839099, "grad_norm": 0.41417697072029114, "learning_rate": 1.6269904397895583e-05, "loss": 0.4648, "step": 20780 }, { "epoch": 0.5705930807248765, "grad_norm": 0.3802061975002289, "learning_rate": 1.626956793544335e-05, "loss": 0.5899, "step": 20781 }, { "epoch": 0.5706205381658429, "grad_norm": 0.3225896656513214, "learning_rate": 1.626923146129637e-05, "loss": 0.4419, "step": 20782 }, { "epoch": 0.5706479956068095, "grad_norm": 0.3852909505367279, "learning_rate": 1.6268894975455275e-05, "loss": 0.5831, "step": 20783 }, { "epoch": 0.5706754530477759, "grad_norm": 0.4464075565338135, "learning_rate": 1.626855847792069e-05, "loss": 0.5556, "step": 20784 }, { "epoch": 0.5707029104887424, "grad_norm": 0.45898228883743286, "learning_rate": 1.626822196869324e-05, "loss": 0.5365, "step": 20785 }, { "epoch": 0.570730367929709, "grad_norm": 0.3733334243297577, "learning_rate": 1.626788544777356e-05, "loss": 0.5002, "step": 20786 }, { "epoch": 0.5707578253706754, "grad_norm": 0.437216192483902, "learning_rate": 1.6267548915162268e-05, "loss": 0.4999, "step": 20787 }, { "epoch": 0.570785282811642, "grad_norm": 0.4099981188774109, "learning_rate": 1.6267212370859998e-05, "loss": 0.4634, "step": 20788 }, { "epoch": 0.5708127402526084, "grad_norm": 0.3484162390232086, "learning_rate": 1.626687581486738e-05, "loss": 0.5316, "step": 20789 }, { "epoch": 0.570840197693575, "grad_norm": 0.37505197525024414, "learning_rate": 1.6266539247185035e-05, "loss": 0.493, "step": 20790 }, { "epoch": 0.5708676551345414, "grad_norm": 0.39721667766571045, "learning_rate": 1.62662026678136e-05, "loss": 0.5306, "step": 20791 }, { "epoch": 0.570895112575508, "grad_norm": 0.3930392563343048, "learning_rate": 1.626586607675369e-05, "loss": 0.621, "step": 20792 }, { "epoch": 0.5709225700164745, "grad_norm": 0.39057016372680664, "learning_rate": 1.6265529474005944e-05, "loss": 0.5178, "step": 20793 }, { "epoch": 0.570950027457441, "grad_norm": 0.41890037059783936, "learning_rate": 1.6265192859570983e-05, "loss": 0.4644, "step": 20794 }, { "epoch": 0.5709774848984075, "grad_norm": 0.35801297426223755, "learning_rate": 1.626485623344944e-05, "loss": 0.4881, "step": 20795 }, { "epoch": 0.5710049423393739, "grad_norm": 0.4181388020515442, "learning_rate": 1.626451959564194e-05, "loss": 0.4388, "step": 20796 }, { "epoch": 0.5710323997803405, "grad_norm": 0.3628963828086853, "learning_rate": 1.626418294614911e-05, "loss": 0.419, "step": 20797 }, { "epoch": 0.5710598572213069, "grad_norm": 0.363842248916626, "learning_rate": 1.626384628497158e-05, "loss": 0.5569, "step": 20798 }, { "epoch": 0.5710873146622735, "grad_norm": 0.4370478391647339, "learning_rate": 1.626350961210998e-05, "loss": 0.5522, "step": 20799 }, { "epoch": 0.57111477210324, "grad_norm": 0.4899080693721771, "learning_rate": 1.626317292756493e-05, "loss": 0.5712, "step": 20800 }, { "epoch": 0.5711422295442065, "grad_norm": 0.38881683349609375, "learning_rate": 1.6262836231337073e-05, "loss": 0.5661, "step": 20801 }, { "epoch": 0.571169686985173, "grad_norm": 0.34586387872695923, "learning_rate": 1.626249952342702e-05, "loss": 0.4947, "step": 20802 }, { "epoch": 0.5711971444261394, "grad_norm": 0.37428396940231323, "learning_rate": 1.626216280383541e-05, "loss": 0.4752, "step": 20803 }, { "epoch": 0.571224601867106, "grad_norm": 0.3929687738418579, "learning_rate": 1.6261826072562865e-05, "loss": 0.4334, "step": 20804 }, { "epoch": 0.5712520593080724, "grad_norm": 0.39043331146240234, "learning_rate": 1.6261489329610017e-05, "loss": 0.4999, "step": 20805 }, { "epoch": 0.571279516749039, "grad_norm": 0.4174078404903412, "learning_rate": 1.6261152574977496e-05, "loss": 0.5296, "step": 20806 }, { "epoch": 0.5713069741900055, "grad_norm": 0.36779022216796875, "learning_rate": 1.6260815808665922e-05, "loss": 0.4548, "step": 20807 }, { "epoch": 0.571334431630972, "grad_norm": 0.37761592864990234, "learning_rate": 1.6260479030675932e-05, "loss": 0.5019, "step": 20808 }, { "epoch": 0.5713618890719385, "grad_norm": 0.3448602557182312, "learning_rate": 1.6260142241008147e-05, "loss": 0.5168, "step": 20809 }, { "epoch": 0.571389346512905, "grad_norm": 0.3351631760597229, "learning_rate": 1.6259805439663202e-05, "loss": 0.4422, "step": 20810 }, { "epoch": 0.5714168039538715, "grad_norm": 0.3722843825817108, "learning_rate": 1.625946862664172e-05, "loss": 0.5829, "step": 20811 }, { "epoch": 0.571444261394838, "grad_norm": 0.3840782344341278, "learning_rate": 1.625913180194433e-05, "loss": 0.4595, "step": 20812 }, { "epoch": 0.5714717188358045, "grad_norm": 0.3596605360507965, "learning_rate": 1.6258794965571665e-05, "loss": 0.4956, "step": 20813 }, { "epoch": 0.571499176276771, "grad_norm": 0.39231932163238525, "learning_rate": 1.6258458117524346e-05, "loss": 0.5434, "step": 20814 }, { "epoch": 0.5715266337177375, "grad_norm": 0.3562372922897339, "learning_rate": 1.625812125780301e-05, "loss": 0.5049, "step": 20815 }, { "epoch": 0.571554091158704, "grad_norm": 0.4121272563934326, "learning_rate": 1.6257784386408277e-05, "loss": 0.4883, "step": 20816 }, { "epoch": 0.5715815485996705, "grad_norm": 0.40300092101097107, "learning_rate": 1.625744750334078e-05, "loss": 0.4823, "step": 20817 }, { "epoch": 0.571609006040637, "grad_norm": 0.35874757170677185, "learning_rate": 1.6257110608601146e-05, "loss": 0.4713, "step": 20818 }, { "epoch": 0.5716364634816035, "grad_norm": 0.4096626043319702, "learning_rate": 1.6256773702190004e-05, "loss": 0.5252, "step": 20819 }, { "epoch": 0.57166392092257, "grad_norm": 0.4090612828731537, "learning_rate": 1.6256436784107982e-05, "loss": 0.4493, "step": 20820 }, { "epoch": 0.5716913783635366, "grad_norm": 0.39767199754714966, "learning_rate": 1.625609985435571e-05, "loss": 0.4155, "step": 20821 }, { "epoch": 0.571718835804503, "grad_norm": 0.3441488444805145, "learning_rate": 1.6255762912933812e-05, "loss": 0.4463, "step": 20822 }, { "epoch": 0.5717462932454696, "grad_norm": 0.36611121892929077, "learning_rate": 1.625542595984292e-05, "loss": 0.4148, "step": 20823 }, { "epoch": 0.571773750686436, "grad_norm": 0.3731817305088043, "learning_rate": 1.6255088995083664e-05, "loss": 0.5233, "step": 20824 }, { "epoch": 0.5718012081274025, "grad_norm": 1.1249477863311768, "learning_rate": 1.6254752018656667e-05, "loss": 0.4851, "step": 20825 }, { "epoch": 0.571828665568369, "grad_norm": 0.44153451919555664, "learning_rate": 1.6254415030562567e-05, "loss": 0.6318, "step": 20826 }, { "epoch": 0.5718561230093355, "grad_norm": 0.5559028387069702, "learning_rate": 1.6254078030801984e-05, "loss": 0.4594, "step": 20827 }, { "epoch": 0.5718835804503021, "grad_norm": 0.397697776556015, "learning_rate": 1.625374101937555e-05, "loss": 0.5461, "step": 20828 }, { "epoch": 0.5719110378912685, "grad_norm": 0.35584378242492676, "learning_rate": 1.625340399628389e-05, "loss": 0.5161, "step": 20829 }, { "epoch": 0.5719384953322351, "grad_norm": 0.3685246706008911, "learning_rate": 1.6253066961527636e-05, "loss": 0.4804, "step": 20830 }, { "epoch": 0.5719659527732015, "grad_norm": 0.33928605914115906, "learning_rate": 1.625272991510742e-05, "loss": 0.3917, "step": 20831 }, { "epoch": 0.571993410214168, "grad_norm": 0.3365391194820404, "learning_rate": 1.6252392857023865e-05, "loss": 0.541, "step": 20832 }, { "epoch": 0.5720208676551345, "grad_norm": 0.4132090210914612, "learning_rate": 1.6252055787277602e-05, "loss": 0.5169, "step": 20833 }, { "epoch": 0.572048325096101, "grad_norm": 0.4078461229801178, "learning_rate": 1.625171870586926e-05, "loss": 0.5701, "step": 20834 }, { "epoch": 0.5720757825370676, "grad_norm": 0.3717350363731384, "learning_rate": 1.6251381612799467e-05, "loss": 0.4895, "step": 20835 }, { "epoch": 0.572103239978034, "grad_norm": 0.3987646698951721, "learning_rate": 1.6251044508068852e-05, "loss": 0.5737, "step": 20836 }, { "epoch": 0.5721306974190006, "grad_norm": 0.41768792271614075, "learning_rate": 1.6250707391678046e-05, "loss": 0.554, "step": 20837 }, { "epoch": 0.572158154859967, "grad_norm": 0.39522528648376465, "learning_rate": 1.625037026362767e-05, "loss": 0.5034, "step": 20838 }, { "epoch": 0.5721856123009336, "grad_norm": 0.3506503999233246, "learning_rate": 1.6250033123918364e-05, "loss": 0.6323, "step": 20839 }, { "epoch": 0.5722130697419, "grad_norm": 0.3963997960090637, "learning_rate": 1.624969597255075e-05, "loss": 0.4387, "step": 20840 }, { "epoch": 0.5722405271828666, "grad_norm": 0.42788976430892944, "learning_rate": 1.6249358809525457e-05, "loss": 0.5299, "step": 20841 }, { "epoch": 0.5722679846238331, "grad_norm": 0.3837425708770752, "learning_rate": 1.6249021634843117e-05, "loss": 0.4491, "step": 20842 }, { "epoch": 0.5722954420647995, "grad_norm": 0.42310038208961487, "learning_rate": 1.6248684448504354e-05, "loss": 0.5876, "step": 20843 }, { "epoch": 0.5723228995057661, "grad_norm": 3.306506395339966, "learning_rate": 1.6248347250509805e-05, "loss": 0.4376, "step": 20844 }, { "epoch": 0.5723503569467325, "grad_norm": 0.3460780084133148, "learning_rate": 1.6248010040860092e-05, "loss": 0.471, "step": 20845 }, { "epoch": 0.5723778143876991, "grad_norm": 0.3610418736934662, "learning_rate": 1.6247672819555846e-05, "loss": 0.4459, "step": 20846 }, { "epoch": 0.5724052718286655, "grad_norm": 0.3698329031467438, "learning_rate": 1.6247335586597694e-05, "loss": 0.5315, "step": 20847 }, { "epoch": 0.5724327292696321, "grad_norm": 0.3579369783401489, "learning_rate": 1.624699834198627e-05, "loss": 0.5123, "step": 20848 }, { "epoch": 0.5724601867105986, "grad_norm": 0.35882753133773804, "learning_rate": 1.6246661085722202e-05, "loss": 0.4616, "step": 20849 }, { "epoch": 0.5724876441515651, "grad_norm": 0.3345099687576294, "learning_rate": 1.6246323817806113e-05, "loss": 0.4502, "step": 20850 }, { "epoch": 0.5725151015925316, "grad_norm": 0.5213518738746643, "learning_rate": 1.6245986538238638e-05, "loss": 0.5564, "step": 20851 }, { "epoch": 0.572542559033498, "grad_norm": 0.3860699236392975, "learning_rate": 1.6245649247020405e-05, "loss": 0.496, "step": 20852 }, { "epoch": 0.5725700164744646, "grad_norm": 0.40391871333122253, "learning_rate": 1.6245311944152043e-05, "loss": 0.5427, "step": 20853 }, { "epoch": 0.572597473915431, "grad_norm": 0.3620806634426117, "learning_rate": 1.624497462963418e-05, "loss": 0.452, "step": 20854 }, { "epoch": 0.5726249313563976, "grad_norm": 0.3836316764354706, "learning_rate": 1.6244637303467444e-05, "loss": 0.4435, "step": 20855 }, { "epoch": 0.5726523887973641, "grad_norm": 0.4403822720050812, "learning_rate": 1.624429996565247e-05, "loss": 0.6015, "step": 20856 }, { "epoch": 0.5726798462383306, "grad_norm": 0.4436423182487488, "learning_rate": 1.6243962616189884e-05, "loss": 0.4471, "step": 20857 }, { "epoch": 0.5727073036792971, "grad_norm": 0.45510226488113403, "learning_rate": 1.6243625255080316e-05, "loss": 0.6406, "step": 20858 }, { "epoch": 0.5727347611202636, "grad_norm": 0.37637847661972046, "learning_rate": 1.624328788232439e-05, "loss": 0.4531, "step": 20859 }, { "epoch": 0.5727622185612301, "grad_norm": 0.3584069609642029, "learning_rate": 1.624295049792274e-05, "loss": 0.4796, "step": 20860 }, { "epoch": 0.5727896760021965, "grad_norm": 0.42730018496513367, "learning_rate": 1.6242613101875998e-05, "loss": 0.5945, "step": 20861 }, { "epoch": 0.5728171334431631, "grad_norm": 0.3859197497367859, "learning_rate": 1.6242275694184792e-05, "loss": 0.5374, "step": 20862 }, { "epoch": 0.5728445908841296, "grad_norm": 0.36612799763679504, "learning_rate": 1.6241938274849742e-05, "loss": 0.5169, "step": 20863 }, { "epoch": 0.5728720483250961, "grad_norm": 0.40852996706962585, "learning_rate": 1.624160084387149e-05, "loss": 0.4881, "step": 20864 }, { "epoch": 0.5728995057660626, "grad_norm": 0.41497281193733215, "learning_rate": 1.6241263401250656e-05, "loss": 0.492, "step": 20865 }, { "epoch": 0.5729269632070291, "grad_norm": 0.34270015358924866, "learning_rate": 1.6240925946987877e-05, "loss": 0.4324, "step": 20866 }, { "epoch": 0.5729544206479956, "grad_norm": 0.3793346881866455, "learning_rate": 1.6240588481083783e-05, "loss": 0.5018, "step": 20867 }, { "epoch": 0.5729818780889621, "grad_norm": 0.4204308092594147, "learning_rate": 1.6240251003538995e-05, "loss": 0.5883, "step": 20868 }, { "epoch": 0.5730093355299286, "grad_norm": 0.36468371748924255, "learning_rate": 1.623991351435415e-05, "loss": 0.5228, "step": 20869 }, { "epoch": 0.5730367929708952, "grad_norm": 0.366685152053833, "learning_rate": 1.623957601352987e-05, "loss": 0.4966, "step": 20870 }, { "epoch": 0.5730642504118616, "grad_norm": 0.38735780119895935, "learning_rate": 1.623923850106679e-05, "loss": 0.5082, "step": 20871 }, { "epoch": 0.5730917078528281, "grad_norm": 0.3310830891132355, "learning_rate": 1.6238900976965543e-05, "loss": 0.4612, "step": 20872 }, { "epoch": 0.5731191652937946, "grad_norm": 0.3623993396759033, "learning_rate": 1.6238563441226753e-05, "loss": 0.5521, "step": 20873 }, { "epoch": 0.5731466227347611, "grad_norm": 0.39407986402511597, "learning_rate": 1.623822589385105e-05, "loss": 0.4995, "step": 20874 }, { "epoch": 0.5731740801757276, "grad_norm": 0.47070375084877014, "learning_rate": 1.6237888334839066e-05, "loss": 0.4838, "step": 20875 }, { "epoch": 0.5732015376166941, "grad_norm": 0.37321561574935913, "learning_rate": 1.6237550764191426e-05, "loss": 0.4633, "step": 20876 }, { "epoch": 0.5732289950576607, "grad_norm": 0.37014269828796387, "learning_rate": 1.6237213181908765e-05, "loss": 0.5358, "step": 20877 }, { "epoch": 0.5732564524986271, "grad_norm": 0.37294018268585205, "learning_rate": 1.6236875587991714e-05, "loss": 0.428, "step": 20878 }, { "epoch": 0.5732839099395937, "grad_norm": 0.3668651580810547, "learning_rate": 1.6236537982440895e-05, "loss": 0.4605, "step": 20879 }, { "epoch": 0.5733113673805601, "grad_norm": 0.39758241176605225, "learning_rate": 1.6236200365256943e-05, "loss": 0.4552, "step": 20880 }, { "epoch": 0.5733388248215266, "grad_norm": 0.40630170702934265, "learning_rate": 1.6235862736440488e-05, "loss": 0.5292, "step": 20881 }, { "epoch": 0.5733662822624931, "grad_norm": 0.34289655089378357, "learning_rate": 1.623552509599216e-05, "loss": 0.4709, "step": 20882 }, { "epoch": 0.5733937397034596, "grad_norm": 0.3872988522052765, "learning_rate": 1.6235187443912584e-05, "loss": 0.5593, "step": 20883 }, { "epoch": 0.5734211971444262, "grad_norm": 0.40005964040756226, "learning_rate": 1.62348497802024e-05, "loss": 0.5476, "step": 20884 }, { "epoch": 0.5734486545853926, "grad_norm": 0.4384595453739166, "learning_rate": 1.6234512104862226e-05, "loss": 0.5195, "step": 20885 }, { "epoch": 0.5734761120263592, "grad_norm": 0.4018765985965729, "learning_rate": 1.6234174417892697e-05, "loss": 0.5077, "step": 20886 }, { "epoch": 0.5735035694673256, "grad_norm": 0.3703628182411194, "learning_rate": 1.6233836719294445e-05, "loss": 0.5445, "step": 20887 }, { "epoch": 0.5735310269082922, "grad_norm": 0.363452285528183, "learning_rate": 1.6233499009068094e-05, "loss": 0.4546, "step": 20888 }, { "epoch": 0.5735584843492586, "grad_norm": 0.477959007024765, "learning_rate": 1.6233161287214282e-05, "loss": 0.517, "step": 20889 }, { "epoch": 0.5735859417902252, "grad_norm": 0.396270215511322, "learning_rate": 1.6232823553733635e-05, "loss": 0.4023, "step": 20890 }, { "epoch": 0.5736133992311917, "grad_norm": 0.3235037326812744, "learning_rate": 1.623248580862678e-05, "loss": 0.3914, "step": 20891 }, { "epoch": 0.5736408566721581, "grad_norm": 0.42866232991218567, "learning_rate": 1.6232148051894352e-05, "loss": 0.5046, "step": 20892 }, { "epoch": 0.5736683141131247, "grad_norm": 1.563644289970398, "learning_rate": 1.6231810283536974e-05, "loss": 0.4843, "step": 20893 }, { "epoch": 0.5736957715540911, "grad_norm": 0.3844318985939026, "learning_rate": 1.623147250355529e-05, "loss": 0.5123, "step": 20894 }, { "epoch": 0.5737232289950577, "grad_norm": 0.39729180932044983, "learning_rate": 1.6231134711949912e-05, "loss": 0.5683, "step": 20895 }, { "epoch": 0.5737506864360241, "grad_norm": 0.4076063632965088, "learning_rate": 1.6230796908721482e-05, "loss": 0.5382, "step": 20896 }, { "epoch": 0.5737781438769907, "grad_norm": 0.41465601325035095, "learning_rate": 1.623045909387063e-05, "loss": 0.4914, "step": 20897 }, { "epoch": 0.5738056013179572, "grad_norm": 0.3559700846672058, "learning_rate": 1.623012126739798e-05, "loss": 0.5096, "step": 20898 }, { "epoch": 0.5738330587589237, "grad_norm": 0.4624376893043518, "learning_rate": 1.6229783429304167e-05, "loss": 0.468, "step": 20899 }, { "epoch": 0.5738605161998902, "grad_norm": 0.3423447608947754, "learning_rate": 1.6229445579589816e-05, "loss": 0.4798, "step": 20900 }, { "epoch": 0.5738879736408566, "grad_norm": 0.38177987933158875, "learning_rate": 1.6229107718255566e-05, "loss": 0.5044, "step": 20901 }, { "epoch": 0.5739154310818232, "grad_norm": 0.39088937640190125, "learning_rate": 1.622876984530204e-05, "loss": 0.4487, "step": 20902 }, { "epoch": 0.5739428885227896, "grad_norm": 0.3567960858345032, "learning_rate": 1.6228431960729868e-05, "loss": 0.4918, "step": 20903 }, { "epoch": 0.5739703459637562, "grad_norm": 0.38986262679100037, "learning_rate": 1.6228094064539687e-05, "loss": 0.5044, "step": 20904 }, { "epoch": 0.5739978034047227, "grad_norm": 0.3946370780467987, "learning_rate": 1.622775615673212e-05, "loss": 0.5157, "step": 20905 }, { "epoch": 0.5740252608456892, "grad_norm": 0.3690735399723053, "learning_rate": 1.6227418237307802e-05, "loss": 0.4896, "step": 20906 }, { "epoch": 0.5740527182866557, "grad_norm": 0.38739410042762756, "learning_rate": 1.622708030626736e-05, "loss": 0.5535, "step": 20907 }, { "epoch": 0.5740801757276222, "grad_norm": 0.3946773111820221, "learning_rate": 1.6226742363611424e-05, "loss": 0.5047, "step": 20908 }, { "epoch": 0.5741076331685887, "grad_norm": 0.39362606406211853, "learning_rate": 1.6226404409340627e-05, "loss": 0.5301, "step": 20909 }, { "epoch": 0.5741350906095551, "grad_norm": 0.35539260506629944, "learning_rate": 1.62260664434556e-05, "loss": 0.4427, "step": 20910 }, { "epoch": 0.5741625480505217, "grad_norm": 0.39691585302352905, "learning_rate": 1.622572846595697e-05, "loss": 0.5408, "step": 20911 }, { "epoch": 0.5741900054914882, "grad_norm": 0.40126833319664, "learning_rate": 1.622539047684537e-05, "loss": 0.5192, "step": 20912 }, { "epoch": 0.5742174629324547, "grad_norm": 0.4018343687057495, "learning_rate": 1.622505247612143e-05, "loss": 0.5073, "step": 20913 }, { "epoch": 0.5742449203734212, "grad_norm": 0.362333744764328, "learning_rate": 1.622471446378578e-05, "loss": 0.4694, "step": 20914 }, { "epoch": 0.5742723778143877, "grad_norm": 0.36925196647644043, "learning_rate": 1.6224376439839057e-05, "loss": 0.5072, "step": 20915 }, { "epoch": 0.5742998352553542, "grad_norm": 0.392006516456604, "learning_rate": 1.622403840428188e-05, "loss": 0.5588, "step": 20916 }, { "epoch": 0.5743272926963207, "grad_norm": 0.34326276183128357, "learning_rate": 1.6223700357114883e-05, "loss": 0.3995, "step": 20917 }, { "epoch": 0.5743547501372872, "grad_norm": 1.1203263998031616, "learning_rate": 1.62233622983387e-05, "loss": 0.5348, "step": 20918 }, { "epoch": 0.5743822075782538, "grad_norm": 0.3369385898113251, "learning_rate": 1.622302422795396e-05, "loss": 0.4114, "step": 20919 }, { "epoch": 0.5744096650192202, "grad_norm": 0.38501250743865967, "learning_rate": 1.622268614596129e-05, "loss": 0.4154, "step": 20920 }, { "epoch": 0.5744371224601867, "grad_norm": 0.4021569490432739, "learning_rate": 1.622234805236133e-05, "loss": 0.5231, "step": 20921 }, { "epoch": 0.5744645799011532, "grad_norm": 0.36398550868034363, "learning_rate": 1.6222009947154704e-05, "loss": 0.5018, "step": 20922 }, { "epoch": 0.5744920373421197, "grad_norm": 0.34999579191207886, "learning_rate": 1.6221671830342046e-05, "loss": 0.5464, "step": 20923 }, { "epoch": 0.5745194947830862, "grad_norm": 0.3825182616710663, "learning_rate": 1.622133370192398e-05, "loss": 0.5473, "step": 20924 }, { "epoch": 0.5745469522240527, "grad_norm": 0.3554196059703827, "learning_rate": 1.622099556190114e-05, "loss": 0.4717, "step": 20925 }, { "epoch": 0.5745744096650193, "grad_norm": 0.451681524515152, "learning_rate": 1.622065741027416e-05, "loss": 0.5604, "step": 20926 }, { "epoch": 0.5746018671059857, "grad_norm": 0.34658780694007874, "learning_rate": 1.622031924704367e-05, "loss": 0.4499, "step": 20927 }, { "epoch": 0.5746293245469523, "grad_norm": 0.38811370730400085, "learning_rate": 1.62199810722103e-05, "loss": 0.5206, "step": 20928 }, { "epoch": 0.5746567819879187, "grad_norm": 0.3392069637775421, "learning_rate": 1.6219642885774677e-05, "loss": 0.5029, "step": 20929 }, { "epoch": 0.5746842394288852, "grad_norm": 0.5031186938285828, "learning_rate": 1.6219304687737434e-05, "loss": 0.665, "step": 20930 }, { "epoch": 0.5747116968698517, "grad_norm": 0.3625243306159973, "learning_rate": 1.6218966478099204e-05, "loss": 0.401, "step": 20931 }, { "epoch": 0.5747391543108182, "grad_norm": 0.38520389795303345, "learning_rate": 1.621862825686062e-05, "loss": 0.4656, "step": 20932 }, { "epoch": 0.5747666117517848, "grad_norm": 0.5292840600013733, "learning_rate": 1.6218290024022305e-05, "loss": 0.582, "step": 20933 }, { "epoch": 0.5747940691927512, "grad_norm": 0.3592855632305145, "learning_rate": 1.62179517795849e-05, "loss": 0.439, "step": 20934 }, { "epoch": 0.5748215266337178, "grad_norm": 0.41490501165390015, "learning_rate": 1.6217613523549026e-05, "loss": 0.5182, "step": 20935 }, { "epoch": 0.5748489840746842, "grad_norm": 0.39287495613098145, "learning_rate": 1.6217275255915318e-05, "loss": 0.466, "step": 20936 }, { "epoch": 0.5748764415156508, "grad_norm": 0.40467017889022827, "learning_rate": 1.621693697668441e-05, "loss": 0.4536, "step": 20937 }, { "epoch": 0.5749038989566172, "grad_norm": 0.370430052280426, "learning_rate": 1.6216598685856927e-05, "loss": 0.5371, "step": 20938 }, { "epoch": 0.5749313563975837, "grad_norm": 0.4402439296245575, "learning_rate": 1.6216260383433506e-05, "loss": 0.5273, "step": 20939 }, { "epoch": 0.5749588138385503, "grad_norm": 0.41700252890586853, "learning_rate": 1.6215922069414772e-05, "loss": 0.4763, "step": 20940 }, { "epoch": 0.5749862712795167, "grad_norm": 0.42033445835113525, "learning_rate": 1.621558374380136e-05, "loss": 0.5222, "step": 20941 }, { "epoch": 0.5750137287204833, "grad_norm": 0.40189656615257263, "learning_rate": 1.6215245406593907e-05, "loss": 0.5572, "step": 20942 }, { "epoch": 0.5750411861614497, "grad_norm": 0.3530421555042267, "learning_rate": 1.6214907057793033e-05, "loss": 0.473, "step": 20943 }, { "epoch": 0.5750686436024163, "grad_norm": 0.4205865263938904, "learning_rate": 1.6214568697399374e-05, "loss": 0.4832, "step": 20944 }, { "epoch": 0.5750961010433827, "grad_norm": 0.3558129072189331, "learning_rate": 1.6214230325413558e-05, "loss": 0.4942, "step": 20945 }, { "epoch": 0.5751235584843493, "grad_norm": 0.3731186091899872, "learning_rate": 1.6213891941836225e-05, "loss": 0.4753, "step": 20946 }, { "epoch": 0.5751510159253158, "grad_norm": 0.3688884675502777, "learning_rate": 1.6213553546667995e-05, "loss": 0.5166, "step": 20947 }, { "epoch": 0.5751784733662823, "grad_norm": 0.44873446226119995, "learning_rate": 1.6213215139909508e-05, "loss": 0.5124, "step": 20948 }, { "epoch": 0.5752059308072488, "grad_norm": 0.3517833352088928, "learning_rate": 1.621287672156139e-05, "loss": 0.4566, "step": 20949 }, { "epoch": 0.5752333882482152, "grad_norm": 0.42841485142707825, "learning_rate": 1.6212538291624274e-05, "loss": 0.4801, "step": 20950 }, { "epoch": 0.5752608456891818, "grad_norm": 0.32258570194244385, "learning_rate": 1.621219985009879e-05, "loss": 0.5041, "step": 20951 }, { "epoch": 0.5752883031301482, "grad_norm": 0.3912141025066376, "learning_rate": 1.6211861396985572e-05, "loss": 0.525, "step": 20952 }, { "epoch": 0.5753157605711148, "grad_norm": 0.37746307253837585, "learning_rate": 1.621152293228525e-05, "loss": 0.5511, "step": 20953 }, { "epoch": 0.5753432180120813, "grad_norm": 0.35619834065437317, "learning_rate": 1.6211184455998457e-05, "loss": 0.477, "step": 20954 }, { "epoch": 0.5753706754530478, "grad_norm": 0.35199007391929626, "learning_rate": 1.621084596812582e-05, "loss": 0.4749, "step": 20955 }, { "epoch": 0.5753981328940143, "grad_norm": 0.4098707139492035, "learning_rate": 1.6210507468667974e-05, "loss": 0.5146, "step": 20956 }, { "epoch": 0.5754255903349808, "grad_norm": 0.375472754240036, "learning_rate": 1.621016895762555e-05, "loss": 0.5561, "step": 20957 }, { "epoch": 0.5754530477759473, "grad_norm": 0.636448085308075, "learning_rate": 1.6209830434999178e-05, "loss": 0.594, "step": 20958 }, { "epoch": 0.5754805052169137, "grad_norm": 0.41762927174568176, "learning_rate": 1.620949190078949e-05, "loss": 0.5451, "step": 20959 }, { "epoch": 0.5755079626578803, "grad_norm": 0.38104188442230225, "learning_rate": 1.6209153354997118e-05, "loss": 0.4831, "step": 20960 }, { "epoch": 0.5755354200988468, "grad_norm": 0.4120206832885742, "learning_rate": 1.6208814797622695e-05, "loss": 0.511, "step": 20961 }, { "epoch": 0.5755628775398133, "grad_norm": 0.3670699894428253, "learning_rate": 1.620847622866685e-05, "loss": 0.5287, "step": 20962 }, { "epoch": 0.5755903349807798, "grad_norm": 0.3656221330165863, "learning_rate": 1.620813764813021e-05, "loss": 0.4572, "step": 20963 }, { "epoch": 0.5756177924217463, "grad_norm": 0.4117569923400879, "learning_rate": 1.620779905601342e-05, "loss": 0.4497, "step": 20964 }, { "epoch": 0.5756452498627128, "grad_norm": 0.34083014726638794, "learning_rate": 1.6207460452317102e-05, "loss": 0.5295, "step": 20965 }, { "epoch": 0.5756727073036793, "grad_norm": 0.860584020614624, "learning_rate": 1.6207121837041882e-05, "loss": 0.3959, "step": 20966 }, { "epoch": 0.5757001647446458, "grad_norm": 0.3538675606250763, "learning_rate": 1.6206783210188406e-05, "loss": 0.5081, "step": 20967 }, { "epoch": 0.5757276221856124, "grad_norm": 0.3399595320224762, "learning_rate": 1.6206444571757297e-05, "loss": 0.4543, "step": 20968 }, { "epoch": 0.5757550796265788, "grad_norm": 0.3452746570110321, "learning_rate": 1.6206105921749186e-05, "loss": 0.4283, "step": 20969 }, { "epoch": 0.5757825370675453, "grad_norm": 0.34506797790527344, "learning_rate": 1.620576726016471e-05, "loss": 0.4694, "step": 20970 }, { "epoch": 0.5758099945085118, "grad_norm": 0.42218878865242004, "learning_rate": 1.6205428587004494e-05, "loss": 0.6049, "step": 20971 }, { "epoch": 0.5758374519494783, "grad_norm": 0.6600610613822937, "learning_rate": 1.6205089902269174e-05, "loss": 0.5867, "step": 20972 }, { "epoch": 0.5758649093904448, "grad_norm": 0.4079875946044922, "learning_rate": 1.6204751205959382e-05, "loss": 0.5122, "step": 20973 }, { "epoch": 0.5758923668314113, "grad_norm": 0.4054216146469116, "learning_rate": 1.620441249807575e-05, "loss": 0.4784, "step": 20974 }, { "epoch": 0.5759198242723779, "grad_norm": 0.330305814743042, "learning_rate": 1.62040737786189e-05, "loss": 0.4496, "step": 20975 }, { "epoch": 0.5759472817133443, "grad_norm": 0.38914692401885986, "learning_rate": 1.620373504758948e-05, "loss": 0.4724, "step": 20976 }, { "epoch": 0.5759747391543109, "grad_norm": 0.36408349871635437, "learning_rate": 1.6203396304988115e-05, "loss": 0.5023, "step": 20977 }, { "epoch": 0.5760021965952773, "grad_norm": 0.38993039727211, "learning_rate": 1.6203057550815432e-05, "loss": 0.5167, "step": 20978 }, { "epoch": 0.5760296540362438, "grad_norm": 0.6605259776115417, "learning_rate": 1.620271878507207e-05, "loss": 0.5387, "step": 20979 }, { "epoch": 0.5760571114772103, "grad_norm": 0.40047940611839294, "learning_rate": 1.6202380007758657e-05, "loss": 0.4737, "step": 20980 }, { "epoch": 0.5760845689181768, "grad_norm": 0.40364524722099304, "learning_rate": 1.6202041218875825e-05, "loss": 0.4898, "step": 20981 }, { "epoch": 0.5761120263591434, "grad_norm": 0.39270633459091187, "learning_rate": 1.6201702418424206e-05, "loss": 0.4992, "step": 20982 }, { "epoch": 0.5761394838001098, "grad_norm": 0.38883131742477417, "learning_rate": 1.6201363606404435e-05, "loss": 0.5466, "step": 20983 }, { "epoch": 0.5761669412410764, "grad_norm": 0.35050728917121887, "learning_rate": 1.620102478281714e-05, "loss": 0.486, "step": 20984 }, { "epoch": 0.5761943986820428, "grad_norm": 0.3443639874458313, "learning_rate": 1.6200685947662953e-05, "loss": 0.4771, "step": 20985 }, { "epoch": 0.5762218561230094, "grad_norm": 0.41249746084213257, "learning_rate": 1.6200347100942512e-05, "loss": 0.5198, "step": 20986 }, { "epoch": 0.5762493135639758, "grad_norm": 0.38163426518440247, "learning_rate": 1.620000824265644e-05, "loss": 0.4926, "step": 20987 }, { "epoch": 0.5762767710049423, "grad_norm": 0.498576819896698, "learning_rate": 1.6199669372805375e-05, "loss": 0.4822, "step": 20988 }, { "epoch": 0.5763042284459089, "grad_norm": 0.39927858114242554, "learning_rate": 1.619933049138995e-05, "loss": 0.5195, "step": 20989 }, { "epoch": 0.5763316858868753, "grad_norm": 0.3743223547935486, "learning_rate": 1.6198991598410794e-05, "loss": 0.502, "step": 20990 }, { "epoch": 0.5763591433278419, "grad_norm": 0.34389349818229675, "learning_rate": 1.619865269386854e-05, "loss": 0.4954, "step": 20991 }, { "epoch": 0.5763866007688083, "grad_norm": 0.3621140420436859, "learning_rate": 1.619831377776382e-05, "loss": 0.4838, "step": 20992 }, { "epoch": 0.5764140582097749, "grad_norm": 0.38879209756851196, "learning_rate": 1.6197974850097267e-05, "loss": 0.5618, "step": 20993 }, { "epoch": 0.5764415156507413, "grad_norm": 0.3893915116786957, "learning_rate": 1.6197635910869512e-05, "loss": 0.5639, "step": 20994 }, { "epoch": 0.5764689730917079, "grad_norm": 0.38905787467956543, "learning_rate": 1.6197296960081187e-05, "loss": 0.4451, "step": 20995 }, { "epoch": 0.5764964305326744, "grad_norm": 0.43307432532310486, "learning_rate": 1.619695799773293e-05, "loss": 0.4706, "step": 20996 }, { "epoch": 0.5765238879736408, "grad_norm": 0.3920542895793915, "learning_rate": 1.6196619023825365e-05, "loss": 0.5303, "step": 20997 }, { "epoch": 0.5765513454146074, "grad_norm": 0.33225229382514954, "learning_rate": 1.6196280038359125e-05, "loss": 0.4674, "step": 20998 }, { "epoch": 0.5765788028555738, "grad_norm": 0.40328145027160645, "learning_rate": 1.6195941041334853e-05, "loss": 0.4806, "step": 20999 }, { "epoch": 0.5766062602965404, "grad_norm": 0.36586275696754456, "learning_rate": 1.6195602032753165e-05, "loss": 0.5511, "step": 21000 }, { "epoch": 0.5766337177375068, "grad_norm": 0.36848708987236023, "learning_rate": 1.6195263012614705e-05, "loss": 0.4804, "step": 21001 }, { "epoch": 0.5766611751784734, "grad_norm": 0.3625204265117645, "learning_rate": 1.6194923980920104e-05, "loss": 0.475, "step": 21002 }, { "epoch": 0.5766886326194399, "grad_norm": 0.43720880150794983, "learning_rate": 1.619458493766999e-05, "loss": 0.5495, "step": 21003 }, { "epoch": 0.5767160900604064, "grad_norm": 0.3983374536037445, "learning_rate": 1.6194245882865e-05, "loss": 0.4801, "step": 21004 }, { "epoch": 0.5767435475013729, "grad_norm": 0.34298333525657654, "learning_rate": 1.6193906816505765e-05, "loss": 0.5039, "step": 21005 }, { "epoch": 0.5767710049423393, "grad_norm": 0.36116135120391846, "learning_rate": 1.6193567738592916e-05, "loss": 0.4947, "step": 21006 }, { "epoch": 0.5767984623833059, "grad_norm": 0.40506628155708313, "learning_rate": 1.6193228649127087e-05, "loss": 0.5664, "step": 21007 }, { "epoch": 0.5768259198242723, "grad_norm": 0.42807918787002563, "learning_rate": 1.6192889548108907e-05, "loss": 0.4915, "step": 21008 }, { "epoch": 0.5768533772652389, "grad_norm": 0.38928791880607605, "learning_rate": 1.6192550435539013e-05, "loss": 0.4908, "step": 21009 }, { "epoch": 0.5768808347062054, "grad_norm": 0.35756006836891174, "learning_rate": 1.6192211311418037e-05, "loss": 0.4533, "step": 21010 }, { "epoch": 0.5769082921471719, "grad_norm": 0.6645637154579163, "learning_rate": 1.619187217574661e-05, "loss": 0.5124, "step": 21011 }, { "epoch": 0.5769357495881384, "grad_norm": 0.3633092939853668, "learning_rate": 1.619153302852537e-05, "loss": 0.4427, "step": 21012 }, { "epoch": 0.5769632070291049, "grad_norm": 0.4048064947128296, "learning_rate": 1.6191193869754934e-05, "loss": 0.5095, "step": 21013 }, { "epoch": 0.5769906644700714, "grad_norm": 0.3969387710094452, "learning_rate": 1.6190854699435954e-05, "loss": 0.4429, "step": 21014 }, { "epoch": 0.5770181219110379, "grad_norm": 0.4186748266220093, "learning_rate": 1.6190515517569048e-05, "loss": 0.4779, "step": 21015 }, { "epoch": 0.5770455793520044, "grad_norm": 0.3775739073753357, "learning_rate": 1.619017632415486e-05, "loss": 0.4971, "step": 21016 }, { "epoch": 0.577073036792971, "grad_norm": 0.3960813581943512, "learning_rate": 1.618983711919402e-05, "loss": 0.4947, "step": 21017 }, { "epoch": 0.5771004942339374, "grad_norm": 0.43048200011253357, "learning_rate": 1.618949790268715e-05, "loss": 0.5819, "step": 21018 }, { "epoch": 0.5771279516749039, "grad_norm": 0.3707483410835266, "learning_rate": 1.6189158674634894e-05, "loss": 0.4882, "step": 21019 }, { "epoch": 0.5771554091158704, "grad_norm": 0.419536828994751, "learning_rate": 1.6188819435037882e-05, "loss": 0.527, "step": 21020 }, { "epoch": 0.5771828665568369, "grad_norm": 0.37728551030158997, "learning_rate": 1.618848018389675e-05, "loss": 0.4705, "step": 21021 }, { "epoch": 0.5772103239978034, "grad_norm": 0.38708680868148804, "learning_rate": 1.6188140921212126e-05, "loss": 0.574, "step": 21022 }, { "epoch": 0.5772377814387699, "grad_norm": 0.36911657452583313, "learning_rate": 1.618780164698464e-05, "loss": 0.48, "step": 21023 }, { "epoch": 0.5772652388797364, "grad_norm": 0.3945554792881012, "learning_rate": 1.6187462361214934e-05, "loss": 0.5436, "step": 21024 }, { "epoch": 0.5772926963207029, "grad_norm": 0.42925116419792175, "learning_rate": 1.618712306390363e-05, "loss": 0.5696, "step": 21025 }, { "epoch": 0.5773201537616695, "grad_norm": 0.3835682272911072, "learning_rate": 1.6186783755051375e-05, "loss": 0.5289, "step": 21026 }, { "epoch": 0.5773476112026359, "grad_norm": 0.3795356750488281, "learning_rate": 1.6186444434658787e-05, "loss": 0.483, "step": 21027 }, { "epoch": 0.5773750686436024, "grad_norm": 0.3714221119880676, "learning_rate": 1.6186105102726507e-05, "loss": 0.5668, "step": 21028 }, { "epoch": 0.5774025260845689, "grad_norm": 0.46108782291412354, "learning_rate": 1.6185765759255168e-05, "loss": 0.4929, "step": 21029 }, { "epoch": 0.5774299835255354, "grad_norm": 0.3710031509399414, "learning_rate": 1.61854264042454e-05, "loss": 0.4736, "step": 21030 }, { "epoch": 0.5774574409665019, "grad_norm": 0.4024684727191925, "learning_rate": 1.6185087037697838e-05, "loss": 0.5044, "step": 21031 }, { "epoch": 0.5774848984074684, "grad_norm": 0.40853792428970337, "learning_rate": 1.6184747659613118e-05, "loss": 0.563, "step": 21032 }, { "epoch": 0.577512355848435, "grad_norm": 0.43153294920921326, "learning_rate": 1.6184408269991866e-05, "loss": 0.5902, "step": 21033 }, { "epoch": 0.5775398132894014, "grad_norm": 0.46708911657333374, "learning_rate": 1.618406886883472e-05, "loss": 0.525, "step": 21034 }, { "epoch": 0.577567270730368, "grad_norm": 0.37165382504463196, "learning_rate": 1.618372945614231e-05, "loss": 0.473, "step": 21035 }, { "epoch": 0.5775947281713344, "grad_norm": 0.4049939513206482, "learning_rate": 1.6183390031915274e-05, "loss": 0.5777, "step": 21036 }, { "epoch": 0.5776221856123009, "grad_norm": 0.38714590668678284, "learning_rate": 1.618305059615424e-05, "loss": 0.5438, "step": 21037 }, { "epoch": 0.5776496430532674, "grad_norm": 0.369933545589447, "learning_rate": 1.618271114885984e-05, "loss": 0.566, "step": 21038 }, { "epoch": 0.5776771004942339, "grad_norm": 0.48515790700912476, "learning_rate": 1.6182371690032717e-05, "loss": 0.495, "step": 21039 }, { "epoch": 0.5777045579352005, "grad_norm": 0.5232720971107483, "learning_rate": 1.6182032219673495e-05, "loss": 0.4974, "step": 21040 }, { "epoch": 0.5777320153761669, "grad_norm": 0.3647095859050751, "learning_rate": 1.6181692737782806e-05, "loss": 0.568, "step": 21041 }, { "epoch": 0.5777594728171335, "grad_norm": 0.5132710933685303, "learning_rate": 1.6181353244361293e-05, "loss": 0.6286, "step": 21042 }, { "epoch": 0.5777869302580999, "grad_norm": 0.4020809233188629, "learning_rate": 1.618101373940958e-05, "loss": 0.4174, "step": 21043 }, { "epoch": 0.5778143876990665, "grad_norm": 0.39014431834220886, "learning_rate": 1.6180674222928306e-05, "loss": 0.5443, "step": 21044 }, { "epoch": 0.5778418451400329, "grad_norm": 0.3430628776550293, "learning_rate": 1.61803346949181e-05, "loss": 0.502, "step": 21045 }, { "epoch": 0.5778693025809994, "grad_norm": 0.39478838443756104, "learning_rate": 1.61799951553796e-05, "loss": 0.5033, "step": 21046 }, { "epoch": 0.577896760021966, "grad_norm": 0.3374863266944885, "learning_rate": 1.617965560431343e-05, "loss": 0.4757, "step": 21047 }, { "epoch": 0.5779242174629324, "grad_norm": 0.35532885789871216, "learning_rate": 1.6179316041720235e-05, "loss": 0.5351, "step": 21048 }, { "epoch": 0.577951674903899, "grad_norm": 0.37188029289245605, "learning_rate": 1.6178976467600645e-05, "loss": 0.5509, "step": 21049 }, { "epoch": 0.5779791323448654, "grad_norm": 0.3592908978462219, "learning_rate": 1.617863688195529e-05, "loss": 0.4856, "step": 21050 }, { "epoch": 0.578006589785832, "grad_norm": 0.37347546219825745, "learning_rate": 1.6178297284784802e-05, "loss": 0.442, "step": 21051 }, { "epoch": 0.5780340472267984, "grad_norm": 0.388260155916214, "learning_rate": 1.6177957676089822e-05, "loss": 0.48, "step": 21052 }, { "epoch": 0.578061504667765, "grad_norm": 0.4198438823223114, "learning_rate": 1.617761805587098e-05, "loss": 0.4734, "step": 21053 }, { "epoch": 0.5780889621087315, "grad_norm": 0.378131240606308, "learning_rate": 1.6177278424128904e-05, "loss": 0.563, "step": 21054 }, { "epoch": 0.578116419549698, "grad_norm": 0.4165869951248169, "learning_rate": 1.6176938780864233e-05, "loss": 0.554, "step": 21055 }, { "epoch": 0.5781438769906645, "grad_norm": 0.38825780153274536, "learning_rate": 1.6176599126077602e-05, "loss": 0.5302, "step": 21056 }, { "epoch": 0.5781713344316309, "grad_norm": 0.4153463542461395, "learning_rate": 1.6176259459769644e-05, "loss": 0.547, "step": 21057 }, { "epoch": 0.5781987918725975, "grad_norm": 0.33497610688209534, "learning_rate": 1.6175919781940987e-05, "loss": 0.4941, "step": 21058 }, { "epoch": 0.5782262493135639, "grad_norm": 0.37138551473617554, "learning_rate": 1.6175580092592268e-05, "loss": 0.5127, "step": 21059 }, { "epoch": 0.5782537067545305, "grad_norm": 0.5510372519493103, "learning_rate": 1.6175240391724125e-05, "loss": 0.4957, "step": 21060 }, { "epoch": 0.578281164195497, "grad_norm": 0.417133092880249, "learning_rate": 1.6174900679337185e-05, "loss": 0.554, "step": 21061 }, { "epoch": 0.5783086216364635, "grad_norm": 0.3699566721916199, "learning_rate": 1.6174560955432083e-05, "loss": 0.5348, "step": 21062 }, { "epoch": 0.57833607907743, "grad_norm": 0.39545226097106934, "learning_rate": 1.617422122000946e-05, "loss": 0.5859, "step": 21063 }, { "epoch": 0.5783635365183964, "grad_norm": 0.3440853953361511, "learning_rate": 1.6173881473069936e-05, "loss": 0.4487, "step": 21064 }, { "epoch": 0.578390993959363, "grad_norm": 0.36826151609420776, "learning_rate": 1.6173541714614158e-05, "loss": 0.4578, "step": 21065 }, { "epoch": 0.5784184514003294, "grad_norm": 0.4085372984409332, "learning_rate": 1.6173201944642753e-05, "loss": 0.5586, "step": 21066 }, { "epoch": 0.578445908841296, "grad_norm": 0.407913476228714, "learning_rate": 1.617286216315636e-05, "loss": 0.4888, "step": 21067 }, { "epoch": 0.5784733662822625, "grad_norm": 0.3701746165752411, "learning_rate": 1.6172522370155602e-05, "loss": 0.5033, "step": 21068 }, { "epoch": 0.578500823723229, "grad_norm": 0.3585933744907379, "learning_rate": 1.6172182565641123e-05, "loss": 0.4864, "step": 21069 }, { "epoch": 0.5785282811641955, "grad_norm": 0.36838412284851074, "learning_rate": 1.617184274961355e-05, "loss": 0.549, "step": 21070 }, { "epoch": 0.578555738605162, "grad_norm": 0.3568468689918518, "learning_rate": 1.617150292207353e-05, "loss": 0.4534, "step": 21071 }, { "epoch": 0.5785831960461285, "grad_norm": 0.4885975420475006, "learning_rate": 1.6171163083021678e-05, "loss": 0.5392, "step": 21072 }, { "epoch": 0.578610653487095, "grad_norm": 0.3947300910949707, "learning_rate": 1.617082323245864e-05, "loss": 0.4258, "step": 21073 }, { "epoch": 0.5786381109280615, "grad_norm": 0.3736065626144409, "learning_rate": 1.617048337038505e-05, "loss": 0.4635, "step": 21074 }, { "epoch": 0.578665568369028, "grad_norm": 0.4010109305381775, "learning_rate": 1.6170143496801534e-05, "loss": 0.5469, "step": 21075 }, { "epoch": 0.5786930258099945, "grad_norm": 0.38330918550491333, "learning_rate": 1.6169803611708733e-05, "loss": 0.5539, "step": 21076 }, { "epoch": 0.578720483250961, "grad_norm": 0.3805399537086487, "learning_rate": 1.616946371510728e-05, "loss": 0.5573, "step": 21077 }, { "epoch": 0.5787479406919275, "grad_norm": 0.35181549191474915, "learning_rate": 1.616912380699781e-05, "loss": 0.4343, "step": 21078 }, { "epoch": 0.578775398132894, "grad_norm": 0.3567991554737091, "learning_rate": 1.616878388738095e-05, "loss": 0.4969, "step": 21079 }, { "epoch": 0.5788028555738605, "grad_norm": 0.37538090348243713, "learning_rate": 1.6168443956257345e-05, "loss": 0.4416, "step": 21080 }, { "epoch": 0.578830313014827, "grad_norm": 0.36170899868011475, "learning_rate": 1.6168104013627618e-05, "loss": 0.4539, "step": 21081 }, { "epoch": 0.5788577704557936, "grad_norm": 0.39925888180732727, "learning_rate": 1.6167764059492412e-05, "loss": 0.4123, "step": 21082 }, { "epoch": 0.57888522789676, "grad_norm": 0.3750722408294678, "learning_rate": 1.6167424093852354e-05, "loss": 0.5257, "step": 21083 }, { "epoch": 0.5789126853377266, "grad_norm": 0.345400869846344, "learning_rate": 1.616708411670809e-05, "loss": 0.4338, "step": 21084 }, { "epoch": 0.578940142778693, "grad_norm": 0.3888583779335022, "learning_rate": 1.6166744128060237e-05, "loss": 0.5128, "step": 21085 }, { "epoch": 0.5789676002196595, "grad_norm": 0.4158656895160675, "learning_rate": 1.616640412790944e-05, "loss": 0.4485, "step": 21086 }, { "epoch": 0.578995057660626, "grad_norm": 0.3502817749977112, "learning_rate": 1.616606411625633e-05, "loss": 0.4707, "step": 21087 }, { "epoch": 0.5790225151015925, "grad_norm": 0.40070006251335144, "learning_rate": 1.6165724093101548e-05, "loss": 0.543, "step": 21088 }, { "epoch": 0.5790499725425591, "grad_norm": 0.3595455586910248, "learning_rate": 1.6165384058445716e-05, "loss": 0.5093, "step": 21089 }, { "epoch": 0.5790774299835255, "grad_norm": 0.3320959806442261, "learning_rate": 1.6165044012289478e-05, "loss": 0.4949, "step": 21090 }, { "epoch": 0.5791048874244921, "grad_norm": 0.4028443396091461, "learning_rate": 1.6164703954633466e-05, "loss": 0.5567, "step": 21091 }, { "epoch": 0.5791323448654585, "grad_norm": 0.3670755624771118, "learning_rate": 1.6164363885478315e-05, "loss": 0.5036, "step": 21092 }, { "epoch": 0.579159802306425, "grad_norm": 0.3542087972164154, "learning_rate": 1.6164023804824654e-05, "loss": 0.4559, "step": 21093 }, { "epoch": 0.5791872597473915, "grad_norm": 0.3829369843006134, "learning_rate": 1.6163683712673125e-05, "loss": 0.5623, "step": 21094 }, { "epoch": 0.579214717188358, "grad_norm": 0.3676750361919403, "learning_rate": 1.616334360902436e-05, "loss": 0.5953, "step": 21095 }, { "epoch": 0.5792421746293246, "grad_norm": 0.4147208034992218, "learning_rate": 1.6163003493878985e-05, "loss": 0.5608, "step": 21096 }, { "epoch": 0.579269632070291, "grad_norm": 0.456452339887619, "learning_rate": 1.6162663367237645e-05, "loss": 0.5732, "step": 21097 }, { "epoch": 0.5792970895112576, "grad_norm": 0.3554527163505554, "learning_rate": 1.616232322910097e-05, "loss": 0.4997, "step": 21098 }, { "epoch": 0.579324546952224, "grad_norm": 0.3919999599456787, "learning_rate": 1.61619830794696e-05, "loss": 0.5016, "step": 21099 }, { "epoch": 0.5793520043931906, "grad_norm": 0.37867915630340576, "learning_rate": 1.6161642918344163e-05, "loss": 0.5116, "step": 21100 }, { "epoch": 0.579379461834157, "grad_norm": 0.3781815767288208, "learning_rate": 1.6161302745725292e-05, "loss": 0.5259, "step": 21101 }, { "epoch": 0.5794069192751236, "grad_norm": 0.3836900293827057, "learning_rate": 1.6160962561613628e-05, "loss": 0.555, "step": 21102 }, { "epoch": 0.5794343767160901, "grad_norm": 0.35570597648620605, "learning_rate": 1.61606223660098e-05, "loss": 0.4591, "step": 21103 }, { "epoch": 0.5794618341570565, "grad_norm": 0.510765552520752, "learning_rate": 1.616028215891445e-05, "loss": 0.5339, "step": 21104 }, { "epoch": 0.5794892915980231, "grad_norm": 0.3815319538116455, "learning_rate": 1.6159941940328202e-05, "loss": 0.5094, "step": 21105 }, { "epoch": 0.5795167490389895, "grad_norm": 0.3469858467578888, "learning_rate": 1.6159601710251698e-05, "loss": 0.5943, "step": 21106 }, { "epoch": 0.5795442064799561, "grad_norm": 0.3687015771865845, "learning_rate": 1.6159261468685572e-05, "loss": 0.5553, "step": 21107 }, { "epoch": 0.5795716639209225, "grad_norm": 0.39546167850494385, "learning_rate": 1.6158921215630458e-05, "loss": 0.5625, "step": 21108 }, { "epoch": 0.5795991213618891, "grad_norm": 0.36835646629333496, "learning_rate": 1.6158580951086988e-05, "loss": 0.449, "step": 21109 }, { "epoch": 0.5796265788028556, "grad_norm": 0.3605920374393463, "learning_rate": 1.61582406750558e-05, "loss": 0.3816, "step": 21110 }, { "epoch": 0.5796540362438221, "grad_norm": 0.3755429685115814, "learning_rate": 1.6157900387537526e-05, "loss": 0.5762, "step": 21111 }, { "epoch": 0.5796814936847886, "grad_norm": 0.4290286898612976, "learning_rate": 1.6157560088532805e-05, "loss": 0.595, "step": 21112 }, { "epoch": 0.579708951125755, "grad_norm": 0.37590986490249634, "learning_rate": 1.6157219778042267e-05, "loss": 0.5328, "step": 21113 }, { "epoch": 0.5797364085667216, "grad_norm": 0.35470399260520935, "learning_rate": 1.615687945606655e-05, "loss": 0.4506, "step": 21114 }, { "epoch": 0.579763866007688, "grad_norm": 0.42589202523231506, "learning_rate": 1.6156539122606288e-05, "loss": 0.5905, "step": 21115 }, { "epoch": 0.5797913234486546, "grad_norm": 0.3568902313709259, "learning_rate": 1.6156198777662115e-05, "loss": 0.4373, "step": 21116 }, { "epoch": 0.5798187808896211, "grad_norm": 0.3959977328777313, "learning_rate": 1.615585842123467e-05, "loss": 0.5629, "step": 21117 }, { "epoch": 0.5798462383305876, "grad_norm": 0.3742942810058594, "learning_rate": 1.615551805332458e-05, "loss": 0.4652, "step": 21118 }, { "epoch": 0.5798736957715541, "grad_norm": 0.3384424149990082, "learning_rate": 1.6155177673932486e-05, "loss": 0.3797, "step": 21119 }, { "epoch": 0.5799011532125206, "grad_norm": 0.37512215971946716, "learning_rate": 1.6154837283059022e-05, "loss": 0.5516, "step": 21120 }, { "epoch": 0.5799286106534871, "grad_norm": 0.41497108340263367, "learning_rate": 1.615449688070482e-05, "loss": 0.4927, "step": 21121 }, { "epoch": 0.5799560680944535, "grad_norm": 0.421512633562088, "learning_rate": 1.6154156466870515e-05, "loss": 0.4546, "step": 21122 }, { "epoch": 0.5799835255354201, "grad_norm": 0.352551132440567, "learning_rate": 1.615381604155675e-05, "loss": 0.438, "step": 21123 }, { "epoch": 0.5800109829763866, "grad_norm": 0.3837610185146332, "learning_rate": 1.6153475604764152e-05, "loss": 0.4162, "step": 21124 }, { "epoch": 0.5800384404173531, "grad_norm": 0.36279845237731934, "learning_rate": 1.6153135156493354e-05, "loss": 0.493, "step": 21125 }, { "epoch": 0.5800658978583196, "grad_norm": 0.3779540956020355, "learning_rate": 1.6152794696745e-05, "loss": 0.5511, "step": 21126 }, { "epoch": 0.5800933552992861, "grad_norm": 0.4185730218887329, "learning_rate": 1.6152454225519716e-05, "loss": 0.5768, "step": 21127 }, { "epoch": 0.5801208127402526, "grad_norm": 0.34817633032798767, "learning_rate": 1.6152113742818147e-05, "loss": 0.4737, "step": 21128 }, { "epoch": 0.5801482701812191, "grad_norm": 0.37130671739578247, "learning_rate": 1.6151773248640914e-05, "loss": 0.4604, "step": 21129 }, { "epoch": 0.5801757276221856, "grad_norm": 0.40660083293914795, "learning_rate": 1.6151432742988665e-05, "loss": 0.6092, "step": 21130 }, { "epoch": 0.5802031850631522, "grad_norm": 0.34974467754364014, "learning_rate": 1.6151092225862033e-05, "loss": 0.5246, "step": 21131 }, { "epoch": 0.5802306425041186, "grad_norm": 0.5082796812057495, "learning_rate": 1.615075169726165e-05, "loss": 0.5575, "step": 21132 }, { "epoch": 0.5802580999450851, "grad_norm": 0.40530067682266235, "learning_rate": 1.615041115718815e-05, "loss": 0.4551, "step": 21133 }, { "epoch": 0.5802855573860516, "grad_norm": 0.3660169243812561, "learning_rate": 1.615007060564217e-05, "loss": 0.5445, "step": 21134 }, { "epoch": 0.5803130148270181, "grad_norm": 0.4062086343765259, "learning_rate": 1.6149730042624346e-05, "loss": 0.543, "step": 21135 }, { "epoch": 0.5803404722679846, "grad_norm": 0.47891783714294434, "learning_rate": 1.614938946813531e-05, "loss": 0.4878, "step": 21136 }, { "epoch": 0.5803679297089511, "grad_norm": 0.45675143599510193, "learning_rate": 1.6149048882175703e-05, "loss": 0.4874, "step": 21137 }, { "epoch": 0.5803953871499177, "grad_norm": 0.37275102734565735, "learning_rate": 1.614870828474616e-05, "loss": 0.457, "step": 21138 }, { "epoch": 0.5804228445908841, "grad_norm": 0.320388525724411, "learning_rate": 1.614836767584731e-05, "loss": 0.4636, "step": 21139 }, { "epoch": 0.5804503020318507, "grad_norm": 0.4236691892147064, "learning_rate": 1.6148027055479793e-05, "loss": 0.4718, "step": 21140 }, { "epoch": 0.5804777594728171, "grad_norm": 0.3942870497703552, "learning_rate": 1.6147686423644243e-05, "loss": 0.5397, "step": 21141 }, { "epoch": 0.5805052169137837, "grad_norm": 0.36060890555381775, "learning_rate": 1.6147345780341293e-05, "loss": 0.479, "step": 21142 }, { "epoch": 0.5805326743547501, "grad_norm": 0.39557743072509766, "learning_rate": 1.6147005125571588e-05, "loss": 0.4087, "step": 21143 }, { "epoch": 0.5805601317957166, "grad_norm": 0.3715820014476776, "learning_rate": 1.614666445933575e-05, "loss": 0.5211, "step": 21144 }, { "epoch": 0.5805875892366832, "grad_norm": 0.34634697437286377, "learning_rate": 1.6146323781634422e-05, "loss": 0.5011, "step": 21145 }, { "epoch": 0.5806150466776496, "grad_norm": 0.39563706517219543, "learning_rate": 1.6145983092468243e-05, "loss": 0.4716, "step": 21146 }, { "epoch": 0.5806425041186162, "grad_norm": 0.36046019196510315, "learning_rate": 1.614564239183784e-05, "loss": 0.5155, "step": 21147 }, { "epoch": 0.5806699615595826, "grad_norm": 0.938191831111908, "learning_rate": 1.6145301679743854e-05, "loss": 0.5709, "step": 21148 }, { "epoch": 0.5806974190005492, "grad_norm": 0.41859158873558044, "learning_rate": 1.6144960956186918e-05, "loss": 0.5358, "step": 21149 }, { "epoch": 0.5807248764415156, "grad_norm": 0.3915003836154938, "learning_rate": 1.6144620221167668e-05, "loss": 0.5813, "step": 21150 }, { "epoch": 0.5807523338824822, "grad_norm": 0.38850530982017517, "learning_rate": 1.6144279474686743e-05, "loss": 0.4872, "step": 21151 }, { "epoch": 0.5807797913234487, "grad_norm": 0.367949515581131, "learning_rate": 1.6143938716744772e-05, "loss": 0.3929, "step": 21152 }, { "epoch": 0.5808072487644151, "grad_norm": 0.36562207341194153, "learning_rate": 1.6143597947342398e-05, "loss": 0.5352, "step": 21153 }, { "epoch": 0.5808347062053817, "grad_norm": 0.36746636033058167, "learning_rate": 1.614325716648025e-05, "loss": 0.4785, "step": 21154 }, { "epoch": 0.5808621636463481, "grad_norm": 0.3658316433429718, "learning_rate": 1.6142916374158967e-05, "loss": 0.4691, "step": 21155 }, { "epoch": 0.5808896210873147, "grad_norm": 0.43570974469184875, "learning_rate": 1.6142575570379185e-05, "loss": 0.5311, "step": 21156 }, { "epoch": 0.5809170785282811, "grad_norm": 0.35501614212989807, "learning_rate": 1.614223475514154e-05, "loss": 0.4531, "step": 21157 }, { "epoch": 0.5809445359692477, "grad_norm": 0.3405054211616516, "learning_rate": 1.6141893928446667e-05, "loss": 0.509, "step": 21158 }, { "epoch": 0.5809719934102142, "grad_norm": 0.3461112082004547, "learning_rate": 1.6141553090295202e-05, "loss": 0.4803, "step": 21159 }, { "epoch": 0.5809994508511807, "grad_norm": 0.36415600776672363, "learning_rate": 1.6141212240687776e-05, "loss": 0.4583, "step": 21160 }, { "epoch": 0.5810269082921472, "grad_norm": 0.4015338718891144, "learning_rate": 1.6140871379625033e-05, "loss": 0.5149, "step": 21161 }, { "epoch": 0.5810543657331136, "grad_norm": 0.3863988518714905, "learning_rate": 1.6140530507107605e-05, "loss": 0.5744, "step": 21162 }, { "epoch": 0.5810818231740802, "grad_norm": 0.35629045963287354, "learning_rate": 1.6140189623136127e-05, "loss": 0.4637, "step": 21163 }, { "epoch": 0.5811092806150466, "grad_norm": 0.4541007876396179, "learning_rate": 1.6139848727711235e-05, "loss": 0.4848, "step": 21164 }, { "epoch": 0.5811367380560132, "grad_norm": 0.37079057097435, "learning_rate": 1.6139507820833564e-05, "loss": 0.5271, "step": 21165 }, { "epoch": 0.5811641954969797, "grad_norm": 0.44012296199798584, "learning_rate": 1.6139166902503756e-05, "loss": 0.5668, "step": 21166 }, { "epoch": 0.5811916529379462, "grad_norm": 0.3689175546169281, "learning_rate": 1.6138825972722442e-05, "loss": 0.4456, "step": 21167 }, { "epoch": 0.5812191103789127, "grad_norm": 0.40422213077545166, "learning_rate": 1.6138485031490253e-05, "loss": 0.5222, "step": 21168 }, { "epoch": 0.5812465678198792, "grad_norm": 0.3705901801586151, "learning_rate": 1.613814407880783e-05, "loss": 0.5142, "step": 21169 }, { "epoch": 0.5812740252608457, "grad_norm": 0.37258240580558777, "learning_rate": 1.6137803114675815e-05, "loss": 0.6344, "step": 21170 }, { "epoch": 0.5813014827018121, "grad_norm": 0.42369797825813293, "learning_rate": 1.6137462139094836e-05, "loss": 0.635, "step": 21171 }, { "epoch": 0.5813289401427787, "grad_norm": 0.3720398247241974, "learning_rate": 1.613712115206553e-05, "loss": 0.6029, "step": 21172 }, { "epoch": 0.5813563975837452, "grad_norm": 0.38557180762290955, "learning_rate": 1.6136780153588537e-05, "loss": 0.556, "step": 21173 }, { "epoch": 0.5813838550247117, "grad_norm": 0.39516186714172363, "learning_rate": 1.613643914366449e-05, "loss": 0.5882, "step": 21174 }, { "epoch": 0.5814113124656782, "grad_norm": 0.3162324130535126, "learning_rate": 1.613609812229402e-05, "loss": 0.4666, "step": 21175 }, { "epoch": 0.5814387699066447, "grad_norm": 0.3697158396244049, "learning_rate": 1.6135757089477773e-05, "loss": 0.5144, "step": 21176 }, { "epoch": 0.5814662273476112, "grad_norm": 0.39619362354278564, "learning_rate": 1.6135416045216382e-05, "loss": 0.5531, "step": 21177 }, { "epoch": 0.5814936847885777, "grad_norm": 0.41623106598854065, "learning_rate": 1.613507498951048e-05, "loss": 0.5554, "step": 21178 }, { "epoch": 0.5815211422295442, "grad_norm": 0.3983595669269562, "learning_rate": 1.6134733922360705e-05, "loss": 0.5437, "step": 21179 }, { "epoch": 0.5815485996705108, "grad_norm": 0.35298386216163635, "learning_rate": 1.6134392843767694e-05, "loss": 0.4788, "step": 21180 }, { "epoch": 0.5815760571114772, "grad_norm": 0.3758779764175415, "learning_rate": 1.6134051753732083e-05, "loss": 0.4906, "step": 21181 }, { "epoch": 0.5816035145524437, "grad_norm": 0.3997713327407837, "learning_rate": 1.6133710652254507e-05, "loss": 0.396, "step": 21182 }, { "epoch": 0.5816309719934102, "grad_norm": 0.3680139482021332, "learning_rate": 1.61333695393356e-05, "loss": 0.5507, "step": 21183 }, { "epoch": 0.5816584294343767, "grad_norm": 0.4361574649810791, "learning_rate": 1.6133028414976006e-05, "loss": 0.5785, "step": 21184 }, { "epoch": 0.5816858868753432, "grad_norm": 0.38822340965270996, "learning_rate": 1.6132687279176357e-05, "loss": 0.4988, "step": 21185 }, { "epoch": 0.5817133443163097, "grad_norm": 0.4322441816329956, "learning_rate": 1.6132346131937285e-05, "loss": 0.5653, "step": 21186 }, { "epoch": 0.5817408017572763, "grad_norm": 0.3880590498447418, "learning_rate": 1.6132004973259432e-05, "loss": 0.4207, "step": 21187 }, { "epoch": 0.5817682591982427, "grad_norm": 0.41412153840065, "learning_rate": 1.6131663803143432e-05, "loss": 0.4539, "step": 21188 }, { "epoch": 0.5817957166392093, "grad_norm": 0.43592569231987, "learning_rate": 1.6131322621589924e-05, "loss": 0.5483, "step": 21189 }, { "epoch": 0.5818231740801757, "grad_norm": 0.4527169167995453, "learning_rate": 1.6130981428599542e-05, "loss": 0.5507, "step": 21190 }, { "epoch": 0.5818506315211422, "grad_norm": 0.4116746485233307, "learning_rate": 1.6130640224172918e-05, "loss": 0.4891, "step": 21191 }, { "epoch": 0.5818780889621087, "grad_norm": 0.43417349457740784, "learning_rate": 1.61302990083107e-05, "loss": 0.6141, "step": 21192 }, { "epoch": 0.5819055464030752, "grad_norm": 0.4031166732311249, "learning_rate": 1.612995778101351e-05, "loss": 0.5593, "step": 21193 }, { "epoch": 0.5819330038440418, "grad_norm": 0.37139979004859924, "learning_rate": 1.6129616542282e-05, "loss": 0.4914, "step": 21194 }, { "epoch": 0.5819604612850082, "grad_norm": 0.36679428815841675, "learning_rate": 1.6129275292116794e-05, "loss": 0.4747, "step": 21195 }, { "epoch": 0.5819879187259748, "grad_norm": 0.38614049553871155, "learning_rate": 1.6128934030518536e-05, "loss": 0.5954, "step": 21196 }, { "epoch": 0.5820153761669412, "grad_norm": 0.3812297582626343, "learning_rate": 1.612859275748786e-05, "loss": 0.5431, "step": 21197 }, { "epoch": 0.5820428336079078, "grad_norm": 0.3961833119392395, "learning_rate": 1.6128251473025402e-05, "loss": 0.5407, "step": 21198 }, { "epoch": 0.5820702910488742, "grad_norm": 0.4305078685283661, "learning_rate": 1.6127910177131797e-05, "loss": 0.5195, "step": 21199 }, { "epoch": 0.5820977484898407, "grad_norm": 0.4063071012496948, "learning_rate": 1.6127568869807685e-05, "loss": 0.5937, "step": 21200 }, { "epoch": 0.5821252059308073, "grad_norm": 0.3801192045211792, "learning_rate": 1.6127227551053704e-05, "loss": 0.5091, "step": 21201 }, { "epoch": 0.5821526633717737, "grad_norm": 0.3969520032405853, "learning_rate": 1.6126886220870487e-05, "loss": 0.4681, "step": 21202 }, { "epoch": 0.5821801208127403, "grad_norm": 0.391012579202652, "learning_rate": 1.612654487925867e-05, "loss": 0.5308, "step": 21203 }, { "epoch": 0.5822075782537067, "grad_norm": 0.3582218885421753, "learning_rate": 1.612620352621889e-05, "loss": 0.5099, "step": 21204 }, { "epoch": 0.5822350356946733, "grad_norm": 0.345019668340683, "learning_rate": 1.6125862161751787e-05, "loss": 0.4559, "step": 21205 }, { "epoch": 0.5822624931356397, "grad_norm": 0.4182300269603729, "learning_rate": 1.6125520785857996e-05, "loss": 0.5883, "step": 21206 }, { "epoch": 0.5822899505766063, "grad_norm": 0.4008396565914154, "learning_rate": 1.612517939853815e-05, "loss": 0.5124, "step": 21207 }, { "epoch": 0.5823174080175728, "grad_norm": 0.4058840870857239, "learning_rate": 1.6124837999792896e-05, "loss": 0.5477, "step": 21208 }, { "epoch": 0.5823448654585393, "grad_norm": 0.4117266535758972, "learning_rate": 1.6124496589622864e-05, "loss": 0.4727, "step": 21209 }, { "epoch": 0.5823723228995058, "grad_norm": 0.41523560881614685, "learning_rate": 1.612415516802869e-05, "loss": 0.4449, "step": 21210 }, { "epoch": 0.5823997803404722, "grad_norm": 0.3789553940296173, "learning_rate": 1.612381373501101e-05, "loss": 0.4591, "step": 21211 }, { "epoch": 0.5824272377814388, "grad_norm": 0.9850039482116699, "learning_rate": 1.612347229057046e-05, "loss": 0.5296, "step": 21212 }, { "epoch": 0.5824546952224052, "grad_norm": 0.5352584719657898, "learning_rate": 1.6123130834707686e-05, "loss": 0.4764, "step": 21213 }, { "epoch": 0.5824821526633718, "grad_norm": 0.3898897171020508, "learning_rate": 1.6122789367423317e-05, "loss": 0.5689, "step": 21214 }, { "epoch": 0.5825096101043383, "grad_norm": 0.32067176699638367, "learning_rate": 1.6122447888717992e-05, "loss": 0.3733, "step": 21215 }, { "epoch": 0.5825370675453048, "grad_norm": 0.36533036828041077, "learning_rate": 1.6122106398592345e-05, "loss": 0.4849, "step": 21216 }, { "epoch": 0.5825645249862713, "grad_norm": 0.4269741177558899, "learning_rate": 1.6121764897047018e-05, "loss": 0.57, "step": 21217 }, { "epoch": 0.5825919824272378, "grad_norm": 0.41833600401878357, "learning_rate": 1.6121423384082645e-05, "loss": 0.5917, "step": 21218 }, { "epoch": 0.5826194398682043, "grad_norm": 0.35699743032455444, "learning_rate": 1.612108185969986e-05, "loss": 0.5493, "step": 21219 }, { "epoch": 0.5826468973091707, "grad_norm": 0.3763284683227539, "learning_rate": 1.612074032389931e-05, "loss": 0.5209, "step": 21220 }, { "epoch": 0.5826743547501373, "grad_norm": 0.3895307183265686, "learning_rate": 1.612039877668162e-05, "loss": 0.4872, "step": 21221 }, { "epoch": 0.5827018121911038, "grad_norm": 0.45322877168655396, "learning_rate": 1.6120057218047437e-05, "loss": 0.5435, "step": 21222 }, { "epoch": 0.5827292696320703, "grad_norm": 0.35408976674079895, "learning_rate": 1.611971564799739e-05, "loss": 0.5325, "step": 21223 }, { "epoch": 0.5827567270730368, "grad_norm": 0.3359763026237488, "learning_rate": 1.6119374066532126e-05, "loss": 0.4269, "step": 21224 }, { "epoch": 0.5827841845140033, "grad_norm": 0.3831687867641449, "learning_rate": 1.6119032473652273e-05, "loss": 0.4267, "step": 21225 }, { "epoch": 0.5828116419549698, "grad_norm": 0.3582947850227356, "learning_rate": 1.611869086935847e-05, "loss": 0.5311, "step": 21226 }, { "epoch": 0.5828390993959363, "grad_norm": 0.34726935625076294, "learning_rate": 1.6118349253651357e-05, "loss": 0.4708, "step": 21227 }, { "epoch": 0.5828665568369028, "grad_norm": 0.45481860637664795, "learning_rate": 1.611800762653157e-05, "loss": 0.613, "step": 21228 }, { "epoch": 0.5828940142778694, "grad_norm": 0.3873506784439087, "learning_rate": 1.611766598799975e-05, "loss": 0.4866, "step": 21229 }, { "epoch": 0.5829214717188358, "grad_norm": 0.4234866201877594, "learning_rate": 1.6117324338056522e-05, "loss": 0.5567, "step": 21230 }, { "epoch": 0.5829489291598023, "grad_norm": 0.3780277669429779, "learning_rate": 1.611698267670254e-05, "loss": 0.5208, "step": 21231 }, { "epoch": 0.5829763866007688, "grad_norm": 0.3667088449001312, "learning_rate": 1.611664100393843e-05, "loss": 0.4425, "step": 21232 }, { "epoch": 0.5830038440417353, "grad_norm": 0.46584028005599976, "learning_rate": 1.611629931976483e-05, "loss": 0.4993, "step": 21233 }, { "epoch": 0.5830313014827018, "grad_norm": 0.464614599943161, "learning_rate": 1.6115957624182382e-05, "loss": 0.4573, "step": 21234 }, { "epoch": 0.5830587589236683, "grad_norm": 0.406038761138916, "learning_rate": 1.611561591719172e-05, "loss": 0.4468, "step": 21235 }, { "epoch": 0.5830862163646349, "grad_norm": 0.3388383984565735, "learning_rate": 1.6115274198793483e-05, "loss": 0.4894, "step": 21236 }, { "epoch": 0.5831136738056013, "grad_norm": 0.4083103537559509, "learning_rate": 1.6114932468988307e-05, "loss": 0.5307, "step": 21237 }, { "epoch": 0.5831411312465679, "grad_norm": 0.3521682620048523, "learning_rate": 1.611459072777683e-05, "loss": 0.4796, "step": 21238 }, { "epoch": 0.5831685886875343, "grad_norm": 0.8415577411651611, "learning_rate": 1.611424897515969e-05, "loss": 0.5235, "step": 21239 }, { "epoch": 0.5831960461285008, "grad_norm": 0.4066687524318695, "learning_rate": 1.6113907211137525e-05, "loss": 0.5972, "step": 21240 }, { "epoch": 0.5832235035694673, "grad_norm": 0.41477474570274353, "learning_rate": 1.6113565435710975e-05, "loss": 0.5149, "step": 21241 }, { "epoch": 0.5832509610104338, "grad_norm": 0.3899105489253998, "learning_rate": 1.6113223648880668e-05, "loss": 0.512, "step": 21242 }, { "epoch": 0.5832784184514004, "grad_norm": 0.3337099552154541, "learning_rate": 1.611288185064725e-05, "loss": 0.475, "step": 21243 }, { "epoch": 0.5833058758923668, "grad_norm": 0.6104755997657776, "learning_rate": 1.6112540041011358e-05, "loss": 0.5512, "step": 21244 }, { "epoch": 0.5833333333333334, "grad_norm": 0.36482909321784973, "learning_rate": 1.6112198219973625e-05, "loss": 0.4486, "step": 21245 }, { "epoch": 0.5833607907742998, "grad_norm": 0.3636209964752197, "learning_rate": 1.6111856387534697e-05, "loss": 0.4837, "step": 21246 }, { "epoch": 0.5833882482152664, "grad_norm": 0.35087481141090393, "learning_rate": 1.6111514543695204e-05, "loss": 0.4286, "step": 21247 }, { "epoch": 0.5834157056562328, "grad_norm": 0.3428530693054199, "learning_rate": 1.611117268845578e-05, "loss": 0.418, "step": 21248 }, { "epoch": 0.5834431630971993, "grad_norm": 0.37464645504951477, "learning_rate": 1.6110830821817075e-05, "loss": 0.4727, "step": 21249 }, { "epoch": 0.5834706205381659, "grad_norm": 0.3216412663459778, "learning_rate": 1.6110488943779717e-05, "loss": 0.483, "step": 21250 }, { "epoch": 0.5834980779791323, "grad_norm": 0.3745121359825134, "learning_rate": 1.611014705434435e-05, "loss": 0.5083, "step": 21251 }, { "epoch": 0.5835255354200989, "grad_norm": 0.34712153673171997, "learning_rate": 1.6109805153511606e-05, "loss": 0.5264, "step": 21252 }, { "epoch": 0.5835529928610653, "grad_norm": 0.3746022880077362, "learning_rate": 1.6109463241282127e-05, "loss": 0.4804, "step": 21253 }, { "epoch": 0.5835804503020319, "grad_norm": 0.36023518443107605, "learning_rate": 1.6109121317656548e-05, "loss": 0.5094, "step": 21254 }, { "epoch": 0.5836079077429983, "grad_norm": 0.46353960037231445, "learning_rate": 1.6108779382635506e-05, "loss": 0.4473, "step": 21255 }, { "epoch": 0.5836353651839649, "grad_norm": 0.649553656578064, "learning_rate": 1.610843743621964e-05, "loss": 0.5928, "step": 21256 }, { "epoch": 0.5836628226249314, "grad_norm": 0.43268081545829773, "learning_rate": 1.6108095478409595e-05, "loss": 0.4707, "step": 21257 }, { "epoch": 0.5836902800658978, "grad_norm": 0.37631580233573914, "learning_rate": 1.6107753509205996e-05, "loss": 0.5136, "step": 21258 }, { "epoch": 0.5837177375068644, "grad_norm": 0.3769768178462982, "learning_rate": 1.6107411528609492e-05, "loss": 0.5149, "step": 21259 }, { "epoch": 0.5837451949478308, "grad_norm": 0.34461772441864014, "learning_rate": 1.6107069536620714e-05, "loss": 0.4917, "step": 21260 }, { "epoch": 0.5837726523887974, "grad_norm": 0.36751461029052734, "learning_rate": 1.61067275332403e-05, "loss": 0.4942, "step": 21261 }, { "epoch": 0.5838001098297638, "grad_norm": 0.398088663816452, "learning_rate": 1.6106385518468895e-05, "loss": 0.5292, "step": 21262 }, { "epoch": 0.5838275672707304, "grad_norm": 0.37324175238609314, "learning_rate": 1.610604349230713e-05, "loss": 0.4398, "step": 21263 }, { "epoch": 0.5838550247116969, "grad_norm": 0.3872585892677307, "learning_rate": 1.6105701454755645e-05, "loss": 0.567, "step": 21264 }, { "epoch": 0.5838824821526634, "grad_norm": 0.37501952052116394, "learning_rate": 1.610535940581508e-05, "loss": 0.4863, "step": 21265 }, { "epoch": 0.5839099395936299, "grad_norm": 0.5118427276611328, "learning_rate": 1.6105017345486066e-05, "loss": 0.4685, "step": 21266 }, { "epoch": 0.5839373970345964, "grad_norm": 0.3743548095226288, "learning_rate": 1.610467527376925e-05, "loss": 0.4901, "step": 21267 }, { "epoch": 0.5839648544755629, "grad_norm": 0.34674790501594543, "learning_rate": 1.6104333190665264e-05, "loss": 0.5025, "step": 21268 }, { "epoch": 0.5839923119165293, "grad_norm": 0.43209540843963623, "learning_rate": 1.6103991096174752e-05, "loss": 0.5654, "step": 21269 }, { "epoch": 0.5840197693574959, "grad_norm": 0.4114006459712982, "learning_rate": 1.6103648990298342e-05, "loss": 0.4969, "step": 21270 }, { "epoch": 0.5840472267984624, "grad_norm": 0.3788813054561615, "learning_rate": 1.6103306873036684e-05, "loss": 0.4483, "step": 21271 }, { "epoch": 0.5840746842394289, "grad_norm": 0.42829933762550354, "learning_rate": 1.610296474439041e-05, "loss": 0.4866, "step": 21272 }, { "epoch": 0.5841021416803954, "grad_norm": 0.36395618319511414, "learning_rate": 1.610262260436016e-05, "loss": 0.478, "step": 21273 }, { "epoch": 0.5841295991213619, "grad_norm": 0.41468989849090576, "learning_rate": 1.6102280452946568e-05, "loss": 0.465, "step": 21274 }, { "epoch": 0.5841570565623284, "grad_norm": 0.37473928928375244, "learning_rate": 1.6101938290150275e-05, "loss": 0.4974, "step": 21275 }, { "epoch": 0.5841845140032949, "grad_norm": 0.4835295081138611, "learning_rate": 1.6101596115971923e-05, "loss": 0.5773, "step": 21276 }, { "epoch": 0.5842119714442614, "grad_norm": 0.40472400188446045, "learning_rate": 1.6101253930412142e-05, "loss": 0.5918, "step": 21277 }, { "epoch": 0.584239428885228, "grad_norm": 0.3882187604904175, "learning_rate": 1.6100911733471583e-05, "loss": 0.5431, "step": 21278 }, { "epoch": 0.5842668863261944, "grad_norm": 0.3852077126502991, "learning_rate": 1.610056952515087e-05, "loss": 0.5184, "step": 21279 }, { "epoch": 0.5842943437671609, "grad_norm": 0.3876115083694458, "learning_rate": 1.610022730545065e-05, "loss": 0.56, "step": 21280 }, { "epoch": 0.5843218012081274, "grad_norm": 0.538373589515686, "learning_rate": 1.609988507437156e-05, "loss": 0.48, "step": 21281 }, { "epoch": 0.5843492586490939, "grad_norm": 0.37171968817710876, "learning_rate": 1.6099542831914235e-05, "loss": 0.4844, "step": 21282 }, { "epoch": 0.5843767160900604, "grad_norm": 0.4479483962059021, "learning_rate": 1.6099200578079315e-05, "loss": 0.5048, "step": 21283 }, { "epoch": 0.5844041735310269, "grad_norm": 0.3600446879863739, "learning_rate": 1.6098858312867443e-05, "loss": 0.57, "step": 21284 }, { "epoch": 0.5844316309719935, "grad_norm": 0.361345499753952, "learning_rate": 1.6098516036279253e-05, "loss": 0.4852, "step": 21285 }, { "epoch": 0.5844590884129599, "grad_norm": 0.4295426607131958, "learning_rate": 1.609817374831538e-05, "loss": 0.5091, "step": 21286 }, { "epoch": 0.5844865458539265, "grad_norm": 0.4036419689655304, "learning_rate": 1.609783144897647e-05, "loss": 0.5548, "step": 21287 }, { "epoch": 0.5845140032948929, "grad_norm": 0.4391203820705414, "learning_rate": 1.609748913826316e-05, "loss": 0.5319, "step": 21288 }, { "epoch": 0.5845414607358594, "grad_norm": 0.3263184130191803, "learning_rate": 1.6097146816176084e-05, "loss": 0.4761, "step": 21289 }, { "epoch": 0.5845689181768259, "grad_norm": 0.38509148359298706, "learning_rate": 1.6096804482715883e-05, "loss": 0.5261, "step": 21290 }, { "epoch": 0.5845963756177924, "grad_norm": 0.40050920844078064, "learning_rate": 1.6096462137883198e-05, "loss": 0.5568, "step": 21291 }, { "epoch": 0.5846238330587589, "grad_norm": 0.3438301086425781, "learning_rate": 1.6096119781678662e-05, "loss": 0.5008, "step": 21292 }, { "epoch": 0.5846512904997254, "grad_norm": 0.4184826612472534, "learning_rate": 1.6095777414102922e-05, "loss": 0.5215, "step": 21293 }, { "epoch": 0.584678747940692, "grad_norm": 0.35368236899375916, "learning_rate": 1.6095435035156605e-05, "loss": 0.5029, "step": 21294 }, { "epoch": 0.5847062053816584, "grad_norm": 0.35445356369018555, "learning_rate": 1.609509264484036e-05, "loss": 0.4815, "step": 21295 }, { "epoch": 0.584733662822625, "grad_norm": 0.325330525636673, "learning_rate": 1.6094750243154825e-05, "loss": 0.4734, "step": 21296 }, { "epoch": 0.5847611202635914, "grad_norm": 0.41878825426101685, "learning_rate": 1.609440783010063e-05, "loss": 0.5775, "step": 21297 }, { "epoch": 0.5847885777045579, "grad_norm": 0.4062322974205017, "learning_rate": 1.609406540567842e-05, "loss": 0.5291, "step": 21298 }, { "epoch": 0.5848160351455244, "grad_norm": 0.4348547160625458, "learning_rate": 1.6093722969888834e-05, "loss": 0.5386, "step": 21299 }, { "epoch": 0.5848434925864909, "grad_norm": 0.3933655619621277, "learning_rate": 1.6093380522732515e-05, "loss": 0.4556, "step": 21300 }, { "epoch": 0.5848709500274575, "grad_norm": 0.3649437725543976, "learning_rate": 1.609303806421009e-05, "loss": 0.475, "step": 21301 }, { "epoch": 0.5848984074684239, "grad_norm": 0.4191082715988159, "learning_rate": 1.6092695594322205e-05, "loss": 0.5191, "step": 21302 }, { "epoch": 0.5849258649093905, "grad_norm": 0.31631165742874146, "learning_rate": 1.60923531130695e-05, "loss": 0.4017, "step": 21303 }, { "epoch": 0.5849533223503569, "grad_norm": 0.3924362361431122, "learning_rate": 1.6092010620452608e-05, "loss": 0.4752, "step": 21304 }, { "epoch": 0.5849807797913235, "grad_norm": 0.34938833117485046, "learning_rate": 1.6091668116472176e-05, "loss": 0.4758, "step": 21305 }, { "epoch": 0.5850082372322899, "grad_norm": 0.4159508943557739, "learning_rate": 1.609132560112884e-05, "loss": 0.6009, "step": 21306 }, { "epoch": 0.5850356946732564, "grad_norm": 0.38312390446662903, "learning_rate": 1.6090983074423234e-05, "loss": 0.5503, "step": 21307 }, { "epoch": 0.585063152114223, "grad_norm": 0.39639943838119507, "learning_rate": 1.6090640536356e-05, "loss": 0.518, "step": 21308 }, { "epoch": 0.5850906095551894, "grad_norm": 0.35618704557418823, "learning_rate": 1.6090297986927783e-05, "loss": 0.553, "step": 21309 }, { "epoch": 0.585118066996156, "grad_norm": 0.39169633388519287, "learning_rate": 1.6089955426139213e-05, "loss": 0.4761, "step": 21310 }, { "epoch": 0.5851455244371224, "grad_norm": 0.45335105061531067, "learning_rate": 1.6089612853990932e-05, "loss": 0.5036, "step": 21311 }, { "epoch": 0.585172981878089, "grad_norm": 0.376676470041275, "learning_rate": 1.608927027048358e-05, "loss": 0.5324, "step": 21312 }, { "epoch": 0.5852004393190554, "grad_norm": 0.370243102312088, "learning_rate": 1.6088927675617797e-05, "loss": 0.5073, "step": 21313 }, { "epoch": 0.585227896760022, "grad_norm": 0.3771556317806244, "learning_rate": 1.6088585069394217e-05, "loss": 0.557, "step": 21314 }, { "epoch": 0.5852553542009885, "grad_norm": 0.3795780539512634, "learning_rate": 1.6088242451813484e-05, "loss": 0.5261, "step": 21315 }, { "epoch": 0.585282811641955, "grad_norm": 0.4022909700870514, "learning_rate": 1.6087899822876235e-05, "loss": 0.6022, "step": 21316 }, { "epoch": 0.5853102690829215, "grad_norm": 0.40606197714805603, "learning_rate": 1.6087557182583113e-05, "loss": 0.545, "step": 21317 }, { "epoch": 0.5853377265238879, "grad_norm": 0.3656183183193207, "learning_rate": 1.6087214530934748e-05, "loss": 0.4132, "step": 21318 }, { "epoch": 0.5853651839648545, "grad_norm": 0.3661345839500427, "learning_rate": 1.6086871867931792e-05, "loss": 0.4112, "step": 21319 }, { "epoch": 0.5853926414058209, "grad_norm": 0.3930721580982208, "learning_rate": 1.6086529193574874e-05, "loss": 0.5798, "step": 21320 }, { "epoch": 0.5854200988467875, "grad_norm": 0.4215219020843506, "learning_rate": 1.6086186507864635e-05, "loss": 0.5581, "step": 21321 }, { "epoch": 0.585447556287754, "grad_norm": 0.39606142044067383, "learning_rate": 1.6085843810801718e-05, "loss": 0.4586, "step": 21322 }, { "epoch": 0.5854750137287205, "grad_norm": 0.3777654767036438, "learning_rate": 1.608550110238676e-05, "loss": 0.5102, "step": 21323 }, { "epoch": 0.585502471169687, "grad_norm": 0.4549945592880249, "learning_rate": 1.60851583826204e-05, "loss": 0.4922, "step": 21324 }, { "epoch": 0.5855299286106534, "grad_norm": 0.3838466703891754, "learning_rate": 1.6084815651503275e-05, "loss": 0.499, "step": 21325 }, { "epoch": 0.58555738605162, "grad_norm": 0.44715091586112976, "learning_rate": 1.6084472909036032e-05, "loss": 0.5304, "step": 21326 }, { "epoch": 0.5855848434925864, "grad_norm": 0.5546845197677612, "learning_rate": 1.6084130155219302e-05, "loss": 0.431, "step": 21327 }, { "epoch": 0.585612300933553, "grad_norm": 0.38248196244239807, "learning_rate": 1.6083787390053723e-05, "loss": 0.4349, "step": 21328 }, { "epoch": 0.5856397583745195, "grad_norm": 0.3472059965133667, "learning_rate": 1.6083444613539946e-05, "loss": 0.5129, "step": 21329 }, { "epoch": 0.585667215815486, "grad_norm": 0.4445887506008148, "learning_rate": 1.60831018256786e-05, "loss": 0.4564, "step": 21330 }, { "epoch": 0.5856946732564525, "grad_norm": 0.33410021662712097, "learning_rate": 1.6082759026470328e-05, "loss": 0.4825, "step": 21331 }, { "epoch": 0.585722130697419, "grad_norm": 0.39910364151000977, "learning_rate": 1.6082416215915767e-05, "loss": 0.5159, "step": 21332 }, { "epoch": 0.5857495881383855, "grad_norm": 0.36702319979667664, "learning_rate": 1.6082073394015562e-05, "loss": 0.4594, "step": 21333 }, { "epoch": 0.585777045579352, "grad_norm": 0.39802059531211853, "learning_rate": 1.6081730560770344e-05, "loss": 0.5146, "step": 21334 }, { "epoch": 0.5858045030203185, "grad_norm": 0.3603227138519287, "learning_rate": 1.608138771618076e-05, "loss": 0.4798, "step": 21335 }, { "epoch": 0.585831960461285, "grad_norm": 0.38644322752952576, "learning_rate": 1.608104486024745e-05, "loss": 0.5631, "step": 21336 }, { "epoch": 0.5858594179022515, "grad_norm": 0.40742355585098267, "learning_rate": 1.608070199297105e-05, "loss": 0.5315, "step": 21337 }, { "epoch": 0.585886875343218, "grad_norm": 0.4441278278827667, "learning_rate": 1.6080359114352196e-05, "loss": 0.5776, "step": 21338 }, { "epoch": 0.5859143327841845, "grad_norm": 0.3608386516571045, "learning_rate": 1.6080016224391536e-05, "loss": 0.4691, "step": 21339 }, { "epoch": 0.585941790225151, "grad_norm": 0.36862191557884216, "learning_rate": 1.60796733230897e-05, "loss": 0.5527, "step": 21340 }, { "epoch": 0.5859692476661175, "grad_norm": 0.4074125289916992, "learning_rate": 1.6079330410447337e-05, "loss": 0.4865, "step": 21341 }, { "epoch": 0.585996705107084, "grad_norm": 0.3703363239765167, "learning_rate": 1.607898748646508e-05, "loss": 0.471, "step": 21342 }, { "epoch": 0.5860241625480506, "grad_norm": 0.3643762171268463, "learning_rate": 1.6078644551143572e-05, "loss": 0.4486, "step": 21343 }, { "epoch": 0.586051619989017, "grad_norm": 0.40503379702568054, "learning_rate": 1.6078301604483454e-05, "loss": 0.5005, "step": 21344 }, { "epoch": 0.5860790774299836, "grad_norm": 0.40422913432121277, "learning_rate": 1.6077958646485364e-05, "loss": 0.4806, "step": 21345 }, { "epoch": 0.58610653487095, "grad_norm": 0.4324236512184143, "learning_rate": 1.6077615677149937e-05, "loss": 0.4919, "step": 21346 }, { "epoch": 0.5861339923119165, "grad_norm": 0.3854408860206604, "learning_rate": 1.607727269647782e-05, "loss": 0.5303, "step": 21347 }, { "epoch": 0.586161449752883, "grad_norm": 0.40843522548675537, "learning_rate": 1.607692970446965e-05, "loss": 0.4756, "step": 21348 }, { "epoch": 0.5861889071938495, "grad_norm": 0.39039647579193115, "learning_rate": 1.6076586701126063e-05, "loss": 0.5168, "step": 21349 }, { "epoch": 0.5862163646348161, "grad_norm": 0.4569324254989624, "learning_rate": 1.6076243686447702e-05, "loss": 0.4485, "step": 21350 }, { "epoch": 0.5862438220757825, "grad_norm": 0.3567771911621094, "learning_rate": 1.607590066043521e-05, "loss": 0.5208, "step": 21351 }, { "epoch": 0.5862712795167491, "grad_norm": 0.4875302016735077, "learning_rate": 1.6075557623089228e-05, "loss": 0.514, "step": 21352 }, { "epoch": 0.5862987369577155, "grad_norm": 0.47146013379096985, "learning_rate": 1.607521457441039e-05, "loss": 0.6466, "step": 21353 }, { "epoch": 0.586326194398682, "grad_norm": 0.3471103310585022, "learning_rate": 1.6074871514399333e-05, "loss": 0.4798, "step": 21354 }, { "epoch": 0.5863536518396485, "grad_norm": 0.5603978037834167, "learning_rate": 1.6074528443056704e-05, "loss": 0.5085, "step": 21355 }, { "epoch": 0.586381109280615, "grad_norm": 0.39028629660606384, "learning_rate": 1.6074185360383142e-05, "loss": 0.5286, "step": 21356 }, { "epoch": 0.5864085667215816, "grad_norm": 0.3981369733810425, "learning_rate": 1.6073842266379285e-05, "loss": 0.5004, "step": 21357 }, { "epoch": 0.586436024162548, "grad_norm": 0.34253403544425964, "learning_rate": 1.6073499161045773e-05, "loss": 0.4067, "step": 21358 }, { "epoch": 0.5864634816035146, "grad_norm": 0.40743759274482727, "learning_rate": 1.6073156044383246e-05, "loss": 0.5668, "step": 21359 }, { "epoch": 0.586490939044481, "grad_norm": 0.4002404510974884, "learning_rate": 1.6072812916392348e-05, "loss": 0.5749, "step": 21360 }, { "epoch": 0.5865183964854476, "grad_norm": 0.3362743556499481, "learning_rate": 1.6072469777073712e-05, "loss": 0.4684, "step": 21361 }, { "epoch": 0.586545853926414, "grad_norm": 0.4204544723033905, "learning_rate": 1.6072126626427983e-05, "loss": 0.5275, "step": 21362 }, { "epoch": 0.5865733113673806, "grad_norm": 0.4053398072719574, "learning_rate": 1.60717834644558e-05, "loss": 0.5934, "step": 21363 }, { "epoch": 0.5866007688083471, "grad_norm": 0.38261502981185913, "learning_rate": 1.6071440291157804e-05, "loss": 0.5311, "step": 21364 }, { "epoch": 0.5866282262493135, "grad_norm": 0.39222586154937744, "learning_rate": 1.607109710653463e-05, "loss": 0.4581, "step": 21365 }, { "epoch": 0.5866556836902801, "grad_norm": 0.5829600691795349, "learning_rate": 1.6070753910586927e-05, "loss": 0.5955, "step": 21366 }, { "epoch": 0.5866831411312465, "grad_norm": 0.3544497787952423, "learning_rate": 1.6070410703315327e-05, "loss": 0.4842, "step": 21367 }, { "epoch": 0.5867105985722131, "grad_norm": 0.5294165015220642, "learning_rate": 1.607006748472048e-05, "loss": 0.4461, "step": 21368 }, { "epoch": 0.5867380560131795, "grad_norm": 0.37721970677375793, "learning_rate": 1.6069724254803013e-05, "loss": 0.5206, "step": 21369 }, { "epoch": 0.5867655134541461, "grad_norm": 0.36376938223838806, "learning_rate": 1.6069381013563576e-05, "loss": 0.4579, "step": 21370 }, { "epoch": 0.5867929708951126, "grad_norm": 0.7062408328056335, "learning_rate": 1.6069037761002805e-05, "loss": 0.4722, "step": 21371 }, { "epoch": 0.5868204283360791, "grad_norm": 0.5192480087280273, "learning_rate": 1.606869449712134e-05, "loss": 0.5899, "step": 21372 }, { "epoch": 0.5868478857770456, "grad_norm": 0.37329164147377014, "learning_rate": 1.6068351221919826e-05, "loss": 0.4068, "step": 21373 }, { "epoch": 0.586875343218012, "grad_norm": 0.34479978680610657, "learning_rate": 1.6068007935398902e-05, "loss": 0.545, "step": 21374 }, { "epoch": 0.5869028006589786, "grad_norm": 1.1792371273040771, "learning_rate": 1.6067664637559203e-05, "loss": 0.5243, "step": 21375 }, { "epoch": 0.586930258099945, "grad_norm": 0.37657466530799866, "learning_rate": 1.6067321328401374e-05, "loss": 0.4415, "step": 21376 }, { "epoch": 0.5869577155409116, "grad_norm": 0.3800581991672516, "learning_rate": 1.6066978007926054e-05, "loss": 0.458, "step": 21377 }, { "epoch": 0.5869851729818781, "grad_norm": 0.4102453887462616, "learning_rate": 1.606663467613388e-05, "loss": 0.5438, "step": 21378 }, { "epoch": 0.5870126304228446, "grad_norm": 0.3837204873561859, "learning_rate": 1.6066291333025502e-05, "loss": 0.6015, "step": 21379 }, { "epoch": 0.5870400878638111, "grad_norm": 0.4148036539554596, "learning_rate": 1.6065947978601552e-05, "loss": 0.5585, "step": 21380 }, { "epoch": 0.5870675453047776, "grad_norm": 0.40851548314094543, "learning_rate": 1.6065604612862674e-05, "loss": 0.4727, "step": 21381 }, { "epoch": 0.5870950027457441, "grad_norm": 0.33512207865715027, "learning_rate": 1.6065261235809507e-05, "loss": 0.5369, "step": 21382 }, { "epoch": 0.5871224601867105, "grad_norm": 0.40586692094802856, "learning_rate": 1.606491784744269e-05, "loss": 0.4835, "step": 21383 }, { "epoch": 0.5871499176276771, "grad_norm": 0.48856794834136963, "learning_rate": 1.6064574447762867e-05, "loss": 0.5117, "step": 21384 }, { "epoch": 0.5871773750686436, "grad_norm": 0.3666112422943115, "learning_rate": 1.6064231036770675e-05, "loss": 0.467, "step": 21385 }, { "epoch": 0.5872048325096101, "grad_norm": 0.46318596601486206, "learning_rate": 1.606388761446676e-05, "loss": 0.578, "step": 21386 }, { "epoch": 0.5872322899505766, "grad_norm": 0.3672381639480591, "learning_rate": 1.606354418085176e-05, "loss": 0.515, "step": 21387 }, { "epoch": 0.5872597473915431, "grad_norm": 0.4746719300746918, "learning_rate": 1.6063200735926313e-05, "loss": 0.5309, "step": 21388 }, { "epoch": 0.5872872048325096, "grad_norm": 0.4605122208595276, "learning_rate": 1.6062857279691063e-05, "loss": 0.5403, "step": 21389 }, { "epoch": 0.5873146622734761, "grad_norm": 0.3861536681652069, "learning_rate": 1.606251381214664e-05, "loss": 0.5323, "step": 21390 }, { "epoch": 0.5873421197144426, "grad_norm": 0.40896034240722656, "learning_rate": 1.6062170333293704e-05, "loss": 0.5122, "step": 21391 }, { "epoch": 0.5873695771554092, "grad_norm": 0.4081052839756012, "learning_rate": 1.6061826843132883e-05, "loss": 0.557, "step": 21392 }, { "epoch": 0.5873970345963756, "grad_norm": 0.38191959261894226, "learning_rate": 1.6061483341664823e-05, "loss": 0.4803, "step": 21393 }, { "epoch": 0.5874244920373421, "grad_norm": 0.38186895847320557, "learning_rate": 1.6061139828890157e-05, "loss": 0.4332, "step": 21394 }, { "epoch": 0.5874519494783086, "grad_norm": 0.34676143527030945, "learning_rate": 1.6060796304809532e-05, "loss": 0.4918, "step": 21395 }, { "epoch": 0.5874794069192751, "grad_norm": 0.39021217823028564, "learning_rate": 1.606045276942359e-05, "loss": 0.489, "step": 21396 }, { "epoch": 0.5875068643602416, "grad_norm": 0.408552348613739, "learning_rate": 1.6060109222732965e-05, "loss": 0.5122, "step": 21397 }, { "epoch": 0.5875343218012081, "grad_norm": 0.40925681591033936, "learning_rate": 1.6059765664738307e-05, "loss": 0.5363, "step": 21398 }, { "epoch": 0.5875617792421747, "grad_norm": 0.39733314514160156, "learning_rate": 1.605942209544025e-05, "loss": 0.4539, "step": 21399 }, { "epoch": 0.5875892366831411, "grad_norm": 0.4093766510486603, "learning_rate": 1.6059078514839436e-05, "loss": 0.6354, "step": 21400 }, { "epoch": 0.5876166941241077, "grad_norm": 0.3743625581264496, "learning_rate": 1.6058734922936507e-05, "loss": 0.4935, "step": 21401 }, { "epoch": 0.5876441515650741, "grad_norm": 0.39678749442100525, "learning_rate": 1.6058391319732106e-05, "loss": 0.5007, "step": 21402 }, { "epoch": 0.5876716090060407, "grad_norm": 0.46316733956336975, "learning_rate": 1.605804770522687e-05, "loss": 0.5606, "step": 21403 }, { "epoch": 0.5876990664470071, "grad_norm": 0.376273512840271, "learning_rate": 1.605770407942144e-05, "loss": 0.5397, "step": 21404 }, { "epoch": 0.5877265238879736, "grad_norm": 0.35880690813064575, "learning_rate": 1.605736044231646e-05, "loss": 0.4543, "step": 21405 }, { "epoch": 0.5877539813289402, "grad_norm": 0.39584028720855713, "learning_rate": 1.6057016793912566e-05, "loss": 0.5198, "step": 21406 }, { "epoch": 0.5877814387699066, "grad_norm": 0.43890342116355896, "learning_rate": 1.6056673134210406e-05, "loss": 0.5413, "step": 21407 }, { "epoch": 0.5878088962108732, "grad_norm": 0.37428393959999084, "learning_rate": 1.6056329463210616e-05, "loss": 0.5115, "step": 21408 }, { "epoch": 0.5878363536518396, "grad_norm": 0.5834339261054993, "learning_rate": 1.6055985780913842e-05, "loss": 0.5072, "step": 21409 }, { "epoch": 0.5878638110928062, "grad_norm": 0.3962455093860626, "learning_rate": 1.605564208732072e-05, "loss": 0.4622, "step": 21410 }, { "epoch": 0.5878912685337726, "grad_norm": 0.4025016725063324, "learning_rate": 1.605529838243189e-05, "loss": 0.4574, "step": 21411 }, { "epoch": 0.5879187259747392, "grad_norm": 0.4272722899913788, "learning_rate": 1.6054954666247997e-05, "loss": 0.5129, "step": 21412 }, { "epoch": 0.5879461834157057, "grad_norm": 0.5183151960372925, "learning_rate": 1.6054610938769683e-05, "loss": 0.541, "step": 21413 }, { "epoch": 0.5879736408566721, "grad_norm": 0.49211400747299194, "learning_rate": 1.6054267199997587e-05, "loss": 0.4547, "step": 21414 }, { "epoch": 0.5880010982976387, "grad_norm": 0.3334355056285858, "learning_rate": 1.6053923449932347e-05, "loss": 0.474, "step": 21415 }, { "epoch": 0.5880285557386051, "grad_norm": 0.3532576858997345, "learning_rate": 1.6053579688574612e-05, "loss": 0.5558, "step": 21416 }, { "epoch": 0.5880560131795717, "grad_norm": 0.3742949962615967, "learning_rate": 1.6053235915925017e-05, "loss": 0.547, "step": 21417 }, { "epoch": 0.5880834706205381, "grad_norm": 0.45399484038352966, "learning_rate": 1.6052892131984204e-05, "loss": 0.4703, "step": 21418 }, { "epoch": 0.5881109280615047, "grad_norm": 0.35127493739128113, "learning_rate": 1.605254833675282e-05, "loss": 0.4683, "step": 21419 }, { "epoch": 0.5881383855024712, "grad_norm": 0.4042772650718689, "learning_rate": 1.6052204530231494e-05, "loss": 0.5646, "step": 21420 }, { "epoch": 0.5881658429434377, "grad_norm": 0.38157814741134644, "learning_rate": 1.605186071242088e-05, "loss": 0.4935, "step": 21421 }, { "epoch": 0.5881933003844042, "grad_norm": 0.39841532707214355, "learning_rate": 1.6051516883321615e-05, "loss": 0.4443, "step": 21422 }, { "epoch": 0.5882207578253706, "grad_norm": 0.4648243486881256, "learning_rate": 1.6051173042934334e-05, "loss": 0.3841, "step": 21423 }, { "epoch": 0.5882482152663372, "grad_norm": 0.37332043051719666, "learning_rate": 1.6050829191259685e-05, "loss": 0.486, "step": 21424 }, { "epoch": 0.5882756727073036, "grad_norm": 0.6334229111671448, "learning_rate": 1.6050485328298316e-05, "loss": 0.4628, "step": 21425 }, { "epoch": 0.5883031301482702, "grad_norm": 0.35021087527275085, "learning_rate": 1.6050141454050852e-05, "loss": 0.4499, "step": 21426 }, { "epoch": 0.5883305875892367, "grad_norm": 0.45326828956604004, "learning_rate": 1.604979756851795e-05, "loss": 0.6065, "step": 21427 }, { "epoch": 0.5883580450302032, "grad_norm": 0.389594703912735, "learning_rate": 1.604945367170024e-05, "loss": 0.5957, "step": 21428 }, { "epoch": 0.5883855024711697, "grad_norm": 0.45376911759376526, "learning_rate": 1.604910976359837e-05, "loss": 0.4788, "step": 21429 }, { "epoch": 0.5884129599121362, "grad_norm": 0.37075939774513245, "learning_rate": 1.604876584421298e-05, "loss": 0.5301, "step": 21430 }, { "epoch": 0.5884404173531027, "grad_norm": 0.3375810384750366, "learning_rate": 1.604842191354471e-05, "loss": 0.5108, "step": 21431 }, { "epoch": 0.5884678747940691, "grad_norm": 0.4213574230670929, "learning_rate": 1.6048077971594204e-05, "loss": 0.5315, "step": 21432 }, { "epoch": 0.5884953322350357, "grad_norm": 0.3640591502189636, "learning_rate": 1.60477340183621e-05, "loss": 0.468, "step": 21433 }, { "epoch": 0.5885227896760022, "grad_norm": 0.3819221258163452, "learning_rate": 1.6047390053849043e-05, "loss": 0.512, "step": 21434 }, { "epoch": 0.5885502471169687, "grad_norm": 0.39641863107681274, "learning_rate": 1.6047046078055675e-05, "loss": 0.5614, "step": 21435 }, { "epoch": 0.5885777045579352, "grad_norm": 0.36107152700424194, "learning_rate": 1.6046702090982633e-05, "loss": 0.47, "step": 21436 }, { "epoch": 0.5886051619989017, "grad_norm": 0.38981881737709045, "learning_rate": 1.6046358092630565e-05, "loss": 0.4093, "step": 21437 }, { "epoch": 0.5886326194398682, "grad_norm": 0.33813029527664185, "learning_rate": 1.604601408300011e-05, "loss": 0.4396, "step": 21438 }, { "epoch": 0.5886600768808347, "grad_norm": 0.387286901473999, "learning_rate": 1.6045670062091905e-05, "loss": 0.4788, "step": 21439 }, { "epoch": 0.5886875343218012, "grad_norm": 0.36994507908821106, "learning_rate": 1.6045326029906597e-05, "loss": 0.4955, "step": 21440 }, { "epoch": 0.5887149917627678, "grad_norm": 0.3722115159034729, "learning_rate": 1.6044981986444827e-05, "loss": 0.5386, "step": 21441 }, { "epoch": 0.5887424492037342, "grad_norm": 0.38520726561546326, "learning_rate": 1.604463793170724e-05, "loss": 0.4647, "step": 21442 }, { "epoch": 0.5887699066447007, "grad_norm": 0.40866199135780334, "learning_rate": 1.6044293865694468e-05, "loss": 0.4968, "step": 21443 }, { "epoch": 0.5887973640856672, "grad_norm": 0.4030528962612152, "learning_rate": 1.6043949788407162e-05, "loss": 0.5098, "step": 21444 }, { "epoch": 0.5888248215266337, "grad_norm": 0.4165848195552826, "learning_rate": 1.604360569984596e-05, "loss": 0.5729, "step": 21445 }, { "epoch": 0.5888522789676002, "grad_norm": 0.4657222330570221, "learning_rate": 1.60432616000115e-05, "loss": 0.6059, "step": 21446 }, { "epoch": 0.5888797364085667, "grad_norm": 0.3711622357368469, "learning_rate": 1.6042917488904437e-05, "loss": 0.4855, "step": 21447 }, { "epoch": 0.5889071938495333, "grad_norm": 0.42531174421310425, "learning_rate": 1.6042573366525397e-05, "loss": 0.5504, "step": 21448 }, { "epoch": 0.5889346512904997, "grad_norm": 0.3611631691455841, "learning_rate": 1.6042229232875035e-05, "loss": 0.4888, "step": 21449 }, { "epoch": 0.5889621087314663, "grad_norm": 0.46158668398857117, "learning_rate": 1.6041885087953986e-05, "loss": 0.5834, "step": 21450 }, { "epoch": 0.5889895661724327, "grad_norm": 0.4538050889968872, "learning_rate": 1.6041540931762888e-05, "loss": 0.5136, "step": 21451 }, { "epoch": 0.5890170236133992, "grad_norm": 0.36424383521080017, "learning_rate": 1.6041196764302393e-05, "loss": 0.4497, "step": 21452 }, { "epoch": 0.5890444810543657, "grad_norm": 0.3888211250305176, "learning_rate": 1.6040852585573134e-05, "loss": 0.6155, "step": 21453 }, { "epoch": 0.5890719384953322, "grad_norm": 0.40302661061286926, "learning_rate": 1.604050839557576e-05, "loss": 0.501, "step": 21454 }, { "epoch": 0.5890993959362988, "grad_norm": 0.47300952672958374, "learning_rate": 1.604016419431091e-05, "loss": 0.5313, "step": 21455 }, { "epoch": 0.5891268533772652, "grad_norm": 0.3970646560192108, "learning_rate": 1.6039819981779223e-05, "loss": 0.488, "step": 21456 }, { "epoch": 0.5891543108182318, "grad_norm": 0.4187946617603302, "learning_rate": 1.6039475757981347e-05, "loss": 0.5043, "step": 21457 }, { "epoch": 0.5891817682591982, "grad_norm": 0.3966483175754547, "learning_rate": 1.603913152291792e-05, "loss": 0.4781, "step": 21458 }, { "epoch": 0.5892092257001648, "grad_norm": 0.35040342807769775, "learning_rate": 1.6038787276589584e-05, "loss": 0.4795, "step": 21459 }, { "epoch": 0.5892366831411312, "grad_norm": 0.4027577042579651, "learning_rate": 1.6038443018996984e-05, "loss": 0.5046, "step": 21460 }, { "epoch": 0.5892641405820978, "grad_norm": 0.3712054491043091, "learning_rate": 1.603809875014076e-05, "loss": 0.5265, "step": 21461 }, { "epoch": 0.5892915980230643, "grad_norm": 0.35834985971450806, "learning_rate": 1.6037754470021556e-05, "loss": 0.5276, "step": 21462 }, { "epoch": 0.5893190554640307, "grad_norm": 0.3905424475669861, "learning_rate": 1.6037410178640008e-05, "loss": 0.4388, "step": 21463 }, { "epoch": 0.5893465129049973, "grad_norm": 0.4908965229988098, "learning_rate": 1.6037065875996768e-05, "loss": 0.5376, "step": 21464 }, { "epoch": 0.5893739703459637, "grad_norm": 0.364498496055603, "learning_rate": 1.6036721562092476e-05, "loss": 0.5324, "step": 21465 }, { "epoch": 0.5894014277869303, "grad_norm": 0.3806611895561218, "learning_rate": 1.6036377236927765e-05, "loss": 0.4883, "step": 21466 }, { "epoch": 0.5894288852278967, "grad_norm": 0.36859938502311707, "learning_rate": 1.6036032900503286e-05, "loss": 0.4271, "step": 21467 }, { "epoch": 0.5894563426688633, "grad_norm": 0.4003593325614929, "learning_rate": 1.6035688552819682e-05, "loss": 0.5073, "step": 21468 }, { "epoch": 0.5894838001098298, "grad_norm": 0.38554590940475464, "learning_rate": 1.603534419387759e-05, "loss": 0.5796, "step": 21469 }, { "epoch": 0.5895112575507963, "grad_norm": 0.4106822609901428, "learning_rate": 1.6034999823677652e-05, "loss": 0.5042, "step": 21470 }, { "epoch": 0.5895387149917628, "grad_norm": 0.36980104446411133, "learning_rate": 1.603465544222052e-05, "loss": 0.4941, "step": 21471 }, { "epoch": 0.5895661724327292, "grad_norm": 0.4236767292022705, "learning_rate": 1.6034311049506823e-05, "loss": 0.5856, "step": 21472 }, { "epoch": 0.5895936298736958, "grad_norm": 0.36444568634033203, "learning_rate": 1.6033966645537212e-05, "loss": 0.481, "step": 21473 }, { "epoch": 0.5896210873146622, "grad_norm": 0.38661208748817444, "learning_rate": 1.603362223031233e-05, "loss": 0.5416, "step": 21474 }, { "epoch": 0.5896485447556288, "grad_norm": 0.368897020816803, "learning_rate": 1.6033277803832815e-05, "loss": 0.5092, "step": 21475 }, { "epoch": 0.5896760021965953, "grad_norm": 0.34310945868492126, "learning_rate": 1.603293336609931e-05, "loss": 0.4264, "step": 21476 }, { "epoch": 0.5897034596375618, "grad_norm": 0.35825175046920776, "learning_rate": 1.6032588917112462e-05, "loss": 0.5813, "step": 21477 }, { "epoch": 0.5897309170785283, "grad_norm": 0.41216251254081726, "learning_rate": 1.6032244456872908e-05, "loss": 0.4841, "step": 21478 }, { "epoch": 0.5897583745194948, "grad_norm": 0.3895282447338104, "learning_rate": 1.603189998538129e-05, "loss": 0.543, "step": 21479 }, { "epoch": 0.5897858319604613, "grad_norm": 0.32734933495521545, "learning_rate": 1.603155550263826e-05, "loss": 0.4438, "step": 21480 }, { "epoch": 0.5898132894014277, "grad_norm": 0.4271828830242157, "learning_rate": 1.603121100864445e-05, "loss": 0.5088, "step": 21481 }, { "epoch": 0.5898407468423943, "grad_norm": 0.47016677260398865, "learning_rate": 1.6030866503400506e-05, "loss": 0.5697, "step": 21482 }, { "epoch": 0.5898682042833608, "grad_norm": 0.43538898229599, "learning_rate": 1.603052198690707e-05, "loss": 0.5391, "step": 21483 }, { "epoch": 0.5898956617243273, "grad_norm": 0.3630640506744385, "learning_rate": 1.6030177459164792e-05, "loss": 0.4457, "step": 21484 }, { "epoch": 0.5899231191652938, "grad_norm": 0.35015639662742615, "learning_rate": 1.6029832920174304e-05, "loss": 0.4808, "step": 21485 }, { "epoch": 0.5899505766062603, "grad_norm": 0.43256109952926636, "learning_rate": 1.6029488369936253e-05, "loss": 0.5233, "step": 21486 }, { "epoch": 0.5899780340472268, "grad_norm": 0.4134038984775543, "learning_rate": 1.602914380845128e-05, "loss": 0.4962, "step": 21487 }, { "epoch": 0.5900054914881933, "grad_norm": 0.35060372948646545, "learning_rate": 1.6028799235720035e-05, "loss": 0.5578, "step": 21488 }, { "epoch": 0.5900329489291598, "grad_norm": 0.3724535405635834, "learning_rate": 1.602845465174315e-05, "loss": 0.4805, "step": 21489 }, { "epoch": 0.5900604063701264, "grad_norm": 0.4152035415172577, "learning_rate": 1.6028110056521276e-05, "loss": 0.6234, "step": 21490 }, { "epoch": 0.5900878638110928, "grad_norm": 0.3733558654785156, "learning_rate": 1.6027765450055053e-05, "loss": 0.5526, "step": 21491 }, { "epoch": 0.5901153212520593, "grad_norm": 0.3646096885204315, "learning_rate": 1.602742083234512e-05, "loss": 0.4685, "step": 21492 }, { "epoch": 0.5901427786930258, "grad_norm": 0.35129112005233765, "learning_rate": 1.602707620339213e-05, "loss": 0.4139, "step": 21493 }, { "epoch": 0.5901702361339923, "grad_norm": 0.4203624427318573, "learning_rate": 1.6026731563196715e-05, "loss": 0.4642, "step": 21494 }, { "epoch": 0.5901976935749588, "grad_norm": 0.40360310673713684, "learning_rate": 1.602638691175952e-05, "loss": 0.4943, "step": 21495 }, { "epoch": 0.5902251510159253, "grad_norm": 0.44878822565078735, "learning_rate": 1.6026042249081192e-05, "loss": 0.5329, "step": 21496 }, { "epoch": 0.5902526084568919, "grad_norm": 0.6131880283355713, "learning_rate": 1.6025697575162374e-05, "loss": 0.4516, "step": 21497 }, { "epoch": 0.5902800658978583, "grad_norm": 0.3740101158618927, "learning_rate": 1.6025352890003704e-05, "loss": 0.4986, "step": 21498 }, { "epoch": 0.5903075233388249, "grad_norm": 0.3936583697795868, "learning_rate": 1.6025008193605828e-05, "loss": 0.5968, "step": 21499 }, { "epoch": 0.5903349807797913, "grad_norm": 0.3740116059780121, "learning_rate": 1.6024663485969393e-05, "loss": 0.4489, "step": 21500 }, { "epoch": 0.5903624382207578, "grad_norm": 0.3718230724334717, "learning_rate": 1.602431876709503e-05, "loss": 0.4685, "step": 21501 }, { "epoch": 0.5903898956617243, "grad_norm": 0.35741108655929565, "learning_rate": 1.6023974036983396e-05, "loss": 0.482, "step": 21502 }, { "epoch": 0.5904173531026908, "grad_norm": 0.36427491903305054, "learning_rate": 1.6023629295635125e-05, "loss": 0.5079, "step": 21503 }, { "epoch": 0.5904448105436574, "grad_norm": 0.38975247740745544, "learning_rate": 1.6023284543050863e-05, "loss": 0.5496, "step": 21504 }, { "epoch": 0.5904722679846238, "grad_norm": 0.36991143226623535, "learning_rate": 1.6022939779231255e-05, "loss": 0.478, "step": 21505 }, { "epoch": 0.5904997254255904, "grad_norm": 0.3664740324020386, "learning_rate": 1.602259500417694e-05, "loss": 0.5261, "step": 21506 }, { "epoch": 0.5905271828665568, "grad_norm": 0.35924646258354187, "learning_rate": 1.6022250217888564e-05, "loss": 0.4601, "step": 21507 }, { "epoch": 0.5905546403075234, "grad_norm": 0.39521804451942444, "learning_rate": 1.6021905420366768e-05, "loss": 0.5764, "step": 21508 }, { "epoch": 0.5905820977484898, "grad_norm": 0.33987957239151, "learning_rate": 1.6021560611612198e-05, "loss": 0.4233, "step": 21509 }, { "epoch": 0.5906095551894563, "grad_norm": 0.36575350165367126, "learning_rate": 1.6021215791625496e-05, "loss": 0.5012, "step": 21510 }, { "epoch": 0.5906370126304229, "grad_norm": 0.41507822275161743, "learning_rate": 1.6020870960407303e-05, "loss": 0.5692, "step": 21511 }, { "epoch": 0.5906644700713893, "grad_norm": 0.3615175485610962, "learning_rate": 1.6020526117958267e-05, "loss": 0.467, "step": 21512 }, { "epoch": 0.5906919275123559, "grad_norm": 0.4112790822982788, "learning_rate": 1.6020181264279024e-05, "loss": 0.5699, "step": 21513 }, { "epoch": 0.5907193849533223, "grad_norm": 0.361366868019104, "learning_rate": 1.6019836399370224e-05, "loss": 0.5469, "step": 21514 }, { "epoch": 0.5907468423942889, "grad_norm": 0.3912656009197235, "learning_rate": 1.601949152323251e-05, "loss": 0.6503, "step": 21515 }, { "epoch": 0.5907742998352553, "grad_norm": 0.3618023693561554, "learning_rate": 1.601914663586652e-05, "loss": 0.5005, "step": 21516 }, { "epoch": 0.5908017572762219, "grad_norm": 0.3914335370063782, "learning_rate": 1.60188017372729e-05, "loss": 0.5403, "step": 21517 }, { "epoch": 0.5908292147171884, "grad_norm": 0.3110455870628357, "learning_rate": 1.6018456827452292e-05, "loss": 0.4398, "step": 21518 }, { "epoch": 0.5908566721581548, "grad_norm": 0.4432625472545624, "learning_rate": 1.6018111906405344e-05, "loss": 0.449, "step": 21519 }, { "epoch": 0.5908841295991214, "grad_norm": 0.3686738610267639, "learning_rate": 1.60177669741327e-05, "loss": 0.4834, "step": 21520 }, { "epoch": 0.5909115870400878, "grad_norm": 0.4385509192943573, "learning_rate": 1.6017422030634993e-05, "loss": 0.4474, "step": 21521 }, { "epoch": 0.5909390444810544, "grad_norm": 0.42595386505126953, "learning_rate": 1.6017077075912878e-05, "loss": 0.5339, "step": 21522 }, { "epoch": 0.5909665019220208, "grad_norm": 0.37508144974708557, "learning_rate": 1.6016732109966994e-05, "loss": 0.5021, "step": 21523 }, { "epoch": 0.5909939593629874, "grad_norm": 0.38849055767059326, "learning_rate": 1.601638713279798e-05, "loss": 0.5387, "step": 21524 }, { "epoch": 0.5910214168039539, "grad_norm": 0.38650548458099365, "learning_rate": 1.601604214440649e-05, "loss": 0.5552, "step": 21525 }, { "epoch": 0.5910488742449204, "grad_norm": 0.3708053529262543, "learning_rate": 1.601569714479316e-05, "loss": 0.5473, "step": 21526 }, { "epoch": 0.5910763316858869, "grad_norm": 0.36492595076560974, "learning_rate": 1.601535213395863e-05, "loss": 0.4627, "step": 21527 }, { "epoch": 0.5911037891268534, "grad_norm": 0.5813947916030884, "learning_rate": 1.601500711190355e-05, "loss": 0.4946, "step": 21528 }, { "epoch": 0.5911312465678199, "grad_norm": 0.3584710657596588, "learning_rate": 1.601466207862856e-05, "loss": 0.4826, "step": 21529 }, { "epoch": 0.5911587040087863, "grad_norm": 0.38181042671203613, "learning_rate": 1.6014317034134313e-05, "loss": 0.483, "step": 21530 }, { "epoch": 0.5911861614497529, "grad_norm": 0.3726422190666199, "learning_rate": 1.6013971978421442e-05, "loss": 0.5159, "step": 21531 }, { "epoch": 0.5912136188907194, "grad_norm": 0.5162054300308228, "learning_rate": 1.601362691149059e-05, "loss": 0.4941, "step": 21532 }, { "epoch": 0.5912410763316859, "grad_norm": 0.3670933246612549, "learning_rate": 1.6013281833342407e-05, "loss": 0.4864, "step": 21533 }, { "epoch": 0.5912685337726524, "grad_norm": 5.97435998916626, "learning_rate": 1.6012936743977534e-05, "loss": 0.5026, "step": 21534 }, { "epoch": 0.5912959912136189, "grad_norm": 0.5258584022521973, "learning_rate": 1.6012591643396616e-05, "loss": 0.5275, "step": 21535 }, { "epoch": 0.5913234486545854, "grad_norm": 0.45063483715057373, "learning_rate": 1.6012246531600295e-05, "loss": 0.5448, "step": 21536 }, { "epoch": 0.5913509060955519, "grad_norm": 0.3921394646167755, "learning_rate": 1.6011901408589213e-05, "loss": 0.4255, "step": 21537 }, { "epoch": 0.5913783635365184, "grad_norm": 0.4393773674964905, "learning_rate": 1.601155627436402e-05, "loss": 0.491, "step": 21538 }, { "epoch": 0.591405820977485, "grad_norm": 0.3587026000022888, "learning_rate": 1.6011211128925354e-05, "loss": 0.5282, "step": 21539 }, { "epoch": 0.5914332784184514, "grad_norm": 0.3885626792907715, "learning_rate": 1.6010865972273864e-05, "loss": 0.5379, "step": 21540 }, { "epoch": 0.5914607358594179, "grad_norm": 0.39426350593566895, "learning_rate": 1.6010520804410185e-05, "loss": 0.5796, "step": 21541 }, { "epoch": 0.5914881933003844, "grad_norm": 0.3860085904598236, "learning_rate": 1.6010175625334968e-05, "loss": 0.44, "step": 21542 }, { "epoch": 0.5915156507413509, "grad_norm": 0.39274683594703674, "learning_rate": 1.600983043504886e-05, "loss": 0.5223, "step": 21543 }, { "epoch": 0.5915431081823174, "grad_norm": 0.3777177929878235, "learning_rate": 1.600948523355249e-05, "loss": 0.4979, "step": 21544 }, { "epoch": 0.5915705656232839, "grad_norm": 0.38170477747917175, "learning_rate": 1.600914002084652e-05, "loss": 0.5361, "step": 21545 }, { "epoch": 0.5915980230642505, "grad_norm": 0.35656559467315674, "learning_rate": 1.6008794796931587e-05, "loss": 0.4649, "step": 21546 }, { "epoch": 0.5916254805052169, "grad_norm": 0.335309237241745, "learning_rate": 1.600844956180833e-05, "loss": 0.4204, "step": 21547 }, { "epoch": 0.5916529379461835, "grad_norm": 0.35161134600639343, "learning_rate": 1.60081043154774e-05, "loss": 0.5201, "step": 21548 }, { "epoch": 0.5916803953871499, "grad_norm": 0.37710678577423096, "learning_rate": 1.6007759057939433e-05, "loss": 0.5502, "step": 21549 }, { "epoch": 0.5917078528281164, "grad_norm": 0.46812087297439575, "learning_rate": 1.6007413789195082e-05, "loss": 0.5133, "step": 21550 }, { "epoch": 0.5917353102690829, "grad_norm": 0.3610036075115204, "learning_rate": 1.6007068509244984e-05, "loss": 0.4856, "step": 21551 }, { "epoch": 0.5917627677100494, "grad_norm": 0.3454486131668091, "learning_rate": 1.6006723218089788e-05, "loss": 0.4375, "step": 21552 }, { "epoch": 0.591790225151016, "grad_norm": 0.3636390268802643, "learning_rate": 1.6006377915730135e-05, "loss": 0.4883, "step": 21553 }, { "epoch": 0.5918176825919824, "grad_norm": 0.4506637156009674, "learning_rate": 1.6006032602166672e-05, "loss": 0.5136, "step": 21554 }, { "epoch": 0.591845140032949, "grad_norm": 0.3723263740539551, "learning_rate": 1.600568727740004e-05, "loss": 0.478, "step": 21555 }, { "epoch": 0.5918725974739154, "grad_norm": 0.3520340621471405, "learning_rate": 1.6005341941430886e-05, "loss": 0.4243, "step": 21556 }, { "epoch": 0.591900054914882, "grad_norm": 0.4556431174278259, "learning_rate": 1.6004996594259853e-05, "loss": 0.4822, "step": 21557 }, { "epoch": 0.5919275123558484, "grad_norm": 0.39400535821914673, "learning_rate": 1.600465123588758e-05, "loss": 0.4746, "step": 21558 }, { "epoch": 0.591954969796815, "grad_norm": 0.378370463848114, "learning_rate": 1.600430586631472e-05, "loss": 0.4808, "step": 21559 }, { "epoch": 0.5919824272377814, "grad_norm": 0.39441776275634766, "learning_rate": 1.6003960485541913e-05, "loss": 0.453, "step": 21560 }, { "epoch": 0.5920098846787479, "grad_norm": 0.35849425196647644, "learning_rate": 1.6003615093569803e-05, "loss": 0.5019, "step": 21561 }, { "epoch": 0.5920373421197145, "grad_norm": 0.4814780652523041, "learning_rate": 1.6003269690399034e-05, "loss": 0.6412, "step": 21562 }, { "epoch": 0.5920647995606809, "grad_norm": 0.36142298579216003, "learning_rate": 1.600292427603025e-05, "loss": 0.4978, "step": 21563 }, { "epoch": 0.5920922570016475, "grad_norm": 0.404452383518219, "learning_rate": 1.6002578850464096e-05, "loss": 0.4992, "step": 21564 }, { "epoch": 0.5921197144426139, "grad_norm": 0.3768053650856018, "learning_rate": 1.600223341370122e-05, "loss": 0.481, "step": 21565 }, { "epoch": 0.5921471718835805, "grad_norm": 0.3810424506664276, "learning_rate": 1.600188796574226e-05, "loss": 0.4838, "step": 21566 }, { "epoch": 0.5921746293245469, "grad_norm": 0.36002296209335327, "learning_rate": 1.6001542506587865e-05, "loss": 0.46, "step": 21567 }, { "epoch": 0.5922020867655134, "grad_norm": 0.3910340964794159, "learning_rate": 1.600119703623868e-05, "loss": 0.5606, "step": 21568 }, { "epoch": 0.59222954420648, "grad_norm": 0.38204288482666016, "learning_rate": 1.6000851554695342e-05, "loss": 0.4976, "step": 21569 }, { "epoch": 0.5922570016474464, "grad_norm": 0.39516332745552063, "learning_rate": 1.6000506061958502e-05, "loss": 0.4923, "step": 21570 }, { "epoch": 0.592284459088413, "grad_norm": 0.41485074162483215, "learning_rate": 1.600016055802881e-05, "loss": 0.6442, "step": 21571 }, { "epoch": 0.5923119165293794, "grad_norm": 0.35284140706062317, "learning_rate": 1.5999815042906894e-05, "loss": 0.4311, "step": 21572 }, { "epoch": 0.592339373970346, "grad_norm": 0.4001729190349579, "learning_rate": 1.599946951659341e-05, "loss": 0.4919, "step": 21573 }, { "epoch": 0.5923668314113124, "grad_norm": 0.3734440505504608, "learning_rate": 1.5999123979089003e-05, "loss": 0.5007, "step": 21574 }, { "epoch": 0.592394288852279, "grad_norm": 0.4502127766609192, "learning_rate": 1.5998778430394317e-05, "loss": 0.5889, "step": 21575 }, { "epoch": 0.5924217462932455, "grad_norm": 0.4269816279411316, "learning_rate": 1.599843287050999e-05, "loss": 0.5137, "step": 21576 }, { "epoch": 0.592449203734212, "grad_norm": 0.36465173959732056, "learning_rate": 1.5998087299436673e-05, "loss": 0.4906, "step": 21577 }, { "epoch": 0.5924766611751785, "grad_norm": 0.35471850633621216, "learning_rate": 1.5997741717175007e-05, "loss": 0.4527, "step": 21578 }, { "epoch": 0.5925041186161449, "grad_norm": 0.39683225750923157, "learning_rate": 1.5997396123725645e-05, "loss": 0.4797, "step": 21579 }, { "epoch": 0.5925315760571115, "grad_norm": 0.38048118352890015, "learning_rate": 1.599705051908922e-05, "loss": 0.5005, "step": 21580 }, { "epoch": 0.5925590334980779, "grad_norm": 0.3964901864528656, "learning_rate": 1.5996704903266384e-05, "loss": 0.5324, "step": 21581 }, { "epoch": 0.5925864909390445, "grad_norm": 0.3867605924606323, "learning_rate": 1.5996359276257776e-05, "loss": 0.5252, "step": 21582 }, { "epoch": 0.592613948380011, "grad_norm": 0.3575596511363983, "learning_rate": 1.5996013638064044e-05, "loss": 0.5789, "step": 21583 }, { "epoch": 0.5926414058209775, "grad_norm": 0.35581186413764954, "learning_rate": 1.5995667988685838e-05, "loss": 0.4943, "step": 21584 }, { "epoch": 0.592668863261944, "grad_norm": 0.3783428966999054, "learning_rate": 1.5995322328123792e-05, "loss": 0.5036, "step": 21585 }, { "epoch": 0.5926963207029105, "grad_norm": 0.36729124188423157, "learning_rate": 1.599497665637856e-05, "loss": 0.502, "step": 21586 }, { "epoch": 0.592723778143877, "grad_norm": 0.43494847416877747, "learning_rate": 1.599463097345078e-05, "loss": 0.5367, "step": 21587 }, { "epoch": 0.5927512355848434, "grad_norm": 0.37854263186454773, "learning_rate": 1.5994285279341104e-05, "loss": 0.5144, "step": 21588 }, { "epoch": 0.59277869302581, "grad_norm": 0.42274266481399536, "learning_rate": 1.599393957405017e-05, "loss": 0.5796, "step": 21589 }, { "epoch": 0.5928061504667765, "grad_norm": 0.37950077652931213, "learning_rate": 1.599359385757863e-05, "loss": 0.4837, "step": 21590 }, { "epoch": 0.592833607907743, "grad_norm": 0.4015316367149353, "learning_rate": 1.5993248129927118e-05, "loss": 0.5578, "step": 21591 }, { "epoch": 0.5928610653487095, "grad_norm": 0.39663660526275635, "learning_rate": 1.5992902391096287e-05, "loss": 0.5335, "step": 21592 }, { "epoch": 0.592888522789676, "grad_norm": 0.35792291164398193, "learning_rate": 1.5992556641086782e-05, "loss": 0.4609, "step": 21593 }, { "epoch": 0.5929159802306425, "grad_norm": 0.3692243993282318, "learning_rate": 1.5992210879899245e-05, "loss": 0.4448, "step": 21594 }, { "epoch": 0.592943437671609, "grad_norm": 0.35977593064308167, "learning_rate": 1.5991865107534325e-05, "loss": 0.4901, "step": 21595 }, { "epoch": 0.5929708951125755, "grad_norm": 0.40563634037971497, "learning_rate": 1.599151932399266e-05, "loss": 0.4694, "step": 21596 }, { "epoch": 0.592998352553542, "grad_norm": 0.3726326823234558, "learning_rate": 1.5991173529274903e-05, "loss": 0.5585, "step": 21597 }, { "epoch": 0.5930258099945085, "grad_norm": 0.38107535243034363, "learning_rate": 1.5990827723381695e-05, "loss": 0.5229, "step": 21598 }, { "epoch": 0.593053267435475, "grad_norm": 0.5190727114677429, "learning_rate": 1.599048190631368e-05, "loss": 0.5115, "step": 21599 }, { "epoch": 0.5930807248764415, "grad_norm": 0.3553035259246826, "learning_rate": 1.5990136078071504e-05, "loss": 0.5475, "step": 21600 }, { "epoch": 0.593108182317408, "grad_norm": 0.3884257376194, "learning_rate": 1.5989790238655812e-05, "loss": 0.4872, "step": 21601 }, { "epoch": 0.5931356397583745, "grad_norm": 0.3829786479473114, "learning_rate": 1.598944438806725e-05, "loss": 0.4614, "step": 21602 }, { "epoch": 0.593163097199341, "grad_norm": 0.7189985513687134, "learning_rate": 1.5989098526306463e-05, "loss": 0.6368, "step": 21603 }, { "epoch": 0.5931905546403076, "grad_norm": 0.3992803394794464, "learning_rate": 1.5988752653374094e-05, "loss": 0.5109, "step": 21604 }, { "epoch": 0.593218012081274, "grad_norm": 0.3386070728302002, "learning_rate": 1.5988406769270793e-05, "loss": 0.4587, "step": 21605 }, { "epoch": 0.5932454695222406, "grad_norm": 0.391655296087265, "learning_rate": 1.5988060873997202e-05, "loss": 0.455, "step": 21606 }, { "epoch": 0.593272926963207, "grad_norm": 0.3804088830947876, "learning_rate": 1.5987714967553963e-05, "loss": 0.484, "step": 21607 }, { "epoch": 0.5933003844041735, "grad_norm": 0.38664335012435913, "learning_rate": 1.5987369049941727e-05, "loss": 0.5045, "step": 21608 }, { "epoch": 0.59332784184514, "grad_norm": 0.32564425468444824, "learning_rate": 1.5987023121161135e-05, "loss": 0.5075, "step": 21609 }, { "epoch": 0.5933552992861065, "grad_norm": 0.4623730778694153, "learning_rate": 1.5986677181212837e-05, "loss": 0.5123, "step": 21610 }, { "epoch": 0.5933827567270731, "grad_norm": 0.408325731754303, "learning_rate": 1.5986331230097474e-05, "loss": 0.4994, "step": 21611 }, { "epoch": 0.5934102141680395, "grad_norm": 0.37334638833999634, "learning_rate": 1.5985985267815692e-05, "loss": 0.4807, "step": 21612 }, { "epoch": 0.5934376716090061, "grad_norm": 0.3533439040184021, "learning_rate": 1.5985639294368135e-05, "loss": 0.4868, "step": 21613 }, { "epoch": 0.5934651290499725, "grad_norm": 0.3895469307899475, "learning_rate": 1.5985293309755455e-05, "loss": 0.432, "step": 21614 }, { "epoch": 0.5934925864909391, "grad_norm": 0.41556233167648315, "learning_rate": 1.5984947313978287e-05, "loss": 0.5425, "step": 21615 }, { "epoch": 0.5935200439319055, "grad_norm": 0.35045745968818665, "learning_rate": 1.5984601307037286e-05, "loss": 0.482, "step": 21616 }, { "epoch": 0.593547501372872, "grad_norm": 0.3281785249710083, "learning_rate": 1.598425528893309e-05, "loss": 0.4725, "step": 21617 }, { "epoch": 0.5935749588138386, "grad_norm": 0.34671032428741455, "learning_rate": 1.5983909259666352e-05, "loss": 0.4695, "step": 21618 }, { "epoch": 0.593602416254805, "grad_norm": 0.392383873462677, "learning_rate": 1.5983563219237713e-05, "loss": 0.3954, "step": 21619 }, { "epoch": 0.5936298736957716, "grad_norm": 0.3815106749534607, "learning_rate": 1.5983217167647817e-05, "loss": 0.4266, "step": 21620 }, { "epoch": 0.593657331136738, "grad_norm": 0.41963282227516174, "learning_rate": 1.5982871104897315e-05, "loss": 0.5144, "step": 21621 }, { "epoch": 0.5936847885777046, "grad_norm": 0.34298989176750183, "learning_rate": 1.5982525030986847e-05, "loss": 0.5187, "step": 21622 }, { "epoch": 0.593712246018671, "grad_norm": 0.3803490698337555, "learning_rate": 1.5982178945917058e-05, "loss": 0.5095, "step": 21623 }, { "epoch": 0.5937397034596376, "grad_norm": 0.4120052754878998, "learning_rate": 1.59818328496886e-05, "loss": 0.5646, "step": 21624 }, { "epoch": 0.5937671609006041, "grad_norm": 0.3682212829589844, "learning_rate": 1.5981486742302112e-05, "loss": 0.4365, "step": 21625 }, { "epoch": 0.5937946183415705, "grad_norm": 0.39813438057899475, "learning_rate": 1.5981140623758242e-05, "loss": 0.5035, "step": 21626 }, { "epoch": 0.5938220757825371, "grad_norm": 0.3904571235179901, "learning_rate": 1.5980794494057633e-05, "loss": 0.5018, "step": 21627 }, { "epoch": 0.5938495332235035, "grad_norm": 0.4379083812236786, "learning_rate": 1.5980448353200936e-05, "loss": 0.4658, "step": 21628 }, { "epoch": 0.5938769906644701, "grad_norm": 0.49432632327079773, "learning_rate": 1.5980102201188796e-05, "loss": 0.5696, "step": 21629 }, { "epoch": 0.5939044481054365, "grad_norm": 0.35707736015319824, "learning_rate": 1.5979756038021854e-05, "loss": 0.4563, "step": 21630 }, { "epoch": 0.5939319055464031, "grad_norm": 0.35836562514305115, "learning_rate": 1.597940986370076e-05, "loss": 0.4013, "step": 21631 }, { "epoch": 0.5939593629873696, "grad_norm": 0.3517371416091919, "learning_rate": 1.5979063678226155e-05, "loss": 0.4871, "step": 21632 }, { "epoch": 0.5939868204283361, "grad_norm": 0.36278876662254333, "learning_rate": 1.5978717481598695e-05, "loss": 0.5251, "step": 21633 }, { "epoch": 0.5940142778693026, "grad_norm": 0.38015633821487427, "learning_rate": 1.5978371273819013e-05, "loss": 0.5492, "step": 21634 }, { "epoch": 0.594041735310269, "grad_norm": 0.41710972785949707, "learning_rate": 1.5978025054887762e-05, "loss": 0.4936, "step": 21635 }, { "epoch": 0.5940691927512356, "grad_norm": 0.3829708397388458, "learning_rate": 1.5977678824805587e-05, "loss": 0.5297, "step": 21636 }, { "epoch": 0.594096650192202, "grad_norm": 0.3414422571659088, "learning_rate": 1.5977332583573132e-05, "loss": 0.4723, "step": 21637 }, { "epoch": 0.5941241076331686, "grad_norm": 0.4014362096786499, "learning_rate": 1.5976986331191046e-05, "loss": 0.4834, "step": 21638 }, { "epoch": 0.5941515650741351, "grad_norm": 0.4152536988258362, "learning_rate": 1.5976640067659974e-05, "loss": 0.5275, "step": 21639 }, { "epoch": 0.5941790225151016, "grad_norm": 0.4433457851409912, "learning_rate": 1.5976293792980555e-05, "loss": 0.533, "step": 21640 }, { "epoch": 0.5942064799560681, "grad_norm": 0.3586195707321167, "learning_rate": 1.597594750715344e-05, "loss": 0.5076, "step": 21641 }, { "epoch": 0.5942339373970346, "grad_norm": 0.35300853848457336, "learning_rate": 1.5975601210179282e-05, "loss": 0.4884, "step": 21642 }, { "epoch": 0.5942613948380011, "grad_norm": 0.3989848494529724, "learning_rate": 1.5975254902058718e-05, "loss": 0.4313, "step": 21643 }, { "epoch": 0.5942888522789675, "grad_norm": 0.32974159717559814, "learning_rate": 1.59749085827924e-05, "loss": 0.422, "step": 21644 }, { "epoch": 0.5943163097199341, "grad_norm": 0.3740629553794861, "learning_rate": 1.5974562252380965e-05, "loss": 0.4768, "step": 21645 }, { "epoch": 0.5943437671609006, "grad_norm": 0.4019325375556946, "learning_rate": 1.5974215910825067e-05, "loss": 0.5706, "step": 21646 }, { "epoch": 0.5943712246018671, "grad_norm": 0.37259814143180847, "learning_rate": 1.597386955812535e-05, "loss": 0.4832, "step": 21647 }, { "epoch": 0.5943986820428336, "grad_norm": 0.4027015268802643, "learning_rate": 1.5973523194282458e-05, "loss": 0.5674, "step": 21648 }, { "epoch": 0.5944261394838001, "grad_norm": 0.37741154432296753, "learning_rate": 1.597317681929704e-05, "loss": 0.5419, "step": 21649 }, { "epoch": 0.5944535969247666, "grad_norm": 0.38168174028396606, "learning_rate": 1.5972830433169738e-05, "loss": 0.4251, "step": 21650 }, { "epoch": 0.5944810543657331, "grad_norm": 0.4446272552013397, "learning_rate": 1.5972484035901203e-05, "loss": 0.6141, "step": 21651 }, { "epoch": 0.5945085118066996, "grad_norm": 0.39434128999710083, "learning_rate": 1.597213762749208e-05, "loss": 0.6002, "step": 21652 }, { "epoch": 0.5945359692476662, "grad_norm": 0.359584778547287, "learning_rate": 1.597179120794301e-05, "loss": 0.4625, "step": 21653 }, { "epoch": 0.5945634266886326, "grad_norm": 0.40658998489379883, "learning_rate": 1.5971444777254652e-05, "loss": 0.512, "step": 21654 }, { "epoch": 0.5945908841295992, "grad_norm": 0.3999905586242676, "learning_rate": 1.597109833542764e-05, "loss": 0.5061, "step": 21655 }, { "epoch": 0.5946183415705656, "grad_norm": 0.4103797972202301, "learning_rate": 1.597075188246262e-05, "loss": 0.4992, "step": 21656 }, { "epoch": 0.5946457990115321, "grad_norm": 0.33789241313934326, "learning_rate": 1.5970405418360244e-05, "loss": 0.4857, "step": 21657 }, { "epoch": 0.5946732564524986, "grad_norm": 0.36388295888900757, "learning_rate": 1.5970058943121158e-05, "loss": 0.504, "step": 21658 }, { "epoch": 0.5947007138934651, "grad_norm": 0.3837275207042694, "learning_rate": 1.5969712456746007e-05, "loss": 0.5266, "step": 21659 }, { "epoch": 0.5947281713344317, "grad_norm": 0.3193417489528656, "learning_rate": 1.5969365959235435e-05, "loss": 0.4084, "step": 21660 }, { "epoch": 0.5947556287753981, "grad_norm": 0.3747400641441345, "learning_rate": 1.5969019450590087e-05, "loss": 0.5207, "step": 21661 }, { "epoch": 0.5947830862163647, "grad_norm": 0.3875921964645386, "learning_rate": 1.5968672930810616e-05, "loss": 0.5604, "step": 21662 }, { "epoch": 0.5948105436573311, "grad_norm": 0.3370065689086914, "learning_rate": 1.5968326399897667e-05, "loss": 0.4235, "step": 21663 }, { "epoch": 0.5948380010982977, "grad_norm": 0.39944177865982056, "learning_rate": 1.5967979857851882e-05, "loss": 0.4552, "step": 21664 }, { "epoch": 0.5948654585392641, "grad_norm": 0.3652159869670868, "learning_rate": 1.5967633304673912e-05, "loss": 0.5363, "step": 21665 }, { "epoch": 0.5948929159802306, "grad_norm": 0.3915611207485199, "learning_rate": 1.59672867403644e-05, "loss": 0.5432, "step": 21666 }, { "epoch": 0.5949203734211972, "grad_norm": 0.42678290605545044, "learning_rate": 1.5966940164923993e-05, "loss": 0.4926, "step": 21667 }, { "epoch": 0.5949478308621636, "grad_norm": 0.47274208068847656, "learning_rate": 1.5966593578353342e-05, "loss": 0.5443, "step": 21668 }, { "epoch": 0.5949752883031302, "grad_norm": 0.3597835302352905, "learning_rate": 1.5966246980653085e-05, "loss": 0.4601, "step": 21669 }, { "epoch": 0.5950027457440966, "grad_norm": 0.37822869420051575, "learning_rate": 1.5965900371823875e-05, "loss": 0.4281, "step": 21670 }, { "epoch": 0.5950302031850632, "grad_norm": 0.38378146290779114, "learning_rate": 1.5965553751866356e-05, "loss": 0.6406, "step": 21671 }, { "epoch": 0.5950576606260296, "grad_norm": 0.39794933795928955, "learning_rate": 1.5965207120781176e-05, "loss": 0.4885, "step": 21672 }, { "epoch": 0.5950851180669962, "grad_norm": 0.3924503028392792, "learning_rate": 1.596486047856898e-05, "loss": 0.4998, "step": 21673 }, { "epoch": 0.5951125755079627, "grad_norm": 0.3808087706565857, "learning_rate": 1.5964513825230416e-05, "loss": 0.5058, "step": 21674 }, { "epoch": 0.5951400329489291, "grad_norm": 0.35151273012161255, "learning_rate": 1.596416716076613e-05, "loss": 0.4507, "step": 21675 }, { "epoch": 0.5951674903898957, "grad_norm": 0.36850792169570923, "learning_rate": 1.5963820485176765e-05, "loss": 0.3999, "step": 21676 }, { "epoch": 0.5951949478308621, "grad_norm": 0.4224865734577179, "learning_rate": 1.5963473798462977e-05, "loss": 0.4562, "step": 21677 }, { "epoch": 0.5952224052718287, "grad_norm": 0.40690645575523376, "learning_rate": 1.5963127100625407e-05, "loss": 0.4955, "step": 21678 }, { "epoch": 0.5952498627127951, "grad_norm": 0.33127495646476746, "learning_rate": 1.59627803916647e-05, "loss": 0.4488, "step": 21679 }, { "epoch": 0.5952773201537617, "grad_norm": 0.8051819205284119, "learning_rate": 1.5962433671581502e-05, "loss": 0.5949, "step": 21680 }, { "epoch": 0.5953047775947282, "grad_norm": 0.42313748598098755, "learning_rate": 1.5962086940376465e-05, "loss": 0.4928, "step": 21681 }, { "epoch": 0.5953322350356947, "grad_norm": 0.37392693758010864, "learning_rate": 1.5961740198050234e-05, "loss": 0.4741, "step": 21682 }, { "epoch": 0.5953596924766612, "grad_norm": 0.3998919427394867, "learning_rate": 1.5961393444603454e-05, "loss": 0.5279, "step": 21683 }, { "epoch": 0.5953871499176276, "grad_norm": 0.364777535200119, "learning_rate": 1.596104668003677e-05, "loss": 0.5009, "step": 21684 }, { "epoch": 0.5954146073585942, "grad_norm": 0.4161120653152466, "learning_rate": 1.5960699904350835e-05, "loss": 0.4943, "step": 21685 }, { "epoch": 0.5954420647995606, "grad_norm": 0.3980713188648224, "learning_rate": 1.596035311754629e-05, "loss": 0.4872, "step": 21686 }, { "epoch": 0.5954695222405272, "grad_norm": 0.3896341621875763, "learning_rate": 1.5960006319623782e-05, "loss": 0.5193, "step": 21687 }, { "epoch": 0.5954969796814937, "grad_norm": 0.3638424873352051, "learning_rate": 1.5959659510583963e-05, "loss": 0.521, "step": 21688 }, { "epoch": 0.5955244371224602, "grad_norm": 0.4099225699901581, "learning_rate": 1.5959312690427476e-05, "loss": 0.5691, "step": 21689 }, { "epoch": 0.5955518945634267, "grad_norm": 0.34615424275398254, "learning_rate": 1.595896585915497e-05, "loss": 0.4801, "step": 21690 }, { "epoch": 0.5955793520043932, "grad_norm": 0.3555276691913605, "learning_rate": 1.595861901676709e-05, "loss": 0.4594, "step": 21691 }, { "epoch": 0.5956068094453597, "grad_norm": 0.3950687050819397, "learning_rate": 1.5958272163264482e-05, "loss": 0.4772, "step": 21692 }, { "epoch": 0.5956342668863261, "grad_norm": 0.38654398918151855, "learning_rate": 1.59579252986478e-05, "loss": 0.5077, "step": 21693 }, { "epoch": 0.5956617243272927, "grad_norm": 0.3603242337703705, "learning_rate": 1.595757842291768e-05, "loss": 0.4285, "step": 21694 }, { "epoch": 0.5956891817682592, "grad_norm": 0.3700500726699829, "learning_rate": 1.5957231536074777e-05, "loss": 0.4664, "step": 21695 }, { "epoch": 0.5957166392092257, "grad_norm": 0.500645637512207, "learning_rate": 1.5956884638119737e-05, "loss": 0.5798, "step": 21696 }, { "epoch": 0.5957440966501922, "grad_norm": 0.3725320100784302, "learning_rate": 1.5956537729053204e-05, "loss": 0.4939, "step": 21697 }, { "epoch": 0.5957715540911587, "grad_norm": 0.3667197525501251, "learning_rate": 1.5956190808875828e-05, "loss": 0.5138, "step": 21698 }, { "epoch": 0.5957990115321252, "grad_norm": 0.3695150911808014, "learning_rate": 1.5955843877588256e-05, "loss": 0.4778, "step": 21699 }, { "epoch": 0.5958264689730917, "grad_norm": 0.3810870945453644, "learning_rate": 1.5955496935191136e-05, "loss": 0.4904, "step": 21700 }, { "epoch": 0.5958539264140582, "grad_norm": 0.3729828894138336, "learning_rate": 1.5955149981685107e-05, "loss": 0.5512, "step": 21701 }, { "epoch": 0.5958813838550248, "grad_norm": 0.3493822515010834, "learning_rate": 1.5954803017070828e-05, "loss": 0.4359, "step": 21702 }, { "epoch": 0.5959088412959912, "grad_norm": 0.3809067904949188, "learning_rate": 1.595445604134894e-05, "loss": 0.5269, "step": 21703 }, { "epoch": 0.5959362987369577, "grad_norm": 0.35443630814552307, "learning_rate": 1.595410905452009e-05, "loss": 0.5224, "step": 21704 }, { "epoch": 0.5959637561779242, "grad_norm": 0.34098318219184875, "learning_rate": 1.5953762056584924e-05, "loss": 0.4184, "step": 21705 }, { "epoch": 0.5959912136188907, "grad_norm": 0.4497235119342804, "learning_rate": 1.5953415047544098e-05, "loss": 0.5893, "step": 21706 }, { "epoch": 0.5960186710598572, "grad_norm": 0.3522782623767853, "learning_rate": 1.5953068027398247e-05, "loss": 0.4647, "step": 21707 }, { "epoch": 0.5960461285008237, "grad_norm": 0.3995654582977295, "learning_rate": 1.5952720996148028e-05, "loss": 0.4985, "step": 21708 }, { "epoch": 0.5960735859417903, "grad_norm": 0.34515780210494995, "learning_rate": 1.595237395379408e-05, "loss": 0.4814, "step": 21709 }, { "epoch": 0.5961010433827567, "grad_norm": 0.3649619519710541, "learning_rate": 1.595202690033706e-05, "loss": 0.4549, "step": 21710 }, { "epoch": 0.5961285008237233, "grad_norm": 0.3574879467487335, "learning_rate": 1.595167983577761e-05, "loss": 0.506, "step": 21711 }, { "epoch": 0.5961559582646897, "grad_norm": 0.393477201461792, "learning_rate": 1.595133276011637e-05, "loss": 0.5206, "step": 21712 }, { "epoch": 0.5961834157056562, "grad_norm": 0.3936120867729187, "learning_rate": 1.5950985673354004e-05, "loss": 0.5096, "step": 21713 }, { "epoch": 0.5962108731466227, "grad_norm": 0.3972810208797455, "learning_rate": 1.5950638575491145e-05, "loss": 0.5515, "step": 21714 }, { "epoch": 0.5962383305875892, "grad_norm": 0.3933134973049164, "learning_rate": 1.5950291466528445e-05, "loss": 0.5145, "step": 21715 }, { "epoch": 0.5962657880285558, "grad_norm": 0.3486424684524536, "learning_rate": 1.5949944346466553e-05, "loss": 0.5814, "step": 21716 }, { "epoch": 0.5962932454695222, "grad_norm": 0.3897119164466858, "learning_rate": 1.594959721530612e-05, "loss": 0.5085, "step": 21717 }, { "epoch": 0.5963207029104888, "grad_norm": 0.40288040041923523, "learning_rate": 1.5949250073047786e-05, "loss": 0.459, "step": 21718 }, { "epoch": 0.5963481603514552, "grad_norm": 0.6181684732437134, "learning_rate": 1.5948902919692204e-05, "loss": 0.5297, "step": 21719 }, { "epoch": 0.5963756177924218, "grad_norm": 0.39317306876182556, "learning_rate": 1.5948555755240016e-05, "loss": 0.4954, "step": 21720 }, { "epoch": 0.5964030752333882, "grad_norm": 0.4130921959877014, "learning_rate": 1.5948208579691876e-05, "loss": 0.5914, "step": 21721 }, { "epoch": 0.5964305326743548, "grad_norm": 0.40566009283065796, "learning_rate": 1.5947861393048428e-05, "loss": 0.5304, "step": 21722 }, { "epoch": 0.5964579901153213, "grad_norm": 0.33482298254966736, "learning_rate": 1.5947514195310318e-05, "loss": 0.4826, "step": 21723 }, { "epoch": 0.5964854475562877, "grad_norm": 0.3592824339866638, "learning_rate": 1.5947166986478198e-05, "loss": 0.4355, "step": 21724 }, { "epoch": 0.5965129049972543, "grad_norm": 0.3892030417919159, "learning_rate": 1.594681976655271e-05, "loss": 0.4587, "step": 21725 }, { "epoch": 0.5965403624382207, "grad_norm": 0.40509817004203796, "learning_rate": 1.5946472535534508e-05, "loss": 0.5353, "step": 21726 }, { "epoch": 0.5965678198791873, "grad_norm": 1.1417204141616821, "learning_rate": 1.594612529342424e-05, "loss": 0.501, "step": 21727 }, { "epoch": 0.5965952773201537, "grad_norm": 0.5374469757080078, "learning_rate": 1.5945778040222548e-05, "loss": 0.5004, "step": 21728 }, { "epoch": 0.5966227347611203, "grad_norm": 0.39794686436653137, "learning_rate": 1.594543077593008e-05, "loss": 0.5501, "step": 21729 }, { "epoch": 0.5966501922020868, "grad_norm": 0.38900241255760193, "learning_rate": 1.5945083500547487e-05, "loss": 0.4835, "step": 21730 }, { "epoch": 0.5966776496430533, "grad_norm": 0.35718151926994324, "learning_rate": 1.594473621407542e-05, "loss": 0.5287, "step": 21731 }, { "epoch": 0.5967051070840198, "grad_norm": 0.3481411337852478, "learning_rate": 1.5944388916514518e-05, "loss": 0.4817, "step": 21732 }, { "epoch": 0.5967325645249862, "grad_norm": 0.3608609735965729, "learning_rate": 1.5944041607865433e-05, "loss": 0.5087, "step": 21733 }, { "epoch": 0.5967600219659528, "grad_norm": 0.40064477920532227, "learning_rate": 1.5943694288128816e-05, "loss": 0.5335, "step": 21734 }, { "epoch": 0.5967874794069192, "grad_norm": 0.4062650799751282, "learning_rate": 1.594334695730531e-05, "loss": 0.4835, "step": 21735 }, { "epoch": 0.5968149368478858, "grad_norm": 0.3626977503299713, "learning_rate": 1.5942999615395566e-05, "loss": 0.4518, "step": 21736 }, { "epoch": 0.5968423942888523, "grad_norm": 0.37430527806282043, "learning_rate": 1.594265226240023e-05, "loss": 0.5147, "step": 21737 }, { "epoch": 0.5968698517298188, "grad_norm": 0.43416711688041687, "learning_rate": 1.594230489831995e-05, "loss": 0.465, "step": 21738 }, { "epoch": 0.5968973091707853, "grad_norm": 0.43734481930732727, "learning_rate": 1.594195752315538e-05, "loss": 0.4268, "step": 21739 }, { "epoch": 0.5969247666117518, "grad_norm": 0.37840673327445984, "learning_rate": 1.594161013690716e-05, "loss": 0.4629, "step": 21740 }, { "epoch": 0.5969522240527183, "grad_norm": 0.3960587680339813, "learning_rate": 1.5941262739575937e-05, "loss": 0.5095, "step": 21741 }, { "epoch": 0.5969796814936847, "grad_norm": 0.35826346278190613, "learning_rate": 1.5940915331162367e-05, "loss": 0.5009, "step": 21742 }, { "epoch": 0.5970071389346513, "grad_norm": 0.3992864787578583, "learning_rate": 1.594056791166709e-05, "loss": 0.4955, "step": 21743 }, { "epoch": 0.5970345963756178, "grad_norm": 0.4591447412967682, "learning_rate": 1.594022048109076e-05, "loss": 0.5714, "step": 21744 }, { "epoch": 0.5970620538165843, "grad_norm": 0.37396788597106934, "learning_rate": 1.5939873039434028e-05, "loss": 0.4547, "step": 21745 }, { "epoch": 0.5970895112575508, "grad_norm": 0.45405369997024536, "learning_rate": 1.593952558669753e-05, "loss": 0.6542, "step": 21746 }, { "epoch": 0.5971169686985173, "grad_norm": 0.37208497524261475, "learning_rate": 1.5939178122881924e-05, "loss": 0.4772, "step": 21747 }, { "epoch": 0.5971444261394838, "grad_norm": 0.3759411871433258, "learning_rate": 1.5938830647987854e-05, "loss": 0.5341, "step": 21748 }, { "epoch": 0.5971718835804503, "grad_norm": 0.3888380229473114, "learning_rate": 1.593848316201597e-05, "loss": 0.5228, "step": 21749 }, { "epoch": 0.5971993410214168, "grad_norm": 0.34081265330314636, "learning_rate": 1.5938135664966923e-05, "loss": 0.5037, "step": 21750 }, { "epoch": 0.5972267984623834, "grad_norm": 0.39694130420684814, "learning_rate": 1.5937788156841353e-05, "loss": 0.5178, "step": 21751 }, { "epoch": 0.5972542559033498, "grad_norm": 0.3604068160057068, "learning_rate": 1.5937440637639915e-05, "loss": 0.5385, "step": 21752 }, { "epoch": 0.5972817133443163, "grad_norm": 0.41468408703804016, "learning_rate": 1.5937093107363253e-05, "loss": 0.5523, "step": 21753 }, { "epoch": 0.5973091707852828, "grad_norm": 0.3732326030731201, "learning_rate": 1.5936745566012016e-05, "loss": 0.492, "step": 21754 }, { "epoch": 0.5973366282262493, "grad_norm": 0.3487294316291809, "learning_rate": 1.593639801358686e-05, "loss": 0.484, "step": 21755 }, { "epoch": 0.5973640856672158, "grad_norm": 0.40183690190315247, "learning_rate": 1.5936050450088424e-05, "loss": 0.5245, "step": 21756 }, { "epoch": 0.5973915431081823, "grad_norm": 0.3273717164993286, "learning_rate": 1.593570287551736e-05, "loss": 0.5097, "step": 21757 }, { "epoch": 0.5974190005491489, "grad_norm": 0.4305534064769745, "learning_rate": 1.5935355289874316e-05, "loss": 0.5523, "step": 21758 }, { "epoch": 0.5974464579901153, "grad_norm": 0.36713096499443054, "learning_rate": 1.5935007693159937e-05, "loss": 0.5107, "step": 21759 }, { "epoch": 0.5974739154310819, "grad_norm": 0.40823742747306824, "learning_rate": 1.5934660085374876e-05, "loss": 0.5858, "step": 21760 }, { "epoch": 0.5975013728720483, "grad_norm": 0.40607067942619324, "learning_rate": 1.593431246651978e-05, "loss": 0.4413, "step": 21761 }, { "epoch": 0.5975288303130148, "grad_norm": 0.37142330408096313, "learning_rate": 1.5933964836595297e-05, "loss": 0.4861, "step": 21762 }, { "epoch": 0.5975562877539813, "grad_norm": 0.3611002266407013, "learning_rate": 1.593361719560208e-05, "loss": 0.5004, "step": 21763 }, { "epoch": 0.5975837451949478, "grad_norm": 0.4214264750480652, "learning_rate": 1.5933269543540765e-05, "loss": 0.5323, "step": 21764 }, { "epoch": 0.5976112026359144, "grad_norm": 0.44664904475212097, "learning_rate": 1.5932921880412014e-05, "loss": 0.5353, "step": 21765 }, { "epoch": 0.5976386600768808, "grad_norm": 0.3887653052806854, "learning_rate": 1.593257420621647e-05, "loss": 0.5129, "step": 21766 }, { "epoch": 0.5976661175178474, "grad_norm": 0.36975815892219543, "learning_rate": 1.5932226520954777e-05, "loss": 0.4943, "step": 21767 }, { "epoch": 0.5976935749588138, "grad_norm": 0.34472185373306274, "learning_rate": 1.5931878824627593e-05, "loss": 0.4995, "step": 21768 }, { "epoch": 0.5977210323997804, "grad_norm": 0.444450706243515, "learning_rate": 1.593153111723556e-05, "loss": 0.5225, "step": 21769 }, { "epoch": 0.5977484898407468, "grad_norm": 0.38393229246139526, "learning_rate": 1.593118339877933e-05, "loss": 0.4847, "step": 21770 }, { "epoch": 0.5977759472817133, "grad_norm": 0.4024169147014618, "learning_rate": 1.593083566925955e-05, "loss": 0.4877, "step": 21771 }, { "epoch": 0.5978034047226799, "grad_norm": 0.4046167731285095, "learning_rate": 1.5930487928676864e-05, "loss": 0.6111, "step": 21772 }, { "epoch": 0.5978308621636463, "grad_norm": 0.3816922903060913, "learning_rate": 1.593014017703193e-05, "loss": 0.5304, "step": 21773 }, { "epoch": 0.5978583196046129, "grad_norm": 0.36318260431289673, "learning_rate": 1.5929792414325393e-05, "loss": 0.464, "step": 21774 }, { "epoch": 0.5978857770455793, "grad_norm": 0.34587565064430237, "learning_rate": 1.5929444640557896e-05, "loss": 0.4503, "step": 21775 }, { "epoch": 0.5979132344865459, "grad_norm": 0.4767628312110901, "learning_rate": 1.5929096855730094e-05, "loss": 0.5811, "step": 21776 }, { "epoch": 0.5979406919275123, "grad_norm": 0.4097137749195099, "learning_rate": 1.5928749059842633e-05, "loss": 0.4486, "step": 21777 }, { "epoch": 0.5979681493684789, "grad_norm": 0.3322499394416809, "learning_rate": 1.5928401252896165e-05, "loss": 0.4978, "step": 21778 }, { "epoch": 0.5979956068094454, "grad_norm": 0.3778994381427765, "learning_rate": 1.592805343489134e-05, "loss": 0.5344, "step": 21779 }, { "epoch": 0.5980230642504119, "grad_norm": 0.40297508239746094, "learning_rate": 1.5927705605828796e-05, "loss": 0.4976, "step": 21780 }, { "epoch": 0.5980505216913784, "grad_norm": 0.3926171362400055, "learning_rate": 1.592735776570919e-05, "loss": 0.4995, "step": 21781 }, { "epoch": 0.5980779791323448, "grad_norm": 0.8965089917182922, "learning_rate": 1.5927009914533172e-05, "loss": 0.5036, "step": 21782 }, { "epoch": 0.5981054365733114, "grad_norm": 0.45400169491767883, "learning_rate": 1.592666205230139e-05, "loss": 0.5436, "step": 21783 }, { "epoch": 0.5981328940142778, "grad_norm": 0.38812416791915894, "learning_rate": 1.592631417901449e-05, "loss": 0.4786, "step": 21784 }, { "epoch": 0.5981603514552444, "grad_norm": 0.40294456481933594, "learning_rate": 1.5925966294673126e-05, "loss": 0.6172, "step": 21785 }, { "epoch": 0.5981878088962109, "grad_norm": 0.4106404185295105, "learning_rate": 1.592561839927794e-05, "loss": 0.4811, "step": 21786 }, { "epoch": 0.5982152663371774, "grad_norm": 0.7015767693519592, "learning_rate": 1.5925270492829582e-05, "loss": 0.5864, "step": 21787 }, { "epoch": 0.5982427237781439, "grad_norm": 0.404568612575531, "learning_rate": 1.592492257532871e-05, "loss": 0.5595, "step": 21788 }, { "epoch": 0.5982701812191104, "grad_norm": 0.3280406892299652, "learning_rate": 1.592457464677596e-05, "loss": 0.4517, "step": 21789 }, { "epoch": 0.5982976386600769, "grad_norm": 0.4367057979106903, "learning_rate": 1.5924226707171992e-05, "loss": 0.4909, "step": 21790 }, { "epoch": 0.5983250961010433, "grad_norm": 0.40454185009002686, "learning_rate": 1.592387875651745e-05, "loss": 0.5718, "step": 21791 }, { "epoch": 0.5983525535420099, "grad_norm": 0.36648768186569214, "learning_rate": 1.5923530794812983e-05, "loss": 0.4681, "step": 21792 }, { "epoch": 0.5983800109829764, "grad_norm": 0.39048629999160767, "learning_rate": 1.592318282205924e-05, "loss": 0.4791, "step": 21793 }, { "epoch": 0.5984074684239429, "grad_norm": 0.3705051839351654, "learning_rate": 1.592283483825687e-05, "loss": 0.4728, "step": 21794 }, { "epoch": 0.5984349258649094, "grad_norm": 0.40011516213417053, "learning_rate": 1.592248684340652e-05, "loss": 0.5713, "step": 21795 }, { "epoch": 0.5984623833058759, "grad_norm": 0.39546987414360046, "learning_rate": 1.5922138837508846e-05, "loss": 0.4657, "step": 21796 }, { "epoch": 0.5984898407468424, "grad_norm": 0.3491068482398987, "learning_rate": 1.5921790820564492e-05, "loss": 0.5536, "step": 21797 }, { "epoch": 0.5985172981878089, "grad_norm": 0.3886427879333496, "learning_rate": 1.5921442792574107e-05, "loss": 0.4903, "step": 21798 }, { "epoch": 0.5985447556287754, "grad_norm": 0.3634822964668274, "learning_rate": 1.592109475353834e-05, "loss": 0.5187, "step": 21799 }, { "epoch": 0.598572213069742, "grad_norm": 0.4906805157661438, "learning_rate": 1.5920746703457845e-05, "loss": 0.595, "step": 21800 }, { "epoch": 0.5985996705107084, "grad_norm": 0.41450387239456177, "learning_rate": 1.5920398642333265e-05, "loss": 0.5043, "step": 21801 }, { "epoch": 0.5986271279516749, "grad_norm": 0.47179439663887024, "learning_rate": 1.5920050570165256e-05, "loss": 0.4622, "step": 21802 }, { "epoch": 0.5986545853926414, "grad_norm": 0.4134626090526581, "learning_rate": 1.591970248695446e-05, "loss": 0.5214, "step": 21803 }, { "epoch": 0.5986820428336079, "grad_norm": 0.33374717831611633, "learning_rate": 1.591935439270153e-05, "loss": 0.5051, "step": 21804 }, { "epoch": 0.5987095002745744, "grad_norm": 0.37324991822242737, "learning_rate": 1.5919006287407113e-05, "loss": 0.5404, "step": 21805 }, { "epoch": 0.5987369577155409, "grad_norm": 0.4127584397792816, "learning_rate": 1.5918658171071862e-05, "loss": 0.4669, "step": 21806 }, { "epoch": 0.5987644151565075, "grad_norm": 0.33512166142463684, "learning_rate": 1.5918310043696424e-05, "loss": 0.5131, "step": 21807 }, { "epoch": 0.5987918725974739, "grad_norm": 0.37965288758277893, "learning_rate": 1.591796190528145e-05, "loss": 0.5609, "step": 21808 }, { "epoch": 0.5988193300384405, "grad_norm": 0.36195001006126404, "learning_rate": 1.5917613755827588e-05, "loss": 0.6113, "step": 21809 }, { "epoch": 0.5988467874794069, "grad_norm": 0.3564576804637909, "learning_rate": 1.5917265595335486e-05, "loss": 0.5117, "step": 21810 }, { "epoch": 0.5988742449203734, "grad_norm": 0.44437670707702637, "learning_rate": 1.5916917423805796e-05, "loss": 0.5209, "step": 21811 }, { "epoch": 0.5989017023613399, "grad_norm": 0.41089722514152527, "learning_rate": 1.591656924123917e-05, "loss": 0.4502, "step": 21812 }, { "epoch": 0.5989291598023064, "grad_norm": 0.3979044556617737, "learning_rate": 1.5916221047636248e-05, "loss": 0.5347, "step": 21813 }, { "epoch": 0.598956617243273, "grad_norm": 0.35370779037475586, "learning_rate": 1.5915872842997687e-05, "loss": 0.4785, "step": 21814 }, { "epoch": 0.5989840746842394, "grad_norm": 0.3850441873073578, "learning_rate": 1.591552462732414e-05, "loss": 0.5005, "step": 21815 }, { "epoch": 0.599011532125206, "grad_norm": 0.5010243654251099, "learning_rate": 1.5915176400616247e-05, "loss": 0.561, "step": 21816 }, { "epoch": 0.5990389895661724, "grad_norm": 0.39204102754592896, "learning_rate": 1.5914828162874663e-05, "loss": 0.5109, "step": 21817 }, { "epoch": 0.599066447007139, "grad_norm": 0.4545150399208069, "learning_rate": 1.5914479914100038e-05, "loss": 0.5218, "step": 21818 }, { "epoch": 0.5990939044481054, "grad_norm": 0.35920727252960205, "learning_rate": 1.5914131654293018e-05, "loss": 0.4268, "step": 21819 }, { "epoch": 0.599121361889072, "grad_norm": 0.386434942483902, "learning_rate": 1.5913783383454255e-05, "loss": 0.5757, "step": 21820 }, { "epoch": 0.5991488193300385, "grad_norm": 0.4401495158672333, "learning_rate": 1.59134351015844e-05, "loss": 0.508, "step": 21821 }, { "epoch": 0.5991762767710049, "grad_norm": 0.43861958384513855, "learning_rate": 1.59130868086841e-05, "loss": 0.5361, "step": 21822 }, { "epoch": 0.5992037342119715, "grad_norm": 0.40603938698768616, "learning_rate": 1.5912738504754006e-05, "loss": 0.5099, "step": 21823 }, { "epoch": 0.5992311916529379, "grad_norm": 0.4112069308757782, "learning_rate": 1.5912390189794767e-05, "loss": 0.5656, "step": 21824 }, { "epoch": 0.5992586490939045, "grad_norm": 0.4254595637321472, "learning_rate": 1.5912041863807037e-05, "loss": 0.6064, "step": 21825 }, { "epoch": 0.5992861065348709, "grad_norm": 0.4631904363632202, "learning_rate": 1.591169352679146e-05, "loss": 0.4681, "step": 21826 }, { "epoch": 0.5993135639758375, "grad_norm": 0.35579225420951843, "learning_rate": 1.5911345178748686e-05, "loss": 0.5262, "step": 21827 }, { "epoch": 0.5993410214168039, "grad_norm": 0.45294302701950073, "learning_rate": 1.5910996819679366e-05, "loss": 0.5758, "step": 21828 }, { "epoch": 0.5993684788577704, "grad_norm": 0.3843606412410736, "learning_rate": 1.591064844958415e-05, "loss": 0.4943, "step": 21829 }, { "epoch": 0.599395936298737, "grad_norm": 0.34844258427619934, "learning_rate": 1.5910300068463694e-05, "loss": 0.4555, "step": 21830 }, { "epoch": 0.5994233937397034, "grad_norm": 0.4186326861381531, "learning_rate": 1.5909951676318635e-05, "loss": 0.5782, "step": 21831 }, { "epoch": 0.59945085118067, "grad_norm": 0.40605369210243225, "learning_rate": 1.5909603273149636e-05, "loss": 0.4983, "step": 21832 }, { "epoch": 0.5994783086216364, "grad_norm": 0.34883925318717957, "learning_rate": 1.5909254858957336e-05, "loss": 0.5802, "step": 21833 }, { "epoch": 0.599505766062603, "grad_norm": 0.4811926484107971, "learning_rate": 1.5908906433742393e-05, "loss": 0.5331, "step": 21834 }, { "epoch": 0.5995332235035694, "grad_norm": 0.3521099090576172, "learning_rate": 1.590855799750545e-05, "loss": 0.4773, "step": 21835 }, { "epoch": 0.599560680944536, "grad_norm": 0.3845130503177643, "learning_rate": 1.5908209550247164e-05, "loss": 0.5496, "step": 21836 }, { "epoch": 0.5995881383855025, "grad_norm": 0.3481486737728119, "learning_rate": 1.590786109196818e-05, "loss": 0.4771, "step": 21837 }, { "epoch": 0.599615595826469, "grad_norm": 0.35041698813438416, "learning_rate": 1.590751262266915e-05, "loss": 0.4893, "step": 21838 }, { "epoch": 0.5996430532674355, "grad_norm": 0.3828336000442505, "learning_rate": 1.590716414235072e-05, "loss": 0.5589, "step": 21839 }, { "epoch": 0.5996705107084019, "grad_norm": 0.36478039622306824, "learning_rate": 1.590681565101355e-05, "loss": 0.5024, "step": 21840 }, { "epoch": 0.5996979681493685, "grad_norm": 0.4144516885280609, "learning_rate": 1.590646714865828e-05, "loss": 0.5664, "step": 21841 }, { "epoch": 0.5997254255903349, "grad_norm": 0.3801555931568146, "learning_rate": 1.5906118635285562e-05, "loss": 0.569, "step": 21842 }, { "epoch": 0.5997528830313015, "grad_norm": 0.3749721348285675, "learning_rate": 1.590577011089605e-05, "loss": 0.4906, "step": 21843 }, { "epoch": 0.599780340472268, "grad_norm": 0.3475201427936554, "learning_rate": 1.5905421575490393e-05, "loss": 0.5123, "step": 21844 }, { "epoch": 0.5998077979132345, "grad_norm": 0.3830910921096802, "learning_rate": 1.5905073029069236e-05, "loss": 0.5459, "step": 21845 }, { "epoch": 0.599835255354201, "grad_norm": 0.36321207880973816, "learning_rate": 1.590472447163324e-05, "loss": 0.48, "step": 21846 }, { "epoch": 0.5998627127951675, "grad_norm": 0.37598690390586853, "learning_rate": 1.5904375903183044e-05, "loss": 0.4344, "step": 21847 }, { "epoch": 0.599890170236134, "grad_norm": 0.37261340022087097, "learning_rate": 1.59040273237193e-05, "loss": 0.5232, "step": 21848 }, { "epoch": 0.5999176276771004, "grad_norm": 0.42023324966430664, "learning_rate": 1.5903678733242664e-05, "loss": 0.6301, "step": 21849 }, { "epoch": 0.599945085118067, "grad_norm": 0.3627382516860962, "learning_rate": 1.590333013175378e-05, "loss": 0.4401, "step": 21850 }, { "epoch": 0.5999725425590335, "grad_norm": 0.38771477341651917, "learning_rate": 1.5902981519253306e-05, "loss": 0.4393, "step": 21851 }, { "epoch": 0.6, "grad_norm": 0.39763084053993225, "learning_rate": 1.5902632895741882e-05, "loss": 0.4808, "step": 21852 }, { "epoch": 0.6000274574409665, "grad_norm": 0.37308141589164734, "learning_rate": 1.5902284261220168e-05, "loss": 0.5192, "step": 21853 }, { "epoch": 0.600054914881933, "grad_norm": 0.46075206995010376, "learning_rate": 1.5901935615688812e-05, "loss": 0.5129, "step": 21854 }, { "epoch": 0.6000823723228995, "grad_norm": 0.4008239507675171, "learning_rate": 1.5901586959148456e-05, "loss": 0.5049, "step": 21855 }, { "epoch": 0.600109829763866, "grad_norm": 0.41046759486198425, "learning_rate": 1.590123829159976e-05, "loss": 0.4272, "step": 21856 }, { "epoch": 0.6001372872048325, "grad_norm": 0.4524318277835846, "learning_rate": 1.5900889613043367e-05, "loss": 0.5504, "step": 21857 }, { "epoch": 0.600164744645799, "grad_norm": 0.4111354351043701, "learning_rate": 1.5900540923479938e-05, "loss": 0.5295, "step": 21858 }, { "epoch": 0.6001922020867655, "grad_norm": 0.4033288359642029, "learning_rate": 1.5900192222910112e-05, "loss": 0.5267, "step": 21859 }, { "epoch": 0.600219659527732, "grad_norm": 0.4021080434322357, "learning_rate": 1.5899843511334546e-05, "loss": 0.5081, "step": 21860 }, { "epoch": 0.6002471169686985, "grad_norm": 0.41602179408073425, "learning_rate": 1.589949478875389e-05, "loss": 0.4854, "step": 21861 }, { "epoch": 0.600274574409665, "grad_norm": 0.3441467583179474, "learning_rate": 1.589914605516879e-05, "loss": 0.3694, "step": 21862 }, { "epoch": 0.6003020318506315, "grad_norm": 0.4055629372596741, "learning_rate": 1.5898797310579907e-05, "loss": 0.5038, "step": 21863 }, { "epoch": 0.600329489291598, "grad_norm": 0.3693135380744934, "learning_rate": 1.589844855498788e-05, "loss": 0.4934, "step": 21864 }, { "epoch": 0.6003569467325646, "grad_norm": 0.4076460003852844, "learning_rate": 1.589809978839336e-05, "loss": 0.4853, "step": 21865 }, { "epoch": 0.600384404173531, "grad_norm": 0.3547992706298828, "learning_rate": 1.589775101079701e-05, "loss": 0.513, "step": 21866 }, { "epoch": 0.6004118616144976, "grad_norm": 0.34956321120262146, "learning_rate": 1.5897402222199466e-05, "loss": 0.5112, "step": 21867 }, { "epoch": 0.600439319055464, "grad_norm": 0.3800528049468994, "learning_rate": 1.5897053422601385e-05, "loss": 0.5213, "step": 21868 }, { "epoch": 0.6004667764964305, "grad_norm": 0.35359591245651245, "learning_rate": 1.589670461200342e-05, "loss": 0.5282, "step": 21869 }, { "epoch": 0.600494233937397, "grad_norm": 0.36760810017585754, "learning_rate": 1.5896355790406216e-05, "loss": 0.4289, "step": 21870 }, { "epoch": 0.6005216913783635, "grad_norm": 0.4137773811817169, "learning_rate": 1.5896006957810428e-05, "loss": 0.5272, "step": 21871 }, { "epoch": 0.6005491488193301, "grad_norm": 0.3829239308834076, "learning_rate": 1.5895658114216707e-05, "loss": 0.5172, "step": 21872 }, { "epoch": 0.6005766062602965, "grad_norm": 0.42025095224380493, "learning_rate": 1.5895309259625696e-05, "loss": 0.5043, "step": 21873 }, { "epoch": 0.6006040637012631, "grad_norm": 0.39107775688171387, "learning_rate": 1.5894960394038057e-05, "loss": 0.5276, "step": 21874 }, { "epoch": 0.6006315211422295, "grad_norm": 0.419564813375473, "learning_rate": 1.5894611517454432e-05, "loss": 0.5057, "step": 21875 }, { "epoch": 0.6006589785831961, "grad_norm": 0.3757323920726776, "learning_rate": 1.589426262987548e-05, "loss": 0.5049, "step": 21876 }, { "epoch": 0.6006864360241625, "grad_norm": 0.4411619007587433, "learning_rate": 1.5893913731301842e-05, "loss": 0.6151, "step": 21877 }, { "epoch": 0.600713893465129, "grad_norm": 0.40758439898490906, "learning_rate": 1.5893564821734175e-05, "loss": 0.5321, "step": 21878 }, { "epoch": 0.6007413509060956, "grad_norm": 0.36475086212158203, "learning_rate": 1.5893215901173128e-05, "loss": 0.5462, "step": 21879 }, { "epoch": 0.600768808347062, "grad_norm": 0.3325038254261017, "learning_rate": 1.5892866969619355e-05, "loss": 0.4787, "step": 21880 }, { "epoch": 0.6007962657880286, "grad_norm": 0.35329484939575195, "learning_rate": 1.5892518027073504e-05, "loss": 0.5698, "step": 21881 }, { "epoch": 0.600823723228995, "grad_norm": 0.40276145935058594, "learning_rate": 1.5892169073536224e-05, "loss": 0.4872, "step": 21882 }, { "epoch": 0.6008511806699616, "grad_norm": 0.3787010908126831, "learning_rate": 1.589182010900817e-05, "loss": 0.5633, "step": 21883 }, { "epoch": 0.600878638110928, "grad_norm": 0.34360837936401367, "learning_rate": 1.589147113348999e-05, "loss": 0.5437, "step": 21884 }, { "epoch": 0.6009060955518946, "grad_norm": 0.3615754544734955, "learning_rate": 1.589112214698234e-05, "loss": 0.4644, "step": 21885 }, { "epoch": 0.6009335529928611, "grad_norm": 0.3500153124332428, "learning_rate": 1.5890773149485862e-05, "loss": 0.4627, "step": 21886 }, { "epoch": 0.6009610104338275, "grad_norm": 0.38393157720565796, "learning_rate": 1.5890424141001213e-05, "loss": 0.54, "step": 21887 }, { "epoch": 0.6009884678747941, "grad_norm": 0.4227464497089386, "learning_rate": 1.5890075121529042e-05, "loss": 0.4485, "step": 21888 }, { "epoch": 0.6010159253157605, "grad_norm": 0.3786725103855133, "learning_rate": 1.588972609107e-05, "loss": 0.4263, "step": 21889 }, { "epoch": 0.6010433827567271, "grad_norm": 0.3662354648113251, "learning_rate": 1.5889377049624744e-05, "loss": 0.5864, "step": 21890 }, { "epoch": 0.6010708401976935, "grad_norm": 0.3965330421924591, "learning_rate": 1.588902799719392e-05, "loss": 0.5326, "step": 21891 }, { "epoch": 0.6010982976386601, "grad_norm": 0.3496745824813843, "learning_rate": 1.5888678933778173e-05, "loss": 0.4418, "step": 21892 }, { "epoch": 0.6011257550796266, "grad_norm": 0.3649943768978119, "learning_rate": 1.5888329859378165e-05, "loss": 0.529, "step": 21893 }, { "epoch": 0.6011532125205931, "grad_norm": 0.4727184772491455, "learning_rate": 1.5887980773994543e-05, "loss": 0.5208, "step": 21894 }, { "epoch": 0.6011806699615596, "grad_norm": 0.4397592544555664, "learning_rate": 1.5887631677627955e-05, "loss": 0.615, "step": 21895 }, { "epoch": 0.601208127402526, "grad_norm": 0.42077216506004333, "learning_rate": 1.5887282570279054e-05, "loss": 0.4543, "step": 21896 }, { "epoch": 0.6012355848434926, "grad_norm": 0.38015732169151306, "learning_rate": 1.5886933451948495e-05, "loss": 0.5989, "step": 21897 }, { "epoch": 0.601263042284459, "grad_norm": 0.35448360443115234, "learning_rate": 1.5886584322636927e-05, "loss": 0.4769, "step": 21898 }, { "epoch": 0.6012904997254256, "grad_norm": 0.36791524291038513, "learning_rate": 1.5886235182344997e-05, "loss": 0.5012, "step": 21899 }, { "epoch": 0.6013179571663921, "grad_norm": 0.3673514425754547, "learning_rate": 1.5885886031073365e-05, "loss": 0.5225, "step": 21900 }, { "epoch": 0.6013454146073586, "grad_norm": 0.41706734895706177, "learning_rate": 1.588553686882267e-05, "loss": 0.5095, "step": 21901 }, { "epoch": 0.6013728720483251, "grad_norm": 0.3634053170681, "learning_rate": 1.5885187695593573e-05, "loss": 0.4696, "step": 21902 }, { "epoch": 0.6014003294892916, "grad_norm": 0.37967726588249207, "learning_rate": 1.5884838511386728e-05, "loss": 0.421, "step": 21903 }, { "epoch": 0.6014277869302581, "grad_norm": 0.3656013011932373, "learning_rate": 1.5884489316202773e-05, "loss": 0.4705, "step": 21904 }, { "epoch": 0.6014552443712246, "grad_norm": 0.3753873407840729, "learning_rate": 1.588414011004237e-05, "loss": 0.4496, "step": 21905 }, { "epoch": 0.6014827018121911, "grad_norm": 0.4276305139064789, "learning_rate": 1.588379089290617e-05, "loss": 0.5405, "step": 21906 }, { "epoch": 0.6015101592531577, "grad_norm": 0.3858581781387329, "learning_rate": 1.588344166479482e-05, "loss": 0.5091, "step": 21907 }, { "epoch": 0.6015376166941241, "grad_norm": 0.45314082503318787, "learning_rate": 1.5883092425708976e-05, "loss": 0.4715, "step": 21908 }, { "epoch": 0.6015650741350906, "grad_norm": 0.3316369950771332, "learning_rate": 1.5882743175649284e-05, "loss": 0.4375, "step": 21909 }, { "epoch": 0.6015925315760571, "grad_norm": 0.42109692096710205, "learning_rate": 1.5882393914616398e-05, "loss": 0.4829, "step": 21910 }, { "epoch": 0.6016199890170236, "grad_norm": 0.3924688994884491, "learning_rate": 1.588204464261097e-05, "loss": 0.5056, "step": 21911 }, { "epoch": 0.6016474464579901, "grad_norm": 0.3727668523788452, "learning_rate": 1.5881695359633652e-05, "loss": 0.4507, "step": 21912 }, { "epoch": 0.6016749038989566, "grad_norm": 0.3394390642642975, "learning_rate": 1.5881346065685097e-05, "loss": 0.4932, "step": 21913 }, { "epoch": 0.6017023613399232, "grad_norm": 0.3741621673107147, "learning_rate": 1.5880996760765953e-05, "loss": 0.517, "step": 21914 }, { "epoch": 0.6017298187808896, "grad_norm": 0.3582232594490051, "learning_rate": 1.5880647444876873e-05, "loss": 0.424, "step": 21915 }, { "epoch": 0.6017572762218562, "grad_norm": 0.3942861258983612, "learning_rate": 1.588029811801851e-05, "loss": 0.4861, "step": 21916 }, { "epoch": 0.6017847336628226, "grad_norm": 0.3412391245365143, "learning_rate": 1.587994878019151e-05, "loss": 0.4192, "step": 21917 }, { "epoch": 0.6018121911037891, "grad_norm": 0.391165554523468, "learning_rate": 1.5879599431396533e-05, "loss": 0.5178, "step": 21918 }, { "epoch": 0.6018396485447556, "grad_norm": 0.39322856068611145, "learning_rate": 1.5879250071634226e-05, "loss": 0.5361, "step": 21919 }, { "epoch": 0.6018671059857221, "grad_norm": 0.3669569790363312, "learning_rate": 1.587890070090524e-05, "loss": 0.4422, "step": 21920 }, { "epoch": 0.6018945634266887, "grad_norm": 0.46596336364746094, "learning_rate": 1.5878551319210228e-05, "loss": 0.4795, "step": 21921 }, { "epoch": 0.6019220208676551, "grad_norm": 0.3610822558403015, "learning_rate": 1.587820192654984e-05, "loss": 0.4204, "step": 21922 }, { "epoch": 0.6019494783086217, "grad_norm": 0.3694925904273987, "learning_rate": 1.5877852522924733e-05, "loss": 0.5033, "step": 21923 }, { "epoch": 0.6019769357495881, "grad_norm": 0.3494751751422882, "learning_rate": 1.587750310833555e-05, "loss": 0.3847, "step": 21924 }, { "epoch": 0.6020043931905547, "grad_norm": 0.40547749400138855, "learning_rate": 1.5877153682782955e-05, "loss": 0.4165, "step": 21925 }, { "epoch": 0.6020318506315211, "grad_norm": 0.409934937953949, "learning_rate": 1.587680424626759e-05, "loss": 0.4988, "step": 21926 }, { "epoch": 0.6020593080724876, "grad_norm": 0.3213319182395935, "learning_rate": 1.5876454798790105e-05, "loss": 0.4126, "step": 21927 }, { "epoch": 0.6020867655134542, "grad_norm": 0.38318076729774475, "learning_rate": 1.587610534035116e-05, "loss": 0.4851, "step": 21928 }, { "epoch": 0.6021142229544206, "grad_norm": 0.4118233621120453, "learning_rate": 1.5875755870951404e-05, "loss": 0.4717, "step": 21929 }, { "epoch": 0.6021416803953872, "grad_norm": 0.5177521705627441, "learning_rate": 1.5875406390591487e-05, "loss": 0.4646, "step": 21930 }, { "epoch": 0.6021691378363536, "grad_norm": 0.4124404788017273, "learning_rate": 1.587505689927206e-05, "loss": 0.4728, "step": 21931 }, { "epoch": 0.6021965952773202, "grad_norm": 0.5261924266815186, "learning_rate": 1.5874707396993776e-05, "loss": 0.4873, "step": 21932 }, { "epoch": 0.6022240527182866, "grad_norm": 0.379585862159729, "learning_rate": 1.587435788375729e-05, "loss": 0.5316, "step": 21933 }, { "epoch": 0.6022515101592532, "grad_norm": 0.3852885365486145, "learning_rate": 1.5874008359563253e-05, "loss": 0.5674, "step": 21934 }, { "epoch": 0.6022789676002197, "grad_norm": 0.3539833426475525, "learning_rate": 1.5873658824412314e-05, "loss": 0.4717, "step": 21935 }, { "epoch": 0.6023064250411861, "grad_norm": 0.3832300901412964, "learning_rate": 1.5873309278305127e-05, "loss": 0.5301, "step": 21936 }, { "epoch": 0.6023338824821527, "grad_norm": 0.39762234687805176, "learning_rate": 1.5872959721242344e-05, "loss": 0.4672, "step": 21937 }, { "epoch": 0.6023613399231191, "grad_norm": 0.37968435883522034, "learning_rate": 1.5872610153224613e-05, "loss": 0.4985, "step": 21938 }, { "epoch": 0.6023887973640857, "grad_norm": 0.36594775319099426, "learning_rate": 1.5872260574252595e-05, "loss": 0.4439, "step": 21939 }, { "epoch": 0.6024162548050521, "grad_norm": 0.4393734931945801, "learning_rate": 1.5871910984326936e-05, "loss": 0.4482, "step": 21940 }, { "epoch": 0.6024437122460187, "grad_norm": 0.3637950122356415, "learning_rate": 1.5871561383448287e-05, "loss": 0.489, "step": 21941 }, { "epoch": 0.6024711696869852, "grad_norm": 0.42783698439598083, "learning_rate": 1.58712117716173e-05, "loss": 0.5169, "step": 21942 }, { "epoch": 0.6024986271279517, "grad_norm": 0.3258659839630127, "learning_rate": 1.5870862148834633e-05, "loss": 0.3533, "step": 21943 }, { "epoch": 0.6025260845689182, "grad_norm": 0.388263076543808, "learning_rate": 1.5870512515100935e-05, "loss": 0.5363, "step": 21944 }, { "epoch": 0.6025535420098846, "grad_norm": 0.33460450172424316, "learning_rate": 1.5870162870416856e-05, "loss": 0.4975, "step": 21945 }, { "epoch": 0.6025809994508512, "grad_norm": 0.38805922865867615, "learning_rate": 1.5869813214783046e-05, "loss": 0.5059, "step": 21946 }, { "epoch": 0.6026084568918176, "grad_norm": 0.44581669569015503, "learning_rate": 1.5869463548200167e-05, "loss": 0.3835, "step": 21947 }, { "epoch": 0.6026359143327842, "grad_norm": 0.3905944228172302, "learning_rate": 1.586911387066886e-05, "loss": 0.5329, "step": 21948 }, { "epoch": 0.6026633717737507, "grad_norm": 0.3579159080982208, "learning_rate": 1.5868764182189785e-05, "loss": 0.5179, "step": 21949 }, { "epoch": 0.6026908292147172, "grad_norm": 0.37748244404792786, "learning_rate": 1.5868414482763593e-05, "loss": 0.4779, "step": 21950 }, { "epoch": 0.6027182866556837, "grad_norm": 0.3483544886112213, "learning_rate": 1.5868064772390933e-05, "loss": 0.5824, "step": 21951 }, { "epoch": 0.6027457440966502, "grad_norm": 0.5912672877311707, "learning_rate": 1.586771505107246e-05, "loss": 0.6305, "step": 21952 }, { "epoch": 0.6027732015376167, "grad_norm": 0.37713518738746643, "learning_rate": 1.5867365318808826e-05, "loss": 0.5985, "step": 21953 }, { "epoch": 0.6028006589785831, "grad_norm": 0.3961130380630493, "learning_rate": 1.586701557560068e-05, "loss": 0.5475, "step": 21954 }, { "epoch": 0.6028281164195497, "grad_norm": 0.3894003629684448, "learning_rate": 1.5866665821448682e-05, "loss": 0.5181, "step": 21955 }, { "epoch": 0.6028555738605162, "grad_norm": 0.3629873991012573, "learning_rate": 1.586631605635348e-05, "loss": 0.5163, "step": 21956 }, { "epoch": 0.6028830313014827, "grad_norm": 0.3464488983154297, "learning_rate": 1.5865966280315722e-05, "loss": 0.4484, "step": 21957 }, { "epoch": 0.6029104887424492, "grad_norm": 0.3926655352115631, "learning_rate": 1.5865616493336068e-05, "loss": 0.4958, "step": 21958 }, { "epoch": 0.6029379461834157, "grad_norm": 0.3730889856815338, "learning_rate": 1.5865266695415167e-05, "loss": 0.4261, "step": 21959 }, { "epoch": 0.6029654036243822, "grad_norm": 0.4760822355747223, "learning_rate": 1.5864916886553666e-05, "loss": 0.5575, "step": 21960 }, { "epoch": 0.6029928610653487, "grad_norm": 0.40154916048049927, "learning_rate": 1.586456706675223e-05, "loss": 0.514, "step": 21961 }, { "epoch": 0.6030203185063152, "grad_norm": 0.39313316345214844, "learning_rate": 1.5864217236011503e-05, "loss": 0.5347, "step": 21962 }, { "epoch": 0.6030477759472818, "grad_norm": 0.33279064297676086, "learning_rate": 1.586386739433214e-05, "loss": 0.4536, "step": 21963 }, { "epoch": 0.6030752333882482, "grad_norm": 0.4006602168083191, "learning_rate": 1.586351754171479e-05, "loss": 0.533, "step": 21964 }, { "epoch": 0.6031026908292147, "grad_norm": 0.38476303219795227, "learning_rate": 1.586316767816011e-05, "loss": 0.4814, "step": 21965 }, { "epoch": 0.6031301482701812, "grad_norm": 0.5103604793548584, "learning_rate": 1.586281780366875e-05, "loss": 0.6082, "step": 21966 }, { "epoch": 0.6031576057111477, "grad_norm": 0.37775400280952454, "learning_rate": 1.5862467918241366e-05, "loss": 0.594, "step": 21967 }, { "epoch": 0.6031850631521142, "grad_norm": 0.4842468500137329, "learning_rate": 1.5862118021878605e-05, "loss": 0.5558, "step": 21968 }, { "epoch": 0.6032125205930807, "grad_norm": 0.3938913643360138, "learning_rate": 1.5861768114581125e-05, "loss": 0.5114, "step": 21969 }, { "epoch": 0.6032399780340473, "grad_norm": 0.39458370208740234, "learning_rate": 1.586141819634958e-05, "loss": 0.4749, "step": 21970 }, { "epoch": 0.6032674354750137, "grad_norm": 0.37340155243873596, "learning_rate": 1.5861068267184612e-05, "loss": 0.4646, "step": 21971 }, { "epoch": 0.6032948929159803, "grad_norm": 0.30501580238342285, "learning_rate": 1.5860718327086883e-05, "loss": 0.3994, "step": 21972 }, { "epoch": 0.6033223503569467, "grad_norm": 0.3760983943939209, "learning_rate": 1.5860368376057045e-05, "loss": 0.5045, "step": 21973 }, { "epoch": 0.6033498077979133, "grad_norm": 0.3696975111961365, "learning_rate": 1.586001841409575e-05, "loss": 0.4375, "step": 21974 }, { "epoch": 0.6033772652388797, "grad_norm": 0.35255134105682373, "learning_rate": 1.5859668441203652e-05, "loss": 0.567, "step": 21975 }, { "epoch": 0.6034047226798462, "grad_norm": 0.3789617419242859, "learning_rate": 1.58593184573814e-05, "loss": 0.5169, "step": 21976 }, { "epoch": 0.6034321801208128, "grad_norm": 0.36332494020462036, "learning_rate": 1.585896846262965e-05, "loss": 0.5084, "step": 21977 }, { "epoch": 0.6034596375617792, "grad_norm": 0.41622650623321533, "learning_rate": 1.5858618456949054e-05, "loss": 0.4811, "step": 21978 }, { "epoch": 0.6034870950027458, "grad_norm": 0.39039328694343567, "learning_rate": 1.5858268440340262e-05, "loss": 0.4848, "step": 21979 }, { "epoch": 0.6035145524437122, "grad_norm": 0.38691970705986023, "learning_rate": 1.585791841280393e-05, "loss": 0.5486, "step": 21980 }, { "epoch": 0.6035420098846788, "grad_norm": 0.42754843831062317, "learning_rate": 1.5857568374340713e-05, "loss": 0.4894, "step": 21981 }, { "epoch": 0.6035694673256452, "grad_norm": 0.3838261067867279, "learning_rate": 1.585721832495126e-05, "loss": 0.6242, "step": 21982 }, { "epoch": 0.6035969247666118, "grad_norm": 0.37268924713134766, "learning_rate": 1.585686826463623e-05, "loss": 0.4947, "step": 21983 }, { "epoch": 0.6036243822075783, "grad_norm": 0.4403771162033081, "learning_rate": 1.5856518193396266e-05, "loss": 0.6128, "step": 21984 }, { "epoch": 0.6036518396485447, "grad_norm": 0.36013492941856384, "learning_rate": 1.5856168111232026e-05, "loss": 0.5198, "step": 21985 }, { "epoch": 0.6036792970895113, "grad_norm": 0.4674946963787079, "learning_rate": 1.5855818018144165e-05, "loss": 0.5743, "step": 21986 }, { "epoch": 0.6037067545304777, "grad_norm": 0.4737405478954315, "learning_rate": 1.5855467914133336e-05, "loss": 0.4853, "step": 21987 }, { "epoch": 0.6037342119714443, "grad_norm": 0.368693083524704, "learning_rate": 1.5855117799200187e-05, "loss": 0.5771, "step": 21988 }, { "epoch": 0.6037616694124107, "grad_norm": 0.3832642138004303, "learning_rate": 1.5854767673345378e-05, "loss": 0.4598, "step": 21989 }, { "epoch": 0.6037891268533773, "grad_norm": 0.4485381841659546, "learning_rate": 1.5854417536569556e-05, "loss": 0.5032, "step": 21990 }, { "epoch": 0.6038165842943438, "grad_norm": 0.44682446122169495, "learning_rate": 1.585406738887338e-05, "loss": 0.5534, "step": 21991 }, { "epoch": 0.6038440417353103, "grad_norm": 0.40228596329689026, "learning_rate": 1.5853717230257498e-05, "loss": 0.5806, "step": 21992 }, { "epoch": 0.6038714991762768, "grad_norm": 0.3701048791408539, "learning_rate": 1.5853367060722565e-05, "loss": 0.5917, "step": 21993 }, { "epoch": 0.6038989566172432, "grad_norm": 0.3823002576828003, "learning_rate": 1.5853016880269235e-05, "loss": 0.5331, "step": 21994 }, { "epoch": 0.6039264140582098, "grad_norm": 0.41685935854911804, "learning_rate": 1.585266668889816e-05, "loss": 0.4843, "step": 21995 }, { "epoch": 0.6039538714991762, "grad_norm": 0.38519036769866943, "learning_rate": 1.585231648660999e-05, "loss": 0.5083, "step": 21996 }, { "epoch": 0.6039813289401428, "grad_norm": 0.39891815185546875, "learning_rate": 1.5851966273405387e-05, "loss": 0.4825, "step": 21997 }, { "epoch": 0.6040087863811093, "grad_norm": 0.3900177776813507, "learning_rate": 1.5851616049284998e-05, "loss": 0.4841, "step": 21998 }, { "epoch": 0.6040362438220758, "grad_norm": 0.38722583651542664, "learning_rate": 1.585126581424948e-05, "loss": 0.4266, "step": 21999 }, { "epoch": 0.6040637012630423, "grad_norm": 0.43576669692993164, "learning_rate": 1.585091556829948e-05, "loss": 0.5808, "step": 22000 }, { "epoch": 0.6040911587040088, "grad_norm": 0.4100823700428009, "learning_rate": 1.5850565311435652e-05, "loss": 0.4534, "step": 22001 }, { "epoch": 0.6041186161449753, "grad_norm": 0.6824311017990112, "learning_rate": 1.5850215043658657e-05, "loss": 0.5233, "step": 22002 }, { "epoch": 0.6041460735859417, "grad_norm": 0.39708805084228516, "learning_rate": 1.5849864764969143e-05, "loss": 0.5111, "step": 22003 }, { "epoch": 0.6041735310269083, "grad_norm": 0.3641435503959656, "learning_rate": 1.5849514475367764e-05, "loss": 0.5045, "step": 22004 }, { "epoch": 0.6042009884678748, "grad_norm": 0.34734851121902466, "learning_rate": 1.5849164174855175e-05, "loss": 0.5308, "step": 22005 }, { "epoch": 0.6042284459088413, "grad_norm": 0.3554893732070923, "learning_rate": 1.5848813863432026e-05, "loss": 0.4376, "step": 22006 }, { "epoch": 0.6042559033498078, "grad_norm": 0.39100903272628784, "learning_rate": 1.5848463541098973e-05, "loss": 0.5231, "step": 22007 }, { "epoch": 0.6042833607907743, "grad_norm": 0.6587493419647217, "learning_rate": 1.5848113207856668e-05, "loss": 0.5219, "step": 22008 }, { "epoch": 0.6043108182317408, "grad_norm": 0.3491673767566681, "learning_rate": 1.5847762863705768e-05, "loss": 0.5176, "step": 22009 }, { "epoch": 0.6043382756727073, "grad_norm": 0.4434831142425537, "learning_rate": 1.584741250864692e-05, "loss": 0.5389, "step": 22010 }, { "epoch": 0.6043657331136738, "grad_norm": 0.3851216733455658, "learning_rate": 1.5847062142680784e-05, "loss": 0.5481, "step": 22011 }, { "epoch": 0.6043931905546404, "grad_norm": 0.5208058953285217, "learning_rate": 1.584671176580801e-05, "loss": 0.527, "step": 22012 }, { "epoch": 0.6044206479956068, "grad_norm": 0.4112820625305176, "learning_rate": 1.5846361378029254e-05, "loss": 0.525, "step": 22013 }, { "epoch": 0.6044481054365733, "grad_norm": 0.5114617347717285, "learning_rate": 1.5846010979345166e-05, "loss": 0.4873, "step": 22014 }, { "epoch": 0.6044755628775398, "grad_norm": 0.4386422634124756, "learning_rate": 1.5845660569756404e-05, "loss": 0.4898, "step": 22015 }, { "epoch": 0.6045030203185063, "grad_norm": 0.46579617261886597, "learning_rate": 1.5845310149263617e-05, "loss": 0.5227, "step": 22016 }, { "epoch": 0.6045304777594728, "grad_norm": 0.4261919856071472, "learning_rate": 1.5844959717867463e-05, "loss": 0.5586, "step": 22017 }, { "epoch": 0.6045579352004393, "grad_norm": 0.36521458625793457, "learning_rate": 1.5844609275568592e-05, "loss": 0.5602, "step": 22018 }, { "epoch": 0.6045853926414059, "grad_norm": 0.40752655267715454, "learning_rate": 1.5844258822367658e-05, "loss": 0.4777, "step": 22019 }, { "epoch": 0.6046128500823723, "grad_norm": 0.38092315196990967, "learning_rate": 1.5843908358265322e-05, "loss": 0.5912, "step": 22020 }, { "epoch": 0.6046403075233389, "grad_norm": 0.350268691778183, "learning_rate": 1.5843557883262224e-05, "loss": 0.4893, "step": 22021 }, { "epoch": 0.6046677649643053, "grad_norm": 0.42278265953063965, "learning_rate": 1.5843207397359033e-05, "loss": 0.5272, "step": 22022 }, { "epoch": 0.6046952224052718, "grad_norm": 0.4499445855617523, "learning_rate": 1.584285690055639e-05, "loss": 0.4741, "step": 22023 }, { "epoch": 0.6047226798462383, "grad_norm": 0.382244735956192, "learning_rate": 1.5842506392854958e-05, "loss": 0.5081, "step": 22024 }, { "epoch": 0.6047501372872048, "grad_norm": 0.4162083566188812, "learning_rate": 1.584215587425538e-05, "loss": 0.4692, "step": 22025 }, { "epoch": 0.6047775947281714, "grad_norm": 0.3337762653827667, "learning_rate": 1.5841805344758325e-05, "loss": 0.4367, "step": 22026 }, { "epoch": 0.6048050521691378, "grad_norm": 0.39718976616859436, "learning_rate": 1.5841454804364437e-05, "loss": 0.558, "step": 22027 }, { "epoch": 0.6048325096101044, "grad_norm": 0.38476407527923584, "learning_rate": 1.5841104253074363e-05, "loss": 0.479, "step": 22028 }, { "epoch": 0.6048599670510708, "grad_norm": 0.4053555428981781, "learning_rate": 1.5840753690888773e-05, "loss": 0.5724, "step": 22029 }, { "epoch": 0.6048874244920374, "grad_norm": 0.38143840432167053, "learning_rate": 1.584040311780831e-05, "loss": 0.502, "step": 22030 }, { "epoch": 0.6049148819330038, "grad_norm": 0.44752082228660583, "learning_rate": 1.5840052533833638e-05, "loss": 0.6008, "step": 22031 }, { "epoch": 0.6049423393739704, "grad_norm": 0.3775634467601776, "learning_rate": 1.5839701938965397e-05, "loss": 0.5143, "step": 22032 }, { "epoch": 0.6049697968149369, "grad_norm": 0.34329158067703247, "learning_rate": 1.583935133320425e-05, "loss": 0.4434, "step": 22033 }, { "epoch": 0.6049972542559033, "grad_norm": 0.38565322756767273, "learning_rate": 1.583900071655085e-05, "loss": 0.5364, "step": 22034 }, { "epoch": 0.6050247116968699, "grad_norm": 0.385924369096756, "learning_rate": 1.5838650089005847e-05, "loss": 0.4859, "step": 22035 }, { "epoch": 0.6050521691378363, "grad_norm": 0.465825617313385, "learning_rate": 1.58382994505699e-05, "loss": 0.5728, "step": 22036 }, { "epoch": 0.6050796265788029, "grad_norm": 0.39024296402931213, "learning_rate": 1.583794880124366e-05, "loss": 0.5511, "step": 22037 }, { "epoch": 0.6051070840197693, "grad_norm": 0.38440871238708496, "learning_rate": 1.5837598141027784e-05, "loss": 0.4558, "step": 22038 }, { "epoch": 0.6051345414607359, "grad_norm": 0.386993944644928, "learning_rate": 1.583724746992293e-05, "loss": 0.5569, "step": 22039 }, { "epoch": 0.6051619989017024, "grad_norm": 0.35573261976242065, "learning_rate": 1.5836896787929736e-05, "loss": 0.5213, "step": 22040 }, { "epoch": 0.6051894563426689, "grad_norm": 0.3912200629711151, "learning_rate": 1.5836546095048873e-05, "loss": 0.5338, "step": 22041 }, { "epoch": 0.6052169137836354, "grad_norm": 0.38286322355270386, "learning_rate": 1.5836195391280984e-05, "loss": 0.5278, "step": 22042 }, { "epoch": 0.6052443712246018, "grad_norm": 0.3643689453601837, "learning_rate": 1.5835844676626732e-05, "loss": 0.509, "step": 22043 }, { "epoch": 0.6052718286655684, "grad_norm": 0.39928707480430603, "learning_rate": 1.5835493951086766e-05, "loss": 0.461, "step": 22044 }, { "epoch": 0.6052992861065348, "grad_norm": 0.34975969791412354, "learning_rate": 1.583514321466174e-05, "loss": 0.459, "step": 22045 }, { "epoch": 0.6053267435475014, "grad_norm": 0.3743583559989929, "learning_rate": 1.583479246735231e-05, "loss": 0.4258, "step": 22046 }, { "epoch": 0.6053542009884679, "grad_norm": 0.36621275544166565, "learning_rate": 1.5834441709159132e-05, "loss": 0.5417, "step": 22047 }, { "epoch": 0.6053816584294344, "grad_norm": 0.39711233973503113, "learning_rate": 1.5834090940082855e-05, "loss": 0.5181, "step": 22048 }, { "epoch": 0.6054091158704009, "grad_norm": 0.3767964243888855, "learning_rate": 1.5833740160124138e-05, "loss": 0.5255, "step": 22049 }, { "epoch": 0.6054365733113674, "grad_norm": 0.38204848766326904, "learning_rate": 1.5833389369283634e-05, "loss": 0.4886, "step": 22050 }, { "epoch": 0.6054640307523339, "grad_norm": 0.7063992619514465, "learning_rate": 1.5833038567561995e-05, "loss": 0.5643, "step": 22051 }, { "epoch": 0.6054914881933003, "grad_norm": 0.3610214591026306, "learning_rate": 1.583268775495988e-05, "loss": 0.4851, "step": 22052 }, { "epoch": 0.6055189456342669, "grad_norm": 0.38465631008148193, "learning_rate": 1.5832336931477937e-05, "loss": 0.4708, "step": 22053 }, { "epoch": 0.6055464030752334, "grad_norm": 0.4002041518688202, "learning_rate": 1.583198609711683e-05, "loss": 0.4663, "step": 22054 }, { "epoch": 0.6055738605161999, "grad_norm": 0.36461061239242554, "learning_rate": 1.5831635251877205e-05, "loss": 0.5334, "step": 22055 }, { "epoch": 0.6056013179571664, "grad_norm": 0.3981207311153412, "learning_rate": 1.5831284395759717e-05, "loss": 0.4591, "step": 22056 }, { "epoch": 0.6056287753981329, "grad_norm": 0.412582129240036, "learning_rate": 1.583093352876502e-05, "loss": 0.5127, "step": 22057 }, { "epoch": 0.6056562328390994, "grad_norm": 0.38834890723228455, "learning_rate": 1.583058265089378e-05, "loss": 0.5061, "step": 22058 }, { "epoch": 0.6056836902800659, "grad_norm": 0.45924457907676697, "learning_rate": 1.5830231762146634e-05, "loss": 0.5281, "step": 22059 }, { "epoch": 0.6057111477210324, "grad_norm": 0.37035611271858215, "learning_rate": 1.5829880862524252e-05, "loss": 0.5188, "step": 22060 }, { "epoch": 0.605738605161999, "grad_norm": 0.42507433891296387, "learning_rate": 1.5829529952027276e-05, "loss": 0.514, "step": 22061 }, { "epoch": 0.6057660626029654, "grad_norm": 0.3672136962413788, "learning_rate": 1.582917903065637e-05, "loss": 0.5013, "step": 22062 }, { "epoch": 0.6057935200439319, "grad_norm": 0.41776242852211, "learning_rate": 1.582882809841218e-05, "loss": 0.5143, "step": 22063 }, { "epoch": 0.6058209774848984, "grad_norm": 0.36281895637512207, "learning_rate": 1.5828477155295366e-05, "loss": 0.5113, "step": 22064 }, { "epoch": 0.6058484349258649, "grad_norm": 0.4372508227825165, "learning_rate": 1.5828126201306585e-05, "loss": 0.544, "step": 22065 }, { "epoch": 0.6058758923668314, "grad_norm": 0.3605383038520813, "learning_rate": 1.582777523644649e-05, "loss": 0.4445, "step": 22066 }, { "epoch": 0.6059033498077979, "grad_norm": 0.33509519696235657, "learning_rate": 1.582742426071573e-05, "loss": 0.5025, "step": 22067 }, { "epoch": 0.6059308072487645, "grad_norm": 0.42036348581314087, "learning_rate": 1.5827073274114965e-05, "loss": 0.5581, "step": 22068 }, { "epoch": 0.6059582646897309, "grad_norm": 0.4040825366973877, "learning_rate": 1.582672227664485e-05, "loss": 0.6065, "step": 22069 }, { "epoch": 0.6059857221306975, "grad_norm": 0.48206230998039246, "learning_rate": 1.5826371268306037e-05, "loss": 0.5691, "step": 22070 }, { "epoch": 0.6060131795716639, "grad_norm": 0.4056321680545807, "learning_rate": 1.5826020249099178e-05, "loss": 0.5332, "step": 22071 }, { "epoch": 0.6060406370126304, "grad_norm": 0.3296893835067749, "learning_rate": 1.5825669219024934e-05, "loss": 0.4048, "step": 22072 }, { "epoch": 0.6060680944535969, "grad_norm": 0.35816094279289246, "learning_rate": 1.5825318178083963e-05, "loss": 0.5036, "step": 22073 }, { "epoch": 0.6060955518945634, "grad_norm": 0.3891911208629608, "learning_rate": 1.582496712627691e-05, "loss": 0.506, "step": 22074 }, { "epoch": 0.60612300933553, "grad_norm": 0.4552501440048218, "learning_rate": 1.5824616063604434e-05, "loss": 0.5564, "step": 22075 }, { "epoch": 0.6061504667764964, "grad_norm": 0.330242395401001, "learning_rate": 1.582426499006719e-05, "loss": 0.4454, "step": 22076 }, { "epoch": 0.606177924217463, "grad_norm": 0.4871516525745392, "learning_rate": 1.5823913905665837e-05, "loss": 0.5412, "step": 22077 }, { "epoch": 0.6062053816584294, "grad_norm": 0.5009018778800964, "learning_rate": 1.582356281040102e-05, "loss": 0.4972, "step": 22078 }, { "epoch": 0.606232839099396, "grad_norm": 0.34702619910240173, "learning_rate": 1.5823211704273402e-05, "loss": 0.5153, "step": 22079 }, { "epoch": 0.6062602965403624, "grad_norm": 0.3811483085155487, "learning_rate": 1.5822860587283637e-05, "loss": 0.4957, "step": 22080 }, { "epoch": 0.606287753981329, "grad_norm": 0.4125136137008667, "learning_rate": 1.582250945943238e-05, "loss": 0.5517, "step": 22081 }, { "epoch": 0.6063152114222955, "grad_norm": 0.3311814069747925, "learning_rate": 1.582215832072028e-05, "loss": 0.473, "step": 22082 }, { "epoch": 0.6063426688632619, "grad_norm": 0.3878011405467987, "learning_rate": 1.5821807171148e-05, "loss": 0.4384, "step": 22083 }, { "epoch": 0.6063701263042285, "grad_norm": 0.38572394847869873, "learning_rate": 1.5821456010716187e-05, "loss": 0.5004, "step": 22084 }, { "epoch": 0.6063975837451949, "grad_norm": 0.3880552053451538, "learning_rate": 1.5821104839425505e-05, "loss": 0.5454, "step": 22085 }, { "epoch": 0.6064250411861615, "grad_norm": 0.4241284430027008, "learning_rate": 1.5820753657276606e-05, "loss": 0.4663, "step": 22086 }, { "epoch": 0.6064524986271279, "grad_norm": 0.37312987446784973, "learning_rate": 1.5820402464270136e-05, "loss": 0.6423, "step": 22087 }, { "epoch": 0.6064799560680945, "grad_norm": 0.3681066036224365, "learning_rate": 1.5820051260406765e-05, "loss": 0.5087, "step": 22088 }, { "epoch": 0.606507413509061, "grad_norm": 0.3662545084953308, "learning_rate": 1.5819700045687135e-05, "loss": 0.5006, "step": 22089 }, { "epoch": 0.6065348709500274, "grad_norm": 0.4031451940536499, "learning_rate": 1.5819348820111913e-05, "loss": 0.5658, "step": 22090 }, { "epoch": 0.606562328390994, "grad_norm": 0.34718087315559387, "learning_rate": 1.5818997583681745e-05, "loss": 0.4529, "step": 22091 }, { "epoch": 0.6065897858319604, "grad_norm": 0.35310637950897217, "learning_rate": 1.581864633639729e-05, "loss": 0.528, "step": 22092 }, { "epoch": 0.606617243272927, "grad_norm": 0.43954989314079285, "learning_rate": 1.5818295078259204e-05, "loss": 0.5546, "step": 22093 }, { "epoch": 0.6066447007138934, "grad_norm": 0.514403760433197, "learning_rate": 1.5817943809268137e-05, "loss": 0.4346, "step": 22094 }, { "epoch": 0.60667215815486, "grad_norm": 0.39188629388809204, "learning_rate": 1.581759252942475e-05, "loss": 0.5332, "step": 22095 }, { "epoch": 0.6066996155958264, "grad_norm": 0.40558576583862305, "learning_rate": 1.5817241238729698e-05, "loss": 0.5214, "step": 22096 }, { "epoch": 0.606727073036793, "grad_norm": 0.3398810625076294, "learning_rate": 1.581688993718363e-05, "loss": 0.4107, "step": 22097 }, { "epoch": 0.6067545304777595, "grad_norm": 0.4979395270347595, "learning_rate": 1.581653862478721e-05, "loss": 0.5787, "step": 22098 }, { "epoch": 0.606781987918726, "grad_norm": 0.3995693325996399, "learning_rate": 1.5816187301541087e-05, "loss": 0.4773, "step": 22099 }, { "epoch": 0.6068094453596925, "grad_norm": 0.36908799409866333, "learning_rate": 1.581583596744592e-05, "loss": 0.5241, "step": 22100 }, { "epoch": 0.6068369028006589, "grad_norm": 0.3589445650577545, "learning_rate": 1.581548462250236e-05, "loss": 0.4986, "step": 22101 }, { "epoch": 0.6068643602416255, "grad_norm": 0.3230968415737152, "learning_rate": 1.5815133266711065e-05, "loss": 0.4185, "step": 22102 }, { "epoch": 0.6068918176825919, "grad_norm": 0.41053497791290283, "learning_rate": 1.5814781900072695e-05, "loss": 0.5187, "step": 22103 }, { "epoch": 0.6069192751235585, "grad_norm": 0.39029964804649353, "learning_rate": 1.5814430522587894e-05, "loss": 0.4453, "step": 22104 }, { "epoch": 0.606946732564525, "grad_norm": 0.35968488454818726, "learning_rate": 1.581407913425733e-05, "loss": 0.5161, "step": 22105 }, { "epoch": 0.6069741900054915, "grad_norm": 0.7116995453834534, "learning_rate": 1.581372773508165e-05, "loss": 0.5388, "step": 22106 }, { "epoch": 0.607001647446458, "grad_norm": 0.39681312441825867, "learning_rate": 1.5813376325061515e-05, "loss": 0.5519, "step": 22107 }, { "epoch": 0.6070291048874245, "grad_norm": 0.3241901099681854, "learning_rate": 1.5813024904197573e-05, "loss": 0.4224, "step": 22108 }, { "epoch": 0.607056562328391, "grad_norm": 0.4172731339931488, "learning_rate": 1.581267347249049e-05, "loss": 0.5128, "step": 22109 }, { "epoch": 0.6070840197693574, "grad_norm": 0.38281717896461487, "learning_rate": 1.5812322029940915e-05, "loss": 0.4974, "step": 22110 }, { "epoch": 0.607111477210324, "grad_norm": 0.38229283690452576, "learning_rate": 1.58119705765495e-05, "loss": 0.4022, "step": 22111 }, { "epoch": 0.6071389346512905, "grad_norm": 0.37011414766311646, "learning_rate": 1.581161911231691e-05, "loss": 0.561, "step": 22112 }, { "epoch": 0.607166392092257, "grad_norm": 0.4037761092185974, "learning_rate": 1.581126763724379e-05, "loss": 0.4374, "step": 22113 }, { "epoch": 0.6071938495332235, "grad_norm": 0.35760006308555603, "learning_rate": 1.5810916151330803e-05, "loss": 0.5416, "step": 22114 }, { "epoch": 0.60722130697419, "grad_norm": 0.3770168721675873, "learning_rate": 1.5810564654578607e-05, "loss": 0.55, "step": 22115 }, { "epoch": 0.6072487644151565, "grad_norm": 0.38601550459861755, "learning_rate": 1.581021314698785e-05, "loss": 0.488, "step": 22116 }, { "epoch": 0.607276221856123, "grad_norm": 0.37698793411254883, "learning_rate": 1.580986162855919e-05, "loss": 0.5317, "step": 22117 }, { "epoch": 0.6073036792970895, "grad_norm": 0.39780402183532715, "learning_rate": 1.5809510099293284e-05, "loss": 0.5391, "step": 22118 }, { "epoch": 0.607331136738056, "grad_norm": 0.3784915506839752, "learning_rate": 1.580915855919079e-05, "loss": 0.463, "step": 22119 }, { "epoch": 0.6073585941790225, "grad_norm": 0.3905419707298279, "learning_rate": 1.5808807008252357e-05, "loss": 0.3821, "step": 22120 }, { "epoch": 0.607386051619989, "grad_norm": 0.3899904191493988, "learning_rate": 1.580845544647865e-05, "loss": 0.4019, "step": 22121 }, { "epoch": 0.6074135090609555, "grad_norm": 0.4141240119934082, "learning_rate": 1.5808103873870316e-05, "loss": 0.4964, "step": 22122 }, { "epoch": 0.607440966501922, "grad_norm": 0.3833721876144409, "learning_rate": 1.5807752290428014e-05, "loss": 0.491, "step": 22123 }, { "epoch": 0.6074684239428885, "grad_norm": 0.42812344431877136, "learning_rate": 1.5807400696152405e-05, "loss": 0.5241, "step": 22124 }, { "epoch": 0.607495881383855, "grad_norm": 0.3568246066570282, "learning_rate": 1.5807049091044136e-05, "loss": 0.5473, "step": 22125 }, { "epoch": 0.6075233388248216, "grad_norm": 0.36504271626472473, "learning_rate": 1.580669747510387e-05, "loss": 0.4721, "step": 22126 }, { "epoch": 0.607550796265788, "grad_norm": 0.689912736415863, "learning_rate": 1.580634584833226e-05, "loss": 0.6213, "step": 22127 }, { "epoch": 0.6075782537067546, "grad_norm": 0.42413294315338135, "learning_rate": 1.5805994210729957e-05, "loss": 0.4986, "step": 22128 }, { "epoch": 0.607605711147721, "grad_norm": 0.38781794905662537, "learning_rate": 1.5805642562297626e-05, "loss": 0.488, "step": 22129 }, { "epoch": 0.6076331685886875, "grad_norm": 0.38299888372421265, "learning_rate": 1.5805290903035918e-05, "loss": 0.5014, "step": 22130 }, { "epoch": 0.607660626029654, "grad_norm": 0.41099846363067627, "learning_rate": 1.5804939232945487e-05, "loss": 0.5192, "step": 22131 }, { "epoch": 0.6076880834706205, "grad_norm": 0.3956263065338135, "learning_rate": 1.5804587552026997e-05, "loss": 0.5487, "step": 22132 }, { "epoch": 0.6077155409115871, "grad_norm": 0.3573462665081024, "learning_rate": 1.580423586028109e-05, "loss": 0.4534, "step": 22133 }, { "epoch": 0.6077429983525535, "grad_norm": 0.3751536011695862, "learning_rate": 1.5803884157708437e-05, "loss": 0.5375, "step": 22134 }, { "epoch": 0.6077704557935201, "grad_norm": 0.35012564063072205, "learning_rate": 1.5803532444309692e-05, "loss": 0.4569, "step": 22135 }, { "epoch": 0.6077979132344865, "grad_norm": 0.39495429396629333, "learning_rate": 1.5803180720085497e-05, "loss": 0.4655, "step": 22136 }, { "epoch": 0.6078253706754531, "grad_norm": 0.4516630470752716, "learning_rate": 1.5802828985036526e-05, "loss": 0.4964, "step": 22137 }, { "epoch": 0.6078528281164195, "grad_norm": 0.4064875543117523, "learning_rate": 1.5802477239163422e-05, "loss": 0.4325, "step": 22138 }, { "epoch": 0.607880285557386, "grad_norm": 0.43938004970550537, "learning_rate": 1.5802125482466847e-05, "loss": 0.4949, "step": 22139 }, { "epoch": 0.6079077429983526, "grad_norm": 0.3735351860523224, "learning_rate": 1.5801773714947457e-05, "loss": 0.4857, "step": 22140 }, { "epoch": 0.607935200439319, "grad_norm": 0.37340858578681946, "learning_rate": 1.5801421936605904e-05, "loss": 0.5456, "step": 22141 }, { "epoch": 0.6079626578802856, "grad_norm": 0.3619963228702545, "learning_rate": 1.580107014744285e-05, "loss": 0.4814, "step": 22142 }, { "epoch": 0.607990115321252, "grad_norm": 0.4126189649105072, "learning_rate": 1.580071834745895e-05, "loss": 0.5151, "step": 22143 }, { "epoch": 0.6080175727622186, "grad_norm": 0.329281747341156, "learning_rate": 1.5800366536654857e-05, "loss": 0.4687, "step": 22144 }, { "epoch": 0.608045030203185, "grad_norm": 0.385412335395813, "learning_rate": 1.580001471503123e-05, "loss": 0.4565, "step": 22145 }, { "epoch": 0.6080724876441516, "grad_norm": 0.3456641137599945, "learning_rate": 1.5799662882588724e-05, "loss": 0.5119, "step": 22146 }, { "epoch": 0.6080999450851181, "grad_norm": 0.4667154848575592, "learning_rate": 1.5799311039327997e-05, "loss": 0.5187, "step": 22147 }, { "epoch": 0.6081274025260845, "grad_norm": 0.45305895805358887, "learning_rate": 1.5798959185249704e-05, "loss": 0.5699, "step": 22148 }, { "epoch": 0.6081548599670511, "grad_norm": 0.3830673098564148, "learning_rate": 1.57986073203545e-05, "loss": 0.5125, "step": 22149 }, { "epoch": 0.6081823174080175, "grad_norm": 0.4098523259162903, "learning_rate": 1.5798255444643042e-05, "loss": 0.5614, "step": 22150 }, { "epoch": 0.6082097748489841, "grad_norm": 0.4794161915779114, "learning_rate": 1.579790355811599e-05, "loss": 0.5462, "step": 22151 }, { "epoch": 0.6082372322899505, "grad_norm": 0.36046695709228516, "learning_rate": 1.5797551660773992e-05, "loss": 0.453, "step": 22152 }, { "epoch": 0.6082646897309171, "grad_norm": 0.4022983908653259, "learning_rate": 1.5797199752617714e-05, "loss": 0.5717, "step": 22153 }, { "epoch": 0.6082921471718836, "grad_norm": 0.3415639400482178, "learning_rate": 1.5796847833647807e-05, "loss": 0.4571, "step": 22154 }, { "epoch": 0.6083196046128501, "grad_norm": 0.39974719285964966, "learning_rate": 1.579649590386493e-05, "loss": 0.5214, "step": 22155 }, { "epoch": 0.6083470620538166, "grad_norm": 0.4378117322921753, "learning_rate": 1.5796143963269737e-05, "loss": 0.5754, "step": 22156 }, { "epoch": 0.608374519494783, "grad_norm": 0.383973628282547, "learning_rate": 1.5795792011862886e-05, "loss": 0.4821, "step": 22157 }, { "epoch": 0.6084019769357496, "grad_norm": 0.34439054131507874, "learning_rate": 1.5795440049645032e-05, "loss": 0.5771, "step": 22158 }, { "epoch": 0.608429434376716, "grad_norm": 0.38958635926246643, "learning_rate": 1.579508807661683e-05, "loss": 0.5647, "step": 22159 }, { "epoch": 0.6084568918176826, "grad_norm": 0.35139337182044983, "learning_rate": 1.5794736092778946e-05, "loss": 0.5245, "step": 22160 }, { "epoch": 0.6084843492586491, "grad_norm": 0.3289163112640381, "learning_rate": 1.579438409813203e-05, "loss": 0.4481, "step": 22161 }, { "epoch": 0.6085118066996156, "grad_norm": 0.4133893847465515, "learning_rate": 1.579403209267673e-05, "loss": 0.4786, "step": 22162 }, { "epoch": 0.6085392641405821, "grad_norm": 0.3905380964279175, "learning_rate": 1.5793680076413718e-05, "loss": 0.5899, "step": 22163 }, { "epoch": 0.6085667215815486, "grad_norm": 0.35780879855155945, "learning_rate": 1.5793328049343637e-05, "loss": 0.4488, "step": 22164 }, { "epoch": 0.6085941790225151, "grad_norm": 0.3628959059715271, "learning_rate": 1.5792976011467156e-05, "loss": 0.5132, "step": 22165 }, { "epoch": 0.6086216364634816, "grad_norm": 0.35345855355262756, "learning_rate": 1.5792623962784924e-05, "loss": 0.4446, "step": 22166 }, { "epoch": 0.6086490939044481, "grad_norm": 0.35759517550468445, "learning_rate": 1.57922719032976e-05, "loss": 0.467, "step": 22167 }, { "epoch": 0.6086765513454147, "grad_norm": 0.41339898109436035, "learning_rate": 1.579191983300584e-05, "loss": 0.577, "step": 22168 }, { "epoch": 0.6087040087863811, "grad_norm": 0.4117659628391266, "learning_rate": 1.57915677519103e-05, "loss": 0.5659, "step": 22169 }, { "epoch": 0.6087314662273476, "grad_norm": 0.45280247926712036, "learning_rate": 1.579121566001164e-05, "loss": 0.5908, "step": 22170 }, { "epoch": 0.6087589236683141, "grad_norm": 0.36044663190841675, "learning_rate": 1.5790863557310512e-05, "loss": 0.5338, "step": 22171 }, { "epoch": 0.6087863811092806, "grad_norm": 0.34795480966567993, "learning_rate": 1.5790511443807576e-05, "loss": 0.5583, "step": 22172 }, { "epoch": 0.6088138385502471, "grad_norm": 0.4134006202220917, "learning_rate": 1.5790159319503485e-05, "loss": 0.504, "step": 22173 }, { "epoch": 0.6088412959912136, "grad_norm": 0.3408929705619812, "learning_rate": 1.5789807184398905e-05, "loss": 0.5709, "step": 22174 }, { "epoch": 0.6088687534321802, "grad_norm": 0.36412474513053894, "learning_rate": 1.578945503849448e-05, "loss": 0.4595, "step": 22175 }, { "epoch": 0.6088962108731466, "grad_norm": 0.3979083299636841, "learning_rate": 1.578910288179088e-05, "loss": 0.4969, "step": 22176 }, { "epoch": 0.6089236683141132, "grad_norm": 0.3977890610694885, "learning_rate": 1.5788750714288753e-05, "loss": 0.4747, "step": 22177 }, { "epoch": 0.6089511257550796, "grad_norm": 0.39019882678985596, "learning_rate": 1.5788398535988758e-05, "loss": 0.5, "step": 22178 }, { "epoch": 0.6089785831960461, "grad_norm": 0.3748627007007599, "learning_rate": 1.5788046346891553e-05, "loss": 0.4674, "step": 22179 }, { "epoch": 0.6090060406370126, "grad_norm": 0.3849509358406067, "learning_rate": 1.5787694146997796e-05, "loss": 0.4832, "step": 22180 }, { "epoch": 0.6090334980779791, "grad_norm": 0.341423362493515, "learning_rate": 1.5787341936308135e-05, "loss": 0.525, "step": 22181 }, { "epoch": 0.6090609555189457, "grad_norm": 0.38075128197669983, "learning_rate": 1.5786989714823244e-05, "loss": 0.45, "step": 22182 }, { "epoch": 0.6090884129599121, "grad_norm": 0.3354419469833374, "learning_rate": 1.578663748254376e-05, "loss": 0.4371, "step": 22183 }, { "epoch": 0.6091158704008787, "grad_norm": 0.37784838676452637, "learning_rate": 1.5786285239470357e-05, "loss": 0.4821, "step": 22184 }, { "epoch": 0.6091433278418451, "grad_norm": 0.3513641953468323, "learning_rate": 1.5785932985603683e-05, "loss": 0.5098, "step": 22185 }, { "epoch": 0.6091707852828117, "grad_norm": 0.40708065032958984, "learning_rate": 1.57855807209444e-05, "loss": 0.4362, "step": 22186 }, { "epoch": 0.6091982427237781, "grad_norm": 0.404041588306427, "learning_rate": 1.578522844549316e-05, "loss": 0.4709, "step": 22187 }, { "epoch": 0.6092257001647446, "grad_norm": 0.3751591444015503, "learning_rate": 1.578487615925062e-05, "loss": 0.5196, "step": 22188 }, { "epoch": 0.6092531576057112, "grad_norm": 0.35765376687049866, "learning_rate": 1.5784523862217443e-05, "loss": 0.496, "step": 22189 }, { "epoch": 0.6092806150466776, "grad_norm": 0.5689919590950012, "learning_rate": 1.5784171554394283e-05, "loss": 0.5224, "step": 22190 }, { "epoch": 0.6093080724876442, "grad_norm": 0.34813812375068665, "learning_rate": 1.5783819235781797e-05, "loss": 0.4978, "step": 22191 }, { "epoch": 0.6093355299286106, "grad_norm": 0.37208694219589233, "learning_rate": 1.578346690638064e-05, "loss": 0.4551, "step": 22192 }, { "epoch": 0.6093629873695772, "grad_norm": 0.4006715714931488, "learning_rate": 1.5783114566191472e-05, "loss": 0.508, "step": 22193 }, { "epoch": 0.6093904448105436, "grad_norm": 0.43344560265541077, "learning_rate": 1.578276221521495e-05, "loss": 0.616, "step": 22194 }, { "epoch": 0.6094179022515102, "grad_norm": 0.4291875660419464, "learning_rate": 1.5782409853451734e-05, "loss": 0.5072, "step": 22195 }, { "epoch": 0.6094453596924767, "grad_norm": 0.34838780760765076, "learning_rate": 1.5782057480902473e-05, "loss": 0.5197, "step": 22196 }, { "epoch": 0.6094728171334431, "grad_norm": 0.38382917642593384, "learning_rate": 1.5781705097567833e-05, "loss": 0.4904, "step": 22197 }, { "epoch": 0.6095002745744097, "grad_norm": 0.3871453106403351, "learning_rate": 1.578135270344847e-05, "loss": 0.5058, "step": 22198 }, { "epoch": 0.6095277320153761, "grad_norm": 0.47653159499168396, "learning_rate": 1.578100029854503e-05, "loss": 0.577, "step": 22199 }, { "epoch": 0.6095551894563427, "grad_norm": 0.36534708738327026, "learning_rate": 1.578064788285819e-05, "loss": 0.5429, "step": 22200 }, { "epoch": 0.6095826468973091, "grad_norm": 0.3328348994255066, "learning_rate": 1.5780295456388587e-05, "loss": 0.471, "step": 22201 }, { "epoch": 0.6096101043382757, "grad_norm": 0.396613746881485, "learning_rate": 1.5779943019136896e-05, "loss": 0.4894, "step": 22202 }, { "epoch": 0.6096375617792422, "grad_norm": 0.3427937924861908, "learning_rate": 1.577959057110376e-05, "loss": 0.528, "step": 22203 }, { "epoch": 0.6096650192202087, "grad_norm": 0.39479321241378784, "learning_rate": 1.5779238112289847e-05, "loss": 0.5505, "step": 22204 }, { "epoch": 0.6096924766611752, "grad_norm": 0.41960492730140686, "learning_rate": 1.577888564269581e-05, "loss": 0.4446, "step": 22205 }, { "epoch": 0.6097199341021416, "grad_norm": 0.34106117486953735, "learning_rate": 1.5778533162322306e-05, "loss": 0.5086, "step": 22206 }, { "epoch": 0.6097473915431082, "grad_norm": 0.38357850909233093, "learning_rate": 1.5778180671169994e-05, "loss": 0.6167, "step": 22207 }, { "epoch": 0.6097748489840746, "grad_norm": 0.3277190625667572, "learning_rate": 1.5777828169239527e-05, "loss": 0.4569, "step": 22208 }, { "epoch": 0.6098023064250412, "grad_norm": 0.44710683822631836, "learning_rate": 1.5777475656531572e-05, "loss": 0.5501, "step": 22209 }, { "epoch": 0.6098297638660077, "grad_norm": 0.46421998739242554, "learning_rate": 1.577712313304678e-05, "loss": 0.5072, "step": 22210 }, { "epoch": 0.6098572213069742, "grad_norm": 0.3625626266002655, "learning_rate": 1.5776770598785807e-05, "loss": 0.5418, "step": 22211 }, { "epoch": 0.6098846787479407, "grad_norm": 0.4489111006259918, "learning_rate": 1.5776418053749315e-05, "loss": 0.4951, "step": 22212 }, { "epoch": 0.6099121361889072, "grad_norm": 0.34615063667297363, "learning_rate": 1.577606549793796e-05, "loss": 0.4332, "step": 22213 }, { "epoch": 0.6099395936298737, "grad_norm": 0.4649277627468109, "learning_rate": 1.57757129313524e-05, "loss": 0.4734, "step": 22214 }, { "epoch": 0.6099670510708401, "grad_norm": 0.35214611887931824, "learning_rate": 1.577536035399329e-05, "loss": 0.5238, "step": 22215 }, { "epoch": 0.6099945085118067, "grad_norm": 0.395408570766449, "learning_rate": 1.577500776586129e-05, "loss": 0.5115, "step": 22216 }, { "epoch": 0.6100219659527732, "grad_norm": 0.3464343547821045, "learning_rate": 1.5774655166957055e-05, "loss": 0.4399, "step": 22217 }, { "epoch": 0.6100494233937397, "grad_norm": 0.4346863627433777, "learning_rate": 1.5774302557281253e-05, "loss": 0.5518, "step": 22218 }, { "epoch": 0.6100768808347062, "grad_norm": 0.38884350657463074, "learning_rate": 1.5773949936834525e-05, "loss": 0.4731, "step": 22219 }, { "epoch": 0.6101043382756727, "grad_norm": 0.39773014187812805, "learning_rate": 1.577359730561754e-05, "loss": 0.4282, "step": 22220 }, { "epoch": 0.6101317957166392, "grad_norm": 0.4026013910770416, "learning_rate": 1.5773244663630955e-05, "loss": 0.5039, "step": 22221 }, { "epoch": 0.6101592531576057, "grad_norm": 0.40135249495506287, "learning_rate": 1.5772892010875426e-05, "loss": 0.4933, "step": 22222 }, { "epoch": 0.6101867105985722, "grad_norm": 0.37555697560310364, "learning_rate": 1.577253934735161e-05, "loss": 0.4589, "step": 22223 }, { "epoch": 0.6102141680395388, "grad_norm": 0.40948793292045593, "learning_rate": 1.5772186673060165e-05, "loss": 0.5894, "step": 22224 }, { "epoch": 0.6102416254805052, "grad_norm": 0.36204996705055237, "learning_rate": 1.577183398800175e-05, "loss": 0.5111, "step": 22225 }, { "epoch": 0.6102690829214718, "grad_norm": 0.4689998924732208, "learning_rate": 1.5771481292177023e-05, "loss": 0.4118, "step": 22226 }, { "epoch": 0.6102965403624382, "grad_norm": 0.36782726645469666, "learning_rate": 1.5771128585586645e-05, "loss": 0.5473, "step": 22227 }, { "epoch": 0.6103239978034047, "grad_norm": 0.36295372247695923, "learning_rate": 1.577077586823126e-05, "loss": 0.5027, "step": 22228 }, { "epoch": 0.6103514552443712, "grad_norm": 0.4023386538028717, "learning_rate": 1.5770423140111545e-05, "loss": 0.5306, "step": 22229 }, { "epoch": 0.6103789126853377, "grad_norm": 0.3907145857810974, "learning_rate": 1.577007040122815e-05, "loss": 0.5521, "step": 22230 }, { "epoch": 0.6104063701263043, "grad_norm": 0.38940373063087463, "learning_rate": 1.576971765158173e-05, "loss": 0.5148, "step": 22231 }, { "epoch": 0.6104338275672707, "grad_norm": 0.34481024742126465, "learning_rate": 1.5769364891172942e-05, "loss": 0.473, "step": 22232 }, { "epoch": 0.6104612850082373, "grad_norm": 0.4060300886631012, "learning_rate": 1.5769012120002448e-05, "loss": 0.5675, "step": 22233 }, { "epoch": 0.6104887424492037, "grad_norm": 0.37747102975845337, "learning_rate": 1.5768659338070905e-05, "loss": 0.4409, "step": 22234 }, { "epoch": 0.6105161998901703, "grad_norm": 0.361987829208374, "learning_rate": 1.5768306545378974e-05, "loss": 0.5299, "step": 22235 }, { "epoch": 0.6105436573311367, "grad_norm": 0.3565824627876282, "learning_rate": 1.5767953741927312e-05, "loss": 0.4851, "step": 22236 }, { "epoch": 0.6105711147721032, "grad_norm": 0.36538293957710266, "learning_rate": 1.5767600927716567e-05, "loss": 0.5194, "step": 22237 }, { "epoch": 0.6105985722130698, "grad_norm": 0.3875940442085266, "learning_rate": 1.5767248102747416e-05, "loss": 0.5036, "step": 22238 }, { "epoch": 0.6106260296540362, "grad_norm": 0.3986596167087555, "learning_rate": 1.57668952670205e-05, "loss": 0.5963, "step": 22239 }, { "epoch": 0.6106534870950028, "grad_norm": 0.39053529500961304, "learning_rate": 1.5766542420536483e-05, "loss": 0.5292, "step": 22240 }, { "epoch": 0.6106809445359692, "grad_norm": 0.36083224415779114, "learning_rate": 1.5766189563296027e-05, "loss": 0.5147, "step": 22241 }, { "epoch": 0.6107084019769358, "grad_norm": 0.47257012128829956, "learning_rate": 1.5765836695299784e-05, "loss": 0.4962, "step": 22242 }, { "epoch": 0.6107358594179022, "grad_norm": 0.36781129240989685, "learning_rate": 1.5765483816548418e-05, "loss": 0.5305, "step": 22243 }, { "epoch": 0.6107633168588688, "grad_norm": 0.40821731090545654, "learning_rate": 1.5765130927042586e-05, "loss": 0.5214, "step": 22244 }, { "epoch": 0.6107907742998353, "grad_norm": 0.39149895310401917, "learning_rate": 1.5764778026782943e-05, "loss": 0.5778, "step": 22245 }, { "epoch": 0.6108182317408017, "grad_norm": 0.38333266973495483, "learning_rate": 1.576442511577015e-05, "loss": 0.4452, "step": 22246 }, { "epoch": 0.6108456891817683, "grad_norm": 0.3765413761138916, "learning_rate": 1.576407219400486e-05, "loss": 0.5188, "step": 22247 }, { "epoch": 0.6108731466227347, "grad_norm": 0.3660796880722046, "learning_rate": 1.576371926148774e-05, "loss": 0.4522, "step": 22248 }, { "epoch": 0.6109006040637013, "grad_norm": 0.3793901801109314, "learning_rate": 1.5763366318219446e-05, "loss": 0.5143, "step": 22249 }, { "epoch": 0.6109280615046677, "grad_norm": 0.35793235898017883, "learning_rate": 1.5763013364200627e-05, "loss": 0.4964, "step": 22250 }, { "epoch": 0.6109555189456343, "grad_norm": 0.3555724322795868, "learning_rate": 1.5762660399431954e-05, "loss": 0.4559, "step": 22251 }, { "epoch": 0.6109829763866008, "grad_norm": 0.4238298535346985, "learning_rate": 1.5762307423914077e-05, "loss": 0.5361, "step": 22252 }, { "epoch": 0.6110104338275673, "grad_norm": 0.3470783829689026, "learning_rate": 1.5761954437647663e-05, "loss": 0.4617, "step": 22253 }, { "epoch": 0.6110378912685338, "grad_norm": 0.3909376561641693, "learning_rate": 1.5761601440633364e-05, "loss": 0.5821, "step": 22254 }, { "epoch": 0.6110653487095002, "grad_norm": 0.4402157962322235, "learning_rate": 1.5761248432871833e-05, "loss": 0.4733, "step": 22255 }, { "epoch": 0.6110928061504668, "grad_norm": 0.37749382853507996, "learning_rate": 1.576089541436374e-05, "loss": 0.4704, "step": 22256 }, { "epoch": 0.6111202635914332, "grad_norm": 0.36298874020576477, "learning_rate": 1.5760542385109737e-05, "loss": 0.4699, "step": 22257 }, { "epoch": 0.6111477210323998, "grad_norm": 0.3920440673828125, "learning_rate": 1.5760189345110485e-05, "loss": 0.5625, "step": 22258 }, { "epoch": 0.6111751784733663, "grad_norm": 0.37410855293273926, "learning_rate": 1.5759836294366642e-05, "loss": 0.4254, "step": 22259 }, { "epoch": 0.6112026359143328, "grad_norm": 0.40716034173965454, "learning_rate": 1.5759483232878862e-05, "loss": 0.5665, "step": 22260 }, { "epoch": 0.6112300933552993, "grad_norm": 0.42992883920669556, "learning_rate": 1.575913016064781e-05, "loss": 0.5012, "step": 22261 }, { "epoch": 0.6112575507962658, "grad_norm": 0.33926594257354736, "learning_rate": 1.5758777077674144e-05, "loss": 0.4646, "step": 22262 }, { "epoch": 0.6112850082372323, "grad_norm": 0.40045300126075745, "learning_rate": 1.5758423983958518e-05, "loss": 0.5881, "step": 22263 }, { "epoch": 0.6113124656781987, "grad_norm": 0.41343575716018677, "learning_rate": 1.5758070879501596e-05, "loss": 0.5471, "step": 22264 }, { "epoch": 0.6113399231191653, "grad_norm": 0.3798188269138336, "learning_rate": 1.5757717764304032e-05, "loss": 0.4616, "step": 22265 }, { "epoch": 0.6113673805601318, "grad_norm": 0.329255074262619, "learning_rate": 1.5757364638366487e-05, "loss": 0.4039, "step": 22266 }, { "epoch": 0.6113948380010983, "grad_norm": 0.42243093252182007, "learning_rate": 1.5757011501689617e-05, "loss": 0.503, "step": 22267 }, { "epoch": 0.6114222954420648, "grad_norm": 0.36034345626831055, "learning_rate": 1.5756658354274084e-05, "loss": 0.4579, "step": 22268 }, { "epoch": 0.6114497528830313, "grad_norm": 0.3666810095310211, "learning_rate": 1.5756305196120552e-05, "loss": 0.4894, "step": 22269 }, { "epoch": 0.6114772103239978, "grad_norm": 0.4027080833911896, "learning_rate": 1.5755952027229668e-05, "loss": 0.5178, "step": 22270 }, { "epoch": 0.6115046677649643, "grad_norm": 0.4269193112850189, "learning_rate": 1.57555988476021e-05, "loss": 0.5645, "step": 22271 }, { "epoch": 0.6115321252059308, "grad_norm": 0.41972389817237854, "learning_rate": 1.57552456572385e-05, "loss": 0.5585, "step": 22272 }, { "epoch": 0.6115595826468974, "grad_norm": 0.35306113958358765, "learning_rate": 1.575489245613953e-05, "loss": 0.5372, "step": 22273 }, { "epoch": 0.6115870400878638, "grad_norm": 0.36466968059539795, "learning_rate": 1.575453924430585e-05, "loss": 0.4901, "step": 22274 }, { "epoch": 0.6116144975288303, "grad_norm": 0.37586653232574463, "learning_rate": 1.575418602173812e-05, "loss": 0.4668, "step": 22275 }, { "epoch": 0.6116419549697968, "grad_norm": 0.39249420166015625, "learning_rate": 1.575383278843699e-05, "loss": 0.5396, "step": 22276 }, { "epoch": 0.6116694124107633, "grad_norm": 0.4205707609653473, "learning_rate": 1.575347954440313e-05, "loss": 0.5576, "step": 22277 }, { "epoch": 0.6116968698517298, "grad_norm": 0.5073122978210449, "learning_rate": 1.575312628963719e-05, "loss": 0.5255, "step": 22278 }, { "epoch": 0.6117243272926963, "grad_norm": 0.36328601837158203, "learning_rate": 1.575277302413984e-05, "loss": 0.5162, "step": 22279 }, { "epoch": 0.6117517847336629, "grad_norm": 0.35701146721839905, "learning_rate": 1.575241974791173e-05, "loss": 0.4431, "step": 22280 }, { "epoch": 0.6117792421746293, "grad_norm": 0.41955891251564026, "learning_rate": 1.5752066460953522e-05, "loss": 0.5381, "step": 22281 }, { "epoch": 0.6118066996155959, "grad_norm": 0.332657128572464, "learning_rate": 1.575171316326587e-05, "loss": 0.5222, "step": 22282 }, { "epoch": 0.6118341570565623, "grad_norm": 0.37133780121803284, "learning_rate": 1.575135985484944e-05, "loss": 0.4955, "step": 22283 }, { "epoch": 0.6118616144975288, "grad_norm": 0.3627047836780548, "learning_rate": 1.5751006535704888e-05, "loss": 0.4814, "step": 22284 }, { "epoch": 0.6118890719384953, "grad_norm": 0.35918405652046204, "learning_rate": 1.5750653205832875e-05, "loss": 0.5084, "step": 22285 }, { "epoch": 0.6119165293794618, "grad_norm": 0.3813706636428833, "learning_rate": 1.5750299865234057e-05, "loss": 0.5229, "step": 22286 }, { "epoch": 0.6119439868204284, "grad_norm": 0.3598233461380005, "learning_rate": 1.574994651390909e-05, "loss": 0.4296, "step": 22287 }, { "epoch": 0.6119714442613948, "grad_norm": 0.3709660470485687, "learning_rate": 1.5749593151858645e-05, "loss": 0.4895, "step": 22288 }, { "epoch": 0.6119989017023614, "grad_norm": 0.3715469539165497, "learning_rate": 1.574923977908337e-05, "loss": 0.5333, "step": 22289 }, { "epoch": 0.6120263591433278, "grad_norm": 0.3820526599884033, "learning_rate": 1.5748886395583925e-05, "loss": 0.5229, "step": 22290 }, { "epoch": 0.6120538165842944, "grad_norm": 0.39448776841163635, "learning_rate": 1.574853300136098e-05, "loss": 0.5233, "step": 22291 }, { "epoch": 0.6120812740252608, "grad_norm": 0.42508170008659363, "learning_rate": 1.574817959641518e-05, "loss": 0.5382, "step": 22292 }, { "epoch": 0.6121087314662274, "grad_norm": 0.4328725337982178, "learning_rate": 1.574782618074719e-05, "loss": 0.5214, "step": 22293 }, { "epoch": 0.6121361889071939, "grad_norm": 0.37644755840301514, "learning_rate": 1.5747472754357675e-05, "loss": 0.479, "step": 22294 }, { "epoch": 0.6121636463481603, "grad_norm": 0.44581642746925354, "learning_rate": 1.5747119317247282e-05, "loss": 0.5448, "step": 22295 }, { "epoch": 0.6121911037891269, "grad_norm": 0.3883346617221832, "learning_rate": 1.5746765869416685e-05, "loss": 0.482, "step": 22296 }, { "epoch": 0.6122185612300933, "grad_norm": 0.3205782473087311, "learning_rate": 1.574641241086653e-05, "loss": 0.4533, "step": 22297 }, { "epoch": 0.6122460186710599, "grad_norm": 0.47209155559539795, "learning_rate": 1.574605894159748e-05, "loss": 0.4861, "step": 22298 }, { "epoch": 0.6122734761120263, "grad_norm": 0.3832240402698517, "learning_rate": 1.5745705461610203e-05, "loss": 0.5489, "step": 22299 }, { "epoch": 0.6123009335529929, "grad_norm": 0.40516626834869385, "learning_rate": 1.574535197090535e-05, "loss": 0.5339, "step": 22300 }, { "epoch": 0.6123283909939594, "grad_norm": 0.35815978050231934, "learning_rate": 1.5744998469483576e-05, "loss": 0.4997, "step": 22301 }, { "epoch": 0.6123558484349259, "grad_norm": 0.335467666387558, "learning_rate": 1.5744644957345553e-05, "loss": 0.4236, "step": 22302 }, { "epoch": 0.6123833058758924, "grad_norm": 0.3941707909107208, "learning_rate": 1.574429143449193e-05, "loss": 0.5176, "step": 22303 }, { "epoch": 0.6124107633168588, "grad_norm": 0.31672507524490356, "learning_rate": 1.5743937900923367e-05, "loss": 0.4962, "step": 22304 }, { "epoch": 0.6124382207578254, "grad_norm": 0.3727671504020691, "learning_rate": 1.574358435664053e-05, "loss": 0.4285, "step": 22305 }, { "epoch": 0.6124656781987918, "grad_norm": 0.36487576365470886, "learning_rate": 1.5743230801644078e-05, "loss": 0.5583, "step": 22306 }, { "epoch": 0.6124931356397584, "grad_norm": 0.40188220143318176, "learning_rate": 1.5742877235934666e-05, "loss": 0.5515, "step": 22307 }, { "epoch": 0.6125205930807249, "grad_norm": 0.35644564032554626, "learning_rate": 1.574252365951295e-05, "loss": 0.4323, "step": 22308 }, { "epoch": 0.6125480505216914, "grad_norm": 0.3337382376194, "learning_rate": 1.57421700723796e-05, "loss": 0.482, "step": 22309 }, { "epoch": 0.6125755079626579, "grad_norm": 0.35368168354034424, "learning_rate": 1.574181647453527e-05, "loss": 0.4247, "step": 22310 }, { "epoch": 0.6126029654036244, "grad_norm": 0.39114248752593994, "learning_rate": 1.5741462865980618e-05, "loss": 0.5189, "step": 22311 }, { "epoch": 0.6126304228445909, "grad_norm": 0.3684851825237274, "learning_rate": 1.5741109246716305e-05, "loss": 0.4586, "step": 22312 }, { "epoch": 0.6126578802855573, "grad_norm": 0.350041002035141, "learning_rate": 1.5740755616742992e-05, "loss": 0.5223, "step": 22313 }, { "epoch": 0.6126853377265239, "grad_norm": 0.40297773480415344, "learning_rate": 1.5740401976061336e-05, "loss": 0.5817, "step": 22314 }, { "epoch": 0.6127127951674904, "grad_norm": 0.3800107538700104, "learning_rate": 1.5740048324672e-05, "loss": 0.4833, "step": 22315 }, { "epoch": 0.6127402526084569, "grad_norm": 0.3722301721572876, "learning_rate": 1.573969466257564e-05, "loss": 0.4138, "step": 22316 }, { "epoch": 0.6127677100494234, "grad_norm": 0.3753984272480011, "learning_rate": 1.573934098977292e-05, "loss": 0.5311, "step": 22317 }, { "epoch": 0.6127951674903899, "grad_norm": 0.3875497579574585, "learning_rate": 1.57389873062645e-05, "loss": 0.4596, "step": 22318 }, { "epoch": 0.6128226249313564, "grad_norm": 0.3613750636577606, "learning_rate": 1.5738633612051028e-05, "loss": 0.4427, "step": 22319 }, { "epoch": 0.6128500823723229, "grad_norm": 0.35349443554878235, "learning_rate": 1.5738279907133178e-05, "loss": 0.5474, "step": 22320 }, { "epoch": 0.6128775398132894, "grad_norm": 0.3991449177265167, "learning_rate": 1.5737926191511607e-05, "loss": 0.5047, "step": 22321 }, { "epoch": 0.612904997254256, "grad_norm": 0.32441431283950806, "learning_rate": 1.5737572465186968e-05, "loss": 0.4505, "step": 22322 }, { "epoch": 0.6129324546952224, "grad_norm": 0.44071316719055176, "learning_rate": 1.5737218728159925e-05, "loss": 0.5769, "step": 22323 }, { "epoch": 0.612959912136189, "grad_norm": 0.412457674741745, "learning_rate": 1.5736864980431143e-05, "loss": 0.4911, "step": 22324 }, { "epoch": 0.6129873695771554, "grad_norm": 0.3591390550136566, "learning_rate": 1.573651122200127e-05, "loss": 0.496, "step": 22325 }, { "epoch": 0.6130148270181219, "grad_norm": 0.3497498631477356, "learning_rate": 1.5736157452870976e-05, "loss": 0.482, "step": 22326 }, { "epoch": 0.6130422844590884, "grad_norm": 0.38614487648010254, "learning_rate": 1.573580367304092e-05, "loss": 0.4969, "step": 22327 }, { "epoch": 0.6130697419000549, "grad_norm": 0.36253419518470764, "learning_rate": 1.5735449882511758e-05, "loss": 0.5635, "step": 22328 }, { "epoch": 0.6130971993410215, "grad_norm": 0.3599514961242676, "learning_rate": 1.5735096081284148e-05, "loss": 0.4987, "step": 22329 }, { "epoch": 0.6131246567819879, "grad_norm": 0.34198230504989624, "learning_rate": 1.5734742269358756e-05, "loss": 0.5161, "step": 22330 }, { "epoch": 0.6131521142229545, "grad_norm": 0.3852802515029907, "learning_rate": 1.5734388446736242e-05, "loss": 0.5353, "step": 22331 }, { "epoch": 0.6131795716639209, "grad_norm": 0.36459124088287354, "learning_rate": 1.573403461341726e-05, "loss": 0.5221, "step": 22332 }, { "epoch": 0.6132070291048874, "grad_norm": 0.4692228138446808, "learning_rate": 1.5733680769402476e-05, "loss": 0.5487, "step": 22333 }, { "epoch": 0.6132344865458539, "grad_norm": 0.37739425897598267, "learning_rate": 1.5733326914692545e-05, "loss": 0.558, "step": 22334 }, { "epoch": 0.6132619439868204, "grad_norm": 0.35902783274650574, "learning_rate": 1.573297304928813e-05, "loss": 0.5286, "step": 22335 }, { "epoch": 0.613289401427787, "grad_norm": 0.37758535146713257, "learning_rate": 1.573261917318989e-05, "loss": 0.4398, "step": 22336 }, { "epoch": 0.6133168588687534, "grad_norm": 0.4123183786869049, "learning_rate": 1.5732265286398485e-05, "loss": 0.4743, "step": 22337 }, { "epoch": 0.61334431630972, "grad_norm": 0.39421606063842773, "learning_rate": 1.5731911388914576e-05, "loss": 0.5659, "step": 22338 }, { "epoch": 0.6133717737506864, "grad_norm": 0.43935301899909973, "learning_rate": 1.5731557480738824e-05, "loss": 0.5511, "step": 22339 }, { "epoch": 0.613399231191653, "grad_norm": 0.38147106766700745, "learning_rate": 1.5731203561871887e-05, "loss": 0.456, "step": 22340 }, { "epoch": 0.6134266886326194, "grad_norm": 0.3597048819065094, "learning_rate": 1.5730849632314428e-05, "loss": 0.4967, "step": 22341 }, { "epoch": 0.613454146073586, "grad_norm": 0.3484095633029938, "learning_rate": 1.5730495692067105e-05, "loss": 0.5002, "step": 22342 }, { "epoch": 0.6134816035145525, "grad_norm": 0.3630552887916565, "learning_rate": 1.5730141741130575e-05, "loss": 0.4332, "step": 22343 }, { "epoch": 0.6135090609555189, "grad_norm": 0.3765169084072113, "learning_rate": 1.5729787779505506e-05, "loss": 0.5472, "step": 22344 }, { "epoch": 0.6135365183964855, "grad_norm": 0.3400070071220398, "learning_rate": 1.5729433807192552e-05, "loss": 0.4717, "step": 22345 }, { "epoch": 0.6135639758374519, "grad_norm": 0.42126747965812683, "learning_rate": 1.5729079824192376e-05, "loss": 0.5233, "step": 22346 }, { "epoch": 0.6135914332784185, "grad_norm": 0.35679954290390015, "learning_rate": 1.572872583050564e-05, "loss": 0.5003, "step": 22347 }, { "epoch": 0.6136188907193849, "grad_norm": 0.3954769968986511, "learning_rate": 1.5728371826132996e-05, "loss": 0.5737, "step": 22348 }, { "epoch": 0.6136463481603515, "grad_norm": 0.3443745970726013, "learning_rate": 1.5728017811075114e-05, "loss": 0.4906, "step": 22349 }, { "epoch": 0.613673805601318, "grad_norm": 0.3985420763492584, "learning_rate": 1.572766378533265e-05, "loss": 0.5053, "step": 22350 }, { "epoch": 0.6137012630422845, "grad_norm": 0.37189149856567383, "learning_rate": 1.5727309748906266e-05, "loss": 0.5204, "step": 22351 }, { "epoch": 0.613728720483251, "grad_norm": 0.3315165638923645, "learning_rate": 1.5726955701796623e-05, "loss": 0.4516, "step": 22352 }, { "epoch": 0.6137561779242174, "grad_norm": 0.357103168964386, "learning_rate": 1.5726601644004373e-05, "loss": 0.4501, "step": 22353 }, { "epoch": 0.613783635365184, "grad_norm": 0.37696170806884766, "learning_rate": 1.572624757553019e-05, "loss": 0.5775, "step": 22354 }, { "epoch": 0.6138110928061504, "grad_norm": 0.33919617533683777, "learning_rate": 1.5725893496374726e-05, "loss": 0.488, "step": 22355 }, { "epoch": 0.613838550247117, "grad_norm": 0.5796383023262024, "learning_rate": 1.572553940653864e-05, "loss": 0.5575, "step": 22356 }, { "epoch": 0.6138660076880835, "grad_norm": 0.4033207893371582, "learning_rate": 1.57251853060226e-05, "loss": 0.4949, "step": 22357 }, { "epoch": 0.61389346512905, "grad_norm": 0.3770495355129242, "learning_rate": 1.5724831194827258e-05, "loss": 0.4207, "step": 22358 }, { "epoch": 0.6139209225700165, "grad_norm": 0.3960598111152649, "learning_rate": 1.5724477072953283e-05, "loss": 0.5292, "step": 22359 }, { "epoch": 0.613948380010983, "grad_norm": 0.3861221671104431, "learning_rate": 1.572412294040133e-05, "loss": 0.5043, "step": 22360 }, { "epoch": 0.6139758374519495, "grad_norm": 0.3493136465549469, "learning_rate": 1.5723768797172057e-05, "loss": 0.4537, "step": 22361 }, { "epoch": 0.6140032948929159, "grad_norm": 0.37213313579559326, "learning_rate": 1.5723414643266135e-05, "loss": 0.4073, "step": 22362 }, { "epoch": 0.6140307523338825, "grad_norm": 0.3684461712837219, "learning_rate": 1.572306047868421e-05, "loss": 0.5287, "step": 22363 }, { "epoch": 0.6140582097748489, "grad_norm": 0.3765597641468048, "learning_rate": 1.5722706303426955e-05, "loss": 0.4715, "step": 22364 }, { "epoch": 0.6140856672158155, "grad_norm": 0.36326783895492554, "learning_rate": 1.572235211749503e-05, "loss": 0.4601, "step": 22365 }, { "epoch": 0.614113124656782, "grad_norm": 0.4628138542175293, "learning_rate": 1.572199792088909e-05, "loss": 0.4352, "step": 22366 }, { "epoch": 0.6141405820977485, "grad_norm": 0.4248127043247223, "learning_rate": 1.5721643713609794e-05, "loss": 0.4223, "step": 22367 }, { "epoch": 0.614168039538715, "grad_norm": 0.3917737901210785, "learning_rate": 1.5721289495657807e-05, "loss": 0.4955, "step": 22368 }, { "epoch": 0.6141954969796815, "grad_norm": 0.389168381690979, "learning_rate": 1.572093526703379e-05, "loss": 0.51, "step": 22369 }, { "epoch": 0.614222954420648, "grad_norm": 0.45336952805519104, "learning_rate": 1.5720581027738402e-05, "loss": 0.4563, "step": 22370 }, { "epoch": 0.6142504118616144, "grad_norm": 0.36962857842445374, "learning_rate": 1.5720226777772303e-05, "loss": 0.4596, "step": 22371 }, { "epoch": 0.614277869302581, "grad_norm": 0.4024910628795624, "learning_rate": 1.5719872517136157e-05, "loss": 0.4769, "step": 22372 }, { "epoch": 0.6143053267435475, "grad_norm": 0.3402158319950104, "learning_rate": 1.571951824583063e-05, "loss": 0.4262, "step": 22373 }, { "epoch": 0.614332784184514, "grad_norm": 0.34194216132164, "learning_rate": 1.5719163963856367e-05, "loss": 0.445, "step": 22374 }, { "epoch": 0.6143602416254805, "grad_norm": 0.36392393708229065, "learning_rate": 1.571880967121404e-05, "loss": 0.5716, "step": 22375 }, { "epoch": 0.614387699066447, "grad_norm": 0.3907800316810608, "learning_rate": 1.571845536790431e-05, "loss": 0.4888, "step": 22376 }, { "epoch": 0.6144151565074135, "grad_norm": 0.4176240861415863, "learning_rate": 1.5718101053927834e-05, "loss": 0.4936, "step": 22377 }, { "epoch": 0.61444261394838, "grad_norm": 0.3670918643474579, "learning_rate": 1.5717746729285274e-05, "loss": 0.5752, "step": 22378 }, { "epoch": 0.6144700713893465, "grad_norm": 0.37597203254699707, "learning_rate": 1.571739239397729e-05, "loss": 0.4859, "step": 22379 }, { "epoch": 0.6144975288303131, "grad_norm": 0.441389799118042, "learning_rate": 1.5717038048004548e-05, "loss": 0.6218, "step": 22380 }, { "epoch": 0.6145249862712795, "grad_norm": 0.5084608197212219, "learning_rate": 1.5716683691367704e-05, "loss": 0.5657, "step": 22381 }, { "epoch": 0.614552443712246, "grad_norm": 0.4000180661678314, "learning_rate": 1.5716329324067423e-05, "loss": 0.5494, "step": 22382 }, { "epoch": 0.6145799011532125, "grad_norm": 0.3384699821472168, "learning_rate": 1.571597494610436e-05, "loss": 0.4668, "step": 22383 }, { "epoch": 0.614607358594179, "grad_norm": 0.3452565670013428, "learning_rate": 1.571562055747918e-05, "loss": 0.495, "step": 22384 }, { "epoch": 0.6146348160351455, "grad_norm": 0.34684741497039795, "learning_rate": 1.5715266158192543e-05, "loss": 0.5045, "step": 22385 }, { "epoch": 0.614662273476112, "grad_norm": 0.3506554663181305, "learning_rate": 1.5714911748245115e-05, "loss": 0.4788, "step": 22386 }, { "epoch": 0.6146897309170786, "grad_norm": 0.4257354438304901, "learning_rate": 1.5714557327637544e-05, "loss": 0.5523, "step": 22387 }, { "epoch": 0.614717188358045, "grad_norm": 0.33242201805114746, "learning_rate": 1.5714202896370507e-05, "loss": 0.3884, "step": 22388 }, { "epoch": 0.6147446457990116, "grad_norm": 0.3994636833667755, "learning_rate": 1.5713848454444655e-05, "loss": 0.5383, "step": 22389 }, { "epoch": 0.614772103239978, "grad_norm": 0.8916911482810974, "learning_rate": 1.571349400186065e-05, "loss": 0.4923, "step": 22390 }, { "epoch": 0.6147995606809445, "grad_norm": 0.3596300482749939, "learning_rate": 1.571313953861916e-05, "loss": 0.3837, "step": 22391 }, { "epoch": 0.614827018121911, "grad_norm": 0.393643856048584, "learning_rate": 1.5712785064720837e-05, "loss": 0.4881, "step": 22392 }, { "epoch": 0.6148544755628775, "grad_norm": 0.41327959299087524, "learning_rate": 1.5712430580166348e-05, "loss": 0.6245, "step": 22393 }, { "epoch": 0.6148819330038441, "grad_norm": 0.4574626088142395, "learning_rate": 1.5712076084956354e-05, "loss": 0.4208, "step": 22394 }, { "epoch": 0.6149093904448105, "grad_norm": 0.3615926504135132, "learning_rate": 1.5711721579091515e-05, "loss": 0.4724, "step": 22395 }, { "epoch": 0.6149368478857771, "grad_norm": 0.3980497717857361, "learning_rate": 1.5711367062572492e-05, "loss": 0.4991, "step": 22396 }, { "epoch": 0.6149643053267435, "grad_norm": 0.37417009472846985, "learning_rate": 1.5711012535399946e-05, "loss": 0.533, "step": 22397 }, { "epoch": 0.6149917627677101, "grad_norm": 0.3309810757637024, "learning_rate": 1.5710657997574535e-05, "loss": 0.4067, "step": 22398 }, { "epoch": 0.6150192202086765, "grad_norm": 0.4476729929447174, "learning_rate": 1.571030344909693e-05, "loss": 0.5024, "step": 22399 }, { "epoch": 0.615046677649643, "grad_norm": 0.36522457003593445, "learning_rate": 1.570994888996778e-05, "loss": 0.4931, "step": 22400 }, { "epoch": 0.6150741350906096, "grad_norm": 0.4306666851043701, "learning_rate": 1.570959432018776e-05, "loss": 0.5797, "step": 22401 }, { "epoch": 0.615101592531576, "grad_norm": 0.4648493230342865, "learning_rate": 1.5709239739757524e-05, "loss": 0.606, "step": 22402 }, { "epoch": 0.6151290499725426, "grad_norm": 0.4590343236923218, "learning_rate": 1.5708885148677732e-05, "loss": 0.5066, "step": 22403 }, { "epoch": 0.615156507413509, "grad_norm": 0.4329231381416321, "learning_rate": 1.5708530546949048e-05, "loss": 0.4367, "step": 22404 }, { "epoch": 0.6151839648544756, "grad_norm": 0.3508068323135376, "learning_rate": 1.570817593457213e-05, "loss": 0.5327, "step": 22405 }, { "epoch": 0.615211422295442, "grad_norm": 0.41138675808906555, "learning_rate": 1.5707821311547645e-05, "loss": 0.5149, "step": 22406 }, { "epoch": 0.6152388797364086, "grad_norm": 0.4616115391254425, "learning_rate": 1.5707466677876245e-05, "loss": 0.4539, "step": 22407 }, { "epoch": 0.6152663371773751, "grad_norm": 0.3997574746608734, "learning_rate": 1.5707112033558605e-05, "loss": 0.4675, "step": 22408 }, { "epoch": 0.6152937946183415, "grad_norm": 0.3512255549430847, "learning_rate": 1.5706757378595378e-05, "loss": 0.4616, "step": 22409 }, { "epoch": 0.6153212520593081, "grad_norm": 0.3764665126800537, "learning_rate": 1.5706402712987227e-05, "loss": 0.5224, "step": 22410 }, { "epoch": 0.6153487095002745, "grad_norm": 0.3815498352050781, "learning_rate": 1.5706048036734812e-05, "loss": 0.5515, "step": 22411 }, { "epoch": 0.6153761669412411, "grad_norm": 0.47489452362060547, "learning_rate": 1.57056933498388e-05, "loss": 0.5134, "step": 22412 }, { "epoch": 0.6154036243822075, "grad_norm": 0.4045666456222534, "learning_rate": 1.5705338652299843e-05, "loss": 0.5851, "step": 22413 }, { "epoch": 0.6154310818231741, "grad_norm": 0.34440135955810547, "learning_rate": 1.5704983944118613e-05, "loss": 0.5187, "step": 22414 }, { "epoch": 0.6154585392641406, "grad_norm": 0.37784966826438904, "learning_rate": 1.5704629225295763e-05, "loss": 0.3937, "step": 22415 }, { "epoch": 0.6154859967051071, "grad_norm": 0.357105016708374, "learning_rate": 1.5704274495831966e-05, "loss": 0.5652, "step": 22416 }, { "epoch": 0.6155134541460736, "grad_norm": 0.38424667716026306, "learning_rate": 1.570391975572787e-05, "loss": 0.5099, "step": 22417 }, { "epoch": 0.61554091158704, "grad_norm": 0.36078813672065735, "learning_rate": 1.5703565004984146e-05, "loss": 0.4964, "step": 22418 }, { "epoch": 0.6155683690280066, "grad_norm": 0.37554067373275757, "learning_rate": 1.570321024360145e-05, "loss": 0.5091, "step": 22419 }, { "epoch": 0.615595826468973, "grad_norm": 0.36112555861473083, "learning_rate": 1.570285547158045e-05, "loss": 0.5605, "step": 22420 }, { "epoch": 0.6156232839099396, "grad_norm": 0.45782092213630676, "learning_rate": 1.5702500688921804e-05, "loss": 0.4812, "step": 22421 }, { "epoch": 0.6156507413509061, "grad_norm": 0.41082102060317993, "learning_rate": 1.5702145895626177e-05, "loss": 0.5948, "step": 22422 }, { "epoch": 0.6156781987918726, "grad_norm": 0.37824687361717224, "learning_rate": 1.570179109169422e-05, "loss": 0.527, "step": 22423 }, { "epoch": 0.6157056562328391, "grad_norm": 0.4140215218067169, "learning_rate": 1.570143627712661e-05, "loss": 0.5308, "step": 22424 }, { "epoch": 0.6157331136738056, "grad_norm": 0.4178859293460846, "learning_rate": 1.5701081451923996e-05, "loss": 0.4879, "step": 22425 }, { "epoch": 0.6157605711147721, "grad_norm": 0.36644741892814636, "learning_rate": 1.570072661608705e-05, "loss": 0.4808, "step": 22426 }, { "epoch": 0.6157880285557386, "grad_norm": 0.40483301877975464, "learning_rate": 1.570037176961643e-05, "loss": 0.5445, "step": 22427 }, { "epoch": 0.6158154859967051, "grad_norm": 0.35162755846977234, "learning_rate": 1.570001691251279e-05, "loss": 0.4605, "step": 22428 }, { "epoch": 0.6158429434376717, "grad_norm": 0.3669903874397278, "learning_rate": 1.5699662044776805e-05, "loss": 0.5129, "step": 22429 }, { "epoch": 0.6158704008786381, "grad_norm": 0.3989444077014923, "learning_rate": 1.5699307166409133e-05, "loss": 0.5128, "step": 22430 }, { "epoch": 0.6158978583196046, "grad_norm": 0.3426071107387543, "learning_rate": 1.569895227741043e-05, "loss": 0.5074, "step": 22431 }, { "epoch": 0.6159253157605711, "grad_norm": 0.4168485701084137, "learning_rate": 1.5698597377781368e-05, "loss": 0.5836, "step": 22432 }, { "epoch": 0.6159527732015376, "grad_norm": 0.3694388270378113, "learning_rate": 1.5698242467522596e-05, "loss": 0.525, "step": 22433 }, { "epoch": 0.6159802306425041, "grad_norm": 0.3906443119049072, "learning_rate": 1.5697887546634785e-05, "loss": 0.5378, "step": 22434 }, { "epoch": 0.6160076880834706, "grad_norm": 0.34419670701026917, "learning_rate": 1.5697532615118597e-05, "loss": 0.4807, "step": 22435 }, { "epoch": 0.6160351455244372, "grad_norm": 0.3505313992500305, "learning_rate": 1.569717767297469e-05, "loss": 0.5424, "step": 22436 }, { "epoch": 0.6160626029654036, "grad_norm": 0.3377642035484314, "learning_rate": 1.569682272020373e-05, "loss": 0.4973, "step": 22437 }, { "epoch": 0.6160900604063702, "grad_norm": 0.39497652649879456, "learning_rate": 1.569646775680638e-05, "loss": 0.5662, "step": 22438 }, { "epoch": 0.6161175178473366, "grad_norm": 0.4313346743583679, "learning_rate": 1.5696112782783296e-05, "loss": 0.4723, "step": 22439 }, { "epoch": 0.6161449752883031, "grad_norm": 0.41157907247543335, "learning_rate": 1.5695757798135143e-05, "loss": 0.5044, "step": 22440 }, { "epoch": 0.6161724327292696, "grad_norm": 0.35974326729774475, "learning_rate": 1.5695402802862586e-05, "loss": 0.5058, "step": 22441 }, { "epoch": 0.6161998901702361, "grad_norm": 0.38614991307258606, "learning_rate": 1.5695047796966287e-05, "loss": 0.5685, "step": 22442 }, { "epoch": 0.6162273476112027, "grad_norm": 0.4355829954147339, "learning_rate": 1.56946927804469e-05, "loss": 0.4541, "step": 22443 }, { "epoch": 0.6162548050521691, "grad_norm": 0.3852423429489136, "learning_rate": 1.5694337753305097e-05, "loss": 0.5405, "step": 22444 }, { "epoch": 0.6162822624931357, "grad_norm": 0.39085647463798523, "learning_rate": 1.5693982715541535e-05, "loss": 0.5302, "step": 22445 }, { "epoch": 0.6163097199341021, "grad_norm": 0.39774662256240845, "learning_rate": 1.569362766715688e-05, "loss": 0.522, "step": 22446 }, { "epoch": 0.6163371773750687, "grad_norm": 0.43441006541252136, "learning_rate": 1.569327260815179e-05, "loss": 0.6035, "step": 22447 }, { "epoch": 0.6163646348160351, "grad_norm": 0.39261582493782043, "learning_rate": 1.5692917538526936e-05, "loss": 0.4311, "step": 22448 }, { "epoch": 0.6163920922570016, "grad_norm": 0.44348111748695374, "learning_rate": 1.5692562458282967e-05, "loss": 0.5131, "step": 22449 }, { "epoch": 0.6164195496979682, "grad_norm": 0.405502587556839, "learning_rate": 1.5692207367420556e-05, "loss": 0.5683, "step": 22450 }, { "epoch": 0.6164470071389346, "grad_norm": 0.35782089829444885, "learning_rate": 1.5691852265940356e-05, "loss": 0.582, "step": 22451 }, { "epoch": 0.6164744645799012, "grad_norm": 0.38939744234085083, "learning_rate": 1.569149715384304e-05, "loss": 0.4735, "step": 22452 }, { "epoch": 0.6165019220208676, "grad_norm": 0.3565541207790375, "learning_rate": 1.569114203112926e-05, "loss": 0.477, "step": 22453 }, { "epoch": 0.6165293794618342, "grad_norm": 0.3621019124984741, "learning_rate": 1.569078689779969e-05, "loss": 0.584, "step": 22454 }, { "epoch": 0.6165568369028006, "grad_norm": 0.4409540295600891, "learning_rate": 1.5690431753854986e-05, "loss": 0.5491, "step": 22455 }, { "epoch": 0.6165842943437672, "grad_norm": 0.34002095460891724, "learning_rate": 1.5690076599295805e-05, "loss": 0.4808, "step": 22456 }, { "epoch": 0.6166117517847337, "grad_norm": 0.35386621952056885, "learning_rate": 1.568972143412282e-05, "loss": 0.4496, "step": 22457 }, { "epoch": 0.6166392092257001, "grad_norm": 0.37079381942749023, "learning_rate": 1.5689366258336687e-05, "loss": 0.4988, "step": 22458 }, { "epoch": 0.6166666666666667, "grad_norm": 0.45040878653526306, "learning_rate": 1.568901107193807e-05, "loss": 0.5233, "step": 22459 }, { "epoch": 0.6166941241076331, "grad_norm": 0.38504672050476074, "learning_rate": 1.568865587492763e-05, "loss": 0.522, "step": 22460 }, { "epoch": 0.6167215815485997, "grad_norm": 0.37678220868110657, "learning_rate": 1.5688300667306034e-05, "loss": 0.4456, "step": 22461 }, { "epoch": 0.6167490389895661, "grad_norm": 0.3849036395549774, "learning_rate": 1.568794544907394e-05, "loss": 0.5553, "step": 22462 }, { "epoch": 0.6167764964305327, "grad_norm": 0.3511054217815399, "learning_rate": 1.5687590220232013e-05, "loss": 0.4902, "step": 22463 }, { "epoch": 0.6168039538714992, "grad_norm": 0.34521570801734924, "learning_rate": 1.5687234980780918e-05, "loss": 0.4487, "step": 22464 }, { "epoch": 0.6168314113124657, "grad_norm": 0.36269545555114746, "learning_rate": 1.5686879730721307e-05, "loss": 0.5086, "step": 22465 }, { "epoch": 0.6168588687534322, "grad_norm": 0.38413697481155396, "learning_rate": 1.5686524470053858e-05, "loss": 0.4302, "step": 22466 }, { "epoch": 0.6168863261943986, "grad_norm": 0.3978446125984192, "learning_rate": 1.5686169198779223e-05, "loss": 0.5909, "step": 22467 }, { "epoch": 0.6169137836353652, "grad_norm": 0.36741313338279724, "learning_rate": 1.5685813916898065e-05, "loss": 0.4652, "step": 22468 }, { "epoch": 0.6169412410763316, "grad_norm": 0.3810917139053345, "learning_rate": 1.568545862441105e-05, "loss": 0.504, "step": 22469 }, { "epoch": 0.6169686985172982, "grad_norm": 0.386318564414978, "learning_rate": 1.5685103321318843e-05, "loss": 0.4802, "step": 22470 }, { "epoch": 0.6169961559582647, "grad_norm": 0.42613542079925537, "learning_rate": 1.5684748007622104e-05, "loss": 0.5744, "step": 22471 }, { "epoch": 0.6170236133992312, "grad_norm": 0.3909852206707001, "learning_rate": 1.5684392683321496e-05, "loss": 0.5039, "step": 22472 }, { "epoch": 0.6170510708401977, "grad_norm": 0.3132966160774231, "learning_rate": 1.5684037348417678e-05, "loss": 0.4712, "step": 22473 }, { "epoch": 0.6170785282811642, "grad_norm": 0.34158822894096375, "learning_rate": 1.5683682002911318e-05, "loss": 0.5119, "step": 22474 }, { "epoch": 0.6171059857221307, "grad_norm": 0.36224526166915894, "learning_rate": 1.5683326646803077e-05, "loss": 0.5083, "step": 22475 }, { "epoch": 0.6171334431630972, "grad_norm": 0.36369964480400085, "learning_rate": 1.568297128009362e-05, "loss": 0.456, "step": 22476 }, { "epoch": 0.6171609006040637, "grad_norm": 0.4338120222091675, "learning_rate": 1.56826159027836e-05, "loss": 0.5359, "step": 22477 }, { "epoch": 0.6171883580450302, "grad_norm": 0.39159539341926575, "learning_rate": 1.5682260514873698e-05, "loss": 0.477, "step": 22478 }, { "epoch": 0.6172158154859967, "grad_norm": 0.397657185792923, "learning_rate": 1.568190511636456e-05, "loss": 0.5043, "step": 22479 }, { "epoch": 0.6172432729269632, "grad_norm": 0.3937775790691376, "learning_rate": 1.5681549707256854e-05, "loss": 0.4635, "step": 22480 }, { "epoch": 0.6172707303679297, "grad_norm": 0.39815840125083923, "learning_rate": 1.568119428755125e-05, "loss": 0.4971, "step": 22481 }, { "epoch": 0.6172981878088962, "grad_norm": 0.40140581130981445, "learning_rate": 1.5680838857248403e-05, "loss": 0.5177, "step": 22482 }, { "epoch": 0.6173256452498627, "grad_norm": 0.3692626655101776, "learning_rate": 1.568048341634898e-05, "loss": 0.463, "step": 22483 }, { "epoch": 0.6173531026908292, "grad_norm": 0.3488836884498596, "learning_rate": 1.568012796485364e-05, "loss": 0.4654, "step": 22484 }, { "epoch": 0.6173805601317958, "grad_norm": 0.3457472622394562, "learning_rate": 1.567977250276305e-05, "loss": 0.5006, "step": 22485 }, { "epoch": 0.6174080175727622, "grad_norm": 0.364148885011673, "learning_rate": 1.567941703007787e-05, "loss": 0.5189, "step": 22486 }, { "epoch": 0.6174354750137288, "grad_norm": 0.4103710353374481, "learning_rate": 1.5679061546798765e-05, "loss": 0.466, "step": 22487 }, { "epoch": 0.6174629324546952, "grad_norm": 0.3951040208339691, "learning_rate": 1.56787060529264e-05, "loss": 0.562, "step": 22488 }, { "epoch": 0.6174903898956617, "grad_norm": 0.3802453875541687, "learning_rate": 1.5678350548461435e-05, "loss": 0.4775, "step": 22489 }, { "epoch": 0.6175178473366282, "grad_norm": 0.3959280252456665, "learning_rate": 1.567799503340453e-05, "loss": 0.4867, "step": 22490 }, { "epoch": 0.6175453047775947, "grad_norm": 0.3710545003414154, "learning_rate": 1.5677639507756354e-05, "loss": 0.4591, "step": 22491 }, { "epoch": 0.6175727622185613, "grad_norm": 0.4095623195171356, "learning_rate": 1.567728397151757e-05, "loss": 0.5751, "step": 22492 }, { "epoch": 0.6176002196595277, "grad_norm": 0.4128479063510895, "learning_rate": 1.567692842468884e-05, "loss": 0.4414, "step": 22493 }, { "epoch": 0.6176276771004943, "grad_norm": 0.3670276999473572, "learning_rate": 1.5676572867270826e-05, "loss": 0.4403, "step": 22494 }, { "epoch": 0.6176551345414607, "grad_norm": 0.43889325857162476, "learning_rate": 1.5676217299264188e-05, "loss": 0.5016, "step": 22495 }, { "epoch": 0.6176825919824273, "grad_norm": 0.37587711215019226, "learning_rate": 1.5675861720669598e-05, "loss": 0.5456, "step": 22496 }, { "epoch": 0.6177100494233937, "grad_norm": 0.3920922577381134, "learning_rate": 1.5675506131487712e-05, "loss": 0.5459, "step": 22497 }, { "epoch": 0.6177375068643602, "grad_norm": 0.47176799178123474, "learning_rate": 1.5675150531719195e-05, "loss": 0.5513, "step": 22498 }, { "epoch": 0.6177649643053268, "grad_norm": 0.4128495156764984, "learning_rate": 1.5674794921364715e-05, "loss": 0.5645, "step": 22499 }, { "epoch": 0.6177924217462932, "grad_norm": 0.3483002185821533, "learning_rate": 1.5674439300424928e-05, "loss": 0.4971, "step": 22500 }, { "epoch": 0.6178198791872598, "grad_norm": 0.4404960870742798, "learning_rate": 1.56740836689005e-05, "loss": 0.5405, "step": 22501 }, { "epoch": 0.6178473366282262, "grad_norm": 0.39649009704589844, "learning_rate": 1.5673728026792098e-05, "loss": 0.4954, "step": 22502 }, { "epoch": 0.6178747940691928, "grad_norm": 0.4728178083896637, "learning_rate": 1.5673372374100376e-05, "loss": 0.5752, "step": 22503 }, { "epoch": 0.6179022515101592, "grad_norm": 0.3820502758026123, "learning_rate": 1.567301671082601e-05, "loss": 0.4969, "step": 22504 }, { "epoch": 0.6179297089511258, "grad_norm": 0.3793199062347412, "learning_rate": 1.5672661036969654e-05, "loss": 0.4578, "step": 22505 }, { "epoch": 0.6179571663920923, "grad_norm": 0.5064769983291626, "learning_rate": 1.5672305352531978e-05, "loss": 0.4991, "step": 22506 }, { "epoch": 0.6179846238330587, "grad_norm": 0.3548373281955719, "learning_rate": 1.567194965751364e-05, "loss": 0.5038, "step": 22507 }, { "epoch": 0.6180120812740253, "grad_norm": 0.36565566062927246, "learning_rate": 1.5671593951915306e-05, "loss": 0.4591, "step": 22508 }, { "epoch": 0.6180395387149917, "grad_norm": 0.41689202189445496, "learning_rate": 1.5671238235737642e-05, "loss": 0.5052, "step": 22509 }, { "epoch": 0.6180669961559583, "grad_norm": 0.4053367078304291, "learning_rate": 1.5670882508981305e-05, "loss": 0.4925, "step": 22510 }, { "epoch": 0.6180944535969247, "grad_norm": 0.7188882231712341, "learning_rate": 1.567052677164696e-05, "loss": 0.5269, "step": 22511 }, { "epoch": 0.6181219110378913, "grad_norm": 0.38092395663261414, "learning_rate": 1.5670171023735278e-05, "loss": 0.4686, "step": 22512 }, { "epoch": 0.6181493684788578, "grad_norm": 0.37020590901374817, "learning_rate": 1.5669815265246912e-05, "loss": 0.5541, "step": 22513 }, { "epoch": 0.6181768259198243, "grad_norm": 0.40984034538269043, "learning_rate": 1.5669459496182537e-05, "loss": 0.4438, "step": 22514 }, { "epoch": 0.6182042833607908, "grad_norm": 0.4050333499908447, "learning_rate": 1.5669103716542806e-05, "loss": 0.5373, "step": 22515 }, { "epoch": 0.6182317408017572, "grad_norm": 0.3880467712879181, "learning_rate": 1.566874792632839e-05, "loss": 0.5009, "step": 22516 }, { "epoch": 0.6182591982427238, "grad_norm": 0.36311259865760803, "learning_rate": 1.5668392125539948e-05, "loss": 0.4725, "step": 22517 }, { "epoch": 0.6182866556836902, "grad_norm": 0.3772839605808258, "learning_rate": 1.5668036314178142e-05, "loss": 0.4247, "step": 22518 }, { "epoch": 0.6183141131246568, "grad_norm": 0.38857123255729675, "learning_rate": 1.5667680492243647e-05, "loss": 0.5217, "step": 22519 }, { "epoch": 0.6183415705656233, "grad_norm": 0.3524090647697449, "learning_rate": 1.5667324659737114e-05, "loss": 0.4996, "step": 22520 }, { "epoch": 0.6183690280065898, "grad_norm": 0.35601529479026794, "learning_rate": 1.5666968816659213e-05, "loss": 0.4758, "step": 22521 }, { "epoch": 0.6183964854475563, "grad_norm": 0.48193857073783875, "learning_rate": 1.5666612963010605e-05, "loss": 0.6365, "step": 22522 }, { "epoch": 0.6184239428885228, "grad_norm": 0.37535110116004944, "learning_rate": 1.5666257098791958e-05, "loss": 0.4974, "step": 22523 }, { "epoch": 0.6184514003294893, "grad_norm": 0.48583611845970154, "learning_rate": 1.5665901224003932e-05, "loss": 0.4878, "step": 22524 }, { "epoch": 0.6184788577704557, "grad_norm": 0.40410780906677246, "learning_rate": 1.5665545338647192e-05, "loss": 0.4991, "step": 22525 }, { "epoch": 0.6185063152114223, "grad_norm": 0.3774104118347168, "learning_rate": 1.56651894427224e-05, "loss": 0.4895, "step": 22526 }, { "epoch": 0.6185337726523888, "grad_norm": 0.37482893466949463, "learning_rate": 1.5664833536230223e-05, "loss": 0.5018, "step": 22527 }, { "epoch": 0.6185612300933553, "grad_norm": 0.4690578281879425, "learning_rate": 1.5664477619171323e-05, "loss": 0.4754, "step": 22528 }, { "epoch": 0.6185886875343218, "grad_norm": 0.4314991533756256, "learning_rate": 1.5664121691546367e-05, "loss": 0.5237, "step": 22529 }, { "epoch": 0.6186161449752883, "grad_norm": 0.3820810616016388, "learning_rate": 1.5663765753356014e-05, "loss": 0.5792, "step": 22530 }, { "epoch": 0.6186436024162548, "grad_norm": 0.37724629044532776, "learning_rate": 1.5663409804600928e-05, "loss": 0.5033, "step": 22531 }, { "epoch": 0.6186710598572213, "grad_norm": 0.40327388048171997, "learning_rate": 1.566305384528178e-05, "loss": 0.4932, "step": 22532 }, { "epoch": 0.6186985172981878, "grad_norm": 0.35402265191078186, "learning_rate": 1.5662697875399224e-05, "loss": 0.4803, "step": 22533 }, { "epoch": 0.6187259747391544, "grad_norm": 0.383098840713501, "learning_rate": 1.566234189495393e-05, "loss": 0.5312, "step": 22534 }, { "epoch": 0.6187534321801208, "grad_norm": 0.34646594524383545, "learning_rate": 1.5661985903946564e-05, "loss": 0.5014, "step": 22535 }, { "epoch": 0.6187808896210873, "grad_norm": 0.4238612949848175, "learning_rate": 1.5661629902377784e-05, "loss": 0.5047, "step": 22536 }, { "epoch": 0.6188083470620538, "grad_norm": 0.45120003819465637, "learning_rate": 1.566127389024826e-05, "loss": 0.6061, "step": 22537 }, { "epoch": 0.6188358045030203, "grad_norm": 0.49754321575164795, "learning_rate": 1.5660917867558657e-05, "loss": 0.573, "step": 22538 }, { "epoch": 0.6188632619439868, "grad_norm": 0.3791137933731079, "learning_rate": 1.5660561834309625e-05, "loss": 0.4359, "step": 22539 }, { "epoch": 0.6188907193849533, "grad_norm": 0.3578185439109802, "learning_rate": 1.5660205790501846e-05, "loss": 0.481, "step": 22540 }, { "epoch": 0.6189181768259199, "grad_norm": 0.4144853353500366, "learning_rate": 1.5659849736135978e-05, "loss": 0.5825, "step": 22541 }, { "epoch": 0.6189456342668863, "grad_norm": 0.344546914100647, "learning_rate": 1.565949367121268e-05, "loss": 0.511, "step": 22542 }, { "epoch": 0.6189730917078529, "grad_norm": 0.4364735186100006, "learning_rate": 1.5659137595732622e-05, "loss": 0.4951, "step": 22543 }, { "epoch": 0.6190005491488193, "grad_norm": 0.3719455301761627, "learning_rate": 1.5658781509696463e-05, "loss": 0.43, "step": 22544 }, { "epoch": 0.6190280065897859, "grad_norm": 0.44046077132225037, "learning_rate": 1.565842541310487e-05, "loss": 0.5231, "step": 22545 }, { "epoch": 0.6190554640307523, "grad_norm": 0.392814040184021, "learning_rate": 1.5658069305958513e-05, "loss": 0.5, "step": 22546 }, { "epoch": 0.6190829214717188, "grad_norm": 0.3251745104789734, "learning_rate": 1.565771318825805e-05, "loss": 0.455, "step": 22547 }, { "epoch": 0.6191103789126854, "grad_norm": 0.47120508551597595, "learning_rate": 1.5657357060004145e-05, "loss": 0.4408, "step": 22548 }, { "epoch": 0.6191378363536518, "grad_norm": 0.38633379340171814, "learning_rate": 1.5657000921197457e-05, "loss": 0.4884, "step": 22549 }, { "epoch": 0.6191652937946184, "grad_norm": 0.3656744658946991, "learning_rate": 1.5656644771838667e-05, "loss": 0.4734, "step": 22550 }, { "epoch": 0.6191927512355848, "grad_norm": 0.41446247696876526, "learning_rate": 1.5656288611928424e-05, "loss": 0.4433, "step": 22551 }, { "epoch": 0.6192202086765514, "grad_norm": 0.3832482099533081, "learning_rate": 1.5655932441467397e-05, "loss": 0.5289, "step": 22552 }, { "epoch": 0.6192476661175178, "grad_norm": 0.3296089768409729, "learning_rate": 1.5655576260456252e-05, "loss": 0.5573, "step": 22553 }, { "epoch": 0.6192751235584844, "grad_norm": 0.4522261917591095, "learning_rate": 1.565522006889565e-05, "loss": 0.5375, "step": 22554 }, { "epoch": 0.6193025809994509, "grad_norm": 0.3587777316570282, "learning_rate": 1.565486386678626e-05, "loss": 0.5655, "step": 22555 }, { "epoch": 0.6193300384404173, "grad_norm": 0.38999614119529724, "learning_rate": 1.5654507654128745e-05, "loss": 0.589, "step": 22556 }, { "epoch": 0.6193574958813839, "grad_norm": 0.3547980487346649, "learning_rate": 1.5654151430923766e-05, "loss": 0.4976, "step": 22557 }, { "epoch": 0.6193849533223503, "grad_norm": 0.39765843749046326, "learning_rate": 1.5653795197171993e-05, "loss": 0.4987, "step": 22558 }, { "epoch": 0.6194124107633169, "grad_norm": 0.36940908432006836, "learning_rate": 1.5653438952874087e-05, "loss": 0.5602, "step": 22559 }, { "epoch": 0.6194398682042833, "grad_norm": 0.41817039251327515, "learning_rate": 1.565308269803071e-05, "loss": 0.5042, "step": 22560 }, { "epoch": 0.6194673256452499, "grad_norm": 0.5144942998886108, "learning_rate": 1.5652726432642533e-05, "loss": 0.5298, "step": 22561 }, { "epoch": 0.6194947830862164, "grad_norm": 0.4038824439048767, "learning_rate": 1.5652370156710213e-05, "loss": 0.5381, "step": 22562 }, { "epoch": 0.6195222405271829, "grad_norm": 0.43323618173599243, "learning_rate": 1.5652013870234424e-05, "loss": 0.5913, "step": 22563 }, { "epoch": 0.6195496979681494, "grad_norm": 0.3764198422431946, "learning_rate": 1.5651657573215822e-05, "loss": 0.5325, "step": 22564 }, { "epoch": 0.6195771554091158, "grad_norm": 0.3528798818588257, "learning_rate": 1.5651301265655075e-05, "loss": 0.4174, "step": 22565 }, { "epoch": 0.6196046128500824, "grad_norm": 0.9821829199790955, "learning_rate": 1.5650944947552847e-05, "loss": 0.5163, "step": 22566 }, { "epoch": 0.6196320702910488, "grad_norm": 0.35506102442741394, "learning_rate": 1.56505886189098e-05, "loss": 0.5539, "step": 22567 }, { "epoch": 0.6196595277320154, "grad_norm": 0.37647560238838196, "learning_rate": 1.565023227972661e-05, "loss": 0.4637, "step": 22568 }, { "epoch": 0.6196869851729819, "grad_norm": 0.3382308781147003, "learning_rate": 1.564987593000393e-05, "loss": 0.4214, "step": 22569 }, { "epoch": 0.6197144426139484, "grad_norm": 0.4244212806224823, "learning_rate": 1.5649519569742423e-05, "loss": 0.484, "step": 22570 }, { "epoch": 0.6197419000549149, "grad_norm": 0.44075238704681396, "learning_rate": 1.5649163198942762e-05, "loss": 0.5294, "step": 22571 }, { "epoch": 0.6197693574958814, "grad_norm": 0.37580519914627075, "learning_rate": 1.564880681760561e-05, "loss": 0.4475, "step": 22572 }, { "epoch": 0.6197968149368479, "grad_norm": 0.4242590665817261, "learning_rate": 1.564845042573163e-05, "loss": 0.5493, "step": 22573 }, { "epoch": 0.6198242723778143, "grad_norm": 0.3536640405654907, "learning_rate": 1.564809402332149e-05, "loss": 0.4751, "step": 22574 }, { "epoch": 0.6198517298187809, "grad_norm": 0.3672976791858673, "learning_rate": 1.5647737610375845e-05, "loss": 0.5642, "step": 22575 }, { "epoch": 0.6198791872597474, "grad_norm": 0.35885342955589294, "learning_rate": 1.564738118689537e-05, "loss": 0.5382, "step": 22576 }, { "epoch": 0.6199066447007139, "grad_norm": 0.3349631130695343, "learning_rate": 1.564702475288073e-05, "loss": 0.435, "step": 22577 }, { "epoch": 0.6199341021416804, "grad_norm": 0.37958282232284546, "learning_rate": 1.5646668308332583e-05, "loss": 0.5096, "step": 22578 }, { "epoch": 0.6199615595826469, "grad_norm": 0.4214232861995697, "learning_rate": 1.5646311853251602e-05, "loss": 0.4949, "step": 22579 }, { "epoch": 0.6199890170236134, "grad_norm": 0.35895994305610657, "learning_rate": 1.564595538763844e-05, "loss": 0.497, "step": 22580 }, { "epoch": 0.6200164744645799, "grad_norm": 0.40956422686576843, "learning_rate": 1.5645598911493777e-05, "loss": 0.5213, "step": 22581 }, { "epoch": 0.6200439319055464, "grad_norm": 0.38975080847740173, "learning_rate": 1.5645242424818264e-05, "loss": 0.5406, "step": 22582 }, { "epoch": 0.620071389346513, "grad_norm": 0.48833921551704407, "learning_rate": 1.5644885927612573e-05, "loss": 0.4871, "step": 22583 }, { "epoch": 0.6200988467874794, "grad_norm": 0.35668689012527466, "learning_rate": 1.564452941987737e-05, "loss": 0.519, "step": 22584 }, { "epoch": 0.620126304228446, "grad_norm": 0.34398776292800903, "learning_rate": 1.5644172901613316e-05, "loss": 0.4791, "step": 22585 }, { "epoch": 0.6201537616694124, "grad_norm": 0.3954851031303406, "learning_rate": 1.5643816372821082e-05, "loss": 0.4846, "step": 22586 }, { "epoch": 0.6201812191103789, "grad_norm": 0.3854080140590668, "learning_rate": 1.5643459833501326e-05, "loss": 0.5038, "step": 22587 }, { "epoch": 0.6202086765513454, "grad_norm": 0.40405189990997314, "learning_rate": 1.5643103283654716e-05, "loss": 0.4982, "step": 22588 }, { "epoch": 0.6202361339923119, "grad_norm": 0.3797658681869507, "learning_rate": 1.564274672328192e-05, "loss": 0.535, "step": 22589 }, { "epoch": 0.6202635914332785, "grad_norm": 0.37836140394210815, "learning_rate": 1.5642390152383596e-05, "loss": 0.5108, "step": 22590 }, { "epoch": 0.6202910488742449, "grad_norm": 0.3773358166217804, "learning_rate": 1.5642033570960418e-05, "loss": 0.4456, "step": 22591 }, { "epoch": 0.6203185063152115, "grad_norm": 0.3703380227088928, "learning_rate": 1.5641676979013044e-05, "loss": 0.5331, "step": 22592 }, { "epoch": 0.6203459637561779, "grad_norm": 0.42922472953796387, "learning_rate": 1.5641320376542144e-05, "loss": 0.6243, "step": 22593 }, { "epoch": 0.6203734211971444, "grad_norm": 0.5236635804176331, "learning_rate": 1.5640963763548377e-05, "loss": 0.5139, "step": 22594 }, { "epoch": 0.6204008786381109, "grad_norm": 0.49097001552581787, "learning_rate": 1.564060714003242e-05, "loss": 0.5626, "step": 22595 }, { "epoch": 0.6204283360790774, "grad_norm": 0.3785485029220581, "learning_rate": 1.5640250505994924e-05, "loss": 0.4871, "step": 22596 }, { "epoch": 0.620455793520044, "grad_norm": 0.34385889768600464, "learning_rate": 1.5639893861436563e-05, "loss": 0.4676, "step": 22597 }, { "epoch": 0.6204832509610104, "grad_norm": 0.36455509066581726, "learning_rate": 1.5639537206357995e-05, "loss": 0.4661, "step": 22598 }, { "epoch": 0.620510708401977, "grad_norm": 0.41368114948272705, "learning_rate": 1.5639180540759894e-05, "loss": 0.4591, "step": 22599 }, { "epoch": 0.6205381658429434, "grad_norm": 0.37270450592041016, "learning_rate": 1.563882386464292e-05, "loss": 0.4966, "step": 22600 }, { "epoch": 0.62056562328391, "grad_norm": 0.46291279792785645, "learning_rate": 1.5638467178007744e-05, "loss": 0.5738, "step": 22601 }, { "epoch": 0.6205930807248764, "grad_norm": 0.3718380033969879, "learning_rate": 1.5638110480855023e-05, "loss": 0.5004, "step": 22602 }, { "epoch": 0.620620538165843, "grad_norm": 0.4439149796962738, "learning_rate": 1.563775377318543e-05, "loss": 0.5323, "step": 22603 }, { "epoch": 0.6206479956068095, "grad_norm": 0.33601468801498413, "learning_rate": 1.5637397054999627e-05, "loss": 0.4517, "step": 22604 }, { "epoch": 0.6206754530477759, "grad_norm": 0.4747694730758667, "learning_rate": 1.5637040326298273e-05, "loss": 0.5014, "step": 22605 }, { "epoch": 0.6207029104887425, "grad_norm": 0.39658188819885254, "learning_rate": 1.5636683587082045e-05, "loss": 0.5622, "step": 22606 }, { "epoch": 0.6207303679297089, "grad_norm": 0.3651837706565857, "learning_rate": 1.5636326837351604e-05, "loss": 0.5836, "step": 22607 }, { "epoch": 0.6207578253706755, "grad_norm": 0.36169058084487915, "learning_rate": 1.563597007710761e-05, "loss": 0.5424, "step": 22608 }, { "epoch": 0.6207852828116419, "grad_norm": 0.3912159502506256, "learning_rate": 1.5635613306350735e-05, "loss": 0.5342, "step": 22609 }, { "epoch": 0.6208127402526085, "grad_norm": 0.3356937766075134, "learning_rate": 1.5635256525081642e-05, "loss": 0.4676, "step": 22610 }, { "epoch": 0.620840197693575, "grad_norm": 0.41820642352104187, "learning_rate": 1.5634899733301e-05, "loss": 0.5162, "step": 22611 }, { "epoch": 0.6208676551345415, "grad_norm": 0.4147910475730896, "learning_rate": 1.563454293100947e-05, "loss": 0.5248, "step": 22612 }, { "epoch": 0.620895112575508, "grad_norm": 0.3181074261665344, "learning_rate": 1.5634186118207715e-05, "loss": 0.3554, "step": 22613 }, { "epoch": 0.6209225700164744, "grad_norm": 0.40373265743255615, "learning_rate": 1.5633829294896406e-05, "loss": 0.4901, "step": 22614 }, { "epoch": 0.620950027457441, "grad_norm": 0.3634554445743561, "learning_rate": 1.5633472461076214e-05, "loss": 0.5209, "step": 22615 }, { "epoch": 0.6209774848984074, "grad_norm": 0.3847080171108246, "learning_rate": 1.563311561674779e-05, "loss": 0.5679, "step": 22616 }, { "epoch": 0.621004942339374, "grad_norm": 0.32066574692726135, "learning_rate": 1.5632758761911813e-05, "loss": 0.4273, "step": 22617 }, { "epoch": 0.6210323997803405, "grad_norm": 0.38394179940223694, "learning_rate": 1.5632401896568944e-05, "loss": 0.5163, "step": 22618 }, { "epoch": 0.621059857221307, "grad_norm": 0.3617333173751831, "learning_rate": 1.563204502071984e-05, "loss": 0.458, "step": 22619 }, { "epoch": 0.6210873146622735, "grad_norm": 0.5677087903022766, "learning_rate": 1.563168813436518e-05, "loss": 0.4831, "step": 22620 }, { "epoch": 0.62111477210324, "grad_norm": 0.45325592160224915, "learning_rate": 1.5631331237505625e-05, "loss": 0.5384, "step": 22621 }, { "epoch": 0.6211422295442065, "grad_norm": 0.42556658387184143, "learning_rate": 1.5630974330141837e-05, "loss": 0.5262, "step": 22622 }, { "epoch": 0.6211696869851729, "grad_norm": 0.3974032402038574, "learning_rate": 1.563061741227449e-05, "loss": 0.5294, "step": 22623 }, { "epoch": 0.6211971444261395, "grad_norm": 0.39191335439682007, "learning_rate": 1.5630260483904236e-05, "loss": 0.4514, "step": 22624 }, { "epoch": 0.621224601867106, "grad_norm": 0.3502022922039032, "learning_rate": 1.5629903545031755e-05, "loss": 0.5468, "step": 22625 }, { "epoch": 0.6212520593080725, "grad_norm": 0.4027695059776306, "learning_rate": 1.5629546595657705e-05, "loss": 0.5074, "step": 22626 }, { "epoch": 0.621279516749039, "grad_norm": 0.3561052083969116, "learning_rate": 1.5629189635782753e-05, "loss": 0.505, "step": 22627 }, { "epoch": 0.6213069741900055, "grad_norm": 0.38470059633255005, "learning_rate": 1.5628832665407568e-05, "loss": 0.5201, "step": 22628 }, { "epoch": 0.621334431630972, "grad_norm": 0.36712646484375, "learning_rate": 1.5628475684532813e-05, "loss": 0.5117, "step": 22629 }, { "epoch": 0.6213618890719385, "grad_norm": 0.9152352213859558, "learning_rate": 1.5628118693159153e-05, "loss": 0.5441, "step": 22630 }, { "epoch": 0.621389346512905, "grad_norm": 0.3418535590171814, "learning_rate": 1.5627761691287257e-05, "loss": 0.5302, "step": 22631 }, { "epoch": 0.6214168039538714, "grad_norm": 0.4033520519733429, "learning_rate": 1.562740467891779e-05, "loss": 0.4855, "step": 22632 }, { "epoch": 0.621444261394838, "grad_norm": 0.3610757887363434, "learning_rate": 1.562704765605142e-05, "loss": 0.5, "step": 22633 }, { "epoch": 0.6214717188358045, "grad_norm": 0.4056897759437561, "learning_rate": 1.5626690622688804e-05, "loss": 0.4693, "step": 22634 }, { "epoch": 0.621499176276771, "grad_norm": 0.332472026348114, "learning_rate": 1.5626333578830613e-05, "loss": 0.4633, "step": 22635 }, { "epoch": 0.6215266337177375, "grad_norm": 0.4023151993751526, "learning_rate": 1.562597652447752e-05, "loss": 0.5453, "step": 22636 }, { "epoch": 0.621554091158704, "grad_norm": 0.3857662081718445, "learning_rate": 1.562561945963018e-05, "loss": 0.544, "step": 22637 }, { "epoch": 0.6215815485996705, "grad_norm": 0.4395194947719574, "learning_rate": 1.562526238428927e-05, "loss": 0.5481, "step": 22638 }, { "epoch": 0.621609006040637, "grad_norm": 0.3387903869152069, "learning_rate": 1.5624905298455448e-05, "loss": 0.4646, "step": 22639 }, { "epoch": 0.6216364634816035, "grad_norm": 0.3833998143672943, "learning_rate": 1.562454820212938e-05, "loss": 0.4652, "step": 22640 }, { "epoch": 0.6216639209225701, "grad_norm": 0.3517824411392212, "learning_rate": 1.5624191095311736e-05, "loss": 0.5051, "step": 22641 }, { "epoch": 0.6216913783635365, "grad_norm": 0.4741119146347046, "learning_rate": 1.562383397800318e-05, "loss": 0.4815, "step": 22642 }, { "epoch": 0.621718835804503, "grad_norm": 0.4316595792770386, "learning_rate": 1.562347685020438e-05, "loss": 0.4719, "step": 22643 }, { "epoch": 0.6217462932454695, "grad_norm": 0.4460110068321228, "learning_rate": 1.5623119711915998e-05, "loss": 0.4274, "step": 22644 }, { "epoch": 0.621773750686436, "grad_norm": 0.40927886962890625, "learning_rate": 1.5622762563138706e-05, "loss": 0.4898, "step": 22645 }, { "epoch": 0.6218012081274025, "grad_norm": 0.3435615599155426, "learning_rate": 1.5622405403873168e-05, "loss": 0.5115, "step": 22646 }, { "epoch": 0.621828665568369, "grad_norm": 0.3902473747730255, "learning_rate": 1.5622048234120047e-05, "loss": 0.475, "step": 22647 }, { "epoch": 0.6218561230093356, "grad_norm": 0.4384199380874634, "learning_rate": 1.5621691053880015e-05, "loss": 0.5083, "step": 22648 }, { "epoch": 0.621883580450302, "grad_norm": 0.3897465765476227, "learning_rate": 1.5621333863153732e-05, "loss": 0.5252, "step": 22649 }, { "epoch": 0.6219110378912686, "grad_norm": 0.3587454855442047, "learning_rate": 1.562097666194187e-05, "loss": 0.4685, "step": 22650 }, { "epoch": 0.621938495332235, "grad_norm": 0.43092775344848633, "learning_rate": 1.5620619450245086e-05, "loss": 0.5357, "step": 22651 }, { "epoch": 0.6219659527732015, "grad_norm": 0.4256090223789215, "learning_rate": 1.5620262228064058e-05, "loss": 0.5146, "step": 22652 }, { "epoch": 0.621993410214168, "grad_norm": 0.41692033410072327, "learning_rate": 1.561990499539945e-05, "loss": 0.4634, "step": 22653 }, { "epoch": 0.6220208676551345, "grad_norm": 0.3598794639110565, "learning_rate": 1.561954775225192e-05, "loss": 0.4653, "step": 22654 }, { "epoch": 0.6220483250961011, "grad_norm": 0.3634341061115265, "learning_rate": 1.561919049862214e-05, "loss": 0.5068, "step": 22655 }, { "epoch": 0.6220757825370675, "grad_norm": 0.3861195743083954, "learning_rate": 1.561883323451078e-05, "loss": 0.5829, "step": 22656 }, { "epoch": 0.6221032399780341, "grad_norm": 0.3582019507884979, "learning_rate": 1.56184759599185e-05, "loss": 0.5283, "step": 22657 }, { "epoch": 0.6221306974190005, "grad_norm": 0.5511125922203064, "learning_rate": 1.5618118674845967e-05, "loss": 0.6275, "step": 22658 }, { "epoch": 0.6221581548599671, "grad_norm": 0.35205045342445374, "learning_rate": 1.5617761379293854e-05, "loss": 0.5127, "step": 22659 }, { "epoch": 0.6221856123009335, "grad_norm": 0.33741921186447144, "learning_rate": 1.5617404073262822e-05, "loss": 0.4287, "step": 22660 }, { "epoch": 0.6222130697419, "grad_norm": 0.43713945150375366, "learning_rate": 1.561704675675354e-05, "loss": 0.5366, "step": 22661 }, { "epoch": 0.6222405271828666, "grad_norm": 0.34969034790992737, "learning_rate": 1.5616689429766668e-05, "loss": 0.4907, "step": 22662 }, { "epoch": 0.622267984623833, "grad_norm": 0.3576207756996155, "learning_rate": 1.561633209230288e-05, "loss": 0.4884, "step": 22663 }, { "epoch": 0.6222954420647996, "grad_norm": 0.3525940179824829, "learning_rate": 1.561597474436284e-05, "loss": 0.5335, "step": 22664 }, { "epoch": 0.622322899505766, "grad_norm": 0.4156389534473419, "learning_rate": 1.5615617385947218e-05, "loss": 0.5456, "step": 22665 }, { "epoch": 0.6223503569467326, "grad_norm": 0.43618497252464294, "learning_rate": 1.5615260017056675e-05, "loss": 0.5799, "step": 22666 }, { "epoch": 0.622377814387699, "grad_norm": 0.36887216567993164, "learning_rate": 1.561490263769188e-05, "loss": 0.5182, "step": 22667 }, { "epoch": 0.6224052718286656, "grad_norm": 0.3900572955608368, "learning_rate": 1.56145452478535e-05, "loss": 0.5064, "step": 22668 }, { "epoch": 0.6224327292696321, "grad_norm": 0.47804415225982666, "learning_rate": 1.56141878475422e-05, "loss": 0.5715, "step": 22669 }, { "epoch": 0.6224601867105986, "grad_norm": 0.4438173472881317, "learning_rate": 1.5613830436758646e-05, "loss": 0.5277, "step": 22670 }, { "epoch": 0.6224876441515651, "grad_norm": 0.3804474472999573, "learning_rate": 1.561347301550351e-05, "loss": 0.5093, "step": 22671 }, { "epoch": 0.6225151015925315, "grad_norm": 0.3673146665096283, "learning_rate": 1.5613115583777454e-05, "loss": 0.5292, "step": 22672 }, { "epoch": 0.6225425590334981, "grad_norm": 0.48643922805786133, "learning_rate": 1.5612758141581143e-05, "loss": 0.5609, "step": 22673 }, { "epoch": 0.6225700164744645, "grad_norm": 0.3528304398059845, "learning_rate": 1.5612400688915254e-05, "loss": 0.4291, "step": 22674 }, { "epoch": 0.6225974739154311, "grad_norm": 0.3978547751903534, "learning_rate": 1.561204322578044e-05, "loss": 0.5745, "step": 22675 }, { "epoch": 0.6226249313563976, "grad_norm": 0.3570099472999573, "learning_rate": 1.5611685752177374e-05, "loss": 0.5707, "step": 22676 }, { "epoch": 0.6226523887973641, "grad_norm": 0.39851608872413635, "learning_rate": 1.5611328268106728e-05, "loss": 0.5501, "step": 22677 }, { "epoch": 0.6226798462383306, "grad_norm": 0.33250051736831665, "learning_rate": 1.5610970773569158e-05, "loss": 0.4324, "step": 22678 }, { "epoch": 0.622707303679297, "grad_norm": 0.4050813317298889, "learning_rate": 1.561061326856534e-05, "loss": 0.5022, "step": 22679 }, { "epoch": 0.6227347611202636, "grad_norm": 0.47232159972190857, "learning_rate": 1.561025575309594e-05, "loss": 0.4989, "step": 22680 }, { "epoch": 0.62276221856123, "grad_norm": 0.3878662884235382, "learning_rate": 1.5609898227161618e-05, "loss": 0.4574, "step": 22681 }, { "epoch": 0.6227896760021966, "grad_norm": 0.46175870299339294, "learning_rate": 1.5609540690763046e-05, "loss": 0.554, "step": 22682 }, { "epoch": 0.6228171334431631, "grad_norm": 0.4465007185935974, "learning_rate": 1.5609183143900892e-05, "loss": 0.6144, "step": 22683 }, { "epoch": 0.6228445908841296, "grad_norm": 0.4138753116130829, "learning_rate": 1.560882558657582e-05, "loss": 0.4966, "step": 22684 }, { "epoch": 0.6228720483250961, "grad_norm": 0.3988894820213318, "learning_rate": 1.56084680187885e-05, "loss": 0.5141, "step": 22685 }, { "epoch": 0.6228995057660626, "grad_norm": 0.4476565420627594, "learning_rate": 1.5608110440539594e-05, "loss": 0.5225, "step": 22686 }, { "epoch": 0.6229269632070291, "grad_norm": 0.3888532817363739, "learning_rate": 1.5607752851829775e-05, "loss": 0.5389, "step": 22687 }, { "epoch": 0.6229544206479956, "grad_norm": 0.45980122685432434, "learning_rate": 1.5607395252659708e-05, "loss": 0.4463, "step": 22688 }, { "epoch": 0.6229818780889621, "grad_norm": 0.3242710828781128, "learning_rate": 1.5607037643030055e-05, "loss": 0.4269, "step": 22689 }, { "epoch": 0.6230093355299287, "grad_norm": 0.39005526900291443, "learning_rate": 1.560668002294149e-05, "loss": 0.5987, "step": 22690 }, { "epoch": 0.6230367929708951, "grad_norm": 0.40786340832710266, "learning_rate": 1.5606322392394674e-05, "loss": 0.5291, "step": 22691 }, { "epoch": 0.6230642504118616, "grad_norm": 0.36352455615997314, "learning_rate": 1.560596475139028e-05, "loss": 0.5293, "step": 22692 }, { "epoch": 0.6230917078528281, "grad_norm": 0.3889124393463135, "learning_rate": 1.5605607099928977e-05, "loss": 0.4513, "step": 22693 }, { "epoch": 0.6231191652937946, "grad_norm": 0.4477842152118683, "learning_rate": 1.560524943801142e-05, "loss": 0.5376, "step": 22694 }, { "epoch": 0.6231466227347611, "grad_norm": 0.47595280408859253, "learning_rate": 1.5604891765638288e-05, "loss": 0.4798, "step": 22695 }, { "epoch": 0.6231740801757276, "grad_norm": 0.39103659987449646, "learning_rate": 1.5604534082810246e-05, "loss": 0.4693, "step": 22696 }, { "epoch": 0.6232015376166942, "grad_norm": 0.39449864625930786, "learning_rate": 1.5604176389527955e-05, "loss": 0.4763, "step": 22697 }, { "epoch": 0.6232289950576606, "grad_norm": 0.3627716600894928, "learning_rate": 1.5603818685792088e-05, "loss": 0.4547, "step": 22698 }, { "epoch": 0.6232564524986272, "grad_norm": 0.35830676555633545, "learning_rate": 1.560346097160331e-05, "loss": 0.4742, "step": 22699 }, { "epoch": 0.6232839099395936, "grad_norm": 0.39674195647239685, "learning_rate": 1.560310324696229e-05, "loss": 0.4726, "step": 22700 }, { "epoch": 0.6233113673805601, "grad_norm": 0.31904152035713196, "learning_rate": 1.5602745511869693e-05, "loss": 0.4338, "step": 22701 }, { "epoch": 0.6233388248215266, "grad_norm": 0.405972421169281, "learning_rate": 1.5602387766326187e-05, "loss": 0.4822, "step": 22702 }, { "epoch": 0.6233662822624931, "grad_norm": 0.37426167726516724, "learning_rate": 1.560203001033244e-05, "loss": 0.5103, "step": 22703 }, { "epoch": 0.6233937397034597, "grad_norm": 0.3816124200820923, "learning_rate": 1.5601672243889117e-05, "loss": 0.529, "step": 22704 }, { "epoch": 0.6234211971444261, "grad_norm": 0.36131882667541504, "learning_rate": 1.560131446699689e-05, "loss": 0.554, "step": 22705 }, { "epoch": 0.6234486545853927, "grad_norm": 0.42496657371520996, "learning_rate": 1.5600956679656426e-05, "loss": 0.535, "step": 22706 }, { "epoch": 0.6234761120263591, "grad_norm": 0.45510175824165344, "learning_rate": 1.560059888186839e-05, "loss": 0.5216, "step": 22707 }, { "epoch": 0.6235035694673257, "grad_norm": 0.38218075037002563, "learning_rate": 1.5600241073633444e-05, "loss": 0.49, "step": 22708 }, { "epoch": 0.6235310269082921, "grad_norm": 0.44072282314300537, "learning_rate": 1.5599883254952265e-05, "loss": 0.5628, "step": 22709 }, { "epoch": 0.6235584843492586, "grad_norm": 0.40684399008750916, "learning_rate": 1.5599525425825516e-05, "loss": 0.4922, "step": 22710 }, { "epoch": 0.6235859417902252, "grad_norm": 0.42713338136672974, "learning_rate": 1.5599167586253866e-05, "loss": 0.443, "step": 22711 }, { "epoch": 0.6236133992311916, "grad_norm": 0.35335904359817505, "learning_rate": 1.559880973623798e-05, "loss": 0.497, "step": 22712 }, { "epoch": 0.6236408566721582, "grad_norm": 0.3614974021911621, "learning_rate": 1.5598451875778527e-05, "loss": 0.5819, "step": 22713 }, { "epoch": 0.6236683141131246, "grad_norm": 0.3396138548851013, "learning_rate": 1.5598094004876175e-05, "loss": 0.4732, "step": 22714 }, { "epoch": 0.6236957715540912, "grad_norm": 0.34503644704818726, "learning_rate": 1.559773612353159e-05, "loss": 0.4349, "step": 22715 }, { "epoch": 0.6237232289950576, "grad_norm": 0.38056257367134094, "learning_rate": 1.5597378231745443e-05, "loss": 0.4903, "step": 22716 }, { "epoch": 0.6237506864360242, "grad_norm": 0.40777167677879333, "learning_rate": 1.5597020329518396e-05, "loss": 0.4778, "step": 22717 }, { "epoch": 0.6237781438769907, "grad_norm": 0.3716745376586914, "learning_rate": 1.559666241685112e-05, "loss": 0.5017, "step": 22718 }, { "epoch": 0.6238056013179571, "grad_norm": 0.35604244470596313, "learning_rate": 1.5596304493744286e-05, "loss": 0.4767, "step": 22719 }, { "epoch": 0.6238330587589237, "grad_norm": 0.37230029702186584, "learning_rate": 1.5595946560198555e-05, "loss": 0.5212, "step": 22720 }, { "epoch": 0.6238605161998901, "grad_norm": 0.36314722895622253, "learning_rate": 1.5595588616214596e-05, "loss": 0.4847, "step": 22721 }, { "epoch": 0.6238879736408567, "grad_norm": 0.44539469480514526, "learning_rate": 1.559523066179308e-05, "loss": 0.494, "step": 22722 }, { "epoch": 0.6239154310818231, "grad_norm": 0.414894700050354, "learning_rate": 1.5594872696934676e-05, "loss": 0.4902, "step": 22723 }, { "epoch": 0.6239428885227897, "grad_norm": 0.4315321147441864, "learning_rate": 1.5594514721640046e-05, "loss": 0.5386, "step": 22724 }, { "epoch": 0.6239703459637562, "grad_norm": 0.39150580763816833, "learning_rate": 1.5594156735909858e-05, "loss": 0.5448, "step": 22725 }, { "epoch": 0.6239978034047227, "grad_norm": 0.3787269592285156, "learning_rate": 1.5593798739744787e-05, "loss": 0.495, "step": 22726 }, { "epoch": 0.6240252608456892, "grad_norm": 0.34430986642837524, "learning_rate": 1.5593440733145493e-05, "loss": 0.4511, "step": 22727 }, { "epoch": 0.6240527182866556, "grad_norm": 0.6568994522094727, "learning_rate": 1.5593082716112647e-05, "loss": 0.4838, "step": 22728 }, { "epoch": 0.6240801757276222, "grad_norm": 0.38530707359313965, "learning_rate": 1.559272468864692e-05, "loss": 0.4793, "step": 22729 }, { "epoch": 0.6241076331685886, "grad_norm": 0.40359801054000854, "learning_rate": 1.5592366650748973e-05, "loss": 0.5192, "step": 22730 }, { "epoch": 0.6241350906095552, "grad_norm": 0.37862539291381836, "learning_rate": 1.5592008602419476e-05, "loss": 0.4663, "step": 22731 }, { "epoch": 0.6241625480505217, "grad_norm": 0.45218411087989807, "learning_rate": 1.55916505436591e-05, "loss": 0.5075, "step": 22732 }, { "epoch": 0.6241900054914882, "grad_norm": 0.39124545454978943, "learning_rate": 1.559129247446851e-05, "loss": 0.5156, "step": 22733 }, { "epoch": 0.6242174629324547, "grad_norm": 0.33560776710510254, "learning_rate": 1.559093439484838e-05, "loss": 0.4552, "step": 22734 }, { "epoch": 0.6242449203734212, "grad_norm": 0.3826710879802704, "learning_rate": 1.559057630479937e-05, "loss": 0.5314, "step": 22735 }, { "epoch": 0.6242723778143877, "grad_norm": 0.3951885998249054, "learning_rate": 1.5590218204322146e-05, "loss": 0.51, "step": 22736 }, { "epoch": 0.6242998352553542, "grad_norm": 0.38460275530815125, "learning_rate": 1.5589860093417385e-05, "loss": 0.485, "step": 22737 }, { "epoch": 0.6243272926963207, "grad_norm": 0.3685416877269745, "learning_rate": 1.558950197208575e-05, "loss": 0.5065, "step": 22738 }, { "epoch": 0.6243547501372873, "grad_norm": 0.3551209270954132, "learning_rate": 1.5589143840327913e-05, "loss": 0.439, "step": 22739 }, { "epoch": 0.6243822075782537, "grad_norm": 0.3589816987514496, "learning_rate": 1.5588785698144536e-05, "loss": 0.4577, "step": 22740 }, { "epoch": 0.6244096650192202, "grad_norm": 0.483177125453949, "learning_rate": 1.558842754553629e-05, "loss": 0.5091, "step": 22741 }, { "epoch": 0.6244371224601867, "grad_norm": 0.38260507583618164, "learning_rate": 1.5588069382503846e-05, "loss": 0.5084, "step": 22742 }, { "epoch": 0.6244645799011532, "grad_norm": 0.36432504653930664, "learning_rate": 1.5587711209047863e-05, "loss": 0.4884, "step": 22743 }, { "epoch": 0.6244920373421197, "grad_norm": 0.4380388557910919, "learning_rate": 1.5587353025169022e-05, "loss": 0.5313, "step": 22744 }, { "epoch": 0.6245194947830862, "grad_norm": 0.4005570709705353, "learning_rate": 1.558699483086798e-05, "loss": 0.4904, "step": 22745 }, { "epoch": 0.6245469522240528, "grad_norm": 0.46796050667762756, "learning_rate": 1.5586636626145412e-05, "loss": 0.5251, "step": 22746 }, { "epoch": 0.6245744096650192, "grad_norm": 0.38354021310806274, "learning_rate": 1.5586278411001982e-05, "loss": 0.5363, "step": 22747 }, { "epoch": 0.6246018671059858, "grad_norm": 0.35663899779319763, "learning_rate": 1.558592018543836e-05, "loss": 0.4533, "step": 22748 }, { "epoch": 0.6246293245469522, "grad_norm": 0.4117232859134674, "learning_rate": 1.5585561949455214e-05, "loss": 0.4495, "step": 22749 }, { "epoch": 0.6246567819879187, "grad_norm": 0.41512274742126465, "learning_rate": 1.5585203703053212e-05, "loss": 0.5262, "step": 22750 }, { "epoch": 0.6246842394288852, "grad_norm": 0.4299023747444153, "learning_rate": 1.558484544623302e-05, "loss": 0.5089, "step": 22751 }, { "epoch": 0.6247116968698517, "grad_norm": 0.3952581584453583, "learning_rate": 1.5584487178995315e-05, "loss": 0.5521, "step": 22752 }, { "epoch": 0.6247391543108183, "grad_norm": 0.33879366517066956, "learning_rate": 1.5584128901340755e-05, "loss": 0.4641, "step": 22753 }, { "epoch": 0.6247666117517847, "grad_norm": 0.40377378463745117, "learning_rate": 1.5583770613270014e-05, "loss": 0.4736, "step": 22754 }, { "epoch": 0.6247940691927513, "grad_norm": 0.3976646959781647, "learning_rate": 1.5583412314783756e-05, "loss": 0.5197, "step": 22755 }, { "epoch": 0.6248215266337177, "grad_norm": 0.3554416000843048, "learning_rate": 1.5583054005882655e-05, "loss": 0.4831, "step": 22756 }, { "epoch": 0.6248489840746843, "grad_norm": 0.37269291281700134, "learning_rate": 1.5582695686567375e-05, "loss": 0.5301, "step": 22757 }, { "epoch": 0.6248764415156507, "grad_norm": 0.41960182785987854, "learning_rate": 1.5582337356838585e-05, "loss": 0.4962, "step": 22758 }, { "epoch": 0.6249038989566172, "grad_norm": 0.4099493622779846, "learning_rate": 1.5581979016696958e-05, "loss": 0.5084, "step": 22759 }, { "epoch": 0.6249313563975838, "grad_norm": 0.4261516332626343, "learning_rate": 1.5581620666143154e-05, "loss": 0.4589, "step": 22760 }, { "epoch": 0.6249588138385502, "grad_norm": 0.3573504388332367, "learning_rate": 1.5581262305177848e-05, "loss": 0.5036, "step": 22761 }, { "epoch": 0.6249862712795168, "grad_norm": 0.3524474799633026, "learning_rate": 1.5580903933801705e-05, "loss": 0.4447, "step": 22762 }, { "epoch": 0.6250137287204832, "grad_norm": 0.37237852811813354, "learning_rate": 1.5580545552015398e-05, "loss": 0.4896, "step": 22763 }, { "epoch": 0.6250411861614498, "grad_norm": 0.4299606382846832, "learning_rate": 1.5580187159819587e-05, "loss": 0.5009, "step": 22764 }, { "epoch": 0.6250686436024162, "grad_norm": 0.36828887462615967, "learning_rate": 1.5579828757214952e-05, "loss": 0.4155, "step": 22765 }, { "epoch": 0.6250961010433828, "grad_norm": 0.38769084215164185, "learning_rate": 1.557947034420215e-05, "loss": 0.4752, "step": 22766 }, { "epoch": 0.6251235584843493, "grad_norm": 0.37206271290779114, "learning_rate": 1.557911192078186e-05, "loss": 0.4872, "step": 22767 }, { "epoch": 0.6251510159253157, "grad_norm": 0.38320013880729675, "learning_rate": 1.5578753486954744e-05, "loss": 0.4651, "step": 22768 }, { "epoch": 0.6251784733662823, "grad_norm": 0.44733524322509766, "learning_rate": 1.557839504272147e-05, "loss": 0.5082, "step": 22769 }, { "epoch": 0.6252059308072487, "grad_norm": 0.3549010157585144, "learning_rate": 1.5578036588082714e-05, "loss": 0.5009, "step": 22770 }, { "epoch": 0.6252333882482153, "grad_norm": 0.49526211619377136, "learning_rate": 1.5577678123039137e-05, "loss": 0.492, "step": 22771 }, { "epoch": 0.6252608456891817, "grad_norm": 0.3599933981895447, "learning_rate": 1.5577319647591406e-05, "loss": 0.4295, "step": 22772 }, { "epoch": 0.6252883031301483, "grad_norm": 0.9174363613128662, "learning_rate": 1.55769611617402e-05, "loss": 0.537, "step": 22773 }, { "epoch": 0.6253157605711148, "grad_norm": 0.3954835832118988, "learning_rate": 1.5576602665486178e-05, "loss": 0.4932, "step": 22774 }, { "epoch": 0.6253432180120813, "grad_norm": 0.3601706922054291, "learning_rate": 1.5576244158830013e-05, "loss": 0.4549, "step": 22775 }, { "epoch": 0.6253706754530478, "grad_norm": 0.37067365646362305, "learning_rate": 1.557588564177237e-05, "loss": 0.5007, "step": 22776 }, { "epoch": 0.6253981328940142, "grad_norm": 0.4348433017730713, "learning_rate": 1.5575527114313924e-05, "loss": 0.5612, "step": 22777 }, { "epoch": 0.6254255903349808, "grad_norm": 0.6428379416465759, "learning_rate": 1.557516857645534e-05, "loss": 0.4535, "step": 22778 }, { "epoch": 0.6254530477759472, "grad_norm": 0.3750077784061432, "learning_rate": 1.5574810028197286e-05, "loss": 0.4938, "step": 22779 }, { "epoch": 0.6254805052169138, "grad_norm": 0.38986048102378845, "learning_rate": 1.5574451469540436e-05, "loss": 0.5633, "step": 22780 }, { "epoch": 0.6255079626578803, "grad_norm": 0.48992031812667847, "learning_rate": 1.557409290048545e-05, "loss": 0.4444, "step": 22781 }, { "epoch": 0.6255354200988468, "grad_norm": 0.4276306629180908, "learning_rate": 1.5573734321033005e-05, "loss": 0.5149, "step": 22782 }, { "epoch": 0.6255628775398133, "grad_norm": 0.4124740958213806, "learning_rate": 1.5573375731183764e-05, "loss": 0.5508, "step": 22783 }, { "epoch": 0.6255903349807798, "grad_norm": 0.4212484359741211, "learning_rate": 1.55730171309384e-05, "loss": 0.5488, "step": 22784 }, { "epoch": 0.6256177924217463, "grad_norm": 0.458383172750473, "learning_rate": 1.557265852029758e-05, "loss": 0.5121, "step": 22785 }, { "epoch": 0.6256452498627127, "grad_norm": 0.36886003613471985, "learning_rate": 1.5572299899261972e-05, "loss": 0.4295, "step": 22786 }, { "epoch": 0.6256727073036793, "grad_norm": 0.4020388722419739, "learning_rate": 1.5571941267832246e-05, "loss": 0.4822, "step": 22787 }, { "epoch": 0.6257001647446458, "grad_norm": 0.5739309787750244, "learning_rate": 1.5571582626009074e-05, "loss": 0.4956, "step": 22788 }, { "epoch": 0.6257276221856123, "grad_norm": 0.5499763488769531, "learning_rate": 1.557122397379312e-05, "loss": 0.5177, "step": 22789 }, { "epoch": 0.6257550796265788, "grad_norm": 0.3462575674057007, "learning_rate": 1.5570865311185056e-05, "loss": 0.4311, "step": 22790 }, { "epoch": 0.6257825370675453, "grad_norm": 0.44280657172203064, "learning_rate": 1.5570506638185552e-05, "loss": 0.4957, "step": 22791 }, { "epoch": 0.6258099945085118, "grad_norm": 0.38041606545448303, "learning_rate": 1.557014795479527e-05, "loss": 0.5174, "step": 22792 }, { "epoch": 0.6258374519494783, "grad_norm": 0.37107986211776733, "learning_rate": 1.5569789261014888e-05, "loss": 0.5224, "step": 22793 }, { "epoch": 0.6258649093904448, "grad_norm": 0.3959111273288727, "learning_rate": 1.5569430556845073e-05, "loss": 0.4643, "step": 22794 }, { "epoch": 0.6258923668314114, "grad_norm": 0.3983900249004364, "learning_rate": 1.556907184228649e-05, "loss": 0.5265, "step": 22795 }, { "epoch": 0.6259198242723778, "grad_norm": 0.37801679968833923, "learning_rate": 1.556871311733981e-05, "loss": 0.5135, "step": 22796 }, { "epoch": 0.6259472817133444, "grad_norm": 0.39358097314834595, "learning_rate": 1.5568354382005702e-05, "loss": 0.5241, "step": 22797 }, { "epoch": 0.6259747391543108, "grad_norm": 0.42735204100608826, "learning_rate": 1.5567995636284838e-05, "loss": 0.5028, "step": 22798 }, { "epoch": 0.6260021965952773, "grad_norm": 0.364994078874588, "learning_rate": 1.5567636880177887e-05, "loss": 0.43, "step": 22799 }, { "epoch": 0.6260296540362438, "grad_norm": 0.3359917998313904, "learning_rate": 1.5567278113685512e-05, "loss": 0.4711, "step": 22800 }, { "epoch": 0.6260571114772103, "grad_norm": 0.3706456422805786, "learning_rate": 1.5566919336808388e-05, "loss": 0.5135, "step": 22801 }, { "epoch": 0.6260845689181769, "grad_norm": 0.35560882091522217, "learning_rate": 1.5566560549547185e-05, "loss": 0.4687, "step": 22802 }, { "epoch": 0.6261120263591433, "grad_norm": 0.4092799723148346, "learning_rate": 1.5566201751902568e-05, "loss": 0.5098, "step": 22803 }, { "epoch": 0.6261394838001099, "grad_norm": 0.3720338046550751, "learning_rate": 1.5565842943875208e-05, "loss": 0.5246, "step": 22804 }, { "epoch": 0.6261669412410763, "grad_norm": 0.39075514674186707, "learning_rate": 1.5565484125465774e-05, "loss": 0.4956, "step": 22805 }, { "epoch": 0.6261943986820429, "grad_norm": 0.36669614911079407, "learning_rate": 1.5565125296674936e-05, "loss": 0.5522, "step": 22806 }, { "epoch": 0.6262218561230093, "grad_norm": 0.3880726099014282, "learning_rate": 1.5564766457503363e-05, "loss": 0.5263, "step": 22807 }, { "epoch": 0.6262493135639758, "grad_norm": 0.4020625948905945, "learning_rate": 1.5564407607951727e-05, "loss": 0.5234, "step": 22808 }, { "epoch": 0.6262767710049424, "grad_norm": 0.3920706808567047, "learning_rate": 1.556404874802069e-05, "loss": 0.5092, "step": 22809 }, { "epoch": 0.6263042284459088, "grad_norm": 0.37713736295700073, "learning_rate": 1.556368987771093e-05, "loss": 0.5631, "step": 22810 }, { "epoch": 0.6263316858868754, "grad_norm": 0.3829099237918854, "learning_rate": 1.5563330997023115e-05, "loss": 0.49, "step": 22811 }, { "epoch": 0.6263591433278418, "grad_norm": 0.38515612483024597, "learning_rate": 1.5562972105957906e-05, "loss": 0.6449, "step": 22812 }, { "epoch": 0.6263866007688084, "grad_norm": 0.36675822734832764, "learning_rate": 1.556261320451598e-05, "loss": 0.4394, "step": 22813 }, { "epoch": 0.6264140582097748, "grad_norm": 0.4350670278072357, "learning_rate": 1.5562254292698008e-05, "loss": 0.5397, "step": 22814 }, { "epoch": 0.6264415156507414, "grad_norm": 0.4069001376628876, "learning_rate": 1.5561895370504653e-05, "loss": 0.5161, "step": 22815 }, { "epoch": 0.6264689730917079, "grad_norm": 0.36855360865592957, "learning_rate": 1.556153643793659e-05, "loss": 0.4666, "step": 22816 }, { "epoch": 0.6264964305326743, "grad_norm": 0.38894593715667725, "learning_rate": 1.5561177494994487e-05, "loss": 0.5153, "step": 22817 }, { "epoch": 0.6265238879736409, "grad_norm": 0.34574103355407715, "learning_rate": 1.5560818541679014e-05, "loss": 0.4004, "step": 22818 }, { "epoch": 0.6265513454146073, "grad_norm": 0.3992389142513275, "learning_rate": 1.5560459577990837e-05, "loss": 0.5846, "step": 22819 }, { "epoch": 0.6265788028555739, "grad_norm": 0.43168431520462036, "learning_rate": 1.556010060393063e-05, "loss": 0.5918, "step": 22820 }, { "epoch": 0.6266062602965403, "grad_norm": 0.4261564612388611, "learning_rate": 1.555974161949906e-05, "loss": 0.5989, "step": 22821 }, { "epoch": 0.6266337177375069, "grad_norm": 1.2410686016082764, "learning_rate": 1.5559382624696797e-05, "loss": 0.4839, "step": 22822 }, { "epoch": 0.6266611751784734, "grad_norm": 0.41785067319869995, "learning_rate": 1.555902361952451e-05, "loss": 0.5445, "step": 22823 }, { "epoch": 0.6266886326194399, "grad_norm": 0.42996448278427124, "learning_rate": 1.5558664603982873e-05, "loss": 0.4732, "step": 22824 }, { "epoch": 0.6267160900604064, "grad_norm": 0.40454572439193726, "learning_rate": 1.555830557807255e-05, "loss": 0.4784, "step": 22825 }, { "epoch": 0.6267435475013728, "grad_norm": 0.5430436134338379, "learning_rate": 1.5557946541794214e-05, "loss": 0.4844, "step": 22826 }, { "epoch": 0.6267710049423394, "grad_norm": 0.40204474329948425, "learning_rate": 1.5557587495148535e-05, "loss": 0.5854, "step": 22827 }, { "epoch": 0.6267984623833058, "grad_norm": 0.3946147859096527, "learning_rate": 1.5557228438136175e-05, "loss": 0.5938, "step": 22828 }, { "epoch": 0.6268259198242724, "grad_norm": 0.39173489809036255, "learning_rate": 1.5556869370757816e-05, "loss": 0.5717, "step": 22829 }, { "epoch": 0.6268533772652389, "grad_norm": 0.38907572627067566, "learning_rate": 1.555651029301412e-05, "loss": 0.4707, "step": 22830 }, { "epoch": 0.6268808347062054, "grad_norm": 0.3478902578353882, "learning_rate": 1.5556151204905762e-05, "loss": 0.4891, "step": 22831 }, { "epoch": 0.6269082921471719, "grad_norm": 0.37382790446281433, "learning_rate": 1.555579210643341e-05, "loss": 0.4575, "step": 22832 }, { "epoch": 0.6269357495881384, "grad_norm": 0.33274486660957336, "learning_rate": 1.5555432997597727e-05, "loss": 0.406, "step": 22833 }, { "epoch": 0.6269632070291049, "grad_norm": 0.3746849000453949, "learning_rate": 1.5555073878399393e-05, "loss": 0.5383, "step": 22834 }, { "epoch": 0.6269906644700713, "grad_norm": 0.3733014464378357, "learning_rate": 1.5554714748839067e-05, "loss": 0.5393, "step": 22835 }, { "epoch": 0.6270181219110379, "grad_norm": 0.38321173191070557, "learning_rate": 1.5554355608917432e-05, "loss": 0.4955, "step": 22836 }, { "epoch": 0.6270455793520044, "grad_norm": 0.41200900077819824, "learning_rate": 1.5553996458635147e-05, "loss": 0.5136, "step": 22837 }, { "epoch": 0.6270730367929709, "grad_norm": 0.44663986563682556, "learning_rate": 1.555363729799289e-05, "loss": 0.5137, "step": 22838 }, { "epoch": 0.6271004942339374, "grad_norm": 0.35124775767326355, "learning_rate": 1.5553278126991322e-05, "loss": 0.4507, "step": 22839 }, { "epoch": 0.6271279516749039, "grad_norm": 0.3486226201057434, "learning_rate": 1.555291894563112e-05, "loss": 0.4178, "step": 22840 }, { "epoch": 0.6271554091158704, "grad_norm": 0.3821168541908264, "learning_rate": 1.5552559753912952e-05, "loss": 0.5253, "step": 22841 }, { "epoch": 0.6271828665568369, "grad_norm": 0.4208936393260956, "learning_rate": 1.5552200551837486e-05, "loss": 0.5636, "step": 22842 }, { "epoch": 0.6272103239978034, "grad_norm": 0.421752005815506, "learning_rate": 1.55518413394054e-05, "loss": 0.4758, "step": 22843 }, { "epoch": 0.62723778143877, "grad_norm": 0.38488152623176575, "learning_rate": 1.555148211661735e-05, "loss": 0.5373, "step": 22844 }, { "epoch": 0.6272652388797364, "grad_norm": 0.3750523030757904, "learning_rate": 1.555112288347402e-05, "loss": 0.4945, "step": 22845 }, { "epoch": 0.627292696320703, "grad_norm": 0.38427919149398804, "learning_rate": 1.555076363997607e-05, "loss": 0.5055, "step": 22846 }, { "epoch": 0.6273201537616694, "grad_norm": 0.41494688391685486, "learning_rate": 1.5550404386124177e-05, "loss": 0.5223, "step": 22847 }, { "epoch": 0.6273476112026359, "grad_norm": 0.34224554896354675, "learning_rate": 1.5550045121919007e-05, "loss": 0.457, "step": 22848 }, { "epoch": 0.6273750686436024, "grad_norm": 0.40508005023002625, "learning_rate": 1.554968584736123e-05, "loss": 0.5519, "step": 22849 }, { "epoch": 0.6274025260845689, "grad_norm": 0.38020893931388855, "learning_rate": 1.5549326562451522e-05, "loss": 0.4958, "step": 22850 }, { "epoch": 0.6274299835255355, "grad_norm": 0.36947232484817505, "learning_rate": 1.5548967267190546e-05, "loss": 0.5076, "step": 22851 }, { "epoch": 0.6274574409665019, "grad_norm": 0.6492322087287903, "learning_rate": 1.5548607961578976e-05, "loss": 0.5007, "step": 22852 }, { "epoch": 0.6274848984074685, "grad_norm": 0.3472132980823517, "learning_rate": 1.554824864561748e-05, "loss": 0.4317, "step": 22853 }, { "epoch": 0.6275123558484349, "grad_norm": 0.42903244495391846, "learning_rate": 1.554788931930673e-05, "loss": 0.5834, "step": 22854 }, { "epoch": 0.6275398132894014, "grad_norm": 0.3504354953765869, "learning_rate": 1.5547529982647397e-05, "loss": 0.4944, "step": 22855 }, { "epoch": 0.6275672707303679, "grad_norm": 0.3607040047645569, "learning_rate": 1.5547170635640148e-05, "loss": 0.4252, "step": 22856 }, { "epoch": 0.6275947281713344, "grad_norm": 0.3889426589012146, "learning_rate": 1.5546811278285656e-05, "loss": 0.5082, "step": 22857 }, { "epoch": 0.627622185612301, "grad_norm": 0.41101598739624023, "learning_rate": 1.554645191058459e-05, "loss": 0.4646, "step": 22858 }, { "epoch": 0.6276496430532674, "grad_norm": 0.4031015634536743, "learning_rate": 1.554609253253762e-05, "loss": 0.5549, "step": 22859 }, { "epoch": 0.627677100494234, "grad_norm": 0.6448476314544678, "learning_rate": 1.554573314414542e-05, "loss": 0.4427, "step": 22860 }, { "epoch": 0.6277045579352004, "grad_norm": 0.37643370032310486, "learning_rate": 1.554537374540866e-05, "loss": 0.469, "step": 22861 }, { "epoch": 0.627732015376167, "grad_norm": 0.35449615120887756, "learning_rate": 1.5545014336328e-05, "loss": 0.4818, "step": 22862 }, { "epoch": 0.6277594728171334, "grad_norm": 0.3600231111049652, "learning_rate": 1.5544654916904124e-05, "loss": 0.4883, "step": 22863 }, { "epoch": 0.6277869302581, "grad_norm": 0.3974609076976776, "learning_rate": 1.5544295487137697e-05, "loss": 0.5301, "step": 22864 }, { "epoch": 0.6278143876990665, "grad_norm": 0.4185226857662201, "learning_rate": 1.554393604702939e-05, "loss": 0.5473, "step": 22865 }, { "epoch": 0.6278418451400329, "grad_norm": 0.3722182810306549, "learning_rate": 1.554357659657987e-05, "loss": 0.5716, "step": 22866 }, { "epoch": 0.6278693025809995, "grad_norm": 0.3273524343967438, "learning_rate": 1.554321713578981e-05, "loss": 0.4711, "step": 22867 }, { "epoch": 0.6278967600219659, "grad_norm": 0.4521144926548004, "learning_rate": 1.5542857664659888e-05, "loss": 0.5389, "step": 22868 }, { "epoch": 0.6279242174629325, "grad_norm": 0.3632655739784241, "learning_rate": 1.5542498183190763e-05, "loss": 0.5022, "step": 22869 }, { "epoch": 0.6279516749038989, "grad_norm": 0.3864021301269531, "learning_rate": 1.5542138691383107e-05, "loss": 0.5172, "step": 22870 }, { "epoch": 0.6279791323448655, "grad_norm": 0.35259300470352173, "learning_rate": 1.5541779189237596e-05, "loss": 0.5079, "step": 22871 }, { "epoch": 0.628006589785832, "grad_norm": 0.4184204041957855, "learning_rate": 1.5541419676754895e-05, "loss": 0.5212, "step": 22872 }, { "epoch": 0.6280340472267985, "grad_norm": 0.3980865478515625, "learning_rate": 1.554106015393568e-05, "loss": 0.5447, "step": 22873 }, { "epoch": 0.628061504667765, "grad_norm": 0.42075014114379883, "learning_rate": 1.5540700620780623e-05, "loss": 0.5182, "step": 22874 }, { "epoch": 0.6280889621087314, "grad_norm": 0.33270177245140076, "learning_rate": 1.5540341077290384e-05, "loss": 0.4186, "step": 22875 }, { "epoch": 0.628116419549698, "grad_norm": 0.3945555090904236, "learning_rate": 1.5539981523465645e-05, "loss": 0.4882, "step": 22876 }, { "epoch": 0.6281438769906644, "grad_norm": 0.3895684778690338, "learning_rate": 1.5539621959307073e-05, "loss": 0.4899, "step": 22877 }, { "epoch": 0.628171334431631, "grad_norm": 0.36045852303504944, "learning_rate": 1.5539262384815333e-05, "loss": 0.4723, "step": 22878 }, { "epoch": 0.6281987918725975, "grad_norm": 0.3603440225124359, "learning_rate": 1.5538902799991106e-05, "loss": 0.4668, "step": 22879 }, { "epoch": 0.628226249313564, "grad_norm": 0.5865445137023926, "learning_rate": 1.5538543204835055e-05, "loss": 0.6415, "step": 22880 }, { "epoch": 0.6282537067545305, "grad_norm": 0.3868865966796875, "learning_rate": 1.5538183599347852e-05, "loss": 0.5384, "step": 22881 }, { "epoch": 0.628281164195497, "grad_norm": 0.38580116629600525, "learning_rate": 1.5537823983530174e-05, "loss": 0.4093, "step": 22882 }, { "epoch": 0.6283086216364635, "grad_norm": 0.3335117697715759, "learning_rate": 1.5537464357382682e-05, "loss": 0.4028, "step": 22883 }, { "epoch": 0.6283360790774299, "grad_norm": 0.39272889494895935, "learning_rate": 1.5537104720906057e-05, "loss": 0.5333, "step": 22884 }, { "epoch": 0.6283635365183965, "grad_norm": 0.41438502073287964, "learning_rate": 1.553674507410096e-05, "loss": 0.5463, "step": 22885 }, { "epoch": 0.628390993959363, "grad_norm": 0.40538445115089417, "learning_rate": 1.5536385416968063e-05, "loss": 0.5306, "step": 22886 }, { "epoch": 0.6284184514003295, "grad_norm": 0.36028042435646057, "learning_rate": 1.5536025749508047e-05, "loss": 0.515, "step": 22887 }, { "epoch": 0.628445908841296, "grad_norm": 0.36792925000190735, "learning_rate": 1.553566607172157e-05, "loss": 0.486, "step": 22888 }, { "epoch": 0.6284733662822625, "grad_norm": 0.38605278730392456, "learning_rate": 1.5535306383609316e-05, "loss": 0.5534, "step": 22889 }, { "epoch": 0.628500823723229, "grad_norm": 0.3563316762447357, "learning_rate": 1.5534946685171943e-05, "loss": 0.4876, "step": 22890 }, { "epoch": 0.6285282811641955, "grad_norm": 0.4463382661342621, "learning_rate": 1.5534586976410132e-05, "loss": 0.5101, "step": 22891 }, { "epoch": 0.628555738605162, "grad_norm": 0.3798021674156189, "learning_rate": 1.553422725732455e-05, "loss": 0.6178, "step": 22892 }, { "epoch": 0.6285831960461286, "grad_norm": 0.39021819829940796, "learning_rate": 1.5533867527915863e-05, "loss": 0.4958, "step": 22893 }, { "epoch": 0.628610653487095, "grad_norm": 0.38101503252983093, "learning_rate": 1.553350778818475e-05, "loss": 0.4599, "step": 22894 }, { "epoch": 0.6286381109280615, "grad_norm": 0.364689439535141, "learning_rate": 1.553314803813188e-05, "loss": 0.5207, "step": 22895 }, { "epoch": 0.628665568369028, "grad_norm": 0.38474416732788086, "learning_rate": 1.553278827775792e-05, "loss": 0.4767, "step": 22896 }, { "epoch": 0.6286930258099945, "grad_norm": 0.38186755776405334, "learning_rate": 1.5532428507063545e-05, "loss": 0.3628, "step": 22897 }, { "epoch": 0.628720483250961, "grad_norm": 0.4659254252910614, "learning_rate": 1.5532068726049425e-05, "loss": 0.4506, "step": 22898 }, { "epoch": 0.6287479406919275, "grad_norm": 0.33205774426460266, "learning_rate": 1.5531708934716234e-05, "loss": 0.4864, "step": 22899 }, { "epoch": 0.628775398132894, "grad_norm": 0.40065476298332214, "learning_rate": 1.5531349133064636e-05, "loss": 0.3985, "step": 22900 }, { "epoch": 0.6288028555738605, "grad_norm": 0.4364413619041443, "learning_rate": 1.553098932109531e-05, "loss": 0.5059, "step": 22901 }, { "epoch": 0.6288303130148271, "grad_norm": 0.40858250856399536, "learning_rate": 1.5530629498808923e-05, "loss": 0.486, "step": 22902 }, { "epoch": 0.6288577704557935, "grad_norm": 0.38818472623825073, "learning_rate": 1.5530269666206142e-05, "loss": 0.4953, "step": 22903 }, { "epoch": 0.62888522789676, "grad_norm": 0.46644943952560425, "learning_rate": 1.552990982328765e-05, "loss": 0.4777, "step": 22904 }, { "epoch": 0.6289126853377265, "grad_norm": 0.34150806069374084, "learning_rate": 1.552954997005411e-05, "loss": 0.4126, "step": 22905 }, { "epoch": 0.628940142778693, "grad_norm": 0.3914794325828552, "learning_rate": 1.552919010650619e-05, "loss": 0.5492, "step": 22906 }, { "epoch": 0.6289676002196595, "grad_norm": 0.3092358410358429, "learning_rate": 1.552883023264457e-05, "loss": 0.4025, "step": 22907 }, { "epoch": 0.628995057660626, "grad_norm": 0.3769543170928955, "learning_rate": 1.5528470348469914e-05, "loss": 0.526, "step": 22908 }, { "epoch": 0.6290225151015926, "grad_norm": 0.369853138923645, "learning_rate": 1.55281104539829e-05, "loss": 0.47, "step": 22909 }, { "epoch": 0.629049972542559, "grad_norm": 0.6670646667480469, "learning_rate": 1.5527750549184193e-05, "loss": 0.3542, "step": 22910 }, { "epoch": 0.6290774299835256, "grad_norm": 0.3979659676551819, "learning_rate": 1.5527390634074464e-05, "loss": 0.4738, "step": 22911 }, { "epoch": 0.629104887424492, "grad_norm": 0.41558852791786194, "learning_rate": 1.5527030708654392e-05, "loss": 0.612, "step": 22912 }, { "epoch": 0.6291323448654585, "grad_norm": 0.36219242215156555, "learning_rate": 1.5526670772924642e-05, "loss": 0.5168, "step": 22913 }, { "epoch": 0.629159802306425, "grad_norm": 0.5033413767814636, "learning_rate": 1.5526310826885887e-05, "loss": 0.4556, "step": 22914 }, { "epoch": 0.6291872597473915, "grad_norm": 0.3732967674732208, "learning_rate": 1.55259508705388e-05, "loss": 0.5071, "step": 22915 }, { "epoch": 0.6292147171883581, "grad_norm": 0.3665003180503845, "learning_rate": 1.552559090388405e-05, "loss": 0.5335, "step": 22916 }, { "epoch": 0.6292421746293245, "grad_norm": 0.4048526883125305, "learning_rate": 1.552523092692231e-05, "loss": 0.5046, "step": 22917 }, { "epoch": 0.6292696320702911, "grad_norm": 0.43777260184288025, "learning_rate": 1.552487093965425e-05, "loss": 0.5957, "step": 22918 }, { "epoch": 0.6292970895112575, "grad_norm": 0.4162471294403076, "learning_rate": 1.5524510942080545e-05, "loss": 0.4852, "step": 22919 }, { "epoch": 0.6293245469522241, "grad_norm": 0.4480338990688324, "learning_rate": 1.552415093420186e-05, "loss": 0.5534, "step": 22920 }, { "epoch": 0.6293520043931905, "grad_norm": 0.43530499935150146, "learning_rate": 1.552379091601887e-05, "loss": 0.4981, "step": 22921 }, { "epoch": 0.629379461834157, "grad_norm": 0.41387566924095154, "learning_rate": 1.552343088753225e-05, "loss": 0.552, "step": 22922 }, { "epoch": 0.6294069192751236, "grad_norm": 0.37844300270080566, "learning_rate": 1.5523070848742668e-05, "loss": 0.4801, "step": 22923 }, { "epoch": 0.62943437671609, "grad_norm": 0.35349395871162415, "learning_rate": 1.5522710799650795e-05, "loss": 0.4507, "step": 22924 }, { "epoch": 0.6294618341570566, "grad_norm": 0.3599468469619751, "learning_rate": 1.5522350740257304e-05, "loss": 0.5003, "step": 22925 }, { "epoch": 0.629489291598023, "grad_norm": 0.36881163716316223, "learning_rate": 1.5521990670562866e-05, "loss": 0.4976, "step": 22926 }, { "epoch": 0.6295167490389896, "grad_norm": 0.37747684121131897, "learning_rate": 1.552163059056815e-05, "loss": 0.5033, "step": 22927 }, { "epoch": 0.629544206479956, "grad_norm": 0.37997132539749146, "learning_rate": 1.5521270500273838e-05, "loss": 0.5049, "step": 22928 }, { "epoch": 0.6295716639209226, "grad_norm": 0.3771025538444519, "learning_rate": 1.552091039968059e-05, "loss": 0.5421, "step": 22929 }, { "epoch": 0.6295991213618891, "grad_norm": 0.39909541606903076, "learning_rate": 1.5520550288789085e-05, "loss": 0.5348, "step": 22930 }, { "epoch": 0.6296265788028556, "grad_norm": 0.3892349600791931, "learning_rate": 1.552019016759999e-05, "loss": 0.5113, "step": 22931 }, { "epoch": 0.6296540362438221, "grad_norm": 0.3954881727695465, "learning_rate": 1.5519830036113974e-05, "loss": 0.4903, "step": 22932 }, { "epoch": 0.6296814936847885, "grad_norm": 0.37848150730133057, "learning_rate": 1.551946989433172e-05, "loss": 0.467, "step": 22933 }, { "epoch": 0.6297089511257551, "grad_norm": 0.33760467171669006, "learning_rate": 1.5519109742253887e-05, "loss": 0.4737, "step": 22934 }, { "epoch": 0.6297364085667215, "grad_norm": 0.4375936686992645, "learning_rate": 1.551874957988116e-05, "loss": 0.4385, "step": 22935 }, { "epoch": 0.6297638660076881, "grad_norm": 0.4070012867450714, "learning_rate": 1.55183894072142e-05, "loss": 0.5442, "step": 22936 }, { "epoch": 0.6297913234486546, "grad_norm": 0.38781827688217163, "learning_rate": 1.551802922425368e-05, "loss": 0.504, "step": 22937 }, { "epoch": 0.6298187808896211, "grad_norm": 0.36431658267974854, "learning_rate": 1.5517669031000275e-05, "loss": 0.4919, "step": 22938 }, { "epoch": 0.6298462383305876, "grad_norm": 0.40120962262153625, "learning_rate": 1.5517308827454658e-05, "loss": 0.4572, "step": 22939 }, { "epoch": 0.629873695771554, "grad_norm": 0.45476678013801575, "learning_rate": 1.55169486136175e-05, "loss": 0.4951, "step": 22940 }, { "epoch": 0.6299011532125206, "grad_norm": 0.426628977060318, "learning_rate": 1.551658838948947e-05, "loss": 0.5363, "step": 22941 }, { "epoch": 0.629928610653487, "grad_norm": 0.39707183837890625, "learning_rate": 1.5516228155071243e-05, "loss": 0.5059, "step": 22942 }, { "epoch": 0.6299560680944536, "grad_norm": 0.39806270599365234, "learning_rate": 1.5515867910363488e-05, "loss": 0.5273, "step": 22943 }, { "epoch": 0.6299835255354201, "grad_norm": 0.4168098568916321, "learning_rate": 1.551550765536688e-05, "loss": 0.5993, "step": 22944 }, { "epoch": 0.6300109829763866, "grad_norm": 0.3636227548122406, "learning_rate": 1.5515147390082093e-05, "loss": 0.5276, "step": 22945 }, { "epoch": 0.6300384404173531, "grad_norm": 0.380204439163208, "learning_rate": 1.551478711450979e-05, "loss": 0.5359, "step": 22946 }, { "epoch": 0.6300658978583196, "grad_norm": 0.41259220242500305, "learning_rate": 1.551442682865065e-05, "loss": 0.4738, "step": 22947 }, { "epoch": 0.6300933552992861, "grad_norm": 0.4089786112308502, "learning_rate": 1.5514066532505353e-05, "loss": 0.502, "step": 22948 }, { "epoch": 0.6301208127402526, "grad_norm": 0.40132635831832886, "learning_rate": 1.5513706226074554e-05, "loss": 0.5188, "step": 22949 }, { "epoch": 0.6301482701812191, "grad_norm": 0.47363170981407166, "learning_rate": 1.5513345909358935e-05, "loss": 0.6702, "step": 22950 }, { "epoch": 0.6301757276221857, "grad_norm": 0.4880548119544983, "learning_rate": 1.5512985582359164e-05, "loss": 0.6107, "step": 22951 }, { "epoch": 0.6302031850631521, "grad_norm": 0.3790552616119385, "learning_rate": 1.5512625245075915e-05, "loss": 0.4982, "step": 22952 }, { "epoch": 0.6302306425041186, "grad_norm": 0.35731813311576843, "learning_rate": 1.5512264897509862e-05, "loss": 0.4888, "step": 22953 }, { "epoch": 0.6302580999450851, "grad_norm": 0.45342081785202026, "learning_rate": 1.551190453966168e-05, "loss": 0.498, "step": 22954 }, { "epoch": 0.6302855573860516, "grad_norm": 0.3632499575614929, "learning_rate": 1.551154417153203e-05, "loss": 0.4954, "step": 22955 }, { "epoch": 0.6303130148270181, "grad_norm": 0.3739071786403656, "learning_rate": 1.5511183793121596e-05, "loss": 0.5076, "step": 22956 }, { "epoch": 0.6303404722679846, "grad_norm": 0.3282206654548645, "learning_rate": 1.5510823404431043e-05, "loss": 0.4573, "step": 22957 }, { "epoch": 0.6303679297089512, "grad_norm": 0.4143099784851074, "learning_rate": 1.5510463005461044e-05, "loss": 0.4939, "step": 22958 }, { "epoch": 0.6303953871499176, "grad_norm": 0.3599453568458557, "learning_rate": 1.5510102596212276e-05, "loss": 0.5093, "step": 22959 }, { "epoch": 0.6304228445908842, "grad_norm": 0.38897475600242615, "learning_rate": 1.5509742176685408e-05, "loss": 0.4864, "step": 22960 }, { "epoch": 0.6304503020318506, "grad_norm": 0.35738980770111084, "learning_rate": 1.550938174688111e-05, "loss": 0.5789, "step": 22961 }, { "epoch": 0.6304777594728171, "grad_norm": 0.36751410365104675, "learning_rate": 1.5509021306800057e-05, "loss": 0.5056, "step": 22962 }, { "epoch": 0.6305052169137836, "grad_norm": 0.3869949281215668, "learning_rate": 1.550866085644292e-05, "loss": 0.5262, "step": 22963 }, { "epoch": 0.6305326743547501, "grad_norm": 0.34070298075675964, "learning_rate": 1.5508300395810377e-05, "loss": 0.4178, "step": 22964 }, { "epoch": 0.6305601317957167, "grad_norm": 0.4324380159378052, "learning_rate": 1.550793992490309e-05, "loss": 0.4132, "step": 22965 }, { "epoch": 0.6305875892366831, "grad_norm": 0.7570326924324036, "learning_rate": 1.550757944372174e-05, "loss": 0.5679, "step": 22966 }, { "epoch": 0.6306150466776497, "grad_norm": 0.3829781413078308, "learning_rate": 1.5507218952266998e-05, "loss": 0.506, "step": 22967 }, { "epoch": 0.6306425041186161, "grad_norm": 0.3756467401981354, "learning_rate": 1.5506858450539532e-05, "loss": 0.5107, "step": 22968 }, { "epoch": 0.6306699615595827, "grad_norm": 0.4128085672855377, "learning_rate": 1.5506497938540017e-05, "loss": 0.5065, "step": 22969 }, { "epoch": 0.6306974190005491, "grad_norm": 0.44040563702583313, "learning_rate": 1.5506137416269126e-05, "loss": 0.4693, "step": 22970 }, { "epoch": 0.6307248764415156, "grad_norm": 0.3799006938934326, "learning_rate": 1.5505776883727533e-05, "loss": 0.575, "step": 22971 }, { "epoch": 0.6307523338824822, "grad_norm": 0.39654120802879333, "learning_rate": 1.5505416340915914e-05, "loss": 0.4861, "step": 22972 }, { "epoch": 0.6307797913234486, "grad_norm": 0.41995829343795776, "learning_rate": 1.5505055787834928e-05, "loss": 0.5109, "step": 22973 }, { "epoch": 0.6308072487644152, "grad_norm": 0.3409193158149719, "learning_rate": 1.550469522448526e-05, "loss": 0.4153, "step": 22974 }, { "epoch": 0.6308347062053816, "grad_norm": 0.3789425194263458, "learning_rate": 1.5504334650867577e-05, "loss": 0.5455, "step": 22975 }, { "epoch": 0.6308621636463482, "grad_norm": 0.37028443813323975, "learning_rate": 1.5503974066982554e-05, "loss": 0.57, "step": 22976 }, { "epoch": 0.6308896210873146, "grad_norm": 0.3581285774707794, "learning_rate": 1.550361347283086e-05, "loss": 0.5004, "step": 22977 }, { "epoch": 0.6309170785282812, "grad_norm": 0.3471972942352295, "learning_rate": 1.5503252868413173e-05, "loss": 0.5012, "step": 22978 }, { "epoch": 0.6309445359692477, "grad_norm": 0.3664281666278839, "learning_rate": 1.5502892253730164e-05, "loss": 0.4903, "step": 22979 }, { "epoch": 0.6309719934102141, "grad_norm": 0.34512823820114136, "learning_rate": 1.5502531628782502e-05, "loss": 0.3929, "step": 22980 }, { "epoch": 0.6309994508511807, "grad_norm": 0.41000044345855713, "learning_rate": 1.5502170993570863e-05, "loss": 0.5631, "step": 22981 }, { "epoch": 0.6310269082921471, "grad_norm": 0.38099145889282227, "learning_rate": 1.550181034809592e-05, "loss": 0.4801, "step": 22982 }, { "epoch": 0.6310543657331137, "grad_norm": 0.3215654492378235, "learning_rate": 1.5501449692358342e-05, "loss": 0.5111, "step": 22983 }, { "epoch": 0.6310818231740801, "grad_norm": 0.3611699640750885, "learning_rate": 1.550108902635881e-05, "loss": 0.4901, "step": 22984 }, { "epoch": 0.6311092806150467, "grad_norm": 0.43625354766845703, "learning_rate": 1.5500728350097988e-05, "loss": 0.6227, "step": 22985 }, { "epoch": 0.6311367380560132, "grad_norm": 0.4470072090625763, "learning_rate": 1.5500367663576552e-05, "loss": 0.5361, "step": 22986 }, { "epoch": 0.6311641954969797, "grad_norm": 0.369899183511734, "learning_rate": 1.5500006966795177e-05, "loss": 0.5058, "step": 22987 }, { "epoch": 0.6311916529379462, "grad_norm": 0.38127028942108154, "learning_rate": 1.5499646259754528e-05, "loss": 0.5528, "step": 22988 }, { "epoch": 0.6312191103789127, "grad_norm": 0.5687307715415955, "learning_rate": 1.5499285542455286e-05, "loss": 0.5159, "step": 22989 }, { "epoch": 0.6312465678198792, "grad_norm": 0.37348249554634094, "learning_rate": 1.5498924814898124e-05, "loss": 0.448, "step": 22990 }, { "epoch": 0.6312740252608456, "grad_norm": 0.33262965083122253, "learning_rate": 1.5498564077083707e-05, "loss": 0.4209, "step": 22991 }, { "epoch": 0.6313014827018122, "grad_norm": 0.3300248384475708, "learning_rate": 1.549820332901272e-05, "loss": 0.4716, "step": 22992 }, { "epoch": 0.6313289401427787, "grad_norm": 0.34855470061302185, "learning_rate": 1.5497842570685823e-05, "loss": 0.5353, "step": 22993 }, { "epoch": 0.6313563975837452, "grad_norm": 0.3763744533061981, "learning_rate": 1.54974818021037e-05, "loss": 0.4528, "step": 22994 }, { "epoch": 0.6313838550247117, "grad_norm": 0.3990195393562317, "learning_rate": 1.5497121023267014e-05, "loss": 0.583, "step": 22995 }, { "epoch": 0.6314113124656782, "grad_norm": 0.622748613357544, "learning_rate": 1.5496760234176445e-05, "loss": 0.5596, "step": 22996 }, { "epoch": 0.6314387699066447, "grad_norm": 0.4373394250869751, "learning_rate": 1.5496399434832665e-05, "loss": 0.5257, "step": 22997 }, { "epoch": 0.6314662273476112, "grad_norm": 0.37158703804016113, "learning_rate": 1.5496038625236343e-05, "loss": 0.597, "step": 22998 }, { "epoch": 0.6314936847885777, "grad_norm": 0.4015958607196808, "learning_rate": 1.5495677805388157e-05, "loss": 0.5728, "step": 22999 }, { "epoch": 0.6315211422295443, "grad_norm": 0.3419346809387207, "learning_rate": 1.5495316975288776e-05, "loss": 0.4799, "step": 23000 }, { "epoch": 0.6315485996705107, "grad_norm": 0.44648393988609314, "learning_rate": 1.5494956134938877e-05, "loss": 0.5393, "step": 23001 }, { "epoch": 0.6315760571114772, "grad_norm": 0.355996698141098, "learning_rate": 1.549459528433913e-05, "loss": 0.504, "step": 23002 }, { "epoch": 0.6316035145524437, "grad_norm": 0.44878286123275757, "learning_rate": 1.549423442349021e-05, "loss": 0.5551, "step": 23003 }, { "epoch": 0.6316309719934102, "grad_norm": 0.3759143054485321, "learning_rate": 1.5493873552392788e-05, "loss": 0.5243, "step": 23004 }, { "epoch": 0.6316584294343767, "grad_norm": 0.3697158098220825, "learning_rate": 1.549351267104754e-05, "loss": 0.5109, "step": 23005 }, { "epoch": 0.6316858868753432, "grad_norm": 0.4165758490562439, "learning_rate": 1.5493151779455136e-05, "loss": 0.6003, "step": 23006 }, { "epoch": 0.6317133443163098, "grad_norm": 0.3785117268562317, "learning_rate": 1.5492790877616252e-05, "loss": 0.5426, "step": 23007 }, { "epoch": 0.6317408017572762, "grad_norm": 0.3606944978237152, "learning_rate": 1.5492429965531556e-05, "loss": 0.5128, "step": 23008 }, { "epoch": 0.6317682591982428, "grad_norm": 0.3882286846637726, "learning_rate": 1.549206904320173e-05, "loss": 0.5526, "step": 23009 }, { "epoch": 0.6317957166392092, "grad_norm": 0.45027923583984375, "learning_rate": 1.549170811062744e-05, "loss": 0.5131, "step": 23010 }, { "epoch": 0.6318231740801757, "grad_norm": 0.41854751110076904, "learning_rate": 1.549134716780936e-05, "loss": 0.4339, "step": 23011 }, { "epoch": 0.6318506315211422, "grad_norm": 0.3416934013366699, "learning_rate": 1.5490986214748168e-05, "loss": 0.4681, "step": 23012 }, { "epoch": 0.6318780889621087, "grad_norm": 0.31367728114128113, "learning_rate": 1.5490625251444535e-05, "loss": 0.4279, "step": 23013 }, { "epoch": 0.6319055464030753, "grad_norm": 0.33447420597076416, "learning_rate": 1.549026427789913e-05, "loss": 0.4394, "step": 23014 }, { "epoch": 0.6319330038440417, "grad_norm": 0.4804166853427887, "learning_rate": 1.548990329411263e-05, "loss": 0.5008, "step": 23015 }, { "epoch": 0.6319604612850083, "grad_norm": 0.3724417984485626, "learning_rate": 1.548954230008571e-05, "loss": 0.4979, "step": 23016 }, { "epoch": 0.6319879187259747, "grad_norm": 0.39227259159088135, "learning_rate": 1.5489181295819046e-05, "loss": 0.5473, "step": 23017 }, { "epoch": 0.6320153761669413, "grad_norm": 0.38329410552978516, "learning_rate": 1.54888202813133e-05, "loss": 0.4833, "step": 23018 }, { "epoch": 0.6320428336079077, "grad_norm": 0.3778405785560608, "learning_rate": 1.5488459256569153e-05, "loss": 0.4863, "step": 23019 }, { "epoch": 0.6320702910488742, "grad_norm": 0.43031617999076843, "learning_rate": 1.548809822158728e-05, "loss": 0.5496, "step": 23020 }, { "epoch": 0.6320977484898408, "grad_norm": 0.3574939966201782, "learning_rate": 1.5487737176368352e-05, "loss": 0.5407, "step": 23021 }, { "epoch": 0.6321252059308072, "grad_norm": 0.3761540651321411, "learning_rate": 1.5487376120913043e-05, "loss": 0.5075, "step": 23022 }, { "epoch": 0.6321526633717738, "grad_norm": 0.4044707417488098, "learning_rate": 1.5487015055222022e-05, "loss": 0.4666, "step": 23023 }, { "epoch": 0.6321801208127402, "grad_norm": 0.3461194336414337, "learning_rate": 1.5486653979295973e-05, "loss": 0.4671, "step": 23024 }, { "epoch": 0.6322075782537068, "grad_norm": 0.34024983644485474, "learning_rate": 1.548629289313556e-05, "loss": 0.4816, "step": 23025 }, { "epoch": 0.6322350356946732, "grad_norm": 0.3737383186817169, "learning_rate": 1.548593179674146e-05, "loss": 0.5188, "step": 23026 }, { "epoch": 0.6322624931356398, "grad_norm": 0.36025986075401306, "learning_rate": 1.5485570690114345e-05, "loss": 0.4496, "step": 23027 }, { "epoch": 0.6322899505766063, "grad_norm": 0.37574565410614014, "learning_rate": 1.548520957325489e-05, "loss": 0.505, "step": 23028 }, { "epoch": 0.6323174080175727, "grad_norm": 0.38083672523498535, "learning_rate": 1.5484848446163772e-05, "loss": 0.533, "step": 23029 }, { "epoch": 0.6323448654585393, "grad_norm": 0.368287056684494, "learning_rate": 1.548448730884166e-05, "loss": 0.4947, "step": 23030 }, { "epoch": 0.6323723228995057, "grad_norm": 0.37873774766921997, "learning_rate": 1.5484126161289226e-05, "loss": 0.5177, "step": 23031 }, { "epoch": 0.6323997803404723, "grad_norm": 0.329054057598114, "learning_rate": 1.548376500350715e-05, "loss": 0.4408, "step": 23032 }, { "epoch": 0.6324272377814387, "grad_norm": 0.3749113082885742, "learning_rate": 1.5483403835496097e-05, "loss": 0.5422, "step": 23033 }, { "epoch": 0.6324546952224053, "grad_norm": 0.43550029397010803, "learning_rate": 1.548304265725675e-05, "loss": 0.5083, "step": 23034 }, { "epoch": 0.6324821526633718, "grad_norm": 0.3979592025279999, "learning_rate": 1.5482681468789776e-05, "loss": 0.5593, "step": 23035 }, { "epoch": 0.6325096101043383, "grad_norm": 0.4033834636211395, "learning_rate": 1.5482320270095853e-05, "loss": 0.5163, "step": 23036 }, { "epoch": 0.6325370675453048, "grad_norm": 0.369273841381073, "learning_rate": 1.5481959061175653e-05, "loss": 0.5198, "step": 23037 }, { "epoch": 0.6325645249862712, "grad_norm": 0.5434684157371521, "learning_rate": 1.548159784202985e-05, "loss": 0.5531, "step": 23038 }, { "epoch": 0.6325919824272378, "grad_norm": 0.3642076253890991, "learning_rate": 1.5481236612659116e-05, "loss": 0.4597, "step": 23039 }, { "epoch": 0.6326194398682042, "grad_norm": 0.3645142912864685, "learning_rate": 1.5480875373064126e-05, "loss": 0.4934, "step": 23040 }, { "epoch": 0.6326468973091708, "grad_norm": 0.3129270374774933, "learning_rate": 1.5480514123245557e-05, "loss": 0.3625, "step": 23041 }, { "epoch": 0.6326743547501373, "grad_norm": 0.3582443594932556, "learning_rate": 1.5480152863204077e-05, "loss": 0.527, "step": 23042 }, { "epoch": 0.6327018121911038, "grad_norm": 0.35164445638656616, "learning_rate": 1.5479791592940362e-05, "loss": 0.5229, "step": 23043 }, { "epoch": 0.6327292696320703, "grad_norm": 0.36163780093193054, "learning_rate": 1.547943031245509e-05, "loss": 0.5407, "step": 23044 }, { "epoch": 0.6327567270730368, "grad_norm": 0.42777371406555176, "learning_rate": 1.547906902174893e-05, "loss": 0.4825, "step": 23045 }, { "epoch": 0.6327841845140033, "grad_norm": 0.3543710708618164, "learning_rate": 1.5478707720822555e-05, "loss": 0.542, "step": 23046 }, { "epoch": 0.6328116419549698, "grad_norm": 0.43004539608955383, "learning_rate": 1.5478346409676646e-05, "loss": 0.5023, "step": 23047 }, { "epoch": 0.6328390993959363, "grad_norm": 0.42154502868652344, "learning_rate": 1.5477985088311867e-05, "loss": 0.5912, "step": 23048 }, { "epoch": 0.6328665568369028, "grad_norm": 0.3852606415748596, "learning_rate": 1.5477623756728907e-05, "loss": 0.5186, "step": 23049 }, { "epoch": 0.6328940142778693, "grad_norm": 0.3314886689186096, "learning_rate": 1.547726241492842e-05, "loss": 0.4681, "step": 23050 }, { "epoch": 0.6329214717188358, "grad_norm": 0.4275548756122589, "learning_rate": 1.5476901062911096e-05, "loss": 0.5928, "step": 23051 }, { "epoch": 0.6329489291598023, "grad_norm": 0.45936164259910583, "learning_rate": 1.5476539700677604e-05, "loss": 0.5208, "step": 23052 }, { "epoch": 0.6329763866007688, "grad_norm": 0.3773582875728607, "learning_rate": 1.5476178328228614e-05, "loss": 0.5281, "step": 23053 }, { "epoch": 0.6330038440417353, "grad_norm": 0.4004562795162201, "learning_rate": 1.547581694556481e-05, "loss": 0.5733, "step": 23054 }, { "epoch": 0.6330313014827018, "grad_norm": 0.3871382474899292, "learning_rate": 1.5475455552686855e-05, "loss": 0.4875, "step": 23055 }, { "epoch": 0.6330587589236684, "grad_norm": 0.3839006721973419, "learning_rate": 1.5475094149595428e-05, "loss": 0.5224, "step": 23056 }, { "epoch": 0.6330862163646348, "grad_norm": 0.38582777976989746, "learning_rate": 1.54747327362912e-05, "loss": 0.4452, "step": 23057 }, { "epoch": 0.6331136738056014, "grad_norm": 0.38230785727500916, "learning_rate": 1.547437131277485e-05, "loss": 0.5311, "step": 23058 }, { "epoch": 0.6331411312465678, "grad_norm": 0.3614116609096527, "learning_rate": 1.5474009879047055e-05, "loss": 0.4979, "step": 23059 }, { "epoch": 0.6331685886875343, "grad_norm": 0.45920678973197937, "learning_rate": 1.547364843510848e-05, "loss": 0.5171, "step": 23060 }, { "epoch": 0.6331960461285008, "grad_norm": 0.33893638849258423, "learning_rate": 1.5473286980959805e-05, "loss": 0.4667, "step": 23061 }, { "epoch": 0.6332235035694673, "grad_norm": 0.4210006892681122, "learning_rate": 1.5472925516601702e-05, "loss": 0.5644, "step": 23062 }, { "epoch": 0.6332509610104339, "grad_norm": 0.3949960172176361, "learning_rate": 1.5472564042034847e-05, "loss": 0.4996, "step": 23063 }, { "epoch": 0.6332784184514003, "grad_norm": 0.6855419874191284, "learning_rate": 1.5472202557259916e-05, "loss": 0.6427, "step": 23064 }, { "epoch": 0.6333058758923669, "grad_norm": 0.3675002455711365, "learning_rate": 1.5471841062277574e-05, "loss": 0.4522, "step": 23065 }, { "epoch": 0.6333333333333333, "grad_norm": 0.38230621814727783, "learning_rate": 1.5471479557088506e-05, "loss": 0.525, "step": 23066 }, { "epoch": 0.6333607907742999, "grad_norm": 0.3966647684574127, "learning_rate": 1.5471118041693385e-05, "loss": 0.5063, "step": 23067 }, { "epoch": 0.6333882482152663, "grad_norm": 0.42115357518196106, "learning_rate": 1.5470756516092876e-05, "loss": 0.5332, "step": 23068 }, { "epoch": 0.6334157056562328, "grad_norm": 0.37133410573005676, "learning_rate": 1.5470394980287666e-05, "loss": 0.5262, "step": 23069 }, { "epoch": 0.6334431630971994, "grad_norm": 0.4698651134967804, "learning_rate": 1.5470033434278423e-05, "loss": 0.6593, "step": 23070 }, { "epoch": 0.6334706205381658, "grad_norm": 0.410562664270401, "learning_rate": 1.5469671878065818e-05, "loss": 0.4665, "step": 23071 }, { "epoch": 0.6334980779791324, "grad_norm": 0.40724897384643555, "learning_rate": 1.546931031165053e-05, "loss": 0.498, "step": 23072 }, { "epoch": 0.6335255354200988, "grad_norm": 0.38413506746292114, "learning_rate": 1.5468948735033238e-05, "loss": 0.5316, "step": 23073 }, { "epoch": 0.6335529928610654, "grad_norm": 0.4032515585422516, "learning_rate": 1.5468587148214605e-05, "loss": 0.4783, "step": 23074 }, { "epoch": 0.6335804503020318, "grad_norm": 0.36969518661499023, "learning_rate": 1.5468225551195314e-05, "loss": 0.4792, "step": 23075 }, { "epoch": 0.6336079077429984, "grad_norm": 0.3691737949848175, "learning_rate": 1.5467863943976034e-05, "loss": 0.4191, "step": 23076 }, { "epoch": 0.6336353651839649, "grad_norm": 0.3533030152320862, "learning_rate": 1.546750232655745e-05, "loss": 0.4798, "step": 23077 }, { "epoch": 0.6336628226249313, "grad_norm": 0.40293240547180176, "learning_rate": 1.5467140698940223e-05, "loss": 0.5157, "step": 23078 }, { "epoch": 0.6336902800658979, "grad_norm": 0.7748141884803772, "learning_rate": 1.5466779061125037e-05, "loss": 0.5787, "step": 23079 }, { "epoch": 0.6337177375068643, "grad_norm": 0.394393652677536, "learning_rate": 1.5466417413112562e-05, "loss": 0.4923, "step": 23080 }, { "epoch": 0.6337451949478309, "grad_norm": 0.43010014295578003, "learning_rate": 1.5466055754903472e-05, "loss": 0.447, "step": 23081 }, { "epoch": 0.6337726523887973, "grad_norm": 0.3759283125400543, "learning_rate": 1.5465694086498446e-05, "loss": 0.5098, "step": 23082 }, { "epoch": 0.6338001098297639, "grad_norm": 0.43147483468055725, "learning_rate": 1.5465332407898155e-05, "loss": 0.443, "step": 23083 }, { "epoch": 0.6338275672707304, "grad_norm": 0.3488841950893402, "learning_rate": 1.5464970719103274e-05, "loss": 0.4621, "step": 23084 }, { "epoch": 0.6338550247116969, "grad_norm": 0.3695021867752075, "learning_rate": 1.546460902011448e-05, "loss": 0.4834, "step": 23085 }, { "epoch": 0.6338824821526634, "grad_norm": 0.3534071445465088, "learning_rate": 1.5464247310932444e-05, "loss": 0.4588, "step": 23086 }, { "epoch": 0.6339099395936298, "grad_norm": 0.3558345437049866, "learning_rate": 1.5463885591557844e-05, "loss": 0.4027, "step": 23087 }, { "epoch": 0.6339373970345964, "grad_norm": 0.3654100298881531, "learning_rate": 1.5463523861991354e-05, "loss": 0.4716, "step": 23088 }, { "epoch": 0.6339648544755628, "grad_norm": 0.35962381958961487, "learning_rate": 1.5463162122233648e-05, "loss": 0.5217, "step": 23089 }, { "epoch": 0.6339923119165294, "grad_norm": 0.38005805015563965, "learning_rate": 1.54628003722854e-05, "loss": 0.5347, "step": 23090 }, { "epoch": 0.6340197693574959, "grad_norm": 0.4630787968635559, "learning_rate": 1.5462438612147286e-05, "loss": 0.593, "step": 23091 }, { "epoch": 0.6340472267984624, "grad_norm": 0.3761962354183197, "learning_rate": 1.546207684181998e-05, "loss": 0.5427, "step": 23092 }, { "epoch": 0.6340746842394289, "grad_norm": 0.41832759976387024, "learning_rate": 1.546171506130416e-05, "loss": 0.4919, "step": 23093 }, { "epoch": 0.6341021416803954, "grad_norm": 0.38694778084754944, "learning_rate": 1.5461353270600496e-05, "loss": 0.4174, "step": 23094 }, { "epoch": 0.6341295991213619, "grad_norm": 0.3563859462738037, "learning_rate": 1.5460991469709665e-05, "loss": 0.5107, "step": 23095 }, { "epoch": 0.6341570565623283, "grad_norm": 0.3383978009223938, "learning_rate": 1.5460629658632344e-05, "loss": 0.4987, "step": 23096 }, { "epoch": 0.6341845140032949, "grad_norm": 0.855695366859436, "learning_rate": 1.5460267837369204e-05, "loss": 0.642, "step": 23097 }, { "epoch": 0.6342119714442614, "grad_norm": 0.4117930233478546, "learning_rate": 1.5459906005920924e-05, "loss": 0.5239, "step": 23098 }, { "epoch": 0.6342394288852279, "grad_norm": 0.3417458236217499, "learning_rate": 1.5459544164288173e-05, "loss": 0.5376, "step": 23099 }, { "epoch": 0.6342668863261944, "grad_norm": 0.3484813868999481, "learning_rate": 1.5459182312471633e-05, "loss": 0.4391, "step": 23100 }, { "epoch": 0.6342943437671609, "grad_norm": 0.4191799461841583, "learning_rate": 1.5458820450471976e-05, "loss": 0.4705, "step": 23101 }, { "epoch": 0.6343218012081274, "grad_norm": 0.48568323254585266, "learning_rate": 1.5458458578289874e-05, "loss": 0.6249, "step": 23102 }, { "epoch": 0.6343492586490939, "grad_norm": 0.4041660726070404, "learning_rate": 1.5458096695926003e-05, "loss": 0.4758, "step": 23103 }, { "epoch": 0.6343767160900604, "grad_norm": 0.37629079818725586, "learning_rate": 1.5457734803381042e-05, "loss": 0.4648, "step": 23104 }, { "epoch": 0.634404173531027, "grad_norm": 0.4323202967643738, "learning_rate": 1.5457372900655668e-05, "loss": 0.5481, "step": 23105 }, { "epoch": 0.6344316309719934, "grad_norm": 0.34899428486824036, "learning_rate": 1.5457010987750546e-05, "loss": 0.4016, "step": 23106 }, { "epoch": 0.63445908841296, "grad_norm": 0.34941360354423523, "learning_rate": 1.5456649064666355e-05, "loss": 0.5425, "step": 23107 }, { "epoch": 0.6344865458539264, "grad_norm": 0.38764163851737976, "learning_rate": 1.5456287131403776e-05, "loss": 0.4842, "step": 23108 }, { "epoch": 0.6345140032948929, "grad_norm": 0.3626110255718231, "learning_rate": 1.5455925187963483e-05, "loss": 0.4184, "step": 23109 }, { "epoch": 0.6345414607358594, "grad_norm": 0.3764074146747589, "learning_rate": 1.5455563234346143e-05, "loss": 0.5705, "step": 23110 }, { "epoch": 0.6345689181768259, "grad_norm": 0.36458972096443176, "learning_rate": 1.5455201270552437e-05, "loss": 0.5418, "step": 23111 }, { "epoch": 0.6345963756177925, "grad_norm": 0.38084959983825684, "learning_rate": 1.5454839296583043e-05, "loss": 0.5422, "step": 23112 }, { "epoch": 0.6346238330587589, "grad_norm": 0.48499566316604614, "learning_rate": 1.5454477312438626e-05, "loss": 0.4367, "step": 23113 }, { "epoch": 0.6346512904997255, "grad_norm": 0.40629327297210693, "learning_rate": 1.5454115318119875e-05, "loss": 0.5114, "step": 23114 }, { "epoch": 0.6346787479406919, "grad_norm": 0.3993547260761261, "learning_rate": 1.5453753313627456e-05, "loss": 0.582, "step": 23115 }, { "epoch": 0.6347062053816585, "grad_norm": 0.3816734254360199, "learning_rate": 1.5453391298962043e-05, "loss": 0.4955, "step": 23116 }, { "epoch": 0.6347336628226249, "grad_norm": 0.40103021264076233, "learning_rate": 1.545302927412432e-05, "loss": 0.5222, "step": 23117 }, { "epoch": 0.6347611202635914, "grad_norm": 0.3600994646549225, "learning_rate": 1.5452667239114953e-05, "loss": 0.4531, "step": 23118 }, { "epoch": 0.634788577704558, "grad_norm": 0.36365047097206116, "learning_rate": 1.5452305193934625e-05, "loss": 0.4432, "step": 23119 }, { "epoch": 0.6348160351455244, "grad_norm": 0.45382532477378845, "learning_rate": 1.5451943138584004e-05, "loss": 0.5106, "step": 23120 }, { "epoch": 0.634843492586491, "grad_norm": 0.42120328545570374, "learning_rate": 1.5451581073063772e-05, "loss": 0.4141, "step": 23121 }, { "epoch": 0.6348709500274574, "grad_norm": 0.3394007384777069, "learning_rate": 1.54512189973746e-05, "loss": 0.4927, "step": 23122 }, { "epoch": 0.634898407468424, "grad_norm": 0.4701749384403229, "learning_rate": 1.5450856911517163e-05, "loss": 0.4855, "step": 23123 }, { "epoch": 0.6349258649093904, "grad_norm": 0.3769737184047699, "learning_rate": 1.545049481549214e-05, "loss": 0.4802, "step": 23124 }, { "epoch": 0.634953322350357, "grad_norm": 0.4439629316329956, "learning_rate": 1.5450132709300203e-05, "loss": 0.5574, "step": 23125 }, { "epoch": 0.6349807797913235, "grad_norm": 0.3873760998249054, "learning_rate": 1.5449770592942034e-05, "loss": 0.455, "step": 23126 }, { "epoch": 0.6350082372322899, "grad_norm": 0.4361985921859741, "learning_rate": 1.5449408466418296e-05, "loss": 0.5366, "step": 23127 }, { "epoch": 0.6350356946732565, "grad_norm": 0.38562464714050293, "learning_rate": 1.544904632972968e-05, "loss": 0.4885, "step": 23128 }, { "epoch": 0.6350631521142229, "grad_norm": 0.3935583531856537, "learning_rate": 1.544868418287685e-05, "loss": 0.4869, "step": 23129 }, { "epoch": 0.6350906095551895, "grad_norm": 0.3871798515319824, "learning_rate": 1.544832202586048e-05, "loss": 0.537, "step": 23130 }, { "epoch": 0.6351180669961559, "grad_norm": 0.38637059926986694, "learning_rate": 1.5447959858681256e-05, "loss": 0.499, "step": 23131 }, { "epoch": 0.6351455244371225, "grad_norm": 0.3399500846862793, "learning_rate": 1.5447597681339845e-05, "loss": 0.4482, "step": 23132 }, { "epoch": 0.635172981878089, "grad_norm": 0.397236168384552, "learning_rate": 1.5447235493836928e-05, "loss": 0.4962, "step": 23133 }, { "epoch": 0.6352004393190555, "grad_norm": 0.397553950548172, "learning_rate": 1.5446873296173177e-05, "loss": 0.5098, "step": 23134 }, { "epoch": 0.635227896760022, "grad_norm": 0.38921859860420227, "learning_rate": 1.5446511088349272e-05, "loss": 0.4999, "step": 23135 }, { "epoch": 0.6352553542009884, "grad_norm": 0.4286947548389435, "learning_rate": 1.544614887036588e-05, "loss": 0.7311, "step": 23136 }, { "epoch": 0.635282811641955, "grad_norm": 0.40711960196495056, "learning_rate": 1.544578664222369e-05, "loss": 0.5077, "step": 23137 }, { "epoch": 0.6353102690829214, "grad_norm": 0.3852107524871826, "learning_rate": 1.544542440392336e-05, "loss": 0.4234, "step": 23138 }, { "epoch": 0.635337726523888, "grad_norm": 0.373675674200058, "learning_rate": 1.544506215546558e-05, "loss": 0.538, "step": 23139 }, { "epoch": 0.6353651839648545, "grad_norm": 0.3921574354171753, "learning_rate": 1.5444699896851026e-05, "loss": 0.4957, "step": 23140 }, { "epoch": 0.635392641405821, "grad_norm": 0.5805567502975464, "learning_rate": 1.544433762808036e-05, "loss": 0.5659, "step": 23141 }, { "epoch": 0.6354200988467875, "grad_norm": 0.660605251789093, "learning_rate": 1.5443975349154274e-05, "loss": 0.516, "step": 23142 }, { "epoch": 0.635447556287754, "grad_norm": 0.3963235318660736, "learning_rate": 1.5443613060073435e-05, "loss": 0.5342, "step": 23143 }, { "epoch": 0.6354750137287205, "grad_norm": 0.40800750255584717, "learning_rate": 1.544325076083852e-05, "loss": 0.5874, "step": 23144 }, { "epoch": 0.6355024711696869, "grad_norm": 0.41823309659957886, "learning_rate": 1.5442888451450202e-05, "loss": 0.4849, "step": 23145 }, { "epoch": 0.6355299286106535, "grad_norm": 0.38655057549476624, "learning_rate": 1.544252613190916e-05, "loss": 0.5422, "step": 23146 }, { "epoch": 0.63555738605162, "grad_norm": 0.40317535400390625, "learning_rate": 1.5442163802216076e-05, "loss": 0.5309, "step": 23147 }, { "epoch": 0.6355848434925865, "grad_norm": 0.4582132399082184, "learning_rate": 1.5441801462371614e-05, "loss": 0.5887, "step": 23148 }, { "epoch": 0.635612300933553, "grad_norm": 0.39238813519477844, "learning_rate": 1.5441439112376455e-05, "loss": 0.5089, "step": 23149 }, { "epoch": 0.6356397583745195, "grad_norm": 0.3864198327064514, "learning_rate": 1.544107675223128e-05, "loss": 0.4513, "step": 23150 }, { "epoch": 0.635667215815486, "grad_norm": 0.3879215717315674, "learning_rate": 1.5440714381936756e-05, "loss": 0.4782, "step": 23151 }, { "epoch": 0.6356946732564525, "grad_norm": 0.3946992754936218, "learning_rate": 1.5440352001493564e-05, "loss": 0.5372, "step": 23152 }, { "epoch": 0.635722130697419, "grad_norm": 0.4273972511291504, "learning_rate": 1.543998961090238e-05, "loss": 0.5285, "step": 23153 }, { "epoch": 0.6357495881383856, "grad_norm": 0.3794424831867218, "learning_rate": 1.5439627210163878e-05, "loss": 0.4976, "step": 23154 }, { "epoch": 0.635777045579352, "grad_norm": 0.6093939542770386, "learning_rate": 1.543926479927874e-05, "loss": 0.4473, "step": 23155 }, { "epoch": 0.6358045030203185, "grad_norm": 0.4022117555141449, "learning_rate": 1.543890237824763e-05, "loss": 0.5864, "step": 23156 }, { "epoch": 0.635831960461285, "grad_norm": 0.3957201838493347, "learning_rate": 1.5438539947071233e-05, "loss": 0.512, "step": 23157 }, { "epoch": 0.6358594179022515, "grad_norm": 0.4199093282222748, "learning_rate": 1.5438177505750225e-05, "loss": 0.5279, "step": 23158 }, { "epoch": 0.635886875343218, "grad_norm": 0.37731701135635376, "learning_rate": 1.5437815054285282e-05, "loss": 0.4897, "step": 23159 }, { "epoch": 0.6359143327841845, "grad_norm": 0.4576948881149292, "learning_rate": 1.5437452592677072e-05, "loss": 0.5012, "step": 23160 }, { "epoch": 0.6359417902251511, "grad_norm": 0.3656446039676666, "learning_rate": 1.5437090120926284e-05, "loss": 0.4878, "step": 23161 }, { "epoch": 0.6359692476661175, "grad_norm": 0.39502575993537903, "learning_rate": 1.5436727639033583e-05, "loss": 0.5432, "step": 23162 }, { "epoch": 0.6359967051070841, "grad_norm": 0.3633093535900116, "learning_rate": 1.5436365146999655e-05, "loss": 0.4856, "step": 23163 }, { "epoch": 0.6360241625480505, "grad_norm": 0.38085854053497314, "learning_rate": 1.5436002644825164e-05, "loss": 0.4636, "step": 23164 }, { "epoch": 0.636051619989017, "grad_norm": 0.4096362590789795, "learning_rate": 1.54356401325108e-05, "loss": 0.525, "step": 23165 }, { "epoch": 0.6360790774299835, "grad_norm": 0.41394323110580444, "learning_rate": 1.543527761005723e-05, "loss": 0.4131, "step": 23166 }, { "epoch": 0.63610653487095, "grad_norm": 0.4359787702560425, "learning_rate": 1.5434915077465132e-05, "loss": 0.5229, "step": 23167 }, { "epoch": 0.6361339923119165, "grad_norm": 0.38194769620895386, "learning_rate": 1.543455253473518e-05, "loss": 0.4845, "step": 23168 }, { "epoch": 0.636161449752883, "grad_norm": 0.3960731029510498, "learning_rate": 1.543418998186806e-05, "loss": 0.668, "step": 23169 }, { "epoch": 0.6361889071938496, "grad_norm": 0.41112372279167175, "learning_rate": 1.5433827418864435e-05, "loss": 0.538, "step": 23170 }, { "epoch": 0.636216364634816, "grad_norm": 0.4057997763156891, "learning_rate": 1.5433464845724988e-05, "loss": 0.4708, "step": 23171 }, { "epoch": 0.6362438220757826, "grad_norm": 0.4092215597629547, "learning_rate": 1.5433102262450396e-05, "loss": 0.4836, "step": 23172 }, { "epoch": 0.636271279516749, "grad_norm": 0.37497901916503906, "learning_rate": 1.5432739669041335e-05, "loss": 0.5166, "step": 23173 }, { "epoch": 0.6362987369577155, "grad_norm": 0.39989611506462097, "learning_rate": 1.543237706549848e-05, "loss": 0.514, "step": 23174 }, { "epoch": 0.636326194398682, "grad_norm": 0.38425639271736145, "learning_rate": 1.543201445182251e-05, "loss": 0.5294, "step": 23175 }, { "epoch": 0.6363536518396485, "grad_norm": 0.39275774359703064, "learning_rate": 1.54316518280141e-05, "loss": 0.486, "step": 23176 }, { "epoch": 0.6363811092806151, "grad_norm": 0.37867262959480286, "learning_rate": 1.543128919407392e-05, "loss": 0.4287, "step": 23177 }, { "epoch": 0.6364085667215815, "grad_norm": 0.3839682936668396, "learning_rate": 1.5430926550002653e-05, "loss": 0.5278, "step": 23178 }, { "epoch": 0.6364360241625481, "grad_norm": 0.3572182357311249, "learning_rate": 1.5430563895800977e-05, "loss": 0.5789, "step": 23179 }, { "epoch": 0.6364634816035145, "grad_norm": 0.3409055471420288, "learning_rate": 1.5430201231469567e-05, "loss": 0.4723, "step": 23180 }, { "epoch": 0.6364909390444811, "grad_norm": 0.38545286655426025, "learning_rate": 1.54298385570091e-05, "loss": 0.4865, "step": 23181 }, { "epoch": 0.6365183964854475, "grad_norm": 0.34843191504478455, "learning_rate": 1.5429475872420246e-05, "loss": 0.5104, "step": 23182 }, { "epoch": 0.636545853926414, "grad_norm": 0.3923165202140808, "learning_rate": 1.542911317770369e-05, "loss": 0.521, "step": 23183 }, { "epoch": 0.6365733113673806, "grad_norm": 0.34207576513290405, "learning_rate": 1.5428750472860107e-05, "loss": 0.5467, "step": 23184 }, { "epoch": 0.636600768808347, "grad_norm": 0.41347983479499817, "learning_rate": 1.5428387757890167e-05, "loss": 0.5627, "step": 23185 }, { "epoch": 0.6366282262493136, "grad_norm": 0.41610628366470337, "learning_rate": 1.5428025032794555e-05, "loss": 0.4585, "step": 23186 }, { "epoch": 0.63665568369028, "grad_norm": 0.3485484719276428, "learning_rate": 1.542766229757394e-05, "loss": 0.4704, "step": 23187 }, { "epoch": 0.6366831411312466, "grad_norm": 0.4595089852809906, "learning_rate": 1.542729955222901e-05, "loss": 0.5065, "step": 23188 }, { "epoch": 0.636710598572213, "grad_norm": 0.35638228058815, "learning_rate": 1.5426936796760425e-05, "loss": 0.5323, "step": 23189 }, { "epoch": 0.6367380560131796, "grad_norm": 0.38852035999298096, "learning_rate": 1.5426574031168876e-05, "loss": 0.4487, "step": 23190 }, { "epoch": 0.6367655134541461, "grad_norm": 0.3991386592388153, "learning_rate": 1.5426211255455035e-05, "loss": 0.5382, "step": 23191 }, { "epoch": 0.6367929708951126, "grad_norm": 0.39037150144577026, "learning_rate": 1.5425848469619574e-05, "loss": 0.5094, "step": 23192 }, { "epoch": 0.6368204283360791, "grad_norm": 0.3789840638637543, "learning_rate": 1.5425485673663178e-05, "loss": 0.4757, "step": 23193 }, { "epoch": 0.6368478857770455, "grad_norm": 0.4898328185081482, "learning_rate": 1.542512286758652e-05, "loss": 0.4734, "step": 23194 }, { "epoch": 0.6368753432180121, "grad_norm": 0.4133282005786896, "learning_rate": 1.5424760051390274e-05, "loss": 0.4389, "step": 23195 }, { "epoch": 0.6369028006589785, "grad_norm": 0.3740154504776001, "learning_rate": 1.542439722507512e-05, "loss": 0.558, "step": 23196 }, { "epoch": 0.6369302580999451, "grad_norm": 0.5770440697669983, "learning_rate": 1.5424034388641735e-05, "loss": 0.4569, "step": 23197 }, { "epoch": 0.6369577155409116, "grad_norm": 0.3746641278266907, "learning_rate": 1.542367154209079e-05, "loss": 0.5123, "step": 23198 }, { "epoch": 0.6369851729818781, "grad_norm": 0.4017345607280731, "learning_rate": 1.5423308685422968e-05, "loss": 0.4687, "step": 23199 }, { "epoch": 0.6370126304228446, "grad_norm": 0.3621593117713928, "learning_rate": 1.5422945818638947e-05, "loss": 0.5744, "step": 23200 }, { "epoch": 0.637040087863811, "grad_norm": 0.37034872174263, "learning_rate": 1.54225829417394e-05, "loss": 0.5737, "step": 23201 }, { "epoch": 0.6370675453047776, "grad_norm": 0.6495491862297058, "learning_rate": 1.5422220054725007e-05, "loss": 0.5349, "step": 23202 }, { "epoch": 0.637095002745744, "grad_norm": 0.3374318480491638, "learning_rate": 1.542185715759644e-05, "loss": 0.4175, "step": 23203 }, { "epoch": 0.6371224601867106, "grad_norm": 0.33278122544288635, "learning_rate": 1.5421494250354382e-05, "loss": 0.4305, "step": 23204 }, { "epoch": 0.6371499176276771, "grad_norm": 0.5741167068481445, "learning_rate": 1.5421131332999502e-05, "loss": 0.4811, "step": 23205 }, { "epoch": 0.6371773750686436, "grad_norm": 0.4093117415904999, "learning_rate": 1.5420768405532486e-05, "loss": 0.5181, "step": 23206 }, { "epoch": 0.6372048325096101, "grad_norm": 0.3113693296909332, "learning_rate": 1.5420405467954007e-05, "loss": 0.4305, "step": 23207 }, { "epoch": 0.6372322899505766, "grad_norm": 0.35866779088974, "learning_rate": 1.542004252026474e-05, "loss": 0.4736, "step": 23208 }, { "epoch": 0.6372597473915431, "grad_norm": 0.3485752046108246, "learning_rate": 1.5419679562465364e-05, "loss": 0.4977, "step": 23209 }, { "epoch": 0.6372872048325096, "grad_norm": 0.4487015902996063, "learning_rate": 1.541931659455656e-05, "loss": 0.6114, "step": 23210 }, { "epoch": 0.6373146622734761, "grad_norm": 0.37420421838760376, "learning_rate": 1.5418953616538996e-05, "loss": 0.4884, "step": 23211 }, { "epoch": 0.6373421197144427, "grad_norm": 0.36313384771347046, "learning_rate": 1.5418590628413355e-05, "loss": 0.4764, "step": 23212 }, { "epoch": 0.6373695771554091, "grad_norm": 0.36471158266067505, "learning_rate": 1.5418227630180314e-05, "loss": 0.4562, "step": 23213 }, { "epoch": 0.6373970345963756, "grad_norm": 0.41885319352149963, "learning_rate": 1.541786462184055e-05, "loss": 0.507, "step": 23214 }, { "epoch": 0.6374244920373421, "grad_norm": 0.4322323501110077, "learning_rate": 1.5417501603394737e-05, "loss": 0.521, "step": 23215 }, { "epoch": 0.6374519494783086, "grad_norm": 0.3432362675666809, "learning_rate": 1.5417138574843554e-05, "loss": 0.5346, "step": 23216 }, { "epoch": 0.6374794069192751, "grad_norm": 0.35707592964172363, "learning_rate": 1.541677553618768e-05, "loss": 0.4464, "step": 23217 }, { "epoch": 0.6375068643602416, "grad_norm": 0.3602023422718048, "learning_rate": 1.541641248742779e-05, "loss": 0.4496, "step": 23218 }, { "epoch": 0.6375343218012082, "grad_norm": 0.3810091018676758, "learning_rate": 1.541604942856456e-05, "loss": 0.5673, "step": 23219 }, { "epoch": 0.6375617792421746, "grad_norm": 0.43578702211380005, "learning_rate": 1.5415686359598673e-05, "loss": 0.5791, "step": 23220 }, { "epoch": 0.6375892366831412, "grad_norm": 0.34876585006713867, "learning_rate": 1.5415323280530804e-05, "loss": 0.5121, "step": 23221 }, { "epoch": 0.6376166941241076, "grad_norm": 0.4660422205924988, "learning_rate": 1.5414960191361624e-05, "loss": 0.5006, "step": 23222 }, { "epoch": 0.6376441515650741, "grad_norm": 0.3958325684070587, "learning_rate": 1.541459709209182e-05, "loss": 0.5063, "step": 23223 }, { "epoch": 0.6376716090060406, "grad_norm": 0.3851735293865204, "learning_rate": 1.541423398272206e-05, "loss": 0.4619, "step": 23224 }, { "epoch": 0.6376990664470071, "grad_norm": 0.36440545320510864, "learning_rate": 1.5413870863253028e-05, "loss": 0.4488, "step": 23225 }, { "epoch": 0.6377265238879737, "grad_norm": 0.36761894822120667, "learning_rate": 1.5413507733685397e-05, "loss": 0.4717, "step": 23226 }, { "epoch": 0.6377539813289401, "grad_norm": 0.4066241383552551, "learning_rate": 1.5413144594019846e-05, "loss": 0.5012, "step": 23227 }, { "epoch": 0.6377814387699067, "grad_norm": 0.3893265426158905, "learning_rate": 1.5412781444257055e-05, "loss": 0.4694, "step": 23228 }, { "epoch": 0.6378088962108731, "grad_norm": 0.5100352168083191, "learning_rate": 1.5412418284397696e-05, "loss": 0.4352, "step": 23229 }, { "epoch": 0.6378363536518397, "grad_norm": 0.4173066020011902, "learning_rate": 1.5412055114442453e-05, "loss": 0.4703, "step": 23230 }, { "epoch": 0.6378638110928061, "grad_norm": 0.36859768629074097, "learning_rate": 1.5411691934391997e-05, "loss": 0.4064, "step": 23231 }, { "epoch": 0.6378912685337726, "grad_norm": 0.47813212871551514, "learning_rate": 1.541132874424701e-05, "loss": 0.5561, "step": 23232 }, { "epoch": 0.6379187259747392, "grad_norm": 0.3600277900695801, "learning_rate": 1.541096554400817e-05, "loss": 0.4803, "step": 23233 }, { "epoch": 0.6379461834157056, "grad_norm": 0.380526065826416, "learning_rate": 1.5410602333676144e-05, "loss": 0.5922, "step": 23234 }, { "epoch": 0.6379736408566722, "grad_norm": 0.3682764768600464, "learning_rate": 1.5410239113251626e-05, "loss": 0.5015, "step": 23235 }, { "epoch": 0.6380010982976386, "grad_norm": 0.3719002902507782, "learning_rate": 1.5409875882735282e-05, "loss": 0.4643, "step": 23236 }, { "epoch": 0.6380285557386052, "grad_norm": 0.4564128816127777, "learning_rate": 1.5409512642127795e-05, "loss": 0.5339, "step": 23237 }, { "epoch": 0.6380560131795716, "grad_norm": 0.32599058747291565, "learning_rate": 1.5409149391429838e-05, "loss": 0.3546, "step": 23238 }, { "epoch": 0.6380834706205382, "grad_norm": 0.3296312987804413, "learning_rate": 1.540878613064209e-05, "loss": 0.4248, "step": 23239 }, { "epoch": 0.6381109280615047, "grad_norm": 0.5298121571540833, "learning_rate": 1.5408422859765235e-05, "loss": 0.4915, "step": 23240 }, { "epoch": 0.6381383855024712, "grad_norm": 0.4769827723503113, "learning_rate": 1.540805957879994e-05, "loss": 0.5154, "step": 23241 }, { "epoch": 0.6381658429434377, "grad_norm": 0.33248913288116455, "learning_rate": 1.5407696287746888e-05, "loss": 0.4043, "step": 23242 }, { "epoch": 0.6381933003844041, "grad_norm": 0.3557734787464142, "learning_rate": 1.5407332986606763e-05, "loss": 0.4293, "step": 23243 }, { "epoch": 0.6382207578253707, "grad_norm": 0.376846045255661, "learning_rate": 1.5406969675380228e-05, "loss": 0.5601, "step": 23244 }, { "epoch": 0.6382482152663371, "grad_norm": 0.35156741738319397, "learning_rate": 1.5406606354067974e-05, "loss": 0.4126, "step": 23245 }, { "epoch": 0.6382756727073037, "grad_norm": 0.3726288378238678, "learning_rate": 1.5406243022670673e-05, "loss": 0.5223, "step": 23246 }, { "epoch": 0.6383031301482702, "grad_norm": 0.37260863184928894, "learning_rate": 1.5405879681189e-05, "loss": 0.4722, "step": 23247 }, { "epoch": 0.6383305875892367, "grad_norm": 0.4717891812324524, "learning_rate": 1.5405516329623638e-05, "loss": 0.4946, "step": 23248 }, { "epoch": 0.6383580450302032, "grad_norm": 0.3838779330253601, "learning_rate": 1.5405152967975264e-05, "loss": 0.4944, "step": 23249 }, { "epoch": 0.6383855024711697, "grad_norm": 0.38444384932518005, "learning_rate": 1.5404789596244552e-05, "loss": 0.5144, "step": 23250 }, { "epoch": 0.6384129599121362, "grad_norm": 0.3707401752471924, "learning_rate": 1.5404426214432183e-05, "loss": 0.4989, "step": 23251 }, { "epoch": 0.6384404173531026, "grad_norm": 0.3862748146057129, "learning_rate": 1.5404062822538835e-05, "loss": 0.5058, "step": 23252 }, { "epoch": 0.6384678747940692, "grad_norm": 0.393929660320282, "learning_rate": 1.5403699420565187e-05, "loss": 0.5495, "step": 23253 }, { "epoch": 0.6384953322350357, "grad_norm": 0.3835124969482422, "learning_rate": 1.5403336008511913e-05, "loss": 0.4646, "step": 23254 }, { "epoch": 0.6385227896760022, "grad_norm": 0.3535405099391937, "learning_rate": 1.540297258637969e-05, "loss": 0.454, "step": 23255 }, { "epoch": 0.6385502471169687, "grad_norm": 0.4486088156700134, "learning_rate": 1.5402609154169204e-05, "loss": 0.5509, "step": 23256 }, { "epoch": 0.6385777045579352, "grad_norm": 0.4190601408481598, "learning_rate": 1.5402245711881122e-05, "loss": 0.5755, "step": 23257 }, { "epoch": 0.6386051619989017, "grad_norm": 0.40132972598075867, "learning_rate": 1.5401882259516133e-05, "loss": 0.5919, "step": 23258 }, { "epoch": 0.6386326194398682, "grad_norm": 0.3933853507041931, "learning_rate": 1.5401518797074908e-05, "loss": 0.5684, "step": 23259 }, { "epoch": 0.6386600768808347, "grad_norm": 0.41423895955085754, "learning_rate": 1.5401155324558124e-05, "loss": 0.5748, "step": 23260 }, { "epoch": 0.6386875343218013, "grad_norm": 0.39568835496902466, "learning_rate": 1.5400791841966466e-05, "loss": 0.5096, "step": 23261 }, { "epoch": 0.6387149917627677, "grad_norm": 0.394079327583313, "learning_rate": 1.54004283493006e-05, "loss": 0.5171, "step": 23262 }, { "epoch": 0.6387424492037342, "grad_norm": 0.36781975626945496, "learning_rate": 1.5400064846561218e-05, "loss": 0.4662, "step": 23263 }, { "epoch": 0.6387699066447007, "grad_norm": 0.3379192054271698, "learning_rate": 1.539970133374899e-05, "loss": 0.4272, "step": 23264 }, { "epoch": 0.6387973640856672, "grad_norm": 1.4198195934295654, "learning_rate": 1.5399337810864594e-05, "loss": 0.5817, "step": 23265 }, { "epoch": 0.6388248215266337, "grad_norm": 0.38628217577934265, "learning_rate": 1.5398974277908713e-05, "loss": 0.5536, "step": 23266 }, { "epoch": 0.6388522789676002, "grad_norm": 0.38976743817329407, "learning_rate": 1.5398610734882017e-05, "loss": 0.4141, "step": 23267 }, { "epoch": 0.6388797364085668, "grad_norm": 0.42068231105804443, "learning_rate": 1.5398247181785193e-05, "loss": 0.5873, "step": 23268 }, { "epoch": 0.6389071938495332, "grad_norm": 0.3863702118396759, "learning_rate": 1.5397883618618913e-05, "loss": 0.5253, "step": 23269 }, { "epoch": 0.6389346512904998, "grad_norm": 0.4066868722438812, "learning_rate": 1.5397520045383857e-05, "loss": 0.4909, "step": 23270 }, { "epoch": 0.6389621087314662, "grad_norm": 0.34870487451553345, "learning_rate": 1.5397156462080707e-05, "loss": 0.4262, "step": 23271 }, { "epoch": 0.6389895661724327, "grad_norm": 0.3678308129310608, "learning_rate": 1.5396792868710133e-05, "loss": 0.4281, "step": 23272 }, { "epoch": 0.6390170236133992, "grad_norm": 0.3847922384738922, "learning_rate": 1.539642926527282e-05, "loss": 0.4836, "step": 23273 }, { "epoch": 0.6390444810543657, "grad_norm": 0.38642778992652893, "learning_rate": 1.5396065651769445e-05, "loss": 0.5077, "step": 23274 }, { "epoch": 0.6390719384953323, "grad_norm": 0.36486631631851196, "learning_rate": 1.5395702028200682e-05, "loss": 0.5455, "step": 23275 }, { "epoch": 0.6390993959362987, "grad_norm": 0.3988831341266632, "learning_rate": 1.5395338394567217e-05, "loss": 0.5469, "step": 23276 }, { "epoch": 0.6391268533772653, "grad_norm": 0.3848815858364105, "learning_rate": 1.5394974750869723e-05, "loss": 0.4892, "step": 23277 }, { "epoch": 0.6391543108182317, "grad_norm": 0.40750497579574585, "learning_rate": 1.539461109710888e-05, "loss": 0.5512, "step": 23278 }, { "epoch": 0.6391817682591983, "grad_norm": 0.7758901715278625, "learning_rate": 1.539424743328536e-05, "loss": 0.5387, "step": 23279 }, { "epoch": 0.6392092257001647, "grad_norm": 0.3683019280433655, "learning_rate": 1.5393883759399852e-05, "loss": 0.5237, "step": 23280 }, { "epoch": 0.6392366831411312, "grad_norm": 0.35037797689437866, "learning_rate": 1.5393520075453026e-05, "loss": 0.4326, "step": 23281 }, { "epoch": 0.6392641405820978, "grad_norm": 0.44442519545555115, "learning_rate": 1.5393156381445567e-05, "loss": 0.5378, "step": 23282 }, { "epoch": 0.6392915980230642, "grad_norm": 0.3759596347808838, "learning_rate": 1.5392792677378148e-05, "loss": 0.5501, "step": 23283 }, { "epoch": 0.6393190554640308, "grad_norm": 0.37357082962989807, "learning_rate": 1.539242896325145e-05, "loss": 0.4619, "step": 23284 }, { "epoch": 0.6393465129049972, "grad_norm": 0.4244522750377655, "learning_rate": 1.539206523906615e-05, "loss": 0.4462, "step": 23285 }, { "epoch": 0.6393739703459638, "grad_norm": 0.34691721200942993, "learning_rate": 1.539170150482293e-05, "loss": 0.4598, "step": 23286 }, { "epoch": 0.6394014277869302, "grad_norm": 0.3927154541015625, "learning_rate": 1.5391337760522464e-05, "loss": 0.5436, "step": 23287 }, { "epoch": 0.6394288852278968, "grad_norm": 0.36177805066108704, "learning_rate": 1.5390974006165432e-05, "loss": 0.4042, "step": 23288 }, { "epoch": 0.6394563426688633, "grad_norm": 0.3837836980819702, "learning_rate": 1.5390610241752516e-05, "loss": 0.5147, "step": 23289 }, { "epoch": 0.6394838001098297, "grad_norm": 0.4072783291339874, "learning_rate": 1.539024646728439e-05, "loss": 0.5934, "step": 23290 }, { "epoch": 0.6395112575507963, "grad_norm": 0.35004910826683044, "learning_rate": 1.5389882682761726e-05, "loss": 0.5394, "step": 23291 }, { "epoch": 0.6395387149917627, "grad_norm": 0.37624043226242065, "learning_rate": 1.538951888818522e-05, "loss": 0.4936, "step": 23292 }, { "epoch": 0.6395661724327293, "grad_norm": 0.3916052579879761, "learning_rate": 1.5389155083555536e-05, "loss": 0.5063, "step": 23293 }, { "epoch": 0.6395936298736957, "grad_norm": 0.37599435448646545, "learning_rate": 1.5388791268873363e-05, "loss": 0.4963, "step": 23294 }, { "epoch": 0.6396210873146623, "grad_norm": 0.3764609396457672, "learning_rate": 1.538842744413937e-05, "loss": 0.4917, "step": 23295 }, { "epoch": 0.6396485447556288, "grad_norm": 0.3665120005607605, "learning_rate": 1.5388063609354243e-05, "loss": 0.4638, "step": 23296 }, { "epoch": 0.6396760021965953, "grad_norm": 0.3854277729988098, "learning_rate": 1.5387699764518656e-05, "loss": 0.5093, "step": 23297 }, { "epoch": 0.6397034596375618, "grad_norm": 0.37834376096725464, "learning_rate": 1.538733590963329e-05, "loss": 0.5255, "step": 23298 }, { "epoch": 0.6397309170785282, "grad_norm": 0.34227702021598816, "learning_rate": 1.5386972044698823e-05, "loss": 0.4775, "step": 23299 }, { "epoch": 0.6397583745194948, "grad_norm": 0.3686126470565796, "learning_rate": 1.5386608169715934e-05, "loss": 0.5573, "step": 23300 }, { "epoch": 0.6397858319604612, "grad_norm": 0.35471388697624207, "learning_rate": 1.5386244284685302e-05, "loss": 0.5025, "step": 23301 }, { "epoch": 0.6398132894014278, "grad_norm": 0.37887367606163025, "learning_rate": 1.5385880389607603e-05, "loss": 0.4929, "step": 23302 }, { "epoch": 0.6398407468423943, "grad_norm": 0.3601284921169281, "learning_rate": 1.538551648448352e-05, "loss": 0.4779, "step": 23303 }, { "epoch": 0.6398682042833608, "grad_norm": 0.3657006323337555, "learning_rate": 1.538515256931373e-05, "loss": 0.5136, "step": 23304 }, { "epoch": 0.6398956617243273, "grad_norm": 0.42818209528923035, "learning_rate": 1.5384788644098913e-05, "loss": 0.5527, "step": 23305 }, { "epoch": 0.6399231191652938, "grad_norm": 0.3885520100593567, "learning_rate": 1.538442470883974e-05, "loss": 0.5132, "step": 23306 }, { "epoch": 0.6399505766062603, "grad_norm": 0.35363152623176575, "learning_rate": 1.5384060763536903e-05, "loss": 0.4647, "step": 23307 }, { "epoch": 0.6399780340472268, "grad_norm": 0.3432157635688782, "learning_rate": 1.5383696808191075e-05, "loss": 0.4343, "step": 23308 }, { "epoch": 0.6400054914881933, "grad_norm": 0.3943377733230591, "learning_rate": 1.538333284280293e-05, "loss": 0.4703, "step": 23309 }, { "epoch": 0.6400329489291599, "grad_norm": 0.3469265103340149, "learning_rate": 1.538296886737315e-05, "loss": 0.4871, "step": 23310 }, { "epoch": 0.6400604063701263, "grad_norm": 0.3577434718608856, "learning_rate": 1.538260488190242e-05, "loss": 0.4895, "step": 23311 }, { "epoch": 0.6400878638110928, "grad_norm": 0.3555665910243988, "learning_rate": 1.538224088639141e-05, "loss": 0.4188, "step": 23312 }, { "epoch": 0.6401153212520593, "grad_norm": 0.48646411299705505, "learning_rate": 1.5381876880840807e-05, "loss": 0.502, "step": 23313 }, { "epoch": 0.6401427786930258, "grad_norm": 0.4505626857280731, "learning_rate": 1.5381512865251282e-05, "loss": 0.4049, "step": 23314 }, { "epoch": 0.6401702361339923, "grad_norm": 0.38546234369277954, "learning_rate": 1.5381148839623522e-05, "loss": 0.5934, "step": 23315 }, { "epoch": 0.6401976935749588, "grad_norm": 0.4524680972099304, "learning_rate": 1.5380784803958198e-05, "loss": 0.4966, "step": 23316 }, { "epoch": 0.6402251510159254, "grad_norm": 0.5143951177597046, "learning_rate": 1.5380420758255994e-05, "loss": 0.5532, "step": 23317 }, { "epoch": 0.6402526084568918, "grad_norm": 0.4236416816711426, "learning_rate": 1.5380056702517588e-05, "loss": 0.5405, "step": 23318 }, { "epoch": 0.6402800658978584, "grad_norm": 0.3528895080089569, "learning_rate": 1.5379692636743658e-05, "loss": 0.4925, "step": 23319 }, { "epoch": 0.6403075233388248, "grad_norm": 0.5806242227554321, "learning_rate": 1.5379328560934888e-05, "loss": 0.5805, "step": 23320 }, { "epoch": 0.6403349807797913, "grad_norm": 0.3504464328289032, "learning_rate": 1.5378964475091952e-05, "loss": 0.4816, "step": 23321 }, { "epoch": 0.6403624382207578, "grad_norm": 0.350191593170166, "learning_rate": 1.5378600379215528e-05, "loss": 0.4926, "step": 23322 }, { "epoch": 0.6403898956617243, "grad_norm": 0.3653624951839447, "learning_rate": 1.5378236273306303e-05, "loss": 0.5394, "step": 23323 }, { "epoch": 0.6404173531026909, "grad_norm": 0.3602825701236725, "learning_rate": 1.5377872157364943e-05, "loss": 0.4885, "step": 23324 }, { "epoch": 0.6404448105436573, "grad_norm": 0.5661852359771729, "learning_rate": 1.537750803139214e-05, "loss": 0.5317, "step": 23325 }, { "epoch": 0.6404722679846239, "grad_norm": 0.3591994643211365, "learning_rate": 1.5377143895388567e-05, "loss": 0.5329, "step": 23326 }, { "epoch": 0.6404997254255903, "grad_norm": 0.3698809742927551, "learning_rate": 1.5376779749354904e-05, "loss": 0.4208, "step": 23327 }, { "epoch": 0.6405271828665569, "grad_norm": 0.3434257507324219, "learning_rate": 1.5376415593291832e-05, "loss": 0.4384, "step": 23328 }, { "epoch": 0.6405546403075233, "grad_norm": 0.43479785323143005, "learning_rate": 1.5376051427200026e-05, "loss": 0.5529, "step": 23329 }, { "epoch": 0.6405820977484898, "grad_norm": 0.4218279719352722, "learning_rate": 1.5375687251080173e-05, "loss": 0.5197, "step": 23330 }, { "epoch": 0.6406095551894564, "grad_norm": 0.3976280093193054, "learning_rate": 1.5375323064932944e-05, "loss": 0.5219, "step": 23331 }, { "epoch": 0.6406370126304228, "grad_norm": 0.389387845993042, "learning_rate": 1.5374958868759023e-05, "loss": 0.5206, "step": 23332 }, { "epoch": 0.6406644700713894, "grad_norm": 0.46067896485328674, "learning_rate": 1.537459466255909e-05, "loss": 0.5101, "step": 23333 }, { "epoch": 0.6406919275123558, "grad_norm": 0.4199887812137604, "learning_rate": 1.537423044633382e-05, "loss": 0.4974, "step": 23334 }, { "epoch": 0.6407193849533224, "grad_norm": 0.40160298347473145, "learning_rate": 1.53738662200839e-05, "loss": 0.5439, "step": 23335 }, { "epoch": 0.6407468423942888, "grad_norm": 0.3799644410610199, "learning_rate": 1.537350198381e-05, "loss": 0.5238, "step": 23336 }, { "epoch": 0.6407742998352554, "grad_norm": 0.39012610912323, "learning_rate": 1.53731377375128e-05, "loss": 0.4215, "step": 23337 }, { "epoch": 0.6408017572762219, "grad_norm": 0.532233476638794, "learning_rate": 1.537277348119299e-05, "loss": 0.4558, "step": 23338 }, { "epoch": 0.6408292147171883, "grad_norm": 0.36730891466140747, "learning_rate": 1.537240921485124e-05, "loss": 0.5315, "step": 23339 }, { "epoch": 0.6408566721581549, "grad_norm": 0.4715304672718048, "learning_rate": 1.537204493848823e-05, "loss": 0.5732, "step": 23340 }, { "epoch": 0.6408841295991213, "grad_norm": 0.42470186948776245, "learning_rate": 1.5371680652104644e-05, "loss": 0.531, "step": 23341 }, { "epoch": 0.6409115870400879, "grad_norm": 0.3835452198982239, "learning_rate": 1.537131635570116e-05, "loss": 0.4862, "step": 23342 }, { "epoch": 0.6409390444810543, "grad_norm": 0.4184708893299103, "learning_rate": 1.5370952049278454e-05, "loss": 0.3757, "step": 23343 }, { "epoch": 0.6409665019220209, "grad_norm": 0.3793332874774933, "learning_rate": 1.537058773283721e-05, "loss": 0.5559, "step": 23344 }, { "epoch": 0.6409939593629874, "grad_norm": 0.41226232051849365, "learning_rate": 1.5370223406378107e-05, "loss": 0.5638, "step": 23345 }, { "epoch": 0.6410214168039539, "grad_norm": 0.3795441687107086, "learning_rate": 1.536985906990182e-05, "loss": 0.525, "step": 23346 }, { "epoch": 0.6410488742449204, "grad_norm": 0.3820289969444275, "learning_rate": 1.5369494723409033e-05, "loss": 0.5082, "step": 23347 }, { "epoch": 0.6410763316858868, "grad_norm": 0.3663162291049957, "learning_rate": 1.5369130366900426e-05, "loss": 0.4574, "step": 23348 }, { "epoch": 0.6411037891268534, "grad_norm": 0.3452400267124176, "learning_rate": 1.5368766000376677e-05, "loss": 0.429, "step": 23349 }, { "epoch": 0.6411312465678198, "grad_norm": 0.37639743089675903, "learning_rate": 1.5368401623838464e-05, "loss": 0.4719, "step": 23350 }, { "epoch": 0.6411587040087864, "grad_norm": 0.3908580541610718, "learning_rate": 1.5368037237286472e-05, "loss": 0.5592, "step": 23351 }, { "epoch": 0.6411861614497529, "grad_norm": 0.41682612895965576, "learning_rate": 1.5367672840721378e-05, "loss": 0.4896, "step": 23352 }, { "epoch": 0.6412136188907194, "grad_norm": 0.36820265650749207, "learning_rate": 1.5367308434143853e-05, "loss": 0.5474, "step": 23353 }, { "epoch": 0.6412410763316859, "grad_norm": 0.3869185745716095, "learning_rate": 1.5366944017554592e-05, "loss": 0.4291, "step": 23354 }, { "epoch": 0.6412685337726524, "grad_norm": 0.4039987325668335, "learning_rate": 1.5366579590954266e-05, "loss": 0.5125, "step": 23355 }, { "epoch": 0.6412959912136189, "grad_norm": 0.42116060853004456, "learning_rate": 1.5366215154343555e-05, "loss": 0.4645, "step": 23356 }, { "epoch": 0.6413234486545853, "grad_norm": 0.39759477972984314, "learning_rate": 1.536585070772314e-05, "loss": 0.5255, "step": 23357 }, { "epoch": 0.6413509060955519, "grad_norm": 0.4346941411495209, "learning_rate": 1.5365486251093704e-05, "loss": 0.5069, "step": 23358 }, { "epoch": 0.6413783635365184, "grad_norm": 0.3513598144054413, "learning_rate": 1.5365121784455922e-05, "loss": 0.4782, "step": 23359 }, { "epoch": 0.6414058209774849, "grad_norm": 0.33418819308280945, "learning_rate": 1.5364757307810473e-05, "loss": 0.5259, "step": 23360 }, { "epoch": 0.6414332784184514, "grad_norm": 0.3269241750240326, "learning_rate": 1.5364392821158042e-05, "loss": 0.4267, "step": 23361 }, { "epoch": 0.6414607358594179, "grad_norm": 0.34932941198349, "learning_rate": 1.536402832449931e-05, "loss": 0.4794, "step": 23362 }, { "epoch": 0.6414881933003844, "grad_norm": 0.48010125756263733, "learning_rate": 1.5363663817834944e-05, "loss": 0.5432, "step": 23363 }, { "epoch": 0.6415156507413509, "grad_norm": 0.4471402168273926, "learning_rate": 1.536329930116564e-05, "loss": 0.5182, "step": 23364 }, { "epoch": 0.6415431081823174, "grad_norm": 0.41603222489356995, "learning_rate": 1.5362934774492068e-05, "loss": 0.5367, "step": 23365 }, { "epoch": 0.641570565623284, "grad_norm": 0.41754236817359924, "learning_rate": 1.536257023781491e-05, "loss": 0.5474, "step": 23366 }, { "epoch": 0.6415980230642504, "grad_norm": 0.34604793787002563, "learning_rate": 1.536220569113485e-05, "loss": 0.5166, "step": 23367 }, { "epoch": 0.641625480505217, "grad_norm": 0.3410996198654175, "learning_rate": 1.5361841134452563e-05, "loss": 0.5043, "step": 23368 }, { "epoch": 0.6416529379461834, "grad_norm": 0.4235232174396515, "learning_rate": 1.5361476567768735e-05, "loss": 0.5553, "step": 23369 }, { "epoch": 0.6416803953871499, "grad_norm": 0.3439613878726959, "learning_rate": 1.536111199108404e-05, "loss": 0.5254, "step": 23370 }, { "epoch": 0.6417078528281164, "grad_norm": 0.4100147485733032, "learning_rate": 1.5360747404399156e-05, "loss": 0.5018, "step": 23371 }, { "epoch": 0.6417353102690829, "grad_norm": 0.3830028474330902, "learning_rate": 1.5360382807714772e-05, "loss": 0.507, "step": 23372 }, { "epoch": 0.6417627677100495, "grad_norm": 0.41696926951408386, "learning_rate": 1.536001820103156e-05, "loss": 0.476, "step": 23373 }, { "epoch": 0.6417902251510159, "grad_norm": 0.3740525543689728, "learning_rate": 1.5359653584350205e-05, "loss": 0.5955, "step": 23374 }, { "epoch": 0.6418176825919825, "grad_norm": 0.3852023184299469, "learning_rate": 1.5359288957671386e-05, "loss": 0.4885, "step": 23375 }, { "epoch": 0.6418451400329489, "grad_norm": 0.37958207726478577, "learning_rate": 1.5358924320995782e-05, "loss": 0.5243, "step": 23376 }, { "epoch": 0.6418725974739155, "grad_norm": 0.39876991510391235, "learning_rate": 1.5358559674324075e-05, "loss": 0.5949, "step": 23377 }, { "epoch": 0.6419000549148819, "grad_norm": 0.35154786705970764, "learning_rate": 1.5358195017656938e-05, "loss": 0.5289, "step": 23378 }, { "epoch": 0.6419275123558484, "grad_norm": 0.3734380602836609, "learning_rate": 1.5357830350995064e-05, "loss": 0.5213, "step": 23379 }, { "epoch": 0.641954969796815, "grad_norm": 0.3651007413864136, "learning_rate": 1.5357465674339125e-05, "loss": 0.4817, "step": 23380 }, { "epoch": 0.6419824272377814, "grad_norm": 0.36430445313453674, "learning_rate": 1.5357100987689798e-05, "loss": 0.551, "step": 23381 }, { "epoch": 0.642009884678748, "grad_norm": 0.3833302855491638, "learning_rate": 1.5356736291047775e-05, "loss": 0.6227, "step": 23382 }, { "epoch": 0.6420373421197144, "grad_norm": 0.45691436529159546, "learning_rate": 1.5356371584413724e-05, "loss": 0.4588, "step": 23383 }, { "epoch": 0.642064799560681, "grad_norm": 0.40323683619499207, "learning_rate": 1.535600686778833e-05, "loss": 0.5494, "step": 23384 }, { "epoch": 0.6420922570016474, "grad_norm": 0.41318202018737793, "learning_rate": 1.535564214117228e-05, "loss": 0.4458, "step": 23385 }, { "epoch": 0.642119714442614, "grad_norm": 1.0183138847351074, "learning_rate": 1.535527740456624e-05, "loss": 0.7262, "step": 23386 }, { "epoch": 0.6421471718835805, "grad_norm": 0.3578435182571411, "learning_rate": 1.5354912657970905e-05, "loss": 0.3619, "step": 23387 }, { "epoch": 0.6421746293245469, "grad_norm": 0.427310973405838, "learning_rate": 1.535454790138695e-05, "loss": 0.4965, "step": 23388 }, { "epoch": 0.6422020867655135, "grad_norm": 0.37347230315208435, "learning_rate": 1.5354183134815044e-05, "loss": 0.4701, "step": 23389 }, { "epoch": 0.6422295442064799, "grad_norm": 0.3918861746788025, "learning_rate": 1.5353818358255886e-05, "loss": 0.4918, "step": 23390 }, { "epoch": 0.6422570016474465, "grad_norm": 0.37887585163116455, "learning_rate": 1.5353453571710146e-05, "loss": 0.5082, "step": 23391 }, { "epoch": 0.6422844590884129, "grad_norm": 0.38466185331344604, "learning_rate": 1.5353088775178506e-05, "loss": 0.4603, "step": 23392 }, { "epoch": 0.6423119165293795, "grad_norm": 0.3974919617176056, "learning_rate": 1.535272396866165e-05, "loss": 0.4675, "step": 23393 }, { "epoch": 0.642339373970346, "grad_norm": 0.3649398684501648, "learning_rate": 1.535235915216025e-05, "loss": 0.4737, "step": 23394 }, { "epoch": 0.6423668314113125, "grad_norm": 0.37644436955451965, "learning_rate": 1.5351994325674995e-05, "loss": 0.5591, "step": 23395 }, { "epoch": 0.642394288852279, "grad_norm": 0.41149741411209106, "learning_rate": 1.5351629489206562e-05, "loss": 0.5762, "step": 23396 }, { "epoch": 0.6424217462932454, "grad_norm": 0.3463708162307739, "learning_rate": 1.535126464275563e-05, "loss": 0.5038, "step": 23397 }, { "epoch": 0.642449203734212, "grad_norm": 0.4213593602180481, "learning_rate": 1.5350899786322887e-05, "loss": 0.5895, "step": 23398 }, { "epoch": 0.6424766611751784, "grad_norm": 0.3961278796195984, "learning_rate": 1.5350534919909e-05, "loss": 0.4481, "step": 23399 }, { "epoch": 0.642504118616145, "grad_norm": 0.4040287733078003, "learning_rate": 1.5350170043514666e-05, "loss": 0.4712, "step": 23400 }, { "epoch": 0.6425315760571115, "grad_norm": 0.4180543124675751, "learning_rate": 1.5349805157140553e-05, "loss": 0.5114, "step": 23401 }, { "epoch": 0.642559033498078, "grad_norm": 0.3451293408870697, "learning_rate": 1.534944026078735e-05, "loss": 0.558, "step": 23402 }, { "epoch": 0.6425864909390445, "grad_norm": 0.38470321893692017, "learning_rate": 1.5349075354455726e-05, "loss": 0.5177, "step": 23403 }, { "epoch": 0.642613948380011, "grad_norm": 0.48813965916633606, "learning_rate": 1.5348710438146374e-05, "loss": 0.6072, "step": 23404 }, { "epoch": 0.6426414058209775, "grad_norm": 0.4181462824344635, "learning_rate": 1.534834551185997e-05, "loss": 0.4878, "step": 23405 }, { "epoch": 0.642668863261944, "grad_norm": 0.41156619787216187, "learning_rate": 1.5347980575597192e-05, "loss": 0.5549, "step": 23406 }, { "epoch": 0.6426963207029105, "grad_norm": 0.39910459518432617, "learning_rate": 1.5347615629358724e-05, "loss": 0.421, "step": 23407 }, { "epoch": 0.642723778143877, "grad_norm": 0.39041510224342346, "learning_rate": 1.534725067314525e-05, "loss": 0.5107, "step": 23408 }, { "epoch": 0.6427512355848435, "grad_norm": 0.4237847328186035, "learning_rate": 1.534688570695744e-05, "loss": 0.5484, "step": 23409 }, { "epoch": 0.64277869302581, "grad_norm": 0.44051671028137207, "learning_rate": 1.534652073079599e-05, "loss": 0.5424, "step": 23410 }, { "epoch": 0.6428061504667765, "grad_norm": 0.38985711336135864, "learning_rate": 1.5346155744661568e-05, "loss": 0.4311, "step": 23411 }, { "epoch": 0.642833607907743, "grad_norm": 0.4444248080253601, "learning_rate": 1.5345790748554858e-05, "loss": 0.5223, "step": 23412 }, { "epoch": 0.6428610653487095, "grad_norm": 0.3561208248138428, "learning_rate": 1.5345425742476544e-05, "loss": 0.4896, "step": 23413 }, { "epoch": 0.642888522789676, "grad_norm": 0.4085029661655426, "learning_rate": 1.5345060726427304e-05, "loss": 0.5492, "step": 23414 }, { "epoch": 0.6429159802306426, "grad_norm": 0.41916653513908386, "learning_rate": 1.5344695700407823e-05, "loss": 0.4921, "step": 23415 }, { "epoch": 0.642943437671609, "grad_norm": 0.3685733377933502, "learning_rate": 1.5344330664418773e-05, "loss": 0.5424, "step": 23416 }, { "epoch": 0.6429708951125755, "grad_norm": 0.3824084997177124, "learning_rate": 1.5343965618460844e-05, "loss": 0.5628, "step": 23417 }, { "epoch": 0.642998352553542, "grad_norm": 0.38640689849853516, "learning_rate": 1.5343600562534712e-05, "loss": 0.5449, "step": 23418 }, { "epoch": 0.6430258099945085, "grad_norm": 0.42851170897483826, "learning_rate": 1.5343235496641063e-05, "loss": 0.5096, "step": 23419 }, { "epoch": 0.643053267435475, "grad_norm": 0.3772341310977936, "learning_rate": 1.534287042078057e-05, "loss": 0.4843, "step": 23420 }, { "epoch": 0.6430807248764415, "grad_norm": 0.3637911379337311, "learning_rate": 1.5342505334953922e-05, "loss": 0.4638, "step": 23421 }, { "epoch": 0.6431081823174081, "grad_norm": 0.3767666220664978, "learning_rate": 1.5342140239161795e-05, "loss": 0.5778, "step": 23422 }, { "epoch": 0.6431356397583745, "grad_norm": 0.42338645458221436, "learning_rate": 1.534177513340487e-05, "loss": 0.4927, "step": 23423 }, { "epoch": 0.6431630971993411, "grad_norm": 0.39665886759757996, "learning_rate": 1.5341410017683835e-05, "loss": 0.4541, "step": 23424 }, { "epoch": 0.6431905546403075, "grad_norm": 0.40056750178337097, "learning_rate": 1.534104489199936e-05, "loss": 0.4615, "step": 23425 }, { "epoch": 0.643218012081274, "grad_norm": 0.3307589888572693, "learning_rate": 1.534067975635213e-05, "loss": 0.4521, "step": 23426 }, { "epoch": 0.6432454695222405, "grad_norm": 0.42305514216423035, "learning_rate": 1.534031461074283e-05, "loss": 0.6592, "step": 23427 }, { "epoch": 0.643272926963207, "grad_norm": 0.482235312461853, "learning_rate": 1.533994945517214e-05, "loss": 0.5604, "step": 23428 }, { "epoch": 0.6433003844041736, "grad_norm": 0.3907907009124756, "learning_rate": 1.5339584289640742e-05, "loss": 0.5331, "step": 23429 }, { "epoch": 0.64332784184514, "grad_norm": 0.40320682525634766, "learning_rate": 1.533921911414931e-05, "loss": 0.5749, "step": 23430 }, { "epoch": 0.6433552992861066, "grad_norm": 0.3696143925189972, "learning_rate": 1.5338853928698532e-05, "loss": 0.5264, "step": 23431 }, { "epoch": 0.643382756727073, "grad_norm": 0.3319754898548126, "learning_rate": 1.5338488733289093e-05, "loss": 0.496, "step": 23432 }, { "epoch": 0.6434102141680396, "grad_norm": 0.4220735430717468, "learning_rate": 1.533812352792166e-05, "loss": 0.5118, "step": 23433 }, { "epoch": 0.643437671609006, "grad_norm": 0.3343045115470886, "learning_rate": 1.5337758312596926e-05, "loss": 0.4384, "step": 23434 }, { "epoch": 0.6434651290499726, "grad_norm": 0.3994324505329132, "learning_rate": 1.5337393087315572e-05, "loss": 0.4666, "step": 23435 }, { "epoch": 0.643492586490939, "grad_norm": 0.34487485885620117, "learning_rate": 1.533702785207827e-05, "loss": 0.4992, "step": 23436 }, { "epoch": 0.6435200439319055, "grad_norm": 0.4174627661705017, "learning_rate": 1.5336662606885715e-05, "loss": 0.4817, "step": 23437 }, { "epoch": 0.6435475013728721, "grad_norm": 0.37813815474510193, "learning_rate": 1.533629735173858e-05, "loss": 0.4715, "step": 23438 }, { "epoch": 0.6435749588138385, "grad_norm": 0.38937243819236755, "learning_rate": 1.5335932086637543e-05, "loss": 0.5953, "step": 23439 }, { "epoch": 0.6436024162548051, "grad_norm": 0.37922218441963196, "learning_rate": 1.5335566811583293e-05, "loss": 0.5448, "step": 23440 }, { "epoch": 0.6436298736957715, "grad_norm": 0.41336745023727417, "learning_rate": 1.5335201526576505e-05, "loss": 0.5324, "step": 23441 }, { "epoch": 0.6436573311367381, "grad_norm": 0.3581755459308624, "learning_rate": 1.5334836231617866e-05, "loss": 0.5366, "step": 23442 }, { "epoch": 0.6436847885777045, "grad_norm": 0.5423375368118286, "learning_rate": 1.5334470926708056e-05, "loss": 0.5196, "step": 23443 }, { "epoch": 0.643712246018671, "grad_norm": 0.3933834433555603, "learning_rate": 1.533410561184775e-05, "loss": 0.5565, "step": 23444 }, { "epoch": 0.6437397034596376, "grad_norm": 0.45108407735824585, "learning_rate": 1.533374028703764e-05, "loss": 0.4619, "step": 23445 }, { "epoch": 0.643767160900604, "grad_norm": 0.46122512221336365, "learning_rate": 1.53333749522784e-05, "loss": 0.5551, "step": 23446 }, { "epoch": 0.6437946183415706, "grad_norm": 0.379111647605896, "learning_rate": 1.5333009607570714e-05, "loss": 0.4998, "step": 23447 }, { "epoch": 0.643822075782537, "grad_norm": 0.4193352162837982, "learning_rate": 1.533264425291526e-05, "loss": 0.5244, "step": 23448 }, { "epoch": 0.6438495332235036, "grad_norm": 0.3484514057636261, "learning_rate": 1.5332278888312727e-05, "loss": 0.4978, "step": 23449 }, { "epoch": 0.64387699066447, "grad_norm": 12.829379081726074, "learning_rate": 1.533191351376379e-05, "loss": 0.5269, "step": 23450 }, { "epoch": 0.6439044481054366, "grad_norm": 0.5067495703697205, "learning_rate": 1.5331548129269132e-05, "loss": 0.5427, "step": 23451 }, { "epoch": 0.6439319055464031, "grad_norm": 0.37953925132751465, "learning_rate": 1.5331182734829434e-05, "loss": 0.5638, "step": 23452 }, { "epoch": 0.6439593629873696, "grad_norm": 0.41366273164749146, "learning_rate": 1.5330817330445376e-05, "loss": 0.6001, "step": 23453 }, { "epoch": 0.6439868204283361, "grad_norm": 0.36196261644363403, "learning_rate": 1.533045191611765e-05, "loss": 0.5917, "step": 23454 }, { "epoch": 0.6440142778693025, "grad_norm": 0.3850476145744324, "learning_rate": 1.5330086491846926e-05, "loss": 0.5483, "step": 23455 }, { "epoch": 0.6440417353102691, "grad_norm": 0.5892282128334045, "learning_rate": 1.5329721057633888e-05, "loss": 0.4598, "step": 23456 }, { "epoch": 0.6440691927512355, "grad_norm": 0.37163785099983215, "learning_rate": 1.532935561347922e-05, "loss": 0.5082, "step": 23457 }, { "epoch": 0.6440966501922021, "grad_norm": 0.3671664893627167, "learning_rate": 1.5328990159383606e-05, "loss": 0.5788, "step": 23458 }, { "epoch": 0.6441241076331686, "grad_norm": 0.3575930595397949, "learning_rate": 1.5328624695347724e-05, "loss": 0.5483, "step": 23459 }, { "epoch": 0.6441515650741351, "grad_norm": 0.34670600295066833, "learning_rate": 1.5328259221372255e-05, "loss": 0.4266, "step": 23460 }, { "epoch": 0.6441790225151016, "grad_norm": 0.4417199492454529, "learning_rate": 1.532789373745788e-05, "loss": 0.5111, "step": 23461 }, { "epoch": 0.6442064799560681, "grad_norm": 0.43500369787216187, "learning_rate": 1.532752824360528e-05, "loss": 0.4647, "step": 23462 }, { "epoch": 0.6442339373970346, "grad_norm": 0.4041222035884857, "learning_rate": 1.5327162739815148e-05, "loss": 0.436, "step": 23463 }, { "epoch": 0.644261394838001, "grad_norm": 0.39275363087654114, "learning_rate": 1.5326797226088153e-05, "loss": 0.5491, "step": 23464 }, { "epoch": 0.6442888522789676, "grad_norm": 0.46627873182296753, "learning_rate": 1.5326431702424983e-05, "loss": 0.6491, "step": 23465 }, { "epoch": 0.6443163097199341, "grad_norm": 0.42550012469291687, "learning_rate": 1.5326066168826313e-05, "loss": 0.4602, "step": 23466 }, { "epoch": 0.6443437671609006, "grad_norm": 0.38763904571533203, "learning_rate": 1.5325700625292835e-05, "loss": 0.5258, "step": 23467 }, { "epoch": 0.6443712246018671, "grad_norm": 0.39235323667526245, "learning_rate": 1.5325335071825222e-05, "loss": 0.4593, "step": 23468 }, { "epoch": 0.6443986820428336, "grad_norm": 0.37306147813796997, "learning_rate": 1.5324969508424162e-05, "loss": 0.4959, "step": 23469 }, { "epoch": 0.6444261394838001, "grad_norm": 0.5764407515525818, "learning_rate": 1.532460393509033e-05, "loss": 0.5499, "step": 23470 }, { "epoch": 0.6444535969247666, "grad_norm": 0.38954341411590576, "learning_rate": 1.5324238351824417e-05, "loss": 0.5589, "step": 23471 }, { "epoch": 0.6444810543657331, "grad_norm": 0.4033588171005249, "learning_rate": 1.5323872758627096e-05, "loss": 0.5192, "step": 23472 }, { "epoch": 0.6445085118066997, "grad_norm": 0.3980705440044403, "learning_rate": 1.5323507155499056e-05, "loss": 0.5458, "step": 23473 }, { "epoch": 0.6445359692476661, "grad_norm": 0.3896021544933319, "learning_rate": 1.5323141542440975e-05, "loss": 0.5408, "step": 23474 }, { "epoch": 0.6445634266886326, "grad_norm": 0.37329742312431335, "learning_rate": 1.532277591945354e-05, "loss": 0.5197, "step": 23475 }, { "epoch": 0.6445908841295991, "grad_norm": 0.3618464171886444, "learning_rate": 1.5322410286537424e-05, "loss": 0.5965, "step": 23476 }, { "epoch": 0.6446183415705656, "grad_norm": 0.3850589692592621, "learning_rate": 1.5322044643693317e-05, "loss": 0.4987, "step": 23477 }, { "epoch": 0.6446457990115321, "grad_norm": 0.440790057182312, "learning_rate": 1.5321678990921896e-05, "loss": 0.5097, "step": 23478 }, { "epoch": 0.6446732564524986, "grad_norm": 0.4229966104030609, "learning_rate": 1.5321313328223845e-05, "loss": 0.4775, "step": 23479 }, { "epoch": 0.6447007138934652, "grad_norm": 0.42557135224342346, "learning_rate": 1.5320947655599848e-05, "loss": 0.4439, "step": 23480 }, { "epoch": 0.6447281713344316, "grad_norm": 0.43500199913978577, "learning_rate": 1.5320581973050587e-05, "loss": 0.5613, "step": 23481 }, { "epoch": 0.6447556287753982, "grad_norm": 0.3703666925430298, "learning_rate": 1.532021628057674e-05, "loss": 0.5468, "step": 23482 }, { "epoch": 0.6447830862163646, "grad_norm": 0.35344669222831726, "learning_rate": 1.5319850578178993e-05, "loss": 0.5594, "step": 23483 }, { "epoch": 0.6448105436573311, "grad_norm": 0.4030556082725525, "learning_rate": 1.531948486585802e-05, "loss": 0.4948, "step": 23484 }, { "epoch": 0.6448380010982976, "grad_norm": 0.41496706008911133, "learning_rate": 1.531911914361452e-05, "loss": 0.4592, "step": 23485 }, { "epoch": 0.6448654585392641, "grad_norm": 0.4118815064430237, "learning_rate": 1.5318753411449162e-05, "loss": 0.5553, "step": 23486 }, { "epoch": 0.6448929159802307, "grad_norm": 0.41664260625839233, "learning_rate": 1.5318387669362626e-05, "loss": 0.5509, "step": 23487 }, { "epoch": 0.6449203734211971, "grad_norm": 0.3605995774269104, "learning_rate": 1.5318021917355607e-05, "loss": 0.4357, "step": 23488 }, { "epoch": 0.6449478308621637, "grad_norm": 0.3561449646949768, "learning_rate": 1.5317656155428774e-05, "loss": 0.5357, "step": 23489 }, { "epoch": 0.6449752883031301, "grad_norm": 0.37047451734542847, "learning_rate": 1.531729038358282e-05, "loss": 0.557, "step": 23490 }, { "epoch": 0.6450027457440967, "grad_norm": 0.36255019903182983, "learning_rate": 1.5316924601818418e-05, "loss": 0.4998, "step": 23491 }, { "epoch": 0.6450302031850631, "grad_norm": 0.37985020875930786, "learning_rate": 1.5316558810136257e-05, "loss": 0.5254, "step": 23492 }, { "epoch": 0.6450576606260296, "grad_norm": 0.385601282119751, "learning_rate": 1.5316193008537016e-05, "loss": 0.4194, "step": 23493 }, { "epoch": 0.6450851180669962, "grad_norm": 0.39671579003334045, "learning_rate": 1.531582719702138e-05, "loss": 0.5085, "step": 23494 }, { "epoch": 0.6451125755079626, "grad_norm": 0.46844983100891113, "learning_rate": 1.5315461375590027e-05, "loss": 0.5417, "step": 23495 }, { "epoch": 0.6451400329489292, "grad_norm": 0.42948397994041443, "learning_rate": 1.5315095544243644e-05, "loss": 0.5737, "step": 23496 }, { "epoch": 0.6451674903898956, "grad_norm": 0.3544919490814209, "learning_rate": 1.5314729702982913e-05, "loss": 0.515, "step": 23497 }, { "epoch": 0.6451949478308622, "grad_norm": 0.4292404353618622, "learning_rate": 1.531436385180851e-05, "loss": 0.4282, "step": 23498 }, { "epoch": 0.6452224052718286, "grad_norm": 0.48635348677635193, "learning_rate": 1.5313997990721124e-05, "loss": 0.5488, "step": 23499 }, { "epoch": 0.6452498627127952, "grad_norm": 0.5099563002586365, "learning_rate": 1.531363211972144e-05, "loss": 0.5219, "step": 23500 }, { "epoch": 0.6452773201537617, "grad_norm": 0.4011765718460083, "learning_rate": 1.5313266238810133e-05, "loss": 0.4044, "step": 23501 }, { "epoch": 0.6453047775947282, "grad_norm": 0.3679286241531372, "learning_rate": 1.5312900347987888e-05, "loss": 0.4808, "step": 23502 }, { "epoch": 0.6453322350356947, "grad_norm": 0.45455726981163025, "learning_rate": 1.5312534447255388e-05, "loss": 0.4893, "step": 23503 }, { "epoch": 0.6453596924766611, "grad_norm": 0.37145766615867615, "learning_rate": 1.5312168536613317e-05, "loss": 0.5494, "step": 23504 }, { "epoch": 0.6453871499176277, "grad_norm": 2.3793447017669678, "learning_rate": 1.5311802616062353e-05, "loss": 0.4225, "step": 23505 }, { "epoch": 0.6454146073585941, "grad_norm": 0.36410728096961975, "learning_rate": 1.5311436685603188e-05, "loss": 0.4717, "step": 23506 }, { "epoch": 0.6454420647995607, "grad_norm": 0.3831580877304077, "learning_rate": 1.531107074523649e-05, "loss": 0.5296, "step": 23507 }, { "epoch": 0.6454695222405272, "grad_norm": 0.4357326626777649, "learning_rate": 1.531070479496296e-05, "loss": 0.5425, "step": 23508 }, { "epoch": 0.6454969796814937, "grad_norm": 0.4528629779815674, "learning_rate": 1.5310338834783264e-05, "loss": 0.5998, "step": 23509 }, { "epoch": 0.6455244371224602, "grad_norm": 0.3355613648891449, "learning_rate": 1.530997286469809e-05, "loss": 0.4602, "step": 23510 }, { "epoch": 0.6455518945634267, "grad_norm": 0.4505445957183838, "learning_rate": 1.530960688470813e-05, "loss": 0.4825, "step": 23511 }, { "epoch": 0.6455793520043932, "grad_norm": 0.356999009847641, "learning_rate": 1.5309240894814047e-05, "loss": 0.4416, "step": 23512 }, { "epoch": 0.6456068094453596, "grad_norm": 0.35697898268699646, "learning_rate": 1.5308874895016544e-05, "loss": 0.5472, "step": 23513 }, { "epoch": 0.6456342668863262, "grad_norm": 0.35243189334869385, "learning_rate": 1.530850888531629e-05, "loss": 0.5401, "step": 23514 }, { "epoch": 0.6456617243272927, "grad_norm": 0.40839242935180664, "learning_rate": 1.5308142865713976e-05, "loss": 0.4558, "step": 23515 }, { "epoch": 0.6456891817682592, "grad_norm": 0.9486708045005798, "learning_rate": 1.5307776836210282e-05, "loss": 0.4795, "step": 23516 }, { "epoch": 0.6457166392092257, "grad_norm": 0.37303799390792847, "learning_rate": 1.5307410796805888e-05, "loss": 0.4964, "step": 23517 }, { "epoch": 0.6457440966501922, "grad_norm": 0.4153828024864197, "learning_rate": 1.5307044747501478e-05, "loss": 0.5379, "step": 23518 }, { "epoch": 0.6457715540911587, "grad_norm": 0.38188737630844116, "learning_rate": 1.5306678688297736e-05, "loss": 0.4485, "step": 23519 }, { "epoch": 0.6457990115321252, "grad_norm": 0.43084800243377686, "learning_rate": 1.5306312619195347e-05, "loss": 0.4929, "step": 23520 }, { "epoch": 0.6458264689730917, "grad_norm": 0.355805367231369, "learning_rate": 1.5305946540194992e-05, "loss": 0.4897, "step": 23521 }, { "epoch": 0.6458539264140583, "grad_norm": 0.39379727840423584, "learning_rate": 1.530558045129735e-05, "loss": 0.4787, "step": 23522 }, { "epoch": 0.6458813838550247, "grad_norm": 0.37184977531433105, "learning_rate": 1.5305214352503107e-05, "loss": 0.52, "step": 23523 }, { "epoch": 0.6459088412959912, "grad_norm": 0.409349650144577, "learning_rate": 1.530484824381295e-05, "loss": 0.4183, "step": 23524 }, { "epoch": 0.6459362987369577, "grad_norm": 0.37581634521484375, "learning_rate": 1.5304482125227553e-05, "loss": 0.4365, "step": 23525 }, { "epoch": 0.6459637561779242, "grad_norm": 0.3423929512500763, "learning_rate": 1.5304115996747605e-05, "loss": 0.4359, "step": 23526 }, { "epoch": 0.6459912136188907, "grad_norm": 0.33313488960266113, "learning_rate": 1.530374985837379e-05, "loss": 0.4322, "step": 23527 }, { "epoch": 0.6460186710598572, "grad_norm": 0.45416757464408875, "learning_rate": 1.5303383710106788e-05, "loss": 0.5631, "step": 23528 }, { "epoch": 0.6460461285008238, "grad_norm": 0.37279438972473145, "learning_rate": 1.530301755194728e-05, "loss": 0.5514, "step": 23529 }, { "epoch": 0.6460735859417902, "grad_norm": 0.36055248975753784, "learning_rate": 1.5302651383895953e-05, "loss": 0.4654, "step": 23530 }, { "epoch": 0.6461010433827568, "grad_norm": 0.44470393657684326, "learning_rate": 1.5302285205953493e-05, "loss": 0.4878, "step": 23531 }, { "epoch": 0.6461285008237232, "grad_norm": 0.3753061890602112, "learning_rate": 1.5301919018120574e-05, "loss": 0.451, "step": 23532 }, { "epoch": 0.6461559582646897, "grad_norm": 0.35062187910079956, "learning_rate": 1.5301552820397886e-05, "loss": 0.4896, "step": 23533 }, { "epoch": 0.6461834157056562, "grad_norm": 0.37730610370635986, "learning_rate": 1.530118661278611e-05, "loss": 0.4544, "step": 23534 }, { "epoch": 0.6462108731466227, "grad_norm": 0.32008129358291626, "learning_rate": 1.530082039528593e-05, "loss": 0.383, "step": 23535 }, { "epoch": 0.6462383305875893, "grad_norm": 0.3896711766719818, "learning_rate": 1.5300454167898025e-05, "loss": 0.4562, "step": 23536 }, { "epoch": 0.6462657880285557, "grad_norm": 0.37368258833885193, "learning_rate": 1.5300087930623085e-05, "loss": 0.5374, "step": 23537 }, { "epoch": 0.6462932454695223, "grad_norm": 0.38598549365997314, "learning_rate": 1.5299721683461787e-05, "loss": 0.54, "step": 23538 }, { "epoch": 0.6463207029104887, "grad_norm": 0.3561011254787445, "learning_rate": 1.5299355426414815e-05, "loss": 0.3945, "step": 23539 }, { "epoch": 0.6463481603514553, "grad_norm": 0.40940478444099426, "learning_rate": 1.5298989159482857e-05, "loss": 0.5368, "step": 23540 }, { "epoch": 0.6463756177924217, "grad_norm": 0.39513206481933594, "learning_rate": 1.529862288266659e-05, "loss": 0.5542, "step": 23541 }, { "epoch": 0.6464030752333882, "grad_norm": 0.3888480067253113, "learning_rate": 1.52982565959667e-05, "loss": 0.5063, "step": 23542 }, { "epoch": 0.6464305326743548, "grad_norm": 0.3695431053638458, "learning_rate": 1.5297890299383874e-05, "loss": 0.5022, "step": 23543 }, { "epoch": 0.6464579901153212, "grad_norm": 0.38698798418045044, "learning_rate": 1.529752399291879e-05, "loss": 0.5304, "step": 23544 }, { "epoch": 0.6464854475562878, "grad_norm": 0.4107344448566437, "learning_rate": 1.5297157676572133e-05, "loss": 0.5476, "step": 23545 }, { "epoch": 0.6465129049972542, "grad_norm": 0.37237295508384705, "learning_rate": 1.5296791350344585e-05, "loss": 0.4807, "step": 23546 }, { "epoch": 0.6465403624382208, "grad_norm": 0.3852366805076599, "learning_rate": 1.5296425014236835e-05, "loss": 0.6294, "step": 23547 }, { "epoch": 0.6465678198791872, "grad_norm": 0.3964526653289795, "learning_rate": 1.5296058668249557e-05, "loss": 0.502, "step": 23548 }, { "epoch": 0.6465952773201538, "grad_norm": 0.4374866187572479, "learning_rate": 1.529569231238344e-05, "loss": 0.5977, "step": 23549 }, { "epoch": 0.6466227347611203, "grad_norm": 0.42083367705345154, "learning_rate": 1.529532594663917e-05, "loss": 0.5841, "step": 23550 }, { "epoch": 0.6466501922020867, "grad_norm": 0.3813053071498871, "learning_rate": 1.5294959571017422e-05, "loss": 0.4733, "step": 23551 }, { "epoch": 0.6466776496430533, "grad_norm": 0.39929884672164917, "learning_rate": 1.529459318551889e-05, "loss": 0.4342, "step": 23552 }, { "epoch": 0.6467051070840197, "grad_norm": 0.3981907069683075, "learning_rate": 1.5294226790144247e-05, "loss": 0.5007, "step": 23553 }, { "epoch": 0.6467325645249863, "grad_norm": 0.3797193765640259, "learning_rate": 1.5293860384894184e-05, "loss": 0.5482, "step": 23554 }, { "epoch": 0.6467600219659527, "grad_norm": 0.409719318151474, "learning_rate": 1.5293493969769383e-05, "loss": 0.5305, "step": 23555 }, { "epoch": 0.6467874794069193, "grad_norm": 0.3918571472167969, "learning_rate": 1.5293127544770522e-05, "loss": 0.5065, "step": 23556 }, { "epoch": 0.6468149368478858, "grad_norm": 0.37884876132011414, "learning_rate": 1.529276110989829e-05, "loss": 0.484, "step": 23557 }, { "epoch": 0.6468423942888523, "grad_norm": 0.40796470642089844, "learning_rate": 1.529239466515337e-05, "loss": 0.566, "step": 23558 }, { "epoch": 0.6468698517298188, "grad_norm": 0.3528914153575897, "learning_rate": 1.5292028210536448e-05, "loss": 0.5433, "step": 23559 }, { "epoch": 0.6468973091707853, "grad_norm": 0.3988187611103058, "learning_rate": 1.52916617460482e-05, "loss": 0.5925, "step": 23560 }, { "epoch": 0.6469247666117518, "grad_norm": 0.3913251757621765, "learning_rate": 1.5291295271689318e-05, "loss": 0.4681, "step": 23561 }, { "epoch": 0.6469522240527182, "grad_norm": 0.43245929479599, "learning_rate": 1.5290928787460476e-05, "loss": 0.5497, "step": 23562 }, { "epoch": 0.6469796814936848, "grad_norm": 0.4246596097946167, "learning_rate": 1.529056229336237e-05, "loss": 0.5015, "step": 23563 }, { "epoch": 0.6470071389346513, "grad_norm": 0.49690550565719604, "learning_rate": 1.529019578939567e-05, "loss": 0.4511, "step": 23564 }, { "epoch": 0.6470345963756178, "grad_norm": 0.3775213658809662, "learning_rate": 1.528982927556107e-05, "loss": 0.477, "step": 23565 }, { "epoch": 0.6470620538165843, "grad_norm": 0.36722591519355774, "learning_rate": 1.528946275185925e-05, "loss": 0.5042, "step": 23566 }, { "epoch": 0.6470895112575508, "grad_norm": 0.378296434879303, "learning_rate": 1.5289096218290892e-05, "loss": 0.4759, "step": 23567 }, { "epoch": 0.6471169686985173, "grad_norm": 0.34696832299232483, "learning_rate": 1.5288729674856683e-05, "loss": 0.4398, "step": 23568 }, { "epoch": 0.6471444261394838, "grad_norm": 0.4349323511123657, "learning_rate": 1.5288363121557305e-05, "loss": 0.5371, "step": 23569 }, { "epoch": 0.6471718835804503, "grad_norm": 0.4229699373245239, "learning_rate": 1.5287996558393442e-05, "loss": 0.5069, "step": 23570 }, { "epoch": 0.6471993410214169, "grad_norm": 0.3885268568992615, "learning_rate": 1.528762998536578e-05, "loss": 0.4693, "step": 23571 }, { "epoch": 0.6472267984623833, "grad_norm": 0.38908064365386963, "learning_rate": 1.5287263402474993e-05, "loss": 0.529, "step": 23572 }, { "epoch": 0.6472542559033498, "grad_norm": 0.38693487644195557, "learning_rate": 1.528689680972178e-05, "loss": 0.4925, "step": 23573 }, { "epoch": 0.6472817133443163, "grad_norm": 0.3703179359436035, "learning_rate": 1.5286530207106815e-05, "loss": 0.5105, "step": 23574 }, { "epoch": 0.6473091707852828, "grad_norm": 0.382712185382843, "learning_rate": 1.528616359463078e-05, "loss": 0.558, "step": 23575 }, { "epoch": 0.6473366282262493, "grad_norm": 0.3714483976364136, "learning_rate": 1.528579697229437e-05, "loss": 0.5034, "step": 23576 }, { "epoch": 0.6473640856672158, "grad_norm": 0.405723512172699, "learning_rate": 1.5285430340098256e-05, "loss": 0.557, "step": 23577 }, { "epoch": 0.6473915431081824, "grad_norm": 0.34994447231292725, "learning_rate": 1.5285063698043127e-05, "loss": 0.5487, "step": 23578 }, { "epoch": 0.6474190005491488, "grad_norm": 0.34603482484817505, "learning_rate": 1.5284697046129673e-05, "loss": 0.5536, "step": 23579 }, { "epoch": 0.6474464579901154, "grad_norm": 0.3981989026069641, "learning_rate": 1.5284330384358565e-05, "loss": 0.5228, "step": 23580 }, { "epoch": 0.6474739154310818, "grad_norm": 0.3539542853832245, "learning_rate": 1.52839637127305e-05, "loss": 0.5453, "step": 23581 }, { "epoch": 0.6475013728720483, "grad_norm": 0.35297611355781555, "learning_rate": 1.5283597031246153e-05, "loss": 0.4844, "step": 23582 }, { "epoch": 0.6475288303130148, "grad_norm": 0.42301276326179504, "learning_rate": 1.5283230339906216e-05, "loss": 0.5968, "step": 23583 }, { "epoch": 0.6475562877539813, "grad_norm": 0.3421790897846222, "learning_rate": 1.5282863638711365e-05, "loss": 0.455, "step": 23584 }, { "epoch": 0.6475837451949479, "grad_norm": 0.40178149938583374, "learning_rate": 1.5282496927662285e-05, "loss": 0.413, "step": 23585 }, { "epoch": 0.6476112026359143, "grad_norm": 0.401120126247406, "learning_rate": 1.5282130206759664e-05, "loss": 0.5078, "step": 23586 }, { "epoch": 0.6476386600768809, "grad_norm": 0.3689352869987488, "learning_rate": 1.5281763476004187e-05, "loss": 0.5043, "step": 23587 }, { "epoch": 0.6476661175178473, "grad_norm": 0.3661883771419525, "learning_rate": 1.528139673539653e-05, "loss": 0.4875, "step": 23588 }, { "epoch": 0.6476935749588139, "grad_norm": 0.3733847737312317, "learning_rate": 1.5281029984937386e-05, "loss": 0.5205, "step": 23589 }, { "epoch": 0.6477210323997803, "grad_norm": 0.3995633125305176, "learning_rate": 1.5280663224627435e-05, "loss": 0.57, "step": 23590 }, { "epoch": 0.6477484898407468, "grad_norm": 0.3817859888076782, "learning_rate": 1.528029645446736e-05, "loss": 0.5332, "step": 23591 }, { "epoch": 0.6477759472817134, "grad_norm": 0.4123396575450897, "learning_rate": 1.5279929674457853e-05, "loss": 0.5235, "step": 23592 }, { "epoch": 0.6478034047226798, "grad_norm": 0.3818662464618683, "learning_rate": 1.5279562884599585e-05, "loss": 0.4773, "step": 23593 }, { "epoch": 0.6478308621636464, "grad_norm": 0.36677008867263794, "learning_rate": 1.527919608489325e-05, "loss": 0.4538, "step": 23594 }, { "epoch": 0.6478583196046128, "grad_norm": 0.37672337889671326, "learning_rate": 1.5278829275339528e-05, "loss": 0.538, "step": 23595 }, { "epoch": 0.6478857770455794, "grad_norm": 0.37302064895629883, "learning_rate": 1.5278462455939105e-05, "loss": 0.5209, "step": 23596 }, { "epoch": 0.6479132344865458, "grad_norm": 0.38945242762565613, "learning_rate": 1.5278095626692666e-05, "loss": 0.4835, "step": 23597 }, { "epoch": 0.6479406919275124, "grad_norm": 0.4318399429321289, "learning_rate": 1.5277728787600895e-05, "loss": 0.4823, "step": 23598 }, { "epoch": 0.6479681493684789, "grad_norm": 0.4127531051635742, "learning_rate": 1.5277361938664474e-05, "loss": 0.5598, "step": 23599 }, { "epoch": 0.6479956068094453, "grad_norm": 0.36781755089759827, "learning_rate": 1.5276995079884086e-05, "loss": 0.5412, "step": 23600 }, { "epoch": 0.6480230642504119, "grad_norm": 0.4121516942977905, "learning_rate": 1.5276628211260424e-05, "loss": 0.487, "step": 23601 }, { "epoch": 0.6480505216913783, "grad_norm": 0.38877174258232117, "learning_rate": 1.527626133279416e-05, "loss": 0.465, "step": 23602 }, { "epoch": 0.6480779791323449, "grad_norm": 0.3705126643180847, "learning_rate": 1.5275894444485988e-05, "loss": 0.5002, "step": 23603 }, { "epoch": 0.6481054365733113, "grad_norm": 0.37879008054733276, "learning_rate": 1.5275527546336587e-05, "loss": 0.5341, "step": 23604 }, { "epoch": 0.6481328940142779, "grad_norm": 0.43851184844970703, "learning_rate": 1.5275160638346648e-05, "loss": 0.5366, "step": 23605 }, { "epoch": 0.6481603514552444, "grad_norm": 0.49791640043258667, "learning_rate": 1.5274793720516846e-05, "loss": 0.4442, "step": 23606 }, { "epoch": 0.6481878088962109, "grad_norm": 0.4260006844997406, "learning_rate": 1.527442679284787e-05, "loss": 0.5403, "step": 23607 }, { "epoch": 0.6482152663371774, "grad_norm": 0.4213089346885681, "learning_rate": 1.5274059855340407e-05, "loss": 0.4723, "step": 23608 }, { "epoch": 0.6482427237781438, "grad_norm": 0.33732354640960693, "learning_rate": 1.5273692907995136e-05, "loss": 0.4697, "step": 23609 }, { "epoch": 0.6482701812191104, "grad_norm": 0.48739102482795715, "learning_rate": 1.527332595081275e-05, "loss": 0.4883, "step": 23610 }, { "epoch": 0.6482976386600768, "grad_norm": 0.38530057668685913, "learning_rate": 1.5272958983793924e-05, "loss": 0.6107, "step": 23611 }, { "epoch": 0.6483250961010434, "grad_norm": 0.3893119990825653, "learning_rate": 1.527259200693935e-05, "loss": 0.5648, "step": 23612 }, { "epoch": 0.6483525535420099, "grad_norm": 0.41314688324928284, "learning_rate": 1.5272225020249703e-05, "loss": 0.548, "step": 23613 }, { "epoch": 0.6483800109829764, "grad_norm": 0.36960580945014954, "learning_rate": 1.5271858023725678e-05, "loss": 0.5594, "step": 23614 }, { "epoch": 0.6484074684239429, "grad_norm": 0.41664743423461914, "learning_rate": 1.5271491017367956e-05, "loss": 0.5012, "step": 23615 }, { "epoch": 0.6484349258649094, "grad_norm": 0.404263973236084, "learning_rate": 1.527112400117722e-05, "loss": 0.56, "step": 23616 }, { "epoch": 0.6484623833058759, "grad_norm": 0.40553224086761475, "learning_rate": 1.5270756975154154e-05, "loss": 0.5104, "step": 23617 }, { "epoch": 0.6484898407468423, "grad_norm": 0.4340897500514984, "learning_rate": 1.5270389939299445e-05, "loss": 0.4944, "step": 23618 }, { "epoch": 0.6485172981878089, "grad_norm": 0.46582648158073425, "learning_rate": 1.5270022893613777e-05, "loss": 0.5584, "step": 23619 }, { "epoch": 0.6485447556287754, "grad_norm": 0.36733195185661316, "learning_rate": 1.5269655838097835e-05, "loss": 0.5322, "step": 23620 }, { "epoch": 0.6485722130697419, "grad_norm": 0.3981301188468933, "learning_rate": 1.5269288772752298e-05, "loss": 0.4716, "step": 23621 }, { "epoch": 0.6485996705107084, "grad_norm": 0.38421449065208435, "learning_rate": 1.526892169757786e-05, "loss": 0.4623, "step": 23622 }, { "epoch": 0.6486271279516749, "grad_norm": 0.36929967999458313, "learning_rate": 1.5268554612575202e-05, "loss": 0.533, "step": 23623 }, { "epoch": 0.6486545853926414, "grad_norm": 0.35250210762023926, "learning_rate": 1.5268187517745005e-05, "loss": 0.4835, "step": 23624 }, { "epoch": 0.6486820428336079, "grad_norm": 0.3253669738769531, "learning_rate": 1.5267820413087958e-05, "loss": 0.402, "step": 23625 }, { "epoch": 0.6487095002745744, "grad_norm": 0.3759809732437134, "learning_rate": 1.5267453298604746e-05, "loss": 0.6091, "step": 23626 }, { "epoch": 0.648736957715541, "grad_norm": 0.3368217647075653, "learning_rate": 1.5267086174296053e-05, "loss": 0.4803, "step": 23627 }, { "epoch": 0.6487644151565074, "grad_norm": 0.4017060399055481, "learning_rate": 1.526671904016256e-05, "loss": 0.5113, "step": 23628 }, { "epoch": 0.648791872597474, "grad_norm": 0.377604603767395, "learning_rate": 1.5266351896204957e-05, "loss": 0.5298, "step": 23629 }, { "epoch": 0.6488193300384404, "grad_norm": 0.5177984833717346, "learning_rate": 1.526598474242393e-05, "loss": 0.5153, "step": 23630 }, { "epoch": 0.6488467874794069, "grad_norm": 0.4555246829986572, "learning_rate": 1.5265617578820156e-05, "loss": 0.4201, "step": 23631 }, { "epoch": 0.6488742449203734, "grad_norm": 0.3822295069694519, "learning_rate": 1.5265250405394327e-05, "loss": 0.5237, "step": 23632 }, { "epoch": 0.6489017023613399, "grad_norm": 0.40762263536453247, "learning_rate": 1.5264883222147123e-05, "loss": 0.5813, "step": 23633 }, { "epoch": 0.6489291598023065, "grad_norm": 0.4483424425125122, "learning_rate": 1.5264516029079234e-05, "loss": 0.4888, "step": 23634 }, { "epoch": 0.6489566172432729, "grad_norm": 0.40169626474380493, "learning_rate": 1.5264148826191342e-05, "loss": 0.4925, "step": 23635 }, { "epoch": 0.6489840746842395, "grad_norm": 0.3659188747406006, "learning_rate": 1.5263781613484133e-05, "loss": 0.5582, "step": 23636 }, { "epoch": 0.6490115321252059, "grad_norm": 0.389707088470459, "learning_rate": 1.5263414390958287e-05, "loss": 0.5273, "step": 23637 }, { "epoch": 0.6490389895661725, "grad_norm": 0.38261178135871887, "learning_rate": 1.52630471586145e-05, "loss": 0.4237, "step": 23638 }, { "epoch": 0.6490664470071389, "grad_norm": 0.39575034379959106, "learning_rate": 1.5262679916453445e-05, "loss": 0.5897, "step": 23639 }, { "epoch": 0.6490939044481054, "grad_norm": 0.3864888548851013, "learning_rate": 1.5262312664475816e-05, "loss": 0.5204, "step": 23640 }, { "epoch": 0.649121361889072, "grad_norm": 0.4013170301914215, "learning_rate": 1.5261945402682292e-05, "loss": 0.5499, "step": 23641 }, { "epoch": 0.6491488193300384, "grad_norm": 0.321617990732193, "learning_rate": 1.526157813107356e-05, "loss": 0.5425, "step": 23642 }, { "epoch": 0.649176276771005, "grad_norm": 0.3522210419178009, "learning_rate": 1.5261210849650306e-05, "loss": 0.509, "step": 23643 }, { "epoch": 0.6492037342119714, "grad_norm": 0.3949056565761566, "learning_rate": 1.5260843558413215e-05, "loss": 0.5133, "step": 23644 }, { "epoch": 0.649231191652938, "grad_norm": 0.3825320899486542, "learning_rate": 1.5260476257362974e-05, "loss": 0.579, "step": 23645 }, { "epoch": 0.6492586490939044, "grad_norm": 0.3622956871986389, "learning_rate": 1.5260108946500262e-05, "loss": 0.5081, "step": 23646 }, { "epoch": 0.649286106534871, "grad_norm": 0.3925495743751526, "learning_rate": 1.5259741625825768e-05, "loss": 0.4774, "step": 23647 }, { "epoch": 0.6493135639758375, "grad_norm": 0.39794591069221497, "learning_rate": 1.525937429534018e-05, "loss": 0.4957, "step": 23648 }, { "epoch": 0.6493410214168039, "grad_norm": 1.2981231212615967, "learning_rate": 1.5259006955044177e-05, "loss": 0.4261, "step": 23649 }, { "epoch": 0.6493684788577705, "grad_norm": 0.38356369733810425, "learning_rate": 1.525863960493845e-05, "loss": 0.4724, "step": 23650 }, { "epoch": 0.6493959362987369, "grad_norm": 0.3979029059410095, "learning_rate": 1.5258272245023681e-05, "loss": 0.4162, "step": 23651 }, { "epoch": 0.6494233937397035, "grad_norm": 0.4131755232810974, "learning_rate": 1.5257904875300557e-05, "loss": 0.4941, "step": 23652 }, { "epoch": 0.6494508511806699, "grad_norm": 0.4643997251987457, "learning_rate": 1.525753749576976e-05, "loss": 0.4947, "step": 23653 }, { "epoch": 0.6494783086216365, "grad_norm": 0.37832266092300415, "learning_rate": 1.5257170106431982e-05, "loss": 0.4331, "step": 23654 }, { "epoch": 0.649505766062603, "grad_norm": 0.35687947273254395, "learning_rate": 1.5256802707287898e-05, "loss": 0.5234, "step": 23655 }, { "epoch": 0.6495332235035695, "grad_norm": 0.4457745850086212, "learning_rate": 1.5256435298338203e-05, "loss": 0.4855, "step": 23656 }, { "epoch": 0.649560680944536, "grad_norm": 0.38329577445983887, "learning_rate": 1.5256067879583579e-05, "loss": 0.5083, "step": 23657 }, { "epoch": 0.6495881383855024, "grad_norm": 0.34335607290267944, "learning_rate": 1.525570045102471e-05, "loss": 0.5203, "step": 23658 }, { "epoch": 0.649615595826469, "grad_norm": 0.3806074559688568, "learning_rate": 1.5255333012662282e-05, "loss": 0.4702, "step": 23659 }, { "epoch": 0.6496430532674354, "grad_norm": 0.3775946795940399, "learning_rate": 1.525496556449698e-05, "loss": 0.5069, "step": 23660 }, { "epoch": 0.649670510708402, "grad_norm": 0.40823933482170105, "learning_rate": 1.525459810652949e-05, "loss": 0.5247, "step": 23661 }, { "epoch": 0.6496979681493685, "grad_norm": 0.3791608512401581, "learning_rate": 1.5254230638760496e-05, "loss": 0.4472, "step": 23662 }, { "epoch": 0.649725425590335, "grad_norm": 0.332027405500412, "learning_rate": 1.5253863161190687e-05, "loss": 0.446, "step": 23663 }, { "epoch": 0.6497528830313015, "grad_norm": 0.41062307357788086, "learning_rate": 1.5253495673820746e-05, "loss": 0.4916, "step": 23664 }, { "epoch": 0.649780340472268, "grad_norm": 0.3737700879573822, "learning_rate": 1.525312817665136e-05, "loss": 0.5047, "step": 23665 }, { "epoch": 0.6498077979132345, "grad_norm": 0.36018651723861694, "learning_rate": 1.5252760669683211e-05, "loss": 0.4574, "step": 23666 }, { "epoch": 0.649835255354201, "grad_norm": 0.37488311529159546, "learning_rate": 1.5252393152916992e-05, "loss": 0.5458, "step": 23667 }, { "epoch": 0.6498627127951675, "grad_norm": 0.3431636691093445, "learning_rate": 1.5252025626353377e-05, "loss": 0.464, "step": 23668 }, { "epoch": 0.649890170236134, "grad_norm": 0.43569302558898926, "learning_rate": 1.5251658089993061e-05, "loss": 0.571, "step": 23669 }, { "epoch": 0.6499176276771005, "grad_norm": 0.35711148381233215, "learning_rate": 1.5251290543836725e-05, "loss": 0.4508, "step": 23670 }, { "epoch": 0.649945085118067, "grad_norm": 0.37078607082366943, "learning_rate": 1.5250922987885057e-05, "loss": 0.5079, "step": 23671 }, { "epoch": 0.6499725425590335, "grad_norm": 0.4647006392478943, "learning_rate": 1.5250555422138744e-05, "loss": 0.5282, "step": 23672 }, { "epoch": 0.65, "grad_norm": 0.35909104347229004, "learning_rate": 1.5250187846598465e-05, "loss": 0.5138, "step": 23673 }, { "epoch": 0.6500274574409665, "grad_norm": 0.3401111960411072, "learning_rate": 1.5249820261264914e-05, "loss": 0.4618, "step": 23674 }, { "epoch": 0.650054914881933, "grad_norm": 0.3741942346096039, "learning_rate": 1.5249452666138769e-05, "loss": 0.4879, "step": 23675 }, { "epoch": 0.6500823723228996, "grad_norm": 0.39525625109672546, "learning_rate": 1.5249085061220723e-05, "loss": 0.5236, "step": 23676 }, { "epoch": 0.650109829763866, "grad_norm": 0.4767204821109772, "learning_rate": 1.5248717446511456e-05, "loss": 0.6236, "step": 23677 }, { "epoch": 0.6501372872048325, "grad_norm": 0.40846049785614014, "learning_rate": 1.5248349822011657e-05, "loss": 0.5112, "step": 23678 }, { "epoch": 0.650164744645799, "grad_norm": 0.47721487283706665, "learning_rate": 1.5247982187722008e-05, "loss": 0.6002, "step": 23679 }, { "epoch": 0.6501922020867655, "grad_norm": 0.3992398977279663, "learning_rate": 1.5247614543643199e-05, "loss": 0.5045, "step": 23680 }, { "epoch": 0.650219659527732, "grad_norm": 0.38624507188796997, "learning_rate": 1.5247246889775915e-05, "loss": 0.5211, "step": 23681 }, { "epoch": 0.6502471169686985, "grad_norm": 0.3930586576461792, "learning_rate": 1.5246879226120838e-05, "loss": 0.503, "step": 23682 }, { "epoch": 0.6502745744096651, "grad_norm": 0.400028795003891, "learning_rate": 1.5246511552678658e-05, "loss": 0.5429, "step": 23683 }, { "epoch": 0.6503020318506315, "grad_norm": 0.4955674707889557, "learning_rate": 1.5246143869450061e-05, "loss": 0.4973, "step": 23684 }, { "epoch": 0.6503294892915981, "grad_norm": 0.37542468309402466, "learning_rate": 1.5245776176435731e-05, "loss": 0.4894, "step": 23685 }, { "epoch": 0.6503569467325645, "grad_norm": 0.4325043261051178, "learning_rate": 1.5245408473636352e-05, "loss": 0.4855, "step": 23686 }, { "epoch": 0.650384404173531, "grad_norm": 0.3471148908138275, "learning_rate": 1.5245040761052615e-05, "loss": 0.5137, "step": 23687 }, { "epoch": 0.6504118616144975, "grad_norm": 0.4817054867744446, "learning_rate": 1.5244673038685199e-05, "loss": 0.5234, "step": 23688 }, { "epoch": 0.650439319055464, "grad_norm": 0.38028454780578613, "learning_rate": 1.5244305306534797e-05, "loss": 0.4996, "step": 23689 }, { "epoch": 0.6504667764964306, "grad_norm": 0.3933710753917694, "learning_rate": 1.524393756460209e-05, "loss": 0.5649, "step": 23690 }, { "epoch": 0.650494233937397, "grad_norm": 0.39372214674949646, "learning_rate": 1.5243569812887766e-05, "loss": 0.5574, "step": 23691 }, { "epoch": 0.6505216913783636, "grad_norm": 0.4059258997440338, "learning_rate": 1.5243202051392513e-05, "loss": 0.4947, "step": 23692 }, { "epoch": 0.65054914881933, "grad_norm": 0.4512738883495331, "learning_rate": 1.5242834280117013e-05, "loss": 0.6183, "step": 23693 }, { "epoch": 0.6505766062602966, "grad_norm": 0.34508800506591797, "learning_rate": 1.5242466499061954e-05, "loss": 0.4836, "step": 23694 }, { "epoch": 0.650604063701263, "grad_norm": 0.4077144265174866, "learning_rate": 1.5242098708228019e-05, "loss": 0.5812, "step": 23695 }, { "epoch": 0.6506315211422296, "grad_norm": 0.4154678285121918, "learning_rate": 1.5241730907615902e-05, "loss": 0.5549, "step": 23696 }, { "epoch": 0.6506589785831961, "grad_norm": 0.3598564565181732, "learning_rate": 1.5241363097226282e-05, "loss": 0.4983, "step": 23697 }, { "epoch": 0.6506864360241625, "grad_norm": 0.4105212688446045, "learning_rate": 1.5240995277059845e-05, "loss": 0.511, "step": 23698 }, { "epoch": 0.6507138934651291, "grad_norm": 0.3683725595474243, "learning_rate": 1.5240627447117281e-05, "loss": 0.5487, "step": 23699 }, { "epoch": 0.6507413509060955, "grad_norm": 0.45838648080825806, "learning_rate": 1.5240259607399273e-05, "loss": 0.578, "step": 23700 }, { "epoch": 0.6507688083470621, "grad_norm": 0.39265739917755127, "learning_rate": 1.5239891757906509e-05, "loss": 0.5491, "step": 23701 }, { "epoch": 0.6507962657880285, "grad_norm": 0.347990483045578, "learning_rate": 1.5239523898639676e-05, "loss": 0.4897, "step": 23702 }, { "epoch": 0.6508237232289951, "grad_norm": 0.34755003452301025, "learning_rate": 1.5239156029599457e-05, "loss": 0.4917, "step": 23703 }, { "epoch": 0.6508511806699615, "grad_norm": 0.4398002624511719, "learning_rate": 1.5238788150786538e-05, "loss": 0.485, "step": 23704 }, { "epoch": 0.650878638110928, "grad_norm": 0.45074462890625, "learning_rate": 1.523842026220161e-05, "loss": 0.5316, "step": 23705 }, { "epoch": 0.6509060955518946, "grad_norm": 0.35325881838798523, "learning_rate": 1.5238052363845357e-05, "loss": 0.4807, "step": 23706 }, { "epoch": 0.650933552992861, "grad_norm": 0.3939119577407837, "learning_rate": 1.5237684455718462e-05, "loss": 0.4821, "step": 23707 }, { "epoch": 0.6509610104338276, "grad_norm": 0.42961618304252625, "learning_rate": 1.5237316537821616e-05, "loss": 0.5807, "step": 23708 }, { "epoch": 0.650988467874794, "grad_norm": 0.37778645753860474, "learning_rate": 1.5236948610155502e-05, "loss": 0.487, "step": 23709 }, { "epoch": 0.6510159253157606, "grad_norm": 0.382432758808136, "learning_rate": 1.5236580672720807e-05, "loss": 0.4213, "step": 23710 }, { "epoch": 0.651043382756727, "grad_norm": 0.35771504044532776, "learning_rate": 1.5236212725518218e-05, "loss": 0.4606, "step": 23711 }, { "epoch": 0.6510708401976936, "grad_norm": 0.3904881179332733, "learning_rate": 1.523584476854842e-05, "loss": 0.5279, "step": 23712 }, { "epoch": 0.6510982976386601, "grad_norm": 0.409147173166275, "learning_rate": 1.52354768018121e-05, "loss": 0.5617, "step": 23713 }, { "epoch": 0.6511257550796266, "grad_norm": 0.40701955556869507, "learning_rate": 1.523510882530995e-05, "loss": 0.5721, "step": 23714 }, { "epoch": 0.6511532125205931, "grad_norm": 0.35579943656921387, "learning_rate": 1.5234740839042646e-05, "loss": 0.4426, "step": 23715 }, { "epoch": 0.6511806699615595, "grad_norm": 0.4169216752052307, "learning_rate": 1.523437284301088e-05, "loss": 0.583, "step": 23716 }, { "epoch": 0.6512081274025261, "grad_norm": 0.3572644293308258, "learning_rate": 1.5234004837215338e-05, "loss": 0.5579, "step": 23717 }, { "epoch": 0.6512355848434925, "grad_norm": 0.38142678141593933, "learning_rate": 1.523363682165671e-05, "loss": 0.5219, "step": 23718 }, { "epoch": 0.6512630422844591, "grad_norm": 0.3415701985359192, "learning_rate": 1.5233268796335677e-05, "loss": 0.4586, "step": 23719 }, { "epoch": 0.6512904997254256, "grad_norm": 0.39159443974494934, "learning_rate": 1.5232900761252927e-05, "loss": 0.5128, "step": 23720 }, { "epoch": 0.6513179571663921, "grad_norm": 0.3865741193294525, "learning_rate": 1.5232532716409148e-05, "loss": 0.5205, "step": 23721 }, { "epoch": 0.6513454146073586, "grad_norm": 0.38265615701675415, "learning_rate": 1.523216466180502e-05, "loss": 0.5212, "step": 23722 }, { "epoch": 0.6513728720483251, "grad_norm": 0.421025812625885, "learning_rate": 1.5231796597441241e-05, "loss": 0.5062, "step": 23723 }, { "epoch": 0.6514003294892916, "grad_norm": 0.6716886758804321, "learning_rate": 1.5231428523318488e-05, "loss": 0.5067, "step": 23724 }, { "epoch": 0.651427786930258, "grad_norm": 0.3802092373371124, "learning_rate": 1.5231060439437455e-05, "loss": 0.5665, "step": 23725 }, { "epoch": 0.6514552443712246, "grad_norm": 0.5426437854766846, "learning_rate": 1.5230692345798823e-05, "loss": 0.5243, "step": 23726 }, { "epoch": 0.6514827018121911, "grad_norm": 0.370145320892334, "learning_rate": 1.523032424240328e-05, "loss": 0.4355, "step": 23727 }, { "epoch": 0.6515101592531576, "grad_norm": 0.37723442912101746, "learning_rate": 1.5229956129251513e-05, "loss": 0.4967, "step": 23728 }, { "epoch": 0.6515376166941241, "grad_norm": 0.35321810841560364, "learning_rate": 1.5229588006344209e-05, "loss": 0.4458, "step": 23729 }, { "epoch": 0.6515650741350906, "grad_norm": 0.383277952671051, "learning_rate": 1.5229219873682052e-05, "loss": 0.5726, "step": 23730 }, { "epoch": 0.6515925315760571, "grad_norm": 0.35766151547431946, "learning_rate": 1.5228851731265734e-05, "loss": 0.4736, "step": 23731 }, { "epoch": 0.6516199890170236, "grad_norm": 0.6738865971565247, "learning_rate": 1.5228483579095938e-05, "loss": 0.5536, "step": 23732 }, { "epoch": 0.6516474464579901, "grad_norm": 0.3710985779762268, "learning_rate": 1.522811541717335e-05, "loss": 0.4963, "step": 23733 }, { "epoch": 0.6516749038989567, "grad_norm": 0.3928694725036621, "learning_rate": 1.5227747245498661e-05, "loss": 0.4516, "step": 23734 }, { "epoch": 0.6517023613399231, "grad_norm": 0.3582439422607422, "learning_rate": 1.5227379064072552e-05, "loss": 0.4695, "step": 23735 }, { "epoch": 0.6517298187808896, "grad_norm": 0.4069248139858246, "learning_rate": 1.5227010872895713e-05, "loss": 0.4552, "step": 23736 }, { "epoch": 0.6517572762218561, "grad_norm": 0.3432716131210327, "learning_rate": 1.5226642671968832e-05, "loss": 0.4517, "step": 23737 }, { "epoch": 0.6517847336628226, "grad_norm": 0.4144769310951233, "learning_rate": 1.5226274461292594e-05, "loss": 0.5266, "step": 23738 }, { "epoch": 0.6518121911037891, "grad_norm": 0.4657509922981262, "learning_rate": 1.5225906240867685e-05, "loss": 0.5412, "step": 23739 }, { "epoch": 0.6518396485447556, "grad_norm": 0.4205869436264038, "learning_rate": 1.5225538010694791e-05, "loss": 0.436, "step": 23740 }, { "epoch": 0.6518671059857222, "grad_norm": 0.3671778440475464, "learning_rate": 1.5225169770774605e-05, "loss": 0.5342, "step": 23741 }, { "epoch": 0.6518945634266886, "grad_norm": 0.4004552960395813, "learning_rate": 1.5224801521107808e-05, "loss": 0.5125, "step": 23742 }, { "epoch": 0.6519220208676552, "grad_norm": 0.3999151289463043, "learning_rate": 1.5224433261695088e-05, "loss": 0.5571, "step": 23743 }, { "epoch": 0.6519494783086216, "grad_norm": 0.3808857798576355, "learning_rate": 1.5224064992537133e-05, "loss": 0.5226, "step": 23744 }, { "epoch": 0.6519769357495881, "grad_norm": 0.3525325357913971, "learning_rate": 1.522369671363463e-05, "loss": 0.4718, "step": 23745 }, { "epoch": 0.6520043931905546, "grad_norm": 0.4057543873786926, "learning_rate": 1.5223328424988263e-05, "loss": 0.4446, "step": 23746 }, { "epoch": 0.6520318506315211, "grad_norm": 0.41805851459503174, "learning_rate": 1.5222960126598727e-05, "loss": 0.5788, "step": 23747 }, { "epoch": 0.6520593080724877, "grad_norm": 0.377819687128067, "learning_rate": 1.52225918184667e-05, "loss": 0.4348, "step": 23748 }, { "epoch": 0.6520867655134541, "grad_norm": 0.37553781270980835, "learning_rate": 1.522222350059287e-05, "loss": 0.5401, "step": 23749 }, { "epoch": 0.6521142229544207, "grad_norm": 0.4081602692604065, "learning_rate": 1.5221855172977927e-05, "loss": 0.4952, "step": 23750 }, { "epoch": 0.6521416803953871, "grad_norm": 0.3945885896682739, "learning_rate": 1.5221486835622558e-05, "loss": 0.5132, "step": 23751 }, { "epoch": 0.6521691378363537, "grad_norm": 0.40265387296676636, "learning_rate": 1.5221118488527453e-05, "loss": 0.4712, "step": 23752 }, { "epoch": 0.6521965952773201, "grad_norm": 0.3667725920677185, "learning_rate": 1.5220750131693294e-05, "loss": 0.4592, "step": 23753 }, { "epoch": 0.6522240527182867, "grad_norm": 0.3840203285217285, "learning_rate": 1.5220381765120769e-05, "loss": 0.5172, "step": 23754 }, { "epoch": 0.6522515101592532, "grad_norm": 0.4408799111843109, "learning_rate": 1.5220013388810565e-05, "loss": 0.4808, "step": 23755 }, { "epoch": 0.6522789676002196, "grad_norm": 0.5411415100097656, "learning_rate": 1.5219645002763371e-05, "loss": 0.5785, "step": 23756 }, { "epoch": 0.6523064250411862, "grad_norm": 0.41816431283950806, "learning_rate": 1.5219276606979874e-05, "loss": 0.5406, "step": 23757 }, { "epoch": 0.6523338824821526, "grad_norm": 0.4204012453556061, "learning_rate": 1.5218908201460755e-05, "loss": 0.5289, "step": 23758 }, { "epoch": 0.6523613399231192, "grad_norm": 0.40524566173553467, "learning_rate": 1.5218539786206713e-05, "loss": 0.5269, "step": 23759 }, { "epoch": 0.6523887973640856, "grad_norm": 0.3587714433670044, "learning_rate": 1.5218171361218429e-05, "loss": 0.429, "step": 23760 }, { "epoch": 0.6524162548050522, "grad_norm": 0.4066140949726105, "learning_rate": 1.5217802926496585e-05, "loss": 0.5531, "step": 23761 }, { "epoch": 0.6524437122460187, "grad_norm": 0.5565515756607056, "learning_rate": 1.5217434482041875e-05, "loss": 0.5525, "step": 23762 }, { "epoch": 0.6524711696869852, "grad_norm": 0.4059676229953766, "learning_rate": 1.5217066027854986e-05, "loss": 0.576, "step": 23763 }, { "epoch": 0.6524986271279517, "grad_norm": 0.3765068054199219, "learning_rate": 1.5216697563936604e-05, "loss": 0.5686, "step": 23764 }, { "epoch": 0.6525260845689181, "grad_norm": 0.36149701476097107, "learning_rate": 1.5216329090287417e-05, "loss": 0.501, "step": 23765 }, { "epoch": 0.6525535420098847, "grad_norm": 0.3929203748703003, "learning_rate": 1.5215960606908104e-05, "loss": 0.4872, "step": 23766 }, { "epoch": 0.6525809994508511, "grad_norm": 0.36024075746536255, "learning_rate": 1.5215592113799366e-05, "loss": 0.4183, "step": 23767 }, { "epoch": 0.6526084568918177, "grad_norm": 0.3582187294960022, "learning_rate": 1.5215223610961882e-05, "loss": 0.4814, "step": 23768 }, { "epoch": 0.6526359143327842, "grad_norm": 0.35664743185043335, "learning_rate": 1.5214855098396344e-05, "loss": 0.483, "step": 23769 }, { "epoch": 0.6526633717737507, "grad_norm": 0.3740565776824951, "learning_rate": 1.5214486576103438e-05, "loss": 0.447, "step": 23770 }, { "epoch": 0.6526908292147172, "grad_norm": 0.3794468641281128, "learning_rate": 1.5214118044083845e-05, "loss": 0.4795, "step": 23771 }, { "epoch": 0.6527182866556837, "grad_norm": 0.41327399015426636, "learning_rate": 1.5213749502338262e-05, "loss": 0.4577, "step": 23772 }, { "epoch": 0.6527457440966502, "grad_norm": 0.3863506019115448, "learning_rate": 1.5213380950867372e-05, "loss": 0.482, "step": 23773 }, { "epoch": 0.6527732015376166, "grad_norm": 0.40236327052116394, "learning_rate": 1.5213012389671859e-05, "loss": 0.4166, "step": 23774 }, { "epoch": 0.6528006589785832, "grad_norm": 0.3786713778972626, "learning_rate": 1.5212643818752418e-05, "loss": 0.5158, "step": 23775 }, { "epoch": 0.6528281164195497, "grad_norm": 0.4604823887348175, "learning_rate": 1.521227523810973e-05, "loss": 0.5837, "step": 23776 }, { "epoch": 0.6528555738605162, "grad_norm": 0.4109199643135071, "learning_rate": 1.5211906647744486e-05, "loss": 0.5722, "step": 23777 }, { "epoch": 0.6528830313014827, "grad_norm": 0.4758520722389221, "learning_rate": 1.5211538047657373e-05, "loss": 0.5151, "step": 23778 }, { "epoch": 0.6529104887424492, "grad_norm": 0.3880813717842102, "learning_rate": 1.5211169437849079e-05, "loss": 0.4948, "step": 23779 }, { "epoch": 0.6529379461834157, "grad_norm": 0.4055856764316559, "learning_rate": 1.5210800818320289e-05, "loss": 0.5317, "step": 23780 }, { "epoch": 0.6529654036243822, "grad_norm": 0.4025108516216278, "learning_rate": 1.5210432189071691e-05, "loss": 0.4616, "step": 23781 }, { "epoch": 0.6529928610653487, "grad_norm": 0.39077186584472656, "learning_rate": 1.5210063550103978e-05, "loss": 0.4972, "step": 23782 }, { "epoch": 0.6530203185063153, "grad_norm": 0.4076533913612366, "learning_rate": 1.5209694901417832e-05, "loss": 0.5243, "step": 23783 }, { "epoch": 0.6530477759472817, "grad_norm": 0.42377105355262756, "learning_rate": 1.520932624301394e-05, "loss": 0.5308, "step": 23784 }, { "epoch": 0.6530752333882482, "grad_norm": 0.40554720163345337, "learning_rate": 1.5208957574892995e-05, "loss": 0.4734, "step": 23785 }, { "epoch": 0.6531026908292147, "grad_norm": 0.39534664154052734, "learning_rate": 1.520858889705568e-05, "loss": 0.539, "step": 23786 }, { "epoch": 0.6531301482701812, "grad_norm": 0.3672639727592468, "learning_rate": 1.5208220209502684e-05, "loss": 0.4598, "step": 23787 }, { "epoch": 0.6531576057111477, "grad_norm": 0.363486111164093, "learning_rate": 1.5207851512234698e-05, "loss": 0.4702, "step": 23788 }, { "epoch": 0.6531850631521142, "grad_norm": 0.37160325050354004, "learning_rate": 1.5207482805252402e-05, "loss": 0.505, "step": 23789 }, { "epoch": 0.6532125205930808, "grad_norm": 0.36664673686027527, "learning_rate": 1.5207114088556494e-05, "loss": 0.4925, "step": 23790 }, { "epoch": 0.6532399780340472, "grad_norm": 0.3830573260784149, "learning_rate": 1.5206745362147652e-05, "loss": 0.4389, "step": 23791 }, { "epoch": 0.6532674354750138, "grad_norm": 0.5204707384109497, "learning_rate": 1.520637662602657e-05, "loss": 0.576, "step": 23792 }, { "epoch": 0.6532948929159802, "grad_norm": 0.4023855924606323, "learning_rate": 1.5206007880193934e-05, "loss": 0.5501, "step": 23793 }, { "epoch": 0.6533223503569467, "grad_norm": 0.3745805323123932, "learning_rate": 1.5205639124650428e-05, "loss": 0.4616, "step": 23794 }, { "epoch": 0.6533498077979132, "grad_norm": 0.36603912711143494, "learning_rate": 1.5205270359396748e-05, "loss": 0.5504, "step": 23795 }, { "epoch": 0.6533772652388797, "grad_norm": 0.37189003825187683, "learning_rate": 1.5204901584433577e-05, "loss": 0.5068, "step": 23796 }, { "epoch": 0.6534047226798463, "grad_norm": 0.3633081018924713, "learning_rate": 1.5204532799761603e-05, "loss": 0.4207, "step": 23797 }, { "epoch": 0.6534321801208127, "grad_norm": 0.4171067774295807, "learning_rate": 1.5204164005381513e-05, "loss": 0.4352, "step": 23798 }, { "epoch": 0.6534596375617793, "grad_norm": 0.4381736218929291, "learning_rate": 1.5203795201293997e-05, "loss": 0.6069, "step": 23799 }, { "epoch": 0.6534870950027457, "grad_norm": 0.3735615909099579, "learning_rate": 1.5203426387499742e-05, "loss": 0.4798, "step": 23800 }, { "epoch": 0.6535145524437123, "grad_norm": 0.37482067942619324, "learning_rate": 1.5203057563999437e-05, "loss": 0.4397, "step": 23801 }, { "epoch": 0.6535420098846787, "grad_norm": 0.33044013381004333, "learning_rate": 1.5202688730793767e-05, "loss": 0.4354, "step": 23802 }, { "epoch": 0.6535694673256452, "grad_norm": 0.41229477524757385, "learning_rate": 1.5202319887883424e-05, "loss": 0.4889, "step": 23803 }, { "epoch": 0.6535969247666118, "grad_norm": 0.349237322807312, "learning_rate": 1.5201951035269094e-05, "loss": 0.5141, "step": 23804 }, { "epoch": 0.6536243822075782, "grad_norm": 0.42026081681251526, "learning_rate": 1.5201582172951464e-05, "loss": 0.5115, "step": 23805 }, { "epoch": 0.6536518396485448, "grad_norm": 0.474020779132843, "learning_rate": 1.5201213300931223e-05, "loss": 0.5016, "step": 23806 }, { "epoch": 0.6536792970895112, "grad_norm": 0.3715853691101074, "learning_rate": 1.5200844419209058e-05, "loss": 0.4883, "step": 23807 }, { "epoch": 0.6537067545304778, "grad_norm": 0.4046780467033386, "learning_rate": 1.5200475527785661e-05, "loss": 0.5049, "step": 23808 }, { "epoch": 0.6537342119714442, "grad_norm": 0.383783757686615, "learning_rate": 1.5200106626661715e-05, "loss": 0.5251, "step": 23809 }, { "epoch": 0.6537616694124108, "grad_norm": 0.37583038210868835, "learning_rate": 1.519973771583791e-05, "loss": 0.5465, "step": 23810 }, { "epoch": 0.6537891268533773, "grad_norm": 0.3696254789829254, "learning_rate": 1.519936879531494e-05, "loss": 0.4922, "step": 23811 }, { "epoch": 0.6538165842943438, "grad_norm": 0.34464162588119507, "learning_rate": 1.5198999865093482e-05, "loss": 0.4619, "step": 23812 }, { "epoch": 0.6538440417353103, "grad_norm": 0.3787131905555725, "learning_rate": 1.5198630925174231e-05, "loss": 0.5723, "step": 23813 }, { "epoch": 0.6538714991762767, "grad_norm": 0.4072667956352234, "learning_rate": 1.5198261975557876e-05, "loss": 0.5128, "step": 23814 }, { "epoch": 0.6538989566172433, "grad_norm": 0.40222057700157166, "learning_rate": 1.5197893016245102e-05, "loss": 0.4705, "step": 23815 }, { "epoch": 0.6539264140582097, "grad_norm": 0.34019890427589417, "learning_rate": 1.5197524047236599e-05, "loss": 0.389, "step": 23816 }, { "epoch": 0.6539538714991763, "grad_norm": 0.4082937240600586, "learning_rate": 1.5197155068533054e-05, "loss": 0.5523, "step": 23817 }, { "epoch": 0.6539813289401428, "grad_norm": 0.4714715778827667, "learning_rate": 1.5196786080135155e-05, "loss": 0.5584, "step": 23818 }, { "epoch": 0.6540087863811093, "grad_norm": 0.4458354115486145, "learning_rate": 1.5196417082043593e-05, "loss": 0.4666, "step": 23819 }, { "epoch": 0.6540362438220758, "grad_norm": 0.33524900674819946, "learning_rate": 1.5196048074259056e-05, "loss": 0.4747, "step": 23820 }, { "epoch": 0.6540637012630423, "grad_norm": 0.39744752645492554, "learning_rate": 1.519567905678223e-05, "loss": 0.5207, "step": 23821 }, { "epoch": 0.6540911587040088, "grad_norm": 0.5068857669830322, "learning_rate": 1.5195310029613804e-05, "loss": 0.4709, "step": 23822 }, { "epoch": 0.6541186161449752, "grad_norm": 0.33176591992378235, "learning_rate": 1.5194940992754466e-05, "loss": 0.5086, "step": 23823 }, { "epoch": 0.6541460735859418, "grad_norm": 0.382457971572876, "learning_rate": 1.5194571946204906e-05, "loss": 0.4801, "step": 23824 }, { "epoch": 0.6541735310269083, "grad_norm": 0.35402071475982666, "learning_rate": 1.5194202889965808e-05, "loss": 0.4487, "step": 23825 }, { "epoch": 0.6542009884678748, "grad_norm": 0.4963783025741577, "learning_rate": 1.5193833824037868e-05, "loss": 0.488, "step": 23826 }, { "epoch": 0.6542284459088413, "grad_norm": 0.3790636360645294, "learning_rate": 1.5193464748421769e-05, "loss": 0.4382, "step": 23827 }, { "epoch": 0.6542559033498078, "grad_norm": 0.5410585403442383, "learning_rate": 1.51930956631182e-05, "loss": 0.4887, "step": 23828 }, { "epoch": 0.6542833607907743, "grad_norm": 0.38508063554763794, "learning_rate": 1.5192726568127851e-05, "loss": 0.3924, "step": 23829 }, { "epoch": 0.6543108182317408, "grad_norm": 0.39466235041618347, "learning_rate": 1.519235746345141e-05, "loss": 0.4942, "step": 23830 }, { "epoch": 0.6543382756727073, "grad_norm": 0.3778545558452606, "learning_rate": 1.5191988349089566e-05, "loss": 0.4066, "step": 23831 }, { "epoch": 0.6543657331136739, "grad_norm": 0.37348175048828125, "learning_rate": 1.5191619225043004e-05, "loss": 0.465, "step": 23832 }, { "epoch": 0.6543931905546403, "grad_norm": 0.40360498428344727, "learning_rate": 1.5191250091312415e-05, "loss": 0.5081, "step": 23833 }, { "epoch": 0.6544206479956068, "grad_norm": 0.3740587830543518, "learning_rate": 1.519088094789849e-05, "loss": 0.4967, "step": 23834 }, { "epoch": 0.6544481054365733, "grad_norm": 0.3612286448478699, "learning_rate": 1.5190511794801914e-05, "loss": 0.4658, "step": 23835 }, { "epoch": 0.6544755628775398, "grad_norm": 0.3687891364097595, "learning_rate": 1.5190142632023377e-05, "loss": 0.4652, "step": 23836 }, { "epoch": 0.6545030203185063, "grad_norm": 0.38653191924095154, "learning_rate": 1.5189773459563568e-05, "loss": 0.4581, "step": 23837 }, { "epoch": 0.6545304777594728, "grad_norm": 0.35681405663490295, "learning_rate": 1.5189404277423175e-05, "loss": 0.4695, "step": 23838 }, { "epoch": 0.6545579352004394, "grad_norm": 0.33935126662254333, "learning_rate": 1.5189035085602885e-05, "loss": 0.4454, "step": 23839 }, { "epoch": 0.6545853926414058, "grad_norm": 0.3510790765285492, "learning_rate": 1.5188665884103393e-05, "loss": 0.4684, "step": 23840 }, { "epoch": 0.6546128500823724, "grad_norm": 0.37300312519073486, "learning_rate": 1.5188296672925378e-05, "loss": 0.5473, "step": 23841 }, { "epoch": 0.6546403075233388, "grad_norm": 0.38593602180480957, "learning_rate": 1.5187927452069535e-05, "loss": 0.5712, "step": 23842 }, { "epoch": 0.6546677649643053, "grad_norm": 0.3275633454322815, "learning_rate": 1.5187558221536553e-05, "loss": 0.4773, "step": 23843 }, { "epoch": 0.6546952224052718, "grad_norm": 0.37738746404647827, "learning_rate": 1.5187188981327119e-05, "loss": 0.5524, "step": 23844 }, { "epoch": 0.6547226798462383, "grad_norm": 0.39229831099510193, "learning_rate": 1.5186819731441924e-05, "loss": 0.4537, "step": 23845 }, { "epoch": 0.6547501372872049, "grad_norm": 0.34418031573295593, "learning_rate": 1.518645047188165e-05, "loss": 0.4924, "step": 23846 }, { "epoch": 0.6547775947281713, "grad_norm": 0.3606501519680023, "learning_rate": 1.5186081202646995e-05, "loss": 0.4855, "step": 23847 }, { "epoch": 0.6548050521691379, "grad_norm": 0.44193190336227417, "learning_rate": 1.5185711923738638e-05, "loss": 0.5643, "step": 23848 }, { "epoch": 0.6548325096101043, "grad_norm": 0.3966399133205414, "learning_rate": 1.5185342635157278e-05, "loss": 0.5632, "step": 23849 }, { "epoch": 0.6548599670510709, "grad_norm": 0.3869325518608093, "learning_rate": 1.51849733369036e-05, "loss": 0.5197, "step": 23850 }, { "epoch": 0.6548874244920373, "grad_norm": 0.39721745252609253, "learning_rate": 1.5184604028978288e-05, "loss": 0.4994, "step": 23851 }, { "epoch": 0.6549148819330038, "grad_norm": 0.4197114408016205, "learning_rate": 1.5184234711382036e-05, "loss": 0.5329, "step": 23852 }, { "epoch": 0.6549423393739704, "grad_norm": 0.3475610017776489, "learning_rate": 1.5183865384115533e-05, "loss": 0.5301, "step": 23853 }, { "epoch": 0.6549697968149368, "grad_norm": 0.38975295424461365, "learning_rate": 1.5183496047179463e-05, "loss": 0.4781, "step": 23854 }, { "epoch": 0.6549972542559034, "grad_norm": 0.3484359681606293, "learning_rate": 1.5183126700574522e-05, "loss": 0.4631, "step": 23855 }, { "epoch": 0.6550247116968698, "grad_norm": 0.35377705097198486, "learning_rate": 1.518275734430139e-05, "loss": 0.518, "step": 23856 }, { "epoch": 0.6550521691378364, "grad_norm": 0.3655840754508972, "learning_rate": 1.5182387978360769e-05, "loss": 0.5972, "step": 23857 }, { "epoch": 0.6550796265788028, "grad_norm": 0.4111377000808716, "learning_rate": 1.5182018602753337e-05, "loss": 0.503, "step": 23858 }, { "epoch": 0.6551070840197694, "grad_norm": 0.35863634943962097, "learning_rate": 1.5181649217479786e-05, "loss": 0.531, "step": 23859 }, { "epoch": 0.6551345414607359, "grad_norm": 0.35143330693244934, "learning_rate": 1.5181279822540806e-05, "loss": 0.52, "step": 23860 }, { "epoch": 0.6551619989017023, "grad_norm": 0.3911744952201843, "learning_rate": 1.5180910417937084e-05, "loss": 0.5612, "step": 23861 }, { "epoch": 0.6551894563426689, "grad_norm": 0.4091092050075531, "learning_rate": 1.5180541003669313e-05, "loss": 0.5319, "step": 23862 }, { "epoch": 0.6552169137836353, "grad_norm": 0.35621142387390137, "learning_rate": 1.5180171579738178e-05, "loss": 0.4696, "step": 23863 }, { "epoch": 0.6552443712246019, "grad_norm": 0.3588516116142273, "learning_rate": 1.5179802146144372e-05, "loss": 0.5012, "step": 23864 }, { "epoch": 0.6552718286655683, "grad_norm": 0.3860105276107788, "learning_rate": 1.5179432702888578e-05, "loss": 0.4704, "step": 23865 }, { "epoch": 0.6552992861065349, "grad_norm": 0.43671151995658875, "learning_rate": 1.517906324997149e-05, "loss": 0.5322, "step": 23866 }, { "epoch": 0.6553267435475014, "grad_norm": 0.4514642655849457, "learning_rate": 1.5178693787393796e-05, "loss": 0.6095, "step": 23867 }, { "epoch": 0.6553542009884679, "grad_norm": 0.4700597822666168, "learning_rate": 1.5178324315156188e-05, "loss": 0.5468, "step": 23868 }, { "epoch": 0.6553816584294344, "grad_norm": 0.4331531822681427, "learning_rate": 1.517795483325935e-05, "loss": 0.4699, "step": 23869 }, { "epoch": 0.6554091158704008, "grad_norm": 0.3561910390853882, "learning_rate": 1.5177585341703976e-05, "loss": 0.4965, "step": 23870 }, { "epoch": 0.6554365733113674, "grad_norm": 0.34720396995544434, "learning_rate": 1.5177215840490751e-05, "loss": 0.4987, "step": 23871 }, { "epoch": 0.6554640307523338, "grad_norm": 0.38149508833885193, "learning_rate": 1.5176846329620365e-05, "loss": 0.4135, "step": 23872 }, { "epoch": 0.6554914881933004, "grad_norm": 0.3961644768714905, "learning_rate": 1.517647680909351e-05, "loss": 0.5101, "step": 23873 }, { "epoch": 0.6555189456342669, "grad_norm": 0.3593004047870636, "learning_rate": 1.5176107278910873e-05, "loss": 0.4887, "step": 23874 }, { "epoch": 0.6555464030752334, "grad_norm": 0.40347328782081604, "learning_rate": 1.5175737739073144e-05, "loss": 0.4819, "step": 23875 }, { "epoch": 0.6555738605161999, "grad_norm": 0.3572215139865875, "learning_rate": 1.5175368189581013e-05, "loss": 0.5335, "step": 23876 }, { "epoch": 0.6556013179571664, "grad_norm": 0.5818918347358704, "learning_rate": 1.5174998630435166e-05, "loss": 0.4523, "step": 23877 }, { "epoch": 0.6556287753981329, "grad_norm": 0.3931645154953003, "learning_rate": 1.5174629061636297e-05, "loss": 0.5136, "step": 23878 }, { "epoch": 0.6556562328390994, "grad_norm": 0.40563544631004333, "learning_rate": 1.5174259483185092e-05, "loss": 0.4645, "step": 23879 }, { "epoch": 0.6556836902800659, "grad_norm": 0.3705555498600006, "learning_rate": 1.5173889895082243e-05, "loss": 0.5074, "step": 23880 }, { "epoch": 0.6557111477210325, "grad_norm": 0.3994797468185425, "learning_rate": 1.5173520297328437e-05, "loss": 0.4818, "step": 23881 }, { "epoch": 0.6557386051619989, "grad_norm": 0.35491520166397095, "learning_rate": 1.5173150689924365e-05, "loss": 0.4476, "step": 23882 }, { "epoch": 0.6557660626029654, "grad_norm": 0.43078482151031494, "learning_rate": 1.5172781072870717e-05, "loss": 0.4963, "step": 23883 }, { "epoch": 0.6557935200439319, "grad_norm": 0.4117569029331207, "learning_rate": 1.517241144616818e-05, "loss": 0.5301, "step": 23884 }, { "epoch": 0.6558209774848984, "grad_norm": 0.41313356161117554, "learning_rate": 1.5172041809817444e-05, "loss": 0.5432, "step": 23885 }, { "epoch": 0.6558484349258649, "grad_norm": 0.3572746813297272, "learning_rate": 1.51716721638192e-05, "loss": 0.4555, "step": 23886 }, { "epoch": 0.6558758923668314, "grad_norm": 0.3679569959640503, "learning_rate": 1.5171302508174139e-05, "loss": 0.5601, "step": 23887 }, { "epoch": 0.655903349807798, "grad_norm": 0.40712982416152954, "learning_rate": 1.5170932842882943e-05, "loss": 0.4921, "step": 23888 }, { "epoch": 0.6559308072487644, "grad_norm": 0.4098339378833771, "learning_rate": 1.5170563167946312e-05, "loss": 0.5065, "step": 23889 }, { "epoch": 0.655958264689731, "grad_norm": 0.5623688101768494, "learning_rate": 1.517019348336493e-05, "loss": 0.4739, "step": 23890 }, { "epoch": 0.6559857221306974, "grad_norm": 0.33378925919532776, "learning_rate": 1.5169823789139483e-05, "loss": 0.4159, "step": 23891 }, { "epoch": 0.6560131795716639, "grad_norm": 0.5865381360054016, "learning_rate": 1.516945408527067e-05, "loss": 0.4695, "step": 23892 }, { "epoch": 0.6560406370126304, "grad_norm": 0.3597528636455536, "learning_rate": 1.516908437175917e-05, "loss": 0.4878, "step": 23893 }, { "epoch": 0.6560680944535969, "grad_norm": 0.40665188431739807, "learning_rate": 1.5168714648605682e-05, "loss": 0.5298, "step": 23894 }, { "epoch": 0.6560955518945635, "grad_norm": 0.36472827196121216, "learning_rate": 1.5168344915810891e-05, "loss": 0.4748, "step": 23895 }, { "epoch": 0.6561230093355299, "grad_norm": 0.4921271502971649, "learning_rate": 1.5167975173375484e-05, "loss": 0.5927, "step": 23896 }, { "epoch": 0.6561504667764965, "grad_norm": 0.45371463894844055, "learning_rate": 1.5167605421300158e-05, "loss": 0.5704, "step": 23897 }, { "epoch": 0.6561779242174629, "grad_norm": 0.36017146706581116, "learning_rate": 1.5167235659585595e-05, "loss": 0.4274, "step": 23898 }, { "epoch": 0.6562053816584295, "grad_norm": 0.359554260969162, "learning_rate": 1.5166865888232492e-05, "loss": 0.4522, "step": 23899 }, { "epoch": 0.6562328390993959, "grad_norm": 0.35941338539123535, "learning_rate": 1.5166496107241532e-05, "loss": 0.4787, "step": 23900 }, { "epoch": 0.6562602965403624, "grad_norm": 0.368913859128952, "learning_rate": 1.5166126316613409e-05, "loss": 0.4793, "step": 23901 }, { "epoch": 0.656287753981329, "grad_norm": 0.37052103877067566, "learning_rate": 1.5165756516348812e-05, "loss": 0.5205, "step": 23902 }, { "epoch": 0.6563152114222954, "grad_norm": 0.42437684535980225, "learning_rate": 1.5165386706448428e-05, "loss": 0.5374, "step": 23903 }, { "epoch": 0.656342668863262, "grad_norm": 0.35600847005844116, "learning_rate": 1.5165016886912951e-05, "loss": 0.5493, "step": 23904 }, { "epoch": 0.6563701263042284, "grad_norm": 0.44146472215652466, "learning_rate": 1.5164647057743069e-05, "loss": 0.5154, "step": 23905 }, { "epoch": 0.656397583745195, "grad_norm": 0.3685690760612488, "learning_rate": 1.5164277218939472e-05, "loss": 0.4844, "step": 23906 }, { "epoch": 0.6564250411861614, "grad_norm": 0.4134138524532318, "learning_rate": 1.5163907370502851e-05, "loss": 0.5426, "step": 23907 }, { "epoch": 0.656452498627128, "grad_norm": 0.41853782534599304, "learning_rate": 1.5163537512433892e-05, "loss": 0.5598, "step": 23908 }, { "epoch": 0.6564799560680945, "grad_norm": 0.47246310114860535, "learning_rate": 1.5163167644733289e-05, "loss": 0.4777, "step": 23909 }, { "epoch": 0.6565074135090609, "grad_norm": 0.40454962849617004, "learning_rate": 1.5162797767401728e-05, "loss": 0.5228, "step": 23910 }, { "epoch": 0.6565348709500275, "grad_norm": 0.34973350167274475, "learning_rate": 1.5162427880439908e-05, "loss": 0.5539, "step": 23911 }, { "epoch": 0.6565623283909939, "grad_norm": 0.40779784321784973, "learning_rate": 1.5162057983848507e-05, "loss": 0.4787, "step": 23912 }, { "epoch": 0.6565897858319605, "grad_norm": 0.3787303864955902, "learning_rate": 1.516168807762822e-05, "loss": 0.4521, "step": 23913 }, { "epoch": 0.6566172432729269, "grad_norm": 0.46128949522972107, "learning_rate": 1.516131816177974e-05, "loss": 0.6272, "step": 23914 }, { "epoch": 0.6566447007138935, "grad_norm": 0.38078975677490234, "learning_rate": 1.5160948236303755e-05, "loss": 0.4702, "step": 23915 }, { "epoch": 0.65667215815486, "grad_norm": 0.3621194064617157, "learning_rate": 1.516057830120095e-05, "loss": 0.4584, "step": 23916 }, { "epoch": 0.6566996155958265, "grad_norm": 0.42416226863861084, "learning_rate": 1.5160208356472021e-05, "loss": 0.5008, "step": 23917 }, { "epoch": 0.656727073036793, "grad_norm": 0.3621540367603302, "learning_rate": 1.5159838402117654e-05, "loss": 0.534, "step": 23918 }, { "epoch": 0.6567545304777594, "grad_norm": 0.3991404175758362, "learning_rate": 1.5159468438138548e-05, "loss": 0.5303, "step": 23919 }, { "epoch": 0.656781987918726, "grad_norm": 0.38658607006073, "learning_rate": 1.5159098464535382e-05, "loss": 0.4608, "step": 23920 }, { "epoch": 0.6568094453596924, "grad_norm": 1.1730042695999146, "learning_rate": 1.5158728481308853e-05, "loss": 0.507, "step": 23921 }, { "epoch": 0.656836902800659, "grad_norm": 0.5095987915992737, "learning_rate": 1.5158358488459647e-05, "loss": 0.5022, "step": 23922 }, { "epoch": 0.6568643602416255, "grad_norm": 0.40227818489074707, "learning_rate": 1.5157988485988457e-05, "loss": 0.5288, "step": 23923 }, { "epoch": 0.656891817682592, "grad_norm": 0.4037502706050873, "learning_rate": 1.5157618473895972e-05, "loss": 0.5322, "step": 23924 }, { "epoch": 0.6569192751235585, "grad_norm": 0.41233935952186584, "learning_rate": 1.5157248452182885e-05, "loss": 0.5454, "step": 23925 }, { "epoch": 0.656946732564525, "grad_norm": 0.35549062490463257, "learning_rate": 1.5156878420849879e-05, "loss": 0.475, "step": 23926 }, { "epoch": 0.6569741900054915, "grad_norm": 0.3565361499786377, "learning_rate": 1.5156508379897651e-05, "loss": 0.4303, "step": 23927 }, { "epoch": 0.657001647446458, "grad_norm": 0.3923676609992981, "learning_rate": 1.515613832932689e-05, "loss": 0.5526, "step": 23928 }, { "epoch": 0.6570291048874245, "grad_norm": 0.42218273878097534, "learning_rate": 1.5155768269138284e-05, "loss": 0.5848, "step": 23929 }, { "epoch": 0.657056562328391, "grad_norm": 0.39501747488975525, "learning_rate": 1.515539819933253e-05, "loss": 0.5981, "step": 23930 }, { "epoch": 0.6570840197693575, "grad_norm": 0.4172563850879669, "learning_rate": 1.5155028119910305e-05, "loss": 0.6399, "step": 23931 }, { "epoch": 0.657111477210324, "grad_norm": 0.44342079758644104, "learning_rate": 1.515465803087231e-05, "loss": 0.5189, "step": 23932 }, { "epoch": 0.6571389346512905, "grad_norm": 0.3691185414791107, "learning_rate": 1.5154287932219233e-05, "loss": 0.4341, "step": 23933 }, { "epoch": 0.657166392092257, "grad_norm": 0.3722602128982544, "learning_rate": 1.5153917823951764e-05, "loss": 0.5038, "step": 23934 }, { "epoch": 0.6571938495332235, "grad_norm": 0.40006932616233826, "learning_rate": 1.5153547706070595e-05, "loss": 0.5565, "step": 23935 }, { "epoch": 0.65722130697419, "grad_norm": 0.37805014848709106, "learning_rate": 1.5153177578576412e-05, "loss": 0.5111, "step": 23936 }, { "epoch": 0.6572487644151566, "grad_norm": 0.49466150999069214, "learning_rate": 1.515280744146991e-05, "loss": 0.4771, "step": 23937 }, { "epoch": 0.657276221856123, "grad_norm": 0.3885880708694458, "learning_rate": 1.5152437294751779e-05, "loss": 0.4808, "step": 23938 }, { "epoch": 0.6573036792970895, "grad_norm": 0.3847009539604187, "learning_rate": 1.5152067138422703e-05, "loss": 0.5279, "step": 23939 }, { "epoch": 0.657331136738056, "grad_norm": 0.3731232285499573, "learning_rate": 1.515169697248338e-05, "loss": 0.4419, "step": 23940 }, { "epoch": 0.6573585941790225, "grad_norm": 0.42065170407295227, "learning_rate": 1.5151326796934499e-05, "loss": 0.4949, "step": 23941 }, { "epoch": 0.657386051619989, "grad_norm": 0.39258480072021484, "learning_rate": 1.5150956611776748e-05, "loss": 0.477, "step": 23942 }, { "epoch": 0.6574135090609555, "grad_norm": 0.45420950651168823, "learning_rate": 1.515058641701082e-05, "loss": 0.592, "step": 23943 }, { "epoch": 0.6574409665019221, "grad_norm": 0.3520858883857727, "learning_rate": 1.5150216212637403e-05, "loss": 0.4525, "step": 23944 }, { "epoch": 0.6574684239428885, "grad_norm": 0.3777257800102234, "learning_rate": 1.5149845998657191e-05, "loss": 0.5192, "step": 23945 }, { "epoch": 0.6574958813838551, "grad_norm": 0.3686080276966095, "learning_rate": 1.5149475775070872e-05, "loss": 0.4893, "step": 23946 }, { "epoch": 0.6575233388248215, "grad_norm": 0.39480507373809814, "learning_rate": 1.5149105541879134e-05, "loss": 0.5124, "step": 23947 }, { "epoch": 0.657550796265788, "grad_norm": 0.7043114304542542, "learning_rate": 1.5148735299082677e-05, "loss": 0.4159, "step": 23948 }, { "epoch": 0.6575782537067545, "grad_norm": 0.38168227672576904, "learning_rate": 1.514836504668218e-05, "loss": 0.4615, "step": 23949 }, { "epoch": 0.657605711147721, "grad_norm": 0.42309117317199707, "learning_rate": 1.5147994784678342e-05, "loss": 0.5307, "step": 23950 }, { "epoch": 0.6576331685886876, "grad_norm": 0.3904206454753876, "learning_rate": 1.5147624513071849e-05, "loss": 0.5361, "step": 23951 }, { "epoch": 0.657660626029654, "grad_norm": 0.4133411943912506, "learning_rate": 1.5147254231863393e-05, "loss": 0.5704, "step": 23952 }, { "epoch": 0.6576880834706206, "grad_norm": 0.3648108243942261, "learning_rate": 1.5146883941053668e-05, "loss": 0.4851, "step": 23953 }, { "epoch": 0.657715540911587, "grad_norm": 0.3536510169506073, "learning_rate": 1.5146513640643358e-05, "loss": 0.5073, "step": 23954 }, { "epoch": 0.6577429983525536, "grad_norm": 0.45780104398727417, "learning_rate": 1.5146143330633161e-05, "loss": 0.5176, "step": 23955 }, { "epoch": 0.65777045579352, "grad_norm": 0.38521525263786316, "learning_rate": 1.514577301102376e-05, "loss": 0.4969, "step": 23956 }, { "epoch": 0.6577979132344866, "grad_norm": 0.4498515725135803, "learning_rate": 1.5145402681815853e-05, "loss": 0.5313, "step": 23957 }, { "epoch": 0.6578253706754531, "grad_norm": 0.4214504361152649, "learning_rate": 1.5145032343010128e-05, "loss": 0.4698, "step": 23958 }, { "epoch": 0.6578528281164195, "grad_norm": 0.36548590660095215, "learning_rate": 1.5144661994607272e-05, "loss": 0.4879, "step": 23959 }, { "epoch": 0.6578802855573861, "grad_norm": 0.34963274002075195, "learning_rate": 1.5144291636607985e-05, "loss": 0.5168, "step": 23960 }, { "epoch": 0.6579077429983525, "grad_norm": 0.35828661918640137, "learning_rate": 1.514392126901295e-05, "loss": 0.5391, "step": 23961 }, { "epoch": 0.6579352004393191, "grad_norm": 0.3735388517379761, "learning_rate": 1.5143550891822859e-05, "loss": 0.4526, "step": 23962 }, { "epoch": 0.6579626578802855, "grad_norm": 0.3720947504043579, "learning_rate": 1.5143180505038402e-05, "loss": 0.5468, "step": 23963 }, { "epoch": 0.6579901153212521, "grad_norm": 0.3825312852859497, "learning_rate": 1.5142810108660277e-05, "loss": 0.4602, "step": 23964 }, { "epoch": 0.6580175727622186, "grad_norm": 0.34863603115081787, "learning_rate": 1.5142439702689164e-05, "loss": 0.3957, "step": 23965 }, { "epoch": 0.658045030203185, "grad_norm": 0.443844735622406, "learning_rate": 1.5142069287125765e-05, "loss": 0.5104, "step": 23966 }, { "epoch": 0.6580724876441516, "grad_norm": 0.4012451469898224, "learning_rate": 1.5141698861970762e-05, "loss": 0.5935, "step": 23967 }, { "epoch": 0.658099945085118, "grad_norm": 0.37376147508621216, "learning_rate": 1.514132842722485e-05, "loss": 0.4681, "step": 23968 }, { "epoch": 0.6581274025260846, "grad_norm": 0.36959168314933777, "learning_rate": 1.5140957982888723e-05, "loss": 0.4913, "step": 23969 }, { "epoch": 0.658154859967051, "grad_norm": 0.38753822445869446, "learning_rate": 1.5140587528963065e-05, "loss": 0.5109, "step": 23970 }, { "epoch": 0.6581823174080176, "grad_norm": 0.3941248059272766, "learning_rate": 1.514021706544857e-05, "loss": 0.5205, "step": 23971 }, { "epoch": 0.658209774848984, "grad_norm": 0.43180322647094727, "learning_rate": 1.513984659234593e-05, "loss": 0.448, "step": 23972 }, { "epoch": 0.6582372322899506, "grad_norm": 0.4012894332408905, "learning_rate": 1.5139476109655839e-05, "loss": 0.4678, "step": 23973 }, { "epoch": 0.6582646897309171, "grad_norm": 0.3820578157901764, "learning_rate": 1.5139105617378982e-05, "loss": 0.4982, "step": 23974 }, { "epoch": 0.6582921471718836, "grad_norm": 0.39036595821380615, "learning_rate": 1.5138735115516055e-05, "loss": 0.4335, "step": 23975 }, { "epoch": 0.6583196046128501, "grad_norm": 0.42036300897598267, "learning_rate": 1.5138364604067745e-05, "loss": 0.4912, "step": 23976 }, { "epoch": 0.6583470620538165, "grad_norm": 0.45336592197418213, "learning_rate": 1.5137994083034743e-05, "loss": 0.4222, "step": 23977 }, { "epoch": 0.6583745194947831, "grad_norm": 0.38211461901664734, "learning_rate": 1.5137623552417746e-05, "loss": 0.4518, "step": 23978 }, { "epoch": 0.6584019769357495, "grad_norm": 0.35518449544906616, "learning_rate": 1.513725301221744e-05, "loss": 0.5271, "step": 23979 }, { "epoch": 0.6584294343767161, "grad_norm": 0.3911410868167877, "learning_rate": 1.5136882462434517e-05, "loss": 0.6067, "step": 23980 }, { "epoch": 0.6584568918176826, "grad_norm": 0.41568097472190857, "learning_rate": 1.513651190306967e-05, "loss": 0.5425, "step": 23981 }, { "epoch": 0.6584843492586491, "grad_norm": 0.37300705909729004, "learning_rate": 1.5136141334123586e-05, "loss": 0.4626, "step": 23982 }, { "epoch": 0.6585118066996156, "grad_norm": 0.33540773391723633, "learning_rate": 1.5135770755596962e-05, "loss": 0.404, "step": 23983 }, { "epoch": 0.6585392641405821, "grad_norm": 0.3623064458370209, "learning_rate": 1.5135400167490487e-05, "loss": 0.4151, "step": 23984 }, { "epoch": 0.6585667215815486, "grad_norm": 0.3585006892681122, "learning_rate": 1.5135029569804846e-05, "loss": 0.4249, "step": 23985 }, { "epoch": 0.658594179022515, "grad_norm": 0.3870038688182831, "learning_rate": 1.5134658962540743e-05, "loss": 0.4751, "step": 23986 }, { "epoch": 0.6586216364634816, "grad_norm": 0.3566970229148865, "learning_rate": 1.513428834569886e-05, "loss": 0.4223, "step": 23987 }, { "epoch": 0.6586490939044481, "grad_norm": 0.40721994638442993, "learning_rate": 1.5133917719279886e-05, "loss": 0.4698, "step": 23988 }, { "epoch": 0.6586765513454146, "grad_norm": 0.3818439245223999, "learning_rate": 1.5133547083284523e-05, "loss": 0.5191, "step": 23989 }, { "epoch": 0.6587040087863811, "grad_norm": 0.35108712315559387, "learning_rate": 1.5133176437713454e-05, "loss": 0.4659, "step": 23990 }, { "epoch": 0.6587314662273476, "grad_norm": 0.40364962816238403, "learning_rate": 1.5132805782567373e-05, "loss": 0.4969, "step": 23991 }, { "epoch": 0.6587589236683141, "grad_norm": 0.3653351962566376, "learning_rate": 1.513243511784697e-05, "loss": 0.5234, "step": 23992 }, { "epoch": 0.6587863811092806, "grad_norm": 0.40441039204597473, "learning_rate": 1.5132064443552938e-05, "loss": 0.5877, "step": 23993 }, { "epoch": 0.6588138385502471, "grad_norm": 0.38160017132759094, "learning_rate": 1.5131693759685968e-05, "loss": 0.4616, "step": 23994 }, { "epoch": 0.6588412959912137, "grad_norm": 0.3807300925254822, "learning_rate": 1.5131323066246753e-05, "loss": 0.5681, "step": 23995 }, { "epoch": 0.6588687534321801, "grad_norm": 0.3919525742530823, "learning_rate": 1.513095236323598e-05, "loss": 0.4571, "step": 23996 }, { "epoch": 0.6588962108731466, "grad_norm": 0.4258916974067688, "learning_rate": 1.5130581650654344e-05, "loss": 0.4643, "step": 23997 }, { "epoch": 0.6589236683141131, "grad_norm": 0.3517382740974426, "learning_rate": 1.5130210928502536e-05, "loss": 0.4735, "step": 23998 }, { "epoch": 0.6589511257550796, "grad_norm": 0.3432435691356659, "learning_rate": 1.5129840196781249e-05, "loss": 0.477, "step": 23999 }, { "epoch": 0.6589785831960461, "grad_norm": 0.3310431241989136, "learning_rate": 1.5129469455491171e-05, "loss": 0.4738, "step": 24000 }, { "epoch": 0.6590060406370126, "grad_norm": 0.3629182279109955, "learning_rate": 1.5129098704632994e-05, "loss": 0.434, "step": 24001 }, { "epoch": 0.6590334980779792, "grad_norm": 0.31795990467071533, "learning_rate": 1.5128727944207412e-05, "loss": 0.4147, "step": 24002 }, { "epoch": 0.6590609555189456, "grad_norm": 0.37017470598220825, "learning_rate": 1.5128357174215116e-05, "loss": 0.5033, "step": 24003 }, { "epoch": 0.6590884129599122, "grad_norm": 0.37525224685668945, "learning_rate": 1.5127986394656797e-05, "loss": 0.5104, "step": 24004 }, { "epoch": 0.6591158704008786, "grad_norm": 0.35840412974357605, "learning_rate": 1.5127615605533148e-05, "loss": 0.4773, "step": 24005 }, { "epoch": 0.6591433278418452, "grad_norm": 0.38165760040283203, "learning_rate": 1.5127244806844856e-05, "loss": 0.4435, "step": 24006 }, { "epoch": 0.6591707852828116, "grad_norm": 0.31844136118888855, "learning_rate": 1.5126873998592619e-05, "loss": 0.4476, "step": 24007 }, { "epoch": 0.6591982427237781, "grad_norm": 0.4082482159137726, "learning_rate": 1.5126503180777126e-05, "loss": 0.4852, "step": 24008 }, { "epoch": 0.6592257001647447, "grad_norm": 0.3316288888454437, "learning_rate": 1.5126132353399064e-05, "loss": 0.4509, "step": 24009 }, { "epoch": 0.6592531576057111, "grad_norm": 0.3549002707004547, "learning_rate": 1.5125761516459132e-05, "loss": 0.4472, "step": 24010 }, { "epoch": 0.6592806150466777, "grad_norm": 0.3818439841270447, "learning_rate": 1.5125390669958018e-05, "loss": 0.5111, "step": 24011 }, { "epoch": 0.6593080724876441, "grad_norm": 0.39492741227149963, "learning_rate": 1.5125019813896416e-05, "loss": 0.5555, "step": 24012 }, { "epoch": 0.6593355299286107, "grad_norm": 0.41379600763320923, "learning_rate": 1.5124648948275016e-05, "loss": 0.5018, "step": 24013 }, { "epoch": 0.6593629873695771, "grad_norm": 0.38908863067626953, "learning_rate": 1.512427807309451e-05, "loss": 0.5159, "step": 24014 }, { "epoch": 0.6593904448105437, "grad_norm": 0.42091208696365356, "learning_rate": 1.512390718835559e-05, "loss": 0.4969, "step": 24015 }, { "epoch": 0.6594179022515102, "grad_norm": 0.4098203182220459, "learning_rate": 1.5123536294058947e-05, "loss": 0.5016, "step": 24016 }, { "epoch": 0.6594453596924766, "grad_norm": 0.38693326711654663, "learning_rate": 1.5123165390205274e-05, "loss": 0.4588, "step": 24017 }, { "epoch": 0.6594728171334432, "grad_norm": 0.3761516511440277, "learning_rate": 1.5122794476795263e-05, "loss": 0.4856, "step": 24018 }, { "epoch": 0.6595002745744096, "grad_norm": 0.3800496757030487, "learning_rate": 1.5122423553829603e-05, "loss": 0.5056, "step": 24019 }, { "epoch": 0.6595277320153762, "grad_norm": 0.37697774171829224, "learning_rate": 1.5122052621308991e-05, "loss": 0.4705, "step": 24020 }, { "epoch": 0.6595551894563426, "grad_norm": 0.3954748809337616, "learning_rate": 1.5121681679234112e-05, "loss": 0.5063, "step": 24021 }, { "epoch": 0.6595826468973092, "grad_norm": 0.6181082129478455, "learning_rate": 1.5121310727605667e-05, "loss": 0.536, "step": 24022 }, { "epoch": 0.6596101043382757, "grad_norm": 0.3828640282154083, "learning_rate": 1.5120939766424343e-05, "loss": 0.5675, "step": 24023 }, { "epoch": 0.6596375617792422, "grad_norm": 0.44749152660369873, "learning_rate": 1.512056879569083e-05, "loss": 0.5516, "step": 24024 }, { "epoch": 0.6596650192202087, "grad_norm": 0.45890605449676514, "learning_rate": 1.5120197815405821e-05, "loss": 0.5038, "step": 24025 }, { "epoch": 0.6596924766611751, "grad_norm": 0.7149428725242615, "learning_rate": 1.511982682557001e-05, "loss": 0.4651, "step": 24026 }, { "epoch": 0.6597199341021417, "grad_norm": 0.4190370738506317, "learning_rate": 1.5119455826184087e-05, "loss": 0.5132, "step": 24027 }, { "epoch": 0.6597473915431081, "grad_norm": 0.3901206851005554, "learning_rate": 1.5119084817248747e-05, "loss": 0.5277, "step": 24028 }, { "epoch": 0.6597748489840747, "grad_norm": 0.3569476306438446, "learning_rate": 1.5118713798764677e-05, "loss": 0.5044, "step": 24029 }, { "epoch": 0.6598023064250412, "grad_norm": 0.36212247610092163, "learning_rate": 1.5118342770732572e-05, "loss": 0.4704, "step": 24030 }, { "epoch": 0.6598297638660077, "grad_norm": 0.40042051672935486, "learning_rate": 1.5117971733153129e-05, "loss": 0.5376, "step": 24031 }, { "epoch": 0.6598572213069742, "grad_norm": 0.37853512167930603, "learning_rate": 1.511760068602703e-05, "loss": 0.5069, "step": 24032 }, { "epoch": 0.6598846787479407, "grad_norm": 0.3851795494556427, "learning_rate": 1.5117229629354974e-05, "loss": 0.5673, "step": 24033 }, { "epoch": 0.6599121361889072, "grad_norm": 0.3782115578651428, "learning_rate": 1.5116858563137654e-05, "loss": 0.5024, "step": 24034 }, { "epoch": 0.6599395936298736, "grad_norm": 0.4062112867832184, "learning_rate": 1.5116487487375757e-05, "loss": 0.4557, "step": 24035 }, { "epoch": 0.6599670510708402, "grad_norm": 0.4363280236721039, "learning_rate": 1.511611640206998e-05, "loss": 0.4942, "step": 24036 }, { "epoch": 0.6599945085118067, "grad_norm": 0.4659200608730316, "learning_rate": 1.511574530722101e-05, "loss": 0.5659, "step": 24037 }, { "epoch": 0.6600219659527732, "grad_norm": 0.9206740856170654, "learning_rate": 1.5115374202829545e-05, "loss": 0.5659, "step": 24038 }, { "epoch": 0.6600494233937397, "grad_norm": 0.4030377268791199, "learning_rate": 1.5115003088896272e-05, "loss": 0.491, "step": 24039 }, { "epoch": 0.6600768808347062, "grad_norm": 0.4007171392440796, "learning_rate": 1.5114631965421886e-05, "loss": 0.5207, "step": 24040 }, { "epoch": 0.6601043382756727, "grad_norm": 0.39342543482780457, "learning_rate": 1.511426083240708e-05, "loss": 0.4838, "step": 24041 }, { "epoch": 0.6601317957166392, "grad_norm": 0.4421384632587433, "learning_rate": 1.5113889689852546e-05, "loss": 0.5926, "step": 24042 }, { "epoch": 0.6601592531576057, "grad_norm": 0.35787492990493774, "learning_rate": 1.5113518537758976e-05, "loss": 0.4773, "step": 24043 }, { "epoch": 0.6601867105985723, "grad_norm": 0.3687651753425598, "learning_rate": 1.511314737612706e-05, "loss": 0.5367, "step": 24044 }, { "epoch": 0.6602141680395387, "grad_norm": 0.35999825596809387, "learning_rate": 1.511277620495749e-05, "loss": 0.4835, "step": 24045 }, { "epoch": 0.6602416254805052, "grad_norm": 0.39089030027389526, "learning_rate": 1.5112405024250966e-05, "loss": 0.5618, "step": 24046 }, { "epoch": 0.6602690829214717, "grad_norm": 0.38359716534614563, "learning_rate": 1.5112033834008171e-05, "loss": 0.5083, "step": 24047 }, { "epoch": 0.6602965403624382, "grad_norm": 0.3704761266708374, "learning_rate": 1.5111662634229803e-05, "loss": 0.5196, "step": 24048 }, { "epoch": 0.6603239978034047, "grad_norm": 0.3586193025112152, "learning_rate": 1.5111291424916554e-05, "loss": 0.4378, "step": 24049 }, { "epoch": 0.6603514552443712, "grad_norm": 0.633192777633667, "learning_rate": 1.511092020606911e-05, "loss": 0.5174, "step": 24050 }, { "epoch": 0.6603789126853378, "grad_norm": 0.42016056180000305, "learning_rate": 1.5110548977688172e-05, "loss": 0.5789, "step": 24051 }, { "epoch": 0.6604063701263042, "grad_norm": 0.363089382648468, "learning_rate": 1.5110177739774429e-05, "loss": 0.406, "step": 24052 }, { "epoch": 0.6604338275672708, "grad_norm": 0.35215166211128235, "learning_rate": 1.5109806492328575e-05, "loss": 0.4863, "step": 24053 }, { "epoch": 0.6604612850082372, "grad_norm": 0.5428338646888733, "learning_rate": 1.5109435235351302e-05, "loss": 0.5706, "step": 24054 }, { "epoch": 0.6604887424492037, "grad_norm": 0.3742062449455261, "learning_rate": 1.5109063968843298e-05, "loss": 0.4954, "step": 24055 }, { "epoch": 0.6605161998901702, "grad_norm": 0.8461037278175354, "learning_rate": 1.510869269280526e-05, "loss": 0.5463, "step": 24056 }, { "epoch": 0.6605436573311367, "grad_norm": 0.3577113449573517, "learning_rate": 1.510832140723788e-05, "loss": 0.4176, "step": 24057 }, { "epoch": 0.6605711147721033, "grad_norm": 0.35823047161102295, "learning_rate": 1.5107950112141851e-05, "loss": 0.4928, "step": 24058 }, { "epoch": 0.6605985722130697, "grad_norm": 0.362909734249115, "learning_rate": 1.5107578807517865e-05, "loss": 0.4787, "step": 24059 }, { "epoch": 0.6606260296540363, "grad_norm": 0.35504424571990967, "learning_rate": 1.5107207493366611e-05, "loss": 0.4398, "step": 24060 }, { "epoch": 0.6606534870950027, "grad_norm": 0.40052786469459534, "learning_rate": 1.5106836169688788e-05, "loss": 0.5272, "step": 24061 }, { "epoch": 0.6606809445359693, "grad_norm": 0.36349132657051086, "learning_rate": 1.5106464836485085e-05, "loss": 0.5023, "step": 24062 }, { "epoch": 0.6607084019769357, "grad_norm": 0.32683902978897095, "learning_rate": 1.5106093493756195e-05, "loss": 0.4705, "step": 24063 }, { "epoch": 0.6607358594179022, "grad_norm": 0.3968454897403717, "learning_rate": 1.5105722141502812e-05, "loss": 0.5029, "step": 24064 }, { "epoch": 0.6607633168588688, "grad_norm": 0.39025482535362244, "learning_rate": 1.5105350779725627e-05, "loss": 0.4629, "step": 24065 }, { "epoch": 0.6607907742998352, "grad_norm": 0.4100659191608429, "learning_rate": 1.5104979408425333e-05, "loss": 0.496, "step": 24066 }, { "epoch": 0.6608182317408018, "grad_norm": 0.35037732124328613, "learning_rate": 1.5104608027602621e-05, "loss": 0.5274, "step": 24067 }, { "epoch": 0.6608456891817682, "grad_norm": 0.36645716428756714, "learning_rate": 1.510423663725819e-05, "loss": 0.4737, "step": 24068 }, { "epoch": 0.6608731466227348, "grad_norm": 0.37400683760643005, "learning_rate": 1.5103865237392725e-05, "loss": 0.5377, "step": 24069 }, { "epoch": 0.6609006040637012, "grad_norm": 0.3864007592201233, "learning_rate": 1.5103493828006923e-05, "loss": 0.4622, "step": 24070 }, { "epoch": 0.6609280615046678, "grad_norm": 0.4001660645008087, "learning_rate": 1.5103122409101479e-05, "loss": 0.5646, "step": 24071 }, { "epoch": 0.6609555189456343, "grad_norm": 0.40740805864334106, "learning_rate": 1.5102750980677081e-05, "loss": 0.504, "step": 24072 }, { "epoch": 0.6609829763866008, "grad_norm": 0.4128975570201874, "learning_rate": 1.5102379542734425e-05, "loss": 0.4433, "step": 24073 }, { "epoch": 0.6610104338275673, "grad_norm": 0.34095457196235657, "learning_rate": 1.51020080952742e-05, "loss": 0.4605, "step": 24074 }, { "epoch": 0.6610378912685337, "grad_norm": 0.38530340790748596, "learning_rate": 1.5101636638297103e-05, "loss": 0.6284, "step": 24075 }, { "epoch": 0.6610653487095003, "grad_norm": 0.395935595035553, "learning_rate": 1.5101265171803828e-05, "loss": 0.5093, "step": 24076 }, { "epoch": 0.6610928061504667, "grad_norm": 0.4380389451980591, "learning_rate": 1.5100893695795061e-05, "loss": 0.5881, "step": 24077 }, { "epoch": 0.6611202635914333, "grad_norm": 0.3459262549877167, "learning_rate": 1.5100522210271502e-05, "loss": 0.4551, "step": 24078 }, { "epoch": 0.6611477210323998, "grad_norm": 0.3491075932979584, "learning_rate": 1.510015071523384e-05, "loss": 0.5219, "step": 24079 }, { "epoch": 0.6611751784733663, "grad_norm": 0.40279272198677063, "learning_rate": 1.509977921068277e-05, "loss": 0.4988, "step": 24080 }, { "epoch": 0.6612026359143328, "grad_norm": 0.37964364886283875, "learning_rate": 1.5099407696618983e-05, "loss": 0.551, "step": 24081 }, { "epoch": 0.6612300933552993, "grad_norm": 0.40749576687812805, "learning_rate": 1.5099036173043174e-05, "loss": 0.5119, "step": 24082 }, { "epoch": 0.6612575507962658, "grad_norm": 0.5994154810905457, "learning_rate": 1.5098664639956036e-05, "loss": 0.4453, "step": 24083 }, { "epoch": 0.6612850082372322, "grad_norm": 0.431636780500412, "learning_rate": 1.5098293097358258e-05, "loss": 0.4968, "step": 24084 }, { "epoch": 0.6613124656781988, "grad_norm": 0.38016730546951294, "learning_rate": 1.5097921545250538e-05, "loss": 0.4865, "step": 24085 }, { "epoch": 0.6613399231191653, "grad_norm": 0.3413971960544586, "learning_rate": 1.5097549983633566e-05, "loss": 0.4759, "step": 24086 }, { "epoch": 0.6613673805601318, "grad_norm": 0.36416012048721313, "learning_rate": 1.509717841250804e-05, "loss": 0.4468, "step": 24087 }, { "epoch": 0.6613948380010983, "grad_norm": 0.37473538517951965, "learning_rate": 1.5096806831874648e-05, "loss": 0.4714, "step": 24088 }, { "epoch": 0.6614222954420648, "grad_norm": 0.5513883233070374, "learning_rate": 1.5096435241734081e-05, "loss": 0.5142, "step": 24089 }, { "epoch": 0.6614497528830313, "grad_norm": 0.39200666546821594, "learning_rate": 1.509606364208704e-05, "loss": 0.5927, "step": 24090 }, { "epoch": 0.6614772103239978, "grad_norm": 0.36687755584716797, "learning_rate": 1.5095692032934213e-05, "loss": 0.493, "step": 24091 }, { "epoch": 0.6615046677649643, "grad_norm": 0.34702202677726746, "learning_rate": 1.5095320414276292e-05, "loss": 0.4591, "step": 24092 }, { "epoch": 0.6615321252059309, "grad_norm": 0.38459476828575134, "learning_rate": 1.5094948786113975e-05, "loss": 0.5869, "step": 24093 }, { "epoch": 0.6615595826468973, "grad_norm": 0.4456349313259125, "learning_rate": 1.5094577148447948e-05, "loss": 0.5504, "step": 24094 }, { "epoch": 0.6615870400878638, "grad_norm": 0.38392919301986694, "learning_rate": 1.5094205501278913e-05, "loss": 0.5209, "step": 24095 }, { "epoch": 0.6616144975288303, "grad_norm": 0.4337863624095917, "learning_rate": 1.5093833844607554e-05, "loss": 0.6019, "step": 24096 }, { "epoch": 0.6616419549697968, "grad_norm": 0.42098167538642883, "learning_rate": 1.5093462178434575e-05, "loss": 0.4615, "step": 24097 }, { "epoch": 0.6616694124107633, "grad_norm": 0.5025973320007324, "learning_rate": 1.5093090502760662e-05, "loss": 0.5172, "step": 24098 }, { "epoch": 0.6616968698517298, "grad_norm": 0.3784642815589905, "learning_rate": 1.5092718817586507e-05, "loss": 0.487, "step": 24099 }, { "epoch": 0.6617243272926964, "grad_norm": 0.35975584387779236, "learning_rate": 1.509234712291281e-05, "loss": 0.434, "step": 24100 }, { "epoch": 0.6617517847336628, "grad_norm": 0.384817510843277, "learning_rate": 1.5091975418740257e-05, "loss": 0.5038, "step": 24101 }, { "epoch": 0.6617792421746294, "grad_norm": 0.5579428672790527, "learning_rate": 1.5091603705069545e-05, "loss": 0.535, "step": 24102 }, { "epoch": 0.6618066996155958, "grad_norm": 0.520343542098999, "learning_rate": 1.5091231981901367e-05, "loss": 0.5234, "step": 24103 }, { "epoch": 0.6618341570565623, "grad_norm": 0.3436465561389923, "learning_rate": 1.5090860249236416e-05, "loss": 0.4964, "step": 24104 }, { "epoch": 0.6618616144975288, "grad_norm": 0.35006874799728394, "learning_rate": 1.5090488507075387e-05, "loss": 0.5023, "step": 24105 }, { "epoch": 0.6618890719384953, "grad_norm": 0.37106871604919434, "learning_rate": 1.5090116755418973e-05, "loss": 0.4692, "step": 24106 }, { "epoch": 0.6619165293794619, "grad_norm": 0.3689243197441101, "learning_rate": 1.5089744994267866e-05, "loss": 0.4702, "step": 24107 }, { "epoch": 0.6619439868204283, "grad_norm": 0.4075925946235657, "learning_rate": 1.508937322362276e-05, "loss": 0.5174, "step": 24108 }, { "epoch": 0.6619714442613949, "grad_norm": 0.42445439100265503, "learning_rate": 1.5089001443484347e-05, "loss": 0.513, "step": 24109 }, { "epoch": 0.6619989017023613, "grad_norm": 0.37527552247047424, "learning_rate": 1.5088629653853324e-05, "loss": 0.4822, "step": 24110 }, { "epoch": 0.6620263591433279, "grad_norm": 0.334225058555603, "learning_rate": 1.5088257854730384e-05, "loss": 0.4879, "step": 24111 }, { "epoch": 0.6620538165842943, "grad_norm": 0.422102153301239, "learning_rate": 1.5087886046116216e-05, "loss": 0.5615, "step": 24112 }, { "epoch": 0.6620812740252608, "grad_norm": 0.41751548647880554, "learning_rate": 1.5087514228011517e-05, "loss": 0.5649, "step": 24113 }, { "epoch": 0.6621087314662274, "grad_norm": 0.39554914832115173, "learning_rate": 1.508714240041698e-05, "loss": 0.5092, "step": 24114 }, { "epoch": 0.6621361889071938, "grad_norm": 0.43264371156692505, "learning_rate": 1.5086770563333301e-05, "loss": 0.4845, "step": 24115 }, { "epoch": 0.6621636463481604, "grad_norm": 0.3488117754459381, "learning_rate": 1.5086398716761172e-05, "loss": 0.4437, "step": 24116 }, { "epoch": 0.6621911037891268, "grad_norm": 0.385703444480896, "learning_rate": 1.5086026860701281e-05, "loss": 0.5007, "step": 24117 }, { "epoch": 0.6622185612300934, "grad_norm": 0.41718965768814087, "learning_rate": 1.5085654995154333e-05, "loss": 0.4635, "step": 24118 }, { "epoch": 0.6622460186710598, "grad_norm": 0.518252968788147, "learning_rate": 1.5085283120121013e-05, "loss": 0.4583, "step": 24119 }, { "epoch": 0.6622734761120264, "grad_norm": 0.3761674463748932, "learning_rate": 1.5084911235602016e-05, "loss": 0.5112, "step": 24120 }, { "epoch": 0.6623009335529929, "grad_norm": 0.336700439453125, "learning_rate": 1.5084539341598036e-05, "loss": 0.4203, "step": 24121 }, { "epoch": 0.6623283909939593, "grad_norm": 0.34201329946517944, "learning_rate": 1.5084167438109767e-05, "loss": 0.4344, "step": 24122 }, { "epoch": 0.6623558484349259, "grad_norm": 0.4143046736717224, "learning_rate": 1.5083795525137906e-05, "loss": 0.48, "step": 24123 }, { "epoch": 0.6623833058758923, "grad_norm": 0.38507211208343506, "learning_rate": 1.5083423602683142e-05, "loss": 0.5533, "step": 24124 }, { "epoch": 0.6624107633168589, "grad_norm": 0.3607304096221924, "learning_rate": 1.5083051670746169e-05, "loss": 0.5231, "step": 24125 }, { "epoch": 0.6624382207578253, "grad_norm": 0.3629733920097351, "learning_rate": 1.5082679729327685e-05, "loss": 0.4819, "step": 24126 }, { "epoch": 0.6624656781987919, "grad_norm": 0.40217646956443787, "learning_rate": 1.5082307778428378e-05, "loss": 0.4728, "step": 24127 }, { "epoch": 0.6624931356397584, "grad_norm": 0.4167408347129822, "learning_rate": 1.508193581804895e-05, "loss": 0.5381, "step": 24128 }, { "epoch": 0.6625205930807249, "grad_norm": 0.42048609256744385, "learning_rate": 1.5081563848190084e-05, "loss": 0.5138, "step": 24129 }, { "epoch": 0.6625480505216914, "grad_norm": 0.3978072702884674, "learning_rate": 1.5081191868852481e-05, "loss": 0.5127, "step": 24130 }, { "epoch": 0.6625755079626579, "grad_norm": 0.3801674544811249, "learning_rate": 1.5080819880036837e-05, "loss": 0.4829, "step": 24131 }, { "epoch": 0.6626029654036244, "grad_norm": 0.3630140721797943, "learning_rate": 1.508044788174384e-05, "loss": 0.4327, "step": 24132 }, { "epoch": 0.6626304228445908, "grad_norm": 0.4014008045196533, "learning_rate": 1.5080075873974185e-05, "loss": 0.5311, "step": 24133 }, { "epoch": 0.6626578802855574, "grad_norm": 0.4049926996231079, "learning_rate": 1.507970385672857e-05, "loss": 0.5948, "step": 24134 }, { "epoch": 0.6626853377265239, "grad_norm": 0.37951719760894775, "learning_rate": 1.5079331830007681e-05, "loss": 0.5267, "step": 24135 }, { "epoch": 0.6627127951674904, "grad_norm": 0.36609435081481934, "learning_rate": 1.5078959793812223e-05, "loss": 0.512, "step": 24136 }, { "epoch": 0.6627402526084569, "grad_norm": 0.3981935977935791, "learning_rate": 1.5078587748142882e-05, "loss": 0.5145, "step": 24137 }, { "epoch": 0.6627677100494234, "grad_norm": 0.40482285618782043, "learning_rate": 1.5078215693000351e-05, "loss": 0.5976, "step": 24138 }, { "epoch": 0.6627951674903899, "grad_norm": 0.3753305673599243, "learning_rate": 1.5077843628385332e-05, "loss": 0.5233, "step": 24139 }, { "epoch": 0.6628226249313564, "grad_norm": 0.37663978338241577, "learning_rate": 1.5077471554298506e-05, "loss": 0.4939, "step": 24140 }, { "epoch": 0.6628500823723229, "grad_norm": 0.3635084927082062, "learning_rate": 1.5077099470740582e-05, "loss": 0.4412, "step": 24141 }, { "epoch": 0.6628775398132895, "grad_norm": 0.3875877261161804, "learning_rate": 1.5076727377712245e-05, "loss": 0.5379, "step": 24142 }, { "epoch": 0.6629049972542559, "grad_norm": 0.37023869156837463, "learning_rate": 1.5076355275214192e-05, "loss": 0.5542, "step": 24143 }, { "epoch": 0.6629324546952224, "grad_norm": 0.34797853231430054, "learning_rate": 1.5075983163247116e-05, "loss": 0.5922, "step": 24144 }, { "epoch": 0.6629599121361889, "grad_norm": 0.3749619424343109, "learning_rate": 1.5075611041811708e-05, "loss": 0.5123, "step": 24145 }, { "epoch": 0.6629873695771554, "grad_norm": 0.35781553387641907, "learning_rate": 1.5075238910908669e-05, "loss": 0.5244, "step": 24146 }, { "epoch": 0.6630148270181219, "grad_norm": 0.36886435747146606, "learning_rate": 1.5074866770538692e-05, "loss": 0.5059, "step": 24147 }, { "epoch": 0.6630422844590884, "grad_norm": 0.37600159645080566, "learning_rate": 1.5074494620702464e-05, "loss": 0.5023, "step": 24148 }, { "epoch": 0.663069741900055, "grad_norm": 0.33499136567115784, "learning_rate": 1.5074122461400687e-05, "loss": 0.4933, "step": 24149 }, { "epoch": 0.6630971993410214, "grad_norm": 0.4667464792728424, "learning_rate": 1.507375029263405e-05, "loss": 0.4926, "step": 24150 }, { "epoch": 0.663124656781988, "grad_norm": 0.363365113735199, "learning_rate": 1.507337811440325e-05, "loss": 0.4143, "step": 24151 }, { "epoch": 0.6631521142229544, "grad_norm": 0.3883298337459564, "learning_rate": 1.5073005926708981e-05, "loss": 0.487, "step": 24152 }, { "epoch": 0.6631795716639209, "grad_norm": 0.3779236078262329, "learning_rate": 1.5072633729551936e-05, "loss": 0.4739, "step": 24153 }, { "epoch": 0.6632070291048874, "grad_norm": 0.3439730405807495, "learning_rate": 1.507226152293281e-05, "loss": 0.5902, "step": 24154 }, { "epoch": 0.6632344865458539, "grad_norm": 0.37182050943374634, "learning_rate": 1.50718893068523e-05, "loss": 0.5425, "step": 24155 }, { "epoch": 0.6632619439868205, "grad_norm": 0.40336528420448303, "learning_rate": 1.5071517081311093e-05, "loss": 0.4826, "step": 24156 }, { "epoch": 0.6632894014277869, "grad_norm": 0.41912999749183655, "learning_rate": 1.5071144846309894e-05, "loss": 0.518, "step": 24157 }, { "epoch": 0.6633168588687535, "grad_norm": 0.4210856854915619, "learning_rate": 1.5070772601849386e-05, "loss": 0.5113, "step": 24158 }, { "epoch": 0.6633443163097199, "grad_norm": 0.39987993240356445, "learning_rate": 1.5070400347930271e-05, "loss": 0.5336, "step": 24159 }, { "epoch": 0.6633717737506865, "grad_norm": 0.4171293377876282, "learning_rate": 1.507002808455324e-05, "loss": 0.4919, "step": 24160 }, { "epoch": 0.6633992311916529, "grad_norm": 0.4034542739391327, "learning_rate": 1.5069655811718988e-05, "loss": 0.563, "step": 24161 }, { "epoch": 0.6634266886326194, "grad_norm": 0.39471226930618286, "learning_rate": 1.5069283529428213e-05, "loss": 0.4632, "step": 24162 }, { "epoch": 0.663454146073586, "grad_norm": 0.3965429663658142, "learning_rate": 1.5068911237681606e-05, "loss": 0.6324, "step": 24163 }, { "epoch": 0.6634816035145524, "grad_norm": 0.3571731150150299, "learning_rate": 1.5068538936479859e-05, "loss": 0.517, "step": 24164 }, { "epoch": 0.663509060955519, "grad_norm": 0.37974628806114197, "learning_rate": 1.506816662582367e-05, "loss": 0.6562, "step": 24165 }, { "epoch": 0.6635365183964854, "grad_norm": 0.4550722539424896, "learning_rate": 1.5067794305713732e-05, "loss": 0.5554, "step": 24166 }, { "epoch": 0.663563975837452, "grad_norm": 0.39317789673805237, "learning_rate": 1.5067421976150743e-05, "loss": 0.5683, "step": 24167 }, { "epoch": 0.6635914332784184, "grad_norm": 0.3563327193260193, "learning_rate": 1.5067049637135393e-05, "loss": 0.44, "step": 24168 }, { "epoch": 0.663618890719385, "grad_norm": 0.39784228801727295, "learning_rate": 1.506667728866838e-05, "loss": 0.5806, "step": 24169 }, { "epoch": 0.6636463481603515, "grad_norm": 0.39702364802360535, "learning_rate": 1.5066304930750393e-05, "loss": 0.4914, "step": 24170 }, { "epoch": 0.663673805601318, "grad_norm": 0.40982237458229065, "learning_rate": 1.506593256338213e-05, "loss": 0.5524, "step": 24171 }, { "epoch": 0.6637012630422845, "grad_norm": 0.3796665668487549, "learning_rate": 1.506556018656429e-05, "loss": 0.5302, "step": 24172 }, { "epoch": 0.6637287204832509, "grad_norm": 0.356151819229126, "learning_rate": 1.5065187800297563e-05, "loss": 0.5683, "step": 24173 }, { "epoch": 0.6637561779242175, "grad_norm": 0.41748401522636414, "learning_rate": 1.506481540458264e-05, "loss": 0.4465, "step": 24174 }, { "epoch": 0.6637836353651839, "grad_norm": 0.3835783302783966, "learning_rate": 1.5064442999420227e-05, "loss": 0.4899, "step": 24175 }, { "epoch": 0.6638110928061505, "grad_norm": 0.4015693664550781, "learning_rate": 1.5064070584811003e-05, "loss": 0.4784, "step": 24176 }, { "epoch": 0.663838550247117, "grad_norm": 0.3756457567214966, "learning_rate": 1.5063698160755675e-05, "loss": 0.5327, "step": 24177 }, { "epoch": 0.6638660076880835, "grad_norm": 0.3549119830131531, "learning_rate": 1.5063325727254933e-05, "loss": 0.506, "step": 24178 }, { "epoch": 0.66389346512905, "grad_norm": 0.3947664201259613, "learning_rate": 1.5062953284309474e-05, "loss": 0.5255, "step": 24179 }, { "epoch": 0.6639209225700164, "grad_norm": 0.35491350293159485, "learning_rate": 1.5062580831919991e-05, "loss": 0.431, "step": 24180 }, { "epoch": 0.663948380010983, "grad_norm": 0.3664568364620209, "learning_rate": 1.5062208370087178e-05, "loss": 0.5761, "step": 24181 }, { "epoch": 0.6639758374519494, "grad_norm": 0.5195244550704956, "learning_rate": 1.5061835898811732e-05, "loss": 0.4687, "step": 24182 }, { "epoch": 0.664003294892916, "grad_norm": 0.3594239056110382, "learning_rate": 1.5061463418094347e-05, "loss": 0.4965, "step": 24183 }, { "epoch": 0.6640307523338825, "grad_norm": 0.31815096735954285, "learning_rate": 1.5061090927935715e-05, "loss": 0.4363, "step": 24184 }, { "epoch": 0.664058209774849, "grad_norm": 0.3921823501586914, "learning_rate": 1.5060718428336534e-05, "loss": 0.4861, "step": 24185 }, { "epoch": 0.6640856672158155, "grad_norm": 0.6466066241264343, "learning_rate": 1.5060345919297499e-05, "loss": 0.4584, "step": 24186 }, { "epoch": 0.664113124656782, "grad_norm": 0.3789427578449249, "learning_rate": 1.5059973400819302e-05, "loss": 0.4593, "step": 24187 }, { "epoch": 0.6641405820977485, "grad_norm": 0.4090641438961029, "learning_rate": 1.505960087290264e-05, "loss": 0.4895, "step": 24188 }, { "epoch": 0.664168039538715, "grad_norm": 0.3521094024181366, "learning_rate": 1.5059228335548209e-05, "loss": 0.4475, "step": 24189 }, { "epoch": 0.6641954969796815, "grad_norm": 0.370278537273407, "learning_rate": 1.50588557887567e-05, "loss": 0.5097, "step": 24190 }, { "epoch": 0.664222954420648, "grad_norm": 0.40206268429756165, "learning_rate": 1.5058483232528813e-05, "loss": 0.5239, "step": 24191 }, { "epoch": 0.6642504118616145, "grad_norm": 0.3939264416694641, "learning_rate": 1.5058110666865238e-05, "loss": 0.4175, "step": 24192 }, { "epoch": 0.664277869302581, "grad_norm": 0.38880443572998047, "learning_rate": 1.5057738091766674e-05, "loss": 0.4847, "step": 24193 }, { "epoch": 0.6643053267435475, "grad_norm": 0.3916959762573242, "learning_rate": 1.5057365507233814e-05, "loss": 0.5073, "step": 24194 }, { "epoch": 0.664332784184514, "grad_norm": 0.3489813804626465, "learning_rate": 1.5056992913267352e-05, "loss": 0.4986, "step": 24195 }, { "epoch": 0.6643602416254805, "grad_norm": 0.3755294680595398, "learning_rate": 1.5056620309867987e-05, "loss": 0.4871, "step": 24196 }, { "epoch": 0.664387699066447, "grad_norm": 0.3563813269138336, "learning_rate": 1.5056247697036406e-05, "loss": 0.4945, "step": 24197 }, { "epoch": 0.6644151565074136, "grad_norm": 0.37481462955474854, "learning_rate": 1.5055875074773313e-05, "loss": 0.5441, "step": 24198 }, { "epoch": 0.66444261394838, "grad_norm": 0.5117475390434265, "learning_rate": 1.5055502443079401e-05, "loss": 0.5517, "step": 24199 }, { "epoch": 0.6644700713893466, "grad_norm": 0.36590078473091125, "learning_rate": 1.505512980195536e-05, "loss": 0.5025, "step": 24200 }, { "epoch": 0.664497528830313, "grad_norm": 0.368334025144577, "learning_rate": 1.505475715140189e-05, "loss": 0.4817, "step": 24201 }, { "epoch": 0.6645249862712795, "grad_norm": 0.380587637424469, "learning_rate": 1.5054384491419685e-05, "loss": 0.4824, "step": 24202 }, { "epoch": 0.664552443712246, "grad_norm": 0.36474063992500305, "learning_rate": 1.5054011822009438e-05, "loss": 0.5386, "step": 24203 }, { "epoch": 0.6645799011532125, "grad_norm": 0.343766450881958, "learning_rate": 1.5053639143171847e-05, "loss": 0.4076, "step": 24204 }, { "epoch": 0.6646073585941791, "grad_norm": 0.43854156136512756, "learning_rate": 1.5053266454907607e-05, "loss": 0.5044, "step": 24205 }, { "epoch": 0.6646348160351455, "grad_norm": 0.39683741331100464, "learning_rate": 1.5052893757217411e-05, "loss": 0.465, "step": 24206 }, { "epoch": 0.6646622734761121, "grad_norm": 0.3657856285572052, "learning_rate": 1.5052521050101954e-05, "loss": 0.5207, "step": 24207 }, { "epoch": 0.6646897309170785, "grad_norm": 0.4090490937232971, "learning_rate": 1.5052148333561936e-05, "loss": 0.4543, "step": 24208 }, { "epoch": 0.664717188358045, "grad_norm": 0.4072422683238983, "learning_rate": 1.5051775607598047e-05, "loss": 0.5824, "step": 24209 }, { "epoch": 0.6647446457990115, "grad_norm": 0.3620085120201111, "learning_rate": 1.5051402872210985e-05, "loss": 0.4899, "step": 24210 }, { "epoch": 0.664772103239978, "grad_norm": 0.3452054560184479, "learning_rate": 1.5051030127401442e-05, "loss": 0.5201, "step": 24211 }, { "epoch": 0.6647995606809446, "grad_norm": 0.34123384952545166, "learning_rate": 1.5050657373170121e-05, "loss": 0.4407, "step": 24212 }, { "epoch": 0.664827018121911, "grad_norm": 0.43103909492492676, "learning_rate": 1.5050284609517707e-05, "loss": 0.6199, "step": 24213 }, { "epoch": 0.6648544755628776, "grad_norm": 0.3763953447341919, "learning_rate": 1.5049911836444903e-05, "loss": 0.5346, "step": 24214 }, { "epoch": 0.664881933003844, "grad_norm": 0.44751787185668945, "learning_rate": 1.5049539053952398e-05, "loss": 0.4734, "step": 24215 }, { "epoch": 0.6649093904448106, "grad_norm": 0.4253011643886566, "learning_rate": 1.5049166262040894e-05, "loss": 0.5102, "step": 24216 }, { "epoch": 0.664936847885777, "grad_norm": 0.37465086579322815, "learning_rate": 1.5048793460711086e-05, "loss": 0.5047, "step": 24217 }, { "epoch": 0.6649643053267436, "grad_norm": 0.43068674206733704, "learning_rate": 1.5048420649963662e-05, "loss": 0.5131, "step": 24218 }, { "epoch": 0.6649917627677101, "grad_norm": 0.39951497316360474, "learning_rate": 1.5048047829799326e-05, "loss": 0.4901, "step": 24219 }, { "epoch": 0.6650192202086765, "grad_norm": 0.5702389478683472, "learning_rate": 1.5047675000218766e-05, "loss": 0.4603, "step": 24220 }, { "epoch": 0.6650466776496431, "grad_norm": 0.35309115052223206, "learning_rate": 1.5047302161222684e-05, "loss": 0.4338, "step": 24221 }, { "epoch": 0.6650741350906095, "grad_norm": 0.38658371567726135, "learning_rate": 1.504692931281177e-05, "loss": 0.4785, "step": 24222 }, { "epoch": 0.6651015925315761, "grad_norm": 0.3986166715621948, "learning_rate": 1.5046556454986725e-05, "loss": 0.4921, "step": 24223 }, { "epoch": 0.6651290499725425, "grad_norm": 0.37376898527145386, "learning_rate": 1.504618358774824e-05, "loss": 0.6043, "step": 24224 }, { "epoch": 0.6651565074135091, "grad_norm": 0.44485223293304443, "learning_rate": 1.5045810711097009e-05, "loss": 0.5251, "step": 24225 }, { "epoch": 0.6651839648544756, "grad_norm": 0.35929742455482483, "learning_rate": 1.5045437825033736e-05, "loss": 0.4902, "step": 24226 }, { "epoch": 0.6652114222954421, "grad_norm": 0.4207545220851898, "learning_rate": 1.504506492955911e-05, "loss": 0.4933, "step": 24227 }, { "epoch": 0.6652388797364086, "grad_norm": 0.39709773659706116, "learning_rate": 1.5044692024673824e-05, "loss": 0.4969, "step": 24228 }, { "epoch": 0.665266337177375, "grad_norm": 0.4719196557998657, "learning_rate": 1.504431911037858e-05, "loss": 0.54, "step": 24229 }, { "epoch": 0.6652937946183416, "grad_norm": 0.40913572907447815, "learning_rate": 1.5043946186674072e-05, "loss": 0.4606, "step": 24230 }, { "epoch": 0.665321252059308, "grad_norm": 0.3766596019268036, "learning_rate": 1.5043573253560992e-05, "loss": 0.3952, "step": 24231 }, { "epoch": 0.6653487095002746, "grad_norm": 0.42699214816093445, "learning_rate": 1.5043200311040041e-05, "loss": 0.5142, "step": 24232 }, { "epoch": 0.6653761669412411, "grad_norm": 0.3776165246963501, "learning_rate": 1.5042827359111908e-05, "loss": 0.5193, "step": 24233 }, { "epoch": 0.6654036243822076, "grad_norm": 0.38419708609580994, "learning_rate": 1.5042454397777295e-05, "loss": 0.3944, "step": 24234 }, { "epoch": 0.6654310818231741, "grad_norm": 0.3634811043739319, "learning_rate": 1.5042081427036896e-05, "loss": 0.4968, "step": 24235 }, { "epoch": 0.6654585392641406, "grad_norm": 0.40527984499931335, "learning_rate": 1.5041708446891404e-05, "loss": 0.4491, "step": 24236 }, { "epoch": 0.6654859967051071, "grad_norm": 0.40782490372657776, "learning_rate": 1.5041335457341517e-05, "loss": 0.577, "step": 24237 }, { "epoch": 0.6655134541460735, "grad_norm": 0.41175195574760437, "learning_rate": 1.504096245838793e-05, "loss": 0.5168, "step": 24238 }, { "epoch": 0.6655409115870401, "grad_norm": 0.4158669114112854, "learning_rate": 1.5040589450031342e-05, "loss": 0.4742, "step": 24239 }, { "epoch": 0.6655683690280065, "grad_norm": 0.3490431308746338, "learning_rate": 1.5040216432272444e-05, "loss": 0.4117, "step": 24240 }, { "epoch": 0.6655958264689731, "grad_norm": 0.41116976737976074, "learning_rate": 1.503984340511193e-05, "loss": 0.5726, "step": 24241 }, { "epoch": 0.6656232839099396, "grad_norm": 0.38836508989334106, "learning_rate": 1.5039470368550507e-05, "loss": 0.5151, "step": 24242 }, { "epoch": 0.6656507413509061, "grad_norm": 0.3445570468902588, "learning_rate": 1.5039097322588859e-05, "loss": 0.4741, "step": 24243 }, { "epoch": 0.6656781987918726, "grad_norm": 0.45580556988716125, "learning_rate": 1.5038724267227686e-05, "loss": 0.5107, "step": 24244 }, { "epoch": 0.6657056562328391, "grad_norm": 0.38449615240097046, "learning_rate": 1.5038351202467686e-05, "loss": 0.388, "step": 24245 }, { "epoch": 0.6657331136738056, "grad_norm": 0.3545491397380829, "learning_rate": 1.503797812830955e-05, "loss": 0.4464, "step": 24246 }, { "epoch": 0.665760571114772, "grad_norm": 0.43351510167121887, "learning_rate": 1.5037605044753979e-05, "loss": 0.5531, "step": 24247 }, { "epoch": 0.6657880285557386, "grad_norm": 0.36498621106147766, "learning_rate": 1.5037231951801665e-05, "loss": 0.4834, "step": 24248 }, { "epoch": 0.6658154859967051, "grad_norm": 0.3675636947154999, "learning_rate": 1.5036858849453308e-05, "loss": 0.5325, "step": 24249 }, { "epoch": 0.6658429434376716, "grad_norm": 0.45448777079582214, "learning_rate": 1.5036485737709598e-05, "loss": 0.5457, "step": 24250 }, { "epoch": 0.6658704008786381, "grad_norm": 0.4208862781524658, "learning_rate": 1.503611261657124e-05, "loss": 0.4814, "step": 24251 }, { "epoch": 0.6658978583196046, "grad_norm": 0.3787975013256073, "learning_rate": 1.5035739486038922e-05, "loss": 0.4756, "step": 24252 }, { "epoch": 0.6659253157605711, "grad_norm": 0.3726511299610138, "learning_rate": 1.503536634611334e-05, "loss": 0.4438, "step": 24253 }, { "epoch": 0.6659527732015376, "grad_norm": 0.41570496559143066, "learning_rate": 1.5034993196795195e-05, "loss": 0.4989, "step": 24254 }, { "epoch": 0.6659802306425041, "grad_norm": 0.34305539727211, "learning_rate": 1.5034620038085181e-05, "loss": 0.4679, "step": 24255 }, { "epoch": 0.6660076880834707, "grad_norm": 0.37321698665618896, "learning_rate": 1.5034246869983994e-05, "loss": 0.4885, "step": 24256 }, { "epoch": 0.6660351455244371, "grad_norm": 0.42823684215545654, "learning_rate": 1.5033873692492331e-05, "loss": 0.533, "step": 24257 }, { "epoch": 0.6660626029654036, "grad_norm": 0.3562394380569458, "learning_rate": 1.5033500505610886e-05, "loss": 0.492, "step": 24258 }, { "epoch": 0.6660900604063701, "grad_norm": 0.39827442169189453, "learning_rate": 1.5033127309340355e-05, "loss": 0.5359, "step": 24259 }, { "epoch": 0.6661175178473366, "grad_norm": 0.3797565698623657, "learning_rate": 1.5032754103681436e-05, "loss": 0.5106, "step": 24260 }, { "epoch": 0.6661449752883031, "grad_norm": 0.34880509972572327, "learning_rate": 1.503238088863482e-05, "loss": 0.4892, "step": 24261 }, { "epoch": 0.6661724327292696, "grad_norm": 0.32279735803604126, "learning_rate": 1.5032007664201213e-05, "loss": 0.4818, "step": 24262 }, { "epoch": 0.6661998901702362, "grad_norm": 0.33914434909820557, "learning_rate": 1.5031634430381306e-05, "loss": 0.5107, "step": 24263 }, { "epoch": 0.6662273476112026, "grad_norm": 0.3743007779121399, "learning_rate": 1.5031261187175792e-05, "loss": 0.5172, "step": 24264 }, { "epoch": 0.6662548050521692, "grad_norm": 0.3478195369243622, "learning_rate": 1.503088793458537e-05, "loss": 0.4637, "step": 24265 }, { "epoch": 0.6662822624931356, "grad_norm": 0.3925755023956299, "learning_rate": 1.5030514672610738e-05, "loss": 0.537, "step": 24266 }, { "epoch": 0.6663097199341022, "grad_norm": 0.3946317136287689, "learning_rate": 1.503014140125259e-05, "loss": 0.5593, "step": 24267 }, { "epoch": 0.6663371773750686, "grad_norm": 0.3520088195800781, "learning_rate": 1.5029768120511623e-05, "loss": 0.4634, "step": 24268 }, { "epoch": 0.6663646348160351, "grad_norm": 0.370897501707077, "learning_rate": 1.5029394830388535e-05, "loss": 0.5052, "step": 24269 }, { "epoch": 0.6663920922570017, "grad_norm": 0.4122379720211029, "learning_rate": 1.5029021530884016e-05, "loss": 0.5165, "step": 24270 }, { "epoch": 0.6664195496979681, "grad_norm": 0.4931597411632538, "learning_rate": 1.502864822199877e-05, "loss": 0.417, "step": 24271 }, { "epoch": 0.6664470071389347, "grad_norm": 0.4112676978111267, "learning_rate": 1.5028274903733486e-05, "loss": 0.5693, "step": 24272 }, { "epoch": 0.6664744645799011, "grad_norm": 0.36541077494621277, "learning_rate": 1.5027901576088871e-05, "loss": 0.509, "step": 24273 }, { "epoch": 0.6665019220208677, "grad_norm": 0.4211333990097046, "learning_rate": 1.5027528239065611e-05, "loss": 0.5073, "step": 24274 }, { "epoch": 0.6665293794618341, "grad_norm": 2.5447514057159424, "learning_rate": 1.5027154892664406e-05, "loss": 0.5188, "step": 24275 }, { "epoch": 0.6665568369028007, "grad_norm": 0.34201720356941223, "learning_rate": 1.5026781536885955e-05, "loss": 0.4568, "step": 24276 }, { "epoch": 0.6665842943437672, "grad_norm": 0.5979773998260498, "learning_rate": 1.502640817173095e-05, "loss": 0.4701, "step": 24277 }, { "epoch": 0.6666117517847336, "grad_norm": 0.3147057890892029, "learning_rate": 1.502603479720009e-05, "loss": 0.4287, "step": 24278 }, { "epoch": 0.6666392092257002, "grad_norm": 0.5007888078689575, "learning_rate": 1.5025661413294073e-05, "loss": 0.4065, "step": 24279 }, { "epoch": 0.6666666666666666, "grad_norm": 0.40072354674339294, "learning_rate": 1.5025288020013588e-05, "loss": 0.4746, "step": 24280 }, { "epoch": 0.6666941241076332, "grad_norm": 0.46675577759742737, "learning_rate": 1.5024914617359341e-05, "loss": 0.5428, "step": 24281 }, { "epoch": 0.6667215815485996, "grad_norm": 0.4897279441356659, "learning_rate": 1.5024541205332024e-05, "loss": 0.6035, "step": 24282 }, { "epoch": 0.6667490389895662, "grad_norm": 0.3675212860107422, "learning_rate": 1.5024167783932336e-05, "loss": 0.4935, "step": 24283 }, { "epoch": 0.6667764964305327, "grad_norm": 0.4407653510570526, "learning_rate": 1.502379435316097e-05, "loss": 0.6273, "step": 24284 }, { "epoch": 0.6668039538714992, "grad_norm": 0.41590872406959534, "learning_rate": 1.5023420913018622e-05, "loss": 0.6079, "step": 24285 }, { "epoch": 0.6668314113124657, "grad_norm": 0.3302535116672516, "learning_rate": 1.5023047463505994e-05, "loss": 0.479, "step": 24286 }, { "epoch": 0.6668588687534321, "grad_norm": 0.41366657614707947, "learning_rate": 1.5022674004623773e-05, "loss": 0.5534, "step": 24287 }, { "epoch": 0.6668863261943987, "grad_norm": 0.3608993589878082, "learning_rate": 1.5022300536372669e-05, "loss": 0.4465, "step": 24288 }, { "epoch": 0.6669137836353651, "grad_norm": 0.37615910172462463, "learning_rate": 1.502192705875337e-05, "loss": 0.4666, "step": 24289 }, { "epoch": 0.6669412410763317, "grad_norm": 0.3864412009716034, "learning_rate": 1.5021553571766574e-05, "loss": 0.5269, "step": 24290 }, { "epoch": 0.6669686985172982, "grad_norm": 0.49482518434524536, "learning_rate": 1.5021180075412978e-05, "loss": 0.5628, "step": 24291 }, { "epoch": 0.6669961559582647, "grad_norm": 0.6335674524307251, "learning_rate": 1.5020806569693278e-05, "loss": 0.4642, "step": 24292 }, { "epoch": 0.6670236133992312, "grad_norm": 0.3880801796913147, "learning_rate": 1.5020433054608168e-05, "loss": 0.5422, "step": 24293 }, { "epoch": 0.6670510708401977, "grad_norm": 0.36019986867904663, "learning_rate": 1.5020059530158354e-05, "loss": 0.4627, "step": 24294 }, { "epoch": 0.6670785282811642, "grad_norm": 0.39428842067718506, "learning_rate": 1.5019685996344522e-05, "loss": 0.5287, "step": 24295 }, { "epoch": 0.6671059857221306, "grad_norm": 0.36318179965019226, "learning_rate": 1.5019312453167377e-05, "loss": 0.5142, "step": 24296 }, { "epoch": 0.6671334431630972, "grad_norm": 0.4445419907569885, "learning_rate": 1.501893890062761e-05, "loss": 0.5263, "step": 24297 }, { "epoch": 0.6671609006040637, "grad_norm": 0.4042070508003235, "learning_rate": 1.501856533872592e-05, "loss": 0.5395, "step": 24298 }, { "epoch": 0.6671883580450302, "grad_norm": 0.4005545675754547, "learning_rate": 1.5018191767463006e-05, "loss": 0.4429, "step": 24299 }, { "epoch": 0.6672158154859967, "grad_norm": 0.40798091888427734, "learning_rate": 1.501781818683956e-05, "loss": 0.4967, "step": 24300 }, { "epoch": 0.6672432729269632, "grad_norm": 0.38643401861190796, "learning_rate": 1.5017444596856283e-05, "loss": 0.5507, "step": 24301 }, { "epoch": 0.6672707303679297, "grad_norm": 0.3932947814464569, "learning_rate": 1.5017070997513871e-05, "loss": 0.5038, "step": 24302 }, { "epoch": 0.6672981878088962, "grad_norm": 0.3761448860168457, "learning_rate": 1.501669738881302e-05, "loss": 0.496, "step": 24303 }, { "epoch": 0.6673256452498627, "grad_norm": 0.3224492371082306, "learning_rate": 1.5016323770754429e-05, "loss": 0.4321, "step": 24304 }, { "epoch": 0.6673531026908293, "grad_norm": 0.38647928833961487, "learning_rate": 1.5015950143338791e-05, "loss": 0.413, "step": 24305 }, { "epoch": 0.6673805601317957, "grad_norm": 0.37665998935699463, "learning_rate": 1.5015576506566802e-05, "loss": 0.5239, "step": 24306 }, { "epoch": 0.6674080175727622, "grad_norm": 0.3810124695301056, "learning_rate": 1.5015202860439168e-05, "loss": 0.467, "step": 24307 }, { "epoch": 0.6674354750137287, "grad_norm": 0.41939398646354675, "learning_rate": 1.5014829204956576e-05, "loss": 0.6226, "step": 24308 }, { "epoch": 0.6674629324546952, "grad_norm": 0.33680832386016846, "learning_rate": 1.5014455540119727e-05, "loss": 0.4647, "step": 24309 }, { "epoch": 0.6674903898956617, "grad_norm": 0.378614604473114, "learning_rate": 1.5014081865929323e-05, "loss": 0.5379, "step": 24310 }, { "epoch": 0.6675178473366282, "grad_norm": 0.40570586919784546, "learning_rate": 1.5013708182386052e-05, "loss": 0.5612, "step": 24311 }, { "epoch": 0.6675453047775948, "grad_norm": 0.4285777509212494, "learning_rate": 1.5013334489490613e-05, "loss": 0.5702, "step": 24312 }, { "epoch": 0.6675727622185612, "grad_norm": 0.3776269555091858, "learning_rate": 1.5012960787243706e-05, "loss": 0.4954, "step": 24313 }, { "epoch": 0.6676002196595278, "grad_norm": 0.4037593603134155, "learning_rate": 1.501258707564603e-05, "loss": 0.4616, "step": 24314 }, { "epoch": 0.6676276771004942, "grad_norm": 0.3557252585887909, "learning_rate": 1.5012213354698278e-05, "loss": 0.4646, "step": 24315 }, { "epoch": 0.6676551345414607, "grad_norm": 0.390337735414505, "learning_rate": 1.5011839624401148e-05, "loss": 0.472, "step": 24316 }, { "epoch": 0.6676825919824272, "grad_norm": 0.41521748900413513, "learning_rate": 1.5011465884755339e-05, "loss": 0.4235, "step": 24317 }, { "epoch": 0.6677100494233937, "grad_norm": 0.8316264152526855, "learning_rate": 1.5011092135761542e-05, "loss": 0.5215, "step": 24318 }, { "epoch": 0.6677375068643603, "grad_norm": 0.43116140365600586, "learning_rate": 1.5010718377420466e-05, "loss": 0.4586, "step": 24319 }, { "epoch": 0.6677649643053267, "grad_norm": 0.3652154207229614, "learning_rate": 1.5010344609732796e-05, "loss": 0.4281, "step": 24320 }, { "epoch": 0.6677924217462933, "grad_norm": 0.4387313425540924, "learning_rate": 1.5009970832699234e-05, "loss": 0.4702, "step": 24321 }, { "epoch": 0.6678198791872597, "grad_norm": 0.3595532774925232, "learning_rate": 1.500959704632048e-05, "loss": 0.5312, "step": 24322 }, { "epoch": 0.6678473366282263, "grad_norm": 0.33936193585395813, "learning_rate": 1.500922325059723e-05, "loss": 0.5302, "step": 24323 }, { "epoch": 0.6678747940691927, "grad_norm": 0.3752181828022003, "learning_rate": 1.5008849445530174e-05, "loss": 0.5571, "step": 24324 }, { "epoch": 0.6679022515101593, "grad_norm": 0.34363141655921936, "learning_rate": 1.500847563112002e-05, "loss": 0.4118, "step": 24325 }, { "epoch": 0.6679297089511258, "grad_norm": 0.3626946210861206, "learning_rate": 1.5008101807367457e-05, "loss": 0.4215, "step": 24326 }, { "epoch": 0.6679571663920922, "grad_norm": 0.4208669066429138, "learning_rate": 1.5007727974273189e-05, "loss": 0.5038, "step": 24327 }, { "epoch": 0.6679846238330588, "grad_norm": 0.3616136610507965, "learning_rate": 1.500735413183791e-05, "loss": 0.4944, "step": 24328 }, { "epoch": 0.6680120812740252, "grad_norm": 0.4310722351074219, "learning_rate": 1.5006980280062315e-05, "loss": 0.6103, "step": 24329 }, { "epoch": 0.6680395387149918, "grad_norm": 0.3598319888114929, "learning_rate": 1.5006606418947105e-05, "loss": 0.4811, "step": 24330 }, { "epoch": 0.6680669961559582, "grad_norm": 0.3842642307281494, "learning_rate": 1.5006232548492974e-05, "loss": 0.4406, "step": 24331 }, { "epoch": 0.6680944535969248, "grad_norm": 0.38358089327812195, "learning_rate": 1.5005858668700626e-05, "loss": 0.5383, "step": 24332 }, { "epoch": 0.6681219110378913, "grad_norm": 0.33139070868492126, "learning_rate": 1.500548477957075e-05, "loss": 0.4286, "step": 24333 }, { "epoch": 0.6681493684788578, "grad_norm": 0.43517187237739563, "learning_rate": 1.5005110881104048e-05, "loss": 0.5903, "step": 24334 }, { "epoch": 0.6681768259198243, "grad_norm": 0.40206030011177063, "learning_rate": 1.5004736973301217e-05, "loss": 0.4927, "step": 24335 }, { "epoch": 0.6682042833607907, "grad_norm": 0.34805840253829956, "learning_rate": 1.5004363056162955e-05, "loss": 0.487, "step": 24336 }, { "epoch": 0.6682317408017573, "grad_norm": 0.38826602697372437, "learning_rate": 1.5003989129689957e-05, "loss": 0.4662, "step": 24337 }, { "epoch": 0.6682591982427237, "grad_norm": 0.41704583168029785, "learning_rate": 1.5003615193882925e-05, "loss": 0.5347, "step": 24338 }, { "epoch": 0.6682866556836903, "grad_norm": 0.3648903965950012, "learning_rate": 1.5003241248742551e-05, "loss": 0.5107, "step": 24339 }, { "epoch": 0.6683141131246568, "grad_norm": 0.3982028365135193, "learning_rate": 1.5002867294269537e-05, "loss": 0.5505, "step": 24340 }, { "epoch": 0.6683415705656233, "grad_norm": 0.40073609352111816, "learning_rate": 1.500249333046458e-05, "loss": 0.5213, "step": 24341 }, { "epoch": 0.6683690280065898, "grad_norm": 0.40653330087661743, "learning_rate": 1.5002119357328372e-05, "loss": 0.5462, "step": 24342 }, { "epoch": 0.6683964854475563, "grad_norm": 0.37435877323150635, "learning_rate": 1.5001745374861618e-05, "loss": 0.4803, "step": 24343 }, { "epoch": 0.6684239428885228, "grad_norm": 0.4322631359100342, "learning_rate": 1.5001371383065012e-05, "loss": 0.4894, "step": 24344 }, { "epoch": 0.6684514003294892, "grad_norm": 0.36270809173583984, "learning_rate": 1.5000997381939253e-05, "loss": 0.5413, "step": 24345 }, { "epoch": 0.6684788577704558, "grad_norm": 0.41866815090179443, "learning_rate": 1.5000623371485037e-05, "loss": 0.5551, "step": 24346 }, { "epoch": 0.6685063152114223, "grad_norm": 0.37811413407325745, "learning_rate": 1.500024935170306e-05, "loss": 0.4978, "step": 24347 }, { "epoch": 0.6685337726523888, "grad_norm": 0.4079609811306, "learning_rate": 1.4999875322594026e-05, "loss": 0.5723, "step": 24348 }, { "epoch": 0.6685612300933553, "grad_norm": 0.48508164286613464, "learning_rate": 1.4999501284158626e-05, "loss": 0.6176, "step": 24349 }, { "epoch": 0.6685886875343218, "grad_norm": 0.4689445495605469, "learning_rate": 1.4999127236397562e-05, "loss": 0.4416, "step": 24350 }, { "epoch": 0.6686161449752883, "grad_norm": 0.3810408115386963, "learning_rate": 1.499875317931153e-05, "loss": 0.4832, "step": 24351 }, { "epoch": 0.6686436024162548, "grad_norm": 0.3728795349597931, "learning_rate": 1.4998379112901226e-05, "loss": 0.5562, "step": 24352 }, { "epoch": 0.6686710598572213, "grad_norm": 0.43987923860549927, "learning_rate": 1.4998005037167351e-05, "loss": 0.5487, "step": 24353 }, { "epoch": 0.6686985172981879, "grad_norm": 0.3972318768501282, "learning_rate": 1.4997630952110604e-05, "loss": 0.5483, "step": 24354 }, { "epoch": 0.6687259747391543, "grad_norm": 0.3873419463634491, "learning_rate": 1.4997256857731677e-05, "loss": 0.5411, "step": 24355 }, { "epoch": 0.6687534321801208, "grad_norm": 0.3939681351184845, "learning_rate": 1.4996882754031272e-05, "loss": 0.5142, "step": 24356 }, { "epoch": 0.6687808896210873, "grad_norm": 0.5004730224609375, "learning_rate": 1.4996508641010085e-05, "loss": 0.5284, "step": 24357 }, { "epoch": 0.6688083470620538, "grad_norm": 0.3302323520183563, "learning_rate": 1.4996134518668814e-05, "loss": 0.4774, "step": 24358 }, { "epoch": 0.6688358045030203, "grad_norm": 0.3861036002635956, "learning_rate": 1.4995760387008163e-05, "loss": 0.4676, "step": 24359 }, { "epoch": 0.6688632619439868, "grad_norm": 0.3524801731109619, "learning_rate": 1.4995386246028818e-05, "loss": 0.5578, "step": 24360 }, { "epoch": 0.6688907193849534, "grad_norm": 0.4333026111125946, "learning_rate": 1.4995012095731487e-05, "loss": 0.5156, "step": 24361 }, { "epoch": 0.6689181768259198, "grad_norm": 0.42605918645858765, "learning_rate": 1.499463793611686e-05, "loss": 0.5665, "step": 24362 }, { "epoch": 0.6689456342668864, "grad_norm": 0.3261209726333618, "learning_rate": 1.4994263767185642e-05, "loss": 0.4203, "step": 24363 }, { "epoch": 0.6689730917078528, "grad_norm": 0.3765048086643219, "learning_rate": 1.499388958893853e-05, "loss": 0.5693, "step": 24364 }, { "epoch": 0.6690005491488193, "grad_norm": 0.41863781213760376, "learning_rate": 1.4993515401376217e-05, "loss": 0.5141, "step": 24365 }, { "epoch": 0.6690280065897858, "grad_norm": 0.43369126319885254, "learning_rate": 1.4993141204499406e-05, "loss": 0.475, "step": 24366 }, { "epoch": 0.6690554640307523, "grad_norm": 0.36454156041145325, "learning_rate": 1.4992766998308792e-05, "loss": 0.5366, "step": 24367 }, { "epoch": 0.6690829214717189, "grad_norm": 0.4030226469039917, "learning_rate": 1.4992392782805074e-05, "loss": 0.5922, "step": 24368 }, { "epoch": 0.6691103789126853, "grad_norm": 0.4193758964538574, "learning_rate": 1.499201855798895e-05, "loss": 0.5467, "step": 24369 }, { "epoch": 0.6691378363536519, "grad_norm": 0.4341091811656952, "learning_rate": 1.4991644323861119e-05, "loss": 0.5709, "step": 24370 }, { "epoch": 0.6691652937946183, "grad_norm": 0.4614722430706024, "learning_rate": 1.4991270080422278e-05, "loss": 0.3962, "step": 24371 }, { "epoch": 0.6691927512355849, "grad_norm": 0.41258805990219116, "learning_rate": 1.4990895827673124e-05, "loss": 0.5031, "step": 24372 }, { "epoch": 0.6692202086765513, "grad_norm": 0.39752405881881714, "learning_rate": 1.4990521565614357e-05, "loss": 0.4428, "step": 24373 }, { "epoch": 0.6692476661175178, "grad_norm": 0.3943370282649994, "learning_rate": 1.4990147294246675e-05, "loss": 0.5469, "step": 24374 }, { "epoch": 0.6692751235584844, "grad_norm": 0.39323973655700684, "learning_rate": 1.4989773013570773e-05, "loss": 0.5471, "step": 24375 }, { "epoch": 0.6693025809994508, "grad_norm": 0.4107409417629242, "learning_rate": 1.4989398723587357e-05, "loss": 0.4745, "step": 24376 }, { "epoch": 0.6693300384404174, "grad_norm": 0.45797106623649597, "learning_rate": 1.4989024424297118e-05, "loss": 0.5897, "step": 24377 }, { "epoch": 0.6693574958813838, "grad_norm": 0.4222685396671295, "learning_rate": 1.4988650115700754e-05, "loss": 0.5418, "step": 24378 }, { "epoch": 0.6693849533223504, "grad_norm": 0.4189116060733795, "learning_rate": 1.4988275797798966e-05, "loss": 0.503, "step": 24379 }, { "epoch": 0.6694124107633168, "grad_norm": 0.36321568489074707, "learning_rate": 1.4987901470592453e-05, "loss": 0.485, "step": 24380 }, { "epoch": 0.6694398682042834, "grad_norm": 0.44860923290252686, "learning_rate": 1.498752713408191e-05, "loss": 0.5239, "step": 24381 }, { "epoch": 0.6694673256452499, "grad_norm": 0.40222999453544617, "learning_rate": 1.4987152788268037e-05, "loss": 0.427, "step": 24382 }, { "epoch": 0.6694947830862163, "grad_norm": 0.4308027923107147, "learning_rate": 1.4986778433151534e-05, "loss": 0.5102, "step": 24383 }, { "epoch": 0.6695222405271829, "grad_norm": 0.3822615146636963, "learning_rate": 1.4986404068733099e-05, "loss": 0.4638, "step": 24384 }, { "epoch": 0.6695496979681493, "grad_norm": 0.3790890872478485, "learning_rate": 1.4986029695013426e-05, "loss": 0.4502, "step": 24385 }, { "epoch": 0.6695771554091159, "grad_norm": 0.3913233280181885, "learning_rate": 1.4985655311993217e-05, "loss": 0.5257, "step": 24386 }, { "epoch": 0.6696046128500823, "grad_norm": 0.4571707844734192, "learning_rate": 1.4985280919673169e-05, "loss": 0.48, "step": 24387 }, { "epoch": 0.6696320702910489, "grad_norm": 0.37993302941322327, "learning_rate": 1.4984906518053981e-05, "loss": 0.472, "step": 24388 }, { "epoch": 0.6696595277320154, "grad_norm": 0.34993407130241394, "learning_rate": 1.4984532107136354e-05, "loss": 0.4845, "step": 24389 }, { "epoch": 0.6696869851729819, "grad_norm": 0.3929523825645447, "learning_rate": 1.4984157686920981e-05, "loss": 0.4319, "step": 24390 }, { "epoch": 0.6697144426139484, "grad_norm": 0.3905077278614044, "learning_rate": 1.4983783257408565e-05, "loss": 0.5104, "step": 24391 }, { "epoch": 0.6697419000549149, "grad_norm": 0.36483293771743774, "learning_rate": 1.4983408818599801e-05, "loss": 0.4944, "step": 24392 }, { "epoch": 0.6697693574958814, "grad_norm": 0.35506492853164673, "learning_rate": 1.498303437049539e-05, "loss": 0.5292, "step": 24393 }, { "epoch": 0.6697968149368478, "grad_norm": 0.3880969285964966, "learning_rate": 1.498265991309603e-05, "loss": 0.4419, "step": 24394 }, { "epoch": 0.6698242723778144, "grad_norm": 0.3575671315193176, "learning_rate": 1.4982285446402416e-05, "loss": 0.4682, "step": 24395 }, { "epoch": 0.6698517298187809, "grad_norm": 0.3916985094547272, "learning_rate": 1.498191097041525e-05, "loss": 0.4762, "step": 24396 }, { "epoch": 0.6698791872597474, "grad_norm": 0.31731340289115906, "learning_rate": 1.4981536485135233e-05, "loss": 0.4522, "step": 24397 }, { "epoch": 0.6699066447007139, "grad_norm": 0.3951798677444458, "learning_rate": 1.498116199056306e-05, "loss": 0.495, "step": 24398 }, { "epoch": 0.6699341021416804, "grad_norm": 0.3886653482913971, "learning_rate": 1.498078748669943e-05, "loss": 0.5202, "step": 24399 }, { "epoch": 0.6699615595826469, "grad_norm": 0.3919002115726471, "learning_rate": 1.498041297354504e-05, "loss": 0.5527, "step": 24400 }, { "epoch": 0.6699890170236134, "grad_norm": 0.37803444266319275, "learning_rate": 1.4980038451100589e-05, "loss": 0.4434, "step": 24401 }, { "epoch": 0.6700164744645799, "grad_norm": 0.3771652579307556, "learning_rate": 1.497966391936678e-05, "loss": 0.4572, "step": 24402 }, { "epoch": 0.6700439319055465, "grad_norm": 0.33237597346305847, "learning_rate": 1.4979289378344309e-05, "loss": 0.4936, "step": 24403 }, { "epoch": 0.6700713893465129, "grad_norm": 0.37814798951148987, "learning_rate": 1.4978914828033871e-05, "loss": 0.5242, "step": 24404 }, { "epoch": 0.6700988467874794, "grad_norm": 0.4509678781032562, "learning_rate": 1.4978540268436171e-05, "loss": 0.511, "step": 24405 }, { "epoch": 0.6701263042284459, "grad_norm": 0.39572659134864807, "learning_rate": 1.4978165699551901e-05, "loss": 0.427, "step": 24406 }, { "epoch": 0.6701537616694124, "grad_norm": 0.32596614956855774, "learning_rate": 1.4977791121381766e-05, "loss": 0.4426, "step": 24407 }, { "epoch": 0.6701812191103789, "grad_norm": 0.3713546097278595, "learning_rate": 1.4977416533926461e-05, "loss": 0.4539, "step": 24408 }, { "epoch": 0.6702086765513454, "grad_norm": 0.44320282340049744, "learning_rate": 1.4977041937186683e-05, "loss": 0.5377, "step": 24409 }, { "epoch": 0.670236133992312, "grad_norm": 0.36684906482696533, "learning_rate": 1.4976667331163138e-05, "loss": 0.4875, "step": 24410 }, { "epoch": 0.6702635914332784, "grad_norm": 0.3943157494068146, "learning_rate": 1.4976292715856515e-05, "loss": 0.5146, "step": 24411 }, { "epoch": 0.670291048874245, "grad_norm": 0.8033331632614136, "learning_rate": 1.497591809126752e-05, "loss": 0.4514, "step": 24412 }, { "epoch": 0.6703185063152114, "grad_norm": 0.3910847008228302, "learning_rate": 1.497554345739685e-05, "loss": 0.5179, "step": 24413 }, { "epoch": 0.6703459637561779, "grad_norm": 0.33453473448753357, "learning_rate": 1.4975168814245204e-05, "loss": 0.4003, "step": 24414 }, { "epoch": 0.6703734211971444, "grad_norm": 0.37554192543029785, "learning_rate": 1.497479416181328e-05, "loss": 0.5238, "step": 24415 }, { "epoch": 0.6704008786381109, "grad_norm": 0.38779062032699585, "learning_rate": 1.4974419500101778e-05, "loss": 0.5791, "step": 24416 }, { "epoch": 0.6704283360790775, "grad_norm": 0.41506287455558777, "learning_rate": 1.4974044829111393e-05, "loss": 0.5785, "step": 24417 }, { "epoch": 0.6704557935200439, "grad_norm": 0.3559380769729614, "learning_rate": 1.497367014884283e-05, "loss": 0.4849, "step": 24418 }, { "epoch": 0.6704832509610105, "grad_norm": 0.4134364426136017, "learning_rate": 1.4973295459296781e-05, "loss": 0.5252, "step": 24419 }, { "epoch": 0.6705107084019769, "grad_norm": 0.42875584959983826, "learning_rate": 1.4972920760473954e-05, "loss": 0.4843, "step": 24420 }, { "epoch": 0.6705381658429435, "grad_norm": 0.39449357986450195, "learning_rate": 1.497254605237504e-05, "loss": 0.531, "step": 24421 }, { "epoch": 0.6705656232839099, "grad_norm": 0.34989890456199646, "learning_rate": 1.4972171335000737e-05, "loss": 0.4463, "step": 24422 }, { "epoch": 0.6705930807248764, "grad_norm": 0.3672311007976532, "learning_rate": 1.4971796608351753e-05, "loss": 0.4691, "step": 24423 }, { "epoch": 0.670620538165843, "grad_norm": 0.38913196325302124, "learning_rate": 1.497142187242878e-05, "loss": 0.4898, "step": 24424 }, { "epoch": 0.6706479956068094, "grad_norm": 0.43850991129875183, "learning_rate": 1.4971047127232517e-05, "loss": 0.5865, "step": 24425 }, { "epoch": 0.670675453047776, "grad_norm": 0.3611348867416382, "learning_rate": 1.4970672372763666e-05, "loss": 0.4693, "step": 24426 }, { "epoch": 0.6707029104887424, "grad_norm": 0.40363195538520813, "learning_rate": 1.4970297609022923e-05, "loss": 0.4555, "step": 24427 }, { "epoch": 0.670730367929709, "grad_norm": 0.36001938581466675, "learning_rate": 1.4969922836010991e-05, "loss": 0.4449, "step": 24428 }, { "epoch": 0.6707578253706754, "grad_norm": 0.4865620732307434, "learning_rate": 1.4969548053728565e-05, "loss": 0.4578, "step": 24429 }, { "epoch": 0.670785282811642, "grad_norm": 0.35550743341445923, "learning_rate": 1.4969173262176347e-05, "loss": 0.4586, "step": 24430 }, { "epoch": 0.6708127402526085, "grad_norm": 0.36922210454940796, "learning_rate": 1.4968798461355032e-05, "loss": 0.5186, "step": 24431 }, { "epoch": 0.670840197693575, "grad_norm": 0.3907538056373596, "learning_rate": 1.4968423651265322e-05, "loss": 0.5083, "step": 24432 }, { "epoch": 0.6708676551345415, "grad_norm": 0.38454726338386536, "learning_rate": 1.4968048831907921e-05, "loss": 0.5063, "step": 24433 }, { "epoch": 0.6708951125755079, "grad_norm": 0.3746526837348938, "learning_rate": 1.496767400328352e-05, "loss": 0.5778, "step": 24434 }, { "epoch": 0.6709225700164745, "grad_norm": 0.4165246784687042, "learning_rate": 1.4967299165392821e-05, "loss": 0.488, "step": 24435 }, { "epoch": 0.6709500274574409, "grad_norm": 0.4082239270210266, "learning_rate": 1.4966924318236524e-05, "loss": 0.4991, "step": 24436 }, { "epoch": 0.6709774848984075, "grad_norm": 0.39807015657424927, "learning_rate": 1.496654946181533e-05, "loss": 0.569, "step": 24437 }, { "epoch": 0.671004942339374, "grad_norm": 0.3520061671733856, "learning_rate": 1.4966174596129935e-05, "loss": 0.5158, "step": 24438 }, { "epoch": 0.6710323997803405, "grad_norm": 0.4027259349822998, "learning_rate": 1.4965799721181038e-05, "loss": 0.5252, "step": 24439 }, { "epoch": 0.671059857221307, "grad_norm": 0.3892516493797302, "learning_rate": 1.496542483696934e-05, "loss": 0.4177, "step": 24440 }, { "epoch": 0.6710873146622734, "grad_norm": 0.4236079752445221, "learning_rate": 1.496504994349554e-05, "loss": 0.4811, "step": 24441 }, { "epoch": 0.67111477210324, "grad_norm": 0.4508568346500397, "learning_rate": 1.496467504076034e-05, "loss": 0.5621, "step": 24442 }, { "epoch": 0.6711422295442064, "grad_norm": 0.40316540002822876, "learning_rate": 1.496430012876443e-05, "loss": 0.5559, "step": 24443 }, { "epoch": 0.671169686985173, "grad_norm": 0.3428434431552887, "learning_rate": 1.4963925207508523e-05, "loss": 0.5307, "step": 24444 }, { "epoch": 0.6711971444261395, "grad_norm": 0.45570889115333557, "learning_rate": 1.4963550276993308e-05, "loss": 0.545, "step": 24445 }, { "epoch": 0.671224601867106, "grad_norm": 0.41465601325035095, "learning_rate": 1.4963175337219487e-05, "loss": 0.5224, "step": 24446 }, { "epoch": 0.6712520593080725, "grad_norm": 0.37135088443756104, "learning_rate": 1.4962800388187761e-05, "loss": 0.5557, "step": 24447 }, { "epoch": 0.671279516749039, "grad_norm": 0.3866599500179291, "learning_rate": 1.4962425429898826e-05, "loss": 0.4375, "step": 24448 }, { "epoch": 0.6713069741900055, "grad_norm": 0.48393958806991577, "learning_rate": 1.4962050462353388e-05, "loss": 0.485, "step": 24449 }, { "epoch": 0.671334431630972, "grad_norm": 0.4009033143520355, "learning_rate": 1.4961675485552141e-05, "loss": 0.4335, "step": 24450 }, { "epoch": 0.6713618890719385, "grad_norm": 0.4112264811992645, "learning_rate": 1.4961300499495783e-05, "loss": 0.4858, "step": 24451 }, { "epoch": 0.671389346512905, "grad_norm": 0.40976840257644653, "learning_rate": 1.4960925504185018e-05, "loss": 0.4333, "step": 24452 }, { "epoch": 0.6714168039538715, "grad_norm": 0.39261680841445923, "learning_rate": 1.4960550499620543e-05, "loss": 0.474, "step": 24453 }, { "epoch": 0.671444261394838, "grad_norm": 0.38440343737602234, "learning_rate": 1.496017548580306e-05, "loss": 0.5004, "step": 24454 }, { "epoch": 0.6714717188358045, "grad_norm": 0.4346105456352234, "learning_rate": 1.4959800462733264e-05, "loss": 0.5456, "step": 24455 }, { "epoch": 0.671499176276771, "grad_norm": 0.39084070920944214, "learning_rate": 1.4959425430411858e-05, "loss": 0.5208, "step": 24456 }, { "epoch": 0.6715266337177375, "grad_norm": 0.39274176955223083, "learning_rate": 1.495905038883954e-05, "loss": 0.4488, "step": 24457 }, { "epoch": 0.671554091158704, "grad_norm": 0.4990578591823578, "learning_rate": 1.495867533801701e-05, "loss": 0.4589, "step": 24458 }, { "epoch": 0.6715815485996706, "grad_norm": 0.398885577917099, "learning_rate": 1.495830027794497e-05, "loss": 0.4637, "step": 24459 }, { "epoch": 0.671609006040637, "grad_norm": 0.35534748435020447, "learning_rate": 1.4957925208624118e-05, "loss": 0.4969, "step": 24460 }, { "epoch": 0.6716364634816036, "grad_norm": 0.34399113059043884, "learning_rate": 1.495755013005515e-05, "loss": 0.4393, "step": 24461 }, { "epoch": 0.67166392092257, "grad_norm": 0.3440386950969696, "learning_rate": 1.495717504223877e-05, "loss": 0.4292, "step": 24462 }, { "epoch": 0.6716913783635365, "grad_norm": 0.3939066529273987, "learning_rate": 1.4956799945175674e-05, "loss": 0.491, "step": 24463 }, { "epoch": 0.671718835804503, "grad_norm": 0.34361159801483154, "learning_rate": 1.495642483886657e-05, "loss": 0.4754, "step": 24464 }, { "epoch": 0.6717462932454695, "grad_norm": 0.8950289487838745, "learning_rate": 1.495604972331215e-05, "loss": 0.4859, "step": 24465 }, { "epoch": 0.6717737506864361, "grad_norm": 0.38327756524086, "learning_rate": 1.4955674598513113e-05, "loss": 0.4449, "step": 24466 }, { "epoch": 0.6718012081274025, "grad_norm": 0.41009992361068726, "learning_rate": 1.4955299464470164e-05, "loss": 0.4658, "step": 24467 }, { "epoch": 0.6718286655683691, "grad_norm": 0.38031160831451416, "learning_rate": 1.4954924321183994e-05, "loss": 0.4434, "step": 24468 }, { "epoch": 0.6718561230093355, "grad_norm": 0.4130900204181671, "learning_rate": 1.4954549168655315e-05, "loss": 0.5147, "step": 24469 }, { "epoch": 0.671883580450302, "grad_norm": 0.38960033655166626, "learning_rate": 1.4954174006884818e-05, "loss": 0.4447, "step": 24470 }, { "epoch": 0.6719110378912685, "grad_norm": 0.36396655440330505, "learning_rate": 1.4953798835873205e-05, "loss": 0.4915, "step": 24471 }, { "epoch": 0.671938495332235, "grad_norm": 0.4284327030181885, "learning_rate": 1.4953423655621178e-05, "loss": 0.4925, "step": 24472 }, { "epoch": 0.6719659527732016, "grad_norm": 0.4440813660621643, "learning_rate": 1.4953048466129434e-05, "loss": 0.503, "step": 24473 }, { "epoch": 0.671993410214168, "grad_norm": 0.4893396496772766, "learning_rate": 1.4952673267398674e-05, "loss": 0.5462, "step": 24474 }, { "epoch": 0.6720208676551346, "grad_norm": 0.3778676688671112, "learning_rate": 1.4952298059429597e-05, "loss": 0.4204, "step": 24475 }, { "epoch": 0.672048325096101, "grad_norm": 0.4116414785385132, "learning_rate": 1.4951922842222901e-05, "loss": 0.4551, "step": 24476 }, { "epoch": 0.6720757825370676, "grad_norm": 0.3751869201660156, "learning_rate": 1.4951547615779294e-05, "loss": 0.4486, "step": 24477 }, { "epoch": 0.672103239978034, "grad_norm": 0.40163102746009827, "learning_rate": 1.4951172380099467e-05, "loss": 0.5748, "step": 24478 }, { "epoch": 0.6721306974190006, "grad_norm": 0.34393271803855896, "learning_rate": 1.4950797135184123e-05, "loss": 0.4346, "step": 24479 }, { "epoch": 0.6721581548599671, "grad_norm": 0.4224848449230194, "learning_rate": 1.4950421881033964e-05, "loss": 0.5778, "step": 24480 }, { "epoch": 0.6721856123009335, "grad_norm": 0.4111120104789734, "learning_rate": 1.4950046617649684e-05, "loss": 0.4551, "step": 24481 }, { "epoch": 0.6722130697419001, "grad_norm": 0.7008498311042786, "learning_rate": 1.4949671345031992e-05, "loss": 0.5095, "step": 24482 }, { "epoch": 0.6722405271828665, "grad_norm": 0.3964710831642151, "learning_rate": 1.4949296063181584e-05, "loss": 0.5372, "step": 24483 }, { "epoch": 0.6722679846238331, "grad_norm": 0.3755912482738495, "learning_rate": 1.4948920772099156e-05, "loss": 0.5495, "step": 24484 }, { "epoch": 0.6722954420647995, "grad_norm": 0.3712335526943207, "learning_rate": 1.4948545471785411e-05, "loss": 0.4229, "step": 24485 }, { "epoch": 0.6723228995057661, "grad_norm": 0.3628402352333069, "learning_rate": 1.494817016224105e-05, "loss": 0.4856, "step": 24486 }, { "epoch": 0.6723503569467326, "grad_norm": 0.3742866516113281, "learning_rate": 1.4947794843466774e-05, "loss": 0.5166, "step": 24487 }, { "epoch": 0.6723778143876991, "grad_norm": 0.37783434987068176, "learning_rate": 1.494741951546328e-05, "loss": 0.5001, "step": 24488 }, { "epoch": 0.6724052718286656, "grad_norm": 0.42667174339294434, "learning_rate": 1.494704417823127e-05, "loss": 0.5584, "step": 24489 }, { "epoch": 0.672432729269632, "grad_norm": 0.33719876408576965, "learning_rate": 1.4946668831771443e-05, "loss": 0.5239, "step": 24490 }, { "epoch": 0.6724601867105986, "grad_norm": 0.37199804186820984, "learning_rate": 1.4946293476084501e-05, "loss": 0.4333, "step": 24491 }, { "epoch": 0.672487644151565, "grad_norm": 0.3581751883029938, "learning_rate": 1.4945918111171141e-05, "loss": 0.5307, "step": 24492 }, { "epoch": 0.6725151015925316, "grad_norm": 0.37696823477745056, "learning_rate": 1.4945542737032066e-05, "loss": 0.5229, "step": 24493 }, { "epoch": 0.6725425590334981, "grad_norm": 0.3762693703174591, "learning_rate": 1.4945167353667975e-05, "loss": 0.5006, "step": 24494 }, { "epoch": 0.6725700164744646, "grad_norm": 0.4108847975730896, "learning_rate": 1.494479196107957e-05, "loss": 0.4629, "step": 24495 }, { "epoch": 0.6725974739154311, "grad_norm": 0.4466383159160614, "learning_rate": 1.494441655926755e-05, "loss": 0.5142, "step": 24496 }, { "epoch": 0.6726249313563976, "grad_norm": 0.39571741223335266, "learning_rate": 1.4944041148232612e-05, "loss": 0.5531, "step": 24497 }, { "epoch": 0.6726523887973641, "grad_norm": 0.4102534353733063, "learning_rate": 1.4943665727975463e-05, "loss": 0.4842, "step": 24498 }, { "epoch": 0.6726798462383305, "grad_norm": 0.39369282126426697, "learning_rate": 1.4943290298496798e-05, "loss": 0.5472, "step": 24499 }, { "epoch": 0.6727073036792971, "grad_norm": 0.3298470973968506, "learning_rate": 1.494291485979732e-05, "loss": 0.4777, "step": 24500 }, { "epoch": 0.6727347611202636, "grad_norm": 0.5239458084106445, "learning_rate": 1.4942539411877727e-05, "loss": 0.5037, "step": 24501 }, { "epoch": 0.6727622185612301, "grad_norm": 0.31752511858940125, "learning_rate": 1.494216395473872e-05, "loss": 0.4761, "step": 24502 }, { "epoch": 0.6727896760021966, "grad_norm": 0.3949616849422455, "learning_rate": 1.4941788488381002e-05, "loss": 0.5349, "step": 24503 }, { "epoch": 0.6728171334431631, "grad_norm": 0.38686633110046387, "learning_rate": 1.4941413012805267e-05, "loss": 0.5575, "step": 24504 }, { "epoch": 0.6728445908841296, "grad_norm": 0.3982712924480438, "learning_rate": 1.4941037528012224e-05, "loss": 0.5018, "step": 24505 }, { "epoch": 0.6728720483250961, "grad_norm": 0.3536010980606079, "learning_rate": 1.4940662034002569e-05, "loss": 0.4696, "step": 24506 }, { "epoch": 0.6728995057660626, "grad_norm": 0.40320783853530884, "learning_rate": 1.4940286530776998e-05, "loss": 0.4809, "step": 24507 }, { "epoch": 0.672926963207029, "grad_norm": 0.3977469503879547, "learning_rate": 1.493991101833622e-05, "loss": 0.5186, "step": 24508 }, { "epoch": 0.6729544206479956, "grad_norm": 0.34845396876335144, "learning_rate": 1.493953549668093e-05, "loss": 0.4973, "step": 24509 }, { "epoch": 0.6729818780889621, "grad_norm": 0.37814903259277344, "learning_rate": 1.4939159965811829e-05, "loss": 0.524, "step": 24510 }, { "epoch": 0.6730093355299286, "grad_norm": 0.35990217328071594, "learning_rate": 1.493878442572962e-05, "loss": 0.4931, "step": 24511 }, { "epoch": 0.6730367929708951, "grad_norm": 0.3558020293712616, "learning_rate": 1.4938408876435e-05, "loss": 0.4619, "step": 24512 }, { "epoch": 0.6730642504118616, "grad_norm": 0.36216166615486145, "learning_rate": 1.4938033317928672e-05, "loss": 0.4477, "step": 24513 }, { "epoch": 0.6730917078528281, "grad_norm": 0.3496515452861786, "learning_rate": 1.4937657750211339e-05, "loss": 0.4388, "step": 24514 }, { "epoch": 0.6731191652937946, "grad_norm": 0.4190749228000641, "learning_rate": 1.4937282173283692e-05, "loss": 0.4998, "step": 24515 }, { "epoch": 0.6731466227347611, "grad_norm": 0.37252727150917053, "learning_rate": 1.4936906587146443e-05, "loss": 0.48, "step": 24516 }, { "epoch": 0.6731740801757277, "grad_norm": 0.430812269449234, "learning_rate": 1.4936530991800284e-05, "loss": 0.4833, "step": 24517 }, { "epoch": 0.6732015376166941, "grad_norm": 0.3485341966152191, "learning_rate": 1.4936155387245921e-05, "loss": 0.4742, "step": 24518 }, { "epoch": 0.6732289950576607, "grad_norm": 0.3503608703613281, "learning_rate": 1.4935779773484054e-05, "loss": 0.5123, "step": 24519 }, { "epoch": 0.6732564524986271, "grad_norm": 0.3595551550388336, "learning_rate": 1.493540415051538e-05, "loss": 0.4135, "step": 24520 }, { "epoch": 0.6732839099395936, "grad_norm": 0.5299739241600037, "learning_rate": 1.4935028518340604e-05, "loss": 0.5142, "step": 24521 }, { "epoch": 0.6733113673805601, "grad_norm": 0.4050653278827667, "learning_rate": 1.4934652876960425e-05, "loss": 0.541, "step": 24522 }, { "epoch": 0.6733388248215266, "grad_norm": 0.364177942276001, "learning_rate": 1.493427722637554e-05, "loss": 0.5486, "step": 24523 }, { "epoch": 0.6733662822624932, "grad_norm": 0.3974637985229492, "learning_rate": 1.4933901566586658e-05, "loss": 0.4585, "step": 24524 }, { "epoch": 0.6733937397034596, "grad_norm": 0.36399203538894653, "learning_rate": 1.493352589759447e-05, "loss": 0.3959, "step": 24525 }, { "epoch": 0.6734211971444262, "grad_norm": 0.43902236223220825, "learning_rate": 1.4933150219399686e-05, "loss": 0.4614, "step": 24526 }, { "epoch": 0.6734486545853926, "grad_norm": 0.4430224299430847, "learning_rate": 1.4932774532003001e-05, "loss": 0.5336, "step": 24527 }, { "epoch": 0.6734761120263592, "grad_norm": 0.37345507740974426, "learning_rate": 1.4932398835405115e-05, "loss": 0.5204, "step": 24528 }, { "epoch": 0.6735035694673256, "grad_norm": 0.4007030129432678, "learning_rate": 1.4932023129606732e-05, "loss": 0.511, "step": 24529 }, { "epoch": 0.6735310269082921, "grad_norm": 0.40268856287002563, "learning_rate": 1.4931647414608551e-05, "loss": 0.5501, "step": 24530 }, { "epoch": 0.6735584843492587, "grad_norm": 0.46880510449409485, "learning_rate": 1.4931271690411276e-05, "loss": 0.519, "step": 24531 }, { "epoch": 0.6735859417902251, "grad_norm": 0.47139108180999756, "learning_rate": 1.4930895957015606e-05, "loss": 0.5584, "step": 24532 }, { "epoch": 0.6736133992311917, "grad_norm": 0.40465277433395386, "learning_rate": 1.4930520214422237e-05, "loss": 0.5799, "step": 24533 }, { "epoch": 0.6736408566721581, "grad_norm": 0.332698792219162, "learning_rate": 1.4930144462631877e-05, "loss": 0.3716, "step": 24534 }, { "epoch": 0.6736683141131247, "grad_norm": 0.4443698227405548, "learning_rate": 1.4929768701645222e-05, "loss": 0.5167, "step": 24535 }, { "epoch": 0.6736957715540911, "grad_norm": 0.3921014666557312, "learning_rate": 1.4929392931462979e-05, "loss": 0.4969, "step": 24536 }, { "epoch": 0.6737232289950577, "grad_norm": 0.3766387104988098, "learning_rate": 1.4929017152085843e-05, "loss": 0.606, "step": 24537 }, { "epoch": 0.6737506864360242, "grad_norm": 0.34511616826057434, "learning_rate": 1.4928641363514513e-05, "loss": 0.5163, "step": 24538 }, { "epoch": 0.6737781438769906, "grad_norm": 0.37016546726226807, "learning_rate": 1.4928265565749697e-05, "loss": 0.5239, "step": 24539 }, { "epoch": 0.6738056013179572, "grad_norm": 0.4538094699382782, "learning_rate": 1.4927889758792095e-05, "loss": 0.612, "step": 24540 }, { "epoch": 0.6738330587589236, "grad_norm": 0.404949814081192, "learning_rate": 1.4927513942642402e-05, "loss": 0.4993, "step": 24541 }, { "epoch": 0.6738605161998902, "grad_norm": 0.37416964769363403, "learning_rate": 1.4927138117301328e-05, "loss": 0.451, "step": 24542 }, { "epoch": 0.6738879736408566, "grad_norm": 0.4025149345397949, "learning_rate": 1.4926762282769563e-05, "loss": 0.5023, "step": 24543 }, { "epoch": 0.6739154310818232, "grad_norm": 0.3611920177936554, "learning_rate": 1.4926386439047817e-05, "loss": 0.5549, "step": 24544 }, { "epoch": 0.6739428885227897, "grad_norm": 0.3956725001335144, "learning_rate": 1.4926010586136787e-05, "loss": 0.4103, "step": 24545 }, { "epoch": 0.6739703459637562, "grad_norm": 0.39659959077835083, "learning_rate": 1.4925634724037175e-05, "loss": 0.5993, "step": 24546 }, { "epoch": 0.6739978034047227, "grad_norm": 0.3755542039871216, "learning_rate": 1.4925258852749682e-05, "loss": 0.4324, "step": 24547 }, { "epoch": 0.6740252608456891, "grad_norm": 0.3986935019493103, "learning_rate": 1.492488297227501e-05, "loss": 0.5585, "step": 24548 }, { "epoch": 0.6740527182866557, "grad_norm": 0.4030296206474304, "learning_rate": 1.492450708261386e-05, "loss": 0.5238, "step": 24549 }, { "epoch": 0.6740801757276221, "grad_norm": 0.35807281732559204, "learning_rate": 1.4924131183766932e-05, "loss": 0.4775, "step": 24550 }, { "epoch": 0.6741076331685887, "grad_norm": 0.3918784558773041, "learning_rate": 1.4923755275734926e-05, "loss": 0.4318, "step": 24551 }, { "epoch": 0.6741350906095552, "grad_norm": 0.3889288604259491, "learning_rate": 1.4923379358518548e-05, "loss": 0.4331, "step": 24552 }, { "epoch": 0.6741625480505217, "grad_norm": 0.3840411305427551, "learning_rate": 1.4923003432118492e-05, "loss": 0.4858, "step": 24553 }, { "epoch": 0.6741900054914882, "grad_norm": 0.38037073612213135, "learning_rate": 1.4922627496535466e-05, "loss": 0.5236, "step": 24554 }, { "epoch": 0.6742174629324547, "grad_norm": 0.37249642610549927, "learning_rate": 1.4922251551770171e-05, "loss": 0.5037, "step": 24555 }, { "epoch": 0.6742449203734212, "grad_norm": 0.3978208601474762, "learning_rate": 1.4921875597823303e-05, "loss": 0.4535, "step": 24556 }, { "epoch": 0.6742723778143876, "grad_norm": 0.5610777735710144, "learning_rate": 1.4921499634695567e-05, "loss": 0.5917, "step": 24557 }, { "epoch": 0.6742998352553542, "grad_norm": 0.3878350257873535, "learning_rate": 1.4921123662387662e-05, "loss": 0.453, "step": 24558 }, { "epoch": 0.6743272926963207, "grad_norm": 0.3919682204723358, "learning_rate": 1.4920747680900292e-05, "loss": 0.5592, "step": 24559 }, { "epoch": 0.6743547501372872, "grad_norm": 0.3920331299304962, "learning_rate": 1.4920371690234156e-05, "loss": 0.4849, "step": 24560 }, { "epoch": 0.6743822075782537, "grad_norm": 0.3460105359554291, "learning_rate": 1.4919995690389958e-05, "loss": 0.5209, "step": 24561 }, { "epoch": 0.6744096650192202, "grad_norm": 0.38608741760253906, "learning_rate": 1.4919619681368396e-05, "loss": 0.5291, "step": 24562 }, { "epoch": 0.6744371224601867, "grad_norm": 0.35018467903137207, "learning_rate": 1.4919243663170176e-05, "loss": 0.4278, "step": 24563 }, { "epoch": 0.6744645799011532, "grad_norm": 0.32496705651283264, "learning_rate": 1.4918867635795993e-05, "loss": 0.4549, "step": 24564 }, { "epoch": 0.6744920373421197, "grad_norm": 0.33646532893180847, "learning_rate": 1.4918491599246554e-05, "loss": 0.4681, "step": 24565 }, { "epoch": 0.6745194947830863, "grad_norm": 0.370693564414978, "learning_rate": 1.4918115553522556e-05, "loss": 0.4586, "step": 24566 }, { "epoch": 0.6745469522240527, "grad_norm": 0.3917047083377838, "learning_rate": 1.4917739498624706e-05, "loss": 0.6157, "step": 24567 }, { "epoch": 0.6745744096650192, "grad_norm": 0.34217947721481323, "learning_rate": 1.4917363434553702e-05, "loss": 0.4138, "step": 24568 }, { "epoch": 0.6746018671059857, "grad_norm": 0.35845229029655457, "learning_rate": 1.4916987361310243e-05, "loss": 0.4271, "step": 24569 }, { "epoch": 0.6746293245469522, "grad_norm": 0.36230671405792236, "learning_rate": 1.4916611278895036e-05, "loss": 0.5158, "step": 24570 }, { "epoch": 0.6746567819879187, "grad_norm": 0.38496971130371094, "learning_rate": 1.491623518730878e-05, "loss": 0.5548, "step": 24571 }, { "epoch": 0.6746842394288852, "grad_norm": 0.4383469521999359, "learning_rate": 1.4915859086552173e-05, "loss": 0.5212, "step": 24572 }, { "epoch": 0.6747116968698518, "grad_norm": 0.4856886565685272, "learning_rate": 1.4915482976625924e-05, "loss": 0.5637, "step": 24573 }, { "epoch": 0.6747391543108182, "grad_norm": 0.36719903349876404, "learning_rate": 1.4915106857530725e-05, "loss": 0.4087, "step": 24574 }, { "epoch": 0.6747666117517848, "grad_norm": 0.3883417546749115, "learning_rate": 1.4914730729267289e-05, "loss": 0.4336, "step": 24575 }, { "epoch": 0.6747940691927512, "grad_norm": 0.40620318055152893, "learning_rate": 1.4914354591836308e-05, "loss": 0.4924, "step": 24576 }, { "epoch": 0.6748215266337177, "grad_norm": 0.45186689496040344, "learning_rate": 1.4913978445238488e-05, "loss": 0.4614, "step": 24577 }, { "epoch": 0.6748489840746842, "grad_norm": 0.4127728044986725, "learning_rate": 1.4913602289474529e-05, "loss": 0.4723, "step": 24578 }, { "epoch": 0.6748764415156507, "grad_norm": 0.38014960289001465, "learning_rate": 1.4913226124545135e-05, "loss": 0.5423, "step": 24579 }, { "epoch": 0.6749038989566173, "grad_norm": 0.38368716835975647, "learning_rate": 1.4912849950451004e-05, "loss": 0.524, "step": 24580 }, { "epoch": 0.6749313563975837, "grad_norm": 0.34034568071365356, "learning_rate": 1.4912473767192842e-05, "loss": 0.475, "step": 24581 }, { "epoch": 0.6749588138385503, "grad_norm": 0.3314669132232666, "learning_rate": 1.491209757477135e-05, "loss": 0.4378, "step": 24582 }, { "epoch": 0.6749862712795167, "grad_norm": 0.4210059344768524, "learning_rate": 1.4911721373187225e-05, "loss": 0.5095, "step": 24583 }, { "epoch": 0.6750137287204833, "grad_norm": 0.3827277421951294, "learning_rate": 1.491134516244117e-05, "loss": 0.4634, "step": 24584 }, { "epoch": 0.6750411861614497, "grad_norm": 0.36526167392730713, "learning_rate": 1.4910968942533894e-05, "loss": 0.5349, "step": 24585 }, { "epoch": 0.6750686436024163, "grad_norm": 0.32987093925476074, "learning_rate": 1.4910592713466093e-05, "loss": 0.4919, "step": 24586 }, { "epoch": 0.6750961010433828, "grad_norm": 0.37047910690307617, "learning_rate": 1.4910216475238466e-05, "loss": 0.4371, "step": 24587 }, { "epoch": 0.6751235584843492, "grad_norm": 0.4140540361404419, "learning_rate": 1.4909840227851722e-05, "loss": 0.5127, "step": 24588 }, { "epoch": 0.6751510159253158, "grad_norm": 0.46208202838897705, "learning_rate": 1.4909463971306557e-05, "loss": 0.5287, "step": 24589 }, { "epoch": 0.6751784733662822, "grad_norm": 0.3833446800708771, "learning_rate": 1.4909087705603673e-05, "loss": 0.4806, "step": 24590 }, { "epoch": 0.6752059308072488, "grad_norm": 0.3414084017276764, "learning_rate": 1.4908711430743776e-05, "loss": 0.524, "step": 24591 }, { "epoch": 0.6752333882482152, "grad_norm": 0.3484494686126709, "learning_rate": 1.4908335146727563e-05, "loss": 0.4932, "step": 24592 }, { "epoch": 0.6752608456891818, "grad_norm": 0.3920177221298218, "learning_rate": 1.4907958853555742e-05, "loss": 0.4606, "step": 24593 }, { "epoch": 0.6752883031301483, "grad_norm": 0.47357261180877686, "learning_rate": 1.490758255122901e-05, "loss": 0.4883, "step": 24594 }, { "epoch": 0.6753157605711148, "grad_norm": 0.3538281321525574, "learning_rate": 1.490720623974807e-05, "loss": 0.45, "step": 24595 }, { "epoch": 0.6753432180120813, "grad_norm": 0.41389501094818115, "learning_rate": 1.4906829919113623e-05, "loss": 0.5301, "step": 24596 }, { "epoch": 0.6753706754530477, "grad_norm": 0.40059810876846313, "learning_rate": 1.490645358932637e-05, "loss": 0.608, "step": 24597 }, { "epoch": 0.6753981328940143, "grad_norm": 0.41472533345222473, "learning_rate": 1.4906077250387022e-05, "loss": 0.5225, "step": 24598 }, { "epoch": 0.6754255903349807, "grad_norm": 0.3730088770389557, "learning_rate": 1.4905700902296269e-05, "loss": 0.5635, "step": 24599 }, { "epoch": 0.6754530477759473, "grad_norm": 0.3968581259250641, "learning_rate": 1.4905324545054817e-05, "loss": 0.4474, "step": 24600 }, { "epoch": 0.6754805052169138, "grad_norm": 0.44245412945747375, "learning_rate": 1.4904948178663374e-05, "loss": 0.5852, "step": 24601 }, { "epoch": 0.6755079626578803, "grad_norm": 0.3946479856967926, "learning_rate": 1.4904571803122634e-05, "loss": 0.4995, "step": 24602 }, { "epoch": 0.6755354200988468, "grad_norm": 0.40508314967155457, "learning_rate": 1.4904195418433302e-05, "loss": 0.5048, "step": 24603 }, { "epoch": 0.6755628775398133, "grad_norm": 0.4574253559112549, "learning_rate": 1.4903819024596082e-05, "loss": 0.5316, "step": 24604 }, { "epoch": 0.6755903349807798, "grad_norm": 0.4047321081161499, "learning_rate": 1.4903442621611673e-05, "loss": 0.5592, "step": 24605 }, { "epoch": 0.6756177924217462, "grad_norm": 0.3417544364929199, "learning_rate": 1.4903066209480779e-05, "loss": 0.4515, "step": 24606 }, { "epoch": 0.6756452498627128, "grad_norm": 0.3838084638118744, "learning_rate": 1.49026897882041e-05, "loss": 0.557, "step": 24607 }, { "epoch": 0.6756727073036793, "grad_norm": 0.4075060784816742, "learning_rate": 1.4902313357782343e-05, "loss": 0.5663, "step": 24608 }, { "epoch": 0.6757001647446458, "grad_norm": 0.4222567081451416, "learning_rate": 1.4901936918216206e-05, "loss": 0.5671, "step": 24609 }, { "epoch": 0.6757276221856123, "grad_norm": 0.41118401288986206, "learning_rate": 1.4901560469506389e-05, "loss": 0.5282, "step": 24610 }, { "epoch": 0.6757550796265788, "grad_norm": 0.3938857316970825, "learning_rate": 1.49011840116536e-05, "loss": 0.5166, "step": 24611 }, { "epoch": 0.6757825370675453, "grad_norm": 0.3827604055404663, "learning_rate": 1.4900807544658538e-05, "loss": 0.5455, "step": 24612 }, { "epoch": 0.6758099945085118, "grad_norm": 0.3926313817501068, "learning_rate": 1.4900431068521908e-05, "loss": 0.4973, "step": 24613 }, { "epoch": 0.6758374519494783, "grad_norm": 0.3724226951599121, "learning_rate": 1.4900054583244408e-05, "loss": 0.4499, "step": 24614 }, { "epoch": 0.6758649093904449, "grad_norm": 0.423447847366333, "learning_rate": 1.4899678088826741e-05, "loss": 0.5387, "step": 24615 }, { "epoch": 0.6758923668314113, "grad_norm": 0.3710342049598694, "learning_rate": 1.4899301585269611e-05, "loss": 0.462, "step": 24616 }, { "epoch": 0.6759198242723778, "grad_norm": 0.361422598361969, "learning_rate": 1.4898925072573723e-05, "loss": 0.4674, "step": 24617 }, { "epoch": 0.6759472817133443, "grad_norm": 0.3752308487892151, "learning_rate": 1.4898548550739773e-05, "loss": 0.4755, "step": 24618 }, { "epoch": 0.6759747391543108, "grad_norm": 0.37381258606910706, "learning_rate": 1.4898172019768469e-05, "loss": 0.4472, "step": 24619 }, { "epoch": 0.6760021965952773, "grad_norm": 0.339123010635376, "learning_rate": 1.4897795479660509e-05, "loss": 0.4967, "step": 24620 }, { "epoch": 0.6760296540362438, "grad_norm": 0.35449087619781494, "learning_rate": 1.4897418930416598e-05, "loss": 0.5171, "step": 24621 }, { "epoch": 0.6760571114772104, "grad_norm": 0.3871387243270874, "learning_rate": 1.4897042372037436e-05, "loss": 0.4505, "step": 24622 }, { "epoch": 0.6760845689181768, "grad_norm": 0.38298478722572327, "learning_rate": 1.4896665804523728e-05, "loss": 0.4688, "step": 24623 }, { "epoch": 0.6761120263591434, "grad_norm": 0.39064833521842957, "learning_rate": 1.4896289227876174e-05, "loss": 0.4934, "step": 24624 }, { "epoch": 0.6761394838001098, "grad_norm": 0.38540512323379517, "learning_rate": 1.489591264209548e-05, "loss": 0.5237, "step": 24625 }, { "epoch": 0.6761669412410763, "grad_norm": 0.3764270842075348, "learning_rate": 1.489553604718235e-05, "loss": 0.4997, "step": 24626 }, { "epoch": 0.6761943986820428, "grad_norm": 1.1259453296661377, "learning_rate": 1.4895159443137477e-05, "loss": 0.5908, "step": 24627 }, { "epoch": 0.6762218561230093, "grad_norm": 0.41269755363464355, "learning_rate": 1.489478282996157e-05, "loss": 0.5677, "step": 24628 }, { "epoch": 0.6762493135639759, "grad_norm": 0.378048837184906, "learning_rate": 1.489440620765533e-05, "loss": 0.5304, "step": 24629 }, { "epoch": 0.6762767710049423, "grad_norm": 0.42348745465278625, "learning_rate": 1.4894029576219464e-05, "loss": 0.5175, "step": 24630 }, { "epoch": 0.6763042284459089, "grad_norm": 0.33571645617485046, "learning_rate": 1.489365293565467e-05, "loss": 0.4423, "step": 24631 }, { "epoch": 0.6763316858868753, "grad_norm": 0.41025876998901367, "learning_rate": 1.4893276285961648e-05, "loss": 0.4845, "step": 24632 }, { "epoch": 0.6763591433278419, "grad_norm": 0.38076502084732056, "learning_rate": 1.4892899627141109e-05, "loss": 0.5086, "step": 24633 }, { "epoch": 0.6763866007688083, "grad_norm": 0.46431028842926025, "learning_rate": 1.4892522959193745e-05, "loss": 0.5989, "step": 24634 }, { "epoch": 0.6764140582097748, "grad_norm": 0.4151221513748169, "learning_rate": 1.4892146282120268e-05, "loss": 0.5386, "step": 24635 }, { "epoch": 0.6764415156507414, "grad_norm": 0.36836618185043335, "learning_rate": 1.4891769595921377e-05, "loss": 0.4415, "step": 24636 }, { "epoch": 0.6764689730917078, "grad_norm": 0.40889889001846313, "learning_rate": 1.4891392900597772e-05, "loss": 0.4884, "step": 24637 }, { "epoch": 0.6764964305326744, "grad_norm": 0.37919095158576965, "learning_rate": 1.4891016196150161e-05, "loss": 0.5247, "step": 24638 }, { "epoch": 0.6765238879736408, "grad_norm": 0.3906339704990387, "learning_rate": 1.4890639482579242e-05, "loss": 0.5675, "step": 24639 }, { "epoch": 0.6765513454146074, "grad_norm": 0.3761000633239746, "learning_rate": 1.4890262759885718e-05, "loss": 0.406, "step": 24640 }, { "epoch": 0.6765788028555738, "grad_norm": 0.4928313195705414, "learning_rate": 1.4889886028070295e-05, "loss": 0.5413, "step": 24641 }, { "epoch": 0.6766062602965404, "grad_norm": 0.39812812209129333, "learning_rate": 1.4889509287133674e-05, "loss": 0.5857, "step": 24642 }, { "epoch": 0.6766337177375069, "grad_norm": 0.41760751605033875, "learning_rate": 1.4889132537076558e-05, "loss": 0.5174, "step": 24643 }, { "epoch": 0.6766611751784734, "grad_norm": 0.3205573856830597, "learning_rate": 1.4888755777899647e-05, "loss": 0.4519, "step": 24644 }, { "epoch": 0.6766886326194399, "grad_norm": 0.40193575620651245, "learning_rate": 1.4888379009603648e-05, "loss": 0.5573, "step": 24645 }, { "epoch": 0.6767160900604063, "grad_norm": 0.40479370951652527, "learning_rate": 1.488800223218926e-05, "loss": 0.5312, "step": 24646 }, { "epoch": 0.6767435475013729, "grad_norm": 0.40116778016090393, "learning_rate": 1.4887625445657193e-05, "loss": 0.5737, "step": 24647 }, { "epoch": 0.6767710049423393, "grad_norm": 0.48819276690483093, "learning_rate": 1.4887248650008142e-05, "loss": 0.5088, "step": 24648 }, { "epoch": 0.6767984623833059, "grad_norm": 0.37710481882095337, "learning_rate": 1.488687184524281e-05, "loss": 0.5945, "step": 24649 }, { "epoch": 0.6768259198242724, "grad_norm": 0.627784013748169, "learning_rate": 1.4886495031361903e-05, "loss": 0.5013, "step": 24650 }, { "epoch": 0.6768533772652389, "grad_norm": 0.4297949969768524, "learning_rate": 1.4886118208366125e-05, "loss": 0.567, "step": 24651 }, { "epoch": 0.6768808347062054, "grad_norm": 0.4072975516319275, "learning_rate": 1.4885741376256175e-05, "loss": 0.5192, "step": 24652 }, { "epoch": 0.6769082921471719, "grad_norm": 0.3537621796131134, "learning_rate": 1.488536453503276e-05, "loss": 0.4993, "step": 24653 }, { "epoch": 0.6769357495881384, "grad_norm": 0.3731457591056824, "learning_rate": 1.4884987684696579e-05, "loss": 0.4527, "step": 24654 }, { "epoch": 0.6769632070291048, "grad_norm": 0.4117981791496277, "learning_rate": 1.4884610825248338e-05, "loss": 0.5099, "step": 24655 }, { "epoch": 0.6769906644700714, "grad_norm": 0.4026380479335785, "learning_rate": 1.488423395668874e-05, "loss": 0.4926, "step": 24656 }, { "epoch": 0.6770181219110379, "grad_norm": 0.3824782073497772, "learning_rate": 1.4883857079018485e-05, "loss": 0.4945, "step": 24657 }, { "epoch": 0.6770455793520044, "grad_norm": 0.3629313111305237, "learning_rate": 1.4883480192238277e-05, "loss": 0.4808, "step": 24658 }, { "epoch": 0.6770730367929709, "grad_norm": 0.3595391809940338, "learning_rate": 1.4883103296348822e-05, "loss": 0.4426, "step": 24659 }, { "epoch": 0.6771004942339374, "grad_norm": 0.6131923794746399, "learning_rate": 1.4882726391350821e-05, "loss": 0.5036, "step": 24660 }, { "epoch": 0.6771279516749039, "grad_norm": 0.3421216309070587, "learning_rate": 1.4882349477244975e-05, "loss": 0.4825, "step": 24661 }, { "epoch": 0.6771554091158704, "grad_norm": 0.2953258454799652, "learning_rate": 1.488197255403199e-05, "loss": 0.3599, "step": 24662 }, { "epoch": 0.6771828665568369, "grad_norm": 0.34559378027915955, "learning_rate": 1.4881595621712567e-05, "loss": 0.4903, "step": 24663 }, { "epoch": 0.6772103239978035, "grad_norm": 0.3611229658126831, "learning_rate": 1.4881218680287415e-05, "loss": 0.4677, "step": 24664 }, { "epoch": 0.6772377814387699, "grad_norm": 0.42003607749938965, "learning_rate": 1.4880841729757225e-05, "loss": 0.5121, "step": 24665 }, { "epoch": 0.6772652388797364, "grad_norm": 0.3831963539123535, "learning_rate": 1.488046477012271e-05, "loss": 0.4612, "step": 24666 }, { "epoch": 0.6772926963207029, "grad_norm": 0.3545961081981659, "learning_rate": 1.4880087801384573e-05, "loss": 0.5236, "step": 24667 }, { "epoch": 0.6773201537616694, "grad_norm": 0.5288163423538208, "learning_rate": 1.4879710823543514e-05, "loss": 0.4867, "step": 24668 }, { "epoch": 0.6773476112026359, "grad_norm": 0.35817283391952515, "learning_rate": 1.4879333836600236e-05, "loss": 0.461, "step": 24669 }, { "epoch": 0.6773750686436024, "grad_norm": 0.3856397569179535, "learning_rate": 1.4878956840555441e-05, "loss": 0.5019, "step": 24670 }, { "epoch": 0.677402526084569, "grad_norm": 0.3809085190296173, "learning_rate": 1.4878579835409837e-05, "loss": 0.533, "step": 24671 }, { "epoch": 0.6774299835255354, "grad_norm": 0.33630844950675964, "learning_rate": 1.4878202821164122e-05, "loss": 0.4322, "step": 24672 }, { "epoch": 0.677457440966502, "grad_norm": 0.3207458257675171, "learning_rate": 1.4877825797819005e-05, "loss": 0.515, "step": 24673 }, { "epoch": 0.6774848984074684, "grad_norm": 0.4093596041202545, "learning_rate": 1.4877448765375184e-05, "loss": 0.501, "step": 24674 }, { "epoch": 0.6775123558484349, "grad_norm": 0.36584851145744324, "learning_rate": 1.4877071723833365e-05, "loss": 0.4966, "step": 24675 }, { "epoch": 0.6775398132894014, "grad_norm": 0.5227502584457397, "learning_rate": 1.4876694673194252e-05, "loss": 0.5177, "step": 24676 }, { "epoch": 0.6775672707303679, "grad_norm": 0.3756561875343323, "learning_rate": 1.4876317613458543e-05, "loss": 0.5299, "step": 24677 }, { "epoch": 0.6775947281713345, "grad_norm": 0.3682920038700104, "learning_rate": 1.487594054462695e-05, "loss": 0.5348, "step": 24678 }, { "epoch": 0.6776221856123009, "grad_norm": 0.34742066264152527, "learning_rate": 1.4875563466700169e-05, "loss": 0.4761, "step": 24679 }, { "epoch": 0.6776496430532675, "grad_norm": 0.43169623613357544, "learning_rate": 1.4875186379678908e-05, "loss": 0.5757, "step": 24680 }, { "epoch": 0.6776771004942339, "grad_norm": 0.37947484850883484, "learning_rate": 1.4874809283563867e-05, "loss": 0.4821, "step": 24681 }, { "epoch": 0.6777045579352005, "grad_norm": 0.4154565930366516, "learning_rate": 1.487443217835575e-05, "loss": 0.5336, "step": 24682 }, { "epoch": 0.6777320153761669, "grad_norm": 0.3759317994117737, "learning_rate": 1.4874055064055261e-05, "loss": 0.4997, "step": 24683 }, { "epoch": 0.6777594728171334, "grad_norm": 0.3883313238620758, "learning_rate": 1.4873677940663107e-05, "loss": 0.5253, "step": 24684 }, { "epoch": 0.6777869302581, "grad_norm": 0.3828660845756531, "learning_rate": 1.4873300808179985e-05, "loss": 0.5104, "step": 24685 }, { "epoch": 0.6778143876990664, "grad_norm": 0.4011501669883728, "learning_rate": 1.4872923666606602e-05, "loss": 0.5339, "step": 24686 }, { "epoch": 0.677841845140033, "grad_norm": 0.3488782048225403, "learning_rate": 1.4872546515943661e-05, "loss": 0.4998, "step": 24687 }, { "epoch": 0.6778693025809994, "grad_norm": 0.3759072422981262, "learning_rate": 1.4872169356191867e-05, "loss": 0.4991, "step": 24688 }, { "epoch": 0.677896760021966, "grad_norm": 0.47305694222450256, "learning_rate": 1.4871792187351921e-05, "loss": 0.6008, "step": 24689 }, { "epoch": 0.6779242174629324, "grad_norm": 0.3556036353111267, "learning_rate": 1.4871415009424528e-05, "loss": 0.4323, "step": 24690 }, { "epoch": 0.677951674903899, "grad_norm": 0.42531007528305054, "learning_rate": 1.4871037822410392e-05, "loss": 0.561, "step": 24691 }, { "epoch": 0.6779791323448655, "grad_norm": 0.36813661456108093, "learning_rate": 1.4870660626310216e-05, "loss": 0.4902, "step": 24692 }, { "epoch": 0.678006589785832, "grad_norm": 0.37318286299705505, "learning_rate": 1.48702834211247e-05, "loss": 0.4899, "step": 24693 }, { "epoch": 0.6780340472267985, "grad_norm": 0.3967891037464142, "learning_rate": 1.4869906206854556e-05, "loss": 0.5505, "step": 24694 }, { "epoch": 0.6780615046677649, "grad_norm": 0.4000481367111206, "learning_rate": 1.486952898350048e-05, "loss": 0.4045, "step": 24695 }, { "epoch": 0.6780889621087315, "grad_norm": 0.3878704905509949, "learning_rate": 1.4869151751063178e-05, "loss": 0.6026, "step": 24696 }, { "epoch": 0.6781164195496979, "grad_norm": 0.3805018663406372, "learning_rate": 1.4868774509543355e-05, "loss": 0.5186, "step": 24697 }, { "epoch": 0.6781438769906645, "grad_norm": 0.35620933771133423, "learning_rate": 1.486839725894171e-05, "loss": 0.5231, "step": 24698 }, { "epoch": 0.678171334431631, "grad_norm": 0.3495666980743408, "learning_rate": 1.4868019999258957e-05, "loss": 0.4321, "step": 24699 }, { "epoch": 0.6781987918725975, "grad_norm": 0.35241419076919556, "learning_rate": 1.4867642730495787e-05, "loss": 0.4532, "step": 24700 }, { "epoch": 0.678226249313564, "grad_norm": 0.37175247073173523, "learning_rate": 1.4867265452652913e-05, "loss": 0.5061, "step": 24701 }, { "epoch": 0.6782537067545304, "grad_norm": 0.3732503056526184, "learning_rate": 1.4866888165731034e-05, "loss": 0.456, "step": 24702 }, { "epoch": 0.678281164195497, "grad_norm": 0.4279842972755432, "learning_rate": 1.4866510869730855e-05, "loss": 0.482, "step": 24703 }, { "epoch": 0.6783086216364634, "grad_norm": 0.38725391030311584, "learning_rate": 1.4866133564653083e-05, "loss": 0.5526, "step": 24704 }, { "epoch": 0.67833607907743, "grad_norm": 0.358897864818573, "learning_rate": 1.4865756250498416e-05, "loss": 0.4947, "step": 24705 }, { "epoch": 0.6783635365183965, "grad_norm": 0.3751176595687866, "learning_rate": 1.486537892726756e-05, "loss": 0.5463, "step": 24706 }, { "epoch": 0.678390993959363, "grad_norm": 0.372969388961792, "learning_rate": 1.4865001594961221e-05, "loss": 0.5352, "step": 24707 }, { "epoch": 0.6784184514003295, "grad_norm": 0.3516174256801605, "learning_rate": 1.48646242535801e-05, "loss": 0.5204, "step": 24708 }, { "epoch": 0.678445908841296, "grad_norm": 0.4268017113208771, "learning_rate": 1.4864246903124904e-05, "loss": 0.5965, "step": 24709 }, { "epoch": 0.6784733662822625, "grad_norm": 0.3818952441215515, "learning_rate": 1.4863869543596337e-05, "loss": 0.5956, "step": 24710 }, { "epoch": 0.678500823723229, "grad_norm": 0.43440333008766174, "learning_rate": 1.4863492174995095e-05, "loss": 0.5548, "step": 24711 }, { "epoch": 0.6785282811641955, "grad_norm": 0.37717920541763306, "learning_rate": 1.4863114797321891e-05, "loss": 0.5345, "step": 24712 }, { "epoch": 0.678555738605162, "grad_norm": 0.38694632053375244, "learning_rate": 1.4862737410577427e-05, "loss": 0.4599, "step": 24713 }, { "epoch": 0.6785831960461285, "grad_norm": 0.3523053824901581, "learning_rate": 1.4862360014762404e-05, "loss": 0.4339, "step": 24714 }, { "epoch": 0.678610653487095, "grad_norm": 0.3841882050037384, "learning_rate": 1.4861982609877527e-05, "loss": 0.5559, "step": 24715 }, { "epoch": 0.6786381109280615, "grad_norm": 0.3694884181022644, "learning_rate": 1.48616051959235e-05, "loss": 0.4902, "step": 24716 }, { "epoch": 0.678665568369028, "grad_norm": 0.38973236083984375, "learning_rate": 1.4861227772901032e-05, "loss": 0.5725, "step": 24717 }, { "epoch": 0.6786930258099945, "grad_norm": 0.4293643534183502, "learning_rate": 1.4860850340810822e-05, "loss": 0.4838, "step": 24718 }, { "epoch": 0.678720483250961, "grad_norm": 0.32226577401161194, "learning_rate": 1.4860472899653571e-05, "loss": 0.469, "step": 24719 }, { "epoch": 0.6787479406919276, "grad_norm": 0.39230573177337646, "learning_rate": 1.486009544942999e-05, "loss": 0.4656, "step": 24720 }, { "epoch": 0.678775398132894, "grad_norm": 0.3882876932621002, "learning_rate": 1.4859717990140776e-05, "loss": 0.5951, "step": 24721 }, { "epoch": 0.6788028555738606, "grad_norm": 0.3995455801486969, "learning_rate": 1.485934052178664e-05, "loss": 0.5415, "step": 24722 }, { "epoch": 0.678830313014827, "grad_norm": 0.4224449396133423, "learning_rate": 1.4858963044368284e-05, "loss": 0.506, "step": 24723 }, { "epoch": 0.6788577704557935, "grad_norm": 0.45586255192756653, "learning_rate": 1.4858585557886408e-05, "loss": 0.5372, "step": 24724 }, { "epoch": 0.67888522789676, "grad_norm": 0.41354838013648987, "learning_rate": 1.4858208062341721e-05, "loss": 0.5182, "step": 24725 }, { "epoch": 0.6789126853377265, "grad_norm": 0.3628770411014557, "learning_rate": 1.4857830557734927e-05, "loss": 0.4627, "step": 24726 }, { "epoch": 0.6789401427786931, "grad_norm": 0.37641704082489014, "learning_rate": 1.4857453044066726e-05, "loss": 0.4879, "step": 24727 }, { "epoch": 0.6789676002196595, "grad_norm": 0.3610899746417999, "learning_rate": 1.4857075521337826e-05, "loss": 0.4745, "step": 24728 }, { "epoch": 0.6789950576606261, "grad_norm": 0.4320964813232422, "learning_rate": 1.4856697989548927e-05, "loss": 0.5422, "step": 24729 }, { "epoch": 0.6790225151015925, "grad_norm": 0.386313796043396, "learning_rate": 1.4856320448700739e-05, "loss": 0.5062, "step": 24730 }, { "epoch": 0.679049972542559, "grad_norm": 0.39313608407974243, "learning_rate": 1.4855942898793965e-05, "loss": 0.4889, "step": 24731 }, { "epoch": 0.6790774299835255, "grad_norm": 0.3889591097831726, "learning_rate": 1.4855565339829303e-05, "loss": 0.3975, "step": 24732 }, { "epoch": 0.679104887424492, "grad_norm": 0.3713637590408325, "learning_rate": 1.4855187771807466e-05, "loss": 0.4693, "step": 24733 }, { "epoch": 0.6791323448654586, "grad_norm": 0.386711984872818, "learning_rate": 1.4854810194729152e-05, "loss": 0.4752, "step": 24734 }, { "epoch": 0.679159802306425, "grad_norm": 0.4015325903892517, "learning_rate": 1.4854432608595069e-05, "loss": 0.5516, "step": 24735 }, { "epoch": 0.6791872597473916, "grad_norm": 0.3431996703147888, "learning_rate": 1.4854055013405918e-05, "loss": 0.4807, "step": 24736 }, { "epoch": 0.679214717188358, "grad_norm": 0.3879076838493347, "learning_rate": 1.4853677409162406e-05, "loss": 0.5259, "step": 24737 }, { "epoch": 0.6792421746293246, "grad_norm": 0.3491518199443817, "learning_rate": 1.4853299795865236e-05, "loss": 0.4694, "step": 24738 }, { "epoch": 0.679269632070291, "grad_norm": 0.37226054072380066, "learning_rate": 1.4852922173515114e-05, "loss": 0.4995, "step": 24739 }, { "epoch": 0.6792970895112576, "grad_norm": 0.418663889169693, "learning_rate": 1.4852544542112744e-05, "loss": 0.4902, "step": 24740 }, { "epoch": 0.6793245469522241, "grad_norm": 0.3524758815765381, "learning_rate": 1.485216690165883e-05, "loss": 0.4686, "step": 24741 }, { "epoch": 0.6793520043931905, "grad_norm": 0.38901662826538086, "learning_rate": 1.4851789252154074e-05, "loss": 0.4399, "step": 24742 }, { "epoch": 0.6793794618341571, "grad_norm": 0.43476101756095886, "learning_rate": 1.4851411593599183e-05, "loss": 0.5006, "step": 24743 }, { "epoch": 0.6794069192751235, "grad_norm": 0.3728569746017456, "learning_rate": 1.4851033925994862e-05, "loss": 0.4891, "step": 24744 }, { "epoch": 0.6794343767160901, "grad_norm": 0.36587709188461304, "learning_rate": 1.4850656249341813e-05, "loss": 0.4751, "step": 24745 }, { "epoch": 0.6794618341570565, "grad_norm": 0.41170018911361694, "learning_rate": 1.4850278563640743e-05, "loss": 0.5321, "step": 24746 }, { "epoch": 0.6794892915980231, "grad_norm": 0.3987009823322296, "learning_rate": 1.4849900868892354e-05, "loss": 0.4182, "step": 24747 }, { "epoch": 0.6795167490389896, "grad_norm": 0.3998626172542572, "learning_rate": 1.4849523165097352e-05, "loss": 0.5608, "step": 24748 }, { "epoch": 0.6795442064799561, "grad_norm": 0.46267759799957275, "learning_rate": 1.4849145452256447e-05, "loss": 0.5438, "step": 24749 }, { "epoch": 0.6795716639209226, "grad_norm": 0.3884490132331848, "learning_rate": 1.4848767730370331e-05, "loss": 0.5015, "step": 24750 }, { "epoch": 0.679599121361889, "grad_norm": 0.4451427757740021, "learning_rate": 1.484838999943972e-05, "loss": 0.5458, "step": 24751 }, { "epoch": 0.6796265788028556, "grad_norm": 0.3769061863422394, "learning_rate": 1.484801225946531e-05, "loss": 0.5113, "step": 24752 }, { "epoch": 0.679654036243822, "grad_norm": 0.41998496651649475, "learning_rate": 1.4847634510447812e-05, "loss": 0.5882, "step": 24753 }, { "epoch": 0.6796814936847886, "grad_norm": 0.5420153141021729, "learning_rate": 1.4847256752387931e-05, "loss": 0.4638, "step": 24754 }, { "epoch": 0.6797089511257551, "grad_norm": 0.39680346846580505, "learning_rate": 1.4846878985286366e-05, "loss": 0.5539, "step": 24755 }, { "epoch": 0.6797364085667216, "grad_norm": 0.38108667731285095, "learning_rate": 1.4846501209143824e-05, "loss": 0.4766, "step": 24756 }, { "epoch": 0.6797638660076881, "grad_norm": 0.4224685728549957, "learning_rate": 1.4846123423961015e-05, "loss": 0.5303, "step": 24757 }, { "epoch": 0.6797913234486546, "grad_norm": 0.3879458010196686, "learning_rate": 1.4845745629738634e-05, "loss": 0.4765, "step": 24758 }, { "epoch": 0.6798187808896211, "grad_norm": 0.3626798987388611, "learning_rate": 1.4845367826477396e-05, "loss": 0.5577, "step": 24759 }, { "epoch": 0.6798462383305875, "grad_norm": 0.3651992380619049, "learning_rate": 1.4844990014177994e-05, "loss": 0.4387, "step": 24760 }, { "epoch": 0.6798736957715541, "grad_norm": 0.41993337869644165, "learning_rate": 1.4844612192841143e-05, "loss": 0.499, "step": 24761 }, { "epoch": 0.6799011532125206, "grad_norm": 0.6609918475151062, "learning_rate": 1.4844234362467545e-05, "loss": 0.4649, "step": 24762 }, { "epoch": 0.6799286106534871, "grad_norm": 0.4617288410663605, "learning_rate": 1.4843856523057902e-05, "loss": 0.479, "step": 24763 }, { "epoch": 0.6799560680944536, "grad_norm": 0.414126455783844, "learning_rate": 1.4843478674612924e-05, "loss": 0.4752, "step": 24764 }, { "epoch": 0.6799835255354201, "grad_norm": 0.37222009897232056, "learning_rate": 1.4843100817133306e-05, "loss": 0.4388, "step": 24765 }, { "epoch": 0.6800109829763866, "grad_norm": 0.38828524947166443, "learning_rate": 1.4842722950619764e-05, "loss": 0.5248, "step": 24766 }, { "epoch": 0.6800384404173531, "grad_norm": 0.4090743362903595, "learning_rate": 1.4842345075072998e-05, "loss": 0.5264, "step": 24767 }, { "epoch": 0.6800658978583196, "grad_norm": 0.40971800684928894, "learning_rate": 1.4841967190493711e-05, "loss": 0.549, "step": 24768 }, { "epoch": 0.6800933552992862, "grad_norm": 0.3782045543193817, "learning_rate": 1.484158929688261e-05, "loss": 0.5415, "step": 24769 }, { "epoch": 0.6801208127402526, "grad_norm": 0.40254074335098267, "learning_rate": 1.48412113942404e-05, "loss": 0.5044, "step": 24770 }, { "epoch": 0.6801482701812192, "grad_norm": 0.3977079391479492, "learning_rate": 1.4840833482567786e-05, "loss": 0.4948, "step": 24771 }, { "epoch": 0.6801757276221856, "grad_norm": 0.36784598231315613, "learning_rate": 1.4840455561865475e-05, "loss": 0.5194, "step": 24772 }, { "epoch": 0.6802031850631521, "grad_norm": 0.41661709547042847, "learning_rate": 1.4840077632134167e-05, "loss": 0.5307, "step": 24773 }, { "epoch": 0.6802306425041186, "grad_norm": 0.35944920778274536, "learning_rate": 1.4839699693374567e-05, "loss": 0.4671, "step": 24774 }, { "epoch": 0.6802580999450851, "grad_norm": 0.4112045466899872, "learning_rate": 1.4839321745587387e-05, "loss": 0.5018, "step": 24775 }, { "epoch": 0.6802855573860516, "grad_norm": 0.41675832867622375, "learning_rate": 1.4838943788773325e-05, "loss": 0.5168, "step": 24776 }, { "epoch": 0.6803130148270181, "grad_norm": 0.42622271180152893, "learning_rate": 1.483856582293309e-05, "loss": 0.5926, "step": 24777 }, { "epoch": 0.6803404722679847, "grad_norm": 0.4079679548740387, "learning_rate": 1.4838187848067382e-05, "loss": 0.5387, "step": 24778 }, { "epoch": 0.6803679297089511, "grad_norm": 0.4241170883178711, "learning_rate": 1.4837809864176913e-05, "loss": 0.5272, "step": 24779 }, { "epoch": 0.6803953871499177, "grad_norm": 0.3668062388896942, "learning_rate": 1.4837431871262385e-05, "loss": 0.5025, "step": 24780 }, { "epoch": 0.6804228445908841, "grad_norm": 0.3498862385749817, "learning_rate": 1.4837053869324498e-05, "loss": 0.4955, "step": 24781 }, { "epoch": 0.6804503020318506, "grad_norm": 0.3379315137863159, "learning_rate": 1.4836675858363964e-05, "loss": 0.5317, "step": 24782 }, { "epoch": 0.6804777594728171, "grad_norm": 0.35673996806144714, "learning_rate": 1.4836297838381489e-05, "loss": 0.4594, "step": 24783 }, { "epoch": 0.6805052169137836, "grad_norm": 0.39776644110679626, "learning_rate": 1.483591980937777e-05, "loss": 0.4776, "step": 24784 }, { "epoch": 0.6805326743547502, "grad_norm": 0.3943544924259186, "learning_rate": 1.483554177135352e-05, "loss": 0.5366, "step": 24785 }, { "epoch": 0.6805601317957166, "grad_norm": 0.3750261962413788, "learning_rate": 1.483516372430944e-05, "loss": 0.4671, "step": 24786 }, { "epoch": 0.6805875892366832, "grad_norm": 0.40222543478012085, "learning_rate": 1.4834785668246238e-05, "loss": 0.4992, "step": 24787 }, { "epoch": 0.6806150466776496, "grad_norm": 0.37992438673973083, "learning_rate": 1.4834407603164617e-05, "loss": 0.5811, "step": 24788 }, { "epoch": 0.6806425041186162, "grad_norm": 0.3561042547225952, "learning_rate": 1.4834029529065283e-05, "loss": 0.4847, "step": 24789 }, { "epoch": 0.6806699615595826, "grad_norm": 0.4806372821331024, "learning_rate": 1.4833651445948943e-05, "loss": 0.5443, "step": 24790 }, { "epoch": 0.6806974190005491, "grad_norm": 0.3604779839515686, "learning_rate": 1.4833273353816296e-05, "loss": 0.4748, "step": 24791 }, { "epoch": 0.6807248764415157, "grad_norm": 0.4138298034667969, "learning_rate": 1.4832895252668054e-05, "loss": 0.4999, "step": 24792 }, { "epoch": 0.6807523338824821, "grad_norm": 0.36065396666526794, "learning_rate": 1.4832517142504918e-05, "loss": 0.4658, "step": 24793 }, { "epoch": 0.6807797913234487, "grad_norm": 0.4247809648513794, "learning_rate": 1.4832139023327599e-05, "loss": 0.5228, "step": 24794 }, { "epoch": 0.6808072487644151, "grad_norm": 0.38223543763160706, "learning_rate": 1.4831760895136795e-05, "loss": 0.5051, "step": 24795 }, { "epoch": 0.6808347062053817, "grad_norm": 0.39497119188308716, "learning_rate": 1.4831382757933217e-05, "loss": 0.5363, "step": 24796 }, { "epoch": 0.6808621636463481, "grad_norm": 0.35188817977905273, "learning_rate": 1.4831004611717567e-05, "loss": 0.4574, "step": 24797 }, { "epoch": 0.6808896210873147, "grad_norm": 0.3510240614414215, "learning_rate": 1.4830626456490551e-05, "loss": 0.5603, "step": 24798 }, { "epoch": 0.6809170785282812, "grad_norm": 0.3709449768066406, "learning_rate": 1.4830248292252876e-05, "loss": 0.518, "step": 24799 }, { "epoch": 0.6809445359692476, "grad_norm": 0.3718576431274414, "learning_rate": 1.4829870119005247e-05, "loss": 0.4625, "step": 24800 }, { "epoch": 0.6809719934102142, "grad_norm": 0.3791601061820984, "learning_rate": 1.4829491936748368e-05, "loss": 0.4699, "step": 24801 }, { "epoch": 0.6809994508511806, "grad_norm": 0.3657032549381256, "learning_rate": 1.4829113745482944e-05, "loss": 0.4952, "step": 24802 }, { "epoch": 0.6810269082921472, "grad_norm": 0.3757285475730896, "learning_rate": 1.482873554520968e-05, "loss": 0.4656, "step": 24803 }, { "epoch": 0.6810543657331136, "grad_norm": 0.3871396780014038, "learning_rate": 1.4828357335929288e-05, "loss": 0.4707, "step": 24804 }, { "epoch": 0.6810818231740802, "grad_norm": 0.36886411905288696, "learning_rate": 1.4827979117642467e-05, "loss": 0.5038, "step": 24805 }, { "epoch": 0.6811092806150467, "grad_norm": 0.5465626120567322, "learning_rate": 1.4827600890349923e-05, "loss": 0.5637, "step": 24806 }, { "epoch": 0.6811367380560132, "grad_norm": 0.4028051793575287, "learning_rate": 1.4827222654052363e-05, "loss": 0.5625, "step": 24807 }, { "epoch": 0.6811641954969797, "grad_norm": 0.3903573751449585, "learning_rate": 1.482684440875049e-05, "loss": 0.4736, "step": 24808 }, { "epoch": 0.6811916529379461, "grad_norm": 0.38079503178596497, "learning_rate": 1.4826466154445014e-05, "loss": 0.4752, "step": 24809 }, { "epoch": 0.6812191103789127, "grad_norm": 0.33930355310440063, "learning_rate": 1.4826087891136636e-05, "loss": 0.4692, "step": 24810 }, { "epoch": 0.6812465678198791, "grad_norm": 0.42364850640296936, "learning_rate": 1.4825709618826065e-05, "loss": 0.5114, "step": 24811 }, { "epoch": 0.6812740252608457, "grad_norm": 0.41698595881462097, "learning_rate": 1.4825331337514007e-05, "loss": 0.5794, "step": 24812 }, { "epoch": 0.6813014827018122, "grad_norm": 0.3929669260978699, "learning_rate": 1.4824953047201163e-05, "loss": 0.4573, "step": 24813 }, { "epoch": 0.6813289401427787, "grad_norm": 0.3830530345439911, "learning_rate": 1.4824574747888245e-05, "loss": 0.4742, "step": 24814 }, { "epoch": 0.6813563975837452, "grad_norm": 0.3926469087600708, "learning_rate": 1.482419643957595e-05, "loss": 0.6256, "step": 24815 }, { "epoch": 0.6813838550247117, "grad_norm": 0.3491367995738983, "learning_rate": 1.4823818122264994e-05, "loss": 0.4926, "step": 24816 }, { "epoch": 0.6814113124656782, "grad_norm": 0.4047195315361023, "learning_rate": 1.4823439795956079e-05, "loss": 0.4939, "step": 24817 }, { "epoch": 0.6814387699066446, "grad_norm": 0.4140149652957916, "learning_rate": 1.4823061460649904e-05, "loss": 0.5182, "step": 24818 }, { "epoch": 0.6814662273476112, "grad_norm": 0.39249858260154724, "learning_rate": 1.4822683116347183e-05, "loss": 0.4697, "step": 24819 }, { "epoch": 0.6814936847885777, "grad_norm": 0.3508499264717102, "learning_rate": 1.4822304763048619e-05, "loss": 0.5878, "step": 24820 }, { "epoch": 0.6815211422295442, "grad_norm": 1.8966386318206787, "learning_rate": 1.4821926400754915e-05, "loss": 0.5219, "step": 24821 }, { "epoch": 0.6815485996705107, "grad_norm": 0.403727650642395, "learning_rate": 1.4821548029466782e-05, "loss": 0.4543, "step": 24822 }, { "epoch": 0.6815760571114772, "grad_norm": 0.3967725932598114, "learning_rate": 1.482116964918492e-05, "loss": 0.4663, "step": 24823 }, { "epoch": 0.6816035145524437, "grad_norm": 0.40517550706863403, "learning_rate": 1.4820791259910041e-05, "loss": 0.5123, "step": 24824 }, { "epoch": 0.6816309719934102, "grad_norm": 0.4405979812145233, "learning_rate": 1.4820412861642845e-05, "loss": 0.555, "step": 24825 }, { "epoch": 0.6816584294343767, "grad_norm": 0.4646373391151428, "learning_rate": 1.4820034454384045e-05, "loss": 0.5118, "step": 24826 }, { "epoch": 0.6816858868753433, "grad_norm": 0.39963871240615845, "learning_rate": 1.4819656038134337e-05, "loss": 0.5276, "step": 24827 }, { "epoch": 0.6817133443163097, "grad_norm": 0.4460827112197876, "learning_rate": 1.4819277612894436e-05, "loss": 0.4293, "step": 24828 }, { "epoch": 0.6817408017572762, "grad_norm": 0.5707235336303711, "learning_rate": 1.4818899178665044e-05, "loss": 0.4611, "step": 24829 }, { "epoch": 0.6817682591982427, "grad_norm": 0.37080761790275574, "learning_rate": 1.4818520735446863e-05, "loss": 0.5063, "step": 24830 }, { "epoch": 0.6817957166392092, "grad_norm": 0.3917675018310547, "learning_rate": 1.4818142283240609e-05, "loss": 0.5086, "step": 24831 }, { "epoch": 0.6818231740801757, "grad_norm": 0.4136042892932892, "learning_rate": 1.4817763822046977e-05, "loss": 0.5181, "step": 24832 }, { "epoch": 0.6818506315211422, "grad_norm": 0.37907809019088745, "learning_rate": 1.481738535186668e-05, "loss": 0.5442, "step": 24833 }, { "epoch": 0.6818780889621088, "grad_norm": 0.4064784049987793, "learning_rate": 1.4817006872700421e-05, "loss": 0.4985, "step": 24834 }, { "epoch": 0.6819055464030752, "grad_norm": 1.4483965635299683, "learning_rate": 1.4816628384548906e-05, "loss": 0.5151, "step": 24835 }, { "epoch": 0.6819330038440418, "grad_norm": 0.46663758158683777, "learning_rate": 1.4816249887412844e-05, "loss": 0.5539, "step": 24836 }, { "epoch": 0.6819604612850082, "grad_norm": 0.44579198956489563, "learning_rate": 1.4815871381292939e-05, "loss": 0.4706, "step": 24837 }, { "epoch": 0.6819879187259748, "grad_norm": 0.38031381368637085, "learning_rate": 1.4815492866189894e-05, "loss": 0.4931, "step": 24838 }, { "epoch": 0.6820153761669412, "grad_norm": 0.3751586079597473, "learning_rate": 1.481511434210442e-05, "loss": 0.5085, "step": 24839 }, { "epoch": 0.6820428336079077, "grad_norm": 0.3458572030067444, "learning_rate": 1.4814735809037219e-05, "loss": 0.4341, "step": 24840 }, { "epoch": 0.6820702910488743, "grad_norm": 0.44269365072250366, "learning_rate": 1.4814357266989e-05, "loss": 0.5144, "step": 24841 }, { "epoch": 0.6820977484898407, "grad_norm": 0.32083529233932495, "learning_rate": 1.481397871596047e-05, "loss": 0.3412, "step": 24842 }, { "epoch": 0.6821252059308073, "grad_norm": 1.5561447143554688, "learning_rate": 1.481360015595233e-05, "loss": 0.4517, "step": 24843 }, { "epoch": 0.6821526633717737, "grad_norm": 0.3980329930782318, "learning_rate": 1.4813221586965292e-05, "loss": 0.5683, "step": 24844 }, { "epoch": 0.6821801208127403, "grad_norm": 0.6409361958503723, "learning_rate": 1.4812843009000058e-05, "loss": 0.5434, "step": 24845 }, { "epoch": 0.6822075782537067, "grad_norm": 0.34522053599357605, "learning_rate": 1.4812464422057335e-05, "loss": 0.4681, "step": 24846 }, { "epoch": 0.6822350356946733, "grad_norm": 0.37633946537971497, "learning_rate": 1.4812085826137834e-05, "loss": 0.5096, "step": 24847 }, { "epoch": 0.6822624931356398, "grad_norm": 0.44478827714920044, "learning_rate": 1.4811707221242253e-05, "loss": 0.5422, "step": 24848 }, { "epoch": 0.6822899505766062, "grad_norm": 0.3703426718711853, "learning_rate": 1.4811328607371305e-05, "loss": 0.4298, "step": 24849 }, { "epoch": 0.6823174080175728, "grad_norm": 0.3889651298522949, "learning_rate": 1.4810949984525693e-05, "loss": 0.4828, "step": 24850 }, { "epoch": 0.6823448654585392, "grad_norm": 0.4192187786102295, "learning_rate": 1.4810571352706121e-05, "loss": 0.4349, "step": 24851 }, { "epoch": 0.6823723228995058, "grad_norm": 0.36454305052757263, "learning_rate": 1.48101927119133e-05, "loss": 0.4337, "step": 24852 }, { "epoch": 0.6823997803404722, "grad_norm": 0.4030281901359558, "learning_rate": 1.4809814062147934e-05, "loss": 0.5325, "step": 24853 }, { "epoch": 0.6824272377814388, "grad_norm": 0.36381566524505615, "learning_rate": 1.480943540341073e-05, "loss": 0.5145, "step": 24854 }, { "epoch": 0.6824546952224053, "grad_norm": 0.42357510328292847, "learning_rate": 1.4809056735702396e-05, "loss": 0.503, "step": 24855 }, { "epoch": 0.6824821526633718, "grad_norm": 0.3834179639816284, "learning_rate": 1.4808678059023632e-05, "loss": 0.5132, "step": 24856 }, { "epoch": 0.6825096101043383, "grad_norm": 0.4150472581386566, "learning_rate": 1.4808299373375153e-05, "loss": 0.5936, "step": 24857 }, { "epoch": 0.6825370675453047, "grad_norm": 0.495029091835022, "learning_rate": 1.4807920678757657e-05, "loss": 0.4828, "step": 24858 }, { "epoch": 0.6825645249862713, "grad_norm": 0.4056919515132904, "learning_rate": 1.4807541975171857e-05, "loss": 0.4918, "step": 24859 }, { "epoch": 0.6825919824272377, "grad_norm": 0.3829820454120636, "learning_rate": 1.4807163262618459e-05, "loss": 0.5496, "step": 24860 }, { "epoch": 0.6826194398682043, "grad_norm": 0.3589872419834137, "learning_rate": 1.4806784541098162e-05, "loss": 0.533, "step": 24861 }, { "epoch": 0.6826468973091708, "grad_norm": 0.3868025243282318, "learning_rate": 1.4806405810611682e-05, "loss": 0.58, "step": 24862 }, { "epoch": 0.6826743547501373, "grad_norm": 0.4043683111667633, "learning_rate": 1.4806027071159716e-05, "loss": 0.4895, "step": 24863 }, { "epoch": 0.6827018121911038, "grad_norm": 0.3368467688560486, "learning_rate": 1.480564832274298e-05, "loss": 0.4189, "step": 24864 }, { "epoch": 0.6827292696320703, "grad_norm": 0.37265080213546753, "learning_rate": 1.4805269565362177e-05, "loss": 0.5494, "step": 24865 }, { "epoch": 0.6827567270730368, "grad_norm": 0.4464718699455261, "learning_rate": 1.4804890799018008e-05, "loss": 0.5403, "step": 24866 }, { "epoch": 0.6827841845140032, "grad_norm": 0.3574119806289673, "learning_rate": 1.4804512023711188e-05, "loss": 0.4588, "step": 24867 }, { "epoch": 0.6828116419549698, "grad_norm": 0.37792500853538513, "learning_rate": 1.4804133239442419e-05, "loss": 0.4836, "step": 24868 }, { "epoch": 0.6828390993959363, "grad_norm": 0.3386591374874115, "learning_rate": 1.4803754446212408e-05, "loss": 0.4961, "step": 24869 }, { "epoch": 0.6828665568369028, "grad_norm": 0.5056691765785217, "learning_rate": 1.480337564402186e-05, "loss": 0.6012, "step": 24870 }, { "epoch": 0.6828940142778693, "grad_norm": 0.33615219593048096, "learning_rate": 1.4802996832871484e-05, "loss": 0.4586, "step": 24871 }, { "epoch": 0.6829214717188358, "grad_norm": 0.4249819219112396, "learning_rate": 1.4802618012761988e-05, "loss": 0.5109, "step": 24872 }, { "epoch": 0.6829489291598023, "grad_norm": 0.36838701367378235, "learning_rate": 1.4802239183694076e-05, "loss": 0.4545, "step": 24873 }, { "epoch": 0.6829763866007688, "grad_norm": 0.39959385991096497, "learning_rate": 1.4801860345668452e-05, "loss": 0.4946, "step": 24874 }, { "epoch": 0.6830038440417353, "grad_norm": 0.35120922327041626, "learning_rate": 1.480148149868583e-05, "loss": 0.3723, "step": 24875 }, { "epoch": 0.6830313014827019, "grad_norm": 0.3476572334766388, "learning_rate": 1.480110264274691e-05, "loss": 0.5063, "step": 24876 }, { "epoch": 0.6830587589236683, "grad_norm": 0.4200665056705475, "learning_rate": 1.4800723777852404e-05, "loss": 0.5149, "step": 24877 }, { "epoch": 0.6830862163646348, "grad_norm": 0.3658219873905182, "learning_rate": 1.4800344904003014e-05, "loss": 0.5494, "step": 24878 }, { "epoch": 0.6831136738056013, "grad_norm": 0.34370124340057373, "learning_rate": 1.4799966021199448e-05, "loss": 0.4804, "step": 24879 }, { "epoch": 0.6831411312465678, "grad_norm": 0.40713876485824585, "learning_rate": 1.4799587129442414e-05, "loss": 0.5802, "step": 24880 }, { "epoch": 0.6831685886875343, "grad_norm": 0.7041587829589844, "learning_rate": 1.479920822873262e-05, "loss": 0.5646, "step": 24881 }, { "epoch": 0.6831960461285008, "grad_norm": 0.364563524723053, "learning_rate": 1.479882931907077e-05, "loss": 0.3966, "step": 24882 }, { "epoch": 0.6832235035694674, "grad_norm": 0.3685877323150635, "learning_rate": 1.4798450400457571e-05, "loss": 0.4529, "step": 24883 }, { "epoch": 0.6832509610104338, "grad_norm": 0.36175569891929626, "learning_rate": 1.479807147289373e-05, "loss": 0.4592, "step": 24884 }, { "epoch": 0.6832784184514004, "grad_norm": 0.3668641149997711, "learning_rate": 1.4797692536379953e-05, "loss": 0.4945, "step": 24885 }, { "epoch": 0.6833058758923668, "grad_norm": 0.3530903458595276, "learning_rate": 1.4797313590916952e-05, "loss": 0.4972, "step": 24886 }, { "epoch": 0.6833333333333333, "grad_norm": 0.38182833790779114, "learning_rate": 1.4796934636505428e-05, "loss": 0.5429, "step": 24887 }, { "epoch": 0.6833607907742998, "grad_norm": 0.3649163842201233, "learning_rate": 1.4796555673146089e-05, "loss": 0.4949, "step": 24888 }, { "epoch": 0.6833882482152663, "grad_norm": 0.3764351010322571, "learning_rate": 1.4796176700839641e-05, "loss": 0.4988, "step": 24889 }, { "epoch": 0.6834157056562329, "grad_norm": 0.44681215286254883, "learning_rate": 1.4795797719586798e-05, "loss": 0.5556, "step": 24890 }, { "epoch": 0.6834431630971993, "grad_norm": 0.40046921372413635, "learning_rate": 1.479541872938826e-05, "loss": 0.4293, "step": 24891 }, { "epoch": 0.6834706205381659, "grad_norm": 0.3907008171081543, "learning_rate": 1.4795039730244734e-05, "loss": 0.4954, "step": 24892 }, { "epoch": 0.6834980779791323, "grad_norm": 0.40889349579811096, "learning_rate": 1.4794660722156931e-05, "loss": 0.5006, "step": 24893 }, { "epoch": 0.6835255354200989, "grad_norm": 0.3955991566181183, "learning_rate": 1.479428170512555e-05, "loss": 0.5082, "step": 24894 }, { "epoch": 0.6835529928610653, "grad_norm": 0.40025994181632996, "learning_rate": 1.4793902679151308e-05, "loss": 0.5652, "step": 24895 }, { "epoch": 0.6835804503020319, "grad_norm": 0.40693941712379456, "learning_rate": 1.4793523644234909e-05, "loss": 0.4547, "step": 24896 }, { "epoch": 0.6836079077429984, "grad_norm": 0.36389395594596863, "learning_rate": 1.4793144600377054e-05, "loss": 0.4543, "step": 24897 }, { "epoch": 0.6836353651839648, "grad_norm": 0.4055582284927368, "learning_rate": 1.4792765547578456e-05, "loss": 0.4703, "step": 24898 }, { "epoch": 0.6836628226249314, "grad_norm": 0.3808901011943817, "learning_rate": 1.4792386485839822e-05, "loss": 0.4959, "step": 24899 }, { "epoch": 0.6836902800658978, "grad_norm": 0.4612281620502472, "learning_rate": 1.4792007415161855e-05, "loss": 0.5233, "step": 24900 }, { "epoch": 0.6837177375068644, "grad_norm": 0.3687084913253784, "learning_rate": 1.4791628335545269e-05, "loss": 0.4798, "step": 24901 }, { "epoch": 0.6837451949478308, "grad_norm": 0.39728397130966187, "learning_rate": 1.4791249246990763e-05, "loss": 0.5932, "step": 24902 }, { "epoch": 0.6837726523887974, "grad_norm": 0.4013398289680481, "learning_rate": 1.479087014949905e-05, "loss": 0.5159, "step": 24903 }, { "epoch": 0.6838001098297639, "grad_norm": 0.37396004796028137, "learning_rate": 1.4790491043070833e-05, "loss": 0.4705, "step": 24904 }, { "epoch": 0.6838275672707304, "grad_norm": 0.40817540884017944, "learning_rate": 1.4790111927706823e-05, "loss": 0.4784, "step": 24905 }, { "epoch": 0.6838550247116969, "grad_norm": 0.3609885275363922, "learning_rate": 1.4789732803407726e-05, "loss": 0.4982, "step": 24906 }, { "epoch": 0.6838824821526633, "grad_norm": 0.42398396134376526, "learning_rate": 1.4789353670174247e-05, "loss": 0.5199, "step": 24907 }, { "epoch": 0.6839099395936299, "grad_norm": 0.3661375939846039, "learning_rate": 1.4788974528007096e-05, "loss": 0.4282, "step": 24908 }, { "epoch": 0.6839373970345963, "grad_norm": 0.35768160223960876, "learning_rate": 1.478859537690698e-05, "loss": 0.4536, "step": 24909 }, { "epoch": 0.6839648544755629, "grad_norm": 0.3608910143375397, "learning_rate": 1.4788216216874603e-05, "loss": 0.507, "step": 24910 }, { "epoch": 0.6839923119165294, "grad_norm": 0.43883541226387024, "learning_rate": 1.4787837047910676e-05, "loss": 0.4287, "step": 24911 }, { "epoch": 0.6840197693574959, "grad_norm": 0.4053664803504944, "learning_rate": 1.4787457870015901e-05, "loss": 0.463, "step": 24912 }, { "epoch": 0.6840472267984624, "grad_norm": 0.3748384118080139, "learning_rate": 1.4787078683190994e-05, "loss": 0.6243, "step": 24913 }, { "epoch": 0.6840746842394289, "grad_norm": 0.42392557859420776, "learning_rate": 1.4786699487436655e-05, "loss": 0.5504, "step": 24914 }, { "epoch": 0.6841021416803954, "grad_norm": 0.3693647086620331, "learning_rate": 1.4786320282753595e-05, "loss": 0.5149, "step": 24915 }, { "epoch": 0.6841295991213618, "grad_norm": 0.3919825851917267, "learning_rate": 1.478594106914252e-05, "loss": 0.5066, "step": 24916 }, { "epoch": 0.6841570565623284, "grad_norm": 0.42959752678871155, "learning_rate": 1.4785561846604138e-05, "loss": 0.4099, "step": 24917 }, { "epoch": 0.6841845140032949, "grad_norm": 0.3968738615512848, "learning_rate": 1.4785182615139153e-05, "loss": 0.522, "step": 24918 }, { "epoch": 0.6842119714442614, "grad_norm": 0.3526715040206909, "learning_rate": 1.4784803374748278e-05, "loss": 0.4596, "step": 24919 }, { "epoch": 0.6842394288852279, "grad_norm": 0.413411021232605, "learning_rate": 1.4784424125432216e-05, "loss": 0.5313, "step": 24920 }, { "epoch": 0.6842668863261944, "grad_norm": 0.42681244015693665, "learning_rate": 1.4784044867191676e-05, "loss": 0.5624, "step": 24921 }, { "epoch": 0.6842943437671609, "grad_norm": 0.4470168650150299, "learning_rate": 1.4783665600027365e-05, "loss": 0.4721, "step": 24922 }, { "epoch": 0.6843218012081274, "grad_norm": 0.34993165731430054, "learning_rate": 1.4783286323939991e-05, "loss": 0.4156, "step": 24923 }, { "epoch": 0.6843492586490939, "grad_norm": 0.42912545800209045, "learning_rate": 1.4782907038930266e-05, "loss": 0.5474, "step": 24924 }, { "epoch": 0.6843767160900605, "grad_norm": 0.3474637567996979, "learning_rate": 1.4782527744998888e-05, "loss": 0.5479, "step": 24925 }, { "epoch": 0.6844041735310269, "grad_norm": 0.44261348247528076, "learning_rate": 1.4782148442146569e-05, "loss": 0.4871, "step": 24926 }, { "epoch": 0.6844316309719934, "grad_norm": 0.35779350996017456, "learning_rate": 1.4781769130374017e-05, "loss": 0.4876, "step": 24927 }, { "epoch": 0.6844590884129599, "grad_norm": 0.45802047848701477, "learning_rate": 1.478138980968194e-05, "loss": 0.5213, "step": 24928 }, { "epoch": 0.6844865458539264, "grad_norm": 0.5102986693382263, "learning_rate": 1.4781010480071048e-05, "loss": 0.5165, "step": 24929 }, { "epoch": 0.6845140032948929, "grad_norm": 0.42031463980674744, "learning_rate": 1.4780631141542043e-05, "loss": 0.5854, "step": 24930 }, { "epoch": 0.6845414607358594, "grad_norm": 0.3348855972290039, "learning_rate": 1.4780251794095635e-05, "loss": 0.5283, "step": 24931 }, { "epoch": 0.684568918176826, "grad_norm": 0.4121032655239105, "learning_rate": 1.4779872437732531e-05, "loss": 0.4891, "step": 24932 }, { "epoch": 0.6845963756177924, "grad_norm": 0.3642995357513428, "learning_rate": 1.4779493072453439e-05, "loss": 0.5107, "step": 24933 }, { "epoch": 0.684623833058759, "grad_norm": 0.39876437187194824, "learning_rate": 1.4779113698259072e-05, "loss": 0.4427, "step": 24934 }, { "epoch": 0.6846512904997254, "grad_norm": 0.47884130477905273, "learning_rate": 1.4778734315150127e-05, "loss": 0.5315, "step": 24935 }, { "epoch": 0.684678747940692, "grad_norm": 0.3604498505592346, "learning_rate": 1.4778354923127317e-05, "loss": 0.4786, "step": 24936 }, { "epoch": 0.6847062053816584, "grad_norm": 0.38389894366264343, "learning_rate": 1.4777975522191355e-05, "loss": 0.492, "step": 24937 }, { "epoch": 0.6847336628226249, "grad_norm": 0.4527512490749359, "learning_rate": 1.4777596112342938e-05, "loss": 0.5591, "step": 24938 }, { "epoch": 0.6847611202635915, "grad_norm": 0.3925826847553253, "learning_rate": 1.4777216693582783e-05, "loss": 0.6062, "step": 24939 }, { "epoch": 0.6847885777045579, "grad_norm": 0.34460556507110596, "learning_rate": 1.4776837265911594e-05, "loss": 0.4643, "step": 24940 }, { "epoch": 0.6848160351455245, "grad_norm": 0.37968993186950684, "learning_rate": 1.4776457829330078e-05, "loss": 0.4267, "step": 24941 }, { "epoch": 0.6848434925864909, "grad_norm": 0.4676034152507782, "learning_rate": 1.4776078383838945e-05, "loss": 0.5278, "step": 24942 }, { "epoch": 0.6848709500274575, "grad_norm": 0.38904330134391785, "learning_rate": 1.47756989294389e-05, "loss": 0.5015, "step": 24943 }, { "epoch": 0.6848984074684239, "grad_norm": 0.3668583929538727, "learning_rate": 1.4775319466130653e-05, "loss": 0.522, "step": 24944 }, { "epoch": 0.6849258649093904, "grad_norm": 0.39829540252685547, "learning_rate": 1.4774939993914911e-05, "loss": 0.4656, "step": 24945 }, { "epoch": 0.684953322350357, "grad_norm": 0.3840712606906891, "learning_rate": 1.4774560512792379e-05, "loss": 0.4565, "step": 24946 }, { "epoch": 0.6849807797913234, "grad_norm": 0.41839128732681274, "learning_rate": 1.4774181022763775e-05, "loss": 0.5362, "step": 24947 }, { "epoch": 0.68500823723229, "grad_norm": 0.3668530583381653, "learning_rate": 1.4773801523829795e-05, "loss": 0.5099, "step": 24948 }, { "epoch": 0.6850356946732564, "grad_norm": 0.3699326813220978, "learning_rate": 1.477342201599115e-05, "loss": 0.5032, "step": 24949 }, { "epoch": 0.685063152114223, "grad_norm": 0.4102562963962555, "learning_rate": 1.4773042499248552e-05, "loss": 0.4784, "step": 24950 }, { "epoch": 0.6850906095551894, "grad_norm": 0.45973747968673706, "learning_rate": 1.4772662973602705e-05, "loss": 0.4985, "step": 24951 }, { "epoch": 0.685118066996156, "grad_norm": 0.40723615884780884, "learning_rate": 1.4772283439054318e-05, "loss": 0.4512, "step": 24952 }, { "epoch": 0.6851455244371225, "grad_norm": 0.3389741778373718, "learning_rate": 1.47719038956041e-05, "loss": 0.5405, "step": 24953 }, { "epoch": 0.685172981878089, "grad_norm": 0.40978842973709106, "learning_rate": 1.477152434325276e-05, "loss": 0.4712, "step": 24954 }, { "epoch": 0.6852004393190555, "grad_norm": 0.38378196954727173, "learning_rate": 1.4771144782001002e-05, "loss": 0.5406, "step": 24955 }, { "epoch": 0.6852278967600219, "grad_norm": 0.43732205033302307, "learning_rate": 1.4770765211849535e-05, "loss": 0.4949, "step": 24956 }, { "epoch": 0.6852553542009885, "grad_norm": 0.4060264527797699, "learning_rate": 1.4770385632799071e-05, "loss": 0.4806, "step": 24957 }, { "epoch": 0.6852828116419549, "grad_norm": 0.42101046442985535, "learning_rate": 1.4770006044850316e-05, "loss": 0.4894, "step": 24958 }, { "epoch": 0.6853102690829215, "grad_norm": 0.3874875605106354, "learning_rate": 1.4769626448003974e-05, "loss": 0.5152, "step": 24959 }, { "epoch": 0.685337726523888, "grad_norm": 0.3824084997177124, "learning_rate": 1.4769246842260759e-05, "loss": 0.5424, "step": 24960 }, { "epoch": 0.6853651839648545, "grad_norm": 0.4230633080005646, "learning_rate": 1.4768867227621375e-05, "loss": 0.4938, "step": 24961 }, { "epoch": 0.685392641405821, "grad_norm": 0.42961710691452026, "learning_rate": 1.4768487604086529e-05, "loss": 0.5427, "step": 24962 }, { "epoch": 0.6854200988467875, "grad_norm": 0.37059178948402405, "learning_rate": 1.4768107971656938e-05, "loss": 0.5728, "step": 24963 }, { "epoch": 0.685447556287754, "grad_norm": 0.37713146209716797, "learning_rate": 1.4767728330333299e-05, "loss": 0.5318, "step": 24964 }, { "epoch": 0.6854750137287204, "grad_norm": 0.40062010288238525, "learning_rate": 1.4767348680116326e-05, "loss": 0.5003, "step": 24965 }, { "epoch": 0.685502471169687, "grad_norm": 0.45412859320640564, "learning_rate": 1.476696902100673e-05, "loss": 0.5091, "step": 24966 }, { "epoch": 0.6855299286106535, "grad_norm": 0.3741428852081299, "learning_rate": 1.476658935300521e-05, "loss": 0.4517, "step": 24967 }, { "epoch": 0.68555738605162, "grad_norm": 0.425538569688797, "learning_rate": 1.4766209676112482e-05, "loss": 0.5778, "step": 24968 }, { "epoch": 0.6855848434925865, "grad_norm": 0.4259178638458252, "learning_rate": 1.4765829990329251e-05, "loss": 0.5469, "step": 24969 }, { "epoch": 0.685612300933553, "grad_norm": 0.5256428122520447, "learning_rate": 1.4765450295656225e-05, "loss": 0.5168, "step": 24970 }, { "epoch": 0.6856397583745195, "grad_norm": 0.34909167885780334, "learning_rate": 1.4765070592094116e-05, "loss": 0.5063, "step": 24971 }, { "epoch": 0.685667215815486, "grad_norm": 0.38031238317489624, "learning_rate": 1.4764690879643626e-05, "loss": 0.5727, "step": 24972 }, { "epoch": 0.6856946732564525, "grad_norm": 0.4337478280067444, "learning_rate": 1.476431115830547e-05, "loss": 0.5525, "step": 24973 }, { "epoch": 0.685722130697419, "grad_norm": 0.35913437604904175, "learning_rate": 1.476393142808035e-05, "loss": 0.4606, "step": 24974 }, { "epoch": 0.6857495881383855, "grad_norm": 0.4607742130756378, "learning_rate": 1.476355168896898e-05, "loss": 0.5573, "step": 24975 }, { "epoch": 0.685777045579352, "grad_norm": 0.4378810226917267, "learning_rate": 1.4763171940972068e-05, "loss": 0.5009, "step": 24976 }, { "epoch": 0.6858045030203185, "grad_norm": 0.379414826631546, "learning_rate": 1.4762792184090316e-05, "loss": 0.5106, "step": 24977 }, { "epoch": 0.685831960461285, "grad_norm": 0.34290239214897156, "learning_rate": 1.4762412418324437e-05, "loss": 0.4643, "step": 24978 }, { "epoch": 0.6858594179022515, "grad_norm": 0.4200463593006134, "learning_rate": 1.4762032643675137e-05, "loss": 0.5193, "step": 24979 }, { "epoch": 0.685886875343218, "grad_norm": 0.46877843141555786, "learning_rate": 1.4761652860143132e-05, "loss": 0.4636, "step": 24980 }, { "epoch": 0.6859143327841846, "grad_norm": 0.3822246193885803, "learning_rate": 1.476127306772912e-05, "loss": 0.4352, "step": 24981 }, { "epoch": 0.685941790225151, "grad_norm": 0.3386247158050537, "learning_rate": 1.4760893266433816e-05, "loss": 0.5464, "step": 24982 }, { "epoch": 0.6859692476661176, "grad_norm": 1.7205625772476196, "learning_rate": 1.4760513456257925e-05, "loss": 0.5874, "step": 24983 }, { "epoch": 0.685996705107084, "grad_norm": 0.3882298171520233, "learning_rate": 1.4760133637202159e-05, "loss": 0.3966, "step": 24984 }, { "epoch": 0.6860241625480505, "grad_norm": 0.36815860867500305, "learning_rate": 1.4759753809267223e-05, "loss": 0.4937, "step": 24985 }, { "epoch": 0.686051619989017, "grad_norm": 0.36421552300453186, "learning_rate": 1.4759373972453828e-05, "loss": 0.4762, "step": 24986 }, { "epoch": 0.6860790774299835, "grad_norm": 0.4105118215084076, "learning_rate": 1.4758994126762681e-05, "loss": 0.4799, "step": 24987 }, { "epoch": 0.6861065348709501, "grad_norm": 0.34058263897895813, "learning_rate": 1.4758614272194491e-05, "loss": 0.5007, "step": 24988 }, { "epoch": 0.6861339923119165, "grad_norm": 0.4137830436229706, "learning_rate": 1.4758234408749967e-05, "loss": 0.5466, "step": 24989 }, { "epoch": 0.6861614497528831, "grad_norm": 0.4226836562156677, "learning_rate": 1.4757854536429817e-05, "loss": 0.4598, "step": 24990 }, { "epoch": 0.6861889071938495, "grad_norm": 0.3843311667442322, "learning_rate": 1.475747465523475e-05, "loss": 0.4898, "step": 24991 }, { "epoch": 0.6862163646348161, "grad_norm": 0.4199639856815338, "learning_rate": 1.4757094765165473e-05, "loss": 0.5183, "step": 24992 }, { "epoch": 0.6862438220757825, "grad_norm": 0.35397061705589294, "learning_rate": 1.47567148662227e-05, "loss": 0.4597, "step": 24993 }, { "epoch": 0.686271279516749, "grad_norm": 0.4356652796268463, "learning_rate": 1.475633495840713e-05, "loss": 0.5085, "step": 24994 }, { "epoch": 0.6862987369577156, "grad_norm": 0.36105191707611084, "learning_rate": 1.475595504171948e-05, "loss": 0.5558, "step": 24995 }, { "epoch": 0.686326194398682, "grad_norm": 0.36948299407958984, "learning_rate": 1.4755575116160455e-05, "loss": 0.4741, "step": 24996 }, { "epoch": 0.6863536518396486, "grad_norm": 0.3623158037662506, "learning_rate": 1.4755195181730767e-05, "loss": 0.3846, "step": 24997 }, { "epoch": 0.686381109280615, "grad_norm": 0.41395583748817444, "learning_rate": 1.4754815238431122e-05, "loss": 0.4677, "step": 24998 }, { "epoch": 0.6864085667215816, "grad_norm": 0.3729924261569977, "learning_rate": 1.4754435286262224e-05, "loss": 0.5122, "step": 24999 }, { "epoch": 0.686436024162548, "grad_norm": 0.3699617385864258, "learning_rate": 1.4754055325224794e-05, "loss": 0.4486, "step": 25000 }, { "epoch": 0.6864634816035146, "grad_norm": 0.3920876681804657, "learning_rate": 1.4753675355319527e-05, "loss": 0.5503, "step": 25001 }, { "epoch": 0.6864909390444811, "grad_norm": 0.45700857043266296, "learning_rate": 1.4753295376547143e-05, "loss": 0.565, "step": 25002 }, { "epoch": 0.6865183964854475, "grad_norm": 0.34269505739212036, "learning_rate": 1.4752915388908342e-05, "loss": 0.4277, "step": 25003 }, { "epoch": 0.6865458539264141, "grad_norm": 0.3658309280872345, "learning_rate": 1.4752535392403841e-05, "loss": 0.4448, "step": 25004 }, { "epoch": 0.6865733113673805, "grad_norm": 0.37079334259033203, "learning_rate": 1.4752155387034344e-05, "loss": 0.4879, "step": 25005 }, { "epoch": 0.6866007688083471, "grad_norm": 0.3612517714500427, "learning_rate": 1.4751775372800557e-05, "loss": 0.4967, "step": 25006 }, { "epoch": 0.6866282262493135, "grad_norm": 0.3654747009277344, "learning_rate": 1.4751395349703197e-05, "loss": 0.4719, "step": 25007 }, { "epoch": 0.6866556836902801, "grad_norm": 0.37965530157089233, "learning_rate": 1.4751015317742963e-05, "loss": 0.5241, "step": 25008 }, { "epoch": 0.6866831411312466, "grad_norm": 0.3599485456943512, "learning_rate": 1.4750635276920573e-05, "loss": 0.4624, "step": 25009 }, { "epoch": 0.6867105985722131, "grad_norm": 0.3858427405357361, "learning_rate": 1.475025522723673e-05, "loss": 0.5038, "step": 25010 }, { "epoch": 0.6867380560131796, "grad_norm": 0.4101352095603943, "learning_rate": 1.4749875168692147e-05, "loss": 0.5477, "step": 25011 }, { "epoch": 0.686765513454146, "grad_norm": 0.4274534285068512, "learning_rate": 1.474949510128753e-05, "loss": 0.5704, "step": 25012 }, { "epoch": 0.6867929708951126, "grad_norm": 0.3857730031013489, "learning_rate": 1.4749115025023588e-05, "loss": 0.5318, "step": 25013 }, { "epoch": 0.686820428336079, "grad_norm": 2.4252800941467285, "learning_rate": 1.4748734939901033e-05, "loss": 0.411, "step": 25014 }, { "epoch": 0.6868478857770456, "grad_norm": 0.35186588764190674, "learning_rate": 1.474835484592057e-05, "loss": 0.4944, "step": 25015 }, { "epoch": 0.6868753432180121, "grad_norm": 0.4326329231262207, "learning_rate": 1.4747974743082906e-05, "loss": 0.5832, "step": 25016 }, { "epoch": 0.6869028006589786, "grad_norm": 0.4257397949695587, "learning_rate": 1.474759463138876e-05, "loss": 0.5567, "step": 25017 }, { "epoch": 0.6869302580999451, "grad_norm": 0.3894282281398773, "learning_rate": 1.474721451083883e-05, "loss": 0.5232, "step": 25018 }, { "epoch": 0.6869577155409116, "grad_norm": 0.3541971743106842, "learning_rate": 1.4746834381433833e-05, "loss": 0.4159, "step": 25019 }, { "epoch": 0.6869851729818781, "grad_norm": 0.41042444109916687, "learning_rate": 1.4746454243174475e-05, "loss": 0.543, "step": 25020 }, { "epoch": 0.6870126304228446, "grad_norm": 0.40615513920783997, "learning_rate": 1.4746074096061463e-05, "loss": 0.5442, "step": 25021 }, { "epoch": 0.6870400878638111, "grad_norm": 0.44850075244903564, "learning_rate": 1.474569394009551e-05, "loss": 0.5657, "step": 25022 }, { "epoch": 0.6870675453047776, "grad_norm": 0.42442286014556885, "learning_rate": 1.4745313775277322e-05, "loss": 0.5105, "step": 25023 }, { "epoch": 0.6870950027457441, "grad_norm": 0.42426684498786926, "learning_rate": 1.4744933601607608e-05, "loss": 0.4948, "step": 25024 }, { "epoch": 0.6871224601867106, "grad_norm": 0.3726227283477783, "learning_rate": 1.474455341908708e-05, "loss": 0.4652, "step": 25025 }, { "epoch": 0.6871499176276771, "grad_norm": 0.3685840368270874, "learning_rate": 1.4744173227716446e-05, "loss": 0.5292, "step": 25026 }, { "epoch": 0.6871773750686436, "grad_norm": 0.30504414439201355, "learning_rate": 1.4743793027496414e-05, "loss": 0.4059, "step": 25027 }, { "epoch": 0.6872048325096101, "grad_norm": 0.3608452379703522, "learning_rate": 1.4743412818427694e-05, "loss": 0.4795, "step": 25028 }, { "epoch": 0.6872322899505766, "grad_norm": 0.37962475419044495, "learning_rate": 1.4743032600510994e-05, "loss": 0.508, "step": 25029 }, { "epoch": 0.6872597473915432, "grad_norm": 0.4278847873210907, "learning_rate": 1.4742652373747026e-05, "loss": 0.4813, "step": 25030 }, { "epoch": 0.6872872048325096, "grad_norm": 0.4042535722255707, "learning_rate": 1.4742272138136496e-05, "loss": 0.5658, "step": 25031 }, { "epoch": 0.6873146622734762, "grad_norm": 0.3782712519168854, "learning_rate": 1.4741891893680116e-05, "loss": 0.5645, "step": 25032 }, { "epoch": 0.6873421197144426, "grad_norm": 0.37286460399627686, "learning_rate": 1.4741511640378593e-05, "loss": 0.4959, "step": 25033 }, { "epoch": 0.6873695771554091, "grad_norm": 0.40477001667022705, "learning_rate": 1.474113137823264e-05, "loss": 0.5579, "step": 25034 }, { "epoch": 0.6873970345963756, "grad_norm": 0.3944624364376068, "learning_rate": 1.4740751107242964e-05, "loss": 0.5203, "step": 25035 }, { "epoch": 0.6874244920373421, "grad_norm": 0.35118967294692993, "learning_rate": 1.474037082741027e-05, "loss": 0.4067, "step": 25036 }, { "epoch": 0.6874519494783087, "grad_norm": 0.4388546645641327, "learning_rate": 1.4739990538735276e-05, "loss": 0.4794, "step": 25037 }, { "epoch": 0.6874794069192751, "grad_norm": 0.3888486325740814, "learning_rate": 1.4739610241218684e-05, "loss": 0.5317, "step": 25038 }, { "epoch": 0.6875068643602417, "grad_norm": 0.3496399223804474, "learning_rate": 1.4739229934861206e-05, "loss": 0.4152, "step": 25039 }, { "epoch": 0.6875343218012081, "grad_norm": 0.5843207836151123, "learning_rate": 1.4738849619663555e-05, "loss": 0.5576, "step": 25040 }, { "epoch": 0.6875617792421747, "grad_norm": 0.36091458797454834, "learning_rate": 1.4738469295626435e-05, "loss": 0.5032, "step": 25041 }, { "epoch": 0.6875892366831411, "grad_norm": 0.4014441668987274, "learning_rate": 1.4738088962750556e-05, "loss": 0.4403, "step": 25042 }, { "epoch": 0.6876166941241076, "grad_norm": 0.4212038516998291, "learning_rate": 1.473770862103663e-05, "loss": 0.5211, "step": 25043 }, { "epoch": 0.6876441515650741, "grad_norm": 0.4082971513271332, "learning_rate": 1.4737328270485364e-05, "loss": 0.4951, "step": 25044 }, { "epoch": 0.6876716090060406, "grad_norm": 0.38707464933395386, "learning_rate": 1.4736947911097472e-05, "loss": 0.5072, "step": 25045 }, { "epoch": 0.6876990664470072, "grad_norm": 0.378704309463501, "learning_rate": 1.4736567542873658e-05, "loss": 0.4963, "step": 25046 }, { "epoch": 0.6877265238879736, "grad_norm": 0.3576977252960205, "learning_rate": 1.4736187165814633e-05, "loss": 0.4656, "step": 25047 }, { "epoch": 0.6877539813289402, "grad_norm": 0.39872992038726807, "learning_rate": 1.4735806779921109e-05, "loss": 0.5016, "step": 25048 }, { "epoch": 0.6877814387699066, "grad_norm": 0.3615678548812866, "learning_rate": 1.4735426385193792e-05, "loss": 0.4991, "step": 25049 }, { "epoch": 0.6878088962108732, "grad_norm": 0.34810179471969604, "learning_rate": 1.4735045981633396e-05, "loss": 0.5623, "step": 25050 }, { "epoch": 0.6878363536518396, "grad_norm": 0.3768642246723175, "learning_rate": 1.4734665569240627e-05, "loss": 0.4401, "step": 25051 }, { "epoch": 0.6878638110928061, "grad_norm": 0.3954802453517914, "learning_rate": 1.4734285148016195e-05, "loss": 0.5877, "step": 25052 }, { "epoch": 0.6878912685337727, "grad_norm": 0.44435742497444153, "learning_rate": 1.4733904717960812e-05, "loss": 0.4486, "step": 25053 }, { "epoch": 0.6879187259747391, "grad_norm": 0.3506354093551636, "learning_rate": 1.4733524279075184e-05, "loss": 0.4998, "step": 25054 }, { "epoch": 0.6879461834157057, "grad_norm": 0.5023093223571777, "learning_rate": 1.4733143831360021e-05, "loss": 0.5652, "step": 25055 }, { "epoch": 0.6879736408566721, "grad_norm": 0.49816906452178955, "learning_rate": 1.4732763374816038e-05, "loss": 0.4692, "step": 25056 }, { "epoch": 0.6880010982976387, "grad_norm": 0.37674686312675476, "learning_rate": 1.4732382909443936e-05, "loss": 0.5367, "step": 25057 }, { "epoch": 0.6880285557386051, "grad_norm": 0.4826428294181824, "learning_rate": 1.4732002435244432e-05, "loss": 0.4539, "step": 25058 }, { "epoch": 0.6880560131795717, "grad_norm": 0.43274232745170593, "learning_rate": 1.4731621952218236e-05, "loss": 0.5841, "step": 25059 }, { "epoch": 0.6880834706205382, "grad_norm": 0.34822770953178406, "learning_rate": 1.473124146036605e-05, "loss": 0.4099, "step": 25060 }, { "epoch": 0.6881109280615046, "grad_norm": 0.4351721405982971, "learning_rate": 1.473086095968859e-05, "loss": 0.5288, "step": 25061 }, { "epoch": 0.6881383855024712, "grad_norm": 0.4239504337310791, "learning_rate": 1.4730480450186566e-05, "loss": 0.4811, "step": 25062 }, { "epoch": 0.6881658429434376, "grad_norm": 0.42835599184036255, "learning_rate": 1.4730099931860687e-05, "loss": 0.4789, "step": 25063 }, { "epoch": 0.6881933003844042, "grad_norm": 0.37714943289756775, "learning_rate": 1.472971940471166e-05, "loss": 0.5495, "step": 25064 }, { "epoch": 0.6882207578253706, "grad_norm": 0.3670966327190399, "learning_rate": 1.4729338868740198e-05, "loss": 0.5114, "step": 25065 }, { "epoch": 0.6882482152663372, "grad_norm": 0.4600536823272705, "learning_rate": 1.4728958323947006e-05, "loss": 0.61, "step": 25066 }, { "epoch": 0.6882756727073037, "grad_norm": 0.36692866683006287, "learning_rate": 1.4728577770332801e-05, "loss": 0.4459, "step": 25067 }, { "epoch": 0.6883031301482702, "grad_norm": 0.4146535396575928, "learning_rate": 1.472819720789829e-05, "loss": 0.4765, "step": 25068 }, { "epoch": 0.6883305875892367, "grad_norm": 0.4122669994831085, "learning_rate": 1.4727816636644182e-05, "loss": 0.5305, "step": 25069 }, { "epoch": 0.6883580450302031, "grad_norm": 0.365607887506485, "learning_rate": 1.4727436056571183e-05, "loss": 0.4116, "step": 25070 }, { "epoch": 0.6883855024711697, "grad_norm": 0.40892621874809265, "learning_rate": 1.472705546768001e-05, "loss": 0.482, "step": 25071 }, { "epoch": 0.6884129599121361, "grad_norm": 0.37252146005630493, "learning_rate": 1.4726674869971368e-05, "loss": 0.4766, "step": 25072 }, { "epoch": 0.6884404173531027, "grad_norm": 6.623183727264404, "learning_rate": 1.4726294263445971e-05, "loss": 0.4764, "step": 25073 }, { "epoch": 0.6884678747940692, "grad_norm": 0.38651081919670105, "learning_rate": 1.4725913648104527e-05, "loss": 0.4928, "step": 25074 }, { "epoch": 0.6884953322350357, "grad_norm": 0.410879522562027, "learning_rate": 1.4725533023947743e-05, "loss": 0.4696, "step": 25075 }, { "epoch": 0.6885227896760022, "grad_norm": 0.3840910792350769, "learning_rate": 1.4725152390976334e-05, "loss": 0.5055, "step": 25076 }, { "epoch": 0.6885502471169687, "grad_norm": 0.3773084282875061, "learning_rate": 1.4724771749191007e-05, "loss": 0.443, "step": 25077 }, { "epoch": 0.6885777045579352, "grad_norm": 0.35352739691734314, "learning_rate": 1.4724391098592471e-05, "loss": 0.5276, "step": 25078 }, { "epoch": 0.6886051619989016, "grad_norm": 0.3392471373081207, "learning_rate": 1.4724010439181442e-05, "loss": 0.4511, "step": 25079 }, { "epoch": 0.6886326194398682, "grad_norm": 0.41726890206336975, "learning_rate": 1.4723629770958621e-05, "loss": 0.4964, "step": 25080 }, { "epoch": 0.6886600768808347, "grad_norm": 0.32720935344696045, "learning_rate": 1.4723249093924725e-05, "loss": 0.4174, "step": 25081 }, { "epoch": 0.6886875343218012, "grad_norm": 0.4577919840812683, "learning_rate": 1.4722868408080464e-05, "loss": 0.5203, "step": 25082 }, { "epoch": 0.6887149917627677, "grad_norm": 0.4065510928630829, "learning_rate": 1.4722487713426544e-05, "loss": 0.5171, "step": 25083 }, { "epoch": 0.6887424492037342, "grad_norm": 0.359680712223053, "learning_rate": 1.4722107009963677e-05, "loss": 0.4451, "step": 25084 }, { "epoch": 0.6887699066447007, "grad_norm": 0.37972524762153625, "learning_rate": 1.4721726297692573e-05, "loss": 0.5947, "step": 25085 }, { "epoch": 0.6887973640856672, "grad_norm": 0.3927869200706482, "learning_rate": 1.4721345576613944e-05, "loss": 0.4311, "step": 25086 }, { "epoch": 0.6888248215266337, "grad_norm": 0.41468921303749084, "learning_rate": 1.4720964846728498e-05, "loss": 0.4945, "step": 25087 }, { "epoch": 0.6888522789676003, "grad_norm": 0.31542977690696716, "learning_rate": 1.4720584108036944e-05, "loss": 0.3842, "step": 25088 }, { "epoch": 0.6888797364085667, "grad_norm": 0.3769542872905731, "learning_rate": 1.4720203360539996e-05, "loss": 0.4527, "step": 25089 }, { "epoch": 0.6889071938495333, "grad_norm": 0.35488808155059814, "learning_rate": 1.4719822604238362e-05, "loss": 0.4428, "step": 25090 }, { "epoch": 0.6889346512904997, "grad_norm": 0.3445208966732025, "learning_rate": 1.4719441839132751e-05, "loss": 0.3965, "step": 25091 }, { "epoch": 0.6889621087314662, "grad_norm": 0.4206531047821045, "learning_rate": 1.4719061065223876e-05, "loss": 0.4906, "step": 25092 }, { "epoch": 0.6889895661724327, "grad_norm": 0.37973538041114807, "learning_rate": 1.4718680282512448e-05, "loss": 0.4633, "step": 25093 }, { "epoch": 0.6890170236133992, "grad_norm": 0.39888596534729004, "learning_rate": 1.4718299490999173e-05, "loss": 0.4291, "step": 25094 }, { "epoch": 0.6890444810543658, "grad_norm": 0.40059205889701843, "learning_rate": 1.4717918690684763e-05, "loss": 0.5751, "step": 25095 }, { "epoch": 0.6890719384953322, "grad_norm": 0.4870827794075012, "learning_rate": 1.4717537881569928e-05, "loss": 0.4746, "step": 25096 }, { "epoch": 0.6890993959362988, "grad_norm": 0.37847352027893066, "learning_rate": 1.4717157063655383e-05, "loss": 0.4501, "step": 25097 }, { "epoch": 0.6891268533772652, "grad_norm": 0.3859655261039734, "learning_rate": 1.471677623694183e-05, "loss": 0.5017, "step": 25098 }, { "epoch": 0.6891543108182318, "grad_norm": 0.34948059916496277, "learning_rate": 1.4716395401429986e-05, "loss": 0.4668, "step": 25099 }, { "epoch": 0.6891817682591982, "grad_norm": 0.3598960041999817, "learning_rate": 1.4716014557120562e-05, "loss": 0.4752, "step": 25100 }, { "epoch": 0.6892092257001647, "grad_norm": 0.3713364899158478, "learning_rate": 1.471563370401426e-05, "loss": 0.4527, "step": 25101 }, { "epoch": 0.6892366831411313, "grad_norm": 0.4153587520122528, "learning_rate": 1.4715252842111801e-05, "loss": 0.5141, "step": 25102 }, { "epoch": 0.6892641405820977, "grad_norm": 0.3808860778808594, "learning_rate": 1.4714871971413889e-05, "loss": 0.483, "step": 25103 }, { "epoch": 0.6892915980230643, "grad_norm": 0.3795967996120453, "learning_rate": 1.4714491091921235e-05, "loss": 0.5017, "step": 25104 }, { "epoch": 0.6893190554640307, "grad_norm": 0.4091937243938446, "learning_rate": 1.4714110203634551e-05, "loss": 0.4184, "step": 25105 }, { "epoch": 0.6893465129049973, "grad_norm": 0.3974437713623047, "learning_rate": 1.4713729306554548e-05, "loss": 0.4831, "step": 25106 }, { "epoch": 0.6893739703459637, "grad_norm": 0.3688446283340454, "learning_rate": 1.4713348400681932e-05, "loss": 0.5266, "step": 25107 }, { "epoch": 0.6894014277869303, "grad_norm": 0.43463316559791565, "learning_rate": 1.4712967486017421e-05, "loss": 0.4446, "step": 25108 }, { "epoch": 0.6894288852278968, "grad_norm": 0.3839513659477234, "learning_rate": 1.471258656256172e-05, "loss": 0.5215, "step": 25109 }, { "epoch": 0.6894563426688632, "grad_norm": 0.3581148087978363, "learning_rate": 1.4712205630315539e-05, "loss": 0.4172, "step": 25110 }, { "epoch": 0.6894838001098298, "grad_norm": 0.357438862323761, "learning_rate": 1.4711824689279592e-05, "loss": 0.4471, "step": 25111 }, { "epoch": 0.6895112575507962, "grad_norm": 0.3794468343257904, "learning_rate": 1.471144373945459e-05, "loss": 0.4891, "step": 25112 }, { "epoch": 0.6895387149917628, "grad_norm": 0.46722954511642456, "learning_rate": 1.471106278084124e-05, "loss": 0.5057, "step": 25113 }, { "epoch": 0.6895661724327292, "grad_norm": 0.4025157690048218, "learning_rate": 1.4710681813440253e-05, "loss": 0.5166, "step": 25114 }, { "epoch": 0.6895936298736958, "grad_norm": 0.42446282505989075, "learning_rate": 1.4710300837252343e-05, "loss": 0.4754, "step": 25115 }, { "epoch": 0.6896210873146623, "grad_norm": 0.3991166353225708, "learning_rate": 1.4709919852278219e-05, "loss": 0.5271, "step": 25116 }, { "epoch": 0.6896485447556288, "grad_norm": 0.3417428135871887, "learning_rate": 1.4709538858518588e-05, "loss": 0.5012, "step": 25117 }, { "epoch": 0.6896760021965953, "grad_norm": 0.39597266912460327, "learning_rate": 1.4709157855974168e-05, "loss": 0.54, "step": 25118 }, { "epoch": 0.6897034596375617, "grad_norm": 0.3713369369506836, "learning_rate": 1.4708776844645663e-05, "loss": 0.4756, "step": 25119 }, { "epoch": 0.6897309170785283, "grad_norm": 0.33983322978019714, "learning_rate": 1.4708395824533785e-05, "loss": 0.3708, "step": 25120 }, { "epoch": 0.6897583745194947, "grad_norm": 0.39624738693237305, "learning_rate": 1.4708014795639249e-05, "loss": 0.5013, "step": 25121 }, { "epoch": 0.6897858319604613, "grad_norm": 0.3773285448551178, "learning_rate": 1.4707633757962762e-05, "loss": 0.5156, "step": 25122 }, { "epoch": 0.6898132894014278, "grad_norm": 0.37116116285324097, "learning_rate": 1.4707252711505036e-05, "loss": 0.5224, "step": 25123 }, { "epoch": 0.6898407468423943, "grad_norm": 0.3811199963092804, "learning_rate": 1.470687165626678e-05, "loss": 0.5417, "step": 25124 }, { "epoch": 0.6898682042833608, "grad_norm": 0.39893168210983276, "learning_rate": 1.4706490592248706e-05, "loss": 0.3728, "step": 25125 }, { "epoch": 0.6898956617243273, "grad_norm": 0.33234381675720215, "learning_rate": 1.4706109519451528e-05, "loss": 0.4073, "step": 25126 }, { "epoch": 0.6899231191652938, "grad_norm": 0.4006821811199188, "learning_rate": 1.4705728437875947e-05, "loss": 0.4716, "step": 25127 }, { "epoch": 0.6899505766062602, "grad_norm": 0.35408464074134827, "learning_rate": 1.4705347347522687e-05, "loss": 0.4999, "step": 25128 }, { "epoch": 0.6899780340472268, "grad_norm": 0.3397924304008484, "learning_rate": 1.470496624839245e-05, "loss": 0.5501, "step": 25129 }, { "epoch": 0.6900054914881933, "grad_norm": 0.32466262578964233, "learning_rate": 1.470458514048595e-05, "loss": 0.372, "step": 25130 }, { "epoch": 0.6900329489291598, "grad_norm": 0.34637022018432617, "learning_rate": 1.4704204023803899e-05, "loss": 0.5191, "step": 25131 }, { "epoch": 0.6900604063701263, "grad_norm": 0.36118608713150024, "learning_rate": 1.4703822898347003e-05, "loss": 0.4445, "step": 25132 }, { "epoch": 0.6900878638110928, "grad_norm": 0.3964383602142334, "learning_rate": 1.4703441764115975e-05, "loss": 0.5861, "step": 25133 }, { "epoch": 0.6901153212520593, "grad_norm": 0.4415261149406433, "learning_rate": 1.470306062111153e-05, "loss": 0.5025, "step": 25134 }, { "epoch": 0.6901427786930258, "grad_norm": 0.4176766872406006, "learning_rate": 1.4702679469334373e-05, "loss": 0.4964, "step": 25135 }, { "epoch": 0.6901702361339923, "grad_norm": 0.33790937066078186, "learning_rate": 1.4702298308785222e-05, "loss": 0.4865, "step": 25136 }, { "epoch": 0.6901976935749589, "grad_norm": 0.41459140181541443, "learning_rate": 1.4701917139464778e-05, "loss": 0.5087, "step": 25137 }, { "epoch": 0.6902251510159253, "grad_norm": 0.7533536553382874, "learning_rate": 1.4701535961373763e-05, "loss": 0.452, "step": 25138 }, { "epoch": 0.6902526084568918, "grad_norm": 0.39161553978919983, "learning_rate": 1.4701154774512883e-05, "loss": 0.4935, "step": 25139 }, { "epoch": 0.6902800658978583, "grad_norm": 0.3953689932823181, "learning_rate": 1.4700773578882846e-05, "loss": 0.5239, "step": 25140 }, { "epoch": 0.6903075233388248, "grad_norm": 0.39209139347076416, "learning_rate": 1.470039237448437e-05, "loss": 0.4791, "step": 25141 }, { "epoch": 0.6903349807797913, "grad_norm": 0.37539637088775635, "learning_rate": 1.4700011161318155e-05, "loss": 0.4776, "step": 25142 }, { "epoch": 0.6903624382207578, "grad_norm": 0.35168296098709106, "learning_rate": 1.4699629939384926e-05, "loss": 0.4261, "step": 25143 }, { "epoch": 0.6903898956617244, "grad_norm": 0.39977067708969116, "learning_rate": 1.4699248708685383e-05, "loss": 0.4836, "step": 25144 }, { "epoch": 0.6904173531026908, "grad_norm": 0.4935271739959717, "learning_rate": 1.4698867469220241e-05, "loss": 0.5367, "step": 25145 }, { "epoch": 0.6904448105436574, "grad_norm": 0.39314594864845276, "learning_rate": 1.4698486220990215e-05, "loss": 0.4944, "step": 25146 }, { "epoch": 0.6904722679846238, "grad_norm": 0.39134353399276733, "learning_rate": 1.4698104963996012e-05, "loss": 0.4885, "step": 25147 }, { "epoch": 0.6904997254255903, "grad_norm": 0.45482149720191956, "learning_rate": 1.4697723698238342e-05, "loss": 0.4756, "step": 25148 }, { "epoch": 0.6905271828665568, "grad_norm": 0.37733834981918335, "learning_rate": 1.4697342423717919e-05, "loss": 0.5128, "step": 25149 }, { "epoch": 0.6905546403075233, "grad_norm": 0.37441515922546387, "learning_rate": 1.469696114043545e-05, "loss": 0.4776, "step": 25150 }, { "epoch": 0.6905820977484899, "grad_norm": 0.3624413013458252, "learning_rate": 1.4696579848391654e-05, "loss": 0.5185, "step": 25151 }, { "epoch": 0.6906095551894563, "grad_norm": 0.38542863726615906, "learning_rate": 1.4696198547587237e-05, "loss": 0.5966, "step": 25152 }, { "epoch": 0.6906370126304229, "grad_norm": 0.3669089674949646, "learning_rate": 1.469581723802291e-05, "loss": 0.4863, "step": 25153 }, { "epoch": 0.6906644700713893, "grad_norm": 0.46713951230049133, "learning_rate": 1.4695435919699383e-05, "loss": 0.5836, "step": 25154 }, { "epoch": 0.6906919275123559, "grad_norm": 0.4087623655796051, "learning_rate": 1.469505459261737e-05, "loss": 0.5799, "step": 25155 }, { "epoch": 0.6907193849533223, "grad_norm": 0.3530656099319458, "learning_rate": 1.4694673256777585e-05, "loss": 0.4848, "step": 25156 }, { "epoch": 0.6907468423942889, "grad_norm": 0.4121893048286438, "learning_rate": 1.4694291912180734e-05, "loss": 0.5801, "step": 25157 }, { "epoch": 0.6907742998352554, "grad_norm": 0.3783740997314453, "learning_rate": 1.469391055882753e-05, "loss": 0.5875, "step": 25158 }, { "epoch": 0.6908017572762218, "grad_norm": 0.3836696743965149, "learning_rate": 1.4693529196718683e-05, "loss": 0.4768, "step": 25159 }, { "epoch": 0.6908292147171884, "grad_norm": 0.3699340522289276, "learning_rate": 1.4693147825854908e-05, "loss": 0.5046, "step": 25160 }, { "epoch": 0.6908566721581548, "grad_norm": 0.3765628933906555, "learning_rate": 1.4692766446236914e-05, "loss": 0.5463, "step": 25161 }, { "epoch": 0.6908841295991214, "grad_norm": 0.3935123085975647, "learning_rate": 1.4692385057865414e-05, "loss": 0.4037, "step": 25162 }, { "epoch": 0.6909115870400878, "grad_norm": 0.44081082940101624, "learning_rate": 1.4692003660741116e-05, "loss": 0.6421, "step": 25163 }, { "epoch": 0.6909390444810544, "grad_norm": 0.4446166157722473, "learning_rate": 1.4691622254864736e-05, "loss": 0.5896, "step": 25164 }, { "epoch": 0.6909665019220209, "grad_norm": 0.40320149064064026, "learning_rate": 1.4691240840236982e-05, "loss": 0.6174, "step": 25165 }, { "epoch": 0.6909939593629874, "grad_norm": 0.5162468552589417, "learning_rate": 1.4690859416858567e-05, "loss": 0.4752, "step": 25166 }, { "epoch": 0.6910214168039539, "grad_norm": 0.414180725812912, "learning_rate": 1.46904779847302e-05, "loss": 0.5809, "step": 25167 }, { "epoch": 0.6910488742449203, "grad_norm": 0.37775689363479614, "learning_rate": 1.4690096543852599e-05, "loss": 0.503, "step": 25168 }, { "epoch": 0.6910763316858869, "grad_norm": 0.3934357166290283, "learning_rate": 1.468971509422647e-05, "loss": 0.486, "step": 25169 }, { "epoch": 0.6911037891268533, "grad_norm": 0.44130611419677734, "learning_rate": 1.4689333635852521e-05, "loss": 0.6161, "step": 25170 }, { "epoch": 0.6911312465678199, "grad_norm": 0.3749227523803711, "learning_rate": 1.4688952168731472e-05, "loss": 0.5143, "step": 25171 }, { "epoch": 0.6911587040087864, "grad_norm": 0.36254534125328064, "learning_rate": 1.4688570692864028e-05, "loss": 0.4728, "step": 25172 }, { "epoch": 0.6911861614497529, "grad_norm": 0.3468341827392578, "learning_rate": 1.4688189208250906e-05, "loss": 0.5047, "step": 25173 }, { "epoch": 0.6912136188907194, "grad_norm": 0.4492226541042328, "learning_rate": 1.4687807714892815e-05, "loss": 0.5027, "step": 25174 }, { "epoch": 0.6912410763316859, "grad_norm": 0.4727918803691864, "learning_rate": 1.4687426212790464e-05, "loss": 0.4858, "step": 25175 }, { "epoch": 0.6912685337726524, "grad_norm": 0.39062702655792236, "learning_rate": 1.4687044701944568e-05, "loss": 0.4804, "step": 25176 }, { "epoch": 0.6912959912136188, "grad_norm": 0.40380164980888367, "learning_rate": 1.4686663182355836e-05, "loss": 0.537, "step": 25177 }, { "epoch": 0.6913234486545854, "grad_norm": 0.380109041929245, "learning_rate": 1.4686281654024986e-05, "loss": 0.4731, "step": 25178 }, { "epoch": 0.6913509060955519, "grad_norm": 0.3816669285297394, "learning_rate": 1.468590011695272e-05, "loss": 0.4819, "step": 25179 }, { "epoch": 0.6913783635365184, "grad_norm": 0.4698818624019623, "learning_rate": 1.4685518571139759e-05, "loss": 0.4723, "step": 25180 }, { "epoch": 0.6914058209774849, "grad_norm": 0.40228191018104553, "learning_rate": 1.4685137016586808e-05, "loss": 0.4878, "step": 25181 }, { "epoch": 0.6914332784184514, "grad_norm": 0.38203513622283936, "learning_rate": 1.4684755453294582e-05, "loss": 0.52, "step": 25182 }, { "epoch": 0.6914607358594179, "grad_norm": 0.36494672298431396, "learning_rate": 1.4684373881263792e-05, "loss": 0.4669, "step": 25183 }, { "epoch": 0.6914881933003844, "grad_norm": 0.33518001437187195, "learning_rate": 1.4683992300495147e-05, "loss": 0.4987, "step": 25184 }, { "epoch": 0.6915156507413509, "grad_norm": 0.3928460478782654, "learning_rate": 1.4683610710989365e-05, "loss": 0.5583, "step": 25185 }, { "epoch": 0.6915431081823175, "grad_norm": 0.3606477379798889, "learning_rate": 1.4683229112747153e-05, "loss": 0.5413, "step": 25186 }, { "epoch": 0.6915705656232839, "grad_norm": 0.3750912845134735, "learning_rate": 1.4682847505769222e-05, "loss": 0.432, "step": 25187 }, { "epoch": 0.6915980230642504, "grad_norm": 0.4567478895187378, "learning_rate": 1.4682465890056288e-05, "loss": 0.6373, "step": 25188 }, { "epoch": 0.6916254805052169, "grad_norm": 0.37875521183013916, "learning_rate": 1.468208426560906e-05, "loss": 0.5336, "step": 25189 }, { "epoch": 0.6916529379461834, "grad_norm": 0.3795166313648224, "learning_rate": 1.468170263242825e-05, "loss": 0.4831, "step": 25190 }, { "epoch": 0.6916803953871499, "grad_norm": 0.37683531641960144, "learning_rate": 1.4681320990514571e-05, "loss": 0.547, "step": 25191 }, { "epoch": 0.6917078528281164, "grad_norm": 0.3660186231136322, "learning_rate": 1.4680939339868733e-05, "loss": 0.57, "step": 25192 }, { "epoch": 0.691735310269083, "grad_norm": 0.4044761061668396, "learning_rate": 1.4680557680491452e-05, "loss": 0.4682, "step": 25193 }, { "epoch": 0.6917627677100494, "grad_norm": 0.3722977042198181, "learning_rate": 1.4680176012383433e-05, "loss": 0.4952, "step": 25194 }, { "epoch": 0.691790225151016, "grad_norm": 0.3607144057750702, "learning_rate": 1.4679794335545397e-05, "loss": 0.4785, "step": 25195 }, { "epoch": 0.6918176825919824, "grad_norm": 0.37791478633880615, "learning_rate": 1.4679412649978049e-05, "loss": 0.4927, "step": 25196 }, { "epoch": 0.691845140032949, "grad_norm": 0.401456356048584, "learning_rate": 1.46790309556821e-05, "loss": 0.5625, "step": 25197 }, { "epoch": 0.6918725974739154, "grad_norm": 0.4532018005847931, "learning_rate": 1.4678649252658267e-05, "loss": 0.5409, "step": 25198 }, { "epoch": 0.6919000549148819, "grad_norm": 0.4127953350543976, "learning_rate": 1.467826754090726e-05, "loss": 0.6087, "step": 25199 }, { "epoch": 0.6919275123558485, "grad_norm": 0.38508889079093933, "learning_rate": 1.4677885820429793e-05, "loss": 0.4267, "step": 25200 }, { "epoch": 0.6919549697968149, "grad_norm": 0.3701131343841553, "learning_rate": 1.4677504091226576e-05, "loss": 0.5291, "step": 25201 }, { "epoch": 0.6919824272377815, "grad_norm": 0.37522387504577637, "learning_rate": 1.4677122353298317e-05, "loss": 0.4914, "step": 25202 }, { "epoch": 0.6920098846787479, "grad_norm": 0.4703931510448456, "learning_rate": 1.4676740606645734e-05, "loss": 0.584, "step": 25203 }, { "epoch": 0.6920373421197145, "grad_norm": 0.3892243206501007, "learning_rate": 1.4676358851269536e-05, "loss": 0.4829, "step": 25204 }, { "epoch": 0.6920647995606809, "grad_norm": 0.3794500529766083, "learning_rate": 1.467597708717044e-05, "loss": 0.4882, "step": 25205 }, { "epoch": 0.6920922570016474, "grad_norm": 0.3992704451084137, "learning_rate": 1.467559531434915e-05, "loss": 0.4585, "step": 25206 }, { "epoch": 0.692119714442614, "grad_norm": 0.3610994219779968, "learning_rate": 1.4675213532806382e-05, "loss": 0.5107, "step": 25207 }, { "epoch": 0.6921471718835804, "grad_norm": 0.5334581136703491, "learning_rate": 1.4674831742542851e-05, "loss": 0.4696, "step": 25208 }, { "epoch": 0.692174629324547, "grad_norm": 0.3638720214366913, "learning_rate": 1.467444994355927e-05, "loss": 0.5806, "step": 25209 }, { "epoch": 0.6922020867655134, "grad_norm": 0.3518778681755066, "learning_rate": 1.4674068135856343e-05, "loss": 0.4581, "step": 25210 }, { "epoch": 0.69222954420648, "grad_norm": 0.43035128712654114, "learning_rate": 1.467368631943479e-05, "loss": 0.4631, "step": 25211 }, { "epoch": 0.6922570016474464, "grad_norm": 0.34792494773864746, "learning_rate": 1.4673304494295317e-05, "loss": 0.5073, "step": 25212 }, { "epoch": 0.692284459088413, "grad_norm": 0.37970224022865295, "learning_rate": 1.4672922660438641e-05, "loss": 0.5415, "step": 25213 }, { "epoch": 0.6923119165293795, "grad_norm": 0.38961437344551086, "learning_rate": 1.4672540817865474e-05, "loss": 0.5444, "step": 25214 }, { "epoch": 0.692339373970346, "grad_norm": 0.4083119034767151, "learning_rate": 1.4672158966576527e-05, "loss": 0.4804, "step": 25215 }, { "epoch": 0.6923668314113125, "grad_norm": 0.3417511284351349, "learning_rate": 1.4671777106572511e-05, "loss": 0.4263, "step": 25216 }, { "epoch": 0.6923942888522789, "grad_norm": 0.4251704514026642, "learning_rate": 1.467139523785414e-05, "loss": 0.6044, "step": 25217 }, { "epoch": 0.6924217462932455, "grad_norm": 0.3595302999019623, "learning_rate": 1.4671013360422126e-05, "loss": 0.4547, "step": 25218 }, { "epoch": 0.6924492037342119, "grad_norm": 0.3667217791080475, "learning_rate": 1.4670631474277184e-05, "loss": 0.5544, "step": 25219 }, { "epoch": 0.6924766611751785, "grad_norm": 0.3650515675544739, "learning_rate": 1.467024957942002e-05, "loss": 0.4541, "step": 25220 }, { "epoch": 0.692504118616145, "grad_norm": 0.38099753856658936, "learning_rate": 1.466986767585135e-05, "loss": 0.4635, "step": 25221 }, { "epoch": 0.6925315760571115, "grad_norm": 0.4211656153202057, "learning_rate": 1.4669485763571887e-05, "loss": 0.5122, "step": 25222 }, { "epoch": 0.692559033498078, "grad_norm": 0.4039934575557709, "learning_rate": 1.4669103842582345e-05, "loss": 0.5063, "step": 25223 }, { "epoch": 0.6925864909390445, "grad_norm": 0.39328083395957947, "learning_rate": 1.4668721912883433e-05, "loss": 0.4823, "step": 25224 }, { "epoch": 0.692613948380011, "grad_norm": 0.3448216915130615, "learning_rate": 1.4668339974475862e-05, "loss": 0.5185, "step": 25225 }, { "epoch": 0.6926414058209774, "grad_norm": 0.4108954966068268, "learning_rate": 1.466795802736035e-05, "loss": 0.4397, "step": 25226 }, { "epoch": 0.692668863261944, "grad_norm": 0.49058103561401367, "learning_rate": 1.4667576071537605e-05, "loss": 0.4965, "step": 25227 }, { "epoch": 0.6926963207029105, "grad_norm": 0.34978529810905457, "learning_rate": 1.466719410700834e-05, "loss": 0.4537, "step": 25228 }, { "epoch": 0.692723778143877, "grad_norm": 0.35640397667884827, "learning_rate": 1.4666812133773271e-05, "loss": 0.3783, "step": 25229 }, { "epoch": 0.6927512355848435, "grad_norm": 0.373700350522995, "learning_rate": 1.4666430151833107e-05, "loss": 0.4836, "step": 25230 }, { "epoch": 0.69277869302581, "grad_norm": 0.38002943992614746, "learning_rate": 1.466604816118856e-05, "loss": 0.4481, "step": 25231 }, { "epoch": 0.6928061504667765, "grad_norm": 0.38804709911346436, "learning_rate": 1.4665666161840347e-05, "loss": 0.5381, "step": 25232 }, { "epoch": 0.692833607907743, "grad_norm": 0.4911561608314514, "learning_rate": 1.4665284153789175e-05, "loss": 0.522, "step": 25233 }, { "epoch": 0.6928610653487095, "grad_norm": 0.3446517586708069, "learning_rate": 1.466490213703576e-05, "loss": 0.5039, "step": 25234 }, { "epoch": 0.692888522789676, "grad_norm": 0.4756913185119629, "learning_rate": 1.4664520111580812e-05, "loss": 0.5168, "step": 25235 }, { "epoch": 0.6929159802306425, "grad_norm": 0.39775654673576355, "learning_rate": 1.4664138077425045e-05, "loss": 0.5365, "step": 25236 }, { "epoch": 0.692943437671609, "grad_norm": 0.39302608370780945, "learning_rate": 1.4663756034569174e-05, "loss": 0.5126, "step": 25237 }, { "epoch": 0.6929708951125755, "grad_norm": 0.3990744352340698, "learning_rate": 1.466337398301391e-05, "loss": 0.4602, "step": 25238 }, { "epoch": 0.692998352553542, "grad_norm": 0.39376354217529297, "learning_rate": 1.4662991922759963e-05, "loss": 0.5681, "step": 25239 }, { "epoch": 0.6930258099945085, "grad_norm": 0.41882070899009705, "learning_rate": 1.4662609853808047e-05, "loss": 0.4373, "step": 25240 }, { "epoch": 0.693053267435475, "grad_norm": 0.41154372692108154, "learning_rate": 1.4662227776158877e-05, "loss": 0.4955, "step": 25241 }, { "epoch": 0.6930807248764416, "grad_norm": 0.3723752498626709, "learning_rate": 1.4661845689813165e-05, "loss": 0.4783, "step": 25242 }, { "epoch": 0.693108182317408, "grad_norm": 0.38918331265449524, "learning_rate": 1.4661463594771622e-05, "loss": 0.5411, "step": 25243 }, { "epoch": 0.6931356397583746, "grad_norm": 0.37669679522514343, "learning_rate": 1.4661081491034963e-05, "loss": 0.494, "step": 25244 }, { "epoch": 0.693163097199341, "grad_norm": 0.3923669457435608, "learning_rate": 1.4660699378603899e-05, "loss": 0.5222, "step": 25245 }, { "epoch": 0.6931905546403075, "grad_norm": 0.3321725130081177, "learning_rate": 1.466031725747914e-05, "loss": 0.4621, "step": 25246 }, { "epoch": 0.693218012081274, "grad_norm": 0.38330796360969543, "learning_rate": 1.4659935127661407e-05, "loss": 0.4775, "step": 25247 }, { "epoch": 0.6932454695222405, "grad_norm": 0.37005865573883057, "learning_rate": 1.4659552989151403e-05, "loss": 0.5532, "step": 25248 }, { "epoch": 0.6932729269632071, "grad_norm": 0.39477023482322693, "learning_rate": 1.4659170841949848e-05, "loss": 0.4919, "step": 25249 }, { "epoch": 0.6933003844041735, "grad_norm": 0.3802751302719116, "learning_rate": 1.4658788686057453e-05, "loss": 0.5338, "step": 25250 }, { "epoch": 0.6933278418451401, "grad_norm": 0.3821379542350769, "learning_rate": 1.4658406521474928e-05, "loss": 0.5943, "step": 25251 }, { "epoch": 0.6933552992861065, "grad_norm": 0.35306933522224426, "learning_rate": 1.465802434820299e-05, "loss": 0.4939, "step": 25252 }, { "epoch": 0.6933827567270731, "grad_norm": 0.4965232312679291, "learning_rate": 1.4657642166242347e-05, "loss": 0.5358, "step": 25253 }, { "epoch": 0.6934102141680395, "grad_norm": 0.42576315999031067, "learning_rate": 1.4657259975593718e-05, "loss": 0.5183, "step": 25254 }, { "epoch": 0.693437671609006, "grad_norm": 0.4263446033000946, "learning_rate": 1.4656877776257812e-05, "loss": 0.4374, "step": 25255 }, { "epoch": 0.6934651290499726, "grad_norm": 0.3645632565021515, "learning_rate": 1.465649556823534e-05, "loss": 0.39, "step": 25256 }, { "epoch": 0.693492586490939, "grad_norm": 0.3877979516983032, "learning_rate": 1.4656113351527022e-05, "loss": 0.4432, "step": 25257 }, { "epoch": 0.6935200439319056, "grad_norm": 0.4157184362411499, "learning_rate": 1.4655731126133566e-05, "loss": 0.5022, "step": 25258 }, { "epoch": 0.693547501372872, "grad_norm": 0.3689919412136078, "learning_rate": 1.465534889205568e-05, "loss": 0.4733, "step": 25259 }, { "epoch": 0.6935749588138386, "grad_norm": 0.3436318635940552, "learning_rate": 1.465496664929409e-05, "loss": 0.4639, "step": 25260 }, { "epoch": 0.693602416254805, "grad_norm": 0.4040205478668213, "learning_rate": 1.4654584397849496e-05, "loss": 0.4947, "step": 25261 }, { "epoch": 0.6936298736957716, "grad_norm": 0.36143758893013, "learning_rate": 1.4654202137722617e-05, "loss": 0.4977, "step": 25262 }, { "epoch": 0.6936573311367381, "grad_norm": 0.38464444875717163, "learning_rate": 1.465381986891417e-05, "loss": 0.5376, "step": 25263 }, { "epoch": 0.6936847885777045, "grad_norm": 0.9891734719276428, "learning_rate": 1.4653437591424857e-05, "loss": 0.4806, "step": 25264 }, { "epoch": 0.6937122460186711, "grad_norm": 0.507115364074707, "learning_rate": 1.4653055305255404e-05, "loss": 0.5676, "step": 25265 }, { "epoch": 0.6937397034596375, "grad_norm": 0.39354366064071655, "learning_rate": 1.4652673010406514e-05, "loss": 0.5126, "step": 25266 }, { "epoch": 0.6937671609006041, "grad_norm": 0.37439870834350586, "learning_rate": 1.4652290706878905e-05, "loss": 0.4475, "step": 25267 }, { "epoch": 0.6937946183415705, "grad_norm": 0.4412074089050293, "learning_rate": 1.4651908394673289e-05, "loss": 0.478, "step": 25268 }, { "epoch": 0.6938220757825371, "grad_norm": 0.36869552731513977, "learning_rate": 1.465152607379038e-05, "loss": 0.4779, "step": 25269 }, { "epoch": 0.6938495332235036, "grad_norm": 0.38996580243110657, "learning_rate": 1.465114374423089e-05, "loss": 0.4829, "step": 25270 }, { "epoch": 0.6938769906644701, "grad_norm": 0.3765906095504761, "learning_rate": 1.4650761405995528e-05, "loss": 0.4821, "step": 25271 }, { "epoch": 0.6939044481054366, "grad_norm": 0.3175306022167206, "learning_rate": 1.4650379059085017e-05, "loss": 0.4082, "step": 25272 }, { "epoch": 0.693931905546403, "grad_norm": 0.4054983854293823, "learning_rate": 1.4649996703500064e-05, "loss": 0.5091, "step": 25273 }, { "epoch": 0.6939593629873696, "grad_norm": 0.39155060052871704, "learning_rate": 1.4649614339241382e-05, "loss": 0.52, "step": 25274 }, { "epoch": 0.693986820428336, "grad_norm": 0.3766295611858368, "learning_rate": 1.4649231966309684e-05, "loss": 0.4237, "step": 25275 }, { "epoch": 0.6940142778693026, "grad_norm": 0.3961699903011322, "learning_rate": 1.4648849584705688e-05, "loss": 0.49, "step": 25276 }, { "epoch": 0.6940417353102691, "grad_norm": 0.4775618612766266, "learning_rate": 1.4648467194430101e-05, "loss": 0.5444, "step": 25277 }, { "epoch": 0.6940691927512356, "grad_norm": 0.4622125029563904, "learning_rate": 1.4648084795483641e-05, "loss": 0.6007, "step": 25278 }, { "epoch": 0.6940966501922021, "grad_norm": 0.41597622632980347, "learning_rate": 1.4647702387867018e-05, "loss": 0.5353, "step": 25279 }, { "epoch": 0.6941241076331686, "grad_norm": 0.3501005172729492, "learning_rate": 1.4647319971580946e-05, "loss": 0.511, "step": 25280 }, { "epoch": 0.6941515650741351, "grad_norm": 0.3766581118106842, "learning_rate": 1.4646937546626141e-05, "loss": 0.5035, "step": 25281 }, { "epoch": 0.6941790225151016, "grad_norm": 0.4425783157348633, "learning_rate": 1.464655511300331e-05, "loss": 0.5265, "step": 25282 }, { "epoch": 0.6942064799560681, "grad_norm": 0.36914992332458496, "learning_rate": 1.4646172670713177e-05, "loss": 0.4661, "step": 25283 }, { "epoch": 0.6942339373970347, "grad_norm": 0.37693244218826294, "learning_rate": 1.4645790219756444e-05, "loss": 0.5499, "step": 25284 }, { "epoch": 0.6942613948380011, "grad_norm": 0.37350693345069885, "learning_rate": 1.4645407760133834e-05, "loss": 0.5183, "step": 25285 }, { "epoch": 0.6942888522789676, "grad_norm": 0.3501913249492645, "learning_rate": 1.4645025291846054e-05, "loss": 0.4275, "step": 25286 }, { "epoch": 0.6943163097199341, "grad_norm": 0.34879228472709656, "learning_rate": 1.4644642814893817e-05, "loss": 0.4898, "step": 25287 }, { "epoch": 0.6943437671609006, "grad_norm": 0.45376527309417725, "learning_rate": 1.4644260329277841e-05, "loss": 0.513, "step": 25288 }, { "epoch": 0.6943712246018671, "grad_norm": 0.3733663856983185, "learning_rate": 1.4643877834998836e-05, "loss": 0.4905, "step": 25289 }, { "epoch": 0.6943986820428336, "grad_norm": 0.34017378091812134, "learning_rate": 1.4643495332057518e-05, "loss": 0.4182, "step": 25290 }, { "epoch": 0.6944261394838002, "grad_norm": 0.4982990622520447, "learning_rate": 1.4643112820454598e-05, "loss": 0.5068, "step": 25291 }, { "epoch": 0.6944535969247666, "grad_norm": 0.38283804059028625, "learning_rate": 1.464273030019079e-05, "loss": 0.4342, "step": 25292 }, { "epoch": 0.6944810543657332, "grad_norm": 0.36872217059135437, "learning_rate": 1.4642347771266811e-05, "loss": 0.5398, "step": 25293 }, { "epoch": 0.6945085118066996, "grad_norm": 0.39132872223854065, "learning_rate": 1.464196523368337e-05, "loss": 0.521, "step": 25294 }, { "epoch": 0.6945359692476661, "grad_norm": 0.3570835590362549, "learning_rate": 1.4641582687441182e-05, "loss": 0.4955, "step": 25295 }, { "epoch": 0.6945634266886326, "grad_norm": 0.3967655301094055, "learning_rate": 1.4641200132540962e-05, "loss": 0.4441, "step": 25296 }, { "epoch": 0.6945908841295991, "grad_norm": 0.3841409981250763, "learning_rate": 1.464081756898342e-05, "loss": 0.4826, "step": 25297 }, { "epoch": 0.6946183415705657, "grad_norm": 0.45963919162750244, "learning_rate": 1.4640434996769274e-05, "loss": 0.4966, "step": 25298 }, { "epoch": 0.6946457990115321, "grad_norm": 0.3364951014518738, "learning_rate": 1.4640052415899238e-05, "loss": 0.5372, "step": 25299 }, { "epoch": 0.6946732564524987, "grad_norm": 0.33859264850616455, "learning_rate": 1.4639669826374019e-05, "loss": 0.5137, "step": 25300 }, { "epoch": 0.6947007138934651, "grad_norm": 0.36878830194473267, "learning_rate": 1.4639287228194339e-05, "loss": 0.487, "step": 25301 }, { "epoch": 0.6947281713344317, "grad_norm": 0.3932752013206482, "learning_rate": 1.4638904621360902e-05, "loss": 0.5497, "step": 25302 }, { "epoch": 0.6947556287753981, "grad_norm": 0.381727397441864, "learning_rate": 1.4638522005874433e-05, "loss": 0.4869, "step": 25303 }, { "epoch": 0.6947830862163646, "grad_norm": 0.40487241744995117, "learning_rate": 1.4638139381735638e-05, "loss": 0.5001, "step": 25304 }, { "epoch": 0.6948105436573312, "grad_norm": 0.3610488772392273, "learning_rate": 1.4637756748945231e-05, "loss": 0.6001, "step": 25305 }, { "epoch": 0.6948380010982976, "grad_norm": 0.681302547454834, "learning_rate": 1.463737410750393e-05, "loss": 0.476, "step": 25306 }, { "epoch": 0.6948654585392642, "grad_norm": 0.359510600566864, "learning_rate": 1.4636991457412446e-05, "loss": 0.5343, "step": 25307 }, { "epoch": 0.6948929159802306, "grad_norm": 0.39557838439941406, "learning_rate": 1.463660879867149e-05, "loss": 0.4517, "step": 25308 }, { "epoch": 0.6949203734211972, "grad_norm": 0.42404815554618835, "learning_rate": 1.4636226131281784e-05, "loss": 0.5471, "step": 25309 }, { "epoch": 0.6949478308621636, "grad_norm": 0.35788264870643616, "learning_rate": 1.463584345524403e-05, "loss": 0.4769, "step": 25310 }, { "epoch": 0.6949752883031302, "grad_norm": 0.39822208881378174, "learning_rate": 1.4635460770558953e-05, "loss": 0.581, "step": 25311 }, { "epoch": 0.6950027457440966, "grad_norm": 0.38095206022262573, "learning_rate": 1.4635078077227263e-05, "loss": 0.4934, "step": 25312 }, { "epoch": 0.6950302031850631, "grad_norm": 0.44227394461631775, "learning_rate": 1.4634695375249669e-05, "loss": 0.4983, "step": 25313 }, { "epoch": 0.6950576606260297, "grad_norm": 0.36248454451560974, "learning_rate": 1.4634312664626893e-05, "loss": 0.4201, "step": 25314 }, { "epoch": 0.6950851180669961, "grad_norm": 1.1304601430892944, "learning_rate": 1.463392994535964e-05, "loss": 0.6357, "step": 25315 }, { "epoch": 0.6951125755079627, "grad_norm": 0.4404352307319641, "learning_rate": 1.4633547217448631e-05, "loss": 0.491, "step": 25316 }, { "epoch": 0.6951400329489291, "grad_norm": 0.3608398735523224, "learning_rate": 1.463316448089458e-05, "loss": 0.4893, "step": 25317 }, { "epoch": 0.6951674903898957, "grad_norm": 0.3433651924133301, "learning_rate": 1.4632781735698196e-05, "loss": 0.4357, "step": 25318 }, { "epoch": 0.6951949478308621, "grad_norm": 0.3370068371295929, "learning_rate": 1.4632398981860196e-05, "loss": 0.5421, "step": 25319 }, { "epoch": 0.6952224052718287, "grad_norm": 0.4155050814151764, "learning_rate": 1.4632016219381293e-05, "loss": 0.5557, "step": 25320 }, { "epoch": 0.6952498627127952, "grad_norm": 0.3878953456878662, "learning_rate": 1.4631633448262202e-05, "loss": 0.4911, "step": 25321 }, { "epoch": 0.6952773201537616, "grad_norm": 0.4021739065647125, "learning_rate": 1.4631250668503637e-05, "loss": 0.5855, "step": 25322 }, { "epoch": 0.6953047775947282, "grad_norm": 0.37154150009155273, "learning_rate": 1.4630867880106308e-05, "loss": 0.5045, "step": 25323 }, { "epoch": 0.6953322350356946, "grad_norm": 0.38938480615615845, "learning_rate": 1.4630485083070936e-05, "loss": 0.5404, "step": 25324 }, { "epoch": 0.6953596924766612, "grad_norm": 0.3544134199619293, "learning_rate": 1.4630102277398233e-05, "loss": 0.505, "step": 25325 }, { "epoch": 0.6953871499176276, "grad_norm": 0.522505521774292, "learning_rate": 1.4629719463088904e-05, "loss": 0.4818, "step": 25326 }, { "epoch": 0.6954146073585942, "grad_norm": 0.43122386932373047, "learning_rate": 1.4629336640143679e-05, "loss": 0.5599, "step": 25327 }, { "epoch": 0.6954420647995607, "grad_norm": 0.3991270959377289, "learning_rate": 1.4628953808563258e-05, "loss": 0.5873, "step": 25328 }, { "epoch": 0.6954695222405272, "grad_norm": 0.39553025364875793, "learning_rate": 1.4628570968348364e-05, "loss": 0.5059, "step": 25329 }, { "epoch": 0.6954969796814937, "grad_norm": 0.47177019715309143, "learning_rate": 1.4628188119499707e-05, "loss": 0.6603, "step": 25330 }, { "epoch": 0.6955244371224601, "grad_norm": 0.45925551652908325, "learning_rate": 1.4627805262018e-05, "loss": 0.5599, "step": 25331 }, { "epoch": 0.6955518945634267, "grad_norm": 0.3627833425998688, "learning_rate": 1.4627422395903964e-05, "loss": 0.5625, "step": 25332 }, { "epoch": 0.6955793520043931, "grad_norm": 0.37874358892440796, "learning_rate": 1.4627039521158302e-05, "loss": 0.4893, "step": 25333 }, { "epoch": 0.6956068094453597, "grad_norm": 0.40369677543640137, "learning_rate": 1.4626656637781738e-05, "loss": 0.4307, "step": 25334 }, { "epoch": 0.6956342668863262, "grad_norm": 0.5013852715492249, "learning_rate": 1.4626273745774983e-05, "loss": 0.5441, "step": 25335 }, { "epoch": 0.6956617243272927, "grad_norm": 0.38237547874450684, "learning_rate": 1.462589084513875e-05, "loss": 0.3777, "step": 25336 }, { "epoch": 0.6956891817682592, "grad_norm": 0.3825632333755493, "learning_rate": 1.4625507935873756e-05, "loss": 0.5852, "step": 25337 }, { "epoch": 0.6957166392092257, "grad_norm": 0.3818103075027466, "learning_rate": 1.4625125017980712e-05, "loss": 0.5004, "step": 25338 }, { "epoch": 0.6957440966501922, "grad_norm": 0.43543270230293274, "learning_rate": 1.4624742091460333e-05, "loss": 0.5584, "step": 25339 }, { "epoch": 0.6957715540911587, "grad_norm": 0.3585946559906006, "learning_rate": 1.4624359156313335e-05, "loss": 0.5109, "step": 25340 }, { "epoch": 0.6957990115321252, "grad_norm": 0.36217913031578064, "learning_rate": 1.462397621254043e-05, "loss": 0.4498, "step": 25341 }, { "epoch": 0.6958264689730917, "grad_norm": 0.4188084900379181, "learning_rate": 1.4623593260142335e-05, "loss": 0.5033, "step": 25342 }, { "epoch": 0.6958539264140582, "grad_norm": 0.42253798246383667, "learning_rate": 1.462321029911976e-05, "loss": 0.5178, "step": 25343 }, { "epoch": 0.6958813838550247, "grad_norm": 0.37353643774986267, "learning_rate": 1.4622827329473427e-05, "loss": 0.5293, "step": 25344 }, { "epoch": 0.6959088412959912, "grad_norm": 0.3775566816329956, "learning_rate": 1.4622444351204042e-05, "loss": 0.4313, "step": 25345 }, { "epoch": 0.6959362987369577, "grad_norm": 0.43346893787384033, "learning_rate": 1.4622061364312323e-05, "loss": 0.468, "step": 25346 }, { "epoch": 0.6959637561779242, "grad_norm": 0.3822588622570038, "learning_rate": 1.4621678368798986e-05, "loss": 0.4988, "step": 25347 }, { "epoch": 0.6959912136188907, "grad_norm": 0.3968234360218048, "learning_rate": 1.4621295364664743e-05, "loss": 0.4992, "step": 25348 }, { "epoch": 0.6960186710598573, "grad_norm": 0.39415308833122253, "learning_rate": 1.4620912351910308e-05, "loss": 0.4947, "step": 25349 }, { "epoch": 0.6960461285008237, "grad_norm": 0.36490535736083984, "learning_rate": 1.46205293305364e-05, "loss": 0.5293, "step": 25350 }, { "epoch": 0.6960735859417903, "grad_norm": 0.4414251744747162, "learning_rate": 1.4620146300543727e-05, "loss": 0.5424, "step": 25351 }, { "epoch": 0.6961010433827567, "grad_norm": 0.43288469314575195, "learning_rate": 1.4619763261933008e-05, "loss": 0.4863, "step": 25352 }, { "epoch": 0.6961285008237232, "grad_norm": 0.4547356963157654, "learning_rate": 1.4619380214704954e-05, "loss": 0.4757, "step": 25353 }, { "epoch": 0.6961559582646897, "grad_norm": 0.6719582080841064, "learning_rate": 1.4618997158860284e-05, "loss": 0.5723, "step": 25354 }, { "epoch": 0.6961834157056562, "grad_norm": 0.3890974223613739, "learning_rate": 1.4618614094399712e-05, "loss": 0.5295, "step": 25355 }, { "epoch": 0.6962108731466228, "grad_norm": 0.3563663363456726, "learning_rate": 1.4618231021323946e-05, "loss": 0.4951, "step": 25356 }, { "epoch": 0.6962383305875892, "grad_norm": 0.4012490212917328, "learning_rate": 1.4617847939633709e-05, "loss": 0.4457, "step": 25357 }, { "epoch": 0.6962657880285558, "grad_norm": 0.3936764597892761, "learning_rate": 1.4617464849329708e-05, "loss": 0.569, "step": 25358 }, { "epoch": 0.6962932454695222, "grad_norm": 0.38228994607925415, "learning_rate": 1.4617081750412662e-05, "loss": 0.4749, "step": 25359 }, { "epoch": 0.6963207029104888, "grad_norm": 0.49958130717277527, "learning_rate": 1.4616698642883287e-05, "loss": 0.6399, "step": 25360 }, { "epoch": 0.6963481603514552, "grad_norm": 0.4254586696624756, "learning_rate": 1.4616315526742296e-05, "loss": 0.5756, "step": 25361 }, { "epoch": 0.6963756177924217, "grad_norm": 0.38669806718826294, "learning_rate": 1.4615932401990403e-05, "loss": 0.5336, "step": 25362 }, { "epoch": 0.6964030752333883, "grad_norm": 0.37593865394592285, "learning_rate": 1.461554926862832e-05, "loss": 0.5053, "step": 25363 }, { "epoch": 0.6964305326743547, "grad_norm": 0.5282707810401917, "learning_rate": 1.4615166126656766e-05, "loss": 0.5421, "step": 25364 }, { "epoch": 0.6964579901153213, "grad_norm": 0.3744141161441803, "learning_rate": 1.4614782976076454e-05, "loss": 0.4873, "step": 25365 }, { "epoch": 0.6964854475562877, "grad_norm": 0.39034080505371094, "learning_rate": 1.46143998168881e-05, "loss": 0.5006, "step": 25366 }, { "epoch": 0.6965129049972543, "grad_norm": 0.34547728300094604, "learning_rate": 1.4614016649092418e-05, "loss": 0.4647, "step": 25367 }, { "epoch": 0.6965403624382207, "grad_norm": 0.3805381655693054, "learning_rate": 1.4613633472690118e-05, "loss": 0.4734, "step": 25368 }, { "epoch": 0.6965678198791873, "grad_norm": 0.41958561539649963, "learning_rate": 1.4613250287681924e-05, "loss": 0.5373, "step": 25369 }, { "epoch": 0.6965952773201538, "grad_norm": 0.46847084164619446, "learning_rate": 1.4612867094068543e-05, "loss": 0.5214, "step": 25370 }, { "epoch": 0.6966227347611202, "grad_norm": 0.3500595986843109, "learning_rate": 1.4612483891850695e-05, "loss": 0.4928, "step": 25371 }, { "epoch": 0.6966501922020868, "grad_norm": 0.33767861127853394, "learning_rate": 1.4612100681029091e-05, "loss": 0.4669, "step": 25372 }, { "epoch": 0.6966776496430532, "grad_norm": 0.4210420548915863, "learning_rate": 1.4611717461604446e-05, "loss": 0.5056, "step": 25373 }, { "epoch": 0.6967051070840198, "grad_norm": 0.37151041626930237, "learning_rate": 1.4611334233577478e-05, "loss": 0.5013, "step": 25374 }, { "epoch": 0.6967325645249862, "grad_norm": 0.36425137519836426, "learning_rate": 1.46109509969489e-05, "loss": 0.5092, "step": 25375 }, { "epoch": 0.6967600219659528, "grad_norm": 0.4395934045314789, "learning_rate": 1.4610567751719427e-05, "loss": 0.4686, "step": 25376 }, { "epoch": 0.6967874794069193, "grad_norm": 0.4124169647693634, "learning_rate": 1.461018449788977e-05, "loss": 0.462, "step": 25377 }, { "epoch": 0.6968149368478858, "grad_norm": 0.3974516987800598, "learning_rate": 1.4609801235460653e-05, "loss": 0.5503, "step": 25378 }, { "epoch": 0.6968423942888523, "grad_norm": 0.43092894554138184, "learning_rate": 1.4609417964432782e-05, "loss": 0.4488, "step": 25379 }, { "epoch": 0.6968698517298187, "grad_norm": 0.35248851776123047, "learning_rate": 1.4609034684806875e-05, "loss": 0.463, "step": 25380 }, { "epoch": 0.6968973091707853, "grad_norm": 0.38552138209342957, "learning_rate": 1.4608651396583649e-05, "loss": 0.5011, "step": 25381 }, { "epoch": 0.6969247666117517, "grad_norm": 0.3823942542076111, "learning_rate": 1.4608268099763816e-05, "loss": 0.5015, "step": 25382 }, { "epoch": 0.6969522240527183, "grad_norm": 0.3486464321613312, "learning_rate": 1.4607884794348093e-05, "loss": 0.4782, "step": 25383 }, { "epoch": 0.6969796814936848, "grad_norm": 0.3770047724246979, "learning_rate": 1.4607501480337193e-05, "loss": 0.4927, "step": 25384 }, { "epoch": 0.6970071389346513, "grad_norm": 0.3807443082332611, "learning_rate": 1.4607118157731831e-05, "loss": 0.45, "step": 25385 }, { "epoch": 0.6970345963756178, "grad_norm": 0.39712586998939514, "learning_rate": 1.4606734826532729e-05, "loss": 0.4707, "step": 25386 }, { "epoch": 0.6970620538165843, "grad_norm": 0.3702531158924103, "learning_rate": 1.4606351486740592e-05, "loss": 0.4357, "step": 25387 }, { "epoch": 0.6970895112575508, "grad_norm": 0.46723392605781555, "learning_rate": 1.460596813835614e-05, "loss": 0.444, "step": 25388 }, { "epoch": 0.6971169686985172, "grad_norm": 0.4044448137283325, "learning_rate": 1.4605584781380087e-05, "loss": 0.4749, "step": 25389 }, { "epoch": 0.6971444261394838, "grad_norm": 0.40213897824287415, "learning_rate": 1.4605201415813147e-05, "loss": 0.4802, "step": 25390 }, { "epoch": 0.6971718835804503, "grad_norm": 0.4053410589694977, "learning_rate": 1.4604818041656039e-05, "loss": 0.472, "step": 25391 }, { "epoch": 0.6971993410214168, "grad_norm": 0.36085158586502075, "learning_rate": 1.4604434658909476e-05, "loss": 0.4822, "step": 25392 }, { "epoch": 0.6972267984623833, "grad_norm": 0.36807844042778015, "learning_rate": 1.4604051267574171e-05, "loss": 0.4882, "step": 25393 }, { "epoch": 0.6972542559033498, "grad_norm": 0.3604799509048462, "learning_rate": 1.460366786765084e-05, "loss": 0.5556, "step": 25394 }, { "epoch": 0.6972817133443163, "grad_norm": 0.3477962613105774, "learning_rate": 1.4603284459140202e-05, "loss": 0.4845, "step": 25395 }, { "epoch": 0.6973091707852828, "grad_norm": 0.36576423048973083, "learning_rate": 1.460290104204297e-05, "loss": 0.4659, "step": 25396 }, { "epoch": 0.6973366282262493, "grad_norm": 0.40345698595046997, "learning_rate": 1.4602517616359854e-05, "loss": 0.4915, "step": 25397 }, { "epoch": 0.6973640856672159, "grad_norm": 0.39943066239356995, "learning_rate": 1.4602134182091577e-05, "loss": 0.5471, "step": 25398 }, { "epoch": 0.6973915431081823, "grad_norm": 0.3715866804122925, "learning_rate": 1.4601750739238851e-05, "loss": 0.4938, "step": 25399 }, { "epoch": 0.6974190005491488, "grad_norm": 0.4035443067550659, "learning_rate": 1.460136728780239e-05, "loss": 0.5001, "step": 25400 }, { "epoch": 0.6974464579901153, "grad_norm": 0.3853027820587158, "learning_rate": 1.460098382778291e-05, "loss": 0.515, "step": 25401 }, { "epoch": 0.6974739154310818, "grad_norm": 0.3632504343986511, "learning_rate": 1.4600600359181127e-05, "loss": 0.4341, "step": 25402 }, { "epoch": 0.6975013728720483, "grad_norm": 0.40799659490585327, "learning_rate": 1.4600216881997758e-05, "loss": 0.5214, "step": 25403 }, { "epoch": 0.6975288303130148, "grad_norm": 0.4270766079425812, "learning_rate": 1.4599833396233514e-05, "loss": 0.5095, "step": 25404 }, { "epoch": 0.6975562877539814, "grad_norm": 0.39089149236679077, "learning_rate": 1.4599449901889114e-05, "loss": 0.447, "step": 25405 }, { "epoch": 0.6975837451949478, "grad_norm": 0.39415526390075684, "learning_rate": 1.4599066398965269e-05, "loss": 0.4844, "step": 25406 }, { "epoch": 0.6976112026359144, "grad_norm": 0.35627275705337524, "learning_rate": 1.4598682887462701e-05, "loss": 0.4882, "step": 25407 }, { "epoch": 0.6976386600768808, "grad_norm": 0.4121449589729309, "learning_rate": 1.459829936738212e-05, "loss": 0.4814, "step": 25408 }, { "epoch": 0.6976661175178474, "grad_norm": 0.39693117141723633, "learning_rate": 1.4597915838724244e-05, "loss": 0.4702, "step": 25409 }, { "epoch": 0.6976935749588138, "grad_norm": 0.3664967119693756, "learning_rate": 1.4597532301489788e-05, "loss": 0.4528, "step": 25410 }, { "epoch": 0.6977210323997803, "grad_norm": 0.37770670652389526, "learning_rate": 1.4597148755679462e-05, "loss": 0.4451, "step": 25411 }, { "epoch": 0.6977484898407469, "grad_norm": 0.34469226002693176, "learning_rate": 1.4596765201293992e-05, "loss": 0.3914, "step": 25412 }, { "epoch": 0.6977759472817133, "grad_norm": 0.37485289573669434, "learning_rate": 1.4596381638334088e-05, "loss": 0.446, "step": 25413 }, { "epoch": 0.6978034047226799, "grad_norm": 0.340489000082016, "learning_rate": 1.459599806680046e-05, "loss": 0.5094, "step": 25414 }, { "epoch": 0.6978308621636463, "grad_norm": 0.3640578091144562, "learning_rate": 1.4595614486693835e-05, "loss": 0.5321, "step": 25415 }, { "epoch": 0.6978583196046129, "grad_norm": 0.35629913210868835, "learning_rate": 1.4595230898014916e-05, "loss": 0.5058, "step": 25416 }, { "epoch": 0.6978857770455793, "grad_norm": 0.37914976477622986, "learning_rate": 1.4594847300764428e-05, "loss": 0.4503, "step": 25417 }, { "epoch": 0.6979132344865459, "grad_norm": 0.43085458874702454, "learning_rate": 1.4594463694943084e-05, "loss": 0.5518, "step": 25418 }, { "epoch": 0.6979406919275124, "grad_norm": 0.3899019658565521, "learning_rate": 1.4594080080551597e-05, "loss": 0.5519, "step": 25419 }, { "epoch": 0.6979681493684788, "grad_norm": 0.4156707525253296, "learning_rate": 1.4593696457590688e-05, "loss": 0.5862, "step": 25420 }, { "epoch": 0.6979956068094454, "grad_norm": 0.34225592017173767, "learning_rate": 1.4593312826061063e-05, "loss": 0.4772, "step": 25421 }, { "epoch": 0.6980230642504118, "grad_norm": 0.35079413652420044, "learning_rate": 1.4592929185963447e-05, "loss": 0.4031, "step": 25422 }, { "epoch": 0.6980505216913784, "grad_norm": 0.3509799540042877, "learning_rate": 1.4592545537298555e-05, "loss": 0.3967, "step": 25423 }, { "epoch": 0.6980779791323448, "grad_norm": 0.35817137360572815, "learning_rate": 1.4592161880067096e-05, "loss": 0.4591, "step": 25424 }, { "epoch": 0.6981054365733114, "grad_norm": 0.3844546675682068, "learning_rate": 1.4591778214269789e-05, "loss": 0.4986, "step": 25425 }, { "epoch": 0.6981328940142779, "grad_norm": 0.45115259289741516, "learning_rate": 1.4591394539907348e-05, "loss": 0.529, "step": 25426 }, { "epoch": 0.6981603514552444, "grad_norm": 0.43783843517303467, "learning_rate": 1.4591010856980496e-05, "loss": 0.5832, "step": 25427 }, { "epoch": 0.6981878088962109, "grad_norm": 0.355221688747406, "learning_rate": 1.4590627165489944e-05, "loss": 0.5, "step": 25428 }, { "epoch": 0.6982152663371773, "grad_norm": 0.37167999148368835, "learning_rate": 1.4590243465436402e-05, "loss": 0.5238, "step": 25429 }, { "epoch": 0.6982427237781439, "grad_norm": 0.4129979908466339, "learning_rate": 1.4589859756820594e-05, "loss": 0.4964, "step": 25430 }, { "epoch": 0.6982701812191103, "grad_norm": 0.41145846247673035, "learning_rate": 1.4589476039643233e-05, "loss": 0.5889, "step": 25431 }, { "epoch": 0.6982976386600769, "grad_norm": 0.3577579855918884, "learning_rate": 1.4589092313905034e-05, "loss": 0.4973, "step": 25432 }, { "epoch": 0.6983250961010434, "grad_norm": 0.3763614296913147, "learning_rate": 1.4588708579606714e-05, "loss": 0.4417, "step": 25433 }, { "epoch": 0.6983525535420099, "grad_norm": 0.38437125086784363, "learning_rate": 1.4588324836748986e-05, "loss": 0.6009, "step": 25434 }, { "epoch": 0.6983800109829764, "grad_norm": 0.3477506935596466, "learning_rate": 1.458794108533257e-05, "loss": 0.4205, "step": 25435 }, { "epoch": 0.6984074684239429, "grad_norm": 0.3894840478897095, "learning_rate": 1.4587557325358177e-05, "loss": 0.484, "step": 25436 }, { "epoch": 0.6984349258649094, "grad_norm": 0.6417276263237, "learning_rate": 1.4587173556826526e-05, "loss": 0.4433, "step": 25437 }, { "epoch": 0.6984623833058758, "grad_norm": 0.5560301542282104, "learning_rate": 1.4586789779738333e-05, "loss": 0.5942, "step": 25438 }, { "epoch": 0.6984898407468424, "grad_norm": 0.4187886416912079, "learning_rate": 1.4586405994094313e-05, "loss": 0.6353, "step": 25439 }, { "epoch": 0.6985172981878089, "grad_norm": 0.44966015219688416, "learning_rate": 1.4586022199895182e-05, "loss": 0.5811, "step": 25440 }, { "epoch": 0.6985447556287754, "grad_norm": 0.357273668050766, "learning_rate": 1.4585638397141657e-05, "loss": 0.5718, "step": 25441 }, { "epoch": 0.6985722130697419, "grad_norm": 0.40705162286758423, "learning_rate": 1.458525458583445e-05, "loss": 0.549, "step": 25442 }, { "epoch": 0.6985996705107084, "grad_norm": 0.4409189522266388, "learning_rate": 1.4584870765974282e-05, "loss": 0.5284, "step": 25443 }, { "epoch": 0.6986271279516749, "grad_norm": 0.3913683593273163, "learning_rate": 1.4584486937561866e-05, "loss": 0.5282, "step": 25444 }, { "epoch": 0.6986545853926414, "grad_norm": 0.4199334681034088, "learning_rate": 1.458410310059792e-05, "loss": 0.511, "step": 25445 }, { "epoch": 0.6986820428336079, "grad_norm": 0.3714434802532196, "learning_rate": 1.4583719255083155e-05, "loss": 0.4918, "step": 25446 }, { "epoch": 0.6987095002745745, "grad_norm": 0.3949292004108429, "learning_rate": 1.4583335401018293e-05, "loss": 0.5284, "step": 25447 }, { "epoch": 0.6987369577155409, "grad_norm": 0.36550769209861755, "learning_rate": 1.4582951538404048e-05, "loss": 0.4871, "step": 25448 }, { "epoch": 0.6987644151565074, "grad_norm": 0.361356258392334, "learning_rate": 1.4582567667241137e-05, "loss": 0.488, "step": 25449 }, { "epoch": 0.6987918725974739, "grad_norm": 0.4206015467643738, "learning_rate": 1.4582183787530273e-05, "loss": 0.4468, "step": 25450 }, { "epoch": 0.6988193300384404, "grad_norm": 0.342547208070755, "learning_rate": 1.4581799899272173e-05, "loss": 0.5168, "step": 25451 }, { "epoch": 0.6988467874794069, "grad_norm": 0.4114772379398346, "learning_rate": 1.4581416002467554e-05, "loss": 0.5461, "step": 25452 }, { "epoch": 0.6988742449203734, "grad_norm": 0.38687655329704285, "learning_rate": 1.4581032097117134e-05, "loss": 0.4838, "step": 25453 }, { "epoch": 0.69890170236134, "grad_norm": 0.34125709533691406, "learning_rate": 1.4580648183221626e-05, "loss": 0.5352, "step": 25454 }, { "epoch": 0.6989291598023064, "grad_norm": 0.398270845413208, "learning_rate": 1.4580264260781744e-05, "loss": 0.5313, "step": 25455 }, { "epoch": 0.698956617243273, "grad_norm": 0.38955777883529663, "learning_rate": 1.4579880329798213e-05, "loss": 0.6164, "step": 25456 }, { "epoch": 0.6989840746842394, "grad_norm": 0.3907632529735565, "learning_rate": 1.4579496390271739e-05, "loss": 0.472, "step": 25457 }, { "epoch": 0.699011532125206, "grad_norm": 0.4551410675048828, "learning_rate": 1.4579112442203046e-05, "loss": 0.4656, "step": 25458 }, { "epoch": 0.6990389895661724, "grad_norm": 0.39492297172546387, "learning_rate": 1.4578728485592845e-05, "loss": 0.5435, "step": 25459 }, { "epoch": 0.6990664470071389, "grad_norm": 0.39289143681526184, "learning_rate": 1.4578344520441853e-05, "loss": 0.5264, "step": 25460 }, { "epoch": 0.6990939044481055, "grad_norm": 0.39825108647346497, "learning_rate": 1.457796054675079e-05, "loss": 0.5373, "step": 25461 }, { "epoch": 0.6991213618890719, "grad_norm": 0.4253794550895691, "learning_rate": 1.4577576564520368e-05, "loss": 0.5739, "step": 25462 }, { "epoch": 0.6991488193300385, "grad_norm": 0.41354668140411377, "learning_rate": 1.4577192573751302e-05, "loss": 0.5114, "step": 25463 }, { "epoch": 0.6991762767710049, "grad_norm": 0.6106164455413818, "learning_rate": 1.4576808574444314e-05, "loss": 0.4972, "step": 25464 }, { "epoch": 0.6992037342119715, "grad_norm": 0.3712099492549896, "learning_rate": 1.4576424566600116e-05, "loss": 0.491, "step": 25465 }, { "epoch": 0.6992311916529379, "grad_norm": 0.4822063744068146, "learning_rate": 1.4576040550219428e-05, "loss": 0.4949, "step": 25466 }, { "epoch": 0.6992586490939044, "grad_norm": 0.3936867415904999, "learning_rate": 1.4575656525302964e-05, "loss": 0.5418, "step": 25467 }, { "epoch": 0.699286106534871, "grad_norm": 0.3470097482204437, "learning_rate": 1.4575272491851435e-05, "loss": 0.4388, "step": 25468 }, { "epoch": 0.6993135639758374, "grad_norm": 0.37351739406585693, "learning_rate": 1.4574888449865567e-05, "loss": 0.5053, "step": 25469 }, { "epoch": 0.699341021416804, "grad_norm": 0.4169138967990875, "learning_rate": 1.457450439934607e-05, "loss": 0.5411, "step": 25470 }, { "epoch": 0.6993684788577704, "grad_norm": 1.3586455583572388, "learning_rate": 1.4574120340293663e-05, "loss": 0.5479, "step": 25471 }, { "epoch": 0.699395936298737, "grad_norm": 0.3889513611793518, "learning_rate": 1.4573736272709065e-05, "loss": 0.4562, "step": 25472 }, { "epoch": 0.6994233937397034, "grad_norm": 0.4128541946411133, "learning_rate": 1.4573352196592983e-05, "loss": 0.5425, "step": 25473 }, { "epoch": 0.69945085118067, "grad_norm": 0.3986850678920746, "learning_rate": 1.4572968111946143e-05, "loss": 0.4688, "step": 25474 }, { "epoch": 0.6994783086216365, "grad_norm": 0.39681771397590637, "learning_rate": 1.4572584018769259e-05, "loss": 0.5502, "step": 25475 }, { "epoch": 0.699505766062603, "grad_norm": 0.3734547793865204, "learning_rate": 1.4572199917063044e-05, "loss": 0.5884, "step": 25476 }, { "epoch": 0.6995332235035695, "grad_norm": 0.4484544098377228, "learning_rate": 1.4571815806828218e-05, "loss": 0.4151, "step": 25477 }, { "epoch": 0.6995606809445359, "grad_norm": 0.40805771946907043, "learning_rate": 1.4571431688065494e-05, "loss": 0.605, "step": 25478 }, { "epoch": 0.6995881383855025, "grad_norm": 0.4716414511203766, "learning_rate": 1.4571047560775593e-05, "loss": 0.4715, "step": 25479 }, { "epoch": 0.6996155958264689, "grad_norm": 0.4433923065662384, "learning_rate": 1.457066342495923e-05, "loss": 0.5876, "step": 25480 }, { "epoch": 0.6996430532674355, "grad_norm": 0.34977322816848755, "learning_rate": 1.4570279280617119e-05, "loss": 0.4731, "step": 25481 }, { "epoch": 0.699670510708402, "grad_norm": 0.37862730026245117, "learning_rate": 1.4569895127749982e-05, "loss": 0.5158, "step": 25482 }, { "epoch": 0.6996979681493685, "grad_norm": 0.3817298114299774, "learning_rate": 1.4569510966358526e-05, "loss": 0.545, "step": 25483 }, { "epoch": 0.699725425590335, "grad_norm": 0.3889427185058594, "learning_rate": 1.456912679644348e-05, "loss": 0.5485, "step": 25484 }, { "epoch": 0.6997528830313015, "grad_norm": 0.3915839195251465, "learning_rate": 1.4568742618005552e-05, "loss": 0.4928, "step": 25485 }, { "epoch": 0.699780340472268, "grad_norm": 0.36893731355667114, "learning_rate": 1.456835843104546e-05, "loss": 0.4595, "step": 25486 }, { "epoch": 0.6998077979132344, "grad_norm": 0.40549057722091675, "learning_rate": 1.4567974235563922e-05, "loss": 0.5017, "step": 25487 }, { "epoch": 0.699835255354201, "grad_norm": 0.3457392752170563, "learning_rate": 1.456759003156165e-05, "loss": 0.4983, "step": 25488 }, { "epoch": 0.6998627127951675, "grad_norm": 0.4409995675086975, "learning_rate": 1.4567205819039371e-05, "loss": 0.6225, "step": 25489 }, { "epoch": 0.699890170236134, "grad_norm": 0.3928782045841217, "learning_rate": 1.4566821597997795e-05, "loss": 0.4683, "step": 25490 }, { "epoch": 0.6999176276771005, "grad_norm": 0.36442095041275024, "learning_rate": 1.4566437368437636e-05, "loss": 0.4939, "step": 25491 }, { "epoch": 0.699945085118067, "grad_norm": 0.3952120244503021, "learning_rate": 1.4566053130359617e-05, "loss": 0.4593, "step": 25492 }, { "epoch": 0.6999725425590335, "grad_norm": 0.32815104722976685, "learning_rate": 1.456566888376445e-05, "loss": 0.409, "step": 25493 }, { "epoch": 0.7, "grad_norm": 0.44275471568107605, "learning_rate": 1.4565284628652851e-05, "loss": 0.5578, "step": 25494 }, { "epoch": 0.7000274574409665, "grad_norm": 0.41010984778404236, "learning_rate": 1.4564900365025542e-05, "loss": 0.5546, "step": 25495 }, { "epoch": 0.700054914881933, "grad_norm": 0.39667388796806335, "learning_rate": 1.4564516092883236e-05, "loss": 0.4838, "step": 25496 }, { "epoch": 0.7000823723228995, "grad_norm": 0.414629727602005, "learning_rate": 1.4564131812226652e-05, "loss": 0.4735, "step": 25497 }, { "epoch": 0.700109829763866, "grad_norm": 0.40252256393432617, "learning_rate": 1.4563747523056504e-05, "loss": 0.5028, "step": 25498 }, { "epoch": 0.7001372872048325, "grad_norm": 0.44975876808166504, "learning_rate": 1.456336322537351e-05, "loss": 0.5288, "step": 25499 }, { "epoch": 0.700164744645799, "grad_norm": 0.35001280903816223, "learning_rate": 1.4562978919178388e-05, "loss": 0.3927, "step": 25500 }, { "epoch": 0.7001922020867655, "grad_norm": 0.4171823561191559, "learning_rate": 1.4562594604471851e-05, "loss": 0.5573, "step": 25501 }, { "epoch": 0.700219659527732, "grad_norm": 0.3928503692150116, "learning_rate": 1.4562210281254624e-05, "loss": 0.5539, "step": 25502 }, { "epoch": 0.7002471169686986, "grad_norm": 0.39245885610580444, "learning_rate": 1.4561825949527417e-05, "loss": 0.5552, "step": 25503 }, { "epoch": 0.700274574409665, "grad_norm": 0.395622581243515, "learning_rate": 1.4561441609290948e-05, "loss": 0.5678, "step": 25504 }, { "epoch": 0.7003020318506316, "grad_norm": 0.3703465759754181, "learning_rate": 1.4561057260545935e-05, "loss": 0.5397, "step": 25505 }, { "epoch": 0.700329489291598, "grad_norm": 0.39561647176742554, "learning_rate": 1.4560672903293094e-05, "loss": 0.5331, "step": 25506 }, { "epoch": 0.7003569467325645, "grad_norm": 0.38219231367111206, "learning_rate": 1.456028853753314e-05, "loss": 0.5533, "step": 25507 }, { "epoch": 0.700384404173531, "grad_norm": 0.4668722450733185, "learning_rate": 1.4559904163266795e-05, "loss": 0.5284, "step": 25508 }, { "epoch": 0.7004118616144975, "grad_norm": 0.43465766310691833, "learning_rate": 1.4559519780494773e-05, "loss": 0.4835, "step": 25509 }, { "epoch": 0.7004393190554641, "grad_norm": 0.4053919315338135, "learning_rate": 1.4559135389217794e-05, "loss": 0.4844, "step": 25510 }, { "epoch": 0.7004667764964305, "grad_norm": 0.37558043003082275, "learning_rate": 1.455875098943657e-05, "loss": 0.4119, "step": 25511 }, { "epoch": 0.7004942339373971, "grad_norm": 0.4137763977050781, "learning_rate": 1.4558366581151819e-05, "loss": 0.5691, "step": 25512 }, { "epoch": 0.7005216913783635, "grad_norm": 0.365683913230896, "learning_rate": 1.4557982164364261e-05, "loss": 0.4583, "step": 25513 }, { "epoch": 0.7005491488193301, "grad_norm": 0.5254682302474976, "learning_rate": 1.455759773907461e-05, "loss": 0.4846, "step": 25514 }, { "epoch": 0.7005766062602965, "grad_norm": 0.34630414843559265, "learning_rate": 1.4557213305283585e-05, "loss": 0.4335, "step": 25515 }, { "epoch": 0.700604063701263, "grad_norm": 0.4035494327545166, "learning_rate": 1.4556828862991908e-05, "loss": 0.4658, "step": 25516 }, { "epoch": 0.7006315211422296, "grad_norm": 0.39087194204330444, "learning_rate": 1.4556444412200283e-05, "loss": 0.563, "step": 25517 }, { "epoch": 0.700658978583196, "grad_norm": 0.4403948485851288, "learning_rate": 1.455605995290944e-05, "loss": 0.5627, "step": 25518 }, { "epoch": 0.7006864360241626, "grad_norm": 0.39401742815971375, "learning_rate": 1.4555675485120089e-05, "loss": 0.5103, "step": 25519 }, { "epoch": 0.700713893465129, "grad_norm": 0.3294680714607239, "learning_rate": 1.455529100883295e-05, "loss": 0.4399, "step": 25520 }, { "epoch": 0.7007413509060956, "grad_norm": 0.45106953382492065, "learning_rate": 1.455490652404874e-05, "loss": 0.4513, "step": 25521 }, { "epoch": 0.700768808347062, "grad_norm": 0.38094329833984375, "learning_rate": 1.4554522030768171e-05, "loss": 0.52, "step": 25522 }, { "epoch": 0.7007962657880286, "grad_norm": 0.3357788920402527, "learning_rate": 1.4554137528991972e-05, "loss": 0.494, "step": 25523 }, { "epoch": 0.7008237232289951, "grad_norm": 0.4036049246788025, "learning_rate": 1.4553753018720849e-05, "loss": 0.5181, "step": 25524 }, { "epoch": 0.7008511806699615, "grad_norm": 0.34515562653541565, "learning_rate": 1.4553368499955523e-05, "loss": 0.4078, "step": 25525 }, { "epoch": 0.7008786381109281, "grad_norm": 0.5524396300315857, "learning_rate": 1.4552983972696712e-05, "loss": 0.5571, "step": 25526 }, { "epoch": 0.7009060955518945, "grad_norm": 0.4001401662826538, "learning_rate": 1.4552599436945132e-05, "loss": 0.4668, "step": 25527 }, { "epoch": 0.7009335529928611, "grad_norm": 0.3967103660106659, "learning_rate": 1.4552214892701502e-05, "loss": 0.4778, "step": 25528 }, { "epoch": 0.7009610104338275, "grad_norm": 0.4463861584663391, "learning_rate": 1.4551830339966537e-05, "loss": 0.4796, "step": 25529 }, { "epoch": 0.7009884678747941, "grad_norm": 0.35379305481910706, "learning_rate": 1.4551445778740957e-05, "loss": 0.4753, "step": 25530 }, { "epoch": 0.7010159253157606, "grad_norm": 0.39269736409187317, "learning_rate": 1.4551061209025478e-05, "loss": 0.5051, "step": 25531 }, { "epoch": 0.7010433827567271, "grad_norm": 0.40198907256126404, "learning_rate": 1.4550676630820819e-05, "loss": 0.5353, "step": 25532 }, { "epoch": 0.7010708401976936, "grad_norm": 0.4116518497467041, "learning_rate": 1.4550292044127694e-05, "loss": 0.543, "step": 25533 }, { "epoch": 0.70109829763866, "grad_norm": 0.3985337018966675, "learning_rate": 1.454990744894682e-05, "loss": 0.5066, "step": 25534 }, { "epoch": 0.7011257550796266, "grad_norm": 0.35581669211387634, "learning_rate": 1.454952284527892e-05, "loss": 0.4877, "step": 25535 }, { "epoch": 0.701153212520593, "grad_norm": 0.3826042413711548, "learning_rate": 1.4549138233124707e-05, "loss": 0.4641, "step": 25536 }, { "epoch": 0.7011806699615596, "grad_norm": 0.4104309380054474, "learning_rate": 1.4548753612484896e-05, "loss": 0.5685, "step": 25537 }, { "epoch": 0.7012081274025261, "grad_norm": 0.41666179895401, "learning_rate": 1.454836898336021e-05, "loss": 0.5691, "step": 25538 }, { "epoch": 0.7012355848434926, "grad_norm": 0.39096251130104065, "learning_rate": 1.4547984345751365e-05, "loss": 0.5303, "step": 25539 }, { "epoch": 0.7012630422844591, "grad_norm": 0.4020645320415497, "learning_rate": 1.4547599699659079e-05, "loss": 0.4741, "step": 25540 }, { "epoch": 0.7012904997254256, "grad_norm": 0.4154307246208191, "learning_rate": 1.4547215045084065e-05, "loss": 0.5128, "step": 25541 }, { "epoch": 0.7013179571663921, "grad_norm": 0.38484203815460205, "learning_rate": 1.4546830382027045e-05, "loss": 0.543, "step": 25542 }, { "epoch": 0.7013454146073586, "grad_norm": 0.33228689432144165, "learning_rate": 1.4546445710488734e-05, "loss": 0.4587, "step": 25543 }, { "epoch": 0.7013728720483251, "grad_norm": 0.4225093424320221, "learning_rate": 1.4546061030469852e-05, "loss": 0.5432, "step": 25544 }, { "epoch": 0.7014003294892917, "grad_norm": 0.41656234860420227, "learning_rate": 1.4545676341971114e-05, "loss": 0.5205, "step": 25545 }, { "epoch": 0.7014277869302581, "grad_norm": 0.4305850863456726, "learning_rate": 1.454529164499324e-05, "loss": 0.5787, "step": 25546 }, { "epoch": 0.7014552443712246, "grad_norm": 0.39565590023994446, "learning_rate": 1.4544906939536944e-05, "loss": 0.4778, "step": 25547 }, { "epoch": 0.7014827018121911, "grad_norm": 0.3816887140274048, "learning_rate": 1.4544522225602951e-05, "loss": 0.5187, "step": 25548 }, { "epoch": 0.7015101592531576, "grad_norm": 0.37928593158721924, "learning_rate": 1.4544137503191969e-05, "loss": 0.5063, "step": 25549 }, { "epoch": 0.7015376166941241, "grad_norm": 0.3695752024650574, "learning_rate": 1.4543752772304722e-05, "loss": 0.5127, "step": 25550 }, { "epoch": 0.7015650741350906, "grad_norm": 0.3623110353946686, "learning_rate": 1.4543368032941924e-05, "loss": 0.4892, "step": 25551 }, { "epoch": 0.7015925315760572, "grad_norm": 0.3767757713794708, "learning_rate": 1.4542983285104297e-05, "loss": 0.4772, "step": 25552 }, { "epoch": 0.7016199890170236, "grad_norm": 0.39002251625061035, "learning_rate": 1.4542598528792557e-05, "loss": 0.5077, "step": 25553 }, { "epoch": 0.7016474464579902, "grad_norm": 0.4232649803161621, "learning_rate": 1.4542213764007418e-05, "loss": 0.4417, "step": 25554 }, { "epoch": 0.7016749038989566, "grad_norm": 0.3804721236228943, "learning_rate": 1.4541828990749605e-05, "loss": 0.4642, "step": 25555 }, { "epoch": 0.7017023613399231, "grad_norm": 0.37860339879989624, "learning_rate": 1.4541444209019826e-05, "loss": 0.572, "step": 25556 }, { "epoch": 0.7017298187808896, "grad_norm": 0.3334372341632843, "learning_rate": 1.4541059418818808e-05, "loss": 0.4386, "step": 25557 }, { "epoch": 0.7017572762218561, "grad_norm": 0.3732791543006897, "learning_rate": 1.4540674620147261e-05, "loss": 0.5359, "step": 25558 }, { "epoch": 0.7017847336628227, "grad_norm": 0.37556856870651245, "learning_rate": 1.454028981300591e-05, "loss": 0.5223, "step": 25559 }, { "epoch": 0.7018121911037891, "grad_norm": 0.32337796688079834, "learning_rate": 1.4539904997395468e-05, "loss": 0.4151, "step": 25560 }, { "epoch": 0.7018396485447557, "grad_norm": 0.410470575094223, "learning_rate": 1.4539520173316654e-05, "loss": 0.5381, "step": 25561 }, { "epoch": 0.7018671059857221, "grad_norm": 0.3592793047428131, "learning_rate": 1.4539135340770188e-05, "loss": 0.4025, "step": 25562 }, { "epoch": 0.7018945634266887, "grad_norm": 0.4087314009666443, "learning_rate": 1.4538750499756785e-05, "loss": 0.6024, "step": 25563 }, { "epoch": 0.7019220208676551, "grad_norm": 0.35463646054267883, "learning_rate": 1.4538365650277162e-05, "loss": 0.4124, "step": 25564 }, { "epoch": 0.7019494783086216, "grad_norm": 0.41876786947250366, "learning_rate": 1.453798079233204e-05, "loss": 0.4653, "step": 25565 }, { "epoch": 0.7019769357495882, "grad_norm": 0.35154515504837036, "learning_rate": 1.4537595925922134e-05, "loss": 0.4721, "step": 25566 }, { "epoch": 0.7020043931905546, "grad_norm": 0.3559425175189972, "learning_rate": 1.4537211051048167e-05, "loss": 0.4896, "step": 25567 }, { "epoch": 0.7020318506315212, "grad_norm": 0.361256867647171, "learning_rate": 1.4536826167710852e-05, "loss": 0.4437, "step": 25568 }, { "epoch": 0.7020593080724876, "grad_norm": 0.5027250051498413, "learning_rate": 1.4536441275910904e-05, "loss": 0.5362, "step": 25569 }, { "epoch": 0.7020867655134542, "grad_norm": 0.3667948544025421, "learning_rate": 1.453605637564905e-05, "loss": 0.5205, "step": 25570 }, { "epoch": 0.7021142229544206, "grad_norm": 0.4688922166824341, "learning_rate": 1.4535671466926e-05, "loss": 0.501, "step": 25571 }, { "epoch": 0.7021416803953872, "grad_norm": 0.36227062344551086, "learning_rate": 1.4535286549742478e-05, "loss": 0.407, "step": 25572 }, { "epoch": 0.7021691378363537, "grad_norm": 0.3392520546913147, "learning_rate": 1.4534901624099198e-05, "loss": 0.3981, "step": 25573 }, { "epoch": 0.7021965952773201, "grad_norm": 0.39412832260131836, "learning_rate": 1.4534516689996878e-05, "loss": 0.5246, "step": 25574 }, { "epoch": 0.7022240527182867, "grad_norm": 0.39351391792297363, "learning_rate": 1.4534131747436237e-05, "loss": 0.4999, "step": 25575 }, { "epoch": 0.7022515101592531, "grad_norm": 0.35336238145828247, "learning_rate": 1.4533746796417994e-05, "loss": 0.5567, "step": 25576 }, { "epoch": 0.7022789676002197, "grad_norm": 0.4432460367679596, "learning_rate": 1.4533361836942868e-05, "loss": 0.6227, "step": 25577 }, { "epoch": 0.7023064250411861, "grad_norm": 0.4296761453151703, "learning_rate": 1.4532976869011572e-05, "loss": 0.4503, "step": 25578 }, { "epoch": 0.7023338824821527, "grad_norm": 0.4211810827255249, "learning_rate": 1.4532591892624828e-05, "loss": 0.4204, "step": 25579 }, { "epoch": 0.7023613399231191, "grad_norm": 0.33984047174453735, "learning_rate": 1.4532206907783356e-05, "loss": 0.4243, "step": 25580 }, { "epoch": 0.7023887973640857, "grad_norm": 0.37419456243515015, "learning_rate": 1.4531821914487869e-05, "loss": 0.4865, "step": 25581 }, { "epoch": 0.7024162548050522, "grad_norm": 0.3900434076786041, "learning_rate": 1.453143691273909e-05, "loss": 0.4746, "step": 25582 }, { "epoch": 0.7024437122460186, "grad_norm": 0.43376314640045166, "learning_rate": 1.4531051902537734e-05, "loss": 0.4498, "step": 25583 }, { "epoch": 0.7024711696869852, "grad_norm": 0.42061108350753784, "learning_rate": 1.4530666883884518e-05, "loss": 0.5837, "step": 25584 }, { "epoch": 0.7024986271279516, "grad_norm": 0.6189656257629395, "learning_rate": 1.4530281856780164e-05, "loss": 0.5164, "step": 25585 }, { "epoch": 0.7025260845689182, "grad_norm": 0.4034634232521057, "learning_rate": 1.4529896821225388e-05, "loss": 0.6029, "step": 25586 }, { "epoch": 0.7025535420098846, "grad_norm": 1.0585527420043945, "learning_rate": 1.452951177722091e-05, "loss": 0.4641, "step": 25587 }, { "epoch": 0.7025809994508512, "grad_norm": 0.3717815577983856, "learning_rate": 1.4529126724767447e-05, "loss": 0.5409, "step": 25588 }, { "epoch": 0.7026084568918177, "grad_norm": 0.5228464603424072, "learning_rate": 1.4528741663865714e-05, "loss": 0.4318, "step": 25589 }, { "epoch": 0.7026359143327842, "grad_norm": 0.3814222514629364, "learning_rate": 1.4528356594516435e-05, "loss": 0.4768, "step": 25590 }, { "epoch": 0.7026633717737507, "grad_norm": 0.4083000421524048, "learning_rate": 1.4527971516720325e-05, "loss": 0.5827, "step": 25591 }, { "epoch": 0.7026908292147171, "grad_norm": 0.37474197149276733, "learning_rate": 1.4527586430478102e-05, "loss": 0.5117, "step": 25592 }, { "epoch": 0.7027182866556837, "grad_norm": 0.4106961786746979, "learning_rate": 1.4527201335790486e-05, "loss": 0.5974, "step": 25593 }, { "epoch": 0.7027457440966501, "grad_norm": 0.3558419644832611, "learning_rate": 1.4526816232658197e-05, "loss": 0.5242, "step": 25594 }, { "epoch": 0.7027732015376167, "grad_norm": 0.3599098324775696, "learning_rate": 1.4526431121081947e-05, "loss": 0.5219, "step": 25595 }, { "epoch": 0.7028006589785832, "grad_norm": 0.33500590920448303, "learning_rate": 1.4526046001062461e-05, "loss": 0.4254, "step": 25596 }, { "epoch": 0.7028281164195497, "grad_norm": 0.34247061610221863, "learning_rate": 1.4525660872600453e-05, "loss": 0.4618, "step": 25597 }, { "epoch": 0.7028555738605162, "grad_norm": 0.40571045875549316, "learning_rate": 1.4525275735696644e-05, "loss": 0.5539, "step": 25598 }, { "epoch": 0.7028830313014827, "grad_norm": 0.41074541211128235, "learning_rate": 1.452489059035175e-05, "loss": 0.4878, "step": 25599 }, { "epoch": 0.7029104887424492, "grad_norm": 0.37267449498176575, "learning_rate": 1.4524505436566493e-05, "loss": 0.4228, "step": 25600 }, { "epoch": 0.7029379461834157, "grad_norm": 0.5731538534164429, "learning_rate": 1.4524120274341588e-05, "loss": 0.5077, "step": 25601 }, { "epoch": 0.7029654036243822, "grad_norm": 0.4136224389076233, "learning_rate": 1.4523735103677756e-05, "loss": 0.5979, "step": 25602 }, { "epoch": 0.7029928610653488, "grad_norm": 0.40230220556259155, "learning_rate": 1.4523349924575715e-05, "loss": 0.5509, "step": 25603 }, { "epoch": 0.7030203185063152, "grad_norm": 0.3627816438674927, "learning_rate": 1.4522964737036181e-05, "loss": 0.4381, "step": 25604 }, { "epoch": 0.7030477759472817, "grad_norm": 0.38819822669029236, "learning_rate": 1.4522579541059874e-05, "loss": 0.4852, "step": 25605 }, { "epoch": 0.7030752333882482, "grad_norm": 0.42598363757133484, "learning_rate": 1.4522194336647512e-05, "loss": 0.5109, "step": 25606 }, { "epoch": 0.7031026908292147, "grad_norm": 0.36737725138664246, "learning_rate": 1.4521809123799815e-05, "loss": 0.4938, "step": 25607 }, { "epoch": 0.7031301482701812, "grad_norm": 0.3718014359474182, "learning_rate": 1.4521423902517502e-05, "loss": 0.4206, "step": 25608 }, { "epoch": 0.7031576057111477, "grad_norm": 0.4484901428222656, "learning_rate": 1.4521038672801288e-05, "loss": 0.4296, "step": 25609 }, { "epoch": 0.7031850631521143, "grad_norm": 0.3786068260669708, "learning_rate": 1.4520653434651895e-05, "loss": 0.5819, "step": 25610 }, { "epoch": 0.7032125205930807, "grad_norm": 0.3906787037849426, "learning_rate": 1.4520268188070042e-05, "loss": 0.4294, "step": 25611 }, { "epoch": 0.7032399780340473, "grad_norm": 0.4383326768875122, "learning_rate": 1.4519882933056443e-05, "loss": 0.5258, "step": 25612 }, { "epoch": 0.7032674354750137, "grad_norm": 0.43764373660087585, "learning_rate": 1.4519497669611824e-05, "loss": 0.5359, "step": 25613 }, { "epoch": 0.7032948929159802, "grad_norm": 0.35168763995170593, "learning_rate": 1.4519112397736898e-05, "loss": 0.4438, "step": 25614 }, { "epoch": 0.7033223503569467, "grad_norm": 0.3711172044277191, "learning_rate": 1.4518727117432383e-05, "loss": 0.5329, "step": 25615 }, { "epoch": 0.7033498077979132, "grad_norm": 0.3994397521018982, "learning_rate": 1.4518341828699e-05, "loss": 0.4727, "step": 25616 }, { "epoch": 0.7033772652388798, "grad_norm": 0.34934115409851074, "learning_rate": 1.451795653153747e-05, "loss": 0.4681, "step": 25617 }, { "epoch": 0.7034047226798462, "grad_norm": 0.34250280261039734, "learning_rate": 1.4517571225948506e-05, "loss": 0.4884, "step": 25618 }, { "epoch": 0.7034321801208128, "grad_norm": 0.4589095115661621, "learning_rate": 1.4517185911932835e-05, "loss": 0.542, "step": 25619 }, { "epoch": 0.7034596375617792, "grad_norm": 0.36239370703697205, "learning_rate": 1.4516800589491165e-05, "loss": 0.4984, "step": 25620 }, { "epoch": 0.7034870950027458, "grad_norm": 0.34629982709884644, "learning_rate": 1.451641525862422e-05, "loss": 0.4649, "step": 25621 }, { "epoch": 0.7035145524437122, "grad_norm": 0.38144567608833313, "learning_rate": 1.4516029919332724e-05, "loss": 0.5291, "step": 25622 }, { "epoch": 0.7035420098846787, "grad_norm": 0.383162260055542, "learning_rate": 1.4515644571617387e-05, "loss": 0.5606, "step": 25623 }, { "epoch": 0.7035694673256453, "grad_norm": 0.37346625328063965, "learning_rate": 1.4515259215478932e-05, "loss": 0.4641, "step": 25624 }, { "epoch": 0.7035969247666117, "grad_norm": 0.39530083537101746, "learning_rate": 1.4514873850918076e-05, "loss": 0.4764, "step": 25625 }, { "epoch": 0.7036243822075783, "grad_norm": 0.3469306230545044, "learning_rate": 1.4514488477935542e-05, "loss": 0.4742, "step": 25626 }, { "epoch": 0.7036518396485447, "grad_norm": 0.3829055726528168, "learning_rate": 1.4514103096532046e-05, "loss": 0.5122, "step": 25627 }, { "epoch": 0.7036792970895113, "grad_norm": 0.34749579429626465, "learning_rate": 1.4513717706708309e-05, "loss": 0.4326, "step": 25628 }, { "epoch": 0.7037067545304777, "grad_norm": 0.3661653995513916, "learning_rate": 1.4513332308465045e-05, "loss": 0.4628, "step": 25629 }, { "epoch": 0.7037342119714443, "grad_norm": 0.439898818731308, "learning_rate": 1.4512946901802974e-05, "loss": 0.5724, "step": 25630 }, { "epoch": 0.7037616694124108, "grad_norm": 0.37449410557746887, "learning_rate": 1.451256148672282e-05, "loss": 0.4641, "step": 25631 }, { "epoch": 0.7037891268533772, "grad_norm": 0.4128553569316864, "learning_rate": 1.4512176063225298e-05, "loss": 0.5863, "step": 25632 }, { "epoch": 0.7038165842943438, "grad_norm": 0.4747392237186432, "learning_rate": 1.4511790631311126e-05, "loss": 0.5474, "step": 25633 }, { "epoch": 0.7038440417353102, "grad_norm": 0.4205755591392517, "learning_rate": 1.4511405190981025e-05, "loss": 0.537, "step": 25634 }, { "epoch": 0.7038714991762768, "grad_norm": 0.37671807408332825, "learning_rate": 1.4511019742235714e-05, "loss": 0.4643, "step": 25635 }, { "epoch": 0.7038989566172432, "grad_norm": 0.4256541132926941, "learning_rate": 1.451063428507591e-05, "loss": 0.5761, "step": 25636 }, { "epoch": 0.7039264140582098, "grad_norm": 0.3496798276901245, "learning_rate": 1.4510248819502337e-05, "loss": 0.5236, "step": 25637 }, { "epoch": 0.7039538714991763, "grad_norm": 0.3632848858833313, "learning_rate": 1.4509863345515707e-05, "loss": 0.4441, "step": 25638 }, { "epoch": 0.7039813289401428, "grad_norm": 0.40795382857322693, "learning_rate": 1.4509477863116744e-05, "loss": 0.533, "step": 25639 }, { "epoch": 0.7040087863811093, "grad_norm": 0.5161058306694031, "learning_rate": 1.4509092372306166e-05, "loss": 0.5042, "step": 25640 }, { "epoch": 0.7040362438220757, "grad_norm": 0.3534725606441498, "learning_rate": 1.4508706873084691e-05, "loss": 0.5134, "step": 25641 }, { "epoch": 0.7040637012630423, "grad_norm": 0.3642263412475586, "learning_rate": 1.4508321365453038e-05, "loss": 0.5275, "step": 25642 }, { "epoch": 0.7040911587040087, "grad_norm": 0.38733094930648804, "learning_rate": 1.4507935849411925e-05, "loss": 0.5905, "step": 25643 }, { "epoch": 0.7041186161449753, "grad_norm": 0.4156807065010071, "learning_rate": 1.4507550324962076e-05, "loss": 0.5182, "step": 25644 }, { "epoch": 0.7041460735859418, "grad_norm": 0.37364086508750916, "learning_rate": 1.4507164792104206e-05, "loss": 0.5202, "step": 25645 }, { "epoch": 0.7041735310269083, "grad_norm": 0.3490923047065735, "learning_rate": 1.4506779250839037e-05, "loss": 0.4464, "step": 25646 }, { "epoch": 0.7042009884678748, "grad_norm": 0.3800398111343384, "learning_rate": 1.4506393701167284e-05, "loss": 0.4617, "step": 25647 }, { "epoch": 0.7042284459088413, "grad_norm": 0.3761318027973175, "learning_rate": 1.4506008143089669e-05, "loss": 0.4826, "step": 25648 }, { "epoch": 0.7042559033498078, "grad_norm": 0.37143296003341675, "learning_rate": 1.450562257660691e-05, "loss": 0.4963, "step": 25649 }, { "epoch": 0.7042833607907742, "grad_norm": 0.3475790321826935, "learning_rate": 1.4505237001719727e-05, "loss": 0.4575, "step": 25650 }, { "epoch": 0.7043108182317408, "grad_norm": 0.31979092955589294, "learning_rate": 1.4504851418428838e-05, "loss": 0.4743, "step": 25651 }, { "epoch": 0.7043382756727073, "grad_norm": 0.37928786873817444, "learning_rate": 1.4504465826734966e-05, "loss": 0.5174, "step": 25652 }, { "epoch": 0.7043657331136738, "grad_norm": 0.38302531838417053, "learning_rate": 1.4504080226638828e-05, "loss": 0.5332, "step": 25653 }, { "epoch": 0.7043931905546403, "grad_norm": 0.41552409529685974, "learning_rate": 1.450369461814114e-05, "loss": 0.597, "step": 25654 }, { "epoch": 0.7044206479956068, "grad_norm": 0.4232742190361023, "learning_rate": 1.4503309001242626e-05, "loss": 0.5567, "step": 25655 }, { "epoch": 0.7044481054365733, "grad_norm": 0.38879749178886414, "learning_rate": 1.4502923375944002e-05, "loss": 0.5338, "step": 25656 }, { "epoch": 0.7044755628775398, "grad_norm": 0.5789538025856018, "learning_rate": 1.450253774224599e-05, "loss": 0.5065, "step": 25657 }, { "epoch": 0.7045030203185063, "grad_norm": 0.36123889684677124, "learning_rate": 1.450215210014931e-05, "loss": 0.5241, "step": 25658 }, { "epoch": 0.7045304777594729, "grad_norm": 0.3551064133644104, "learning_rate": 1.4501766449654675e-05, "loss": 0.475, "step": 25659 }, { "epoch": 0.7045579352004393, "grad_norm": 0.3896539509296417, "learning_rate": 1.450138079076281e-05, "loss": 0.4842, "step": 25660 }, { "epoch": 0.7045853926414058, "grad_norm": 0.3786185383796692, "learning_rate": 1.4500995123474436e-05, "loss": 0.5168, "step": 25661 }, { "epoch": 0.7046128500823723, "grad_norm": 0.39876487851142883, "learning_rate": 1.4500609447790265e-05, "loss": 0.4985, "step": 25662 }, { "epoch": 0.7046403075233388, "grad_norm": 0.3739904761314392, "learning_rate": 1.4500223763711026e-05, "loss": 0.532, "step": 25663 }, { "epoch": 0.7046677649643053, "grad_norm": 0.38699647784233093, "learning_rate": 1.4499838071237431e-05, "loss": 0.4889, "step": 25664 }, { "epoch": 0.7046952224052718, "grad_norm": 0.33833956718444824, "learning_rate": 1.4499452370370202e-05, "loss": 0.4362, "step": 25665 }, { "epoch": 0.7047226798462384, "grad_norm": 0.4008192718029022, "learning_rate": 1.4499066661110057e-05, "loss": 0.4949, "step": 25666 }, { "epoch": 0.7047501372872048, "grad_norm": 0.381436824798584, "learning_rate": 1.4498680943457717e-05, "loss": 0.449, "step": 25667 }, { "epoch": 0.7047775947281714, "grad_norm": 0.43844908475875854, "learning_rate": 1.4498295217413902e-05, "loss": 0.4962, "step": 25668 }, { "epoch": 0.7048050521691378, "grad_norm": 0.32793542742729187, "learning_rate": 1.449790948297933e-05, "loss": 0.4906, "step": 25669 }, { "epoch": 0.7048325096101044, "grad_norm": 0.3963126838207245, "learning_rate": 1.4497523740154722e-05, "loss": 0.5174, "step": 25670 }, { "epoch": 0.7048599670510708, "grad_norm": 0.40752270817756653, "learning_rate": 1.4497137988940798e-05, "loss": 0.4838, "step": 25671 }, { "epoch": 0.7048874244920373, "grad_norm": 0.3942354917526245, "learning_rate": 1.4496752229338275e-05, "loss": 0.4657, "step": 25672 }, { "epoch": 0.7049148819330039, "grad_norm": 0.3837500810623169, "learning_rate": 1.4496366461347875e-05, "loss": 0.4252, "step": 25673 }, { "epoch": 0.7049423393739703, "grad_norm": 0.3776382505893707, "learning_rate": 1.4495980684970311e-05, "loss": 0.5714, "step": 25674 }, { "epoch": 0.7049697968149369, "grad_norm": 0.44987764954566956, "learning_rate": 1.4495594900206316e-05, "loss": 0.5105, "step": 25675 }, { "epoch": 0.7049972542559033, "grad_norm": 0.37573108077049255, "learning_rate": 1.4495209107056599e-05, "loss": 0.4366, "step": 25676 }, { "epoch": 0.7050247116968699, "grad_norm": 0.356988787651062, "learning_rate": 1.4494823305521879e-05, "loss": 0.4971, "step": 25677 }, { "epoch": 0.7050521691378363, "grad_norm": 0.5029333233833313, "learning_rate": 1.4494437495602881e-05, "loss": 0.419, "step": 25678 }, { "epoch": 0.7050796265788029, "grad_norm": 0.6312275528907776, "learning_rate": 1.4494051677300326e-05, "loss": 0.4725, "step": 25679 }, { "epoch": 0.7051070840197694, "grad_norm": 0.3834789991378784, "learning_rate": 1.4493665850614927e-05, "loss": 0.4791, "step": 25680 }, { "epoch": 0.7051345414607358, "grad_norm": 0.482816606760025, "learning_rate": 1.4493280015547406e-05, "loss": 0.4919, "step": 25681 }, { "epoch": 0.7051619989017024, "grad_norm": 0.42107242345809937, "learning_rate": 1.4492894172098484e-05, "loss": 0.4614, "step": 25682 }, { "epoch": 0.7051894563426688, "grad_norm": 0.34619423747062683, "learning_rate": 1.4492508320268882e-05, "loss": 0.413, "step": 25683 }, { "epoch": 0.7052169137836354, "grad_norm": 0.3370210528373718, "learning_rate": 1.449212246005932e-05, "loss": 0.4787, "step": 25684 }, { "epoch": 0.7052443712246018, "grad_norm": 0.40019407868385315, "learning_rate": 1.449173659147051e-05, "loss": 0.5185, "step": 25685 }, { "epoch": 0.7052718286655684, "grad_norm": 0.43080300092697144, "learning_rate": 1.4491350714503182e-05, "loss": 0.5265, "step": 25686 }, { "epoch": 0.7052992861065349, "grad_norm": 0.4275834858417511, "learning_rate": 1.4490964829158052e-05, "loss": 0.6374, "step": 25687 }, { "epoch": 0.7053267435475014, "grad_norm": 0.3616642355918884, "learning_rate": 1.4490578935435838e-05, "loss": 0.488, "step": 25688 }, { "epoch": 0.7053542009884679, "grad_norm": 0.410146564245224, "learning_rate": 1.4490193033337262e-05, "loss": 0.5554, "step": 25689 }, { "epoch": 0.7053816584294343, "grad_norm": 0.3768078684806824, "learning_rate": 1.448980712286304e-05, "loss": 0.5111, "step": 25690 }, { "epoch": 0.7054091158704009, "grad_norm": 0.33338791131973267, "learning_rate": 1.4489421204013896e-05, "loss": 0.4547, "step": 25691 }, { "epoch": 0.7054365733113673, "grad_norm": 0.36915135383605957, "learning_rate": 1.4489035276790549e-05, "loss": 0.508, "step": 25692 }, { "epoch": 0.7054640307523339, "grad_norm": 0.35876166820526123, "learning_rate": 1.4488649341193719e-05, "loss": 0.4499, "step": 25693 }, { "epoch": 0.7054914881933004, "grad_norm": 0.341897189617157, "learning_rate": 1.4488263397224126e-05, "loss": 0.4691, "step": 25694 }, { "epoch": 0.7055189456342669, "grad_norm": 0.37152573466300964, "learning_rate": 1.4487877444882486e-05, "loss": 0.4995, "step": 25695 }, { "epoch": 0.7055464030752334, "grad_norm": 0.4035491347312927, "learning_rate": 1.4487491484169525e-05, "loss": 0.5153, "step": 25696 }, { "epoch": 0.7055738605161999, "grad_norm": 0.37194037437438965, "learning_rate": 1.4487105515085959e-05, "loss": 0.4509, "step": 25697 }, { "epoch": 0.7056013179571664, "grad_norm": 0.5227673649787903, "learning_rate": 1.448671953763251e-05, "loss": 0.501, "step": 25698 }, { "epoch": 0.7056287753981328, "grad_norm": 0.38403379917144775, "learning_rate": 1.4486333551809897e-05, "loss": 0.6119, "step": 25699 }, { "epoch": 0.7056562328390994, "grad_norm": 0.3858312964439392, "learning_rate": 1.4485947557618838e-05, "loss": 0.5314, "step": 25700 }, { "epoch": 0.705683690280066, "grad_norm": 0.3897850215435028, "learning_rate": 1.4485561555060059e-05, "loss": 0.4101, "step": 25701 }, { "epoch": 0.7057111477210324, "grad_norm": 0.35661229491233826, "learning_rate": 1.4485175544134273e-05, "loss": 0.5197, "step": 25702 }, { "epoch": 0.7057386051619989, "grad_norm": 0.4033012390136719, "learning_rate": 1.4484789524842204e-05, "loss": 0.4788, "step": 25703 }, { "epoch": 0.7057660626029654, "grad_norm": 0.336189329624176, "learning_rate": 1.4484403497184572e-05, "loss": 0.4763, "step": 25704 }, { "epoch": 0.7057935200439319, "grad_norm": 0.4546240568161011, "learning_rate": 1.4484017461162094e-05, "loss": 0.4977, "step": 25705 }, { "epoch": 0.7058209774848984, "grad_norm": 0.4221002459526062, "learning_rate": 1.4483631416775494e-05, "loss": 0.6043, "step": 25706 }, { "epoch": 0.7058484349258649, "grad_norm": 0.38541996479034424, "learning_rate": 1.4483245364025491e-05, "loss": 0.5197, "step": 25707 }, { "epoch": 0.7058758923668315, "grad_norm": 0.4084306061267853, "learning_rate": 1.4482859302912801e-05, "loss": 0.4485, "step": 25708 }, { "epoch": 0.7059033498077979, "grad_norm": 0.4060688614845276, "learning_rate": 1.4482473233438152e-05, "loss": 0.5711, "step": 25709 }, { "epoch": 0.7059308072487644, "grad_norm": 0.39270877838134766, "learning_rate": 1.4482087155602257e-05, "loss": 0.5145, "step": 25710 }, { "epoch": 0.7059582646897309, "grad_norm": 0.355991929769516, "learning_rate": 1.448170106940584e-05, "loss": 0.453, "step": 25711 }, { "epoch": 0.7059857221306974, "grad_norm": 0.42438971996307373, "learning_rate": 1.4481314974849622e-05, "loss": 0.4171, "step": 25712 }, { "epoch": 0.7060131795716639, "grad_norm": 0.3883151113986969, "learning_rate": 1.4480928871934317e-05, "loss": 0.4488, "step": 25713 }, { "epoch": 0.7060406370126304, "grad_norm": 0.3715982437133789, "learning_rate": 1.4480542760660653e-05, "loss": 0.4877, "step": 25714 }, { "epoch": 0.706068094453597, "grad_norm": 0.45923733711242676, "learning_rate": 1.4480156641029344e-05, "loss": 0.5273, "step": 25715 }, { "epoch": 0.7060955518945634, "grad_norm": 0.4255702793598175, "learning_rate": 1.4479770513041117e-05, "loss": 0.5369, "step": 25716 }, { "epoch": 0.70612300933553, "grad_norm": 0.4855153560638428, "learning_rate": 1.4479384376696685e-05, "loss": 0.5134, "step": 25717 }, { "epoch": 0.7061504667764964, "grad_norm": 0.37968909740448, "learning_rate": 1.447899823199677e-05, "loss": 0.4735, "step": 25718 }, { "epoch": 0.706177924217463, "grad_norm": 0.3487691879272461, "learning_rate": 1.4478612078942099e-05, "loss": 0.4684, "step": 25719 }, { "epoch": 0.7062053816584294, "grad_norm": 0.4061296284198761, "learning_rate": 1.4478225917533381e-05, "loss": 0.5009, "step": 25720 }, { "epoch": 0.7062328390993959, "grad_norm": 0.3874371349811554, "learning_rate": 1.4477839747771347e-05, "loss": 0.4659, "step": 25721 }, { "epoch": 0.7062602965403625, "grad_norm": 0.3507485091686249, "learning_rate": 1.447745356965671e-05, "loss": 0.4963, "step": 25722 }, { "epoch": 0.7062877539813289, "grad_norm": 0.43081483244895935, "learning_rate": 1.4477067383190196e-05, "loss": 0.5389, "step": 25723 }, { "epoch": 0.7063152114222955, "grad_norm": 0.3848617374897003, "learning_rate": 1.447668118837252e-05, "loss": 0.5021, "step": 25724 }, { "epoch": 0.7063426688632619, "grad_norm": 0.36143532395362854, "learning_rate": 1.4476294985204405e-05, "loss": 0.5079, "step": 25725 }, { "epoch": 0.7063701263042285, "grad_norm": 0.3673800230026245, "learning_rate": 1.4475908773686574e-05, "loss": 0.5014, "step": 25726 }, { "epoch": 0.7063975837451949, "grad_norm": 0.44015592336654663, "learning_rate": 1.4475522553819739e-05, "loss": 0.5735, "step": 25727 }, { "epoch": 0.7064250411861615, "grad_norm": 0.40906164050102234, "learning_rate": 1.447513632560463e-05, "loss": 0.4852, "step": 25728 }, { "epoch": 0.706452498627128, "grad_norm": 0.42395922541618347, "learning_rate": 1.4474750089041964e-05, "loss": 0.5996, "step": 25729 }, { "epoch": 0.7064799560680944, "grad_norm": 0.38385912775993347, "learning_rate": 1.4474363844132458e-05, "loss": 0.4663, "step": 25730 }, { "epoch": 0.706507413509061, "grad_norm": 0.33122003078460693, "learning_rate": 1.4473977590876838e-05, "loss": 0.4348, "step": 25731 }, { "epoch": 0.7065348709500274, "grad_norm": 0.3811768889427185, "learning_rate": 1.447359132927582e-05, "loss": 0.4455, "step": 25732 }, { "epoch": 0.706562328390994, "grad_norm": 0.4336438775062561, "learning_rate": 1.4473205059330129e-05, "loss": 0.545, "step": 25733 }, { "epoch": 0.7065897858319604, "grad_norm": 0.32426828145980835, "learning_rate": 1.447281878104048e-05, "loss": 0.4015, "step": 25734 }, { "epoch": 0.706617243272927, "grad_norm": 0.40139874815940857, "learning_rate": 1.4472432494407595e-05, "loss": 0.5196, "step": 25735 }, { "epoch": 0.7066447007138935, "grad_norm": 0.3931628465652466, "learning_rate": 1.4472046199432202e-05, "loss": 0.4771, "step": 25736 }, { "epoch": 0.70667215815486, "grad_norm": 0.3732810616493225, "learning_rate": 1.4471659896115009e-05, "loss": 0.5287, "step": 25737 }, { "epoch": 0.7066996155958265, "grad_norm": 0.39896053075790405, "learning_rate": 1.4471273584456746e-05, "loss": 0.5576, "step": 25738 }, { "epoch": 0.7067270730367929, "grad_norm": 0.4081512987613678, "learning_rate": 1.4470887264458132e-05, "loss": 0.5608, "step": 25739 }, { "epoch": 0.7067545304777595, "grad_norm": 0.38027241826057434, "learning_rate": 1.4470500936119885e-05, "loss": 0.5332, "step": 25740 }, { "epoch": 0.7067819879187259, "grad_norm": 0.3419736325740814, "learning_rate": 1.4470114599442729e-05, "loss": 0.4611, "step": 25741 }, { "epoch": 0.7068094453596925, "grad_norm": 0.4779621660709381, "learning_rate": 1.4469728254427377e-05, "loss": 0.5786, "step": 25742 }, { "epoch": 0.706836902800659, "grad_norm": 0.36675217747688293, "learning_rate": 1.446934190107456e-05, "loss": 0.4825, "step": 25743 }, { "epoch": 0.7068643602416255, "grad_norm": 0.4539373815059662, "learning_rate": 1.4468955539384992e-05, "loss": 0.6122, "step": 25744 }, { "epoch": 0.706891817682592, "grad_norm": 0.5645896196365356, "learning_rate": 1.4468569169359397e-05, "loss": 0.564, "step": 25745 }, { "epoch": 0.7069192751235585, "grad_norm": 0.37369534373283386, "learning_rate": 1.4468182790998495e-05, "loss": 0.4622, "step": 25746 }, { "epoch": 0.706946732564525, "grad_norm": 0.38322997093200684, "learning_rate": 1.4467796404303003e-05, "loss": 0.5072, "step": 25747 }, { "epoch": 0.7069741900054914, "grad_norm": 0.3349263072013855, "learning_rate": 1.4467410009273648e-05, "loss": 0.3805, "step": 25748 }, { "epoch": 0.707001647446458, "grad_norm": 0.3882410228252411, "learning_rate": 1.4467023605911143e-05, "loss": 0.5204, "step": 25749 }, { "epoch": 0.7070291048874245, "grad_norm": 0.38654112815856934, "learning_rate": 1.4466637194216218e-05, "loss": 0.5152, "step": 25750 }, { "epoch": 0.707056562328391, "grad_norm": 0.4016451835632324, "learning_rate": 1.4466250774189588e-05, "loss": 0.476, "step": 25751 }, { "epoch": 0.7070840197693575, "grad_norm": 0.38211220502853394, "learning_rate": 1.4465864345831973e-05, "loss": 0.4882, "step": 25752 }, { "epoch": 0.707111477210324, "grad_norm": 0.337505578994751, "learning_rate": 1.4465477909144097e-05, "loss": 0.4075, "step": 25753 }, { "epoch": 0.7071389346512905, "grad_norm": 0.39918753504753113, "learning_rate": 1.4465091464126677e-05, "loss": 0.4603, "step": 25754 }, { "epoch": 0.707166392092257, "grad_norm": 0.40793853998184204, "learning_rate": 1.446470501078044e-05, "loss": 0.462, "step": 25755 }, { "epoch": 0.7071938495332235, "grad_norm": 0.39319825172424316, "learning_rate": 1.4464318549106102e-05, "loss": 0.5569, "step": 25756 }, { "epoch": 0.7072213069741901, "grad_norm": 0.39530667662620544, "learning_rate": 1.4463932079104383e-05, "loss": 0.5546, "step": 25757 }, { "epoch": 0.7072487644151565, "grad_norm": 0.500117301940918, "learning_rate": 1.446354560077601e-05, "loss": 0.5559, "step": 25758 }, { "epoch": 0.707276221856123, "grad_norm": 0.5452039241790771, "learning_rate": 1.4463159114121698e-05, "loss": 0.5991, "step": 25759 }, { "epoch": 0.7073036792970895, "grad_norm": 0.3802798092365265, "learning_rate": 1.446277261914217e-05, "loss": 0.511, "step": 25760 }, { "epoch": 0.707331136738056, "grad_norm": 0.3717237710952759, "learning_rate": 1.4462386115838145e-05, "loss": 0.4862, "step": 25761 }, { "epoch": 0.7073585941790225, "grad_norm": 0.3931743800640106, "learning_rate": 1.4461999604210347e-05, "loss": 0.5458, "step": 25762 }, { "epoch": 0.707386051619989, "grad_norm": 0.3521151542663574, "learning_rate": 1.4461613084259496e-05, "loss": 0.5007, "step": 25763 }, { "epoch": 0.7074135090609556, "grad_norm": 0.41678494215011597, "learning_rate": 1.4461226555986312e-05, "loss": 0.5594, "step": 25764 }, { "epoch": 0.707440966501922, "grad_norm": 0.373597115278244, "learning_rate": 1.4460840019391515e-05, "loss": 0.4758, "step": 25765 }, { "epoch": 0.7074684239428886, "grad_norm": 0.3464244306087494, "learning_rate": 1.446045347447583e-05, "loss": 0.4404, "step": 25766 }, { "epoch": 0.707495881383855, "grad_norm": 0.43420642614364624, "learning_rate": 1.4460066921239975e-05, "loss": 0.5269, "step": 25767 }, { "epoch": 0.7075233388248215, "grad_norm": 0.3888736665248871, "learning_rate": 1.445968035968467e-05, "loss": 0.5315, "step": 25768 }, { "epoch": 0.707550796265788, "grad_norm": 0.42245182394981384, "learning_rate": 1.4459293789810642e-05, "loss": 0.4392, "step": 25769 }, { "epoch": 0.7075782537067545, "grad_norm": 0.4308229386806488, "learning_rate": 1.4458907211618604e-05, "loss": 0.485, "step": 25770 }, { "epoch": 0.7076057111477211, "grad_norm": 0.4681887626647949, "learning_rate": 1.4458520625109282e-05, "loss": 0.592, "step": 25771 }, { "epoch": 0.7076331685886875, "grad_norm": 0.4080866277217865, "learning_rate": 1.4458134030283397e-05, "loss": 0.4988, "step": 25772 }, { "epoch": 0.7076606260296541, "grad_norm": 0.40526530146598816, "learning_rate": 1.4457747427141669e-05, "loss": 0.5694, "step": 25773 }, { "epoch": 0.7076880834706205, "grad_norm": 0.38375622034072876, "learning_rate": 1.4457360815684817e-05, "loss": 0.4885, "step": 25774 }, { "epoch": 0.7077155409115871, "grad_norm": 0.3987455368041992, "learning_rate": 1.4456974195913565e-05, "loss": 0.5158, "step": 25775 }, { "epoch": 0.7077429983525535, "grad_norm": 0.37412500381469727, "learning_rate": 1.4456587567828636e-05, "loss": 0.4185, "step": 25776 }, { "epoch": 0.70777045579352, "grad_norm": 0.3820159435272217, "learning_rate": 1.4456200931430748e-05, "loss": 0.5422, "step": 25777 }, { "epoch": 0.7077979132344866, "grad_norm": 0.357799232006073, "learning_rate": 1.4455814286720622e-05, "loss": 0.5764, "step": 25778 }, { "epoch": 0.707825370675453, "grad_norm": 0.36209723353385925, "learning_rate": 1.4455427633698981e-05, "loss": 0.5209, "step": 25779 }, { "epoch": 0.7078528281164196, "grad_norm": 0.43998652696609497, "learning_rate": 1.4455040972366545e-05, "loss": 0.5779, "step": 25780 }, { "epoch": 0.707880285557386, "grad_norm": 0.3826634883880615, "learning_rate": 1.4454654302724036e-05, "loss": 0.522, "step": 25781 }, { "epoch": 0.7079077429983526, "grad_norm": 0.4328444004058838, "learning_rate": 1.4454267624772176e-05, "loss": 0.6581, "step": 25782 }, { "epoch": 0.707935200439319, "grad_norm": 0.36680397391319275, "learning_rate": 1.4453880938511681e-05, "loss": 0.4429, "step": 25783 }, { "epoch": 0.7079626578802856, "grad_norm": 0.39536669850349426, "learning_rate": 1.4453494243943283e-05, "loss": 0.5179, "step": 25784 }, { "epoch": 0.7079901153212521, "grad_norm": 0.3626229763031006, "learning_rate": 1.4453107541067692e-05, "loss": 0.4557, "step": 25785 }, { "epoch": 0.7080175727622185, "grad_norm": 0.38307052850723267, "learning_rate": 1.4452720829885635e-05, "loss": 0.5401, "step": 25786 }, { "epoch": 0.7080450302031851, "grad_norm": 0.38652434945106506, "learning_rate": 1.4452334110397834e-05, "loss": 0.4866, "step": 25787 }, { "epoch": 0.7080724876441515, "grad_norm": 0.41213810443878174, "learning_rate": 1.4451947382605007e-05, "loss": 0.477, "step": 25788 }, { "epoch": 0.7080999450851181, "grad_norm": 0.40793541073799133, "learning_rate": 1.4451560646507878e-05, "loss": 0.5188, "step": 25789 }, { "epoch": 0.7081274025260845, "grad_norm": 0.4347650408744812, "learning_rate": 1.445117390210717e-05, "loss": 0.5449, "step": 25790 }, { "epoch": 0.7081548599670511, "grad_norm": 0.3900369107723236, "learning_rate": 1.44507871494036e-05, "loss": 0.479, "step": 25791 }, { "epoch": 0.7081823174080176, "grad_norm": 0.4668372571468353, "learning_rate": 1.4450400388397891e-05, "loss": 0.5207, "step": 25792 }, { "epoch": 0.7082097748489841, "grad_norm": 0.39451760053634644, "learning_rate": 1.4450013619090763e-05, "loss": 0.4333, "step": 25793 }, { "epoch": 0.7082372322899506, "grad_norm": 0.49405157566070557, "learning_rate": 1.4449626841482944e-05, "loss": 0.4786, "step": 25794 }, { "epoch": 0.708264689730917, "grad_norm": 0.32388219237327576, "learning_rate": 1.444924005557515e-05, "loss": 0.4228, "step": 25795 }, { "epoch": 0.7082921471718836, "grad_norm": 0.43572863936424255, "learning_rate": 1.4448853261368099e-05, "loss": 0.5313, "step": 25796 }, { "epoch": 0.70831960461285, "grad_norm": 0.3893767297267914, "learning_rate": 1.4448466458862521e-05, "loss": 0.4867, "step": 25797 }, { "epoch": 0.7083470620538166, "grad_norm": 0.4757401943206787, "learning_rate": 1.444807964805913e-05, "loss": 0.513, "step": 25798 }, { "epoch": 0.7083745194947831, "grad_norm": 0.4045049846172333, "learning_rate": 1.4447692828958652e-05, "loss": 0.5479, "step": 25799 }, { "epoch": 0.7084019769357496, "grad_norm": 0.33715349435806274, "learning_rate": 1.444730600156181e-05, "loss": 0.3917, "step": 25800 }, { "epoch": 0.7084294343767161, "grad_norm": 0.4399476945400238, "learning_rate": 1.444691916586932e-05, "loss": 0.5505, "step": 25801 }, { "epoch": 0.7084568918176826, "grad_norm": 0.38126063346862793, "learning_rate": 1.4446532321881907e-05, "loss": 0.5112, "step": 25802 }, { "epoch": 0.7084843492586491, "grad_norm": 0.35870254039764404, "learning_rate": 1.4446145469600292e-05, "loss": 0.5016, "step": 25803 }, { "epoch": 0.7085118066996156, "grad_norm": 0.3618539571762085, "learning_rate": 1.4445758609025195e-05, "loss": 0.4817, "step": 25804 }, { "epoch": 0.7085392641405821, "grad_norm": 0.3373320698738098, "learning_rate": 1.4445371740157343e-05, "loss": 0.4371, "step": 25805 }, { "epoch": 0.7085667215815487, "grad_norm": 0.42204996943473816, "learning_rate": 1.4444984862997451e-05, "loss": 0.4789, "step": 25806 }, { "epoch": 0.7085941790225151, "grad_norm": 0.3369329571723938, "learning_rate": 1.4444597977546244e-05, "loss": 0.5282, "step": 25807 }, { "epoch": 0.7086216364634816, "grad_norm": 0.3729742169380188, "learning_rate": 1.4444211083804445e-05, "loss": 0.5663, "step": 25808 }, { "epoch": 0.7086490939044481, "grad_norm": 0.3488827347755432, "learning_rate": 1.4443824181772772e-05, "loss": 0.4834, "step": 25809 }, { "epoch": 0.7086765513454146, "grad_norm": 0.480985552072525, "learning_rate": 1.444343727145195e-05, "loss": 0.5354, "step": 25810 }, { "epoch": 0.7087040087863811, "grad_norm": 0.38537535071372986, "learning_rate": 1.4443050352842698e-05, "loss": 0.5036, "step": 25811 }, { "epoch": 0.7087314662273476, "grad_norm": 0.44684240221977234, "learning_rate": 1.444266342594574e-05, "loss": 0.543, "step": 25812 }, { "epoch": 0.7087589236683142, "grad_norm": 0.4180106818675995, "learning_rate": 1.4442276490761798e-05, "loss": 0.4191, "step": 25813 }, { "epoch": 0.7087863811092806, "grad_norm": 0.3970363140106201, "learning_rate": 1.444188954729159e-05, "loss": 0.4795, "step": 25814 }, { "epoch": 0.7088138385502472, "grad_norm": 0.34595996141433716, "learning_rate": 1.444150259553584e-05, "loss": 0.4438, "step": 25815 }, { "epoch": 0.7088412959912136, "grad_norm": 0.39089435338974, "learning_rate": 1.444111563549527e-05, "loss": 0.547, "step": 25816 }, { "epoch": 0.7088687534321801, "grad_norm": 0.37585216760635376, "learning_rate": 1.4440728667170606e-05, "loss": 0.579, "step": 25817 }, { "epoch": 0.7088962108731466, "grad_norm": 0.36819952726364136, "learning_rate": 1.4440341690562562e-05, "loss": 0.5601, "step": 25818 }, { "epoch": 0.7089236683141131, "grad_norm": 0.4637983739376068, "learning_rate": 1.4439954705671864e-05, "loss": 0.5443, "step": 25819 }, { "epoch": 0.7089511257550797, "grad_norm": 0.38356813788414, "learning_rate": 1.4439567712499234e-05, "loss": 0.4348, "step": 25820 }, { "epoch": 0.7089785831960461, "grad_norm": 0.3750327527523041, "learning_rate": 1.4439180711045395e-05, "loss": 0.4895, "step": 25821 }, { "epoch": 0.7090060406370127, "grad_norm": 0.4449750781059265, "learning_rate": 1.4438793701311063e-05, "loss": 0.4585, "step": 25822 }, { "epoch": 0.7090334980779791, "grad_norm": 0.3264961540699005, "learning_rate": 1.4438406683296968e-05, "loss": 0.4985, "step": 25823 }, { "epoch": 0.7090609555189457, "grad_norm": 0.4114227294921875, "learning_rate": 1.4438019657003826e-05, "loss": 0.5352, "step": 25824 }, { "epoch": 0.7090884129599121, "grad_norm": 0.4186139404773712, "learning_rate": 1.443763262243236e-05, "loss": 0.5633, "step": 25825 }, { "epoch": 0.7091158704008786, "grad_norm": 0.4439699947834015, "learning_rate": 1.4437245579583296e-05, "loss": 0.4674, "step": 25826 }, { "epoch": 0.7091433278418452, "grad_norm": 0.3628106415271759, "learning_rate": 1.443685852845735e-05, "loss": 0.4199, "step": 25827 }, { "epoch": 0.7091707852828116, "grad_norm": 0.3921641409397125, "learning_rate": 1.4436471469055247e-05, "loss": 0.5196, "step": 25828 }, { "epoch": 0.7091982427237782, "grad_norm": 0.4399005174636841, "learning_rate": 1.443608440137771e-05, "loss": 0.4564, "step": 25829 }, { "epoch": 0.7092257001647446, "grad_norm": 0.438305139541626, "learning_rate": 1.4435697325425459e-05, "loss": 0.5609, "step": 25830 }, { "epoch": 0.7092531576057112, "grad_norm": 0.3977104127407074, "learning_rate": 1.4435310241199218e-05, "loss": 0.6207, "step": 25831 }, { "epoch": 0.7092806150466776, "grad_norm": 0.3645772635936737, "learning_rate": 1.4434923148699704e-05, "loss": 0.5425, "step": 25832 }, { "epoch": 0.7093080724876442, "grad_norm": 0.3777216374874115, "learning_rate": 1.4434536047927647e-05, "loss": 0.4752, "step": 25833 }, { "epoch": 0.7093355299286107, "grad_norm": 0.34704485535621643, "learning_rate": 1.4434148938883764e-05, "loss": 0.4338, "step": 25834 }, { "epoch": 0.7093629873695771, "grad_norm": 0.380829393863678, "learning_rate": 1.4433761821568778e-05, "loss": 0.5339, "step": 25835 }, { "epoch": 0.7093904448105437, "grad_norm": 0.43411070108413696, "learning_rate": 1.443337469598341e-05, "loss": 0.5468, "step": 25836 }, { "epoch": 0.7094179022515101, "grad_norm": 0.3334331512451172, "learning_rate": 1.4432987562128382e-05, "loss": 0.4828, "step": 25837 }, { "epoch": 0.7094453596924767, "grad_norm": 0.3748095631599426, "learning_rate": 1.443260042000442e-05, "loss": 0.5057, "step": 25838 }, { "epoch": 0.7094728171334431, "grad_norm": 0.3970426321029663, "learning_rate": 1.4432213269612245e-05, "loss": 0.5899, "step": 25839 }, { "epoch": 0.7095002745744097, "grad_norm": 0.35481947660446167, "learning_rate": 1.4431826110952574e-05, "loss": 0.4512, "step": 25840 }, { "epoch": 0.7095277320153762, "grad_norm": 0.3363458514213562, "learning_rate": 1.4431438944026135e-05, "loss": 0.471, "step": 25841 }, { "epoch": 0.7095551894563427, "grad_norm": 0.3688531219959259, "learning_rate": 1.4431051768833647e-05, "loss": 0.4241, "step": 25842 }, { "epoch": 0.7095826468973092, "grad_norm": 0.38773712515830994, "learning_rate": 1.4430664585375834e-05, "loss": 0.517, "step": 25843 }, { "epoch": 0.7096101043382756, "grad_norm": 0.42674520611763, "learning_rate": 1.4430277393653418e-05, "loss": 0.5195, "step": 25844 }, { "epoch": 0.7096375617792422, "grad_norm": 0.4060184061527252, "learning_rate": 1.4429890193667117e-05, "loss": 0.5087, "step": 25845 }, { "epoch": 0.7096650192202086, "grad_norm": 0.4025982916355133, "learning_rate": 1.442950298541766e-05, "loss": 0.5271, "step": 25846 }, { "epoch": 0.7096924766611752, "grad_norm": 0.3356834948062897, "learning_rate": 1.4429115768905764e-05, "loss": 0.3966, "step": 25847 }, { "epoch": 0.7097199341021416, "grad_norm": 0.3872230648994446, "learning_rate": 1.4428728544132157e-05, "loss": 0.509, "step": 25848 }, { "epoch": 0.7097473915431082, "grad_norm": 0.388248085975647, "learning_rate": 1.4428341311097555e-05, "loss": 0.5706, "step": 25849 }, { "epoch": 0.7097748489840747, "grad_norm": 0.40677523612976074, "learning_rate": 1.4427954069802685e-05, "loss": 0.5736, "step": 25850 }, { "epoch": 0.7098023064250412, "grad_norm": 0.4282175302505493, "learning_rate": 1.4427566820248267e-05, "loss": 0.5892, "step": 25851 }, { "epoch": 0.7098297638660077, "grad_norm": 0.3885265290737152, "learning_rate": 1.4427179562435022e-05, "loss": 0.4792, "step": 25852 }, { "epoch": 0.7098572213069742, "grad_norm": 0.3750806450843811, "learning_rate": 1.4426792296363674e-05, "loss": 0.5074, "step": 25853 }, { "epoch": 0.7098846787479407, "grad_norm": 0.36252644658088684, "learning_rate": 1.4426405022034949e-05, "loss": 0.4572, "step": 25854 }, { "epoch": 0.7099121361889071, "grad_norm": 0.3991866409778595, "learning_rate": 1.4426017739449563e-05, "loss": 0.5363, "step": 25855 }, { "epoch": 0.7099395936298737, "grad_norm": 0.41951021552085876, "learning_rate": 1.4425630448608242e-05, "loss": 0.3796, "step": 25856 }, { "epoch": 0.7099670510708402, "grad_norm": 0.3674119710922241, "learning_rate": 1.442524314951171e-05, "loss": 0.4847, "step": 25857 }, { "epoch": 0.7099945085118067, "grad_norm": 0.45130351185798645, "learning_rate": 1.4424855842160683e-05, "loss": 0.4912, "step": 25858 }, { "epoch": 0.7100219659527732, "grad_norm": 0.41289767622947693, "learning_rate": 1.442446852655589e-05, "loss": 0.5049, "step": 25859 }, { "epoch": 0.7100494233937397, "grad_norm": 0.44206786155700684, "learning_rate": 1.4424081202698048e-05, "loss": 0.4921, "step": 25860 }, { "epoch": 0.7100768808347062, "grad_norm": 0.37867385149002075, "learning_rate": 1.4423693870587886e-05, "loss": 0.4639, "step": 25861 }, { "epoch": 0.7101043382756727, "grad_norm": 0.4461255669593811, "learning_rate": 1.4423306530226124e-05, "loss": 0.5506, "step": 25862 }, { "epoch": 0.7101317957166392, "grad_norm": 0.35779985785484314, "learning_rate": 1.442291918161348e-05, "loss": 0.5065, "step": 25863 }, { "epoch": 0.7101592531576058, "grad_norm": 3.286142587661743, "learning_rate": 1.4422531824750683e-05, "loss": 0.5818, "step": 25864 }, { "epoch": 0.7101867105985722, "grad_norm": 0.4205411970615387, "learning_rate": 1.4422144459638452e-05, "loss": 0.5353, "step": 25865 }, { "epoch": 0.7102141680395387, "grad_norm": 0.4121982157230377, "learning_rate": 1.4421757086277511e-05, "loss": 0.4407, "step": 25866 }, { "epoch": 0.7102416254805052, "grad_norm": 0.42069026827812195, "learning_rate": 1.442136970466858e-05, "loss": 0.5813, "step": 25867 }, { "epoch": 0.7102690829214717, "grad_norm": 0.34720295667648315, "learning_rate": 1.4420982314812382e-05, "loss": 0.4975, "step": 25868 }, { "epoch": 0.7102965403624382, "grad_norm": 0.38738787174224854, "learning_rate": 1.4420594916709643e-05, "loss": 0.4326, "step": 25869 }, { "epoch": 0.7103239978034047, "grad_norm": 0.4035623073577881, "learning_rate": 1.4420207510361083e-05, "loss": 0.5888, "step": 25870 }, { "epoch": 0.7103514552443713, "grad_norm": 0.4032056927680969, "learning_rate": 1.4419820095767426e-05, "loss": 0.5085, "step": 25871 }, { "epoch": 0.7103789126853377, "grad_norm": 0.35014891624450684, "learning_rate": 1.4419432672929395e-05, "loss": 0.4893, "step": 25872 }, { "epoch": 0.7104063701263043, "grad_norm": 0.4209508001804352, "learning_rate": 1.4419045241847708e-05, "loss": 0.4982, "step": 25873 }, { "epoch": 0.7104338275672707, "grad_norm": 0.3537967801094055, "learning_rate": 1.4418657802523094e-05, "loss": 0.5071, "step": 25874 }, { "epoch": 0.7104612850082372, "grad_norm": 0.3859637379646301, "learning_rate": 1.4418270354956273e-05, "loss": 0.4997, "step": 25875 }, { "epoch": 0.7104887424492037, "grad_norm": 0.37590491771698, "learning_rate": 1.4417882899147966e-05, "loss": 0.5022, "step": 25876 }, { "epoch": 0.7105161998901702, "grad_norm": 0.5707791447639465, "learning_rate": 1.4417495435098899e-05, "loss": 0.5083, "step": 25877 }, { "epoch": 0.7105436573311368, "grad_norm": 0.35279977321624756, "learning_rate": 1.4417107962809792e-05, "loss": 0.442, "step": 25878 }, { "epoch": 0.7105711147721032, "grad_norm": 0.39908552169799805, "learning_rate": 1.4416720482281368e-05, "loss": 0.51, "step": 25879 }, { "epoch": 0.7105985722130698, "grad_norm": 0.4755646586418152, "learning_rate": 1.4416332993514353e-05, "loss": 0.538, "step": 25880 }, { "epoch": 0.7106260296540362, "grad_norm": 0.38870757818222046, "learning_rate": 1.4415945496509465e-05, "loss": 0.4796, "step": 25881 }, { "epoch": 0.7106534870950028, "grad_norm": 0.3827751874923706, "learning_rate": 1.4415557991267432e-05, "loss": 0.4864, "step": 25882 }, { "epoch": 0.7106809445359692, "grad_norm": 0.3704669177532196, "learning_rate": 1.4415170477788973e-05, "loss": 0.586, "step": 25883 }, { "epoch": 0.7107084019769357, "grad_norm": 0.3682456612586975, "learning_rate": 1.4414782956074809e-05, "loss": 0.4704, "step": 25884 }, { "epoch": 0.7107358594179023, "grad_norm": 0.401678204536438, "learning_rate": 1.4414395426125669e-05, "loss": 0.5773, "step": 25885 }, { "epoch": 0.7107633168588687, "grad_norm": 0.3607645630836487, "learning_rate": 1.4414007887942272e-05, "loss": 0.4821, "step": 25886 }, { "epoch": 0.7107907742998353, "grad_norm": 0.39781495928764343, "learning_rate": 1.4413620341525341e-05, "loss": 0.4894, "step": 25887 }, { "epoch": 0.7108182317408017, "grad_norm": 0.38592299818992615, "learning_rate": 1.4413232786875602e-05, "loss": 0.5221, "step": 25888 }, { "epoch": 0.7108456891817683, "grad_norm": 0.4252718985080719, "learning_rate": 1.4412845223993772e-05, "loss": 0.5244, "step": 25889 }, { "epoch": 0.7108731466227347, "grad_norm": 0.38716769218444824, "learning_rate": 1.4412457652880579e-05, "loss": 0.5318, "step": 25890 }, { "epoch": 0.7109006040637013, "grad_norm": 0.39136409759521484, "learning_rate": 1.4412070073536741e-05, "loss": 0.4811, "step": 25891 }, { "epoch": 0.7109280615046678, "grad_norm": 0.3492092490196228, "learning_rate": 1.4411682485962987e-05, "loss": 0.483, "step": 25892 }, { "epoch": 0.7109555189456342, "grad_norm": 0.33289024233818054, "learning_rate": 1.4411294890160039e-05, "loss": 0.4, "step": 25893 }, { "epoch": 0.7109829763866008, "grad_norm": 0.4194401502609253, "learning_rate": 1.4410907286128614e-05, "loss": 0.5308, "step": 25894 }, { "epoch": 0.7110104338275672, "grad_norm": 0.4035889506340027, "learning_rate": 1.4410519673869444e-05, "loss": 0.5295, "step": 25895 }, { "epoch": 0.7110378912685338, "grad_norm": 0.3732859790325165, "learning_rate": 1.441013205338324e-05, "loss": 0.5064, "step": 25896 }, { "epoch": 0.7110653487095002, "grad_norm": 0.4064289629459381, "learning_rate": 1.4409744424670738e-05, "loss": 0.5661, "step": 25897 }, { "epoch": 0.7110928061504668, "grad_norm": 0.3807610869407654, "learning_rate": 1.4409356787732653e-05, "loss": 0.4305, "step": 25898 }, { "epoch": 0.7111202635914333, "grad_norm": 0.39028415083885193, "learning_rate": 1.4408969142569712e-05, "loss": 0.53, "step": 25899 }, { "epoch": 0.7111477210323998, "grad_norm": 0.4621504247188568, "learning_rate": 1.4408581489182634e-05, "loss": 0.5023, "step": 25900 }, { "epoch": 0.7111751784733663, "grad_norm": 1.576985239982605, "learning_rate": 1.4408193827572143e-05, "loss": 0.5212, "step": 25901 }, { "epoch": 0.7112026359143327, "grad_norm": 0.37873151898384094, "learning_rate": 1.4407806157738969e-05, "loss": 0.4957, "step": 25902 }, { "epoch": 0.7112300933552993, "grad_norm": 0.39273035526275635, "learning_rate": 1.4407418479683826e-05, "loss": 0.4546, "step": 25903 }, { "epoch": 0.7112575507962657, "grad_norm": 0.38402411341667175, "learning_rate": 1.4407030793407442e-05, "loss": 0.5313, "step": 25904 }, { "epoch": 0.7112850082372323, "grad_norm": 0.35619717836380005, "learning_rate": 1.440664309891054e-05, "loss": 0.5005, "step": 25905 }, { "epoch": 0.7113124656781988, "grad_norm": 0.4717952013015747, "learning_rate": 1.4406255396193838e-05, "loss": 0.5155, "step": 25906 }, { "epoch": 0.7113399231191653, "grad_norm": 0.39893174171447754, "learning_rate": 1.4405867685258069e-05, "loss": 0.5246, "step": 25907 }, { "epoch": 0.7113673805601318, "grad_norm": 0.37765154242515564, "learning_rate": 1.4405479966103945e-05, "loss": 0.4267, "step": 25908 }, { "epoch": 0.7113948380010983, "grad_norm": 0.39481621980667114, "learning_rate": 1.44050922387322e-05, "loss": 0.4379, "step": 25909 }, { "epoch": 0.7114222954420648, "grad_norm": 0.38604483008384705, "learning_rate": 1.440470450314355e-05, "loss": 0.5663, "step": 25910 }, { "epoch": 0.7114497528830313, "grad_norm": 0.3876763880252838, "learning_rate": 1.4404316759338719e-05, "loss": 0.4947, "step": 25911 }, { "epoch": 0.7114772103239978, "grad_norm": 0.36547011137008667, "learning_rate": 1.4403929007318433e-05, "loss": 0.4833, "step": 25912 }, { "epoch": 0.7115046677649643, "grad_norm": 0.8799788355827332, "learning_rate": 1.4403541247083413e-05, "loss": 0.4775, "step": 25913 }, { "epoch": 0.7115321252059308, "grad_norm": 0.3627760410308838, "learning_rate": 1.4403153478634384e-05, "loss": 0.4307, "step": 25914 }, { "epoch": 0.7115595826468973, "grad_norm": 0.4181090295314789, "learning_rate": 1.4402765701972066e-05, "loss": 0.5294, "step": 25915 }, { "epoch": 0.7115870400878638, "grad_norm": 0.3783631920814514, "learning_rate": 1.4402377917097188e-05, "loss": 0.4117, "step": 25916 }, { "epoch": 0.7116144975288303, "grad_norm": 0.3377611041069031, "learning_rate": 1.440199012401047e-05, "loss": 0.4415, "step": 25917 }, { "epoch": 0.7116419549697968, "grad_norm": 0.3880821764469147, "learning_rate": 1.4401602322712633e-05, "loss": 0.5722, "step": 25918 }, { "epoch": 0.7116694124107633, "grad_norm": 0.49683985114097595, "learning_rate": 1.4401214513204405e-05, "loss": 0.4794, "step": 25919 }, { "epoch": 0.7116968698517299, "grad_norm": 0.4280538856983185, "learning_rate": 1.4400826695486505e-05, "loss": 0.5164, "step": 25920 }, { "epoch": 0.7117243272926963, "grad_norm": 0.8664463758468628, "learning_rate": 1.4400438869559659e-05, "loss": 0.5216, "step": 25921 }, { "epoch": 0.7117517847336629, "grad_norm": 0.460968017578125, "learning_rate": 1.4400051035424594e-05, "loss": 0.4418, "step": 25922 }, { "epoch": 0.7117792421746293, "grad_norm": 0.3797134459018707, "learning_rate": 1.4399663193082023e-05, "loss": 0.5978, "step": 25923 }, { "epoch": 0.7118066996155958, "grad_norm": 0.34685707092285156, "learning_rate": 1.4399275342532681e-05, "loss": 0.4997, "step": 25924 }, { "epoch": 0.7118341570565623, "grad_norm": 0.3786543905735016, "learning_rate": 1.4398887483777284e-05, "loss": 0.4936, "step": 25925 }, { "epoch": 0.7118616144975288, "grad_norm": 0.3833673894405365, "learning_rate": 1.439849961681656e-05, "loss": 0.4858, "step": 25926 }, { "epoch": 0.7118890719384954, "grad_norm": 0.3846262991428375, "learning_rate": 1.439811174165123e-05, "loss": 0.4914, "step": 25927 }, { "epoch": 0.7119165293794618, "grad_norm": 0.3826737403869629, "learning_rate": 1.4397723858282016e-05, "loss": 0.5051, "step": 25928 }, { "epoch": 0.7119439868204284, "grad_norm": 0.38408300280570984, "learning_rate": 1.4397335966709644e-05, "loss": 0.4322, "step": 25929 }, { "epoch": 0.7119714442613948, "grad_norm": 0.4126847982406616, "learning_rate": 1.4396948066934838e-05, "loss": 0.5163, "step": 25930 }, { "epoch": 0.7119989017023614, "grad_norm": 0.4045517146587372, "learning_rate": 1.4396560158958319e-05, "loss": 0.4843, "step": 25931 }, { "epoch": 0.7120263591433278, "grad_norm": 0.3896666467189789, "learning_rate": 1.4396172242780813e-05, "loss": 0.4487, "step": 25932 }, { "epoch": 0.7120538165842943, "grad_norm": 0.3857246935367584, "learning_rate": 1.439578431840304e-05, "loss": 0.4191, "step": 25933 }, { "epoch": 0.7120812740252609, "grad_norm": 0.3503515124320984, "learning_rate": 1.439539638582573e-05, "loss": 0.4054, "step": 25934 }, { "epoch": 0.7121087314662273, "grad_norm": 0.4055585563182831, "learning_rate": 1.4395008445049603e-05, "loss": 0.4603, "step": 25935 }, { "epoch": 0.7121361889071939, "grad_norm": 0.5107809901237488, "learning_rate": 1.4394620496075382e-05, "loss": 0.5577, "step": 25936 }, { "epoch": 0.7121636463481603, "grad_norm": 0.43938422203063965, "learning_rate": 1.4394232538903792e-05, "loss": 0.5277, "step": 25937 }, { "epoch": 0.7121911037891269, "grad_norm": 0.36351537704467773, "learning_rate": 1.4393844573535551e-05, "loss": 0.4813, "step": 25938 }, { "epoch": 0.7122185612300933, "grad_norm": 0.3506568372249603, "learning_rate": 1.4393456599971392e-05, "loss": 0.3828, "step": 25939 }, { "epoch": 0.7122460186710599, "grad_norm": 0.391205370426178, "learning_rate": 1.4393068618212032e-05, "loss": 0.4972, "step": 25940 }, { "epoch": 0.7122734761120264, "grad_norm": 0.39560946822166443, "learning_rate": 1.43926806282582e-05, "loss": 0.4712, "step": 25941 }, { "epoch": 0.7123009335529928, "grad_norm": 0.41738152503967285, "learning_rate": 1.4392292630110616e-05, "loss": 0.6272, "step": 25942 }, { "epoch": 0.7123283909939594, "grad_norm": 0.4293467700481415, "learning_rate": 1.4391904623770002e-05, "loss": 0.5164, "step": 25943 }, { "epoch": 0.7123558484349258, "grad_norm": 0.3723197281360626, "learning_rate": 1.4391516609237083e-05, "loss": 0.5208, "step": 25944 }, { "epoch": 0.7123833058758924, "grad_norm": 0.41635605692863464, "learning_rate": 1.4391128586512589e-05, "loss": 0.4522, "step": 25945 }, { "epoch": 0.7124107633168588, "grad_norm": 0.38310515880584717, "learning_rate": 1.4390740555597234e-05, "loss": 0.548, "step": 25946 }, { "epoch": 0.7124382207578254, "grad_norm": 0.4137367606163025, "learning_rate": 1.439035251649175e-05, "loss": 0.4877, "step": 25947 }, { "epoch": 0.7124656781987919, "grad_norm": 0.4245050251483917, "learning_rate": 1.4389964469196853e-05, "loss": 0.4451, "step": 25948 }, { "epoch": 0.7124931356397584, "grad_norm": 0.41657742857933044, "learning_rate": 1.4389576413713275e-05, "loss": 0.495, "step": 25949 }, { "epoch": 0.7125205930807249, "grad_norm": 0.4260657727718353, "learning_rate": 1.4389188350041736e-05, "loss": 0.5358, "step": 25950 }, { "epoch": 0.7125480505216913, "grad_norm": 0.3994043171405792, "learning_rate": 1.4388800278182957e-05, "loss": 0.4597, "step": 25951 }, { "epoch": 0.7125755079626579, "grad_norm": 0.37952253222465515, "learning_rate": 1.4388412198137666e-05, "loss": 0.4271, "step": 25952 }, { "epoch": 0.7126029654036243, "grad_norm": 0.3841443955898285, "learning_rate": 1.4388024109906586e-05, "loss": 0.5913, "step": 25953 }, { "epoch": 0.7126304228445909, "grad_norm": 0.37923842668533325, "learning_rate": 1.4387636013490442e-05, "loss": 0.4927, "step": 25954 }, { "epoch": 0.7126578802855574, "grad_norm": 0.38640761375427246, "learning_rate": 1.4387247908889954e-05, "loss": 0.4822, "step": 25955 }, { "epoch": 0.7126853377265239, "grad_norm": 0.4377199411392212, "learning_rate": 1.438685979610585e-05, "loss": 0.6094, "step": 25956 }, { "epoch": 0.7127127951674904, "grad_norm": 0.41739317774772644, "learning_rate": 1.4386471675138852e-05, "loss": 0.4461, "step": 25957 }, { "epoch": 0.7127402526084569, "grad_norm": 0.36272579431533813, "learning_rate": 1.4386083545989682e-05, "loss": 0.4606, "step": 25958 }, { "epoch": 0.7127677100494234, "grad_norm": 0.4073275923728943, "learning_rate": 1.438569540865907e-05, "loss": 0.5624, "step": 25959 }, { "epoch": 0.7127951674903898, "grad_norm": 0.4696483016014099, "learning_rate": 1.4385307263147734e-05, "loss": 0.5, "step": 25960 }, { "epoch": 0.7128226249313564, "grad_norm": 0.39894652366638184, "learning_rate": 1.4384919109456402e-05, "loss": 0.4527, "step": 25961 }, { "epoch": 0.712850082372323, "grad_norm": 0.38506248593330383, "learning_rate": 1.4384530947585795e-05, "loss": 0.4546, "step": 25962 }, { "epoch": 0.7128775398132894, "grad_norm": 0.41508010029792786, "learning_rate": 1.438414277753664e-05, "loss": 0.5727, "step": 25963 }, { "epoch": 0.7129049972542559, "grad_norm": 0.6940839290618896, "learning_rate": 1.4383754599309656e-05, "loss": 0.4917, "step": 25964 }, { "epoch": 0.7129324546952224, "grad_norm": 0.43948104977607727, "learning_rate": 1.4383366412905576e-05, "loss": 0.4622, "step": 25965 }, { "epoch": 0.7129599121361889, "grad_norm": 0.4594871997833252, "learning_rate": 1.4382978218325111e-05, "loss": 0.4819, "step": 25966 }, { "epoch": 0.7129873695771554, "grad_norm": 0.38914597034454346, "learning_rate": 1.4382590015569e-05, "loss": 0.5076, "step": 25967 }, { "epoch": 0.7130148270181219, "grad_norm": 0.4657936096191406, "learning_rate": 1.438220180463796e-05, "loss": 0.4284, "step": 25968 }, { "epoch": 0.7130422844590885, "grad_norm": 0.48969766497612, "learning_rate": 1.438181358553271e-05, "loss": 0.5413, "step": 25969 }, { "epoch": 0.7130697419000549, "grad_norm": 0.44247207045555115, "learning_rate": 1.438142535825398e-05, "loss": 0.4644, "step": 25970 }, { "epoch": 0.7130971993410214, "grad_norm": 0.4502282738685608, "learning_rate": 1.4381037122802492e-05, "loss": 0.4569, "step": 25971 }, { "epoch": 0.7131246567819879, "grad_norm": 0.43277135491371155, "learning_rate": 1.4380648879178976e-05, "loss": 0.4819, "step": 25972 }, { "epoch": 0.7131521142229544, "grad_norm": 0.3291560709476471, "learning_rate": 1.438026062738415e-05, "loss": 0.3899, "step": 25973 }, { "epoch": 0.7131795716639209, "grad_norm": 0.4028421938419342, "learning_rate": 1.437987236741874e-05, "loss": 0.5276, "step": 25974 }, { "epoch": 0.7132070291048874, "grad_norm": 0.3960241377353668, "learning_rate": 1.4379484099283469e-05, "loss": 0.4851, "step": 25975 }, { "epoch": 0.713234486545854, "grad_norm": 0.3833281993865967, "learning_rate": 1.4379095822979064e-05, "loss": 0.4818, "step": 25976 }, { "epoch": 0.7132619439868204, "grad_norm": 0.396078884601593, "learning_rate": 1.4378707538506245e-05, "loss": 0.5456, "step": 25977 }, { "epoch": 0.713289401427787, "grad_norm": 0.32703641057014465, "learning_rate": 1.4378319245865743e-05, "loss": 0.4434, "step": 25978 }, { "epoch": 0.7133168588687534, "grad_norm": 0.3778613209724426, "learning_rate": 1.4377930945058273e-05, "loss": 0.5207, "step": 25979 }, { "epoch": 0.71334431630972, "grad_norm": 0.3476533591747284, "learning_rate": 1.437754263608457e-05, "loss": 0.442, "step": 25980 }, { "epoch": 0.7133717737506864, "grad_norm": 0.4215749502182007, "learning_rate": 1.437715431894535e-05, "loss": 0.5703, "step": 25981 }, { "epoch": 0.7133992311916529, "grad_norm": 0.36284834146499634, "learning_rate": 1.437676599364134e-05, "loss": 0.4192, "step": 25982 }, { "epoch": 0.7134266886326195, "grad_norm": 0.3717990219593048, "learning_rate": 1.4376377660173266e-05, "loss": 0.489, "step": 25983 }, { "epoch": 0.7134541460735859, "grad_norm": 0.39584022760391235, "learning_rate": 1.4375989318541848e-05, "loss": 0.5279, "step": 25984 }, { "epoch": 0.7134816035145525, "grad_norm": 0.4071667790412903, "learning_rate": 1.4375600968747816e-05, "loss": 0.5037, "step": 25985 }, { "epoch": 0.7135090609555189, "grad_norm": 0.3223027288913727, "learning_rate": 1.4375212610791891e-05, "loss": 0.4133, "step": 25986 }, { "epoch": 0.7135365183964855, "grad_norm": 0.4755399227142334, "learning_rate": 1.4374824244674797e-05, "loss": 0.5483, "step": 25987 }, { "epoch": 0.7135639758374519, "grad_norm": 0.460130900144577, "learning_rate": 1.4374435870397262e-05, "loss": 0.5069, "step": 25988 }, { "epoch": 0.7135914332784185, "grad_norm": 0.39262843132019043, "learning_rate": 1.4374047487960004e-05, "loss": 0.5673, "step": 25989 }, { "epoch": 0.713618890719385, "grad_norm": 0.427388459444046, "learning_rate": 1.4373659097363754e-05, "loss": 0.4231, "step": 25990 }, { "epoch": 0.7136463481603514, "grad_norm": 0.42621687054634094, "learning_rate": 1.4373270698609234e-05, "loss": 0.5288, "step": 25991 }, { "epoch": 0.713673805601318, "grad_norm": 0.4201515018939972, "learning_rate": 1.4372882291697168e-05, "loss": 0.543, "step": 25992 }, { "epoch": 0.7137012630422844, "grad_norm": 0.363801509141922, "learning_rate": 1.4372493876628281e-05, "loss": 0.5238, "step": 25993 }, { "epoch": 0.713728720483251, "grad_norm": 0.38467860221862793, "learning_rate": 1.4372105453403297e-05, "loss": 0.4905, "step": 25994 }, { "epoch": 0.7137561779242174, "grad_norm": 0.3743225634098053, "learning_rate": 1.437171702202294e-05, "loss": 0.5376, "step": 25995 }, { "epoch": 0.713783635365184, "grad_norm": 0.32865023612976074, "learning_rate": 1.4371328582487938e-05, "loss": 0.4581, "step": 25996 }, { "epoch": 0.7138110928061505, "grad_norm": 0.39075711369514465, "learning_rate": 1.437094013479901e-05, "loss": 0.5274, "step": 25997 }, { "epoch": 0.713838550247117, "grad_norm": 0.40577179193496704, "learning_rate": 1.4370551678956886e-05, "loss": 0.4905, "step": 25998 }, { "epoch": 0.7138660076880835, "grad_norm": 0.4174309968948364, "learning_rate": 1.4370163214962289e-05, "loss": 0.5331, "step": 25999 }, { "epoch": 0.7138934651290499, "grad_norm": 0.3949275016784668, "learning_rate": 1.4369774742815938e-05, "loss": 0.4953, "step": 26000 }, { "epoch": 0.7139209225700165, "grad_norm": 0.38455671072006226, "learning_rate": 1.4369386262518568e-05, "loss": 0.5754, "step": 26001 }, { "epoch": 0.7139483800109829, "grad_norm": 0.45652082562446594, "learning_rate": 1.4368997774070894e-05, "loss": 0.4518, "step": 26002 }, { "epoch": 0.7139758374519495, "grad_norm": 0.36224669218063354, "learning_rate": 1.4368609277473646e-05, "loss": 0.4747, "step": 26003 }, { "epoch": 0.714003294892916, "grad_norm": 0.38411861658096313, "learning_rate": 1.436822077272755e-05, "loss": 0.5422, "step": 26004 }, { "epoch": 0.7140307523338825, "grad_norm": 0.40852829813957214, "learning_rate": 1.4367832259833326e-05, "loss": 0.4866, "step": 26005 }, { "epoch": 0.714058209774849, "grad_norm": 0.48855963349342346, "learning_rate": 1.43674437387917e-05, "loss": 0.5617, "step": 26006 }, { "epoch": 0.7140856672158155, "grad_norm": 0.3702276051044464, "learning_rate": 1.43670552096034e-05, "loss": 0.4608, "step": 26007 }, { "epoch": 0.714113124656782, "grad_norm": 0.49852994084358215, "learning_rate": 1.4366666672269145e-05, "loss": 0.4755, "step": 26008 }, { "epoch": 0.7141405820977484, "grad_norm": 0.4262365400791168, "learning_rate": 1.4366278126789666e-05, "loss": 0.6589, "step": 26009 }, { "epoch": 0.714168039538715, "grad_norm": 0.4021655023097992, "learning_rate": 1.4365889573165681e-05, "loss": 0.506, "step": 26010 }, { "epoch": 0.7141954969796815, "grad_norm": 0.41766127943992615, "learning_rate": 1.4365501011397922e-05, "loss": 0.4721, "step": 26011 }, { "epoch": 0.714222954420648, "grad_norm": 0.4753163456916809, "learning_rate": 1.436511244148711e-05, "loss": 0.5897, "step": 26012 }, { "epoch": 0.7142504118616145, "grad_norm": 0.3903539180755615, "learning_rate": 1.4364723863433968e-05, "loss": 0.4906, "step": 26013 }, { "epoch": 0.714277869302581, "grad_norm": 0.3684806823730469, "learning_rate": 1.4364335277239223e-05, "loss": 0.5362, "step": 26014 }, { "epoch": 0.7143053267435475, "grad_norm": 0.3942180275917053, "learning_rate": 1.43639466829036e-05, "loss": 0.4893, "step": 26015 }, { "epoch": 0.714332784184514, "grad_norm": 0.35558828711509705, "learning_rate": 1.4363558080427824e-05, "loss": 0.5058, "step": 26016 }, { "epoch": 0.7143602416254805, "grad_norm": 0.35713696479797363, "learning_rate": 1.4363169469812621e-05, "loss": 0.458, "step": 26017 }, { "epoch": 0.7143876990664471, "grad_norm": 0.34736618399620056, "learning_rate": 1.4362780851058713e-05, "loss": 0.4978, "step": 26018 }, { "epoch": 0.7144151565074135, "grad_norm": 0.3738850951194763, "learning_rate": 1.4362392224166829e-05, "loss": 0.4561, "step": 26019 }, { "epoch": 0.71444261394838, "grad_norm": 0.360006183385849, "learning_rate": 1.4362003589137685e-05, "loss": 0.4539, "step": 26020 }, { "epoch": 0.7144700713893465, "grad_norm": 0.37713584303855896, "learning_rate": 1.4361614945972018e-05, "loss": 0.4806, "step": 26021 }, { "epoch": 0.714497528830313, "grad_norm": 0.46365079283714294, "learning_rate": 1.4361226294670545e-05, "loss": 0.559, "step": 26022 }, { "epoch": 0.7145249862712795, "grad_norm": 0.38509702682495117, "learning_rate": 1.4360837635233993e-05, "loss": 0.4922, "step": 26023 }, { "epoch": 0.714552443712246, "grad_norm": 0.4964011609554291, "learning_rate": 1.4360448967663087e-05, "loss": 0.5858, "step": 26024 }, { "epoch": 0.7145799011532126, "grad_norm": 0.4346403479576111, "learning_rate": 1.4360060291958552e-05, "loss": 0.4809, "step": 26025 }, { "epoch": 0.714607358594179, "grad_norm": 0.3773009181022644, "learning_rate": 1.4359671608121112e-05, "loss": 0.5492, "step": 26026 }, { "epoch": 0.7146348160351456, "grad_norm": 0.3958376348018646, "learning_rate": 1.4359282916151495e-05, "loss": 0.4844, "step": 26027 }, { "epoch": 0.714662273476112, "grad_norm": 0.3867471218109131, "learning_rate": 1.4358894216050421e-05, "loss": 0.4285, "step": 26028 }, { "epoch": 0.7146897309170785, "grad_norm": 0.3897256851196289, "learning_rate": 1.435850550781862e-05, "loss": 0.5382, "step": 26029 }, { "epoch": 0.714717188358045, "grad_norm": 0.4148065447807312, "learning_rate": 1.4358116791456818e-05, "loss": 0.5384, "step": 26030 }, { "epoch": 0.7147446457990115, "grad_norm": 0.3845970630645752, "learning_rate": 1.4357728066965733e-05, "loss": 0.4896, "step": 26031 }, { "epoch": 0.7147721032399781, "grad_norm": 0.36382463574409485, "learning_rate": 1.4357339334346097e-05, "loss": 0.4894, "step": 26032 }, { "epoch": 0.7147995606809445, "grad_norm": 0.3579001724720001, "learning_rate": 1.435695059359863e-05, "loss": 0.4506, "step": 26033 }, { "epoch": 0.7148270181219111, "grad_norm": 0.3949882686138153, "learning_rate": 1.4356561844724062e-05, "loss": 0.4519, "step": 26034 }, { "epoch": 0.7148544755628775, "grad_norm": 0.3821321129798889, "learning_rate": 1.4356173087723114e-05, "loss": 0.5271, "step": 26035 }, { "epoch": 0.7148819330038441, "grad_norm": 0.4146723747253418, "learning_rate": 1.4355784322596513e-05, "loss": 0.5351, "step": 26036 }, { "epoch": 0.7149093904448105, "grad_norm": 0.3883277475833893, "learning_rate": 1.4355395549344984e-05, "loss": 0.5089, "step": 26037 }, { "epoch": 0.714936847885777, "grad_norm": 0.3695624768733978, "learning_rate": 1.4355006767969252e-05, "loss": 0.4451, "step": 26038 }, { "epoch": 0.7149643053267436, "grad_norm": 0.33791589736938477, "learning_rate": 1.4354617978470043e-05, "loss": 0.4692, "step": 26039 }, { "epoch": 0.71499176276771, "grad_norm": 0.35020723938941956, "learning_rate": 1.4354229180848083e-05, "loss": 0.4214, "step": 26040 }, { "epoch": 0.7150192202086766, "grad_norm": 0.34675848484039307, "learning_rate": 1.4353840375104093e-05, "loss": 0.4471, "step": 26041 }, { "epoch": 0.715046677649643, "grad_norm": 0.37643393874168396, "learning_rate": 1.4353451561238803e-05, "loss": 0.4881, "step": 26042 }, { "epoch": 0.7150741350906096, "grad_norm": 0.38813480734825134, "learning_rate": 1.4353062739252937e-05, "loss": 0.534, "step": 26043 }, { "epoch": 0.715101592531576, "grad_norm": 0.343219131231308, "learning_rate": 1.4352673909147217e-05, "loss": 0.4204, "step": 26044 }, { "epoch": 0.7151290499725426, "grad_norm": 1.6274243593215942, "learning_rate": 1.4352285070922374e-05, "loss": 0.5529, "step": 26045 }, { "epoch": 0.7151565074135091, "grad_norm": 0.3580920100212097, "learning_rate": 1.4351896224579127e-05, "loss": 0.4163, "step": 26046 }, { "epoch": 0.7151839648544756, "grad_norm": 0.4271029233932495, "learning_rate": 1.4351507370118208e-05, "loss": 0.4626, "step": 26047 }, { "epoch": 0.7152114222954421, "grad_norm": 0.4010319411754608, "learning_rate": 1.4351118507540338e-05, "loss": 0.4866, "step": 26048 }, { "epoch": 0.7152388797364085, "grad_norm": 0.3428836762905121, "learning_rate": 1.4350729636846242e-05, "loss": 0.5174, "step": 26049 }, { "epoch": 0.7152663371773751, "grad_norm": 0.4119150936603546, "learning_rate": 1.4350340758036647e-05, "loss": 0.4791, "step": 26050 }, { "epoch": 0.7152937946183415, "grad_norm": 0.4236863851547241, "learning_rate": 1.4349951871112277e-05, "loss": 0.4695, "step": 26051 }, { "epoch": 0.7153212520593081, "grad_norm": 0.3825666010379791, "learning_rate": 1.4349562976073861e-05, "loss": 0.5353, "step": 26052 }, { "epoch": 0.7153487095002746, "grad_norm": 0.5928026437759399, "learning_rate": 1.434917407292212e-05, "loss": 0.4978, "step": 26053 }, { "epoch": 0.7153761669412411, "grad_norm": 0.39703139662742615, "learning_rate": 1.434878516165778e-05, "loss": 0.5088, "step": 26054 }, { "epoch": 0.7154036243822076, "grad_norm": 0.4349009394645691, "learning_rate": 1.4348396242281573e-05, "loss": 0.4828, "step": 26055 }, { "epoch": 0.715431081823174, "grad_norm": 0.35437270998954773, "learning_rate": 1.4348007314794216e-05, "loss": 0.4957, "step": 26056 }, { "epoch": 0.7154585392641406, "grad_norm": 0.3615390360355377, "learning_rate": 1.4347618379196436e-05, "loss": 0.5743, "step": 26057 }, { "epoch": 0.715485996705107, "grad_norm": 0.3424055278301239, "learning_rate": 1.4347229435488962e-05, "loss": 0.4815, "step": 26058 }, { "epoch": 0.7155134541460736, "grad_norm": 0.36199507117271423, "learning_rate": 1.4346840483672515e-05, "loss": 0.4992, "step": 26059 }, { "epoch": 0.7155409115870401, "grad_norm": 0.3768567740917206, "learning_rate": 1.4346451523747827e-05, "loss": 0.5407, "step": 26060 }, { "epoch": 0.7155683690280066, "grad_norm": 0.4903936982154846, "learning_rate": 1.4346062555715619e-05, "loss": 0.5462, "step": 26061 }, { "epoch": 0.7155958264689731, "grad_norm": 0.36411523818969727, "learning_rate": 1.4345673579576614e-05, "loss": 0.4862, "step": 26062 }, { "epoch": 0.7156232839099396, "grad_norm": 0.48901668190956116, "learning_rate": 1.4345284595331543e-05, "loss": 0.5784, "step": 26063 }, { "epoch": 0.7156507413509061, "grad_norm": 0.401764452457428, "learning_rate": 1.4344895602981129e-05, "loss": 0.5223, "step": 26064 }, { "epoch": 0.7156781987918726, "grad_norm": 0.4186842739582062, "learning_rate": 1.4344506602526099e-05, "loss": 0.578, "step": 26065 }, { "epoch": 0.7157056562328391, "grad_norm": 0.39163926243782043, "learning_rate": 1.4344117593967177e-05, "loss": 0.5653, "step": 26066 }, { "epoch": 0.7157331136738057, "grad_norm": 0.3941185474395752, "learning_rate": 1.4343728577305088e-05, "loss": 0.4998, "step": 26067 }, { "epoch": 0.7157605711147721, "grad_norm": 0.37268924713134766, "learning_rate": 1.4343339552540561e-05, "loss": 0.4307, "step": 26068 }, { "epoch": 0.7157880285557386, "grad_norm": 0.36848315596580505, "learning_rate": 1.434295051967432e-05, "loss": 0.4918, "step": 26069 }, { "epoch": 0.7158154859967051, "grad_norm": 0.43074172735214233, "learning_rate": 1.4342561478707086e-05, "loss": 0.5924, "step": 26070 }, { "epoch": 0.7158429434376716, "grad_norm": 2.2466816902160645, "learning_rate": 1.4342172429639592e-05, "loss": 0.458, "step": 26071 }, { "epoch": 0.7158704008786381, "grad_norm": 0.3757255971431732, "learning_rate": 1.434178337247256e-05, "loss": 0.5188, "step": 26072 }, { "epoch": 0.7158978583196046, "grad_norm": 0.32748883962631226, "learning_rate": 1.4341394307206717e-05, "loss": 0.4603, "step": 26073 }, { "epoch": 0.7159253157605712, "grad_norm": 0.3900916874408722, "learning_rate": 1.4341005233842787e-05, "loss": 0.4578, "step": 26074 }, { "epoch": 0.7159527732015376, "grad_norm": 0.43124040961265564, "learning_rate": 1.4340616152381497e-05, "loss": 0.4966, "step": 26075 }, { "epoch": 0.7159802306425042, "grad_norm": 0.3590388298034668, "learning_rate": 1.4340227062823572e-05, "loss": 0.4929, "step": 26076 }, { "epoch": 0.7160076880834706, "grad_norm": 0.3887551426887512, "learning_rate": 1.4339837965169738e-05, "loss": 0.5729, "step": 26077 }, { "epoch": 0.7160351455244371, "grad_norm": 0.38222411274909973, "learning_rate": 1.4339448859420721e-05, "loss": 0.5335, "step": 26078 }, { "epoch": 0.7160626029654036, "grad_norm": 0.4081416726112366, "learning_rate": 1.433905974557725e-05, "loss": 0.5226, "step": 26079 }, { "epoch": 0.7160900604063701, "grad_norm": 0.38909003138542175, "learning_rate": 1.4338670623640046e-05, "loss": 0.5727, "step": 26080 }, { "epoch": 0.7161175178473367, "grad_norm": 0.39402592182159424, "learning_rate": 1.4338281493609835e-05, "loss": 0.5244, "step": 26081 }, { "epoch": 0.7161449752883031, "grad_norm": 0.39477553963661194, "learning_rate": 1.4337892355487345e-05, "loss": 0.4593, "step": 26082 }, { "epoch": 0.7161724327292697, "grad_norm": 0.4092545509338379, "learning_rate": 1.4337503209273302e-05, "loss": 0.5247, "step": 26083 }, { "epoch": 0.7161998901702361, "grad_norm": 0.6482234597206116, "learning_rate": 1.433711405496843e-05, "loss": 0.6255, "step": 26084 }, { "epoch": 0.7162273476112027, "grad_norm": 0.405996173620224, "learning_rate": 1.4336724892573457e-05, "loss": 0.4935, "step": 26085 }, { "epoch": 0.7162548050521691, "grad_norm": 0.37049180269241333, "learning_rate": 1.433633572208911e-05, "loss": 0.5238, "step": 26086 }, { "epoch": 0.7162822624931356, "grad_norm": 0.37465107440948486, "learning_rate": 1.4335946543516108e-05, "loss": 0.5259, "step": 26087 }, { "epoch": 0.7163097199341022, "grad_norm": 0.3569576144218445, "learning_rate": 1.4335557356855185e-05, "loss": 0.4734, "step": 26088 }, { "epoch": 0.7163371773750686, "grad_norm": 0.41351377964019775, "learning_rate": 1.4335168162107063e-05, "loss": 0.574, "step": 26089 }, { "epoch": 0.7163646348160352, "grad_norm": 0.3936741352081299, "learning_rate": 1.4334778959272468e-05, "loss": 0.4813, "step": 26090 }, { "epoch": 0.7163920922570016, "grad_norm": 0.36563870310783386, "learning_rate": 1.433438974835213e-05, "loss": 0.5472, "step": 26091 }, { "epoch": 0.7164195496979682, "grad_norm": 0.35716190934181213, "learning_rate": 1.4334000529346768e-05, "loss": 0.378, "step": 26092 }, { "epoch": 0.7164470071389346, "grad_norm": 0.33149388432502747, "learning_rate": 1.4333611302257113e-05, "loss": 0.464, "step": 26093 }, { "epoch": 0.7164744645799012, "grad_norm": 0.4030951261520386, "learning_rate": 1.433322206708389e-05, "loss": 0.5381, "step": 26094 }, { "epoch": 0.7165019220208677, "grad_norm": 0.42903417348861694, "learning_rate": 1.4332832823827824e-05, "loss": 0.5041, "step": 26095 }, { "epoch": 0.7165293794618341, "grad_norm": 0.37395361065864563, "learning_rate": 1.4332443572489643e-05, "loss": 0.4914, "step": 26096 }, { "epoch": 0.7165568369028007, "grad_norm": 0.360299289226532, "learning_rate": 1.4332054313070071e-05, "loss": 0.4776, "step": 26097 }, { "epoch": 0.7165842943437671, "grad_norm": 0.40636587142944336, "learning_rate": 1.4331665045569839e-05, "loss": 0.5004, "step": 26098 }, { "epoch": 0.7166117517847337, "grad_norm": 0.40199699997901917, "learning_rate": 1.4331275769989664e-05, "loss": 0.5059, "step": 26099 }, { "epoch": 0.7166392092257001, "grad_norm": 0.3748112618923187, "learning_rate": 1.4330886486330279e-05, "loss": 0.5354, "step": 26100 }, { "epoch": 0.7166666666666667, "grad_norm": 0.36844587326049805, "learning_rate": 1.4330497194592407e-05, "loss": 0.5141, "step": 26101 }, { "epoch": 0.7166941241076332, "grad_norm": 0.36240458488464355, "learning_rate": 1.4330107894776779e-05, "loss": 0.5151, "step": 26102 }, { "epoch": 0.7167215815485997, "grad_norm": 0.4110575318336487, "learning_rate": 1.4329718586884118e-05, "loss": 0.5385, "step": 26103 }, { "epoch": 0.7167490389895662, "grad_norm": 0.37489038705825806, "learning_rate": 1.4329329270915146e-05, "loss": 0.6121, "step": 26104 }, { "epoch": 0.7167764964305327, "grad_norm": 0.4682330787181854, "learning_rate": 1.4328939946870597e-05, "loss": 0.5265, "step": 26105 }, { "epoch": 0.7168039538714992, "grad_norm": 0.385666161775589, "learning_rate": 1.432855061475119e-05, "loss": 0.5543, "step": 26106 }, { "epoch": 0.7168314113124656, "grad_norm": 0.3662683367729187, "learning_rate": 1.4328161274557657e-05, "loss": 0.4047, "step": 26107 }, { "epoch": 0.7168588687534322, "grad_norm": 0.3850827217102051, "learning_rate": 1.432777192629072e-05, "loss": 0.4766, "step": 26108 }, { "epoch": 0.7168863261943987, "grad_norm": 0.4758588969707489, "learning_rate": 1.432738256995111e-05, "loss": 0.5628, "step": 26109 }, { "epoch": 0.7169137836353652, "grad_norm": 0.41869208216667175, "learning_rate": 1.4326993205539547e-05, "loss": 0.5503, "step": 26110 }, { "epoch": 0.7169412410763317, "grad_norm": 0.39144888520240784, "learning_rate": 1.4326603833056762e-05, "loss": 0.5089, "step": 26111 }, { "epoch": 0.7169686985172982, "grad_norm": 0.387212872505188, "learning_rate": 1.4326214452503481e-05, "loss": 0.5347, "step": 26112 }, { "epoch": 0.7169961559582647, "grad_norm": 0.39412763714790344, "learning_rate": 1.4325825063880427e-05, "loss": 0.5182, "step": 26113 }, { "epoch": 0.7170236133992312, "grad_norm": 0.4453129172325134, "learning_rate": 1.4325435667188333e-05, "loss": 0.5811, "step": 26114 }, { "epoch": 0.7170510708401977, "grad_norm": 0.35040122270584106, "learning_rate": 1.4325046262427917e-05, "loss": 0.5189, "step": 26115 }, { "epoch": 0.7170785282811641, "grad_norm": 0.4012080729007721, "learning_rate": 1.432465684959991e-05, "loss": 0.5791, "step": 26116 }, { "epoch": 0.7171059857221307, "grad_norm": 0.482459157705307, "learning_rate": 1.4324267428705039e-05, "loss": 0.4512, "step": 26117 }, { "epoch": 0.7171334431630972, "grad_norm": 0.4126095473766327, "learning_rate": 1.4323877999744027e-05, "loss": 0.4473, "step": 26118 }, { "epoch": 0.7171609006040637, "grad_norm": 0.37289637327194214, "learning_rate": 1.4323488562717604e-05, "loss": 0.4951, "step": 26119 }, { "epoch": 0.7171883580450302, "grad_norm": 0.5719515681266785, "learning_rate": 1.4323099117626495e-05, "loss": 0.6058, "step": 26120 }, { "epoch": 0.7172158154859967, "grad_norm": 0.4207122027873993, "learning_rate": 1.4322709664471423e-05, "loss": 0.5131, "step": 26121 }, { "epoch": 0.7172432729269632, "grad_norm": 0.34789028763771057, "learning_rate": 1.4322320203253121e-05, "loss": 0.4525, "step": 26122 }, { "epoch": 0.7172707303679297, "grad_norm": 0.39910948276519775, "learning_rate": 1.4321930733972314e-05, "loss": 0.5194, "step": 26123 }, { "epoch": 0.7172981878088962, "grad_norm": 0.37317660450935364, "learning_rate": 1.4321541256629725e-05, "loss": 0.4822, "step": 26124 }, { "epoch": 0.7173256452498628, "grad_norm": 0.3971939980983734, "learning_rate": 1.4321151771226082e-05, "loss": 0.5052, "step": 26125 }, { "epoch": 0.7173531026908292, "grad_norm": 0.5035853981971741, "learning_rate": 1.432076227776211e-05, "loss": 0.4542, "step": 26126 }, { "epoch": 0.7173805601317957, "grad_norm": 0.3951180577278137, "learning_rate": 1.4320372776238541e-05, "loss": 0.5678, "step": 26127 }, { "epoch": 0.7174080175727622, "grad_norm": 0.4150985777378082, "learning_rate": 1.4319983266656098e-05, "loss": 0.5039, "step": 26128 }, { "epoch": 0.7174354750137287, "grad_norm": 0.39717182517051697, "learning_rate": 1.4319593749015504e-05, "loss": 0.4831, "step": 26129 }, { "epoch": 0.7174629324546952, "grad_norm": 0.37337180972099304, "learning_rate": 1.4319204223317491e-05, "loss": 0.5665, "step": 26130 }, { "epoch": 0.7174903898956617, "grad_norm": 0.4071110486984253, "learning_rate": 1.4318814689562783e-05, "loss": 0.5334, "step": 26131 }, { "epoch": 0.7175178473366283, "grad_norm": 0.42442798614501953, "learning_rate": 1.4318425147752106e-05, "loss": 0.4944, "step": 26132 }, { "epoch": 0.7175453047775947, "grad_norm": 0.3565179109573364, "learning_rate": 1.4318035597886192e-05, "loss": 0.4892, "step": 26133 }, { "epoch": 0.7175727622185613, "grad_norm": 0.41918933391571045, "learning_rate": 1.4317646039965758e-05, "loss": 0.4814, "step": 26134 }, { "epoch": 0.7176002196595277, "grad_norm": 0.3607025146484375, "learning_rate": 1.4317256473991539e-05, "loss": 0.5235, "step": 26135 }, { "epoch": 0.7176276771004942, "grad_norm": 0.4720178544521332, "learning_rate": 1.4316866899964258e-05, "loss": 0.5301, "step": 26136 }, { "epoch": 0.7176551345414607, "grad_norm": 0.3793657124042511, "learning_rate": 1.4316477317884643e-05, "loss": 0.5271, "step": 26137 }, { "epoch": 0.7176825919824272, "grad_norm": 0.4030263125896454, "learning_rate": 1.431608772775342e-05, "loss": 0.5631, "step": 26138 }, { "epoch": 0.7177100494233938, "grad_norm": 0.37038934230804443, "learning_rate": 1.4315698129571316e-05, "loss": 0.4733, "step": 26139 }, { "epoch": 0.7177375068643602, "grad_norm": 0.38974541425704956, "learning_rate": 1.4315308523339058e-05, "loss": 0.4833, "step": 26140 }, { "epoch": 0.7177649643053268, "grad_norm": 0.3655814826488495, "learning_rate": 1.4314918909057372e-05, "loss": 0.5154, "step": 26141 }, { "epoch": 0.7177924217462932, "grad_norm": 0.3481271266937256, "learning_rate": 1.4314529286726984e-05, "loss": 0.4369, "step": 26142 }, { "epoch": 0.7178198791872598, "grad_norm": 0.44555598497390747, "learning_rate": 1.4314139656348625e-05, "loss": 0.536, "step": 26143 }, { "epoch": 0.7178473366282262, "grad_norm": 0.5637860894203186, "learning_rate": 1.4313750017923014e-05, "loss": 0.5152, "step": 26144 }, { "epoch": 0.7178747940691927, "grad_norm": 0.36215564608573914, "learning_rate": 1.4313360371450886e-05, "loss": 0.4575, "step": 26145 }, { "epoch": 0.7179022515101593, "grad_norm": 0.5309955477714539, "learning_rate": 1.4312970716932964e-05, "loss": 0.5299, "step": 26146 }, { "epoch": 0.7179297089511257, "grad_norm": 0.3564058244228363, "learning_rate": 1.4312581054369971e-05, "loss": 0.4414, "step": 26147 }, { "epoch": 0.7179571663920923, "grad_norm": 0.35266149044036865, "learning_rate": 1.4312191383762643e-05, "loss": 0.4349, "step": 26148 }, { "epoch": 0.7179846238330587, "grad_norm": 0.4178427755832672, "learning_rate": 1.4311801705111701e-05, "loss": 0.5109, "step": 26149 }, { "epoch": 0.7180120812740253, "grad_norm": 0.418233186006546, "learning_rate": 1.431141201841787e-05, "loss": 0.5721, "step": 26150 }, { "epoch": 0.7180395387149917, "grad_norm": 0.5421519875526428, "learning_rate": 1.431102232368188e-05, "loss": 0.445, "step": 26151 }, { "epoch": 0.7180669961559583, "grad_norm": 0.38479962944984436, "learning_rate": 1.4310632620904458e-05, "loss": 0.4325, "step": 26152 }, { "epoch": 0.7180944535969248, "grad_norm": 0.397297203540802, "learning_rate": 1.4310242910086331e-05, "loss": 0.5345, "step": 26153 }, { "epoch": 0.7181219110378912, "grad_norm": 0.45248350501060486, "learning_rate": 1.4309853191228226e-05, "loss": 0.601, "step": 26154 }, { "epoch": 0.7181493684788578, "grad_norm": 0.3964867889881134, "learning_rate": 1.4309463464330868e-05, "loss": 0.4727, "step": 26155 }, { "epoch": 0.7181768259198242, "grad_norm": 0.362616628408432, "learning_rate": 1.4309073729394987e-05, "loss": 0.4373, "step": 26156 }, { "epoch": 0.7182042833607908, "grad_norm": 0.3739689588546753, "learning_rate": 1.4308683986421305e-05, "loss": 0.4566, "step": 26157 }, { "epoch": 0.7182317408017572, "grad_norm": 0.4261053800582886, "learning_rate": 1.4308294235410553e-05, "loss": 0.4762, "step": 26158 }, { "epoch": 0.7182591982427238, "grad_norm": 0.35194164514541626, "learning_rate": 1.430790447636346e-05, "loss": 0.4912, "step": 26159 }, { "epoch": 0.7182866556836903, "grad_norm": 0.3615327775478363, "learning_rate": 1.4307514709280748e-05, "loss": 0.4551, "step": 26160 }, { "epoch": 0.7183141131246568, "grad_norm": 0.332665354013443, "learning_rate": 1.4307124934163149e-05, "loss": 0.4949, "step": 26161 }, { "epoch": 0.7183415705656233, "grad_norm": 0.4085514545440674, "learning_rate": 1.4306735151011384e-05, "loss": 0.5758, "step": 26162 }, { "epoch": 0.7183690280065897, "grad_norm": 0.3623743951320648, "learning_rate": 1.4306345359826183e-05, "loss": 0.4946, "step": 26163 }, { "epoch": 0.7183964854475563, "grad_norm": 0.37114787101745605, "learning_rate": 1.4305955560608275e-05, "loss": 0.5396, "step": 26164 }, { "epoch": 0.7184239428885227, "grad_norm": 0.5669533610343933, "learning_rate": 1.4305565753358386e-05, "loss": 0.567, "step": 26165 }, { "epoch": 0.7184514003294893, "grad_norm": 0.3448384702205658, "learning_rate": 1.4305175938077242e-05, "loss": 0.4902, "step": 26166 }, { "epoch": 0.7184788577704558, "grad_norm": 0.38355425000190735, "learning_rate": 1.4304786114765572e-05, "loss": 0.4901, "step": 26167 }, { "epoch": 0.7185063152114223, "grad_norm": 0.45099496841430664, "learning_rate": 1.4304396283424101e-05, "loss": 0.4608, "step": 26168 }, { "epoch": 0.7185337726523888, "grad_norm": 0.43905749917030334, "learning_rate": 1.4304006444053555e-05, "loss": 0.5486, "step": 26169 }, { "epoch": 0.7185612300933553, "grad_norm": 0.45028647780418396, "learning_rate": 1.4303616596654665e-05, "loss": 0.5635, "step": 26170 }, { "epoch": 0.7185886875343218, "grad_norm": 0.44524693489074707, "learning_rate": 1.4303226741228158e-05, "loss": 0.5334, "step": 26171 }, { "epoch": 0.7186161449752883, "grad_norm": 0.38128823041915894, "learning_rate": 1.430283687777476e-05, "loss": 0.4802, "step": 26172 }, { "epoch": 0.7186436024162548, "grad_norm": 0.390337198972702, "learning_rate": 1.4302447006295195e-05, "loss": 0.463, "step": 26173 }, { "epoch": 0.7186710598572214, "grad_norm": 0.40497079491615295, "learning_rate": 1.4302057126790194e-05, "loss": 0.6039, "step": 26174 }, { "epoch": 0.7186985172981878, "grad_norm": 0.45977380871772766, "learning_rate": 1.4301667239260484e-05, "loss": 0.5255, "step": 26175 }, { "epoch": 0.7187259747391543, "grad_norm": 0.35989248752593994, "learning_rate": 1.4301277343706792e-05, "loss": 0.4479, "step": 26176 }, { "epoch": 0.7187534321801208, "grad_norm": 0.3945430815219879, "learning_rate": 1.4300887440129848e-05, "loss": 0.4702, "step": 26177 }, { "epoch": 0.7187808896210873, "grad_norm": 0.304027259349823, "learning_rate": 1.4300497528530368e-05, "loss": 0.394, "step": 26178 }, { "epoch": 0.7188083470620538, "grad_norm": 0.38538888096809387, "learning_rate": 1.4300107608909095e-05, "loss": 0.4594, "step": 26179 }, { "epoch": 0.7188358045030203, "grad_norm": 0.4191279709339142, "learning_rate": 1.4299717681266747e-05, "loss": 0.5421, "step": 26180 }, { "epoch": 0.7188632619439869, "grad_norm": 0.38736531138420105, "learning_rate": 1.429932774560405e-05, "loss": 0.5317, "step": 26181 }, { "epoch": 0.7188907193849533, "grad_norm": 0.38019874691963196, "learning_rate": 1.429893780192174e-05, "loss": 0.5431, "step": 26182 }, { "epoch": 0.7189181768259199, "grad_norm": 0.42159005999565125, "learning_rate": 1.4298547850220534e-05, "loss": 0.5049, "step": 26183 }, { "epoch": 0.7189456342668863, "grad_norm": 0.5824725031852722, "learning_rate": 1.4298157890501166e-05, "loss": 0.4632, "step": 26184 }, { "epoch": 0.7189730917078528, "grad_norm": 0.4003783166408539, "learning_rate": 1.4297767922764363e-05, "loss": 0.6163, "step": 26185 }, { "epoch": 0.7190005491488193, "grad_norm": 0.43775486946105957, "learning_rate": 1.4297377947010848e-05, "loss": 0.6225, "step": 26186 }, { "epoch": 0.7190280065897858, "grad_norm": 0.4314621686935425, "learning_rate": 1.4296987963241356e-05, "loss": 0.4625, "step": 26187 }, { "epoch": 0.7190554640307524, "grad_norm": 0.39199307560920715, "learning_rate": 1.4296597971456607e-05, "loss": 0.4791, "step": 26188 }, { "epoch": 0.7190829214717188, "grad_norm": 0.37466001510620117, "learning_rate": 1.4296207971657331e-05, "loss": 0.4416, "step": 26189 }, { "epoch": 0.7191103789126854, "grad_norm": 0.3806830644607544, "learning_rate": 1.4295817963844259e-05, "loss": 0.4978, "step": 26190 }, { "epoch": 0.7191378363536518, "grad_norm": 0.3389679789543152, "learning_rate": 1.4295427948018112e-05, "loss": 0.4997, "step": 26191 }, { "epoch": 0.7191652937946184, "grad_norm": 0.36870241165161133, "learning_rate": 1.4295037924179625e-05, "loss": 0.463, "step": 26192 }, { "epoch": 0.7191927512355848, "grad_norm": 0.36981818079948425, "learning_rate": 1.4294647892329519e-05, "loss": 0.4972, "step": 26193 }, { "epoch": 0.7192202086765513, "grad_norm": 0.4398452639579773, "learning_rate": 1.4294257852468522e-05, "loss": 0.5845, "step": 26194 }, { "epoch": 0.7192476661175179, "grad_norm": 0.3703881800174713, "learning_rate": 1.4293867804597366e-05, "loss": 0.5184, "step": 26195 }, { "epoch": 0.7192751235584843, "grad_norm": 0.43894556164741516, "learning_rate": 1.4293477748716776e-05, "loss": 0.4012, "step": 26196 }, { "epoch": 0.7193025809994509, "grad_norm": 0.4664405882358551, "learning_rate": 1.429308768482748e-05, "loss": 0.4257, "step": 26197 }, { "epoch": 0.7193300384404173, "grad_norm": 0.58287113904953, "learning_rate": 1.4292697612930205e-05, "loss": 0.5843, "step": 26198 }, { "epoch": 0.7193574958813839, "grad_norm": 0.4222634434700012, "learning_rate": 1.4292307533025678e-05, "loss": 0.4701, "step": 26199 }, { "epoch": 0.7193849533223503, "grad_norm": 0.404619038105011, "learning_rate": 1.4291917445114627e-05, "loss": 0.4638, "step": 26200 }, { "epoch": 0.7194124107633169, "grad_norm": 0.4166834354400635, "learning_rate": 1.429152734919778e-05, "loss": 0.4896, "step": 26201 }, { "epoch": 0.7194398682042834, "grad_norm": 0.4430655837059021, "learning_rate": 1.4291137245275868e-05, "loss": 0.5691, "step": 26202 }, { "epoch": 0.7194673256452498, "grad_norm": 0.4023185968399048, "learning_rate": 1.4290747133349614e-05, "loss": 0.4897, "step": 26203 }, { "epoch": 0.7194947830862164, "grad_norm": 0.37929847836494446, "learning_rate": 1.4290357013419745e-05, "loss": 0.4634, "step": 26204 }, { "epoch": 0.7195222405271828, "grad_norm": 0.36354824900627136, "learning_rate": 1.4289966885486992e-05, "loss": 0.4756, "step": 26205 }, { "epoch": 0.7195496979681494, "grad_norm": 0.41010090708732605, "learning_rate": 1.4289576749552081e-05, "loss": 0.5014, "step": 26206 }, { "epoch": 0.7195771554091158, "grad_norm": 0.3832513689994812, "learning_rate": 1.4289186605615744e-05, "loss": 0.5108, "step": 26207 }, { "epoch": 0.7196046128500824, "grad_norm": 0.43626484274864197, "learning_rate": 1.4288796453678702e-05, "loss": 0.513, "step": 26208 }, { "epoch": 0.7196320702910489, "grad_norm": 0.31584861874580383, "learning_rate": 1.4288406293741685e-05, "loss": 0.4582, "step": 26209 }, { "epoch": 0.7196595277320154, "grad_norm": 0.3873485028743744, "learning_rate": 1.4288016125805424e-05, "loss": 0.5044, "step": 26210 }, { "epoch": 0.7196869851729819, "grad_norm": 0.37661442160606384, "learning_rate": 1.4287625949870643e-05, "loss": 0.5308, "step": 26211 }, { "epoch": 0.7197144426139483, "grad_norm": 0.34133851528167725, "learning_rate": 1.428723576593807e-05, "loss": 0.5497, "step": 26212 }, { "epoch": 0.7197419000549149, "grad_norm": 0.3546529710292816, "learning_rate": 1.4286845574008434e-05, "loss": 0.4419, "step": 26213 }, { "epoch": 0.7197693574958813, "grad_norm": 0.436663419008255, "learning_rate": 1.4286455374082463e-05, "loss": 0.5149, "step": 26214 }, { "epoch": 0.7197968149368479, "grad_norm": 0.44005313515663147, "learning_rate": 1.4286065166160888e-05, "loss": 0.53, "step": 26215 }, { "epoch": 0.7198242723778144, "grad_norm": 0.4178759753704071, "learning_rate": 1.428567495024443e-05, "loss": 0.5713, "step": 26216 }, { "epoch": 0.7198517298187809, "grad_norm": 0.37635666131973267, "learning_rate": 1.4285284726333822e-05, "loss": 0.4466, "step": 26217 }, { "epoch": 0.7198791872597474, "grad_norm": 0.3842974305152893, "learning_rate": 1.4284894494429789e-05, "loss": 0.5565, "step": 26218 }, { "epoch": 0.7199066447007139, "grad_norm": 0.39772120118141174, "learning_rate": 1.428450425453306e-05, "loss": 0.4554, "step": 26219 }, { "epoch": 0.7199341021416804, "grad_norm": 0.3840694725513458, "learning_rate": 1.4284114006644365e-05, "loss": 0.4778, "step": 26220 }, { "epoch": 0.7199615595826468, "grad_norm": 0.3528640866279602, "learning_rate": 1.4283723750764429e-05, "loss": 0.4299, "step": 26221 }, { "epoch": 0.7199890170236134, "grad_norm": 0.4639025926589966, "learning_rate": 1.428333348689398e-05, "loss": 0.5386, "step": 26222 }, { "epoch": 0.72001647446458, "grad_norm": 0.37705302238464355, "learning_rate": 1.4282943215033748e-05, "loss": 0.5376, "step": 26223 }, { "epoch": 0.7200439319055464, "grad_norm": 0.37812814116477966, "learning_rate": 1.4282552935184462e-05, "loss": 0.556, "step": 26224 }, { "epoch": 0.7200713893465129, "grad_norm": 0.4301338195800781, "learning_rate": 1.4282162647346844e-05, "loss": 0.5536, "step": 26225 }, { "epoch": 0.7200988467874794, "grad_norm": 0.3351184129714966, "learning_rate": 1.428177235152163e-05, "loss": 0.4926, "step": 26226 }, { "epoch": 0.7201263042284459, "grad_norm": 0.3775240182876587, "learning_rate": 1.428138204770954e-05, "loss": 0.5207, "step": 26227 }, { "epoch": 0.7201537616694124, "grad_norm": 0.41062137484550476, "learning_rate": 1.428099173591131e-05, "loss": 0.4929, "step": 26228 }, { "epoch": 0.7201812191103789, "grad_norm": 0.4157417416572571, "learning_rate": 1.4280601416127662e-05, "loss": 0.5623, "step": 26229 }, { "epoch": 0.7202086765513455, "grad_norm": 0.3604680001735687, "learning_rate": 1.4280211088359324e-05, "loss": 0.4481, "step": 26230 }, { "epoch": 0.7202361339923119, "grad_norm": 0.3841487467288971, "learning_rate": 1.427982075260703e-05, "loss": 0.5473, "step": 26231 }, { "epoch": 0.7202635914332784, "grad_norm": 0.3956283628940582, "learning_rate": 1.4279430408871502e-05, "loss": 0.4776, "step": 26232 }, { "epoch": 0.7202910488742449, "grad_norm": 0.3674543499946594, "learning_rate": 1.427904005715347e-05, "loss": 0.4937, "step": 26233 }, { "epoch": 0.7203185063152114, "grad_norm": 0.4042946696281433, "learning_rate": 1.4278649697453664e-05, "loss": 0.4807, "step": 26234 }, { "epoch": 0.7203459637561779, "grad_norm": 0.4877842664718628, "learning_rate": 1.427825932977281e-05, "loss": 0.5296, "step": 26235 }, { "epoch": 0.7203734211971444, "grad_norm": 0.41079410910606384, "learning_rate": 1.4277868954111637e-05, "loss": 0.5069, "step": 26236 }, { "epoch": 0.720400878638111, "grad_norm": 0.43223297595977783, "learning_rate": 1.4277478570470872e-05, "loss": 0.4927, "step": 26237 }, { "epoch": 0.7204283360790774, "grad_norm": 0.36788058280944824, "learning_rate": 1.4277088178851249e-05, "loss": 0.5992, "step": 26238 }, { "epoch": 0.720455793520044, "grad_norm": 0.4175417721271515, "learning_rate": 1.4276697779253488e-05, "loss": 0.5572, "step": 26239 }, { "epoch": 0.7204832509610104, "grad_norm": 0.38129922747612, "learning_rate": 1.427630737167832e-05, "loss": 0.52, "step": 26240 }, { "epoch": 0.720510708401977, "grad_norm": 0.3569270670413971, "learning_rate": 1.4275916956126475e-05, "loss": 0.438, "step": 26241 }, { "epoch": 0.7205381658429434, "grad_norm": 0.38436394929885864, "learning_rate": 1.427552653259868e-05, "loss": 0.4727, "step": 26242 }, { "epoch": 0.7205656232839099, "grad_norm": 0.39693009853363037, "learning_rate": 1.4275136101095664e-05, "loss": 0.4976, "step": 26243 }, { "epoch": 0.7205930807248765, "grad_norm": 0.4520387351512909, "learning_rate": 1.4274745661618152e-05, "loss": 0.5197, "step": 26244 }, { "epoch": 0.7206205381658429, "grad_norm": 0.3893456757068634, "learning_rate": 1.4274355214166877e-05, "loss": 0.5074, "step": 26245 }, { "epoch": 0.7206479956068095, "grad_norm": 0.3761766254901886, "learning_rate": 1.4273964758742565e-05, "loss": 0.4832, "step": 26246 }, { "epoch": 0.7206754530477759, "grad_norm": 0.41545554995536804, "learning_rate": 1.4273574295345947e-05, "loss": 0.5893, "step": 26247 }, { "epoch": 0.7207029104887425, "grad_norm": 0.43613356351852417, "learning_rate": 1.4273183823977745e-05, "loss": 0.5737, "step": 26248 }, { "epoch": 0.7207303679297089, "grad_norm": 0.5463149547576904, "learning_rate": 1.4272793344638693e-05, "loss": 0.4763, "step": 26249 }, { "epoch": 0.7207578253706755, "grad_norm": 0.36588752269744873, "learning_rate": 1.4272402857329517e-05, "loss": 0.5348, "step": 26250 }, { "epoch": 0.720785282811642, "grad_norm": 0.5114498734474182, "learning_rate": 1.427201236205095e-05, "loss": 0.5291, "step": 26251 }, { "epoch": 0.7208127402526084, "grad_norm": 0.3683733344078064, "learning_rate": 1.4271621858803713e-05, "loss": 0.4701, "step": 26252 }, { "epoch": 0.720840197693575, "grad_norm": 0.3885875344276428, "learning_rate": 1.4271231347588536e-05, "loss": 0.5034, "step": 26253 }, { "epoch": 0.7208676551345414, "grad_norm": 0.41927212476730347, "learning_rate": 1.4270840828406154e-05, "loss": 0.5062, "step": 26254 }, { "epoch": 0.720895112575508, "grad_norm": 0.5036165714263916, "learning_rate": 1.4270450301257288e-05, "loss": 0.5628, "step": 26255 }, { "epoch": 0.7209225700164744, "grad_norm": 0.4025128185749054, "learning_rate": 1.4270059766142668e-05, "loss": 0.4765, "step": 26256 }, { "epoch": 0.720950027457441, "grad_norm": 0.3120341897010803, "learning_rate": 1.4269669223063027e-05, "loss": 0.5093, "step": 26257 }, { "epoch": 0.7209774848984075, "grad_norm": 0.42347878217697144, "learning_rate": 1.4269278672019087e-05, "loss": 0.516, "step": 26258 }, { "epoch": 0.721004942339374, "grad_norm": 0.37198248505592346, "learning_rate": 1.4268888113011583e-05, "loss": 0.4174, "step": 26259 }, { "epoch": 0.7210323997803405, "grad_norm": 0.4406661093235016, "learning_rate": 1.4268497546041235e-05, "loss": 0.5398, "step": 26260 }, { "epoch": 0.7210598572213069, "grad_norm": 0.3685888350009918, "learning_rate": 1.4268106971108781e-05, "loss": 0.4405, "step": 26261 }, { "epoch": 0.7210873146622735, "grad_norm": 0.4906046986579895, "learning_rate": 1.4267716388214947e-05, "loss": 0.5081, "step": 26262 }, { "epoch": 0.7211147721032399, "grad_norm": 0.34039852023124695, "learning_rate": 1.4267325797360457e-05, "loss": 0.4349, "step": 26263 }, { "epoch": 0.7211422295442065, "grad_norm": 0.3763958215713501, "learning_rate": 1.4266935198546042e-05, "loss": 0.5451, "step": 26264 }, { "epoch": 0.721169686985173, "grad_norm": 0.3860439360141754, "learning_rate": 1.4266544591772432e-05, "loss": 0.4771, "step": 26265 }, { "epoch": 0.7211971444261395, "grad_norm": 0.3366050124168396, "learning_rate": 1.4266153977040354e-05, "loss": 0.4724, "step": 26266 }, { "epoch": 0.721224601867106, "grad_norm": 0.3330809772014618, "learning_rate": 1.4265763354350538e-05, "loss": 0.4616, "step": 26267 }, { "epoch": 0.7212520593080725, "grad_norm": 0.38502225279808044, "learning_rate": 1.4265372723703709e-05, "loss": 0.47, "step": 26268 }, { "epoch": 0.721279516749039, "grad_norm": 0.35724079608917236, "learning_rate": 1.4264982085100603e-05, "loss": 0.4877, "step": 26269 }, { "epoch": 0.7213069741900054, "grad_norm": 0.4185185134410858, "learning_rate": 1.426459143854194e-05, "loss": 0.4841, "step": 26270 }, { "epoch": 0.721334431630972, "grad_norm": 0.39674848318099976, "learning_rate": 1.4264200784028456e-05, "loss": 0.4899, "step": 26271 }, { "epoch": 0.7213618890719385, "grad_norm": 0.4386468529701233, "learning_rate": 1.4263810121560878e-05, "loss": 0.4655, "step": 26272 }, { "epoch": 0.721389346512905, "grad_norm": 0.4280010461807251, "learning_rate": 1.4263419451139929e-05, "loss": 0.4762, "step": 26273 }, { "epoch": 0.7214168039538715, "grad_norm": 0.368112176656723, "learning_rate": 1.4263028772766342e-05, "loss": 0.4967, "step": 26274 }, { "epoch": 0.721444261394838, "grad_norm": 0.3781695067882538, "learning_rate": 1.4262638086440848e-05, "loss": 0.511, "step": 26275 }, { "epoch": 0.7214717188358045, "grad_norm": 0.40472105145454407, "learning_rate": 1.4262247392164175e-05, "loss": 0.4738, "step": 26276 }, { "epoch": 0.721499176276771, "grad_norm": 0.4131077826023102, "learning_rate": 1.4261856689937046e-05, "loss": 0.5635, "step": 26277 }, { "epoch": 0.7215266337177375, "grad_norm": 0.38940805196762085, "learning_rate": 1.4261465979760196e-05, "loss": 0.4663, "step": 26278 }, { "epoch": 0.7215540911587041, "grad_norm": 0.39356133341789246, "learning_rate": 1.4261075261634354e-05, "loss": 0.537, "step": 26279 }, { "epoch": 0.7215815485996705, "grad_norm": 0.46857911348342896, "learning_rate": 1.4260684535560244e-05, "loss": 0.5535, "step": 26280 }, { "epoch": 0.721609006040637, "grad_norm": 0.6023870706558228, "learning_rate": 1.4260293801538598e-05, "loss": 0.5408, "step": 26281 }, { "epoch": 0.7216364634816035, "grad_norm": 0.396579384803772, "learning_rate": 1.4259903059570144e-05, "loss": 0.4941, "step": 26282 }, { "epoch": 0.72166392092257, "grad_norm": 0.38739538192749023, "learning_rate": 1.4259512309655612e-05, "loss": 0.4934, "step": 26283 }, { "epoch": 0.7216913783635365, "grad_norm": 0.400272399187088, "learning_rate": 1.4259121551795732e-05, "loss": 0.4158, "step": 26284 }, { "epoch": 0.721718835804503, "grad_norm": 0.37733325362205505, "learning_rate": 1.4258730785991226e-05, "loss": 0.4928, "step": 26285 }, { "epoch": 0.7217462932454696, "grad_norm": 0.3831734359264374, "learning_rate": 1.4258340012242833e-05, "loss": 0.4003, "step": 26286 }, { "epoch": 0.721773750686436, "grad_norm": 0.3889709413051605, "learning_rate": 1.4257949230551274e-05, "loss": 0.5343, "step": 26287 }, { "epoch": 0.7218012081274026, "grad_norm": 0.4269602596759796, "learning_rate": 1.425755844091728e-05, "loss": 0.5037, "step": 26288 }, { "epoch": 0.721828665568369, "grad_norm": 0.3762735426425934, "learning_rate": 1.4257167643341582e-05, "loss": 0.4708, "step": 26289 }, { "epoch": 0.7218561230093355, "grad_norm": 0.42619529366493225, "learning_rate": 1.4256776837824908e-05, "loss": 0.5279, "step": 26290 }, { "epoch": 0.721883580450302, "grad_norm": 0.41751664876937866, "learning_rate": 1.4256386024367986e-05, "loss": 0.484, "step": 26291 }, { "epoch": 0.7219110378912685, "grad_norm": 0.4200271666049957, "learning_rate": 1.4255995202971544e-05, "loss": 0.5013, "step": 26292 }, { "epoch": 0.7219384953322351, "grad_norm": 0.3393397629261017, "learning_rate": 1.4255604373636317e-05, "loss": 0.4235, "step": 26293 }, { "epoch": 0.7219659527732015, "grad_norm": 0.4236258566379547, "learning_rate": 1.4255213536363023e-05, "loss": 0.5022, "step": 26294 }, { "epoch": 0.7219934102141681, "grad_norm": 0.3938595950603485, "learning_rate": 1.4254822691152403e-05, "loss": 0.4811, "step": 26295 }, { "epoch": 0.7220208676551345, "grad_norm": 0.36631831526756287, "learning_rate": 1.425443183800518e-05, "loss": 0.5581, "step": 26296 }, { "epoch": 0.7220483250961011, "grad_norm": 0.38633090257644653, "learning_rate": 1.425404097692208e-05, "loss": 0.5025, "step": 26297 }, { "epoch": 0.7220757825370675, "grad_norm": 0.32096973061561584, "learning_rate": 1.4253650107903843e-05, "loss": 0.442, "step": 26298 }, { "epoch": 0.722103239978034, "grad_norm": 0.41667404770851135, "learning_rate": 1.4253259230951184e-05, "loss": 0.4959, "step": 26299 }, { "epoch": 0.7221306974190006, "grad_norm": 0.3770669102668762, "learning_rate": 1.4252868346064843e-05, "loss": 0.4902, "step": 26300 }, { "epoch": 0.722158154859967, "grad_norm": 0.36138156056404114, "learning_rate": 1.4252477453245546e-05, "loss": 0.4942, "step": 26301 }, { "epoch": 0.7221856123009336, "grad_norm": 0.3463192582130432, "learning_rate": 1.4252086552494015e-05, "loss": 0.4573, "step": 26302 }, { "epoch": 0.7222130697419, "grad_norm": 0.33310213685035706, "learning_rate": 1.4251695643810992e-05, "loss": 0.389, "step": 26303 }, { "epoch": 0.7222405271828666, "grad_norm": 0.38002684712409973, "learning_rate": 1.4251304727197199e-05, "loss": 0.4751, "step": 26304 }, { "epoch": 0.722267984623833, "grad_norm": 0.38640910387039185, "learning_rate": 1.4250913802653364e-05, "loss": 0.4876, "step": 26305 }, { "epoch": 0.7222954420647996, "grad_norm": 0.36124387383461, "learning_rate": 1.4250522870180218e-05, "loss": 0.5134, "step": 26306 }, { "epoch": 0.7223228995057661, "grad_norm": 0.4217796325683594, "learning_rate": 1.4250131929778489e-05, "loss": 0.4264, "step": 26307 }, { "epoch": 0.7223503569467326, "grad_norm": 0.46357014775276184, "learning_rate": 1.424974098144891e-05, "loss": 0.4757, "step": 26308 }, { "epoch": 0.7223778143876991, "grad_norm": 0.5685821771621704, "learning_rate": 1.4249350025192206e-05, "loss": 0.5384, "step": 26309 }, { "epoch": 0.7224052718286655, "grad_norm": 0.40026241540908813, "learning_rate": 1.4248959061009108e-05, "loss": 0.4831, "step": 26310 }, { "epoch": 0.7224327292696321, "grad_norm": 0.35808658599853516, "learning_rate": 1.4248568088900348e-05, "loss": 0.4117, "step": 26311 }, { "epoch": 0.7224601867105985, "grad_norm": 0.352862149477005, "learning_rate": 1.4248177108866648e-05, "loss": 0.4932, "step": 26312 }, { "epoch": 0.7224876441515651, "grad_norm": 0.3899402618408203, "learning_rate": 1.4247786120908745e-05, "loss": 0.5392, "step": 26313 }, { "epoch": 0.7225151015925316, "grad_norm": 0.36253198981285095, "learning_rate": 1.4247395125027365e-05, "loss": 0.5089, "step": 26314 }, { "epoch": 0.7225425590334981, "grad_norm": 0.4078875780105591, "learning_rate": 1.4247004121223236e-05, "loss": 0.5274, "step": 26315 }, { "epoch": 0.7225700164744646, "grad_norm": 0.36778631806373596, "learning_rate": 1.4246613109497092e-05, "loss": 0.5192, "step": 26316 }, { "epoch": 0.722597473915431, "grad_norm": 0.38470765948295593, "learning_rate": 1.4246222089849653e-05, "loss": 0.4711, "step": 26317 }, { "epoch": 0.7226249313563976, "grad_norm": 0.3613913953304291, "learning_rate": 1.4245831062281662e-05, "loss": 0.4448, "step": 26318 }, { "epoch": 0.722652388797364, "grad_norm": 0.40048375725746155, "learning_rate": 1.4245440026793836e-05, "loss": 0.484, "step": 26319 }, { "epoch": 0.7226798462383306, "grad_norm": 0.4068031311035156, "learning_rate": 1.4245048983386908e-05, "loss": 0.5118, "step": 26320 }, { "epoch": 0.7227073036792971, "grad_norm": 0.3690464496612549, "learning_rate": 1.4244657932061614e-05, "loss": 0.4878, "step": 26321 }, { "epoch": 0.7227347611202636, "grad_norm": 0.41541072726249695, "learning_rate": 1.4244266872818677e-05, "loss": 0.5736, "step": 26322 }, { "epoch": 0.7227622185612301, "grad_norm": 0.38711100816726685, "learning_rate": 1.4243875805658824e-05, "loss": 0.4552, "step": 26323 }, { "epoch": 0.7227896760021966, "grad_norm": 0.36797118186950684, "learning_rate": 1.4243484730582793e-05, "loss": 0.4941, "step": 26324 }, { "epoch": 0.7228171334431631, "grad_norm": 0.3849070370197296, "learning_rate": 1.4243093647591305e-05, "loss": 0.4861, "step": 26325 }, { "epoch": 0.7228445908841296, "grad_norm": 0.41786113381385803, "learning_rate": 1.4242702556685095e-05, "loss": 0.5294, "step": 26326 }, { "epoch": 0.7228720483250961, "grad_norm": 0.4417918920516968, "learning_rate": 1.424231145786489e-05, "loss": 0.5263, "step": 26327 }, { "epoch": 0.7228995057660627, "grad_norm": 0.41903501749038696, "learning_rate": 1.424192035113142e-05, "loss": 0.5224, "step": 26328 }, { "epoch": 0.7229269632070291, "grad_norm": 0.34714922308921814, "learning_rate": 1.4241529236485414e-05, "loss": 0.4611, "step": 26329 }, { "epoch": 0.7229544206479956, "grad_norm": 0.3831663727760315, "learning_rate": 1.4241138113927602e-05, "loss": 0.4607, "step": 26330 }, { "epoch": 0.7229818780889621, "grad_norm": 0.48073646426200867, "learning_rate": 1.4240746983458715e-05, "loss": 0.4751, "step": 26331 }, { "epoch": 0.7230093355299286, "grad_norm": 0.3660992681980133, "learning_rate": 1.4240355845079483e-05, "loss": 0.4304, "step": 26332 }, { "epoch": 0.7230367929708951, "grad_norm": 0.5809625387191772, "learning_rate": 1.4239964698790632e-05, "loss": 0.4821, "step": 26333 }, { "epoch": 0.7230642504118616, "grad_norm": 0.3969602882862091, "learning_rate": 1.4239573544592892e-05, "loss": 0.4137, "step": 26334 }, { "epoch": 0.7230917078528282, "grad_norm": 0.39329320192337036, "learning_rate": 1.4239182382486997e-05, "loss": 0.5173, "step": 26335 }, { "epoch": 0.7231191652937946, "grad_norm": 0.3918006718158722, "learning_rate": 1.4238791212473673e-05, "loss": 0.4828, "step": 26336 }, { "epoch": 0.7231466227347612, "grad_norm": 0.37745431065559387, "learning_rate": 1.423840003455365e-05, "loss": 0.5126, "step": 26337 }, { "epoch": 0.7231740801757276, "grad_norm": 0.42074286937713623, "learning_rate": 1.4238008848727659e-05, "loss": 0.5237, "step": 26338 }, { "epoch": 0.7232015376166941, "grad_norm": 0.34583741426467896, "learning_rate": 1.4237617654996428e-05, "loss": 0.5167, "step": 26339 }, { "epoch": 0.7232289950576606, "grad_norm": 0.8585256934165955, "learning_rate": 1.4237226453360688e-05, "loss": 0.4866, "step": 26340 }, { "epoch": 0.7232564524986271, "grad_norm": 0.3312419652938843, "learning_rate": 1.4236835243821168e-05, "loss": 0.3702, "step": 26341 }, { "epoch": 0.7232839099395937, "grad_norm": 0.4142915904521942, "learning_rate": 1.42364440263786e-05, "loss": 0.5588, "step": 26342 }, { "epoch": 0.7233113673805601, "grad_norm": 0.3874225318431854, "learning_rate": 1.4236052801033708e-05, "loss": 0.5469, "step": 26343 }, { "epoch": 0.7233388248215267, "grad_norm": 0.42192745208740234, "learning_rate": 1.4235661567787228e-05, "loss": 0.5976, "step": 26344 }, { "epoch": 0.7233662822624931, "grad_norm": 0.3531978130340576, "learning_rate": 1.423527032663989e-05, "loss": 0.4392, "step": 26345 }, { "epoch": 0.7233937397034597, "grad_norm": 0.3849564790725708, "learning_rate": 1.4234879077592413e-05, "loss": 0.4405, "step": 26346 }, { "epoch": 0.7234211971444261, "grad_norm": 0.3676339089870453, "learning_rate": 1.4234487820645542e-05, "loss": 0.5264, "step": 26347 }, { "epoch": 0.7234486545853926, "grad_norm": 0.3764999806880951, "learning_rate": 1.4234096555799996e-05, "loss": 0.5167, "step": 26348 }, { "epoch": 0.7234761120263592, "grad_norm": 0.42065373063087463, "learning_rate": 1.4233705283056511e-05, "loss": 0.5164, "step": 26349 }, { "epoch": 0.7235035694673256, "grad_norm": 0.37902164459228516, "learning_rate": 1.4233314002415814e-05, "loss": 0.4738, "step": 26350 }, { "epoch": 0.7235310269082922, "grad_norm": 0.39323294162750244, "learning_rate": 1.4232922713878633e-05, "loss": 0.4883, "step": 26351 }, { "epoch": 0.7235584843492586, "grad_norm": 0.3537800908088684, "learning_rate": 1.4232531417445705e-05, "loss": 0.4579, "step": 26352 }, { "epoch": 0.7235859417902252, "grad_norm": 0.38359153270721436, "learning_rate": 1.4232140113117753e-05, "loss": 0.4997, "step": 26353 }, { "epoch": 0.7236133992311916, "grad_norm": 0.4274642765522003, "learning_rate": 1.4231748800895506e-05, "loss": 0.4861, "step": 26354 }, { "epoch": 0.7236408566721582, "grad_norm": 0.35325971245765686, "learning_rate": 1.4231357480779702e-05, "loss": 0.4932, "step": 26355 }, { "epoch": 0.7236683141131247, "grad_norm": 0.4098687469959259, "learning_rate": 1.4230966152771059e-05, "loss": 0.5025, "step": 26356 }, { "epoch": 0.7236957715540911, "grad_norm": 0.39559587836265564, "learning_rate": 1.423057481687032e-05, "loss": 0.4989, "step": 26357 }, { "epoch": 0.7237232289950577, "grad_norm": 0.41673314571380615, "learning_rate": 1.4230183473078208e-05, "loss": 0.5464, "step": 26358 }, { "epoch": 0.7237506864360241, "grad_norm": 0.34742486476898193, "learning_rate": 1.422979212139545e-05, "loss": 0.4663, "step": 26359 }, { "epoch": 0.7237781438769907, "grad_norm": 0.3894139528274536, "learning_rate": 1.4229400761822781e-05, "loss": 0.5462, "step": 26360 }, { "epoch": 0.7238056013179571, "grad_norm": 0.36407729983329773, "learning_rate": 1.4229009394360929e-05, "loss": 0.4054, "step": 26361 }, { "epoch": 0.7238330587589237, "grad_norm": 0.35093462467193604, "learning_rate": 1.4228618019010627e-05, "loss": 0.4662, "step": 26362 }, { "epoch": 0.7238605161998902, "grad_norm": 0.44433826208114624, "learning_rate": 1.4228226635772605e-05, "loss": 0.5486, "step": 26363 }, { "epoch": 0.7238879736408567, "grad_norm": 0.37154632806777954, "learning_rate": 1.4227835244647583e-05, "loss": 0.4859, "step": 26364 }, { "epoch": 0.7239154310818232, "grad_norm": 0.3874884247779846, "learning_rate": 1.4227443845636306e-05, "loss": 0.5098, "step": 26365 }, { "epoch": 0.7239428885227897, "grad_norm": 0.35846519470214844, "learning_rate": 1.4227052438739496e-05, "loss": 0.4561, "step": 26366 }, { "epoch": 0.7239703459637562, "grad_norm": 0.38736188411712646, "learning_rate": 1.422666102395788e-05, "loss": 0.5045, "step": 26367 }, { "epoch": 0.7239978034047226, "grad_norm": 0.36723482608795166, "learning_rate": 1.4226269601292196e-05, "loss": 0.4816, "step": 26368 }, { "epoch": 0.7240252608456892, "grad_norm": 0.37435993552207947, "learning_rate": 1.4225878170743169e-05, "loss": 0.4169, "step": 26369 }, { "epoch": 0.7240527182866557, "grad_norm": 0.4375900328159332, "learning_rate": 1.4225486732311531e-05, "loss": 0.4907, "step": 26370 }, { "epoch": 0.7240801757276222, "grad_norm": 0.3844525218009949, "learning_rate": 1.4225095285998012e-05, "loss": 0.5113, "step": 26371 }, { "epoch": 0.7241076331685887, "grad_norm": 0.37552109360694885, "learning_rate": 1.422470383180334e-05, "loss": 0.4801, "step": 26372 }, { "epoch": 0.7241350906095552, "grad_norm": 0.3887079656124115, "learning_rate": 1.4224312369728248e-05, "loss": 0.5155, "step": 26373 }, { "epoch": 0.7241625480505217, "grad_norm": 0.40262892842292786, "learning_rate": 1.4223920899773465e-05, "loss": 0.446, "step": 26374 }, { "epoch": 0.7241900054914882, "grad_norm": 0.40675559639930725, "learning_rate": 1.4223529421939721e-05, "loss": 0.5028, "step": 26375 }, { "epoch": 0.7242174629324547, "grad_norm": 0.3714660704135895, "learning_rate": 1.4223137936227748e-05, "loss": 0.5056, "step": 26376 }, { "epoch": 0.7242449203734213, "grad_norm": 0.40309402346611023, "learning_rate": 1.4222746442638274e-05, "loss": 0.5118, "step": 26377 }, { "epoch": 0.7242723778143877, "grad_norm": 0.4292336702346802, "learning_rate": 1.422235494117203e-05, "loss": 0.4984, "step": 26378 }, { "epoch": 0.7242998352553542, "grad_norm": 0.3892662525177002, "learning_rate": 1.4221963431829745e-05, "loss": 0.5143, "step": 26379 }, { "epoch": 0.7243272926963207, "grad_norm": 0.38032689690589905, "learning_rate": 1.4221571914612153e-05, "loss": 0.4583, "step": 26380 }, { "epoch": 0.7243547501372872, "grad_norm": 0.34581825137138367, "learning_rate": 1.4221180389519984e-05, "loss": 0.4716, "step": 26381 }, { "epoch": 0.7243822075782537, "grad_norm": 0.37776103615760803, "learning_rate": 1.4220788856553962e-05, "loss": 0.4926, "step": 26382 }, { "epoch": 0.7244096650192202, "grad_norm": 0.43873611092567444, "learning_rate": 1.4220397315714825e-05, "loss": 0.5137, "step": 26383 }, { "epoch": 0.7244371224601867, "grad_norm": 0.36577561497688293, "learning_rate": 1.4220005767003297e-05, "loss": 0.5511, "step": 26384 }, { "epoch": 0.7244645799011532, "grad_norm": 0.43140238523483276, "learning_rate": 1.4219614210420111e-05, "loss": 0.5508, "step": 26385 }, { "epoch": 0.7244920373421198, "grad_norm": 0.3570996820926666, "learning_rate": 1.4219222645966002e-05, "loss": 0.5081, "step": 26386 }, { "epoch": 0.7245194947830862, "grad_norm": 0.4277471899986267, "learning_rate": 1.4218831073641693e-05, "loss": 0.5797, "step": 26387 }, { "epoch": 0.7245469522240527, "grad_norm": 0.4457041621208191, "learning_rate": 1.4218439493447918e-05, "loss": 0.572, "step": 26388 }, { "epoch": 0.7245744096650192, "grad_norm": 0.4033837616443634, "learning_rate": 1.4218047905385408e-05, "loss": 0.4866, "step": 26389 }, { "epoch": 0.7246018671059857, "grad_norm": 0.3534661829471588, "learning_rate": 1.421765630945489e-05, "loss": 0.479, "step": 26390 }, { "epoch": 0.7246293245469522, "grad_norm": 0.41968095302581787, "learning_rate": 1.42172647056571e-05, "loss": 0.5048, "step": 26391 }, { "epoch": 0.7246567819879187, "grad_norm": 0.3813953995704651, "learning_rate": 1.4216873093992763e-05, "loss": 0.4959, "step": 26392 }, { "epoch": 0.7246842394288853, "grad_norm": 0.417678564786911, "learning_rate": 1.4216481474462613e-05, "loss": 0.5247, "step": 26393 }, { "epoch": 0.7247116968698517, "grad_norm": 0.37071678042411804, "learning_rate": 1.421608984706738e-05, "loss": 0.473, "step": 26394 }, { "epoch": 0.7247391543108183, "grad_norm": 0.39828938245773315, "learning_rate": 1.4215698211807791e-05, "loss": 0.5874, "step": 26395 }, { "epoch": 0.7247666117517847, "grad_norm": 0.40159937739372253, "learning_rate": 1.421530656868458e-05, "loss": 0.5622, "step": 26396 }, { "epoch": 0.7247940691927512, "grad_norm": 0.43385884165763855, "learning_rate": 1.4214914917698482e-05, "loss": 0.5691, "step": 26397 }, { "epoch": 0.7248215266337177, "grad_norm": 0.4976484775543213, "learning_rate": 1.4214523258850216e-05, "loss": 0.5757, "step": 26398 }, { "epoch": 0.7248489840746842, "grad_norm": 0.4641304612159729, "learning_rate": 1.4214131592140521e-05, "loss": 0.4672, "step": 26399 }, { "epoch": 0.7248764415156508, "grad_norm": 0.3834673762321472, "learning_rate": 1.4213739917570127e-05, "loss": 0.5402, "step": 26400 }, { "epoch": 0.7249038989566172, "grad_norm": 0.3498471975326538, "learning_rate": 1.4213348235139761e-05, "loss": 0.4811, "step": 26401 }, { "epoch": 0.7249313563975838, "grad_norm": 0.42082828283309937, "learning_rate": 1.4212956544850158e-05, "loss": 0.5085, "step": 26402 }, { "epoch": 0.7249588138385502, "grad_norm": 0.45856621861457825, "learning_rate": 1.4212564846702045e-05, "loss": 0.6257, "step": 26403 }, { "epoch": 0.7249862712795168, "grad_norm": 0.3876584470272064, "learning_rate": 1.4212173140696156e-05, "loss": 0.5957, "step": 26404 }, { "epoch": 0.7250137287204832, "grad_norm": 0.40300464630126953, "learning_rate": 1.4211781426833216e-05, "loss": 0.5562, "step": 26405 }, { "epoch": 0.7250411861614497, "grad_norm": 0.39774537086486816, "learning_rate": 1.4211389705113963e-05, "loss": 0.4741, "step": 26406 }, { "epoch": 0.7250686436024163, "grad_norm": 0.3788633346557617, "learning_rate": 1.4210997975539123e-05, "loss": 0.466, "step": 26407 }, { "epoch": 0.7250961010433827, "grad_norm": 0.37986791133880615, "learning_rate": 1.4210606238109426e-05, "loss": 0.4441, "step": 26408 }, { "epoch": 0.7251235584843493, "grad_norm": 0.36719101667404175, "learning_rate": 1.4210214492825607e-05, "loss": 0.4855, "step": 26409 }, { "epoch": 0.7251510159253157, "grad_norm": 0.38099488615989685, "learning_rate": 1.4209822739688393e-05, "loss": 0.5348, "step": 26410 }, { "epoch": 0.7251784733662823, "grad_norm": 0.8786234855651855, "learning_rate": 1.4209430978698515e-05, "loss": 0.4423, "step": 26411 }, { "epoch": 0.7252059308072487, "grad_norm": 0.3889460861682892, "learning_rate": 1.4209039209856708e-05, "loss": 0.5789, "step": 26412 }, { "epoch": 0.7252333882482153, "grad_norm": 0.3521285951137543, "learning_rate": 1.4208647433163696e-05, "loss": 0.5161, "step": 26413 }, { "epoch": 0.7252608456891818, "grad_norm": 0.36205610632896423, "learning_rate": 1.4208255648620215e-05, "loss": 0.496, "step": 26414 }, { "epoch": 0.7252883031301482, "grad_norm": 0.4542630910873413, "learning_rate": 1.4207863856226994e-05, "loss": 0.5158, "step": 26415 }, { "epoch": 0.7253157605711148, "grad_norm": 0.4029260277748108, "learning_rate": 1.4207472055984764e-05, "loss": 0.5135, "step": 26416 }, { "epoch": 0.7253432180120812, "grad_norm": 0.3832293450832367, "learning_rate": 1.4207080247894257e-05, "loss": 0.5343, "step": 26417 }, { "epoch": 0.7253706754530478, "grad_norm": 0.44434553384780884, "learning_rate": 1.4206688431956198e-05, "loss": 0.4721, "step": 26418 }, { "epoch": 0.7253981328940142, "grad_norm": 0.40917137265205383, "learning_rate": 1.4206296608171325e-05, "loss": 0.4936, "step": 26419 }, { "epoch": 0.7254255903349808, "grad_norm": 0.41396626830101013, "learning_rate": 1.4205904776540368e-05, "loss": 0.5648, "step": 26420 }, { "epoch": 0.7254530477759473, "grad_norm": 0.4792381525039673, "learning_rate": 1.4205512937064055e-05, "loss": 0.6015, "step": 26421 }, { "epoch": 0.7254805052169138, "grad_norm": 0.4989106059074402, "learning_rate": 1.4205121089743118e-05, "loss": 0.4385, "step": 26422 }, { "epoch": 0.7255079626578803, "grad_norm": 0.37714317440986633, "learning_rate": 1.4204729234578288e-05, "loss": 0.531, "step": 26423 }, { "epoch": 0.7255354200988468, "grad_norm": 0.40086984634399414, "learning_rate": 1.4204337371570296e-05, "loss": 0.4412, "step": 26424 }, { "epoch": 0.7255628775398133, "grad_norm": 0.4098984897136688, "learning_rate": 1.4203945500719874e-05, "loss": 0.485, "step": 26425 }, { "epoch": 0.7255903349807797, "grad_norm": 0.3646160662174225, "learning_rate": 1.4203553622027751e-05, "loss": 0.4863, "step": 26426 }, { "epoch": 0.7256177924217463, "grad_norm": 0.3762587904930115, "learning_rate": 1.420316173549466e-05, "loss": 0.4519, "step": 26427 }, { "epoch": 0.7256452498627128, "grad_norm": 0.3749825656414032, "learning_rate": 1.4202769841121329e-05, "loss": 0.4246, "step": 26428 }, { "epoch": 0.7256727073036793, "grad_norm": 0.32484063506126404, "learning_rate": 1.4202377938908489e-05, "loss": 0.5229, "step": 26429 }, { "epoch": 0.7257001647446458, "grad_norm": 0.3740861713886261, "learning_rate": 1.4201986028856878e-05, "loss": 0.4569, "step": 26430 }, { "epoch": 0.7257276221856123, "grad_norm": 0.5718126893043518, "learning_rate": 1.4201594110967218e-05, "loss": 0.5988, "step": 26431 }, { "epoch": 0.7257550796265788, "grad_norm": 0.3922485113143921, "learning_rate": 1.4201202185240247e-05, "loss": 0.491, "step": 26432 }, { "epoch": 0.7257825370675453, "grad_norm": 0.3829801678657532, "learning_rate": 1.4200810251676691e-05, "loss": 0.5354, "step": 26433 }, { "epoch": 0.7258099945085118, "grad_norm": 0.4154652953147888, "learning_rate": 1.4200418310277284e-05, "loss": 0.5481, "step": 26434 }, { "epoch": 0.7258374519494784, "grad_norm": 0.3663593828678131, "learning_rate": 1.4200026361042756e-05, "loss": 0.4595, "step": 26435 }, { "epoch": 0.7258649093904448, "grad_norm": 0.37983477115631104, "learning_rate": 1.4199634403973836e-05, "loss": 0.3936, "step": 26436 }, { "epoch": 0.7258923668314113, "grad_norm": 0.43202972412109375, "learning_rate": 1.419924243907126e-05, "loss": 0.519, "step": 26437 }, { "epoch": 0.7259198242723778, "grad_norm": 0.3615007698535919, "learning_rate": 1.4198850466335758e-05, "loss": 0.4495, "step": 26438 }, { "epoch": 0.7259472817133443, "grad_norm": 0.4083486795425415, "learning_rate": 1.4198458485768057e-05, "loss": 0.5636, "step": 26439 }, { "epoch": 0.7259747391543108, "grad_norm": 0.38871923089027405, "learning_rate": 1.4198066497368893e-05, "loss": 0.5016, "step": 26440 }, { "epoch": 0.7260021965952773, "grad_norm": 0.4324597716331482, "learning_rate": 1.4197674501138993e-05, "loss": 0.4988, "step": 26441 }, { "epoch": 0.7260296540362439, "grad_norm": 0.40918484330177307, "learning_rate": 1.4197282497079092e-05, "loss": 0.5969, "step": 26442 }, { "epoch": 0.7260571114772103, "grad_norm": 0.361136794090271, "learning_rate": 1.419689048518992e-05, "loss": 0.447, "step": 26443 }, { "epoch": 0.7260845689181769, "grad_norm": 0.41849544644355774, "learning_rate": 1.4196498465472206e-05, "loss": 0.5575, "step": 26444 }, { "epoch": 0.7261120263591433, "grad_norm": 0.38336381316185, "learning_rate": 1.4196106437926685e-05, "loss": 0.587, "step": 26445 }, { "epoch": 0.7261394838001098, "grad_norm": 0.3666728734970093, "learning_rate": 1.4195714402554082e-05, "loss": 0.4826, "step": 26446 }, { "epoch": 0.7261669412410763, "grad_norm": 0.44882965087890625, "learning_rate": 1.4195322359355137e-05, "loss": 0.4838, "step": 26447 }, { "epoch": 0.7261943986820428, "grad_norm": 0.3574516177177429, "learning_rate": 1.4194930308330576e-05, "loss": 0.4894, "step": 26448 }, { "epoch": 0.7262218561230094, "grad_norm": 0.3355248272418976, "learning_rate": 1.4194538249481129e-05, "loss": 0.4776, "step": 26449 }, { "epoch": 0.7262493135639758, "grad_norm": 0.422640323638916, "learning_rate": 1.4194146182807531e-05, "loss": 0.5326, "step": 26450 }, { "epoch": 0.7262767710049424, "grad_norm": 0.35390058159828186, "learning_rate": 1.4193754108310512e-05, "loss": 0.4399, "step": 26451 }, { "epoch": 0.7263042284459088, "grad_norm": 0.39028510451316833, "learning_rate": 1.4193362025990803e-05, "loss": 0.5629, "step": 26452 }, { "epoch": 0.7263316858868754, "grad_norm": 0.4464449882507324, "learning_rate": 1.4192969935849134e-05, "loss": 0.4543, "step": 26453 }, { "epoch": 0.7263591433278418, "grad_norm": 0.4140593409538269, "learning_rate": 1.4192577837886239e-05, "loss": 0.4759, "step": 26454 }, { "epoch": 0.7263866007688083, "grad_norm": 0.3974490761756897, "learning_rate": 1.419218573210285e-05, "loss": 0.5187, "step": 26455 }, { "epoch": 0.7264140582097749, "grad_norm": 0.35428494215011597, "learning_rate": 1.4191793618499692e-05, "loss": 0.502, "step": 26456 }, { "epoch": 0.7264415156507413, "grad_norm": 0.444654256105423, "learning_rate": 1.4191401497077504e-05, "loss": 0.4694, "step": 26457 }, { "epoch": 0.7264689730917079, "grad_norm": 0.3769027292728424, "learning_rate": 1.4191009367837015e-05, "loss": 0.4875, "step": 26458 }, { "epoch": 0.7264964305326743, "grad_norm": 0.3933315575122833, "learning_rate": 1.4190617230778955e-05, "loss": 0.5882, "step": 26459 }, { "epoch": 0.7265238879736409, "grad_norm": 0.3647533357143402, "learning_rate": 1.4190225085904057e-05, "loss": 0.4623, "step": 26460 }, { "epoch": 0.7265513454146073, "grad_norm": 0.45978331565856934, "learning_rate": 1.418983293321305e-05, "loss": 0.5435, "step": 26461 }, { "epoch": 0.7265788028555739, "grad_norm": 0.37823987007141113, "learning_rate": 1.4189440772706671e-05, "loss": 0.4919, "step": 26462 }, { "epoch": 0.7266062602965404, "grad_norm": 0.4116574823856354, "learning_rate": 1.4189048604385645e-05, "loss": 0.5491, "step": 26463 }, { "epoch": 0.7266337177375068, "grad_norm": 0.44545435905456543, "learning_rate": 1.4188656428250707e-05, "loss": 0.4653, "step": 26464 }, { "epoch": 0.7266611751784734, "grad_norm": 0.33950814604759216, "learning_rate": 1.4188264244302587e-05, "loss": 0.5353, "step": 26465 }, { "epoch": 0.7266886326194398, "grad_norm": 0.3806222081184387, "learning_rate": 1.4187872052542018e-05, "loss": 0.492, "step": 26466 }, { "epoch": 0.7267160900604064, "grad_norm": 0.4128456711769104, "learning_rate": 1.4187479852969732e-05, "loss": 0.5316, "step": 26467 }, { "epoch": 0.7267435475013728, "grad_norm": 0.35050854086875916, "learning_rate": 1.4187087645586459e-05, "loss": 0.5336, "step": 26468 }, { "epoch": 0.7267710049423394, "grad_norm": 0.41721194982528687, "learning_rate": 1.4186695430392932e-05, "loss": 0.4958, "step": 26469 }, { "epoch": 0.7267984623833059, "grad_norm": 0.464465469121933, "learning_rate": 1.418630320738988e-05, "loss": 0.5014, "step": 26470 }, { "epoch": 0.7268259198242724, "grad_norm": 0.35035431385040283, "learning_rate": 1.4185910976578037e-05, "loss": 0.4107, "step": 26471 }, { "epoch": 0.7268533772652389, "grad_norm": 0.37879011034965515, "learning_rate": 1.4185518737958135e-05, "loss": 0.5833, "step": 26472 }, { "epoch": 0.7268808347062053, "grad_norm": 0.3867303431034088, "learning_rate": 1.4185126491530902e-05, "loss": 0.4525, "step": 26473 }, { "epoch": 0.7269082921471719, "grad_norm": 0.37824559211730957, "learning_rate": 1.4184734237297076e-05, "loss": 0.4968, "step": 26474 }, { "epoch": 0.7269357495881383, "grad_norm": 0.3510550260543823, "learning_rate": 1.418434197525738e-05, "loss": 0.3955, "step": 26475 }, { "epoch": 0.7269632070291049, "grad_norm": 0.403104305267334, "learning_rate": 1.4183949705412556e-05, "loss": 0.4759, "step": 26476 }, { "epoch": 0.7269906644700714, "grad_norm": 0.4203472435474396, "learning_rate": 1.418355742776333e-05, "loss": 0.5627, "step": 26477 }, { "epoch": 0.7270181219110379, "grad_norm": 0.3917500674724579, "learning_rate": 1.4183165142310432e-05, "loss": 0.4908, "step": 26478 }, { "epoch": 0.7270455793520044, "grad_norm": 0.3582918345928192, "learning_rate": 1.4182772849054596e-05, "loss": 0.495, "step": 26479 }, { "epoch": 0.7270730367929709, "grad_norm": 0.3930503726005554, "learning_rate": 1.4182380547996553e-05, "loss": 0.4926, "step": 26480 }, { "epoch": 0.7271004942339374, "grad_norm": 0.36117374897003174, "learning_rate": 1.4181988239137038e-05, "loss": 0.5062, "step": 26481 }, { "epoch": 0.7271279516749038, "grad_norm": 0.4584674537181854, "learning_rate": 1.418159592247678e-05, "loss": 0.4922, "step": 26482 }, { "epoch": 0.7271554091158704, "grad_norm": 0.3861490488052368, "learning_rate": 1.4181203598016508e-05, "loss": 0.4786, "step": 26483 }, { "epoch": 0.727182866556837, "grad_norm": 0.37113747000694275, "learning_rate": 1.418081126575696e-05, "loss": 0.4695, "step": 26484 }, { "epoch": 0.7272103239978034, "grad_norm": 0.36588913202285767, "learning_rate": 1.4180418925698862e-05, "loss": 0.5311, "step": 26485 }, { "epoch": 0.7272377814387699, "grad_norm": 0.3983186185359955, "learning_rate": 1.4180026577842952e-05, "loss": 0.5221, "step": 26486 }, { "epoch": 0.7272652388797364, "grad_norm": 0.3702963590621948, "learning_rate": 1.4179634222189956e-05, "loss": 0.5131, "step": 26487 }, { "epoch": 0.7272926963207029, "grad_norm": 0.42811235785484314, "learning_rate": 1.4179241858740606e-05, "loss": 0.5307, "step": 26488 }, { "epoch": 0.7273201537616694, "grad_norm": 0.42560893297195435, "learning_rate": 1.417884948749564e-05, "loss": 0.5298, "step": 26489 }, { "epoch": 0.7273476112026359, "grad_norm": 0.37565577030181885, "learning_rate": 1.4178457108455785e-05, "loss": 0.4312, "step": 26490 }, { "epoch": 0.7273750686436025, "grad_norm": 0.4072927236557007, "learning_rate": 1.4178064721621771e-05, "loss": 0.4909, "step": 26491 }, { "epoch": 0.7274025260845689, "grad_norm": 0.39482831954956055, "learning_rate": 1.4177672326994337e-05, "loss": 0.5077, "step": 26492 }, { "epoch": 0.7274299835255355, "grad_norm": 0.3971184194087982, "learning_rate": 1.4177279924574207e-05, "loss": 0.5124, "step": 26493 }, { "epoch": 0.7274574409665019, "grad_norm": 0.3772537410259247, "learning_rate": 1.4176887514362122e-05, "loss": 0.4509, "step": 26494 }, { "epoch": 0.7274848984074684, "grad_norm": 0.39377671480178833, "learning_rate": 1.4176495096358804e-05, "loss": 0.4754, "step": 26495 }, { "epoch": 0.7275123558484349, "grad_norm": 0.3578818142414093, "learning_rate": 1.417610267056499e-05, "loss": 0.5896, "step": 26496 }, { "epoch": 0.7275398132894014, "grad_norm": 0.3803613781929016, "learning_rate": 1.4175710236981412e-05, "loss": 0.4972, "step": 26497 }, { "epoch": 0.727567270730368, "grad_norm": 0.3479051887989044, "learning_rate": 1.4175317795608802e-05, "loss": 0.4934, "step": 26498 }, { "epoch": 0.7275947281713344, "grad_norm": 0.3572935461997986, "learning_rate": 1.4174925346447892e-05, "loss": 0.4743, "step": 26499 }, { "epoch": 0.727622185612301, "grad_norm": 0.43500110507011414, "learning_rate": 1.4174532889499415e-05, "loss": 0.5439, "step": 26500 }, { "epoch": 0.7276496430532674, "grad_norm": 0.42536523938179016, "learning_rate": 1.41741404247641e-05, "loss": 0.5395, "step": 26501 }, { "epoch": 0.727677100494234, "grad_norm": 0.3965587615966797, "learning_rate": 1.417374795224268e-05, "loss": 0.5299, "step": 26502 }, { "epoch": 0.7277045579352004, "grad_norm": 0.3151324987411499, "learning_rate": 1.417335547193589e-05, "loss": 0.3839, "step": 26503 }, { "epoch": 0.7277320153761669, "grad_norm": 0.36532846093177795, "learning_rate": 1.4172962983844461e-05, "loss": 0.4491, "step": 26504 }, { "epoch": 0.7277594728171335, "grad_norm": 0.37355268001556396, "learning_rate": 1.4172570487969122e-05, "loss": 0.5088, "step": 26505 }, { "epoch": 0.7277869302580999, "grad_norm": 0.3800819218158722, "learning_rate": 1.4172177984310606e-05, "loss": 0.4695, "step": 26506 }, { "epoch": 0.7278143876990665, "grad_norm": 0.44021841883659363, "learning_rate": 1.417178547286965e-05, "loss": 0.5277, "step": 26507 }, { "epoch": 0.7278418451400329, "grad_norm": 0.41313350200653076, "learning_rate": 1.4171392953646981e-05, "loss": 0.469, "step": 26508 }, { "epoch": 0.7278693025809995, "grad_norm": 0.34809181094169617, "learning_rate": 1.4171000426643332e-05, "loss": 0.4887, "step": 26509 }, { "epoch": 0.7278967600219659, "grad_norm": 0.4057377576828003, "learning_rate": 1.4170607891859435e-05, "loss": 0.5353, "step": 26510 }, { "epoch": 0.7279242174629325, "grad_norm": 0.3681914210319519, "learning_rate": 1.4170215349296026e-05, "loss": 0.5192, "step": 26511 }, { "epoch": 0.727951674903899, "grad_norm": 0.384828120470047, "learning_rate": 1.4169822798953834e-05, "loss": 0.5605, "step": 26512 }, { "epoch": 0.7279791323448654, "grad_norm": 0.3783910870552063, "learning_rate": 1.4169430240833592e-05, "loss": 0.3986, "step": 26513 }, { "epoch": 0.728006589785832, "grad_norm": 0.34458988904953003, "learning_rate": 1.4169037674936028e-05, "loss": 0.5224, "step": 26514 }, { "epoch": 0.7280340472267984, "grad_norm": 0.3403850793838501, "learning_rate": 1.4168645101261882e-05, "loss": 0.4469, "step": 26515 }, { "epoch": 0.728061504667765, "grad_norm": 0.49941548705101013, "learning_rate": 1.416825251981188e-05, "loss": 0.6053, "step": 26516 }, { "epoch": 0.7280889621087314, "grad_norm": 0.38647252321243286, "learning_rate": 1.4167859930586759e-05, "loss": 0.5266, "step": 26517 }, { "epoch": 0.728116419549698, "grad_norm": 0.6274833083152771, "learning_rate": 1.4167467333587247e-05, "loss": 0.4544, "step": 26518 }, { "epoch": 0.7281438769906645, "grad_norm": 0.45255839824676514, "learning_rate": 1.4167074728814081e-05, "loss": 0.6464, "step": 26519 }, { "epoch": 0.728171334431631, "grad_norm": 0.3711800277233124, "learning_rate": 1.416668211626799e-05, "loss": 0.4887, "step": 26520 }, { "epoch": 0.7281987918725975, "grad_norm": 0.37301263213157654, "learning_rate": 1.4166289495949705e-05, "loss": 0.5297, "step": 26521 }, { "epoch": 0.728226249313564, "grad_norm": 0.41086140275001526, "learning_rate": 1.4165896867859961e-05, "loss": 0.4161, "step": 26522 }, { "epoch": 0.7282537067545305, "grad_norm": 0.3640088737010956, "learning_rate": 1.416550423199949e-05, "loss": 0.5215, "step": 26523 }, { "epoch": 0.7282811641954969, "grad_norm": 0.39346370100975037, "learning_rate": 1.4165111588369022e-05, "loss": 0.5225, "step": 26524 }, { "epoch": 0.7283086216364635, "grad_norm": 0.3535346984863281, "learning_rate": 1.4164718936969296e-05, "loss": 0.4657, "step": 26525 }, { "epoch": 0.72833607907743, "grad_norm": 0.3901219964027405, "learning_rate": 1.4164326277801039e-05, "loss": 0.5341, "step": 26526 }, { "epoch": 0.7283635365183965, "grad_norm": 0.4328969120979309, "learning_rate": 1.4163933610864981e-05, "loss": 0.5041, "step": 26527 }, { "epoch": 0.728390993959363, "grad_norm": 0.36454060673713684, "learning_rate": 1.416354093616186e-05, "loss": 0.5042, "step": 26528 }, { "epoch": 0.7284184514003295, "grad_norm": 0.41108620166778564, "learning_rate": 1.4163148253692407e-05, "loss": 0.4659, "step": 26529 }, { "epoch": 0.728445908841296, "grad_norm": 0.45896053314208984, "learning_rate": 1.4162755563457353e-05, "loss": 0.5698, "step": 26530 }, { "epoch": 0.7284733662822624, "grad_norm": 0.38824862241744995, "learning_rate": 1.4162362865457433e-05, "loss": 0.5385, "step": 26531 }, { "epoch": 0.728500823723229, "grad_norm": 0.40727198123931885, "learning_rate": 1.4161970159693375e-05, "loss": 0.5304, "step": 26532 }, { "epoch": 0.7285282811641955, "grad_norm": 0.4097726345062256, "learning_rate": 1.4161577446165915e-05, "loss": 0.5577, "step": 26533 }, { "epoch": 0.728555738605162, "grad_norm": 0.4027020335197449, "learning_rate": 1.4161184724875786e-05, "loss": 0.4932, "step": 26534 }, { "epoch": 0.7285831960461285, "grad_norm": 0.40310585498809814, "learning_rate": 1.4160791995823717e-05, "loss": 0.523, "step": 26535 }, { "epoch": 0.728610653487095, "grad_norm": 0.39297324419021606, "learning_rate": 1.4160399259010447e-05, "loss": 0.4424, "step": 26536 }, { "epoch": 0.7286381109280615, "grad_norm": 0.39155375957489014, "learning_rate": 1.41600065144367e-05, "loss": 0.5157, "step": 26537 }, { "epoch": 0.728665568369028, "grad_norm": 0.4629780650138855, "learning_rate": 1.4159613762103215e-05, "loss": 0.4922, "step": 26538 }, { "epoch": 0.7286930258099945, "grad_norm": 0.37035366892814636, "learning_rate": 1.4159221002010726e-05, "loss": 0.4656, "step": 26539 }, { "epoch": 0.7287204832509611, "grad_norm": 0.3666989207267761, "learning_rate": 1.4158828234159958e-05, "loss": 0.5405, "step": 26540 }, { "epoch": 0.7287479406919275, "grad_norm": 0.38648420572280884, "learning_rate": 1.415843545855165e-05, "loss": 0.4527, "step": 26541 }, { "epoch": 0.728775398132894, "grad_norm": 0.39043840765953064, "learning_rate": 1.4158042675186529e-05, "loss": 0.5284, "step": 26542 }, { "epoch": 0.7288028555738605, "grad_norm": 0.35405468940734863, "learning_rate": 1.4157649884065337e-05, "loss": 0.4679, "step": 26543 }, { "epoch": 0.728830313014827, "grad_norm": 0.338649719953537, "learning_rate": 1.4157257085188799e-05, "loss": 0.4629, "step": 26544 }, { "epoch": 0.7288577704557935, "grad_norm": 0.37344124913215637, "learning_rate": 1.4156864278557647e-05, "loss": 0.467, "step": 26545 }, { "epoch": 0.72888522789676, "grad_norm": 0.3812141716480255, "learning_rate": 1.4156471464172618e-05, "loss": 0.4193, "step": 26546 }, { "epoch": 0.7289126853377266, "grad_norm": 0.3628295063972473, "learning_rate": 1.4156078642034443e-05, "loss": 0.5669, "step": 26547 }, { "epoch": 0.728940142778693, "grad_norm": 0.4500172436237335, "learning_rate": 1.4155685812143856e-05, "loss": 0.5307, "step": 26548 }, { "epoch": 0.7289676002196596, "grad_norm": 0.38682615756988525, "learning_rate": 1.4155292974501588e-05, "loss": 0.5448, "step": 26549 }, { "epoch": 0.728995057660626, "grad_norm": 0.4714806079864502, "learning_rate": 1.4154900129108373e-05, "loss": 0.5412, "step": 26550 }, { "epoch": 0.7290225151015925, "grad_norm": 0.4793491065502167, "learning_rate": 1.415450727596494e-05, "loss": 0.4279, "step": 26551 }, { "epoch": 0.729049972542559, "grad_norm": 0.3594540059566498, "learning_rate": 1.4154114415072032e-05, "loss": 0.5011, "step": 26552 }, { "epoch": 0.7290774299835255, "grad_norm": 0.3965149223804474, "learning_rate": 1.4153721546430369e-05, "loss": 0.4649, "step": 26553 }, { "epoch": 0.7291048874244921, "grad_norm": 0.35032832622528076, "learning_rate": 1.4153328670040692e-05, "loss": 0.4267, "step": 26554 }, { "epoch": 0.7291323448654585, "grad_norm": 0.3988913297653198, "learning_rate": 1.4152935785903727e-05, "loss": 0.4928, "step": 26555 }, { "epoch": 0.7291598023064251, "grad_norm": 0.4012396037578583, "learning_rate": 1.4152542894020217e-05, "loss": 0.4708, "step": 26556 }, { "epoch": 0.7291872597473915, "grad_norm": 0.36177119612693787, "learning_rate": 1.4152149994390887e-05, "loss": 0.5796, "step": 26557 }, { "epoch": 0.7292147171883581, "grad_norm": 0.40994319319725037, "learning_rate": 1.4151757087016472e-05, "loss": 0.4861, "step": 26558 }, { "epoch": 0.7292421746293245, "grad_norm": 0.38693487644195557, "learning_rate": 1.4151364171897703e-05, "loss": 0.4376, "step": 26559 }, { "epoch": 0.729269632070291, "grad_norm": 0.7607212066650391, "learning_rate": 1.4150971249035316e-05, "loss": 0.4897, "step": 26560 }, { "epoch": 0.7292970895112576, "grad_norm": 0.6499704718589783, "learning_rate": 1.4150578318430044e-05, "loss": 0.5245, "step": 26561 }, { "epoch": 0.729324546952224, "grad_norm": 0.42308396100997925, "learning_rate": 1.4150185380082619e-05, "loss": 0.4657, "step": 26562 }, { "epoch": 0.7293520043931906, "grad_norm": 0.3374638855457306, "learning_rate": 1.4149792433993773e-05, "loss": 0.4844, "step": 26563 }, { "epoch": 0.729379461834157, "grad_norm": 0.3751237094402313, "learning_rate": 1.414939948016424e-05, "loss": 0.5307, "step": 26564 }, { "epoch": 0.7294069192751236, "grad_norm": 0.35915347933769226, "learning_rate": 1.4149006518594752e-05, "loss": 0.5347, "step": 26565 }, { "epoch": 0.72943437671609, "grad_norm": 0.4206906855106354, "learning_rate": 1.4148613549286042e-05, "loss": 0.5219, "step": 26566 }, { "epoch": 0.7294618341570566, "grad_norm": 0.3614136278629303, "learning_rate": 1.4148220572238846e-05, "loss": 0.4042, "step": 26567 }, { "epoch": 0.7294892915980231, "grad_norm": 0.41736340522766113, "learning_rate": 1.414782758745389e-05, "loss": 0.5065, "step": 26568 }, { "epoch": 0.7295167490389896, "grad_norm": 0.4216974377632141, "learning_rate": 1.4147434594931918e-05, "loss": 0.5325, "step": 26569 }, { "epoch": 0.7295442064799561, "grad_norm": 0.36988237500190735, "learning_rate": 1.4147041594673654e-05, "loss": 0.5293, "step": 26570 }, { "epoch": 0.7295716639209225, "grad_norm": 0.42117854952812195, "learning_rate": 1.4146648586679831e-05, "loss": 0.4906, "step": 26571 }, { "epoch": 0.7295991213618891, "grad_norm": 0.41042831540107727, "learning_rate": 1.4146255570951188e-05, "loss": 0.4677, "step": 26572 }, { "epoch": 0.7296265788028555, "grad_norm": 0.42743924260139465, "learning_rate": 1.4145862547488454e-05, "loss": 0.4856, "step": 26573 }, { "epoch": 0.7296540362438221, "grad_norm": 0.39331531524658203, "learning_rate": 1.4145469516292366e-05, "loss": 0.5033, "step": 26574 }, { "epoch": 0.7296814936847886, "grad_norm": 0.3990926444530487, "learning_rate": 1.4145076477363651e-05, "loss": 0.5321, "step": 26575 }, { "epoch": 0.7297089511257551, "grad_norm": 0.3844515383243561, "learning_rate": 1.4144683430703045e-05, "loss": 0.559, "step": 26576 }, { "epoch": 0.7297364085667216, "grad_norm": 0.37143275141716003, "learning_rate": 1.4144290376311282e-05, "loss": 0.509, "step": 26577 }, { "epoch": 0.7297638660076881, "grad_norm": 0.3957463204860687, "learning_rate": 1.4143897314189096e-05, "loss": 0.5124, "step": 26578 }, { "epoch": 0.7297913234486546, "grad_norm": 0.36681613326072693, "learning_rate": 1.4143504244337218e-05, "loss": 0.5988, "step": 26579 }, { "epoch": 0.729818780889621, "grad_norm": 0.40924209356307983, "learning_rate": 1.4143111166756385e-05, "loss": 0.5429, "step": 26580 }, { "epoch": 0.7298462383305876, "grad_norm": 0.3682584762573242, "learning_rate": 1.4142718081447324e-05, "loss": 0.4523, "step": 26581 }, { "epoch": 0.7298736957715541, "grad_norm": 0.3872680366039276, "learning_rate": 1.4142324988410772e-05, "loss": 0.4398, "step": 26582 }, { "epoch": 0.7299011532125206, "grad_norm": 0.3861006200313568, "learning_rate": 1.4141931887647465e-05, "loss": 0.5088, "step": 26583 }, { "epoch": 0.7299286106534871, "grad_norm": 0.3463515639305115, "learning_rate": 1.4141538779158128e-05, "loss": 0.4563, "step": 26584 }, { "epoch": 0.7299560680944536, "grad_norm": 0.4558379650115967, "learning_rate": 1.4141145662943503e-05, "loss": 0.5964, "step": 26585 }, { "epoch": 0.7299835255354201, "grad_norm": 0.34973520040512085, "learning_rate": 1.4140752539004316e-05, "loss": 0.4399, "step": 26586 }, { "epoch": 0.7300109829763866, "grad_norm": 0.3631007969379425, "learning_rate": 1.414035940734131e-05, "loss": 0.5333, "step": 26587 }, { "epoch": 0.7300384404173531, "grad_norm": 0.4125973582267761, "learning_rate": 1.4139966267955207e-05, "loss": 0.5464, "step": 26588 }, { "epoch": 0.7300658978583197, "grad_norm": 0.34898966550827026, "learning_rate": 1.4139573120846745e-05, "loss": 0.4814, "step": 26589 }, { "epoch": 0.7300933552992861, "grad_norm": 0.3678034245967865, "learning_rate": 1.4139179966016662e-05, "loss": 0.4951, "step": 26590 }, { "epoch": 0.7301208127402526, "grad_norm": 0.3277418315410614, "learning_rate": 1.4138786803465685e-05, "loss": 0.3801, "step": 26591 }, { "epoch": 0.7301482701812191, "grad_norm": 0.36244693398475647, "learning_rate": 1.413839363319455e-05, "loss": 0.4644, "step": 26592 }, { "epoch": 0.7301757276221856, "grad_norm": 0.40801718831062317, "learning_rate": 1.413800045520399e-05, "loss": 0.5557, "step": 26593 }, { "epoch": 0.7302031850631521, "grad_norm": 0.4273216724395752, "learning_rate": 1.413760726949474e-05, "loss": 0.5198, "step": 26594 }, { "epoch": 0.7302306425041186, "grad_norm": 0.4020334780216217, "learning_rate": 1.4137214076067532e-05, "loss": 0.5638, "step": 26595 }, { "epoch": 0.7302580999450852, "grad_norm": 0.3610541820526123, "learning_rate": 1.4136820874923094e-05, "loss": 0.5159, "step": 26596 }, { "epoch": 0.7302855573860516, "grad_norm": 0.39594796299934387, "learning_rate": 1.4136427666062169e-05, "loss": 0.5206, "step": 26597 }, { "epoch": 0.7303130148270182, "grad_norm": 0.39101001620292664, "learning_rate": 1.4136034449485487e-05, "loss": 0.4311, "step": 26598 }, { "epoch": 0.7303404722679846, "grad_norm": 0.3728178143501282, "learning_rate": 1.413564122519378e-05, "loss": 0.5061, "step": 26599 }, { "epoch": 0.7303679297089511, "grad_norm": 0.376911461353302, "learning_rate": 1.4135247993187781e-05, "loss": 0.5727, "step": 26600 }, { "epoch": 0.7303953871499176, "grad_norm": 0.40042656660079956, "learning_rate": 1.4134854753468226e-05, "loss": 0.4835, "step": 26601 }, { "epoch": 0.7304228445908841, "grad_norm": 0.44072577357292175, "learning_rate": 1.4134461506035846e-05, "loss": 0.5519, "step": 26602 }, { "epoch": 0.7304503020318507, "grad_norm": 0.3764120638370514, "learning_rate": 1.4134068250891377e-05, "loss": 0.4893, "step": 26603 }, { "epoch": 0.7304777594728171, "grad_norm": 0.36523425579071045, "learning_rate": 1.4133674988035548e-05, "loss": 0.4685, "step": 26604 }, { "epoch": 0.7305052169137837, "grad_norm": 0.34030604362487793, "learning_rate": 1.41332817174691e-05, "loss": 0.4473, "step": 26605 }, { "epoch": 0.7305326743547501, "grad_norm": 0.3979475200176239, "learning_rate": 1.413288843919276e-05, "loss": 0.5462, "step": 26606 }, { "epoch": 0.7305601317957167, "grad_norm": 0.3781273663043976, "learning_rate": 1.4132495153207264e-05, "loss": 0.5008, "step": 26607 }, { "epoch": 0.7305875892366831, "grad_norm": 0.41187605261802673, "learning_rate": 1.4132101859513347e-05, "loss": 0.4673, "step": 26608 }, { "epoch": 0.7306150466776496, "grad_norm": 0.42054057121276855, "learning_rate": 1.4131708558111741e-05, "loss": 0.5041, "step": 26609 }, { "epoch": 0.7306425041186162, "grad_norm": 0.36657437682151794, "learning_rate": 1.413131524900318e-05, "loss": 0.5214, "step": 26610 }, { "epoch": 0.7306699615595826, "grad_norm": 0.3942070007324219, "learning_rate": 1.4130921932188398e-05, "loss": 0.5069, "step": 26611 }, { "epoch": 0.7306974190005492, "grad_norm": 0.45702075958251953, "learning_rate": 1.4130528607668124e-05, "loss": 0.5386, "step": 26612 }, { "epoch": 0.7307248764415156, "grad_norm": 0.4739861786365509, "learning_rate": 1.41301352754431e-05, "loss": 0.4841, "step": 26613 }, { "epoch": 0.7307523338824822, "grad_norm": 0.3973667621612549, "learning_rate": 1.4129741935514055e-05, "loss": 0.3918, "step": 26614 }, { "epoch": 0.7307797913234486, "grad_norm": 0.3771193027496338, "learning_rate": 1.4129348587881721e-05, "loss": 0.4598, "step": 26615 }, { "epoch": 0.7308072487644152, "grad_norm": 0.42019081115722656, "learning_rate": 1.4128955232546838e-05, "loss": 0.5464, "step": 26616 }, { "epoch": 0.7308347062053817, "grad_norm": 0.37588950991630554, "learning_rate": 1.4128561869510131e-05, "loss": 0.5691, "step": 26617 }, { "epoch": 0.7308621636463482, "grad_norm": 0.3414279520511627, "learning_rate": 1.4128168498772341e-05, "loss": 0.4597, "step": 26618 }, { "epoch": 0.7308896210873147, "grad_norm": 0.37602299451828003, "learning_rate": 1.41277751203342e-05, "loss": 0.5049, "step": 26619 }, { "epoch": 0.7309170785282811, "grad_norm": 0.37521424889564514, "learning_rate": 1.4127381734196437e-05, "loss": 0.4413, "step": 26620 }, { "epoch": 0.7309445359692477, "grad_norm": 0.5217202305793762, "learning_rate": 1.4126988340359796e-05, "loss": 0.4992, "step": 26621 }, { "epoch": 0.7309719934102141, "grad_norm": 0.37709757685661316, "learning_rate": 1.4126594938824998e-05, "loss": 0.4308, "step": 26622 }, { "epoch": 0.7309994508511807, "grad_norm": 0.3669458329677582, "learning_rate": 1.4126201529592786e-05, "loss": 0.5002, "step": 26623 }, { "epoch": 0.7310269082921472, "grad_norm": 0.4073140621185303, "learning_rate": 1.4125808112663894e-05, "loss": 0.4133, "step": 26624 }, { "epoch": 0.7310543657331137, "grad_norm": 0.4262892007827759, "learning_rate": 1.412541468803905e-05, "loss": 0.6321, "step": 26625 }, { "epoch": 0.7310818231740802, "grad_norm": 0.3695288896560669, "learning_rate": 1.4125021255718991e-05, "loss": 0.4906, "step": 26626 }, { "epoch": 0.7311092806150467, "grad_norm": 0.42302626371383667, "learning_rate": 1.4124627815704452e-05, "loss": 0.3965, "step": 26627 }, { "epoch": 0.7311367380560132, "grad_norm": 0.3469468355178833, "learning_rate": 1.4124234367996165e-05, "loss": 0.4854, "step": 26628 }, { "epoch": 0.7311641954969796, "grad_norm": 0.45364734530448914, "learning_rate": 1.4123840912594866e-05, "loss": 0.5305, "step": 26629 }, { "epoch": 0.7311916529379462, "grad_norm": 0.3702292740345001, "learning_rate": 1.4123447449501286e-05, "loss": 0.5682, "step": 26630 }, { "epoch": 0.7312191103789127, "grad_norm": 0.36429956555366516, "learning_rate": 1.4123053978716162e-05, "loss": 0.4649, "step": 26631 }, { "epoch": 0.7312465678198792, "grad_norm": 0.36547327041625977, "learning_rate": 1.4122660500240222e-05, "loss": 0.5376, "step": 26632 }, { "epoch": 0.7312740252608457, "grad_norm": 0.4035506844520569, "learning_rate": 1.412226701407421e-05, "loss": 0.5301, "step": 26633 }, { "epoch": 0.7313014827018122, "grad_norm": 0.34333980083465576, "learning_rate": 1.4121873520218851e-05, "loss": 0.3999, "step": 26634 }, { "epoch": 0.7313289401427787, "grad_norm": 0.36641931533813477, "learning_rate": 1.4121480018674883e-05, "loss": 0.5865, "step": 26635 }, { "epoch": 0.7313563975837452, "grad_norm": 0.40156421065330505, "learning_rate": 1.4121086509443041e-05, "loss": 0.5058, "step": 26636 }, { "epoch": 0.7313838550247117, "grad_norm": 0.3742820620536804, "learning_rate": 1.4120692992524054e-05, "loss": 0.544, "step": 26637 }, { "epoch": 0.7314113124656783, "grad_norm": 0.4717273712158203, "learning_rate": 1.4120299467918664e-05, "loss": 0.4797, "step": 26638 }, { "epoch": 0.7314387699066447, "grad_norm": 0.43366363644599915, "learning_rate": 1.4119905935627597e-05, "loss": 0.5514, "step": 26639 }, { "epoch": 0.7314662273476112, "grad_norm": 0.3691422939300537, "learning_rate": 1.411951239565159e-05, "loss": 0.4288, "step": 26640 }, { "epoch": 0.7314936847885777, "grad_norm": 0.35187941789627075, "learning_rate": 1.411911884799138e-05, "loss": 0.4482, "step": 26641 }, { "epoch": 0.7315211422295442, "grad_norm": 0.3669714331626892, "learning_rate": 1.4118725292647697e-05, "loss": 0.4393, "step": 26642 }, { "epoch": 0.7315485996705107, "grad_norm": 0.384246826171875, "learning_rate": 1.4118331729621277e-05, "loss": 0.511, "step": 26643 }, { "epoch": 0.7315760571114772, "grad_norm": 0.40885427594184875, "learning_rate": 1.4117938158912854e-05, "loss": 0.5906, "step": 26644 }, { "epoch": 0.7316035145524438, "grad_norm": 0.38156941533088684, "learning_rate": 1.4117544580523163e-05, "loss": 0.5341, "step": 26645 }, { "epoch": 0.7316309719934102, "grad_norm": 0.38611382246017456, "learning_rate": 1.4117150994452937e-05, "loss": 0.5292, "step": 26646 }, { "epoch": 0.7316584294343768, "grad_norm": 0.3753373622894287, "learning_rate": 1.4116757400702907e-05, "loss": 0.4356, "step": 26647 }, { "epoch": 0.7316858868753432, "grad_norm": 0.3972727954387665, "learning_rate": 1.4116363799273816e-05, "loss": 0.5588, "step": 26648 }, { "epoch": 0.7317133443163097, "grad_norm": 0.39224064350128174, "learning_rate": 1.4115970190166387e-05, "loss": 0.5327, "step": 26649 }, { "epoch": 0.7317408017572762, "grad_norm": 0.35966992378234863, "learning_rate": 1.4115576573381364e-05, "loss": 0.5228, "step": 26650 }, { "epoch": 0.7317682591982427, "grad_norm": 0.5135764479637146, "learning_rate": 1.4115182948919475e-05, "loss": 0.5464, "step": 26651 }, { "epoch": 0.7317957166392092, "grad_norm": 0.5260113477706909, "learning_rate": 1.4114789316781458e-05, "loss": 0.5305, "step": 26652 }, { "epoch": 0.7318231740801757, "grad_norm": 0.35490211844444275, "learning_rate": 1.4114395676968047e-05, "loss": 0.531, "step": 26653 }, { "epoch": 0.7318506315211423, "grad_norm": 0.3917534351348877, "learning_rate": 1.4114002029479972e-05, "loss": 0.5294, "step": 26654 }, { "epoch": 0.7318780889621087, "grad_norm": 0.42279505729675293, "learning_rate": 1.4113608374317973e-05, "loss": 0.5256, "step": 26655 }, { "epoch": 0.7319055464030753, "grad_norm": 0.3340851068496704, "learning_rate": 1.4113214711482779e-05, "loss": 0.4211, "step": 26656 }, { "epoch": 0.7319330038440417, "grad_norm": 0.4368550777435303, "learning_rate": 1.4112821040975126e-05, "loss": 0.532, "step": 26657 }, { "epoch": 0.7319604612850082, "grad_norm": 0.4259128272533417, "learning_rate": 1.4112427362795752e-05, "loss": 0.4526, "step": 26658 }, { "epoch": 0.7319879187259747, "grad_norm": 0.41899076104164124, "learning_rate": 1.4112033676945387e-05, "loss": 0.4457, "step": 26659 }, { "epoch": 0.7320153761669412, "grad_norm": 0.38266968727111816, "learning_rate": 1.4111639983424768e-05, "loss": 0.5027, "step": 26660 }, { "epoch": 0.7320428336079078, "grad_norm": 0.3534713685512543, "learning_rate": 1.4111246282234626e-05, "loss": 0.5048, "step": 26661 }, { "epoch": 0.7320702910488742, "grad_norm": 0.4523453712463379, "learning_rate": 1.4110852573375697e-05, "loss": 0.4822, "step": 26662 }, { "epoch": 0.7320977484898408, "grad_norm": 0.4091130495071411, "learning_rate": 1.4110458856848719e-05, "loss": 0.4893, "step": 26663 }, { "epoch": 0.7321252059308072, "grad_norm": 0.4428843557834625, "learning_rate": 1.4110065132654421e-05, "loss": 0.5753, "step": 26664 }, { "epoch": 0.7321526633717738, "grad_norm": 0.3675065040588379, "learning_rate": 1.4109671400793541e-05, "loss": 0.5534, "step": 26665 }, { "epoch": 0.7321801208127402, "grad_norm": 0.4975585341453552, "learning_rate": 1.4109277661266809e-05, "loss": 0.5463, "step": 26666 }, { "epoch": 0.7322075782537067, "grad_norm": 0.35465702414512634, "learning_rate": 1.4108883914074966e-05, "loss": 0.5995, "step": 26667 }, { "epoch": 0.7322350356946733, "grad_norm": 0.35120564699172974, "learning_rate": 1.4108490159218745e-05, "loss": 0.4265, "step": 26668 }, { "epoch": 0.7322624931356397, "grad_norm": 0.4252427816390991, "learning_rate": 1.4108096396698874e-05, "loss": 0.4855, "step": 26669 }, { "epoch": 0.7322899505766063, "grad_norm": 0.4417116641998291, "learning_rate": 1.4107702626516096e-05, "loss": 0.4766, "step": 26670 }, { "epoch": 0.7323174080175727, "grad_norm": 0.3743552565574646, "learning_rate": 1.4107308848671138e-05, "loss": 0.4706, "step": 26671 }, { "epoch": 0.7323448654585393, "grad_norm": 0.4142800569534302, "learning_rate": 1.410691506316474e-05, "loss": 0.5337, "step": 26672 }, { "epoch": 0.7323723228995057, "grad_norm": 0.3921546936035156, "learning_rate": 1.4106521269997636e-05, "loss": 0.488, "step": 26673 }, { "epoch": 0.7323997803404723, "grad_norm": 0.37586069107055664, "learning_rate": 1.4106127469170556e-05, "loss": 0.5124, "step": 26674 }, { "epoch": 0.7324272377814388, "grad_norm": 0.3704080283641815, "learning_rate": 1.410573366068424e-05, "loss": 0.4697, "step": 26675 }, { "epoch": 0.7324546952224052, "grad_norm": 0.355331689119339, "learning_rate": 1.4105339844539418e-05, "loss": 0.5386, "step": 26676 }, { "epoch": 0.7324821526633718, "grad_norm": 0.4029296338558197, "learning_rate": 1.4104946020736829e-05, "loss": 0.435, "step": 26677 }, { "epoch": 0.7325096101043382, "grad_norm": 0.375123530626297, "learning_rate": 1.4104552189277207e-05, "loss": 0.4799, "step": 26678 }, { "epoch": 0.7325370675453048, "grad_norm": 0.43076229095458984, "learning_rate": 1.410415835016128e-05, "loss": 0.4997, "step": 26679 }, { "epoch": 0.7325645249862712, "grad_norm": 0.3566029965877533, "learning_rate": 1.4103764503389794e-05, "loss": 0.5157, "step": 26680 }, { "epoch": 0.7325919824272378, "grad_norm": 0.4659750759601593, "learning_rate": 1.4103370648963475e-05, "loss": 0.5435, "step": 26681 }, { "epoch": 0.7326194398682043, "grad_norm": 0.3504939675331116, "learning_rate": 1.4102976786883059e-05, "loss": 0.4683, "step": 26682 }, { "epoch": 0.7326468973091708, "grad_norm": 0.43005064129829407, "learning_rate": 1.4102582917149281e-05, "loss": 0.5061, "step": 26683 }, { "epoch": 0.7326743547501373, "grad_norm": 0.3728664815425873, "learning_rate": 1.4102189039762878e-05, "loss": 0.4543, "step": 26684 }, { "epoch": 0.7327018121911038, "grad_norm": 0.3582504987716675, "learning_rate": 1.4101795154724582e-05, "loss": 0.4898, "step": 26685 }, { "epoch": 0.7327292696320703, "grad_norm": 0.4484447240829468, "learning_rate": 1.4101401262035132e-05, "loss": 0.5573, "step": 26686 }, { "epoch": 0.7327567270730367, "grad_norm": 0.3991978168487549, "learning_rate": 1.4101007361695256e-05, "loss": 0.3768, "step": 26687 }, { "epoch": 0.7327841845140033, "grad_norm": 0.3809034526348114, "learning_rate": 1.4100613453705694e-05, "loss": 0.5598, "step": 26688 }, { "epoch": 0.7328116419549698, "grad_norm": 0.4048158824443817, "learning_rate": 1.410021953806718e-05, "loss": 0.4241, "step": 26689 }, { "epoch": 0.7328390993959363, "grad_norm": 0.3699227273464203, "learning_rate": 1.4099825614780445e-05, "loss": 0.5024, "step": 26690 }, { "epoch": 0.7328665568369028, "grad_norm": 0.48359525203704834, "learning_rate": 1.409943168384623e-05, "loss": 0.4765, "step": 26691 }, { "epoch": 0.7328940142778693, "grad_norm": 0.3565784692764282, "learning_rate": 1.4099037745265265e-05, "loss": 0.4381, "step": 26692 }, { "epoch": 0.7329214717188358, "grad_norm": 0.4451216757297516, "learning_rate": 1.4098643799038287e-05, "loss": 0.6161, "step": 26693 }, { "epoch": 0.7329489291598023, "grad_norm": 0.37974485754966736, "learning_rate": 1.409824984516603e-05, "loss": 0.4794, "step": 26694 }, { "epoch": 0.7329763866007688, "grad_norm": 0.3507250249385834, "learning_rate": 1.4097855883649227e-05, "loss": 0.4746, "step": 26695 }, { "epoch": 0.7330038440417354, "grad_norm": 0.38968220353126526, "learning_rate": 1.4097461914488617e-05, "loss": 0.4762, "step": 26696 }, { "epoch": 0.7330313014827018, "grad_norm": 0.36109769344329834, "learning_rate": 1.4097067937684931e-05, "loss": 0.5282, "step": 26697 }, { "epoch": 0.7330587589236683, "grad_norm": 0.36757415533065796, "learning_rate": 1.4096673953238908e-05, "loss": 0.4548, "step": 26698 }, { "epoch": 0.7330862163646348, "grad_norm": 0.3779936134815216, "learning_rate": 1.4096279961151282e-05, "loss": 0.5052, "step": 26699 }, { "epoch": 0.7331136738056013, "grad_norm": 0.37249383330345154, "learning_rate": 1.4095885961422782e-05, "loss": 0.5192, "step": 26700 }, { "epoch": 0.7331411312465678, "grad_norm": 0.4277748167514801, "learning_rate": 1.409549195405415e-05, "loss": 0.5789, "step": 26701 }, { "epoch": 0.7331685886875343, "grad_norm": 0.4231413006782532, "learning_rate": 1.4095097939046118e-05, "loss": 0.5703, "step": 26702 }, { "epoch": 0.7331960461285009, "grad_norm": 0.39980870485305786, "learning_rate": 1.4094703916399422e-05, "loss": 0.4315, "step": 26703 }, { "epoch": 0.7332235035694673, "grad_norm": 0.4135701060295105, "learning_rate": 1.4094309886114795e-05, "loss": 0.5196, "step": 26704 }, { "epoch": 0.7332509610104339, "grad_norm": 0.3548392355442047, "learning_rate": 1.4093915848192974e-05, "loss": 0.4487, "step": 26705 }, { "epoch": 0.7332784184514003, "grad_norm": 0.34857165813446045, "learning_rate": 1.4093521802634693e-05, "loss": 0.3435, "step": 26706 }, { "epoch": 0.7333058758923668, "grad_norm": 0.40449267625808716, "learning_rate": 1.4093127749440687e-05, "loss": 0.488, "step": 26707 }, { "epoch": 0.7333333333333333, "grad_norm": 0.3974066972732544, "learning_rate": 1.4092733688611692e-05, "loss": 0.4478, "step": 26708 }, { "epoch": 0.7333607907742998, "grad_norm": 0.41632896661758423, "learning_rate": 1.4092339620148442e-05, "loss": 0.4021, "step": 26709 }, { "epoch": 0.7333882482152664, "grad_norm": 0.4103683829307556, "learning_rate": 1.4091945544051672e-05, "loss": 0.4985, "step": 26710 }, { "epoch": 0.7334157056562328, "grad_norm": 0.3618229031562805, "learning_rate": 1.4091551460322119e-05, "loss": 0.5356, "step": 26711 }, { "epoch": 0.7334431630971994, "grad_norm": 0.3905039131641388, "learning_rate": 1.4091157368960517e-05, "loss": 0.4706, "step": 26712 }, { "epoch": 0.7334706205381658, "grad_norm": 0.4260636568069458, "learning_rate": 1.4090763269967597e-05, "loss": 0.436, "step": 26713 }, { "epoch": 0.7334980779791324, "grad_norm": 0.5614686012268066, "learning_rate": 1.4090369163344104e-05, "loss": 0.4346, "step": 26714 }, { "epoch": 0.7335255354200988, "grad_norm": 0.37865978479385376, "learning_rate": 1.4089975049090762e-05, "loss": 0.4942, "step": 26715 }, { "epoch": 0.7335529928610653, "grad_norm": 0.34778186678886414, "learning_rate": 1.4089580927208312e-05, "loss": 0.4524, "step": 26716 }, { "epoch": 0.7335804503020319, "grad_norm": 0.41114431619644165, "learning_rate": 1.4089186797697492e-05, "loss": 0.595, "step": 26717 }, { "epoch": 0.7336079077429983, "grad_norm": 0.3985481560230255, "learning_rate": 1.408879266055903e-05, "loss": 0.5265, "step": 26718 }, { "epoch": 0.7336353651839649, "grad_norm": 0.4016360342502594, "learning_rate": 1.4088398515793666e-05, "loss": 0.5423, "step": 26719 }, { "epoch": 0.7336628226249313, "grad_norm": 0.4575062394142151, "learning_rate": 1.4088004363402133e-05, "loss": 0.5209, "step": 26720 }, { "epoch": 0.7336902800658979, "grad_norm": 0.3783665895462036, "learning_rate": 1.4087610203385166e-05, "loss": 0.5247, "step": 26721 }, { "epoch": 0.7337177375068643, "grad_norm": 0.34201475977897644, "learning_rate": 1.4087216035743505e-05, "loss": 0.4794, "step": 26722 }, { "epoch": 0.7337451949478309, "grad_norm": 0.5068672299385071, "learning_rate": 1.4086821860477877e-05, "loss": 0.4763, "step": 26723 }, { "epoch": 0.7337726523887974, "grad_norm": 0.3796377182006836, "learning_rate": 1.4086427677589026e-05, "loss": 0.4679, "step": 26724 }, { "epoch": 0.7338001098297638, "grad_norm": 0.5012624263763428, "learning_rate": 1.4086033487077681e-05, "loss": 0.4855, "step": 26725 }, { "epoch": 0.7338275672707304, "grad_norm": 0.37655067443847656, "learning_rate": 1.408563928894458e-05, "loss": 0.4846, "step": 26726 }, { "epoch": 0.7338550247116968, "grad_norm": 0.38984498381614685, "learning_rate": 1.4085245083190457e-05, "loss": 0.477, "step": 26727 }, { "epoch": 0.7338824821526634, "grad_norm": 0.40606385469436646, "learning_rate": 1.4084850869816048e-05, "loss": 0.4923, "step": 26728 }, { "epoch": 0.7339099395936298, "grad_norm": 0.37682977318763733, "learning_rate": 1.408445664882209e-05, "loss": 0.5079, "step": 26729 }, { "epoch": 0.7339373970345964, "grad_norm": 0.3731168806552887, "learning_rate": 1.4084062420209317e-05, "loss": 0.4486, "step": 26730 }, { "epoch": 0.7339648544755629, "grad_norm": 0.3683261573314667, "learning_rate": 1.4083668183978461e-05, "loss": 0.4619, "step": 26731 }, { "epoch": 0.7339923119165294, "grad_norm": 0.3711625337600708, "learning_rate": 1.4083273940130263e-05, "loss": 0.4432, "step": 26732 }, { "epoch": 0.7340197693574959, "grad_norm": 0.35676294565200806, "learning_rate": 1.4082879688665455e-05, "loss": 0.4879, "step": 26733 }, { "epoch": 0.7340472267984623, "grad_norm": 0.386452317237854, "learning_rate": 1.4082485429584774e-05, "loss": 0.4884, "step": 26734 }, { "epoch": 0.7340746842394289, "grad_norm": 0.4428277313709259, "learning_rate": 1.4082091162888953e-05, "loss": 0.4806, "step": 26735 }, { "epoch": 0.7341021416803953, "grad_norm": 0.4111708104610443, "learning_rate": 1.4081696888578731e-05, "loss": 0.5101, "step": 26736 }, { "epoch": 0.7341295991213619, "grad_norm": 0.4626118242740631, "learning_rate": 1.408130260665484e-05, "loss": 0.545, "step": 26737 }, { "epoch": 0.7341570565623284, "grad_norm": 0.36632686853408813, "learning_rate": 1.4080908317118019e-05, "loss": 0.5265, "step": 26738 }, { "epoch": 0.7341845140032949, "grad_norm": 0.4038887321949005, "learning_rate": 1.4080514019968998e-05, "loss": 0.4233, "step": 26739 }, { "epoch": 0.7342119714442614, "grad_norm": 0.3883209228515625, "learning_rate": 1.408011971520852e-05, "loss": 0.5228, "step": 26740 }, { "epoch": 0.7342394288852279, "grad_norm": 0.37469372153282166, "learning_rate": 1.4079725402837315e-05, "loss": 0.4838, "step": 26741 }, { "epoch": 0.7342668863261944, "grad_norm": 0.36086317896842957, "learning_rate": 1.407933108285612e-05, "loss": 0.4578, "step": 26742 }, { "epoch": 0.7342943437671609, "grad_norm": 0.40030696988105774, "learning_rate": 1.407893675526567e-05, "loss": 0.5545, "step": 26743 }, { "epoch": 0.7343218012081274, "grad_norm": 0.3916415274143219, "learning_rate": 1.4078542420066704e-05, "loss": 0.4515, "step": 26744 }, { "epoch": 0.734349258649094, "grad_norm": 0.44614145159721375, "learning_rate": 1.4078148077259952e-05, "loss": 0.5019, "step": 26745 }, { "epoch": 0.7343767160900604, "grad_norm": 0.4030715823173523, "learning_rate": 1.407775372684615e-05, "loss": 0.4345, "step": 26746 }, { "epoch": 0.7344041735310269, "grad_norm": 0.39367079734802246, "learning_rate": 1.407735936882604e-05, "loss": 0.5357, "step": 26747 }, { "epoch": 0.7344316309719934, "grad_norm": 0.41018959879875183, "learning_rate": 1.4076965003200352e-05, "loss": 0.4775, "step": 26748 }, { "epoch": 0.7344590884129599, "grad_norm": 0.3469686210155487, "learning_rate": 1.407657062996982e-05, "loss": 0.4466, "step": 26749 }, { "epoch": 0.7344865458539264, "grad_norm": 0.5237682461738586, "learning_rate": 1.4076176249135187e-05, "loss": 0.5632, "step": 26750 }, { "epoch": 0.7345140032948929, "grad_norm": 0.3647957146167755, "learning_rate": 1.4075781860697182e-05, "loss": 0.5484, "step": 26751 }, { "epoch": 0.7345414607358595, "grad_norm": 0.4532521367073059, "learning_rate": 1.4075387464656544e-05, "loss": 0.4751, "step": 26752 }, { "epoch": 0.7345689181768259, "grad_norm": 0.5405377745628357, "learning_rate": 1.4074993061014008e-05, "loss": 0.4761, "step": 26753 }, { "epoch": 0.7345963756177925, "grad_norm": 0.472959965467453, "learning_rate": 1.4074598649770307e-05, "loss": 0.6116, "step": 26754 }, { "epoch": 0.7346238330587589, "grad_norm": 0.37921208143234253, "learning_rate": 1.4074204230926185e-05, "loss": 0.4894, "step": 26755 }, { "epoch": 0.7346512904997254, "grad_norm": 0.37806081771850586, "learning_rate": 1.4073809804482368e-05, "loss": 0.4929, "step": 26756 }, { "epoch": 0.7346787479406919, "grad_norm": 0.3266824781894684, "learning_rate": 1.4073415370439594e-05, "loss": 0.4875, "step": 26757 }, { "epoch": 0.7347062053816584, "grad_norm": 0.4415694773197174, "learning_rate": 1.4073020928798602e-05, "loss": 0.5264, "step": 26758 }, { "epoch": 0.734733662822625, "grad_norm": 0.37810978293418884, "learning_rate": 1.4072626479560123e-05, "loss": 0.4675, "step": 26759 }, { "epoch": 0.7347611202635914, "grad_norm": 0.4063452482223511, "learning_rate": 1.4072232022724901e-05, "loss": 0.5168, "step": 26760 }, { "epoch": 0.734788577704558, "grad_norm": 0.3630560636520386, "learning_rate": 1.4071837558293664e-05, "loss": 0.5019, "step": 26761 }, { "epoch": 0.7348160351455244, "grad_norm": 0.35066670179367065, "learning_rate": 1.407144308626715e-05, "loss": 0.4511, "step": 26762 }, { "epoch": 0.734843492586491, "grad_norm": 0.47871726751327515, "learning_rate": 1.4071048606646094e-05, "loss": 0.4671, "step": 26763 }, { "epoch": 0.7348709500274574, "grad_norm": 0.36331501603126526, "learning_rate": 1.4070654119431235e-05, "loss": 0.4429, "step": 26764 }, { "epoch": 0.7348984074684239, "grad_norm": 0.40536069869995117, "learning_rate": 1.4070259624623306e-05, "loss": 0.4696, "step": 26765 }, { "epoch": 0.7349258649093905, "grad_norm": 0.44533032178878784, "learning_rate": 1.4069865122223046e-05, "loss": 0.5336, "step": 26766 }, { "epoch": 0.7349533223503569, "grad_norm": 0.3424697816371918, "learning_rate": 1.4069470612231184e-05, "loss": 0.421, "step": 26767 }, { "epoch": 0.7349807797913235, "grad_norm": 0.45720845460891724, "learning_rate": 1.4069076094648464e-05, "loss": 0.5469, "step": 26768 }, { "epoch": 0.7350082372322899, "grad_norm": 0.35980600118637085, "learning_rate": 1.4068681569475615e-05, "loss": 0.499, "step": 26769 }, { "epoch": 0.7350356946732565, "grad_norm": 0.4075120687484741, "learning_rate": 1.4068287036713381e-05, "loss": 0.5372, "step": 26770 }, { "epoch": 0.7350631521142229, "grad_norm": 0.37598979473114014, "learning_rate": 1.4067892496362492e-05, "loss": 0.4659, "step": 26771 }, { "epoch": 0.7350906095551895, "grad_norm": 0.3727644681930542, "learning_rate": 1.4067497948423683e-05, "loss": 0.5116, "step": 26772 }, { "epoch": 0.735118066996156, "grad_norm": 0.40593764185905457, "learning_rate": 1.4067103392897694e-05, "loss": 0.5518, "step": 26773 }, { "epoch": 0.7351455244371224, "grad_norm": 0.45456749200820923, "learning_rate": 1.4066708829785258e-05, "loss": 0.5449, "step": 26774 }, { "epoch": 0.735172981878089, "grad_norm": 0.414571613073349, "learning_rate": 1.4066314259087112e-05, "loss": 0.5349, "step": 26775 }, { "epoch": 0.7352004393190554, "grad_norm": 0.48014166951179504, "learning_rate": 1.4065919680803994e-05, "loss": 0.5015, "step": 26776 }, { "epoch": 0.735227896760022, "grad_norm": 0.36388012766838074, "learning_rate": 1.4065525094936635e-05, "loss": 0.5005, "step": 26777 }, { "epoch": 0.7352553542009884, "grad_norm": 0.42333975434303284, "learning_rate": 1.4065130501485777e-05, "loss": 0.5066, "step": 26778 }, { "epoch": 0.735282811641955, "grad_norm": 0.3746768534183502, "learning_rate": 1.4064735900452153e-05, "loss": 0.483, "step": 26779 }, { "epoch": 0.7353102690829215, "grad_norm": 0.3555462062358856, "learning_rate": 1.4064341291836497e-05, "loss": 0.4283, "step": 26780 }, { "epoch": 0.735337726523888, "grad_norm": 0.38079363107681274, "learning_rate": 1.4063946675639549e-05, "loss": 0.4989, "step": 26781 }, { "epoch": 0.7353651839648545, "grad_norm": 0.38357651233673096, "learning_rate": 1.406355205186204e-05, "loss": 0.4783, "step": 26782 }, { "epoch": 0.735392641405821, "grad_norm": 0.3867793679237366, "learning_rate": 1.4063157420504716e-05, "loss": 0.5737, "step": 26783 }, { "epoch": 0.7354200988467875, "grad_norm": 0.4419555366039276, "learning_rate": 1.4062762781568301e-05, "loss": 0.535, "step": 26784 }, { "epoch": 0.7354475562877539, "grad_norm": 0.36043888330459595, "learning_rate": 1.4062368135053539e-05, "loss": 0.5035, "step": 26785 }, { "epoch": 0.7354750137287205, "grad_norm": 0.4382515251636505, "learning_rate": 1.4061973480961164e-05, "loss": 0.4206, "step": 26786 }, { "epoch": 0.735502471169687, "grad_norm": 0.3858183026313782, "learning_rate": 1.4061578819291914e-05, "loss": 0.4363, "step": 26787 }, { "epoch": 0.7355299286106535, "grad_norm": 0.40409815311431885, "learning_rate": 1.4061184150046518e-05, "loss": 0.5754, "step": 26788 }, { "epoch": 0.73555738605162, "grad_norm": 0.34124770760536194, "learning_rate": 1.4060789473225723e-05, "loss": 0.4872, "step": 26789 }, { "epoch": 0.7355848434925865, "grad_norm": 0.4785085916519165, "learning_rate": 1.4060394788830254e-05, "loss": 0.6009, "step": 26790 }, { "epoch": 0.735612300933553, "grad_norm": 0.34383562207221985, "learning_rate": 1.4060000096860858e-05, "loss": 0.4518, "step": 26791 }, { "epoch": 0.7356397583745194, "grad_norm": 0.3602442443370819, "learning_rate": 1.4059605397318263e-05, "loss": 0.4322, "step": 26792 }, { "epoch": 0.735667215815486, "grad_norm": 0.4535205066204071, "learning_rate": 1.405921069020321e-05, "loss": 0.4844, "step": 26793 }, { "epoch": 0.7356946732564525, "grad_norm": 0.3411996364593506, "learning_rate": 1.4058815975516432e-05, "loss": 0.3879, "step": 26794 }, { "epoch": 0.735722130697419, "grad_norm": 0.3886256515979767, "learning_rate": 1.4058421253258665e-05, "loss": 0.4875, "step": 26795 }, { "epoch": 0.7357495881383855, "grad_norm": 0.4088708460330963, "learning_rate": 1.405802652343065e-05, "loss": 0.4866, "step": 26796 }, { "epoch": 0.735777045579352, "grad_norm": 0.3228149116039276, "learning_rate": 1.4057631786033121e-05, "loss": 0.4509, "step": 26797 }, { "epoch": 0.7358045030203185, "grad_norm": 0.39626410603523254, "learning_rate": 1.4057237041066811e-05, "loss": 0.43, "step": 26798 }, { "epoch": 0.735831960461285, "grad_norm": 0.371433287858963, "learning_rate": 1.405684228853246e-05, "loss": 0.5376, "step": 26799 }, { "epoch": 0.7358594179022515, "grad_norm": 0.3786960542201996, "learning_rate": 1.4056447528430803e-05, "loss": 0.5216, "step": 26800 }, { "epoch": 0.7358868753432181, "grad_norm": 0.3950761556625366, "learning_rate": 1.4056052760762577e-05, "loss": 0.4345, "step": 26801 }, { "epoch": 0.7359143327841845, "grad_norm": 0.3593626916408539, "learning_rate": 1.4055657985528522e-05, "loss": 0.4782, "step": 26802 }, { "epoch": 0.735941790225151, "grad_norm": 0.3727959096431732, "learning_rate": 1.4055263202729363e-05, "loss": 0.4961, "step": 26803 }, { "epoch": 0.7359692476661175, "grad_norm": 0.335500568151474, "learning_rate": 1.405486841236585e-05, "loss": 0.4309, "step": 26804 }, { "epoch": 0.735996705107084, "grad_norm": 0.46572306752204895, "learning_rate": 1.405447361443871e-05, "loss": 0.5871, "step": 26805 }, { "epoch": 0.7360241625480505, "grad_norm": 0.4017355144023895, "learning_rate": 1.4054078808948684e-05, "loss": 0.5151, "step": 26806 }, { "epoch": 0.736051619989017, "grad_norm": 0.3752233386039734, "learning_rate": 1.4053683995896505e-05, "loss": 0.4665, "step": 26807 }, { "epoch": 0.7360790774299836, "grad_norm": 0.40134963393211365, "learning_rate": 1.4053289175282914e-05, "loss": 0.5131, "step": 26808 }, { "epoch": 0.73610653487095, "grad_norm": 0.5097197890281677, "learning_rate": 1.4052894347108646e-05, "loss": 0.4892, "step": 26809 }, { "epoch": 0.7361339923119166, "grad_norm": 0.4008578360080719, "learning_rate": 1.4052499511374433e-05, "loss": 0.5355, "step": 26810 }, { "epoch": 0.736161449752883, "grad_norm": 0.4043530523777008, "learning_rate": 1.4052104668081016e-05, "loss": 0.5077, "step": 26811 }, { "epoch": 0.7361889071938496, "grad_norm": 0.5712657570838928, "learning_rate": 1.4051709817229133e-05, "loss": 0.556, "step": 26812 }, { "epoch": 0.736216364634816, "grad_norm": 0.3757840394973755, "learning_rate": 1.4051314958819514e-05, "loss": 0.486, "step": 26813 }, { "epoch": 0.7362438220757825, "grad_norm": 0.3828083872795105, "learning_rate": 1.4050920092852903e-05, "loss": 0.5365, "step": 26814 }, { "epoch": 0.7362712795167491, "grad_norm": 0.3400215804576874, "learning_rate": 1.405052521933003e-05, "loss": 0.5048, "step": 26815 }, { "epoch": 0.7362987369577155, "grad_norm": 0.3809468150138855, "learning_rate": 1.405013033825164e-05, "loss": 0.4293, "step": 26816 }, { "epoch": 0.7363261943986821, "grad_norm": 0.3395894169807434, "learning_rate": 1.404973544961846e-05, "loss": 0.4598, "step": 26817 }, { "epoch": 0.7363536518396485, "grad_norm": 0.37438374757766724, "learning_rate": 1.404934055343123e-05, "loss": 0.4621, "step": 26818 }, { "epoch": 0.7363811092806151, "grad_norm": 0.47816231846809387, "learning_rate": 1.4048945649690691e-05, "loss": 0.5332, "step": 26819 }, { "epoch": 0.7364085667215815, "grad_norm": 0.35645508766174316, "learning_rate": 1.4048550738397573e-05, "loss": 0.4698, "step": 26820 }, { "epoch": 0.736436024162548, "grad_norm": 0.4041135311126709, "learning_rate": 1.4048155819552617e-05, "loss": 0.5493, "step": 26821 }, { "epoch": 0.7364634816035146, "grad_norm": 0.3920229375362396, "learning_rate": 1.4047760893156561e-05, "loss": 0.4296, "step": 26822 }, { "epoch": 0.736490939044481, "grad_norm": 0.462161660194397, "learning_rate": 1.4047365959210135e-05, "loss": 0.5646, "step": 26823 }, { "epoch": 0.7365183964854476, "grad_norm": 0.3791539967060089, "learning_rate": 1.4046971017714082e-05, "loss": 0.5487, "step": 26824 }, { "epoch": 0.736545853926414, "grad_norm": 0.36192288994789124, "learning_rate": 1.4046576068669135e-05, "loss": 0.4647, "step": 26825 }, { "epoch": 0.7365733113673806, "grad_norm": 0.42394378781318665, "learning_rate": 1.4046181112076034e-05, "loss": 0.4502, "step": 26826 }, { "epoch": 0.736600768808347, "grad_norm": 0.38123819231987, "learning_rate": 1.4045786147935512e-05, "loss": 0.5483, "step": 26827 }, { "epoch": 0.7366282262493136, "grad_norm": 0.41510316729545593, "learning_rate": 1.4045391176248306e-05, "loss": 0.523, "step": 26828 }, { "epoch": 0.7366556836902801, "grad_norm": 0.4080808758735657, "learning_rate": 1.404499619701516e-05, "loss": 0.4025, "step": 26829 }, { "epoch": 0.7366831411312466, "grad_norm": 0.3901219666004181, "learning_rate": 1.4044601210236798e-05, "loss": 0.5374, "step": 26830 }, { "epoch": 0.7367105985722131, "grad_norm": 0.3654945492744446, "learning_rate": 1.4044206215913968e-05, "loss": 0.4977, "step": 26831 }, { "epoch": 0.7367380560131795, "grad_norm": 0.4243938624858856, "learning_rate": 1.4043811214047402e-05, "loss": 0.4552, "step": 26832 }, { "epoch": 0.7367655134541461, "grad_norm": 0.37865304946899414, "learning_rate": 1.4043416204637838e-05, "loss": 0.4619, "step": 26833 }, { "epoch": 0.7367929708951125, "grad_norm": 0.42752042412757874, "learning_rate": 1.4043021187686013e-05, "loss": 0.5033, "step": 26834 }, { "epoch": 0.7368204283360791, "grad_norm": 0.37474361062049866, "learning_rate": 1.404262616319266e-05, "loss": 0.4843, "step": 26835 }, { "epoch": 0.7368478857770456, "grad_norm": 0.39080795645713806, "learning_rate": 1.4042231131158521e-05, "loss": 0.4643, "step": 26836 }, { "epoch": 0.7368753432180121, "grad_norm": 0.4225553274154663, "learning_rate": 1.4041836091584331e-05, "loss": 0.5605, "step": 26837 }, { "epoch": 0.7369028006589786, "grad_norm": 0.49672991037368774, "learning_rate": 1.4041441044470827e-05, "loss": 0.5722, "step": 26838 }, { "epoch": 0.7369302580999451, "grad_norm": 0.363634318113327, "learning_rate": 1.4041045989818744e-05, "loss": 0.4386, "step": 26839 }, { "epoch": 0.7369577155409116, "grad_norm": 0.39900293946266174, "learning_rate": 1.404065092762882e-05, "loss": 0.4836, "step": 26840 }, { "epoch": 0.736985172981878, "grad_norm": 0.3871653079986572, "learning_rate": 1.4040255857901797e-05, "loss": 0.4954, "step": 26841 }, { "epoch": 0.7370126304228446, "grad_norm": 0.4468710124492645, "learning_rate": 1.4039860780638402e-05, "loss": 0.5223, "step": 26842 }, { "epoch": 0.7370400878638111, "grad_norm": 0.3423226773738861, "learning_rate": 1.403946569583938e-05, "loss": 0.4436, "step": 26843 }, { "epoch": 0.7370675453047776, "grad_norm": 0.38789188861846924, "learning_rate": 1.4039070603505465e-05, "loss": 0.5919, "step": 26844 }, { "epoch": 0.7370950027457441, "grad_norm": 0.3712608516216278, "learning_rate": 1.4038675503637393e-05, "loss": 0.4264, "step": 26845 }, { "epoch": 0.7371224601867106, "grad_norm": 0.38093405961990356, "learning_rate": 1.4038280396235903e-05, "loss": 0.4715, "step": 26846 }, { "epoch": 0.7371499176276771, "grad_norm": 0.3771340847015381, "learning_rate": 1.4037885281301731e-05, "loss": 0.4793, "step": 26847 }, { "epoch": 0.7371773750686436, "grad_norm": 0.4227500259876251, "learning_rate": 1.4037490158835617e-05, "loss": 0.477, "step": 26848 }, { "epoch": 0.7372048325096101, "grad_norm": 0.39581841230392456, "learning_rate": 1.403709502883829e-05, "loss": 0.5122, "step": 26849 }, { "epoch": 0.7372322899505767, "grad_norm": 0.3972928822040558, "learning_rate": 1.4036699891310495e-05, "loss": 0.5747, "step": 26850 }, { "epoch": 0.7372597473915431, "grad_norm": 0.4198804199695587, "learning_rate": 1.4036304746252966e-05, "loss": 0.5469, "step": 26851 }, { "epoch": 0.7372872048325096, "grad_norm": 0.3642120063304901, "learning_rate": 1.403590959366644e-05, "loss": 0.4829, "step": 26852 }, { "epoch": 0.7373146622734761, "grad_norm": 0.3882497251033783, "learning_rate": 1.4035514433551655e-05, "loss": 0.5109, "step": 26853 }, { "epoch": 0.7373421197144426, "grad_norm": 0.4718131721019745, "learning_rate": 1.4035119265909349e-05, "loss": 0.5367, "step": 26854 }, { "epoch": 0.7373695771554091, "grad_norm": 0.38770103454589844, "learning_rate": 1.4034724090740256e-05, "loss": 0.4127, "step": 26855 }, { "epoch": 0.7373970345963756, "grad_norm": 0.377190500497818, "learning_rate": 1.4034328908045115e-05, "loss": 0.5243, "step": 26856 }, { "epoch": 0.7374244920373422, "grad_norm": 0.4333525598049164, "learning_rate": 1.4033933717824664e-05, "loss": 0.5192, "step": 26857 }, { "epoch": 0.7374519494783086, "grad_norm": 0.4396527111530304, "learning_rate": 1.4033538520079638e-05, "loss": 0.4861, "step": 26858 }, { "epoch": 0.7374794069192752, "grad_norm": 0.37734219431877136, "learning_rate": 1.4033143314810778e-05, "loss": 0.4707, "step": 26859 }, { "epoch": 0.7375068643602416, "grad_norm": 0.37134724855422974, "learning_rate": 1.4032748102018813e-05, "loss": 0.5001, "step": 26860 }, { "epoch": 0.7375343218012081, "grad_norm": 0.5436025261878967, "learning_rate": 1.403235288170449e-05, "loss": 0.5384, "step": 26861 }, { "epoch": 0.7375617792421746, "grad_norm": 0.40164047479629517, "learning_rate": 1.403195765386854e-05, "loss": 0.546, "step": 26862 }, { "epoch": 0.7375892366831411, "grad_norm": 0.38459765911102295, "learning_rate": 1.4031562418511704e-05, "loss": 0.5496, "step": 26863 }, { "epoch": 0.7376166941241077, "grad_norm": 0.3972252905368805, "learning_rate": 1.4031167175634716e-05, "loss": 0.5411, "step": 26864 }, { "epoch": 0.7376441515650741, "grad_norm": 0.3950878977775574, "learning_rate": 1.4030771925238314e-05, "loss": 0.5337, "step": 26865 }, { "epoch": 0.7376716090060407, "grad_norm": 0.39425697922706604, "learning_rate": 1.4030376667323238e-05, "loss": 0.4519, "step": 26866 }, { "epoch": 0.7376990664470071, "grad_norm": 0.36517825722694397, "learning_rate": 1.4029981401890222e-05, "loss": 0.4502, "step": 26867 }, { "epoch": 0.7377265238879737, "grad_norm": 0.3860747218132019, "learning_rate": 1.4029586128940004e-05, "loss": 0.4663, "step": 26868 }, { "epoch": 0.7377539813289401, "grad_norm": 0.47391536831855774, "learning_rate": 1.4029190848473322e-05, "loss": 0.5615, "step": 26869 }, { "epoch": 0.7377814387699067, "grad_norm": 0.3787294626235962, "learning_rate": 1.4028795560490913e-05, "loss": 0.4764, "step": 26870 }, { "epoch": 0.7378088962108732, "grad_norm": 0.3876148462295532, "learning_rate": 1.4028400264993517e-05, "loss": 0.4974, "step": 26871 }, { "epoch": 0.7378363536518396, "grad_norm": 0.4248621165752411, "learning_rate": 1.4028004961981867e-05, "loss": 0.6069, "step": 26872 }, { "epoch": 0.7378638110928062, "grad_norm": 0.4256625175476074, "learning_rate": 1.40276096514567e-05, "loss": 0.4587, "step": 26873 }, { "epoch": 0.7378912685337726, "grad_norm": 0.3804173469543457, "learning_rate": 1.4027214333418757e-05, "loss": 0.4536, "step": 26874 }, { "epoch": 0.7379187259747392, "grad_norm": 0.43846410512924194, "learning_rate": 1.4026819007868773e-05, "loss": 0.5697, "step": 26875 }, { "epoch": 0.7379461834157056, "grad_norm": 0.3968181610107422, "learning_rate": 1.4026423674807488e-05, "loss": 0.5266, "step": 26876 }, { "epoch": 0.7379736408566722, "grad_norm": 0.3713231682777405, "learning_rate": 1.402602833423564e-05, "loss": 0.5643, "step": 26877 }, { "epoch": 0.7380010982976387, "grad_norm": 0.3924883008003235, "learning_rate": 1.402563298615396e-05, "loss": 0.5078, "step": 26878 }, { "epoch": 0.7380285557386052, "grad_norm": 0.3911442458629608, "learning_rate": 1.4025237630563192e-05, "loss": 0.5714, "step": 26879 }, { "epoch": 0.7380560131795717, "grad_norm": 0.3872321546077728, "learning_rate": 1.402484226746407e-05, "loss": 0.4613, "step": 26880 }, { "epoch": 0.7380834706205381, "grad_norm": 0.3668537437915802, "learning_rate": 1.4024446896857331e-05, "loss": 0.5565, "step": 26881 }, { "epoch": 0.7381109280615047, "grad_norm": 0.447473406791687, "learning_rate": 1.4024051518743718e-05, "loss": 0.5889, "step": 26882 }, { "epoch": 0.7381383855024711, "grad_norm": 0.4213132858276367, "learning_rate": 1.4023656133123959e-05, "loss": 0.5611, "step": 26883 }, { "epoch": 0.7381658429434377, "grad_norm": 0.35202112793922424, "learning_rate": 1.4023260739998804e-05, "loss": 0.4745, "step": 26884 }, { "epoch": 0.7381933003844042, "grad_norm": 0.3622756600379944, "learning_rate": 1.402286533936898e-05, "loss": 0.498, "step": 26885 }, { "epoch": 0.7382207578253707, "grad_norm": 0.41173428297042847, "learning_rate": 1.4022469931235226e-05, "loss": 0.4599, "step": 26886 }, { "epoch": 0.7382482152663372, "grad_norm": 0.35925057530403137, "learning_rate": 1.4022074515598286e-05, "loss": 0.5048, "step": 26887 }, { "epoch": 0.7382756727073037, "grad_norm": 0.37363913655281067, "learning_rate": 1.4021679092458888e-05, "loss": 0.4914, "step": 26888 }, { "epoch": 0.7383031301482702, "grad_norm": 0.39672181010246277, "learning_rate": 1.4021283661817781e-05, "loss": 0.534, "step": 26889 }, { "epoch": 0.7383305875892366, "grad_norm": 0.3558575212955475, "learning_rate": 1.4020888223675694e-05, "loss": 0.4711, "step": 26890 }, { "epoch": 0.7383580450302032, "grad_norm": 0.3770904839038849, "learning_rate": 1.4020492778033368e-05, "loss": 0.4182, "step": 26891 }, { "epoch": 0.7383855024711697, "grad_norm": 1.0287595987319946, "learning_rate": 1.4020097324891537e-05, "loss": 0.4297, "step": 26892 }, { "epoch": 0.7384129599121362, "grad_norm": 0.36092114448547363, "learning_rate": 1.4019701864250943e-05, "loss": 0.5392, "step": 26893 }, { "epoch": 0.7384404173531027, "grad_norm": 0.6335458159446716, "learning_rate": 1.4019306396112321e-05, "loss": 0.4896, "step": 26894 }, { "epoch": 0.7384678747940692, "grad_norm": 0.41617828607559204, "learning_rate": 1.4018910920476414e-05, "loss": 0.545, "step": 26895 }, { "epoch": 0.7384953322350357, "grad_norm": 3.185993194580078, "learning_rate": 1.4018515437343952e-05, "loss": 0.4745, "step": 26896 }, { "epoch": 0.7385227896760022, "grad_norm": 0.3929463028907776, "learning_rate": 1.4018119946715678e-05, "loss": 0.4456, "step": 26897 }, { "epoch": 0.7385502471169687, "grad_norm": 0.41995003819465637, "learning_rate": 1.4017724448592327e-05, "loss": 0.5387, "step": 26898 }, { "epoch": 0.7385777045579353, "grad_norm": 0.4150804877281189, "learning_rate": 1.4017328942974635e-05, "loss": 0.4559, "step": 26899 }, { "epoch": 0.7386051619989017, "grad_norm": 0.38101813197135925, "learning_rate": 1.4016933429863346e-05, "loss": 0.4486, "step": 26900 }, { "epoch": 0.7386326194398682, "grad_norm": 0.3665793538093567, "learning_rate": 1.4016537909259191e-05, "loss": 0.4496, "step": 26901 }, { "epoch": 0.7386600768808347, "grad_norm": 0.4896700978279114, "learning_rate": 1.4016142381162913e-05, "loss": 0.6119, "step": 26902 }, { "epoch": 0.7386875343218012, "grad_norm": 0.4386366307735443, "learning_rate": 1.4015746845575249e-05, "loss": 0.4467, "step": 26903 }, { "epoch": 0.7387149917627677, "grad_norm": 0.3772076964378357, "learning_rate": 1.4015351302496932e-05, "loss": 0.4884, "step": 26904 }, { "epoch": 0.7387424492037342, "grad_norm": 0.34964191913604736, "learning_rate": 1.4014955751928705e-05, "loss": 0.4984, "step": 26905 }, { "epoch": 0.7387699066447008, "grad_norm": 0.4497126340866089, "learning_rate": 1.4014560193871303e-05, "loss": 0.4908, "step": 26906 }, { "epoch": 0.7387973640856672, "grad_norm": 0.3677826523780823, "learning_rate": 1.4014164628325466e-05, "loss": 0.3728, "step": 26907 }, { "epoch": 0.7388248215266338, "grad_norm": 0.37968766689300537, "learning_rate": 1.4013769055291932e-05, "loss": 0.4846, "step": 26908 }, { "epoch": 0.7388522789676002, "grad_norm": 0.3600018322467804, "learning_rate": 1.4013373474771435e-05, "loss": 0.3895, "step": 26909 }, { "epoch": 0.7388797364085667, "grad_norm": 0.3752509653568268, "learning_rate": 1.4012977886764718e-05, "loss": 0.5828, "step": 26910 }, { "epoch": 0.7389071938495332, "grad_norm": 0.4137454628944397, "learning_rate": 1.4012582291272516e-05, "loss": 0.4445, "step": 26911 }, { "epoch": 0.7389346512904997, "grad_norm": 0.36200979351997375, "learning_rate": 1.4012186688295566e-05, "loss": 0.4829, "step": 26912 }, { "epoch": 0.7389621087314663, "grad_norm": 0.4001387655735016, "learning_rate": 1.4011791077834609e-05, "loss": 0.4656, "step": 26913 }, { "epoch": 0.7389895661724327, "grad_norm": 0.4600948989391327, "learning_rate": 1.4011395459890378e-05, "loss": 0.4824, "step": 26914 }, { "epoch": 0.7390170236133993, "grad_norm": 0.38273128867149353, "learning_rate": 1.4010999834463616e-05, "loss": 0.5034, "step": 26915 }, { "epoch": 0.7390444810543657, "grad_norm": 0.38257551193237305, "learning_rate": 1.4010604201555063e-05, "loss": 0.4281, "step": 26916 }, { "epoch": 0.7390719384953323, "grad_norm": 0.359611451625824, "learning_rate": 1.4010208561165449e-05, "loss": 0.4987, "step": 26917 }, { "epoch": 0.7390993959362987, "grad_norm": 0.33377569913864136, "learning_rate": 1.4009812913295515e-05, "loss": 0.5034, "step": 26918 }, { "epoch": 0.7391268533772652, "grad_norm": 0.5776461958885193, "learning_rate": 1.4009417257946001e-05, "loss": 0.4336, "step": 26919 }, { "epoch": 0.7391543108182317, "grad_norm": 0.36637192964553833, "learning_rate": 1.4009021595117648e-05, "loss": 0.5662, "step": 26920 }, { "epoch": 0.7391817682591982, "grad_norm": 0.45175227522850037, "learning_rate": 1.4008625924811185e-05, "loss": 0.5161, "step": 26921 }, { "epoch": 0.7392092257001648, "grad_norm": 0.40861761569976807, "learning_rate": 1.4008230247027358e-05, "loss": 0.4665, "step": 26922 }, { "epoch": 0.7392366831411312, "grad_norm": 0.3550074100494385, "learning_rate": 1.4007834561766902e-05, "loss": 0.4674, "step": 26923 }, { "epoch": 0.7392641405820978, "grad_norm": 0.3695279359817505, "learning_rate": 1.4007438869030554e-05, "loss": 0.4523, "step": 26924 }, { "epoch": 0.7392915980230642, "grad_norm": 0.37004032731056213, "learning_rate": 1.4007043168819056e-05, "loss": 0.4861, "step": 26925 }, { "epoch": 0.7393190554640308, "grad_norm": 0.38590580224990845, "learning_rate": 1.4006647461133143e-05, "loss": 0.4698, "step": 26926 }, { "epoch": 0.7393465129049972, "grad_norm": 0.40374884009361267, "learning_rate": 1.400625174597355e-05, "loss": 0.5441, "step": 26927 }, { "epoch": 0.7393739703459637, "grad_norm": 0.4075644910335541, "learning_rate": 1.4005856023341025e-05, "loss": 0.4611, "step": 26928 }, { "epoch": 0.7394014277869303, "grad_norm": 0.41321998834609985, "learning_rate": 1.4005460293236297e-05, "loss": 0.4346, "step": 26929 }, { "epoch": 0.7394288852278967, "grad_norm": 0.4256496727466583, "learning_rate": 1.4005064555660106e-05, "loss": 0.5977, "step": 26930 }, { "epoch": 0.7394563426688633, "grad_norm": 0.33093923330307007, "learning_rate": 1.4004668810613193e-05, "loss": 0.5213, "step": 26931 }, { "epoch": 0.7394838001098297, "grad_norm": 0.38425198197364807, "learning_rate": 1.4004273058096294e-05, "loss": 0.5047, "step": 26932 }, { "epoch": 0.7395112575507963, "grad_norm": 0.3922576308250427, "learning_rate": 1.4003877298110146e-05, "loss": 0.4393, "step": 26933 }, { "epoch": 0.7395387149917627, "grad_norm": 0.37931686639785767, "learning_rate": 1.4003481530655491e-05, "loss": 0.4704, "step": 26934 }, { "epoch": 0.7395661724327293, "grad_norm": 0.4024805426597595, "learning_rate": 1.4003085755733064e-05, "loss": 0.5363, "step": 26935 }, { "epoch": 0.7395936298736958, "grad_norm": 0.4000808894634247, "learning_rate": 1.4002689973343605e-05, "loss": 0.5339, "step": 26936 }, { "epoch": 0.7396210873146623, "grad_norm": 0.4065569341182709, "learning_rate": 1.4002294183487852e-05, "loss": 0.485, "step": 26937 }, { "epoch": 0.7396485447556288, "grad_norm": 0.3905651271343231, "learning_rate": 1.4001898386166544e-05, "loss": 0.5601, "step": 26938 }, { "epoch": 0.7396760021965952, "grad_norm": 0.4125363230705261, "learning_rate": 1.4001502581380417e-05, "loss": 0.4413, "step": 26939 }, { "epoch": 0.7397034596375618, "grad_norm": 1.035237193107605, "learning_rate": 1.4001106769130211e-05, "loss": 0.5391, "step": 26940 }, { "epoch": 0.7397309170785282, "grad_norm": 0.3474701941013336, "learning_rate": 1.4000710949416663e-05, "loss": 0.5034, "step": 26941 }, { "epoch": 0.7397583745194948, "grad_norm": 0.38741669058799744, "learning_rate": 1.4000315122240515e-05, "loss": 0.4733, "step": 26942 }, { "epoch": 0.7397858319604613, "grad_norm": 0.4131896197795868, "learning_rate": 1.3999919287602497e-05, "loss": 0.4786, "step": 26943 }, { "epoch": 0.7398132894014278, "grad_norm": 0.3835640251636505, "learning_rate": 1.3999523445503358e-05, "loss": 0.5531, "step": 26944 }, { "epoch": 0.7398407468423943, "grad_norm": 0.4858456552028656, "learning_rate": 1.3999127595943828e-05, "loss": 0.5689, "step": 26945 }, { "epoch": 0.7398682042833608, "grad_norm": 0.3768438994884491, "learning_rate": 1.3998731738924652e-05, "loss": 0.5192, "step": 26946 }, { "epoch": 0.7398956617243273, "grad_norm": 0.3518599271774292, "learning_rate": 1.3998335874446562e-05, "loss": 0.46, "step": 26947 }, { "epoch": 0.7399231191652937, "grad_norm": 0.3840305209159851, "learning_rate": 1.39979400025103e-05, "loss": 0.4731, "step": 26948 }, { "epoch": 0.7399505766062603, "grad_norm": 0.4118255376815796, "learning_rate": 1.3997544123116606e-05, "loss": 0.4935, "step": 26949 }, { "epoch": 0.7399780340472268, "grad_norm": 0.4147377014160156, "learning_rate": 1.3997148236266213e-05, "loss": 0.4052, "step": 26950 }, { "epoch": 0.7400054914881933, "grad_norm": 0.38787901401519775, "learning_rate": 1.3996752341959866e-05, "loss": 0.5542, "step": 26951 }, { "epoch": 0.7400329489291598, "grad_norm": 0.4729222357273102, "learning_rate": 1.39963564401983e-05, "loss": 0.4429, "step": 26952 }, { "epoch": 0.7400604063701263, "grad_norm": 0.4370659589767456, "learning_rate": 1.399596053098225e-05, "loss": 0.5768, "step": 26953 }, { "epoch": 0.7400878638110928, "grad_norm": 0.4059261083602905, "learning_rate": 1.3995564614312463e-05, "loss": 0.4678, "step": 26954 }, { "epoch": 0.7401153212520593, "grad_norm": 0.4078056812286377, "learning_rate": 1.399516869018967e-05, "loss": 0.5788, "step": 26955 }, { "epoch": 0.7401427786930258, "grad_norm": 0.3330034911632538, "learning_rate": 1.3994772758614612e-05, "loss": 0.4323, "step": 26956 }, { "epoch": 0.7401702361339924, "grad_norm": 0.39782974123954773, "learning_rate": 1.3994376819588032e-05, "loss": 0.5074, "step": 26957 }, { "epoch": 0.7401976935749588, "grad_norm": 0.3972403407096863, "learning_rate": 1.3993980873110658e-05, "loss": 0.5037, "step": 26958 }, { "epoch": 0.7402251510159253, "grad_norm": 0.4074924886226654, "learning_rate": 1.3993584919183238e-05, "loss": 0.6275, "step": 26959 }, { "epoch": 0.7402526084568918, "grad_norm": 0.4455668032169342, "learning_rate": 1.3993188957806509e-05, "loss": 0.492, "step": 26960 }, { "epoch": 0.7402800658978583, "grad_norm": 0.3915172517299652, "learning_rate": 1.3992792988981206e-05, "loss": 0.4534, "step": 26961 }, { "epoch": 0.7403075233388248, "grad_norm": 0.41746827960014343, "learning_rate": 1.399239701270807e-05, "loss": 0.5172, "step": 26962 }, { "epoch": 0.7403349807797913, "grad_norm": 0.38396212458610535, "learning_rate": 1.3992001028987841e-05, "loss": 0.5713, "step": 26963 }, { "epoch": 0.7403624382207579, "grad_norm": 0.4991645812988281, "learning_rate": 1.3991605037821255e-05, "loss": 0.5662, "step": 26964 }, { "epoch": 0.7403898956617243, "grad_norm": 0.4520252048969269, "learning_rate": 1.3991209039209052e-05, "loss": 0.5531, "step": 26965 }, { "epoch": 0.7404173531026909, "grad_norm": 0.36058536171913147, "learning_rate": 1.3990813033151969e-05, "loss": 0.4766, "step": 26966 }, { "epoch": 0.7404448105436573, "grad_norm": 0.38229936361312866, "learning_rate": 1.3990417019650747e-05, "loss": 0.521, "step": 26967 }, { "epoch": 0.7404722679846238, "grad_norm": 0.46099817752838135, "learning_rate": 1.3990020998706122e-05, "loss": 0.5066, "step": 26968 }, { "epoch": 0.7404997254255903, "grad_norm": 0.34967130422592163, "learning_rate": 1.3989624970318836e-05, "loss": 0.4916, "step": 26969 }, { "epoch": 0.7405271828665568, "grad_norm": 0.37066882848739624, "learning_rate": 1.3989228934489627e-05, "loss": 0.522, "step": 26970 }, { "epoch": 0.7405546403075234, "grad_norm": 0.3502979874610901, "learning_rate": 1.398883289121923e-05, "loss": 0.4554, "step": 26971 }, { "epoch": 0.7405820977484898, "grad_norm": 0.37085363268852234, "learning_rate": 1.398843684050839e-05, "loss": 0.5997, "step": 26972 }, { "epoch": 0.7406095551894564, "grad_norm": 0.3762287497520447, "learning_rate": 1.3988040782357841e-05, "loss": 0.4961, "step": 26973 }, { "epoch": 0.7406370126304228, "grad_norm": 0.4316796064376831, "learning_rate": 1.3987644716768321e-05, "loss": 0.4849, "step": 26974 }, { "epoch": 0.7406644700713894, "grad_norm": 0.3795869052410126, "learning_rate": 1.3987248643740575e-05, "loss": 0.531, "step": 26975 }, { "epoch": 0.7406919275123558, "grad_norm": 0.45316386222839355, "learning_rate": 1.3986852563275333e-05, "loss": 0.6502, "step": 26976 }, { "epoch": 0.7407193849533223, "grad_norm": 0.40596747398376465, "learning_rate": 1.398645647537334e-05, "loss": 0.4478, "step": 26977 }, { "epoch": 0.7407468423942889, "grad_norm": 0.38838255405426025, "learning_rate": 1.3986060380035335e-05, "loss": 0.4654, "step": 26978 }, { "epoch": 0.7407742998352553, "grad_norm": 0.3709757626056671, "learning_rate": 1.3985664277262055e-05, "loss": 0.5165, "step": 26979 }, { "epoch": 0.7408017572762219, "grad_norm": 0.406777560710907, "learning_rate": 1.3985268167054237e-05, "loss": 0.4421, "step": 26980 }, { "epoch": 0.7408292147171883, "grad_norm": 0.399940550327301, "learning_rate": 1.3984872049412623e-05, "loss": 0.5169, "step": 26981 }, { "epoch": 0.7408566721581549, "grad_norm": 0.4080324172973633, "learning_rate": 1.3984475924337951e-05, "loss": 0.5316, "step": 26982 }, { "epoch": 0.7408841295991213, "grad_norm": 0.36110150814056396, "learning_rate": 1.3984079791830963e-05, "loss": 0.501, "step": 26983 }, { "epoch": 0.7409115870400879, "grad_norm": 0.34707656502723694, "learning_rate": 1.3983683651892389e-05, "loss": 0.5373, "step": 26984 }, { "epoch": 0.7409390444810544, "grad_norm": 0.38079214096069336, "learning_rate": 1.3983287504522976e-05, "loss": 0.5472, "step": 26985 }, { "epoch": 0.7409665019220208, "grad_norm": 0.37639397382736206, "learning_rate": 1.398289134972346e-05, "loss": 0.4688, "step": 26986 }, { "epoch": 0.7409939593629874, "grad_norm": 0.4476366341114044, "learning_rate": 1.398249518749458e-05, "loss": 0.5715, "step": 26987 }, { "epoch": 0.7410214168039538, "grad_norm": 0.37277206778526306, "learning_rate": 1.3982099017837079e-05, "loss": 0.5241, "step": 26988 }, { "epoch": 0.7410488742449204, "grad_norm": 0.4106844663619995, "learning_rate": 1.3981702840751687e-05, "loss": 0.5212, "step": 26989 }, { "epoch": 0.7410763316858868, "grad_norm": 0.5333192348480225, "learning_rate": 1.3981306656239153e-05, "loss": 0.6125, "step": 26990 }, { "epoch": 0.7411037891268534, "grad_norm": 0.4036438465118408, "learning_rate": 1.3980910464300208e-05, "loss": 0.5118, "step": 26991 }, { "epoch": 0.7411312465678199, "grad_norm": 0.38734087347984314, "learning_rate": 1.3980514264935596e-05, "loss": 0.5278, "step": 26992 }, { "epoch": 0.7411587040087864, "grad_norm": 0.3706018030643463, "learning_rate": 1.3980118058146055e-05, "loss": 0.453, "step": 26993 }, { "epoch": 0.7411861614497529, "grad_norm": 0.3623046278953552, "learning_rate": 1.397972184393232e-05, "loss": 0.4827, "step": 26994 }, { "epoch": 0.7412136188907194, "grad_norm": 0.39502736926078796, "learning_rate": 1.3979325622295139e-05, "loss": 0.5576, "step": 26995 }, { "epoch": 0.7412410763316859, "grad_norm": 0.3824015259742737, "learning_rate": 1.3978929393235241e-05, "loss": 0.6003, "step": 26996 }, { "epoch": 0.7412685337726523, "grad_norm": 0.40936264395713806, "learning_rate": 1.3978533156753373e-05, "loss": 0.5178, "step": 26997 }, { "epoch": 0.7412959912136189, "grad_norm": 0.39788392186164856, "learning_rate": 1.3978136912850271e-05, "loss": 0.5091, "step": 26998 }, { "epoch": 0.7413234486545854, "grad_norm": 0.4086301624774933, "learning_rate": 1.3977740661526672e-05, "loss": 0.4624, "step": 26999 }, { "epoch": 0.7413509060955519, "grad_norm": 0.38847866654396057, "learning_rate": 1.3977344402783317e-05, "loss": 0.4791, "step": 27000 }, { "epoch": 0.7413783635365184, "grad_norm": 0.36396417021751404, "learning_rate": 1.3976948136620947e-05, "loss": 0.4861, "step": 27001 }, { "epoch": 0.7414058209774849, "grad_norm": 0.4006044566631317, "learning_rate": 1.3976551863040298e-05, "loss": 0.5493, "step": 27002 }, { "epoch": 0.7414332784184514, "grad_norm": 0.4012194573879242, "learning_rate": 1.3976155582042112e-05, "loss": 0.502, "step": 27003 }, { "epoch": 0.7414607358594179, "grad_norm": 0.6170787811279297, "learning_rate": 1.3975759293627124e-05, "loss": 0.3974, "step": 27004 }, { "epoch": 0.7414881933003844, "grad_norm": 0.4621216952800751, "learning_rate": 1.397536299779608e-05, "loss": 0.5754, "step": 27005 }, { "epoch": 0.741515650741351, "grad_norm": 0.3639104664325714, "learning_rate": 1.397496669454971e-05, "loss": 0.5249, "step": 27006 }, { "epoch": 0.7415431081823174, "grad_norm": 0.40076789259910583, "learning_rate": 1.3974570383888764e-05, "loss": 0.4694, "step": 27007 }, { "epoch": 0.7415705656232839, "grad_norm": 0.3661685287952423, "learning_rate": 1.3974174065813975e-05, "loss": 0.4826, "step": 27008 }, { "epoch": 0.7415980230642504, "grad_norm": 0.3940727114677429, "learning_rate": 1.3973777740326078e-05, "loss": 0.5932, "step": 27009 }, { "epoch": 0.7416254805052169, "grad_norm": 0.5018472075462341, "learning_rate": 1.3973381407425821e-05, "loss": 0.5114, "step": 27010 }, { "epoch": 0.7416529379461834, "grad_norm": 0.7577658891677856, "learning_rate": 1.3972985067113938e-05, "loss": 0.4143, "step": 27011 }, { "epoch": 0.7416803953871499, "grad_norm": 0.34429749846458435, "learning_rate": 1.3972588719391172e-05, "loss": 0.5219, "step": 27012 }, { "epoch": 0.7417078528281165, "grad_norm": 0.37247148156166077, "learning_rate": 1.3972192364258258e-05, "loss": 0.5266, "step": 27013 }, { "epoch": 0.7417353102690829, "grad_norm": 0.3828836977481842, "learning_rate": 1.3971796001715938e-05, "loss": 0.4829, "step": 27014 }, { "epoch": 0.7417627677100495, "grad_norm": 0.38407185673713684, "learning_rate": 1.3971399631764952e-05, "loss": 0.511, "step": 27015 }, { "epoch": 0.7417902251510159, "grad_norm": 0.3723770081996918, "learning_rate": 1.3971003254406038e-05, "loss": 0.4689, "step": 27016 }, { "epoch": 0.7418176825919824, "grad_norm": 0.34597229957580566, "learning_rate": 1.3970606869639936e-05, "loss": 0.4586, "step": 27017 }, { "epoch": 0.7418451400329489, "grad_norm": 0.3791463375091553, "learning_rate": 1.3970210477467381e-05, "loss": 0.5616, "step": 27018 }, { "epoch": 0.7418725974739154, "grad_norm": 0.5693525075912476, "learning_rate": 1.3969814077889121e-05, "loss": 0.5547, "step": 27019 }, { "epoch": 0.741900054914882, "grad_norm": 0.6281541585922241, "learning_rate": 1.3969417670905889e-05, "loss": 0.5113, "step": 27020 }, { "epoch": 0.7419275123558484, "grad_norm": 0.3776805102825165, "learning_rate": 1.3969021256518424e-05, "loss": 0.5099, "step": 27021 }, { "epoch": 0.741954969796815, "grad_norm": 0.39486807584762573, "learning_rate": 1.396862483472747e-05, "loss": 0.5539, "step": 27022 }, { "epoch": 0.7419824272377814, "grad_norm": 0.40166792273521423, "learning_rate": 1.3968228405533764e-05, "loss": 0.5669, "step": 27023 }, { "epoch": 0.742009884678748, "grad_norm": 0.3637751340866089, "learning_rate": 1.3967831968938046e-05, "loss": 0.4277, "step": 27024 }, { "epoch": 0.7420373421197144, "grad_norm": 0.3840588629245758, "learning_rate": 1.3967435524941056e-05, "loss": 0.5955, "step": 27025 }, { "epoch": 0.7420647995606809, "grad_norm": 0.4896114766597748, "learning_rate": 1.3967039073543528e-05, "loss": 0.5564, "step": 27026 }, { "epoch": 0.7420922570016475, "grad_norm": 0.4592808485031128, "learning_rate": 1.3966642614746209e-05, "loss": 0.5053, "step": 27027 }, { "epoch": 0.7421197144426139, "grad_norm": 0.380420982837677, "learning_rate": 1.3966246148549834e-05, "loss": 0.5416, "step": 27028 }, { "epoch": 0.7421471718835805, "grad_norm": 0.4022286534309387, "learning_rate": 1.3965849674955148e-05, "loss": 0.4983, "step": 27029 }, { "epoch": 0.7421746293245469, "grad_norm": 0.40779808163642883, "learning_rate": 1.3965453193962881e-05, "loss": 0.5306, "step": 27030 }, { "epoch": 0.7422020867655135, "grad_norm": 0.3884509801864624, "learning_rate": 1.396505670557378e-05, "loss": 0.3986, "step": 27031 }, { "epoch": 0.7422295442064799, "grad_norm": 0.3529687225818634, "learning_rate": 1.3964660209788585e-05, "loss": 0.4739, "step": 27032 }, { "epoch": 0.7422570016474465, "grad_norm": 0.3728959262371063, "learning_rate": 1.3964263706608032e-05, "loss": 0.4908, "step": 27033 }, { "epoch": 0.742284459088413, "grad_norm": 0.3877026438713074, "learning_rate": 1.3963867196032863e-05, "loss": 0.4273, "step": 27034 }, { "epoch": 0.7423119165293794, "grad_norm": 0.365773469209671, "learning_rate": 1.3963470678063815e-05, "loss": 0.5108, "step": 27035 }, { "epoch": 0.742339373970346, "grad_norm": 0.4517700970172882, "learning_rate": 1.396307415270163e-05, "loss": 0.6046, "step": 27036 }, { "epoch": 0.7423668314113124, "grad_norm": 0.3713710308074951, "learning_rate": 1.3962677619947048e-05, "loss": 0.5133, "step": 27037 }, { "epoch": 0.742394288852279, "grad_norm": 0.4072043001651764, "learning_rate": 1.3962281079800803e-05, "loss": 0.5265, "step": 27038 }, { "epoch": 0.7424217462932454, "grad_norm": 0.41244199872016907, "learning_rate": 1.3961884532263646e-05, "loss": 0.4787, "step": 27039 }, { "epoch": 0.742449203734212, "grad_norm": 0.3624032735824585, "learning_rate": 1.3961487977336306e-05, "loss": 0.4867, "step": 27040 }, { "epoch": 0.7424766611751785, "grad_norm": 0.34586301445961, "learning_rate": 1.3961091415019525e-05, "loss": 0.5436, "step": 27041 }, { "epoch": 0.742504118616145, "grad_norm": 0.41804537177085876, "learning_rate": 1.3960694845314047e-05, "loss": 0.5208, "step": 27042 }, { "epoch": 0.7425315760571115, "grad_norm": 0.4163196086883545, "learning_rate": 1.3960298268220606e-05, "loss": 0.5266, "step": 27043 }, { "epoch": 0.742559033498078, "grad_norm": 0.38681161403656006, "learning_rate": 1.3959901683739946e-05, "loss": 0.4889, "step": 27044 }, { "epoch": 0.7425864909390445, "grad_norm": 0.4204488694667816, "learning_rate": 1.395950509187281e-05, "loss": 0.5392, "step": 27045 }, { "epoch": 0.7426139483800109, "grad_norm": 0.34840127825737, "learning_rate": 1.3959108492619925e-05, "loss": 0.4505, "step": 27046 }, { "epoch": 0.7426414058209775, "grad_norm": 0.3483383357524872, "learning_rate": 1.3958711885982045e-05, "loss": 0.4448, "step": 27047 }, { "epoch": 0.742668863261944, "grad_norm": 0.4208407998085022, "learning_rate": 1.3958315271959903e-05, "loss": 0.5637, "step": 27048 }, { "epoch": 0.7426963207029105, "grad_norm": 0.445615291595459, "learning_rate": 1.395791865055424e-05, "loss": 0.553, "step": 27049 }, { "epoch": 0.742723778143877, "grad_norm": 0.45298632979393005, "learning_rate": 1.3957522021765794e-05, "loss": 0.5358, "step": 27050 }, { "epoch": 0.7427512355848435, "grad_norm": 0.3946841061115265, "learning_rate": 1.3957125385595307e-05, "loss": 0.4639, "step": 27051 }, { "epoch": 0.74277869302581, "grad_norm": 0.37979593873023987, "learning_rate": 1.395672874204352e-05, "loss": 0.5293, "step": 27052 }, { "epoch": 0.7428061504667764, "grad_norm": 0.413076788187027, "learning_rate": 1.3956332091111168e-05, "loss": 0.4377, "step": 27053 }, { "epoch": 0.742833607907743, "grad_norm": 0.36655354499816895, "learning_rate": 1.3955935432798995e-05, "loss": 0.5263, "step": 27054 }, { "epoch": 0.7428610653487095, "grad_norm": 0.39434880018234253, "learning_rate": 1.395553876710774e-05, "loss": 0.533, "step": 27055 }, { "epoch": 0.742888522789676, "grad_norm": 0.38567689061164856, "learning_rate": 1.3955142094038141e-05, "loss": 0.533, "step": 27056 }, { "epoch": 0.7429159802306425, "grad_norm": 0.5730993151664734, "learning_rate": 1.3954745413590942e-05, "loss": 0.4025, "step": 27057 }, { "epoch": 0.742943437671609, "grad_norm": 0.3477962017059326, "learning_rate": 1.3954348725766882e-05, "loss": 0.4427, "step": 27058 }, { "epoch": 0.7429708951125755, "grad_norm": 0.39207765460014343, "learning_rate": 1.3953952030566696e-05, "loss": 0.4823, "step": 27059 }, { "epoch": 0.742998352553542, "grad_norm": 0.4089158773422241, "learning_rate": 1.3953555327991128e-05, "loss": 0.505, "step": 27060 }, { "epoch": 0.7430258099945085, "grad_norm": 0.6506774425506592, "learning_rate": 1.3953158618040918e-05, "loss": 0.4924, "step": 27061 }, { "epoch": 0.7430532674354751, "grad_norm": 0.40461763739585876, "learning_rate": 1.3952761900716807e-05, "loss": 0.5104, "step": 27062 }, { "epoch": 0.7430807248764415, "grad_norm": 0.40663713216781616, "learning_rate": 1.3952365176019533e-05, "loss": 0.5684, "step": 27063 }, { "epoch": 0.743108182317408, "grad_norm": 0.4110662341117859, "learning_rate": 1.3951968443949834e-05, "loss": 0.501, "step": 27064 }, { "epoch": 0.7431356397583745, "grad_norm": 0.43960919976234436, "learning_rate": 1.3951571704508457e-05, "loss": 0.6208, "step": 27065 }, { "epoch": 0.743163097199341, "grad_norm": 0.41747182607650757, "learning_rate": 1.3951174957696135e-05, "loss": 0.4687, "step": 27066 }, { "epoch": 0.7431905546403075, "grad_norm": 0.3488908112049103, "learning_rate": 1.3950778203513608e-05, "loss": 0.4216, "step": 27067 }, { "epoch": 0.743218012081274, "grad_norm": 0.3998335003852844, "learning_rate": 1.3950381441961623e-05, "loss": 0.4383, "step": 27068 }, { "epoch": 0.7432454695222406, "grad_norm": 0.36452800035476685, "learning_rate": 1.3949984673040913e-05, "loss": 0.4885, "step": 27069 }, { "epoch": 0.743272926963207, "grad_norm": 0.40931764245033264, "learning_rate": 1.3949587896752223e-05, "loss": 0.5214, "step": 27070 }, { "epoch": 0.7433003844041736, "grad_norm": 0.36774489283561707, "learning_rate": 1.3949191113096292e-05, "loss": 0.5405, "step": 27071 }, { "epoch": 0.74332784184514, "grad_norm": 0.39834725856781006, "learning_rate": 1.3948794322073856e-05, "loss": 0.4999, "step": 27072 }, { "epoch": 0.7433552992861066, "grad_norm": 0.43503931164741516, "learning_rate": 1.394839752368566e-05, "loss": 0.5113, "step": 27073 }, { "epoch": 0.743382756727073, "grad_norm": 0.40426960587501526, "learning_rate": 1.394800071793244e-05, "loss": 0.4931, "step": 27074 }, { "epoch": 0.7434102141680395, "grad_norm": 0.3584212064743042, "learning_rate": 1.3947603904814941e-05, "loss": 0.424, "step": 27075 }, { "epoch": 0.7434376716090061, "grad_norm": 0.4627923369407654, "learning_rate": 1.3947207084333902e-05, "loss": 0.4763, "step": 27076 }, { "epoch": 0.7434651290499725, "grad_norm": 0.431939959526062, "learning_rate": 1.394681025649006e-05, "loss": 0.4371, "step": 27077 }, { "epoch": 0.7434925864909391, "grad_norm": 0.37147459387779236, "learning_rate": 1.3946413421284157e-05, "loss": 0.4794, "step": 27078 }, { "epoch": 0.7435200439319055, "grad_norm": 0.374906450510025, "learning_rate": 1.3946016578716932e-05, "loss": 0.4879, "step": 27079 }, { "epoch": 0.7435475013728721, "grad_norm": 0.38800692558288574, "learning_rate": 1.3945619728789131e-05, "loss": 0.5119, "step": 27080 }, { "epoch": 0.7435749588138385, "grad_norm": 0.4813782572746277, "learning_rate": 1.3945222871501486e-05, "loss": 0.5977, "step": 27081 }, { "epoch": 0.743602416254805, "grad_norm": 0.3642158508300781, "learning_rate": 1.3944826006854743e-05, "loss": 0.4433, "step": 27082 }, { "epoch": 0.7436298736957716, "grad_norm": 0.3974789083003998, "learning_rate": 1.394442913484964e-05, "loss": 0.529, "step": 27083 }, { "epoch": 0.743657331136738, "grad_norm": 0.37380459904670715, "learning_rate": 1.3944032255486919e-05, "loss": 0.4875, "step": 27084 }, { "epoch": 0.7436847885777046, "grad_norm": 0.3741873502731323, "learning_rate": 1.3943635368767317e-05, "loss": 0.5714, "step": 27085 }, { "epoch": 0.743712246018671, "grad_norm": 0.3588683307170868, "learning_rate": 1.3943238474691578e-05, "loss": 0.4343, "step": 27086 }, { "epoch": 0.7437397034596376, "grad_norm": 0.37900128960609436, "learning_rate": 1.3942841573260439e-05, "loss": 0.4288, "step": 27087 }, { "epoch": 0.743767160900604, "grad_norm": 0.3489559292793274, "learning_rate": 1.3942444664474644e-05, "loss": 0.4004, "step": 27088 }, { "epoch": 0.7437946183415706, "grad_norm": 0.4358506500720978, "learning_rate": 1.3942047748334932e-05, "loss": 0.3661, "step": 27089 }, { "epoch": 0.7438220757825371, "grad_norm": 0.3340131640434265, "learning_rate": 1.3941650824842038e-05, "loss": 0.4497, "step": 27090 }, { "epoch": 0.7438495332235036, "grad_norm": 0.36977794766426086, "learning_rate": 1.394125389399671e-05, "loss": 0.4955, "step": 27091 }, { "epoch": 0.7438769906644701, "grad_norm": 0.45542672276496887, "learning_rate": 1.394085695579969e-05, "loss": 0.4593, "step": 27092 }, { "epoch": 0.7439044481054365, "grad_norm": 0.47883540391921997, "learning_rate": 1.3940460010251708e-05, "loss": 0.5406, "step": 27093 }, { "epoch": 0.7439319055464031, "grad_norm": 0.4001176655292511, "learning_rate": 1.3940063057353515e-05, "loss": 0.5008, "step": 27094 }, { "epoch": 0.7439593629873695, "grad_norm": 0.41670510172843933, "learning_rate": 1.3939666097105843e-05, "loss": 0.5625, "step": 27095 }, { "epoch": 0.7439868204283361, "grad_norm": 0.371585488319397, "learning_rate": 1.3939269129509438e-05, "loss": 0.4543, "step": 27096 }, { "epoch": 0.7440142778693026, "grad_norm": 0.3722008168697357, "learning_rate": 1.3938872154565037e-05, "loss": 0.5741, "step": 27097 }, { "epoch": 0.7440417353102691, "grad_norm": 0.4285443127155304, "learning_rate": 1.3938475172273385e-05, "loss": 0.517, "step": 27098 }, { "epoch": 0.7440691927512356, "grad_norm": 0.38197964429855347, "learning_rate": 1.3938078182635217e-05, "loss": 0.4778, "step": 27099 }, { "epoch": 0.7440966501922021, "grad_norm": 0.35393184423446655, "learning_rate": 1.3937681185651277e-05, "loss": 0.4518, "step": 27100 }, { "epoch": 0.7441241076331686, "grad_norm": 0.4424516558647156, "learning_rate": 1.3937284181322308e-05, "loss": 0.4988, "step": 27101 }, { "epoch": 0.744151565074135, "grad_norm": 0.3476366102695465, "learning_rate": 1.3936887169649046e-05, "loss": 0.4822, "step": 27102 }, { "epoch": 0.7441790225151016, "grad_norm": 0.4049743413925171, "learning_rate": 1.3936490150632232e-05, "loss": 0.4526, "step": 27103 }, { "epoch": 0.7442064799560681, "grad_norm": 0.379192590713501, "learning_rate": 1.3936093124272608e-05, "loss": 0.482, "step": 27104 }, { "epoch": 0.7442339373970346, "grad_norm": 0.41520941257476807, "learning_rate": 1.3935696090570909e-05, "loss": 0.4772, "step": 27105 }, { "epoch": 0.7442613948380011, "grad_norm": 0.41623514890670776, "learning_rate": 1.393529904952789e-05, "loss": 0.4814, "step": 27106 }, { "epoch": 0.7442888522789676, "grad_norm": 0.38903263211250305, "learning_rate": 1.3934902001144277e-05, "loss": 0.5082, "step": 27107 }, { "epoch": 0.7443163097199341, "grad_norm": 0.3993878662586212, "learning_rate": 1.3934504945420814e-05, "loss": 0.5242, "step": 27108 }, { "epoch": 0.7443437671609006, "grad_norm": 0.3858012557029724, "learning_rate": 1.3934107882358245e-05, "loss": 0.4838, "step": 27109 }, { "epoch": 0.7443712246018671, "grad_norm": 0.3855363428592682, "learning_rate": 1.393371081195731e-05, "loss": 0.4414, "step": 27110 }, { "epoch": 0.7443986820428337, "grad_norm": 0.3909245729446411, "learning_rate": 1.393331373421875e-05, "loss": 0.4464, "step": 27111 }, { "epoch": 0.7444261394838001, "grad_norm": 0.4376932382583618, "learning_rate": 1.3932916649143305e-05, "loss": 0.5515, "step": 27112 }, { "epoch": 0.7444535969247666, "grad_norm": 0.5710103511810303, "learning_rate": 1.3932519556731711e-05, "loss": 0.4287, "step": 27113 }, { "epoch": 0.7444810543657331, "grad_norm": 0.3659098744392395, "learning_rate": 1.3932122456984717e-05, "loss": 0.4851, "step": 27114 }, { "epoch": 0.7445085118066996, "grad_norm": 0.523100733757019, "learning_rate": 1.3931725349903059e-05, "loss": 0.4958, "step": 27115 }, { "epoch": 0.7445359692476661, "grad_norm": 0.3838779926300049, "learning_rate": 1.3931328235487475e-05, "loss": 0.479, "step": 27116 }, { "epoch": 0.7445634266886326, "grad_norm": 0.3862573206424713, "learning_rate": 1.3930931113738716e-05, "loss": 0.4632, "step": 27117 }, { "epoch": 0.7445908841295992, "grad_norm": 0.4086238443851471, "learning_rate": 1.3930533984657509e-05, "loss": 0.5311, "step": 27118 }, { "epoch": 0.7446183415705656, "grad_norm": 0.4274047911167145, "learning_rate": 1.3930136848244605e-05, "loss": 0.4777, "step": 27119 }, { "epoch": 0.7446457990115322, "grad_norm": 0.46751153469085693, "learning_rate": 1.392973970450074e-05, "loss": 0.5909, "step": 27120 }, { "epoch": 0.7446732564524986, "grad_norm": 0.3500654995441437, "learning_rate": 1.3929342553426658e-05, "loss": 0.444, "step": 27121 }, { "epoch": 0.7447007138934651, "grad_norm": 0.40573498606681824, "learning_rate": 1.3928945395023096e-05, "loss": 0.508, "step": 27122 }, { "epoch": 0.7447281713344316, "grad_norm": 0.3551639914512634, "learning_rate": 1.3928548229290797e-05, "loss": 0.5032, "step": 27123 }, { "epoch": 0.7447556287753981, "grad_norm": 0.38197723031044006, "learning_rate": 1.3928151056230502e-05, "loss": 0.5257, "step": 27124 }, { "epoch": 0.7447830862163647, "grad_norm": 0.38350850343704224, "learning_rate": 1.3927753875842952e-05, "loss": 0.5198, "step": 27125 }, { "epoch": 0.7448105436573311, "grad_norm": 0.3775874078273773, "learning_rate": 1.3927356688128888e-05, "loss": 0.5118, "step": 27126 }, { "epoch": 0.7448380010982977, "grad_norm": 0.39955732226371765, "learning_rate": 1.392695949308905e-05, "loss": 0.4732, "step": 27127 }, { "epoch": 0.7448654585392641, "grad_norm": 0.3672311007976532, "learning_rate": 1.3926562290724177e-05, "loss": 0.4945, "step": 27128 }, { "epoch": 0.7448929159802307, "grad_norm": 0.3316587209701538, "learning_rate": 1.3926165081035015e-05, "loss": 0.439, "step": 27129 }, { "epoch": 0.7449203734211971, "grad_norm": 0.3736424446105957, "learning_rate": 1.3925767864022302e-05, "loss": 0.453, "step": 27130 }, { "epoch": 0.7449478308621637, "grad_norm": 0.4106175899505615, "learning_rate": 1.3925370639686776e-05, "loss": 0.4689, "step": 27131 }, { "epoch": 0.7449752883031302, "grad_norm": 0.40685001015663147, "learning_rate": 1.3924973408029185e-05, "loss": 0.5874, "step": 27132 }, { "epoch": 0.7450027457440966, "grad_norm": 0.34905654191970825, "learning_rate": 1.3924576169050266e-05, "loss": 0.5361, "step": 27133 }, { "epoch": 0.7450302031850632, "grad_norm": 0.37440919876098633, "learning_rate": 1.3924178922750755e-05, "loss": 0.4592, "step": 27134 }, { "epoch": 0.7450576606260296, "grad_norm": 0.36881229281425476, "learning_rate": 1.3923781669131402e-05, "loss": 0.4391, "step": 27135 }, { "epoch": 0.7450851180669962, "grad_norm": 0.38177669048309326, "learning_rate": 1.3923384408192942e-05, "loss": 0.5695, "step": 27136 }, { "epoch": 0.7451125755079626, "grad_norm": 0.3537244498729706, "learning_rate": 1.3922987139936117e-05, "loss": 0.4993, "step": 27137 }, { "epoch": 0.7451400329489292, "grad_norm": 0.4073982238769531, "learning_rate": 1.392258986436167e-05, "loss": 0.5166, "step": 27138 }, { "epoch": 0.7451674903898957, "grad_norm": 0.4546234607696533, "learning_rate": 1.3922192581470343e-05, "loss": 0.4864, "step": 27139 }, { "epoch": 0.7451949478308622, "grad_norm": 0.36803680658340454, "learning_rate": 1.3921795291262871e-05, "loss": 0.5067, "step": 27140 }, { "epoch": 0.7452224052718287, "grad_norm": 0.410756915807724, "learning_rate": 1.3921397993740003e-05, "loss": 0.5143, "step": 27141 }, { "epoch": 0.7452498627127951, "grad_norm": 0.35265594720840454, "learning_rate": 1.3921000688902475e-05, "loss": 0.4577, "step": 27142 }, { "epoch": 0.7452773201537617, "grad_norm": 0.4021552801132202, "learning_rate": 1.3920603376751029e-05, "loss": 0.5464, "step": 27143 }, { "epoch": 0.7453047775947281, "grad_norm": 0.44017112255096436, "learning_rate": 1.3920206057286404e-05, "loss": 0.5355, "step": 27144 }, { "epoch": 0.7453322350356947, "grad_norm": 0.4080444872379303, "learning_rate": 1.3919808730509346e-05, "loss": 0.496, "step": 27145 }, { "epoch": 0.7453596924766612, "grad_norm": 0.3905821442604065, "learning_rate": 1.3919411396420595e-05, "loss": 0.4314, "step": 27146 }, { "epoch": 0.7453871499176277, "grad_norm": 0.36063605546951294, "learning_rate": 1.3919014055020887e-05, "loss": 0.4612, "step": 27147 }, { "epoch": 0.7454146073585942, "grad_norm": 0.4225355088710785, "learning_rate": 1.391861670631097e-05, "loss": 0.4897, "step": 27148 }, { "epoch": 0.7454420647995607, "grad_norm": 0.39107781648635864, "learning_rate": 1.391821935029158e-05, "loss": 0.5227, "step": 27149 }, { "epoch": 0.7454695222405272, "grad_norm": 0.36338916420936584, "learning_rate": 1.3917821986963463e-05, "loss": 0.4211, "step": 27150 }, { "epoch": 0.7454969796814936, "grad_norm": 0.46212705969810486, "learning_rate": 1.3917424616327358e-05, "loss": 0.5134, "step": 27151 }, { "epoch": 0.7455244371224602, "grad_norm": 0.4817342460155487, "learning_rate": 1.3917027238384002e-05, "loss": 0.4462, "step": 27152 }, { "epoch": 0.7455518945634267, "grad_norm": 0.37376728653907776, "learning_rate": 1.3916629853134143e-05, "loss": 0.4296, "step": 27153 }, { "epoch": 0.7455793520043932, "grad_norm": 0.3929413855075836, "learning_rate": 1.3916232460578515e-05, "loss": 0.5084, "step": 27154 }, { "epoch": 0.7456068094453597, "grad_norm": 0.4164959490299225, "learning_rate": 1.391583506071787e-05, "loss": 0.4763, "step": 27155 }, { "epoch": 0.7456342668863262, "grad_norm": 0.3522765040397644, "learning_rate": 1.391543765355294e-05, "loss": 0.4968, "step": 27156 }, { "epoch": 0.7456617243272927, "grad_norm": 0.417506605386734, "learning_rate": 1.3915040239084468e-05, "loss": 0.5443, "step": 27157 }, { "epoch": 0.7456891817682592, "grad_norm": 0.37671589851379395, "learning_rate": 1.3914642817313198e-05, "loss": 0.5092, "step": 27158 }, { "epoch": 0.7457166392092257, "grad_norm": 0.4278164207935333, "learning_rate": 1.3914245388239867e-05, "loss": 0.5458, "step": 27159 }, { "epoch": 0.7457440966501923, "grad_norm": 0.38732007145881653, "learning_rate": 1.3913847951865222e-05, "loss": 0.5723, "step": 27160 }, { "epoch": 0.7457715540911587, "grad_norm": 0.4173907935619354, "learning_rate": 1.391345050819e-05, "loss": 0.5911, "step": 27161 }, { "epoch": 0.7457990115321252, "grad_norm": 0.3393164277076721, "learning_rate": 1.3913053057214945e-05, "loss": 0.5143, "step": 27162 }, { "epoch": 0.7458264689730917, "grad_norm": 0.4084787964820862, "learning_rate": 1.3912655598940797e-05, "loss": 0.5384, "step": 27163 }, { "epoch": 0.7458539264140582, "grad_norm": 0.35454392433166504, "learning_rate": 1.3912258133368298e-05, "loss": 0.4794, "step": 27164 }, { "epoch": 0.7458813838550247, "grad_norm": 0.37580403685569763, "learning_rate": 1.3911860660498186e-05, "loss": 0.4753, "step": 27165 }, { "epoch": 0.7459088412959912, "grad_norm": 0.3833920955657959, "learning_rate": 1.3911463180331208e-05, "loss": 0.5385, "step": 27166 }, { "epoch": 0.7459362987369578, "grad_norm": 0.47670644521713257, "learning_rate": 1.39110656928681e-05, "loss": 0.4627, "step": 27167 }, { "epoch": 0.7459637561779242, "grad_norm": 0.40480056405067444, "learning_rate": 1.391066819810961e-05, "loss": 0.4288, "step": 27168 }, { "epoch": 0.7459912136188908, "grad_norm": 0.3918590843677521, "learning_rate": 1.3910270696056474e-05, "loss": 0.5279, "step": 27169 }, { "epoch": 0.7460186710598572, "grad_norm": 0.3651442229747772, "learning_rate": 1.3909873186709435e-05, "loss": 0.5582, "step": 27170 }, { "epoch": 0.7460461285008237, "grad_norm": 0.3872045874595642, "learning_rate": 1.3909475670069234e-05, "loss": 0.4849, "step": 27171 }, { "epoch": 0.7460735859417902, "grad_norm": 0.38970157504081726, "learning_rate": 1.3909078146136613e-05, "loss": 0.5024, "step": 27172 }, { "epoch": 0.7461010433827567, "grad_norm": 0.4299333989620209, "learning_rate": 1.3908680614912316e-05, "loss": 0.4279, "step": 27173 }, { "epoch": 0.7461285008237233, "grad_norm": 0.3810559809207916, "learning_rate": 1.390828307639708e-05, "loss": 0.5369, "step": 27174 }, { "epoch": 0.7461559582646897, "grad_norm": 0.4064956605434418, "learning_rate": 1.3907885530591647e-05, "loss": 0.4505, "step": 27175 }, { "epoch": 0.7461834157056563, "grad_norm": 0.4144935607910156, "learning_rate": 1.3907487977496765e-05, "loss": 0.4898, "step": 27176 }, { "epoch": 0.7462108731466227, "grad_norm": 0.8397083282470703, "learning_rate": 1.3907090417113164e-05, "loss": 0.4445, "step": 27177 }, { "epoch": 0.7462383305875893, "grad_norm": 0.47813528776168823, "learning_rate": 1.3906692849441599e-05, "loss": 0.4286, "step": 27178 }, { "epoch": 0.7462657880285557, "grad_norm": 0.39944225549697876, "learning_rate": 1.3906295274482804e-05, "loss": 0.4939, "step": 27179 }, { "epoch": 0.7462932454695222, "grad_norm": 0.3795377314090729, "learning_rate": 1.3905897692237517e-05, "loss": 0.5604, "step": 27180 }, { "epoch": 0.7463207029104888, "grad_norm": 0.4075329899787903, "learning_rate": 1.3905500102706491e-05, "loss": 0.4266, "step": 27181 }, { "epoch": 0.7463481603514552, "grad_norm": 0.3287191092967987, "learning_rate": 1.3905102505890455e-05, "loss": 0.4576, "step": 27182 }, { "epoch": 0.7463756177924218, "grad_norm": 0.46525388956069946, "learning_rate": 1.3904704901790158e-05, "loss": 0.5392, "step": 27183 }, { "epoch": 0.7464030752333882, "grad_norm": 0.3898729979991913, "learning_rate": 1.3904307290406343e-05, "loss": 0.525, "step": 27184 }, { "epoch": 0.7464305326743548, "grad_norm": 0.45892661809921265, "learning_rate": 1.3903909671739744e-05, "loss": 0.4987, "step": 27185 }, { "epoch": 0.7464579901153212, "grad_norm": 0.4826240837574005, "learning_rate": 1.390351204579111e-05, "loss": 0.5486, "step": 27186 }, { "epoch": 0.7464854475562878, "grad_norm": 0.38185274600982666, "learning_rate": 1.390311441256118e-05, "loss": 0.4884, "step": 27187 }, { "epoch": 0.7465129049972542, "grad_norm": 0.6604644060134888, "learning_rate": 1.3902716772050698e-05, "loss": 0.4779, "step": 27188 }, { "epoch": 0.7465403624382208, "grad_norm": 0.4946640133857727, "learning_rate": 1.3902319124260402e-05, "loss": 0.5538, "step": 27189 }, { "epoch": 0.7465678198791873, "grad_norm": 0.4191156029701233, "learning_rate": 1.3901921469191034e-05, "loss": 0.4597, "step": 27190 }, { "epoch": 0.7465952773201537, "grad_norm": 0.42399370670318604, "learning_rate": 1.3901523806843338e-05, "loss": 0.4322, "step": 27191 }, { "epoch": 0.7466227347611203, "grad_norm": 0.3724319636821747, "learning_rate": 1.3901126137218053e-05, "loss": 0.582, "step": 27192 }, { "epoch": 0.7466501922020867, "grad_norm": 0.4776276648044586, "learning_rate": 1.3900728460315927e-05, "loss": 0.4866, "step": 27193 }, { "epoch": 0.7466776496430533, "grad_norm": 0.3981255888938904, "learning_rate": 1.3900330776137694e-05, "loss": 0.5099, "step": 27194 }, { "epoch": 0.7467051070840197, "grad_norm": 0.3810243010520935, "learning_rate": 1.38999330846841e-05, "loss": 0.5041, "step": 27195 }, { "epoch": 0.7467325645249863, "grad_norm": 0.33832311630249023, "learning_rate": 1.3899535385955887e-05, "loss": 0.4311, "step": 27196 }, { "epoch": 0.7467600219659528, "grad_norm": 0.3368035554885864, "learning_rate": 1.3899137679953794e-05, "loss": 0.4817, "step": 27197 }, { "epoch": 0.7467874794069193, "grad_norm": 0.3735591173171997, "learning_rate": 1.3898739966678567e-05, "loss": 0.4288, "step": 27198 }, { "epoch": 0.7468149368478858, "grad_norm": 0.35658788681030273, "learning_rate": 1.3898342246130944e-05, "loss": 0.4453, "step": 27199 }, { "epoch": 0.7468423942888522, "grad_norm": 0.5987304449081421, "learning_rate": 1.389794451831167e-05, "loss": 0.584, "step": 27200 }, { "epoch": 0.7468698517298188, "grad_norm": 0.47466161847114563, "learning_rate": 1.3897546783221484e-05, "loss": 0.4885, "step": 27201 }, { "epoch": 0.7468973091707852, "grad_norm": 0.4351346492767334, "learning_rate": 1.389714904086113e-05, "loss": 0.5053, "step": 27202 }, { "epoch": 0.7469247666117518, "grad_norm": 0.37689408659935, "learning_rate": 1.389675129123135e-05, "loss": 0.4893, "step": 27203 }, { "epoch": 0.7469522240527183, "grad_norm": 0.3708382844924927, "learning_rate": 1.3896353534332882e-05, "loss": 0.5026, "step": 27204 }, { "epoch": 0.7469796814936848, "grad_norm": 0.3707188069820404, "learning_rate": 1.3895955770166477e-05, "loss": 0.4612, "step": 27205 }, { "epoch": 0.7470071389346513, "grad_norm": 0.3584029972553253, "learning_rate": 1.3895557998732867e-05, "loss": 0.4551, "step": 27206 }, { "epoch": 0.7470345963756178, "grad_norm": 0.40067678689956665, "learning_rate": 1.3895160220032798e-05, "loss": 0.5111, "step": 27207 }, { "epoch": 0.7470620538165843, "grad_norm": 0.358562707901001, "learning_rate": 1.3894762434067013e-05, "loss": 0.4244, "step": 27208 }, { "epoch": 0.7470895112575507, "grad_norm": 0.38155868649482727, "learning_rate": 1.3894364640836251e-05, "loss": 0.5691, "step": 27209 }, { "epoch": 0.7471169686985173, "grad_norm": 0.3841138780117035, "learning_rate": 1.3893966840341258e-05, "loss": 0.482, "step": 27210 }, { "epoch": 0.7471444261394838, "grad_norm": 0.3982864022254944, "learning_rate": 1.3893569032582773e-05, "loss": 0.4275, "step": 27211 }, { "epoch": 0.7471718835804503, "grad_norm": 0.3701251447200775, "learning_rate": 1.3893171217561539e-05, "loss": 0.4292, "step": 27212 }, { "epoch": 0.7471993410214168, "grad_norm": 0.37015044689178467, "learning_rate": 1.3892773395278299e-05, "loss": 0.5336, "step": 27213 }, { "epoch": 0.7472267984623833, "grad_norm": 0.39199334383010864, "learning_rate": 1.3892375565733793e-05, "loss": 0.4395, "step": 27214 }, { "epoch": 0.7472542559033498, "grad_norm": 0.3883916437625885, "learning_rate": 1.3891977728928767e-05, "loss": 0.4065, "step": 27215 }, { "epoch": 0.7472817133443163, "grad_norm": 0.3810133635997772, "learning_rate": 1.3891579884863958e-05, "loss": 0.4793, "step": 27216 }, { "epoch": 0.7473091707852828, "grad_norm": 0.3558897376060486, "learning_rate": 1.3891182033540108e-05, "loss": 0.4632, "step": 27217 }, { "epoch": 0.7473366282262494, "grad_norm": 0.39683258533477783, "learning_rate": 1.3890784174957968e-05, "loss": 0.4542, "step": 27218 }, { "epoch": 0.7473640856672158, "grad_norm": 0.37866291403770447, "learning_rate": 1.3890386309118269e-05, "loss": 0.6002, "step": 27219 }, { "epoch": 0.7473915431081823, "grad_norm": 0.3932018578052521, "learning_rate": 1.388998843602176e-05, "loss": 0.5608, "step": 27220 }, { "epoch": 0.7474190005491488, "grad_norm": 0.355747252702713, "learning_rate": 1.388959055566918e-05, "loss": 0.4884, "step": 27221 }, { "epoch": 0.7474464579901153, "grad_norm": 0.41602379083633423, "learning_rate": 1.3889192668061271e-05, "loss": 0.4603, "step": 27222 }, { "epoch": 0.7474739154310818, "grad_norm": 0.38681963086128235, "learning_rate": 1.388879477319878e-05, "loss": 0.4662, "step": 27223 }, { "epoch": 0.7475013728720483, "grad_norm": 0.41736915707588196, "learning_rate": 1.3888396871082442e-05, "loss": 0.4665, "step": 27224 }, { "epoch": 0.7475288303130149, "grad_norm": 0.3702283799648285, "learning_rate": 1.3887998961713005e-05, "loss": 0.4888, "step": 27225 }, { "epoch": 0.7475562877539813, "grad_norm": 0.5274899005889893, "learning_rate": 1.3887601045091208e-05, "loss": 0.5581, "step": 27226 }, { "epoch": 0.7475837451949479, "grad_norm": 0.33664631843566895, "learning_rate": 1.3887203121217794e-05, "loss": 0.4297, "step": 27227 }, { "epoch": 0.7476112026359143, "grad_norm": 0.39843234419822693, "learning_rate": 1.3886805190093507e-05, "loss": 0.4826, "step": 27228 }, { "epoch": 0.7476386600768808, "grad_norm": 0.3743366599082947, "learning_rate": 1.3886407251719085e-05, "loss": 0.4856, "step": 27229 }, { "epoch": 0.7476661175178473, "grad_norm": 0.4379758834838867, "learning_rate": 1.3886009306095276e-05, "loss": 0.4451, "step": 27230 }, { "epoch": 0.7476935749588138, "grad_norm": 0.3881988823413849, "learning_rate": 1.3885611353222818e-05, "loss": 0.451, "step": 27231 }, { "epoch": 0.7477210323997804, "grad_norm": 0.3969597816467285, "learning_rate": 1.3885213393102454e-05, "loss": 0.5273, "step": 27232 }, { "epoch": 0.7477484898407468, "grad_norm": 0.5058870911598206, "learning_rate": 1.388481542573493e-05, "loss": 0.4753, "step": 27233 }, { "epoch": 0.7477759472817134, "grad_norm": 0.40488335490226746, "learning_rate": 1.3884417451120981e-05, "loss": 0.5586, "step": 27234 }, { "epoch": 0.7478034047226798, "grad_norm": 0.3504822552204132, "learning_rate": 1.3884019469261357e-05, "loss": 0.4717, "step": 27235 }, { "epoch": 0.7478308621636464, "grad_norm": 0.35097169876098633, "learning_rate": 1.3883621480156797e-05, "loss": 0.4724, "step": 27236 }, { "epoch": 0.7478583196046128, "grad_norm": 0.3842412233352661, "learning_rate": 1.3883223483808039e-05, "loss": 0.4928, "step": 27237 }, { "epoch": 0.7478857770455793, "grad_norm": 0.3985026180744171, "learning_rate": 1.3882825480215836e-05, "loss": 0.4874, "step": 27238 }, { "epoch": 0.7479132344865459, "grad_norm": 0.42950519919395447, "learning_rate": 1.388242746938092e-05, "loss": 0.4894, "step": 27239 }, { "epoch": 0.7479406919275123, "grad_norm": 0.37394410371780396, "learning_rate": 1.388202945130404e-05, "loss": 0.4429, "step": 27240 }, { "epoch": 0.7479681493684789, "grad_norm": 0.3448483943939209, "learning_rate": 1.3881631425985935e-05, "loss": 0.3888, "step": 27241 }, { "epoch": 0.7479956068094453, "grad_norm": 0.38049083948135376, "learning_rate": 1.3881233393427346e-05, "loss": 0.4762, "step": 27242 }, { "epoch": 0.7480230642504119, "grad_norm": 0.7437814474105835, "learning_rate": 1.3880835353629024e-05, "loss": 0.4576, "step": 27243 }, { "epoch": 0.7480505216913783, "grad_norm": 0.42212507128715515, "learning_rate": 1.3880437306591703e-05, "loss": 0.584, "step": 27244 }, { "epoch": 0.7480779791323449, "grad_norm": 0.4118565618991852, "learning_rate": 1.3880039252316127e-05, "loss": 0.5228, "step": 27245 }, { "epoch": 0.7481054365733114, "grad_norm": 0.36588066816329956, "learning_rate": 1.3879641190803039e-05, "loss": 0.4504, "step": 27246 }, { "epoch": 0.7481328940142778, "grad_norm": 0.41977429389953613, "learning_rate": 1.387924312205318e-05, "loss": 0.5453, "step": 27247 }, { "epoch": 0.7481603514552444, "grad_norm": 0.3767732083797455, "learning_rate": 1.38788450460673e-05, "loss": 0.4098, "step": 27248 }, { "epoch": 0.7481878088962108, "grad_norm": 0.3668791651725769, "learning_rate": 1.3878446962846132e-05, "loss": 0.4774, "step": 27249 }, { "epoch": 0.7482152663371774, "grad_norm": 0.3821262717247009, "learning_rate": 1.3878048872390423e-05, "loss": 0.4986, "step": 27250 }, { "epoch": 0.7482427237781438, "grad_norm": 0.40216055512428284, "learning_rate": 1.3877650774700917e-05, "loss": 0.4615, "step": 27251 }, { "epoch": 0.7482701812191104, "grad_norm": 0.33533957600593567, "learning_rate": 1.3877252669778352e-05, "loss": 0.5043, "step": 27252 }, { "epoch": 0.7482976386600769, "grad_norm": 0.36502718925476074, "learning_rate": 1.3876854557623473e-05, "loss": 0.5033, "step": 27253 }, { "epoch": 0.7483250961010434, "grad_norm": 0.590496838092804, "learning_rate": 1.3876456438237027e-05, "loss": 0.5091, "step": 27254 }, { "epoch": 0.7483525535420099, "grad_norm": 0.415359765291214, "learning_rate": 1.387605831161975e-05, "loss": 0.5281, "step": 27255 }, { "epoch": 0.7483800109829764, "grad_norm": 0.391146183013916, "learning_rate": 1.3875660177772385e-05, "loss": 0.4462, "step": 27256 }, { "epoch": 0.7484074684239429, "grad_norm": 0.47389405965805054, "learning_rate": 1.3875262036695683e-05, "loss": 0.5884, "step": 27257 }, { "epoch": 0.7484349258649093, "grad_norm": 0.36258751153945923, "learning_rate": 1.3874863888390373e-05, "loss": 0.3856, "step": 27258 }, { "epoch": 0.7484623833058759, "grad_norm": 0.3667765259742737, "learning_rate": 1.3874465732857209e-05, "loss": 0.4116, "step": 27259 }, { "epoch": 0.7484898407468424, "grad_norm": 0.3515108823776245, "learning_rate": 1.3874067570096929e-05, "loss": 0.4619, "step": 27260 }, { "epoch": 0.7485172981878089, "grad_norm": 0.3529461920261383, "learning_rate": 1.3873669400110278e-05, "loss": 0.469, "step": 27261 }, { "epoch": 0.7485447556287754, "grad_norm": 0.42865675687789917, "learning_rate": 1.3873271222897998e-05, "loss": 0.5093, "step": 27262 }, { "epoch": 0.7485722130697419, "grad_norm": 0.46264100074768066, "learning_rate": 1.3872873038460825e-05, "loss": 0.4724, "step": 27263 }, { "epoch": 0.7485996705107084, "grad_norm": 0.37311965227127075, "learning_rate": 1.3872474846799514e-05, "loss": 0.4561, "step": 27264 }, { "epoch": 0.7486271279516749, "grad_norm": 0.37122562527656555, "learning_rate": 1.3872076647914798e-05, "loss": 0.5166, "step": 27265 }, { "epoch": 0.7486545853926414, "grad_norm": 0.35183191299438477, "learning_rate": 1.3871678441807426e-05, "loss": 0.45, "step": 27266 }, { "epoch": 0.748682042833608, "grad_norm": 0.4477759003639221, "learning_rate": 1.3871280228478136e-05, "loss": 0.4795, "step": 27267 }, { "epoch": 0.7487095002745744, "grad_norm": 0.40349265933036804, "learning_rate": 1.3870882007927673e-05, "loss": 0.5608, "step": 27268 }, { "epoch": 0.7487369577155409, "grad_norm": 0.394853413105011, "learning_rate": 1.3870483780156781e-05, "loss": 0.5217, "step": 27269 }, { "epoch": 0.7487644151565074, "grad_norm": 0.41012370586395264, "learning_rate": 1.38700855451662e-05, "loss": 0.4976, "step": 27270 }, { "epoch": 0.7487918725974739, "grad_norm": 0.43071407079696655, "learning_rate": 1.3869687302956673e-05, "loss": 0.5027, "step": 27271 }, { "epoch": 0.7488193300384404, "grad_norm": 0.3650836944580078, "learning_rate": 1.3869289053528948e-05, "loss": 0.4555, "step": 27272 }, { "epoch": 0.7488467874794069, "grad_norm": 0.3612661361694336, "learning_rate": 1.386889079688376e-05, "loss": 0.4749, "step": 27273 }, { "epoch": 0.7488742449203735, "grad_norm": 0.36449575424194336, "learning_rate": 1.3868492533021858e-05, "loss": 0.4809, "step": 27274 }, { "epoch": 0.7489017023613399, "grad_norm": 0.41477158665657043, "learning_rate": 1.3868094261943982e-05, "loss": 0.469, "step": 27275 }, { "epoch": 0.7489291598023065, "grad_norm": 0.4054095447063446, "learning_rate": 1.3867695983650875e-05, "loss": 0.5849, "step": 27276 }, { "epoch": 0.7489566172432729, "grad_norm": 0.520847737789154, "learning_rate": 1.3867297698143283e-05, "loss": 0.5741, "step": 27277 }, { "epoch": 0.7489840746842394, "grad_norm": 0.40863898396492004, "learning_rate": 1.3866899405421946e-05, "loss": 0.5277, "step": 27278 }, { "epoch": 0.7490115321252059, "grad_norm": 0.4149259626865387, "learning_rate": 1.3866501105487606e-05, "loss": 0.4874, "step": 27279 }, { "epoch": 0.7490389895661724, "grad_norm": 0.42169731855392456, "learning_rate": 1.3866102798341007e-05, "loss": 0.5059, "step": 27280 }, { "epoch": 0.749066447007139, "grad_norm": 0.39926689863204956, "learning_rate": 1.3865704483982894e-05, "loss": 0.4881, "step": 27281 }, { "epoch": 0.7490939044481054, "grad_norm": 0.41997143626213074, "learning_rate": 1.3865306162414008e-05, "loss": 0.5304, "step": 27282 }, { "epoch": 0.749121361889072, "grad_norm": 0.37914493680000305, "learning_rate": 1.3864907833635091e-05, "loss": 0.4346, "step": 27283 }, { "epoch": 0.7491488193300384, "grad_norm": 0.3975631296634674, "learning_rate": 1.3864509497646887e-05, "loss": 0.4935, "step": 27284 }, { "epoch": 0.749176276771005, "grad_norm": 0.39906707406044006, "learning_rate": 1.3864111154450144e-05, "loss": 0.4859, "step": 27285 }, { "epoch": 0.7492037342119714, "grad_norm": 0.36917242407798767, "learning_rate": 1.3863712804045595e-05, "loss": 0.5077, "step": 27286 }, { "epoch": 0.749231191652938, "grad_norm": 0.3992873728275299, "learning_rate": 1.386331444643399e-05, "loss": 0.5824, "step": 27287 }, { "epoch": 0.7492586490939045, "grad_norm": 0.414122074842453, "learning_rate": 1.3862916081616074e-05, "loss": 0.4871, "step": 27288 }, { "epoch": 0.7492861065348709, "grad_norm": 0.3591693937778473, "learning_rate": 1.3862517709592583e-05, "loss": 0.5037, "step": 27289 }, { "epoch": 0.7493135639758375, "grad_norm": 0.3731274902820587, "learning_rate": 1.3862119330364263e-05, "loss": 0.5862, "step": 27290 }, { "epoch": 0.7493410214168039, "grad_norm": 0.4219018816947937, "learning_rate": 1.386172094393186e-05, "loss": 0.4902, "step": 27291 }, { "epoch": 0.7493684788577705, "grad_norm": 0.34861108660697937, "learning_rate": 1.3861322550296112e-05, "loss": 0.5196, "step": 27292 }, { "epoch": 0.7493959362987369, "grad_norm": 0.3414864242076874, "learning_rate": 1.3860924149457771e-05, "loss": 0.4158, "step": 27293 }, { "epoch": 0.7494233937397035, "grad_norm": 0.3952367603778839, "learning_rate": 1.3860525741417568e-05, "loss": 0.4989, "step": 27294 }, { "epoch": 0.74945085118067, "grad_norm": 0.44005030393600464, "learning_rate": 1.3860127326176256e-05, "loss": 0.3815, "step": 27295 }, { "epoch": 0.7494783086216364, "grad_norm": 0.40655073523521423, "learning_rate": 1.385972890373457e-05, "loss": 0.4548, "step": 27296 }, { "epoch": 0.749505766062603, "grad_norm": 0.4122556447982788, "learning_rate": 1.3859330474093265e-05, "loss": 0.5193, "step": 27297 }, { "epoch": 0.7495332235035694, "grad_norm": 0.4157485067844391, "learning_rate": 1.3858932037253073e-05, "loss": 0.5416, "step": 27298 }, { "epoch": 0.749560680944536, "grad_norm": 0.37868767976760864, "learning_rate": 1.3858533593214739e-05, "loss": 0.4202, "step": 27299 }, { "epoch": 0.7495881383855024, "grad_norm": 0.39639246463775635, "learning_rate": 1.385813514197901e-05, "loss": 0.4899, "step": 27300 }, { "epoch": 0.749615595826469, "grad_norm": 0.3652653694152832, "learning_rate": 1.385773668354663e-05, "loss": 0.515, "step": 27301 }, { "epoch": 0.7496430532674355, "grad_norm": 0.3428898751735687, "learning_rate": 1.3857338217918334e-05, "loss": 0.4462, "step": 27302 }, { "epoch": 0.749670510708402, "grad_norm": 0.35772785544395447, "learning_rate": 1.3856939745094878e-05, "loss": 0.474, "step": 27303 }, { "epoch": 0.7496979681493685, "grad_norm": 0.48533573746681213, "learning_rate": 1.3856541265076993e-05, "loss": 0.512, "step": 27304 }, { "epoch": 0.749725425590335, "grad_norm": 0.442781537771225, "learning_rate": 1.385614277786543e-05, "loss": 0.4735, "step": 27305 }, { "epoch": 0.7497528830313015, "grad_norm": 0.38486069440841675, "learning_rate": 1.3855744283460932e-05, "loss": 0.4823, "step": 27306 }, { "epoch": 0.7497803404722679, "grad_norm": 0.45295071601867676, "learning_rate": 1.3855345781864235e-05, "loss": 0.5847, "step": 27307 }, { "epoch": 0.7498077979132345, "grad_norm": 0.39545738697052, "learning_rate": 1.3854947273076094e-05, "loss": 0.4664, "step": 27308 }, { "epoch": 0.749835255354201, "grad_norm": 0.3854566514492035, "learning_rate": 1.3854548757097241e-05, "loss": 0.4586, "step": 27309 }, { "epoch": 0.7498627127951675, "grad_norm": 0.41221392154693604, "learning_rate": 1.3854150233928427e-05, "loss": 0.5205, "step": 27310 }, { "epoch": 0.749890170236134, "grad_norm": 0.36695021390914917, "learning_rate": 1.3853751703570393e-05, "loss": 0.5047, "step": 27311 }, { "epoch": 0.7499176276771005, "grad_norm": 0.37965846061706543, "learning_rate": 1.385335316602388e-05, "loss": 0.4602, "step": 27312 }, { "epoch": 0.749945085118067, "grad_norm": 0.3498547375202179, "learning_rate": 1.3852954621289634e-05, "loss": 0.4908, "step": 27313 }, { "epoch": 0.7499725425590335, "grad_norm": 0.438501238822937, "learning_rate": 1.38525560693684e-05, "loss": 0.446, "step": 27314 }, { "epoch": 0.75, "grad_norm": 0.40804430842399597, "learning_rate": 1.3852157510260918e-05, "loss": 0.5047, "step": 27315 }, { "epoch": 0.7500274574409665, "grad_norm": 0.3886106312274933, "learning_rate": 1.3851758943967932e-05, "loss": 0.4864, "step": 27316 }, { "epoch": 0.750054914881933, "grad_norm": 0.3662477135658264, "learning_rate": 1.3851360370490185e-05, "loss": 0.4422, "step": 27317 }, { "epoch": 0.7500823723228995, "grad_norm": 0.37884294986724854, "learning_rate": 1.3850961789828423e-05, "loss": 0.3827, "step": 27318 }, { "epoch": 0.750109829763866, "grad_norm": 0.5056405067443848, "learning_rate": 1.3850563201983392e-05, "loss": 0.6113, "step": 27319 }, { "epoch": 0.7501372872048325, "grad_norm": 0.34513652324676514, "learning_rate": 1.3850164606955826e-05, "loss": 0.4479, "step": 27320 }, { "epoch": 0.750164744645799, "grad_norm": 0.4008210599422455, "learning_rate": 1.3849766004746477e-05, "loss": 0.5378, "step": 27321 }, { "epoch": 0.7501922020867655, "grad_norm": 0.386120468378067, "learning_rate": 1.3849367395356084e-05, "loss": 0.4754, "step": 27322 }, { "epoch": 0.7502196595277321, "grad_norm": 0.3881092071533203, "learning_rate": 1.3848968778785395e-05, "loss": 0.5055, "step": 27323 }, { "epoch": 0.7502471169686985, "grad_norm": 0.3796946108341217, "learning_rate": 1.3848570155035149e-05, "loss": 0.5257, "step": 27324 }, { "epoch": 0.750274574409665, "grad_norm": 0.38822445273399353, "learning_rate": 1.3848171524106091e-05, "loss": 0.485, "step": 27325 }, { "epoch": 0.7503020318506315, "grad_norm": 0.5708234906196594, "learning_rate": 1.3847772885998967e-05, "loss": 0.4515, "step": 27326 }, { "epoch": 0.750329489291598, "grad_norm": 0.34968551993370056, "learning_rate": 1.3847374240714515e-05, "loss": 0.4944, "step": 27327 }, { "epoch": 0.7503569467325645, "grad_norm": 0.3837652802467346, "learning_rate": 1.3846975588253484e-05, "loss": 0.5144, "step": 27328 }, { "epoch": 0.750384404173531, "grad_norm": 0.4440564513206482, "learning_rate": 1.3846576928616618e-05, "loss": 0.5365, "step": 27329 }, { "epoch": 0.7504118616144976, "grad_norm": 0.38832637667655945, "learning_rate": 1.3846178261804655e-05, "loss": 0.5262, "step": 27330 }, { "epoch": 0.750439319055464, "grad_norm": 0.3699714243412018, "learning_rate": 1.3845779587818342e-05, "loss": 0.4796, "step": 27331 }, { "epoch": 0.7504667764964306, "grad_norm": 0.394499272108078, "learning_rate": 1.3845380906658425e-05, "loss": 0.542, "step": 27332 }, { "epoch": 0.750494233937397, "grad_norm": 0.4773648679256439, "learning_rate": 1.3844982218325642e-05, "loss": 0.4834, "step": 27333 }, { "epoch": 0.7505216913783636, "grad_norm": 0.42648574709892273, "learning_rate": 1.3844583522820742e-05, "loss": 0.4863, "step": 27334 }, { "epoch": 0.75054914881933, "grad_norm": 0.47004643082618713, "learning_rate": 1.3844184820144466e-05, "loss": 0.5189, "step": 27335 }, { "epoch": 0.7505766062602965, "grad_norm": 0.40205755829811096, "learning_rate": 1.3843786110297562e-05, "loss": 0.5569, "step": 27336 }, { "epoch": 0.7506040637012631, "grad_norm": 0.49081623554229736, "learning_rate": 1.3843387393280767e-05, "loss": 0.5178, "step": 27337 }, { "epoch": 0.7506315211422295, "grad_norm": 0.3894991874694824, "learning_rate": 1.3842988669094828e-05, "loss": 0.4543, "step": 27338 }, { "epoch": 0.7506589785831961, "grad_norm": 0.3913445770740509, "learning_rate": 1.3842589937740488e-05, "loss": 0.5185, "step": 27339 }, { "epoch": 0.7506864360241625, "grad_norm": 0.5093732476234436, "learning_rate": 1.3842191199218491e-05, "loss": 0.4766, "step": 27340 }, { "epoch": 0.7507138934651291, "grad_norm": 0.35144326090812683, "learning_rate": 1.3841792453529582e-05, "loss": 0.4954, "step": 27341 }, { "epoch": 0.7507413509060955, "grad_norm": 0.362045556306839, "learning_rate": 1.3841393700674506e-05, "loss": 0.4763, "step": 27342 }, { "epoch": 0.750768808347062, "grad_norm": 0.37703239917755127, "learning_rate": 1.3840994940654e-05, "loss": 0.4955, "step": 27343 }, { "epoch": 0.7507962657880286, "grad_norm": 0.382886677980423, "learning_rate": 1.3840596173468817e-05, "loss": 0.458, "step": 27344 }, { "epoch": 0.750823723228995, "grad_norm": 0.3518584072589874, "learning_rate": 1.3840197399119692e-05, "loss": 0.4289, "step": 27345 }, { "epoch": 0.7508511806699616, "grad_norm": 0.40720582008361816, "learning_rate": 1.3839798617607379e-05, "loss": 0.511, "step": 27346 }, { "epoch": 0.750878638110928, "grad_norm": 0.36922648549079895, "learning_rate": 1.3839399828932614e-05, "loss": 0.4487, "step": 27347 }, { "epoch": 0.7509060955518946, "grad_norm": 0.7264671921730042, "learning_rate": 1.383900103309614e-05, "loss": 0.4617, "step": 27348 }, { "epoch": 0.750933552992861, "grad_norm": 0.479998916387558, "learning_rate": 1.3838602230098706e-05, "loss": 0.501, "step": 27349 }, { "epoch": 0.7509610104338276, "grad_norm": 0.3706666827201843, "learning_rate": 1.3838203419941055e-05, "loss": 0.4693, "step": 27350 }, { "epoch": 0.7509884678747941, "grad_norm": 0.37931665778160095, "learning_rate": 1.3837804602623926e-05, "loss": 0.4647, "step": 27351 }, { "epoch": 0.7510159253157606, "grad_norm": 0.41075924038887024, "learning_rate": 1.3837405778148067e-05, "loss": 0.5135, "step": 27352 }, { "epoch": 0.7510433827567271, "grad_norm": 0.35232123732566833, "learning_rate": 1.3837006946514224e-05, "loss": 0.4748, "step": 27353 }, { "epoch": 0.7510708401976935, "grad_norm": 0.3913501799106598, "learning_rate": 1.3836608107723137e-05, "loss": 0.5794, "step": 27354 }, { "epoch": 0.7510982976386601, "grad_norm": 0.4091120660305023, "learning_rate": 1.3836209261775552e-05, "loss": 0.5545, "step": 27355 }, { "epoch": 0.7511257550796265, "grad_norm": 0.4256509244441986, "learning_rate": 1.3835810408672212e-05, "loss": 0.532, "step": 27356 }, { "epoch": 0.7511532125205931, "grad_norm": 0.35041505098342896, "learning_rate": 1.3835411548413862e-05, "loss": 0.4737, "step": 27357 }, { "epoch": 0.7511806699615596, "grad_norm": 0.42662370204925537, "learning_rate": 1.3835012681001244e-05, "loss": 0.5414, "step": 27358 }, { "epoch": 0.7512081274025261, "grad_norm": 0.4302554428577423, "learning_rate": 1.3834613806435104e-05, "loss": 0.4963, "step": 27359 }, { "epoch": 0.7512355848434926, "grad_norm": 0.4277259409427643, "learning_rate": 1.3834214924716187e-05, "loss": 0.5325, "step": 27360 }, { "epoch": 0.7512630422844591, "grad_norm": 0.375774621963501, "learning_rate": 1.3833816035845233e-05, "loss": 0.4461, "step": 27361 }, { "epoch": 0.7512904997254256, "grad_norm": 0.36699536442756653, "learning_rate": 1.3833417139822988e-05, "loss": 0.4731, "step": 27362 }, { "epoch": 0.751317957166392, "grad_norm": 0.3365950584411621, "learning_rate": 1.3833018236650198e-05, "loss": 0.4517, "step": 27363 }, { "epoch": 0.7513454146073586, "grad_norm": 0.33247217535972595, "learning_rate": 1.3832619326327606e-05, "loss": 0.4475, "step": 27364 }, { "epoch": 0.7513728720483251, "grad_norm": 0.42874640226364136, "learning_rate": 1.3832220408855957e-05, "loss": 0.4884, "step": 27365 }, { "epoch": 0.7514003294892916, "grad_norm": 0.38884368538856506, "learning_rate": 1.383182148423599e-05, "loss": 0.4474, "step": 27366 }, { "epoch": 0.7514277869302581, "grad_norm": 0.343723326921463, "learning_rate": 1.3831422552468456e-05, "loss": 0.4138, "step": 27367 }, { "epoch": 0.7514552443712246, "grad_norm": 0.41703569889068604, "learning_rate": 1.3831023613554094e-05, "loss": 0.5546, "step": 27368 }, { "epoch": 0.7514827018121911, "grad_norm": 0.4082193970680237, "learning_rate": 1.3830624667493653e-05, "loss": 0.5022, "step": 27369 }, { "epoch": 0.7515101592531576, "grad_norm": 0.4089539349079132, "learning_rate": 1.3830225714287872e-05, "loss": 0.4997, "step": 27370 }, { "epoch": 0.7515376166941241, "grad_norm": 0.33666419982910156, "learning_rate": 1.3829826753937498e-05, "loss": 0.3632, "step": 27371 }, { "epoch": 0.7515650741350907, "grad_norm": 0.44910866022109985, "learning_rate": 1.3829427786443277e-05, "loss": 0.5902, "step": 27372 }, { "epoch": 0.7515925315760571, "grad_norm": 0.4061928391456604, "learning_rate": 1.3829028811805947e-05, "loss": 0.5335, "step": 27373 }, { "epoch": 0.7516199890170236, "grad_norm": 0.4015025794506073, "learning_rate": 1.382862983002626e-05, "loss": 0.4857, "step": 27374 }, { "epoch": 0.7516474464579901, "grad_norm": 0.3873145878314972, "learning_rate": 1.3828230841104953e-05, "loss": 0.5614, "step": 27375 }, { "epoch": 0.7516749038989566, "grad_norm": 0.4389117956161499, "learning_rate": 1.3827831845042775e-05, "loss": 0.4316, "step": 27376 }, { "epoch": 0.7517023613399231, "grad_norm": 0.4204513132572174, "learning_rate": 1.3827432841840471e-05, "loss": 0.5136, "step": 27377 }, { "epoch": 0.7517298187808896, "grad_norm": 0.4550164043903351, "learning_rate": 1.382703383149878e-05, "loss": 0.5062, "step": 27378 }, { "epoch": 0.7517572762218562, "grad_norm": 0.423313170671463, "learning_rate": 1.3826634814018451e-05, "loss": 0.5254, "step": 27379 }, { "epoch": 0.7517847336628226, "grad_norm": 0.3931397795677185, "learning_rate": 1.3826235789400226e-05, "loss": 0.547, "step": 27380 }, { "epoch": 0.7518121911037892, "grad_norm": 0.37100398540496826, "learning_rate": 1.3825836757644853e-05, "loss": 0.4762, "step": 27381 }, { "epoch": 0.7518396485447556, "grad_norm": 0.34981009364128113, "learning_rate": 1.382543771875307e-05, "loss": 0.4222, "step": 27382 }, { "epoch": 0.7518671059857222, "grad_norm": 0.36910736560821533, "learning_rate": 1.3825038672725626e-05, "loss": 0.467, "step": 27383 }, { "epoch": 0.7518945634266886, "grad_norm": 0.44296619296073914, "learning_rate": 1.3824639619563262e-05, "loss": 0.485, "step": 27384 }, { "epoch": 0.7519220208676551, "grad_norm": 0.38606807589530945, "learning_rate": 1.3824240559266726e-05, "loss": 0.4827, "step": 27385 }, { "epoch": 0.7519494783086217, "grad_norm": 0.40094926953315735, "learning_rate": 1.3823841491836762e-05, "loss": 0.5463, "step": 27386 }, { "epoch": 0.7519769357495881, "grad_norm": 0.41017380356788635, "learning_rate": 1.382344241727411e-05, "loss": 0.4977, "step": 27387 }, { "epoch": 0.7520043931905547, "grad_norm": 0.47281554341316223, "learning_rate": 1.3823043335579523e-05, "loss": 0.5698, "step": 27388 }, { "epoch": 0.7520318506315211, "grad_norm": 0.3336227536201477, "learning_rate": 1.3822644246753738e-05, "loss": 0.4565, "step": 27389 }, { "epoch": 0.7520593080724877, "grad_norm": 0.5030280947685242, "learning_rate": 1.3822245150797497e-05, "loss": 0.4736, "step": 27390 }, { "epoch": 0.7520867655134541, "grad_norm": 0.47680026292800903, "learning_rate": 1.3821846047711553e-05, "loss": 0.5012, "step": 27391 }, { "epoch": 0.7521142229544207, "grad_norm": 0.49399617314338684, "learning_rate": 1.3821446937496646e-05, "loss": 0.6456, "step": 27392 }, { "epoch": 0.7521416803953872, "grad_norm": 0.38945528864860535, "learning_rate": 1.3821047820153521e-05, "loss": 0.4638, "step": 27393 }, { "epoch": 0.7521691378363536, "grad_norm": 0.36322858929634094, "learning_rate": 1.3820648695682924e-05, "loss": 0.5058, "step": 27394 }, { "epoch": 0.7521965952773202, "grad_norm": 0.5027502775192261, "learning_rate": 1.3820249564085592e-05, "loss": 0.5272, "step": 27395 }, { "epoch": 0.7522240527182866, "grad_norm": 0.3763788342475891, "learning_rate": 1.381985042536228e-05, "loss": 0.4819, "step": 27396 }, { "epoch": 0.7522515101592532, "grad_norm": 0.4000832736492157, "learning_rate": 1.3819451279513725e-05, "loss": 0.5507, "step": 27397 }, { "epoch": 0.7522789676002196, "grad_norm": 0.440574049949646, "learning_rate": 1.3819052126540674e-05, "loss": 0.5748, "step": 27398 }, { "epoch": 0.7523064250411862, "grad_norm": 0.38016992807388306, "learning_rate": 1.3818652966443877e-05, "loss": 0.4941, "step": 27399 }, { "epoch": 0.7523338824821527, "grad_norm": 0.40089645981788635, "learning_rate": 1.3818253799224069e-05, "loss": 0.5238, "step": 27400 }, { "epoch": 0.7523613399231192, "grad_norm": 0.3849565088748932, "learning_rate": 1.3817854624882002e-05, "loss": 0.5341, "step": 27401 }, { "epoch": 0.7523887973640857, "grad_norm": 0.34693220257759094, "learning_rate": 1.3817455443418412e-05, "loss": 0.4992, "step": 27402 }, { "epoch": 0.7524162548050521, "grad_norm": 0.42022043466567993, "learning_rate": 1.3817056254834057e-05, "loss": 0.4595, "step": 27403 }, { "epoch": 0.7524437122460187, "grad_norm": 0.360822468996048, "learning_rate": 1.3816657059129668e-05, "loss": 0.4415, "step": 27404 }, { "epoch": 0.7524711696869851, "grad_norm": 0.3601246476173401, "learning_rate": 1.3816257856305997e-05, "loss": 0.4151, "step": 27405 }, { "epoch": 0.7524986271279517, "grad_norm": 0.374319851398468, "learning_rate": 1.3815858646363789e-05, "loss": 0.5255, "step": 27406 }, { "epoch": 0.7525260845689182, "grad_norm": 0.40697139501571655, "learning_rate": 1.3815459429303784e-05, "loss": 0.567, "step": 27407 }, { "epoch": 0.7525535420098847, "grad_norm": 0.33135420083999634, "learning_rate": 1.381506020512673e-05, "loss": 0.3816, "step": 27408 }, { "epoch": 0.7525809994508512, "grad_norm": 0.3707138001918793, "learning_rate": 1.3814660973833372e-05, "loss": 0.4718, "step": 27409 }, { "epoch": 0.7526084568918177, "grad_norm": 0.3785172402858734, "learning_rate": 1.3814261735424453e-05, "loss": 0.5114, "step": 27410 }, { "epoch": 0.7526359143327842, "grad_norm": 0.41617366671562195, "learning_rate": 1.3813862489900719e-05, "loss": 0.55, "step": 27411 }, { "epoch": 0.7526633717737506, "grad_norm": 0.39219939708709717, "learning_rate": 1.3813463237262913e-05, "loss": 0.4659, "step": 27412 }, { "epoch": 0.7526908292147172, "grad_norm": 0.3991316556930542, "learning_rate": 1.3813063977511783e-05, "loss": 0.4545, "step": 27413 }, { "epoch": 0.7527182866556837, "grad_norm": 0.39374956488609314, "learning_rate": 1.3812664710648072e-05, "loss": 0.519, "step": 27414 }, { "epoch": 0.7527457440966502, "grad_norm": 0.36585918068885803, "learning_rate": 1.381226543667252e-05, "loss": 0.4567, "step": 27415 }, { "epoch": 0.7527732015376167, "grad_norm": 0.42882782220840454, "learning_rate": 1.3811866155585883e-05, "loss": 0.4442, "step": 27416 }, { "epoch": 0.7528006589785832, "grad_norm": 0.41047266125679016, "learning_rate": 1.3811466867388894e-05, "loss": 0.5326, "step": 27417 }, { "epoch": 0.7528281164195497, "grad_norm": 0.40959838032722473, "learning_rate": 1.3811067572082304e-05, "loss": 0.4822, "step": 27418 }, { "epoch": 0.7528555738605162, "grad_norm": 0.365744948387146, "learning_rate": 1.3810668269666856e-05, "loss": 0.5346, "step": 27419 }, { "epoch": 0.7528830313014827, "grad_norm": 0.3976588547229767, "learning_rate": 1.3810268960143296e-05, "loss": 0.4374, "step": 27420 }, { "epoch": 0.7529104887424493, "grad_norm": 0.37935927510261536, "learning_rate": 1.3809869643512368e-05, "loss": 0.5137, "step": 27421 }, { "epoch": 0.7529379461834157, "grad_norm": 0.42621850967407227, "learning_rate": 1.3809470319774821e-05, "loss": 0.5077, "step": 27422 }, { "epoch": 0.7529654036243822, "grad_norm": 0.34526926279067993, "learning_rate": 1.380907098893139e-05, "loss": 0.4688, "step": 27423 }, { "epoch": 0.7529928610653487, "grad_norm": 0.38526463508605957, "learning_rate": 1.3808671650982831e-05, "loss": 0.433, "step": 27424 }, { "epoch": 0.7530203185063152, "grad_norm": 0.37267425656318665, "learning_rate": 1.3808272305929879e-05, "loss": 0.4488, "step": 27425 }, { "epoch": 0.7530477759472817, "grad_norm": 0.34904181957244873, "learning_rate": 1.3807872953773287e-05, "loss": 0.4018, "step": 27426 }, { "epoch": 0.7530752333882482, "grad_norm": 0.36720648407936096, "learning_rate": 1.3807473594513797e-05, "loss": 0.4553, "step": 27427 }, { "epoch": 0.7531026908292148, "grad_norm": 0.3960861563682556, "learning_rate": 1.380707422815215e-05, "loss": 0.4306, "step": 27428 }, { "epoch": 0.7531301482701812, "grad_norm": 0.37422430515289307, "learning_rate": 1.3806674854689097e-05, "loss": 0.4784, "step": 27429 }, { "epoch": 0.7531576057111478, "grad_norm": 0.4131346046924591, "learning_rate": 1.3806275474125381e-05, "loss": 0.516, "step": 27430 }, { "epoch": 0.7531850631521142, "grad_norm": 0.3767428994178772, "learning_rate": 1.3805876086461744e-05, "loss": 0.5444, "step": 27431 }, { "epoch": 0.7532125205930807, "grad_norm": 0.38154444098472595, "learning_rate": 1.3805476691698937e-05, "loss": 0.4913, "step": 27432 }, { "epoch": 0.7532399780340472, "grad_norm": 0.36562278866767883, "learning_rate": 1.3805077289837698e-05, "loss": 0.5133, "step": 27433 }, { "epoch": 0.7532674354750137, "grad_norm": 0.410063773393631, "learning_rate": 1.3804677880878778e-05, "loss": 0.5191, "step": 27434 }, { "epoch": 0.7532948929159803, "grad_norm": 0.39462798833847046, "learning_rate": 1.380427846482292e-05, "loss": 0.5038, "step": 27435 }, { "epoch": 0.7533223503569467, "grad_norm": 0.3686285614967346, "learning_rate": 1.3803879041670864e-05, "loss": 0.5295, "step": 27436 }, { "epoch": 0.7533498077979133, "grad_norm": 0.4308117926120758, "learning_rate": 1.3803479611423366e-05, "loss": 0.5169, "step": 27437 }, { "epoch": 0.7533772652388797, "grad_norm": 0.3405895233154297, "learning_rate": 1.3803080174081158e-05, "loss": 0.475, "step": 27438 }, { "epoch": 0.7534047226798463, "grad_norm": 0.3409496247768402, "learning_rate": 1.3802680729644995e-05, "loss": 0.3752, "step": 27439 }, { "epoch": 0.7534321801208127, "grad_norm": 0.3883706331253052, "learning_rate": 1.380228127811562e-05, "loss": 0.4774, "step": 27440 }, { "epoch": 0.7534596375617792, "grad_norm": 0.3556082248687744, "learning_rate": 1.3801881819493772e-05, "loss": 0.4838, "step": 27441 }, { "epoch": 0.7534870950027458, "grad_norm": 0.4511399567127228, "learning_rate": 1.3801482353780204e-05, "loss": 0.595, "step": 27442 }, { "epoch": 0.7535145524437122, "grad_norm": 0.41371944546699524, "learning_rate": 1.380108288097566e-05, "loss": 0.6038, "step": 27443 }, { "epoch": 0.7535420098846788, "grad_norm": 0.34458720684051514, "learning_rate": 1.3800683401080882e-05, "loss": 0.4381, "step": 27444 }, { "epoch": 0.7535694673256452, "grad_norm": 0.4064805805683136, "learning_rate": 1.3800283914096616e-05, "loss": 0.539, "step": 27445 }, { "epoch": 0.7535969247666118, "grad_norm": 0.39985084533691406, "learning_rate": 1.3799884420023606e-05, "loss": 0.4702, "step": 27446 }, { "epoch": 0.7536243822075782, "grad_norm": 0.3620544672012329, "learning_rate": 1.37994849188626e-05, "loss": 0.5339, "step": 27447 }, { "epoch": 0.7536518396485448, "grad_norm": 0.5194942951202393, "learning_rate": 1.3799085410614343e-05, "loss": 0.5034, "step": 27448 }, { "epoch": 0.7536792970895112, "grad_norm": 0.42927801609039307, "learning_rate": 1.3798685895279578e-05, "loss": 0.5396, "step": 27449 }, { "epoch": 0.7537067545304778, "grad_norm": 0.4543072283267975, "learning_rate": 1.3798286372859053e-05, "loss": 0.5618, "step": 27450 }, { "epoch": 0.7537342119714443, "grad_norm": 0.4229802191257477, "learning_rate": 1.379788684335351e-05, "loss": 0.5486, "step": 27451 }, { "epoch": 0.7537616694124107, "grad_norm": 0.3706176280975342, "learning_rate": 1.3797487306763698e-05, "loss": 0.4763, "step": 27452 }, { "epoch": 0.7537891268533773, "grad_norm": 0.4034881293773651, "learning_rate": 1.3797087763090357e-05, "loss": 0.5139, "step": 27453 }, { "epoch": 0.7538165842943437, "grad_norm": 0.3683713674545288, "learning_rate": 1.3796688212334238e-05, "loss": 0.4798, "step": 27454 }, { "epoch": 0.7538440417353103, "grad_norm": 0.43784400820732117, "learning_rate": 1.3796288654496081e-05, "loss": 0.5323, "step": 27455 }, { "epoch": 0.7538714991762767, "grad_norm": 0.3527250587940216, "learning_rate": 1.3795889089576636e-05, "loss": 0.4427, "step": 27456 }, { "epoch": 0.7538989566172433, "grad_norm": 0.3878675103187561, "learning_rate": 1.3795489517576645e-05, "loss": 0.5214, "step": 27457 }, { "epoch": 0.7539264140582098, "grad_norm": 0.380063533782959, "learning_rate": 1.3795089938496857e-05, "loss": 0.5037, "step": 27458 }, { "epoch": 0.7539538714991763, "grad_norm": 0.37094417214393616, "learning_rate": 1.3794690352338014e-05, "loss": 0.5636, "step": 27459 }, { "epoch": 0.7539813289401428, "grad_norm": 0.5691012740135193, "learning_rate": 1.3794290759100863e-05, "loss": 0.5687, "step": 27460 }, { "epoch": 0.7540087863811092, "grad_norm": 0.38244369626045227, "learning_rate": 1.3793891158786149e-05, "loss": 0.4663, "step": 27461 }, { "epoch": 0.7540362438220758, "grad_norm": 0.4005008935928345, "learning_rate": 1.3793491551394613e-05, "loss": 0.5172, "step": 27462 }, { "epoch": 0.7540637012630422, "grad_norm": 0.3730928599834442, "learning_rate": 1.379309193692701e-05, "loss": 0.4635, "step": 27463 }, { "epoch": 0.7540911587040088, "grad_norm": 0.36966219544410706, "learning_rate": 1.3792692315384076e-05, "loss": 0.4395, "step": 27464 }, { "epoch": 0.7541186161449753, "grad_norm": 0.49581119418144226, "learning_rate": 1.3792292686766561e-05, "loss": 0.5116, "step": 27465 }, { "epoch": 0.7541460735859418, "grad_norm": 0.40539538860321045, "learning_rate": 1.3791893051075214e-05, "loss": 0.5251, "step": 27466 }, { "epoch": 0.7541735310269083, "grad_norm": 0.37437543272972107, "learning_rate": 1.379149340831077e-05, "loss": 0.5035, "step": 27467 }, { "epoch": 0.7542009884678748, "grad_norm": 0.3826046884059906, "learning_rate": 1.3791093758473984e-05, "loss": 0.3972, "step": 27468 }, { "epoch": 0.7542284459088413, "grad_norm": 0.3767905533313751, "learning_rate": 1.3790694101565598e-05, "loss": 0.4711, "step": 27469 }, { "epoch": 0.7542559033498077, "grad_norm": 0.36417174339294434, "learning_rate": 1.3790294437586358e-05, "loss": 0.4459, "step": 27470 }, { "epoch": 0.7542833607907743, "grad_norm": 0.36076247692108154, "learning_rate": 1.378989476653701e-05, "loss": 0.499, "step": 27471 }, { "epoch": 0.7543108182317408, "grad_norm": 0.9807671904563904, "learning_rate": 1.3789495088418293e-05, "loss": 0.6576, "step": 27472 }, { "epoch": 0.7543382756727073, "grad_norm": 0.3835633099079132, "learning_rate": 1.3789095403230962e-05, "loss": 0.4201, "step": 27473 }, { "epoch": 0.7543657331136738, "grad_norm": 0.38365963101387024, "learning_rate": 1.3788695710975761e-05, "loss": 0.4819, "step": 27474 }, { "epoch": 0.7543931905546403, "grad_norm": 0.39149606227874756, "learning_rate": 1.378829601165343e-05, "loss": 0.4258, "step": 27475 }, { "epoch": 0.7544206479956068, "grad_norm": 0.37589630484580994, "learning_rate": 1.3787896305264721e-05, "loss": 0.5428, "step": 27476 }, { "epoch": 0.7544481054365733, "grad_norm": 0.38685938715934753, "learning_rate": 1.3787496591810374e-05, "loss": 0.5408, "step": 27477 }, { "epoch": 0.7544755628775398, "grad_norm": 0.5689713954925537, "learning_rate": 1.3787096871291139e-05, "loss": 0.5971, "step": 27478 }, { "epoch": 0.7545030203185064, "grad_norm": 0.4438979923725128, "learning_rate": 1.3786697143707759e-05, "loss": 0.5404, "step": 27479 }, { "epoch": 0.7545304777594728, "grad_norm": 0.35911253094673157, "learning_rate": 1.3786297409060978e-05, "loss": 0.5055, "step": 27480 }, { "epoch": 0.7545579352004393, "grad_norm": 0.39251431822776794, "learning_rate": 1.3785897667351545e-05, "loss": 0.5412, "step": 27481 }, { "epoch": 0.7545853926414058, "grad_norm": 0.3521425724029541, "learning_rate": 1.3785497918580205e-05, "loss": 0.4863, "step": 27482 }, { "epoch": 0.7546128500823723, "grad_norm": 0.43061575293540955, "learning_rate": 1.3785098162747703e-05, "loss": 0.5482, "step": 27483 }, { "epoch": 0.7546403075233388, "grad_norm": 0.3611091077327728, "learning_rate": 1.3784698399854786e-05, "loss": 0.4043, "step": 27484 }, { "epoch": 0.7546677649643053, "grad_norm": 0.38685232400894165, "learning_rate": 1.3784298629902195e-05, "loss": 0.4695, "step": 27485 }, { "epoch": 0.7546952224052719, "grad_norm": 0.3795594573020935, "learning_rate": 1.3783898852890683e-05, "loss": 0.5184, "step": 27486 }, { "epoch": 0.7547226798462383, "grad_norm": 0.4886377453804016, "learning_rate": 1.378349906882099e-05, "loss": 0.4802, "step": 27487 }, { "epoch": 0.7547501372872049, "grad_norm": 0.3918628394603729, "learning_rate": 1.3783099277693866e-05, "loss": 0.4373, "step": 27488 }, { "epoch": 0.7547775947281713, "grad_norm": 0.500649094581604, "learning_rate": 1.3782699479510052e-05, "loss": 0.528, "step": 27489 }, { "epoch": 0.7548050521691378, "grad_norm": 0.3562527596950531, "learning_rate": 1.3782299674270297e-05, "loss": 0.4898, "step": 27490 }, { "epoch": 0.7548325096101043, "grad_norm": 0.5865973830223083, "learning_rate": 1.3781899861975347e-05, "loss": 0.4915, "step": 27491 }, { "epoch": 0.7548599670510708, "grad_norm": 0.3637229800224304, "learning_rate": 1.3781500042625946e-05, "loss": 0.4518, "step": 27492 }, { "epoch": 0.7548874244920374, "grad_norm": 0.46118590235710144, "learning_rate": 1.378110021622284e-05, "loss": 0.5998, "step": 27493 }, { "epoch": 0.7549148819330038, "grad_norm": 0.4428277015686035, "learning_rate": 1.3780700382766776e-05, "loss": 0.5025, "step": 27494 }, { "epoch": 0.7549423393739704, "grad_norm": 0.4031149446964264, "learning_rate": 1.3780300542258496e-05, "loss": 0.4418, "step": 27495 }, { "epoch": 0.7549697968149368, "grad_norm": 0.37004002928733826, "learning_rate": 1.3779900694698754e-05, "loss": 0.501, "step": 27496 }, { "epoch": 0.7549972542559034, "grad_norm": 0.497615247964859, "learning_rate": 1.377950084008829e-05, "loss": 0.4866, "step": 27497 }, { "epoch": 0.7550247116968698, "grad_norm": 0.3661970794200897, "learning_rate": 1.3779100978427846e-05, "loss": 0.5456, "step": 27498 }, { "epoch": 0.7550521691378363, "grad_norm": 0.342549204826355, "learning_rate": 1.3778701109718178e-05, "loss": 0.4795, "step": 27499 }, { "epoch": 0.7550796265788029, "grad_norm": 0.3651229441165924, "learning_rate": 1.3778301233960022e-05, "loss": 0.4366, "step": 27500 }, { "epoch": 0.7551070840197693, "grad_norm": 0.38635557889938354, "learning_rate": 1.377790135115413e-05, "loss": 0.4826, "step": 27501 }, { "epoch": 0.7551345414607359, "grad_norm": 0.5230139493942261, "learning_rate": 1.3777501461301249e-05, "loss": 0.4723, "step": 27502 }, { "epoch": 0.7551619989017023, "grad_norm": 0.3606136739253998, "learning_rate": 1.3777101564402117e-05, "loss": 0.5097, "step": 27503 }, { "epoch": 0.7551894563426689, "grad_norm": 0.3978869318962097, "learning_rate": 1.377670166045749e-05, "loss": 0.5297, "step": 27504 }, { "epoch": 0.7552169137836353, "grad_norm": 0.3582232594490051, "learning_rate": 1.3776301749468106e-05, "loss": 0.5088, "step": 27505 }, { "epoch": 0.7552443712246019, "grad_norm": 0.4276093542575836, "learning_rate": 1.3775901831434713e-05, "loss": 0.5309, "step": 27506 }, { "epoch": 0.7552718286655684, "grad_norm": 0.3917903006076813, "learning_rate": 1.3775501906358062e-05, "loss": 0.5128, "step": 27507 }, { "epoch": 0.7552992861065349, "grad_norm": 0.3940390646457672, "learning_rate": 1.377510197423889e-05, "loss": 0.5618, "step": 27508 }, { "epoch": 0.7553267435475014, "grad_norm": 0.3739676773548126, "learning_rate": 1.3774702035077951e-05, "loss": 0.4741, "step": 27509 }, { "epoch": 0.7553542009884678, "grad_norm": 0.40808942914009094, "learning_rate": 1.3774302088875987e-05, "loss": 0.5045, "step": 27510 }, { "epoch": 0.7553816584294344, "grad_norm": 0.6243113279342651, "learning_rate": 1.3773902135633747e-05, "loss": 0.4722, "step": 27511 }, { "epoch": 0.7554091158704008, "grad_norm": 0.37808215618133545, "learning_rate": 1.3773502175351973e-05, "loss": 0.4433, "step": 27512 }, { "epoch": 0.7554365733113674, "grad_norm": 0.4219508767127991, "learning_rate": 1.3773102208031413e-05, "loss": 0.5362, "step": 27513 }, { "epoch": 0.7554640307523339, "grad_norm": 0.4148716926574707, "learning_rate": 1.3772702233672814e-05, "loss": 0.513, "step": 27514 }, { "epoch": 0.7554914881933004, "grad_norm": 0.40660879015922546, "learning_rate": 1.3772302252276924e-05, "loss": 0.4511, "step": 27515 }, { "epoch": 0.7555189456342669, "grad_norm": 0.3832208812236786, "learning_rate": 1.3771902263844481e-05, "loss": 0.4975, "step": 27516 }, { "epoch": 0.7555464030752334, "grad_norm": 0.41661015152931213, "learning_rate": 1.3771502268376237e-05, "loss": 0.5349, "step": 27517 }, { "epoch": 0.7555738605161999, "grad_norm": 0.4264613091945648, "learning_rate": 1.377110226587294e-05, "loss": 0.4368, "step": 27518 }, { "epoch": 0.7556013179571663, "grad_norm": 0.3765987753868103, "learning_rate": 1.3770702256335333e-05, "loss": 0.5371, "step": 27519 }, { "epoch": 0.7556287753981329, "grad_norm": 0.3989420235157013, "learning_rate": 1.3770302239764165e-05, "loss": 0.4593, "step": 27520 }, { "epoch": 0.7556562328390994, "grad_norm": 0.36543136835098267, "learning_rate": 1.3769902216160176e-05, "loss": 0.4634, "step": 27521 }, { "epoch": 0.7556836902800659, "grad_norm": 0.3724704384803772, "learning_rate": 1.376950218552412e-05, "loss": 0.4284, "step": 27522 }, { "epoch": 0.7557111477210324, "grad_norm": 0.3520794212818146, "learning_rate": 1.3769102147856737e-05, "loss": 0.4536, "step": 27523 }, { "epoch": 0.7557386051619989, "grad_norm": 0.44091513752937317, "learning_rate": 1.3768702103158775e-05, "loss": 0.489, "step": 27524 }, { "epoch": 0.7557660626029654, "grad_norm": 0.41204312443733215, "learning_rate": 1.3768302051430982e-05, "loss": 0.4473, "step": 27525 }, { "epoch": 0.7557935200439319, "grad_norm": 0.4513415992259979, "learning_rate": 1.37679019926741e-05, "loss": 0.4873, "step": 27526 }, { "epoch": 0.7558209774848984, "grad_norm": 0.40007126331329346, "learning_rate": 1.3767501926888882e-05, "loss": 0.4542, "step": 27527 }, { "epoch": 0.755848434925865, "grad_norm": 0.38244155049324036, "learning_rate": 1.376710185407607e-05, "loss": 0.5178, "step": 27528 }, { "epoch": 0.7558758923668314, "grad_norm": 0.38126128911972046, "learning_rate": 1.3766701774236409e-05, "loss": 0.4114, "step": 27529 }, { "epoch": 0.7559033498077979, "grad_norm": 0.5126073956489563, "learning_rate": 1.3766301687370649e-05, "loss": 0.516, "step": 27530 }, { "epoch": 0.7559308072487644, "grad_norm": 0.4202978312969208, "learning_rate": 1.376590159347953e-05, "loss": 0.4936, "step": 27531 }, { "epoch": 0.7559582646897309, "grad_norm": 0.4048677980899811, "learning_rate": 1.376550149256381e-05, "loss": 0.5367, "step": 27532 }, { "epoch": 0.7559857221306974, "grad_norm": 0.4019112288951874, "learning_rate": 1.3765101384624221e-05, "loss": 0.5163, "step": 27533 }, { "epoch": 0.7560131795716639, "grad_norm": 0.4039137363433838, "learning_rate": 1.376470126966152e-05, "loss": 0.538, "step": 27534 }, { "epoch": 0.7560406370126305, "grad_norm": 0.35743454098701477, "learning_rate": 1.376430114767645e-05, "loss": 0.4983, "step": 27535 }, { "epoch": 0.7560680944535969, "grad_norm": 0.4129966199398041, "learning_rate": 1.3763901018669756e-05, "loss": 0.4663, "step": 27536 }, { "epoch": 0.7560955518945635, "grad_norm": 0.3555627465248108, "learning_rate": 1.3763500882642183e-05, "loss": 0.457, "step": 27537 }, { "epoch": 0.7561230093355299, "grad_norm": 0.4347292482852936, "learning_rate": 1.3763100739594482e-05, "loss": 0.4133, "step": 27538 }, { "epoch": 0.7561504667764964, "grad_norm": 0.34317827224731445, "learning_rate": 1.3762700589527396e-05, "loss": 0.4884, "step": 27539 }, { "epoch": 0.7561779242174629, "grad_norm": 0.41400739550590515, "learning_rate": 1.3762300432441674e-05, "loss": 0.6011, "step": 27540 }, { "epoch": 0.7562053816584294, "grad_norm": 0.38131603598594666, "learning_rate": 1.376190026833806e-05, "loss": 0.3998, "step": 27541 }, { "epoch": 0.756232839099396, "grad_norm": 0.3886866867542267, "learning_rate": 1.3761500097217299e-05, "loss": 0.4698, "step": 27542 }, { "epoch": 0.7562602965403624, "grad_norm": 0.4297283887863159, "learning_rate": 1.3761099919080143e-05, "loss": 0.556, "step": 27543 }, { "epoch": 0.756287753981329, "grad_norm": 0.5573281645774841, "learning_rate": 1.3760699733927334e-05, "loss": 0.5372, "step": 27544 }, { "epoch": 0.7563152114222954, "grad_norm": 0.40695157647132874, "learning_rate": 1.376029954175962e-05, "loss": 0.447, "step": 27545 }, { "epoch": 0.756342668863262, "grad_norm": 0.4201756715774536, "learning_rate": 1.375989934257775e-05, "loss": 0.475, "step": 27546 }, { "epoch": 0.7563701263042284, "grad_norm": 0.34100592136383057, "learning_rate": 1.3759499136382462e-05, "loss": 0.4757, "step": 27547 }, { "epoch": 0.756397583745195, "grad_norm": 0.4017561376094818, "learning_rate": 1.3759098923174512e-05, "loss": 0.5395, "step": 27548 }, { "epoch": 0.7564250411861615, "grad_norm": 0.3643178641796112, "learning_rate": 1.375869870295464e-05, "loss": 0.5211, "step": 27549 }, { "epoch": 0.7564524986271279, "grad_norm": 0.4174191653728485, "learning_rate": 1.37582984757236e-05, "loss": 0.5496, "step": 27550 }, { "epoch": 0.7564799560680945, "grad_norm": 0.36081743240356445, "learning_rate": 1.3757898241482128e-05, "loss": 0.4641, "step": 27551 }, { "epoch": 0.7565074135090609, "grad_norm": 0.4405480921268463, "learning_rate": 1.375749800023098e-05, "loss": 0.5081, "step": 27552 }, { "epoch": 0.7565348709500275, "grad_norm": 0.3774234354496002, "learning_rate": 1.3757097751970898e-05, "loss": 0.5286, "step": 27553 }, { "epoch": 0.7565623283909939, "grad_norm": 0.4056599736213684, "learning_rate": 1.3756697496702631e-05, "loss": 0.5921, "step": 27554 }, { "epoch": 0.7565897858319605, "grad_norm": 0.36997726559638977, "learning_rate": 1.3756297234426923e-05, "loss": 0.5274, "step": 27555 }, { "epoch": 0.756617243272927, "grad_norm": 0.40627744793891907, "learning_rate": 1.3755896965144521e-05, "loss": 0.4804, "step": 27556 }, { "epoch": 0.7566447007138934, "grad_norm": 0.3889913856983185, "learning_rate": 1.3755496688856173e-05, "loss": 0.4592, "step": 27557 }, { "epoch": 0.75667215815486, "grad_norm": 0.3482077121734619, "learning_rate": 1.3755096405562627e-05, "loss": 0.4783, "step": 27558 }, { "epoch": 0.7566996155958264, "grad_norm": 0.3734759986400604, "learning_rate": 1.3754696115264626e-05, "loss": 0.4343, "step": 27559 }, { "epoch": 0.756727073036793, "grad_norm": 0.3776051998138428, "learning_rate": 1.3754295817962917e-05, "loss": 0.4727, "step": 27560 }, { "epoch": 0.7567545304777594, "grad_norm": 0.35889962315559387, "learning_rate": 1.3753895513658249e-05, "loss": 0.4344, "step": 27561 }, { "epoch": 0.756781987918726, "grad_norm": 0.39201265573501587, "learning_rate": 1.3753495202351368e-05, "loss": 0.5141, "step": 27562 }, { "epoch": 0.7568094453596925, "grad_norm": 0.39373087882995605, "learning_rate": 1.3753094884043023e-05, "loss": 0.4385, "step": 27563 }, { "epoch": 0.756836902800659, "grad_norm": 0.3407222330570221, "learning_rate": 1.3752694558733954e-05, "loss": 0.4612, "step": 27564 }, { "epoch": 0.7568643602416255, "grad_norm": 0.3993576765060425, "learning_rate": 1.3752294226424916e-05, "loss": 0.5612, "step": 27565 }, { "epoch": 0.756891817682592, "grad_norm": 0.4099799692630768, "learning_rate": 1.3751893887116647e-05, "loss": 0.5849, "step": 27566 }, { "epoch": 0.7569192751235585, "grad_norm": 0.4852117598056793, "learning_rate": 1.3751493540809901e-05, "loss": 0.4857, "step": 27567 }, { "epoch": 0.7569467325645249, "grad_norm": 0.3952242434024811, "learning_rate": 1.3751093187505422e-05, "loss": 0.5127, "step": 27568 }, { "epoch": 0.7569741900054915, "grad_norm": 0.399491548538208, "learning_rate": 1.3750692827203957e-05, "loss": 0.5351, "step": 27569 }, { "epoch": 0.757001647446458, "grad_norm": 0.4401789605617523, "learning_rate": 1.3750292459906253e-05, "loss": 0.5556, "step": 27570 }, { "epoch": 0.7570291048874245, "grad_norm": 0.5013060569763184, "learning_rate": 1.3749892085613057e-05, "loss": 0.542, "step": 27571 }, { "epoch": 0.757056562328391, "grad_norm": 0.3615363538265228, "learning_rate": 1.3749491704325117e-05, "loss": 0.4643, "step": 27572 }, { "epoch": 0.7570840197693575, "grad_norm": 0.39951735734939575, "learning_rate": 1.3749091316043175e-05, "loss": 0.4758, "step": 27573 }, { "epoch": 0.757111477210324, "grad_norm": 0.3840891420841217, "learning_rate": 1.3748690920767982e-05, "loss": 0.5358, "step": 27574 }, { "epoch": 0.7571389346512905, "grad_norm": 0.3357703983783722, "learning_rate": 1.3748290518500285e-05, "loss": 0.4821, "step": 27575 }, { "epoch": 0.757166392092257, "grad_norm": 0.3693673312664032, "learning_rate": 1.374789010924083e-05, "loss": 0.5422, "step": 27576 }, { "epoch": 0.7571938495332236, "grad_norm": 0.3842740058898926, "learning_rate": 1.3747489692990361e-05, "loss": 0.4698, "step": 27577 }, { "epoch": 0.75722130697419, "grad_norm": 0.40051543712615967, "learning_rate": 1.374708926974963e-05, "loss": 0.5529, "step": 27578 }, { "epoch": 0.7572487644151565, "grad_norm": 0.39086419343948364, "learning_rate": 1.3746688839519383e-05, "loss": 0.5049, "step": 27579 }, { "epoch": 0.757276221856123, "grad_norm": 0.4792500436306, "learning_rate": 1.3746288402300363e-05, "loss": 0.539, "step": 27580 }, { "epoch": 0.7573036792970895, "grad_norm": 0.3595951497554779, "learning_rate": 1.374588795809332e-05, "loss": 0.4222, "step": 27581 }, { "epoch": 0.757331136738056, "grad_norm": 0.384039044380188, "learning_rate": 1.3745487506899e-05, "loss": 0.4607, "step": 27582 }, { "epoch": 0.7573585941790225, "grad_norm": 0.3714715838432312, "learning_rate": 1.3745087048718151e-05, "loss": 0.463, "step": 27583 }, { "epoch": 0.7573860516199891, "grad_norm": 0.3681863844394684, "learning_rate": 1.374468658355152e-05, "loss": 0.4647, "step": 27584 }, { "epoch": 0.7574135090609555, "grad_norm": 0.34809988737106323, "learning_rate": 1.3744286111399854e-05, "loss": 0.4013, "step": 27585 }, { "epoch": 0.757440966501922, "grad_norm": 0.37563320994377136, "learning_rate": 1.3743885632263896e-05, "loss": 0.4699, "step": 27586 }, { "epoch": 0.7574684239428885, "grad_norm": 0.3870886564254761, "learning_rate": 1.37434851461444e-05, "loss": 0.5267, "step": 27587 }, { "epoch": 0.757495881383855, "grad_norm": 0.48617035150527954, "learning_rate": 1.3743084653042108e-05, "loss": 0.5996, "step": 27588 }, { "epoch": 0.7575233388248215, "grad_norm": 0.3436395823955536, "learning_rate": 1.3742684152957768e-05, "loss": 0.4447, "step": 27589 }, { "epoch": 0.757550796265788, "grad_norm": 0.36828941106796265, "learning_rate": 1.3742283645892127e-05, "loss": 0.4467, "step": 27590 }, { "epoch": 0.7575782537067546, "grad_norm": 0.4570577144622803, "learning_rate": 1.3741883131845933e-05, "loss": 0.489, "step": 27591 }, { "epoch": 0.757605711147721, "grad_norm": 0.36666497588157654, "learning_rate": 1.3741482610819934e-05, "loss": 0.4416, "step": 27592 }, { "epoch": 0.7576331685886876, "grad_norm": 0.40060052275657654, "learning_rate": 1.3741082082814874e-05, "loss": 0.5132, "step": 27593 }, { "epoch": 0.757660626029654, "grad_norm": 0.4599471390247345, "learning_rate": 1.3740681547831505e-05, "loss": 0.4231, "step": 27594 }, { "epoch": 0.7576880834706206, "grad_norm": 0.3865695595741272, "learning_rate": 1.3740281005870568e-05, "loss": 0.4361, "step": 27595 }, { "epoch": 0.757715540911587, "grad_norm": 0.3990618586540222, "learning_rate": 1.3739880456932815e-05, "loss": 0.5831, "step": 27596 }, { "epoch": 0.7577429983525535, "grad_norm": 0.39022350311279297, "learning_rate": 1.373947990101899e-05, "loss": 0.4345, "step": 27597 }, { "epoch": 0.7577704557935201, "grad_norm": 0.35238388180732727, "learning_rate": 1.3739079338129843e-05, "loss": 0.4136, "step": 27598 }, { "epoch": 0.7577979132344865, "grad_norm": 0.42477521300315857, "learning_rate": 1.3738678768266116e-05, "loss": 0.5458, "step": 27599 }, { "epoch": 0.7578253706754531, "grad_norm": 0.4255114197731018, "learning_rate": 1.3738278191428565e-05, "loss": 0.54, "step": 27600 }, { "epoch": 0.7578528281164195, "grad_norm": 0.4043692648410797, "learning_rate": 1.3737877607617926e-05, "loss": 0.5044, "step": 27601 }, { "epoch": 0.7578802855573861, "grad_norm": 0.3934517800807953, "learning_rate": 1.3737477016834957e-05, "loss": 0.494, "step": 27602 }, { "epoch": 0.7579077429983525, "grad_norm": 0.48621514439582825, "learning_rate": 1.3737076419080401e-05, "loss": 0.4319, "step": 27603 }, { "epoch": 0.7579352004393191, "grad_norm": 0.3886386454105377, "learning_rate": 1.3736675814355e-05, "loss": 0.5077, "step": 27604 }, { "epoch": 0.7579626578802856, "grad_norm": 0.43813198804855347, "learning_rate": 1.3736275202659513e-05, "loss": 0.4689, "step": 27605 }, { "epoch": 0.757990115321252, "grad_norm": 0.43443629145622253, "learning_rate": 1.3735874583994672e-05, "loss": 0.589, "step": 27606 }, { "epoch": 0.7580175727622186, "grad_norm": 0.40044865012168884, "learning_rate": 1.3735473958361239e-05, "loss": 0.5359, "step": 27607 }, { "epoch": 0.758045030203185, "grad_norm": 0.43496739864349365, "learning_rate": 1.3735073325759953e-05, "loss": 0.5286, "step": 27608 }, { "epoch": 0.7580724876441516, "grad_norm": 0.4170817732810974, "learning_rate": 1.3734672686191561e-05, "loss": 0.4732, "step": 27609 }, { "epoch": 0.758099945085118, "grad_norm": 0.38607722520828247, "learning_rate": 1.3734272039656815e-05, "loss": 0.5543, "step": 27610 }, { "epoch": 0.7581274025260846, "grad_norm": 0.37256914377212524, "learning_rate": 1.3733871386156459e-05, "loss": 0.4916, "step": 27611 }, { "epoch": 0.7581548599670511, "grad_norm": 0.3554389178752899, "learning_rate": 1.373347072569124e-05, "loss": 0.4776, "step": 27612 }, { "epoch": 0.7581823174080176, "grad_norm": 0.5816598534584045, "learning_rate": 1.373307005826191e-05, "loss": 0.5048, "step": 27613 }, { "epoch": 0.7582097748489841, "grad_norm": 0.3698295056819916, "learning_rate": 1.3732669383869208e-05, "loss": 0.4519, "step": 27614 }, { "epoch": 0.7582372322899505, "grad_norm": 0.33602607250213623, "learning_rate": 1.3732268702513891e-05, "loss": 0.4012, "step": 27615 }, { "epoch": 0.7582646897309171, "grad_norm": 0.3773578703403473, "learning_rate": 1.3731868014196701e-05, "loss": 0.4549, "step": 27616 }, { "epoch": 0.7582921471718835, "grad_norm": 0.4213685393333435, "learning_rate": 1.3731467318918383e-05, "loss": 0.5388, "step": 27617 }, { "epoch": 0.7583196046128501, "grad_norm": 0.368636816740036, "learning_rate": 1.373106661667969e-05, "loss": 0.4665, "step": 27618 }, { "epoch": 0.7583470620538166, "grad_norm": 0.38233712315559387, "learning_rate": 1.3730665907481364e-05, "loss": 0.4602, "step": 27619 }, { "epoch": 0.7583745194947831, "grad_norm": 0.40753936767578125, "learning_rate": 1.3730265191324159e-05, "loss": 0.5478, "step": 27620 }, { "epoch": 0.7584019769357496, "grad_norm": 0.3705849051475525, "learning_rate": 1.3729864468208817e-05, "loss": 0.5763, "step": 27621 }, { "epoch": 0.7584294343767161, "grad_norm": 0.383811354637146, "learning_rate": 1.3729463738136088e-05, "loss": 0.404, "step": 27622 }, { "epoch": 0.7584568918176826, "grad_norm": 0.40676844120025635, "learning_rate": 1.3729063001106719e-05, "loss": 0.4295, "step": 27623 }, { "epoch": 0.758484349258649, "grad_norm": 0.4472912847995758, "learning_rate": 1.3728662257121455e-05, "loss": 0.5733, "step": 27624 }, { "epoch": 0.7585118066996156, "grad_norm": 0.3761669397354126, "learning_rate": 1.3728261506181048e-05, "loss": 0.4398, "step": 27625 }, { "epoch": 0.7585392641405821, "grad_norm": 0.35715365409851074, "learning_rate": 1.3727860748286244e-05, "loss": 0.4147, "step": 27626 }, { "epoch": 0.7585667215815486, "grad_norm": 0.3594139814376831, "learning_rate": 1.3727459983437788e-05, "loss": 0.5431, "step": 27627 }, { "epoch": 0.7585941790225151, "grad_norm": 0.48935559391975403, "learning_rate": 1.372705921163643e-05, "loss": 0.5208, "step": 27628 }, { "epoch": 0.7586216364634816, "grad_norm": 0.3724452257156372, "learning_rate": 1.3726658432882919e-05, "loss": 0.4872, "step": 27629 }, { "epoch": 0.7586490939044481, "grad_norm": 0.4394300580024719, "learning_rate": 1.3726257647177998e-05, "loss": 0.5234, "step": 27630 }, { "epoch": 0.7586765513454146, "grad_norm": 0.4020772874355316, "learning_rate": 1.3725856854522419e-05, "loss": 0.4704, "step": 27631 }, { "epoch": 0.7587040087863811, "grad_norm": 0.39618340134620667, "learning_rate": 1.3725456054916926e-05, "loss": 0.4931, "step": 27632 }, { "epoch": 0.7587314662273477, "grad_norm": 0.37641340494155884, "learning_rate": 1.372505524836227e-05, "loss": 0.4451, "step": 27633 }, { "epoch": 0.7587589236683141, "grad_norm": 0.41824963688850403, "learning_rate": 1.3724654434859197e-05, "loss": 0.491, "step": 27634 }, { "epoch": 0.7587863811092806, "grad_norm": 0.3293622136116028, "learning_rate": 1.3724253614408453e-05, "loss": 0.5248, "step": 27635 }, { "epoch": 0.7588138385502471, "grad_norm": 0.4438917934894562, "learning_rate": 1.372385278701079e-05, "loss": 0.5393, "step": 27636 }, { "epoch": 0.7588412959912136, "grad_norm": 0.6015644073486328, "learning_rate": 1.372345195266695e-05, "loss": 0.4204, "step": 27637 }, { "epoch": 0.7588687534321801, "grad_norm": 0.549423098564148, "learning_rate": 1.3723051111377685e-05, "loss": 0.5215, "step": 27638 }, { "epoch": 0.7588962108731466, "grad_norm": 0.3952849507331848, "learning_rate": 1.3722650263143744e-05, "loss": 0.5059, "step": 27639 }, { "epoch": 0.7589236683141132, "grad_norm": 0.41572609543800354, "learning_rate": 1.3722249407965868e-05, "loss": 0.5024, "step": 27640 }, { "epoch": 0.7589511257550796, "grad_norm": 0.3816871643066406, "learning_rate": 1.3721848545844812e-05, "loss": 0.5686, "step": 27641 }, { "epoch": 0.7589785831960462, "grad_norm": 0.3963963985443115, "learning_rate": 1.372144767678132e-05, "loss": 0.5382, "step": 27642 }, { "epoch": 0.7590060406370126, "grad_norm": 0.4628923833370209, "learning_rate": 1.372104680077614e-05, "loss": 0.5055, "step": 27643 }, { "epoch": 0.7590334980779792, "grad_norm": 0.38763415813446045, "learning_rate": 1.3720645917830018e-05, "loss": 0.4886, "step": 27644 }, { "epoch": 0.7590609555189456, "grad_norm": 0.35843518376350403, "learning_rate": 1.3720245027943704e-05, "loss": 0.5096, "step": 27645 }, { "epoch": 0.7590884129599121, "grad_norm": 0.4085257649421692, "learning_rate": 1.371984413111795e-05, "loss": 0.4666, "step": 27646 }, { "epoch": 0.7591158704008787, "grad_norm": 6.698368549346924, "learning_rate": 1.3719443227353498e-05, "loss": 0.5266, "step": 27647 }, { "epoch": 0.7591433278418451, "grad_norm": 0.407379150390625, "learning_rate": 1.3719042316651092e-05, "loss": 0.481, "step": 27648 }, { "epoch": 0.7591707852828117, "grad_norm": 0.39034196734428406, "learning_rate": 1.3718641399011492e-05, "loss": 0.629, "step": 27649 }, { "epoch": 0.7591982427237781, "grad_norm": 0.45856019854545593, "learning_rate": 1.3718240474435434e-05, "loss": 0.4874, "step": 27650 }, { "epoch": 0.7592257001647447, "grad_norm": 0.38015010952949524, "learning_rate": 1.3717839542923675e-05, "loss": 0.4608, "step": 27651 }, { "epoch": 0.7592531576057111, "grad_norm": 0.4262295961380005, "learning_rate": 1.3717438604476956e-05, "loss": 0.5186, "step": 27652 }, { "epoch": 0.7592806150466777, "grad_norm": 0.33036792278289795, "learning_rate": 1.3717037659096027e-05, "loss": 0.4618, "step": 27653 }, { "epoch": 0.7593080724876442, "grad_norm": 0.3805568516254425, "learning_rate": 1.3716636706781639e-05, "loss": 0.4691, "step": 27654 }, { "epoch": 0.7593355299286106, "grad_norm": 0.37285491824150085, "learning_rate": 1.3716235747534535e-05, "loss": 0.4471, "step": 27655 }, { "epoch": 0.7593629873695772, "grad_norm": 0.37135598063468933, "learning_rate": 1.3715834781355466e-05, "loss": 0.4486, "step": 27656 }, { "epoch": 0.7593904448105436, "grad_norm": 0.3867364227771759, "learning_rate": 1.371543380824518e-05, "loss": 0.4761, "step": 27657 }, { "epoch": 0.7594179022515102, "grad_norm": 0.3717508614063263, "learning_rate": 1.3715032828204422e-05, "loss": 0.453, "step": 27658 }, { "epoch": 0.7594453596924766, "grad_norm": 0.3562491834163666, "learning_rate": 1.3714631841233943e-05, "loss": 0.5122, "step": 27659 }, { "epoch": 0.7594728171334432, "grad_norm": 0.3627948462963104, "learning_rate": 1.3714230847334493e-05, "loss": 0.4744, "step": 27660 }, { "epoch": 0.7595002745744097, "grad_norm": 0.3799888789653778, "learning_rate": 1.3713829846506813e-05, "loss": 0.5032, "step": 27661 }, { "epoch": 0.7595277320153762, "grad_norm": 0.36911511421203613, "learning_rate": 1.3713428838751657e-05, "loss": 0.4891, "step": 27662 }, { "epoch": 0.7595551894563427, "grad_norm": 0.42111217975616455, "learning_rate": 1.3713027824069768e-05, "loss": 0.4639, "step": 27663 }, { "epoch": 0.7595826468973091, "grad_norm": 0.38927870988845825, "learning_rate": 1.3712626802461902e-05, "loss": 0.5313, "step": 27664 }, { "epoch": 0.7596101043382757, "grad_norm": 0.36434662342071533, "learning_rate": 1.37122257739288e-05, "loss": 0.5053, "step": 27665 }, { "epoch": 0.7596375617792421, "grad_norm": 0.36594638228416443, "learning_rate": 1.3711824738471208e-05, "loss": 0.482, "step": 27666 }, { "epoch": 0.7596650192202087, "grad_norm": 0.3925136625766754, "learning_rate": 1.3711423696089886e-05, "loss": 0.5458, "step": 27667 }, { "epoch": 0.7596924766611752, "grad_norm": 0.4328237771987915, "learning_rate": 1.3711022646785568e-05, "loss": 0.5651, "step": 27668 }, { "epoch": 0.7597199341021417, "grad_norm": 0.3576619625091553, "learning_rate": 1.3710621590559012e-05, "loss": 0.4676, "step": 27669 }, { "epoch": 0.7597473915431082, "grad_norm": 0.3592420518398285, "learning_rate": 1.371022052741096e-05, "loss": 0.4082, "step": 27670 }, { "epoch": 0.7597748489840747, "grad_norm": 0.36904236674308777, "learning_rate": 1.3709819457342164e-05, "loss": 0.5112, "step": 27671 }, { "epoch": 0.7598023064250412, "grad_norm": 0.3739687502384186, "learning_rate": 1.370941838035337e-05, "loss": 0.4932, "step": 27672 }, { "epoch": 0.7598297638660076, "grad_norm": 0.3613985776901245, "learning_rate": 1.3709017296445326e-05, "loss": 0.5149, "step": 27673 }, { "epoch": 0.7598572213069742, "grad_norm": 0.3639315366744995, "learning_rate": 1.3708616205618783e-05, "loss": 0.4257, "step": 27674 }, { "epoch": 0.7598846787479407, "grad_norm": 0.3708198070526123, "learning_rate": 1.3708215107874487e-05, "loss": 0.4805, "step": 27675 }, { "epoch": 0.7599121361889072, "grad_norm": 0.5333237648010254, "learning_rate": 1.3707814003213185e-05, "loss": 0.4746, "step": 27676 }, { "epoch": 0.7599395936298737, "grad_norm": 0.3760264813899994, "learning_rate": 1.3707412891635627e-05, "loss": 0.4956, "step": 27677 }, { "epoch": 0.7599670510708402, "grad_norm": 0.40180063247680664, "learning_rate": 1.370701177314256e-05, "loss": 0.482, "step": 27678 }, { "epoch": 0.7599945085118067, "grad_norm": 0.37029796838760376, "learning_rate": 1.3706610647734732e-05, "loss": 0.4941, "step": 27679 }, { "epoch": 0.7600219659527732, "grad_norm": 0.4901975095272064, "learning_rate": 1.3706209515412896e-05, "loss": 0.5295, "step": 27680 }, { "epoch": 0.7600494233937397, "grad_norm": 0.37968742847442627, "learning_rate": 1.3705808376177791e-05, "loss": 0.4626, "step": 27681 }, { "epoch": 0.7600768808347063, "grad_norm": 0.41979652643203735, "learning_rate": 1.3705407230030176e-05, "loss": 0.5293, "step": 27682 }, { "epoch": 0.7601043382756727, "grad_norm": 0.37220993638038635, "learning_rate": 1.3705006076970793e-05, "loss": 0.5629, "step": 27683 }, { "epoch": 0.7601317957166392, "grad_norm": 0.41969823837280273, "learning_rate": 1.3704604917000388e-05, "loss": 0.5171, "step": 27684 }, { "epoch": 0.7601592531576057, "grad_norm": 0.3921274244785309, "learning_rate": 1.3704203750119715e-05, "loss": 0.4804, "step": 27685 }, { "epoch": 0.7601867105985722, "grad_norm": 0.36152493953704834, "learning_rate": 1.3703802576329516e-05, "loss": 0.4257, "step": 27686 }, { "epoch": 0.7602141680395387, "grad_norm": 0.4251555800437927, "learning_rate": 1.3703401395630547e-05, "loss": 0.5464, "step": 27687 }, { "epoch": 0.7602416254805052, "grad_norm": 0.4464849531650543, "learning_rate": 1.3703000208023554e-05, "loss": 0.5189, "step": 27688 }, { "epoch": 0.7602690829214718, "grad_norm": 0.4088986814022064, "learning_rate": 1.370259901350928e-05, "loss": 0.437, "step": 27689 }, { "epoch": 0.7602965403624382, "grad_norm": 0.4370674192905426, "learning_rate": 1.3702197812088478e-05, "loss": 0.5175, "step": 27690 }, { "epoch": 0.7603239978034048, "grad_norm": 0.353396475315094, "learning_rate": 1.3701796603761897e-05, "loss": 0.4336, "step": 27691 }, { "epoch": 0.7603514552443712, "grad_norm": 0.385336309671402, "learning_rate": 1.3701395388530281e-05, "loss": 0.5093, "step": 27692 }, { "epoch": 0.7603789126853377, "grad_norm": 0.4940180778503418, "learning_rate": 1.3700994166394383e-05, "loss": 0.4869, "step": 27693 }, { "epoch": 0.7604063701263042, "grad_norm": 0.42163312435150146, "learning_rate": 1.3700592937354949e-05, "loss": 0.4807, "step": 27694 }, { "epoch": 0.7604338275672707, "grad_norm": 0.4935605823993683, "learning_rate": 1.3700191701412729e-05, "loss": 0.5081, "step": 27695 }, { "epoch": 0.7604612850082373, "grad_norm": 0.3801659643650055, "learning_rate": 1.3699790458568472e-05, "loss": 0.4846, "step": 27696 }, { "epoch": 0.7604887424492037, "grad_norm": 0.3604607880115509, "learning_rate": 1.369938920882292e-05, "loss": 0.4756, "step": 27697 }, { "epoch": 0.7605161998901703, "grad_norm": 0.37225109338760376, "learning_rate": 1.3698987952176829e-05, "loss": 0.5317, "step": 27698 }, { "epoch": 0.7605436573311367, "grad_norm": 0.42815205454826355, "learning_rate": 1.3698586688630947e-05, "loss": 0.4964, "step": 27699 }, { "epoch": 0.7605711147721033, "grad_norm": 0.39067530632019043, "learning_rate": 1.3698185418186018e-05, "loss": 0.5673, "step": 27700 }, { "epoch": 0.7605985722130697, "grad_norm": 0.4094858169555664, "learning_rate": 1.3697784140842796e-05, "loss": 0.5377, "step": 27701 }, { "epoch": 0.7606260296540363, "grad_norm": 0.41249608993530273, "learning_rate": 1.3697382856602022e-05, "loss": 0.5057, "step": 27702 }, { "epoch": 0.7606534870950028, "grad_norm": 1.239504337310791, "learning_rate": 1.3696981565464451e-05, "loss": 0.4986, "step": 27703 }, { "epoch": 0.7606809445359692, "grad_norm": 0.44319769740104675, "learning_rate": 1.3696580267430828e-05, "loss": 0.4508, "step": 27704 }, { "epoch": 0.7607084019769358, "grad_norm": 0.4132560193538666, "learning_rate": 1.3696178962501904e-05, "loss": 0.5291, "step": 27705 }, { "epoch": 0.7607358594179022, "grad_norm": 0.3599596619606018, "learning_rate": 1.3695777650678427e-05, "loss": 0.5193, "step": 27706 }, { "epoch": 0.7607633168588688, "grad_norm": 0.8087995648384094, "learning_rate": 1.3695376331961145e-05, "loss": 0.5225, "step": 27707 }, { "epoch": 0.7607907742998352, "grad_norm": 0.3895479738712311, "learning_rate": 1.3694975006350808e-05, "loss": 0.526, "step": 27708 }, { "epoch": 0.7608182317408018, "grad_norm": 0.3646712303161621, "learning_rate": 1.3694573673848161e-05, "loss": 0.5728, "step": 27709 }, { "epoch": 0.7608456891817683, "grad_norm": 0.3624202311038971, "learning_rate": 1.3694172334453956e-05, "loss": 0.4834, "step": 27710 }, { "epoch": 0.7608731466227348, "grad_norm": 0.3842116892337799, "learning_rate": 1.3693770988168942e-05, "loss": 0.4549, "step": 27711 }, { "epoch": 0.7609006040637013, "grad_norm": 0.323030024766922, "learning_rate": 1.3693369634993863e-05, "loss": 0.4067, "step": 27712 }, { "epoch": 0.7609280615046677, "grad_norm": 0.40119877457618713, "learning_rate": 1.3692968274929475e-05, "loss": 0.5134, "step": 27713 }, { "epoch": 0.7609555189456343, "grad_norm": 0.4033125042915344, "learning_rate": 1.369256690797652e-05, "loss": 0.5442, "step": 27714 }, { "epoch": 0.7609829763866007, "grad_norm": 0.3812948763370514, "learning_rate": 1.3692165534135748e-05, "loss": 0.4346, "step": 27715 }, { "epoch": 0.7610104338275673, "grad_norm": 0.39885058999061584, "learning_rate": 1.3691764153407909e-05, "loss": 0.5615, "step": 27716 }, { "epoch": 0.7610378912685337, "grad_norm": 0.38529273867607117, "learning_rate": 1.3691362765793751e-05, "loss": 0.4414, "step": 27717 }, { "epoch": 0.7610653487095003, "grad_norm": 0.3980371057987213, "learning_rate": 1.3690961371294028e-05, "loss": 0.4736, "step": 27718 }, { "epoch": 0.7610928061504668, "grad_norm": 0.3693397045135498, "learning_rate": 1.3690559969909481e-05, "loss": 0.5072, "step": 27719 }, { "epoch": 0.7611202635914333, "grad_norm": 0.3797854483127594, "learning_rate": 1.369015856164086e-05, "loss": 0.4186, "step": 27720 }, { "epoch": 0.7611477210323998, "grad_norm": 0.4087902307510376, "learning_rate": 1.3689757146488918e-05, "loss": 0.4539, "step": 27721 }, { "epoch": 0.7611751784733662, "grad_norm": 0.34327200055122375, "learning_rate": 1.3689355724454401e-05, "loss": 0.4785, "step": 27722 }, { "epoch": 0.7612026359143328, "grad_norm": 0.3739432692527771, "learning_rate": 1.3688954295538057e-05, "loss": 0.4793, "step": 27723 }, { "epoch": 0.7612300933552992, "grad_norm": 0.4621001183986664, "learning_rate": 1.3688552859740639e-05, "loss": 0.5151, "step": 27724 }, { "epoch": 0.7612575507962658, "grad_norm": 0.447447806596756, "learning_rate": 1.3688151417062887e-05, "loss": 0.5961, "step": 27725 }, { "epoch": 0.7612850082372323, "grad_norm": 0.37173202633857727, "learning_rate": 1.368774996750556e-05, "loss": 0.4832, "step": 27726 }, { "epoch": 0.7613124656781988, "grad_norm": 0.3942643702030182, "learning_rate": 1.36873485110694e-05, "loss": 0.5347, "step": 27727 }, { "epoch": 0.7613399231191653, "grad_norm": 0.3896092474460602, "learning_rate": 1.368694704775516e-05, "loss": 0.4582, "step": 27728 }, { "epoch": 0.7613673805601318, "grad_norm": 0.38348066806793213, "learning_rate": 1.3686545577563586e-05, "loss": 0.4897, "step": 27729 }, { "epoch": 0.7613948380010983, "grad_norm": 0.423664391040802, "learning_rate": 1.3686144100495426e-05, "loss": 0.5298, "step": 27730 }, { "epoch": 0.7614222954420647, "grad_norm": 0.35986703634262085, "learning_rate": 1.3685742616551433e-05, "loss": 0.5188, "step": 27731 }, { "epoch": 0.7614497528830313, "grad_norm": 0.47581714391708374, "learning_rate": 1.3685341125732354e-05, "loss": 0.4955, "step": 27732 }, { "epoch": 0.7614772103239978, "grad_norm": 0.4043029546737671, "learning_rate": 1.3684939628038938e-05, "loss": 0.4792, "step": 27733 }, { "epoch": 0.7615046677649643, "grad_norm": 0.3783077001571655, "learning_rate": 1.3684538123471933e-05, "loss": 0.5398, "step": 27734 }, { "epoch": 0.7615321252059308, "grad_norm": 0.3784475028514862, "learning_rate": 1.3684136612032086e-05, "loss": 0.561, "step": 27735 }, { "epoch": 0.7615595826468973, "grad_norm": 0.3712831735610962, "learning_rate": 1.3683735093720152e-05, "loss": 0.5013, "step": 27736 }, { "epoch": 0.7615870400878638, "grad_norm": 0.39237672090530396, "learning_rate": 1.3683333568536875e-05, "loss": 0.5866, "step": 27737 }, { "epoch": 0.7616144975288303, "grad_norm": 0.4448559284210205, "learning_rate": 1.3682932036483005e-05, "loss": 0.5092, "step": 27738 }, { "epoch": 0.7616419549697968, "grad_norm": 0.4101945161819458, "learning_rate": 1.3682530497559291e-05, "loss": 0.5347, "step": 27739 }, { "epoch": 0.7616694124107634, "grad_norm": 0.37881746888160706, "learning_rate": 1.3682128951766483e-05, "loss": 0.4735, "step": 27740 }, { "epoch": 0.7616968698517298, "grad_norm": 0.3992409408092499, "learning_rate": 1.3681727399105328e-05, "loss": 0.4644, "step": 27741 }, { "epoch": 0.7617243272926963, "grad_norm": 0.4056754410266876, "learning_rate": 1.3681325839576576e-05, "loss": 0.6068, "step": 27742 }, { "epoch": 0.7617517847336628, "grad_norm": 0.5025073289871216, "learning_rate": 1.3680924273180977e-05, "loss": 0.6022, "step": 27743 }, { "epoch": 0.7617792421746293, "grad_norm": 0.4117066562175751, "learning_rate": 1.3680522699919283e-05, "loss": 0.6169, "step": 27744 }, { "epoch": 0.7618066996155958, "grad_norm": 0.4183805286884308, "learning_rate": 1.3680121119792236e-05, "loss": 0.4412, "step": 27745 }, { "epoch": 0.7618341570565623, "grad_norm": 0.4059961438179016, "learning_rate": 1.3679719532800589e-05, "loss": 0.5059, "step": 27746 }, { "epoch": 0.7618616144975289, "grad_norm": 0.3868643641471863, "learning_rate": 1.367931793894509e-05, "loss": 0.4896, "step": 27747 }, { "epoch": 0.7618890719384953, "grad_norm": 0.4365611672401428, "learning_rate": 1.367891633822649e-05, "loss": 0.5321, "step": 27748 }, { "epoch": 0.7619165293794619, "grad_norm": 0.36385923624038696, "learning_rate": 1.3678514730645534e-05, "loss": 0.5024, "step": 27749 }, { "epoch": 0.7619439868204283, "grad_norm": 0.3380683362483978, "learning_rate": 1.3678113116202977e-05, "loss": 0.445, "step": 27750 }, { "epoch": 0.7619714442613948, "grad_norm": 0.4052305519580841, "learning_rate": 1.3677711494899567e-05, "loss": 0.5241, "step": 27751 }, { "epoch": 0.7619989017023613, "grad_norm": 0.40106073021888733, "learning_rate": 1.3677309866736047e-05, "loss": 0.4339, "step": 27752 }, { "epoch": 0.7620263591433278, "grad_norm": 0.3528982102870941, "learning_rate": 1.367690823171317e-05, "loss": 0.4474, "step": 27753 }, { "epoch": 0.7620538165842944, "grad_norm": 0.3793761432170868, "learning_rate": 1.367650658983169e-05, "loss": 0.4831, "step": 27754 }, { "epoch": 0.7620812740252608, "grad_norm": 0.39393332600593567, "learning_rate": 1.367610494109235e-05, "loss": 0.4606, "step": 27755 }, { "epoch": 0.7621087314662274, "grad_norm": 0.40860724449157715, "learning_rate": 1.36757032854959e-05, "loss": 0.5324, "step": 27756 }, { "epoch": 0.7621361889071938, "grad_norm": 0.40888237953186035, "learning_rate": 1.367530162304309e-05, "loss": 0.5216, "step": 27757 }, { "epoch": 0.7621636463481604, "grad_norm": 0.35342031717300415, "learning_rate": 1.3674899953734671e-05, "loss": 0.4481, "step": 27758 }, { "epoch": 0.7621911037891268, "grad_norm": 0.4081381559371948, "learning_rate": 1.3674498277571388e-05, "loss": 0.4811, "step": 27759 }, { "epoch": 0.7622185612300933, "grad_norm": 0.3280874490737915, "learning_rate": 1.3674096594553995e-05, "loss": 0.4215, "step": 27760 }, { "epoch": 0.7622460186710599, "grad_norm": 0.4109775424003601, "learning_rate": 1.367369490468324e-05, "loss": 0.5525, "step": 27761 }, { "epoch": 0.7622734761120263, "grad_norm": 0.40750330686569214, "learning_rate": 1.3673293207959871e-05, "loss": 0.4804, "step": 27762 }, { "epoch": 0.7623009335529929, "grad_norm": 0.38180074095726013, "learning_rate": 1.3672891504384638e-05, "loss": 0.4522, "step": 27763 }, { "epoch": 0.7623283909939593, "grad_norm": 0.39386266469955444, "learning_rate": 1.3672489793958288e-05, "loss": 0.5578, "step": 27764 }, { "epoch": 0.7623558484349259, "grad_norm": 0.7645328640937805, "learning_rate": 1.3672088076681574e-05, "loss": 0.5599, "step": 27765 }, { "epoch": 0.7623833058758923, "grad_norm": 0.3817692995071411, "learning_rate": 1.3671686352555243e-05, "loss": 0.5069, "step": 27766 }, { "epoch": 0.7624107633168589, "grad_norm": 0.3971666693687439, "learning_rate": 1.3671284621580045e-05, "loss": 0.526, "step": 27767 }, { "epoch": 0.7624382207578254, "grad_norm": 0.3990360200405121, "learning_rate": 1.367088288375673e-05, "loss": 0.4888, "step": 27768 }, { "epoch": 0.7624656781987919, "grad_norm": 0.3851589858531952, "learning_rate": 1.3670481139086048e-05, "loss": 0.4942, "step": 27769 }, { "epoch": 0.7624931356397584, "grad_norm": 0.47141632437705994, "learning_rate": 1.3670079387568747e-05, "loss": 0.5224, "step": 27770 }, { "epoch": 0.7625205930807248, "grad_norm": 0.37925323843955994, "learning_rate": 1.3669677629205575e-05, "loss": 0.5386, "step": 27771 }, { "epoch": 0.7625480505216914, "grad_norm": 0.4285934269428253, "learning_rate": 1.3669275863997284e-05, "loss": 0.3895, "step": 27772 }, { "epoch": 0.7625755079626578, "grad_norm": 0.4272766411304474, "learning_rate": 1.3668874091944621e-05, "loss": 0.5325, "step": 27773 }, { "epoch": 0.7626029654036244, "grad_norm": 0.34230008721351624, "learning_rate": 1.3668472313048337e-05, "loss": 0.4218, "step": 27774 }, { "epoch": 0.7626304228445909, "grad_norm": 0.48643726110458374, "learning_rate": 1.3668070527309186e-05, "loss": 0.5306, "step": 27775 }, { "epoch": 0.7626578802855574, "grad_norm": 0.36312153935432434, "learning_rate": 1.3667668734727909e-05, "loss": 0.5255, "step": 27776 }, { "epoch": 0.7626853377265239, "grad_norm": 0.38644668459892273, "learning_rate": 1.3667266935305257e-05, "loss": 0.5082, "step": 27777 }, { "epoch": 0.7627127951674904, "grad_norm": 0.3857123553752899, "learning_rate": 1.3666865129041984e-05, "loss": 0.5449, "step": 27778 }, { "epoch": 0.7627402526084569, "grad_norm": 0.34857305884361267, "learning_rate": 1.3666463315938836e-05, "loss": 0.4433, "step": 27779 }, { "epoch": 0.7627677100494233, "grad_norm": 0.3622816801071167, "learning_rate": 1.3666061495996566e-05, "loss": 0.4512, "step": 27780 }, { "epoch": 0.7627951674903899, "grad_norm": 0.4157201051712036, "learning_rate": 1.366565966921592e-05, "loss": 0.5656, "step": 27781 }, { "epoch": 0.7628226249313564, "grad_norm": 0.3683641850948334, "learning_rate": 1.366525783559765e-05, "loss": 0.4394, "step": 27782 }, { "epoch": 0.7628500823723229, "grad_norm": 0.4074408710002899, "learning_rate": 1.3664855995142503e-05, "loss": 0.543, "step": 27783 }, { "epoch": 0.7628775398132894, "grad_norm": 0.36161404848098755, "learning_rate": 1.3664454147851229e-05, "loss": 0.5714, "step": 27784 }, { "epoch": 0.7629049972542559, "grad_norm": 0.41448935866355896, "learning_rate": 1.3664052293724579e-05, "loss": 0.5617, "step": 27785 }, { "epoch": 0.7629324546952224, "grad_norm": 0.34676072001457214, "learning_rate": 1.3663650432763304e-05, "loss": 0.4908, "step": 27786 }, { "epoch": 0.7629599121361889, "grad_norm": 4.789437294006348, "learning_rate": 1.366324856496815e-05, "loss": 0.632, "step": 27787 }, { "epoch": 0.7629873695771554, "grad_norm": 0.40399935841560364, "learning_rate": 1.3662846690339868e-05, "loss": 0.4697, "step": 27788 }, { "epoch": 0.763014827018122, "grad_norm": 0.46968382596969604, "learning_rate": 1.3662444808879209e-05, "loss": 0.5173, "step": 27789 }, { "epoch": 0.7630422844590884, "grad_norm": 0.47655972838401794, "learning_rate": 1.366204292058692e-05, "loss": 0.4826, "step": 27790 }, { "epoch": 0.7630697419000549, "grad_norm": 0.4338267147541046, "learning_rate": 1.3661641025463754e-05, "loss": 0.5357, "step": 27791 }, { "epoch": 0.7630971993410214, "grad_norm": 0.41392213106155396, "learning_rate": 1.3661239123510457e-05, "loss": 0.4851, "step": 27792 }, { "epoch": 0.7631246567819879, "grad_norm": 0.39809951186180115, "learning_rate": 1.3660837214727783e-05, "loss": 0.5303, "step": 27793 }, { "epoch": 0.7631521142229544, "grad_norm": 0.3641553521156311, "learning_rate": 1.3660435299116476e-05, "loss": 0.4618, "step": 27794 }, { "epoch": 0.7631795716639209, "grad_norm": 0.43978753685951233, "learning_rate": 1.3660033376677291e-05, "loss": 0.4598, "step": 27795 }, { "epoch": 0.7632070291048875, "grad_norm": 0.44936907291412354, "learning_rate": 1.3659631447410975e-05, "loss": 0.4893, "step": 27796 }, { "epoch": 0.7632344865458539, "grad_norm": 0.44615668058395386, "learning_rate": 1.3659229511318276e-05, "loss": 0.4832, "step": 27797 }, { "epoch": 0.7632619439868205, "grad_norm": 0.3849855661392212, "learning_rate": 1.365882756839995e-05, "loss": 0.4667, "step": 27798 }, { "epoch": 0.7632894014277869, "grad_norm": 0.37337446212768555, "learning_rate": 1.3658425618656744e-05, "loss": 0.4359, "step": 27799 }, { "epoch": 0.7633168588687534, "grad_norm": 0.36242055892944336, "learning_rate": 1.3658023662089402e-05, "loss": 0.447, "step": 27800 }, { "epoch": 0.7633443163097199, "grad_norm": 0.3944315016269684, "learning_rate": 1.365762169869868e-05, "loss": 0.4538, "step": 27801 }, { "epoch": 0.7633717737506864, "grad_norm": 0.3870130181312561, "learning_rate": 1.3657219728485327e-05, "loss": 0.4648, "step": 27802 }, { "epoch": 0.763399231191653, "grad_norm": 0.35821667313575745, "learning_rate": 1.3656817751450091e-05, "loss": 0.4736, "step": 27803 }, { "epoch": 0.7634266886326194, "grad_norm": 0.36469095945358276, "learning_rate": 1.3656415767593725e-05, "loss": 0.5544, "step": 27804 }, { "epoch": 0.763454146073586, "grad_norm": 0.4492700695991516, "learning_rate": 1.3656013776916973e-05, "loss": 0.4036, "step": 27805 }, { "epoch": 0.7634816035145524, "grad_norm": 0.48716190457344055, "learning_rate": 1.365561177942059e-05, "loss": 0.4997, "step": 27806 }, { "epoch": 0.763509060955519, "grad_norm": 0.34977415204048157, "learning_rate": 1.3655209775105327e-05, "loss": 0.4977, "step": 27807 }, { "epoch": 0.7635365183964854, "grad_norm": 0.3628666400909424, "learning_rate": 1.3654807763971927e-05, "loss": 0.4763, "step": 27808 }, { "epoch": 0.763563975837452, "grad_norm": 0.3645716607570648, "learning_rate": 1.3654405746021144e-05, "loss": 0.4932, "step": 27809 }, { "epoch": 0.7635914332784185, "grad_norm": 0.37192651629447937, "learning_rate": 1.365400372125373e-05, "loss": 0.433, "step": 27810 }, { "epoch": 0.7636188907193849, "grad_norm": 0.40480494499206543, "learning_rate": 1.3653601689670433e-05, "loss": 0.496, "step": 27811 }, { "epoch": 0.7636463481603515, "grad_norm": 0.42466622591018677, "learning_rate": 1.3653199651272002e-05, "loss": 0.4869, "step": 27812 }, { "epoch": 0.7636738056013179, "grad_norm": 0.453584223985672, "learning_rate": 1.3652797606059186e-05, "loss": 0.4967, "step": 27813 }, { "epoch": 0.7637012630422845, "grad_norm": 0.3777746260166168, "learning_rate": 1.3652395554032741e-05, "loss": 0.4789, "step": 27814 }, { "epoch": 0.7637287204832509, "grad_norm": 0.36479008197784424, "learning_rate": 1.3651993495193408e-05, "loss": 0.4473, "step": 27815 }, { "epoch": 0.7637561779242175, "grad_norm": 0.48667091131210327, "learning_rate": 1.3651591429541946e-05, "loss": 0.4181, "step": 27816 }, { "epoch": 0.763783635365184, "grad_norm": 0.3714837431907654, "learning_rate": 1.3651189357079098e-05, "loss": 0.5322, "step": 27817 }, { "epoch": 0.7638110928061504, "grad_norm": 0.43368715047836304, "learning_rate": 1.3650787277805615e-05, "loss": 0.504, "step": 27818 }, { "epoch": 0.763838550247117, "grad_norm": 0.469939261674881, "learning_rate": 1.3650385191722251e-05, "loss": 0.5097, "step": 27819 }, { "epoch": 0.7638660076880834, "grad_norm": 0.4302958548069, "learning_rate": 1.3649983098829753e-05, "loss": 0.498, "step": 27820 }, { "epoch": 0.76389346512905, "grad_norm": 0.6261295080184937, "learning_rate": 1.3649580999128871e-05, "loss": 0.4836, "step": 27821 }, { "epoch": 0.7639209225700164, "grad_norm": 0.39858943223953247, "learning_rate": 1.3649178892620358e-05, "loss": 0.5152, "step": 27822 }, { "epoch": 0.763948380010983, "grad_norm": 0.4095688760280609, "learning_rate": 1.3648776779304957e-05, "loss": 0.428, "step": 27823 }, { "epoch": 0.7639758374519495, "grad_norm": 0.428072988986969, "learning_rate": 1.3648374659183427e-05, "loss": 0.5611, "step": 27824 }, { "epoch": 0.764003294892916, "grad_norm": 0.34700849652290344, "learning_rate": 1.3647972532256512e-05, "loss": 0.4337, "step": 27825 }, { "epoch": 0.7640307523338825, "grad_norm": 0.3652670085430145, "learning_rate": 1.3647570398524962e-05, "loss": 0.4701, "step": 27826 }, { "epoch": 0.764058209774849, "grad_norm": 0.40153026580810547, "learning_rate": 1.3647168257989533e-05, "loss": 0.5293, "step": 27827 }, { "epoch": 0.7640856672158155, "grad_norm": 0.3866989016532898, "learning_rate": 1.3646766110650968e-05, "loss": 0.43, "step": 27828 }, { "epoch": 0.7641131246567819, "grad_norm": 0.4103657007217407, "learning_rate": 1.3646363956510022e-05, "loss": 0.5307, "step": 27829 }, { "epoch": 0.7641405820977485, "grad_norm": 0.4224601984024048, "learning_rate": 1.3645961795567443e-05, "loss": 0.5025, "step": 27830 }, { "epoch": 0.764168039538715, "grad_norm": 0.5194474458694458, "learning_rate": 1.3645559627823983e-05, "loss": 0.5152, "step": 27831 }, { "epoch": 0.7641954969796815, "grad_norm": 0.4085952341556549, "learning_rate": 1.3645157453280388e-05, "loss": 0.4994, "step": 27832 }, { "epoch": 0.764222954420648, "grad_norm": 0.3722376525402069, "learning_rate": 1.3644755271937416e-05, "loss": 0.5262, "step": 27833 }, { "epoch": 0.7642504118616145, "grad_norm": 0.45494306087493896, "learning_rate": 1.3644353083795806e-05, "loss": 0.6307, "step": 27834 }, { "epoch": 0.764277869302581, "grad_norm": 0.40302610397338867, "learning_rate": 1.3643950888856319e-05, "loss": 0.5534, "step": 27835 }, { "epoch": 0.7643053267435475, "grad_norm": 0.4879723787307739, "learning_rate": 1.3643548687119697e-05, "loss": 0.5714, "step": 27836 }, { "epoch": 0.764332784184514, "grad_norm": 0.4247101843357086, "learning_rate": 1.3643146478586696e-05, "loss": 0.4267, "step": 27837 }, { "epoch": 0.7643602416254806, "grad_norm": 0.41916462779045105, "learning_rate": 1.3642744263258063e-05, "loss": 0.4716, "step": 27838 }, { "epoch": 0.764387699066447, "grad_norm": 0.36569055914878845, "learning_rate": 1.3642342041134549e-05, "loss": 0.535, "step": 27839 }, { "epoch": 0.7644151565074135, "grad_norm": 0.3312757909297943, "learning_rate": 1.3641939812216907e-05, "loss": 0.4397, "step": 27840 }, { "epoch": 0.76444261394838, "grad_norm": 0.450429230928421, "learning_rate": 1.3641537576505882e-05, "loss": 0.5709, "step": 27841 }, { "epoch": 0.7644700713893465, "grad_norm": 0.3767551779747009, "learning_rate": 1.364113533400223e-05, "loss": 0.4768, "step": 27842 }, { "epoch": 0.764497528830313, "grad_norm": 0.39069774746894836, "learning_rate": 1.3640733084706696e-05, "loss": 0.5115, "step": 27843 }, { "epoch": 0.7645249862712795, "grad_norm": 0.39298439025878906, "learning_rate": 1.3640330828620032e-05, "loss": 0.592, "step": 27844 }, { "epoch": 0.7645524437122461, "grad_norm": 0.39082473516464233, "learning_rate": 1.3639928565742992e-05, "loss": 0.528, "step": 27845 }, { "epoch": 0.7645799011532125, "grad_norm": 0.3673419952392578, "learning_rate": 1.3639526296076321e-05, "loss": 0.4901, "step": 27846 }, { "epoch": 0.764607358594179, "grad_norm": 0.39938321709632874, "learning_rate": 1.3639124019620775e-05, "loss": 0.5564, "step": 27847 }, { "epoch": 0.7646348160351455, "grad_norm": 0.3836069107055664, "learning_rate": 1.3638721736377098e-05, "loss": 0.553, "step": 27848 }, { "epoch": 0.764662273476112, "grad_norm": 0.3791085183620453, "learning_rate": 1.3638319446346044e-05, "loss": 0.4804, "step": 27849 }, { "epoch": 0.7646897309170785, "grad_norm": 0.3713722229003906, "learning_rate": 1.3637917149528366e-05, "loss": 0.5082, "step": 27850 }, { "epoch": 0.764717188358045, "grad_norm": 0.5478196144104004, "learning_rate": 1.363751484592481e-05, "loss": 0.4651, "step": 27851 }, { "epoch": 0.7647446457990116, "grad_norm": 0.4450196623802185, "learning_rate": 1.3637112535536125e-05, "loss": 0.5261, "step": 27852 }, { "epoch": 0.764772103239978, "grad_norm": 0.41863352060317993, "learning_rate": 1.3636710218363068e-05, "loss": 0.4847, "step": 27853 }, { "epoch": 0.7647995606809446, "grad_norm": 0.3868766725063324, "learning_rate": 1.3636307894406385e-05, "loss": 0.537, "step": 27854 }, { "epoch": 0.764827018121911, "grad_norm": 0.35054004192352295, "learning_rate": 1.3635905563666824e-05, "loss": 0.4685, "step": 27855 }, { "epoch": 0.7648544755628776, "grad_norm": 0.36963996291160583, "learning_rate": 1.3635503226145144e-05, "loss": 0.4572, "step": 27856 }, { "epoch": 0.764881933003844, "grad_norm": 0.37679052352905273, "learning_rate": 1.3635100881842086e-05, "loss": 0.4269, "step": 27857 }, { "epoch": 0.7649093904448105, "grad_norm": 0.4062311351299286, "learning_rate": 1.3634698530758405e-05, "loss": 0.458, "step": 27858 }, { "epoch": 0.7649368478857771, "grad_norm": 0.38182783126831055, "learning_rate": 1.3634296172894852e-05, "loss": 0.4345, "step": 27859 }, { "epoch": 0.7649643053267435, "grad_norm": 0.3910367488861084, "learning_rate": 1.3633893808252178e-05, "loss": 0.4874, "step": 27860 }, { "epoch": 0.7649917627677101, "grad_norm": 0.38973838090896606, "learning_rate": 1.3633491436831131e-05, "loss": 0.4639, "step": 27861 }, { "epoch": 0.7650192202086765, "grad_norm": 0.4802083373069763, "learning_rate": 1.3633089058632462e-05, "loss": 0.4929, "step": 27862 }, { "epoch": 0.7650466776496431, "grad_norm": 0.3954799175262451, "learning_rate": 1.3632686673656925e-05, "loss": 0.4296, "step": 27863 }, { "epoch": 0.7650741350906095, "grad_norm": 0.4408734440803528, "learning_rate": 1.3632284281905266e-05, "loss": 0.5159, "step": 27864 }, { "epoch": 0.7651015925315761, "grad_norm": 0.4179886281490326, "learning_rate": 1.3631881883378237e-05, "loss": 0.5303, "step": 27865 }, { "epoch": 0.7651290499725426, "grad_norm": 0.36080968379974365, "learning_rate": 1.363147947807659e-05, "loss": 0.4554, "step": 27866 }, { "epoch": 0.765156507413509, "grad_norm": 0.37062546610832214, "learning_rate": 1.3631077066001074e-05, "loss": 0.5149, "step": 27867 }, { "epoch": 0.7651839648544756, "grad_norm": 0.3982182443141937, "learning_rate": 1.3630674647152442e-05, "loss": 0.5745, "step": 27868 }, { "epoch": 0.765211422295442, "grad_norm": 0.37394070625305176, "learning_rate": 1.3630272221531443e-05, "loss": 0.4851, "step": 27869 }, { "epoch": 0.7652388797364086, "grad_norm": 0.3372376263141632, "learning_rate": 1.3629869789138825e-05, "loss": 0.5194, "step": 27870 }, { "epoch": 0.765266337177375, "grad_norm": 0.41050970554351807, "learning_rate": 1.3629467349975345e-05, "loss": 0.5555, "step": 27871 }, { "epoch": 0.7652937946183416, "grad_norm": 0.34749579429626465, "learning_rate": 1.3629064904041744e-05, "loss": 0.3409, "step": 27872 }, { "epoch": 0.7653212520593081, "grad_norm": 0.35551580786705017, "learning_rate": 1.3628662451338785e-05, "loss": 0.5057, "step": 27873 }, { "epoch": 0.7653487095002746, "grad_norm": 0.414498507976532, "learning_rate": 1.362825999186721e-05, "loss": 0.4484, "step": 27874 }, { "epoch": 0.7653761669412411, "grad_norm": 0.42545145750045776, "learning_rate": 1.3627857525627772e-05, "loss": 0.4645, "step": 27875 }, { "epoch": 0.7654036243822075, "grad_norm": 0.4027795195579529, "learning_rate": 1.3627455052621224e-05, "loss": 0.4525, "step": 27876 }, { "epoch": 0.7654310818231741, "grad_norm": 0.4570791721343994, "learning_rate": 1.362705257284831e-05, "loss": 0.4574, "step": 27877 }, { "epoch": 0.7654585392641405, "grad_norm": 0.42148464918136597, "learning_rate": 1.3626650086309789e-05, "loss": 0.5596, "step": 27878 }, { "epoch": 0.7654859967051071, "grad_norm": 0.4957759380340576, "learning_rate": 1.3626247593006408e-05, "loss": 0.5682, "step": 27879 }, { "epoch": 0.7655134541460736, "grad_norm": 0.42917799949645996, "learning_rate": 1.3625845092938917e-05, "loss": 0.4698, "step": 27880 }, { "epoch": 0.7655409115870401, "grad_norm": 0.4097735285758972, "learning_rate": 1.3625442586108066e-05, "loss": 0.4451, "step": 27881 }, { "epoch": 0.7655683690280066, "grad_norm": 0.4042794108390808, "learning_rate": 1.3625040072514611e-05, "loss": 0.5317, "step": 27882 }, { "epoch": 0.7655958264689731, "grad_norm": 0.3711957037448883, "learning_rate": 1.3624637552159294e-05, "loss": 0.5942, "step": 27883 }, { "epoch": 0.7656232839099396, "grad_norm": 0.3671351373195648, "learning_rate": 1.3624235025042878e-05, "loss": 0.4647, "step": 27884 }, { "epoch": 0.765650741350906, "grad_norm": 0.4117841422557831, "learning_rate": 1.36238324911661e-05, "loss": 0.4995, "step": 27885 }, { "epoch": 0.7656781987918726, "grad_norm": 0.38087204098701477, "learning_rate": 1.3623429950529723e-05, "loss": 0.4751, "step": 27886 }, { "epoch": 0.7657056562328391, "grad_norm": 0.4012148082256317, "learning_rate": 1.3623027403134493e-05, "loss": 0.4452, "step": 27887 }, { "epoch": 0.7657331136738056, "grad_norm": 0.3719598054885864, "learning_rate": 1.3622624848981157e-05, "loss": 0.5151, "step": 27888 }, { "epoch": 0.7657605711147721, "grad_norm": 0.36409908533096313, "learning_rate": 1.362222228807047e-05, "loss": 0.5513, "step": 27889 }, { "epoch": 0.7657880285557386, "grad_norm": 0.3858785629272461, "learning_rate": 1.3621819720403183e-05, "loss": 0.502, "step": 27890 }, { "epoch": 0.7658154859967051, "grad_norm": 0.5726978778839111, "learning_rate": 1.3621417145980046e-05, "loss": 0.4998, "step": 27891 }, { "epoch": 0.7658429434376716, "grad_norm": 0.37165966629981995, "learning_rate": 1.3621014564801814e-05, "loss": 0.4895, "step": 27892 }, { "epoch": 0.7658704008786381, "grad_norm": 0.41381168365478516, "learning_rate": 1.3620611976869227e-05, "loss": 0.5176, "step": 27893 }, { "epoch": 0.7658978583196047, "grad_norm": 0.4211967885494232, "learning_rate": 1.3620209382183048e-05, "loss": 0.5578, "step": 27894 }, { "epoch": 0.7659253157605711, "grad_norm": 0.3661515712738037, "learning_rate": 1.3619806780744023e-05, "loss": 0.4934, "step": 27895 }, { "epoch": 0.7659527732015377, "grad_norm": 0.3936666250228882, "learning_rate": 1.3619404172552901e-05, "loss": 0.4356, "step": 27896 }, { "epoch": 0.7659802306425041, "grad_norm": 0.39520037174224854, "learning_rate": 1.3619001557610436e-05, "loss": 0.4805, "step": 27897 }, { "epoch": 0.7660076880834706, "grad_norm": 0.35052451491355896, "learning_rate": 1.3618598935917378e-05, "loss": 0.4464, "step": 27898 }, { "epoch": 0.7660351455244371, "grad_norm": 0.36251866817474365, "learning_rate": 1.3618196307474478e-05, "loss": 0.4218, "step": 27899 }, { "epoch": 0.7660626029654036, "grad_norm": 0.4100438952445984, "learning_rate": 1.3617793672282489e-05, "loss": 0.467, "step": 27900 }, { "epoch": 0.7660900604063702, "grad_norm": 0.44292888045310974, "learning_rate": 1.3617391030342158e-05, "loss": 0.5008, "step": 27901 }, { "epoch": 0.7661175178473366, "grad_norm": 0.3937571942806244, "learning_rate": 1.361698838165424e-05, "loss": 0.4319, "step": 27902 }, { "epoch": 0.7661449752883032, "grad_norm": 0.3771359622478485, "learning_rate": 1.361658572621948e-05, "loss": 0.4573, "step": 27903 }, { "epoch": 0.7661724327292696, "grad_norm": 0.5890532732009888, "learning_rate": 1.3616183064038637e-05, "loss": 0.4313, "step": 27904 }, { "epoch": 0.7661998901702362, "grad_norm": 0.42245644330978394, "learning_rate": 1.3615780395112458e-05, "loss": 0.5069, "step": 27905 }, { "epoch": 0.7662273476112026, "grad_norm": 0.4315625727176666, "learning_rate": 1.3615377719441693e-05, "loss": 0.5236, "step": 27906 }, { "epoch": 0.7662548050521691, "grad_norm": 0.3646075129508972, "learning_rate": 1.3614975037027097e-05, "loss": 0.4929, "step": 27907 }, { "epoch": 0.7662822624931357, "grad_norm": 0.4007493257522583, "learning_rate": 1.3614572347869416e-05, "loss": 0.5033, "step": 27908 }, { "epoch": 0.7663097199341021, "grad_norm": 0.41998612880706787, "learning_rate": 1.3614169651969407e-05, "loss": 0.5195, "step": 27909 }, { "epoch": 0.7663371773750687, "grad_norm": 0.3904440402984619, "learning_rate": 1.3613766949327817e-05, "loss": 0.4579, "step": 27910 }, { "epoch": 0.7663646348160351, "grad_norm": 0.42993488907814026, "learning_rate": 1.3613364239945397e-05, "loss": 0.5413, "step": 27911 }, { "epoch": 0.7663920922570017, "grad_norm": 0.4059152603149414, "learning_rate": 1.3612961523822901e-05, "loss": 0.6045, "step": 27912 }, { "epoch": 0.7664195496979681, "grad_norm": 0.4058482050895691, "learning_rate": 1.3612558800961078e-05, "loss": 0.5271, "step": 27913 }, { "epoch": 0.7664470071389347, "grad_norm": 0.38170701265335083, "learning_rate": 1.3612156071360679e-05, "loss": 0.5155, "step": 27914 }, { "epoch": 0.7664744645799012, "grad_norm": 0.36836135387420654, "learning_rate": 1.3611753335022458e-05, "loss": 0.4884, "step": 27915 }, { "epoch": 0.7665019220208676, "grad_norm": 0.3811154365539551, "learning_rate": 1.3611350591947164e-05, "loss": 0.512, "step": 27916 }, { "epoch": 0.7665293794618342, "grad_norm": 0.39510875940322876, "learning_rate": 1.3610947842135547e-05, "loss": 0.5332, "step": 27917 }, { "epoch": 0.7665568369028006, "grad_norm": 0.4496965706348419, "learning_rate": 1.3610545085588359e-05, "loss": 0.5635, "step": 27918 }, { "epoch": 0.7665842943437672, "grad_norm": 0.4198967218399048, "learning_rate": 1.3610142322306355e-05, "loss": 0.5638, "step": 27919 }, { "epoch": 0.7666117517847336, "grad_norm": 0.4147225320339203, "learning_rate": 1.3609739552290282e-05, "loss": 0.5349, "step": 27920 }, { "epoch": 0.7666392092257002, "grad_norm": 0.40342646837234497, "learning_rate": 1.3609336775540892e-05, "loss": 0.5758, "step": 27921 }, { "epoch": 0.7666666666666667, "grad_norm": 0.35331979393959045, "learning_rate": 1.3608933992058936e-05, "loss": 0.5079, "step": 27922 }, { "epoch": 0.7666941241076332, "grad_norm": 0.3548499643802643, "learning_rate": 1.3608531201845168e-05, "loss": 0.3963, "step": 27923 }, { "epoch": 0.7667215815485997, "grad_norm": 0.3902825117111206, "learning_rate": 1.3608128404900339e-05, "loss": 0.5084, "step": 27924 }, { "epoch": 0.7667490389895661, "grad_norm": 0.35879606008529663, "learning_rate": 1.3607725601225197e-05, "loss": 0.5078, "step": 27925 }, { "epoch": 0.7667764964305327, "grad_norm": 0.3608178496360779, "learning_rate": 1.3607322790820495e-05, "loss": 0.4802, "step": 27926 }, { "epoch": 0.7668039538714991, "grad_norm": 0.36149460077285767, "learning_rate": 1.3606919973686986e-05, "loss": 0.4512, "step": 27927 }, { "epoch": 0.7668314113124657, "grad_norm": 0.33917367458343506, "learning_rate": 1.3606517149825418e-05, "loss": 0.4317, "step": 27928 }, { "epoch": 0.7668588687534322, "grad_norm": 0.41974562406539917, "learning_rate": 1.3606114319236547e-05, "loss": 0.4625, "step": 27929 }, { "epoch": 0.7668863261943987, "grad_norm": 0.3692377507686615, "learning_rate": 1.3605711481921117e-05, "loss": 0.459, "step": 27930 }, { "epoch": 0.7669137836353652, "grad_norm": 0.4317486584186554, "learning_rate": 1.3605308637879891e-05, "loss": 0.596, "step": 27931 }, { "epoch": 0.7669412410763317, "grad_norm": 0.38630765676498413, "learning_rate": 1.360490578711361e-05, "loss": 0.5325, "step": 27932 }, { "epoch": 0.7669686985172982, "grad_norm": 0.34342890977859497, "learning_rate": 1.3604502929623029e-05, "loss": 0.3644, "step": 27933 }, { "epoch": 0.7669961559582646, "grad_norm": 0.412077397108078, "learning_rate": 1.3604100065408901e-05, "loss": 0.4653, "step": 27934 }, { "epoch": 0.7670236133992312, "grad_norm": 0.43031978607177734, "learning_rate": 1.3603697194471975e-05, "loss": 0.583, "step": 27935 }, { "epoch": 0.7670510708401977, "grad_norm": 0.3807745575904846, "learning_rate": 1.3603294316813005e-05, "loss": 0.4786, "step": 27936 }, { "epoch": 0.7670785282811642, "grad_norm": 0.38018473982810974, "learning_rate": 1.360289143243274e-05, "loss": 0.5018, "step": 27937 }, { "epoch": 0.7671059857221307, "grad_norm": 0.37534019351005554, "learning_rate": 1.3602488541331932e-05, "loss": 0.5771, "step": 27938 }, { "epoch": 0.7671334431630972, "grad_norm": 0.41659069061279297, "learning_rate": 1.3602085643511335e-05, "loss": 0.4876, "step": 27939 }, { "epoch": 0.7671609006040637, "grad_norm": 0.43477585911750793, "learning_rate": 1.3601682738971695e-05, "loss": 0.5483, "step": 27940 }, { "epoch": 0.7671883580450302, "grad_norm": 0.3925119638442993, "learning_rate": 1.3601279827713771e-05, "loss": 0.5112, "step": 27941 }, { "epoch": 0.7672158154859967, "grad_norm": 0.39348024129867554, "learning_rate": 1.360087690973831e-05, "loss": 0.523, "step": 27942 }, { "epoch": 0.7672432729269633, "grad_norm": 0.495700865983963, "learning_rate": 1.3600473985046064e-05, "loss": 0.4979, "step": 27943 }, { "epoch": 0.7672707303679297, "grad_norm": 0.40965840220451355, "learning_rate": 1.3600071053637785e-05, "loss": 0.5468, "step": 27944 }, { "epoch": 0.7672981878088962, "grad_norm": 0.5231394171714783, "learning_rate": 1.3599668115514222e-05, "loss": 0.6477, "step": 27945 }, { "epoch": 0.7673256452498627, "grad_norm": 0.37318387627601624, "learning_rate": 1.359926517067613e-05, "loss": 0.4665, "step": 27946 }, { "epoch": 0.7673531026908292, "grad_norm": 0.48445335030555725, "learning_rate": 1.359886221912426e-05, "loss": 0.5752, "step": 27947 }, { "epoch": 0.7673805601317957, "grad_norm": 0.3662641644477844, "learning_rate": 1.3598459260859364e-05, "loss": 0.4461, "step": 27948 }, { "epoch": 0.7674080175727622, "grad_norm": 0.431325227022171, "learning_rate": 1.3598056295882196e-05, "loss": 0.4546, "step": 27949 }, { "epoch": 0.7674354750137288, "grad_norm": 0.3369337022304535, "learning_rate": 1.35976533241935e-05, "loss": 0.5441, "step": 27950 }, { "epoch": 0.7674629324546952, "grad_norm": 0.362045556306839, "learning_rate": 1.3597250345794036e-05, "loss": 0.4736, "step": 27951 }, { "epoch": 0.7674903898956618, "grad_norm": 0.3732687830924988, "learning_rate": 1.3596847360684549e-05, "loss": 0.4866, "step": 27952 }, { "epoch": 0.7675178473366282, "grad_norm": 0.4180013835430145, "learning_rate": 1.3596444368865794e-05, "loss": 0.554, "step": 27953 }, { "epoch": 0.7675453047775948, "grad_norm": 0.35502302646636963, "learning_rate": 1.3596041370338524e-05, "loss": 0.463, "step": 27954 }, { "epoch": 0.7675727622185612, "grad_norm": 0.37081098556518555, "learning_rate": 1.3595638365103487e-05, "loss": 0.4448, "step": 27955 }, { "epoch": 0.7676002196595277, "grad_norm": 0.3752250373363495, "learning_rate": 1.3595235353161438e-05, "loss": 0.456, "step": 27956 }, { "epoch": 0.7676276771004943, "grad_norm": 0.42058265209198, "learning_rate": 1.3594832334513131e-05, "loss": 0.4908, "step": 27957 }, { "epoch": 0.7676551345414607, "grad_norm": 0.42261481285095215, "learning_rate": 1.3594429309159307e-05, "loss": 0.6008, "step": 27958 }, { "epoch": 0.7676825919824273, "grad_norm": 0.3996158540248871, "learning_rate": 1.3594026277100732e-05, "loss": 0.5171, "step": 27959 }, { "epoch": 0.7677100494233937, "grad_norm": 0.4017082750797272, "learning_rate": 1.3593623238338145e-05, "loss": 0.5353, "step": 27960 }, { "epoch": 0.7677375068643603, "grad_norm": 0.41361287236213684, "learning_rate": 1.3593220192872308e-05, "loss": 0.5507, "step": 27961 }, { "epoch": 0.7677649643053267, "grad_norm": 0.4877259433269501, "learning_rate": 1.359281714070397e-05, "loss": 0.6388, "step": 27962 }, { "epoch": 0.7677924217462933, "grad_norm": 0.37224888801574707, "learning_rate": 1.3592414081833876e-05, "loss": 0.4271, "step": 27963 }, { "epoch": 0.7678198791872598, "grad_norm": 0.4030722975730896, "learning_rate": 1.3592011016262787e-05, "loss": 0.5039, "step": 27964 }, { "epoch": 0.7678473366282262, "grad_norm": 0.3558710515499115, "learning_rate": 1.359160794399145e-05, "loss": 0.4475, "step": 27965 }, { "epoch": 0.7678747940691928, "grad_norm": 0.5430089235305786, "learning_rate": 1.3591204865020617e-05, "loss": 0.5374, "step": 27966 }, { "epoch": 0.7679022515101592, "grad_norm": 0.40361642837524414, "learning_rate": 1.3590801779351042e-05, "loss": 0.5239, "step": 27967 }, { "epoch": 0.7679297089511258, "grad_norm": 0.3550936281681061, "learning_rate": 1.3590398686983472e-05, "loss": 0.4967, "step": 27968 }, { "epoch": 0.7679571663920922, "grad_norm": 0.38995662331581116, "learning_rate": 1.3589995587918669e-05, "loss": 0.5761, "step": 27969 }, { "epoch": 0.7679846238330588, "grad_norm": 0.3682675063610077, "learning_rate": 1.3589592482157374e-05, "loss": 0.5166, "step": 27970 }, { "epoch": 0.7680120812740253, "grad_norm": 0.3949057459831238, "learning_rate": 1.3589189369700344e-05, "loss": 0.4866, "step": 27971 }, { "epoch": 0.7680395387149918, "grad_norm": 0.39920124411582947, "learning_rate": 1.3588786250548332e-05, "loss": 0.4927, "step": 27972 }, { "epoch": 0.7680669961559583, "grad_norm": 0.36707353591918945, "learning_rate": 1.3588383124702085e-05, "loss": 0.447, "step": 27973 }, { "epoch": 0.7680944535969247, "grad_norm": 0.4270772933959961, "learning_rate": 1.3587979992162361e-05, "loss": 0.4578, "step": 27974 }, { "epoch": 0.7681219110378913, "grad_norm": 0.3590148985385895, "learning_rate": 1.3587576852929909e-05, "loss": 0.4862, "step": 27975 }, { "epoch": 0.7681493684788577, "grad_norm": 0.3854174315929413, "learning_rate": 1.358717370700548e-05, "loss": 0.5316, "step": 27976 }, { "epoch": 0.7681768259198243, "grad_norm": 0.42962580919265747, "learning_rate": 1.3586770554389828e-05, "loss": 0.5247, "step": 27977 }, { "epoch": 0.7682042833607908, "grad_norm": 0.4102726876735687, "learning_rate": 1.3586367395083701e-05, "loss": 0.5207, "step": 27978 }, { "epoch": 0.7682317408017573, "grad_norm": 0.3867008686065674, "learning_rate": 1.3585964229087858e-05, "loss": 0.5074, "step": 27979 }, { "epoch": 0.7682591982427238, "grad_norm": 0.3736969232559204, "learning_rate": 1.3585561056403047e-05, "loss": 0.4771, "step": 27980 }, { "epoch": 0.7682866556836903, "grad_norm": 0.37903568148612976, "learning_rate": 1.358515787703002e-05, "loss": 0.5476, "step": 27981 }, { "epoch": 0.7683141131246568, "grad_norm": 0.44269874691963196, "learning_rate": 1.3584754690969529e-05, "loss": 0.4457, "step": 27982 }, { "epoch": 0.7683415705656232, "grad_norm": 0.37522202730178833, "learning_rate": 1.3584351498222326e-05, "loss": 0.4912, "step": 27983 }, { "epoch": 0.7683690280065898, "grad_norm": 0.5374528169631958, "learning_rate": 1.3583948298789163e-05, "loss": 0.4484, "step": 27984 }, { "epoch": 0.7683964854475562, "grad_norm": 0.4107024669647217, "learning_rate": 1.3583545092670793e-05, "loss": 0.5298, "step": 27985 }, { "epoch": 0.7684239428885228, "grad_norm": 0.37669023871421814, "learning_rate": 1.3583141879867967e-05, "loss": 0.477, "step": 27986 }, { "epoch": 0.7684514003294893, "grad_norm": 0.37141096591949463, "learning_rate": 1.358273866038144e-05, "loss": 0.4773, "step": 27987 }, { "epoch": 0.7684788577704558, "grad_norm": 0.33972862362861633, "learning_rate": 1.358233543421196e-05, "loss": 0.4399, "step": 27988 }, { "epoch": 0.7685063152114223, "grad_norm": 0.3660767674446106, "learning_rate": 1.3581932201360281e-05, "loss": 0.4506, "step": 27989 }, { "epoch": 0.7685337726523888, "grad_norm": 0.39641717076301575, "learning_rate": 1.3581528961827157e-05, "loss": 0.5154, "step": 27990 }, { "epoch": 0.7685612300933553, "grad_norm": 0.4479491710662842, "learning_rate": 1.3581125715613335e-05, "loss": 0.5023, "step": 27991 }, { "epoch": 0.7685886875343217, "grad_norm": 0.36564165353775024, "learning_rate": 1.3580722462719573e-05, "loss": 0.5418, "step": 27992 }, { "epoch": 0.7686161449752883, "grad_norm": 0.39860275387763977, "learning_rate": 1.3580319203146622e-05, "loss": 0.5126, "step": 27993 }, { "epoch": 0.7686436024162548, "grad_norm": 0.40494057536125183, "learning_rate": 1.3579915936895231e-05, "loss": 0.4553, "step": 27994 }, { "epoch": 0.7686710598572213, "grad_norm": 0.35501962900161743, "learning_rate": 1.3579512663966155e-05, "loss": 0.4635, "step": 27995 }, { "epoch": 0.7686985172981878, "grad_norm": 0.386973112821579, "learning_rate": 1.3579109384360143e-05, "loss": 0.535, "step": 27996 }, { "epoch": 0.7687259747391543, "grad_norm": 0.3680135905742645, "learning_rate": 1.3578706098077952e-05, "loss": 0.4969, "step": 27997 }, { "epoch": 0.7687534321801208, "grad_norm": 0.4172013998031616, "learning_rate": 1.3578302805120333e-05, "loss": 0.5812, "step": 27998 }, { "epoch": 0.7687808896210873, "grad_norm": 0.3964799642562866, "learning_rate": 1.3577899505488034e-05, "loss": 0.5512, "step": 27999 }, { "epoch": 0.7688083470620538, "grad_norm": 0.40725332498550415, "learning_rate": 1.3577496199181814e-05, "loss": 0.4978, "step": 28000 }, { "epoch": 0.7688358045030204, "grad_norm": 0.4264396131038666, "learning_rate": 1.357709288620242e-05, "loss": 0.5704, "step": 28001 }, { "epoch": 0.7688632619439868, "grad_norm": 0.36075058579444885, "learning_rate": 1.3576689566550607e-05, "loss": 0.5104, "step": 28002 }, { "epoch": 0.7688907193849533, "grad_norm": 0.41394850611686707, "learning_rate": 1.3576286240227125e-05, "loss": 0.4964, "step": 28003 }, { "epoch": 0.7689181768259198, "grad_norm": 0.5065253376960754, "learning_rate": 1.357588290723273e-05, "loss": 0.4913, "step": 28004 }, { "epoch": 0.7689456342668863, "grad_norm": 0.371906042098999, "learning_rate": 1.3575479567568168e-05, "loss": 0.5775, "step": 28005 }, { "epoch": 0.7689730917078528, "grad_norm": 0.3753708600997925, "learning_rate": 1.35750762212342e-05, "loss": 0.4567, "step": 28006 }, { "epoch": 0.7690005491488193, "grad_norm": 0.3868293762207031, "learning_rate": 1.357467286823157e-05, "loss": 0.5786, "step": 28007 }, { "epoch": 0.7690280065897859, "grad_norm": 0.42944371700286865, "learning_rate": 1.3574269508561038e-05, "loss": 0.6175, "step": 28008 }, { "epoch": 0.7690554640307523, "grad_norm": 0.3710891008377075, "learning_rate": 1.3573866142223348e-05, "loss": 0.538, "step": 28009 }, { "epoch": 0.7690829214717189, "grad_norm": 0.3486689031124115, "learning_rate": 1.3573462769219262e-05, "loss": 0.4269, "step": 28010 }, { "epoch": 0.7691103789126853, "grad_norm": 0.4652608335018158, "learning_rate": 1.3573059389549526e-05, "loss": 0.5074, "step": 28011 }, { "epoch": 0.7691378363536518, "grad_norm": 0.3810223639011383, "learning_rate": 1.357265600321489e-05, "loss": 0.538, "step": 28012 }, { "epoch": 0.7691652937946183, "grad_norm": 0.40334707498550415, "learning_rate": 1.3572252610216114e-05, "loss": 0.4449, "step": 28013 }, { "epoch": 0.7691927512355848, "grad_norm": 0.34336867928504944, "learning_rate": 1.3571849210553944e-05, "loss": 0.4671, "step": 28014 }, { "epoch": 0.7692202086765514, "grad_norm": 0.37650883197784424, "learning_rate": 1.357144580422914e-05, "loss": 0.4996, "step": 28015 }, { "epoch": 0.7692476661175178, "grad_norm": 0.4217616319656372, "learning_rate": 1.3571042391242446e-05, "loss": 0.5442, "step": 28016 }, { "epoch": 0.7692751235584844, "grad_norm": 0.4186553657054901, "learning_rate": 1.3570638971594618e-05, "loss": 0.4883, "step": 28017 }, { "epoch": 0.7693025809994508, "grad_norm": 0.39594221115112305, "learning_rate": 1.3570235545286411e-05, "loss": 0.5056, "step": 28018 }, { "epoch": 0.7693300384404174, "grad_norm": 0.3648996651172638, "learning_rate": 1.3569832112318576e-05, "loss": 0.4473, "step": 28019 }, { "epoch": 0.7693574958813838, "grad_norm": 0.37187623977661133, "learning_rate": 1.356942867269186e-05, "loss": 0.4626, "step": 28020 }, { "epoch": 0.7693849533223504, "grad_norm": 0.4035250246524811, "learning_rate": 1.3569025226407025e-05, "loss": 0.4229, "step": 28021 }, { "epoch": 0.7694124107633169, "grad_norm": 0.48372161388397217, "learning_rate": 1.3568621773464814e-05, "loss": 0.5111, "step": 28022 }, { "epoch": 0.7694398682042833, "grad_norm": 0.36847466230392456, "learning_rate": 1.3568218313865988e-05, "loss": 0.4493, "step": 28023 }, { "epoch": 0.7694673256452499, "grad_norm": 0.7029227018356323, "learning_rate": 1.3567814847611295e-05, "loss": 0.466, "step": 28024 }, { "epoch": 0.7694947830862163, "grad_norm": 0.3704756498336792, "learning_rate": 1.3567411374701489e-05, "loss": 0.5098, "step": 28025 }, { "epoch": 0.7695222405271829, "grad_norm": 0.38176873326301575, "learning_rate": 1.3567007895137324e-05, "loss": 0.4569, "step": 28026 }, { "epoch": 0.7695496979681493, "grad_norm": 0.3453659415245056, "learning_rate": 1.3566604408919545e-05, "loss": 0.4207, "step": 28027 }, { "epoch": 0.7695771554091159, "grad_norm": 0.3721897006034851, "learning_rate": 1.3566200916048918e-05, "loss": 0.5097, "step": 28028 }, { "epoch": 0.7696046128500824, "grad_norm": 0.41934019327163696, "learning_rate": 1.3565797416526185e-05, "loss": 0.5452, "step": 28029 }, { "epoch": 0.7696320702910489, "grad_norm": 0.3950050473213196, "learning_rate": 1.3565393910352099e-05, "loss": 0.5415, "step": 28030 }, { "epoch": 0.7696595277320154, "grad_norm": 0.3357141613960266, "learning_rate": 1.3564990397527418e-05, "loss": 0.4571, "step": 28031 }, { "epoch": 0.7696869851729818, "grad_norm": 0.35553568601608276, "learning_rate": 1.356458687805289e-05, "loss": 0.483, "step": 28032 }, { "epoch": 0.7697144426139484, "grad_norm": 0.3440077304840088, "learning_rate": 1.3564183351929274e-05, "loss": 0.5116, "step": 28033 }, { "epoch": 0.7697419000549148, "grad_norm": 0.4196542501449585, "learning_rate": 1.3563779819157316e-05, "loss": 0.5028, "step": 28034 }, { "epoch": 0.7697693574958814, "grad_norm": 0.3786267638206482, "learning_rate": 1.356337627973777e-05, "loss": 0.4279, "step": 28035 }, { "epoch": 0.7697968149368479, "grad_norm": 0.33255940675735474, "learning_rate": 1.3562972733671391e-05, "loss": 0.4083, "step": 28036 }, { "epoch": 0.7698242723778144, "grad_norm": 0.40417125821113586, "learning_rate": 1.3562569180958933e-05, "loss": 0.4504, "step": 28037 }, { "epoch": 0.7698517298187809, "grad_norm": 0.36948099732398987, "learning_rate": 1.3562165621601142e-05, "loss": 0.5183, "step": 28038 }, { "epoch": 0.7698791872597474, "grad_norm": 0.3877907991409302, "learning_rate": 1.3561762055598777e-05, "loss": 0.5111, "step": 28039 }, { "epoch": 0.7699066447007139, "grad_norm": 0.39727193117141724, "learning_rate": 1.356135848295259e-05, "loss": 0.5755, "step": 28040 }, { "epoch": 0.7699341021416803, "grad_norm": 0.4464273452758789, "learning_rate": 1.3560954903663333e-05, "loss": 0.4457, "step": 28041 }, { "epoch": 0.7699615595826469, "grad_norm": 0.408321738243103, "learning_rate": 1.3560551317731758e-05, "loss": 0.5236, "step": 28042 }, { "epoch": 0.7699890170236134, "grad_norm": 0.42154422402381897, "learning_rate": 1.3560147725158617e-05, "loss": 0.537, "step": 28043 }, { "epoch": 0.7700164744645799, "grad_norm": 0.4169590473175049, "learning_rate": 1.3559744125944665e-05, "loss": 0.4786, "step": 28044 }, { "epoch": 0.7700439319055464, "grad_norm": 0.3673803508281708, "learning_rate": 1.3559340520090654e-05, "loss": 0.3718, "step": 28045 }, { "epoch": 0.7700713893465129, "grad_norm": 0.3822043836116791, "learning_rate": 1.355893690759734e-05, "loss": 0.459, "step": 28046 }, { "epoch": 0.7700988467874794, "grad_norm": 0.4061557352542877, "learning_rate": 1.355853328846547e-05, "loss": 0.5408, "step": 28047 }, { "epoch": 0.7701263042284459, "grad_norm": 0.3583306670188904, "learning_rate": 1.3558129662695799e-05, "loss": 0.5139, "step": 28048 }, { "epoch": 0.7701537616694124, "grad_norm": 0.34545695781707764, "learning_rate": 1.3557726030289082e-05, "loss": 0.4417, "step": 28049 }, { "epoch": 0.770181219110379, "grad_norm": 0.4461559057235718, "learning_rate": 1.355732239124607e-05, "loss": 0.5377, "step": 28050 }, { "epoch": 0.7702086765513454, "grad_norm": 0.36533525586128235, "learning_rate": 1.3556918745567516e-05, "loss": 0.4401, "step": 28051 }, { "epoch": 0.7702361339923119, "grad_norm": 0.4652427136898041, "learning_rate": 1.3556515093254174e-05, "loss": 0.4798, "step": 28052 }, { "epoch": 0.7702635914332784, "grad_norm": 0.41947072744369507, "learning_rate": 1.3556111434306793e-05, "loss": 0.4792, "step": 28053 }, { "epoch": 0.7702910488742449, "grad_norm": 0.3946281671524048, "learning_rate": 1.3555707768726134e-05, "loss": 0.4914, "step": 28054 }, { "epoch": 0.7703185063152114, "grad_norm": 0.40286222100257874, "learning_rate": 1.3555304096512943e-05, "loss": 0.4723, "step": 28055 }, { "epoch": 0.7703459637561779, "grad_norm": 0.36907759308815, "learning_rate": 1.3554900417667973e-05, "loss": 0.4693, "step": 28056 }, { "epoch": 0.7703734211971445, "grad_norm": 0.3980594873428345, "learning_rate": 1.3554496732191984e-05, "loss": 0.4944, "step": 28057 }, { "epoch": 0.7704008786381109, "grad_norm": 0.36845189332962036, "learning_rate": 1.3554093040085721e-05, "loss": 0.5463, "step": 28058 }, { "epoch": 0.7704283360790775, "grad_norm": 0.3730827867984772, "learning_rate": 1.3553689341349941e-05, "loss": 0.4794, "step": 28059 }, { "epoch": 0.7704557935200439, "grad_norm": 0.37356460094451904, "learning_rate": 1.3553285635985397e-05, "loss": 0.4539, "step": 28060 }, { "epoch": 0.7704832509610104, "grad_norm": 0.38706114888191223, "learning_rate": 1.3552881923992838e-05, "loss": 0.4844, "step": 28061 }, { "epoch": 0.7705107084019769, "grad_norm": 0.3801008462905884, "learning_rate": 1.3552478205373024e-05, "loss": 0.518, "step": 28062 }, { "epoch": 0.7705381658429434, "grad_norm": 0.3878426253795624, "learning_rate": 1.35520744801267e-05, "loss": 0.4344, "step": 28063 }, { "epoch": 0.77056562328391, "grad_norm": 0.40854930877685547, "learning_rate": 1.3551670748254627e-05, "loss": 0.5112, "step": 28064 }, { "epoch": 0.7705930807248764, "grad_norm": 0.4653995633125305, "learning_rate": 1.3551267009757553e-05, "loss": 0.4868, "step": 28065 }, { "epoch": 0.770620538165843, "grad_norm": 0.3766760230064392, "learning_rate": 1.3550863264636233e-05, "loss": 0.5131, "step": 28066 }, { "epoch": 0.7706479956068094, "grad_norm": 0.3708924651145935, "learning_rate": 1.355045951289142e-05, "loss": 0.5087, "step": 28067 }, { "epoch": 0.770675453047776, "grad_norm": 0.4046022593975067, "learning_rate": 1.3550055754523867e-05, "loss": 0.4658, "step": 28068 }, { "epoch": 0.7707029104887424, "grad_norm": 0.41350144147872925, "learning_rate": 1.3549651989534327e-05, "loss": 0.4975, "step": 28069 }, { "epoch": 0.770730367929709, "grad_norm": 0.3963812291622162, "learning_rate": 1.3549248217923554e-05, "loss": 0.5011, "step": 28070 }, { "epoch": 0.7707578253706755, "grad_norm": 0.3329675495624542, "learning_rate": 1.3548844439692298e-05, "loss": 0.3957, "step": 28071 }, { "epoch": 0.7707852828116419, "grad_norm": 0.4136483669281006, "learning_rate": 1.3548440654841316e-05, "loss": 0.601, "step": 28072 }, { "epoch": 0.7708127402526085, "grad_norm": 0.4158021807670593, "learning_rate": 1.3548036863371359e-05, "loss": 0.5151, "step": 28073 }, { "epoch": 0.7708401976935749, "grad_norm": 0.4483943581581116, "learning_rate": 1.3547633065283181e-05, "loss": 0.5004, "step": 28074 }, { "epoch": 0.7708676551345415, "grad_norm": 0.401706337928772, "learning_rate": 1.3547229260577535e-05, "loss": 0.476, "step": 28075 }, { "epoch": 0.7708951125755079, "grad_norm": 0.36227771639823914, "learning_rate": 1.3546825449255173e-05, "loss": 0.442, "step": 28076 }, { "epoch": 0.7709225700164745, "grad_norm": 0.5395846962928772, "learning_rate": 1.3546421631316853e-05, "loss": 0.54, "step": 28077 }, { "epoch": 0.770950027457441, "grad_norm": 0.4544094502925873, "learning_rate": 1.3546017806763322e-05, "loss": 0.5798, "step": 28078 }, { "epoch": 0.7709774848984075, "grad_norm": 0.40414515137672424, "learning_rate": 1.3545613975595335e-05, "loss": 0.4753, "step": 28079 }, { "epoch": 0.771004942339374, "grad_norm": 0.4329834580421448, "learning_rate": 1.3545210137813649e-05, "loss": 0.467, "step": 28080 }, { "epoch": 0.7710323997803404, "grad_norm": 0.4618852734565735, "learning_rate": 1.3544806293419016e-05, "loss": 0.4782, "step": 28081 }, { "epoch": 0.771059857221307, "grad_norm": 0.43277621269226074, "learning_rate": 1.3544402442412184e-05, "loss": 0.5309, "step": 28082 }, { "epoch": 0.7710873146622734, "grad_norm": 0.5024019479751587, "learning_rate": 1.3543998584793912e-05, "loss": 0.5312, "step": 28083 }, { "epoch": 0.77111477210324, "grad_norm": 0.39985841512680054, "learning_rate": 1.354359472056495e-05, "loss": 0.495, "step": 28084 }, { "epoch": 0.7711422295442065, "grad_norm": 0.4453759491443634, "learning_rate": 1.3543190849726053e-05, "loss": 0.5328, "step": 28085 }, { "epoch": 0.771169686985173, "grad_norm": 0.3726481795310974, "learning_rate": 1.3542786972277977e-05, "loss": 0.5139, "step": 28086 }, { "epoch": 0.7711971444261395, "grad_norm": 0.3856644034385681, "learning_rate": 1.354238308822147e-05, "loss": 0.472, "step": 28087 }, { "epoch": 0.771224601867106, "grad_norm": 0.3621286451816559, "learning_rate": 1.3541979197557288e-05, "loss": 0.5403, "step": 28088 }, { "epoch": 0.7712520593080725, "grad_norm": 0.37487852573394775, "learning_rate": 1.3541575300286184e-05, "loss": 0.4453, "step": 28089 }, { "epoch": 0.7712795167490389, "grad_norm": 0.35984012484550476, "learning_rate": 1.3541171396408915e-05, "loss": 0.4301, "step": 28090 }, { "epoch": 0.7713069741900055, "grad_norm": 0.42794057726860046, "learning_rate": 1.354076748592623e-05, "loss": 0.6026, "step": 28091 }, { "epoch": 0.771334431630972, "grad_norm": 0.36211958527565, "learning_rate": 1.3540363568838881e-05, "loss": 0.5188, "step": 28092 }, { "epoch": 0.7713618890719385, "grad_norm": 0.3773888349533081, "learning_rate": 1.3539959645147627e-05, "loss": 0.5461, "step": 28093 }, { "epoch": 0.771389346512905, "grad_norm": 0.4743172526359558, "learning_rate": 1.3539555714853214e-05, "loss": 0.5139, "step": 28094 }, { "epoch": 0.7714168039538715, "grad_norm": 0.556021511554718, "learning_rate": 1.3539151777956406e-05, "loss": 0.518, "step": 28095 }, { "epoch": 0.771444261394838, "grad_norm": 0.4232417047023773, "learning_rate": 1.3538747834457945e-05, "loss": 0.5706, "step": 28096 }, { "epoch": 0.7714717188358045, "grad_norm": 0.3846933841705322, "learning_rate": 1.353834388435859e-05, "loss": 0.4586, "step": 28097 }, { "epoch": 0.771499176276771, "grad_norm": 0.4053516983985901, "learning_rate": 1.3537939927659101e-05, "loss": 0.5699, "step": 28098 }, { "epoch": 0.7715266337177376, "grad_norm": 0.44594940543174744, "learning_rate": 1.3537535964360219e-05, "loss": 0.507, "step": 28099 }, { "epoch": 0.771554091158704, "grad_norm": 0.4442480504512787, "learning_rate": 1.3537131994462704e-05, "loss": 0.4994, "step": 28100 }, { "epoch": 0.7715815485996705, "grad_norm": 0.8298069834709167, "learning_rate": 1.353672801796731e-05, "loss": 0.5591, "step": 28101 }, { "epoch": 0.771609006040637, "grad_norm": 0.44732266664505005, "learning_rate": 1.3536324034874788e-05, "loss": 0.5281, "step": 28102 }, { "epoch": 0.7716364634816035, "grad_norm": 0.30889490246772766, "learning_rate": 1.3535920045185896e-05, "loss": 0.4321, "step": 28103 }, { "epoch": 0.77166392092257, "grad_norm": 0.35455989837646484, "learning_rate": 1.353551604890138e-05, "loss": 0.4705, "step": 28104 }, { "epoch": 0.7716913783635365, "grad_norm": 0.38841819763183594, "learning_rate": 1.3535112046022003e-05, "loss": 0.4847, "step": 28105 }, { "epoch": 0.7717188358045031, "grad_norm": 0.3943098485469818, "learning_rate": 1.3534708036548512e-05, "loss": 0.5507, "step": 28106 }, { "epoch": 0.7717462932454695, "grad_norm": 0.39878880977630615, "learning_rate": 1.353430402048166e-05, "loss": 0.4441, "step": 28107 }, { "epoch": 0.771773750686436, "grad_norm": 0.38307464122772217, "learning_rate": 1.3533899997822206e-05, "loss": 0.5053, "step": 28108 }, { "epoch": 0.7718012081274025, "grad_norm": 0.39007630944252014, "learning_rate": 1.3533495968570898e-05, "loss": 0.4688, "step": 28109 }, { "epoch": 0.771828665568369, "grad_norm": 0.5208595395088196, "learning_rate": 1.3533091932728495e-05, "loss": 0.5065, "step": 28110 }, { "epoch": 0.7718561230093355, "grad_norm": 0.3612220287322998, "learning_rate": 1.3532687890295746e-05, "loss": 0.4168, "step": 28111 }, { "epoch": 0.771883580450302, "grad_norm": 0.3601859509944916, "learning_rate": 1.3532283841273409e-05, "loss": 0.4512, "step": 28112 }, { "epoch": 0.7719110378912686, "grad_norm": 0.5171055197715759, "learning_rate": 1.3531879785662231e-05, "loss": 0.6205, "step": 28113 }, { "epoch": 0.771938495332235, "grad_norm": 0.360683411359787, "learning_rate": 1.3531475723462972e-05, "loss": 0.499, "step": 28114 }, { "epoch": 0.7719659527732016, "grad_norm": 0.4912988841533661, "learning_rate": 1.3531071654676384e-05, "loss": 0.5138, "step": 28115 }, { "epoch": 0.771993410214168, "grad_norm": 0.35627758502960205, "learning_rate": 1.3530667579303221e-05, "loss": 0.425, "step": 28116 }, { "epoch": 0.7720208676551346, "grad_norm": 0.36523035168647766, "learning_rate": 1.3530263497344237e-05, "loss": 0.5413, "step": 28117 }, { "epoch": 0.772048325096101, "grad_norm": 0.33623793721199036, "learning_rate": 1.3529859408800183e-05, "loss": 0.447, "step": 28118 }, { "epoch": 0.7720757825370675, "grad_norm": 0.3778061866760254, "learning_rate": 1.3529455313671813e-05, "loss": 0.5479, "step": 28119 }, { "epoch": 0.7721032399780341, "grad_norm": 0.42022591829299927, "learning_rate": 1.3529051211959886e-05, "loss": 0.5087, "step": 28120 }, { "epoch": 0.7721306974190005, "grad_norm": 0.5833909511566162, "learning_rate": 1.3528647103665149e-05, "loss": 0.5479, "step": 28121 }, { "epoch": 0.7721581548599671, "grad_norm": 0.424650639295578, "learning_rate": 1.3528242988788363e-05, "loss": 0.5811, "step": 28122 }, { "epoch": 0.7721856123009335, "grad_norm": 0.44501209259033203, "learning_rate": 1.3527838867330272e-05, "loss": 0.466, "step": 28123 }, { "epoch": 0.7722130697419001, "grad_norm": 0.380220890045166, "learning_rate": 1.352743473929164e-05, "loss": 0.4473, "step": 28124 }, { "epoch": 0.7722405271828665, "grad_norm": 0.3745841681957245, "learning_rate": 1.3527030604673217e-05, "loss": 0.5228, "step": 28125 }, { "epoch": 0.7722679846238331, "grad_norm": 0.37489089369773865, "learning_rate": 1.3526626463475752e-05, "loss": 0.5132, "step": 28126 }, { "epoch": 0.7722954420647996, "grad_norm": 0.3623855710029602, "learning_rate": 1.3526222315700006e-05, "loss": 0.4989, "step": 28127 }, { "epoch": 0.772322899505766, "grad_norm": 0.3706584572792053, "learning_rate": 1.3525818161346728e-05, "loss": 0.5058, "step": 28128 }, { "epoch": 0.7723503569467326, "grad_norm": 0.35792964696884155, "learning_rate": 1.3525414000416677e-05, "loss": 0.4673, "step": 28129 }, { "epoch": 0.772377814387699, "grad_norm": 0.39853012561798096, "learning_rate": 1.3525009832910602e-05, "loss": 0.4672, "step": 28130 }, { "epoch": 0.7724052718286656, "grad_norm": 0.3813755214214325, "learning_rate": 1.3524605658829258e-05, "loss": 0.4898, "step": 28131 }, { "epoch": 0.772432729269632, "grad_norm": 0.40334632992744446, "learning_rate": 1.3524201478173402e-05, "loss": 0.5035, "step": 28132 }, { "epoch": 0.7724601867105986, "grad_norm": 0.4218386113643646, "learning_rate": 1.352379729094378e-05, "loss": 0.5235, "step": 28133 }, { "epoch": 0.7724876441515651, "grad_norm": 0.4870929419994354, "learning_rate": 1.3523393097141158e-05, "loss": 0.5413, "step": 28134 }, { "epoch": 0.7725151015925316, "grad_norm": 0.4026777744293213, "learning_rate": 1.352298889676628e-05, "loss": 0.533, "step": 28135 }, { "epoch": 0.7725425590334981, "grad_norm": 0.5732431411743164, "learning_rate": 1.3522584689819901e-05, "loss": 0.4565, "step": 28136 }, { "epoch": 0.7725700164744645, "grad_norm": 0.4227655827999115, "learning_rate": 1.3522180476302782e-05, "loss": 0.4378, "step": 28137 }, { "epoch": 0.7725974739154311, "grad_norm": 0.4186539947986603, "learning_rate": 1.3521776256215668e-05, "loss": 0.5147, "step": 28138 }, { "epoch": 0.7726249313563975, "grad_norm": 0.34855538606643677, "learning_rate": 1.3521372029559321e-05, "loss": 0.4124, "step": 28139 }, { "epoch": 0.7726523887973641, "grad_norm": 0.4212324619293213, "learning_rate": 1.352096779633449e-05, "loss": 0.4771, "step": 28140 }, { "epoch": 0.7726798462383306, "grad_norm": 0.3861721158027649, "learning_rate": 1.352056355654193e-05, "loss": 0.5069, "step": 28141 }, { "epoch": 0.7727073036792971, "grad_norm": 0.4387202262878418, "learning_rate": 1.3520159310182394e-05, "loss": 0.5347, "step": 28142 }, { "epoch": 0.7727347611202636, "grad_norm": 0.4261201024055481, "learning_rate": 1.3519755057256638e-05, "loss": 0.4822, "step": 28143 }, { "epoch": 0.7727622185612301, "grad_norm": 0.38961097598075867, "learning_rate": 1.3519350797765418e-05, "loss": 0.534, "step": 28144 }, { "epoch": 0.7727896760021966, "grad_norm": 0.38125547766685486, "learning_rate": 1.3518946531709482e-05, "loss": 0.5085, "step": 28145 }, { "epoch": 0.772817133443163, "grad_norm": 0.3463003635406494, "learning_rate": 1.351854225908959e-05, "loss": 0.4389, "step": 28146 }, { "epoch": 0.7728445908841296, "grad_norm": 0.3934618830680847, "learning_rate": 1.3518137979906494e-05, "loss": 0.4912, "step": 28147 }, { "epoch": 0.7728720483250962, "grad_norm": 0.37373045086860657, "learning_rate": 1.3517733694160949e-05, "loss": 0.5068, "step": 28148 }, { "epoch": 0.7728995057660626, "grad_norm": 0.4456433057785034, "learning_rate": 1.3517329401853702e-05, "loss": 0.4188, "step": 28149 }, { "epoch": 0.7729269632070291, "grad_norm": 0.3595468997955322, "learning_rate": 1.351692510298552e-05, "loss": 0.5505, "step": 28150 }, { "epoch": 0.7729544206479956, "grad_norm": 0.41618210077285767, "learning_rate": 1.3516520797557144e-05, "loss": 0.5439, "step": 28151 }, { "epoch": 0.7729818780889621, "grad_norm": 0.5136029720306396, "learning_rate": 1.3516116485569338e-05, "loss": 0.436, "step": 28152 }, { "epoch": 0.7730093355299286, "grad_norm": 0.33721694350242615, "learning_rate": 1.3515712167022852e-05, "loss": 0.4504, "step": 28153 }, { "epoch": 0.7730367929708951, "grad_norm": 0.41195061802864075, "learning_rate": 1.3515307841918439e-05, "loss": 0.4946, "step": 28154 }, { "epoch": 0.7730642504118617, "grad_norm": 0.4286714196205139, "learning_rate": 1.3514903510256859e-05, "loss": 0.5263, "step": 28155 }, { "epoch": 0.7730917078528281, "grad_norm": 0.3944145143032074, "learning_rate": 1.3514499172038858e-05, "loss": 0.4069, "step": 28156 }, { "epoch": 0.7731191652937947, "grad_norm": 0.3621658384799957, "learning_rate": 1.3514094827265196e-05, "loss": 0.5327, "step": 28157 }, { "epoch": 0.7731466227347611, "grad_norm": 0.38595983386039734, "learning_rate": 1.3513690475936625e-05, "loss": 0.4784, "step": 28158 }, { "epoch": 0.7731740801757276, "grad_norm": 0.38389724493026733, "learning_rate": 1.35132861180539e-05, "loss": 0.4853, "step": 28159 }, { "epoch": 0.7732015376166941, "grad_norm": 0.4450489580631256, "learning_rate": 1.3512881753617777e-05, "loss": 0.5541, "step": 28160 }, { "epoch": 0.7732289950576606, "grad_norm": 1.0994484424591064, "learning_rate": 1.3512477382629008e-05, "loss": 0.4884, "step": 28161 }, { "epoch": 0.7732564524986272, "grad_norm": 0.437832772731781, "learning_rate": 1.3512073005088344e-05, "loss": 0.5154, "step": 28162 }, { "epoch": 0.7732839099395936, "grad_norm": 0.34799882769584656, "learning_rate": 1.3511668620996546e-05, "loss": 0.4029, "step": 28163 }, { "epoch": 0.7733113673805602, "grad_norm": 0.36387404799461365, "learning_rate": 1.3511264230354364e-05, "loss": 0.4298, "step": 28164 }, { "epoch": 0.7733388248215266, "grad_norm": 0.38041022419929504, "learning_rate": 1.3510859833162555e-05, "loss": 0.519, "step": 28165 }, { "epoch": 0.7733662822624932, "grad_norm": 0.3908531665802002, "learning_rate": 1.3510455429421871e-05, "loss": 0.4843, "step": 28166 }, { "epoch": 0.7733937397034596, "grad_norm": 0.4574621915817261, "learning_rate": 1.3510051019133065e-05, "loss": 0.4701, "step": 28167 }, { "epoch": 0.7734211971444261, "grad_norm": 0.4848713278770447, "learning_rate": 1.3509646602296898e-05, "loss": 0.59, "step": 28168 }, { "epoch": 0.7734486545853927, "grad_norm": 0.39097860455513, "learning_rate": 1.3509242178914117e-05, "loss": 0.4952, "step": 28169 }, { "epoch": 0.7734761120263591, "grad_norm": 0.4004112482070923, "learning_rate": 1.350883774898548e-05, "loss": 0.491, "step": 28170 }, { "epoch": 0.7735035694673257, "grad_norm": 0.4104389548301697, "learning_rate": 1.3508433312511742e-05, "loss": 0.5012, "step": 28171 }, { "epoch": 0.7735310269082921, "grad_norm": 0.3916216790676117, "learning_rate": 1.3508028869493654e-05, "loss": 0.5004, "step": 28172 }, { "epoch": 0.7735584843492587, "grad_norm": 0.4049875736236572, "learning_rate": 1.3507624419931972e-05, "loss": 0.5859, "step": 28173 }, { "epoch": 0.7735859417902251, "grad_norm": 0.40612444281578064, "learning_rate": 1.3507219963827454e-05, "loss": 0.5817, "step": 28174 }, { "epoch": 0.7736133992311917, "grad_norm": 0.4054552912712097, "learning_rate": 1.3506815501180849e-05, "loss": 0.5329, "step": 28175 }, { "epoch": 0.7736408566721582, "grad_norm": 0.3710959851741791, "learning_rate": 1.3506411031992914e-05, "loss": 0.515, "step": 28176 }, { "epoch": 0.7736683141131246, "grad_norm": 0.3760150074958801, "learning_rate": 1.3506006556264404e-05, "loss": 0.4658, "step": 28177 }, { "epoch": 0.7736957715540912, "grad_norm": 0.3800768554210663, "learning_rate": 1.3505602073996074e-05, "loss": 0.5069, "step": 28178 }, { "epoch": 0.7737232289950576, "grad_norm": 0.38846415281295776, "learning_rate": 1.3505197585188676e-05, "loss": 0.5864, "step": 28179 }, { "epoch": 0.7737506864360242, "grad_norm": 0.3637012243270874, "learning_rate": 1.3504793089842964e-05, "loss": 0.4614, "step": 28180 }, { "epoch": 0.7737781438769906, "grad_norm": 0.46601203083992004, "learning_rate": 1.3504388587959695e-05, "loss": 0.609, "step": 28181 }, { "epoch": 0.7738056013179572, "grad_norm": 0.3705854117870331, "learning_rate": 1.3503984079539625e-05, "loss": 0.3897, "step": 28182 }, { "epoch": 0.7738330587589237, "grad_norm": 0.39473235607147217, "learning_rate": 1.3503579564583507e-05, "loss": 0.4804, "step": 28183 }, { "epoch": 0.7738605161998902, "grad_norm": 0.36216500401496887, "learning_rate": 1.3503175043092091e-05, "loss": 0.5223, "step": 28184 }, { "epoch": 0.7738879736408567, "grad_norm": 0.4263075292110443, "learning_rate": 1.3502770515066138e-05, "loss": 0.5312, "step": 28185 }, { "epoch": 0.7739154310818231, "grad_norm": 0.45693960785865784, "learning_rate": 1.35023659805064e-05, "loss": 0.5474, "step": 28186 }, { "epoch": 0.7739428885227897, "grad_norm": 0.41497522592544556, "learning_rate": 1.3501961439413629e-05, "loss": 0.4826, "step": 28187 }, { "epoch": 0.7739703459637561, "grad_norm": 0.39683568477630615, "learning_rate": 1.3501556891788585e-05, "loss": 0.5516, "step": 28188 }, { "epoch": 0.7739978034047227, "grad_norm": 0.3927954137325287, "learning_rate": 1.3501152337632023e-05, "loss": 0.4248, "step": 28189 }, { "epoch": 0.7740252608456892, "grad_norm": 0.37162286043167114, "learning_rate": 1.3500747776944687e-05, "loss": 0.5258, "step": 28190 }, { "epoch": 0.7740527182866557, "grad_norm": 0.362943559885025, "learning_rate": 1.3500343209727344e-05, "loss": 0.493, "step": 28191 }, { "epoch": 0.7740801757276222, "grad_norm": 0.38695845007896423, "learning_rate": 1.3499938635980743e-05, "loss": 0.4953, "step": 28192 }, { "epoch": 0.7741076331685887, "grad_norm": 0.3890756070613861, "learning_rate": 1.3499534055705638e-05, "loss": 0.5148, "step": 28193 }, { "epoch": 0.7741350906095552, "grad_norm": 0.3818354606628418, "learning_rate": 1.3499129468902788e-05, "loss": 0.498, "step": 28194 }, { "epoch": 0.7741625480505216, "grad_norm": 0.3655965328216553, "learning_rate": 1.3498724875572939e-05, "loss": 0.4643, "step": 28195 }, { "epoch": 0.7741900054914882, "grad_norm": 0.5055716633796692, "learning_rate": 1.3498320275716857e-05, "loss": 0.3904, "step": 28196 }, { "epoch": 0.7742174629324547, "grad_norm": 0.3510594367980957, "learning_rate": 1.349791566933529e-05, "loss": 0.4644, "step": 28197 }, { "epoch": 0.7742449203734212, "grad_norm": 0.41722628474235535, "learning_rate": 1.3497511056428992e-05, "loss": 0.5151, "step": 28198 }, { "epoch": 0.7742723778143877, "grad_norm": 0.37877893447875977, "learning_rate": 1.3497106436998722e-05, "loss": 0.4706, "step": 28199 }, { "epoch": 0.7742998352553542, "grad_norm": 0.3950786292552948, "learning_rate": 1.349670181104523e-05, "loss": 0.4981, "step": 28200 }, { "epoch": 0.7743272926963207, "grad_norm": 0.39173269271850586, "learning_rate": 1.3496297178569275e-05, "loss": 0.4826, "step": 28201 }, { "epoch": 0.7743547501372872, "grad_norm": 0.396249383687973, "learning_rate": 1.3495892539571609e-05, "loss": 0.4395, "step": 28202 }, { "epoch": 0.7743822075782537, "grad_norm": 0.313416451215744, "learning_rate": 1.3495487894052985e-05, "loss": 0.4041, "step": 28203 }, { "epoch": 0.7744096650192203, "grad_norm": 0.45207086205482483, "learning_rate": 1.3495083242014166e-05, "loss": 0.441, "step": 28204 }, { "epoch": 0.7744371224601867, "grad_norm": 0.4104832708835602, "learning_rate": 1.3494678583455896e-05, "loss": 0.5408, "step": 28205 }, { "epoch": 0.7744645799011532, "grad_norm": 0.5234546661376953, "learning_rate": 1.3494273918378939e-05, "loss": 0.4711, "step": 28206 }, { "epoch": 0.7744920373421197, "grad_norm": 0.371689110994339, "learning_rate": 1.3493869246784045e-05, "loss": 0.4957, "step": 28207 }, { "epoch": 0.7745194947830862, "grad_norm": 0.3746110498905182, "learning_rate": 1.3493464568671968e-05, "loss": 0.3972, "step": 28208 }, { "epoch": 0.7745469522240527, "grad_norm": 0.3617337644100189, "learning_rate": 1.3493059884043467e-05, "loss": 0.498, "step": 28209 }, { "epoch": 0.7745744096650192, "grad_norm": 0.4012305736541748, "learning_rate": 1.3492655192899293e-05, "loss": 0.5314, "step": 28210 }, { "epoch": 0.7746018671059858, "grad_norm": 0.4669276773929596, "learning_rate": 1.3492250495240203e-05, "loss": 0.5374, "step": 28211 }, { "epoch": 0.7746293245469522, "grad_norm": 0.38123661279678345, "learning_rate": 1.3491845791066951e-05, "loss": 0.5324, "step": 28212 }, { "epoch": 0.7746567819879188, "grad_norm": 0.3987761437892914, "learning_rate": 1.3491441080380289e-05, "loss": 0.5425, "step": 28213 }, { "epoch": 0.7746842394288852, "grad_norm": 0.3654307425022125, "learning_rate": 1.349103636318098e-05, "loss": 0.4682, "step": 28214 }, { "epoch": 0.7747116968698518, "grad_norm": 0.36978432536125183, "learning_rate": 1.3490631639469774e-05, "loss": 0.4801, "step": 28215 }, { "epoch": 0.7747391543108182, "grad_norm": 0.3881734609603882, "learning_rate": 1.3490226909247422e-05, "loss": 0.5447, "step": 28216 }, { "epoch": 0.7747666117517847, "grad_norm": 0.41216522455215454, "learning_rate": 1.3489822172514683e-05, "loss": 0.5168, "step": 28217 }, { "epoch": 0.7747940691927513, "grad_norm": 0.3619668483734131, "learning_rate": 1.3489417429272313e-05, "loss": 0.5075, "step": 28218 }, { "epoch": 0.7748215266337177, "grad_norm": 0.3952668309211731, "learning_rate": 1.3489012679521067e-05, "loss": 0.4691, "step": 28219 }, { "epoch": 0.7748489840746843, "grad_norm": 0.40763992071151733, "learning_rate": 1.34886079232617e-05, "loss": 0.5518, "step": 28220 }, { "epoch": 0.7748764415156507, "grad_norm": 0.37600114941596985, "learning_rate": 1.3488203160494963e-05, "loss": 0.4482, "step": 28221 }, { "epoch": 0.7749038989566173, "grad_norm": 0.3524140417575836, "learning_rate": 1.3487798391221616e-05, "loss": 0.4347, "step": 28222 }, { "epoch": 0.7749313563975837, "grad_norm": 0.3742995262145996, "learning_rate": 1.348739361544241e-05, "loss": 0.4969, "step": 28223 }, { "epoch": 0.7749588138385503, "grad_norm": 0.4089823067188263, "learning_rate": 1.34869888331581e-05, "loss": 0.5506, "step": 28224 }, { "epoch": 0.7749862712795168, "grad_norm": 0.4307990074157715, "learning_rate": 1.3486584044369448e-05, "loss": 0.5623, "step": 28225 }, { "epoch": 0.7750137287204832, "grad_norm": 0.3851928412914276, "learning_rate": 1.34861792490772e-05, "loss": 0.4861, "step": 28226 }, { "epoch": 0.7750411861614498, "grad_norm": 0.39445430040359497, "learning_rate": 1.3485774447282117e-05, "loss": 0.4747, "step": 28227 }, { "epoch": 0.7750686436024162, "grad_norm": 0.38409045338630676, "learning_rate": 1.348536963898495e-05, "loss": 0.4476, "step": 28228 }, { "epoch": 0.7750961010433828, "grad_norm": 0.3743935823440552, "learning_rate": 1.348496482418646e-05, "loss": 0.5099, "step": 28229 }, { "epoch": 0.7751235584843492, "grad_norm": 0.3843429684638977, "learning_rate": 1.3484560002887395e-05, "loss": 0.5027, "step": 28230 }, { "epoch": 0.7751510159253158, "grad_norm": 0.3835598826408386, "learning_rate": 1.3484155175088514e-05, "loss": 0.5537, "step": 28231 }, { "epoch": 0.7751784733662823, "grad_norm": 0.3626602292060852, "learning_rate": 1.3483750340790573e-05, "loss": 0.4398, "step": 28232 }, { "epoch": 0.7752059308072488, "grad_norm": 0.3957691490650177, "learning_rate": 1.3483345499994324e-05, "loss": 0.4528, "step": 28233 }, { "epoch": 0.7752333882482153, "grad_norm": 0.412028968334198, "learning_rate": 1.3482940652700525e-05, "loss": 0.4431, "step": 28234 }, { "epoch": 0.7752608456891817, "grad_norm": 0.3249298632144928, "learning_rate": 1.348253579890993e-05, "loss": 0.4293, "step": 28235 }, { "epoch": 0.7752883031301483, "grad_norm": 0.37951064109802246, "learning_rate": 1.3482130938623292e-05, "loss": 0.4358, "step": 28236 }, { "epoch": 0.7753157605711147, "grad_norm": 0.3515189588069916, "learning_rate": 1.3481726071841372e-05, "loss": 0.5023, "step": 28237 }, { "epoch": 0.7753432180120813, "grad_norm": 0.39141878485679626, "learning_rate": 1.3481321198564921e-05, "loss": 0.5116, "step": 28238 }, { "epoch": 0.7753706754530478, "grad_norm": 0.3415139615535736, "learning_rate": 1.3480916318794692e-05, "loss": 0.444, "step": 28239 }, { "epoch": 0.7753981328940143, "grad_norm": 0.3789421021938324, "learning_rate": 1.3480511432531444e-05, "loss": 0.4952, "step": 28240 }, { "epoch": 0.7754255903349808, "grad_norm": 0.3773335814476013, "learning_rate": 1.3480106539775935e-05, "loss": 0.5677, "step": 28241 }, { "epoch": 0.7754530477759473, "grad_norm": 0.3818061053752899, "learning_rate": 1.3479701640528914e-05, "loss": 0.5257, "step": 28242 }, { "epoch": 0.7754805052169138, "grad_norm": 0.4140602946281433, "learning_rate": 1.3479296734791136e-05, "loss": 0.4812, "step": 28243 }, { "epoch": 0.7755079626578802, "grad_norm": 0.37105098366737366, "learning_rate": 1.3478891822563362e-05, "loss": 0.4931, "step": 28244 }, { "epoch": 0.7755354200988468, "grad_norm": 0.3386287987232208, "learning_rate": 1.3478486903846344e-05, "loss": 0.4403, "step": 28245 }, { "epoch": 0.7755628775398133, "grad_norm": 0.3610832691192627, "learning_rate": 1.347808197864084e-05, "loss": 0.4812, "step": 28246 }, { "epoch": 0.7755903349807798, "grad_norm": 0.38271254301071167, "learning_rate": 1.34776770469476e-05, "loss": 0.5111, "step": 28247 }, { "epoch": 0.7756177924217463, "grad_norm": 0.3500004708766937, "learning_rate": 1.3477272108767383e-05, "loss": 0.4625, "step": 28248 }, { "epoch": 0.7756452498627128, "grad_norm": 0.38372674584388733, "learning_rate": 1.3476867164100944e-05, "loss": 0.5782, "step": 28249 }, { "epoch": 0.7756727073036793, "grad_norm": 0.436383992433548, "learning_rate": 1.347646221294904e-05, "loss": 0.4971, "step": 28250 }, { "epoch": 0.7757001647446458, "grad_norm": 0.4247516691684723, "learning_rate": 1.3476057255312422e-05, "loss": 0.4993, "step": 28251 }, { "epoch": 0.7757276221856123, "grad_norm": 0.3895137310028076, "learning_rate": 1.3475652291191847e-05, "loss": 0.4898, "step": 28252 }, { "epoch": 0.7757550796265787, "grad_norm": 0.38318535685539246, "learning_rate": 1.3475247320588074e-05, "loss": 0.549, "step": 28253 }, { "epoch": 0.7757825370675453, "grad_norm": 0.743206799030304, "learning_rate": 1.3474842343501855e-05, "loss": 0.4576, "step": 28254 }, { "epoch": 0.7758099945085118, "grad_norm": 0.4293211102485657, "learning_rate": 1.3474437359933943e-05, "loss": 0.4711, "step": 28255 }, { "epoch": 0.7758374519494783, "grad_norm": 0.3956354856491089, "learning_rate": 1.34740323698851e-05, "loss": 0.4098, "step": 28256 }, { "epoch": 0.7758649093904448, "grad_norm": 0.41182565689086914, "learning_rate": 1.3473627373356075e-05, "loss": 0.5273, "step": 28257 }, { "epoch": 0.7758923668314113, "grad_norm": 0.49123942852020264, "learning_rate": 1.3473222370347628e-05, "loss": 0.5356, "step": 28258 }, { "epoch": 0.7759198242723778, "grad_norm": 0.38723188638687134, "learning_rate": 1.3472817360860512e-05, "loss": 0.5146, "step": 28259 }, { "epoch": 0.7759472817133443, "grad_norm": 0.37261393666267395, "learning_rate": 1.3472412344895483e-05, "loss": 0.4551, "step": 28260 }, { "epoch": 0.7759747391543108, "grad_norm": 0.40633153915405273, "learning_rate": 1.3472007322453298e-05, "loss": 0.5805, "step": 28261 }, { "epoch": 0.7760021965952774, "grad_norm": 0.4587074816226959, "learning_rate": 1.347160229353471e-05, "loss": 0.4478, "step": 28262 }, { "epoch": 0.7760296540362438, "grad_norm": 0.5762916803359985, "learning_rate": 1.3471197258140476e-05, "loss": 0.4173, "step": 28263 }, { "epoch": 0.7760571114772103, "grad_norm": 0.37404805421829224, "learning_rate": 1.3470792216271353e-05, "loss": 0.4257, "step": 28264 }, { "epoch": 0.7760845689181768, "grad_norm": 0.37844759225845337, "learning_rate": 1.3470387167928092e-05, "loss": 0.4575, "step": 28265 }, { "epoch": 0.7761120263591433, "grad_norm": 0.39577963948249817, "learning_rate": 1.3469982113111452e-05, "loss": 0.4269, "step": 28266 }, { "epoch": 0.7761394838001098, "grad_norm": 0.35193756222724915, "learning_rate": 1.346957705182219e-05, "loss": 0.4114, "step": 28267 }, { "epoch": 0.7761669412410763, "grad_norm": 0.4198407232761383, "learning_rate": 1.3469171984061057e-05, "loss": 0.514, "step": 28268 }, { "epoch": 0.7761943986820429, "grad_norm": 0.3921268880367279, "learning_rate": 1.3468766909828811e-05, "loss": 0.4815, "step": 28269 }, { "epoch": 0.7762218561230093, "grad_norm": 0.49732667207717896, "learning_rate": 1.3468361829126208e-05, "loss": 0.4579, "step": 28270 }, { "epoch": 0.7762493135639759, "grad_norm": 0.47012007236480713, "learning_rate": 1.3467956741954007e-05, "loss": 0.4738, "step": 28271 }, { "epoch": 0.7762767710049423, "grad_norm": 0.3518120348453522, "learning_rate": 1.3467551648312955e-05, "loss": 0.4468, "step": 28272 }, { "epoch": 0.7763042284459089, "grad_norm": 0.3907483220100403, "learning_rate": 1.3467146548203814e-05, "loss": 0.4959, "step": 28273 }, { "epoch": 0.7763316858868753, "grad_norm": 0.39827674627304077, "learning_rate": 1.3466741441627337e-05, "loss": 0.425, "step": 28274 }, { "epoch": 0.7763591433278418, "grad_norm": 1.072135329246521, "learning_rate": 1.3466336328584282e-05, "loss": 0.6105, "step": 28275 }, { "epoch": 0.7763866007688084, "grad_norm": 0.36219584941864014, "learning_rate": 1.3465931209075405e-05, "loss": 0.4848, "step": 28276 }, { "epoch": 0.7764140582097748, "grad_norm": 0.4321988523006439, "learning_rate": 1.3465526083101458e-05, "loss": 0.5018, "step": 28277 }, { "epoch": 0.7764415156507414, "grad_norm": 0.34359318017959595, "learning_rate": 1.3465120950663197e-05, "loss": 0.4894, "step": 28278 }, { "epoch": 0.7764689730917078, "grad_norm": 0.4370637834072113, "learning_rate": 1.3464715811761383e-05, "loss": 0.4434, "step": 28279 }, { "epoch": 0.7764964305326744, "grad_norm": 0.3910772204399109, "learning_rate": 1.3464310666396768e-05, "loss": 0.4469, "step": 28280 }, { "epoch": 0.7765238879736408, "grad_norm": 0.4678737223148346, "learning_rate": 1.3463905514570105e-05, "loss": 0.504, "step": 28281 }, { "epoch": 0.7765513454146074, "grad_norm": 0.4155193865299225, "learning_rate": 1.3463500356282155e-05, "loss": 0.4867, "step": 28282 }, { "epoch": 0.7765788028555739, "grad_norm": 0.4001312553882599, "learning_rate": 1.3463095191533671e-05, "loss": 0.4984, "step": 28283 }, { "epoch": 0.7766062602965403, "grad_norm": 0.4213049113750458, "learning_rate": 1.3462690020325408e-05, "loss": 0.5859, "step": 28284 }, { "epoch": 0.7766337177375069, "grad_norm": 0.40920019149780273, "learning_rate": 1.3462284842658125e-05, "loss": 0.5104, "step": 28285 }, { "epoch": 0.7766611751784733, "grad_norm": 0.36911576986312866, "learning_rate": 1.3461879658532575e-05, "loss": 0.4965, "step": 28286 }, { "epoch": 0.7766886326194399, "grad_norm": 1.3963522911071777, "learning_rate": 1.3461474467949513e-05, "loss": 0.5098, "step": 28287 }, { "epoch": 0.7767160900604063, "grad_norm": 0.3688260316848755, "learning_rate": 1.34610692709097e-05, "loss": 0.4606, "step": 28288 }, { "epoch": 0.7767435475013729, "grad_norm": 0.4914516508579254, "learning_rate": 1.3460664067413885e-05, "loss": 0.4688, "step": 28289 }, { "epoch": 0.7767710049423394, "grad_norm": 0.344197541475296, "learning_rate": 1.3460258857462827e-05, "loss": 0.4299, "step": 28290 }, { "epoch": 0.7767984623833059, "grad_norm": 0.35952383279800415, "learning_rate": 1.3459853641057284e-05, "loss": 0.4898, "step": 28291 }, { "epoch": 0.7768259198242724, "grad_norm": 0.39834654331207275, "learning_rate": 1.3459448418198004e-05, "loss": 0.5118, "step": 28292 }, { "epoch": 0.7768533772652388, "grad_norm": 0.36288490891456604, "learning_rate": 1.3459043188885756e-05, "loss": 0.4582, "step": 28293 }, { "epoch": 0.7768808347062054, "grad_norm": 0.3628290593624115, "learning_rate": 1.3458637953121286e-05, "loss": 0.5368, "step": 28294 }, { "epoch": 0.7769082921471718, "grad_norm": 0.37976381182670593, "learning_rate": 1.345823271090535e-05, "loss": 0.4732, "step": 28295 }, { "epoch": 0.7769357495881384, "grad_norm": 0.4036215543746948, "learning_rate": 1.3457827462238709e-05, "loss": 0.4958, "step": 28296 }, { "epoch": 0.7769632070291049, "grad_norm": 0.4164693057537079, "learning_rate": 1.3457422207122113e-05, "loss": 0.5987, "step": 28297 }, { "epoch": 0.7769906644700714, "grad_norm": 0.36895301938056946, "learning_rate": 1.3457016945556325e-05, "loss": 0.4748, "step": 28298 }, { "epoch": 0.7770181219110379, "grad_norm": 0.4363538324832916, "learning_rate": 1.3456611677542096e-05, "loss": 0.4878, "step": 28299 }, { "epoch": 0.7770455793520044, "grad_norm": 0.4383692443370819, "learning_rate": 1.345620640308018e-05, "loss": 0.5271, "step": 28300 }, { "epoch": 0.7770730367929709, "grad_norm": 0.3817042112350464, "learning_rate": 1.345580112217134e-05, "loss": 0.4846, "step": 28301 }, { "epoch": 0.7771004942339373, "grad_norm": 0.3546237051486969, "learning_rate": 1.3455395834816324e-05, "loss": 0.4466, "step": 28302 }, { "epoch": 0.7771279516749039, "grad_norm": 0.3541264235973358, "learning_rate": 1.3454990541015895e-05, "loss": 0.5262, "step": 28303 }, { "epoch": 0.7771554091158704, "grad_norm": 0.35832759737968445, "learning_rate": 1.3454585240770803e-05, "loss": 0.5382, "step": 28304 }, { "epoch": 0.7771828665568369, "grad_norm": 0.3685150146484375, "learning_rate": 1.345417993408181e-05, "loss": 0.4883, "step": 28305 }, { "epoch": 0.7772103239978034, "grad_norm": 0.3387349545955658, "learning_rate": 1.345377462094967e-05, "loss": 0.4123, "step": 28306 }, { "epoch": 0.7772377814387699, "grad_norm": 0.42654523253440857, "learning_rate": 1.3453369301375135e-05, "loss": 0.5131, "step": 28307 }, { "epoch": 0.7772652388797364, "grad_norm": 0.35354942083358765, "learning_rate": 1.3452963975358966e-05, "loss": 0.5078, "step": 28308 }, { "epoch": 0.7772926963207029, "grad_norm": 0.4756500720977783, "learning_rate": 1.3452558642901914e-05, "loss": 0.5358, "step": 28309 }, { "epoch": 0.7773201537616694, "grad_norm": 0.3659714162349701, "learning_rate": 1.3452153304004742e-05, "loss": 0.4581, "step": 28310 }, { "epoch": 0.777347611202636, "grad_norm": 0.3754408657550812, "learning_rate": 1.3451747958668202e-05, "loss": 0.4697, "step": 28311 }, { "epoch": 0.7773750686436024, "grad_norm": 0.34801986813545227, "learning_rate": 1.3451342606893048e-05, "loss": 0.482, "step": 28312 }, { "epoch": 0.777402526084569, "grad_norm": 0.3841097950935364, "learning_rate": 1.3450937248680039e-05, "loss": 0.4947, "step": 28313 }, { "epoch": 0.7774299835255354, "grad_norm": 0.3857743442058563, "learning_rate": 1.345053188402993e-05, "loss": 0.5047, "step": 28314 }, { "epoch": 0.7774574409665019, "grad_norm": 0.3777301013469696, "learning_rate": 1.345012651294348e-05, "loss": 0.4525, "step": 28315 }, { "epoch": 0.7774848984074684, "grad_norm": 0.4195750653743744, "learning_rate": 1.3449721135421442e-05, "loss": 0.5852, "step": 28316 }, { "epoch": 0.7775123558484349, "grad_norm": 0.38235345482826233, "learning_rate": 1.344931575146457e-05, "loss": 0.5092, "step": 28317 }, { "epoch": 0.7775398132894015, "grad_norm": 0.44984009861946106, "learning_rate": 1.3448910361073628e-05, "loss": 0.5041, "step": 28318 }, { "epoch": 0.7775672707303679, "grad_norm": 0.3625917136669159, "learning_rate": 1.3448504964249366e-05, "loss": 0.5533, "step": 28319 }, { "epoch": 0.7775947281713345, "grad_norm": 0.44959181547164917, "learning_rate": 1.344809956099254e-05, "loss": 0.5733, "step": 28320 }, { "epoch": 0.7776221856123009, "grad_norm": 0.41790828108787537, "learning_rate": 1.344769415130391e-05, "loss": 0.4825, "step": 28321 }, { "epoch": 0.7776496430532674, "grad_norm": 0.4692777991294861, "learning_rate": 1.344728873518423e-05, "loss": 0.5568, "step": 28322 }, { "epoch": 0.7776771004942339, "grad_norm": 0.9842053055763245, "learning_rate": 1.3446883312634254e-05, "loss": 0.4653, "step": 28323 }, { "epoch": 0.7777045579352004, "grad_norm": 0.5059345364570618, "learning_rate": 1.344647788365474e-05, "loss": 0.5785, "step": 28324 }, { "epoch": 0.777732015376167, "grad_norm": 0.3677705228328705, "learning_rate": 1.3446072448246448e-05, "loss": 0.4852, "step": 28325 }, { "epoch": 0.7777594728171334, "grad_norm": 0.37064483761787415, "learning_rate": 1.344566700641013e-05, "loss": 0.4767, "step": 28326 }, { "epoch": 0.7777869302581, "grad_norm": 0.3325006067752838, "learning_rate": 1.3445261558146543e-05, "loss": 0.3919, "step": 28327 }, { "epoch": 0.7778143876990664, "grad_norm": 0.406099408864975, "learning_rate": 1.3444856103456444e-05, "loss": 0.5035, "step": 28328 }, { "epoch": 0.777841845140033, "grad_norm": 0.40474891662597656, "learning_rate": 1.3444450642340584e-05, "loss": 0.5211, "step": 28329 }, { "epoch": 0.7778693025809994, "grad_norm": 0.37044721841812134, "learning_rate": 1.3444045174799731e-05, "loss": 0.5017, "step": 28330 }, { "epoch": 0.777896760021966, "grad_norm": 0.40171030163764954, "learning_rate": 1.3443639700834635e-05, "loss": 0.521, "step": 28331 }, { "epoch": 0.7779242174629325, "grad_norm": 0.38076308369636536, "learning_rate": 1.3443234220446047e-05, "loss": 0.4676, "step": 28332 }, { "epoch": 0.7779516749038989, "grad_norm": 0.3626200556755066, "learning_rate": 1.344282873363473e-05, "loss": 0.4917, "step": 28333 }, { "epoch": 0.7779791323448655, "grad_norm": 0.3405854105949402, "learning_rate": 1.3442423240401442e-05, "loss": 0.4701, "step": 28334 }, { "epoch": 0.7780065897858319, "grad_norm": 0.42208755016326904, "learning_rate": 1.3442017740746929e-05, "loss": 0.508, "step": 28335 }, { "epoch": 0.7780340472267985, "grad_norm": 0.4467397928237915, "learning_rate": 1.3441612234671958e-05, "loss": 0.5731, "step": 28336 }, { "epoch": 0.7780615046677649, "grad_norm": 0.3980191648006439, "learning_rate": 1.3441206722177283e-05, "loss": 0.5548, "step": 28337 }, { "epoch": 0.7780889621087315, "grad_norm": 0.3671995997428894, "learning_rate": 1.3440801203263658e-05, "loss": 0.5417, "step": 28338 }, { "epoch": 0.778116419549698, "grad_norm": 0.36819443106651306, "learning_rate": 1.344039567793184e-05, "loss": 0.5284, "step": 28339 }, { "epoch": 0.7781438769906645, "grad_norm": 0.38161659240722656, "learning_rate": 1.3439990146182587e-05, "loss": 0.5039, "step": 28340 }, { "epoch": 0.778171334431631, "grad_norm": 0.44572076201438904, "learning_rate": 1.3439584608016654e-05, "loss": 0.543, "step": 28341 }, { "epoch": 0.7781987918725974, "grad_norm": 0.4220609664916992, "learning_rate": 1.3439179063434798e-05, "loss": 0.4882, "step": 28342 }, { "epoch": 0.778226249313564, "grad_norm": 0.42330318689346313, "learning_rate": 1.3438773512437774e-05, "loss": 0.511, "step": 28343 }, { "epoch": 0.7782537067545304, "grad_norm": 0.3726212680339813, "learning_rate": 1.3438367955026344e-05, "loss": 0.5027, "step": 28344 }, { "epoch": 0.778281164195497, "grad_norm": 0.40305978059768677, "learning_rate": 1.3437962391201255e-05, "loss": 0.4669, "step": 28345 }, { "epoch": 0.7783086216364635, "grad_norm": 0.41632702946662903, "learning_rate": 1.3437556820963273e-05, "loss": 0.4697, "step": 28346 }, { "epoch": 0.77833607907743, "grad_norm": 0.3862800896167755, "learning_rate": 1.3437151244313149e-05, "loss": 0.4842, "step": 28347 }, { "epoch": 0.7783635365183965, "grad_norm": 0.42688703536987305, "learning_rate": 1.343674566125164e-05, "loss": 0.5779, "step": 28348 }, { "epoch": 0.778390993959363, "grad_norm": 0.3537035286426544, "learning_rate": 1.3436340071779506e-05, "loss": 0.4378, "step": 28349 }, { "epoch": 0.7784184514003295, "grad_norm": 0.4033121168613434, "learning_rate": 1.3435934475897497e-05, "loss": 0.5151, "step": 28350 }, { "epoch": 0.7784459088412959, "grad_norm": 0.39423590898513794, "learning_rate": 1.3435528873606377e-05, "loss": 0.5054, "step": 28351 }, { "epoch": 0.7784733662822625, "grad_norm": 0.4398956000804901, "learning_rate": 1.3435123264906897e-05, "loss": 0.4635, "step": 28352 }, { "epoch": 0.778500823723229, "grad_norm": 0.3882869482040405, "learning_rate": 1.3434717649799816e-05, "loss": 0.423, "step": 28353 }, { "epoch": 0.7785282811641955, "grad_norm": 0.38836196064949036, "learning_rate": 1.3434312028285892e-05, "loss": 0.5456, "step": 28354 }, { "epoch": 0.778555738605162, "grad_norm": 0.40438419580459595, "learning_rate": 1.3433906400365877e-05, "loss": 0.4391, "step": 28355 }, { "epoch": 0.7785831960461285, "grad_norm": 0.3405722677707672, "learning_rate": 1.3433500766040535e-05, "loss": 0.4018, "step": 28356 }, { "epoch": 0.778610653487095, "grad_norm": 0.4466783106327057, "learning_rate": 1.3433095125310616e-05, "loss": 0.4735, "step": 28357 }, { "epoch": 0.7786381109280615, "grad_norm": 0.4132806956768036, "learning_rate": 1.3432689478176876e-05, "loss": 0.4767, "step": 28358 }, { "epoch": 0.778665568369028, "grad_norm": 0.41407015919685364, "learning_rate": 1.343228382464008e-05, "loss": 0.5538, "step": 28359 }, { "epoch": 0.7786930258099946, "grad_norm": 0.40532186627388, "learning_rate": 1.3431878164700973e-05, "loss": 0.4843, "step": 28360 }, { "epoch": 0.778720483250961, "grad_norm": 0.3900507092475891, "learning_rate": 1.3431472498360325e-05, "loss": 0.4686, "step": 28361 }, { "epoch": 0.7787479406919275, "grad_norm": 0.44119203090667725, "learning_rate": 1.3431066825618882e-05, "loss": 0.4779, "step": 28362 }, { "epoch": 0.778775398132894, "grad_norm": 0.5149109959602356, "learning_rate": 1.3430661146477402e-05, "loss": 0.5022, "step": 28363 }, { "epoch": 0.7788028555738605, "grad_norm": 0.3562660813331604, "learning_rate": 1.3430255460936647e-05, "loss": 0.4887, "step": 28364 }, { "epoch": 0.778830313014827, "grad_norm": 0.3927086293697357, "learning_rate": 1.3429849768997371e-05, "loss": 0.5395, "step": 28365 }, { "epoch": 0.7788577704557935, "grad_norm": 0.4236011505126953, "learning_rate": 1.342944407066033e-05, "loss": 0.4563, "step": 28366 }, { "epoch": 0.7788852278967601, "grad_norm": 0.34525221586227417, "learning_rate": 1.3429038365926279e-05, "loss": 0.4917, "step": 28367 }, { "epoch": 0.7789126853377265, "grad_norm": 0.48055121302604675, "learning_rate": 1.3428632654795979e-05, "loss": 0.4519, "step": 28368 }, { "epoch": 0.7789401427786931, "grad_norm": 0.38502568006515503, "learning_rate": 1.3428226937270185e-05, "loss": 0.5079, "step": 28369 }, { "epoch": 0.7789676002196595, "grad_norm": 0.39114829897880554, "learning_rate": 1.3427821213349654e-05, "loss": 0.4694, "step": 28370 }, { "epoch": 0.778995057660626, "grad_norm": 0.4049071669578552, "learning_rate": 1.342741548303514e-05, "loss": 0.4661, "step": 28371 }, { "epoch": 0.7790225151015925, "grad_norm": 0.3710751235485077, "learning_rate": 1.3427009746327406e-05, "loss": 0.5355, "step": 28372 }, { "epoch": 0.779049972542559, "grad_norm": 0.48356571793556213, "learning_rate": 1.34266040032272e-05, "loss": 0.5492, "step": 28373 }, { "epoch": 0.7790774299835256, "grad_norm": 0.3668820858001709, "learning_rate": 1.3426198253735287e-05, "loss": 0.4466, "step": 28374 }, { "epoch": 0.779104887424492, "grad_norm": 0.3794287443161011, "learning_rate": 1.3425792497852421e-05, "loss": 0.5443, "step": 28375 }, { "epoch": 0.7791323448654586, "grad_norm": 0.39222216606140137, "learning_rate": 1.3425386735579358e-05, "loss": 0.4468, "step": 28376 }, { "epoch": 0.779159802306425, "grad_norm": 0.3606302738189697, "learning_rate": 1.3424980966916856e-05, "loss": 0.4598, "step": 28377 }, { "epoch": 0.7791872597473916, "grad_norm": 0.4046744108200073, "learning_rate": 1.3424575191865672e-05, "loss": 0.4843, "step": 28378 }, { "epoch": 0.779214717188358, "grad_norm": 0.4486985504627228, "learning_rate": 1.342416941042656e-05, "loss": 0.5368, "step": 28379 }, { "epoch": 0.7792421746293245, "grad_norm": 0.382944792509079, "learning_rate": 1.3423763622600281e-05, "loss": 0.5517, "step": 28380 }, { "epoch": 0.7792696320702911, "grad_norm": 0.4992680549621582, "learning_rate": 1.3423357828387589e-05, "loss": 0.5475, "step": 28381 }, { "epoch": 0.7792970895112575, "grad_norm": 0.38581135869026184, "learning_rate": 1.3422952027789244e-05, "loss": 0.5106, "step": 28382 }, { "epoch": 0.7793245469522241, "grad_norm": 0.3689609467983246, "learning_rate": 1.3422546220806e-05, "loss": 0.4851, "step": 28383 }, { "epoch": 0.7793520043931905, "grad_norm": 0.417877733707428, "learning_rate": 1.3422140407438613e-05, "loss": 0.5482, "step": 28384 }, { "epoch": 0.7793794618341571, "grad_norm": 0.6721457242965698, "learning_rate": 1.3421734587687844e-05, "loss": 0.5559, "step": 28385 }, { "epoch": 0.7794069192751235, "grad_norm": 0.8405423164367676, "learning_rate": 1.3421328761554446e-05, "loss": 0.524, "step": 28386 }, { "epoch": 0.7794343767160901, "grad_norm": 0.3702731728553772, "learning_rate": 1.342092292903918e-05, "loss": 0.3974, "step": 28387 }, { "epoch": 0.7794618341570566, "grad_norm": 0.36919865012168884, "learning_rate": 1.3420517090142801e-05, "loss": 0.5025, "step": 28388 }, { "epoch": 0.779489291598023, "grad_norm": 0.39232879877090454, "learning_rate": 1.3420111244866063e-05, "loss": 0.505, "step": 28389 }, { "epoch": 0.7795167490389896, "grad_norm": 0.3390423059463501, "learning_rate": 1.341970539320973e-05, "loss": 0.3845, "step": 28390 }, { "epoch": 0.779544206479956, "grad_norm": 0.4078536927700043, "learning_rate": 1.3419299535174553e-05, "loss": 0.5891, "step": 28391 }, { "epoch": 0.7795716639209226, "grad_norm": 0.47244563698768616, "learning_rate": 1.341889367076129e-05, "loss": 0.553, "step": 28392 }, { "epoch": 0.779599121361889, "grad_norm": 0.3809697926044464, "learning_rate": 1.34184877999707e-05, "loss": 0.4621, "step": 28393 }, { "epoch": 0.7796265788028556, "grad_norm": 0.34132733941078186, "learning_rate": 1.3418081922803538e-05, "loss": 0.4932, "step": 28394 }, { "epoch": 0.7796540362438221, "grad_norm": 0.39506322145462036, "learning_rate": 1.3417676039260565e-05, "loss": 0.6013, "step": 28395 }, { "epoch": 0.7796814936847886, "grad_norm": 0.37229278683662415, "learning_rate": 1.3417270149342536e-05, "loss": 0.4564, "step": 28396 }, { "epoch": 0.7797089511257551, "grad_norm": 0.4023679792881012, "learning_rate": 1.3416864253050204e-05, "loss": 0.4923, "step": 28397 }, { "epoch": 0.7797364085667216, "grad_norm": 0.45694610476493835, "learning_rate": 1.3416458350384331e-05, "loss": 0.4729, "step": 28398 }, { "epoch": 0.7797638660076881, "grad_norm": 0.36701759696006775, "learning_rate": 1.3416052441345672e-05, "loss": 0.5267, "step": 28399 }, { "epoch": 0.7797913234486545, "grad_norm": 0.5573608875274658, "learning_rate": 1.3415646525934986e-05, "loss": 0.5149, "step": 28400 }, { "epoch": 0.7798187808896211, "grad_norm": 0.3773966133594513, "learning_rate": 1.341524060415303e-05, "loss": 0.4482, "step": 28401 }, { "epoch": 0.7798462383305876, "grad_norm": 0.3912425935268402, "learning_rate": 1.3414834676000558e-05, "loss": 0.5603, "step": 28402 }, { "epoch": 0.7798736957715541, "grad_norm": 0.4053973853588104, "learning_rate": 1.3414428741478332e-05, "loss": 0.5301, "step": 28403 }, { "epoch": 0.7799011532125206, "grad_norm": 0.4020237624645233, "learning_rate": 1.3414022800587106e-05, "loss": 0.5398, "step": 28404 }, { "epoch": 0.7799286106534871, "grad_norm": 0.426035076379776, "learning_rate": 1.3413616853327636e-05, "loss": 0.5568, "step": 28405 }, { "epoch": 0.7799560680944536, "grad_norm": 0.4072171449661255, "learning_rate": 1.3413210899700683e-05, "loss": 0.4506, "step": 28406 }, { "epoch": 0.77998352553542, "grad_norm": 0.39409300684928894, "learning_rate": 1.3412804939707e-05, "loss": 0.5237, "step": 28407 }, { "epoch": 0.7800109829763866, "grad_norm": 0.40994518995285034, "learning_rate": 1.3412398973347349e-05, "loss": 0.4391, "step": 28408 }, { "epoch": 0.7800384404173532, "grad_norm": 0.34082934260368347, "learning_rate": 1.3411993000622485e-05, "loss": 0.4661, "step": 28409 }, { "epoch": 0.7800658978583196, "grad_norm": 0.369512677192688, "learning_rate": 1.3411587021533163e-05, "loss": 0.505, "step": 28410 }, { "epoch": 0.7800933552992861, "grad_norm": 0.38385850191116333, "learning_rate": 1.3411181036080144e-05, "loss": 0.5259, "step": 28411 }, { "epoch": 0.7801208127402526, "grad_norm": 0.4414614737033844, "learning_rate": 1.3410775044264182e-05, "loss": 0.5526, "step": 28412 }, { "epoch": 0.7801482701812191, "grad_norm": 0.36264342069625854, "learning_rate": 1.3410369046086036e-05, "loss": 0.5188, "step": 28413 }, { "epoch": 0.7801757276221856, "grad_norm": 0.38743430376052856, "learning_rate": 1.3409963041546465e-05, "loss": 0.5006, "step": 28414 }, { "epoch": 0.7802031850631521, "grad_norm": 0.4299183487892151, "learning_rate": 1.3409557030646223e-05, "loss": 0.4998, "step": 28415 }, { "epoch": 0.7802306425041187, "grad_norm": 0.37382620573043823, "learning_rate": 1.340915101338607e-05, "loss": 0.5737, "step": 28416 }, { "epoch": 0.7802580999450851, "grad_norm": 0.3595377504825592, "learning_rate": 1.3408744989766762e-05, "loss": 0.494, "step": 28417 }, { "epoch": 0.7802855573860517, "grad_norm": 0.46890145540237427, "learning_rate": 1.3408338959789057e-05, "loss": 0.5098, "step": 28418 }, { "epoch": 0.7803130148270181, "grad_norm": 0.3638168275356293, "learning_rate": 1.3407932923453713e-05, "loss": 0.5277, "step": 28419 }, { "epoch": 0.7803404722679846, "grad_norm": 0.447844535112381, "learning_rate": 1.3407526880761484e-05, "loss": 0.5471, "step": 28420 }, { "epoch": 0.7803679297089511, "grad_norm": 0.3813786506652832, "learning_rate": 1.340712083171313e-05, "loss": 0.5143, "step": 28421 }, { "epoch": 0.7803953871499176, "grad_norm": 0.36296844482421875, "learning_rate": 1.3406714776309408e-05, "loss": 0.5241, "step": 28422 }, { "epoch": 0.7804228445908842, "grad_norm": 0.3996497690677643, "learning_rate": 1.3406308714551078e-05, "loss": 0.4806, "step": 28423 }, { "epoch": 0.7804503020318506, "grad_norm": 0.369353711605072, "learning_rate": 1.3405902646438895e-05, "loss": 0.4353, "step": 28424 }, { "epoch": 0.7804777594728172, "grad_norm": 0.3882175087928772, "learning_rate": 1.3405496571973614e-05, "loss": 0.443, "step": 28425 }, { "epoch": 0.7805052169137836, "grad_norm": 0.44214141368865967, "learning_rate": 1.3405090491155996e-05, "loss": 0.4843, "step": 28426 }, { "epoch": 0.7805326743547502, "grad_norm": 0.3591752350330353, "learning_rate": 1.3404684403986798e-05, "loss": 0.4657, "step": 28427 }, { "epoch": 0.7805601317957166, "grad_norm": 0.3726717233657837, "learning_rate": 1.3404278310466777e-05, "loss": 0.3781, "step": 28428 }, { "epoch": 0.7805875892366831, "grad_norm": 0.43287816643714905, "learning_rate": 1.3403872210596691e-05, "loss": 0.4886, "step": 28429 }, { "epoch": 0.7806150466776497, "grad_norm": 0.3651362657546997, "learning_rate": 1.3403466104377293e-05, "loss": 0.462, "step": 28430 }, { "epoch": 0.7806425041186161, "grad_norm": 0.3935036361217499, "learning_rate": 1.340305999180935e-05, "loss": 0.5221, "step": 28431 }, { "epoch": 0.7806699615595827, "grad_norm": 0.4111352860927582, "learning_rate": 1.3402653872893611e-05, "loss": 0.5658, "step": 28432 }, { "epoch": 0.7806974190005491, "grad_norm": 0.37481313943862915, "learning_rate": 1.3402247747630836e-05, "loss": 0.5034, "step": 28433 }, { "epoch": 0.7807248764415157, "grad_norm": 0.36274436116218567, "learning_rate": 1.3401841616021787e-05, "loss": 0.4884, "step": 28434 }, { "epoch": 0.7807523338824821, "grad_norm": 0.37172359228134155, "learning_rate": 1.3401435478067212e-05, "loss": 0.3868, "step": 28435 }, { "epoch": 0.7807797913234487, "grad_norm": 0.40933361649513245, "learning_rate": 1.3401029333767878e-05, "loss": 0.613, "step": 28436 }, { "epoch": 0.7808072487644152, "grad_norm": 0.42853212356567383, "learning_rate": 1.3400623183124538e-05, "loss": 0.5748, "step": 28437 }, { "epoch": 0.7808347062053816, "grad_norm": 0.3979892432689667, "learning_rate": 1.340021702613795e-05, "loss": 0.5257, "step": 28438 }, { "epoch": 0.7808621636463482, "grad_norm": 0.38719895482063293, "learning_rate": 1.3399810862808872e-05, "loss": 0.4777, "step": 28439 }, { "epoch": 0.7808896210873146, "grad_norm": 0.40805765986442566, "learning_rate": 1.3399404693138062e-05, "loss": 0.4353, "step": 28440 }, { "epoch": 0.7809170785282812, "grad_norm": 0.3788049817085266, "learning_rate": 1.3398998517126275e-05, "loss": 0.4762, "step": 28441 }, { "epoch": 0.7809445359692476, "grad_norm": 0.3962560296058655, "learning_rate": 1.3398592334774276e-05, "loss": 0.4862, "step": 28442 }, { "epoch": 0.7809719934102142, "grad_norm": 0.36837902665138245, "learning_rate": 1.3398186146082814e-05, "loss": 0.4642, "step": 28443 }, { "epoch": 0.7809994508511807, "grad_norm": 0.387105256319046, "learning_rate": 1.3397779951052652e-05, "loss": 0.4513, "step": 28444 }, { "epoch": 0.7810269082921472, "grad_norm": 0.42085781693458557, "learning_rate": 1.3397373749684546e-05, "loss": 0.4917, "step": 28445 }, { "epoch": 0.7810543657331137, "grad_norm": 0.42858198285102844, "learning_rate": 1.3396967541979251e-05, "loss": 0.5656, "step": 28446 }, { "epoch": 0.7810818231740801, "grad_norm": 0.34804633259773254, "learning_rate": 1.339656132793753e-05, "loss": 0.5027, "step": 28447 }, { "epoch": 0.7811092806150467, "grad_norm": 0.3719485104084015, "learning_rate": 1.3396155107560134e-05, "loss": 0.5196, "step": 28448 }, { "epoch": 0.7811367380560131, "grad_norm": 0.44015923142433167, "learning_rate": 1.339574888084783e-05, "loss": 0.4639, "step": 28449 }, { "epoch": 0.7811641954969797, "grad_norm": 0.3587462604045868, "learning_rate": 1.3395342647801367e-05, "loss": 0.468, "step": 28450 }, { "epoch": 0.7811916529379462, "grad_norm": 0.40826842188835144, "learning_rate": 1.3394936408421506e-05, "loss": 0.498, "step": 28451 }, { "epoch": 0.7812191103789127, "grad_norm": 0.36605533957481384, "learning_rate": 1.3394530162709009e-05, "loss": 0.5031, "step": 28452 }, { "epoch": 0.7812465678198792, "grad_norm": 0.4033891558647156, "learning_rate": 1.3394123910664628e-05, "loss": 0.524, "step": 28453 }, { "epoch": 0.7812740252608457, "grad_norm": 0.4219375252723694, "learning_rate": 1.3393717652289122e-05, "loss": 0.5339, "step": 28454 }, { "epoch": 0.7813014827018122, "grad_norm": 0.4926528036594391, "learning_rate": 1.339331138758325e-05, "loss": 0.5984, "step": 28455 }, { "epoch": 0.7813289401427786, "grad_norm": 0.4163082540035248, "learning_rate": 1.3392905116547768e-05, "loss": 0.4724, "step": 28456 }, { "epoch": 0.7813563975837452, "grad_norm": 0.3724246919155121, "learning_rate": 1.3392498839183436e-05, "loss": 0.4732, "step": 28457 }, { "epoch": 0.7813838550247117, "grad_norm": 0.34673550724983215, "learning_rate": 1.339209255549101e-05, "loss": 0.4608, "step": 28458 }, { "epoch": 0.7814113124656782, "grad_norm": 0.3661571741104126, "learning_rate": 1.3391686265471248e-05, "loss": 0.5043, "step": 28459 }, { "epoch": 0.7814387699066447, "grad_norm": 0.3749069273471832, "learning_rate": 1.3391279969124912e-05, "loss": 0.4922, "step": 28460 }, { "epoch": 0.7814662273476112, "grad_norm": 0.34214121103286743, "learning_rate": 1.3390873666452753e-05, "loss": 0.4741, "step": 28461 }, { "epoch": 0.7814936847885777, "grad_norm": 0.3599623143672943, "learning_rate": 1.3390467357455537e-05, "loss": 0.4604, "step": 28462 }, { "epoch": 0.7815211422295442, "grad_norm": 0.4431513249874115, "learning_rate": 1.3390061042134013e-05, "loss": 0.5022, "step": 28463 }, { "epoch": 0.7815485996705107, "grad_norm": 0.37014004588127136, "learning_rate": 1.3389654720488943e-05, "loss": 0.4621, "step": 28464 }, { "epoch": 0.7815760571114773, "grad_norm": 0.3937191963195801, "learning_rate": 1.3389248392521089e-05, "loss": 0.472, "step": 28465 }, { "epoch": 0.7816035145524437, "grad_norm": 0.3921925127506256, "learning_rate": 1.3388842058231199e-05, "loss": 0.492, "step": 28466 }, { "epoch": 0.7816309719934103, "grad_norm": 0.39209091663360596, "learning_rate": 1.3388435717620043e-05, "loss": 0.4674, "step": 28467 }, { "epoch": 0.7816584294343767, "grad_norm": 0.3981027901172638, "learning_rate": 1.338802937068837e-05, "loss": 0.5204, "step": 28468 }, { "epoch": 0.7816858868753432, "grad_norm": 0.4413098692893982, "learning_rate": 1.3387623017436942e-05, "loss": 0.563, "step": 28469 }, { "epoch": 0.7817133443163097, "grad_norm": 0.4021369218826294, "learning_rate": 1.3387216657866516e-05, "loss": 0.5333, "step": 28470 }, { "epoch": 0.7817408017572762, "grad_norm": 0.38715115189552307, "learning_rate": 1.3386810291977845e-05, "loss": 0.4775, "step": 28471 }, { "epoch": 0.7817682591982428, "grad_norm": 0.4343203902244568, "learning_rate": 1.3386403919771698e-05, "loss": 0.4695, "step": 28472 }, { "epoch": 0.7817957166392092, "grad_norm": 0.4320381283760071, "learning_rate": 1.3385997541248822e-05, "loss": 0.51, "step": 28473 }, { "epoch": 0.7818231740801758, "grad_norm": 0.3581677973270416, "learning_rate": 1.3385591156409985e-05, "loss": 0.489, "step": 28474 }, { "epoch": 0.7818506315211422, "grad_norm": 0.33903494477272034, "learning_rate": 1.338518476525594e-05, "loss": 0.4474, "step": 28475 }, { "epoch": 0.7818780889621088, "grad_norm": 0.37225276231765747, "learning_rate": 1.338477836778744e-05, "loss": 0.506, "step": 28476 }, { "epoch": 0.7819055464030752, "grad_norm": 0.3785501718521118, "learning_rate": 1.3384371964005252e-05, "loss": 0.5032, "step": 28477 }, { "epoch": 0.7819330038440417, "grad_norm": 0.3985570967197418, "learning_rate": 1.3383965553910128e-05, "loss": 0.4529, "step": 28478 }, { "epoch": 0.7819604612850083, "grad_norm": 0.42770519852638245, "learning_rate": 1.3383559137502829e-05, "loss": 0.4902, "step": 28479 }, { "epoch": 0.7819879187259747, "grad_norm": 0.38585570454597473, "learning_rate": 1.338315271478411e-05, "loss": 0.4176, "step": 28480 }, { "epoch": 0.7820153761669413, "grad_norm": 0.39199960231781006, "learning_rate": 1.3382746285754736e-05, "loss": 0.4958, "step": 28481 }, { "epoch": 0.7820428336079077, "grad_norm": 0.4406903088092804, "learning_rate": 1.3382339850415457e-05, "loss": 0.4479, "step": 28482 }, { "epoch": 0.7820702910488743, "grad_norm": 0.3840814232826233, "learning_rate": 1.3381933408767036e-05, "loss": 0.4917, "step": 28483 }, { "epoch": 0.7820977484898407, "grad_norm": 0.431279718875885, "learning_rate": 1.338152696081023e-05, "loss": 0.5117, "step": 28484 }, { "epoch": 0.7821252059308073, "grad_norm": 0.371452271938324, "learning_rate": 1.3381120506545795e-05, "loss": 0.4996, "step": 28485 }, { "epoch": 0.7821526633717738, "grad_norm": 0.4259665608406067, "learning_rate": 1.3380714045974494e-05, "loss": 0.6252, "step": 28486 }, { "epoch": 0.7821801208127402, "grad_norm": 0.3834419846534729, "learning_rate": 1.3380307579097082e-05, "loss": 0.466, "step": 28487 }, { "epoch": 0.7822075782537068, "grad_norm": 0.4216686487197876, "learning_rate": 1.3379901105914314e-05, "loss": 0.5586, "step": 28488 }, { "epoch": 0.7822350356946732, "grad_norm": 0.42965811491012573, "learning_rate": 1.3379494626426956e-05, "loss": 0.5019, "step": 28489 }, { "epoch": 0.7822624931356398, "grad_norm": 0.391330748796463, "learning_rate": 1.337908814063576e-05, "loss": 0.4852, "step": 28490 }, { "epoch": 0.7822899505766062, "grad_norm": 0.35641250014305115, "learning_rate": 1.3378681648541485e-05, "loss": 0.5032, "step": 28491 }, { "epoch": 0.7823174080175728, "grad_norm": 0.4122146964073181, "learning_rate": 1.3378275150144893e-05, "loss": 0.4962, "step": 28492 }, { "epoch": 0.7823448654585393, "grad_norm": 0.4047633707523346, "learning_rate": 1.3377868645446736e-05, "loss": 0.6312, "step": 28493 }, { "epoch": 0.7823723228995058, "grad_norm": 0.3865005671977997, "learning_rate": 1.3377462134447781e-05, "loss": 0.4904, "step": 28494 }, { "epoch": 0.7823997803404723, "grad_norm": 0.43229925632476807, "learning_rate": 1.3377055617148778e-05, "loss": 0.492, "step": 28495 }, { "epoch": 0.7824272377814387, "grad_norm": 0.4041188657283783, "learning_rate": 1.3376649093550488e-05, "loss": 0.5045, "step": 28496 }, { "epoch": 0.7824546952224053, "grad_norm": 0.3728707730770111, "learning_rate": 1.3376242563653673e-05, "loss": 0.5072, "step": 28497 }, { "epoch": 0.7824821526633717, "grad_norm": 0.3549196124076843, "learning_rate": 1.3375836027459084e-05, "loss": 0.4124, "step": 28498 }, { "epoch": 0.7825096101043383, "grad_norm": 0.44475609064102173, "learning_rate": 1.3375429484967484e-05, "loss": 0.4649, "step": 28499 }, { "epoch": 0.7825370675453048, "grad_norm": 0.3876137137413025, "learning_rate": 1.3375022936179631e-05, "loss": 0.5223, "step": 28500 }, { "epoch": 0.7825645249862713, "grad_norm": 0.37685900926589966, "learning_rate": 1.3374616381096285e-05, "loss": 0.5476, "step": 28501 }, { "epoch": 0.7825919824272378, "grad_norm": 0.3867493271827698, "learning_rate": 1.3374209819718201e-05, "loss": 0.5314, "step": 28502 }, { "epoch": 0.7826194398682043, "grad_norm": 0.727252185344696, "learning_rate": 1.3373803252046138e-05, "loss": 0.4495, "step": 28503 }, { "epoch": 0.7826468973091708, "grad_norm": 0.4604683220386505, "learning_rate": 1.3373396678080856e-05, "loss": 0.5372, "step": 28504 }, { "epoch": 0.7826743547501372, "grad_norm": 0.3539043962955475, "learning_rate": 1.3372990097823113e-05, "loss": 0.4584, "step": 28505 }, { "epoch": 0.7827018121911038, "grad_norm": 0.49406298995018005, "learning_rate": 1.3372583511273668e-05, "loss": 0.5424, "step": 28506 }, { "epoch": 0.7827292696320703, "grad_norm": 0.43275701999664307, "learning_rate": 1.3372176918433274e-05, "loss": 0.5083, "step": 28507 }, { "epoch": 0.7827567270730368, "grad_norm": 0.3733243942260742, "learning_rate": 1.3371770319302697e-05, "loss": 0.439, "step": 28508 }, { "epoch": 0.7827841845140033, "grad_norm": 0.35461002588272095, "learning_rate": 1.3371363713882693e-05, "loss": 0.4323, "step": 28509 }, { "epoch": 0.7828116419549698, "grad_norm": 0.34013083577156067, "learning_rate": 1.3370957102174019e-05, "loss": 0.4917, "step": 28510 }, { "epoch": 0.7828390993959363, "grad_norm": 0.427473247051239, "learning_rate": 1.3370550484177431e-05, "loss": 0.608, "step": 28511 }, { "epoch": 0.7828665568369028, "grad_norm": 0.39536038041114807, "learning_rate": 1.3370143859893696e-05, "loss": 0.5085, "step": 28512 }, { "epoch": 0.7828940142778693, "grad_norm": 0.4015469551086426, "learning_rate": 1.3369737229323564e-05, "loss": 0.4803, "step": 28513 }, { "epoch": 0.7829214717188359, "grad_norm": 0.4073996841907501, "learning_rate": 1.3369330592467798e-05, "loss": 0.548, "step": 28514 }, { "epoch": 0.7829489291598023, "grad_norm": 0.4034525156021118, "learning_rate": 1.336892394932715e-05, "loss": 0.5399, "step": 28515 }, { "epoch": 0.7829763866007688, "grad_norm": 0.43523699045181274, "learning_rate": 1.3368517299902388e-05, "loss": 0.5836, "step": 28516 }, { "epoch": 0.7830038440417353, "grad_norm": 0.349631130695343, "learning_rate": 1.3368110644194268e-05, "loss": 0.4309, "step": 28517 }, { "epoch": 0.7830313014827018, "grad_norm": 0.4010617733001709, "learning_rate": 1.3367703982203541e-05, "loss": 0.4765, "step": 28518 }, { "epoch": 0.7830587589236683, "grad_norm": 0.44537878036499023, "learning_rate": 1.3367297313930978e-05, "loss": 0.5687, "step": 28519 }, { "epoch": 0.7830862163646348, "grad_norm": 0.4076749086380005, "learning_rate": 1.3366890639377328e-05, "loss": 0.4945, "step": 28520 }, { "epoch": 0.7831136738056013, "grad_norm": 0.42865896224975586, "learning_rate": 1.336648395854335e-05, "loss": 0.5827, "step": 28521 }, { "epoch": 0.7831411312465678, "grad_norm": 0.4331841766834259, "learning_rate": 1.3366077271429807e-05, "loss": 0.4966, "step": 28522 }, { "epoch": 0.7831685886875344, "grad_norm": 0.41074928641319275, "learning_rate": 1.3365670578037455e-05, "loss": 0.5129, "step": 28523 }, { "epoch": 0.7831960461285008, "grad_norm": 0.3877605199813843, "learning_rate": 1.3365263878367052e-05, "loss": 0.5476, "step": 28524 }, { "epoch": 0.7832235035694673, "grad_norm": 0.4297112226486206, "learning_rate": 1.336485717241936e-05, "loss": 0.5284, "step": 28525 }, { "epoch": 0.7832509610104338, "grad_norm": 0.39266401529312134, "learning_rate": 1.3364450460195135e-05, "loss": 0.5369, "step": 28526 }, { "epoch": 0.7832784184514003, "grad_norm": 0.42589110136032104, "learning_rate": 1.3364043741695136e-05, "loss": 0.5277, "step": 28527 }, { "epoch": 0.7833058758923668, "grad_norm": 0.401692271232605, "learning_rate": 1.3363637016920122e-05, "loss": 0.4799, "step": 28528 }, { "epoch": 0.7833333333333333, "grad_norm": 0.6176092624664307, "learning_rate": 1.336323028587085e-05, "loss": 0.4562, "step": 28529 }, { "epoch": 0.7833607907742999, "grad_norm": 0.389681339263916, "learning_rate": 1.336282354854808e-05, "loss": 0.5766, "step": 28530 }, { "epoch": 0.7833882482152663, "grad_norm": 0.3871902525424957, "learning_rate": 1.3362416804952572e-05, "loss": 0.4636, "step": 28531 }, { "epoch": 0.7834157056562329, "grad_norm": 0.3995181918144226, "learning_rate": 1.3362010055085084e-05, "loss": 0.5076, "step": 28532 }, { "epoch": 0.7834431630971993, "grad_norm": 0.39414969086647034, "learning_rate": 1.3361603298946373e-05, "loss": 0.4686, "step": 28533 }, { "epoch": 0.7834706205381659, "grad_norm": 0.5603223443031311, "learning_rate": 1.33611965365372e-05, "loss": 0.5378, "step": 28534 }, { "epoch": 0.7834980779791323, "grad_norm": 0.3895227611064911, "learning_rate": 1.3360789767858322e-05, "loss": 0.4158, "step": 28535 }, { "epoch": 0.7835255354200988, "grad_norm": 0.3635835647583008, "learning_rate": 1.3360382992910497e-05, "loss": 0.5075, "step": 28536 }, { "epoch": 0.7835529928610654, "grad_norm": 0.3528456389904022, "learning_rate": 1.3359976211694488e-05, "loss": 0.4137, "step": 28537 }, { "epoch": 0.7835804503020318, "grad_norm": 0.3949505686759949, "learning_rate": 1.3359569424211053e-05, "loss": 0.4349, "step": 28538 }, { "epoch": 0.7836079077429984, "grad_norm": 0.43972334265708923, "learning_rate": 1.3359162630460943e-05, "loss": 0.5766, "step": 28539 }, { "epoch": 0.7836353651839648, "grad_norm": 0.42622682452201843, "learning_rate": 1.3358755830444925e-05, "loss": 0.4968, "step": 28540 }, { "epoch": 0.7836628226249314, "grad_norm": 0.375531405210495, "learning_rate": 1.3358349024163754e-05, "loss": 0.5039, "step": 28541 }, { "epoch": 0.7836902800658978, "grad_norm": 0.34669139981269836, "learning_rate": 1.3357942211618193e-05, "loss": 0.5259, "step": 28542 }, { "epoch": 0.7837177375068644, "grad_norm": 0.347439706325531, "learning_rate": 1.3357535392808998e-05, "loss": 0.3919, "step": 28543 }, { "epoch": 0.7837451949478309, "grad_norm": 0.403978168964386, "learning_rate": 1.3357128567736926e-05, "loss": 0.4659, "step": 28544 }, { "epoch": 0.7837726523887973, "grad_norm": 0.3821006417274475, "learning_rate": 1.3356721736402738e-05, "loss": 0.4834, "step": 28545 }, { "epoch": 0.7838001098297639, "grad_norm": 0.35240307450294495, "learning_rate": 1.3356314898807192e-05, "loss": 0.4767, "step": 28546 }, { "epoch": 0.7838275672707303, "grad_norm": 0.4064358174800873, "learning_rate": 1.3355908054951048e-05, "loss": 0.4636, "step": 28547 }, { "epoch": 0.7838550247116969, "grad_norm": 0.35197290778160095, "learning_rate": 1.3355501204835066e-05, "loss": 0.5124, "step": 28548 }, { "epoch": 0.7838824821526633, "grad_norm": 0.37883713841438293, "learning_rate": 1.3355094348460002e-05, "loss": 0.4969, "step": 28549 }, { "epoch": 0.7839099395936299, "grad_norm": 0.4068754315376282, "learning_rate": 1.3354687485826617e-05, "loss": 0.6057, "step": 28550 }, { "epoch": 0.7839373970345964, "grad_norm": 0.3308519423007965, "learning_rate": 1.3354280616935668e-05, "loss": 0.4487, "step": 28551 }, { "epoch": 0.7839648544755629, "grad_norm": 0.4298515319824219, "learning_rate": 1.3353873741787915e-05, "loss": 0.522, "step": 28552 }, { "epoch": 0.7839923119165294, "grad_norm": 0.3906472623348236, "learning_rate": 1.3353466860384118e-05, "loss": 0.4407, "step": 28553 }, { "epoch": 0.7840197693574958, "grad_norm": 0.3851187527179718, "learning_rate": 1.3353059972725032e-05, "loss": 0.5427, "step": 28554 }, { "epoch": 0.7840472267984624, "grad_norm": 0.39142030477523804, "learning_rate": 1.3352653078811427e-05, "loss": 0.5822, "step": 28555 }, { "epoch": 0.7840746842394288, "grad_norm": 0.3507900834083557, "learning_rate": 1.3352246178644048e-05, "loss": 0.4929, "step": 28556 }, { "epoch": 0.7841021416803954, "grad_norm": 0.40149107575416565, "learning_rate": 1.3351839272223658e-05, "loss": 0.5484, "step": 28557 }, { "epoch": 0.7841295991213619, "grad_norm": 0.3942813277244568, "learning_rate": 1.3351432359551022e-05, "loss": 0.5279, "step": 28558 }, { "epoch": 0.7841570565623284, "grad_norm": 0.5833288431167603, "learning_rate": 1.3351025440626893e-05, "loss": 0.4546, "step": 28559 }, { "epoch": 0.7841845140032949, "grad_norm": 0.3930203914642334, "learning_rate": 1.3350618515452034e-05, "loss": 0.4915, "step": 28560 }, { "epoch": 0.7842119714442614, "grad_norm": 0.3552148640155792, "learning_rate": 1.33502115840272e-05, "loss": 0.4739, "step": 28561 }, { "epoch": 0.7842394288852279, "grad_norm": 0.3673396110534668, "learning_rate": 1.334980464635315e-05, "loss": 0.474, "step": 28562 }, { "epoch": 0.7842668863261943, "grad_norm": 0.4045673906803131, "learning_rate": 1.334939770243065e-05, "loss": 0.5394, "step": 28563 }, { "epoch": 0.7842943437671609, "grad_norm": 0.4181194603443146, "learning_rate": 1.3348990752260452e-05, "loss": 0.3989, "step": 28564 }, { "epoch": 0.7843218012081274, "grad_norm": 0.4101807475090027, "learning_rate": 1.3348583795843317e-05, "loss": 0.5123, "step": 28565 }, { "epoch": 0.7843492586490939, "grad_norm": 0.3553203344345093, "learning_rate": 1.3348176833180006e-05, "loss": 0.5158, "step": 28566 }, { "epoch": 0.7843767160900604, "grad_norm": 0.36090731620788574, "learning_rate": 1.3347769864271275e-05, "loss": 0.5112, "step": 28567 }, { "epoch": 0.7844041735310269, "grad_norm": 0.40829378366470337, "learning_rate": 1.3347362889117887e-05, "loss": 0.5432, "step": 28568 }, { "epoch": 0.7844316309719934, "grad_norm": 0.3922879993915558, "learning_rate": 1.3346955907720598e-05, "loss": 0.5055, "step": 28569 }, { "epoch": 0.7844590884129599, "grad_norm": 0.38471484184265137, "learning_rate": 1.3346548920080165e-05, "loss": 0.4545, "step": 28570 }, { "epoch": 0.7844865458539264, "grad_norm": 0.3782680332660675, "learning_rate": 1.3346141926197354e-05, "loss": 0.4931, "step": 28571 }, { "epoch": 0.784514003294893, "grad_norm": 0.42047634720802307, "learning_rate": 1.3345734926072917e-05, "loss": 0.4747, "step": 28572 }, { "epoch": 0.7845414607358594, "grad_norm": 0.3679380714893341, "learning_rate": 1.334532791970762e-05, "loss": 0.4495, "step": 28573 }, { "epoch": 0.784568918176826, "grad_norm": 0.34807541966438293, "learning_rate": 1.3344920907102216e-05, "loss": 0.4144, "step": 28574 }, { "epoch": 0.7845963756177924, "grad_norm": 0.3892156779766083, "learning_rate": 1.334451388825747e-05, "loss": 0.5014, "step": 28575 }, { "epoch": 0.7846238330587589, "grad_norm": 0.37741124629974365, "learning_rate": 1.3344106863174135e-05, "loss": 0.4683, "step": 28576 }, { "epoch": 0.7846512904997254, "grad_norm": 0.5001950860023499, "learning_rate": 1.3343699831852975e-05, "loss": 0.5324, "step": 28577 }, { "epoch": 0.7846787479406919, "grad_norm": 0.36884984374046326, "learning_rate": 1.3343292794294746e-05, "loss": 0.4935, "step": 28578 }, { "epoch": 0.7847062053816585, "grad_norm": 0.3299672305583954, "learning_rate": 1.3342885750500212e-05, "loss": 0.4651, "step": 28579 }, { "epoch": 0.7847336628226249, "grad_norm": 0.6071812510490417, "learning_rate": 1.3342478700470124e-05, "loss": 0.4636, "step": 28580 }, { "epoch": 0.7847611202635915, "grad_norm": 0.3639971911907196, "learning_rate": 1.3342071644205253e-05, "loss": 0.4514, "step": 28581 }, { "epoch": 0.7847885777045579, "grad_norm": 0.3829035758972168, "learning_rate": 1.3341664581706349e-05, "loss": 0.4279, "step": 28582 }, { "epoch": 0.7848160351455244, "grad_norm": 0.39494678378105164, "learning_rate": 1.334125751297417e-05, "loss": 0.4981, "step": 28583 }, { "epoch": 0.7848434925864909, "grad_norm": 0.4093272387981415, "learning_rate": 1.3340850438009485e-05, "loss": 0.5351, "step": 28584 }, { "epoch": 0.7848709500274574, "grad_norm": 0.42278704047203064, "learning_rate": 1.3340443356813044e-05, "loss": 0.6067, "step": 28585 }, { "epoch": 0.784898407468424, "grad_norm": 0.37400755286216736, "learning_rate": 1.3340036269385613e-05, "loss": 0.5387, "step": 28586 }, { "epoch": 0.7849258649093904, "grad_norm": 0.3952639400959015, "learning_rate": 1.3339629175727947e-05, "loss": 0.6254, "step": 28587 }, { "epoch": 0.784953322350357, "grad_norm": 0.4326179027557373, "learning_rate": 1.3339222075840805e-05, "loss": 0.5481, "step": 28588 }, { "epoch": 0.7849807797913234, "grad_norm": 0.48301681876182556, "learning_rate": 1.333881496972495e-05, "loss": 0.6137, "step": 28589 }, { "epoch": 0.78500823723229, "grad_norm": 0.40100082755088806, "learning_rate": 1.3338407857381137e-05, "loss": 0.5249, "step": 28590 }, { "epoch": 0.7850356946732564, "grad_norm": 0.412720650434494, "learning_rate": 1.3338000738810131e-05, "loss": 0.5367, "step": 28591 }, { "epoch": 0.785063152114223, "grad_norm": 0.43882298469543457, "learning_rate": 1.3337593614012688e-05, "loss": 0.5046, "step": 28592 }, { "epoch": 0.7850906095551895, "grad_norm": 0.3544243276119232, "learning_rate": 1.3337186482989566e-05, "loss": 0.5023, "step": 28593 }, { "epoch": 0.7851180669961559, "grad_norm": 0.3966607451438904, "learning_rate": 1.3336779345741526e-05, "loss": 0.4714, "step": 28594 }, { "epoch": 0.7851455244371225, "grad_norm": 0.3811722695827484, "learning_rate": 1.3336372202269328e-05, "loss": 0.4595, "step": 28595 }, { "epoch": 0.7851729818780889, "grad_norm": 0.38858118653297424, "learning_rate": 1.3335965052573732e-05, "loss": 0.5003, "step": 28596 }, { "epoch": 0.7852004393190555, "grad_norm": 0.3597685396671295, "learning_rate": 1.3335557896655497e-05, "loss": 0.5508, "step": 28597 }, { "epoch": 0.7852278967600219, "grad_norm": 0.4281710684299469, "learning_rate": 1.3335150734515381e-05, "loss": 0.5099, "step": 28598 }, { "epoch": 0.7852553542009885, "grad_norm": 0.36340469121932983, "learning_rate": 1.3334743566154144e-05, "loss": 0.4559, "step": 28599 }, { "epoch": 0.785282811641955, "grad_norm": 0.44517242908477783, "learning_rate": 1.3334336391572546e-05, "loss": 0.5045, "step": 28600 }, { "epoch": 0.7853102690829215, "grad_norm": 0.35935983061790466, "learning_rate": 1.3333929210771346e-05, "loss": 0.471, "step": 28601 }, { "epoch": 0.785337726523888, "grad_norm": 0.3856869339942932, "learning_rate": 1.3333522023751304e-05, "loss": 0.5632, "step": 28602 }, { "epoch": 0.7853651839648544, "grad_norm": 0.3806551694869995, "learning_rate": 1.3333114830513178e-05, "loss": 0.511, "step": 28603 }, { "epoch": 0.785392641405821, "grad_norm": 0.42841389775276184, "learning_rate": 1.3332707631057733e-05, "loss": 0.5678, "step": 28604 }, { "epoch": 0.7854200988467874, "grad_norm": 0.37921497225761414, "learning_rate": 1.3332300425385722e-05, "loss": 0.4011, "step": 28605 }, { "epoch": 0.785447556287754, "grad_norm": 0.38186606764793396, "learning_rate": 1.3331893213497908e-05, "loss": 0.4915, "step": 28606 }, { "epoch": 0.7854750137287205, "grad_norm": 0.403382271528244, "learning_rate": 1.333148599539505e-05, "loss": 0.4344, "step": 28607 }, { "epoch": 0.785502471169687, "grad_norm": 0.44847145676612854, "learning_rate": 1.3331078771077905e-05, "loss": 0.5397, "step": 28608 }, { "epoch": 0.7855299286106535, "grad_norm": 0.37197309732437134, "learning_rate": 1.333067154054724e-05, "loss": 0.5171, "step": 28609 }, { "epoch": 0.78555738605162, "grad_norm": 0.4694949984550476, "learning_rate": 1.3330264303803806e-05, "loss": 0.5071, "step": 28610 }, { "epoch": 0.7855848434925865, "grad_norm": 0.43054571747779846, "learning_rate": 1.3329857060848365e-05, "loss": 0.4501, "step": 28611 }, { "epoch": 0.7856123009335529, "grad_norm": 0.38067638874053955, "learning_rate": 1.332944981168168e-05, "loss": 0.5018, "step": 28612 }, { "epoch": 0.7856397583745195, "grad_norm": 0.40148431062698364, "learning_rate": 1.3329042556304508e-05, "loss": 0.4653, "step": 28613 }, { "epoch": 0.785667215815486, "grad_norm": 0.4487994909286499, "learning_rate": 1.332863529471761e-05, "loss": 0.4561, "step": 28614 }, { "epoch": 0.7856946732564525, "grad_norm": 0.42154935002326965, "learning_rate": 1.3328228026921743e-05, "loss": 0.5322, "step": 28615 }, { "epoch": 0.785722130697419, "grad_norm": 0.393182635307312, "learning_rate": 1.3327820752917669e-05, "loss": 0.4948, "step": 28616 }, { "epoch": 0.7857495881383855, "grad_norm": 0.38314366340637207, "learning_rate": 1.3327413472706148e-05, "loss": 0.4856, "step": 28617 }, { "epoch": 0.785777045579352, "grad_norm": 0.4314896762371063, "learning_rate": 1.3327006186287939e-05, "loss": 0.5573, "step": 28618 }, { "epoch": 0.7858045030203185, "grad_norm": 0.3996351361274719, "learning_rate": 1.3326598893663798e-05, "loss": 0.4918, "step": 28619 }, { "epoch": 0.785831960461285, "grad_norm": 0.4297027885913849, "learning_rate": 1.3326191594834493e-05, "loss": 0.5724, "step": 28620 }, { "epoch": 0.7858594179022516, "grad_norm": 0.4000239074230194, "learning_rate": 1.3325784289800776e-05, "loss": 0.4838, "step": 28621 }, { "epoch": 0.785886875343218, "grad_norm": 0.4019842743873596, "learning_rate": 1.3325376978563412e-05, "loss": 0.5253, "step": 28622 }, { "epoch": 0.7859143327841845, "grad_norm": 0.3807014226913452, "learning_rate": 1.332496966112316e-05, "loss": 0.4316, "step": 28623 }, { "epoch": 0.785941790225151, "grad_norm": 0.38970303535461426, "learning_rate": 1.3324562337480773e-05, "loss": 0.5642, "step": 28624 }, { "epoch": 0.7859692476661175, "grad_norm": 0.4067208170890808, "learning_rate": 1.3324155007637019e-05, "loss": 0.4722, "step": 28625 }, { "epoch": 0.785996705107084, "grad_norm": 0.38186755776405334, "learning_rate": 1.3323747671592655e-05, "loss": 0.5537, "step": 28626 }, { "epoch": 0.7860241625480505, "grad_norm": 0.35769546031951904, "learning_rate": 1.3323340329348442e-05, "loss": 0.4806, "step": 28627 }, { "epoch": 0.7860516199890171, "grad_norm": 0.37352874875068665, "learning_rate": 1.3322932980905137e-05, "loss": 0.483, "step": 28628 }, { "epoch": 0.7860790774299835, "grad_norm": 0.3881298899650574, "learning_rate": 1.3322525626263501e-05, "loss": 0.4706, "step": 28629 }, { "epoch": 0.7861065348709501, "grad_norm": 0.37725841999053955, "learning_rate": 1.3322118265424298e-05, "loss": 0.4764, "step": 28630 }, { "epoch": 0.7861339923119165, "grad_norm": 0.3734586536884308, "learning_rate": 1.3321710898388281e-05, "loss": 0.4533, "step": 28631 }, { "epoch": 0.786161449752883, "grad_norm": 0.38989877700805664, "learning_rate": 1.3321303525156213e-05, "loss": 0.501, "step": 28632 }, { "epoch": 0.7861889071938495, "grad_norm": 0.36490899324417114, "learning_rate": 1.3320896145728855e-05, "loss": 0.5211, "step": 28633 }, { "epoch": 0.786216364634816, "grad_norm": 0.4901975393295288, "learning_rate": 1.3320488760106965e-05, "loss": 0.549, "step": 28634 }, { "epoch": 0.7862438220757826, "grad_norm": 0.3965071439743042, "learning_rate": 1.3320081368291302e-05, "loss": 0.496, "step": 28635 }, { "epoch": 0.786271279516749, "grad_norm": 0.3885347545146942, "learning_rate": 1.3319673970282631e-05, "loss": 0.4955, "step": 28636 }, { "epoch": 0.7862987369577156, "grad_norm": 0.45938044786453247, "learning_rate": 1.3319266566081705e-05, "loss": 0.4793, "step": 28637 }, { "epoch": 0.786326194398682, "grad_norm": 0.3902451992034912, "learning_rate": 1.331885915568929e-05, "loss": 0.4388, "step": 28638 }, { "epoch": 0.7863536518396486, "grad_norm": 0.3827420771121979, "learning_rate": 1.3318451739106143e-05, "loss": 0.4703, "step": 28639 }, { "epoch": 0.786381109280615, "grad_norm": 0.3847358226776123, "learning_rate": 1.3318044316333025e-05, "loss": 0.4063, "step": 28640 }, { "epoch": 0.7864085667215815, "grad_norm": 0.39052614569664, "learning_rate": 1.3317636887370695e-05, "loss": 0.4618, "step": 28641 }, { "epoch": 0.7864360241625481, "grad_norm": 0.3938121199607849, "learning_rate": 1.331722945221991e-05, "loss": 0.5058, "step": 28642 }, { "epoch": 0.7864634816035145, "grad_norm": 0.3729912042617798, "learning_rate": 1.3316822010881439e-05, "loss": 0.5285, "step": 28643 }, { "epoch": 0.7864909390444811, "grad_norm": 0.45940956473350525, "learning_rate": 1.3316414563356035e-05, "loss": 0.5992, "step": 28644 }, { "epoch": 0.7865183964854475, "grad_norm": 0.43041175603866577, "learning_rate": 1.3316007109644456e-05, "loss": 0.5529, "step": 28645 }, { "epoch": 0.7865458539264141, "grad_norm": 0.3898368775844574, "learning_rate": 1.3315599649747468e-05, "loss": 0.465, "step": 28646 }, { "epoch": 0.7865733113673805, "grad_norm": 0.4616767168045044, "learning_rate": 1.3315192183665826e-05, "loss": 0.456, "step": 28647 }, { "epoch": 0.7866007688083471, "grad_norm": 0.7247854471206665, "learning_rate": 1.3314784711400295e-05, "loss": 0.5186, "step": 28648 }, { "epoch": 0.7866282262493136, "grad_norm": 0.3668421506881714, "learning_rate": 1.331437723295163e-05, "loss": 0.435, "step": 28649 }, { "epoch": 0.78665568369028, "grad_norm": 0.41485854983329773, "learning_rate": 1.3313969748320597e-05, "loss": 0.4813, "step": 28650 }, { "epoch": 0.7866831411312466, "grad_norm": 0.3943430185317993, "learning_rate": 1.331356225750795e-05, "loss": 0.4827, "step": 28651 }, { "epoch": 0.786710598572213, "grad_norm": 0.4185878336429596, "learning_rate": 1.3313154760514453e-05, "loss": 0.5243, "step": 28652 }, { "epoch": 0.7867380560131796, "grad_norm": 0.45483964681625366, "learning_rate": 1.3312747257340865e-05, "loss": 0.528, "step": 28653 }, { "epoch": 0.786765513454146, "grad_norm": 0.343625545501709, "learning_rate": 1.3312339747987946e-05, "loss": 0.4616, "step": 28654 }, { "epoch": 0.7867929708951126, "grad_norm": 0.3839671313762665, "learning_rate": 1.3311932232456457e-05, "loss": 0.5358, "step": 28655 }, { "epoch": 0.7868204283360791, "grad_norm": 0.389047771692276, "learning_rate": 1.3311524710747157e-05, "loss": 0.5173, "step": 28656 }, { "epoch": 0.7868478857770456, "grad_norm": 0.46810394525527954, "learning_rate": 1.3311117182860805e-05, "loss": 0.3487, "step": 28657 }, { "epoch": 0.7868753432180121, "grad_norm": 0.3342956304550171, "learning_rate": 1.3310709648798163e-05, "loss": 0.4087, "step": 28658 }, { "epoch": 0.7869028006589786, "grad_norm": 0.40955907106399536, "learning_rate": 1.3310302108559992e-05, "loss": 0.4568, "step": 28659 }, { "epoch": 0.7869302580999451, "grad_norm": 0.41724738478660583, "learning_rate": 1.330989456214705e-05, "loss": 0.5074, "step": 28660 }, { "epoch": 0.7869577155409115, "grad_norm": 0.4353211224079132, "learning_rate": 1.33094870095601e-05, "loss": 0.5222, "step": 28661 }, { "epoch": 0.7869851729818781, "grad_norm": 0.4935241639614105, "learning_rate": 1.3309079450799898e-05, "loss": 0.5429, "step": 28662 }, { "epoch": 0.7870126304228446, "grad_norm": 0.4105493128299713, "learning_rate": 1.330867188586721e-05, "loss": 0.4969, "step": 28663 }, { "epoch": 0.7870400878638111, "grad_norm": 0.38226690888404846, "learning_rate": 1.3308264314762789e-05, "loss": 0.5021, "step": 28664 }, { "epoch": 0.7870675453047776, "grad_norm": 0.43444204330444336, "learning_rate": 1.33078567374874e-05, "loss": 0.5607, "step": 28665 }, { "epoch": 0.7870950027457441, "grad_norm": 0.4139217734336853, "learning_rate": 1.3307449154041803e-05, "loss": 0.4535, "step": 28666 }, { "epoch": 0.7871224601867106, "grad_norm": 0.39236581325531006, "learning_rate": 1.330704156442676e-05, "loss": 0.5401, "step": 28667 }, { "epoch": 0.787149917627677, "grad_norm": 0.39120668172836304, "learning_rate": 1.3306633968643029e-05, "loss": 0.4666, "step": 28668 }, { "epoch": 0.7871773750686436, "grad_norm": 0.39181020855903625, "learning_rate": 1.3306226366691367e-05, "loss": 0.5086, "step": 28669 }, { "epoch": 0.7872048325096102, "grad_norm": 0.39238253235816956, "learning_rate": 1.330581875857254e-05, "loss": 0.4304, "step": 28670 }, { "epoch": 0.7872322899505766, "grad_norm": 0.3857230246067047, "learning_rate": 1.3305411144287304e-05, "loss": 0.5496, "step": 28671 }, { "epoch": 0.7872597473915431, "grad_norm": 0.3652074933052063, "learning_rate": 1.3305003523836422e-05, "loss": 0.4587, "step": 28672 }, { "epoch": 0.7872872048325096, "grad_norm": 0.3866010010242462, "learning_rate": 1.3304595897220653e-05, "loss": 0.4318, "step": 28673 }, { "epoch": 0.7873146622734761, "grad_norm": 0.4507792890071869, "learning_rate": 1.3304188264440758e-05, "loss": 0.5441, "step": 28674 }, { "epoch": 0.7873421197144426, "grad_norm": 0.37840813398361206, "learning_rate": 1.33037806254975e-05, "loss": 0.5038, "step": 28675 }, { "epoch": 0.7873695771554091, "grad_norm": 0.38228511810302734, "learning_rate": 1.3303372980391633e-05, "loss": 0.4543, "step": 28676 }, { "epoch": 0.7873970345963757, "grad_norm": 0.40321415662765503, "learning_rate": 1.3302965329123925e-05, "loss": 0.5197, "step": 28677 }, { "epoch": 0.7874244920373421, "grad_norm": 0.3747674822807312, "learning_rate": 1.3302557671695131e-05, "loss": 0.5046, "step": 28678 }, { "epoch": 0.7874519494783087, "grad_norm": 0.5481221079826355, "learning_rate": 1.330215000810601e-05, "loss": 0.5062, "step": 28679 }, { "epoch": 0.7874794069192751, "grad_norm": 0.37306562066078186, "learning_rate": 1.3301742338357328e-05, "loss": 0.4624, "step": 28680 }, { "epoch": 0.7875068643602416, "grad_norm": 0.4052174985408783, "learning_rate": 1.330133466244984e-05, "loss": 0.5054, "step": 28681 }, { "epoch": 0.7875343218012081, "grad_norm": 0.4206315279006958, "learning_rate": 1.3300926980384315e-05, "loss": 0.4876, "step": 28682 }, { "epoch": 0.7875617792421746, "grad_norm": 0.42313152551651, "learning_rate": 1.3300519292161502e-05, "loss": 0.4331, "step": 28683 }, { "epoch": 0.7875892366831412, "grad_norm": 0.4104222059249878, "learning_rate": 1.3300111597782171e-05, "loss": 0.554, "step": 28684 }, { "epoch": 0.7876166941241076, "grad_norm": 0.38474753499031067, "learning_rate": 1.3299703897247075e-05, "loss": 0.5117, "step": 28685 }, { "epoch": 0.7876441515650742, "grad_norm": 0.6084360480308533, "learning_rate": 1.329929619055698e-05, "loss": 0.6342, "step": 28686 }, { "epoch": 0.7876716090060406, "grad_norm": 0.4092370867729187, "learning_rate": 1.3298888477712646e-05, "loss": 0.5296, "step": 28687 }, { "epoch": 0.7876990664470072, "grad_norm": 0.38154953718185425, "learning_rate": 1.329848075871483e-05, "loss": 0.5152, "step": 28688 }, { "epoch": 0.7877265238879736, "grad_norm": 0.38066861033439636, "learning_rate": 1.3298073033564296e-05, "loss": 0.4978, "step": 28689 }, { "epoch": 0.7877539813289401, "grad_norm": 0.4489380121231079, "learning_rate": 1.3297665302261802e-05, "loss": 0.5354, "step": 28690 }, { "epoch": 0.7877814387699067, "grad_norm": 0.3307829797267914, "learning_rate": 1.3297257564808108e-05, "loss": 0.3817, "step": 28691 }, { "epoch": 0.7878088962108731, "grad_norm": 0.39020416140556335, "learning_rate": 1.329684982120398e-05, "loss": 0.4815, "step": 28692 }, { "epoch": 0.7878363536518397, "grad_norm": 0.39014992117881775, "learning_rate": 1.3296442071450175e-05, "loss": 0.3909, "step": 28693 }, { "epoch": 0.7878638110928061, "grad_norm": 0.37285593152046204, "learning_rate": 1.329603431554745e-05, "loss": 0.4063, "step": 28694 }, { "epoch": 0.7878912685337727, "grad_norm": 0.441509485244751, "learning_rate": 1.3295626553496573e-05, "loss": 0.5577, "step": 28695 }, { "epoch": 0.7879187259747391, "grad_norm": 0.35273265838623047, "learning_rate": 1.3295218785298296e-05, "loss": 0.4714, "step": 28696 }, { "epoch": 0.7879461834157057, "grad_norm": 0.696442186832428, "learning_rate": 1.3294811010953388e-05, "loss": 0.5854, "step": 28697 }, { "epoch": 0.7879736408566722, "grad_norm": 0.374620646238327, "learning_rate": 1.3294403230462606e-05, "loss": 0.4804, "step": 28698 }, { "epoch": 0.7880010982976386, "grad_norm": 0.4309881329536438, "learning_rate": 1.3293995443826706e-05, "loss": 0.5358, "step": 28699 }, { "epoch": 0.7880285557386052, "grad_norm": 0.3447522521018982, "learning_rate": 1.3293587651046458e-05, "loss": 0.3828, "step": 28700 }, { "epoch": 0.7880560131795716, "grad_norm": 0.4072777330875397, "learning_rate": 1.3293179852122613e-05, "loss": 0.4494, "step": 28701 }, { "epoch": 0.7880834706205382, "grad_norm": 0.3874247074127197, "learning_rate": 1.3292772047055942e-05, "loss": 0.4562, "step": 28702 }, { "epoch": 0.7881109280615046, "grad_norm": 0.3823643922805786, "learning_rate": 1.3292364235847196e-05, "loss": 0.3909, "step": 28703 }, { "epoch": 0.7881383855024712, "grad_norm": 0.3705933392047882, "learning_rate": 1.3291956418497143e-05, "loss": 0.5745, "step": 28704 }, { "epoch": 0.7881658429434377, "grad_norm": 0.4128609001636505, "learning_rate": 1.329154859500654e-05, "loss": 0.4475, "step": 28705 }, { "epoch": 0.7881933003844042, "grad_norm": 0.3666200637817383, "learning_rate": 1.3291140765376147e-05, "loss": 0.4679, "step": 28706 }, { "epoch": 0.7882207578253707, "grad_norm": 0.3920372724533081, "learning_rate": 1.3290732929606727e-05, "loss": 0.5515, "step": 28707 }, { "epoch": 0.7882482152663371, "grad_norm": 0.3892602324485779, "learning_rate": 1.3290325087699039e-05, "loss": 0.6272, "step": 28708 }, { "epoch": 0.7882756727073037, "grad_norm": 0.40660855174064636, "learning_rate": 1.3289917239653845e-05, "loss": 0.5492, "step": 28709 }, { "epoch": 0.7883031301482701, "grad_norm": 0.3779171407222748, "learning_rate": 1.3289509385471906e-05, "loss": 0.4956, "step": 28710 }, { "epoch": 0.7883305875892367, "grad_norm": 0.7255706787109375, "learning_rate": 1.3289101525153982e-05, "loss": 0.4457, "step": 28711 }, { "epoch": 0.7883580450302032, "grad_norm": 0.540687084197998, "learning_rate": 1.3288693658700833e-05, "loss": 0.5702, "step": 28712 }, { "epoch": 0.7883855024711697, "grad_norm": 0.40479639172554016, "learning_rate": 1.3288285786113221e-05, "loss": 0.5408, "step": 28713 }, { "epoch": 0.7884129599121362, "grad_norm": 0.3845753073692322, "learning_rate": 1.3287877907391906e-05, "loss": 0.444, "step": 28714 }, { "epoch": 0.7884404173531027, "grad_norm": 0.3765038549900055, "learning_rate": 1.328747002253765e-05, "loss": 0.4872, "step": 28715 }, { "epoch": 0.7884678747940692, "grad_norm": 0.3913820683956146, "learning_rate": 1.3287062131551214e-05, "loss": 0.5165, "step": 28716 }, { "epoch": 0.7884953322350357, "grad_norm": 0.34332478046417236, "learning_rate": 1.3286654234433356e-05, "loss": 0.4469, "step": 28717 }, { "epoch": 0.7885227896760022, "grad_norm": 0.4851280152797699, "learning_rate": 1.328624633118484e-05, "loss": 0.5554, "step": 28718 }, { "epoch": 0.7885502471169687, "grad_norm": 0.5946874618530273, "learning_rate": 1.3285838421806426e-05, "loss": 0.4878, "step": 28719 }, { "epoch": 0.7885777045579352, "grad_norm": 0.35871437191963196, "learning_rate": 1.3285430506298876e-05, "loss": 0.4648, "step": 28720 }, { "epoch": 0.7886051619989017, "grad_norm": 0.4237557053565979, "learning_rate": 1.3285022584662948e-05, "loss": 0.5119, "step": 28721 }, { "epoch": 0.7886326194398682, "grad_norm": 0.4222288131713867, "learning_rate": 1.3284614656899402e-05, "loss": 0.5272, "step": 28722 }, { "epoch": 0.7886600768808347, "grad_norm": 0.34645676612854004, "learning_rate": 1.3284206723009006e-05, "loss": 0.4642, "step": 28723 }, { "epoch": 0.7886875343218012, "grad_norm": 0.36718112230300903, "learning_rate": 1.3283798782992514e-05, "loss": 0.522, "step": 28724 }, { "epoch": 0.7887149917627677, "grad_norm": 0.39704757928848267, "learning_rate": 1.3283390836850686e-05, "loss": 0.5583, "step": 28725 }, { "epoch": 0.7887424492037343, "grad_norm": 0.37737664580345154, "learning_rate": 1.328298288458429e-05, "loss": 0.4623, "step": 28726 }, { "epoch": 0.7887699066447007, "grad_norm": 0.4209991693496704, "learning_rate": 1.3282574926194083e-05, "loss": 0.5249, "step": 28727 }, { "epoch": 0.7887973640856673, "grad_norm": 0.34382298588752747, "learning_rate": 1.3282166961680827e-05, "loss": 0.4697, "step": 28728 }, { "epoch": 0.7888248215266337, "grad_norm": 0.3796181380748749, "learning_rate": 1.328175899104528e-05, "loss": 0.4721, "step": 28729 }, { "epoch": 0.7888522789676002, "grad_norm": 0.364887535572052, "learning_rate": 1.3281351014288205e-05, "loss": 0.3642, "step": 28730 }, { "epoch": 0.7888797364085667, "grad_norm": 0.347625732421875, "learning_rate": 1.3280943031410365e-05, "loss": 0.4582, "step": 28731 }, { "epoch": 0.7889071938495332, "grad_norm": 0.3529820144176483, "learning_rate": 1.3280535042412515e-05, "loss": 0.3773, "step": 28732 }, { "epoch": 0.7889346512904998, "grad_norm": 0.35188260674476624, "learning_rate": 1.3280127047295424e-05, "loss": 0.4708, "step": 28733 }, { "epoch": 0.7889621087314662, "grad_norm": 0.38121113181114197, "learning_rate": 1.3279719046059849e-05, "loss": 0.531, "step": 28734 }, { "epoch": 0.7889895661724328, "grad_norm": 0.4306797981262207, "learning_rate": 1.3279311038706548e-05, "loss": 0.4604, "step": 28735 }, { "epoch": 0.7890170236133992, "grad_norm": 0.3811364471912384, "learning_rate": 1.3278903025236289e-05, "loss": 0.3897, "step": 28736 }, { "epoch": 0.7890444810543658, "grad_norm": 0.3542564809322357, "learning_rate": 1.327849500564983e-05, "loss": 0.4293, "step": 28737 }, { "epoch": 0.7890719384953322, "grad_norm": 0.3825948238372803, "learning_rate": 1.3278086979947927e-05, "loss": 0.4534, "step": 28738 }, { "epoch": 0.7890993959362987, "grad_norm": 0.3584884703159332, "learning_rate": 1.3277678948131349e-05, "loss": 0.5225, "step": 28739 }, { "epoch": 0.7891268533772653, "grad_norm": 0.3880850672721863, "learning_rate": 1.327727091020085e-05, "loss": 0.4346, "step": 28740 }, { "epoch": 0.7891543108182317, "grad_norm": 0.3914419114589691, "learning_rate": 1.3276862866157199e-05, "loss": 0.4989, "step": 28741 }, { "epoch": 0.7891817682591983, "grad_norm": 0.40792304277420044, "learning_rate": 1.3276454816001151e-05, "loss": 0.5285, "step": 28742 }, { "epoch": 0.7892092257001647, "grad_norm": 0.38914453983306885, "learning_rate": 1.3276046759733468e-05, "loss": 0.4984, "step": 28743 }, { "epoch": 0.7892366831411313, "grad_norm": 0.4196838140487671, "learning_rate": 1.3275638697354915e-05, "loss": 0.4622, "step": 28744 }, { "epoch": 0.7892641405820977, "grad_norm": 0.4279472827911377, "learning_rate": 1.3275230628866247e-05, "loss": 0.4798, "step": 28745 }, { "epoch": 0.7892915980230643, "grad_norm": 0.3705406188964844, "learning_rate": 1.3274822554268232e-05, "loss": 0.4879, "step": 28746 }, { "epoch": 0.7893190554640308, "grad_norm": 0.38727065920829773, "learning_rate": 1.3274414473561628e-05, "loss": 0.4768, "step": 28747 }, { "epoch": 0.7893465129049972, "grad_norm": 0.34978559613227844, "learning_rate": 1.3274006386747192e-05, "loss": 0.471, "step": 28748 }, { "epoch": 0.7893739703459638, "grad_norm": 0.41685059666633606, "learning_rate": 1.3273598293825691e-05, "loss": 0.5586, "step": 28749 }, { "epoch": 0.7894014277869302, "grad_norm": 0.4733380675315857, "learning_rate": 1.3273190194797882e-05, "loss": 0.5458, "step": 28750 }, { "epoch": 0.7894288852278968, "grad_norm": 0.40309932827949524, "learning_rate": 1.3272782089664533e-05, "loss": 0.5719, "step": 28751 }, { "epoch": 0.7894563426688632, "grad_norm": 0.45125052332878113, "learning_rate": 1.32723739784264e-05, "loss": 0.5269, "step": 28752 }, { "epoch": 0.7894838001098298, "grad_norm": 0.3974987864494324, "learning_rate": 1.3271965861084243e-05, "loss": 0.5641, "step": 28753 }, { "epoch": 0.7895112575507963, "grad_norm": 0.41609787940979004, "learning_rate": 1.3271557737638828e-05, "loss": 0.4734, "step": 28754 }, { "epoch": 0.7895387149917628, "grad_norm": 0.3417462408542633, "learning_rate": 1.3271149608090912e-05, "loss": 0.4477, "step": 28755 }, { "epoch": 0.7895661724327293, "grad_norm": 0.3700108826160431, "learning_rate": 1.3270741472441258e-05, "loss": 0.5435, "step": 28756 }, { "epoch": 0.7895936298736957, "grad_norm": 0.41643938422203064, "learning_rate": 1.3270333330690629e-05, "loss": 0.4562, "step": 28757 }, { "epoch": 0.7896210873146623, "grad_norm": 0.3777387738227844, "learning_rate": 1.326992518283978e-05, "loss": 0.4687, "step": 28758 }, { "epoch": 0.7896485447556287, "grad_norm": 0.4808649718761444, "learning_rate": 1.326951702888948e-05, "loss": 0.5712, "step": 28759 }, { "epoch": 0.7896760021965953, "grad_norm": 0.37809059023857117, "learning_rate": 1.3269108868840488e-05, "loss": 0.5197, "step": 28760 }, { "epoch": 0.7897034596375618, "grad_norm": 0.6567462086677551, "learning_rate": 1.3268700702693561e-05, "loss": 0.5087, "step": 28761 }, { "epoch": 0.7897309170785283, "grad_norm": 0.37763139605522156, "learning_rate": 1.326829253044947e-05, "loss": 0.5108, "step": 28762 }, { "epoch": 0.7897583745194948, "grad_norm": 0.36527568101882935, "learning_rate": 1.3267884352108964e-05, "loss": 0.4703, "step": 28763 }, { "epoch": 0.7897858319604613, "grad_norm": 0.414664089679718, "learning_rate": 1.3267476167672815e-05, "loss": 0.5171, "step": 28764 }, { "epoch": 0.7898132894014278, "grad_norm": 0.3772841989994049, "learning_rate": 1.326706797714178e-05, "loss": 0.4724, "step": 28765 }, { "epoch": 0.7898407468423942, "grad_norm": 0.44333040714263916, "learning_rate": 1.3266659780516618e-05, "loss": 0.5378, "step": 28766 }, { "epoch": 0.7898682042833608, "grad_norm": 0.3829750716686249, "learning_rate": 1.3266251577798095e-05, "loss": 0.5229, "step": 28767 }, { "epoch": 0.7898956617243273, "grad_norm": 0.4057861864566803, "learning_rate": 1.3265843368986971e-05, "loss": 0.4957, "step": 28768 }, { "epoch": 0.7899231191652938, "grad_norm": 0.35993650555610657, "learning_rate": 1.3265435154084003e-05, "loss": 0.4533, "step": 28769 }, { "epoch": 0.7899505766062603, "grad_norm": 0.3637941777706146, "learning_rate": 1.326502693308996e-05, "loss": 0.561, "step": 28770 }, { "epoch": 0.7899780340472268, "grad_norm": 0.37555134296417236, "learning_rate": 1.3264618706005595e-05, "loss": 0.5442, "step": 28771 }, { "epoch": 0.7900054914881933, "grad_norm": 0.38919374346733093, "learning_rate": 1.3264210472831679e-05, "loss": 0.5717, "step": 28772 }, { "epoch": 0.7900329489291598, "grad_norm": 0.4083525240421295, "learning_rate": 1.3263802233568968e-05, "loss": 0.4594, "step": 28773 }, { "epoch": 0.7900604063701263, "grad_norm": 0.3497200310230255, "learning_rate": 1.326339398821822e-05, "loss": 0.5545, "step": 28774 }, { "epoch": 0.7900878638110929, "grad_norm": 0.440510094165802, "learning_rate": 1.3262985736780205e-05, "loss": 0.5471, "step": 28775 }, { "epoch": 0.7901153212520593, "grad_norm": 0.41741400957107544, "learning_rate": 1.3262577479255678e-05, "loss": 0.5417, "step": 28776 }, { "epoch": 0.7901427786930258, "grad_norm": 0.38766446709632874, "learning_rate": 1.3262169215645405e-05, "loss": 0.4905, "step": 28777 }, { "epoch": 0.7901702361339923, "grad_norm": 0.36024513840675354, "learning_rate": 1.3261760945950144e-05, "loss": 0.386, "step": 28778 }, { "epoch": 0.7901976935749588, "grad_norm": 0.47505369782447815, "learning_rate": 1.3261352670170657e-05, "loss": 0.5368, "step": 28779 }, { "epoch": 0.7902251510159253, "grad_norm": 0.4676710367202759, "learning_rate": 1.3260944388307708e-05, "loss": 0.6109, "step": 28780 }, { "epoch": 0.7902526084568918, "grad_norm": 0.37154754996299744, "learning_rate": 1.3260536100362055e-05, "loss": 0.4531, "step": 28781 }, { "epoch": 0.7902800658978584, "grad_norm": 0.34991809725761414, "learning_rate": 1.3260127806334463e-05, "loss": 0.4542, "step": 28782 }, { "epoch": 0.7903075233388248, "grad_norm": 0.3522988557815552, "learning_rate": 1.3259719506225692e-05, "loss": 0.479, "step": 28783 }, { "epoch": 0.7903349807797914, "grad_norm": 0.3739938735961914, "learning_rate": 1.3259311200036502e-05, "loss": 0.4743, "step": 28784 }, { "epoch": 0.7903624382207578, "grad_norm": 0.362521767616272, "learning_rate": 1.325890288776766e-05, "loss": 0.4635, "step": 28785 }, { "epoch": 0.7903898956617244, "grad_norm": 0.33983349800109863, "learning_rate": 1.3258494569419922e-05, "loss": 0.4133, "step": 28786 }, { "epoch": 0.7904173531026908, "grad_norm": 0.35386502742767334, "learning_rate": 1.325808624499405e-05, "loss": 0.4941, "step": 28787 }, { "epoch": 0.7904448105436573, "grad_norm": 0.4481097161769867, "learning_rate": 1.325767791449081e-05, "loss": 0.5186, "step": 28788 }, { "epoch": 0.7904722679846238, "grad_norm": 0.4191543459892273, "learning_rate": 1.3257269577910959e-05, "loss": 0.4725, "step": 28789 }, { "epoch": 0.7904997254255903, "grad_norm": 0.3973647654056549, "learning_rate": 1.3256861235255261e-05, "loss": 0.4125, "step": 28790 }, { "epoch": 0.7905271828665569, "grad_norm": 0.3925064206123352, "learning_rate": 1.3256452886524477e-05, "loss": 0.4629, "step": 28791 }, { "epoch": 0.7905546403075233, "grad_norm": 0.33964601159095764, "learning_rate": 1.325604453171937e-05, "loss": 0.4058, "step": 28792 }, { "epoch": 0.7905820977484899, "grad_norm": 0.3849335014820099, "learning_rate": 1.32556361708407e-05, "loss": 0.5544, "step": 28793 }, { "epoch": 0.7906095551894563, "grad_norm": 0.37691089510917664, "learning_rate": 1.3255227803889228e-05, "loss": 0.4062, "step": 28794 }, { "epoch": 0.7906370126304229, "grad_norm": 0.4426472783088684, "learning_rate": 1.325481943086572e-05, "loss": 0.5622, "step": 28795 }, { "epoch": 0.7906644700713893, "grad_norm": 0.4006551504135132, "learning_rate": 1.3254411051770935e-05, "loss": 0.5441, "step": 28796 }, { "epoch": 0.7906919275123558, "grad_norm": 0.42031607031822205, "learning_rate": 1.3254002666605632e-05, "loss": 0.4502, "step": 28797 }, { "epoch": 0.7907193849533224, "grad_norm": 0.3763772249221802, "learning_rate": 1.3253594275370579e-05, "loss": 0.5356, "step": 28798 }, { "epoch": 0.7907468423942888, "grad_norm": 0.4517800211906433, "learning_rate": 1.3253185878066533e-05, "loss": 0.5563, "step": 28799 }, { "epoch": 0.7907742998352554, "grad_norm": 0.383159875869751, "learning_rate": 1.3252777474694253e-05, "loss": 0.4516, "step": 28800 }, { "epoch": 0.7908017572762218, "grad_norm": 0.3485627770423889, "learning_rate": 1.325236906525451e-05, "loss": 0.4104, "step": 28801 }, { "epoch": 0.7908292147171884, "grad_norm": 0.40728214383125305, "learning_rate": 1.3251960649748059e-05, "loss": 0.4653, "step": 28802 }, { "epoch": 0.7908566721581548, "grad_norm": 0.393544465303421, "learning_rate": 1.3251552228175664e-05, "loss": 0.5353, "step": 28803 }, { "epoch": 0.7908841295991214, "grad_norm": 0.3621717691421509, "learning_rate": 1.3251143800538086e-05, "loss": 0.4772, "step": 28804 }, { "epoch": 0.7909115870400879, "grad_norm": 0.41948407888412476, "learning_rate": 1.3250735366836086e-05, "loss": 0.5562, "step": 28805 }, { "epoch": 0.7909390444810543, "grad_norm": 0.40144652128219604, "learning_rate": 1.3250326927070429e-05, "loss": 0.4751, "step": 28806 }, { "epoch": 0.7909665019220209, "grad_norm": 0.3527287542819977, "learning_rate": 1.3249918481241873e-05, "loss": 0.5219, "step": 28807 }, { "epoch": 0.7909939593629873, "grad_norm": 0.346693217754364, "learning_rate": 1.3249510029351182e-05, "loss": 0.4092, "step": 28808 }, { "epoch": 0.7910214168039539, "grad_norm": 0.3968033790588379, "learning_rate": 1.3249101571399118e-05, "loss": 0.4977, "step": 28809 }, { "epoch": 0.7910488742449203, "grad_norm": 0.4277237355709076, "learning_rate": 1.3248693107386444e-05, "loss": 0.5526, "step": 28810 }, { "epoch": 0.7910763316858869, "grad_norm": 0.42285820841789246, "learning_rate": 1.324828463731392e-05, "loss": 0.5252, "step": 28811 }, { "epoch": 0.7911037891268534, "grad_norm": 0.3829896152019501, "learning_rate": 1.3247876161182308e-05, "loss": 0.4626, "step": 28812 }, { "epoch": 0.7911312465678199, "grad_norm": 0.35425707697868347, "learning_rate": 1.3247467678992369e-05, "loss": 0.479, "step": 28813 }, { "epoch": 0.7911587040087864, "grad_norm": 0.4092322587966919, "learning_rate": 1.3247059190744869e-05, "loss": 0.5027, "step": 28814 }, { "epoch": 0.7911861614497528, "grad_norm": 0.4137939512729645, "learning_rate": 1.3246650696440563e-05, "loss": 0.5203, "step": 28815 }, { "epoch": 0.7912136188907194, "grad_norm": 0.5959417819976807, "learning_rate": 1.3246242196080221e-05, "loss": 0.4806, "step": 28816 }, { "epoch": 0.7912410763316858, "grad_norm": 0.38896676898002625, "learning_rate": 1.3245833689664602e-05, "loss": 0.4062, "step": 28817 }, { "epoch": 0.7912685337726524, "grad_norm": 0.4118989109992981, "learning_rate": 1.3245425177194465e-05, "loss": 0.4699, "step": 28818 }, { "epoch": 0.7912959912136189, "grad_norm": 0.351879358291626, "learning_rate": 1.3245016658670573e-05, "loss": 0.4054, "step": 28819 }, { "epoch": 0.7913234486545854, "grad_norm": 0.3627587854862213, "learning_rate": 1.324460813409369e-05, "loss": 0.4361, "step": 28820 }, { "epoch": 0.7913509060955519, "grad_norm": 0.38015952706336975, "learning_rate": 1.3244199603464581e-05, "loss": 0.5092, "step": 28821 }, { "epoch": 0.7913783635365184, "grad_norm": 0.4279283881187439, "learning_rate": 1.3243791066784e-05, "loss": 0.4967, "step": 28822 }, { "epoch": 0.7914058209774849, "grad_norm": 0.3671358823776245, "learning_rate": 1.3243382524052715e-05, "loss": 0.5393, "step": 28823 }, { "epoch": 0.7914332784184513, "grad_norm": 0.39224401116371155, "learning_rate": 1.3242973975271487e-05, "loss": 0.5066, "step": 28824 }, { "epoch": 0.7914607358594179, "grad_norm": 0.40928715467453003, "learning_rate": 1.3242565420441075e-05, "loss": 0.6175, "step": 28825 }, { "epoch": 0.7914881933003844, "grad_norm": 0.41364970803260803, "learning_rate": 1.3242156859562245e-05, "loss": 0.5509, "step": 28826 }, { "epoch": 0.7915156507413509, "grad_norm": 0.4603220522403717, "learning_rate": 1.3241748292635758e-05, "loss": 0.5254, "step": 28827 }, { "epoch": 0.7915431081823174, "grad_norm": 0.402834415435791, "learning_rate": 1.3241339719662377e-05, "loss": 0.5306, "step": 28828 }, { "epoch": 0.7915705656232839, "grad_norm": 0.37796106934547424, "learning_rate": 1.324093114064286e-05, "loss": 0.4772, "step": 28829 }, { "epoch": 0.7915980230642504, "grad_norm": 0.44534188508987427, "learning_rate": 1.3240522555577975e-05, "loss": 0.5066, "step": 28830 }, { "epoch": 0.7916254805052169, "grad_norm": 0.3886604905128479, "learning_rate": 1.324011396446848e-05, "loss": 0.4695, "step": 28831 }, { "epoch": 0.7916529379461834, "grad_norm": 0.3656606674194336, "learning_rate": 1.323970536731514e-05, "loss": 0.4217, "step": 28832 }, { "epoch": 0.79168039538715, "grad_norm": 0.414870947599411, "learning_rate": 1.3239296764118713e-05, "loss": 0.5656, "step": 28833 }, { "epoch": 0.7917078528281164, "grad_norm": 0.49546369910240173, "learning_rate": 1.3238888154879963e-05, "loss": 0.5589, "step": 28834 }, { "epoch": 0.791735310269083, "grad_norm": 0.3685397505760193, "learning_rate": 1.3238479539599654e-05, "loss": 0.4269, "step": 28835 }, { "epoch": 0.7917627677100494, "grad_norm": 0.39196398854255676, "learning_rate": 1.3238070918278548e-05, "loss": 0.6068, "step": 28836 }, { "epoch": 0.7917902251510159, "grad_norm": 0.3487417995929718, "learning_rate": 1.3237662290917406e-05, "loss": 0.4564, "step": 28837 }, { "epoch": 0.7918176825919824, "grad_norm": 0.4145239591598511, "learning_rate": 1.3237253657516988e-05, "loss": 0.5148, "step": 28838 }, { "epoch": 0.7918451400329489, "grad_norm": 0.36039966344833374, "learning_rate": 1.3236845018078061e-05, "loss": 0.5154, "step": 28839 }, { "epoch": 0.7918725974739155, "grad_norm": 0.3527248799800873, "learning_rate": 1.3236436372601385e-05, "loss": 0.534, "step": 28840 }, { "epoch": 0.7919000549148819, "grad_norm": 0.38374635577201843, "learning_rate": 1.3236027721087724e-05, "loss": 0.4675, "step": 28841 }, { "epoch": 0.7919275123558485, "grad_norm": 0.374239057302475, "learning_rate": 1.3235619063537835e-05, "loss": 0.547, "step": 28842 }, { "epoch": 0.7919549697968149, "grad_norm": 0.35949474573135376, "learning_rate": 1.3235210399952485e-05, "loss": 0.5014, "step": 28843 }, { "epoch": 0.7919824272377815, "grad_norm": 0.4086843430995941, "learning_rate": 1.3234801730332436e-05, "loss": 0.5773, "step": 28844 }, { "epoch": 0.7920098846787479, "grad_norm": 0.366864413022995, "learning_rate": 1.3234393054678447e-05, "loss": 0.4851, "step": 28845 }, { "epoch": 0.7920373421197144, "grad_norm": 0.37816229462623596, "learning_rate": 1.3233984372991284e-05, "loss": 0.4902, "step": 28846 }, { "epoch": 0.792064799560681, "grad_norm": 0.38824498653411865, "learning_rate": 1.3233575685271709e-05, "loss": 0.5251, "step": 28847 }, { "epoch": 0.7920922570016474, "grad_norm": 0.41435888409614563, "learning_rate": 1.3233166991520482e-05, "loss": 0.6081, "step": 28848 }, { "epoch": 0.792119714442614, "grad_norm": 0.39882898330688477, "learning_rate": 1.3232758291738367e-05, "loss": 0.4776, "step": 28849 }, { "epoch": 0.7921471718835804, "grad_norm": 0.3463805615901947, "learning_rate": 1.3232349585926128e-05, "loss": 0.4811, "step": 28850 }, { "epoch": 0.792174629324547, "grad_norm": 0.33547157049179077, "learning_rate": 1.323194087408452e-05, "loss": 0.3953, "step": 28851 }, { "epoch": 0.7922020867655134, "grad_norm": 0.4081627130508423, "learning_rate": 1.3231532156214315e-05, "loss": 0.4782, "step": 28852 }, { "epoch": 0.79222954420648, "grad_norm": 0.3658817708492279, "learning_rate": 1.3231123432316272e-05, "loss": 0.4957, "step": 28853 }, { "epoch": 0.7922570016474465, "grad_norm": 0.3836922347545624, "learning_rate": 1.3230714702391152e-05, "loss": 0.5295, "step": 28854 }, { "epoch": 0.7922844590884129, "grad_norm": 0.37357601523399353, "learning_rate": 1.3230305966439715e-05, "loss": 0.4502, "step": 28855 }, { "epoch": 0.7923119165293795, "grad_norm": 0.36162295937538147, "learning_rate": 1.3229897224462728e-05, "loss": 0.4605, "step": 28856 }, { "epoch": 0.7923393739703459, "grad_norm": 0.40765616297721863, "learning_rate": 1.3229488476460952e-05, "loss": 0.497, "step": 28857 }, { "epoch": 0.7923668314113125, "grad_norm": 0.3485613167285919, "learning_rate": 1.322907972243515e-05, "loss": 0.5149, "step": 28858 }, { "epoch": 0.7923942888522789, "grad_norm": 0.42321762442588806, "learning_rate": 1.3228670962386084e-05, "loss": 0.4979, "step": 28859 }, { "epoch": 0.7924217462932455, "grad_norm": 0.3583534061908722, "learning_rate": 1.3228262196314515e-05, "loss": 0.4873, "step": 28860 }, { "epoch": 0.792449203734212, "grad_norm": 0.42862364649772644, "learning_rate": 1.3227853424221206e-05, "loss": 0.4781, "step": 28861 }, { "epoch": 0.7924766611751785, "grad_norm": 0.4304666817188263, "learning_rate": 1.3227444646106922e-05, "loss": 0.5133, "step": 28862 }, { "epoch": 0.792504118616145, "grad_norm": 0.38164833188056946, "learning_rate": 1.3227035861972421e-05, "loss": 0.489, "step": 28863 }, { "epoch": 0.7925315760571114, "grad_norm": 0.33918261528015137, "learning_rate": 1.322662707181847e-05, "loss": 0.4562, "step": 28864 }, { "epoch": 0.792559033498078, "grad_norm": 0.38227182626724243, "learning_rate": 1.322621827564583e-05, "loss": 0.4261, "step": 28865 }, { "epoch": 0.7925864909390444, "grad_norm": 0.39893192052841187, "learning_rate": 1.3225809473455264e-05, "loss": 0.542, "step": 28866 }, { "epoch": 0.792613948380011, "grad_norm": 0.4717659056186676, "learning_rate": 1.3225400665247532e-05, "loss": 0.506, "step": 28867 }, { "epoch": 0.7926414058209775, "grad_norm": 0.37798482179641724, "learning_rate": 1.3224991851023399e-05, "loss": 0.4592, "step": 28868 }, { "epoch": 0.792668863261944, "grad_norm": 0.36725515127182007, "learning_rate": 1.3224583030783626e-05, "loss": 0.4341, "step": 28869 }, { "epoch": 0.7926963207029105, "grad_norm": 0.4269859492778778, "learning_rate": 1.3224174204528978e-05, "loss": 0.48, "step": 28870 }, { "epoch": 0.792723778143877, "grad_norm": 0.4400424361228943, "learning_rate": 1.3223765372260216e-05, "loss": 0.4848, "step": 28871 }, { "epoch": 0.7927512355848435, "grad_norm": 0.3910837173461914, "learning_rate": 1.32233565339781e-05, "loss": 0.4968, "step": 28872 }, { "epoch": 0.7927786930258099, "grad_norm": 0.39259791374206543, "learning_rate": 1.3222947689683398e-05, "loss": 0.5624, "step": 28873 }, { "epoch": 0.7928061504667765, "grad_norm": 0.35314613580703735, "learning_rate": 1.322253883937687e-05, "loss": 0.3769, "step": 28874 }, { "epoch": 0.792833607907743, "grad_norm": 0.4707760810852051, "learning_rate": 1.3222129983059277e-05, "loss": 0.5189, "step": 28875 }, { "epoch": 0.7928610653487095, "grad_norm": 0.456609845161438, "learning_rate": 1.3221721120731385e-05, "loss": 0.5191, "step": 28876 }, { "epoch": 0.792888522789676, "grad_norm": 0.44897323846817017, "learning_rate": 1.3221312252393954e-05, "loss": 0.5007, "step": 28877 }, { "epoch": 0.7929159802306425, "grad_norm": 0.3793288767337799, "learning_rate": 1.3220903378047747e-05, "loss": 0.4678, "step": 28878 }, { "epoch": 0.792943437671609, "grad_norm": 0.37855416536331177, "learning_rate": 1.3220494497693527e-05, "loss": 0.4795, "step": 28879 }, { "epoch": 0.7929708951125755, "grad_norm": 0.3419977128505707, "learning_rate": 1.3220085611332058e-05, "loss": 0.4603, "step": 28880 }, { "epoch": 0.792998352553542, "grad_norm": 0.37966781854629517, "learning_rate": 1.3219676718964103e-05, "loss": 0.4587, "step": 28881 }, { "epoch": 0.7930258099945086, "grad_norm": 0.4326576888561249, "learning_rate": 1.321926782059042e-05, "loss": 0.4851, "step": 28882 }, { "epoch": 0.793053267435475, "grad_norm": 0.39535120129585266, "learning_rate": 1.3218858916211776e-05, "loss": 0.4479, "step": 28883 }, { "epoch": 0.7930807248764415, "grad_norm": 0.39760252833366394, "learning_rate": 1.3218450005828936e-05, "loss": 0.4515, "step": 28884 }, { "epoch": 0.793108182317408, "grad_norm": 0.47074970602989197, "learning_rate": 1.3218041089442654e-05, "loss": 0.5293, "step": 28885 }, { "epoch": 0.7931356397583745, "grad_norm": 0.37029096484184265, "learning_rate": 1.3217632167053703e-05, "loss": 0.4433, "step": 28886 }, { "epoch": 0.793163097199341, "grad_norm": 0.3818627893924713, "learning_rate": 1.3217223238662838e-05, "loss": 0.4652, "step": 28887 }, { "epoch": 0.7931905546403075, "grad_norm": 0.3991394639015198, "learning_rate": 1.3216814304270827e-05, "loss": 0.4352, "step": 28888 }, { "epoch": 0.7932180120812741, "grad_norm": 0.3925267159938812, "learning_rate": 1.3216405363878429e-05, "loss": 0.4879, "step": 28889 }, { "epoch": 0.7932454695222405, "grad_norm": 0.41844043135643005, "learning_rate": 1.3215996417486408e-05, "loss": 0.4964, "step": 28890 }, { "epoch": 0.7932729269632071, "grad_norm": 0.384756475687027, "learning_rate": 1.3215587465095529e-05, "loss": 0.6204, "step": 28891 }, { "epoch": 0.7933003844041735, "grad_norm": 0.4123842716217041, "learning_rate": 1.3215178506706552e-05, "loss": 0.4751, "step": 28892 }, { "epoch": 0.79332784184514, "grad_norm": 0.39646777510643005, "learning_rate": 1.321476954232024e-05, "loss": 0.5301, "step": 28893 }, { "epoch": 0.7933552992861065, "grad_norm": 0.390313982963562, "learning_rate": 1.3214360571937358e-05, "loss": 0.5025, "step": 28894 }, { "epoch": 0.793382756727073, "grad_norm": 0.4119836091995239, "learning_rate": 1.3213951595558667e-05, "loss": 0.5073, "step": 28895 }, { "epoch": 0.7934102141680396, "grad_norm": 0.3242810070514679, "learning_rate": 1.3213542613184933e-05, "loss": 0.4325, "step": 28896 }, { "epoch": 0.793437671609006, "grad_norm": 0.38877540826797485, "learning_rate": 1.3213133624816916e-05, "loss": 0.4542, "step": 28897 }, { "epoch": 0.7934651290499726, "grad_norm": 0.4285781681537628, "learning_rate": 1.3212724630455376e-05, "loss": 0.4436, "step": 28898 }, { "epoch": 0.793492586490939, "grad_norm": 0.3953193724155426, "learning_rate": 1.3212315630101082e-05, "loss": 0.4951, "step": 28899 }, { "epoch": 0.7935200439319056, "grad_norm": 0.4985332190990448, "learning_rate": 1.321190662375479e-05, "loss": 0.4928, "step": 28900 }, { "epoch": 0.793547501372872, "grad_norm": 0.43795469403266907, "learning_rate": 1.3211497611417273e-05, "loss": 0.4747, "step": 28901 }, { "epoch": 0.7935749588138385, "grad_norm": 0.4009590446949005, "learning_rate": 1.3211088593089287e-05, "loss": 0.4691, "step": 28902 }, { "epoch": 0.7936024162548051, "grad_norm": 0.38843539357185364, "learning_rate": 1.3210679568771592e-05, "loss": 0.5055, "step": 28903 }, { "epoch": 0.7936298736957715, "grad_norm": 0.4087790846824646, "learning_rate": 1.3210270538464958e-05, "loss": 0.5421, "step": 28904 }, { "epoch": 0.7936573311367381, "grad_norm": 0.38917821645736694, "learning_rate": 1.3209861502170142e-05, "loss": 0.4981, "step": 28905 }, { "epoch": 0.7936847885777045, "grad_norm": 0.3837707042694092, "learning_rate": 1.3209452459887912e-05, "loss": 0.4955, "step": 28906 }, { "epoch": 0.7937122460186711, "grad_norm": 0.4079829156398773, "learning_rate": 1.320904341161903e-05, "loss": 0.5346, "step": 28907 }, { "epoch": 0.7937397034596375, "grad_norm": 0.35063624382019043, "learning_rate": 1.3208634357364255e-05, "loss": 0.4962, "step": 28908 }, { "epoch": 0.7937671609006041, "grad_norm": 0.39185482263565063, "learning_rate": 1.3208225297124355e-05, "loss": 0.5217, "step": 28909 }, { "epoch": 0.7937946183415706, "grad_norm": 0.39969444274902344, "learning_rate": 1.320781623090009e-05, "loss": 0.5255, "step": 28910 }, { "epoch": 0.793822075782537, "grad_norm": 0.37885648012161255, "learning_rate": 1.3207407158692223e-05, "loss": 0.4382, "step": 28911 }, { "epoch": 0.7938495332235036, "grad_norm": 0.5401685833930969, "learning_rate": 1.320699808050152e-05, "loss": 0.5728, "step": 28912 }, { "epoch": 0.79387699066447, "grad_norm": 0.3693428039550781, "learning_rate": 1.320658899632874e-05, "loss": 0.4898, "step": 28913 }, { "epoch": 0.7939044481054366, "grad_norm": 0.37648046016693115, "learning_rate": 1.320617990617465e-05, "loss": 0.4873, "step": 28914 }, { "epoch": 0.793931905546403, "grad_norm": 0.3708253502845764, "learning_rate": 1.3205770810040011e-05, "loss": 0.4507, "step": 28915 }, { "epoch": 0.7939593629873696, "grad_norm": 0.4348164200782776, "learning_rate": 1.3205361707925584e-05, "loss": 0.4804, "step": 28916 }, { "epoch": 0.7939868204283361, "grad_norm": 0.3872295022010803, "learning_rate": 1.3204952599832135e-05, "loss": 0.496, "step": 28917 }, { "epoch": 0.7940142778693026, "grad_norm": 0.3441913425922394, "learning_rate": 1.3204543485760427e-05, "loss": 0.428, "step": 28918 }, { "epoch": 0.7940417353102691, "grad_norm": 0.4735545516014099, "learning_rate": 1.3204134365711223e-05, "loss": 0.5471, "step": 28919 }, { "epoch": 0.7940691927512356, "grad_norm": 0.41436660289764404, "learning_rate": 1.3203725239685288e-05, "loss": 0.5609, "step": 28920 }, { "epoch": 0.7940966501922021, "grad_norm": 0.3847878873348236, "learning_rate": 1.3203316107683376e-05, "loss": 0.4504, "step": 28921 }, { "epoch": 0.7941241076331685, "grad_norm": 0.39955657720565796, "learning_rate": 1.3202906969706266e-05, "loss": 0.4405, "step": 28922 }, { "epoch": 0.7941515650741351, "grad_norm": 0.3667219877243042, "learning_rate": 1.3202497825754705e-05, "loss": 0.3947, "step": 28923 }, { "epoch": 0.7941790225151016, "grad_norm": 0.33845439553260803, "learning_rate": 1.3202088675829468e-05, "loss": 0.4869, "step": 28924 }, { "epoch": 0.7942064799560681, "grad_norm": 0.39948487281799316, "learning_rate": 1.3201679519931312e-05, "loss": 0.494, "step": 28925 }, { "epoch": 0.7942339373970346, "grad_norm": 0.3847973048686981, "learning_rate": 1.3201270358061003e-05, "loss": 0.565, "step": 28926 }, { "epoch": 0.7942613948380011, "grad_norm": 0.45412498712539673, "learning_rate": 1.3200861190219301e-05, "loss": 0.5211, "step": 28927 }, { "epoch": 0.7942888522789676, "grad_norm": 0.3907316327095032, "learning_rate": 1.3200452016406973e-05, "loss": 0.4964, "step": 28928 }, { "epoch": 0.794316309719934, "grad_norm": 0.38353538513183594, "learning_rate": 1.3200042836624778e-05, "loss": 0.4768, "step": 28929 }, { "epoch": 0.7943437671609006, "grad_norm": 0.5252509117126465, "learning_rate": 1.3199633650873485e-05, "loss": 0.517, "step": 28930 }, { "epoch": 0.7943712246018672, "grad_norm": 0.40649229288101196, "learning_rate": 1.3199224459153851e-05, "loss": 0.5259, "step": 28931 }, { "epoch": 0.7943986820428336, "grad_norm": 0.42858976125717163, "learning_rate": 1.3198815261466644e-05, "loss": 0.4701, "step": 28932 }, { "epoch": 0.7944261394838001, "grad_norm": 0.35216280817985535, "learning_rate": 1.3198406057812626e-05, "loss": 0.4922, "step": 28933 }, { "epoch": 0.7944535969247666, "grad_norm": 0.3455829918384552, "learning_rate": 1.3197996848192558e-05, "loss": 0.4383, "step": 28934 }, { "epoch": 0.7944810543657331, "grad_norm": 0.36359497904777527, "learning_rate": 1.3197587632607207e-05, "loss": 0.4838, "step": 28935 }, { "epoch": 0.7945085118066996, "grad_norm": 0.3743777573108673, "learning_rate": 1.3197178411057335e-05, "loss": 0.5327, "step": 28936 }, { "epoch": 0.7945359692476661, "grad_norm": 0.3947320878505707, "learning_rate": 1.3196769183543702e-05, "loss": 0.3855, "step": 28937 }, { "epoch": 0.7945634266886327, "grad_norm": 0.3827601969242096, "learning_rate": 1.319635995006708e-05, "loss": 0.453, "step": 28938 }, { "epoch": 0.7945908841295991, "grad_norm": 0.35645008087158203, "learning_rate": 1.319595071062822e-05, "loss": 0.5089, "step": 28939 }, { "epoch": 0.7946183415705657, "grad_norm": 0.43448230624198914, "learning_rate": 1.3195541465227894e-05, "loss": 0.5048, "step": 28940 }, { "epoch": 0.7946457990115321, "grad_norm": 0.39545026421546936, "learning_rate": 1.3195132213866865e-05, "loss": 0.4999, "step": 28941 }, { "epoch": 0.7946732564524986, "grad_norm": 0.40067028999328613, "learning_rate": 1.3194722956545894e-05, "loss": 0.5783, "step": 28942 }, { "epoch": 0.7947007138934651, "grad_norm": 0.34980469942092896, "learning_rate": 1.3194313693265743e-05, "loss": 0.4871, "step": 28943 }, { "epoch": 0.7947281713344316, "grad_norm": 0.3777106702327728, "learning_rate": 1.319390442402718e-05, "loss": 0.4947, "step": 28944 }, { "epoch": 0.7947556287753982, "grad_norm": 0.37771689891815186, "learning_rate": 1.3193495148830964e-05, "loss": 0.4435, "step": 28945 }, { "epoch": 0.7947830862163646, "grad_norm": 0.41709640622138977, "learning_rate": 1.3193085867677862e-05, "loss": 0.4871, "step": 28946 }, { "epoch": 0.7948105436573312, "grad_norm": 0.6367322206497192, "learning_rate": 1.3192676580568633e-05, "loss": 0.5517, "step": 28947 }, { "epoch": 0.7948380010982976, "grad_norm": 0.3614189028739929, "learning_rate": 1.3192267287504045e-05, "loss": 0.4476, "step": 28948 }, { "epoch": 0.7948654585392642, "grad_norm": 0.39594003558158875, "learning_rate": 1.319185798848486e-05, "loss": 0.5804, "step": 28949 }, { "epoch": 0.7948929159802306, "grad_norm": 0.3705328404903412, "learning_rate": 1.3191448683511841e-05, "loss": 0.4203, "step": 28950 }, { "epoch": 0.7949203734211971, "grad_norm": 0.4032622277736664, "learning_rate": 1.3191039372585753e-05, "loss": 0.5207, "step": 28951 }, { "epoch": 0.7949478308621637, "grad_norm": 0.4157354533672333, "learning_rate": 1.3190630055707354e-05, "loss": 0.4702, "step": 28952 }, { "epoch": 0.7949752883031301, "grad_norm": 0.39681676030158997, "learning_rate": 1.3190220732877417e-05, "loss": 0.5039, "step": 28953 }, { "epoch": 0.7950027457440967, "grad_norm": 0.3669542372226715, "learning_rate": 1.3189811404096695e-05, "loss": 0.567, "step": 28954 }, { "epoch": 0.7950302031850631, "grad_norm": 0.3854005038738251, "learning_rate": 1.3189402069365959e-05, "loss": 0.4781, "step": 28955 }, { "epoch": 0.7950576606260297, "grad_norm": 0.46714159846305847, "learning_rate": 1.3188992728685971e-05, "loss": 0.4627, "step": 28956 }, { "epoch": 0.7950851180669961, "grad_norm": 0.41735193133354187, "learning_rate": 1.3188583382057492e-05, "loss": 0.5532, "step": 28957 }, { "epoch": 0.7951125755079627, "grad_norm": 0.42548415064811707, "learning_rate": 1.3188174029481289e-05, "loss": 0.483, "step": 28958 }, { "epoch": 0.7951400329489292, "grad_norm": 0.4392024576663971, "learning_rate": 1.3187764670958126e-05, "loss": 0.5085, "step": 28959 }, { "epoch": 0.7951674903898956, "grad_norm": 0.4262329339981079, "learning_rate": 1.3187355306488759e-05, "loss": 0.5612, "step": 28960 }, { "epoch": 0.7951949478308622, "grad_norm": 0.35186007618904114, "learning_rate": 1.3186945936073961e-05, "loss": 0.4993, "step": 28961 }, { "epoch": 0.7952224052718286, "grad_norm": 0.6515346169471741, "learning_rate": 1.3186536559714488e-05, "loss": 0.4742, "step": 28962 }, { "epoch": 0.7952498627127952, "grad_norm": 0.5186629891395569, "learning_rate": 1.318612717741111e-05, "loss": 0.5616, "step": 28963 }, { "epoch": 0.7952773201537616, "grad_norm": 0.4028205871582031, "learning_rate": 1.318571778916459e-05, "loss": 0.5053, "step": 28964 }, { "epoch": 0.7953047775947282, "grad_norm": 0.3469853401184082, "learning_rate": 1.3185308394975684e-05, "loss": 0.4505, "step": 28965 }, { "epoch": 0.7953322350356947, "grad_norm": 0.4933312237262726, "learning_rate": 1.3184898994845166e-05, "loss": 0.4927, "step": 28966 }, { "epoch": 0.7953596924766612, "grad_norm": 0.36995700001716614, "learning_rate": 1.3184489588773793e-05, "loss": 0.4944, "step": 28967 }, { "epoch": 0.7953871499176277, "grad_norm": 0.47748467326164246, "learning_rate": 1.318408017676233e-05, "loss": 0.4968, "step": 28968 }, { "epoch": 0.7954146073585942, "grad_norm": 0.40114104747772217, "learning_rate": 1.3183670758811542e-05, "loss": 0.5484, "step": 28969 }, { "epoch": 0.7954420647995607, "grad_norm": 0.3817068934440613, "learning_rate": 1.318326133492219e-05, "loss": 0.5265, "step": 28970 }, { "epoch": 0.7954695222405271, "grad_norm": 0.3893270790576935, "learning_rate": 1.3182851905095045e-05, "loss": 0.5267, "step": 28971 }, { "epoch": 0.7954969796814937, "grad_norm": 0.3444291651248932, "learning_rate": 1.318244246933086e-05, "loss": 0.4493, "step": 28972 }, { "epoch": 0.7955244371224602, "grad_norm": 0.3642902374267578, "learning_rate": 1.3182033027630405e-05, "loss": 0.5724, "step": 28973 }, { "epoch": 0.7955518945634267, "grad_norm": 0.3791426718235016, "learning_rate": 1.3181623579994444e-05, "loss": 0.3808, "step": 28974 }, { "epoch": 0.7955793520043932, "grad_norm": 0.38107529282569885, "learning_rate": 1.3181214126423738e-05, "loss": 0.4558, "step": 28975 }, { "epoch": 0.7956068094453597, "grad_norm": 0.3459852635860443, "learning_rate": 1.3180804666919055e-05, "loss": 0.435, "step": 28976 }, { "epoch": 0.7956342668863262, "grad_norm": 0.45314517617225647, "learning_rate": 1.3180395201481155e-05, "loss": 0.5414, "step": 28977 }, { "epoch": 0.7956617243272927, "grad_norm": 0.39795181155204773, "learning_rate": 1.3179985730110803e-05, "loss": 0.4731, "step": 28978 }, { "epoch": 0.7956891817682592, "grad_norm": 0.3769240379333496, "learning_rate": 1.3179576252808763e-05, "loss": 0.5555, "step": 28979 }, { "epoch": 0.7957166392092258, "grad_norm": 0.37245845794677734, "learning_rate": 1.3179166769575797e-05, "loss": 0.4356, "step": 28980 }, { "epoch": 0.7957440966501922, "grad_norm": 0.7072638273239136, "learning_rate": 1.317875728041267e-05, "loss": 0.4678, "step": 28981 }, { "epoch": 0.7957715540911587, "grad_norm": 0.525495171546936, "learning_rate": 1.3178347785320149e-05, "loss": 0.4787, "step": 28982 }, { "epoch": 0.7957990115321252, "grad_norm": 0.5341266989707947, "learning_rate": 1.3177938284298992e-05, "loss": 0.4895, "step": 28983 }, { "epoch": 0.7958264689730917, "grad_norm": 0.36437392234802246, "learning_rate": 1.3177528777349968e-05, "loss": 0.494, "step": 28984 }, { "epoch": 0.7958539264140582, "grad_norm": 0.3832783102989197, "learning_rate": 1.3177119264473837e-05, "loss": 0.4945, "step": 28985 }, { "epoch": 0.7958813838550247, "grad_norm": 0.47551196813583374, "learning_rate": 1.3176709745671366e-05, "loss": 0.4647, "step": 28986 }, { "epoch": 0.7959088412959913, "grad_norm": 0.4040127992630005, "learning_rate": 1.3176300220943316e-05, "loss": 0.4945, "step": 28987 }, { "epoch": 0.7959362987369577, "grad_norm": 0.44782355427742004, "learning_rate": 1.3175890690290452e-05, "loss": 0.5005, "step": 28988 }, { "epoch": 0.7959637561779243, "grad_norm": 0.3713240921497345, "learning_rate": 1.3175481153713541e-05, "loss": 0.4582, "step": 28989 }, { "epoch": 0.7959912136188907, "grad_norm": 0.5358617901802063, "learning_rate": 1.3175071611213343e-05, "loss": 0.468, "step": 28990 }, { "epoch": 0.7960186710598572, "grad_norm": 0.3746187090873718, "learning_rate": 1.3174662062790622e-05, "loss": 0.5507, "step": 28991 }, { "epoch": 0.7960461285008237, "grad_norm": 0.4287177622318268, "learning_rate": 1.3174252508446144e-05, "loss": 0.518, "step": 28992 }, { "epoch": 0.7960735859417902, "grad_norm": 0.5034673810005188, "learning_rate": 1.317384294818067e-05, "loss": 0.4232, "step": 28993 }, { "epoch": 0.7961010433827568, "grad_norm": 0.4008547067642212, "learning_rate": 1.3173433381994966e-05, "loss": 0.5677, "step": 28994 }, { "epoch": 0.7961285008237232, "grad_norm": 0.3953173756599426, "learning_rate": 1.31730238098898e-05, "loss": 0.595, "step": 28995 }, { "epoch": 0.7961559582646898, "grad_norm": 0.42454051971435547, "learning_rate": 1.3172614231865928e-05, "loss": 0.501, "step": 28996 }, { "epoch": 0.7961834157056562, "grad_norm": 0.3601769804954529, "learning_rate": 1.3172204647924121e-05, "loss": 0.4904, "step": 28997 }, { "epoch": 0.7962108731466228, "grad_norm": 0.40780165791511536, "learning_rate": 1.3171795058065136e-05, "loss": 0.5028, "step": 28998 }, { "epoch": 0.7962383305875892, "grad_norm": 0.39404717087745667, "learning_rate": 1.3171385462289743e-05, "loss": 0.4742, "step": 28999 }, { "epoch": 0.7962657880285557, "grad_norm": 0.37312766909599304, "learning_rate": 1.3170975860598705e-05, "loss": 0.4567, "step": 29000 }, { "epoch": 0.7962932454695223, "grad_norm": 0.5204466581344604, "learning_rate": 1.3170566252992782e-05, "loss": 0.5156, "step": 29001 }, { "epoch": 0.7963207029104887, "grad_norm": 0.415997177362442, "learning_rate": 1.3170156639472744e-05, "loss": 0.5595, "step": 29002 }, { "epoch": 0.7963481603514553, "grad_norm": 0.42201319336891174, "learning_rate": 1.3169747020039352e-05, "loss": 0.6014, "step": 29003 }, { "epoch": 0.7963756177924217, "grad_norm": 0.3771829307079315, "learning_rate": 1.3169337394693366e-05, "loss": 0.4484, "step": 29004 }, { "epoch": 0.7964030752333883, "grad_norm": 0.3933040201663971, "learning_rate": 1.316892776343556e-05, "loss": 0.5345, "step": 29005 }, { "epoch": 0.7964305326743547, "grad_norm": 0.42519697546958923, "learning_rate": 1.3168518126266687e-05, "loss": 0.513, "step": 29006 }, { "epoch": 0.7964579901153213, "grad_norm": 0.4067384898662567, "learning_rate": 1.3168108483187521e-05, "loss": 0.5442, "step": 29007 }, { "epoch": 0.7964854475562878, "grad_norm": 0.37784504890441895, "learning_rate": 1.3167698834198818e-05, "loss": 0.5166, "step": 29008 }, { "epoch": 0.7965129049972542, "grad_norm": 0.3664315640926361, "learning_rate": 1.3167289179301345e-05, "loss": 0.4835, "step": 29009 }, { "epoch": 0.7965403624382208, "grad_norm": 0.46559232473373413, "learning_rate": 1.3166879518495872e-05, "loss": 0.4406, "step": 29010 }, { "epoch": 0.7965678198791872, "grad_norm": 0.3529190421104431, "learning_rate": 1.3166469851783152e-05, "loss": 0.4549, "step": 29011 }, { "epoch": 0.7965952773201538, "grad_norm": 0.3821374475955963, "learning_rate": 1.316606017916396e-05, "loss": 0.5085, "step": 29012 }, { "epoch": 0.7966227347611202, "grad_norm": 0.3859216272830963, "learning_rate": 1.3165650500639054e-05, "loss": 0.4978, "step": 29013 }, { "epoch": 0.7966501922020868, "grad_norm": 0.5575281977653503, "learning_rate": 1.3165240816209196e-05, "loss": 0.5499, "step": 29014 }, { "epoch": 0.7966776496430533, "grad_norm": 0.4493682086467743, "learning_rate": 1.3164831125875157e-05, "loss": 0.4845, "step": 29015 }, { "epoch": 0.7967051070840198, "grad_norm": 0.3335815370082855, "learning_rate": 1.3164421429637697e-05, "loss": 0.4792, "step": 29016 }, { "epoch": 0.7967325645249863, "grad_norm": 0.3945569396018982, "learning_rate": 1.3164011727497584e-05, "loss": 0.5052, "step": 29017 }, { "epoch": 0.7967600219659527, "grad_norm": 0.3899145722389221, "learning_rate": 1.3163602019455577e-05, "loss": 0.5455, "step": 29018 }, { "epoch": 0.7967874794069193, "grad_norm": 0.5879490375518799, "learning_rate": 1.316319230551244e-05, "loss": 0.5277, "step": 29019 }, { "epoch": 0.7968149368478857, "grad_norm": 0.41111961007118225, "learning_rate": 1.3162782585668944e-05, "loss": 0.4302, "step": 29020 }, { "epoch": 0.7968423942888523, "grad_norm": 0.4166162610054016, "learning_rate": 1.3162372859925845e-05, "loss": 0.5109, "step": 29021 }, { "epoch": 0.7968698517298188, "grad_norm": 0.4510578513145447, "learning_rate": 1.3161963128283911e-05, "loss": 0.53, "step": 29022 }, { "epoch": 0.7968973091707853, "grad_norm": 0.3990943729877472, "learning_rate": 1.316155339074391e-05, "loss": 0.389, "step": 29023 }, { "epoch": 0.7969247666117518, "grad_norm": 0.4815865457057953, "learning_rate": 1.3161143647306603e-05, "loss": 0.4574, "step": 29024 }, { "epoch": 0.7969522240527183, "grad_norm": 0.36859461665153503, "learning_rate": 1.3160733897972753e-05, "loss": 0.5121, "step": 29025 }, { "epoch": 0.7969796814936848, "grad_norm": 0.6260314583778381, "learning_rate": 1.3160324142743125e-05, "loss": 0.4624, "step": 29026 }, { "epoch": 0.7970071389346512, "grad_norm": 0.40281176567077637, "learning_rate": 1.3159914381618484e-05, "loss": 0.4671, "step": 29027 }, { "epoch": 0.7970345963756178, "grad_norm": 0.469783753156662, "learning_rate": 1.3159504614599593e-05, "loss": 0.5148, "step": 29028 }, { "epoch": 0.7970620538165843, "grad_norm": 0.3544091284275055, "learning_rate": 1.3159094841687218e-05, "loss": 0.4611, "step": 29029 }, { "epoch": 0.7970895112575508, "grad_norm": 0.3897426128387451, "learning_rate": 1.3158685062882126e-05, "loss": 0.523, "step": 29030 }, { "epoch": 0.7971169686985173, "grad_norm": 0.4406341016292572, "learning_rate": 1.3158275278185073e-05, "loss": 0.4712, "step": 29031 }, { "epoch": 0.7971444261394838, "grad_norm": 0.4181170165538788, "learning_rate": 1.3157865487596833e-05, "loss": 0.5434, "step": 29032 }, { "epoch": 0.7971718835804503, "grad_norm": 0.39553216099739075, "learning_rate": 1.3157455691118164e-05, "loss": 0.53, "step": 29033 }, { "epoch": 0.7971993410214168, "grad_norm": 0.48474088311195374, "learning_rate": 1.3157045888749833e-05, "loss": 0.5429, "step": 29034 }, { "epoch": 0.7972267984623833, "grad_norm": 0.37144753336906433, "learning_rate": 1.3156636080492605e-05, "loss": 0.4854, "step": 29035 }, { "epoch": 0.7972542559033499, "grad_norm": 0.3819325268268585, "learning_rate": 1.3156226266347241e-05, "loss": 0.4149, "step": 29036 }, { "epoch": 0.7972817133443163, "grad_norm": 0.3960030674934387, "learning_rate": 1.3155816446314508e-05, "loss": 0.4779, "step": 29037 }, { "epoch": 0.7973091707852829, "grad_norm": 0.3991811275482178, "learning_rate": 1.315540662039517e-05, "loss": 0.4801, "step": 29038 }, { "epoch": 0.7973366282262493, "grad_norm": 0.37946075201034546, "learning_rate": 1.3154996788589992e-05, "loss": 0.5505, "step": 29039 }, { "epoch": 0.7973640856672158, "grad_norm": 0.37354370951652527, "learning_rate": 1.3154586950899737e-05, "loss": 0.5421, "step": 29040 }, { "epoch": 0.7973915431081823, "grad_norm": 0.38070148229599, "learning_rate": 1.3154177107325174e-05, "loss": 0.6137, "step": 29041 }, { "epoch": 0.7974190005491488, "grad_norm": 0.35067659616470337, "learning_rate": 1.3153767257867062e-05, "loss": 0.4519, "step": 29042 }, { "epoch": 0.7974464579901154, "grad_norm": 0.3871609568595886, "learning_rate": 1.3153357402526166e-05, "loss": 0.5573, "step": 29043 }, { "epoch": 0.7974739154310818, "grad_norm": 0.4399052560329437, "learning_rate": 1.3152947541303253e-05, "loss": 0.5264, "step": 29044 }, { "epoch": 0.7975013728720484, "grad_norm": 0.42424777150154114, "learning_rate": 1.3152537674199086e-05, "loss": 0.5176, "step": 29045 }, { "epoch": 0.7975288303130148, "grad_norm": 0.3781950771808624, "learning_rate": 1.315212780121443e-05, "loss": 0.5501, "step": 29046 }, { "epoch": 0.7975562877539814, "grad_norm": 0.38911280035972595, "learning_rate": 1.3151717922350053e-05, "loss": 0.5771, "step": 29047 }, { "epoch": 0.7975837451949478, "grad_norm": 0.4925481975078583, "learning_rate": 1.3151308037606714e-05, "loss": 0.5295, "step": 29048 }, { "epoch": 0.7976112026359143, "grad_norm": 0.45661115646362305, "learning_rate": 1.3150898146985181e-05, "loss": 0.5157, "step": 29049 }, { "epoch": 0.7976386600768809, "grad_norm": 0.4560356140136719, "learning_rate": 1.3150488250486212e-05, "loss": 0.5802, "step": 29050 }, { "epoch": 0.7976661175178473, "grad_norm": 0.3228946924209595, "learning_rate": 1.3150078348110581e-05, "loss": 0.4096, "step": 29051 }, { "epoch": 0.7976935749588139, "grad_norm": 3.9901256561279297, "learning_rate": 1.3149668439859049e-05, "loss": 0.6683, "step": 29052 }, { "epoch": 0.7977210323997803, "grad_norm": 0.3776914179325104, "learning_rate": 1.3149258525732378e-05, "loss": 0.4878, "step": 29053 }, { "epoch": 0.7977484898407469, "grad_norm": 0.4084141254425049, "learning_rate": 1.3148848605731336e-05, "loss": 0.478, "step": 29054 }, { "epoch": 0.7977759472817133, "grad_norm": 0.37120872735977173, "learning_rate": 1.3148438679856685e-05, "loss": 0.3956, "step": 29055 }, { "epoch": 0.7978034047226799, "grad_norm": 0.4057466685771942, "learning_rate": 1.3148028748109194e-05, "loss": 0.5313, "step": 29056 }, { "epoch": 0.7978308621636463, "grad_norm": 0.36052778363227844, "learning_rate": 1.3147618810489624e-05, "loss": 0.4306, "step": 29057 }, { "epoch": 0.7978583196046128, "grad_norm": 0.3783266842365265, "learning_rate": 1.3147208866998737e-05, "loss": 0.4892, "step": 29058 }, { "epoch": 0.7978857770455794, "grad_norm": 0.45477592945098877, "learning_rate": 1.3146798917637304e-05, "loss": 0.4776, "step": 29059 }, { "epoch": 0.7979132344865458, "grad_norm": 0.36877313256263733, "learning_rate": 1.3146388962406084e-05, "loss": 0.4209, "step": 29060 }, { "epoch": 0.7979406919275124, "grad_norm": 0.43184465169906616, "learning_rate": 1.3145979001305849e-05, "loss": 0.5766, "step": 29061 }, { "epoch": 0.7979681493684788, "grad_norm": 0.6014851331710815, "learning_rate": 1.3145569034337356e-05, "loss": 0.4418, "step": 29062 }, { "epoch": 0.7979956068094454, "grad_norm": 0.3952651023864746, "learning_rate": 1.3145159061501374e-05, "loss": 0.5244, "step": 29063 }, { "epoch": 0.7980230642504118, "grad_norm": 0.3939405381679535, "learning_rate": 1.3144749082798665e-05, "loss": 0.5022, "step": 29064 }, { "epoch": 0.7980505216913784, "grad_norm": 0.4110875427722931, "learning_rate": 1.3144339098229996e-05, "loss": 0.4944, "step": 29065 }, { "epoch": 0.7980779791323449, "grad_norm": 0.36536529660224915, "learning_rate": 1.3143929107796129e-05, "loss": 0.54, "step": 29066 }, { "epoch": 0.7981054365733113, "grad_norm": 0.4180467426776886, "learning_rate": 1.3143519111497836e-05, "loss": 0.4755, "step": 29067 }, { "epoch": 0.7981328940142779, "grad_norm": 0.3996206820011139, "learning_rate": 1.3143109109335873e-05, "loss": 0.5248, "step": 29068 }, { "epoch": 0.7981603514552443, "grad_norm": 0.45701864361763, "learning_rate": 1.3142699101311008e-05, "loss": 0.5638, "step": 29069 }, { "epoch": 0.7981878088962109, "grad_norm": 0.4195692539215088, "learning_rate": 1.3142289087424008e-05, "loss": 0.5729, "step": 29070 }, { "epoch": 0.7982152663371773, "grad_norm": 0.4224366545677185, "learning_rate": 1.3141879067675633e-05, "loss": 0.5256, "step": 29071 }, { "epoch": 0.7982427237781439, "grad_norm": 0.39007267355918884, "learning_rate": 1.3141469042066653e-05, "loss": 0.4774, "step": 29072 }, { "epoch": 0.7982701812191104, "grad_norm": 0.4094315767288208, "learning_rate": 1.314105901059783e-05, "loss": 0.4916, "step": 29073 }, { "epoch": 0.7982976386600769, "grad_norm": 0.3510702848434448, "learning_rate": 1.3140648973269929e-05, "loss": 0.4631, "step": 29074 }, { "epoch": 0.7983250961010434, "grad_norm": 0.34776371717453003, "learning_rate": 1.3140238930083715e-05, "loss": 0.4445, "step": 29075 }, { "epoch": 0.7983525535420098, "grad_norm": 0.3266529440879822, "learning_rate": 1.3139828881039955e-05, "loss": 0.5192, "step": 29076 }, { "epoch": 0.7983800109829764, "grad_norm": 0.3539287745952606, "learning_rate": 1.3139418826139412e-05, "loss": 0.3711, "step": 29077 }, { "epoch": 0.7984074684239428, "grad_norm": 0.37110257148742676, "learning_rate": 1.3139008765382848e-05, "loss": 0.6131, "step": 29078 }, { "epoch": 0.7984349258649094, "grad_norm": 0.36466532945632935, "learning_rate": 1.3138598698771032e-05, "loss": 0.4763, "step": 29079 }, { "epoch": 0.7984623833058759, "grad_norm": 0.36504364013671875, "learning_rate": 1.3138188626304731e-05, "loss": 0.4301, "step": 29080 }, { "epoch": 0.7984898407468424, "grad_norm": 0.35784369707107544, "learning_rate": 1.3137778547984704e-05, "loss": 0.4448, "step": 29081 }, { "epoch": 0.7985172981878089, "grad_norm": 0.43151602149009705, "learning_rate": 1.3137368463811718e-05, "loss": 0.4895, "step": 29082 }, { "epoch": 0.7985447556287754, "grad_norm": 0.40974509716033936, "learning_rate": 1.3136958373786539e-05, "loss": 0.4993, "step": 29083 }, { "epoch": 0.7985722130697419, "grad_norm": 0.35487547516822815, "learning_rate": 1.3136548277909931e-05, "loss": 0.4106, "step": 29084 }, { "epoch": 0.7985996705107083, "grad_norm": 0.4229618012905121, "learning_rate": 1.313613817618266e-05, "loss": 0.4984, "step": 29085 }, { "epoch": 0.7986271279516749, "grad_norm": 0.3697095215320587, "learning_rate": 1.313572806860549e-05, "loss": 0.4999, "step": 29086 }, { "epoch": 0.7986545853926414, "grad_norm": 0.4121082127094269, "learning_rate": 1.3135317955179186e-05, "loss": 0.4781, "step": 29087 }, { "epoch": 0.7986820428336079, "grad_norm": 0.430002361536026, "learning_rate": 1.3134907835904516e-05, "loss": 0.4813, "step": 29088 }, { "epoch": 0.7987095002745744, "grad_norm": 0.4083607792854309, "learning_rate": 1.3134497710782239e-05, "loss": 0.4761, "step": 29089 }, { "epoch": 0.7987369577155409, "grad_norm": 0.3601089119911194, "learning_rate": 1.3134087579813124e-05, "loss": 0.478, "step": 29090 }, { "epoch": 0.7987644151565074, "grad_norm": 0.4273609220981598, "learning_rate": 1.3133677442997935e-05, "loss": 0.4971, "step": 29091 }, { "epoch": 0.7987918725974739, "grad_norm": 0.4148862957954407, "learning_rate": 1.313326730033744e-05, "loss": 0.6158, "step": 29092 }, { "epoch": 0.7988193300384404, "grad_norm": 0.34946319460868835, "learning_rate": 1.3132857151832399e-05, "loss": 0.4901, "step": 29093 }, { "epoch": 0.798846787479407, "grad_norm": 0.4432167410850525, "learning_rate": 1.313244699748358e-05, "loss": 0.6193, "step": 29094 }, { "epoch": 0.7988742449203734, "grad_norm": 0.44857048988342285, "learning_rate": 1.3132036837291749e-05, "loss": 0.4659, "step": 29095 }, { "epoch": 0.79890170236134, "grad_norm": 0.3773891031742096, "learning_rate": 1.3131626671257669e-05, "loss": 0.5581, "step": 29096 }, { "epoch": 0.7989291598023064, "grad_norm": 0.37992119789123535, "learning_rate": 1.3131216499382104e-05, "loss": 0.4289, "step": 29097 }, { "epoch": 0.7989566172432729, "grad_norm": 0.3994618058204651, "learning_rate": 1.3130806321665823e-05, "loss": 0.5594, "step": 29098 }, { "epoch": 0.7989840746842394, "grad_norm": 0.4410887360572815, "learning_rate": 1.3130396138109587e-05, "loss": 0.5108, "step": 29099 }, { "epoch": 0.7990115321252059, "grad_norm": 0.5912794470787048, "learning_rate": 1.3129985948714166e-05, "loss": 0.5296, "step": 29100 }, { "epoch": 0.7990389895661725, "grad_norm": 0.40886834263801575, "learning_rate": 1.3129575753480322e-05, "loss": 0.4659, "step": 29101 }, { "epoch": 0.7990664470071389, "grad_norm": 0.41191229224205017, "learning_rate": 1.3129165552408819e-05, "loss": 0.4523, "step": 29102 }, { "epoch": 0.7990939044481055, "grad_norm": 0.38387927412986755, "learning_rate": 1.3128755345500422e-05, "loss": 0.4508, "step": 29103 }, { "epoch": 0.7991213618890719, "grad_norm": 0.43947702646255493, "learning_rate": 1.3128345132755898e-05, "loss": 0.4797, "step": 29104 }, { "epoch": 0.7991488193300385, "grad_norm": 0.36185574531555176, "learning_rate": 1.3127934914176015e-05, "loss": 0.4915, "step": 29105 }, { "epoch": 0.7991762767710049, "grad_norm": 0.4595886766910553, "learning_rate": 1.3127524689761533e-05, "loss": 0.4743, "step": 29106 }, { "epoch": 0.7992037342119714, "grad_norm": 0.4020952582359314, "learning_rate": 1.3127114459513217e-05, "loss": 0.5889, "step": 29107 }, { "epoch": 0.799231191652938, "grad_norm": 0.3786865770816803, "learning_rate": 1.3126704223431838e-05, "loss": 0.5102, "step": 29108 }, { "epoch": 0.7992586490939044, "grad_norm": 0.39134594798088074, "learning_rate": 1.3126293981518155e-05, "loss": 0.5629, "step": 29109 }, { "epoch": 0.799286106534871, "grad_norm": 0.3818022906780243, "learning_rate": 1.3125883733772939e-05, "loss": 0.599, "step": 29110 }, { "epoch": 0.7993135639758374, "grad_norm": 0.474028080701828, "learning_rate": 1.3125473480196952e-05, "loss": 0.4338, "step": 29111 }, { "epoch": 0.799341021416804, "grad_norm": 0.4080114960670471, "learning_rate": 1.3125063220790957e-05, "loss": 0.5286, "step": 29112 }, { "epoch": 0.7993684788577704, "grad_norm": 0.3642703890800476, "learning_rate": 1.3124652955555724e-05, "loss": 0.5151, "step": 29113 }, { "epoch": 0.799395936298737, "grad_norm": 0.41107290983200073, "learning_rate": 1.3124242684492014e-05, "loss": 0.4436, "step": 29114 }, { "epoch": 0.7994233937397035, "grad_norm": 0.3984230160713196, "learning_rate": 1.3123832407600595e-05, "loss": 0.5039, "step": 29115 }, { "epoch": 0.7994508511806699, "grad_norm": 0.39857614040374756, "learning_rate": 1.3123422124882232e-05, "loss": 0.4713, "step": 29116 }, { "epoch": 0.7994783086216365, "grad_norm": 0.36511629819869995, "learning_rate": 1.3123011836337687e-05, "loss": 0.489, "step": 29117 }, { "epoch": 0.7995057660626029, "grad_norm": 0.4279373288154602, "learning_rate": 1.3122601541967733e-05, "loss": 0.4902, "step": 29118 }, { "epoch": 0.7995332235035695, "grad_norm": 0.35879722237586975, "learning_rate": 1.3122191241773129e-05, "loss": 0.4692, "step": 29119 }, { "epoch": 0.7995606809445359, "grad_norm": 0.4871772527694702, "learning_rate": 1.3121780935754638e-05, "loss": 0.4902, "step": 29120 }, { "epoch": 0.7995881383855025, "grad_norm": 0.35764777660369873, "learning_rate": 1.3121370623913033e-05, "loss": 0.5113, "step": 29121 }, { "epoch": 0.799615595826469, "grad_norm": 0.4404439926147461, "learning_rate": 1.3120960306249073e-05, "loss": 0.5129, "step": 29122 }, { "epoch": 0.7996430532674355, "grad_norm": 0.34557315707206726, "learning_rate": 1.312054998276353e-05, "loss": 0.4338, "step": 29123 }, { "epoch": 0.799670510708402, "grad_norm": 0.34522420167922974, "learning_rate": 1.3120139653457162e-05, "loss": 0.3949, "step": 29124 }, { "epoch": 0.7996979681493684, "grad_norm": 1.368660807609558, "learning_rate": 1.3119729318330738e-05, "loss": 0.5649, "step": 29125 }, { "epoch": 0.799725425590335, "grad_norm": 0.3706548511981964, "learning_rate": 1.3119318977385025e-05, "loss": 0.4643, "step": 29126 }, { "epoch": 0.7997528830313014, "grad_norm": 0.4455910623073578, "learning_rate": 1.3118908630620785e-05, "loss": 0.486, "step": 29127 }, { "epoch": 0.799780340472268, "grad_norm": 0.4087054133415222, "learning_rate": 1.3118498278038785e-05, "loss": 0.4282, "step": 29128 }, { "epoch": 0.7998077979132345, "grad_norm": 0.3901224434375763, "learning_rate": 1.311808791963979e-05, "loss": 0.4786, "step": 29129 }, { "epoch": 0.799835255354201, "grad_norm": 0.32245543599128723, "learning_rate": 1.3117677555424566e-05, "loss": 0.4286, "step": 29130 }, { "epoch": 0.7998627127951675, "grad_norm": 0.3785116970539093, "learning_rate": 1.3117267185393878e-05, "loss": 0.4781, "step": 29131 }, { "epoch": 0.799890170236134, "grad_norm": 0.414079874753952, "learning_rate": 1.3116856809548495e-05, "loss": 0.4662, "step": 29132 }, { "epoch": 0.7999176276771005, "grad_norm": 0.4767438769340515, "learning_rate": 1.3116446427889176e-05, "loss": 0.5405, "step": 29133 }, { "epoch": 0.799945085118067, "grad_norm": 0.5000969767570496, "learning_rate": 1.3116036040416692e-05, "loss": 0.4739, "step": 29134 }, { "epoch": 0.7999725425590335, "grad_norm": 0.42922744154930115, "learning_rate": 1.3115625647131802e-05, "loss": 0.4968, "step": 29135 }, { "epoch": 0.8, "grad_norm": 0.44298577308654785, "learning_rate": 1.311521524803528e-05, "loss": 0.5319, "step": 29136 }, { "epoch": 0.8000274574409665, "grad_norm": 0.40506407618522644, "learning_rate": 1.3114804843127886e-05, "loss": 0.4823, "step": 29137 }, { "epoch": 0.800054914881933, "grad_norm": 0.3785078823566437, "learning_rate": 1.3114394432410387e-05, "loss": 0.4462, "step": 29138 }, { "epoch": 0.8000823723228995, "grad_norm": 0.3727637827396393, "learning_rate": 1.3113984015883548e-05, "loss": 0.4357, "step": 29139 }, { "epoch": 0.800109829763866, "grad_norm": 0.3969329595565796, "learning_rate": 1.3113573593548135e-05, "loss": 0.5433, "step": 29140 }, { "epoch": 0.8001372872048325, "grad_norm": 0.569121241569519, "learning_rate": 1.3113163165404913e-05, "loss": 0.5526, "step": 29141 }, { "epoch": 0.800164744645799, "grad_norm": 0.44490399956703186, "learning_rate": 1.311275273145465e-05, "loss": 0.4389, "step": 29142 }, { "epoch": 0.8001922020867656, "grad_norm": 0.42447057366371155, "learning_rate": 1.3112342291698106e-05, "loss": 0.3933, "step": 29143 }, { "epoch": 0.800219659527732, "grad_norm": 0.40848058462142944, "learning_rate": 1.3111931846136055e-05, "loss": 0.4735, "step": 29144 }, { "epoch": 0.8002471169686985, "grad_norm": 0.5512576103210449, "learning_rate": 1.3111521394769255e-05, "loss": 0.6299, "step": 29145 }, { "epoch": 0.800274574409665, "grad_norm": 0.4880638122558594, "learning_rate": 1.3111110937598475e-05, "loss": 0.5181, "step": 29146 }, { "epoch": 0.8003020318506315, "grad_norm": 0.35670068860054016, "learning_rate": 1.311070047462448e-05, "loss": 0.4629, "step": 29147 }, { "epoch": 0.800329489291598, "grad_norm": 0.39041730761528015, "learning_rate": 1.3110290005848034e-05, "loss": 0.5705, "step": 29148 }, { "epoch": 0.8003569467325645, "grad_norm": 0.37231674790382385, "learning_rate": 1.3109879531269909e-05, "loss": 0.54, "step": 29149 }, { "epoch": 0.8003844041735311, "grad_norm": 0.4168016016483307, "learning_rate": 1.3109469050890863e-05, "loss": 0.478, "step": 29150 }, { "epoch": 0.8004118616144975, "grad_norm": 0.46938660740852356, "learning_rate": 1.3109058564711665e-05, "loss": 0.5784, "step": 29151 }, { "epoch": 0.8004393190554641, "grad_norm": 0.3670569360256195, "learning_rate": 1.310864807273308e-05, "loss": 0.5504, "step": 29152 }, { "epoch": 0.8004667764964305, "grad_norm": 0.37420788407325745, "learning_rate": 1.3108237574955875e-05, "loss": 0.4928, "step": 29153 }, { "epoch": 0.800494233937397, "grad_norm": 0.6301101446151733, "learning_rate": 1.3107827071380817e-05, "loss": 0.4848, "step": 29154 }, { "epoch": 0.8005216913783635, "grad_norm": 0.4114154875278473, "learning_rate": 1.3107416562008667e-05, "loss": 0.5103, "step": 29155 }, { "epoch": 0.80054914881933, "grad_norm": 0.4152678847312927, "learning_rate": 1.3107006046840193e-05, "loss": 0.4356, "step": 29156 }, { "epoch": 0.8005766062602966, "grad_norm": 0.42905333638191223, "learning_rate": 1.3106595525876162e-05, "loss": 0.539, "step": 29157 }, { "epoch": 0.800604063701263, "grad_norm": 0.44179752469062805, "learning_rate": 1.3106184999117341e-05, "loss": 0.5176, "step": 29158 }, { "epoch": 0.8006315211422296, "grad_norm": 0.4532783627510071, "learning_rate": 1.3105774466564488e-05, "loss": 0.4806, "step": 29159 }, { "epoch": 0.800658978583196, "grad_norm": 0.43077999353408813, "learning_rate": 1.3105363928218379e-05, "loss": 0.481, "step": 29160 }, { "epoch": 0.8006864360241626, "grad_norm": 0.4026997685432434, "learning_rate": 1.3104953384079772e-05, "loss": 0.4536, "step": 29161 }, { "epoch": 0.800713893465129, "grad_norm": 0.3785710334777832, "learning_rate": 1.310454283414944e-05, "loss": 0.5086, "step": 29162 }, { "epoch": 0.8007413509060956, "grad_norm": 0.48225560784339905, "learning_rate": 1.3104132278428146e-05, "loss": 0.5338, "step": 29163 }, { "epoch": 0.8007688083470621, "grad_norm": 0.3822985887527466, "learning_rate": 1.3103721716916649e-05, "loss": 0.4595, "step": 29164 }, { "epoch": 0.8007962657880285, "grad_norm": 0.42490342259407043, "learning_rate": 1.3103311149615723e-05, "loss": 0.6067, "step": 29165 }, { "epoch": 0.8008237232289951, "grad_norm": 0.5072552561759949, "learning_rate": 1.310290057652613e-05, "loss": 0.517, "step": 29166 }, { "epoch": 0.8008511806699615, "grad_norm": 0.3773775100708008, "learning_rate": 1.3102489997648638e-05, "loss": 0.446, "step": 29167 }, { "epoch": 0.8008786381109281, "grad_norm": 0.4199569821357727, "learning_rate": 1.3102079412984012e-05, "loss": 0.4436, "step": 29168 }, { "epoch": 0.8009060955518945, "grad_norm": 0.43325966596603394, "learning_rate": 1.3101668822533018e-05, "loss": 0.5489, "step": 29169 }, { "epoch": 0.8009335529928611, "grad_norm": 0.44219517707824707, "learning_rate": 1.310125822629642e-05, "loss": 0.446, "step": 29170 }, { "epoch": 0.8009610104338276, "grad_norm": 0.39357301592826843, "learning_rate": 1.3100847624274988e-05, "loss": 0.4528, "step": 29171 }, { "epoch": 0.800988467874794, "grad_norm": 0.37682464718818665, "learning_rate": 1.3100437016469485e-05, "loss": 0.5273, "step": 29172 }, { "epoch": 0.8010159253157606, "grad_norm": 0.40211084485054016, "learning_rate": 1.3100026402880677e-05, "loss": 0.4865, "step": 29173 }, { "epoch": 0.801043382756727, "grad_norm": 0.5158193111419678, "learning_rate": 1.309961578350933e-05, "loss": 0.4707, "step": 29174 }, { "epoch": 0.8010708401976936, "grad_norm": 0.40495765209198, "learning_rate": 1.309920515835621e-05, "loss": 0.3965, "step": 29175 }, { "epoch": 0.80109829763866, "grad_norm": 0.5107333660125732, "learning_rate": 1.3098794527422086e-05, "loss": 0.4284, "step": 29176 }, { "epoch": 0.8011257550796266, "grad_norm": 0.3299923837184906, "learning_rate": 1.3098383890707718e-05, "loss": 0.4153, "step": 29177 }, { "epoch": 0.8011532125205931, "grad_norm": 0.36808720231056213, "learning_rate": 1.3097973248213876e-05, "loss": 0.5153, "step": 29178 }, { "epoch": 0.8011806699615596, "grad_norm": 0.4672467112541199, "learning_rate": 1.3097562599941323e-05, "loss": 0.4629, "step": 29179 }, { "epoch": 0.8012081274025261, "grad_norm": 0.4379623532295227, "learning_rate": 1.3097151945890832e-05, "loss": 0.4771, "step": 29180 }, { "epoch": 0.8012355848434926, "grad_norm": 0.5127833485603333, "learning_rate": 1.3096741286063162e-05, "loss": 0.5206, "step": 29181 }, { "epoch": 0.8012630422844591, "grad_norm": 0.36670196056365967, "learning_rate": 1.3096330620459078e-05, "loss": 0.5611, "step": 29182 }, { "epoch": 0.8012904997254255, "grad_norm": 0.4481138288974762, "learning_rate": 1.3095919949079355e-05, "loss": 0.5247, "step": 29183 }, { "epoch": 0.8013179571663921, "grad_norm": 0.3792518973350525, "learning_rate": 1.3095509271924747e-05, "loss": 0.4895, "step": 29184 }, { "epoch": 0.8013454146073586, "grad_norm": 0.3921165466308594, "learning_rate": 1.309509858899603e-05, "loss": 0.5042, "step": 29185 }, { "epoch": 0.8013728720483251, "grad_norm": 0.40312889218330383, "learning_rate": 1.3094687900293965e-05, "loss": 0.4476, "step": 29186 }, { "epoch": 0.8014003294892916, "grad_norm": 0.43470439314842224, "learning_rate": 1.309427720581932e-05, "loss": 0.5179, "step": 29187 }, { "epoch": 0.8014277869302581, "grad_norm": 0.35027629137039185, "learning_rate": 1.309386650557286e-05, "loss": 0.4767, "step": 29188 }, { "epoch": 0.8014552443712246, "grad_norm": 0.39299920201301575, "learning_rate": 1.3093455799555352e-05, "loss": 0.4734, "step": 29189 }, { "epoch": 0.8014827018121911, "grad_norm": 0.9287964701652527, "learning_rate": 1.3093045087767561e-05, "loss": 0.5444, "step": 29190 }, { "epoch": 0.8015101592531576, "grad_norm": 0.37981459498405457, "learning_rate": 1.3092634370210258e-05, "loss": 0.5016, "step": 29191 }, { "epoch": 0.8015376166941242, "grad_norm": 0.37807193398475647, "learning_rate": 1.3092223646884198e-05, "loss": 0.4492, "step": 29192 }, { "epoch": 0.8015650741350906, "grad_norm": 0.41267430782318115, "learning_rate": 1.3091812917790158e-05, "loss": 0.5157, "step": 29193 }, { "epoch": 0.8015925315760571, "grad_norm": 0.7747998833656311, "learning_rate": 1.3091402182928901e-05, "loss": 0.4798, "step": 29194 }, { "epoch": 0.8016199890170236, "grad_norm": 0.3421415090560913, "learning_rate": 1.3090991442301189e-05, "loss": 0.4514, "step": 29195 }, { "epoch": 0.8016474464579901, "grad_norm": 0.4311414659023285, "learning_rate": 1.3090580695907794e-05, "loss": 0.5359, "step": 29196 }, { "epoch": 0.8016749038989566, "grad_norm": 0.3741026818752289, "learning_rate": 1.3090169943749475e-05, "loss": 0.4867, "step": 29197 }, { "epoch": 0.8017023613399231, "grad_norm": 0.3668615221977234, "learning_rate": 1.3089759185827008e-05, "loss": 0.4267, "step": 29198 }, { "epoch": 0.8017298187808897, "grad_norm": 0.4216841459274292, "learning_rate": 1.3089348422141153e-05, "loss": 0.615, "step": 29199 }, { "epoch": 0.8017572762218561, "grad_norm": 0.578728437423706, "learning_rate": 1.3088937652692675e-05, "loss": 0.4749, "step": 29200 }, { "epoch": 0.8017847336628227, "grad_norm": 0.3730204403400421, "learning_rate": 1.3088526877482343e-05, "loss": 0.4474, "step": 29201 }, { "epoch": 0.8018121911037891, "grad_norm": 0.3595421314239502, "learning_rate": 1.3088116096510924e-05, "loss": 0.5449, "step": 29202 }, { "epoch": 0.8018396485447556, "grad_norm": 0.3826541602611542, "learning_rate": 1.3087705309779182e-05, "loss": 0.5341, "step": 29203 }, { "epoch": 0.8018671059857221, "grad_norm": 0.42042264342308044, "learning_rate": 1.3087294517287882e-05, "loss": 0.5808, "step": 29204 }, { "epoch": 0.8018945634266886, "grad_norm": 0.4367527663707733, "learning_rate": 1.3086883719037797e-05, "loss": 0.5858, "step": 29205 }, { "epoch": 0.8019220208676552, "grad_norm": 0.3621957302093506, "learning_rate": 1.3086472915029687e-05, "loss": 0.4595, "step": 29206 }, { "epoch": 0.8019494783086216, "grad_norm": 0.35541832447052, "learning_rate": 1.308606210526432e-05, "loss": 0.5087, "step": 29207 }, { "epoch": 0.8019769357495882, "grad_norm": 0.5034292936325073, "learning_rate": 1.3085651289742463e-05, "loss": 0.6516, "step": 29208 }, { "epoch": 0.8020043931905546, "grad_norm": 0.3400304317474365, "learning_rate": 1.3085240468464877e-05, "loss": 0.4546, "step": 29209 }, { "epoch": 0.8020318506315212, "grad_norm": 0.36553654074668884, "learning_rate": 1.3084829641432337e-05, "loss": 0.476, "step": 29210 }, { "epoch": 0.8020593080724876, "grad_norm": 0.3430170714855194, "learning_rate": 1.3084418808645604e-05, "loss": 0.5083, "step": 29211 }, { "epoch": 0.8020867655134541, "grad_norm": 0.3544014096260071, "learning_rate": 1.3084007970105445e-05, "loss": 0.5695, "step": 29212 }, { "epoch": 0.8021142229544207, "grad_norm": 0.4599895477294922, "learning_rate": 1.3083597125812629e-05, "loss": 0.4948, "step": 29213 }, { "epoch": 0.8021416803953871, "grad_norm": 0.4548998773097992, "learning_rate": 1.3083186275767916e-05, "loss": 0.5179, "step": 29214 }, { "epoch": 0.8021691378363537, "grad_norm": 0.41896748542785645, "learning_rate": 1.308277541997208e-05, "loss": 0.5401, "step": 29215 }, { "epoch": 0.8021965952773201, "grad_norm": 0.3558630645275116, "learning_rate": 1.3082364558425885e-05, "loss": 0.4145, "step": 29216 }, { "epoch": 0.8022240527182867, "grad_norm": 0.3833305537700653, "learning_rate": 1.3081953691130092e-05, "loss": 0.4613, "step": 29217 }, { "epoch": 0.8022515101592531, "grad_norm": 0.3405984938144684, "learning_rate": 1.3081542818085476e-05, "loss": 0.3917, "step": 29218 }, { "epoch": 0.8022789676002197, "grad_norm": 0.3519830107688904, "learning_rate": 1.3081131939292795e-05, "loss": 0.4249, "step": 29219 }, { "epoch": 0.8023064250411862, "grad_norm": 0.4106930196285248, "learning_rate": 1.3080721054752824e-05, "loss": 0.5247, "step": 29220 }, { "epoch": 0.8023338824821526, "grad_norm": 0.368289589881897, "learning_rate": 1.308031016446632e-05, "loss": 0.5197, "step": 29221 }, { "epoch": 0.8023613399231192, "grad_norm": 0.4990631341934204, "learning_rate": 1.3079899268434057e-05, "loss": 0.5068, "step": 29222 }, { "epoch": 0.8023887973640856, "grad_norm": 0.4203818440437317, "learning_rate": 1.3079488366656801e-05, "loss": 0.5577, "step": 29223 }, { "epoch": 0.8024162548050522, "grad_norm": 0.3579583764076233, "learning_rate": 1.3079077459135314e-05, "loss": 0.5357, "step": 29224 }, { "epoch": 0.8024437122460186, "grad_norm": 0.3787112832069397, "learning_rate": 1.3078666545870367e-05, "loss": 0.5438, "step": 29225 }, { "epoch": 0.8024711696869852, "grad_norm": 0.35144075751304626, "learning_rate": 1.307825562686272e-05, "loss": 0.4945, "step": 29226 }, { "epoch": 0.8024986271279517, "grad_norm": 0.38969871401786804, "learning_rate": 1.3077844702113148e-05, "loss": 0.5285, "step": 29227 }, { "epoch": 0.8025260845689182, "grad_norm": 0.3513358235359192, "learning_rate": 1.3077433771622412e-05, "loss": 0.4304, "step": 29228 }, { "epoch": 0.8025535420098847, "grad_norm": 0.3572852611541748, "learning_rate": 1.3077022835391278e-05, "loss": 0.5002, "step": 29229 }, { "epoch": 0.8025809994508512, "grad_norm": 0.42672204971313477, "learning_rate": 1.3076611893420516e-05, "loss": 0.5599, "step": 29230 }, { "epoch": 0.8026084568918177, "grad_norm": 0.36939480900764465, "learning_rate": 1.3076200945710888e-05, "loss": 0.5116, "step": 29231 }, { "epoch": 0.8026359143327841, "grad_norm": 0.4221998453140259, "learning_rate": 1.3075789992263168e-05, "loss": 0.5114, "step": 29232 }, { "epoch": 0.8026633717737507, "grad_norm": 0.40252476930618286, "learning_rate": 1.3075379033078117e-05, "loss": 0.5017, "step": 29233 }, { "epoch": 0.8026908292147172, "grad_norm": 0.4087562561035156, "learning_rate": 1.30749680681565e-05, "loss": 0.534, "step": 29234 }, { "epoch": 0.8027182866556837, "grad_norm": 0.4063475728034973, "learning_rate": 1.307455709749909e-05, "loss": 0.5283, "step": 29235 }, { "epoch": 0.8027457440966502, "grad_norm": 0.4030974209308624, "learning_rate": 1.3074146121106646e-05, "loss": 0.5429, "step": 29236 }, { "epoch": 0.8027732015376167, "grad_norm": 0.37640929222106934, "learning_rate": 1.307373513897994e-05, "loss": 0.4406, "step": 29237 }, { "epoch": 0.8028006589785832, "grad_norm": 0.3428286612033844, "learning_rate": 1.3073324151119737e-05, "loss": 0.4398, "step": 29238 }, { "epoch": 0.8028281164195497, "grad_norm": 0.3896891176700592, "learning_rate": 1.3072913157526804e-05, "loss": 0.4859, "step": 29239 }, { "epoch": 0.8028555738605162, "grad_norm": 0.398946613073349, "learning_rate": 1.3072502158201905e-05, "loss": 0.4983, "step": 29240 }, { "epoch": 0.8028830313014828, "grad_norm": 0.4389644265174866, "learning_rate": 1.307209115314581e-05, "loss": 0.4895, "step": 29241 }, { "epoch": 0.8029104887424492, "grad_norm": 0.36754414439201355, "learning_rate": 1.3071680142359287e-05, "loss": 0.5013, "step": 29242 }, { "epoch": 0.8029379461834157, "grad_norm": 0.3834399878978729, "learning_rate": 1.3071269125843099e-05, "loss": 0.4652, "step": 29243 }, { "epoch": 0.8029654036243822, "grad_norm": 0.40663254261016846, "learning_rate": 1.307085810359801e-05, "loss": 0.4608, "step": 29244 }, { "epoch": 0.8029928610653487, "grad_norm": 0.4026694595813751, "learning_rate": 1.3070447075624795e-05, "loss": 0.5746, "step": 29245 }, { "epoch": 0.8030203185063152, "grad_norm": 0.4021170139312744, "learning_rate": 1.3070036041924213e-05, "loss": 0.5076, "step": 29246 }, { "epoch": 0.8030477759472817, "grad_norm": 0.3857075273990631, "learning_rate": 1.3069625002497037e-05, "loss": 0.4789, "step": 29247 }, { "epoch": 0.8030752333882483, "grad_norm": 0.40717148780822754, "learning_rate": 1.3069213957344029e-05, "loss": 0.5332, "step": 29248 }, { "epoch": 0.8031026908292147, "grad_norm": 0.5499364733695984, "learning_rate": 1.3068802906465956e-05, "loss": 0.4673, "step": 29249 }, { "epoch": 0.8031301482701813, "grad_norm": 0.416138619184494, "learning_rate": 1.3068391849863588e-05, "loss": 0.4311, "step": 29250 }, { "epoch": 0.8031576057111477, "grad_norm": 0.44073930382728577, "learning_rate": 1.3067980787537691e-05, "loss": 0.491, "step": 29251 }, { "epoch": 0.8031850631521142, "grad_norm": 0.40688058733940125, "learning_rate": 1.3067569719489029e-05, "loss": 0.5427, "step": 29252 }, { "epoch": 0.8032125205930807, "grad_norm": 0.36823633313179016, "learning_rate": 1.3067158645718372e-05, "loss": 0.4854, "step": 29253 }, { "epoch": 0.8032399780340472, "grad_norm": 0.3807859420776367, "learning_rate": 1.3066747566226484e-05, "loss": 0.4411, "step": 29254 }, { "epoch": 0.8032674354750138, "grad_norm": 0.3783659338951111, "learning_rate": 1.3066336481014133e-05, "loss": 0.5111, "step": 29255 }, { "epoch": 0.8032948929159802, "grad_norm": 0.39556705951690674, "learning_rate": 1.3065925390082086e-05, "loss": 0.5307, "step": 29256 }, { "epoch": 0.8033223503569468, "grad_norm": 0.4396951198577881, "learning_rate": 1.3065514293431107e-05, "loss": 0.4986, "step": 29257 }, { "epoch": 0.8033498077979132, "grad_norm": 2.4530866146087646, "learning_rate": 1.306510319106197e-05, "loss": 0.5037, "step": 29258 }, { "epoch": 0.8033772652388798, "grad_norm": 0.3830241560935974, "learning_rate": 1.3064692082975432e-05, "loss": 0.5047, "step": 29259 }, { "epoch": 0.8034047226798462, "grad_norm": 0.37599197030067444, "learning_rate": 1.3064280969172269e-05, "loss": 0.5248, "step": 29260 }, { "epoch": 0.8034321801208127, "grad_norm": 0.40679746866226196, "learning_rate": 1.3063869849653244e-05, "loss": 0.4448, "step": 29261 }, { "epoch": 0.8034596375617793, "grad_norm": 0.42717593908309937, "learning_rate": 1.3063458724419122e-05, "loss": 0.563, "step": 29262 }, { "epoch": 0.8034870950027457, "grad_norm": 0.3784163296222687, "learning_rate": 1.3063047593470675e-05, "loss": 0.5172, "step": 29263 }, { "epoch": 0.8035145524437123, "grad_norm": 0.43950018286705017, "learning_rate": 1.3062636456808661e-05, "loss": 0.5266, "step": 29264 }, { "epoch": 0.8035420098846787, "grad_norm": 0.39761871099472046, "learning_rate": 1.3062225314433858e-05, "loss": 0.4879, "step": 29265 }, { "epoch": 0.8035694673256453, "grad_norm": 0.3379712998867035, "learning_rate": 1.3061814166347026e-05, "loss": 0.3819, "step": 29266 }, { "epoch": 0.8035969247666117, "grad_norm": 0.35254883766174316, "learning_rate": 1.3061403012548932e-05, "loss": 0.4624, "step": 29267 }, { "epoch": 0.8036243822075783, "grad_norm": 0.3758380711078644, "learning_rate": 1.3060991853040345e-05, "loss": 0.5109, "step": 29268 }, { "epoch": 0.8036518396485448, "grad_norm": 0.36637696623802185, "learning_rate": 1.3060580687822033e-05, "loss": 0.5015, "step": 29269 }, { "epoch": 0.8036792970895112, "grad_norm": 0.3569898307323456, "learning_rate": 1.3060169516894759e-05, "loss": 0.4998, "step": 29270 }, { "epoch": 0.8037067545304778, "grad_norm": 0.3481728136539459, "learning_rate": 1.3059758340259293e-05, "loss": 0.5021, "step": 29271 }, { "epoch": 0.8037342119714442, "grad_norm": 0.4171615540981293, "learning_rate": 1.30593471579164e-05, "loss": 0.4763, "step": 29272 }, { "epoch": 0.8037616694124108, "grad_norm": 0.3932040333747864, "learning_rate": 1.3058935969866848e-05, "loss": 0.5428, "step": 29273 }, { "epoch": 0.8037891268533772, "grad_norm": 0.35196009278297424, "learning_rate": 1.3058524776111405e-05, "loss": 0.4913, "step": 29274 }, { "epoch": 0.8038165842943438, "grad_norm": 0.3961043655872345, "learning_rate": 1.3058113576650837e-05, "loss": 0.402, "step": 29275 }, { "epoch": 0.8038440417353103, "grad_norm": 0.3783618211746216, "learning_rate": 1.3057702371485912e-05, "loss": 0.532, "step": 29276 }, { "epoch": 0.8038714991762768, "grad_norm": 0.4880228340625763, "learning_rate": 1.3057291160617394e-05, "loss": 0.5304, "step": 29277 }, { "epoch": 0.8038989566172433, "grad_norm": 0.3352951407432556, "learning_rate": 1.3056879944046054e-05, "loss": 0.4647, "step": 29278 }, { "epoch": 0.8039264140582097, "grad_norm": 0.41419297456741333, "learning_rate": 1.305646872177266e-05, "loss": 0.5851, "step": 29279 }, { "epoch": 0.8039538714991763, "grad_norm": 0.3965702950954437, "learning_rate": 1.305605749379797e-05, "loss": 0.4502, "step": 29280 }, { "epoch": 0.8039813289401427, "grad_norm": 0.42804551124572754, "learning_rate": 1.3055646260122763e-05, "loss": 0.5886, "step": 29281 }, { "epoch": 0.8040087863811093, "grad_norm": 0.3650549650192261, "learning_rate": 1.3055235020747797e-05, "loss": 0.4509, "step": 29282 }, { "epoch": 0.8040362438220758, "grad_norm": 0.34479251503944397, "learning_rate": 1.3054823775673844e-05, "loss": 0.4279, "step": 29283 }, { "epoch": 0.8040637012630423, "grad_norm": 0.4186252951622009, "learning_rate": 1.305441252490167e-05, "loss": 0.3871, "step": 29284 }, { "epoch": 0.8040911587040088, "grad_norm": 0.3462826609611511, "learning_rate": 1.3054001268432039e-05, "loss": 0.4502, "step": 29285 }, { "epoch": 0.8041186161449753, "grad_norm": 0.44076478481292725, "learning_rate": 1.3053590006265724e-05, "loss": 0.5235, "step": 29286 }, { "epoch": 0.8041460735859418, "grad_norm": 0.3502502143383026, "learning_rate": 1.305317873840349e-05, "loss": 0.4168, "step": 29287 }, { "epoch": 0.8041735310269083, "grad_norm": 0.38856756687164307, "learning_rate": 1.3052767464846099e-05, "loss": 0.4506, "step": 29288 }, { "epoch": 0.8042009884678748, "grad_norm": 0.41032955050468445, "learning_rate": 1.3052356185594326e-05, "loss": 0.5799, "step": 29289 }, { "epoch": 0.8042284459088413, "grad_norm": 0.3966670632362366, "learning_rate": 1.305194490064893e-05, "loss": 0.5255, "step": 29290 }, { "epoch": 0.8042559033498078, "grad_norm": 0.40781712532043457, "learning_rate": 1.3051533610010688e-05, "loss": 0.5297, "step": 29291 }, { "epoch": 0.8042833607907743, "grad_norm": 0.3736419379711151, "learning_rate": 1.305112231368036e-05, "loss": 0.4154, "step": 29292 }, { "epoch": 0.8043108182317408, "grad_norm": 0.36889439821243286, "learning_rate": 1.3050711011658714e-05, "loss": 0.5015, "step": 29293 }, { "epoch": 0.8043382756727073, "grad_norm": 0.39999228715896606, "learning_rate": 1.3050299703946521e-05, "loss": 0.5293, "step": 29294 }, { "epoch": 0.8043657331136738, "grad_norm": 0.38545048236846924, "learning_rate": 1.3049888390544541e-05, "loss": 0.5369, "step": 29295 }, { "epoch": 0.8043931905546403, "grad_norm": 0.4583372473716736, "learning_rate": 1.304947707145355e-05, "loss": 0.5271, "step": 29296 }, { "epoch": 0.8044206479956069, "grad_norm": 0.36652520298957825, "learning_rate": 1.304906574667431e-05, "loss": 0.4821, "step": 29297 }, { "epoch": 0.8044481054365733, "grad_norm": 0.3696240186691284, "learning_rate": 1.3048654416207587e-05, "loss": 0.4932, "step": 29298 }, { "epoch": 0.8044755628775399, "grad_norm": 0.3374003469944, "learning_rate": 1.3048243080054152e-05, "loss": 0.4159, "step": 29299 }, { "epoch": 0.8045030203185063, "grad_norm": 0.42727285623550415, "learning_rate": 1.304783173821477e-05, "loss": 0.5362, "step": 29300 }, { "epoch": 0.8045304777594728, "grad_norm": 0.3621295988559723, "learning_rate": 1.3047420390690211e-05, "loss": 0.4568, "step": 29301 }, { "epoch": 0.8045579352004393, "grad_norm": 0.3824690878391266, "learning_rate": 1.304700903748124e-05, "loss": 0.4907, "step": 29302 }, { "epoch": 0.8045853926414058, "grad_norm": 0.42043012380599976, "learning_rate": 1.3046597678588621e-05, "loss": 0.5117, "step": 29303 }, { "epoch": 0.8046128500823724, "grad_norm": 0.3988417387008667, "learning_rate": 1.3046186314013129e-05, "loss": 0.4722, "step": 29304 }, { "epoch": 0.8046403075233388, "grad_norm": 0.45411643385887146, "learning_rate": 1.3045774943755526e-05, "loss": 0.4466, "step": 29305 }, { "epoch": 0.8046677649643054, "grad_norm": 0.37517988681793213, "learning_rate": 1.304536356781658e-05, "loss": 0.4144, "step": 29306 }, { "epoch": 0.8046952224052718, "grad_norm": 0.3832337260246277, "learning_rate": 1.3044952186197063e-05, "loss": 0.4692, "step": 29307 }, { "epoch": 0.8047226798462384, "grad_norm": 0.3998803496360779, "learning_rate": 1.3044540798897733e-05, "loss": 0.4552, "step": 29308 }, { "epoch": 0.8047501372872048, "grad_norm": 0.3992183804512024, "learning_rate": 1.3044129405919366e-05, "loss": 0.4342, "step": 29309 }, { "epoch": 0.8047775947281713, "grad_norm": 0.3600234389305115, "learning_rate": 1.3043718007262724e-05, "loss": 0.4406, "step": 29310 }, { "epoch": 0.8048050521691379, "grad_norm": 0.3486102223396301, "learning_rate": 1.3043306602928576e-05, "loss": 0.5301, "step": 29311 }, { "epoch": 0.8048325096101043, "grad_norm": 0.4384933114051819, "learning_rate": 1.3042895192917693e-05, "loss": 0.5678, "step": 29312 }, { "epoch": 0.8048599670510709, "grad_norm": 0.4090483784675598, "learning_rate": 1.3042483777230837e-05, "loss": 0.4957, "step": 29313 }, { "epoch": 0.8048874244920373, "grad_norm": 0.3703594505786896, "learning_rate": 1.3042072355868779e-05, "loss": 0.4884, "step": 29314 }, { "epoch": 0.8049148819330039, "grad_norm": 0.4462020993232727, "learning_rate": 1.3041660928832282e-05, "loss": 0.4828, "step": 29315 }, { "epoch": 0.8049423393739703, "grad_norm": 0.37689974904060364, "learning_rate": 1.3041249496122119e-05, "loss": 0.617, "step": 29316 }, { "epoch": 0.8049697968149369, "grad_norm": 0.36636969447135925, "learning_rate": 1.3040838057739056e-05, "loss": 0.4816, "step": 29317 }, { "epoch": 0.8049972542559034, "grad_norm": 0.3613339066505432, "learning_rate": 1.3040426613683858e-05, "loss": 0.4588, "step": 29318 }, { "epoch": 0.8050247116968698, "grad_norm": 0.38181352615356445, "learning_rate": 1.3040015163957294e-05, "loss": 0.5458, "step": 29319 }, { "epoch": 0.8050521691378364, "grad_norm": 0.41048315167427063, "learning_rate": 1.3039603708560133e-05, "loss": 0.5633, "step": 29320 }, { "epoch": 0.8050796265788028, "grad_norm": 0.3927260637283325, "learning_rate": 1.303919224749314e-05, "loss": 0.5098, "step": 29321 }, { "epoch": 0.8051070840197694, "grad_norm": 0.3640190064907074, "learning_rate": 1.3038780780757083e-05, "loss": 0.5287, "step": 29322 }, { "epoch": 0.8051345414607358, "grad_norm": 0.4234021008014679, "learning_rate": 1.3038369308352731e-05, "loss": 0.4428, "step": 29323 }, { "epoch": 0.8051619989017024, "grad_norm": 0.3672170042991638, "learning_rate": 1.303795783028085e-05, "loss": 0.4467, "step": 29324 }, { "epoch": 0.8051894563426688, "grad_norm": 0.3756997883319855, "learning_rate": 1.3037546346542206e-05, "loss": 0.5332, "step": 29325 }, { "epoch": 0.8052169137836354, "grad_norm": 0.4669555723667145, "learning_rate": 1.303713485713757e-05, "loss": 0.5059, "step": 29326 }, { "epoch": 0.8052443712246019, "grad_norm": 0.4370321035385132, "learning_rate": 1.303672336206771e-05, "loss": 0.4102, "step": 29327 }, { "epoch": 0.8052718286655683, "grad_norm": 0.41028234362602234, "learning_rate": 1.303631186133339e-05, "loss": 0.5214, "step": 29328 }, { "epoch": 0.8052992861065349, "grad_norm": 0.42239952087402344, "learning_rate": 1.3035900354935379e-05, "loss": 0.5181, "step": 29329 }, { "epoch": 0.8053267435475013, "grad_norm": 0.3377436101436615, "learning_rate": 1.3035488842874447e-05, "loss": 0.4914, "step": 29330 }, { "epoch": 0.8053542009884679, "grad_norm": 0.4023153483867645, "learning_rate": 1.303507732515136e-05, "loss": 0.4867, "step": 29331 }, { "epoch": 0.8053816584294343, "grad_norm": 0.4480002820491791, "learning_rate": 1.303466580176688e-05, "loss": 0.5036, "step": 29332 }, { "epoch": 0.8054091158704009, "grad_norm": 0.3864193260669708, "learning_rate": 1.3034254272721784e-05, "loss": 0.5176, "step": 29333 }, { "epoch": 0.8054365733113674, "grad_norm": 0.44890260696411133, "learning_rate": 1.3033842738016833e-05, "loss": 0.5265, "step": 29334 }, { "epoch": 0.8054640307523339, "grad_norm": 0.4213705360889435, "learning_rate": 1.30334311976528e-05, "loss": 0.4608, "step": 29335 }, { "epoch": 0.8054914881933004, "grad_norm": 0.3722233176231384, "learning_rate": 1.3033019651630448e-05, "loss": 0.498, "step": 29336 }, { "epoch": 0.8055189456342668, "grad_norm": 0.49473217129707336, "learning_rate": 1.3032608099950545e-05, "loss": 0.4858, "step": 29337 }, { "epoch": 0.8055464030752334, "grad_norm": 0.35101282596588135, "learning_rate": 1.3032196542613862e-05, "loss": 0.4626, "step": 29338 }, { "epoch": 0.8055738605161998, "grad_norm": 0.37192296981811523, "learning_rate": 1.303178497962116e-05, "loss": 0.5281, "step": 29339 }, { "epoch": 0.8056013179571664, "grad_norm": 0.38906019926071167, "learning_rate": 1.3031373410973218e-05, "loss": 0.5101, "step": 29340 }, { "epoch": 0.8056287753981329, "grad_norm": 0.41012558341026306, "learning_rate": 1.3030961836670794e-05, "loss": 0.5216, "step": 29341 }, { "epoch": 0.8056562328390994, "grad_norm": 0.4278920292854309, "learning_rate": 1.3030550256714658e-05, "loss": 0.601, "step": 29342 }, { "epoch": 0.8056836902800659, "grad_norm": 0.42188629508018494, "learning_rate": 1.3030138671105581e-05, "loss": 0.539, "step": 29343 }, { "epoch": 0.8057111477210324, "grad_norm": 0.36156055331230164, "learning_rate": 1.3029727079844325e-05, "loss": 0.5271, "step": 29344 }, { "epoch": 0.8057386051619989, "grad_norm": 0.3534310758113861, "learning_rate": 1.3029315482931664e-05, "loss": 0.4318, "step": 29345 }, { "epoch": 0.8057660626029653, "grad_norm": 0.3930320143699646, "learning_rate": 1.3028903880368362e-05, "loss": 0.4766, "step": 29346 }, { "epoch": 0.8057935200439319, "grad_norm": 0.39342641830444336, "learning_rate": 1.3028492272155186e-05, "loss": 0.4411, "step": 29347 }, { "epoch": 0.8058209774848984, "grad_norm": 0.4272310137748718, "learning_rate": 1.3028080658292907e-05, "loss": 0.5005, "step": 29348 }, { "epoch": 0.8058484349258649, "grad_norm": 0.4306212067604065, "learning_rate": 1.3027669038782294e-05, "loss": 0.4856, "step": 29349 }, { "epoch": 0.8058758923668314, "grad_norm": 0.4168049097061157, "learning_rate": 1.3027257413624105e-05, "loss": 0.5051, "step": 29350 }, { "epoch": 0.8059033498077979, "grad_norm": 0.48116442561149597, "learning_rate": 1.302684578281912e-05, "loss": 0.542, "step": 29351 }, { "epoch": 0.8059308072487644, "grad_norm": 0.36561664938926697, "learning_rate": 1.3026434146368099e-05, "loss": 0.5119, "step": 29352 }, { "epoch": 0.8059582646897309, "grad_norm": 0.43488359451293945, "learning_rate": 1.3026022504271814e-05, "loss": 0.431, "step": 29353 }, { "epoch": 0.8059857221306974, "grad_norm": 0.3575337827205658, "learning_rate": 1.3025610856531033e-05, "loss": 0.5065, "step": 29354 }, { "epoch": 0.806013179571664, "grad_norm": 0.3681592345237732, "learning_rate": 1.3025199203146516e-05, "loss": 0.4955, "step": 29355 }, { "epoch": 0.8060406370126304, "grad_norm": 0.3524419367313385, "learning_rate": 1.3024787544119041e-05, "loss": 0.4764, "step": 29356 }, { "epoch": 0.806068094453597, "grad_norm": 0.4165794551372528, "learning_rate": 1.3024375879449371e-05, "loss": 0.6015, "step": 29357 }, { "epoch": 0.8060955518945634, "grad_norm": 0.3568027913570404, "learning_rate": 1.3023964209138275e-05, "loss": 0.4648, "step": 29358 }, { "epoch": 0.8061230093355299, "grad_norm": 0.37588179111480713, "learning_rate": 1.3023552533186522e-05, "loss": 0.618, "step": 29359 }, { "epoch": 0.8061504667764964, "grad_norm": 0.400118887424469, "learning_rate": 1.3023140851594877e-05, "loss": 0.5167, "step": 29360 }, { "epoch": 0.8061779242174629, "grad_norm": 0.4007990062236786, "learning_rate": 1.302272916436411e-05, "loss": 0.4874, "step": 29361 }, { "epoch": 0.8062053816584295, "grad_norm": 0.3932684063911438, "learning_rate": 1.302231747149499e-05, "loss": 0.5122, "step": 29362 }, { "epoch": 0.8062328390993959, "grad_norm": 0.3700112998485565, "learning_rate": 1.302190577298828e-05, "loss": 0.4661, "step": 29363 }, { "epoch": 0.8062602965403625, "grad_norm": 0.3656933307647705, "learning_rate": 1.3021494068844753e-05, "loss": 0.4572, "step": 29364 }, { "epoch": 0.8062877539813289, "grad_norm": 0.36733341217041016, "learning_rate": 1.3021082359065173e-05, "loss": 0.5285, "step": 29365 }, { "epoch": 0.8063152114222955, "grad_norm": 0.34713247418403625, "learning_rate": 1.3020670643650313e-05, "loss": 0.4932, "step": 29366 }, { "epoch": 0.8063426688632619, "grad_norm": 0.39993149042129517, "learning_rate": 1.3020258922600939e-05, "loss": 0.4571, "step": 29367 }, { "epoch": 0.8063701263042284, "grad_norm": 0.3778378367424011, "learning_rate": 1.3019847195917815e-05, "loss": 0.5697, "step": 29368 }, { "epoch": 0.806397583745195, "grad_norm": 0.3687410056591034, "learning_rate": 1.3019435463601713e-05, "loss": 0.4914, "step": 29369 }, { "epoch": 0.8064250411861614, "grad_norm": 0.38451018929481506, "learning_rate": 1.3019023725653398e-05, "loss": 0.4369, "step": 29370 }, { "epoch": 0.806452498627128, "grad_norm": 0.3888314962387085, "learning_rate": 1.3018611982073646e-05, "loss": 0.5161, "step": 29371 }, { "epoch": 0.8064799560680944, "grad_norm": 0.40148288011550903, "learning_rate": 1.3018200232863216e-05, "loss": 0.3646, "step": 29372 }, { "epoch": 0.806507413509061, "grad_norm": 0.3756755590438843, "learning_rate": 1.301778847802288e-05, "loss": 0.4443, "step": 29373 }, { "epoch": 0.8065348709500274, "grad_norm": 0.3927207589149475, "learning_rate": 1.3017376717553405e-05, "loss": 0.5686, "step": 29374 }, { "epoch": 0.806562328390994, "grad_norm": 0.40501537919044495, "learning_rate": 1.3016964951455558e-05, "loss": 0.5245, "step": 29375 }, { "epoch": 0.8065897858319605, "grad_norm": 0.38609999418258667, "learning_rate": 1.301655317973011e-05, "loss": 0.4583, "step": 29376 }, { "epoch": 0.8066172432729269, "grad_norm": 0.3541078269481659, "learning_rate": 1.3016141402377827e-05, "loss": 0.4032, "step": 29377 }, { "epoch": 0.8066447007138935, "grad_norm": 0.43155819177627563, "learning_rate": 1.3015729619399476e-05, "loss": 0.4076, "step": 29378 }, { "epoch": 0.8066721581548599, "grad_norm": 0.35251927375793457, "learning_rate": 1.3015317830795827e-05, "loss": 0.396, "step": 29379 }, { "epoch": 0.8066996155958265, "grad_norm": 0.3832796812057495, "learning_rate": 1.3014906036567654e-05, "loss": 0.4579, "step": 29380 }, { "epoch": 0.8067270730367929, "grad_norm": 0.35901930928230286, "learning_rate": 1.3014494236715711e-05, "loss": 0.468, "step": 29381 }, { "epoch": 0.8067545304777595, "grad_norm": 0.36127135157585144, "learning_rate": 1.301408243124078e-05, "loss": 0.3762, "step": 29382 }, { "epoch": 0.806781987918726, "grad_norm": 0.3636269271373749, "learning_rate": 1.3013670620143619e-05, "loss": 0.456, "step": 29383 }, { "epoch": 0.8068094453596925, "grad_norm": 0.38814401626586914, "learning_rate": 1.3013258803425002e-05, "loss": 0.5559, "step": 29384 }, { "epoch": 0.806836902800659, "grad_norm": 0.4110961854457855, "learning_rate": 1.3012846981085695e-05, "loss": 0.469, "step": 29385 }, { "epoch": 0.8068643602416254, "grad_norm": 0.3730020225048065, "learning_rate": 1.3012435153126469e-05, "loss": 0.4556, "step": 29386 }, { "epoch": 0.806891817682592, "grad_norm": 0.4088495671749115, "learning_rate": 1.3012023319548088e-05, "loss": 0.534, "step": 29387 }, { "epoch": 0.8069192751235584, "grad_norm": 0.42812466621398926, "learning_rate": 1.3011611480351323e-05, "loss": 0.4689, "step": 29388 }, { "epoch": 0.806946732564525, "grad_norm": 0.39296087622642517, "learning_rate": 1.3011199635536942e-05, "loss": 0.4693, "step": 29389 }, { "epoch": 0.8069741900054915, "grad_norm": 0.3873558044433594, "learning_rate": 1.301078778510571e-05, "loss": 0.5128, "step": 29390 }, { "epoch": 0.807001647446458, "grad_norm": 0.3912961483001709, "learning_rate": 1.3010375929058401e-05, "loss": 0.4191, "step": 29391 }, { "epoch": 0.8070291048874245, "grad_norm": 0.47561711072921753, "learning_rate": 1.300996406739578e-05, "loss": 0.5744, "step": 29392 }, { "epoch": 0.807056562328391, "grad_norm": 0.3955985903739929, "learning_rate": 1.3009552200118612e-05, "loss": 0.431, "step": 29393 }, { "epoch": 0.8070840197693575, "grad_norm": 0.37683477997779846, "learning_rate": 1.3009140327227669e-05, "loss": 0.4321, "step": 29394 }, { "epoch": 0.807111477210324, "grad_norm": 0.3950742185115814, "learning_rate": 1.3008728448723721e-05, "loss": 0.5412, "step": 29395 }, { "epoch": 0.8071389346512905, "grad_norm": 0.5140008926391602, "learning_rate": 1.3008316564607535e-05, "loss": 0.4297, "step": 29396 }, { "epoch": 0.807166392092257, "grad_norm": 0.4415270984172821, "learning_rate": 1.3007904674879876e-05, "loss": 0.5275, "step": 29397 }, { "epoch": 0.8071938495332235, "grad_norm": 0.3497909605503082, "learning_rate": 1.3007492779541515e-05, "loss": 0.5807, "step": 29398 }, { "epoch": 0.80722130697419, "grad_norm": 0.3741939067840576, "learning_rate": 1.3007080878593224e-05, "loss": 0.5668, "step": 29399 }, { "epoch": 0.8072487644151565, "grad_norm": 0.3608841598033905, "learning_rate": 1.3006668972035763e-05, "loss": 0.4769, "step": 29400 }, { "epoch": 0.807276221856123, "grad_norm": 0.39583635330200195, "learning_rate": 1.3006257059869907e-05, "loss": 0.4831, "step": 29401 }, { "epoch": 0.8073036792970895, "grad_norm": 0.34256693720817566, "learning_rate": 1.3005845142096417e-05, "loss": 0.4392, "step": 29402 }, { "epoch": 0.807331136738056, "grad_norm": 0.4751836657524109, "learning_rate": 1.3005433218716075e-05, "loss": 0.5432, "step": 29403 }, { "epoch": 0.8073585941790226, "grad_norm": 0.42635682225227356, "learning_rate": 1.3005021289729636e-05, "loss": 0.4972, "step": 29404 }, { "epoch": 0.807386051619989, "grad_norm": 0.4164731502532959, "learning_rate": 1.300460935513787e-05, "loss": 0.4701, "step": 29405 }, { "epoch": 0.8074135090609555, "grad_norm": 0.41399696469306946, "learning_rate": 1.3004197414941554e-05, "loss": 0.4718, "step": 29406 }, { "epoch": 0.807440966501922, "grad_norm": 0.3999139666557312, "learning_rate": 1.300378546914145e-05, "loss": 0.4936, "step": 29407 }, { "epoch": 0.8074684239428885, "grad_norm": 0.4684426784515381, "learning_rate": 1.3003373517738328e-05, "loss": 0.5146, "step": 29408 }, { "epoch": 0.807495881383855, "grad_norm": 0.42809706926345825, "learning_rate": 1.3002961560732956e-05, "loss": 0.4895, "step": 29409 }, { "epoch": 0.8075233388248215, "grad_norm": 0.38195574283599854, "learning_rate": 1.3002549598126097e-05, "loss": 0.5228, "step": 29410 }, { "epoch": 0.8075507962657881, "grad_norm": 0.3730248510837555, "learning_rate": 1.3002137629918531e-05, "loss": 0.4755, "step": 29411 }, { "epoch": 0.8075782537067545, "grad_norm": 0.3368849754333496, "learning_rate": 1.3001725656111016e-05, "loss": 0.4233, "step": 29412 }, { "epoch": 0.8076057111477211, "grad_norm": 0.3756151497364044, "learning_rate": 1.3001313676704328e-05, "loss": 0.4762, "step": 29413 }, { "epoch": 0.8076331685886875, "grad_norm": 0.41606852412223816, "learning_rate": 1.300090169169923e-05, "loss": 0.5328, "step": 29414 }, { "epoch": 0.807660626029654, "grad_norm": 0.38197484612464905, "learning_rate": 1.3000489701096492e-05, "loss": 0.4489, "step": 29415 }, { "epoch": 0.8076880834706205, "grad_norm": 0.46300119161605835, "learning_rate": 1.3000077704896885e-05, "loss": 0.5031, "step": 29416 }, { "epoch": 0.807715540911587, "grad_norm": 0.36390382051467896, "learning_rate": 1.2999665703101175e-05, "loss": 0.4843, "step": 29417 }, { "epoch": 0.8077429983525536, "grad_norm": 0.4122953712940216, "learning_rate": 1.2999253695710131e-05, "loss": 0.4931, "step": 29418 }, { "epoch": 0.80777045579352, "grad_norm": 0.4027218818664551, "learning_rate": 1.2998841682724519e-05, "loss": 0.5061, "step": 29419 }, { "epoch": 0.8077979132344866, "grad_norm": 0.4687792956829071, "learning_rate": 1.2998429664145114e-05, "loss": 0.5133, "step": 29420 }, { "epoch": 0.807825370675453, "grad_norm": 0.4201371371746063, "learning_rate": 1.2998017639972678e-05, "loss": 0.4439, "step": 29421 }, { "epoch": 0.8078528281164196, "grad_norm": 0.3712780177593231, "learning_rate": 1.2997605610207984e-05, "loss": 0.4889, "step": 29422 }, { "epoch": 0.807880285557386, "grad_norm": 0.3486365079879761, "learning_rate": 1.2997193574851799e-05, "loss": 0.481, "step": 29423 }, { "epoch": 0.8079077429983526, "grad_norm": 0.3600008487701416, "learning_rate": 1.299678153390489e-05, "loss": 0.5144, "step": 29424 }, { "epoch": 0.8079352004393191, "grad_norm": 0.427494078874588, "learning_rate": 1.2996369487368026e-05, "loss": 0.5239, "step": 29425 }, { "epoch": 0.8079626578802855, "grad_norm": 0.4252254068851471, "learning_rate": 1.2995957435241979e-05, "loss": 0.4741, "step": 29426 }, { "epoch": 0.8079901153212521, "grad_norm": 0.34764155745506287, "learning_rate": 1.2995545377527514e-05, "loss": 0.4178, "step": 29427 }, { "epoch": 0.8080175727622185, "grad_norm": 0.39059314131736755, "learning_rate": 1.29951333142254e-05, "loss": 0.4194, "step": 29428 }, { "epoch": 0.8080450302031851, "grad_norm": 0.3681449890136719, "learning_rate": 1.299472124533641e-05, "loss": 0.4661, "step": 29429 }, { "epoch": 0.8080724876441515, "grad_norm": 0.40060120820999146, "learning_rate": 1.2994309170861306e-05, "loss": 0.4448, "step": 29430 }, { "epoch": 0.8080999450851181, "grad_norm": 0.40241697430610657, "learning_rate": 1.2993897090800859e-05, "loss": 0.4768, "step": 29431 }, { "epoch": 0.8081274025260846, "grad_norm": 0.3991473913192749, "learning_rate": 1.299348500515584e-05, "loss": 0.5595, "step": 29432 }, { "epoch": 0.808154859967051, "grad_norm": 0.44829243421554565, "learning_rate": 1.2993072913927016e-05, "loss": 0.5852, "step": 29433 }, { "epoch": 0.8081823174080176, "grad_norm": 0.39060178399086, "learning_rate": 1.2992660817115156e-05, "loss": 0.4811, "step": 29434 }, { "epoch": 0.808209774848984, "grad_norm": 0.3648832440376282, "learning_rate": 1.2992248714721028e-05, "loss": 0.4753, "step": 29435 }, { "epoch": 0.8082372322899506, "grad_norm": 0.43978139758110046, "learning_rate": 1.29918366067454e-05, "loss": 0.5131, "step": 29436 }, { "epoch": 0.808264689730917, "grad_norm": 0.40225088596343994, "learning_rate": 1.2991424493189041e-05, "loss": 0.4803, "step": 29437 }, { "epoch": 0.8082921471718836, "grad_norm": 0.3790709972381592, "learning_rate": 1.2991012374052725e-05, "loss": 0.4879, "step": 29438 }, { "epoch": 0.8083196046128501, "grad_norm": 0.3985547423362732, "learning_rate": 1.2990600249337213e-05, "loss": 0.485, "step": 29439 }, { "epoch": 0.8083470620538166, "grad_norm": 0.3995741307735443, "learning_rate": 1.2990188119043276e-05, "loss": 0.5021, "step": 29440 }, { "epoch": 0.8083745194947831, "grad_norm": 0.37435781955718994, "learning_rate": 1.2989775983171688e-05, "loss": 0.4741, "step": 29441 }, { "epoch": 0.8084019769357496, "grad_norm": 0.5141257047653198, "learning_rate": 1.2989363841723213e-05, "loss": 0.5813, "step": 29442 }, { "epoch": 0.8084294343767161, "grad_norm": 0.35613977909088135, "learning_rate": 1.2988951694698617e-05, "loss": 0.4765, "step": 29443 }, { "epoch": 0.8084568918176825, "grad_norm": 0.37130415439605713, "learning_rate": 1.2988539542098675e-05, "loss": 0.4947, "step": 29444 }, { "epoch": 0.8084843492586491, "grad_norm": 0.4218887388706207, "learning_rate": 1.298812738392415e-05, "loss": 0.4629, "step": 29445 }, { "epoch": 0.8085118066996156, "grad_norm": 0.4021981656551361, "learning_rate": 1.2987715220175817e-05, "loss": 0.6318, "step": 29446 }, { "epoch": 0.8085392641405821, "grad_norm": 0.44219136238098145, "learning_rate": 1.2987303050854439e-05, "loss": 0.4955, "step": 29447 }, { "epoch": 0.8085667215815486, "grad_norm": 0.39716288447380066, "learning_rate": 1.2986890875960786e-05, "loss": 0.511, "step": 29448 }, { "epoch": 0.8085941790225151, "grad_norm": 0.38068515062332153, "learning_rate": 1.2986478695495632e-05, "loss": 0.5056, "step": 29449 }, { "epoch": 0.8086216364634816, "grad_norm": 0.35609909892082214, "learning_rate": 1.2986066509459741e-05, "loss": 0.4288, "step": 29450 }, { "epoch": 0.8086490939044481, "grad_norm": 0.3484458029270172, "learning_rate": 1.2985654317853881e-05, "loss": 0.4781, "step": 29451 }, { "epoch": 0.8086765513454146, "grad_norm": 0.41526639461517334, "learning_rate": 1.2985242120678824e-05, "loss": 0.469, "step": 29452 }, { "epoch": 0.8087040087863812, "grad_norm": 0.3589704930782318, "learning_rate": 1.2984829917935338e-05, "loss": 0.4729, "step": 29453 }, { "epoch": 0.8087314662273476, "grad_norm": 0.42792582511901855, "learning_rate": 1.2984417709624193e-05, "loss": 0.5534, "step": 29454 }, { "epoch": 0.8087589236683141, "grad_norm": 0.48579084873199463, "learning_rate": 1.2984005495746155e-05, "loss": 0.6071, "step": 29455 }, { "epoch": 0.8087863811092806, "grad_norm": 0.4047023355960846, "learning_rate": 1.2983593276301993e-05, "loss": 0.4914, "step": 29456 }, { "epoch": 0.8088138385502471, "grad_norm": 0.4741160571575165, "learning_rate": 1.298318105129248e-05, "loss": 0.5403, "step": 29457 }, { "epoch": 0.8088412959912136, "grad_norm": 0.39354386925697327, "learning_rate": 1.298276882071838e-05, "loss": 0.4349, "step": 29458 }, { "epoch": 0.8088687534321801, "grad_norm": 0.3863707482814789, "learning_rate": 1.2982356584580464e-05, "loss": 0.4427, "step": 29459 }, { "epoch": 0.8088962108731467, "grad_norm": 0.3832639157772064, "learning_rate": 1.2981944342879502e-05, "loss": 0.417, "step": 29460 }, { "epoch": 0.8089236683141131, "grad_norm": 0.4353289008140564, "learning_rate": 1.2981532095616262e-05, "loss": 0.4539, "step": 29461 }, { "epoch": 0.8089511257550797, "grad_norm": 0.39595019817352295, "learning_rate": 1.2981119842791515e-05, "loss": 0.4936, "step": 29462 }, { "epoch": 0.8089785831960461, "grad_norm": 0.41224920749664307, "learning_rate": 1.2980707584406025e-05, "loss": 0.5325, "step": 29463 }, { "epoch": 0.8090060406370126, "grad_norm": 0.42082679271698, "learning_rate": 1.2980295320460565e-05, "loss": 0.4374, "step": 29464 }, { "epoch": 0.8090334980779791, "grad_norm": 0.3755422830581665, "learning_rate": 1.2979883050955904e-05, "loss": 0.506, "step": 29465 }, { "epoch": 0.8090609555189456, "grad_norm": 0.5387374758720398, "learning_rate": 1.2979470775892808e-05, "loss": 0.5295, "step": 29466 }, { "epoch": 0.8090884129599122, "grad_norm": 0.40527233481407166, "learning_rate": 1.297905849527205e-05, "loss": 0.4798, "step": 29467 }, { "epoch": 0.8091158704008786, "grad_norm": 0.626909613609314, "learning_rate": 1.2978646209094397e-05, "loss": 0.5444, "step": 29468 }, { "epoch": 0.8091433278418452, "grad_norm": 0.36374878883361816, "learning_rate": 1.2978233917360616e-05, "loss": 0.3678, "step": 29469 }, { "epoch": 0.8091707852828116, "grad_norm": 0.41649457812309265, "learning_rate": 1.297782162007148e-05, "loss": 0.5762, "step": 29470 }, { "epoch": 0.8091982427237782, "grad_norm": 0.43338820338249207, "learning_rate": 1.2977409317227756e-05, "loss": 0.5431, "step": 29471 }, { "epoch": 0.8092257001647446, "grad_norm": 0.41102635860443115, "learning_rate": 1.2976997008830215e-05, "loss": 0.5521, "step": 29472 }, { "epoch": 0.8092531576057111, "grad_norm": 0.3560909926891327, "learning_rate": 1.2976584694879624e-05, "loss": 0.4622, "step": 29473 }, { "epoch": 0.8092806150466777, "grad_norm": 0.34616658091545105, "learning_rate": 1.2976172375376748e-05, "loss": 0.4882, "step": 29474 }, { "epoch": 0.8093080724876441, "grad_norm": 0.40120819211006165, "learning_rate": 1.2975760050322366e-05, "loss": 0.4568, "step": 29475 }, { "epoch": 0.8093355299286107, "grad_norm": 0.3529524803161621, "learning_rate": 1.2975347719717237e-05, "loss": 0.4533, "step": 29476 }, { "epoch": 0.8093629873695771, "grad_norm": 0.39462703466415405, "learning_rate": 1.2974935383562139e-05, "loss": 0.4806, "step": 29477 }, { "epoch": 0.8093904448105437, "grad_norm": 0.4188230335712433, "learning_rate": 1.2974523041857836e-05, "loss": 0.4929, "step": 29478 }, { "epoch": 0.8094179022515101, "grad_norm": 0.335833877325058, "learning_rate": 1.2974110694605098e-05, "loss": 0.4372, "step": 29479 }, { "epoch": 0.8094453596924767, "grad_norm": 0.3695518672466278, "learning_rate": 1.2973698341804695e-05, "loss": 0.4518, "step": 29480 }, { "epoch": 0.8094728171334432, "grad_norm": 0.4042917490005493, "learning_rate": 1.2973285983457394e-05, "loss": 0.4946, "step": 29481 }, { "epoch": 0.8095002745744097, "grad_norm": 0.3929165303707123, "learning_rate": 1.2972873619563966e-05, "loss": 0.5122, "step": 29482 }, { "epoch": 0.8095277320153762, "grad_norm": 0.3701874017715454, "learning_rate": 1.2972461250125182e-05, "loss": 0.5325, "step": 29483 }, { "epoch": 0.8095551894563426, "grad_norm": 0.37275320291519165, "learning_rate": 1.2972048875141805e-05, "loss": 0.524, "step": 29484 }, { "epoch": 0.8095826468973092, "grad_norm": 0.3935512900352478, "learning_rate": 1.297163649461461e-05, "loss": 0.5924, "step": 29485 }, { "epoch": 0.8096101043382756, "grad_norm": 0.40529245138168335, "learning_rate": 1.2971224108544366e-05, "loss": 0.4702, "step": 29486 }, { "epoch": 0.8096375617792422, "grad_norm": 0.4017189145088196, "learning_rate": 1.2970811716931839e-05, "loss": 0.4271, "step": 29487 }, { "epoch": 0.8096650192202087, "grad_norm": 0.5570821762084961, "learning_rate": 1.2970399319777802e-05, "loss": 0.5466, "step": 29488 }, { "epoch": 0.8096924766611752, "grad_norm": 0.42345738410949707, "learning_rate": 1.2969986917083019e-05, "loss": 0.5067, "step": 29489 }, { "epoch": 0.8097199341021417, "grad_norm": 0.3816317021846771, "learning_rate": 1.2969574508848266e-05, "loss": 0.4863, "step": 29490 }, { "epoch": 0.8097473915431082, "grad_norm": 0.4255378544330597, "learning_rate": 1.2969162095074307e-05, "loss": 0.5691, "step": 29491 }, { "epoch": 0.8097748489840747, "grad_norm": 1.0249608755111694, "learning_rate": 1.2968749675761914e-05, "loss": 0.5383, "step": 29492 }, { "epoch": 0.8098023064250411, "grad_norm": 0.41814616322517395, "learning_rate": 1.2968337250911856e-05, "loss": 0.5307, "step": 29493 }, { "epoch": 0.8098297638660077, "grad_norm": 0.3621957302093506, "learning_rate": 1.2967924820524898e-05, "loss": 0.5893, "step": 29494 }, { "epoch": 0.8098572213069742, "grad_norm": 0.35061565041542053, "learning_rate": 1.2967512384601815e-05, "loss": 0.4123, "step": 29495 }, { "epoch": 0.8098846787479407, "grad_norm": 0.3685160279273987, "learning_rate": 1.2967099943143376e-05, "loss": 0.5488, "step": 29496 }, { "epoch": 0.8099121361889072, "grad_norm": 0.4127882719039917, "learning_rate": 1.2966687496150345e-05, "loss": 0.5565, "step": 29497 }, { "epoch": 0.8099395936298737, "grad_norm": 0.41425564885139465, "learning_rate": 1.2966275043623498e-05, "loss": 0.5224, "step": 29498 }, { "epoch": 0.8099670510708402, "grad_norm": 0.4093611538410187, "learning_rate": 1.29658625855636e-05, "loss": 0.5095, "step": 29499 }, { "epoch": 0.8099945085118067, "grad_norm": 0.36601340770721436, "learning_rate": 1.2965450121971423e-05, "loss": 0.5153, "step": 29500 }, { "epoch": 0.8100219659527732, "grad_norm": 0.4663650095462799, "learning_rate": 1.2965037652847734e-05, "loss": 0.5076, "step": 29501 }, { "epoch": 0.8100494233937398, "grad_norm": 0.3334299325942993, "learning_rate": 1.2964625178193302e-05, "loss": 0.4744, "step": 29502 }, { "epoch": 0.8100768808347062, "grad_norm": 0.40422627329826355, "learning_rate": 1.2964212698008903e-05, "loss": 0.4501, "step": 29503 }, { "epoch": 0.8101043382756727, "grad_norm": 0.41122275590896606, "learning_rate": 1.2963800212295296e-05, "loss": 0.5328, "step": 29504 }, { "epoch": 0.8101317957166392, "grad_norm": 0.3514132499694824, "learning_rate": 1.2963387721053256e-05, "loss": 0.5073, "step": 29505 }, { "epoch": 0.8101592531576057, "grad_norm": 0.43318405747413635, "learning_rate": 1.2962975224283555e-05, "loss": 0.536, "step": 29506 }, { "epoch": 0.8101867105985722, "grad_norm": 0.3973047435283661, "learning_rate": 1.2962562721986957e-05, "loss": 0.4557, "step": 29507 }, { "epoch": 0.8102141680395387, "grad_norm": 0.377435564994812, "learning_rate": 1.2962150214164235e-05, "loss": 0.4197, "step": 29508 }, { "epoch": 0.8102416254805053, "grad_norm": 0.3853634297847748, "learning_rate": 1.296173770081616e-05, "loss": 0.5508, "step": 29509 }, { "epoch": 0.8102690829214717, "grad_norm": 0.3784346580505371, "learning_rate": 1.2961325181943494e-05, "loss": 0.4869, "step": 29510 }, { "epoch": 0.8102965403624383, "grad_norm": 0.41430482268333435, "learning_rate": 1.2960912657547014e-05, "loss": 0.4436, "step": 29511 }, { "epoch": 0.8103239978034047, "grad_norm": 0.43021053075790405, "learning_rate": 1.2960500127627485e-05, "loss": 0.54, "step": 29512 }, { "epoch": 0.8103514552443712, "grad_norm": 1.4004594087600708, "learning_rate": 1.2960087592185681e-05, "loss": 0.5657, "step": 29513 }, { "epoch": 0.8103789126853377, "grad_norm": 0.3727751076221466, "learning_rate": 1.2959675051222368e-05, "loss": 0.4319, "step": 29514 }, { "epoch": 0.8104063701263042, "grad_norm": 0.3506917655467987, "learning_rate": 1.2959262504738316e-05, "loss": 0.448, "step": 29515 }, { "epoch": 0.8104338275672708, "grad_norm": 0.3836301267147064, "learning_rate": 1.2958849952734297e-05, "loss": 0.4563, "step": 29516 }, { "epoch": 0.8104612850082372, "grad_norm": 0.394192099571228, "learning_rate": 1.2958437395211076e-05, "loss": 0.4563, "step": 29517 }, { "epoch": 0.8104887424492038, "grad_norm": 0.372183233499527, "learning_rate": 1.2958024832169424e-05, "loss": 0.4386, "step": 29518 }, { "epoch": 0.8105161998901702, "grad_norm": 0.39903146028518677, "learning_rate": 1.2957612263610115e-05, "loss": 0.5004, "step": 29519 }, { "epoch": 0.8105436573311368, "grad_norm": 0.3910328149795532, "learning_rate": 1.2957199689533912e-05, "loss": 0.5197, "step": 29520 }, { "epoch": 0.8105711147721032, "grad_norm": 0.4052291810512543, "learning_rate": 1.295678710994159e-05, "loss": 0.4389, "step": 29521 }, { "epoch": 0.8105985722130697, "grad_norm": 0.3781130909919739, "learning_rate": 1.2956374524833917e-05, "loss": 0.5169, "step": 29522 }, { "epoch": 0.8106260296540363, "grad_norm": 0.4098453223705292, "learning_rate": 1.2955961934211658e-05, "loss": 0.517, "step": 29523 }, { "epoch": 0.8106534870950027, "grad_norm": 0.38490021228790283, "learning_rate": 1.295554933807559e-05, "loss": 0.4899, "step": 29524 }, { "epoch": 0.8106809445359693, "grad_norm": 0.4297489523887634, "learning_rate": 1.2955136736426476e-05, "loss": 0.4903, "step": 29525 }, { "epoch": 0.8107084019769357, "grad_norm": 0.37305212020874023, "learning_rate": 1.2954724129265092e-05, "loss": 0.4387, "step": 29526 }, { "epoch": 0.8107358594179023, "grad_norm": 0.4005805253982544, "learning_rate": 1.2954311516592207e-05, "loss": 0.4855, "step": 29527 }, { "epoch": 0.8107633168588687, "grad_norm": 0.36399951577186584, "learning_rate": 1.2953898898408583e-05, "loss": 0.517, "step": 29528 }, { "epoch": 0.8107907742998353, "grad_norm": 0.4083423912525177, "learning_rate": 1.2953486274714996e-05, "loss": 0.4704, "step": 29529 }, { "epoch": 0.8108182317408018, "grad_norm": 0.4460705518722534, "learning_rate": 1.2953073645512216e-05, "loss": 0.5362, "step": 29530 }, { "epoch": 0.8108456891817682, "grad_norm": 0.38192039728164673, "learning_rate": 1.2952661010801009e-05, "loss": 0.4575, "step": 29531 }, { "epoch": 0.8108731466227348, "grad_norm": 0.4847450256347656, "learning_rate": 1.295224837058215e-05, "loss": 0.52, "step": 29532 }, { "epoch": 0.8109006040637012, "grad_norm": 0.3990357220172882, "learning_rate": 1.2951835724856401e-05, "loss": 0.4893, "step": 29533 }, { "epoch": 0.8109280615046678, "grad_norm": 0.4013604521751404, "learning_rate": 1.295142307362454e-05, "loss": 0.491, "step": 29534 }, { "epoch": 0.8109555189456342, "grad_norm": 0.3828431963920593, "learning_rate": 1.2951010416887334e-05, "loss": 0.5288, "step": 29535 }, { "epoch": 0.8109829763866008, "grad_norm": 0.4829552471637726, "learning_rate": 1.2950597754645547e-05, "loss": 0.5561, "step": 29536 }, { "epoch": 0.8110104338275673, "grad_norm": 0.4025965929031372, "learning_rate": 1.2950185086899955e-05, "loss": 0.4747, "step": 29537 }, { "epoch": 0.8110378912685338, "grad_norm": 0.3385898768901825, "learning_rate": 1.2949772413651328e-05, "loss": 0.451, "step": 29538 }, { "epoch": 0.8110653487095003, "grad_norm": 0.9679966568946838, "learning_rate": 1.2949359734900433e-05, "loss": 0.4322, "step": 29539 }, { "epoch": 0.8110928061504667, "grad_norm": 0.38145795464515686, "learning_rate": 1.2948947050648041e-05, "loss": 0.5252, "step": 29540 }, { "epoch": 0.8111202635914333, "grad_norm": 0.4032808542251587, "learning_rate": 1.294853436089492e-05, "loss": 0.5057, "step": 29541 }, { "epoch": 0.8111477210323997, "grad_norm": 0.3743438124656677, "learning_rate": 1.2948121665641844e-05, "loss": 0.4878, "step": 29542 }, { "epoch": 0.8111751784733663, "grad_norm": 0.35850146412849426, "learning_rate": 1.2947708964889576e-05, "loss": 0.4997, "step": 29543 }, { "epoch": 0.8112026359143328, "grad_norm": 0.39038166403770447, "learning_rate": 1.2947296258638894e-05, "loss": 0.4851, "step": 29544 }, { "epoch": 0.8112300933552993, "grad_norm": 0.37322530150413513, "learning_rate": 1.2946883546890564e-05, "loss": 0.4637, "step": 29545 }, { "epoch": 0.8112575507962658, "grad_norm": 0.41893863677978516, "learning_rate": 1.2946470829645351e-05, "loss": 0.5578, "step": 29546 }, { "epoch": 0.8112850082372323, "grad_norm": 0.41660282015800476, "learning_rate": 1.2946058106904036e-05, "loss": 0.4165, "step": 29547 }, { "epoch": 0.8113124656781988, "grad_norm": 0.3646438717842102, "learning_rate": 1.2945645378667377e-05, "loss": 0.5101, "step": 29548 }, { "epoch": 0.8113399231191653, "grad_norm": 0.3531654477119446, "learning_rate": 1.2945232644936155e-05, "loss": 0.3994, "step": 29549 }, { "epoch": 0.8113673805601318, "grad_norm": 0.3620224893093109, "learning_rate": 1.294481990571113e-05, "loss": 0.4928, "step": 29550 }, { "epoch": 0.8113948380010984, "grad_norm": 0.31709370017051697, "learning_rate": 1.2944407160993076e-05, "loss": 0.4365, "step": 29551 }, { "epoch": 0.8114222954420648, "grad_norm": 0.40850868821144104, "learning_rate": 1.2943994410782763e-05, "loss": 0.5077, "step": 29552 }, { "epoch": 0.8114497528830313, "grad_norm": 0.4181002080440521, "learning_rate": 1.2943581655080962e-05, "loss": 0.5369, "step": 29553 }, { "epoch": 0.8114772103239978, "grad_norm": 0.3830395042896271, "learning_rate": 1.294316889388844e-05, "loss": 0.4238, "step": 29554 }, { "epoch": 0.8115046677649643, "grad_norm": 0.40456250309944153, "learning_rate": 1.294275612720597e-05, "loss": 0.4366, "step": 29555 }, { "epoch": 0.8115321252059308, "grad_norm": 0.4017515182495117, "learning_rate": 1.294234335503432e-05, "loss": 0.534, "step": 29556 }, { "epoch": 0.8115595826468973, "grad_norm": 0.4970342516899109, "learning_rate": 1.294193057737426e-05, "loss": 0.4478, "step": 29557 }, { "epoch": 0.8115870400878639, "grad_norm": 0.39134839177131653, "learning_rate": 1.2941517794226563e-05, "loss": 0.5249, "step": 29558 }, { "epoch": 0.8116144975288303, "grad_norm": 0.4332135319709778, "learning_rate": 1.2941105005591994e-05, "loss": 0.5172, "step": 29559 }, { "epoch": 0.8116419549697969, "grad_norm": 0.4169181287288666, "learning_rate": 1.2940692211471326e-05, "loss": 0.4596, "step": 29560 }, { "epoch": 0.8116694124107633, "grad_norm": 0.395397424697876, "learning_rate": 1.2940279411865328e-05, "loss": 0.4681, "step": 29561 }, { "epoch": 0.8116968698517298, "grad_norm": 0.40580812096595764, "learning_rate": 1.293986660677477e-05, "loss": 0.412, "step": 29562 }, { "epoch": 0.8117243272926963, "grad_norm": 0.44008082151412964, "learning_rate": 1.2939453796200425e-05, "loss": 0.5563, "step": 29563 }, { "epoch": 0.8117517847336628, "grad_norm": 0.3815366327762604, "learning_rate": 1.293904098014306e-05, "loss": 0.4923, "step": 29564 }, { "epoch": 0.8117792421746294, "grad_norm": 0.3866012394428253, "learning_rate": 1.2938628158603445e-05, "loss": 0.5149, "step": 29565 }, { "epoch": 0.8118066996155958, "grad_norm": 0.37853699922561646, "learning_rate": 1.2938215331582349e-05, "loss": 0.4605, "step": 29566 }, { "epoch": 0.8118341570565624, "grad_norm": 0.521323561668396, "learning_rate": 1.2937802499080544e-05, "loss": 0.5136, "step": 29567 }, { "epoch": 0.8118616144975288, "grad_norm": 0.3569294214248657, "learning_rate": 1.2937389661098803e-05, "loss": 0.4601, "step": 29568 }, { "epoch": 0.8118890719384954, "grad_norm": 0.4325162172317505, "learning_rate": 1.293697681763789e-05, "loss": 0.5423, "step": 29569 }, { "epoch": 0.8119165293794618, "grad_norm": 0.39335185289382935, "learning_rate": 1.2936563968698579e-05, "loss": 0.4392, "step": 29570 }, { "epoch": 0.8119439868204283, "grad_norm": 0.4136277735233307, "learning_rate": 1.2936151114281637e-05, "loss": 0.524, "step": 29571 }, { "epoch": 0.8119714442613949, "grad_norm": 0.39998939633369446, "learning_rate": 1.2935738254387837e-05, "loss": 0.5334, "step": 29572 }, { "epoch": 0.8119989017023613, "grad_norm": 0.3581618368625641, "learning_rate": 1.293532538901795e-05, "loss": 0.5298, "step": 29573 }, { "epoch": 0.8120263591433279, "grad_norm": 0.41104552149772644, "learning_rate": 1.2934912518172743e-05, "loss": 0.5137, "step": 29574 }, { "epoch": 0.8120538165842943, "grad_norm": 0.3504338264465332, "learning_rate": 1.2934499641852987e-05, "loss": 0.4433, "step": 29575 }, { "epoch": 0.8120812740252609, "grad_norm": 0.4092327356338501, "learning_rate": 1.293408676005945e-05, "loss": 0.6025, "step": 29576 }, { "epoch": 0.8121087314662273, "grad_norm": 0.41085711121559143, "learning_rate": 1.2933673872792909e-05, "loss": 0.4566, "step": 29577 }, { "epoch": 0.8121361889071939, "grad_norm": 0.39957955479621887, "learning_rate": 1.293326098005413e-05, "loss": 0.4571, "step": 29578 }, { "epoch": 0.8121636463481604, "grad_norm": 0.4089006781578064, "learning_rate": 1.2932848081843878e-05, "loss": 0.4728, "step": 29579 }, { "epoch": 0.8121911037891268, "grad_norm": 0.3994095027446747, "learning_rate": 1.2932435178162934e-05, "loss": 0.5446, "step": 29580 }, { "epoch": 0.8122185612300934, "grad_norm": 0.4758124351501465, "learning_rate": 1.293202226901206e-05, "loss": 0.5161, "step": 29581 }, { "epoch": 0.8122460186710598, "grad_norm": 0.37188291549682617, "learning_rate": 1.293160935439203e-05, "loss": 0.4089, "step": 29582 }, { "epoch": 0.8122734761120264, "grad_norm": 0.4123263657093048, "learning_rate": 1.2931196434303611e-05, "loss": 0.4846, "step": 29583 }, { "epoch": 0.8123009335529928, "grad_norm": 0.39721983671188354, "learning_rate": 1.2930783508747575e-05, "loss": 0.5171, "step": 29584 }, { "epoch": 0.8123283909939594, "grad_norm": 0.35082757472991943, "learning_rate": 1.2930370577724695e-05, "loss": 0.5082, "step": 29585 }, { "epoch": 0.8123558484349259, "grad_norm": 0.4238072335720062, "learning_rate": 1.2929957641235736e-05, "loss": 0.441, "step": 29586 }, { "epoch": 0.8123833058758924, "grad_norm": 0.4310474097728729, "learning_rate": 1.292954469928147e-05, "loss": 0.4773, "step": 29587 }, { "epoch": 0.8124107633168589, "grad_norm": 0.34800177812576294, "learning_rate": 1.292913175186267e-05, "loss": 0.4825, "step": 29588 }, { "epoch": 0.8124382207578253, "grad_norm": 0.37402093410491943, "learning_rate": 1.2928718798980104e-05, "loss": 0.4686, "step": 29589 }, { "epoch": 0.8124656781987919, "grad_norm": 0.3755300045013428, "learning_rate": 1.2928305840634545e-05, "loss": 0.4535, "step": 29590 }, { "epoch": 0.8124931356397583, "grad_norm": 0.340661883354187, "learning_rate": 1.2927892876826757e-05, "loss": 0.4648, "step": 29591 }, { "epoch": 0.8125205930807249, "grad_norm": 0.41110870242118835, "learning_rate": 1.292747990755752e-05, "loss": 0.5171, "step": 29592 }, { "epoch": 0.8125480505216913, "grad_norm": 0.49654239416122437, "learning_rate": 1.2927066932827592e-05, "loss": 0.5008, "step": 29593 }, { "epoch": 0.8125755079626579, "grad_norm": 0.3608541190624237, "learning_rate": 1.2926653952637753e-05, "loss": 0.4807, "step": 29594 }, { "epoch": 0.8126029654036244, "grad_norm": 0.401468962430954, "learning_rate": 1.292624096698877e-05, "loss": 0.5339, "step": 29595 }, { "epoch": 0.8126304228445909, "grad_norm": 0.4100181758403778, "learning_rate": 1.2925827975881414e-05, "loss": 0.5086, "step": 29596 }, { "epoch": 0.8126578802855574, "grad_norm": 0.4900486171245575, "learning_rate": 1.2925414979316458e-05, "loss": 0.5329, "step": 29597 }, { "epoch": 0.8126853377265238, "grad_norm": 0.4653148651123047, "learning_rate": 1.2925001977294661e-05, "loss": 0.5901, "step": 29598 }, { "epoch": 0.8127127951674904, "grad_norm": 0.3838897943496704, "learning_rate": 1.2924588969816809e-05, "loss": 0.4765, "step": 29599 }, { "epoch": 0.8127402526084568, "grad_norm": 0.6647209525108337, "learning_rate": 1.2924175956883661e-05, "loss": 0.5773, "step": 29600 }, { "epoch": 0.8127677100494234, "grad_norm": 0.4066369831562042, "learning_rate": 1.2923762938495996e-05, "loss": 0.4626, "step": 29601 }, { "epoch": 0.8127951674903899, "grad_norm": 0.34017422795295715, "learning_rate": 1.2923349914654579e-05, "loss": 0.4288, "step": 29602 }, { "epoch": 0.8128226249313564, "grad_norm": 0.3408830165863037, "learning_rate": 1.292293688536018e-05, "loss": 0.3954, "step": 29603 }, { "epoch": 0.8128500823723229, "grad_norm": 0.4165657162666321, "learning_rate": 1.292252385061357e-05, "loss": 0.4478, "step": 29604 }, { "epoch": 0.8128775398132894, "grad_norm": 0.46480241417884827, "learning_rate": 1.292211081041552e-05, "loss": 0.4708, "step": 29605 }, { "epoch": 0.8129049972542559, "grad_norm": 0.4057855010032654, "learning_rate": 1.2921697764766802e-05, "loss": 0.4627, "step": 29606 }, { "epoch": 0.8129324546952224, "grad_norm": 0.4088543653488159, "learning_rate": 1.2921284713668188e-05, "loss": 0.4845, "step": 29607 }, { "epoch": 0.8129599121361889, "grad_norm": 0.4309626519680023, "learning_rate": 1.292087165712044e-05, "loss": 0.4736, "step": 29608 }, { "epoch": 0.8129873695771554, "grad_norm": 0.3678014874458313, "learning_rate": 1.2920458595124337e-05, "loss": 0.5028, "step": 29609 }, { "epoch": 0.8130148270181219, "grad_norm": 0.36970940232276917, "learning_rate": 1.2920045527680647e-05, "loss": 0.4775, "step": 29610 }, { "epoch": 0.8130422844590884, "grad_norm": 0.4626849591732025, "learning_rate": 1.2919632454790138e-05, "loss": 0.6166, "step": 29611 }, { "epoch": 0.8130697419000549, "grad_norm": 0.43095266819000244, "learning_rate": 1.2919219376453587e-05, "loss": 0.5316, "step": 29612 }, { "epoch": 0.8130971993410214, "grad_norm": 0.4321107566356659, "learning_rate": 1.2918806292671756e-05, "loss": 0.5078, "step": 29613 }, { "epoch": 0.8131246567819879, "grad_norm": 0.3618690073490143, "learning_rate": 1.291839320344542e-05, "loss": 0.4716, "step": 29614 }, { "epoch": 0.8131521142229544, "grad_norm": 0.4261356294155121, "learning_rate": 1.2917980108775352e-05, "loss": 0.5211, "step": 29615 }, { "epoch": 0.813179571663921, "grad_norm": 0.38632476329803467, "learning_rate": 1.2917567008662317e-05, "loss": 0.5377, "step": 29616 }, { "epoch": 0.8132070291048874, "grad_norm": 0.40701478719711304, "learning_rate": 1.2917153903107091e-05, "loss": 0.4577, "step": 29617 }, { "epoch": 0.813234486545854, "grad_norm": 0.4208388328552246, "learning_rate": 1.2916740792110439e-05, "loss": 0.5512, "step": 29618 }, { "epoch": 0.8132619439868204, "grad_norm": 0.39313143491744995, "learning_rate": 1.2916327675673136e-05, "loss": 0.4173, "step": 29619 }, { "epoch": 0.8132894014277869, "grad_norm": 0.3423662781715393, "learning_rate": 1.2915914553795953e-05, "loss": 0.4318, "step": 29620 }, { "epoch": 0.8133168588687534, "grad_norm": 0.3884488046169281, "learning_rate": 1.2915501426479655e-05, "loss": 0.4207, "step": 29621 }, { "epoch": 0.8133443163097199, "grad_norm": 0.5153118968009949, "learning_rate": 1.2915088293725017e-05, "loss": 0.4444, "step": 29622 }, { "epoch": 0.8133717737506865, "grad_norm": 0.3892473578453064, "learning_rate": 1.291467515553281e-05, "loss": 0.3661, "step": 29623 }, { "epoch": 0.8133992311916529, "grad_norm": 0.4076761305332184, "learning_rate": 1.2914262011903805e-05, "loss": 0.4431, "step": 29624 }, { "epoch": 0.8134266886326195, "grad_norm": 0.35966745018959045, "learning_rate": 1.291384886283877e-05, "loss": 0.5608, "step": 29625 }, { "epoch": 0.8134541460735859, "grad_norm": 0.34544119238853455, "learning_rate": 1.2913435708338473e-05, "loss": 0.44, "step": 29626 }, { "epoch": 0.8134816035145525, "grad_norm": 0.37218162417411804, "learning_rate": 1.2913022548403692e-05, "loss": 0.5809, "step": 29627 }, { "epoch": 0.8135090609555189, "grad_norm": 0.34327733516693115, "learning_rate": 1.2912609383035197e-05, "loss": 0.4563, "step": 29628 }, { "epoch": 0.8135365183964854, "grad_norm": 0.36169132590293884, "learning_rate": 1.2912196212233751e-05, "loss": 0.4286, "step": 29629 }, { "epoch": 0.813563975837452, "grad_norm": 0.35497453808784485, "learning_rate": 1.2911783036000134e-05, "loss": 0.4525, "step": 29630 }, { "epoch": 0.8135914332784184, "grad_norm": 0.43474435806274414, "learning_rate": 1.291136985433511e-05, "loss": 0.473, "step": 29631 }, { "epoch": 0.813618890719385, "grad_norm": 0.3872703015804291, "learning_rate": 1.2910956667239452e-05, "loss": 0.4924, "step": 29632 }, { "epoch": 0.8136463481603514, "grad_norm": 0.4053436517715454, "learning_rate": 1.2910543474713932e-05, "loss": 0.5807, "step": 29633 }, { "epoch": 0.813673805601318, "grad_norm": 0.36375686526298523, "learning_rate": 1.2910130276759315e-05, "loss": 0.4646, "step": 29634 }, { "epoch": 0.8137012630422844, "grad_norm": 0.3685169816017151, "learning_rate": 1.2909717073376381e-05, "loss": 0.4141, "step": 29635 }, { "epoch": 0.813728720483251, "grad_norm": 0.3700298070907593, "learning_rate": 1.2909303864565894e-05, "loss": 0.4508, "step": 29636 }, { "epoch": 0.8137561779242175, "grad_norm": 0.4163369834423065, "learning_rate": 1.290889065032863e-05, "loss": 0.4414, "step": 29637 }, { "epoch": 0.8137836353651839, "grad_norm": 0.4599630534648895, "learning_rate": 1.2908477430665354e-05, "loss": 0.5072, "step": 29638 }, { "epoch": 0.8138110928061505, "grad_norm": 0.4431251585483551, "learning_rate": 1.2908064205576839e-05, "loss": 0.5115, "step": 29639 }, { "epoch": 0.8138385502471169, "grad_norm": 0.3723028004169464, "learning_rate": 1.2907650975063856e-05, "loss": 0.5277, "step": 29640 }, { "epoch": 0.8138660076880835, "grad_norm": 0.4782600700855255, "learning_rate": 1.2907237739127174e-05, "loss": 0.5177, "step": 29641 }, { "epoch": 0.8138934651290499, "grad_norm": 0.5230302810668945, "learning_rate": 1.2906824497767569e-05, "loss": 0.4926, "step": 29642 }, { "epoch": 0.8139209225700165, "grad_norm": 3.103111743927002, "learning_rate": 1.2906411250985809e-05, "loss": 0.525, "step": 29643 }, { "epoch": 0.813948380010983, "grad_norm": 0.3813362717628479, "learning_rate": 1.2905997998782662e-05, "loss": 0.4911, "step": 29644 }, { "epoch": 0.8139758374519495, "grad_norm": 0.411438524723053, "learning_rate": 1.2905584741158901e-05, "loss": 0.4834, "step": 29645 }, { "epoch": 0.814003294892916, "grad_norm": 0.40232861042022705, "learning_rate": 1.2905171478115301e-05, "loss": 0.6135, "step": 29646 }, { "epoch": 0.8140307523338824, "grad_norm": 0.38741716742515564, "learning_rate": 1.2904758209652624e-05, "loss": 0.4616, "step": 29647 }, { "epoch": 0.814058209774849, "grad_norm": 0.34849390387535095, "learning_rate": 1.2904344935771649e-05, "loss": 0.5621, "step": 29648 }, { "epoch": 0.8140856672158154, "grad_norm": 0.42317554354667664, "learning_rate": 1.2903931656473141e-05, "loss": 0.5115, "step": 29649 }, { "epoch": 0.814113124656782, "grad_norm": 0.4695170819759369, "learning_rate": 1.2903518371757877e-05, "loss": 0.502, "step": 29650 }, { "epoch": 0.8141405820977485, "grad_norm": 0.40875184535980225, "learning_rate": 1.2903105081626624e-05, "loss": 0.542, "step": 29651 }, { "epoch": 0.814168039538715, "grad_norm": 0.3518095314502716, "learning_rate": 1.2902691786080151e-05, "loss": 0.4491, "step": 29652 }, { "epoch": 0.8141954969796815, "grad_norm": 0.35969388484954834, "learning_rate": 1.2902278485119233e-05, "loss": 0.4856, "step": 29653 }, { "epoch": 0.814222954420648, "grad_norm": 0.33749881386756897, "learning_rate": 1.2901865178744637e-05, "loss": 0.4106, "step": 29654 }, { "epoch": 0.8142504118616145, "grad_norm": 0.434109091758728, "learning_rate": 1.290145186695714e-05, "loss": 0.5132, "step": 29655 }, { "epoch": 0.814277869302581, "grad_norm": 0.3891954720020294, "learning_rate": 1.290103854975751e-05, "loss": 0.4817, "step": 29656 }, { "epoch": 0.8143053267435475, "grad_norm": 0.3787616789340973, "learning_rate": 1.2900625227146513e-05, "loss": 0.4944, "step": 29657 }, { "epoch": 0.814332784184514, "grad_norm": 0.49212613701820374, "learning_rate": 1.2900211899124925e-05, "loss": 0.4822, "step": 29658 }, { "epoch": 0.8143602416254805, "grad_norm": 0.3635871410369873, "learning_rate": 1.289979856569352e-05, "loss": 0.5128, "step": 29659 }, { "epoch": 0.814387699066447, "grad_norm": 0.4330967664718628, "learning_rate": 1.2899385226853061e-05, "loss": 0.4993, "step": 29660 }, { "epoch": 0.8144151565074135, "grad_norm": 0.4087710976600647, "learning_rate": 1.2898971882604325e-05, "loss": 0.5533, "step": 29661 }, { "epoch": 0.81444261394838, "grad_norm": 0.3568320870399475, "learning_rate": 1.289855853294808e-05, "loss": 0.4173, "step": 29662 }, { "epoch": 0.8144700713893465, "grad_norm": 0.3942646086215973, "learning_rate": 1.28981451778851e-05, "loss": 0.5365, "step": 29663 }, { "epoch": 0.814497528830313, "grad_norm": 0.40745624899864197, "learning_rate": 1.2897731817416152e-05, "loss": 0.5747, "step": 29664 }, { "epoch": 0.8145249862712796, "grad_norm": 0.3683948814868927, "learning_rate": 1.289731845154201e-05, "loss": 0.5188, "step": 29665 }, { "epoch": 0.814552443712246, "grad_norm": 0.35330629348754883, "learning_rate": 1.2896905080263447e-05, "loss": 0.431, "step": 29666 }, { "epoch": 0.8145799011532125, "grad_norm": 0.44796663522720337, "learning_rate": 1.2896491703581226e-05, "loss": 0.4751, "step": 29667 }, { "epoch": 0.814607358594179, "grad_norm": 0.3655795454978943, "learning_rate": 1.2896078321496129e-05, "loss": 0.4808, "step": 29668 }, { "epoch": 0.8146348160351455, "grad_norm": 0.415217787027359, "learning_rate": 1.289566493400892e-05, "loss": 0.4654, "step": 29669 }, { "epoch": 0.814662273476112, "grad_norm": 0.42492350935935974, "learning_rate": 1.289525154112037e-05, "loss": 0.463, "step": 29670 }, { "epoch": 0.8146897309170785, "grad_norm": 0.38228297233581543, "learning_rate": 1.2894838142831254e-05, "loss": 0.4751, "step": 29671 }, { "epoch": 0.8147171883580451, "grad_norm": 0.38600119948387146, "learning_rate": 1.289442473914234e-05, "loss": 0.4729, "step": 29672 }, { "epoch": 0.8147446457990115, "grad_norm": 0.49936333298683167, "learning_rate": 1.28940113300544e-05, "loss": 0.5595, "step": 29673 }, { "epoch": 0.8147721032399781, "grad_norm": 0.35432907938957214, "learning_rate": 1.2893597915568206e-05, "loss": 0.4591, "step": 29674 }, { "epoch": 0.8147995606809445, "grad_norm": 0.4329057037830353, "learning_rate": 1.2893184495684526e-05, "loss": 0.5276, "step": 29675 }, { "epoch": 0.814827018121911, "grad_norm": 0.3661741018295288, "learning_rate": 1.2892771070404135e-05, "loss": 0.5129, "step": 29676 }, { "epoch": 0.8148544755628775, "grad_norm": 0.386715829372406, "learning_rate": 1.2892357639727804e-05, "loss": 0.4963, "step": 29677 }, { "epoch": 0.814881933003844, "grad_norm": 0.36390063166618347, "learning_rate": 1.2891944203656302e-05, "loss": 0.4332, "step": 29678 }, { "epoch": 0.8149093904448106, "grad_norm": 0.41562891006469727, "learning_rate": 1.2891530762190401e-05, "loss": 0.4659, "step": 29679 }, { "epoch": 0.814936847885777, "grad_norm": 0.39317476749420166, "learning_rate": 1.289111731533087e-05, "loss": 0.5087, "step": 29680 }, { "epoch": 0.8149643053267436, "grad_norm": 0.36388128995895386, "learning_rate": 1.2890703863078487e-05, "loss": 0.4506, "step": 29681 }, { "epoch": 0.81499176276771, "grad_norm": 0.3606870770454407, "learning_rate": 1.2890290405434016e-05, "loss": 0.4537, "step": 29682 }, { "epoch": 0.8150192202086766, "grad_norm": 0.39668333530426025, "learning_rate": 1.288987694239823e-05, "loss": 0.5569, "step": 29683 }, { "epoch": 0.815046677649643, "grad_norm": 0.3754545748233795, "learning_rate": 1.2889463473971902e-05, "loss": 0.5015, "step": 29684 }, { "epoch": 0.8150741350906096, "grad_norm": 0.3048594892024994, "learning_rate": 1.2889050000155803e-05, "loss": 0.4042, "step": 29685 }, { "epoch": 0.8151015925315761, "grad_norm": 0.37362435460090637, "learning_rate": 1.2888636520950701e-05, "loss": 0.4708, "step": 29686 }, { "epoch": 0.8151290499725425, "grad_norm": 0.4279981553554535, "learning_rate": 1.2888223036357374e-05, "loss": 0.5575, "step": 29687 }, { "epoch": 0.8151565074135091, "grad_norm": 0.503505527973175, "learning_rate": 1.2887809546376587e-05, "loss": 0.5745, "step": 29688 }, { "epoch": 0.8151839648544755, "grad_norm": 0.38288772106170654, "learning_rate": 1.2887396051009114e-05, "loss": 0.5058, "step": 29689 }, { "epoch": 0.8152114222954421, "grad_norm": 0.37281230092048645, "learning_rate": 1.2886982550255726e-05, "loss": 0.4524, "step": 29690 }, { "epoch": 0.8152388797364085, "grad_norm": 0.36497363448143005, "learning_rate": 1.2886569044117194e-05, "loss": 0.4844, "step": 29691 }, { "epoch": 0.8152663371773751, "grad_norm": 0.44114360213279724, "learning_rate": 1.288615553259429e-05, "loss": 0.5717, "step": 29692 }, { "epoch": 0.8152937946183416, "grad_norm": 0.3874850273132324, "learning_rate": 1.2885742015687783e-05, "loss": 0.4447, "step": 29693 }, { "epoch": 0.815321252059308, "grad_norm": 0.40775415301322937, "learning_rate": 1.2885328493398449e-05, "loss": 0.5454, "step": 29694 }, { "epoch": 0.8153487095002746, "grad_norm": 0.5019264221191406, "learning_rate": 1.2884914965727053e-05, "loss": 0.5763, "step": 29695 }, { "epoch": 0.815376166941241, "grad_norm": 0.35922524333000183, "learning_rate": 1.2884501432674373e-05, "loss": 0.4646, "step": 29696 }, { "epoch": 0.8154036243822076, "grad_norm": 0.44227585196495056, "learning_rate": 1.2884087894241175e-05, "loss": 0.5414, "step": 29697 }, { "epoch": 0.815431081823174, "grad_norm": 0.42100703716278076, "learning_rate": 1.2883674350428232e-05, "loss": 0.4557, "step": 29698 }, { "epoch": 0.8154585392641406, "grad_norm": 0.3757691979408264, "learning_rate": 1.2883260801236318e-05, "loss": 0.5158, "step": 29699 }, { "epoch": 0.8154859967051071, "grad_norm": 0.37019652128219604, "learning_rate": 1.2882847246666204e-05, "loss": 0.5474, "step": 29700 }, { "epoch": 0.8155134541460736, "grad_norm": 0.3662957549095154, "learning_rate": 1.2882433686718656e-05, "loss": 0.4927, "step": 29701 }, { "epoch": 0.8155409115870401, "grad_norm": 0.40276002883911133, "learning_rate": 1.2882020121394452e-05, "loss": 0.5507, "step": 29702 }, { "epoch": 0.8155683690280066, "grad_norm": 0.3047461211681366, "learning_rate": 1.2881606550694356e-05, "loss": 0.3361, "step": 29703 }, { "epoch": 0.8155958264689731, "grad_norm": 0.3473450541496277, "learning_rate": 1.288119297461915e-05, "loss": 0.4556, "step": 29704 }, { "epoch": 0.8156232839099395, "grad_norm": 0.42273736000061035, "learning_rate": 1.2880779393169599e-05, "loss": 0.6042, "step": 29705 }, { "epoch": 0.8156507413509061, "grad_norm": 0.3936721682548523, "learning_rate": 1.288036580634647e-05, "loss": 0.4423, "step": 29706 }, { "epoch": 0.8156781987918726, "grad_norm": 0.35070574283599854, "learning_rate": 1.2879952214150543e-05, "loss": 0.524, "step": 29707 }, { "epoch": 0.8157056562328391, "grad_norm": 0.37846872210502625, "learning_rate": 1.2879538616582585e-05, "loss": 0.5545, "step": 29708 }, { "epoch": 0.8157331136738056, "grad_norm": 0.4268110990524292, "learning_rate": 1.2879125013643368e-05, "loss": 0.459, "step": 29709 }, { "epoch": 0.8157605711147721, "grad_norm": 0.39726683497428894, "learning_rate": 1.2878711405333666e-05, "loss": 0.5088, "step": 29710 }, { "epoch": 0.8157880285557386, "grad_norm": 0.38816994428634644, "learning_rate": 1.2878297791654247e-05, "loss": 0.4576, "step": 29711 }, { "epoch": 0.8158154859967051, "grad_norm": 0.40452614426612854, "learning_rate": 1.2877884172605884e-05, "loss": 0.4886, "step": 29712 }, { "epoch": 0.8158429434376716, "grad_norm": 0.4006691575050354, "learning_rate": 1.287747054818935e-05, "loss": 0.5545, "step": 29713 }, { "epoch": 0.8158704008786382, "grad_norm": 0.403721421957016, "learning_rate": 1.2877056918405413e-05, "loss": 0.5866, "step": 29714 }, { "epoch": 0.8158978583196046, "grad_norm": 0.4676116108894348, "learning_rate": 1.2876643283254845e-05, "loss": 0.5291, "step": 29715 }, { "epoch": 0.8159253157605711, "grad_norm": 0.34624263644218445, "learning_rate": 1.2876229642738422e-05, "loss": 0.4647, "step": 29716 }, { "epoch": 0.8159527732015376, "grad_norm": 0.4008829891681671, "learning_rate": 1.2875815996856913e-05, "loss": 0.5369, "step": 29717 }, { "epoch": 0.8159802306425041, "grad_norm": 0.3786114454269409, "learning_rate": 1.287540234561109e-05, "loss": 0.4427, "step": 29718 }, { "epoch": 0.8160076880834706, "grad_norm": 0.38280701637268066, "learning_rate": 1.2874988689001722e-05, "loss": 0.5238, "step": 29719 }, { "epoch": 0.8160351455244371, "grad_norm": 0.39356446266174316, "learning_rate": 1.2874575027029584e-05, "loss": 0.4724, "step": 29720 }, { "epoch": 0.8160626029654037, "grad_norm": 0.4333021342754364, "learning_rate": 1.2874161359695446e-05, "loss": 0.5083, "step": 29721 }, { "epoch": 0.8160900604063701, "grad_norm": 0.37130939960479736, "learning_rate": 1.2873747687000075e-05, "loss": 0.4423, "step": 29722 }, { "epoch": 0.8161175178473367, "grad_norm": 0.4218670427799225, "learning_rate": 1.2873334008944252e-05, "loss": 0.4912, "step": 29723 }, { "epoch": 0.8161449752883031, "grad_norm": 0.3501530587673187, "learning_rate": 1.287292032552874e-05, "loss": 0.4189, "step": 29724 }, { "epoch": 0.8161724327292696, "grad_norm": 0.45749709010124207, "learning_rate": 1.287250663675432e-05, "loss": 0.5541, "step": 29725 }, { "epoch": 0.8161998901702361, "grad_norm": 0.39695268869400024, "learning_rate": 1.2872092942621759e-05, "loss": 0.5719, "step": 29726 }, { "epoch": 0.8162273476112026, "grad_norm": 0.36696940660476685, "learning_rate": 1.2871679243131822e-05, "loss": 0.4814, "step": 29727 }, { "epoch": 0.8162548050521692, "grad_norm": 0.3626328408718109, "learning_rate": 1.2871265538285292e-05, "loss": 0.4726, "step": 29728 }, { "epoch": 0.8162822624931356, "grad_norm": 0.38126489520072937, "learning_rate": 1.2870851828082931e-05, "loss": 0.4939, "step": 29729 }, { "epoch": 0.8163097199341022, "grad_norm": 0.3867487609386444, "learning_rate": 1.2870438112525519e-05, "loss": 0.5756, "step": 29730 }, { "epoch": 0.8163371773750686, "grad_norm": 0.5645161867141724, "learning_rate": 1.2870024391613822e-05, "loss": 0.5693, "step": 29731 }, { "epoch": 0.8163646348160352, "grad_norm": 1.1714768409729004, "learning_rate": 1.2869610665348614e-05, "loss": 0.461, "step": 29732 }, { "epoch": 0.8163920922570016, "grad_norm": 0.39456361532211304, "learning_rate": 1.2869196933730665e-05, "loss": 0.4592, "step": 29733 }, { "epoch": 0.8164195496979681, "grad_norm": 0.43155261874198914, "learning_rate": 1.2868783196760747e-05, "loss": 0.5164, "step": 29734 }, { "epoch": 0.8164470071389347, "grad_norm": 0.3342248797416687, "learning_rate": 1.2868369454439636e-05, "loss": 0.4578, "step": 29735 }, { "epoch": 0.8164744645799011, "grad_norm": 0.4071844220161438, "learning_rate": 1.28679557067681e-05, "loss": 0.4955, "step": 29736 }, { "epoch": 0.8165019220208677, "grad_norm": 0.37005311250686646, "learning_rate": 1.2867541953746909e-05, "loss": 0.5064, "step": 29737 }, { "epoch": 0.8165293794618341, "grad_norm": 0.39512720704078674, "learning_rate": 1.2867128195376839e-05, "loss": 0.4554, "step": 29738 }, { "epoch": 0.8165568369028007, "grad_norm": 0.5053516626358032, "learning_rate": 1.286671443165866e-05, "loss": 0.4803, "step": 29739 }, { "epoch": 0.8165842943437671, "grad_norm": 0.3862617611885071, "learning_rate": 1.2866300662593142e-05, "loss": 0.4621, "step": 29740 }, { "epoch": 0.8166117517847337, "grad_norm": 0.40775880217552185, "learning_rate": 1.2865886888181059e-05, "loss": 0.4434, "step": 29741 }, { "epoch": 0.8166392092257002, "grad_norm": 0.36323708295822144, "learning_rate": 1.286547310842318e-05, "loss": 0.4895, "step": 29742 }, { "epoch": 0.8166666666666667, "grad_norm": 0.48690205812454224, "learning_rate": 1.2865059323320283e-05, "loss": 0.5689, "step": 29743 }, { "epoch": 0.8166941241076332, "grad_norm": 0.3782793879508972, "learning_rate": 1.2864645532873137e-05, "loss": 0.4358, "step": 29744 }, { "epoch": 0.8167215815485996, "grad_norm": 0.3822595775127411, "learning_rate": 1.2864231737082508e-05, "loss": 0.4783, "step": 29745 }, { "epoch": 0.8167490389895662, "grad_norm": 0.4232713282108307, "learning_rate": 1.2863817935949177e-05, "loss": 0.4751, "step": 29746 }, { "epoch": 0.8167764964305326, "grad_norm": 0.41067248582839966, "learning_rate": 1.2863404129473906e-05, "loss": 0.5064, "step": 29747 }, { "epoch": 0.8168039538714992, "grad_norm": 0.3760000467300415, "learning_rate": 1.2862990317657477e-05, "loss": 0.4558, "step": 29748 }, { "epoch": 0.8168314113124657, "grad_norm": 0.37570181488990784, "learning_rate": 1.286257650050066e-05, "loss": 0.5458, "step": 29749 }, { "epoch": 0.8168588687534322, "grad_norm": 0.4286228120326996, "learning_rate": 1.2862162678004217e-05, "loss": 0.5763, "step": 29750 }, { "epoch": 0.8168863261943987, "grad_norm": 0.36378636956214905, "learning_rate": 1.2861748850168931e-05, "loss": 0.4184, "step": 29751 }, { "epoch": 0.8169137836353652, "grad_norm": 0.3504086136817932, "learning_rate": 1.286133501699557e-05, "loss": 0.4585, "step": 29752 }, { "epoch": 0.8169412410763317, "grad_norm": 0.38307705521583557, "learning_rate": 1.2860921178484904e-05, "loss": 0.6137, "step": 29753 }, { "epoch": 0.8169686985172981, "grad_norm": 0.5503459572792053, "learning_rate": 1.286050733463771e-05, "loss": 0.4692, "step": 29754 }, { "epoch": 0.8169961559582647, "grad_norm": 0.3408075273036957, "learning_rate": 1.2860093485454755e-05, "loss": 0.4522, "step": 29755 }, { "epoch": 0.8170236133992312, "grad_norm": 0.45777225494384766, "learning_rate": 1.2859679630936812e-05, "loss": 0.6154, "step": 29756 }, { "epoch": 0.8170510708401977, "grad_norm": 0.42029720544815063, "learning_rate": 1.2859265771084654e-05, "loss": 0.5537, "step": 29757 }, { "epoch": 0.8170785282811642, "grad_norm": 0.4263295531272888, "learning_rate": 1.2858851905899054e-05, "loss": 0.5452, "step": 29758 }, { "epoch": 0.8171059857221307, "grad_norm": 0.3551003038883209, "learning_rate": 1.2858438035380782e-05, "loss": 0.469, "step": 29759 }, { "epoch": 0.8171334431630972, "grad_norm": 0.39218613505363464, "learning_rate": 1.2858024159530612e-05, "loss": 0.3642, "step": 29760 }, { "epoch": 0.8171609006040637, "grad_norm": 0.3816457986831665, "learning_rate": 1.2857610278349315e-05, "loss": 0.498, "step": 29761 }, { "epoch": 0.8171883580450302, "grad_norm": 0.3663220703601837, "learning_rate": 1.2857196391837658e-05, "loss": 0.5079, "step": 29762 }, { "epoch": 0.8172158154859968, "grad_norm": 0.3677317798137665, "learning_rate": 1.2856782499996423e-05, "loss": 0.4264, "step": 29763 }, { "epoch": 0.8172432729269632, "grad_norm": 0.4292408525943756, "learning_rate": 1.2856368602826374e-05, "loss": 0.4762, "step": 29764 }, { "epoch": 0.8172707303679297, "grad_norm": 0.3520265817642212, "learning_rate": 1.285595470032829e-05, "loss": 0.4849, "step": 29765 }, { "epoch": 0.8172981878088962, "grad_norm": 0.41530415415763855, "learning_rate": 1.2855540792502937e-05, "loss": 0.5179, "step": 29766 }, { "epoch": 0.8173256452498627, "grad_norm": 0.44347211718559265, "learning_rate": 1.2855126879351087e-05, "loss": 0.5606, "step": 29767 }, { "epoch": 0.8173531026908292, "grad_norm": 0.4456445276737213, "learning_rate": 1.2854712960873515e-05, "loss": 0.5662, "step": 29768 }, { "epoch": 0.8173805601317957, "grad_norm": 0.5070704817771912, "learning_rate": 1.2854299037070992e-05, "loss": 0.5295, "step": 29769 }, { "epoch": 0.8174080175727623, "grad_norm": 0.3860585391521454, "learning_rate": 1.2853885107944293e-05, "loss": 0.504, "step": 29770 }, { "epoch": 0.8174354750137287, "grad_norm": 0.3696349859237671, "learning_rate": 1.2853471173494185e-05, "loss": 0.4708, "step": 29771 }, { "epoch": 0.8174629324546953, "grad_norm": 0.45967990159988403, "learning_rate": 1.2853057233721442e-05, "loss": 0.4607, "step": 29772 }, { "epoch": 0.8174903898956617, "grad_norm": 0.4232665002346039, "learning_rate": 1.2852643288626838e-05, "loss": 0.5396, "step": 29773 }, { "epoch": 0.8175178473366282, "grad_norm": 0.4151057302951813, "learning_rate": 1.2852229338211141e-05, "loss": 0.4545, "step": 29774 }, { "epoch": 0.8175453047775947, "grad_norm": 0.4133724868297577, "learning_rate": 1.2851815382475131e-05, "loss": 0.5243, "step": 29775 }, { "epoch": 0.8175727622185612, "grad_norm": 0.36403003334999084, "learning_rate": 1.2851401421419569e-05, "loss": 0.4807, "step": 29776 }, { "epoch": 0.8176002196595278, "grad_norm": 0.41358503699302673, "learning_rate": 1.2850987455045238e-05, "loss": 0.4616, "step": 29777 }, { "epoch": 0.8176276771004942, "grad_norm": 0.3617730140686035, "learning_rate": 1.2850573483352907e-05, "loss": 0.4267, "step": 29778 }, { "epoch": 0.8176551345414608, "grad_norm": 0.39328116178512573, "learning_rate": 1.285015950634334e-05, "loss": 0.5058, "step": 29779 }, { "epoch": 0.8176825919824272, "grad_norm": 0.4431750774383545, "learning_rate": 1.284974552401732e-05, "loss": 0.5915, "step": 29780 }, { "epoch": 0.8177100494233938, "grad_norm": 0.4044540822505951, "learning_rate": 1.2849331536375615e-05, "loss": 0.5149, "step": 29781 }, { "epoch": 0.8177375068643602, "grad_norm": 0.3452550172805786, "learning_rate": 1.2848917543418996e-05, "loss": 0.4379, "step": 29782 }, { "epoch": 0.8177649643053267, "grad_norm": 0.3403281569480896, "learning_rate": 1.2848503545148239e-05, "loss": 0.4622, "step": 29783 }, { "epoch": 0.8177924217462933, "grad_norm": 0.36390501260757446, "learning_rate": 1.2848089541564113e-05, "loss": 0.4866, "step": 29784 }, { "epoch": 0.8178198791872597, "grad_norm": 0.3850850462913513, "learning_rate": 1.284767553266739e-05, "loss": 0.4964, "step": 29785 }, { "epoch": 0.8178473366282263, "grad_norm": 0.387901246547699, "learning_rate": 1.2847261518458843e-05, "loss": 0.5279, "step": 29786 }, { "epoch": 0.8178747940691927, "grad_norm": 0.7095381617546082, "learning_rate": 1.2846847498939246e-05, "loss": 0.5327, "step": 29787 }, { "epoch": 0.8179022515101593, "grad_norm": 0.38062119483947754, "learning_rate": 1.284643347410937e-05, "loss": 0.4592, "step": 29788 }, { "epoch": 0.8179297089511257, "grad_norm": 0.3846418559551239, "learning_rate": 1.2846019443969984e-05, "loss": 0.4263, "step": 29789 }, { "epoch": 0.8179571663920923, "grad_norm": 0.3894844949245453, "learning_rate": 1.2845605408521866e-05, "loss": 0.4956, "step": 29790 }, { "epoch": 0.8179846238330588, "grad_norm": 0.4017009437084198, "learning_rate": 1.2845191367765784e-05, "loss": 0.4934, "step": 29791 }, { "epoch": 0.8180120812740252, "grad_norm": 0.3971722722053528, "learning_rate": 1.2844777321702513e-05, "loss": 0.5223, "step": 29792 }, { "epoch": 0.8180395387149918, "grad_norm": 0.4198957681655884, "learning_rate": 1.2844363270332827e-05, "loss": 0.546, "step": 29793 }, { "epoch": 0.8180669961559582, "grad_norm": 0.40161409974098206, "learning_rate": 1.284394921365749e-05, "loss": 0.5212, "step": 29794 }, { "epoch": 0.8180944535969248, "grad_norm": 0.42480170726776123, "learning_rate": 1.2843535151677284e-05, "loss": 0.4874, "step": 29795 }, { "epoch": 0.8181219110378912, "grad_norm": 0.36984366178512573, "learning_rate": 1.2843121084392976e-05, "loss": 0.4279, "step": 29796 }, { "epoch": 0.8181493684788578, "grad_norm": 0.3794953525066376, "learning_rate": 1.284270701180534e-05, "loss": 0.5116, "step": 29797 }, { "epoch": 0.8181768259198243, "grad_norm": 0.3618275821208954, "learning_rate": 1.284229293391515e-05, "loss": 0.5045, "step": 29798 }, { "epoch": 0.8182042833607908, "grad_norm": 0.3655815124511719, "learning_rate": 1.2841878850723175e-05, "loss": 0.4918, "step": 29799 }, { "epoch": 0.8182317408017573, "grad_norm": 0.3755134344100952, "learning_rate": 1.284146476223019e-05, "loss": 0.5354, "step": 29800 }, { "epoch": 0.8182591982427238, "grad_norm": 0.4752017855644226, "learning_rate": 1.2841050668436965e-05, "loss": 0.5692, "step": 29801 }, { "epoch": 0.8182866556836903, "grad_norm": 0.386306494474411, "learning_rate": 1.2840636569344272e-05, "loss": 0.4465, "step": 29802 }, { "epoch": 0.8183141131246567, "grad_norm": 0.43851736187934875, "learning_rate": 1.284022246495289e-05, "loss": 0.4647, "step": 29803 }, { "epoch": 0.8183415705656233, "grad_norm": 0.40463677048683167, "learning_rate": 1.283980835526358e-05, "loss": 0.4668, "step": 29804 }, { "epoch": 0.8183690280065898, "grad_norm": 0.3938886523246765, "learning_rate": 1.2839394240277128e-05, "loss": 0.4555, "step": 29805 }, { "epoch": 0.8183964854475563, "grad_norm": 0.3796316981315613, "learning_rate": 1.2838980119994296e-05, "loss": 0.5182, "step": 29806 }, { "epoch": 0.8184239428885228, "grad_norm": 0.48716869950294495, "learning_rate": 1.2838565994415859e-05, "loss": 0.4594, "step": 29807 }, { "epoch": 0.8184514003294893, "grad_norm": 0.3775138258934021, "learning_rate": 1.2838151863542591e-05, "loss": 0.4846, "step": 29808 }, { "epoch": 0.8184788577704558, "grad_norm": 0.422982394695282, "learning_rate": 1.2837737727375266e-05, "loss": 0.4368, "step": 29809 }, { "epoch": 0.8185063152114223, "grad_norm": 0.3985643982887268, "learning_rate": 1.2837323585914651e-05, "loss": 0.4925, "step": 29810 }, { "epoch": 0.8185337726523888, "grad_norm": 0.37971627712249756, "learning_rate": 1.2836909439161527e-05, "loss": 0.5156, "step": 29811 }, { "epoch": 0.8185612300933554, "grad_norm": 0.3909282982349396, "learning_rate": 1.2836495287116657e-05, "loss": 0.5297, "step": 29812 }, { "epoch": 0.8185886875343218, "grad_norm": 0.4281807541847229, "learning_rate": 1.283608112978082e-05, "loss": 0.4078, "step": 29813 }, { "epoch": 0.8186161449752883, "grad_norm": 0.37684252858161926, "learning_rate": 1.2835666967154788e-05, "loss": 0.6046, "step": 29814 }, { "epoch": 0.8186436024162548, "grad_norm": 0.38392916321754456, "learning_rate": 1.2835252799239326e-05, "loss": 0.5565, "step": 29815 }, { "epoch": 0.8186710598572213, "grad_norm": 0.39906471967697144, "learning_rate": 1.283483862603522e-05, "loss": 0.5272, "step": 29816 }, { "epoch": 0.8186985172981878, "grad_norm": 0.35901233553886414, "learning_rate": 1.283442444754323e-05, "loss": 0.4892, "step": 29817 }, { "epoch": 0.8187259747391543, "grad_norm": 0.3971671164035797, "learning_rate": 1.2834010263764135e-05, "loss": 0.4485, "step": 29818 }, { "epoch": 0.8187534321801209, "grad_norm": 0.42621615529060364, "learning_rate": 1.2833596074698708e-05, "loss": 0.481, "step": 29819 }, { "epoch": 0.8187808896210873, "grad_norm": 0.4067281186580658, "learning_rate": 1.2833181880347718e-05, "loss": 0.4965, "step": 29820 }, { "epoch": 0.8188083470620539, "grad_norm": 0.354600191116333, "learning_rate": 1.2832767680711941e-05, "loss": 0.4434, "step": 29821 }, { "epoch": 0.8188358045030203, "grad_norm": 0.3844257593154907, "learning_rate": 1.2832353475792148e-05, "loss": 0.5731, "step": 29822 }, { "epoch": 0.8188632619439868, "grad_norm": 0.39769527316093445, "learning_rate": 1.2831939265589113e-05, "loss": 0.4689, "step": 29823 }, { "epoch": 0.8188907193849533, "grad_norm": 0.36561912298202515, "learning_rate": 1.2831525050103606e-05, "loss": 0.5345, "step": 29824 }, { "epoch": 0.8189181768259198, "grad_norm": 0.5199923515319824, "learning_rate": 1.28311108293364e-05, "loss": 0.4972, "step": 29825 }, { "epoch": 0.8189456342668864, "grad_norm": 0.39659687876701355, "learning_rate": 1.2830696603288271e-05, "loss": 0.4071, "step": 29826 }, { "epoch": 0.8189730917078528, "grad_norm": 0.4294394850730896, "learning_rate": 1.2830282371959986e-05, "loss": 0.5133, "step": 29827 }, { "epoch": 0.8190005491488194, "grad_norm": 0.4190565049648285, "learning_rate": 1.2829868135352324e-05, "loss": 0.5887, "step": 29828 }, { "epoch": 0.8190280065897858, "grad_norm": 0.38593602180480957, "learning_rate": 1.2829453893466053e-05, "loss": 0.4954, "step": 29829 }, { "epoch": 0.8190554640307524, "grad_norm": 0.3709847629070282, "learning_rate": 1.2829039646301946e-05, "loss": 0.5136, "step": 29830 }, { "epoch": 0.8190829214717188, "grad_norm": 0.40476980805397034, "learning_rate": 1.2828625393860782e-05, "loss": 0.5198, "step": 29831 }, { "epoch": 0.8191103789126853, "grad_norm": 0.3767644464969635, "learning_rate": 1.2828211136143326e-05, "loss": 0.5791, "step": 29832 }, { "epoch": 0.8191378363536519, "grad_norm": 0.3346176743507385, "learning_rate": 1.2827796873150353e-05, "loss": 0.4857, "step": 29833 }, { "epoch": 0.8191652937946183, "grad_norm": 0.4106161892414093, "learning_rate": 1.2827382604882639e-05, "loss": 0.4706, "step": 29834 }, { "epoch": 0.8191927512355849, "grad_norm": 0.3526047170162201, "learning_rate": 1.2826968331340949e-05, "loss": 0.4846, "step": 29835 }, { "epoch": 0.8192202086765513, "grad_norm": 0.401846319437027, "learning_rate": 1.2826554052526066e-05, "loss": 0.4879, "step": 29836 }, { "epoch": 0.8192476661175179, "grad_norm": 0.4400515854358673, "learning_rate": 1.2826139768438756e-05, "loss": 0.5374, "step": 29837 }, { "epoch": 0.8192751235584843, "grad_norm": 0.3706651031970978, "learning_rate": 1.2825725479079792e-05, "loss": 0.4916, "step": 29838 }, { "epoch": 0.8193025809994509, "grad_norm": 0.4236578643321991, "learning_rate": 1.282531118444995e-05, "loss": 0.4902, "step": 29839 }, { "epoch": 0.8193300384404174, "grad_norm": 0.5619128942489624, "learning_rate": 1.2824896884549997e-05, "loss": 0.4542, "step": 29840 }, { "epoch": 0.8193574958813838, "grad_norm": 0.3436570167541504, "learning_rate": 1.2824482579380715e-05, "loss": 0.4947, "step": 29841 }, { "epoch": 0.8193849533223504, "grad_norm": 0.35925742983818054, "learning_rate": 1.2824068268942868e-05, "loss": 0.5106, "step": 29842 }, { "epoch": 0.8194124107633168, "grad_norm": 0.4353228509426117, "learning_rate": 1.2823653953237233e-05, "loss": 0.4198, "step": 29843 }, { "epoch": 0.8194398682042834, "grad_norm": 0.4057868421077728, "learning_rate": 1.2823239632264584e-05, "loss": 0.4985, "step": 29844 }, { "epoch": 0.8194673256452498, "grad_norm": 0.7350486516952515, "learning_rate": 1.2822825306025691e-05, "loss": 0.4542, "step": 29845 }, { "epoch": 0.8194947830862164, "grad_norm": 0.36042657494544983, "learning_rate": 1.2822410974521326e-05, "loss": 0.4392, "step": 29846 }, { "epoch": 0.8195222405271829, "grad_norm": 0.4078938364982605, "learning_rate": 1.2821996637752268e-05, "loss": 0.511, "step": 29847 }, { "epoch": 0.8195496979681494, "grad_norm": 0.5156942009925842, "learning_rate": 1.2821582295719282e-05, "loss": 0.5418, "step": 29848 }, { "epoch": 0.8195771554091159, "grad_norm": 0.3941909670829773, "learning_rate": 1.2821167948423145e-05, "loss": 0.4683, "step": 29849 }, { "epoch": 0.8196046128500823, "grad_norm": 0.40761852264404297, "learning_rate": 1.282075359586463e-05, "loss": 0.5008, "step": 29850 }, { "epoch": 0.8196320702910489, "grad_norm": 0.37715354561805725, "learning_rate": 1.2820339238044508e-05, "loss": 0.4884, "step": 29851 }, { "epoch": 0.8196595277320153, "grad_norm": 0.43794193863868713, "learning_rate": 1.2819924874963553e-05, "loss": 0.4518, "step": 29852 }, { "epoch": 0.8196869851729819, "grad_norm": 0.3783273696899414, "learning_rate": 1.281951050662254e-05, "loss": 0.5185, "step": 29853 }, { "epoch": 0.8197144426139484, "grad_norm": 0.38956475257873535, "learning_rate": 1.281909613302224e-05, "loss": 0.4552, "step": 29854 }, { "epoch": 0.8197419000549149, "grad_norm": 0.488350510597229, "learning_rate": 1.2818681754163424e-05, "loss": 0.5068, "step": 29855 }, { "epoch": 0.8197693574958814, "grad_norm": 0.45009031891822815, "learning_rate": 1.2818267370046869e-05, "loss": 0.4665, "step": 29856 }, { "epoch": 0.8197968149368479, "grad_norm": 0.48266151547431946, "learning_rate": 1.2817852980673344e-05, "loss": 0.5353, "step": 29857 }, { "epoch": 0.8198242723778144, "grad_norm": 0.3745397627353668, "learning_rate": 1.2817438586043625e-05, "loss": 0.42, "step": 29858 }, { "epoch": 0.8198517298187808, "grad_norm": 0.4363009035587311, "learning_rate": 1.2817024186158484e-05, "loss": 0.5672, "step": 29859 }, { "epoch": 0.8198791872597474, "grad_norm": 0.4505922496318817, "learning_rate": 1.2816609781018694e-05, "loss": 0.5208, "step": 29860 }, { "epoch": 0.8199066447007138, "grad_norm": 0.42160022258758545, "learning_rate": 1.2816195370625027e-05, "loss": 0.5102, "step": 29861 }, { "epoch": 0.8199341021416804, "grad_norm": 0.4656600058078766, "learning_rate": 1.2815780954978256e-05, "loss": 0.52, "step": 29862 }, { "epoch": 0.8199615595826469, "grad_norm": 0.4245969355106354, "learning_rate": 1.2815366534079157e-05, "loss": 0.4924, "step": 29863 }, { "epoch": 0.8199890170236134, "grad_norm": 0.382367879152298, "learning_rate": 1.28149521079285e-05, "loss": 0.4943, "step": 29864 }, { "epoch": 0.8200164744645799, "grad_norm": 0.37704774737358093, "learning_rate": 1.2814537676527058e-05, "loss": 0.5113, "step": 29865 }, { "epoch": 0.8200439319055464, "grad_norm": 0.4379624128341675, "learning_rate": 1.2814123239875605e-05, "loss": 0.5605, "step": 29866 }, { "epoch": 0.8200713893465129, "grad_norm": 0.38951823115348816, "learning_rate": 1.2813708797974916e-05, "loss": 0.46, "step": 29867 }, { "epoch": 0.8200988467874794, "grad_norm": 0.37134289741516113, "learning_rate": 1.281329435082576e-05, "loss": 0.5031, "step": 29868 }, { "epoch": 0.8201263042284459, "grad_norm": 0.3908522427082062, "learning_rate": 1.2812879898428912e-05, "loss": 0.4355, "step": 29869 }, { "epoch": 0.8201537616694125, "grad_norm": 0.4594365656375885, "learning_rate": 1.2812465440785147e-05, "loss": 0.6039, "step": 29870 }, { "epoch": 0.8201812191103789, "grad_norm": 0.4187853932380676, "learning_rate": 1.2812050977895232e-05, "loss": 0.5031, "step": 29871 }, { "epoch": 0.8202086765513454, "grad_norm": 0.39344313740730286, "learning_rate": 1.2811636509759948e-05, "loss": 0.5434, "step": 29872 }, { "epoch": 0.8202361339923119, "grad_norm": 0.4878123998641968, "learning_rate": 1.2811222036380067e-05, "loss": 0.545, "step": 29873 }, { "epoch": 0.8202635914332784, "grad_norm": 0.4038982093334198, "learning_rate": 1.2810807557756354e-05, "loss": 0.5517, "step": 29874 }, { "epoch": 0.8202910488742449, "grad_norm": 0.4685501754283905, "learning_rate": 1.2810393073889594e-05, "loss": 0.5159, "step": 29875 }, { "epoch": 0.8203185063152114, "grad_norm": 0.4132039248943329, "learning_rate": 1.2809978584780546e-05, "loss": 0.5286, "step": 29876 }, { "epoch": 0.820345963756178, "grad_norm": 0.43112990260124207, "learning_rate": 1.2809564090429998e-05, "loss": 0.4618, "step": 29877 }, { "epoch": 0.8203734211971444, "grad_norm": 0.4360826015472412, "learning_rate": 1.2809149590838716e-05, "loss": 0.4623, "step": 29878 }, { "epoch": 0.820400878638111, "grad_norm": 0.3895154297351837, "learning_rate": 1.2808735086007469e-05, "loss": 0.4619, "step": 29879 }, { "epoch": 0.8204283360790774, "grad_norm": 0.4145752191543579, "learning_rate": 1.2808320575937039e-05, "loss": 0.4981, "step": 29880 }, { "epoch": 0.8204557935200439, "grad_norm": 0.6444263458251953, "learning_rate": 1.2807906060628192e-05, "loss": 0.5635, "step": 29881 }, { "epoch": 0.8204832509610104, "grad_norm": 0.4042304754257202, "learning_rate": 1.2807491540081705e-05, "loss": 0.5288, "step": 29882 }, { "epoch": 0.8205107084019769, "grad_norm": 0.38391855359077454, "learning_rate": 1.280707701429835e-05, "loss": 0.475, "step": 29883 }, { "epoch": 0.8205381658429435, "grad_norm": 0.34838199615478516, "learning_rate": 1.28066624832789e-05, "loss": 0.4463, "step": 29884 }, { "epoch": 0.8205656232839099, "grad_norm": 0.34607356786727905, "learning_rate": 1.280624794702413e-05, "loss": 0.4721, "step": 29885 }, { "epoch": 0.8205930807248765, "grad_norm": 0.41536784172058105, "learning_rate": 1.2805833405534812e-05, "loss": 0.4354, "step": 29886 }, { "epoch": 0.8206205381658429, "grad_norm": 0.34718164801597595, "learning_rate": 1.2805418858811715e-05, "loss": 0.4472, "step": 29887 }, { "epoch": 0.8206479956068095, "grad_norm": 0.5155624151229858, "learning_rate": 1.2805004306855621e-05, "loss": 0.4329, "step": 29888 }, { "epoch": 0.8206754530477759, "grad_norm": 0.44949495792388916, "learning_rate": 1.2804589749667297e-05, "loss": 0.5001, "step": 29889 }, { "epoch": 0.8207029104887424, "grad_norm": 0.387574166059494, "learning_rate": 1.2804175187247521e-05, "loss": 0.4559, "step": 29890 }, { "epoch": 0.820730367929709, "grad_norm": 0.39950132369995117, "learning_rate": 1.280376061959706e-05, "loss": 0.509, "step": 29891 }, { "epoch": 0.8207578253706754, "grad_norm": 0.41575077176094055, "learning_rate": 1.2803346046716692e-05, "loss": 0.528, "step": 29892 }, { "epoch": 0.820785282811642, "grad_norm": 0.3535975217819214, "learning_rate": 1.2802931468607187e-05, "loss": 0.4468, "step": 29893 }, { "epoch": 0.8208127402526084, "grad_norm": 0.3681621849536896, "learning_rate": 1.2802516885269323e-05, "loss": 0.4727, "step": 29894 }, { "epoch": 0.820840197693575, "grad_norm": 0.379279226064682, "learning_rate": 1.2802102296703867e-05, "loss": 0.5322, "step": 29895 }, { "epoch": 0.8208676551345414, "grad_norm": 0.3884551525115967, "learning_rate": 1.28016877029116e-05, "loss": 0.537, "step": 29896 }, { "epoch": 0.820895112575508, "grad_norm": 0.37283244729042053, "learning_rate": 1.2801273103893289e-05, "loss": 0.5382, "step": 29897 }, { "epoch": 0.8209225700164745, "grad_norm": 0.3809974193572998, "learning_rate": 1.2800858499649709e-05, "loss": 0.4511, "step": 29898 }, { "epoch": 0.820950027457441, "grad_norm": 0.41279274225234985, "learning_rate": 1.2800443890181635e-05, "loss": 0.4925, "step": 29899 }, { "epoch": 0.8209774848984075, "grad_norm": 0.3863135874271393, "learning_rate": 1.2800029275489838e-05, "loss": 0.5206, "step": 29900 }, { "epoch": 0.8210049423393739, "grad_norm": 0.39064309000968933, "learning_rate": 1.2799614655575094e-05, "loss": 0.5195, "step": 29901 }, { "epoch": 0.8210323997803405, "grad_norm": 0.37219980359077454, "learning_rate": 1.2799200030438174e-05, "loss": 0.4593, "step": 29902 }, { "epoch": 0.8210598572213069, "grad_norm": 0.40074869990348816, "learning_rate": 1.2798785400079854e-05, "loss": 0.5235, "step": 29903 }, { "epoch": 0.8210873146622735, "grad_norm": 0.40831074118614197, "learning_rate": 1.2798370764500907e-05, "loss": 0.5429, "step": 29904 }, { "epoch": 0.82111477210324, "grad_norm": 0.40143465995788574, "learning_rate": 1.2797956123702103e-05, "loss": 0.5066, "step": 29905 }, { "epoch": 0.8211422295442065, "grad_norm": 0.44001856446266174, "learning_rate": 1.2797541477684218e-05, "loss": 0.477, "step": 29906 }, { "epoch": 0.821169686985173, "grad_norm": 0.35562053322792053, "learning_rate": 1.2797126826448025e-05, "loss": 0.5422, "step": 29907 }, { "epoch": 0.8211971444261394, "grad_norm": 0.385434091091156, "learning_rate": 1.2796712169994302e-05, "loss": 0.5467, "step": 29908 }, { "epoch": 0.821224601867106, "grad_norm": 0.4164957106113434, "learning_rate": 1.2796297508323814e-05, "loss": 0.488, "step": 29909 }, { "epoch": 0.8212520593080724, "grad_norm": 0.3466986119747162, "learning_rate": 1.2795882841437337e-05, "loss": 0.5509, "step": 29910 }, { "epoch": 0.821279516749039, "grad_norm": 0.3941405415534973, "learning_rate": 1.279546816933565e-05, "loss": 0.4694, "step": 29911 }, { "epoch": 0.8213069741900055, "grad_norm": 0.3895045518875122, "learning_rate": 1.2795053492019523e-05, "loss": 0.4959, "step": 29912 }, { "epoch": 0.821334431630972, "grad_norm": 0.3811872899532318, "learning_rate": 1.2794638809489726e-05, "loss": 0.4561, "step": 29913 }, { "epoch": 0.8213618890719385, "grad_norm": 0.36958014965057373, "learning_rate": 1.2794224121747036e-05, "loss": 0.4692, "step": 29914 }, { "epoch": 0.821389346512905, "grad_norm": 0.3804799020290375, "learning_rate": 1.2793809428792227e-05, "loss": 0.4532, "step": 29915 }, { "epoch": 0.8214168039538715, "grad_norm": 0.34270092844963074, "learning_rate": 1.2793394730626074e-05, "loss": 0.4669, "step": 29916 }, { "epoch": 0.821444261394838, "grad_norm": 0.36368194222450256, "learning_rate": 1.2792980027249346e-05, "loss": 0.5075, "step": 29917 }, { "epoch": 0.8214717188358045, "grad_norm": 0.3523280918598175, "learning_rate": 1.279256531866282e-05, "loss": 0.509, "step": 29918 }, { "epoch": 0.821499176276771, "grad_norm": 0.39013880491256714, "learning_rate": 1.2792150604867267e-05, "loss": 0.5199, "step": 29919 }, { "epoch": 0.8215266337177375, "grad_norm": 0.46968773007392883, "learning_rate": 1.279173588586346e-05, "loss": 0.5053, "step": 29920 }, { "epoch": 0.821554091158704, "grad_norm": 0.4417799711227417, "learning_rate": 1.2791321161652179e-05, "loss": 0.4593, "step": 29921 }, { "epoch": 0.8215815485996705, "grad_norm": 0.48014965653419495, "learning_rate": 1.2790906432234192e-05, "loss": 0.5026, "step": 29922 }, { "epoch": 0.821609006040637, "grad_norm": 0.395215779542923, "learning_rate": 1.2790491697610272e-05, "loss": 0.5504, "step": 29923 }, { "epoch": 0.8216364634816035, "grad_norm": 0.4149378538131714, "learning_rate": 1.2790076957781196e-05, "loss": 0.3855, "step": 29924 }, { "epoch": 0.82166392092257, "grad_norm": 0.40790456533432007, "learning_rate": 1.2789662212747737e-05, "loss": 0.5428, "step": 29925 }, { "epoch": 0.8216913783635366, "grad_norm": 0.4447188973426819, "learning_rate": 1.2789247462510663e-05, "loss": 0.4653, "step": 29926 }, { "epoch": 0.821718835804503, "grad_norm": 0.3934735655784607, "learning_rate": 1.2788832707070757e-05, "loss": 0.3929, "step": 29927 }, { "epoch": 0.8217462932454696, "grad_norm": 0.4023551344871521, "learning_rate": 1.2788417946428787e-05, "loss": 0.4554, "step": 29928 }, { "epoch": 0.821773750686436, "grad_norm": 0.39139485359191895, "learning_rate": 1.2788003180585527e-05, "loss": 0.4843, "step": 29929 }, { "epoch": 0.8218012081274025, "grad_norm": 0.5988703966140747, "learning_rate": 1.2787588409541751e-05, "loss": 0.4933, "step": 29930 }, { "epoch": 0.821828665568369, "grad_norm": 0.3873371481895447, "learning_rate": 1.2787173633298232e-05, "loss": 0.4846, "step": 29931 }, { "epoch": 0.8218561230093355, "grad_norm": 0.41739487648010254, "learning_rate": 1.2786758851855746e-05, "loss": 0.548, "step": 29932 }, { "epoch": 0.8218835804503021, "grad_norm": 0.6519547700881958, "learning_rate": 1.2786344065215064e-05, "loss": 0.5361, "step": 29933 }, { "epoch": 0.8219110378912685, "grad_norm": 0.3546435236930847, "learning_rate": 1.2785929273376963e-05, "loss": 0.4308, "step": 29934 }, { "epoch": 0.8219384953322351, "grad_norm": 0.34583303332328796, "learning_rate": 1.2785514476342213e-05, "loss": 0.507, "step": 29935 }, { "epoch": 0.8219659527732015, "grad_norm": 0.3663499355316162, "learning_rate": 1.2785099674111589e-05, "loss": 0.4887, "step": 29936 }, { "epoch": 0.821993410214168, "grad_norm": 0.5385345816612244, "learning_rate": 1.2784684866685866e-05, "loss": 0.5456, "step": 29937 }, { "epoch": 0.8220208676551345, "grad_norm": 0.3891913592815399, "learning_rate": 1.2784270054065817e-05, "loss": 0.4265, "step": 29938 }, { "epoch": 0.822048325096101, "grad_norm": 0.3883543610572815, "learning_rate": 1.2783855236252215e-05, "loss": 0.4804, "step": 29939 }, { "epoch": 0.8220757825370676, "grad_norm": 0.33860111236572266, "learning_rate": 1.2783440413245836e-05, "loss": 0.4411, "step": 29940 }, { "epoch": 0.822103239978034, "grad_norm": 0.39401957392692566, "learning_rate": 1.2783025585047454e-05, "loss": 0.4939, "step": 29941 }, { "epoch": 0.8221306974190006, "grad_norm": 0.3971298336982727, "learning_rate": 1.2782610751657838e-05, "loss": 0.4963, "step": 29942 }, { "epoch": 0.822158154859967, "grad_norm": 0.39848434925079346, "learning_rate": 1.2782195913077764e-05, "loss": 0.4923, "step": 29943 }, { "epoch": 0.8221856123009336, "grad_norm": 0.4648064970970154, "learning_rate": 1.2781781069308008e-05, "loss": 0.5747, "step": 29944 }, { "epoch": 0.8222130697419, "grad_norm": 0.39721524715423584, "learning_rate": 1.2781366220349343e-05, "loss": 0.5657, "step": 29945 }, { "epoch": 0.8222405271828666, "grad_norm": 0.41288328170776367, "learning_rate": 1.2780951366202543e-05, "loss": 0.4754, "step": 29946 }, { "epoch": 0.8222679846238331, "grad_norm": 0.3748377859592438, "learning_rate": 1.2780536506868379e-05, "loss": 0.5287, "step": 29947 }, { "epoch": 0.8222954420647995, "grad_norm": 0.39680829644203186, "learning_rate": 1.2780121642347628e-05, "loss": 0.5136, "step": 29948 }, { "epoch": 0.8223228995057661, "grad_norm": 0.4079782962799072, "learning_rate": 1.2779706772641064e-05, "loss": 0.5923, "step": 29949 }, { "epoch": 0.8223503569467325, "grad_norm": 0.40668410062789917, "learning_rate": 1.2779291897749457e-05, "loss": 0.4765, "step": 29950 }, { "epoch": 0.8223778143876991, "grad_norm": 0.3868073523044586, "learning_rate": 1.2778877017673586e-05, "loss": 0.4281, "step": 29951 }, { "epoch": 0.8224052718286655, "grad_norm": 0.3639702796936035, "learning_rate": 1.2778462132414222e-05, "loss": 0.491, "step": 29952 }, { "epoch": 0.8224327292696321, "grad_norm": 0.3745518922805786, "learning_rate": 1.2778047241972138e-05, "loss": 0.4388, "step": 29953 }, { "epoch": 0.8224601867105986, "grad_norm": 0.3815753161907196, "learning_rate": 1.277763234634811e-05, "loss": 0.4664, "step": 29954 }, { "epoch": 0.8224876441515651, "grad_norm": 0.45984596014022827, "learning_rate": 1.2777217445542912e-05, "loss": 0.4529, "step": 29955 }, { "epoch": 0.8225151015925316, "grad_norm": 0.3938109874725342, "learning_rate": 1.2776802539557316e-05, "loss": 0.4895, "step": 29956 }, { "epoch": 0.822542559033498, "grad_norm": 0.3679609000682831, "learning_rate": 1.2776387628392098e-05, "loss": 0.4738, "step": 29957 }, { "epoch": 0.8225700164744646, "grad_norm": 0.3644793927669525, "learning_rate": 1.2775972712048028e-05, "loss": 0.4357, "step": 29958 }, { "epoch": 0.822597473915431, "grad_norm": 0.3909688889980316, "learning_rate": 1.2775557790525888e-05, "loss": 0.4717, "step": 29959 }, { "epoch": 0.8226249313563976, "grad_norm": 0.4523567259311676, "learning_rate": 1.2775142863826442e-05, "loss": 0.5753, "step": 29960 }, { "epoch": 0.8226523887973641, "grad_norm": 0.4281628131866455, "learning_rate": 1.2774727931950472e-05, "loss": 0.529, "step": 29961 }, { "epoch": 0.8226798462383306, "grad_norm": 0.44045910239219666, "learning_rate": 1.2774312994898747e-05, "loss": 0.4804, "step": 29962 }, { "epoch": 0.8227073036792971, "grad_norm": 0.3357817828655243, "learning_rate": 1.2773898052672045e-05, "loss": 0.4307, "step": 29963 }, { "epoch": 0.8227347611202636, "grad_norm": 0.5774185657501221, "learning_rate": 1.2773483105271136e-05, "loss": 0.5336, "step": 29964 }, { "epoch": 0.8227622185612301, "grad_norm": 0.36748388409614563, "learning_rate": 1.2773068152696795e-05, "loss": 0.4111, "step": 29965 }, { "epoch": 0.8227896760021965, "grad_norm": 0.41485676169395447, "learning_rate": 1.2772653194949798e-05, "loss": 0.4646, "step": 29966 }, { "epoch": 0.8228171334431631, "grad_norm": 0.4384790062904358, "learning_rate": 1.2772238232030918e-05, "loss": 0.4943, "step": 29967 }, { "epoch": 0.8228445908841296, "grad_norm": 0.3912218511104584, "learning_rate": 1.277182326394093e-05, "loss": 0.5567, "step": 29968 }, { "epoch": 0.8228720483250961, "grad_norm": 0.38021907210350037, "learning_rate": 1.2771408290680608e-05, "loss": 0.3705, "step": 29969 }, { "epoch": 0.8228995057660626, "grad_norm": 0.368449330329895, "learning_rate": 1.277099331225072e-05, "loss": 0.4268, "step": 29970 }, { "epoch": 0.8229269632070291, "grad_norm": 0.4010332524776459, "learning_rate": 1.2770578328652047e-05, "loss": 0.5891, "step": 29971 }, { "epoch": 0.8229544206479956, "grad_norm": 0.3945092558860779, "learning_rate": 1.2770163339885362e-05, "loss": 0.5276, "step": 29972 }, { "epoch": 0.8229818780889621, "grad_norm": 0.43296948075294495, "learning_rate": 1.2769748345951438e-05, "loss": 0.3899, "step": 29973 }, { "epoch": 0.8230093355299286, "grad_norm": 0.3877841830253601, "learning_rate": 1.2769333346851052e-05, "loss": 0.5319, "step": 29974 }, { "epoch": 0.8230367929708952, "grad_norm": 0.3753948509693146, "learning_rate": 1.276891834258497e-05, "loss": 0.521, "step": 29975 }, { "epoch": 0.8230642504118616, "grad_norm": 0.35551419854164124, "learning_rate": 1.2768503333153977e-05, "loss": 0.3981, "step": 29976 }, { "epoch": 0.8230917078528281, "grad_norm": 0.4029289782047272, "learning_rate": 1.2768088318558838e-05, "loss": 0.5081, "step": 29977 }, { "epoch": 0.8231191652937946, "grad_norm": 0.8277904987335205, "learning_rate": 1.2767673298800331e-05, "loss": 0.4955, "step": 29978 }, { "epoch": 0.8231466227347611, "grad_norm": 0.4085511565208435, "learning_rate": 1.2767258273879232e-05, "loss": 0.4482, "step": 29979 }, { "epoch": 0.8231740801757276, "grad_norm": 0.4499463737010956, "learning_rate": 1.2766843243796312e-05, "loss": 0.4876, "step": 29980 }, { "epoch": 0.8232015376166941, "grad_norm": 0.42418110370635986, "learning_rate": 1.2766428208552348e-05, "loss": 0.4974, "step": 29981 }, { "epoch": 0.8232289950576607, "grad_norm": 0.3913818895816803, "learning_rate": 1.2766013168148109e-05, "loss": 0.5453, "step": 29982 }, { "epoch": 0.8232564524986271, "grad_norm": 0.4030565917491913, "learning_rate": 1.2765598122584375e-05, "loss": 0.4955, "step": 29983 }, { "epoch": 0.8232839099395937, "grad_norm": 0.39294466376304626, "learning_rate": 1.2765183071861917e-05, "loss": 0.4096, "step": 29984 }, { "epoch": 0.8233113673805601, "grad_norm": 0.3509330451488495, "learning_rate": 1.276476801598151e-05, "loss": 0.4509, "step": 29985 }, { "epoch": 0.8233388248215266, "grad_norm": 0.4455319941043854, "learning_rate": 1.2764352954943928e-05, "loss": 0.5588, "step": 29986 }, { "epoch": 0.8233662822624931, "grad_norm": 0.4269362986087799, "learning_rate": 1.276393788874995e-05, "loss": 0.561, "step": 29987 }, { "epoch": 0.8233937397034596, "grad_norm": 0.3653850853443146, "learning_rate": 1.2763522817400337e-05, "loss": 0.4691, "step": 29988 }, { "epoch": 0.8234211971444262, "grad_norm": 0.3845020830631256, "learning_rate": 1.276310774089588e-05, "loss": 0.5371, "step": 29989 }, { "epoch": 0.8234486545853926, "grad_norm": 0.3969345688819885, "learning_rate": 1.2762692659237339e-05, "loss": 0.5298, "step": 29990 }, { "epoch": 0.8234761120263592, "grad_norm": 0.4187753200531006, "learning_rate": 1.27622775724255e-05, "loss": 0.4737, "step": 29991 }, { "epoch": 0.8235035694673256, "grad_norm": 0.4063633680343628, "learning_rate": 1.276186248046113e-05, "loss": 0.4445, "step": 29992 }, { "epoch": 0.8235310269082922, "grad_norm": 0.379369854927063, "learning_rate": 1.2761447383345002e-05, "loss": 0.4825, "step": 29993 }, { "epoch": 0.8235584843492586, "grad_norm": 0.36130291223526, "learning_rate": 1.2761032281077897e-05, "loss": 0.4766, "step": 29994 }, { "epoch": 0.8235859417902252, "grad_norm": 0.470331609249115, "learning_rate": 1.2760617173660581e-05, "loss": 0.4969, "step": 29995 }, { "epoch": 0.8236133992311917, "grad_norm": 0.39652520418167114, "learning_rate": 1.2760202061093836e-05, "loss": 0.5041, "step": 29996 }, { "epoch": 0.8236408566721581, "grad_norm": 0.3905658721923828, "learning_rate": 1.2759786943378433e-05, "loss": 0.4205, "step": 29997 }, { "epoch": 0.8236683141131247, "grad_norm": 0.4269920885562897, "learning_rate": 1.2759371820515147e-05, "loss": 0.4691, "step": 29998 }, { "epoch": 0.8236957715540911, "grad_norm": 0.39325404167175293, "learning_rate": 1.2758956692504751e-05, "loss": 0.6014, "step": 29999 }, { "epoch": 0.8237232289950577, "grad_norm": 0.3313271999359131, "learning_rate": 1.2758541559348019e-05, "loss": 0.4626, "step": 30000 }, { "epoch": 0.8237506864360241, "grad_norm": 0.42190682888031006, "learning_rate": 1.275812642104573e-05, "loss": 0.5343, "step": 30001 }, { "epoch": 0.8237781438769907, "grad_norm": 0.4376525282859802, "learning_rate": 1.2757711277598655e-05, "loss": 0.5074, "step": 30002 }, { "epoch": 0.8238056013179572, "grad_norm": 0.3896116316318512, "learning_rate": 1.2757296129007564e-05, "loss": 0.4573, "step": 30003 }, { "epoch": 0.8238330587589237, "grad_norm": 0.3991791307926178, "learning_rate": 1.2756880975273237e-05, "loss": 0.4902, "step": 30004 }, { "epoch": 0.8238605161998902, "grad_norm": 0.3675098717212677, "learning_rate": 1.275646581639645e-05, "loss": 0.5131, "step": 30005 }, { "epoch": 0.8238879736408566, "grad_norm": 0.4021913707256317, "learning_rate": 1.2756050652377972e-05, "loss": 0.6101, "step": 30006 }, { "epoch": 0.8239154310818232, "grad_norm": 0.38579806685447693, "learning_rate": 1.2755635483218582e-05, "loss": 0.5711, "step": 30007 }, { "epoch": 0.8239428885227896, "grad_norm": 0.37184837460517883, "learning_rate": 1.2755220308919048e-05, "loss": 0.4905, "step": 30008 }, { "epoch": 0.8239703459637562, "grad_norm": 0.3647450804710388, "learning_rate": 1.2754805129480152e-05, "loss": 0.5364, "step": 30009 }, { "epoch": 0.8239978034047227, "grad_norm": 0.3583522439002991, "learning_rate": 1.2754389944902665e-05, "loss": 0.491, "step": 30010 }, { "epoch": 0.8240252608456892, "grad_norm": 0.4194508194923401, "learning_rate": 1.2753974755187361e-05, "loss": 0.5487, "step": 30011 }, { "epoch": 0.8240527182866557, "grad_norm": 0.40678131580352783, "learning_rate": 1.2753559560335016e-05, "loss": 0.5814, "step": 30012 }, { "epoch": 0.8240801757276222, "grad_norm": 0.39239487051963806, "learning_rate": 1.2753144360346403e-05, "loss": 0.4455, "step": 30013 }, { "epoch": 0.8241076331685887, "grad_norm": 0.3476126790046692, "learning_rate": 1.27527291552223e-05, "loss": 0.3995, "step": 30014 }, { "epoch": 0.8241350906095551, "grad_norm": 0.40221983194351196, "learning_rate": 1.2752313944963475e-05, "loss": 0.4501, "step": 30015 }, { "epoch": 0.8241625480505217, "grad_norm": 0.3327177166938782, "learning_rate": 1.2751898729570704e-05, "loss": 0.4882, "step": 30016 }, { "epoch": 0.8241900054914882, "grad_norm": 0.4446655511856079, "learning_rate": 1.2751483509044768e-05, "loss": 0.5171, "step": 30017 }, { "epoch": 0.8242174629324547, "grad_norm": 0.4090450704097748, "learning_rate": 1.2751068283386436e-05, "loss": 0.5762, "step": 30018 }, { "epoch": 0.8242449203734212, "grad_norm": 0.37045615911483765, "learning_rate": 1.2750653052596482e-05, "loss": 0.5186, "step": 30019 }, { "epoch": 0.8242723778143877, "grad_norm": 0.4112650752067566, "learning_rate": 1.2750237816675686e-05, "loss": 0.501, "step": 30020 }, { "epoch": 0.8242998352553542, "grad_norm": 0.43648308515548706, "learning_rate": 1.2749822575624814e-05, "loss": 0.5663, "step": 30021 }, { "epoch": 0.8243272926963207, "grad_norm": 0.40236660838127136, "learning_rate": 1.2749407329444647e-05, "loss": 0.4694, "step": 30022 }, { "epoch": 0.8243547501372872, "grad_norm": 0.4052143692970276, "learning_rate": 1.274899207813596e-05, "loss": 0.598, "step": 30023 }, { "epoch": 0.8243822075782538, "grad_norm": 0.40498629212379456, "learning_rate": 1.2748576821699521e-05, "loss": 0.5215, "step": 30024 }, { "epoch": 0.8244096650192202, "grad_norm": 0.3881228566169739, "learning_rate": 1.2748161560136113e-05, "loss": 0.4459, "step": 30025 }, { "epoch": 0.8244371224601867, "grad_norm": 0.4377542734146118, "learning_rate": 1.2747746293446503e-05, "loss": 0.4805, "step": 30026 }, { "epoch": 0.8244645799011532, "grad_norm": 0.39185309410095215, "learning_rate": 1.2747331021631472e-05, "loss": 0.4323, "step": 30027 }, { "epoch": 0.8244920373421197, "grad_norm": 0.3949560523033142, "learning_rate": 1.2746915744691793e-05, "loss": 0.4833, "step": 30028 }, { "epoch": 0.8245194947830862, "grad_norm": 0.3868871331214905, "learning_rate": 1.2746500462628236e-05, "loss": 0.4647, "step": 30029 }, { "epoch": 0.8245469522240527, "grad_norm": 0.4288168251514435, "learning_rate": 1.2746085175441581e-05, "loss": 0.5402, "step": 30030 }, { "epoch": 0.8245744096650193, "grad_norm": 0.3613189160823822, "learning_rate": 1.2745669883132598e-05, "loss": 0.4258, "step": 30031 }, { "epoch": 0.8246018671059857, "grad_norm": 0.6592974066734314, "learning_rate": 1.2745254585702067e-05, "loss": 0.492, "step": 30032 }, { "epoch": 0.8246293245469523, "grad_norm": 0.39612558484077454, "learning_rate": 1.274483928315076e-05, "loss": 0.5031, "step": 30033 }, { "epoch": 0.8246567819879187, "grad_norm": 0.3817562758922577, "learning_rate": 1.274442397547945e-05, "loss": 0.4313, "step": 30034 }, { "epoch": 0.8246842394288852, "grad_norm": 0.39822861552238464, "learning_rate": 1.2744008662688915e-05, "loss": 0.5055, "step": 30035 }, { "epoch": 0.8247116968698517, "grad_norm": 0.37748175859451294, "learning_rate": 1.274359334477993e-05, "loss": 0.501, "step": 30036 }, { "epoch": 0.8247391543108182, "grad_norm": 0.4502507150173187, "learning_rate": 1.2743178021753265e-05, "loss": 0.4685, "step": 30037 }, { "epoch": 0.8247666117517848, "grad_norm": 0.42123207449913025, "learning_rate": 1.2742762693609695e-05, "loss": 0.4623, "step": 30038 }, { "epoch": 0.8247940691927512, "grad_norm": 0.3928719162940979, "learning_rate": 1.274234736035e-05, "loss": 0.4374, "step": 30039 }, { "epoch": 0.8248215266337178, "grad_norm": 0.48763883113861084, "learning_rate": 1.2741932021974953e-05, "loss": 0.4966, "step": 30040 }, { "epoch": 0.8248489840746842, "grad_norm": 0.4133080244064331, "learning_rate": 1.274151667848533e-05, "loss": 0.4802, "step": 30041 }, { "epoch": 0.8248764415156508, "grad_norm": 0.36849987506866455, "learning_rate": 1.2741101329881898e-05, "loss": 0.529, "step": 30042 }, { "epoch": 0.8249038989566172, "grad_norm": 0.49688974022865295, "learning_rate": 1.274068597616544e-05, "loss": 0.5193, "step": 30043 }, { "epoch": 0.8249313563975837, "grad_norm": 0.34656471014022827, "learning_rate": 1.2740270617336725e-05, "loss": 0.4804, "step": 30044 }, { "epoch": 0.8249588138385503, "grad_norm": 0.34300270676612854, "learning_rate": 1.2739855253396533e-05, "loss": 0.4406, "step": 30045 }, { "epoch": 0.8249862712795167, "grad_norm": 0.41893136501312256, "learning_rate": 1.2739439884345637e-05, "loss": 0.5029, "step": 30046 }, { "epoch": 0.8250137287204833, "grad_norm": 0.39303460717201233, "learning_rate": 1.273902451018481e-05, "loss": 0.4781, "step": 30047 }, { "epoch": 0.8250411861614497, "grad_norm": 0.36963775753974915, "learning_rate": 1.2738609130914829e-05, "loss": 0.4409, "step": 30048 }, { "epoch": 0.8250686436024163, "grad_norm": 0.44479629397392273, "learning_rate": 1.273819374653647e-05, "loss": 0.4713, "step": 30049 }, { "epoch": 0.8250961010433827, "grad_norm": 0.3783676326274872, "learning_rate": 1.2737778357050501e-05, "loss": 0.4813, "step": 30050 }, { "epoch": 0.8251235584843493, "grad_norm": 0.5109935402870178, "learning_rate": 1.2737362962457704e-05, "loss": 0.4319, "step": 30051 }, { "epoch": 0.8251510159253158, "grad_norm": 0.3941800594329834, "learning_rate": 1.273694756275885e-05, "loss": 0.561, "step": 30052 }, { "epoch": 0.8251784733662823, "grad_norm": 0.3979749381542206, "learning_rate": 1.2736532157954718e-05, "loss": 0.461, "step": 30053 }, { "epoch": 0.8252059308072488, "grad_norm": 0.40778252482414246, "learning_rate": 1.2736116748046079e-05, "loss": 0.5404, "step": 30054 }, { "epoch": 0.8252333882482152, "grad_norm": 0.37501847743988037, "learning_rate": 1.2735701333033707e-05, "loss": 0.4959, "step": 30055 }, { "epoch": 0.8252608456891818, "grad_norm": 0.4288727343082428, "learning_rate": 1.2735285912918382e-05, "loss": 0.4935, "step": 30056 }, { "epoch": 0.8252883031301482, "grad_norm": 0.39976486563682556, "learning_rate": 1.2734870487700875e-05, "loss": 0.5143, "step": 30057 }, { "epoch": 0.8253157605711148, "grad_norm": 0.359549880027771, "learning_rate": 1.273445505738196e-05, "loss": 0.4336, "step": 30058 }, { "epoch": 0.8253432180120813, "grad_norm": 0.35370197892189026, "learning_rate": 1.2734039621962416e-05, "loss": 0.4658, "step": 30059 }, { "epoch": 0.8253706754530478, "grad_norm": 0.4350409507751465, "learning_rate": 1.2733624181443013e-05, "loss": 0.5244, "step": 30060 }, { "epoch": 0.8253981328940143, "grad_norm": 0.3538420498371124, "learning_rate": 1.2733208735824528e-05, "loss": 0.5015, "step": 30061 }, { "epoch": 0.8254255903349808, "grad_norm": 0.4439171254634857, "learning_rate": 1.2732793285107738e-05, "loss": 0.5278, "step": 30062 }, { "epoch": 0.8254530477759473, "grad_norm": 0.39641737937927246, "learning_rate": 1.2732377829293417e-05, "loss": 0.5612, "step": 30063 }, { "epoch": 0.8254805052169137, "grad_norm": 0.4680884778499603, "learning_rate": 1.273196236838234e-05, "loss": 0.4915, "step": 30064 }, { "epoch": 0.8255079626578803, "grad_norm": 0.33782559633255005, "learning_rate": 1.2731546902375277e-05, "loss": 0.4798, "step": 30065 }, { "epoch": 0.8255354200988468, "grad_norm": 0.4061569273471832, "learning_rate": 1.2731131431273012e-05, "loss": 0.4155, "step": 30066 }, { "epoch": 0.8255628775398133, "grad_norm": 0.38242581486701965, "learning_rate": 1.2730715955076314e-05, "loss": 0.4768, "step": 30067 }, { "epoch": 0.8255903349807798, "grad_norm": 0.4381382167339325, "learning_rate": 1.2730300473785957e-05, "loss": 0.5623, "step": 30068 }, { "epoch": 0.8256177924217463, "grad_norm": 0.4432758688926697, "learning_rate": 1.2729884987402722e-05, "loss": 0.4646, "step": 30069 }, { "epoch": 0.8256452498627128, "grad_norm": 0.38696810603141785, "learning_rate": 1.2729469495927376e-05, "loss": 0.4252, "step": 30070 }, { "epoch": 0.8256727073036793, "grad_norm": 0.3531753122806549, "learning_rate": 1.27290539993607e-05, "loss": 0.4265, "step": 30071 }, { "epoch": 0.8257001647446458, "grad_norm": 0.38056692481040955, "learning_rate": 1.2728638497703467e-05, "loss": 0.4811, "step": 30072 }, { "epoch": 0.8257276221856124, "grad_norm": 0.40290361642837524, "learning_rate": 1.2728222990956452e-05, "loss": 0.5614, "step": 30073 }, { "epoch": 0.8257550796265788, "grad_norm": 0.38526788353919983, "learning_rate": 1.272780747912043e-05, "loss": 0.5256, "step": 30074 }, { "epoch": 0.8257825370675453, "grad_norm": 0.46520668268203735, "learning_rate": 1.2727391962196179e-05, "loss": 0.3929, "step": 30075 }, { "epoch": 0.8258099945085118, "grad_norm": 0.35699141025543213, "learning_rate": 1.2726976440184469e-05, "loss": 0.4496, "step": 30076 }, { "epoch": 0.8258374519494783, "grad_norm": 0.3921355903148651, "learning_rate": 1.2726560913086079e-05, "loss": 0.5055, "step": 30077 }, { "epoch": 0.8258649093904448, "grad_norm": 0.3734223246574402, "learning_rate": 1.2726145380901782e-05, "loss": 0.525, "step": 30078 }, { "epoch": 0.8258923668314113, "grad_norm": 0.37923407554626465, "learning_rate": 1.2725729843632356e-05, "loss": 0.4201, "step": 30079 }, { "epoch": 0.8259198242723779, "grad_norm": 0.37023797631263733, "learning_rate": 1.272531430127857e-05, "loss": 0.4102, "step": 30080 }, { "epoch": 0.8259472817133443, "grad_norm": 0.3981805145740509, "learning_rate": 1.2724898753841205e-05, "loss": 0.413, "step": 30081 }, { "epoch": 0.8259747391543109, "grad_norm": 0.36054694652557373, "learning_rate": 1.2724483201321034e-05, "loss": 0.449, "step": 30082 }, { "epoch": 0.8260021965952773, "grad_norm": 0.3362351953983307, "learning_rate": 1.2724067643718831e-05, "loss": 0.5562, "step": 30083 }, { "epoch": 0.8260296540362438, "grad_norm": 0.3543832302093506, "learning_rate": 1.2723652081035376e-05, "loss": 0.4922, "step": 30084 }, { "epoch": 0.8260571114772103, "grad_norm": 0.3987925946712494, "learning_rate": 1.2723236513271438e-05, "loss": 0.5044, "step": 30085 }, { "epoch": 0.8260845689181768, "grad_norm": 0.4136558771133423, "learning_rate": 1.2722820940427794e-05, "loss": 0.3967, "step": 30086 }, { "epoch": 0.8261120263591434, "grad_norm": 0.3615911602973938, "learning_rate": 1.2722405362505221e-05, "loss": 0.4499, "step": 30087 }, { "epoch": 0.8261394838001098, "grad_norm": 0.4165189862251282, "learning_rate": 1.2721989779504493e-05, "loss": 0.5151, "step": 30088 }, { "epoch": 0.8261669412410764, "grad_norm": 0.3700328767299652, "learning_rate": 1.2721574191426384e-05, "loss": 0.3437, "step": 30089 }, { "epoch": 0.8261943986820428, "grad_norm": 0.3384542167186737, "learning_rate": 1.2721158598271674e-05, "loss": 0.3734, "step": 30090 }, { "epoch": 0.8262218561230094, "grad_norm": 0.41426801681518555, "learning_rate": 1.2720743000041131e-05, "loss": 0.555, "step": 30091 }, { "epoch": 0.8262493135639758, "grad_norm": 0.4262765944004059, "learning_rate": 1.2720327396735536e-05, "loss": 0.5074, "step": 30092 }, { "epoch": 0.8262767710049423, "grad_norm": 1.041501522064209, "learning_rate": 1.2719911788355661e-05, "loss": 0.4802, "step": 30093 }, { "epoch": 0.8263042284459089, "grad_norm": 0.3926730453968048, "learning_rate": 1.2719496174902283e-05, "loss": 0.4979, "step": 30094 }, { "epoch": 0.8263316858868753, "grad_norm": 0.3984961211681366, "learning_rate": 1.271908055637618e-05, "loss": 0.4541, "step": 30095 }, { "epoch": 0.8263591433278419, "grad_norm": 0.3787977993488312, "learning_rate": 1.2718664932778117e-05, "loss": 0.567, "step": 30096 }, { "epoch": 0.8263866007688083, "grad_norm": 0.3780995309352875, "learning_rate": 1.2718249304108882e-05, "loss": 0.5596, "step": 30097 }, { "epoch": 0.8264140582097749, "grad_norm": 0.3820092976093292, "learning_rate": 1.2717833670369245e-05, "loss": 0.5011, "step": 30098 }, { "epoch": 0.8264415156507413, "grad_norm": 0.34030672907829285, "learning_rate": 1.2717418031559975e-05, "loss": 0.4785, "step": 30099 }, { "epoch": 0.8264689730917079, "grad_norm": 0.36741650104522705, "learning_rate": 1.2717002387681858e-05, "loss": 0.4345, "step": 30100 }, { "epoch": 0.8264964305326744, "grad_norm": 0.35311800241470337, "learning_rate": 1.2716586738735661e-05, "loss": 0.4831, "step": 30101 }, { "epoch": 0.8265238879736408, "grad_norm": 0.41958972811698914, "learning_rate": 1.2716171084722166e-05, "loss": 0.6139, "step": 30102 }, { "epoch": 0.8265513454146074, "grad_norm": 0.387897253036499, "learning_rate": 1.2715755425642145e-05, "loss": 0.4668, "step": 30103 }, { "epoch": 0.8265788028555738, "grad_norm": 0.4208543002605438, "learning_rate": 1.2715339761496369e-05, "loss": 0.5108, "step": 30104 }, { "epoch": 0.8266062602965404, "grad_norm": 0.3848203122615814, "learning_rate": 1.2714924092285622e-05, "loss": 0.5457, "step": 30105 }, { "epoch": 0.8266337177375068, "grad_norm": 0.36472487449645996, "learning_rate": 1.2714508418010672e-05, "loss": 0.4319, "step": 30106 }, { "epoch": 0.8266611751784734, "grad_norm": 0.36384403705596924, "learning_rate": 1.2714092738672299e-05, "loss": 0.5047, "step": 30107 }, { "epoch": 0.8266886326194399, "grad_norm": 0.3845245838165283, "learning_rate": 1.2713677054271278e-05, "loss": 0.54, "step": 30108 }, { "epoch": 0.8267160900604064, "grad_norm": 0.3769253194332123, "learning_rate": 1.271326136480838e-05, "loss": 0.4752, "step": 30109 }, { "epoch": 0.8267435475013729, "grad_norm": 0.38465461134910583, "learning_rate": 1.2712845670284386e-05, "loss": 0.4791, "step": 30110 }, { "epoch": 0.8267710049423393, "grad_norm": 0.3746567666530609, "learning_rate": 1.271242997070007e-05, "loss": 0.4809, "step": 30111 }, { "epoch": 0.8267984623833059, "grad_norm": 0.3916131556034088, "learning_rate": 1.2712014266056203e-05, "loss": 0.5733, "step": 30112 }, { "epoch": 0.8268259198242723, "grad_norm": 0.3987334668636322, "learning_rate": 1.2711598556353567e-05, "loss": 0.481, "step": 30113 }, { "epoch": 0.8268533772652389, "grad_norm": 0.430874228477478, "learning_rate": 1.2711182841592931e-05, "loss": 0.4309, "step": 30114 }, { "epoch": 0.8268808347062054, "grad_norm": 0.6166215538978577, "learning_rate": 1.2710767121775076e-05, "loss": 0.5483, "step": 30115 }, { "epoch": 0.8269082921471719, "grad_norm": 0.42193493247032166, "learning_rate": 1.2710351396900778e-05, "loss": 0.4804, "step": 30116 }, { "epoch": 0.8269357495881384, "grad_norm": 0.3564535975456238, "learning_rate": 1.2709935666970803e-05, "loss": 0.4864, "step": 30117 }, { "epoch": 0.8269632070291049, "grad_norm": 0.38248199224472046, "learning_rate": 1.2709519931985938e-05, "loss": 0.4735, "step": 30118 }, { "epoch": 0.8269906644700714, "grad_norm": 0.3628825545310974, "learning_rate": 1.270910419194695e-05, "loss": 0.4974, "step": 30119 }, { "epoch": 0.8270181219110379, "grad_norm": 0.4056534171104431, "learning_rate": 1.2708688446854623e-05, "loss": 0.408, "step": 30120 }, { "epoch": 0.8270455793520044, "grad_norm": 0.38093435764312744, "learning_rate": 1.2708272696709722e-05, "loss": 0.5439, "step": 30121 }, { "epoch": 0.827073036792971, "grad_norm": 0.3965618312358856, "learning_rate": 1.2707856941513032e-05, "loss": 0.5498, "step": 30122 }, { "epoch": 0.8271004942339374, "grad_norm": 0.3851867616176605, "learning_rate": 1.2707441181265324e-05, "loss": 0.4833, "step": 30123 }, { "epoch": 0.8271279516749039, "grad_norm": 0.36996588110923767, "learning_rate": 1.2707025415967371e-05, "loss": 0.4948, "step": 30124 }, { "epoch": 0.8271554091158704, "grad_norm": 0.4340568482875824, "learning_rate": 1.2706609645619957e-05, "loss": 0.5249, "step": 30125 }, { "epoch": 0.8271828665568369, "grad_norm": 0.3755122423171997, "learning_rate": 1.2706193870223847e-05, "loss": 0.4781, "step": 30126 }, { "epoch": 0.8272103239978034, "grad_norm": 0.4060381352901459, "learning_rate": 1.2705778089779823e-05, "loss": 0.4389, "step": 30127 }, { "epoch": 0.8272377814387699, "grad_norm": 0.3720158636569977, "learning_rate": 1.2705362304288661e-05, "loss": 0.4482, "step": 30128 }, { "epoch": 0.8272652388797364, "grad_norm": 0.48068419098854065, "learning_rate": 1.2704946513751132e-05, "loss": 0.5242, "step": 30129 }, { "epoch": 0.8272926963207029, "grad_norm": 0.5425692200660706, "learning_rate": 1.2704530718168018e-05, "loss": 0.4399, "step": 30130 }, { "epoch": 0.8273201537616695, "grad_norm": 0.4231375455856323, "learning_rate": 1.270411491754009e-05, "loss": 0.5214, "step": 30131 }, { "epoch": 0.8273476112026359, "grad_norm": 0.32926681637763977, "learning_rate": 1.2703699111868125e-05, "loss": 0.471, "step": 30132 }, { "epoch": 0.8273750686436024, "grad_norm": 0.40884506702423096, "learning_rate": 1.2703283301152896e-05, "loss": 0.4973, "step": 30133 }, { "epoch": 0.8274025260845689, "grad_norm": 0.42598846554756165, "learning_rate": 1.2702867485395181e-05, "loss": 0.5446, "step": 30134 }, { "epoch": 0.8274299835255354, "grad_norm": 0.36551520228385925, "learning_rate": 1.2702451664595755e-05, "loss": 0.5037, "step": 30135 }, { "epoch": 0.8274574409665019, "grad_norm": 0.3586095869541168, "learning_rate": 1.2702035838755398e-05, "loss": 0.468, "step": 30136 }, { "epoch": 0.8274848984074684, "grad_norm": 0.43615463376045227, "learning_rate": 1.2701620007874879e-05, "loss": 0.587, "step": 30137 }, { "epoch": 0.827512355848435, "grad_norm": 0.3970477283000946, "learning_rate": 1.2701204171954975e-05, "loss": 0.5834, "step": 30138 }, { "epoch": 0.8275398132894014, "grad_norm": 0.32892900705337524, "learning_rate": 1.2700788330996467e-05, "loss": 0.4238, "step": 30139 }, { "epoch": 0.827567270730368, "grad_norm": 0.43328094482421875, "learning_rate": 1.2700372485000125e-05, "loss": 0.528, "step": 30140 }, { "epoch": 0.8275947281713344, "grad_norm": 0.3569276034832001, "learning_rate": 1.2699956633966726e-05, "loss": 0.4591, "step": 30141 }, { "epoch": 0.8276221856123009, "grad_norm": 0.43493252992630005, "learning_rate": 1.2699540777897046e-05, "loss": 0.5895, "step": 30142 }, { "epoch": 0.8276496430532674, "grad_norm": 0.41313982009887695, "learning_rate": 1.2699124916791861e-05, "loss": 0.5469, "step": 30143 }, { "epoch": 0.8276771004942339, "grad_norm": 0.45179951190948486, "learning_rate": 1.2698709050651947e-05, "loss": 0.491, "step": 30144 }, { "epoch": 0.8277045579352005, "grad_norm": 0.37474048137664795, "learning_rate": 1.2698293179478078e-05, "loss": 0.5026, "step": 30145 }, { "epoch": 0.8277320153761669, "grad_norm": 0.37731871008872986, "learning_rate": 1.2697877303271034e-05, "loss": 0.4817, "step": 30146 }, { "epoch": 0.8277594728171335, "grad_norm": 0.42035356163978577, "learning_rate": 1.2697461422031586e-05, "loss": 0.5638, "step": 30147 }, { "epoch": 0.8277869302580999, "grad_norm": 0.3683796525001526, "learning_rate": 1.2697045535760508e-05, "loss": 0.4321, "step": 30148 }, { "epoch": 0.8278143876990665, "grad_norm": 0.3960830271244049, "learning_rate": 1.2696629644458585e-05, "loss": 0.4508, "step": 30149 }, { "epoch": 0.8278418451400329, "grad_norm": 0.3760804831981659, "learning_rate": 1.2696213748126583e-05, "loss": 0.4632, "step": 30150 }, { "epoch": 0.8278693025809994, "grad_norm": 0.3876819908618927, "learning_rate": 1.2695797846765285e-05, "loss": 0.4306, "step": 30151 }, { "epoch": 0.827896760021966, "grad_norm": 0.4273565411567688, "learning_rate": 1.2695381940375461e-05, "loss": 0.4712, "step": 30152 }, { "epoch": 0.8279242174629324, "grad_norm": 0.3546307682991028, "learning_rate": 1.2694966028957891e-05, "loss": 0.4521, "step": 30153 }, { "epoch": 0.827951674903899, "grad_norm": 0.35536137223243713, "learning_rate": 1.2694550112513348e-05, "loss": 0.3796, "step": 30154 }, { "epoch": 0.8279791323448654, "grad_norm": 0.3789352774620056, "learning_rate": 1.269413419104261e-05, "loss": 0.5208, "step": 30155 }, { "epoch": 0.828006589785832, "grad_norm": 0.36131414771080017, "learning_rate": 1.2693718264546453e-05, "loss": 0.4721, "step": 30156 }, { "epoch": 0.8280340472267984, "grad_norm": 0.3911927044391632, "learning_rate": 1.269330233302565e-05, "loss": 0.4985, "step": 30157 }, { "epoch": 0.828061504667765, "grad_norm": 0.36796438694000244, "learning_rate": 1.269288639648098e-05, "loss": 0.4736, "step": 30158 }, { "epoch": 0.8280889621087315, "grad_norm": 0.3711654543876648, "learning_rate": 1.2692470454913216e-05, "loss": 0.4563, "step": 30159 }, { "epoch": 0.828116419549698, "grad_norm": 0.4181745946407318, "learning_rate": 1.2692054508323138e-05, "loss": 0.5324, "step": 30160 }, { "epoch": 0.8281438769906645, "grad_norm": 0.6017858386039734, "learning_rate": 1.2691638556711515e-05, "loss": 0.4316, "step": 30161 }, { "epoch": 0.8281713344316309, "grad_norm": 0.42729452252388, "learning_rate": 1.269122260007913e-05, "loss": 0.5055, "step": 30162 }, { "epoch": 0.8281987918725975, "grad_norm": 0.43209296464920044, "learning_rate": 1.269080663842675e-05, "loss": 0.5181, "step": 30163 }, { "epoch": 0.8282262493135639, "grad_norm": 0.3777647912502289, "learning_rate": 1.2690390671755163e-05, "loss": 0.4628, "step": 30164 }, { "epoch": 0.8282537067545305, "grad_norm": 0.42750871181488037, "learning_rate": 1.2689974700065139e-05, "loss": 0.5378, "step": 30165 }, { "epoch": 0.828281164195497, "grad_norm": 0.44176581501960754, "learning_rate": 1.2689558723357451e-05, "loss": 0.5742, "step": 30166 }, { "epoch": 0.8283086216364635, "grad_norm": 0.4650537967681885, "learning_rate": 1.2689142741632879e-05, "loss": 0.4509, "step": 30167 }, { "epoch": 0.82833607907743, "grad_norm": 0.44560912251472473, "learning_rate": 1.2688726754892195e-05, "loss": 0.4371, "step": 30168 }, { "epoch": 0.8283635365183964, "grad_norm": 0.4083231985569, "learning_rate": 1.268831076313618e-05, "loss": 0.554, "step": 30169 }, { "epoch": 0.828390993959363, "grad_norm": 0.381827712059021, "learning_rate": 1.268789476636561e-05, "loss": 0.4781, "step": 30170 }, { "epoch": 0.8284184514003294, "grad_norm": 0.40177249908447266, "learning_rate": 1.268747876458125e-05, "loss": 0.4553, "step": 30171 }, { "epoch": 0.828445908841296, "grad_norm": 0.37988927960395813, "learning_rate": 1.2687062757783893e-05, "loss": 0.3936, "step": 30172 }, { "epoch": 0.8284733662822625, "grad_norm": 0.3665355145931244, "learning_rate": 1.2686646745974302e-05, "loss": 0.4449, "step": 30173 }, { "epoch": 0.828500823723229, "grad_norm": 0.4080806076526642, "learning_rate": 1.2686230729153257e-05, "loss": 0.4952, "step": 30174 }, { "epoch": 0.8285282811641955, "grad_norm": 0.498286634683609, "learning_rate": 1.2685814707321537e-05, "loss": 0.4839, "step": 30175 }, { "epoch": 0.828555738605162, "grad_norm": 0.3898909091949463, "learning_rate": 1.2685398680479915e-05, "loss": 0.5103, "step": 30176 }, { "epoch": 0.8285831960461285, "grad_norm": 0.38970428705215454, "learning_rate": 1.2684982648629164e-05, "loss": 0.5584, "step": 30177 }, { "epoch": 0.828610653487095, "grad_norm": 0.35226118564605713, "learning_rate": 1.2684566611770069e-05, "loss": 0.5034, "step": 30178 }, { "epoch": 0.8286381109280615, "grad_norm": 0.4234652817249298, "learning_rate": 1.2684150569903396e-05, "loss": 0.5623, "step": 30179 }, { "epoch": 0.828665568369028, "grad_norm": 0.40462908148765564, "learning_rate": 1.2683734523029928e-05, "loss": 0.4813, "step": 30180 }, { "epoch": 0.8286930258099945, "grad_norm": 0.44137856364250183, "learning_rate": 1.2683318471150434e-05, "loss": 0.4929, "step": 30181 }, { "epoch": 0.828720483250961, "grad_norm": 0.4045804738998413, "learning_rate": 1.2682902414265699e-05, "loss": 0.5085, "step": 30182 }, { "epoch": 0.8287479406919275, "grad_norm": 0.6157320141792297, "learning_rate": 1.2682486352376495e-05, "loss": 0.5273, "step": 30183 }, { "epoch": 0.828775398132894, "grad_norm": 0.37834271788597107, "learning_rate": 1.2682070285483595e-05, "loss": 0.4627, "step": 30184 }, { "epoch": 0.8288028555738605, "grad_norm": 0.3549070358276367, "learning_rate": 1.2681654213587779e-05, "loss": 0.472, "step": 30185 }, { "epoch": 0.828830313014827, "grad_norm": 0.4049048125743866, "learning_rate": 1.2681238136689822e-05, "loss": 0.4856, "step": 30186 }, { "epoch": 0.8288577704557936, "grad_norm": 0.40977662801742554, "learning_rate": 1.26808220547905e-05, "loss": 0.5938, "step": 30187 }, { "epoch": 0.82888522789676, "grad_norm": 0.40600845217704773, "learning_rate": 1.2680405967890592e-05, "loss": 0.4962, "step": 30188 }, { "epoch": 0.8289126853377266, "grad_norm": 0.3837948143482208, "learning_rate": 1.2679989875990868e-05, "loss": 0.4846, "step": 30189 }, { "epoch": 0.828940142778693, "grad_norm": 0.3800135552883148, "learning_rate": 1.2679573779092108e-05, "loss": 0.4973, "step": 30190 }, { "epoch": 0.8289676002196595, "grad_norm": 0.42047667503356934, "learning_rate": 1.267915767719509e-05, "loss": 0.4621, "step": 30191 }, { "epoch": 0.828995057660626, "grad_norm": 0.3662310242652893, "learning_rate": 1.2678741570300585e-05, "loss": 0.525, "step": 30192 }, { "epoch": 0.8290225151015925, "grad_norm": 0.41898858547210693, "learning_rate": 1.2678325458409373e-05, "loss": 0.4083, "step": 30193 }, { "epoch": 0.8290499725425591, "grad_norm": 0.39827239513397217, "learning_rate": 1.267790934152223e-05, "loss": 0.4863, "step": 30194 }, { "epoch": 0.8290774299835255, "grad_norm": 0.41429856419563293, "learning_rate": 1.2677493219639932e-05, "loss": 0.4983, "step": 30195 }, { "epoch": 0.8291048874244921, "grad_norm": 0.3416401743888855, "learning_rate": 1.2677077092763252e-05, "loss": 0.4421, "step": 30196 }, { "epoch": 0.8291323448654585, "grad_norm": 0.37801456451416016, "learning_rate": 1.267666096089297e-05, "loss": 0.4889, "step": 30197 }, { "epoch": 0.829159802306425, "grad_norm": 0.39317193627357483, "learning_rate": 1.2676244824029861e-05, "loss": 0.5533, "step": 30198 }, { "epoch": 0.8291872597473915, "grad_norm": 0.4269481599330902, "learning_rate": 1.2675828682174701e-05, "loss": 0.481, "step": 30199 }, { "epoch": 0.829214717188358, "grad_norm": 0.37056225538253784, "learning_rate": 1.2675412535328269e-05, "loss": 0.4423, "step": 30200 }, { "epoch": 0.8292421746293246, "grad_norm": 0.3704770505428314, "learning_rate": 1.2674996383491337e-05, "loss": 0.4946, "step": 30201 }, { "epoch": 0.829269632070291, "grad_norm": 0.44553518295288086, "learning_rate": 1.2674580226664681e-05, "loss": 0.557, "step": 30202 }, { "epoch": 0.8292970895112576, "grad_norm": 0.464017391204834, "learning_rate": 1.2674164064849081e-05, "loss": 0.5308, "step": 30203 }, { "epoch": 0.829324546952224, "grad_norm": 0.3846050500869751, "learning_rate": 1.2673747898045312e-05, "loss": 0.5026, "step": 30204 }, { "epoch": 0.8293520043931906, "grad_norm": 0.4030638337135315, "learning_rate": 1.2673331726254148e-05, "loss": 0.4383, "step": 30205 }, { "epoch": 0.829379461834157, "grad_norm": 0.3786577880382538, "learning_rate": 1.267291554947637e-05, "loss": 0.4383, "step": 30206 }, { "epoch": 0.8294069192751236, "grad_norm": 0.36833974719047546, "learning_rate": 1.2672499367712747e-05, "loss": 0.4874, "step": 30207 }, { "epoch": 0.8294343767160901, "grad_norm": 0.4100600779056549, "learning_rate": 1.2672083180964063e-05, "loss": 0.4595, "step": 30208 }, { "epoch": 0.8294618341570565, "grad_norm": 0.4139527380466461, "learning_rate": 1.2671666989231093e-05, "loss": 0.5364, "step": 30209 }, { "epoch": 0.8294892915980231, "grad_norm": 0.4129088819026947, "learning_rate": 1.2671250792514607e-05, "loss": 0.5416, "step": 30210 }, { "epoch": 0.8295167490389895, "grad_norm": 0.38217490911483765, "learning_rate": 1.2670834590815388e-05, "loss": 0.5002, "step": 30211 }, { "epoch": 0.8295442064799561, "grad_norm": 0.4132919907569885, "learning_rate": 1.267041838413421e-05, "loss": 0.5468, "step": 30212 }, { "epoch": 0.8295716639209225, "grad_norm": 0.40673577785491943, "learning_rate": 1.2670002172471847e-05, "loss": 0.458, "step": 30213 }, { "epoch": 0.8295991213618891, "grad_norm": 0.3551134765148163, "learning_rate": 1.2669585955829082e-05, "loss": 0.4521, "step": 30214 }, { "epoch": 0.8296265788028556, "grad_norm": 0.30851200222969055, "learning_rate": 1.2669169734206684e-05, "loss": 0.3382, "step": 30215 }, { "epoch": 0.8296540362438221, "grad_norm": 0.477618932723999, "learning_rate": 1.2668753507605433e-05, "loss": 0.5923, "step": 30216 }, { "epoch": 0.8296814936847886, "grad_norm": 0.3746917247772217, "learning_rate": 1.2668337276026104e-05, "loss": 0.463, "step": 30217 }, { "epoch": 0.829708951125755, "grad_norm": 0.4033198058605194, "learning_rate": 1.2667921039469477e-05, "loss": 0.5204, "step": 30218 }, { "epoch": 0.8297364085667216, "grad_norm": 0.331371009349823, "learning_rate": 1.2667504797936325e-05, "loss": 0.4451, "step": 30219 }, { "epoch": 0.829763866007688, "grad_norm": 0.3861638009548187, "learning_rate": 1.2667088551427424e-05, "loss": 0.4146, "step": 30220 }, { "epoch": 0.8297913234486546, "grad_norm": 0.41285786032676697, "learning_rate": 1.2666672299943552e-05, "loss": 0.4753, "step": 30221 }, { "epoch": 0.8298187808896211, "grad_norm": 0.3504190146923065, "learning_rate": 1.2666256043485488e-05, "loss": 0.4921, "step": 30222 }, { "epoch": 0.8298462383305876, "grad_norm": 0.41357100009918213, "learning_rate": 1.2665839782054e-05, "loss": 0.5287, "step": 30223 }, { "epoch": 0.8298736957715541, "grad_norm": 0.38297852873802185, "learning_rate": 1.2665423515649876e-05, "loss": 0.4593, "step": 30224 }, { "epoch": 0.8299011532125206, "grad_norm": 0.39998334646224976, "learning_rate": 1.266500724427388e-05, "loss": 0.4833, "step": 30225 }, { "epoch": 0.8299286106534871, "grad_norm": 0.4182138442993164, "learning_rate": 1.26645909679268e-05, "loss": 0.4987, "step": 30226 }, { "epoch": 0.8299560680944535, "grad_norm": 0.4057847559452057, "learning_rate": 1.2664174686609405e-05, "loss": 0.567, "step": 30227 }, { "epoch": 0.8299835255354201, "grad_norm": 0.33791977167129517, "learning_rate": 1.2663758400322474e-05, "loss": 0.5015, "step": 30228 }, { "epoch": 0.8300109829763866, "grad_norm": 0.34357163310050964, "learning_rate": 1.2663342109066786e-05, "loss": 0.4089, "step": 30229 }, { "epoch": 0.8300384404173531, "grad_norm": 0.44750556349754333, "learning_rate": 1.266292581284311e-05, "loss": 0.3998, "step": 30230 }, { "epoch": 0.8300658978583196, "grad_norm": 0.3905784487724304, "learning_rate": 1.2662509511652233e-05, "loss": 0.4672, "step": 30231 }, { "epoch": 0.8300933552992861, "grad_norm": 0.39093077182769775, "learning_rate": 1.2662093205494922e-05, "loss": 0.463, "step": 30232 }, { "epoch": 0.8301208127402526, "grad_norm": 0.35563895106315613, "learning_rate": 1.2661676894371959e-05, "loss": 0.4995, "step": 30233 }, { "epoch": 0.8301482701812191, "grad_norm": 0.3526252508163452, "learning_rate": 1.2661260578284119e-05, "loss": 0.5078, "step": 30234 }, { "epoch": 0.8301757276221856, "grad_norm": 0.3769455552101135, "learning_rate": 1.266084425723218e-05, "loss": 0.529, "step": 30235 }, { "epoch": 0.8302031850631522, "grad_norm": 0.4023447632789612, "learning_rate": 1.2660427931216915e-05, "loss": 0.5426, "step": 30236 }, { "epoch": 0.8302306425041186, "grad_norm": 0.35571062564849854, "learning_rate": 1.2660011600239104e-05, "loss": 0.4025, "step": 30237 }, { "epoch": 0.8302580999450851, "grad_norm": 0.3777214288711548, "learning_rate": 1.265959526429952e-05, "loss": 0.4033, "step": 30238 }, { "epoch": 0.8302855573860516, "grad_norm": 0.38111695647239685, "learning_rate": 1.2659178923398945e-05, "loss": 0.4702, "step": 30239 }, { "epoch": 0.8303130148270181, "grad_norm": 0.3746671974658966, "learning_rate": 1.2658762577538152e-05, "loss": 0.5409, "step": 30240 }, { "epoch": 0.8303404722679846, "grad_norm": 0.4144847095012665, "learning_rate": 1.2658346226717917e-05, "loss": 0.5662, "step": 30241 }, { "epoch": 0.8303679297089511, "grad_norm": 0.4501059949398041, "learning_rate": 1.2657929870939018e-05, "loss": 0.5283, "step": 30242 }, { "epoch": 0.8303953871499177, "grad_norm": 0.39457330107688904, "learning_rate": 1.2657513510202233e-05, "loss": 0.4719, "step": 30243 }, { "epoch": 0.8304228445908841, "grad_norm": 0.403413325548172, "learning_rate": 1.2657097144508337e-05, "loss": 0.4586, "step": 30244 }, { "epoch": 0.8304503020318507, "grad_norm": 0.40660321712493896, "learning_rate": 1.2656680773858105e-05, "loss": 0.5519, "step": 30245 }, { "epoch": 0.8304777594728171, "grad_norm": 0.40034496784210205, "learning_rate": 1.2656264398252316e-05, "loss": 0.5132, "step": 30246 }, { "epoch": 0.8305052169137837, "grad_norm": 0.399395614862442, "learning_rate": 1.2655848017691748e-05, "loss": 0.5312, "step": 30247 }, { "epoch": 0.8305326743547501, "grad_norm": 0.3459046185016632, "learning_rate": 1.2655431632177173e-05, "loss": 0.4367, "step": 30248 }, { "epoch": 0.8305601317957166, "grad_norm": 0.39666837453842163, "learning_rate": 1.2655015241709372e-05, "loss": 0.4597, "step": 30249 }, { "epoch": 0.8305875892366832, "grad_norm": 0.34702301025390625, "learning_rate": 1.265459884628912e-05, "loss": 0.5302, "step": 30250 }, { "epoch": 0.8306150466776496, "grad_norm": 0.3708016276359558, "learning_rate": 1.2654182445917194e-05, "loss": 0.4881, "step": 30251 }, { "epoch": 0.8306425041186162, "grad_norm": 0.32588738203048706, "learning_rate": 1.265376604059437e-05, "loss": 0.4382, "step": 30252 }, { "epoch": 0.8306699615595826, "grad_norm": 0.38643187284469604, "learning_rate": 1.2653349630321428e-05, "loss": 0.4117, "step": 30253 }, { "epoch": 0.8306974190005492, "grad_norm": 0.35875391960144043, "learning_rate": 1.2652933215099139e-05, "loss": 0.5228, "step": 30254 }, { "epoch": 0.8307248764415156, "grad_norm": 0.4428936243057251, "learning_rate": 1.2652516794928285e-05, "loss": 0.5322, "step": 30255 }, { "epoch": 0.8307523338824822, "grad_norm": 0.3720729351043701, "learning_rate": 1.2652100369809637e-05, "loss": 0.4654, "step": 30256 }, { "epoch": 0.8307797913234487, "grad_norm": 0.48424971103668213, "learning_rate": 1.265168393974398e-05, "loss": 0.403, "step": 30257 }, { "epoch": 0.8308072487644151, "grad_norm": 0.3608262240886688, "learning_rate": 1.2651267504732083e-05, "loss": 0.5143, "step": 30258 }, { "epoch": 0.8308347062053817, "grad_norm": 0.40514519810676575, "learning_rate": 1.2650851064774727e-05, "loss": 0.5554, "step": 30259 }, { "epoch": 0.8308621636463481, "grad_norm": 0.3753630816936493, "learning_rate": 1.2650434619872687e-05, "loss": 0.5264, "step": 30260 }, { "epoch": 0.8308896210873147, "grad_norm": 0.35506489872932434, "learning_rate": 1.265001817002674e-05, "loss": 0.4585, "step": 30261 }, { "epoch": 0.8309170785282811, "grad_norm": 0.4116273522377014, "learning_rate": 1.2649601715237664e-05, "loss": 0.4688, "step": 30262 }, { "epoch": 0.8309445359692477, "grad_norm": 0.3632226586341858, "learning_rate": 1.2649185255506238e-05, "loss": 0.5235, "step": 30263 }, { "epoch": 0.8309719934102142, "grad_norm": 0.36816951632499695, "learning_rate": 1.2648768790833232e-05, "loss": 0.4846, "step": 30264 }, { "epoch": 0.8309994508511807, "grad_norm": 0.383434534072876, "learning_rate": 1.2648352321219428e-05, "loss": 0.4273, "step": 30265 }, { "epoch": 0.8310269082921472, "grad_norm": 0.3781331181526184, "learning_rate": 1.2647935846665601e-05, "loss": 0.5159, "step": 30266 }, { "epoch": 0.8310543657331136, "grad_norm": 0.4319455623626709, "learning_rate": 1.2647519367172532e-05, "loss": 0.5483, "step": 30267 }, { "epoch": 0.8310818231740802, "grad_norm": 0.7177205085754395, "learning_rate": 1.2647102882740992e-05, "loss": 0.5764, "step": 30268 }, { "epoch": 0.8311092806150466, "grad_norm": 0.4133775532245636, "learning_rate": 1.2646686393371759e-05, "loss": 0.4455, "step": 30269 }, { "epoch": 0.8311367380560132, "grad_norm": 0.3865716755390167, "learning_rate": 1.2646269899065613e-05, "loss": 0.4133, "step": 30270 }, { "epoch": 0.8311641954969797, "grad_norm": 0.39414212107658386, "learning_rate": 1.2645853399823329e-05, "loss": 0.458, "step": 30271 }, { "epoch": 0.8311916529379462, "grad_norm": 0.38279280066490173, "learning_rate": 1.2645436895645682e-05, "loss": 0.4867, "step": 30272 }, { "epoch": 0.8312191103789127, "grad_norm": 0.4270465075969696, "learning_rate": 1.2645020386533453e-05, "loss": 0.5423, "step": 30273 }, { "epoch": 0.8312465678198792, "grad_norm": 0.39220815896987915, "learning_rate": 1.2644603872487415e-05, "loss": 0.5524, "step": 30274 }, { "epoch": 0.8312740252608457, "grad_norm": 0.45736730098724365, "learning_rate": 1.2644187353508348e-05, "loss": 0.5405, "step": 30275 }, { "epoch": 0.8313014827018121, "grad_norm": 0.37588754296302795, "learning_rate": 1.2643770829597029e-05, "loss": 0.5701, "step": 30276 }, { "epoch": 0.8313289401427787, "grad_norm": 0.4082329571247101, "learning_rate": 1.264335430075423e-05, "loss": 0.5961, "step": 30277 }, { "epoch": 0.8313563975837452, "grad_norm": 0.38419216871261597, "learning_rate": 1.2642937766980734e-05, "loss": 0.4165, "step": 30278 }, { "epoch": 0.8313838550247117, "grad_norm": 0.39413756132125854, "learning_rate": 1.2642521228277314e-05, "loss": 0.5244, "step": 30279 }, { "epoch": 0.8314113124656782, "grad_norm": 0.37085646390914917, "learning_rate": 1.2642104684644752e-05, "loss": 0.4376, "step": 30280 }, { "epoch": 0.8314387699066447, "grad_norm": 0.40708106756210327, "learning_rate": 1.2641688136083819e-05, "loss": 0.4942, "step": 30281 }, { "epoch": 0.8314662273476112, "grad_norm": 0.38414785265922546, "learning_rate": 1.2641271582595293e-05, "loss": 0.4481, "step": 30282 }, { "epoch": 0.8314936847885777, "grad_norm": 0.443010151386261, "learning_rate": 1.2640855024179955e-05, "loss": 0.5174, "step": 30283 }, { "epoch": 0.8315211422295442, "grad_norm": 0.5067664384841919, "learning_rate": 1.2640438460838577e-05, "loss": 0.5755, "step": 30284 }, { "epoch": 0.8315485996705108, "grad_norm": 0.38709741830825806, "learning_rate": 1.264002189257194e-05, "loss": 0.462, "step": 30285 }, { "epoch": 0.8315760571114772, "grad_norm": 0.3594662845134735, "learning_rate": 1.263960531938082e-05, "loss": 0.4935, "step": 30286 }, { "epoch": 0.8316035145524437, "grad_norm": 0.3746187090873718, "learning_rate": 1.2639188741265993e-05, "loss": 0.4587, "step": 30287 }, { "epoch": 0.8316309719934102, "grad_norm": 0.3877832889556885, "learning_rate": 1.2638772158228238e-05, "loss": 0.5067, "step": 30288 }, { "epoch": 0.8316584294343767, "grad_norm": 0.4159867763519287, "learning_rate": 1.2638355570268331e-05, "loss": 0.551, "step": 30289 }, { "epoch": 0.8316858868753432, "grad_norm": 0.3844701945781708, "learning_rate": 1.2637938977387046e-05, "loss": 0.6179, "step": 30290 }, { "epoch": 0.8317133443163097, "grad_norm": 0.41171231865882874, "learning_rate": 1.2637522379585165e-05, "loss": 0.4728, "step": 30291 }, { "epoch": 0.8317408017572763, "grad_norm": 0.44197988510131836, "learning_rate": 1.263710577686346e-05, "loss": 0.47, "step": 30292 }, { "epoch": 0.8317682591982427, "grad_norm": 0.41233834624290466, "learning_rate": 1.2636689169222716e-05, "loss": 0.5605, "step": 30293 }, { "epoch": 0.8317957166392093, "grad_norm": 0.3313000500202179, "learning_rate": 1.2636272556663702e-05, "loss": 0.4556, "step": 30294 }, { "epoch": 0.8318231740801757, "grad_norm": 0.41675838828086853, "learning_rate": 1.2635855939187198e-05, "loss": 0.5221, "step": 30295 }, { "epoch": 0.8318506315211422, "grad_norm": 0.43815916776657104, "learning_rate": 1.2635439316793983e-05, "loss": 0.5229, "step": 30296 }, { "epoch": 0.8318780889621087, "grad_norm": 0.39627939462661743, "learning_rate": 1.2635022689484831e-05, "loss": 0.4406, "step": 30297 }, { "epoch": 0.8319055464030752, "grad_norm": 0.3404425382614136, "learning_rate": 1.2634606057260524e-05, "loss": 0.5073, "step": 30298 }, { "epoch": 0.8319330038440418, "grad_norm": 0.4478636384010315, "learning_rate": 1.2634189420121832e-05, "loss": 0.6056, "step": 30299 }, { "epoch": 0.8319604612850082, "grad_norm": 0.3727758228778839, "learning_rate": 1.2633772778069538e-05, "loss": 0.484, "step": 30300 }, { "epoch": 0.8319879187259748, "grad_norm": 0.34177467226982117, "learning_rate": 1.2633356131104415e-05, "loss": 0.4395, "step": 30301 }, { "epoch": 0.8320153761669412, "grad_norm": 0.6157618165016174, "learning_rate": 1.2632939479227244e-05, "loss": 0.5019, "step": 30302 }, { "epoch": 0.8320428336079078, "grad_norm": 0.39014461636543274, "learning_rate": 1.26325228224388e-05, "loss": 0.4995, "step": 30303 }, { "epoch": 0.8320702910488742, "grad_norm": 0.36032912135124207, "learning_rate": 1.263210616073986e-05, "loss": 0.3954, "step": 30304 }, { "epoch": 0.8320977484898407, "grad_norm": 0.33769431710243225, "learning_rate": 1.2631689494131204e-05, "loss": 0.4405, "step": 30305 }, { "epoch": 0.8321252059308073, "grad_norm": 0.4161273241043091, "learning_rate": 1.2631272822613608e-05, "loss": 0.4845, "step": 30306 }, { "epoch": 0.8321526633717737, "grad_norm": 0.41217732429504395, "learning_rate": 1.2630856146187844e-05, "loss": 0.504, "step": 30307 }, { "epoch": 0.8321801208127403, "grad_norm": 0.3860306739807129, "learning_rate": 1.2630439464854697e-05, "loss": 0.5634, "step": 30308 }, { "epoch": 0.8322075782537067, "grad_norm": 0.36186227202415466, "learning_rate": 1.2630022778614941e-05, "loss": 0.5264, "step": 30309 }, { "epoch": 0.8322350356946733, "grad_norm": 0.43043023347854614, "learning_rate": 1.262960608746935e-05, "loss": 0.4693, "step": 30310 }, { "epoch": 0.8322624931356397, "grad_norm": 0.3838934600353241, "learning_rate": 1.2629189391418708e-05, "loss": 0.4588, "step": 30311 }, { "epoch": 0.8322899505766063, "grad_norm": 0.4120745062828064, "learning_rate": 1.2628772690463786e-05, "loss": 0.5415, "step": 30312 }, { "epoch": 0.8323174080175728, "grad_norm": 0.4422426223754883, "learning_rate": 1.2628355984605366e-05, "loss": 0.4607, "step": 30313 }, { "epoch": 0.8323448654585393, "grad_norm": 0.4377015233039856, "learning_rate": 1.2627939273844224e-05, "loss": 0.5208, "step": 30314 }, { "epoch": 0.8323723228995058, "grad_norm": 0.40354886651039124, "learning_rate": 1.2627522558181133e-05, "loss": 0.5455, "step": 30315 }, { "epoch": 0.8323997803404722, "grad_norm": 0.41170769929885864, "learning_rate": 1.2627105837616875e-05, "loss": 0.4757, "step": 30316 }, { "epoch": 0.8324272377814388, "grad_norm": 0.35126185417175293, "learning_rate": 1.2626689112152226e-05, "loss": 0.3886, "step": 30317 }, { "epoch": 0.8324546952224052, "grad_norm": 0.42665454745292664, "learning_rate": 1.2626272381787964e-05, "loss": 0.4802, "step": 30318 }, { "epoch": 0.8324821526633718, "grad_norm": 0.46385684609413147, "learning_rate": 1.2625855646524867e-05, "loss": 0.5209, "step": 30319 }, { "epoch": 0.8325096101043383, "grad_norm": 0.3672662675380707, "learning_rate": 1.2625438906363709e-05, "loss": 0.4807, "step": 30320 }, { "epoch": 0.8325370675453048, "grad_norm": 2.130838394165039, "learning_rate": 1.2625022161305274e-05, "loss": 0.3987, "step": 30321 }, { "epoch": 0.8325645249862713, "grad_norm": 0.43482381105422974, "learning_rate": 1.2624605411350327e-05, "loss": 0.5755, "step": 30322 }, { "epoch": 0.8325919824272378, "grad_norm": 0.39659082889556885, "learning_rate": 1.2624188656499661e-05, "loss": 0.5178, "step": 30323 }, { "epoch": 0.8326194398682043, "grad_norm": 0.4611845016479492, "learning_rate": 1.262377189675404e-05, "loss": 0.4999, "step": 30324 }, { "epoch": 0.8326468973091707, "grad_norm": 0.3986891806125641, "learning_rate": 1.2623355132114252e-05, "loss": 0.4726, "step": 30325 }, { "epoch": 0.8326743547501373, "grad_norm": 0.38107144832611084, "learning_rate": 1.2622938362581065e-05, "loss": 0.475, "step": 30326 }, { "epoch": 0.8327018121911038, "grad_norm": 0.34282296895980835, "learning_rate": 1.2622521588155263e-05, "loss": 0.4364, "step": 30327 }, { "epoch": 0.8327292696320703, "grad_norm": 0.36565133929252625, "learning_rate": 1.2622104808837621e-05, "loss": 0.4967, "step": 30328 }, { "epoch": 0.8327567270730368, "grad_norm": 0.370693564414978, "learning_rate": 1.2621688024628915e-05, "loss": 0.552, "step": 30329 }, { "epoch": 0.8327841845140033, "grad_norm": 0.40343335270881653, "learning_rate": 1.2621271235529928e-05, "loss": 0.5319, "step": 30330 }, { "epoch": 0.8328116419549698, "grad_norm": 0.44981154799461365, "learning_rate": 1.2620854441541428e-05, "loss": 0.5122, "step": 30331 }, { "epoch": 0.8328390993959363, "grad_norm": 0.40278851985931396, "learning_rate": 1.2620437642664202e-05, "loss": 0.5524, "step": 30332 }, { "epoch": 0.8328665568369028, "grad_norm": 0.38788270950317383, "learning_rate": 1.2620020838899025e-05, "loss": 0.4907, "step": 30333 }, { "epoch": 0.8328940142778694, "grad_norm": 0.4460882544517517, "learning_rate": 1.2619604030246669e-05, "loss": 0.5181, "step": 30334 }, { "epoch": 0.8329214717188358, "grad_norm": 0.3744889497756958, "learning_rate": 1.2619187216707918e-05, "loss": 0.5074, "step": 30335 }, { "epoch": 0.8329489291598023, "grad_norm": 0.3618219196796417, "learning_rate": 1.2618770398283545e-05, "loss": 0.4948, "step": 30336 }, { "epoch": 0.8329763866007688, "grad_norm": 0.38342711329460144, "learning_rate": 1.2618353574974329e-05, "loss": 0.5418, "step": 30337 }, { "epoch": 0.8330038440417353, "grad_norm": 0.3774867355823517, "learning_rate": 1.261793674678105e-05, "loss": 0.5316, "step": 30338 }, { "epoch": 0.8330313014827018, "grad_norm": 0.3637533187866211, "learning_rate": 1.2617519913704482e-05, "loss": 0.4931, "step": 30339 }, { "epoch": 0.8330587589236683, "grad_norm": 0.5132043957710266, "learning_rate": 1.2617103075745404e-05, "loss": 0.5872, "step": 30340 }, { "epoch": 0.8330862163646349, "grad_norm": 0.40197402238845825, "learning_rate": 1.2616686232904593e-05, "loss": 0.5143, "step": 30341 }, { "epoch": 0.8331136738056013, "grad_norm": 0.37361249327659607, "learning_rate": 1.2616269385182829e-05, "loss": 0.6072, "step": 30342 }, { "epoch": 0.8331411312465679, "grad_norm": 0.3954754173755646, "learning_rate": 1.2615852532580888e-05, "loss": 0.4517, "step": 30343 }, { "epoch": 0.8331685886875343, "grad_norm": 0.6539633274078369, "learning_rate": 1.2615435675099544e-05, "loss": 0.4099, "step": 30344 }, { "epoch": 0.8331960461285008, "grad_norm": 0.4081096947193146, "learning_rate": 1.261501881273958e-05, "loss": 0.4552, "step": 30345 }, { "epoch": 0.8332235035694673, "grad_norm": 0.3596382737159729, "learning_rate": 1.261460194550177e-05, "loss": 0.4637, "step": 30346 }, { "epoch": 0.8332509610104338, "grad_norm": 0.3939182162284851, "learning_rate": 1.2614185073386893e-05, "loss": 0.5254, "step": 30347 }, { "epoch": 0.8332784184514004, "grad_norm": 0.3531131446361542, "learning_rate": 1.2613768196395728e-05, "loss": 0.4916, "step": 30348 }, { "epoch": 0.8333058758923668, "grad_norm": 0.37459734082221985, "learning_rate": 1.261335131452905e-05, "loss": 0.4615, "step": 30349 }, { "epoch": 0.8333333333333334, "grad_norm": 0.3843931257724762, "learning_rate": 1.2612934427787638e-05, "loss": 0.5201, "step": 30350 }, { "epoch": 0.8333607907742998, "grad_norm": 0.3601520359516144, "learning_rate": 1.261251753617227e-05, "loss": 0.4883, "step": 30351 }, { "epoch": 0.8333882482152664, "grad_norm": 0.3623564541339874, "learning_rate": 1.2612100639683723e-05, "loss": 0.5368, "step": 30352 }, { "epoch": 0.8334157056562328, "grad_norm": 0.4150620102882385, "learning_rate": 1.2611683738322774e-05, "loss": 0.448, "step": 30353 }, { "epoch": 0.8334431630971993, "grad_norm": 0.4236331880092621, "learning_rate": 1.26112668320902e-05, "loss": 0.5378, "step": 30354 }, { "epoch": 0.8334706205381659, "grad_norm": 0.38926708698272705, "learning_rate": 1.261084992098678e-05, "loss": 0.5421, "step": 30355 }, { "epoch": 0.8334980779791323, "grad_norm": 0.42358893156051636, "learning_rate": 1.2610433005013294e-05, "loss": 0.4212, "step": 30356 }, { "epoch": 0.8335255354200989, "grad_norm": 0.3743465542793274, "learning_rate": 1.2610016084170513e-05, "loss": 0.5123, "step": 30357 }, { "epoch": 0.8335529928610653, "grad_norm": 0.3726663887500763, "learning_rate": 1.2609599158459225e-05, "loss": 0.5095, "step": 30358 }, { "epoch": 0.8335804503020319, "grad_norm": 0.45146259665489197, "learning_rate": 1.2609182227880195e-05, "loss": 0.5129, "step": 30359 }, { "epoch": 0.8336079077429983, "grad_norm": 0.42571163177490234, "learning_rate": 1.2608765292434213e-05, "loss": 0.5152, "step": 30360 }, { "epoch": 0.8336353651839649, "grad_norm": 0.355617880821228, "learning_rate": 1.2608348352122049e-05, "loss": 0.4407, "step": 30361 }, { "epoch": 0.8336628226249314, "grad_norm": 0.37317395210266113, "learning_rate": 1.2607931406944482e-05, "loss": 0.462, "step": 30362 }, { "epoch": 0.8336902800658978, "grad_norm": 0.36988508701324463, "learning_rate": 1.2607514456902292e-05, "loss": 0.5132, "step": 30363 }, { "epoch": 0.8337177375068644, "grad_norm": 0.3858504295349121, "learning_rate": 1.2607097501996255e-05, "loss": 0.5284, "step": 30364 }, { "epoch": 0.8337451949478308, "grad_norm": 0.3883386552333832, "learning_rate": 1.2606680542227148e-05, "loss": 0.5406, "step": 30365 }, { "epoch": 0.8337726523887974, "grad_norm": 0.4710501432418823, "learning_rate": 1.2606263577595752e-05, "loss": 0.4565, "step": 30366 }, { "epoch": 0.8338001098297638, "grad_norm": 0.3491963744163513, "learning_rate": 1.260584660810284e-05, "loss": 0.4946, "step": 30367 }, { "epoch": 0.8338275672707304, "grad_norm": 0.3950599431991577, "learning_rate": 1.2605429633749196e-05, "loss": 0.5561, "step": 30368 }, { "epoch": 0.8338550247116969, "grad_norm": 0.43843889236450195, "learning_rate": 1.260501265453559e-05, "loss": 0.4878, "step": 30369 }, { "epoch": 0.8338824821526634, "grad_norm": 0.44451817870140076, "learning_rate": 1.2604595670462806e-05, "loss": 0.5418, "step": 30370 }, { "epoch": 0.8339099395936299, "grad_norm": 0.37603601813316345, "learning_rate": 1.260417868153162e-05, "loss": 0.5445, "step": 30371 }, { "epoch": 0.8339373970345964, "grad_norm": 0.33573755621910095, "learning_rate": 1.2603761687742809e-05, "loss": 0.4191, "step": 30372 }, { "epoch": 0.8339648544755629, "grad_norm": 0.3754858374595642, "learning_rate": 1.260334468909715e-05, "loss": 0.358, "step": 30373 }, { "epoch": 0.8339923119165293, "grad_norm": 0.4184570610523224, "learning_rate": 1.2602927685595425e-05, "loss": 0.4512, "step": 30374 }, { "epoch": 0.8340197693574959, "grad_norm": 0.4093192219734192, "learning_rate": 1.2602510677238407e-05, "loss": 0.3981, "step": 30375 }, { "epoch": 0.8340472267984624, "grad_norm": 0.3700283467769623, "learning_rate": 1.260209366402688e-05, "loss": 0.4495, "step": 30376 }, { "epoch": 0.8340746842394289, "grad_norm": 0.3919629454612732, "learning_rate": 1.2601676645961614e-05, "loss": 0.5338, "step": 30377 }, { "epoch": 0.8341021416803954, "grad_norm": 0.3945046663284302, "learning_rate": 1.2601259623043389e-05, "loss": 0.4994, "step": 30378 }, { "epoch": 0.8341295991213619, "grad_norm": 0.4797935485839844, "learning_rate": 1.2600842595272988e-05, "loss": 0.528, "step": 30379 }, { "epoch": 0.8341570565623284, "grad_norm": 0.4211718440055847, "learning_rate": 1.2600425562651184e-05, "loss": 0.4452, "step": 30380 }, { "epoch": 0.8341845140032949, "grad_norm": 0.39875686168670654, "learning_rate": 1.2600008525178757e-05, "loss": 0.6351, "step": 30381 }, { "epoch": 0.8342119714442614, "grad_norm": 0.5671746730804443, "learning_rate": 1.2599591482856486e-05, "loss": 0.4793, "step": 30382 }, { "epoch": 0.834239428885228, "grad_norm": 0.3583989441394806, "learning_rate": 1.2599174435685144e-05, "loss": 0.4434, "step": 30383 }, { "epoch": 0.8342668863261944, "grad_norm": 0.3920779228210449, "learning_rate": 1.2598757383665514e-05, "loss": 0.5764, "step": 30384 }, { "epoch": 0.8342943437671609, "grad_norm": 0.4524482786655426, "learning_rate": 1.2598340326798371e-05, "loss": 0.5016, "step": 30385 }, { "epoch": 0.8343218012081274, "grad_norm": 0.46638113260269165, "learning_rate": 1.2597923265084495e-05, "loss": 0.5158, "step": 30386 }, { "epoch": 0.8343492586490939, "grad_norm": 0.4480866491794586, "learning_rate": 1.2597506198524662e-05, "loss": 0.5464, "step": 30387 }, { "epoch": 0.8343767160900604, "grad_norm": 0.3769569396972656, "learning_rate": 1.259708912711965e-05, "loss": 0.4188, "step": 30388 }, { "epoch": 0.8344041735310269, "grad_norm": 0.3846501111984253, "learning_rate": 1.2596672050870242e-05, "loss": 0.5762, "step": 30389 }, { "epoch": 0.8344316309719935, "grad_norm": 0.3934304714202881, "learning_rate": 1.2596254969777206e-05, "loss": 0.4848, "step": 30390 }, { "epoch": 0.8344590884129599, "grad_norm": 0.3965800106525421, "learning_rate": 1.2595837883841332e-05, "loss": 0.5344, "step": 30391 }, { "epoch": 0.8344865458539265, "grad_norm": 0.3782135248184204, "learning_rate": 1.2595420793063389e-05, "loss": 0.4873, "step": 30392 }, { "epoch": 0.8345140032948929, "grad_norm": 0.45516642928123474, "learning_rate": 1.2595003697444158e-05, "loss": 0.521, "step": 30393 }, { "epoch": 0.8345414607358594, "grad_norm": 0.40234801173210144, "learning_rate": 1.2594586596984418e-05, "loss": 0.5523, "step": 30394 }, { "epoch": 0.8345689181768259, "grad_norm": 0.3990074098110199, "learning_rate": 1.2594169491684944e-05, "loss": 0.4962, "step": 30395 }, { "epoch": 0.8345963756177924, "grad_norm": 0.40876534581184387, "learning_rate": 1.2593752381546516e-05, "loss": 0.518, "step": 30396 }, { "epoch": 0.8346238330587589, "grad_norm": 0.42773404717445374, "learning_rate": 1.2593335266569913e-05, "loss": 0.5166, "step": 30397 }, { "epoch": 0.8346512904997254, "grad_norm": 0.32705938816070557, "learning_rate": 1.2592918146755909e-05, "loss": 0.4405, "step": 30398 }, { "epoch": 0.834678747940692, "grad_norm": 0.401039183139801, "learning_rate": 1.2592501022105288e-05, "loss": 0.5402, "step": 30399 }, { "epoch": 0.8347062053816584, "grad_norm": 0.4576355218887329, "learning_rate": 1.2592083892618825e-05, "loss": 0.5732, "step": 30400 }, { "epoch": 0.834733662822625, "grad_norm": 0.3891449570655823, "learning_rate": 1.2591666758297297e-05, "loss": 0.4117, "step": 30401 }, { "epoch": 0.8347611202635914, "grad_norm": 0.4207470715045929, "learning_rate": 1.2591249619141485e-05, "loss": 0.4191, "step": 30402 }, { "epoch": 0.8347885777045579, "grad_norm": 0.4032715857028961, "learning_rate": 1.2590832475152164e-05, "loss": 0.4811, "step": 30403 }, { "epoch": 0.8348160351455244, "grad_norm": 0.36735665798187256, "learning_rate": 1.2590415326330114e-05, "loss": 0.4872, "step": 30404 }, { "epoch": 0.8348434925864909, "grad_norm": 0.393069326877594, "learning_rate": 1.2589998172676114e-05, "loss": 0.5645, "step": 30405 }, { "epoch": 0.8348709500274575, "grad_norm": 0.4509965479373932, "learning_rate": 1.258958101419094e-05, "loss": 0.544, "step": 30406 }, { "epoch": 0.8348984074684239, "grad_norm": 0.39796173572540283, "learning_rate": 1.258916385087537e-05, "loss": 0.4472, "step": 30407 }, { "epoch": 0.8349258649093905, "grad_norm": 0.38559284806251526, "learning_rate": 1.2588746682730185e-05, "loss": 0.5212, "step": 30408 }, { "epoch": 0.8349533223503569, "grad_norm": 0.443797767162323, "learning_rate": 1.258832950975616e-05, "loss": 0.5908, "step": 30409 }, { "epoch": 0.8349807797913235, "grad_norm": 0.39976590871810913, "learning_rate": 1.2587912331954074e-05, "loss": 0.5695, "step": 30410 }, { "epoch": 0.8350082372322899, "grad_norm": 0.36114034056663513, "learning_rate": 1.2587495149324704e-05, "loss": 0.4554, "step": 30411 }, { "epoch": 0.8350356946732564, "grad_norm": 0.38968899846076965, "learning_rate": 1.2587077961868831e-05, "loss": 0.6291, "step": 30412 }, { "epoch": 0.835063152114223, "grad_norm": 0.384721040725708, "learning_rate": 1.2586660769587234e-05, "loss": 0.53, "step": 30413 }, { "epoch": 0.8350906095551894, "grad_norm": 0.37014463543891907, "learning_rate": 1.2586243572480686e-05, "loss": 0.484, "step": 30414 }, { "epoch": 0.835118066996156, "grad_norm": 0.3598158657550812, "learning_rate": 1.2585826370549969e-05, "loss": 0.4543, "step": 30415 }, { "epoch": 0.8351455244371224, "grad_norm": 0.41639795899391174, "learning_rate": 1.2585409163795861e-05, "loss": 0.4915, "step": 30416 }, { "epoch": 0.835172981878089, "grad_norm": 0.41681620478630066, "learning_rate": 1.2584991952219141e-05, "loss": 0.5104, "step": 30417 }, { "epoch": 0.8352004393190554, "grad_norm": 0.41427522897720337, "learning_rate": 1.2584574735820584e-05, "loss": 0.4793, "step": 30418 }, { "epoch": 0.835227896760022, "grad_norm": 0.36098653078079224, "learning_rate": 1.2584157514600972e-05, "loss": 0.4223, "step": 30419 }, { "epoch": 0.8352553542009885, "grad_norm": 0.41426321864128113, "learning_rate": 1.2583740288561081e-05, "loss": 0.473, "step": 30420 }, { "epoch": 0.835282811641955, "grad_norm": 0.3609372079372406, "learning_rate": 1.2583323057701688e-05, "loss": 0.4526, "step": 30421 }, { "epoch": 0.8353102690829215, "grad_norm": 0.4139242172241211, "learning_rate": 1.2582905822023576e-05, "loss": 0.4786, "step": 30422 }, { "epoch": 0.8353377265238879, "grad_norm": 0.37611472606658936, "learning_rate": 1.258248858152752e-05, "loss": 0.4439, "step": 30423 }, { "epoch": 0.8353651839648545, "grad_norm": 0.4390620291233063, "learning_rate": 1.2582071336214297e-05, "loss": 0.4649, "step": 30424 }, { "epoch": 0.8353926414058209, "grad_norm": 0.38855257630348206, "learning_rate": 1.2581654086084686e-05, "loss": 0.5822, "step": 30425 }, { "epoch": 0.8354200988467875, "grad_norm": 0.43391647934913635, "learning_rate": 1.2581236831139468e-05, "loss": 0.5102, "step": 30426 }, { "epoch": 0.835447556287754, "grad_norm": 0.3664599061012268, "learning_rate": 1.2580819571379417e-05, "loss": 0.4977, "step": 30427 }, { "epoch": 0.8354750137287205, "grad_norm": 0.37627461552619934, "learning_rate": 1.2580402306805318e-05, "loss": 0.4997, "step": 30428 }, { "epoch": 0.835502471169687, "grad_norm": 0.48288893699645996, "learning_rate": 1.2579985037417942e-05, "loss": 0.5576, "step": 30429 }, { "epoch": 0.8355299286106534, "grad_norm": 0.38438984751701355, "learning_rate": 1.257956776321807e-05, "loss": 0.4734, "step": 30430 }, { "epoch": 0.83555738605162, "grad_norm": 0.3639982044696808, "learning_rate": 1.2579150484206484e-05, "loss": 0.5544, "step": 30431 }, { "epoch": 0.8355848434925864, "grad_norm": 0.3813841938972473, "learning_rate": 1.2578733200383954e-05, "loss": 0.5246, "step": 30432 }, { "epoch": 0.835612300933553, "grad_norm": 0.3609105348587036, "learning_rate": 1.257831591175127e-05, "loss": 0.4242, "step": 30433 }, { "epoch": 0.8356397583745195, "grad_norm": 0.33332130312919617, "learning_rate": 1.2577898618309196e-05, "loss": 0.5132, "step": 30434 }, { "epoch": 0.835667215815486, "grad_norm": 0.3776567578315735, "learning_rate": 1.2577481320058525e-05, "loss": 0.5449, "step": 30435 }, { "epoch": 0.8356946732564525, "grad_norm": 0.39415937662124634, "learning_rate": 1.2577064017000028e-05, "loss": 0.5479, "step": 30436 }, { "epoch": 0.835722130697419, "grad_norm": 0.3302742540836334, "learning_rate": 1.257664670913448e-05, "loss": 0.3933, "step": 30437 }, { "epoch": 0.8357495881383855, "grad_norm": 0.43013089895248413, "learning_rate": 1.2576229396462667e-05, "loss": 0.5574, "step": 30438 }, { "epoch": 0.835777045579352, "grad_norm": 0.3520798087120056, "learning_rate": 1.2575812078985365e-05, "loss": 0.4259, "step": 30439 }, { "epoch": 0.8358045030203185, "grad_norm": 0.3958924412727356, "learning_rate": 1.2575394756703348e-05, "loss": 0.4749, "step": 30440 }, { "epoch": 0.835831960461285, "grad_norm": 0.4059414565563202, "learning_rate": 1.25749774296174e-05, "loss": 0.5349, "step": 30441 }, { "epoch": 0.8358594179022515, "grad_norm": 0.41743776202201843, "learning_rate": 1.2574560097728297e-05, "loss": 0.5133, "step": 30442 }, { "epoch": 0.835886875343218, "grad_norm": 0.4545934796333313, "learning_rate": 1.2574142761036816e-05, "loss": 0.5437, "step": 30443 }, { "epoch": 0.8359143327841845, "grad_norm": 0.41759464144706726, "learning_rate": 1.2573725419543741e-05, "loss": 0.6141, "step": 30444 }, { "epoch": 0.835941790225151, "grad_norm": 0.363748699426651, "learning_rate": 1.2573308073249841e-05, "loss": 0.4587, "step": 30445 }, { "epoch": 0.8359692476661175, "grad_norm": 0.4759811758995056, "learning_rate": 1.2572890722155905e-05, "loss": 0.5085, "step": 30446 }, { "epoch": 0.835996705107084, "grad_norm": 0.4488232135772705, "learning_rate": 1.2572473366262703e-05, "loss": 0.5285, "step": 30447 }, { "epoch": 0.8360241625480506, "grad_norm": 0.42045995593070984, "learning_rate": 1.2572056005571021e-05, "loss": 0.532, "step": 30448 }, { "epoch": 0.836051619989017, "grad_norm": 0.37438148260116577, "learning_rate": 1.2571638640081632e-05, "loss": 0.5421, "step": 30449 }, { "epoch": 0.8360790774299836, "grad_norm": 0.37755095958709717, "learning_rate": 1.2571221269795315e-05, "loss": 0.4682, "step": 30450 }, { "epoch": 0.83610653487095, "grad_norm": 0.4064919054508209, "learning_rate": 1.257080389471285e-05, "loss": 0.476, "step": 30451 }, { "epoch": 0.8361339923119165, "grad_norm": 0.46516042947769165, "learning_rate": 1.2570386514835015e-05, "loss": 0.6137, "step": 30452 }, { "epoch": 0.836161449752883, "grad_norm": 0.37969258427619934, "learning_rate": 1.2569969130162591e-05, "loss": 0.496, "step": 30453 }, { "epoch": 0.8361889071938495, "grad_norm": 0.39078250527381897, "learning_rate": 1.2569551740696353e-05, "loss": 0.4608, "step": 30454 }, { "epoch": 0.8362163646348161, "grad_norm": 0.45649415254592896, "learning_rate": 1.256913434643708e-05, "loss": 0.5264, "step": 30455 }, { "epoch": 0.8362438220757825, "grad_norm": 0.41031721234321594, "learning_rate": 1.2568716947385553e-05, "loss": 0.5447, "step": 30456 }, { "epoch": 0.8362712795167491, "grad_norm": 0.4300644099712372, "learning_rate": 1.2568299543542548e-05, "loss": 0.468, "step": 30457 }, { "epoch": 0.8362987369577155, "grad_norm": 0.39714306592941284, "learning_rate": 1.2567882134908845e-05, "loss": 0.5666, "step": 30458 }, { "epoch": 0.836326194398682, "grad_norm": 0.3946171700954437, "learning_rate": 1.2567464721485223e-05, "loss": 0.5444, "step": 30459 }, { "epoch": 0.8363536518396485, "grad_norm": 0.396936297416687, "learning_rate": 1.2567047303272458e-05, "loss": 0.474, "step": 30460 }, { "epoch": 0.836381109280615, "grad_norm": 0.40881767868995667, "learning_rate": 1.256662988027133e-05, "loss": 0.526, "step": 30461 }, { "epoch": 0.8364085667215816, "grad_norm": 0.3861304819583893, "learning_rate": 1.2566212452482622e-05, "loss": 0.5261, "step": 30462 }, { "epoch": 0.836436024162548, "grad_norm": 0.47773048281669617, "learning_rate": 1.2565795019907106e-05, "loss": 0.4945, "step": 30463 }, { "epoch": 0.8364634816035146, "grad_norm": 0.34292536973953247, "learning_rate": 1.2565377582545563e-05, "loss": 0.4508, "step": 30464 }, { "epoch": 0.836490939044481, "grad_norm": 0.5099388957023621, "learning_rate": 1.2564960140398772e-05, "loss": 0.5255, "step": 30465 }, { "epoch": 0.8365183964854476, "grad_norm": 0.4132194519042969, "learning_rate": 1.2564542693467513e-05, "loss": 0.5004, "step": 30466 }, { "epoch": 0.836545853926414, "grad_norm": 0.3755113482475281, "learning_rate": 1.2564125241752563e-05, "loss": 0.441, "step": 30467 }, { "epoch": 0.8365733113673806, "grad_norm": 0.3508821725845337, "learning_rate": 1.25637077852547e-05, "loss": 0.4387, "step": 30468 }, { "epoch": 0.8366007688083471, "grad_norm": 0.44334694743156433, "learning_rate": 1.2563290323974706e-05, "loss": 0.5409, "step": 30469 }, { "epoch": 0.8366282262493135, "grad_norm": 0.37245213985443115, "learning_rate": 1.2562872857913358e-05, "loss": 0.4261, "step": 30470 }, { "epoch": 0.8366556836902801, "grad_norm": 0.3761880099773407, "learning_rate": 1.256245538707143e-05, "loss": 0.4338, "step": 30471 }, { "epoch": 0.8366831411312465, "grad_norm": 0.6353683471679688, "learning_rate": 1.2562037911449707e-05, "loss": 0.5742, "step": 30472 }, { "epoch": 0.8367105985722131, "grad_norm": 0.38505589962005615, "learning_rate": 1.2561620431048965e-05, "loss": 0.4022, "step": 30473 }, { "epoch": 0.8367380560131795, "grad_norm": 0.35510653257369995, "learning_rate": 1.2561202945869986e-05, "loss": 0.4657, "step": 30474 }, { "epoch": 0.8367655134541461, "grad_norm": 0.40166106820106506, "learning_rate": 1.2560785455913545e-05, "loss": 0.5122, "step": 30475 }, { "epoch": 0.8367929708951126, "grad_norm": 0.3795928657054901, "learning_rate": 1.2560367961180417e-05, "loss": 0.5371, "step": 30476 }, { "epoch": 0.8368204283360791, "grad_norm": 0.3821995258331299, "learning_rate": 1.2559950461671391e-05, "loss": 0.5353, "step": 30477 }, { "epoch": 0.8368478857770456, "grad_norm": 0.41867461800575256, "learning_rate": 1.2559532957387237e-05, "loss": 0.5829, "step": 30478 }, { "epoch": 0.836875343218012, "grad_norm": 0.3771328926086426, "learning_rate": 1.2559115448328741e-05, "loss": 0.4342, "step": 30479 }, { "epoch": 0.8369028006589786, "grad_norm": 0.36414608359336853, "learning_rate": 1.2558697934496676e-05, "loss": 0.4796, "step": 30480 }, { "epoch": 0.836930258099945, "grad_norm": 0.39816829562187195, "learning_rate": 1.2558280415891822e-05, "loss": 0.5926, "step": 30481 }, { "epoch": 0.8369577155409116, "grad_norm": 0.33887267112731934, "learning_rate": 1.255786289251496e-05, "loss": 0.4299, "step": 30482 }, { "epoch": 0.8369851729818781, "grad_norm": 0.32662302255630493, "learning_rate": 1.2557445364366864e-05, "loss": 0.3864, "step": 30483 }, { "epoch": 0.8370126304228446, "grad_norm": 0.4129866361618042, "learning_rate": 1.2557027831448321e-05, "loss": 0.5225, "step": 30484 }, { "epoch": 0.8370400878638111, "grad_norm": 0.40260010957717896, "learning_rate": 1.2556610293760103e-05, "loss": 0.4605, "step": 30485 }, { "epoch": 0.8370675453047776, "grad_norm": 0.3679565191268921, "learning_rate": 1.255619275130299e-05, "loss": 0.4941, "step": 30486 }, { "epoch": 0.8370950027457441, "grad_norm": 0.4204712212085724, "learning_rate": 1.2555775204077763e-05, "loss": 0.5388, "step": 30487 }, { "epoch": 0.8371224601867105, "grad_norm": 0.49357491731643677, "learning_rate": 1.2555357652085196e-05, "loss": 0.3651, "step": 30488 }, { "epoch": 0.8371499176276771, "grad_norm": 0.40383797883987427, "learning_rate": 1.2554940095326074e-05, "loss": 0.5541, "step": 30489 }, { "epoch": 0.8371773750686436, "grad_norm": 0.36534613370895386, "learning_rate": 1.2554522533801175e-05, "loss": 0.4915, "step": 30490 }, { "epoch": 0.8372048325096101, "grad_norm": 0.35690346360206604, "learning_rate": 1.2554104967511273e-05, "loss": 0.4752, "step": 30491 }, { "epoch": 0.8372322899505766, "grad_norm": 0.3626042306423187, "learning_rate": 1.2553687396457153e-05, "loss": 0.4303, "step": 30492 }, { "epoch": 0.8372597473915431, "grad_norm": 0.3611411154270172, "learning_rate": 1.2553269820639586e-05, "loss": 0.4575, "step": 30493 }, { "epoch": 0.8372872048325096, "grad_norm": 0.364143043756485, "learning_rate": 1.2552852240059361e-05, "loss": 0.6268, "step": 30494 }, { "epoch": 0.8373146622734761, "grad_norm": 0.37655550241470337, "learning_rate": 1.2552434654717249e-05, "loss": 0.5233, "step": 30495 }, { "epoch": 0.8373421197144426, "grad_norm": 0.4005410075187683, "learning_rate": 1.2552017064614032e-05, "loss": 0.5058, "step": 30496 }, { "epoch": 0.8373695771554092, "grad_norm": 0.3586280345916748, "learning_rate": 1.255159946975049e-05, "loss": 0.4501, "step": 30497 }, { "epoch": 0.8373970345963756, "grad_norm": 0.5538439750671387, "learning_rate": 1.25511818701274e-05, "loss": 0.5703, "step": 30498 }, { "epoch": 0.8374244920373421, "grad_norm": 0.3882840871810913, "learning_rate": 1.2550764265745544e-05, "loss": 0.4441, "step": 30499 }, { "epoch": 0.8374519494783086, "grad_norm": 0.3873562812805176, "learning_rate": 1.2550346656605694e-05, "loss": 0.5042, "step": 30500 }, { "epoch": 0.8374794069192751, "grad_norm": 0.4123181104660034, "learning_rate": 1.2549929042708637e-05, "loss": 0.5391, "step": 30501 }, { "epoch": 0.8375068643602416, "grad_norm": 0.34177783131599426, "learning_rate": 1.2549511424055147e-05, "loss": 0.4338, "step": 30502 }, { "epoch": 0.8375343218012081, "grad_norm": 0.4181652367115021, "learning_rate": 1.2549093800646003e-05, "loss": 0.4632, "step": 30503 }, { "epoch": 0.8375617792421747, "grad_norm": 0.44736167788505554, "learning_rate": 1.2548676172481988e-05, "loss": 0.5822, "step": 30504 }, { "epoch": 0.8375892366831411, "grad_norm": 0.39303043484687805, "learning_rate": 1.2548258539563879e-05, "loss": 0.5422, "step": 30505 }, { "epoch": 0.8376166941241077, "grad_norm": 0.3506660759449005, "learning_rate": 1.2547840901892452e-05, "loss": 0.4155, "step": 30506 }, { "epoch": 0.8376441515650741, "grad_norm": 0.40155160427093506, "learning_rate": 1.254742325946849e-05, "loss": 0.5065, "step": 30507 }, { "epoch": 0.8376716090060407, "grad_norm": 0.3874933123588562, "learning_rate": 1.2547005612292771e-05, "loss": 0.4595, "step": 30508 }, { "epoch": 0.8376990664470071, "grad_norm": 0.3977082371711731, "learning_rate": 1.2546587960366073e-05, "loss": 0.5346, "step": 30509 }, { "epoch": 0.8377265238879736, "grad_norm": 0.42185431718826294, "learning_rate": 1.2546170303689174e-05, "loss": 0.486, "step": 30510 }, { "epoch": 0.8377539813289402, "grad_norm": 0.3981020748615265, "learning_rate": 1.2545752642262859e-05, "loss": 0.4549, "step": 30511 }, { "epoch": 0.8377814387699066, "grad_norm": 0.4689103364944458, "learning_rate": 1.2545334976087898e-05, "loss": 0.5193, "step": 30512 }, { "epoch": 0.8378088962108732, "grad_norm": 0.3558647334575653, "learning_rate": 1.2544917305165077e-05, "loss": 0.4262, "step": 30513 }, { "epoch": 0.8378363536518396, "grad_norm": 0.3798430860042572, "learning_rate": 1.2544499629495173e-05, "loss": 0.554, "step": 30514 }, { "epoch": 0.8378638110928062, "grad_norm": 0.4213593006134033, "learning_rate": 1.2544081949078964e-05, "loss": 0.4245, "step": 30515 }, { "epoch": 0.8378912685337726, "grad_norm": 0.3999357521533966, "learning_rate": 1.2543664263917233e-05, "loss": 0.5636, "step": 30516 }, { "epoch": 0.8379187259747392, "grad_norm": 0.38754233717918396, "learning_rate": 1.2543246574010752e-05, "loss": 0.4964, "step": 30517 }, { "epoch": 0.8379461834157057, "grad_norm": 0.37616366147994995, "learning_rate": 1.254282887936031e-05, "loss": 0.5111, "step": 30518 }, { "epoch": 0.8379736408566721, "grad_norm": 0.41615623235702515, "learning_rate": 1.2542411179966678e-05, "loss": 0.4673, "step": 30519 }, { "epoch": 0.8380010982976387, "grad_norm": 0.37341922521591187, "learning_rate": 1.2541993475830637e-05, "loss": 0.4641, "step": 30520 }, { "epoch": 0.8380285557386051, "grad_norm": 0.3912630081176758, "learning_rate": 1.2541575766952967e-05, "loss": 0.5098, "step": 30521 }, { "epoch": 0.8380560131795717, "grad_norm": 0.4364231824874878, "learning_rate": 1.2541158053334446e-05, "loss": 0.5089, "step": 30522 }, { "epoch": 0.8380834706205381, "grad_norm": 0.5087426900863647, "learning_rate": 1.2540740334975857e-05, "loss": 0.564, "step": 30523 }, { "epoch": 0.8381109280615047, "grad_norm": 0.3941194713115692, "learning_rate": 1.2540322611877973e-05, "loss": 0.5048, "step": 30524 }, { "epoch": 0.8381383855024712, "grad_norm": 0.3691200613975525, "learning_rate": 1.2539904884041578e-05, "loss": 0.4882, "step": 30525 }, { "epoch": 0.8381658429434377, "grad_norm": 0.38718971610069275, "learning_rate": 1.2539487151467452e-05, "loss": 0.4236, "step": 30526 }, { "epoch": 0.8381933003844042, "grad_norm": 0.40315380692481995, "learning_rate": 1.2539069414156368e-05, "loss": 0.4775, "step": 30527 }, { "epoch": 0.8382207578253706, "grad_norm": 0.3857317864894867, "learning_rate": 1.2538651672109112e-05, "loss": 0.5683, "step": 30528 }, { "epoch": 0.8382482152663372, "grad_norm": 0.37971267104148865, "learning_rate": 1.2538233925326459e-05, "loss": 0.4759, "step": 30529 }, { "epoch": 0.8382756727073036, "grad_norm": 0.3438046872615814, "learning_rate": 1.253781617380919e-05, "loss": 0.4465, "step": 30530 }, { "epoch": 0.8383031301482702, "grad_norm": 0.3815001845359802, "learning_rate": 1.2537398417558084e-05, "loss": 0.5467, "step": 30531 }, { "epoch": 0.8383305875892367, "grad_norm": 0.4323759973049164, "learning_rate": 1.253698065657392e-05, "loss": 0.5445, "step": 30532 }, { "epoch": 0.8383580450302032, "grad_norm": 0.38768818974494934, "learning_rate": 1.2536562890857477e-05, "loss": 0.5287, "step": 30533 }, { "epoch": 0.8383855024711697, "grad_norm": 0.37375524640083313, "learning_rate": 1.2536145120409538e-05, "loss": 0.5472, "step": 30534 }, { "epoch": 0.8384129599121362, "grad_norm": 0.3384387493133545, "learning_rate": 1.2535727345230875e-05, "loss": 0.4878, "step": 30535 }, { "epoch": 0.8384404173531027, "grad_norm": 0.39630618691444397, "learning_rate": 1.253530956532227e-05, "loss": 0.5213, "step": 30536 }, { "epoch": 0.8384678747940691, "grad_norm": 0.3707351088523865, "learning_rate": 1.2534891780684509e-05, "loss": 0.5069, "step": 30537 }, { "epoch": 0.8384953322350357, "grad_norm": 0.40763506293296814, "learning_rate": 1.2534473991318362e-05, "loss": 0.4664, "step": 30538 }, { "epoch": 0.8385227896760022, "grad_norm": 0.3907795548439026, "learning_rate": 1.253405619722461e-05, "loss": 0.5353, "step": 30539 }, { "epoch": 0.8385502471169687, "grad_norm": 0.4295741021633148, "learning_rate": 1.2533638398404038e-05, "loss": 0.5194, "step": 30540 }, { "epoch": 0.8385777045579352, "grad_norm": 0.4103579521179199, "learning_rate": 1.253322059485742e-05, "loss": 0.4675, "step": 30541 }, { "epoch": 0.8386051619989017, "grad_norm": 0.34887024760246277, "learning_rate": 1.2532802786585539e-05, "loss": 0.4254, "step": 30542 }, { "epoch": 0.8386326194398682, "grad_norm": 0.4102264940738678, "learning_rate": 1.2532384973589172e-05, "loss": 0.4487, "step": 30543 }, { "epoch": 0.8386600768808347, "grad_norm": 0.40181422233581543, "learning_rate": 1.2531967155869098e-05, "loss": 0.4631, "step": 30544 }, { "epoch": 0.8386875343218012, "grad_norm": 0.3951427936553955, "learning_rate": 1.2531549333426094e-05, "loss": 0.4994, "step": 30545 }, { "epoch": 0.8387149917627678, "grad_norm": 0.3817053735256195, "learning_rate": 1.2531131506260948e-05, "loss": 0.5816, "step": 30546 }, { "epoch": 0.8387424492037342, "grad_norm": 0.4940425157546997, "learning_rate": 1.2530713674374433e-05, "loss": 0.556, "step": 30547 }, { "epoch": 0.8387699066447007, "grad_norm": 0.39306649565696716, "learning_rate": 1.2530295837767326e-05, "loss": 0.5547, "step": 30548 }, { "epoch": 0.8387973640856672, "grad_norm": 0.4140707552433014, "learning_rate": 1.2529877996440412e-05, "loss": 0.5227, "step": 30549 }, { "epoch": 0.8388248215266337, "grad_norm": 0.43041905760765076, "learning_rate": 1.2529460150394467e-05, "loss": 0.4473, "step": 30550 }, { "epoch": 0.8388522789676002, "grad_norm": 0.40430882573127747, "learning_rate": 1.2529042299630271e-05, "loss": 0.568, "step": 30551 }, { "epoch": 0.8388797364085667, "grad_norm": 0.3902410864830017, "learning_rate": 1.2528624444148607e-05, "loss": 0.5017, "step": 30552 }, { "epoch": 0.8389071938495333, "grad_norm": 0.44447848200798035, "learning_rate": 1.252820658395025e-05, "loss": 0.5839, "step": 30553 }, { "epoch": 0.8389346512904997, "grad_norm": 0.4018041789531708, "learning_rate": 1.252778871903598e-05, "loss": 0.54, "step": 30554 }, { "epoch": 0.8389621087314663, "grad_norm": 0.35875967144966125, "learning_rate": 1.2527370849406579e-05, "loss": 0.4572, "step": 30555 }, { "epoch": 0.8389895661724327, "grad_norm": 0.37019219994544983, "learning_rate": 1.252695297506282e-05, "loss": 0.5257, "step": 30556 }, { "epoch": 0.8390170236133992, "grad_norm": 0.36494043469429016, "learning_rate": 1.2526535096005493e-05, "loss": 0.4656, "step": 30557 }, { "epoch": 0.8390444810543657, "grad_norm": 0.34855857491493225, "learning_rate": 1.2526117212235367e-05, "loss": 0.3702, "step": 30558 }, { "epoch": 0.8390719384953322, "grad_norm": 0.44938549399375916, "learning_rate": 1.252569932375323e-05, "loss": 0.4853, "step": 30559 }, { "epoch": 0.8390993959362988, "grad_norm": 0.4690459966659546, "learning_rate": 1.2525281430559857e-05, "loss": 0.501, "step": 30560 }, { "epoch": 0.8391268533772652, "grad_norm": 0.41520318388938904, "learning_rate": 1.2524863532656025e-05, "loss": 0.6192, "step": 30561 }, { "epoch": 0.8391543108182318, "grad_norm": 0.41666877269744873, "learning_rate": 1.2524445630042518e-05, "loss": 0.4896, "step": 30562 }, { "epoch": 0.8391817682591982, "grad_norm": 0.4412323236465454, "learning_rate": 1.2524027722720118e-05, "loss": 0.4934, "step": 30563 }, { "epoch": 0.8392092257001648, "grad_norm": 0.36083659529685974, "learning_rate": 1.2523609810689597e-05, "loss": 0.3908, "step": 30564 }, { "epoch": 0.8392366831411312, "grad_norm": 0.4248509705066681, "learning_rate": 1.2523191893951738e-05, "loss": 0.5339, "step": 30565 }, { "epoch": 0.8392641405820978, "grad_norm": 0.39332589507102966, "learning_rate": 1.252277397250732e-05, "loss": 0.5278, "step": 30566 }, { "epoch": 0.8392915980230643, "grad_norm": 0.3696857988834381, "learning_rate": 1.2522356046357127e-05, "loss": 0.4885, "step": 30567 }, { "epoch": 0.8393190554640307, "grad_norm": 0.38997671008110046, "learning_rate": 1.2521938115501935e-05, "loss": 0.4451, "step": 30568 }, { "epoch": 0.8393465129049973, "grad_norm": 0.375417023897171, "learning_rate": 1.2521520179942519e-05, "loss": 0.4498, "step": 30569 }, { "epoch": 0.8393739703459637, "grad_norm": 0.37603500485420227, "learning_rate": 1.2521102239679665e-05, "loss": 0.4846, "step": 30570 }, { "epoch": 0.8394014277869303, "grad_norm": 0.39804819226264954, "learning_rate": 1.2520684294714152e-05, "loss": 0.473, "step": 30571 }, { "epoch": 0.8394288852278967, "grad_norm": 0.39755845069885254, "learning_rate": 1.2520266345046758e-05, "loss": 0.5236, "step": 30572 }, { "epoch": 0.8394563426688633, "grad_norm": 0.37407106161117554, "learning_rate": 1.2519848390678261e-05, "loss": 0.5417, "step": 30573 }, { "epoch": 0.8394838001098298, "grad_norm": 0.4284287989139557, "learning_rate": 1.2519430431609444e-05, "loss": 0.499, "step": 30574 }, { "epoch": 0.8395112575507963, "grad_norm": 0.41173967719078064, "learning_rate": 1.2519012467841087e-05, "loss": 0.4791, "step": 30575 }, { "epoch": 0.8395387149917628, "grad_norm": 0.3735550045967102, "learning_rate": 1.2518594499373964e-05, "loss": 0.5092, "step": 30576 }, { "epoch": 0.8395661724327292, "grad_norm": 0.4479895532131195, "learning_rate": 1.2518176526208864e-05, "loss": 0.4782, "step": 30577 }, { "epoch": 0.8395936298736958, "grad_norm": 0.45051339268684387, "learning_rate": 1.2517758548346554e-05, "loss": 0.4851, "step": 30578 }, { "epoch": 0.8396210873146622, "grad_norm": 0.42953935265541077, "learning_rate": 1.2517340565787826e-05, "loss": 0.481, "step": 30579 }, { "epoch": 0.8396485447556288, "grad_norm": 0.40934669971466064, "learning_rate": 1.2516922578533453e-05, "loss": 0.544, "step": 30580 }, { "epoch": 0.8396760021965953, "grad_norm": 0.400932639837265, "learning_rate": 1.2516504586584215e-05, "loss": 0.5226, "step": 30581 }, { "epoch": 0.8397034596375618, "grad_norm": 0.4283602833747864, "learning_rate": 1.2516086589940893e-05, "loss": 0.5216, "step": 30582 }, { "epoch": 0.8397309170785283, "grad_norm": 0.30337604880332947, "learning_rate": 1.2515668588604268e-05, "loss": 0.4416, "step": 30583 }, { "epoch": 0.8397583745194948, "grad_norm": 0.7358819842338562, "learning_rate": 1.2515250582575114e-05, "loss": 0.5061, "step": 30584 }, { "epoch": 0.8397858319604613, "grad_norm": 0.36385655403137207, "learning_rate": 1.2514832571854218e-05, "loss": 0.4802, "step": 30585 }, { "epoch": 0.8398132894014277, "grad_norm": 0.38285067677497864, "learning_rate": 1.251441455644236e-05, "loss": 0.5212, "step": 30586 }, { "epoch": 0.8398407468423943, "grad_norm": 0.4087943434715271, "learning_rate": 1.2513996536340312e-05, "loss": 0.5488, "step": 30587 }, { "epoch": 0.8398682042833608, "grad_norm": 0.4052119553089142, "learning_rate": 1.2513578511548858e-05, "loss": 0.558, "step": 30588 }, { "epoch": 0.8398956617243273, "grad_norm": 0.3880019187927246, "learning_rate": 1.2513160482068778e-05, "loss": 0.6085, "step": 30589 }, { "epoch": 0.8399231191652938, "grad_norm": 0.3723009526729584, "learning_rate": 1.2512742447900853e-05, "loss": 0.4189, "step": 30590 }, { "epoch": 0.8399505766062603, "grad_norm": 0.3724484443664551, "learning_rate": 1.2512324409045862e-05, "loss": 0.4398, "step": 30591 }, { "epoch": 0.8399780340472268, "grad_norm": 0.3960621654987335, "learning_rate": 1.251190636550458e-05, "loss": 0.5035, "step": 30592 }, { "epoch": 0.8400054914881933, "grad_norm": 0.37847957015037537, "learning_rate": 1.2511488317277796e-05, "loss": 0.4415, "step": 30593 }, { "epoch": 0.8400329489291598, "grad_norm": 0.4386344850063324, "learning_rate": 1.251107026436628e-05, "loss": 0.552, "step": 30594 }, { "epoch": 0.8400604063701264, "grad_norm": 0.41137126088142395, "learning_rate": 1.2510652206770821e-05, "loss": 0.5459, "step": 30595 }, { "epoch": 0.8400878638110928, "grad_norm": 0.3803212642669678, "learning_rate": 1.2510234144492193e-05, "loss": 0.4352, "step": 30596 }, { "epoch": 0.8401153212520593, "grad_norm": 0.4624294340610504, "learning_rate": 1.2509816077531176e-05, "loss": 0.5412, "step": 30597 }, { "epoch": 0.8401427786930258, "grad_norm": 0.4009767770767212, "learning_rate": 1.2509398005888551e-05, "loss": 0.4563, "step": 30598 }, { "epoch": 0.8401702361339923, "grad_norm": 0.4003833532333374, "learning_rate": 1.25089799295651e-05, "loss": 0.4369, "step": 30599 }, { "epoch": 0.8401976935749588, "grad_norm": 0.39277997612953186, "learning_rate": 1.2508561848561599e-05, "loss": 0.4793, "step": 30600 }, { "epoch": 0.8402251510159253, "grad_norm": 0.4100687801837921, "learning_rate": 1.2508143762878829e-05, "loss": 0.4924, "step": 30601 }, { "epoch": 0.8402526084568919, "grad_norm": 0.34715601801872253, "learning_rate": 1.2507725672517567e-05, "loss": 0.4933, "step": 30602 }, { "epoch": 0.8402800658978583, "grad_norm": 0.3964793086051941, "learning_rate": 1.2507307577478602e-05, "loss": 0.5272, "step": 30603 }, { "epoch": 0.8403075233388249, "grad_norm": 0.381266713142395, "learning_rate": 1.2506889477762707e-05, "loss": 0.5001, "step": 30604 }, { "epoch": 0.8403349807797913, "grad_norm": 0.3887172341346741, "learning_rate": 1.2506471373370661e-05, "loss": 0.4057, "step": 30605 }, { "epoch": 0.8403624382207578, "grad_norm": 0.3514834940433502, "learning_rate": 1.2506053264303248e-05, "loss": 0.4896, "step": 30606 }, { "epoch": 0.8403898956617243, "grad_norm": 0.48293349146842957, "learning_rate": 1.2505635150561243e-05, "loss": 0.5122, "step": 30607 }, { "epoch": 0.8404173531026908, "grad_norm": 0.4390162229537964, "learning_rate": 1.2505217032145434e-05, "loss": 0.4457, "step": 30608 }, { "epoch": 0.8404448105436574, "grad_norm": 0.7867057919502258, "learning_rate": 1.2504798909056593e-05, "loss": 0.5473, "step": 30609 }, { "epoch": 0.8404722679846238, "grad_norm": 0.39432069659233093, "learning_rate": 1.25043807812955e-05, "loss": 0.4795, "step": 30610 }, { "epoch": 0.8404997254255904, "grad_norm": 0.4152894914150238, "learning_rate": 1.250396264886294e-05, "loss": 0.5287, "step": 30611 }, { "epoch": 0.8405271828665568, "grad_norm": 0.3947867751121521, "learning_rate": 1.250354451175969e-05, "loss": 0.4702, "step": 30612 }, { "epoch": 0.8405546403075234, "grad_norm": 0.5227567553520203, "learning_rate": 1.250312636998653e-05, "loss": 0.6095, "step": 30613 }, { "epoch": 0.8405820977484898, "grad_norm": 0.4005325436592102, "learning_rate": 1.2502708223544243e-05, "loss": 0.4249, "step": 30614 }, { "epoch": 0.8406095551894563, "grad_norm": 0.3780266344547272, "learning_rate": 1.2502290072433602e-05, "loss": 0.4156, "step": 30615 }, { "epoch": 0.8406370126304229, "grad_norm": 0.4126477837562561, "learning_rate": 1.2501871916655395e-05, "loss": 0.5928, "step": 30616 }, { "epoch": 0.8406644700713893, "grad_norm": 0.39426368474960327, "learning_rate": 1.2501453756210397e-05, "loss": 0.5461, "step": 30617 }, { "epoch": 0.8406919275123559, "grad_norm": 0.36441686749458313, "learning_rate": 1.2501035591099388e-05, "loss": 0.5047, "step": 30618 }, { "epoch": 0.8407193849533223, "grad_norm": 0.40143176913261414, "learning_rate": 1.2500617421323151e-05, "loss": 0.5396, "step": 30619 }, { "epoch": 0.8407468423942889, "grad_norm": 0.3671274483203888, "learning_rate": 1.2500199246882464e-05, "loss": 0.5081, "step": 30620 }, { "epoch": 0.8407742998352553, "grad_norm": 0.41633185744285583, "learning_rate": 1.2499781067778108e-05, "loss": 0.5194, "step": 30621 }, { "epoch": 0.8408017572762219, "grad_norm": 0.35814476013183594, "learning_rate": 1.2499362884010864e-05, "loss": 0.5496, "step": 30622 }, { "epoch": 0.8408292147171884, "grad_norm": 0.4559701979160309, "learning_rate": 1.2498944695581505e-05, "loss": 0.5326, "step": 30623 }, { "epoch": 0.8408566721581548, "grad_norm": 0.3734596073627472, "learning_rate": 1.2498526502490822e-05, "loss": 0.4737, "step": 30624 }, { "epoch": 0.8408841295991214, "grad_norm": 0.4217323064804077, "learning_rate": 1.2498108304739586e-05, "loss": 0.4254, "step": 30625 }, { "epoch": 0.8409115870400878, "grad_norm": 0.3695915937423706, "learning_rate": 1.2497690102328583e-05, "loss": 0.4968, "step": 30626 }, { "epoch": 0.8409390444810544, "grad_norm": 0.40209925174713135, "learning_rate": 1.249727189525859e-05, "loss": 0.4791, "step": 30627 }, { "epoch": 0.8409665019220208, "grad_norm": 0.3817031979560852, "learning_rate": 1.2496853683530388e-05, "loss": 0.5595, "step": 30628 }, { "epoch": 0.8409939593629874, "grad_norm": 0.4128388464450836, "learning_rate": 1.2496435467144759e-05, "loss": 0.5508, "step": 30629 }, { "epoch": 0.8410214168039539, "grad_norm": 0.3706955909729004, "learning_rate": 1.249601724610248e-05, "loss": 0.4445, "step": 30630 }, { "epoch": 0.8410488742449204, "grad_norm": 0.4104049801826477, "learning_rate": 1.249559902040433e-05, "loss": 0.5328, "step": 30631 }, { "epoch": 0.8410763316858869, "grad_norm": 0.3648521900177002, "learning_rate": 1.2495180790051093e-05, "loss": 0.5032, "step": 30632 }, { "epoch": 0.8411037891268534, "grad_norm": 0.40022483468055725, "learning_rate": 1.2494762555043547e-05, "loss": 0.5111, "step": 30633 }, { "epoch": 0.8411312465678199, "grad_norm": 0.44694384932518005, "learning_rate": 1.2494344315382474e-05, "loss": 0.5121, "step": 30634 }, { "epoch": 0.8411587040087863, "grad_norm": 0.40759822726249695, "learning_rate": 1.2493926071068651e-05, "loss": 0.4604, "step": 30635 }, { "epoch": 0.8411861614497529, "grad_norm": 0.3841599225997925, "learning_rate": 1.2493507822102859e-05, "loss": 0.5095, "step": 30636 }, { "epoch": 0.8412136188907194, "grad_norm": 0.33726054430007935, "learning_rate": 1.249308956848588e-05, "loss": 0.4834, "step": 30637 }, { "epoch": 0.8412410763316859, "grad_norm": 0.38278672099113464, "learning_rate": 1.2492671310218494e-05, "loss": 0.4549, "step": 30638 }, { "epoch": 0.8412685337726524, "grad_norm": 0.41481173038482666, "learning_rate": 1.2492253047301481e-05, "loss": 0.4514, "step": 30639 }, { "epoch": 0.8412959912136189, "grad_norm": 0.4671052098274231, "learning_rate": 1.2491834779735621e-05, "loss": 0.5251, "step": 30640 }, { "epoch": 0.8413234486545854, "grad_norm": 0.4366128742694855, "learning_rate": 1.2491416507521692e-05, "loss": 0.5283, "step": 30641 }, { "epoch": 0.8413509060955519, "grad_norm": 0.40222224593162537, "learning_rate": 1.2490998230660476e-05, "loss": 0.5011, "step": 30642 }, { "epoch": 0.8413783635365184, "grad_norm": 0.4204326868057251, "learning_rate": 1.2490579949152757e-05, "loss": 0.4713, "step": 30643 }, { "epoch": 0.841405820977485, "grad_norm": 0.38050511479377747, "learning_rate": 1.2490161662999304e-05, "loss": 0.435, "step": 30644 }, { "epoch": 0.8414332784184514, "grad_norm": 0.3957335948944092, "learning_rate": 1.2489743372200913e-05, "loss": 0.4745, "step": 30645 }, { "epoch": 0.8414607358594179, "grad_norm": 0.3603295087814331, "learning_rate": 1.248932507675835e-05, "loss": 0.4124, "step": 30646 }, { "epoch": 0.8414881933003844, "grad_norm": 0.4290843605995178, "learning_rate": 1.2488906776672405e-05, "loss": 0.598, "step": 30647 }, { "epoch": 0.8415156507413509, "grad_norm": 0.3954031467437744, "learning_rate": 1.2488488471943854e-05, "loss": 0.5429, "step": 30648 }, { "epoch": 0.8415431081823174, "grad_norm": 0.3519553542137146, "learning_rate": 1.2488070162573474e-05, "loss": 0.5676, "step": 30649 }, { "epoch": 0.8415705656232839, "grad_norm": 0.4318679869174957, "learning_rate": 1.2487651848562052e-05, "loss": 0.4529, "step": 30650 }, { "epoch": 0.8415980230642505, "grad_norm": 0.34621772170066833, "learning_rate": 1.2487233529910366e-05, "loss": 0.4622, "step": 30651 }, { "epoch": 0.8416254805052169, "grad_norm": 0.35052043199539185, "learning_rate": 1.2486815206619195e-05, "loss": 0.5176, "step": 30652 }, { "epoch": 0.8416529379461835, "grad_norm": 0.49845945835113525, "learning_rate": 1.248639687868932e-05, "loss": 0.4927, "step": 30653 }, { "epoch": 0.8416803953871499, "grad_norm": 0.3979085087776184, "learning_rate": 1.248597854612152e-05, "loss": 0.484, "step": 30654 }, { "epoch": 0.8417078528281164, "grad_norm": 0.40298038721084595, "learning_rate": 1.2485560208916579e-05, "loss": 0.4798, "step": 30655 }, { "epoch": 0.8417353102690829, "grad_norm": 0.3473118543624878, "learning_rate": 1.248514186707527e-05, "loss": 0.4182, "step": 30656 }, { "epoch": 0.8417627677100494, "grad_norm": 0.3972627818584442, "learning_rate": 1.2484723520598383e-05, "loss": 0.4186, "step": 30657 }, { "epoch": 0.841790225151016, "grad_norm": 0.40903374552726746, "learning_rate": 1.2484305169486692e-05, "loss": 0.5975, "step": 30658 }, { "epoch": 0.8418176825919824, "grad_norm": 0.44929078221321106, "learning_rate": 1.2483886813740979e-05, "loss": 0.5353, "step": 30659 }, { "epoch": 0.841845140032949, "grad_norm": 0.3850041329860687, "learning_rate": 1.2483468453362026e-05, "loss": 0.5101, "step": 30660 }, { "epoch": 0.8418725974739154, "grad_norm": 0.42702826857566833, "learning_rate": 1.248305008835061e-05, "loss": 0.53, "step": 30661 }, { "epoch": 0.841900054914882, "grad_norm": 0.385602742433548, "learning_rate": 1.2482631718707511e-05, "loss": 0.5222, "step": 30662 }, { "epoch": 0.8419275123558484, "grad_norm": 0.38588955998420715, "learning_rate": 1.2482213344433516e-05, "loss": 0.4856, "step": 30663 }, { "epoch": 0.841954969796815, "grad_norm": 0.40398669242858887, "learning_rate": 1.2481794965529397e-05, "loss": 0.5376, "step": 30664 }, { "epoch": 0.8419824272377814, "grad_norm": 0.49422743916511536, "learning_rate": 1.2481376581995941e-05, "loss": 0.4352, "step": 30665 }, { "epoch": 0.8420098846787479, "grad_norm": 0.39098626375198364, "learning_rate": 1.2480958193833925e-05, "loss": 0.4245, "step": 30666 }, { "epoch": 0.8420373421197145, "grad_norm": 0.39878469705581665, "learning_rate": 1.2480539801044129e-05, "loss": 0.5179, "step": 30667 }, { "epoch": 0.8420647995606809, "grad_norm": 0.41309472918510437, "learning_rate": 1.2480121403627337e-05, "loss": 0.5074, "step": 30668 }, { "epoch": 0.8420922570016475, "grad_norm": 0.37900787591934204, "learning_rate": 1.2479703001584324e-05, "loss": 0.5158, "step": 30669 }, { "epoch": 0.8421197144426139, "grad_norm": 0.38396382331848145, "learning_rate": 1.2479284594915875e-05, "loss": 0.456, "step": 30670 }, { "epoch": 0.8421471718835805, "grad_norm": 0.406526654958725, "learning_rate": 1.247886618362277e-05, "loss": 0.4787, "step": 30671 }, { "epoch": 0.8421746293245469, "grad_norm": 0.3931874632835388, "learning_rate": 1.2478447767705785e-05, "loss": 0.398, "step": 30672 }, { "epoch": 0.8422020867655134, "grad_norm": 0.358964741230011, "learning_rate": 1.2478029347165707e-05, "loss": 0.4497, "step": 30673 }, { "epoch": 0.84222954420648, "grad_norm": 0.4008294641971588, "learning_rate": 1.2477610922003311e-05, "loss": 0.5403, "step": 30674 }, { "epoch": 0.8422570016474464, "grad_norm": 0.42626452445983887, "learning_rate": 1.2477192492219383e-05, "loss": 0.552, "step": 30675 }, { "epoch": 0.842284459088413, "grad_norm": 0.37605512142181396, "learning_rate": 1.2476774057814697e-05, "loss": 0.472, "step": 30676 }, { "epoch": 0.8423119165293794, "grad_norm": 0.35545578598976135, "learning_rate": 1.2476355618790041e-05, "loss": 0.4454, "step": 30677 }, { "epoch": 0.842339373970346, "grad_norm": 0.44550999999046326, "learning_rate": 1.247593717514619e-05, "loss": 0.4729, "step": 30678 }, { "epoch": 0.8423668314113124, "grad_norm": 0.3676581382751465, "learning_rate": 1.2475518726883924e-05, "loss": 0.4004, "step": 30679 }, { "epoch": 0.842394288852279, "grad_norm": 0.3795294761657715, "learning_rate": 1.2475100274004027e-05, "loss": 0.4968, "step": 30680 }, { "epoch": 0.8424217462932455, "grad_norm": 0.36627352237701416, "learning_rate": 1.2474681816507275e-05, "loss": 0.5477, "step": 30681 }, { "epoch": 0.842449203734212, "grad_norm": 0.37374192476272583, "learning_rate": 1.2474263354394456e-05, "loss": 0.4772, "step": 30682 }, { "epoch": 0.8424766611751785, "grad_norm": 0.42114371061325073, "learning_rate": 1.2473844887666344e-05, "loss": 0.5782, "step": 30683 }, { "epoch": 0.8425041186161449, "grad_norm": 0.4570796489715576, "learning_rate": 1.2473426416323722e-05, "loss": 0.4974, "step": 30684 }, { "epoch": 0.8425315760571115, "grad_norm": 0.396892249584198, "learning_rate": 1.247300794036737e-05, "loss": 0.5301, "step": 30685 }, { "epoch": 0.8425590334980779, "grad_norm": 0.3989466428756714, "learning_rate": 1.2472589459798067e-05, "loss": 0.4949, "step": 30686 }, { "epoch": 0.8425864909390445, "grad_norm": 0.3614695370197296, "learning_rate": 1.2472170974616599e-05, "loss": 0.4357, "step": 30687 }, { "epoch": 0.842613948380011, "grad_norm": 0.4259668290615082, "learning_rate": 1.2471752484823743e-05, "loss": 0.5044, "step": 30688 }, { "epoch": 0.8426414058209775, "grad_norm": 0.4482545852661133, "learning_rate": 1.2471333990420278e-05, "loss": 0.5364, "step": 30689 }, { "epoch": 0.842668863261944, "grad_norm": 0.4045089781284332, "learning_rate": 1.2470915491406988e-05, "loss": 0.5442, "step": 30690 }, { "epoch": 0.8426963207029105, "grad_norm": 0.40730026364326477, "learning_rate": 1.2470496987784649e-05, "loss": 0.4646, "step": 30691 }, { "epoch": 0.842723778143877, "grad_norm": 0.4704570174217224, "learning_rate": 1.2470078479554047e-05, "loss": 0.5033, "step": 30692 }, { "epoch": 0.8427512355848434, "grad_norm": 0.39906933903694153, "learning_rate": 1.246965996671596e-05, "loss": 0.47, "step": 30693 }, { "epoch": 0.84277869302581, "grad_norm": 0.3706224262714386, "learning_rate": 1.2469241449271167e-05, "loss": 0.559, "step": 30694 }, { "epoch": 0.8428061504667765, "grad_norm": 0.36619266867637634, "learning_rate": 1.2468822927220454e-05, "loss": 0.4813, "step": 30695 }, { "epoch": 0.842833607907743, "grad_norm": 0.3440684676170349, "learning_rate": 1.2468404400564595e-05, "loss": 0.4262, "step": 30696 }, { "epoch": 0.8428610653487095, "grad_norm": 0.3524691164493561, "learning_rate": 1.2467985869304378e-05, "loss": 0.4951, "step": 30697 }, { "epoch": 0.842888522789676, "grad_norm": 0.40912672877311707, "learning_rate": 1.2467567333440576e-05, "loss": 0.4454, "step": 30698 }, { "epoch": 0.8429159802306425, "grad_norm": 0.37947049736976624, "learning_rate": 1.2467148792973974e-05, "loss": 0.5128, "step": 30699 }, { "epoch": 0.842943437671609, "grad_norm": 0.37977778911590576, "learning_rate": 1.2466730247905353e-05, "loss": 0.4664, "step": 30700 }, { "epoch": 0.8429708951125755, "grad_norm": 0.46851998567581177, "learning_rate": 1.246631169823549e-05, "loss": 0.5384, "step": 30701 }, { "epoch": 0.842998352553542, "grad_norm": 0.37341561913490295, "learning_rate": 1.2465893143965173e-05, "loss": 0.5048, "step": 30702 }, { "epoch": 0.8430258099945085, "grad_norm": 0.37704819440841675, "learning_rate": 1.2465474585095176e-05, "loss": 0.4334, "step": 30703 }, { "epoch": 0.843053267435475, "grad_norm": 0.4067319631576538, "learning_rate": 1.2465056021626283e-05, "loss": 0.5662, "step": 30704 }, { "epoch": 0.8430807248764415, "grad_norm": 0.3846971392631531, "learning_rate": 1.2464637453559274e-05, "loss": 0.478, "step": 30705 }, { "epoch": 0.843108182317408, "grad_norm": 0.3550218343734741, "learning_rate": 1.2464218880894927e-05, "loss": 0.4576, "step": 30706 }, { "epoch": 0.8431356397583745, "grad_norm": 0.36386606097221375, "learning_rate": 1.2463800303634028e-05, "loss": 0.4986, "step": 30707 }, { "epoch": 0.843163097199341, "grad_norm": 0.40859976410865784, "learning_rate": 1.2463381721777352e-05, "loss": 0.5197, "step": 30708 }, { "epoch": 0.8431905546403076, "grad_norm": 0.37488341331481934, "learning_rate": 1.2462963135325685e-05, "loss": 0.4141, "step": 30709 }, { "epoch": 0.843218012081274, "grad_norm": 0.39592963457107544, "learning_rate": 1.2462544544279806e-05, "loss": 0.4491, "step": 30710 }, { "epoch": 0.8432454695222406, "grad_norm": 0.38112515211105347, "learning_rate": 1.2462125948640493e-05, "loss": 0.463, "step": 30711 }, { "epoch": 0.843272926963207, "grad_norm": 0.39374786615371704, "learning_rate": 1.2461707348408535e-05, "loss": 0.4699, "step": 30712 }, { "epoch": 0.8433003844041735, "grad_norm": 0.3705957531929016, "learning_rate": 1.2461288743584701e-05, "loss": 0.485, "step": 30713 }, { "epoch": 0.84332784184514, "grad_norm": 0.3590146601200104, "learning_rate": 1.246087013416978e-05, "loss": 0.4148, "step": 30714 }, { "epoch": 0.8433552992861065, "grad_norm": 0.40518423914909363, "learning_rate": 1.2460451520164552e-05, "loss": 0.465, "step": 30715 }, { "epoch": 0.8433827567270731, "grad_norm": 0.3891465365886688, "learning_rate": 1.2460032901569794e-05, "loss": 0.4836, "step": 30716 }, { "epoch": 0.8434102141680395, "grad_norm": 0.5013729333877563, "learning_rate": 1.2459614278386292e-05, "loss": 0.5064, "step": 30717 }, { "epoch": 0.8434376716090061, "grad_norm": 0.35132691264152527, "learning_rate": 1.2459195650614823e-05, "loss": 0.5516, "step": 30718 }, { "epoch": 0.8434651290499725, "grad_norm": 0.33527764678001404, "learning_rate": 1.245877701825617e-05, "loss": 0.5047, "step": 30719 }, { "epoch": 0.8434925864909391, "grad_norm": 0.3903636038303375, "learning_rate": 1.2458358381311113e-05, "loss": 0.4574, "step": 30720 }, { "epoch": 0.8435200439319055, "grad_norm": 0.3909883499145508, "learning_rate": 1.245793973978043e-05, "loss": 0.5346, "step": 30721 }, { "epoch": 0.843547501372872, "grad_norm": 0.3726802468299866, "learning_rate": 1.245752109366491e-05, "loss": 0.5231, "step": 30722 }, { "epoch": 0.8435749588138386, "grad_norm": 0.4226968288421631, "learning_rate": 1.2457102442965328e-05, "loss": 0.5354, "step": 30723 }, { "epoch": 0.843602416254805, "grad_norm": 0.3827008306980133, "learning_rate": 1.2456683787682464e-05, "loss": 0.4928, "step": 30724 }, { "epoch": 0.8436298736957716, "grad_norm": 0.3727739453315735, "learning_rate": 1.2456265127817101e-05, "loss": 0.3599, "step": 30725 }, { "epoch": 0.843657331136738, "grad_norm": 0.38728219270706177, "learning_rate": 1.2455846463370019e-05, "loss": 0.3888, "step": 30726 }, { "epoch": 0.8436847885777046, "grad_norm": 0.36932849884033203, "learning_rate": 1.2455427794342e-05, "loss": 0.5101, "step": 30727 }, { "epoch": 0.843712246018671, "grad_norm": 0.3912103474140167, "learning_rate": 1.2455009120733825e-05, "loss": 0.5104, "step": 30728 }, { "epoch": 0.8437397034596376, "grad_norm": 0.3767103850841522, "learning_rate": 1.2454590442546272e-05, "loss": 0.452, "step": 30729 }, { "epoch": 0.8437671609006041, "grad_norm": 0.42547744512557983, "learning_rate": 1.245417175978013e-05, "loss": 0.456, "step": 30730 }, { "epoch": 0.8437946183415705, "grad_norm": 0.3675438463687897, "learning_rate": 1.2453753072436168e-05, "loss": 0.4552, "step": 30731 }, { "epoch": 0.8438220757825371, "grad_norm": 0.38660386204719543, "learning_rate": 1.2453334380515177e-05, "loss": 0.4823, "step": 30732 }, { "epoch": 0.8438495332235035, "grad_norm": 0.39893409609794617, "learning_rate": 1.2452915684017935e-05, "loss": 0.5238, "step": 30733 }, { "epoch": 0.8438769906644701, "grad_norm": 0.38012954592704773, "learning_rate": 1.2452496982945219e-05, "loss": 0.4393, "step": 30734 }, { "epoch": 0.8439044481054365, "grad_norm": 0.4679085910320282, "learning_rate": 1.2452078277297816e-05, "loss": 0.5604, "step": 30735 }, { "epoch": 0.8439319055464031, "grad_norm": 0.39917293190956116, "learning_rate": 1.2451659567076504e-05, "loss": 0.4299, "step": 30736 }, { "epoch": 0.8439593629873696, "grad_norm": 0.3971346616744995, "learning_rate": 1.2451240852282063e-05, "loss": 0.4445, "step": 30737 }, { "epoch": 0.8439868204283361, "grad_norm": 0.36895322799682617, "learning_rate": 1.2450822132915276e-05, "loss": 0.4696, "step": 30738 }, { "epoch": 0.8440142778693026, "grad_norm": 0.36785027384757996, "learning_rate": 1.2450403408976923e-05, "loss": 0.4995, "step": 30739 }, { "epoch": 0.844041735310269, "grad_norm": 0.3407529890537262, "learning_rate": 1.2449984680467788e-05, "loss": 0.4809, "step": 30740 }, { "epoch": 0.8440691927512356, "grad_norm": 0.41946470737457275, "learning_rate": 1.2449565947388651e-05, "loss": 0.5164, "step": 30741 }, { "epoch": 0.844096650192202, "grad_norm": 0.5481218695640564, "learning_rate": 1.2449147209740287e-05, "loss": 0.3922, "step": 30742 }, { "epoch": 0.8441241076331686, "grad_norm": 0.42028671503067017, "learning_rate": 1.2448728467523482e-05, "loss": 0.5084, "step": 30743 }, { "epoch": 0.8441515650741351, "grad_norm": 0.46516045928001404, "learning_rate": 1.244830972073902e-05, "loss": 0.55, "step": 30744 }, { "epoch": 0.8441790225151016, "grad_norm": 0.4107125401496887, "learning_rate": 1.2447890969387677e-05, "loss": 0.4555, "step": 30745 }, { "epoch": 0.8442064799560681, "grad_norm": 0.44805437326431274, "learning_rate": 1.2447472213470237e-05, "loss": 0.5526, "step": 30746 }, { "epoch": 0.8442339373970346, "grad_norm": 0.3675466477870941, "learning_rate": 1.244705345298748e-05, "loss": 0.5257, "step": 30747 }, { "epoch": 0.8442613948380011, "grad_norm": 0.4100622236728668, "learning_rate": 1.2446634687940188e-05, "loss": 0.4795, "step": 30748 }, { "epoch": 0.8442888522789675, "grad_norm": 0.5004065632820129, "learning_rate": 1.2446215918329139e-05, "loss": 0.5186, "step": 30749 }, { "epoch": 0.8443163097199341, "grad_norm": 0.3515418469905853, "learning_rate": 1.2445797144155118e-05, "loss": 0.428, "step": 30750 }, { "epoch": 0.8443437671609006, "grad_norm": 0.3505128026008606, "learning_rate": 1.2445378365418908e-05, "loss": 0.5068, "step": 30751 }, { "epoch": 0.8443712246018671, "grad_norm": 0.3595731556415558, "learning_rate": 1.244495958212128e-05, "loss": 0.4357, "step": 30752 }, { "epoch": 0.8443986820428336, "grad_norm": 0.40208426117897034, "learning_rate": 1.2444540794263027e-05, "loss": 0.4948, "step": 30753 }, { "epoch": 0.8444261394838001, "grad_norm": 0.41855236887931824, "learning_rate": 1.2444122001844924e-05, "loss": 0.514, "step": 30754 }, { "epoch": 0.8444535969247666, "grad_norm": 0.4175373613834381, "learning_rate": 1.2443703204867755e-05, "loss": 0.5036, "step": 30755 }, { "epoch": 0.8444810543657331, "grad_norm": 0.5334622263908386, "learning_rate": 1.2443284403332296e-05, "loss": 0.47, "step": 30756 }, { "epoch": 0.8445085118066996, "grad_norm": 0.43275851011276245, "learning_rate": 1.2442865597239333e-05, "loss": 0.5941, "step": 30757 }, { "epoch": 0.8445359692476662, "grad_norm": 0.39823341369628906, "learning_rate": 1.2442446786589647e-05, "loss": 0.586, "step": 30758 }, { "epoch": 0.8445634266886326, "grad_norm": 0.3945773243904114, "learning_rate": 1.244202797138402e-05, "loss": 0.4616, "step": 30759 }, { "epoch": 0.8445908841295992, "grad_norm": 0.4193163514137268, "learning_rate": 1.2441609151623228e-05, "loss": 0.4916, "step": 30760 }, { "epoch": 0.8446183415705656, "grad_norm": 0.42948928475379944, "learning_rate": 1.2441190327308057e-05, "loss": 0.472, "step": 30761 }, { "epoch": 0.8446457990115321, "grad_norm": 0.4326108396053314, "learning_rate": 1.2440771498439286e-05, "loss": 0.5863, "step": 30762 }, { "epoch": 0.8446732564524986, "grad_norm": 0.4393850862979889, "learning_rate": 1.2440352665017699e-05, "loss": 0.4993, "step": 30763 }, { "epoch": 0.8447007138934651, "grad_norm": 0.3841709494590759, "learning_rate": 1.2439933827044075e-05, "loss": 0.5338, "step": 30764 }, { "epoch": 0.8447281713344317, "grad_norm": 0.3892265558242798, "learning_rate": 1.2439514984519195e-05, "loss": 0.4557, "step": 30765 }, { "epoch": 0.8447556287753981, "grad_norm": 0.38360217213630676, "learning_rate": 1.2439096137443842e-05, "loss": 0.5009, "step": 30766 }, { "epoch": 0.8447830862163647, "grad_norm": 0.3927462100982666, "learning_rate": 1.2438677285818797e-05, "loss": 0.4532, "step": 30767 }, { "epoch": 0.8448105436573311, "grad_norm": 0.38551098108291626, "learning_rate": 1.2438258429644836e-05, "loss": 0.4821, "step": 30768 }, { "epoch": 0.8448380010982977, "grad_norm": 0.37478873133659363, "learning_rate": 1.243783956892275e-05, "loss": 0.5584, "step": 30769 }, { "epoch": 0.8448654585392641, "grad_norm": 0.37800586223602295, "learning_rate": 1.2437420703653309e-05, "loss": 0.4341, "step": 30770 }, { "epoch": 0.8448929159802306, "grad_norm": 0.3979164659976959, "learning_rate": 1.2437001833837306e-05, "loss": 0.4469, "step": 30771 }, { "epoch": 0.8449203734211972, "grad_norm": 0.3658542037010193, "learning_rate": 1.2436582959475517e-05, "loss": 0.4918, "step": 30772 }, { "epoch": 0.8449478308621636, "grad_norm": 0.34203478693962097, "learning_rate": 1.2436164080568718e-05, "loss": 0.482, "step": 30773 }, { "epoch": 0.8449752883031302, "grad_norm": 0.3595828711986542, "learning_rate": 1.24357451971177e-05, "loss": 0.529, "step": 30774 }, { "epoch": 0.8450027457440966, "grad_norm": 0.4439932703971863, "learning_rate": 1.2435326309123236e-05, "loss": 0.4997, "step": 30775 }, { "epoch": 0.8450302031850632, "grad_norm": 0.3953765034675598, "learning_rate": 1.2434907416586115e-05, "loss": 0.4871, "step": 30776 }, { "epoch": 0.8450576606260296, "grad_norm": 0.38154059648513794, "learning_rate": 1.2434488519507116e-05, "loss": 0.4352, "step": 30777 }, { "epoch": 0.8450851180669962, "grad_norm": 0.3829690217971802, "learning_rate": 1.2434069617887015e-05, "loss": 0.4326, "step": 30778 }, { "epoch": 0.8451125755079627, "grad_norm": 0.391422301530838, "learning_rate": 1.2433650711726597e-05, "loss": 0.5101, "step": 30779 }, { "epoch": 0.8451400329489291, "grad_norm": 0.3915350139141083, "learning_rate": 1.2433231801026644e-05, "loss": 0.513, "step": 30780 }, { "epoch": 0.8451674903898957, "grad_norm": 0.36687204241752625, "learning_rate": 1.2432812885787939e-05, "loss": 0.4471, "step": 30781 }, { "epoch": 0.8451949478308621, "grad_norm": 0.386595219373703, "learning_rate": 1.2432393966011263e-05, "loss": 0.4606, "step": 30782 }, { "epoch": 0.8452224052718287, "grad_norm": 0.35161641240119934, "learning_rate": 1.2431975041697391e-05, "loss": 0.5018, "step": 30783 }, { "epoch": 0.8452498627127951, "grad_norm": 0.3611421585083008, "learning_rate": 1.2431556112847112e-05, "loss": 0.4771, "step": 30784 }, { "epoch": 0.8452773201537617, "grad_norm": 0.41973403096199036, "learning_rate": 1.2431137179461206e-05, "loss": 0.4916, "step": 30785 }, { "epoch": 0.8453047775947282, "grad_norm": 0.40470975637435913, "learning_rate": 1.2430718241540451e-05, "loss": 0.4759, "step": 30786 }, { "epoch": 0.8453322350356947, "grad_norm": 0.4087924063205719, "learning_rate": 1.2430299299085634e-05, "loss": 0.5453, "step": 30787 }, { "epoch": 0.8453596924766612, "grad_norm": 0.4077273905277252, "learning_rate": 1.2429880352097528e-05, "loss": 0.5106, "step": 30788 }, { "epoch": 0.8453871499176276, "grad_norm": 0.38181692361831665, "learning_rate": 1.2429461400576923e-05, "loss": 0.4189, "step": 30789 }, { "epoch": 0.8454146073585942, "grad_norm": 0.3874835968017578, "learning_rate": 1.2429042444524597e-05, "loss": 0.3997, "step": 30790 }, { "epoch": 0.8454420647995606, "grad_norm": 0.38676074147224426, "learning_rate": 1.2428623483941329e-05, "loss": 0.4773, "step": 30791 }, { "epoch": 0.8454695222405272, "grad_norm": 0.4124961495399475, "learning_rate": 1.2428204518827907e-05, "loss": 0.526, "step": 30792 }, { "epoch": 0.8454969796814937, "grad_norm": 0.38988107442855835, "learning_rate": 1.2427785549185104e-05, "loss": 0.4775, "step": 30793 }, { "epoch": 0.8455244371224602, "grad_norm": 0.39029398560523987, "learning_rate": 1.2427366575013711e-05, "loss": 0.5259, "step": 30794 }, { "epoch": 0.8455518945634267, "grad_norm": 0.39702367782592773, "learning_rate": 1.2426947596314503e-05, "loss": 0.5327, "step": 30795 }, { "epoch": 0.8455793520043932, "grad_norm": 0.3583374321460724, "learning_rate": 1.2426528613088262e-05, "loss": 0.4801, "step": 30796 }, { "epoch": 0.8456068094453597, "grad_norm": 0.3858087658882141, "learning_rate": 1.2426109625335772e-05, "loss": 0.5218, "step": 30797 }, { "epoch": 0.8456342668863261, "grad_norm": 0.38249272108078003, "learning_rate": 1.2425690633057811e-05, "loss": 0.3806, "step": 30798 }, { "epoch": 0.8456617243272927, "grad_norm": 0.3798992931842804, "learning_rate": 1.2425271636255165e-05, "loss": 0.4069, "step": 30799 }, { "epoch": 0.8456891817682592, "grad_norm": 0.3444012403488159, "learning_rate": 1.2424852634928612e-05, "loss": 0.5043, "step": 30800 }, { "epoch": 0.8457166392092257, "grad_norm": 0.4105103313922882, "learning_rate": 1.2424433629078937e-05, "loss": 0.5296, "step": 30801 }, { "epoch": 0.8457440966501922, "grad_norm": 0.3665727376937866, "learning_rate": 1.2424014618706917e-05, "loss": 0.4582, "step": 30802 }, { "epoch": 0.8457715540911587, "grad_norm": 0.3720276653766632, "learning_rate": 1.2423595603813339e-05, "loss": 0.4677, "step": 30803 }, { "epoch": 0.8457990115321252, "grad_norm": 0.47655755281448364, "learning_rate": 1.2423176584398977e-05, "loss": 0.5319, "step": 30804 }, { "epoch": 0.8458264689730917, "grad_norm": 0.3740408420562744, "learning_rate": 1.242275756046462e-05, "loss": 0.4999, "step": 30805 }, { "epoch": 0.8458539264140582, "grad_norm": 0.388681024312973, "learning_rate": 1.2422338532011047e-05, "loss": 0.4833, "step": 30806 }, { "epoch": 0.8458813838550248, "grad_norm": 0.38259458541870117, "learning_rate": 1.242191949903904e-05, "loss": 0.4691, "step": 30807 }, { "epoch": 0.8459088412959912, "grad_norm": 0.3873147666454315, "learning_rate": 1.242150046154938e-05, "loss": 0.5252, "step": 30808 }, { "epoch": 0.8459362987369577, "grad_norm": 0.38362711668014526, "learning_rate": 1.2421081419542846e-05, "loss": 0.4584, "step": 30809 }, { "epoch": 0.8459637561779242, "grad_norm": 0.37525492906570435, "learning_rate": 1.2420662373020227e-05, "loss": 0.5049, "step": 30810 }, { "epoch": 0.8459912136188907, "grad_norm": 0.5058762431144714, "learning_rate": 1.2420243321982297e-05, "loss": 0.5475, "step": 30811 }, { "epoch": 0.8460186710598572, "grad_norm": 0.41086187958717346, "learning_rate": 1.2419824266429843e-05, "loss": 0.4821, "step": 30812 }, { "epoch": 0.8460461285008237, "grad_norm": 0.6994169354438782, "learning_rate": 1.2419405206363644e-05, "loss": 0.4854, "step": 30813 }, { "epoch": 0.8460735859417903, "grad_norm": 0.34632453322410583, "learning_rate": 1.241898614178448e-05, "loss": 0.4454, "step": 30814 }, { "epoch": 0.8461010433827567, "grad_norm": 0.4212203621864319, "learning_rate": 1.2418567072693135e-05, "loss": 0.5141, "step": 30815 }, { "epoch": 0.8461285008237233, "grad_norm": 0.3971915543079376, "learning_rate": 1.2418147999090394e-05, "loss": 0.5662, "step": 30816 }, { "epoch": 0.8461559582646897, "grad_norm": 0.35159993171691895, "learning_rate": 1.2417728920977031e-05, "loss": 0.383, "step": 30817 }, { "epoch": 0.8461834157056562, "grad_norm": 0.3830012381076813, "learning_rate": 1.2417309838353833e-05, "loss": 0.481, "step": 30818 }, { "epoch": 0.8462108731466227, "grad_norm": 0.37009960412979126, "learning_rate": 1.241689075122158e-05, "loss": 0.4548, "step": 30819 }, { "epoch": 0.8462383305875892, "grad_norm": 0.40312692523002625, "learning_rate": 1.2416471659581057e-05, "loss": 0.4882, "step": 30820 }, { "epoch": 0.8462657880285558, "grad_norm": 0.37304168939590454, "learning_rate": 1.2416052563433043e-05, "loss": 0.5175, "step": 30821 }, { "epoch": 0.8462932454695222, "grad_norm": 0.4129174053668976, "learning_rate": 1.2415633462778316e-05, "loss": 0.5196, "step": 30822 }, { "epoch": 0.8463207029104888, "grad_norm": 0.3736322522163391, "learning_rate": 1.2415214357617664e-05, "loss": 0.488, "step": 30823 }, { "epoch": 0.8463481603514552, "grad_norm": 0.37699583172798157, "learning_rate": 1.2414795247951868e-05, "loss": 0.5442, "step": 30824 }, { "epoch": 0.8463756177924218, "grad_norm": 0.41436439752578735, "learning_rate": 1.2414376133781705e-05, "loss": 0.4961, "step": 30825 }, { "epoch": 0.8464030752333882, "grad_norm": 0.3825435936450958, "learning_rate": 1.2413957015107962e-05, "loss": 0.4185, "step": 30826 }, { "epoch": 0.8464305326743548, "grad_norm": 0.4149620831012726, "learning_rate": 1.2413537891931418e-05, "loss": 0.5455, "step": 30827 }, { "epoch": 0.8464579901153213, "grad_norm": 0.37978580594062805, "learning_rate": 1.2413118764252857e-05, "loss": 0.4612, "step": 30828 }, { "epoch": 0.8464854475562877, "grad_norm": 0.3983061611652374, "learning_rate": 1.241269963207306e-05, "loss": 0.5241, "step": 30829 }, { "epoch": 0.8465129049972543, "grad_norm": 0.42849668860435486, "learning_rate": 1.2412280495392803e-05, "loss": 0.4719, "step": 30830 }, { "epoch": 0.8465403624382207, "grad_norm": 0.36405274271965027, "learning_rate": 1.2411861354212877e-05, "loss": 0.4634, "step": 30831 }, { "epoch": 0.8465678198791873, "grad_norm": 0.3840957581996918, "learning_rate": 1.241144220853406e-05, "loss": 0.4906, "step": 30832 }, { "epoch": 0.8465952773201537, "grad_norm": 0.5997002720832825, "learning_rate": 1.2411023058357133e-05, "loss": 0.6239, "step": 30833 }, { "epoch": 0.8466227347611203, "grad_norm": 0.38323572278022766, "learning_rate": 1.2410603903682878e-05, "loss": 0.4215, "step": 30834 }, { "epoch": 0.8466501922020868, "grad_norm": 0.39844903349876404, "learning_rate": 1.2410184744512077e-05, "loss": 0.4611, "step": 30835 }, { "epoch": 0.8466776496430533, "grad_norm": 0.38231945037841797, "learning_rate": 1.2409765580845514e-05, "loss": 0.4283, "step": 30836 }, { "epoch": 0.8467051070840198, "grad_norm": 0.418429970741272, "learning_rate": 1.2409346412683965e-05, "loss": 0.5155, "step": 30837 }, { "epoch": 0.8467325645249862, "grad_norm": 0.4392671585083008, "learning_rate": 1.2408927240028222e-05, "loss": 0.4762, "step": 30838 }, { "epoch": 0.8467600219659528, "grad_norm": 0.3618520200252533, "learning_rate": 1.2408508062879057e-05, "loss": 0.4548, "step": 30839 }, { "epoch": 0.8467874794069192, "grad_norm": 0.3948478400707245, "learning_rate": 1.2408088881237255e-05, "loss": 0.5264, "step": 30840 }, { "epoch": 0.8468149368478858, "grad_norm": 0.4691292941570282, "learning_rate": 1.2407669695103602e-05, "loss": 0.475, "step": 30841 }, { "epoch": 0.8468423942888523, "grad_norm": 0.4491787254810333, "learning_rate": 1.2407250504478872e-05, "loss": 0.4889, "step": 30842 }, { "epoch": 0.8468698517298188, "grad_norm": 0.381631463766098, "learning_rate": 1.2406831309363855e-05, "loss": 0.5406, "step": 30843 }, { "epoch": 0.8468973091707853, "grad_norm": 0.38228392601013184, "learning_rate": 1.2406412109759329e-05, "loss": 0.4461, "step": 30844 }, { "epoch": 0.8469247666117518, "grad_norm": 0.5079454183578491, "learning_rate": 1.2405992905666075e-05, "loss": 0.538, "step": 30845 }, { "epoch": 0.8469522240527183, "grad_norm": 0.35262101888656616, "learning_rate": 1.2405573697084878e-05, "loss": 0.4505, "step": 30846 }, { "epoch": 0.8469796814936847, "grad_norm": 0.4142390787601471, "learning_rate": 1.2405154484016519e-05, "loss": 0.4849, "step": 30847 }, { "epoch": 0.8470071389346513, "grad_norm": 0.45737743377685547, "learning_rate": 1.2404735266461777e-05, "loss": 0.5125, "step": 30848 }, { "epoch": 0.8470345963756178, "grad_norm": 0.3770991265773773, "learning_rate": 1.2404316044421436e-05, "loss": 0.5227, "step": 30849 }, { "epoch": 0.8470620538165843, "grad_norm": 0.3815614879131317, "learning_rate": 1.240389681789628e-05, "loss": 0.4289, "step": 30850 }, { "epoch": 0.8470895112575508, "grad_norm": 0.36415335536003113, "learning_rate": 1.2403477586887087e-05, "loss": 0.4875, "step": 30851 }, { "epoch": 0.8471169686985173, "grad_norm": 0.38489317893981934, "learning_rate": 1.2403058351394645e-05, "loss": 0.435, "step": 30852 }, { "epoch": 0.8471444261394838, "grad_norm": 0.3770506978034973, "learning_rate": 1.2402639111419727e-05, "loss": 0.4995, "step": 30853 }, { "epoch": 0.8471718835804503, "grad_norm": 0.38738927245140076, "learning_rate": 1.2402219866963126e-05, "loss": 0.4179, "step": 30854 }, { "epoch": 0.8471993410214168, "grad_norm": 0.5262322425842285, "learning_rate": 1.2401800618025612e-05, "loss": 0.542, "step": 30855 }, { "epoch": 0.8472267984623834, "grad_norm": 0.4075547456741333, "learning_rate": 1.240138136460798e-05, "loss": 0.4665, "step": 30856 }, { "epoch": 0.8472542559033498, "grad_norm": 0.37239980697631836, "learning_rate": 1.2400962106711e-05, "loss": 0.468, "step": 30857 }, { "epoch": 0.8472817133443163, "grad_norm": 0.4326724112033844, "learning_rate": 1.2400542844335462e-05, "loss": 0.4747, "step": 30858 }, { "epoch": 0.8473091707852828, "grad_norm": 0.33898380398750305, "learning_rate": 1.2400123577482147e-05, "loss": 0.4623, "step": 30859 }, { "epoch": 0.8473366282262493, "grad_norm": 0.3797750174999237, "learning_rate": 1.239970430615183e-05, "loss": 0.4951, "step": 30860 }, { "epoch": 0.8473640856672158, "grad_norm": 0.3849031329154968, "learning_rate": 1.2399285030345303e-05, "loss": 0.5249, "step": 30861 }, { "epoch": 0.8473915431081823, "grad_norm": 0.37914708256721497, "learning_rate": 1.239886575006334e-05, "loss": 0.5503, "step": 30862 }, { "epoch": 0.8474190005491489, "grad_norm": 0.4071941673755646, "learning_rate": 1.239844646530673e-05, "loss": 0.4755, "step": 30863 }, { "epoch": 0.8474464579901153, "grad_norm": 0.33149468898773193, "learning_rate": 1.2398027176076253e-05, "loss": 0.4209, "step": 30864 }, { "epoch": 0.8474739154310819, "grad_norm": 0.4663156270980835, "learning_rate": 1.2397607882372687e-05, "loss": 0.5048, "step": 30865 }, { "epoch": 0.8475013728720483, "grad_norm": 0.5159270167350769, "learning_rate": 1.2397188584196817e-05, "loss": 0.4589, "step": 30866 }, { "epoch": 0.8475288303130148, "grad_norm": 0.4392662048339844, "learning_rate": 1.2396769281549425e-05, "loss": 0.5072, "step": 30867 }, { "epoch": 0.8475562877539813, "grad_norm": 0.4814586341381073, "learning_rate": 1.2396349974431296e-05, "loss": 0.5035, "step": 30868 }, { "epoch": 0.8475837451949478, "grad_norm": 0.4415634274482727, "learning_rate": 1.239593066284321e-05, "loss": 0.456, "step": 30869 }, { "epoch": 0.8476112026359144, "grad_norm": 0.3966514468193054, "learning_rate": 1.2395511346785945e-05, "loss": 0.435, "step": 30870 }, { "epoch": 0.8476386600768808, "grad_norm": 0.44009122252464294, "learning_rate": 1.2395092026260289e-05, "loss": 0.5077, "step": 30871 }, { "epoch": 0.8476661175178474, "grad_norm": 0.41188380122184753, "learning_rate": 1.239467270126702e-05, "loss": 0.5306, "step": 30872 }, { "epoch": 0.8476935749588138, "grad_norm": 0.3930886685848236, "learning_rate": 1.2394253371806926e-05, "loss": 0.407, "step": 30873 }, { "epoch": 0.8477210323997804, "grad_norm": 0.399292916059494, "learning_rate": 1.2393834037880782e-05, "loss": 0.4929, "step": 30874 }, { "epoch": 0.8477484898407468, "grad_norm": 0.4002346396446228, "learning_rate": 1.2393414699489375e-05, "loss": 0.5568, "step": 30875 }, { "epoch": 0.8477759472817133, "grad_norm": 0.33578795194625854, "learning_rate": 1.2392995356633484e-05, "loss": 0.4619, "step": 30876 }, { "epoch": 0.8478034047226799, "grad_norm": 0.44061189889907837, "learning_rate": 1.2392576009313893e-05, "loss": 0.51, "step": 30877 }, { "epoch": 0.8478308621636463, "grad_norm": 0.33755648136138916, "learning_rate": 1.2392156657531386e-05, "loss": 0.513, "step": 30878 }, { "epoch": 0.8478583196046129, "grad_norm": 0.4249807894229889, "learning_rate": 1.2391737301286741e-05, "loss": 0.5005, "step": 30879 }, { "epoch": 0.8478857770455793, "grad_norm": 0.449179470539093, "learning_rate": 1.2391317940580744e-05, "loss": 0.5039, "step": 30880 }, { "epoch": 0.8479132344865459, "grad_norm": 0.40607166290283203, "learning_rate": 1.2390898575414179e-05, "loss": 0.5206, "step": 30881 }, { "epoch": 0.8479406919275123, "grad_norm": 0.41393545269966125, "learning_rate": 1.239047920578782e-05, "loss": 0.5334, "step": 30882 }, { "epoch": 0.8479681493684789, "grad_norm": 0.38351187109947205, "learning_rate": 1.2390059831702457e-05, "loss": 0.4956, "step": 30883 }, { "epoch": 0.8479956068094454, "grad_norm": 0.37643107771873474, "learning_rate": 1.2389640453158866e-05, "loss": 0.4676, "step": 30884 }, { "epoch": 0.8480230642504119, "grad_norm": 0.4155269265174866, "learning_rate": 1.2389221070157837e-05, "loss": 0.4848, "step": 30885 }, { "epoch": 0.8480505216913784, "grad_norm": 0.38235244154930115, "learning_rate": 1.2388801682700146e-05, "loss": 0.4545, "step": 30886 }, { "epoch": 0.8480779791323448, "grad_norm": 0.4830605089664459, "learning_rate": 1.2388382290786577e-05, "loss": 0.582, "step": 30887 }, { "epoch": 0.8481054365733114, "grad_norm": 0.3639731705188751, "learning_rate": 1.2387962894417917e-05, "loss": 0.4677, "step": 30888 }, { "epoch": 0.8481328940142778, "grad_norm": 0.396697998046875, "learning_rate": 1.238754349359494e-05, "loss": 0.5334, "step": 30889 }, { "epoch": 0.8481603514552444, "grad_norm": 0.4023316502571106, "learning_rate": 1.2387124088318434e-05, "loss": 0.4724, "step": 30890 }, { "epoch": 0.8481878088962109, "grad_norm": 0.3582085072994232, "learning_rate": 1.238670467858918e-05, "loss": 0.5009, "step": 30891 }, { "epoch": 0.8482152663371774, "grad_norm": 0.4006249010562897, "learning_rate": 1.2386285264407958e-05, "loss": 0.4893, "step": 30892 }, { "epoch": 0.8482427237781439, "grad_norm": 0.7233282327651978, "learning_rate": 1.2385865845775556e-05, "loss": 0.5721, "step": 30893 }, { "epoch": 0.8482701812191104, "grad_norm": 0.3633475601673126, "learning_rate": 1.2385446422692749e-05, "loss": 0.4744, "step": 30894 }, { "epoch": 0.8482976386600769, "grad_norm": 0.4165462553501129, "learning_rate": 1.2385026995160323e-05, "loss": 0.5687, "step": 30895 }, { "epoch": 0.8483250961010433, "grad_norm": 4.253535270690918, "learning_rate": 1.2384607563179063e-05, "loss": 0.5686, "step": 30896 }, { "epoch": 0.8483525535420099, "grad_norm": 0.4614050090312958, "learning_rate": 1.2384188126749746e-05, "loss": 0.5211, "step": 30897 }, { "epoch": 0.8483800109829764, "grad_norm": 0.3778660297393799, "learning_rate": 1.2383768685873161e-05, "loss": 0.4735, "step": 30898 }, { "epoch": 0.8484074684239429, "grad_norm": 0.4002494812011719, "learning_rate": 1.2383349240550082e-05, "loss": 0.4337, "step": 30899 }, { "epoch": 0.8484349258649094, "grad_norm": 0.3857932388782501, "learning_rate": 1.23829297907813e-05, "loss": 0.4564, "step": 30900 }, { "epoch": 0.8484623833058759, "grad_norm": 0.41697341203689575, "learning_rate": 1.2382510336567592e-05, "loss": 0.5093, "step": 30901 }, { "epoch": 0.8484898407468424, "grad_norm": 0.37241131067276, "learning_rate": 1.238209087790974e-05, "loss": 0.4346, "step": 30902 }, { "epoch": 0.8485172981878089, "grad_norm": 0.39849504828453064, "learning_rate": 1.2381671414808532e-05, "loss": 0.5315, "step": 30903 }, { "epoch": 0.8485447556287754, "grad_norm": 0.35603034496307373, "learning_rate": 1.2381251947264743e-05, "loss": 0.3911, "step": 30904 }, { "epoch": 0.848572213069742, "grad_norm": 0.4314846992492676, "learning_rate": 1.2380832475279161e-05, "loss": 0.5573, "step": 30905 }, { "epoch": 0.8485996705107084, "grad_norm": 0.39463120698928833, "learning_rate": 1.2380412998852567e-05, "loss": 0.5253, "step": 30906 }, { "epoch": 0.8486271279516749, "grad_norm": 0.3880760371685028, "learning_rate": 1.237999351798574e-05, "loss": 0.5644, "step": 30907 }, { "epoch": 0.8486545853926414, "grad_norm": 0.39166927337646484, "learning_rate": 1.237957403267947e-05, "loss": 0.4603, "step": 30908 }, { "epoch": 0.8486820428336079, "grad_norm": 0.3631230294704437, "learning_rate": 1.2379154542934534e-05, "loss": 0.4736, "step": 30909 }, { "epoch": 0.8487095002745744, "grad_norm": 0.36847206950187683, "learning_rate": 1.2378735048751711e-05, "loss": 0.4443, "step": 30910 }, { "epoch": 0.8487369577155409, "grad_norm": 0.45778828859329224, "learning_rate": 1.2378315550131793e-05, "loss": 0.5183, "step": 30911 }, { "epoch": 0.8487644151565075, "grad_norm": 0.36599844694137573, "learning_rate": 1.2377896047075553e-05, "loss": 0.4225, "step": 30912 }, { "epoch": 0.8487918725974739, "grad_norm": 0.4267384707927704, "learning_rate": 1.2377476539583781e-05, "loss": 0.4645, "step": 30913 }, { "epoch": 0.8488193300384405, "grad_norm": 0.404940664768219, "learning_rate": 1.2377057027657258e-05, "loss": 0.501, "step": 30914 }, { "epoch": 0.8488467874794069, "grad_norm": 0.3359450697898865, "learning_rate": 1.237663751129676e-05, "loss": 0.455, "step": 30915 }, { "epoch": 0.8488742449203734, "grad_norm": 0.36983630061149597, "learning_rate": 1.237621799050308e-05, "loss": 0.5332, "step": 30916 }, { "epoch": 0.8489017023613399, "grad_norm": 0.39427778124809265, "learning_rate": 1.2375798465276989e-05, "loss": 0.4515, "step": 30917 }, { "epoch": 0.8489291598023064, "grad_norm": 0.3734722137451172, "learning_rate": 1.237537893561928e-05, "loss": 0.4393, "step": 30918 }, { "epoch": 0.848956617243273, "grad_norm": 0.4404390752315521, "learning_rate": 1.2374959401530731e-05, "loss": 0.5158, "step": 30919 }, { "epoch": 0.8489840746842394, "grad_norm": 0.4286869168281555, "learning_rate": 1.2374539863012122e-05, "loss": 0.5243, "step": 30920 }, { "epoch": 0.849011532125206, "grad_norm": 0.409033864736557, "learning_rate": 1.2374120320064242e-05, "loss": 0.4906, "step": 30921 }, { "epoch": 0.8490389895661724, "grad_norm": 0.36536964774131775, "learning_rate": 1.237370077268787e-05, "loss": 0.4399, "step": 30922 }, { "epoch": 0.849066447007139, "grad_norm": 0.3922259211540222, "learning_rate": 1.2373281220883784e-05, "loss": 0.5575, "step": 30923 }, { "epoch": 0.8490939044481054, "grad_norm": 0.3407920300960541, "learning_rate": 1.2372861664652774e-05, "loss": 0.4524, "step": 30924 }, { "epoch": 0.849121361889072, "grad_norm": 0.8646525144577026, "learning_rate": 1.2372442103995617e-05, "loss": 0.5817, "step": 30925 }, { "epoch": 0.8491488193300385, "grad_norm": 0.4057696759700775, "learning_rate": 1.2372022538913103e-05, "loss": 0.4994, "step": 30926 }, { "epoch": 0.8491762767710049, "grad_norm": 0.36860454082489014, "learning_rate": 1.2371602969406007e-05, "loss": 0.467, "step": 30927 }, { "epoch": 0.8492037342119715, "grad_norm": 0.3949611783027649, "learning_rate": 1.2371183395475112e-05, "loss": 0.5215, "step": 30928 }, { "epoch": 0.8492311916529379, "grad_norm": 0.3812295198440552, "learning_rate": 1.2370763817121208e-05, "loss": 0.5543, "step": 30929 }, { "epoch": 0.8492586490939045, "grad_norm": 0.885351300239563, "learning_rate": 1.2370344234345071e-05, "loss": 0.5543, "step": 30930 }, { "epoch": 0.8492861065348709, "grad_norm": 0.4170966148376465, "learning_rate": 1.2369924647147484e-05, "loss": 0.5309, "step": 30931 }, { "epoch": 0.8493135639758375, "grad_norm": 0.4011058211326599, "learning_rate": 1.2369505055529235e-05, "loss": 0.4939, "step": 30932 }, { "epoch": 0.8493410214168039, "grad_norm": 0.4200960397720337, "learning_rate": 1.2369085459491098e-05, "loss": 0.4295, "step": 30933 }, { "epoch": 0.8493684788577704, "grad_norm": 0.3663303256034851, "learning_rate": 1.2368665859033864e-05, "loss": 0.4491, "step": 30934 }, { "epoch": 0.849395936298737, "grad_norm": 0.3643839359283447, "learning_rate": 1.236824625415831e-05, "loss": 0.4733, "step": 30935 }, { "epoch": 0.8494233937397034, "grad_norm": 0.43010213971138, "learning_rate": 1.2367826644865222e-05, "loss": 0.5642, "step": 30936 }, { "epoch": 0.84945085118067, "grad_norm": 0.4022868573665619, "learning_rate": 1.2367407031155383e-05, "loss": 0.5508, "step": 30937 }, { "epoch": 0.8494783086216364, "grad_norm": 0.36953771114349365, "learning_rate": 1.236698741302957e-05, "loss": 0.4676, "step": 30938 }, { "epoch": 0.849505766062603, "grad_norm": 0.4101738929748535, "learning_rate": 1.2366567790488574e-05, "loss": 0.473, "step": 30939 }, { "epoch": 0.8495332235035694, "grad_norm": 0.34641727805137634, "learning_rate": 1.2366148163533173e-05, "loss": 0.449, "step": 30940 }, { "epoch": 0.849560680944536, "grad_norm": 0.42998266220092773, "learning_rate": 1.2365728532164149e-05, "loss": 0.4828, "step": 30941 }, { "epoch": 0.8495881383855025, "grad_norm": 0.419980525970459, "learning_rate": 1.2365308896382288e-05, "loss": 0.5492, "step": 30942 }, { "epoch": 0.849615595826469, "grad_norm": 0.359362930059433, "learning_rate": 1.2364889256188368e-05, "loss": 0.5441, "step": 30943 }, { "epoch": 0.8496430532674355, "grad_norm": 0.4072943925857544, "learning_rate": 1.2364469611583179e-05, "loss": 0.4741, "step": 30944 }, { "epoch": 0.8496705107084019, "grad_norm": 0.4366198480129242, "learning_rate": 1.2364049962567499e-05, "loss": 0.5613, "step": 30945 }, { "epoch": 0.8496979681493685, "grad_norm": 0.3557508587837219, "learning_rate": 1.2363630309142108e-05, "loss": 0.4765, "step": 30946 }, { "epoch": 0.8497254255903349, "grad_norm": 0.3666873872280121, "learning_rate": 1.2363210651307795e-05, "loss": 0.4756, "step": 30947 }, { "epoch": 0.8497528830313015, "grad_norm": 0.4194634258747101, "learning_rate": 1.2362790989065337e-05, "loss": 0.5161, "step": 30948 }, { "epoch": 0.849780340472268, "grad_norm": 0.3794061839580536, "learning_rate": 1.2362371322415523e-05, "loss": 0.4935, "step": 30949 }, { "epoch": 0.8498077979132345, "grad_norm": 0.4171138405799866, "learning_rate": 1.2361951651359131e-05, "loss": 0.4718, "step": 30950 }, { "epoch": 0.849835255354201, "grad_norm": 0.3794862627983093, "learning_rate": 1.2361531975896943e-05, "loss": 0.4612, "step": 30951 }, { "epoch": 0.8498627127951675, "grad_norm": 0.36075684428215027, "learning_rate": 1.2361112296029748e-05, "loss": 0.4438, "step": 30952 }, { "epoch": 0.849890170236134, "grad_norm": 0.36159682273864746, "learning_rate": 1.2360692611758323e-05, "loss": 0.4521, "step": 30953 }, { "epoch": 0.8499176276771004, "grad_norm": 0.42345136404037476, "learning_rate": 1.2360272923083454e-05, "loss": 0.4993, "step": 30954 }, { "epoch": 0.849945085118067, "grad_norm": 0.3904305398464203, "learning_rate": 1.2359853230005923e-05, "loss": 0.4906, "step": 30955 }, { "epoch": 0.8499725425590335, "grad_norm": 0.4056706428527832, "learning_rate": 1.235943353252651e-05, "loss": 0.4522, "step": 30956 }, { "epoch": 0.85, "grad_norm": 0.4858977794647217, "learning_rate": 1.2359013830646003e-05, "loss": 0.3685, "step": 30957 }, { "epoch": 0.8500274574409665, "grad_norm": 0.42236262559890747, "learning_rate": 1.2358594124365181e-05, "loss": 0.5469, "step": 30958 }, { "epoch": 0.850054914881933, "grad_norm": 0.39230114221572876, "learning_rate": 1.2358174413684827e-05, "loss": 0.5169, "step": 30959 }, { "epoch": 0.8500823723228995, "grad_norm": 0.36642521619796753, "learning_rate": 1.2357754698605729e-05, "loss": 0.4472, "step": 30960 }, { "epoch": 0.850109829763866, "grad_norm": 0.39375102519989014, "learning_rate": 1.2357334979128661e-05, "loss": 0.5151, "step": 30961 }, { "epoch": 0.8501372872048325, "grad_norm": 0.3421141803264618, "learning_rate": 1.2356915255254413e-05, "loss": 0.4046, "step": 30962 }, { "epoch": 0.850164744645799, "grad_norm": 0.38372719287872314, "learning_rate": 1.2356495526983766e-05, "loss": 0.4952, "step": 30963 }, { "epoch": 0.8501922020867655, "grad_norm": 0.3562566637992859, "learning_rate": 1.2356075794317503e-05, "loss": 0.4393, "step": 30964 }, { "epoch": 0.850219659527732, "grad_norm": 0.4984065592288971, "learning_rate": 1.2355656057256409e-05, "loss": 0.4912, "step": 30965 }, { "epoch": 0.8502471169686985, "grad_norm": 0.4080946147441864, "learning_rate": 1.235523631580126e-05, "loss": 0.5069, "step": 30966 }, { "epoch": 0.850274574409665, "grad_norm": 0.388995498418808, "learning_rate": 1.2354816569952845e-05, "loss": 0.4864, "step": 30967 }, { "epoch": 0.8503020318506315, "grad_norm": 0.41058358550071716, "learning_rate": 1.235439681971195e-05, "loss": 0.4768, "step": 30968 }, { "epoch": 0.850329489291598, "grad_norm": 0.42632609605789185, "learning_rate": 1.2353977065079348e-05, "loss": 0.4174, "step": 30969 }, { "epoch": 0.8503569467325646, "grad_norm": 0.4027153253555298, "learning_rate": 1.235355730605583e-05, "loss": 0.519, "step": 30970 }, { "epoch": 0.850384404173531, "grad_norm": 0.3705084025859833, "learning_rate": 1.2353137542642176e-05, "loss": 0.4759, "step": 30971 }, { "epoch": 0.8504118616144976, "grad_norm": 0.39150431752204895, "learning_rate": 1.2352717774839166e-05, "loss": 0.5855, "step": 30972 }, { "epoch": 0.850439319055464, "grad_norm": 0.38210493326187134, "learning_rate": 1.2352298002647591e-05, "loss": 0.4873, "step": 30973 }, { "epoch": 0.8504667764964305, "grad_norm": 0.4211326241493225, "learning_rate": 1.2351878226068228e-05, "loss": 0.505, "step": 30974 }, { "epoch": 0.850494233937397, "grad_norm": 0.42518481612205505, "learning_rate": 1.2351458445101862e-05, "loss": 0.56, "step": 30975 }, { "epoch": 0.8505216913783635, "grad_norm": 0.3892074525356293, "learning_rate": 1.2351038659749275e-05, "loss": 0.4527, "step": 30976 }, { "epoch": 0.8505491488193301, "grad_norm": 0.34533172845840454, "learning_rate": 1.2350618870011251e-05, "loss": 0.482, "step": 30977 }, { "epoch": 0.8505766062602965, "grad_norm": 0.4422941505908966, "learning_rate": 1.2350199075888572e-05, "loss": 0.4593, "step": 30978 }, { "epoch": 0.8506040637012631, "grad_norm": 0.3652843236923218, "learning_rate": 1.2349779277382022e-05, "loss": 0.5513, "step": 30979 }, { "epoch": 0.8506315211422295, "grad_norm": 0.4604048430919647, "learning_rate": 1.2349359474492382e-05, "loss": 0.5314, "step": 30980 }, { "epoch": 0.8506589785831961, "grad_norm": 0.38075563311576843, "learning_rate": 1.2348939667220439e-05, "loss": 0.5011, "step": 30981 }, { "epoch": 0.8506864360241625, "grad_norm": 0.4071119725704193, "learning_rate": 1.2348519855566971e-05, "loss": 0.4905, "step": 30982 }, { "epoch": 0.850713893465129, "grad_norm": 0.3925634026527405, "learning_rate": 1.2348100039532769e-05, "loss": 0.5347, "step": 30983 }, { "epoch": 0.8507413509060956, "grad_norm": 0.4392060935497284, "learning_rate": 1.2347680219118605e-05, "loss": 0.4231, "step": 30984 }, { "epoch": 0.850768808347062, "grad_norm": 0.38869625329971313, "learning_rate": 1.2347260394325274e-05, "loss": 0.519, "step": 30985 }, { "epoch": 0.8507962657880286, "grad_norm": 0.43110522627830505, "learning_rate": 1.234684056515355e-05, "loss": 0.4977, "step": 30986 }, { "epoch": 0.850823723228995, "grad_norm": 0.4264211356639862, "learning_rate": 1.2346420731604219e-05, "loss": 0.4733, "step": 30987 }, { "epoch": 0.8508511806699616, "grad_norm": 0.36479851603507996, "learning_rate": 1.2346000893678066e-05, "loss": 0.45, "step": 30988 }, { "epoch": 0.850878638110928, "grad_norm": 0.37932509183883667, "learning_rate": 1.2345581051375871e-05, "loss": 0.5515, "step": 30989 }, { "epoch": 0.8509060955518946, "grad_norm": 0.43035778403282166, "learning_rate": 1.2345161204698419e-05, "loss": 0.5568, "step": 30990 }, { "epoch": 0.8509335529928611, "grad_norm": 0.3508646786212921, "learning_rate": 1.2344741353646492e-05, "loss": 0.465, "step": 30991 }, { "epoch": 0.8509610104338275, "grad_norm": 0.37300893664360046, "learning_rate": 1.2344321498220875e-05, "loss": 0.4699, "step": 30992 }, { "epoch": 0.8509884678747941, "grad_norm": 0.4432971477508545, "learning_rate": 1.2343901638422351e-05, "loss": 0.543, "step": 30993 }, { "epoch": 0.8510159253157605, "grad_norm": 0.40598854422569275, "learning_rate": 1.2343481774251702e-05, "loss": 0.5082, "step": 30994 }, { "epoch": 0.8510433827567271, "grad_norm": 0.4135931134223938, "learning_rate": 1.234306190570971e-05, "loss": 0.5135, "step": 30995 }, { "epoch": 0.8510708401976935, "grad_norm": 0.3538792133331299, "learning_rate": 1.2342642032797162e-05, "loss": 0.4506, "step": 30996 }, { "epoch": 0.8510982976386601, "grad_norm": 0.41177627444267273, "learning_rate": 1.2342222155514838e-05, "loss": 0.4379, "step": 30997 }, { "epoch": 0.8511257550796266, "grad_norm": 0.39413607120513916, "learning_rate": 1.234180227386352e-05, "loss": 0.5624, "step": 30998 }, { "epoch": 0.8511532125205931, "grad_norm": 0.4378010928630829, "learning_rate": 1.2341382387843999e-05, "loss": 0.523, "step": 30999 }, { "epoch": 0.8511806699615596, "grad_norm": 0.5057529211044312, "learning_rate": 1.2340962497457048e-05, "loss": 0.5792, "step": 31000 }, { "epoch": 0.851208127402526, "grad_norm": 0.39691704511642456, "learning_rate": 1.2340542602703456e-05, "loss": 0.5186, "step": 31001 }, { "epoch": 0.8512355848434926, "grad_norm": 0.34117212891578674, "learning_rate": 1.2340122703584005e-05, "loss": 0.4398, "step": 31002 }, { "epoch": 0.851263042284459, "grad_norm": 0.36317354440689087, "learning_rate": 1.2339702800099475e-05, "loss": 0.4828, "step": 31003 }, { "epoch": 0.8512904997254256, "grad_norm": 0.34949156641960144, "learning_rate": 1.2339282892250659e-05, "loss": 0.4198, "step": 31004 }, { "epoch": 0.8513179571663921, "grad_norm": 0.37902507185935974, "learning_rate": 1.2338862980038329e-05, "loss": 0.4535, "step": 31005 }, { "epoch": 0.8513454146073586, "grad_norm": 0.4213833808898926, "learning_rate": 1.2338443063463274e-05, "loss": 0.5033, "step": 31006 }, { "epoch": 0.8513728720483251, "grad_norm": 0.40954479575157166, "learning_rate": 1.2338023142526279e-05, "loss": 0.5128, "step": 31007 }, { "epoch": 0.8514003294892916, "grad_norm": 0.39020708203315735, "learning_rate": 1.233760321722812e-05, "loss": 0.4618, "step": 31008 }, { "epoch": 0.8514277869302581, "grad_norm": 0.3601935803890228, "learning_rate": 1.2337183287569588e-05, "loss": 0.4603, "step": 31009 }, { "epoch": 0.8514552443712246, "grad_norm": 0.3897362947463989, "learning_rate": 1.2336763353551461e-05, "loss": 0.4672, "step": 31010 }, { "epoch": 0.8514827018121911, "grad_norm": 0.4006001651287079, "learning_rate": 1.2336343415174527e-05, "loss": 0.4746, "step": 31011 }, { "epoch": 0.8515101592531577, "grad_norm": 0.3881980776786804, "learning_rate": 1.2335923472439566e-05, "loss": 0.5486, "step": 31012 }, { "epoch": 0.8515376166941241, "grad_norm": 0.36915266513824463, "learning_rate": 1.2335503525347363e-05, "loss": 0.4749, "step": 31013 }, { "epoch": 0.8515650741350906, "grad_norm": 0.3758358657360077, "learning_rate": 1.2335083573898698e-05, "loss": 0.4952, "step": 31014 }, { "epoch": 0.8515925315760571, "grad_norm": 0.4227577745914459, "learning_rate": 1.2334663618094358e-05, "loss": 0.4877, "step": 31015 }, { "epoch": 0.8516199890170236, "grad_norm": 0.3784068524837494, "learning_rate": 1.2334243657935128e-05, "loss": 0.5003, "step": 31016 }, { "epoch": 0.8516474464579901, "grad_norm": 0.38637858629226685, "learning_rate": 1.2333823693421788e-05, "loss": 0.4324, "step": 31017 }, { "epoch": 0.8516749038989566, "grad_norm": 0.6481234431266785, "learning_rate": 1.2333403724555118e-05, "loss": 0.5245, "step": 31018 }, { "epoch": 0.8517023613399232, "grad_norm": 0.5134816765785217, "learning_rate": 1.2332983751335907e-05, "loss": 0.536, "step": 31019 }, { "epoch": 0.8517298187808896, "grad_norm": 0.43678489327430725, "learning_rate": 1.2332563773764938e-05, "loss": 0.4471, "step": 31020 }, { "epoch": 0.8517572762218562, "grad_norm": 0.3794896602630615, "learning_rate": 1.2332143791842992e-05, "loss": 0.48, "step": 31021 }, { "epoch": 0.8517847336628226, "grad_norm": 0.39714333415031433, "learning_rate": 1.2331723805570854e-05, "loss": 0.5388, "step": 31022 }, { "epoch": 0.8518121911037891, "grad_norm": 0.4056181311607361, "learning_rate": 1.2331303814949308e-05, "loss": 0.511, "step": 31023 }, { "epoch": 0.8518396485447556, "grad_norm": 0.38568830490112305, "learning_rate": 1.2330883819979134e-05, "loss": 0.5224, "step": 31024 }, { "epoch": 0.8518671059857221, "grad_norm": 0.4196723997592926, "learning_rate": 1.233046382066112e-05, "loss": 0.4452, "step": 31025 }, { "epoch": 0.8518945634266887, "grad_norm": 0.3842492997646332, "learning_rate": 1.2330043816996044e-05, "loss": 0.5052, "step": 31026 }, { "epoch": 0.8519220208676551, "grad_norm": 0.3602806329727173, "learning_rate": 1.2329623808984696e-05, "loss": 0.6149, "step": 31027 }, { "epoch": 0.8519494783086217, "grad_norm": 0.32482197880744934, "learning_rate": 1.2329203796627853e-05, "loss": 0.4577, "step": 31028 }, { "epoch": 0.8519769357495881, "grad_norm": 0.4096108376979828, "learning_rate": 1.2328783779926303e-05, "loss": 0.521, "step": 31029 }, { "epoch": 0.8520043931905547, "grad_norm": 0.42781388759613037, "learning_rate": 1.232836375888083e-05, "loss": 0.4678, "step": 31030 }, { "epoch": 0.8520318506315211, "grad_norm": 10.840044975280762, "learning_rate": 1.2327943733492211e-05, "loss": 0.5387, "step": 31031 }, { "epoch": 0.8520593080724876, "grad_norm": 0.36492007970809937, "learning_rate": 1.2327523703761239e-05, "loss": 0.5016, "step": 31032 }, { "epoch": 0.8520867655134542, "grad_norm": 0.393689900636673, "learning_rate": 1.2327103669688691e-05, "loss": 0.489, "step": 31033 }, { "epoch": 0.8521142229544206, "grad_norm": 0.38645532727241516, "learning_rate": 1.232668363127535e-05, "loss": 0.4925, "step": 31034 }, { "epoch": 0.8521416803953872, "grad_norm": 0.3617214560508728, "learning_rate": 1.2326263588522003e-05, "loss": 0.4303, "step": 31035 }, { "epoch": 0.8521691378363536, "grad_norm": 0.392085462808609, "learning_rate": 1.232584354142943e-05, "loss": 0.4762, "step": 31036 }, { "epoch": 0.8521965952773202, "grad_norm": 0.42371758818626404, "learning_rate": 1.2325423489998418e-05, "loss": 0.5399, "step": 31037 }, { "epoch": 0.8522240527182866, "grad_norm": 0.38764989376068115, "learning_rate": 1.2325003434229748e-05, "loss": 0.4791, "step": 31038 }, { "epoch": 0.8522515101592532, "grad_norm": 0.36356252431869507, "learning_rate": 1.2324583374124206e-05, "loss": 0.518, "step": 31039 }, { "epoch": 0.8522789676002197, "grad_norm": 0.3406558930873871, "learning_rate": 1.2324163309682576e-05, "loss": 0.4146, "step": 31040 }, { "epoch": 0.8523064250411861, "grad_norm": 0.3828067183494568, "learning_rate": 1.2323743240905634e-05, "loss": 0.5172, "step": 31041 }, { "epoch": 0.8523338824821527, "grad_norm": 0.3835814595222473, "learning_rate": 1.2323323167794171e-05, "loss": 0.4769, "step": 31042 }, { "epoch": 0.8523613399231191, "grad_norm": 0.4034666419029236, "learning_rate": 1.2322903090348973e-05, "loss": 0.5751, "step": 31043 }, { "epoch": 0.8523887973640857, "grad_norm": 0.44863444566726685, "learning_rate": 1.2322483008570816e-05, "loss": 0.4815, "step": 31044 }, { "epoch": 0.8524162548050521, "grad_norm": 0.4165095090866089, "learning_rate": 1.2322062922460487e-05, "loss": 0.4481, "step": 31045 }, { "epoch": 0.8524437122460187, "grad_norm": 0.38595640659332275, "learning_rate": 1.232164283201877e-05, "loss": 0.428, "step": 31046 }, { "epoch": 0.8524711696869852, "grad_norm": 0.37867414951324463, "learning_rate": 1.2321222737246447e-05, "loss": 0.4346, "step": 31047 }, { "epoch": 0.8524986271279517, "grad_norm": 0.363534539937973, "learning_rate": 1.2320802638144302e-05, "loss": 0.5093, "step": 31048 }, { "epoch": 0.8525260845689182, "grad_norm": 0.3747383952140808, "learning_rate": 1.2320382534713121e-05, "loss": 0.4807, "step": 31049 }, { "epoch": 0.8525535420098846, "grad_norm": 0.35314878821372986, "learning_rate": 1.2319962426953686e-05, "loss": 0.4626, "step": 31050 }, { "epoch": 0.8525809994508512, "grad_norm": 0.3536129891872406, "learning_rate": 1.2319542314866784e-05, "loss": 0.454, "step": 31051 }, { "epoch": 0.8526084568918176, "grad_norm": 0.38634398579597473, "learning_rate": 1.2319122198453191e-05, "loss": 0.4754, "step": 31052 }, { "epoch": 0.8526359143327842, "grad_norm": 0.39606526494026184, "learning_rate": 1.2318702077713696e-05, "loss": 0.4437, "step": 31053 }, { "epoch": 0.8526633717737507, "grad_norm": 0.39848512411117554, "learning_rate": 1.231828195264908e-05, "loss": 0.5562, "step": 31054 }, { "epoch": 0.8526908292147172, "grad_norm": 0.43289321660995483, "learning_rate": 1.2317861823260132e-05, "loss": 0.5352, "step": 31055 }, { "epoch": 0.8527182866556837, "grad_norm": 0.529003381729126, "learning_rate": 1.2317441689547629e-05, "loss": 0.5452, "step": 31056 }, { "epoch": 0.8527457440966502, "grad_norm": 0.4022093117237091, "learning_rate": 1.2317021551512357e-05, "loss": 0.5344, "step": 31057 }, { "epoch": 0.8527732015376167, "grad_norm": 0.34161177277565, "learning_rate": 1.2316601409155102e-05, "loss": 0.4505, "step": 31058 }, { "epoch": 0.8528006589785831, "grad_norm": 0.4639519453048706, "learning_rate": 1.2316181262476646e-05, "loss": 0.5361, "step": 31059 }, { "epoch": 0.8528281164195497, "grad_norm": 0.41029810905456543, "learning_rate": 1.231576111147777e-05, "loss": 0.5182, "step": 31060 }, { "epoch": 0.8528555738605162, "grad_norm": 0.38545432686805725, "learning_rate": 1.2315340956159265e-05, "loss": 0.4168, "step": 31061 }, { "epoch": 0.8528830313014827, "grad_norm": 0.35454270243644714, "learning_rate": 1.231492079652191e-05, "loss": 0.4266, "step": 31062 }, { "epoch": 0.8529104887424492, "grad_norm": 0.42645204067230225, "learning_rate": 1.2314500632566484e-05, "loss": 0.4349, "step": 31063 }, { "epoch": 0.8529379461834157, "grad_norm": 0.44680285453796387, "learning_rate": 1.2314080464293778e-05, "loss": 0.498, "step": 31064 }, { "epoch": 0.8529654036243822, "grad_norm": 0.4174591302871704, "learning_rate": 1.2313660291704575e-05, "loss": 0.5623, "step": 31065 }, { "epoch": 0.8529928610653487, "grad_norm": 0.42842787504196167, "learning_rate": 1.2313240114799656e-05, "loss": 0.5087, "step": 31066 }, { "epoch": 0.8530203185063152, "grad_norm": 0.3808627128601074, "learning_rate": 1.2312819933579805e-05, "loss": 0.3967, "step": 31067 }, { "epoch": 0.8530477759472818, "grad_norm": 0.34249722957611084, "learning_rate": 1.2312399748045807e-05, "loss": 0.436, "step": 31068 }, { "epoch": 0.8530752333882482, "grad_norm": 0.3909807503223419, "learning_rate": 1.2311979558198445e-05, "loss": 0.5707, "step": 31069 }, { "epoch": 0.8531026908292147, "grad_norm": 0.40110358595848083, "learning_rate": 1.2311559364038503e-05, "loss": 0.4264, "step": 31070 }, { "epoch": 0.8531301482701812, "grad_norm": 0.36902734637260437, "learning_rate": 1.2311139165566769e-05, "loss": 0.4319, "step": 31071 }, { "epoch": 0.8531576057111477, "grad_norm": 0.38092124462127686, "learning_rate": 1.2310718962784018e-05, "loss": 0.433, "step": 31072 }, { "epoch": 0.8531850631521142, "grad_norm": 0.39440736174583435, "learning_rate": 1.2310298755691041e-05, "loss": 0.5044, "step": 31073 }, { "epoch": 0.8532125205930807, "grad_norm": 0.40205907821655273, "learning_rate": 1.230987854428862e-05, "loss": 0.4849, "step": 31074 }, { "epoch": 0.8532399780340473, "grad_norm": 0.3771974742412567, "learning_rate": 1.2309458328577538e-05, "loss": 0.478, "step": 31075 }, { "epoch": 0.8532674354750137, "grad_norm": 0.4436447024345398, "learning_rate": 1.2309038108558578e-05, "loss": 0.4888, "step": 31076 }, { "epoch": 0.8532948929159803, "grad_norm": 0.44624853134155273, "learning_rate": 1.2308617884232523e-05, "loss": 0.4867, "step": 31077 }, { "epoch": 0.8533223503569467, "grad_norm": 0.4120721220970154, "learning_rate": 1.2308197655600162e-05, "loss": 0.5216, "step": 31078 }, { "epoch": 0.8533498077979133, "grad_norm": 0.5084753632545471, "learning_rate": 1.2307777422662278e-05, "loss": 0.5665, "step": 31079 }, { "epoch": 0.8533772652388797, "grad_norm": 0.458081990480423, "learning_rate": 1.2307357185419646e-05, "loss": 0.5175, "step": 31080 }, { "epoch": 0.8534047226798462, "grad_norm": 0.4814344644546509, "learning_rate": 1.230693694387306e-05, "loss": 0.5037, "step": 31081 }, { "epoch": 0.8534321801208128, "grad_norm": 0.3895649313926697, "learning_rate": 1.2306516698023303e-05, "loss": 0.5016, "step": 31082 }, { "epoch": 0.8534596375617792, "grad_norm": 0.4477512836456299, "learning_rate": 1.2306096447871152e-05, "loss": 0.5117, "step": 31083 }, { "epoch": 0.8534870950027458, "grad_norm": 0.3832671344280243, "learning_rate": 1.2305676193417398e-05, "loss": 0.4342, "step": 31084 }, { "epoch": 0.8535145524437122, "grad_norm": 0.3847145736217499, "learning_rate": 1.2305255934662818e-05, "loss": 0.5258, "step": 31085 }, { "epoch": 0.8535420098846788, "grad_norm": 0.3888496458530426, "learning_rate": 1.2304835671608204e-05, "loss": 0.5603, "step": 31086 }, { "epoch": 0.8535694673256452, "grad_norm": 0.4082445800304413, "learning_rate": 1.2304415404254336e-05, "loss": 0.5365, "step": 31087 }, { "epoch": 0.8535969247666118, "grad_norm": 0.4175891876220703, "learning_rate": 1.2303995132601994e-05, "loss": 0.4132, "step": 31088 }, { "epoch": 0.8536243822075783, "grad_norm": 0.36412301659584045, "learning_rate": 1.2303574856651968e-05, "loss": 0.4462, "step": 31089 }, { "epoch": 0.8536518396485447, "grad_norm": 0.3987017869949341, "learning_rate": 1.2303154576405038e-05, "loss": 0.5003, "step": 31090 }, { "epoch": 0.8536792970895113, "grad_norm": 0.3617419898509979, "learning_rate": 1.2302734291861991e-05, "loss": 0.478, "step": 31091 }, { "epoch": 0.8537067545304777, "grad_norm": 0.3762076199054718, "learning_rate": 1.2302314003023612e-05, "loss": 0.462, "step": 31092 }, { "epoch": 0.8537342119714443, "grad_norm": 0.4034312665462494, "learning_rate": 1.2301893709890677e-05, "loss": 0.563, "step": 31093 }, { "epoch": 0.8537616694124107, "grad_norm": 0.38780027627944946, "learning_rate": 1.230147341246398e-05, "loss": 0.5566, "step": 31094 }, { "epoch": 0.8537891268533773, "grad_norm": 0.3957446813583374, "learning_rate": 1.23010531107443e-05, "loss": 0.5553, "step": 31095 }, { "epoch": 0.8538165842943438, "grad_norm": 0.43992879986763, "learning_rate": 1.230063280473242e-05, "loss": 0.5155, "step": 31096 }, { "epoch": 0.8538440417353103, "grad_norm": 0.42226675152778625, "learning_rate": 1.2300212494429129e-05, "loss": 0.5362, "step": 31097 }, { "epoch": 0.8538714991762768, "grad_norm": 0.4320169687271118, "learning_rate": 1.2299792179835203e-05, "loss": 0.5643, "step": 31098 }, { "epoch": 0.8538989566172432, "grad_norm": 0.39293718338012695, "learning_rate": 1.2299371860951432e-05, "loss": 0.5156, "step": 31099 }, { "epoch": 0.8539264140582098, "grad_norm": 0.41615521907806396, "learning_rate": 1.22989515377786e-05, "loss": 0.5479, "step": 31100 }, { "epoch": 0.8539538714991762, "grad_norm": 0.36430731415748596, "learning_rate": 1.2298531210317489e-05, "loss": 0.4606, "step": 31101 }, { "epoch": 0.8539813289401428, "grad_norm": 0.3614204525947571, "learning_rate": 1.2298110878568882e-05, "loss": 0.4658, "step": 31102 }, { "epoch": 0.8540087863811093, "grad_norm": 0.3734956383705139, "learning_rate": 1.2297690542533566e-05, "loss": 0.4515, "step": 31103 }, { "epoch": 0.8540362438220758, "grad_norm": 0.35006949305534363, "learning_rate": 1.2297270202212326e-05, "loss": 0.4527, "step": 31104 }, { "epoch": 0.8540637012630423, "grad_norm": 0.42399707436561584, "learning_rate": 1.2296849857605944e-05, "loss": 0.5041, "step": 31105 }, { "epoch": 0.8540911587040088, "grad_norm": 0.44979849457740784, "learning_rate": 1.22964295087152e-05, "loss": 0.4677, "step": 31106 }, { "epoch": 0.8541186161449753, "grad_norm": 0.368526428937912, "learning_rate": 1.2296009155540884e-05, "loss": 0.4618, "step": 31107 }, { "epoch": 0.8541460735859417, "grad_norm": 0.36994123458862305, "learning_rate": 1.2295588798083777e-05, "loss": 0.5376, "step": 31108 }, { "epoch": 0.8541735310269083, "grad_norm": 0.42082175612449646, "learning_rate": 1.2295168436344667e-05, "loss": 0.4728, "step": 31109 }, { "epoch": 0.8542009884678748, "grad_norm": 0.3538724184036255, "learning_rate": 1.2294748070324333e-05, "loss": 0.4471, "step": 31110 }, { "epoch": 0.8542284459088413, "grad_norm": 0.4204031825065613, "learning_rate": 1.2294327700023561e-05, "loss": 0.4659, "step": 31111 }, { "epoch": 0.8542559033498078, "grad_norm": 0.4318523108959198, "learning_rate": 1.2293907325443137e-05, "loss": 0.5382, "step": 31112 }, { "epoch": 0.8542833607907743, "grad_norm": 0.3609636723995209, "learning_rate": 1.2293486946583845e-05, "loss": 0.4482, "step": 31113 }, { "epoch": 0.8543108182317408, "grad_norm": 0.36573779582977295, "learning_rate": 1.2293066563446465e-05, "loss": 0.5236, "step": 31114 }, { "epoch": 0.8543382756727073, "grad_norm": 0.36562487483024597, "learning_rate": 1.2292646176031785e-05, "loss": 0.5203, "step": 31115 }, { "epoch": 0.8543657331136738, "grad_norm": 0.37902069091796875, "learning_rate": 1.2292225784340589e-05, "loss": 0.4758, "step": 31116 }, { "epoch": 0.8543931905546404, "grad_norm": 0.4114731550216675, "learning_rate": 1.229180538837366e-05, "loss": 0.5821, "step": 31117 }, { "epoch": 0.8544206479956068, "grad_norm": 0.34551650285720825, "learning_rate": 1.2291384988131782e-05, "loss": 0.4982, "step": 31118 }, { "epoch": 0.8544481054365733, "grad_norm": 0.3871213495731354, "learning_rate": 1.229096458361574e-05, "loss": 0.5187, "step": 31119 }, { "epoch": 0.8544755628775398, "grad_norm": 0.389157235622406, "learning_rate": 1.2290544174826317e-05, "loss": 0.5657, "step": 31120 }, { "epoch": 0.8545030203185063, "grad_norm": 0.38362744450569153, "learning_rate": 1.2290123761764296e-05, "loss": 0.5315, "step": 31121 }, { "epoch": 0.8545304777594728, "grad_norm": 0.4317735731601715, "learning_rate": 1.2289703344430468e-05, "loss": 0.6085, "step": 31122 }, { "epoch": 0.8545579352004393, "grad_norm": 0.39359980821609497, "learning_rate": 1.2289282922825611e-05, "loss": 0.5134, "step": 31123 }, { "epoch": 0.8545853926414059, "grad_norm": 0.4333030879497528, "learning_rate": 1.228886249695051e-05, "loss": 0.5261, "step": 31124 }, { "epoch": 0.8546128500823723, "grad_norm": 0.39051109552383423, "learning_rate": 1.228844206680595e-05, "loss": 0.4655, "step": 31125 }, { "epoch": 0.8546403075233389, "grad_norm": 0.3853558599948883, "learning_rate": 1.2288021632392717e-05, "loss": 0.4976, "step": 31126 }, { "epoch": 0.8546677649643053, "grad_norm": 0.34997573494911194, "learning_rate": 1.2287601193711588e-05, "loss": 0.4949, "step": 31127 }, { "epoch": 0.8546952224052718, "grad_norm": 0.38939419388771057, "learning_rate": 1.2287180750763358e-05, "loss": 0.3775, "step": 31128 }, { "epoch": 0.8547226798462383, "grad_norm": 0.35364654660224915, "learning_rate": 1.2286760303548803e-05, "loss": 0.424, "step": 31129 }, { "epoch": 0.8547501372872048, "grad_norm": 0.41810986399650574, "learning_rate": 1.2286339852068712e-05, "loss": 0.4748, "step": 31130 }, { "epoch": 0.8547775947281714, "grad_norm": 0.3986365795135498, "learning_rate": 1.2285919396323869e-05, "loss": 0.517, "step": 31131 }, { "epoch": 0.8548050521691378, "grad_norm": 0.41680508852005005, "learning_rate": 1.2285498936315052e-05, "loss": 0.5183, "step": 31132 }, { "epoch": 0.8548325096101044, "grad_norm": 0.40593385696411133, "learning_rate": 1.2285078472043056e-05, "loss": 0.522, "step": 31133 }, { "epoch": 0.8548599670510708, "grad_norm": 0.3749670386314392, "learning_rate": 1.2284658003508654e-05, "loss": 0.522, "step": 31134 }, { "epoch": 0.8548874244920374, "grad_norm": 0.3708672821521759, "learning_rate": 1.2284237530712636e-05, "loss": 0.5104, "step": 31135 }, { "epoch": 0.8549148819330038, "grad_norm": 0.4841960370540619, "learning_rate": 1.2283817053655789e-05, "loss": 0.5766, "step": 31136 }, { "epoch": 0.8549423393739704, "grad_norm": 0.4529702961444855, "learning_rate": 1.2283396572338893e-05, "loss": 0.4718, "step": 31137 }, { "epoch": 0.8549697968149369, "grad_norm": 0.40346720814704895, "learning_rate": 1.2282976086762735e-05, "loss": 0.5137, "step": 31138 }, { "epoch": 0.8549972542559033, "grad_norm": 0.38118085265159607, "learning_rate": 1.2282555596928094e-05, "loss": 0.4926, "step": 31139 }, { "epoch": 0.8550247116968699, "grad_norm": 0.4172128140926361, "learning_rate": 1.228213510283576e-05, "loss": 0.4978, "step": 31140 }, { "epoch": 0.8550521691378363, "grad_norm": 0.6619917750358582, "learning_rate": 1.228171460448652e-05, "loss": 0.5456, "step": 31141 }, { "epoch": 0.8550796265788029, "grad_norm": 0.41447141766548157, "learning_rate": 1.2281294101881147e-05, "loss": 0.5437, "step": 31142 }, { "epoch": 0.8551070840197693, "grad_norm": 0.44310644268989563, "learning_rate": 1.2280873595020438e-05, "loss": 0.4949, "step": 31143 }, { "epoch": 0.8551345414607359, "grad_norm": 0.38525375723838806, "learning_rate": 1.2280453083905171e-05, "loss": 0.4805, "step": 31144 }, { "epoch": 0.8551619989017024, "grad_norm": 0.36892151832580566, "learning_rate": 1.2280032568536126e-05, "loss": 0.4585, "step": 31145 }, { "epoch": 0.8551894563426689, "grad_norm": 0.3476870656013489, "learning_rate": 1.2279612048914097e-05, "loss": 0.4793, "step": 31146 }, { "epoch": 0.8552169137836354, "grad_norm": 0.4255286455154419, "learning_rate": 1.2279191525039864e-05, "loss": 0.5564, "step": 31147 }, { "epoch": 0.8552443712246018, "grad_norm": 0.45398518443107605, "learning_rate": 1.227877099691421e-05, "loss": 0.5771, "step": 31148 }, { "epoch": 0.8552718286655684, "grad_norm": 0.4463924169540405, "learning_rate": 1.2278350464537923e-05, "loss": 0.5205, "step": 31149 }, { "epoch": 0.8552992861065348, "grad_norm": 0.4087491035461426, "learning_rate": 1.2277929927911782e-05, "loss": 0.5005, "step": 31150 }, { "epoch": 0.8553267435475014, "grad_norm": 0.3750789761543274, "learning_rate": 1.2277509387036577e-05, "loss": 0.4745, "step": 31151 }, { "epoch": 0.8553542009884679, "grad_norm": 0.3907051086425781, "learning_rate": 1.2277088841913089e-05, "loss": 0.4281, "step": 31152 }, { "epoch": 0.8553816584294344, "grad_norm": 0.35830461978912354, "learning_rate": 1.2276668292542105e-05, "loss": 0.5077, "step": 31153 }, { "epoch": 0.8554091158704009, "grad_norm": 0.40933358669281006, "learning_rate": 1.2276247738924407e-05, "loss": 0.5135, "step": 31154 }, { "epoch": 0.8554365733113674, "grad_norm": 0.3851228356361389, "learning_rate": 1.227582718106078e-05, "loss": 0.4432, "step": 31155 }, { "epoch": 0.8554640307523339, "grad_norm": 0.44919613003730774, "learning_rate": 1.227540661895201e-05, "loss": 0.5504, "step": 31156 }, { "epoch": 0.8554914881933003, "grad_norm": 0.5030062198638916, "learning_rate": 1.2274986052598881e-05, "loss": 0.56, "step": 31157 }, { "epoch": 0.8555189456342669, "grad_norm": 0.36782172322273254, "learning_rate": 1.2274565482002174e-05, "loss": 0.4718, "step": 31158 }, { "epoch": 0.8555464030752334, "grad_norm": 0.38524407148361206, "learning_rate": 1.227414490716268e-05, "loss": 0.4079, "step": 31159 }, { "epoch": 0.8555738605161999, "grad_norm": 0.41931745409965515, "learning_rate": 1.2273724328081176e-05, "loss": 0.5874, "step": 31160 }, { "epoch": 0.8556013179571664, "grad_norm": 0.4634385406970978, "learning_rate": 1.2273303744758454e-05, "loss": 0.5462, "step": 31161 }, { "epoch": 0.8556287753981329, "grad_norm": 0.4506748914718628, "learning_rate": 1.2272883157195292e-05, "loss": 0.5103, "step": 31162 }, { "epoch": 0.8556562328390994, "grad_norm": 0.41302892565727234, "learning_rate": 1.2272462565392477e-05, "loss": 0.5023, "step": 31163 }, { "epoch": 0.8556836902800659, "grad_norm": 0.37175488471984863, "learning_rate": 1.22720419693508e-05, "loss": 0.4691, "step": 31164 }, { "epoch": 0.8557111477210324, "grad_norm": 0.3931663930416107, "learning_rate": 1.2271621369071034e-05, "loss": 0.6153, "step": 31165 }, { "epoch": 0.855738605161999, "grad_norm": 0.35619550943374634, "learning_rate": 1.227120076455397e-05, "loss": 0.4739, "step": 31166 }, { "epoch": 0.8557660626029654, "grad_norm": 0.3666619062423706, "learning_rate": 1.2270780155800392e-05, "loss": 0.5373, "step": 31167 }, { "epoch": 0.8557935200439319, "grad_norm": 0.37692150473594666, "learning_rate": 1.2270359542811086e-05, "loss": 0.5023, "step": 31168 }, { "epoch": 0.8558209774848984, "grad_norm": 0.3557114601135254, "learning_rate": 1.2269938925586833e-05, "loss": 0.4497, "step": 31169 }, { "epoch": 0.8558484349258649, "grad_norm": 0.5110921859741211, "learning_rate": 1.2269518304128419e-05, "loss": 0.5101, "step": 31170 }, { "epoch": 0.8558758923668314, "grad_norm": 0.4081737995147705, "learning_rate": 1.226909767843663e-05, "loss": 0.4602, "step": 31171 }, { "epoch": 0.8559033498077979, "grad_norm": 0.3897005319595337, "learning_rate": 1.2268677048512252e-05, "loss": 0.4427, "step": 31172 }, { "epoch": 0.8559308072487645, "grad_norm": 0.33740636706352234, "learning_rate": 1.2268256414356063e-05, "loss": 0.4621, "step": 31173 }, { "epoch": 0.8559582646897309, "grad_norm": 0.36769115924835205, "learning_rate": 1.2267835775968855e-05, "loss": 0.4925, "step": 31174 }, { "epoch": 0.8559857221306975, "grad_norm": 0.39439424872398376, "learning_rate": 1.2267415133351409e-05, "loss": 0.4972, "step": 31175 }, { "epoch": 0.8560131795716639, "grad_norm": 0.4376014471054077, "learning_rate": 1.2266994486504507e-05, "loss": 0.5121, "step": 31176 }, { "epoch": 0.8560406370126304, "grad_norm": 0.3644297122955322, "learning_rate": 1.2266573835428938e-05, "loss": 0.4783, "step": 31177 }, { "epoch": 0.8560680944535969, "grad_norm": 0.4137565791606903, "learning_rate": 1.2266153180125486e-05, "loss": 0.43, "step": 31178 }, { "epoch": 0.8560955518945634, "grad_norm": 0.36673516035079956, "learning_rate": 1.2265732520594938e-05, "loss": 0.4023, "step": 31179 }, { "epoch": 0.85612300933553, "grad_norm": 0.4770660102367401, "learning_rate": 1.2265311856838073e-05, "loss": 0.4638, "step": 31180 }, { "epoch": 0.8561504667764964, "grad_norm": 0.4561827480792999, "learning_rate": 1.2264891188855676e-05, "loss": 0.4489, "step": 31181 }, { "epoch": 0.856177924217463, "grad_norm": 0.33011594414711, "learning_rate": 1.226447051664854e-05, "loss": 0.4689, "step": 31182 }, { "epoch": 0.8562053816584294, "grad_norm": 0.3626307249069214, "learning_rate": 1.2264049840217438e-05, "loss": 0.4255, "step": 31183 }, { "epoch": 0.856232839099396, "grad_norm": 0.42238473892211914, "learning_rate": 1.2263629159563162e-05, "loss": 0.4437, "step": 31184 }, { "epoch": 0.8562602965403624, "grad_norm": 0.41838133335113525, "learning_rate": 1.2263208474686498e-05, "loss": 0.4888, "step": 31185 }, { "epoch": 0.856287753981329, "grad_norm": 0.35725274682044983, "learning_rate": 1.2262787785588223e-05, "loss": 0.4731, "step": 31186 }, { "epoch": 0.8563152114222955, "grad_norm": 0.46352657675743103, "learning_rate": 1.2262367092269131e-05, "loss": 0.6084, "step": 31187 }, { "epoch": 0.8563426688632619, "grad_norm": 0.45977890491485596, "learning_rate": 1.2261946394730002e-05, "loss": 0.5339, "step": 31188 }, { "epoch": 0.8563701263042285, "grad_norm": 0.4117679297924042, "learning_rate": 1.226152569297162e-05, "loss": 0.5067, "step": 31189 }, { "epoch": 0.8563975837451949, "grad_norm": 0.7090790867805481, "learning_rate": 1.226110498699477e-05, "loss": 0.5712, "step": 31190 }, { "epoch": 0.8564250411861615, "grad_norm": 0.5142337083816528, "learning_rate": 1.2260684276800237e-05, "loss": 0.5874, "step": 31191 }, { "epoch": 0.8564524986271279, "grad_norm": 0.42770060896873474, "learning_rate": 1.2260263562388808e-05, "loss": 0.5211, "step": 31192 }, { "epoch": 0.8564799560680945, "grad_norm": 0.39739400148391724, "learning_rate": 1.2259842843761265e-05, "loss": 0.5639, "step": 31193 }, { "epoch": 0.856507413509061, "grad_norm": 0.45155060291290283, "learning_rate": 1.2259422120918393e-05, "loss": 0.4859, "step": 31194 }, { "epoch": 0.8565348709500274, "grad_norm": 0.3719451129436493, "learning_rate": 1.2259001393860981e-05, "loss": 0.39, "step": 31195 }, { "epoch": 0.856562328390994, "grad_norm": 0.3901939392089844, "learning_rate": 1.2258580662589805e-05, "loss": 0.5585, "step": 31196 }, { "epoch": 0.8565897858319604, "grad_norm": 0.3525839149951935, "learning_rate": 1.2258159927105659e-05, "loss": 0.4557, "step": 31197 }, { "epoch": 0.856617243272927, "grad_norm": 0.3987579047679901, "learning_rate": 1.2257739187409326e-05, "loss": 0.4852, "step": 31198 }, { "epoch": 0.8566447007138934, "grad_norm": 0.37753841280937195, "learning_rate": 1.2257318443501585e-05, "loss": 0.5095, "step": 31199 }, { "epoch": 0.85667215815486, "grad_norm": 0.4072312116622925, "learning_rate": 1.2256897695383226e-05, "loss": 0.5053, "step": 31200 }, { "epoch": 0.8566996155958264, "grad_norm": 0.3973519504070282, "learning_rate": 1.2256476943055031e-05, "loss": 0.4955, "step": 31201 }, { "epoch": 0.856727073036793, "grad_norm": 0.4375503957271576, "learning_rate": 1.2256056186517789e-05, "loss": 0.5388, "step": 31202 }, { "epoch": 0.8567545304777595, "grad_norm": 0.3835250735282898, "learning_rate": 1.225563542577228e-05, "loss": 0.5234, "step": 31203 }, { "epoch": 0.856781987918726, "grad_norm": 1.0030382871627808, "learning_rate": 1.2255214660819292e-05, "loss": 0.4914, "step": 31204 }, { "epoch": 0.8568094453596925, "grad_norm": 0.40137964487075806, "learning_rate": 1.2254793891659609e-05, "loss": 0.5624, "step": 31205 }, { "epoch": 0.8568369028006589, "grad_norm": 0.36756718158721924, "learning_rate": 1.2254373118294018e-05, "loss": 0.5007, "step": 31206 }, { "epoch": 0.8568643602416255, "grad_norm": 0.3762566149234772, "learning_rate": 1.2253952340723296e-05, "loss": 0.488, "step": 31207 }, { "epoch": 0.8568918176825919, "grad_norm": 0.6128677129745483, "learning_rate": 1.225353155894824e-05, "loss": 0.503, "step": 31208 }, { "epoch": 0.8569192751235585, "grad_norm": 0.44457894563674927, "learning_rate": 1.2253110772969623e-05, "loss": 0.5079, "step": 31209 }, { "epoch": 0.856946732564525, "grad_norm": 0.3715469539165497, "learning_rate": 1.2252689982788238e-05, "loss": 0.4334, "step": 31210 }, { "epoch": 0.8569741900054915, "grad_norm": 0.45603036880493164, "learning_rate": 1.2252269188404866e-05, "loss": 0.4049, "step": 31211 }, { "epoch": 0.857001647446458, "grad_norm": 0.5046284794807434, "learning_rate": 1.2251848389820293e-05, "loss": 0.5554, "step": 31212 }, { "epoch": 0.8570291048874245, "grad_norm": 0.3989475667476654, "learning_rate": 1.2251427587035305e-05, "loss": 0.5416, "step": 31213 }, { "epoch": 0.857056562328391, "grad_norm": 0.43352210521698, "learning_rate": 1.2251006780050684e-05, "loss": 0.4883, "step": 31214 }, { "epoch": 0.8570840197693574, "grad_norm": 0.4373731315135956, "learning_rate": 1.2250585968867222e-05, "loss": 0.5661, "step": 31215 }, { "epoch": 0.857111477210324, "grad_norm": 0.4560258686542511, "learning_rate": 1.2250165153485696e-05, "loss": 0.5817, "step": 31216 }, { "epoch": 0.8571389346512905, "grad_norm": 0.399280309677124, "learning_rate": 1.2249744333906892e-05, "loss": 0.5201, "step": 31217 }, { "epoch": 0.857166392092257, "grad_norm": 0.40258800983428955, "learning_rate": 1.2249323510131599e-05, "loss": 0.5231, "step": 31218 }, { "epoch": 0.8571938495332235, "grad_norm": 0.37941837310791016, "learning_rate": 1.2248902682160597e-05, "loss": 0.4857, "step": 31219 }, { "epoch": 0.85722130697419, "grad_norm": 0.416501522064209, "learning_rate": 1.2248481849994675e-05, "loss": 0.4703, "step": 31220 }, { "epoch": 0.8572487644151565, "grad_norm": 0.3670039474964142, "learning_rate": 1.2248061013634619e-05, "loss": 0.4211, "step": 31221 }, { "epoch": 0.857276221856123, "grad_norm": 0.3630046248435974, "learning_rate": 1.2247640173081207e-05, "loss": 0.4684, "step": 31222 }, { "epoch": 0.8573036792970895, "grad_norm": 0.3955976366996765, "learning_rate": 1.2247219328335233e-05, "loss": 0.5305, "step": 31223 }, { "epoch": 0.857331136738056, "grad_norm": 0.403542697429657, "learning_rate": 1.2246798479397477e-05, "loss": 0.4957, "step": 31224 }, { "epoch": 0.8573585941790225, "grad_norm": 0.41247448325157166, "learning_rate": 1.2246377626268725e-05, "loss": 0.5191, "step": 31225 }, { "epoch": 0.857386051619989, "grad_norm": 0.44509851932525635, "learning_rate": 1.2245956768949761e-05, "loss": 0.4993, "step": 31226 }, { "epoch": 0.8574135090609555, "grad_norm": 0.363427996635437, "learning_rate": 1.2245535907441371e-05, "loss": 0.4672, "step": 31227 }, { "epoch": 0.857440966501922, "grad_norm": 0.42820101976394653, "learning_rate": 1.224511504174434e-05, "loss": 0.4713, "step": 31228 }, { "epoch": 0.8574684239428885, "grad_norm": 0.41293075680732727, "learning_rate": 1.224469417185945e-05, "loss": 0.474, "step": 31229 }, { "epoch": 0.857495881383855, "grad_norm": 0.391203373670578, "learning_rate": 1.2244273297787492e-05, "loss": 0.5106, "step": 31230 }, { "epoch": 0.8575233388248216, "grad_norm": 0.4602276384830475, "learning_rate": 1.2243852419529247e-05, "loss": 0.4738, "step": 31231 }, { "epoch": 0.857550796265788, "grad_norm": 0.39129823446273804, "learning_rate": 1.2243431537085501e-05, "loss": 0.3832, "step": 31232 }, { "epoch": 0.8575782537067546, "grad_norm": 0.4594971835613251, "learning_rate": 1.2243010650457041e-05, "loss": 0.5376, "step": 31233 }, { "epoch": 0.857605711147721, "grad_norm": 0.36841881275177, "learning_rate": 1.2242589759644648e-05, "loss": 0.438, "step": 31234 }, { "epoch": 0.8576331685886875, "grad_norm": 0.3976348340511322, "learning_rate": 1.2242168864649112e-05, "loss": 0.4897, "step": 31235 }, { "epoch": 0.857660626029654, "grad_norm": 0.3530547618865967, "learning_rate": 1.2241747965471211e-05, "loss": 0.4706, "step": 31236 }, { "epoch": 0.8576880834706205, "grad_norm": 0.3832060396671295, "learning_rate": 1.2241327062111738e-05, "loss": 0.4591, "step": 31237 }, { "epoch": 0.8577155409115871, "grad_norm": 0.44456860423088074, "learning_rate": 1.2240906154571475e-05, "loss": 0.5115, "step": 31238 }, { "epoch": 0.8577429983525535, "grad_norm": 0.38424327969551086, "learning_rate": 1.2240485242851205e-05, "loss": 0.4642, "step": 31239 }, { "epoch": 0.8577704557935201, "grad_norm": 0.40051573514938354, "learning_rate": 1.2240064326951717e-05, "loss": 0.5379, "step": 31240 }, { "epoch": 0.8577979132344865, "grad_norm": 0.40379762649536133, "learning_rate": 1.223964340687379e-05, "loss": 0.5596, "step": 31241 }, { "epoch": 0.8578253706754531, "grad_norm": 0.31290313601493835, "learning_rate": 1.2239222482618218e-05, "loss": 0.364, "step": 31242 }, { "epoch": 0.8578528281164195, "grad_norm": 0.34535205364227295, "learning_rate": 1.223880155418578e-05, "loss": 0.4865, "step": 31243 }, { "epoch": 0.857880285557386, "grad_norm": 0.5482646226882935, "learning_rate": 1.2238380621577261e-05, "loss": 0.4985, "step": 31244 }, { "epoch": 0.8579077429983526, "grad_norm": 0.5475465655326843, "learning_rate": 1.223795968479345e-05, "loss": 0.4205, "step": 31245 }, { "epoch": 0.857935200439319, "grad_norm": 0.40413138270378113, "learning_rate": 1.2237538743835127e-05, "loss": 0.4974, "step": 31246 }, { "epoch": 0.8579626578802856, "grad_norm": 0.379414826631546, "learning_rate": 1.2237117798703082e-05, "loss": 0.4697, "step": 31247 }, { "epoch": 0.857990115321252, "grad_norm": 0.36180299520492554, "learning_rate": 1.2236696849398097e-05, "loss": 0.4682, "step": 31248 }, { "epoch": 0.8580175727622186, "grad_norm": 0.389423131942749, "learning_rate": 1.2236275895920962e-05, "loss": 0.4728, "step": 31249 }, { "epoch": 0.858045030203185, "grad_norm": 0.4613798260688782, "learning_rate": 1.2235854938272456e-05, "loss": 0.5228, "step": 31250 }, { "epoch": 0.8580724876441516, "grad_norm": 0.41690778732299805, "learning_rate": 1.2235433976453366e-05, "loss": 0.6017, "step": 31251 }, { "epoch": 0.8580999450851181, "grad_norm": 0.4271576404571533, "learning_rate": 1.223501301046448e-05, "loss": 0.4387, "step": 31252 }, { "epoch": 0.8581274025260845, "grad_norm": 0.41105902194976807, "learning_rate": 1.2234592040306579e-05, "loss": 0.5235, "step": 31253 }, { "epoch": 0.8581548599670511, "grad_norm": 0.4484356641769409, "learning_rate": 1.2234171065980454e-05, "loss": 0.4543, "step": 31254 }, { "epoch": 0.8581823174080175, "grad_norm": 0.3563523292541504, "learning_rate": 1.2233750087486887e-05, "loss": 0.4927, "step": 31255 }, { "epoch": 0.8582097748489841, "grad_norm": 0.32308751344680786, "learning_rate": 1.223332910482666e-05, "loss": 0.4211, "step": 31256 }, { "epoch": 0.8582372322899505, "grad_norm": 0.3844435513019562, "learning_rate": 1.2232908118000564e-05, "loss": 0.5124, "step": 31257 }, { "epoch": 0.8582646897309171, "grad_norm": 0.42347580194473267, "learning_rate": 1.2232487127009378e-05, "loss": 0.5857, "step": 31258 }, { "epoch": 0.8582921471718836, "grad_norm": 0.40339529514312744, "learning_rate": 1.2232066131853895e-05, "loss": 0.5006, "step": 31259 }, { "epoch": 0.8583196046128501, "grad_norm": 0.41514450311660767, "learning_rate": 1.2231645132534894e-05, "loss": 0.5127, "step": 31260 }, { "epoch": 0.8583470620538166, "grad_norm": 0.3713267743587494, "learning_rate": 1.2231224129053164e-05, "loss": 0.4703, "step": 31261 }, { "epoch": 0.858374519494783, "grad_norm": 0.41198885440826416, "learning_rate": 1.223080312140949e-05, "loss": 0.481, "step": 31262 }, { "epoch": 0.8584019769357496, "grad_norm": 0.39830541610717773, "learning_rate": 1.223038210960465e-05, "loss": 0.5438, "step": 31263 }, { "epoch": 0.858429434376716, "grad_norm": 0.3979830741882324, "learning_rate": 1.2229961093639441e-05, "loss": 0.4577, "step": 31264 }, { "epoch": 0.8584568918176826, "grad_norm": 0.38529831171035767, "learning_rate": 1.2229540073514645e-05, "loss": 0.4845, "step": 31265 }, { "epoch": 0.8584843492586491, "grad_norm": 0.3619195222854614, "learning_rate": 1.222911904923104e-05, "loss": 0.4505, "step": 31266 }, { "epoch": 0.8585118066996156, "grad_norm": 0.44146260619163513, "learning_rate": 1.222869802078942e-05, "loss": 0.5437, "step": 31267 }, { "epoch": 0.8585392641405821, "grad_norm": 0.4120608866214752, "learning_rate": 1.2228276988190567e-05, "loss": 0.4967, "step": 31268 }, { "epoch": 0.8585667215815486, "grad_norm": 0.38189375400543213, "learning_rate": 1.222785595143526e-05, "loss": 0.5191, "step": 31269 }, { "epoch": 0.8585941790225151, "grad_norm": 0.3514060974121094, "learning_rate": 1.2227434910524298e-05, "loss": 0.4154, "step": 31270 }, { "epoch": 0.8586216364634816, "grad_norm": 0.38597914576530457, "learning_rate": 1.2227013865458456e-05, "loss": 0.4802, "step": 31271 }, { "epoch": 0.8586490939044481, "grad_norm": 0.4151039123535156, "learning_rate": 1.2226592816238523e-05, "loss": 0.5752, "step": 31272 }, { "epoch": 0.8586765513454147, "grad_norm": 0.38063693046569824, "learning_rate": 1.2226171762865285e-05, "loss": 0.4466, "step": 31273 }, { "epoch": 0.8587040087863811, "grad_norm": 0.4031619727611542, "learning_rate": 1.2225750705339521e-05, "loss": 0.4795, "step": 31274 }, { "epoch": 0.8587314662273476, "grad_norm": 0.4467563033103943, "learning_rate": 1.2225329643662028e-05, "loss": 0.4599, "step": 31275 }, { "epoch": 0.8587589236683141, "grad_norm": 0.41717079281806946, "learning_rate": 1.2224908577833578e-05, "loss": 0.4782, "step": 31276 }, { "epoch": 0.8587863811092806, "grad_norm": 0.3691525459289551, "learning_rate": 1.2224487507854969e-05, "loss": 0.472, "step": 31277 }, { "epoch": 0.8588138385502471, "grad_norm": 0.42044100165367126, "learning_rate": 1.222406643372698e-05, "loss": 0.5196, "step": 31278 }, { "epoch": 0.8588412959912136, "grad_norm": 0.44205033779144287, "learning_rate": 1.2223645355450396e-05, "loss": 0.5333, "step": 31279 }, { "epoch": 0.8588687534321802, "grad_norm": 0.4392353892326355, "learning_rate": 1.2223224273026004e-05, "loss": 0.493, "step": 31280 }, { "epoch": 0.8588962108731466, "grad_norm": 0.388270765542984, "learning_rate": 1.222280318645459e-05, "loss": 0.4187, "step": 31281 }, { "epoch": 0.8589236683141132, "grad_norm": 0.4832018315792084, "learning_rate": 1.2222382095736938e-05, "loss": 0.5345, "step": 31282 }, { "epoch": 0.8589511257550796, "grad_norm": 0.554000198841095, "learning_rate": 1.2221961000873834e-05, "loss": 0.502, "step": 31283 }, { "epoch": 0.8589785831960461, "grad_norm": 0.3446379601955414, "learning_rate": 1.2221539901866062e-05, "loss": 0.4467, "step": 31284 }, { "epoch": 0.8590060406370126, "grad_norm": 0.4168390929698944, "learning_rate": 1.2221118798714413e-05, "loss": 0.5244, "step": 31285 }, { "epoch": 0.8590334980779791, "grad_norm": 0.4153856933116913, "learning_rate": 1.2220697691419667e-05, "loss": 0.5098, "step": 31286 }, { "epoch": 0.8590609555189457, "grad_norm": 0.3888363838195801, "learning_rate": 1.2220276579982609e-05, "loss": 0.555, "step": 31287 }, { "epoch": 0.8590884129599121, "grad_norm": 0.371083527803421, "learning_rate": 1.221985546440403e-05, "loss": 0.5489, "step": 31288 }, { "epoch": 0.8591158704008787, "grad_norm": 0.3912227153778076, "learning_rate": 1.2219434344684706e-05, "loss": 0.4737, "step": 31289 }, { "epoch": 0.8591433278418451, "grad_norm": 0.41111046075820923, "learning_rate": 1.2219013220825433e-05, "loss": 0.4033, "step": 31290 }, { "epoch": 0.8591707852828117, "grad_norm": 0.4009310305118561, "learning_rate": 1.2218592092826993e-05, "loss": 0.4607, "step": 31291 }, { "epoch": 0.8591982427237781, "grad_norm": 0.46475574374198914, "learning_rate": 1.2218170960690168e-05, "loss": 0.5552, "step": 31292 }, { "epoch": 0.8592257001647446, "grad_norm": 0.47291719913482666, "learning_rate": 1.2217749824415749e-05, "loss": 0.471, "step": 31293 }, { "epoch": 0.8592531576057112, "grad_norm": 0.39113548398017883, "learning_rate": 1.2217328684004515e-05, "loss": 0.4596, "step": 31294 }, { "epoch": 0.8592806150466776, "grad_norm": 0.3748525381088257, "learning_rate": 1.2216907539457258e-05, "loss": 0.4591, "step": 31295 }, { "epoch": 0.8593080724876442, "grad_norm": 0.46413442492485046, "learning_rate": 1.2216486390774761e-05, "loss": 0.4306, "step": 31296 }, { "epoch": 0.8593355299286106, "grad_norm": 0.3811872899532318, "learning_rate": 1.2216065237957809e-05, "loss": 0.4727, "step": 31297 }, { "epoch": 0.8593629873695772, "grad_norm": 0.4518660306930542, "learning_rate": 1.2215644081007188e-05, "loss": 0.558, "step": 31298 }, { "epoch": 0.8593904448105436, "grad_norm": 0.36762717366218567, "learning_rate": 1.2215222919923685e-05, "loss": 0.5163, "step": 31299 }, { "epoch": 0.8594179022515102, "grad_norm": 0.3962073028087616, "learning_rate": 1.221480175470808e-05, "loss": 0.4161, "step": 31300 }, { "epoch": 0.8594453596924767, "grad_norm": 0.8189207911491394, "learning_rate": 1.2214380585361168e-05, "loss": 0.4844, "step": 31301 }, { "epoch": 0.8594728171334431, "grad_norm": 0.4133876860141754, "learning_rate": 1.2213959411883724e-05, "loss": 0.6187, "step": 31302 }, { "epoch": 0.8595002745744097, "grad_norm": 0.4427950084209442, "learning_rate": 1.2213538234276545e-05, "loss": 0.47, "step": 31303 }, { "epoch": 0.8595277320153761, "grad_norm": 0.3886093497276306, "learning_rate": 1.2213117052540408e-05, "loss": 0.4609, "step": 31304 }, { "epoch": 0.8595551894563427, "grad_norm": 0.38039782643318176, "learning_rate": 1.2212695866676101e-05, "loss": 0.4995, "step": 31305 }, { "epoch": 0.8595826468973091, "grad_norm": 0.3966236412525177, "learning_rate": 1.221227467668441e-05, "loss": 0.4827, "step": 31306 }, { "epoch": 0.8596101043382757, "grad_norm": 0.4804043769836426, "learning_rate": 1.2211853482566119e-05, "loss": 0.5205, "step": 31307 }, { "epoch": 0.8596375617792422, "grad_norm": 0.4277108609676361, "learning_rate": 1.2211432284322019e-05, "loss": 0.4637, "step": 31308 }, { "epoch": 0.8596650192202087, "grad_norm": 0.3633856177330017, "learning_rate": 1.2211011081952891e-05, "loss": 0.4768, "step": 31309 }, { "epoch": 0.8596924766611752, "grad_norm": 0.5255773067474365, "learning_rate": 1.221058987545952e-05, "loss": 0.5372, "step": 31310 }, { "epoch": 0.8597199341021416, "grad_norm": 0.4376133680343628, "learning_rate": 1.2210168664842694e-05, "loss": 0.5048, "step": 31311 }, { "epoch": 0.8597473915431082, "grad_norm": 0.4208686351776123, "learning_rate": 1.2209747450103198e-05, "loss": 0.5823, "step": 31312 }, { "epoch": 0.8597748489840746, "grad_norm": 0.4360350966453552, "learning_rate": 1.2209326231241819e-05, "loss": 0.468, "step": 31313 }, { "epoch": 0.8598023064250412, "grad_norm": 0.3907027840614319, "learning_rate": 1.220890500825934e-05, "loss": 0.5034, "step": 31314 }, { "epoch": 0.8598297638660077, "grad_norm": 0.407071053981781, "learning_rate": 1.2208483781156546e-05, "loss": 0.5227, "step": 31315 }, { "epoch": 0.8598572213069742, "grad_norm": 0.4122464656829834, "learning_rate": 1.220806254993423e-05, "loss": 0.4603, "step": 31316 }, { "epoch": 0.8598846787479407, "grad_norm": 0.3701207637786865, "learning_rate": 1.220764131459317e-05, "loss": 0.4747, "step": 31317 }, { "epoch": 0.8599121361889072, "grad_norm": 0.41987308859825134, "learning_rate": 1.2207220075134154e-05, "loss": 0.4214, "step": 31318 }, { "epoch": 0.8599395936298737, "grad_norm": 0.44811657071113586, "learning_rate": 1.220679883155797e-05, "loss": 0.5715, "step": 31319 }, { "epoch": 0.8599670510708401, "grad_norm": 0.4045025706291199, "learning_rate": 1.2206377583865399e-05, "loss": 0.4895, "step": 31320 }, { "epoch": 0.8599945085118067, "grad_norm": 0.3782580494880676, "learning_rate": 1.220595633205723e-05, "loss": 0.4271, "step": 31321 }, { "epoch": 0.8600219659527732, "grad_norm": 0.490177720785141, "learning_rate": 1.220553507613425e-05, "loss": 0.606, "step": 31322 }, { "epoch": 0.8600494233937397, "grad_norm": 0.359328031539917, "learning_rate": 1.2205113816097242e-05, "loss": 0.4521, "step": 31323 }, { "epoch": 0.8600768808347062, "grad_norm": 0.409210741519928, "learning_rate": 1.2204692551946996e-05, "loss": 0.5573, "step": 31324 }, { "epoch": 0.8601043382756727, "grad_norm": 0.36547163128852844, "learning_rate": 1.2204271283684289e-05, "loss": 0.4901, "step": 31325 }, { "epoch": 0.8601317957166392, "grad_norm": 0.3905768096446991, "learning_rate": 1.2203850011309916e-05, "loss": 0.5263, "step": 31326 }, { "epoch": 0.8601592531576057, "grad_norm": 0.36346250772476196, "learning_rate": 1.220342873482466e-05, "loss": 0.488, "step": 31327 }, { "epoch": 0.8601867105985722, "grad_norm": 0.3605482876300812, "learning_rate": 1.2203007454229305e-05, "loss": 0.5228, "step": 31328 }, { "epoch": 0.8602141680395388, "grad_norm": 0.4963330328464508, "learning_rate": 1.2202586169524638e-05, "loss": 0.528, "step": 31329 }, { "epoch": 0.8602416254805052, "grad_norm": 0.37144699692726135, "learning_rate": 1.2202164880711446e-05, "loss": 0.458, "step": 31330 }, { "epoch": 0.8602690829214718, "grad_norm": 0.3776478171348572, "learning_rate": 1.220174358779051e-05, "loss": 0.542, "step": 31331 }, { "epoch": 0.8602965403624382, "grad_norm": 0.3982198238372803, "learning_rate": 1.2201322290762624e-05, "loss": 0.4385, "step": 31332 }, { "epoch": 0.8603239978034047, "grad_norm": 0.3848790228366852, "learning_rate": 1.2200900989628566e-05, "loss": 0.4645, "step": 31333 }, { "epoch": 0.8603514552443712, "grad_norm": 0.4136074483394623, "learning_rate": 1.2200479684389129e-05, "loss": 0.5867, "step": 31334 }, { "epoch": 0.8603789126853377, "grad_norm": 0.3775768280029297, "learning_rate": 1.2200058375045092e-05, "loss": 0.4721, "step": 31335 }, { "epoch": 0.8604063701263043, "grad_norm": 0.3791636824607849, "learning_rate": 1.2199637061597244e-05, "loss": 0.4867, "step": 31336 }, { "epoch": 0.8604338275672707, "grad_norm": 0.3575994670391083, "learning_rate": 1.2199215744046373e-05, "loss": 0.4971, "step": 31337 }, { "epoch": 0.8604612850082373, "grad_norm": 0.44650033116340637, "learning_rate": 1.2198794422393258e-05, "loss": 0.4744, "step": 31338 }, { "epoch": 0.8604887424492037, "grad_norm": 0.3909582197666168, "learning_rate": 1.2198373096638697e-05, "loss": 0.5157, "step": 31339 }, { "epoch": 0.8605161998901703, "grad_norm": 0.4205089807510376, "learning_rate": 1.2197951766783462e-05, "loss": 0.5235, "step": 31340 }, { "epoch": 0.8605436573311367, "grad_norm": 0.437282532453537, "learning_rate": 1.2197530432828349e-05, "loss": 0.4998, "step": 31341 }, { "epoch": 0.8605711147721032, "grad_norm": 0.4647047221660614, "learning_rate": 1.219710909477414e-05, "loss": 0.5361, "step": 31342 }, { "epoch": 0.8605985722130698, "grad_norm": 0.37779924273490906, "learning_rate": 1.2196687752621619e-05, "loss": 0.5709, "step": 31343 }, { "epoch": 0.8606260296540362, "grad_norm": 0.4236941933631897, "learning_rate": 1.2196266406371575e-05, "loss": 0.4425, "step": 31344 }, { "epoch": 0.8606534870950028, "grad_norm": 0.3945387601852417, "learning_rate": 1.2195845056024796e-05, "loss": 0.4371, "step": 31345 }, { "epoch": 0.8606809445359692, "grad_norm": 0.37782523036003113, "learning_rate": 1.219542370158206e-05, "loss": 0.5279, "step": 31346 }, { "epoch": 0.8607084019769358, "grad_norm": 0.4053337574005127, "learning_rate": 1.2195002343044164e-05, "loss": 0.4945, "step": 31347 }, { "epoch": 0.8607358594179022, "grad_norm": 0.39608192443847656, "learning_rate": 1.2194580980411885e-05, "loss": 0.5008, "step": 31348 }, { "epoch": 0.8607633168588688, "grad_norm": 0.3938605487346649, "learning_rate": 1.2194159613686011e-05, "loss": 0.5555, "step": 31349 }, { "epoch": 0.8607907742998353, "grad_norm": 0.43662217259407043, "learning_rate": 1.2193738242867332e-05, "loss": 0.62, "step": 31350 }, { "epoch": 0.8608182317408017, "grad_norm": 0.41826504468917847, "learning_rate": 1.2193316867956629e-05, "loss": 0.4945, "step": 31351 }, { "epoch": 0.8608456891817683, "grad_norm": 0.40584608912467957, "learning_rate": 1.219289548895469e-05, "loss": 0.4378, "step": 31352 }, { "epoch": 0.8608731466227347, "grad_norm": 0.3724440932273865, "learning_rate": 1.2192474105862303e-05, "loss": 0.4985, "step": 31353 }, { "epoch": 0.8609006040637013, "grad_norm": 0.37889915704727173, "learning_rate": 1.2192052718680249e-05, "loss": 0.4969, "step": 31354 }, { "epoch": 0.8609280615046677, "grad_norm": 0.4117904305458069, "learning_rate": 1.2191631327409319e-05, "loss": 0.5228, "step": 31355 }, { "epoch": 0.8609555189456343, "grad_norm": 0.3804273307323456, "learning_rate": 1.2191209932050297e-05, "loss": 0.5143, "step": 31356 }, { "epoch": 0.8609829763866008, "grad_norm": 0.3504452407360077, "learning_rate": 1.2190788532603967e-05, "loss": 0.427, "step": 31357 }, { "epoch": 0.8610104338275673, "grad_norm": 0.7245714068412781, "learning_rate": 1.2190367129071122e-05, "loss": 0.3867, "step": 31358 }, { "epoch": 0.8610378912685338, "grad_norm": 0.38244062662124634, "learning_rate": 1.2189945721452538e-05, "loss": 0.4892, "step": 31359 }, { "epoch": 0.8610653487095002, "grad_norm": 0.4037846624851227, "learning_rate": 1.218952430974901e-05, "loss": 0.5373, "step": 31360 }, { "epoch": 0.8610928061504668, "grad_norm": 0.38630884885787964, "learning_rate": 1.2189102893961317e-05, "loss": 0.6379, "step": 31361 }, { "epoch": 0.8611202635914332, "grad_norm": 1.0298833847045898, "learning_rate": 1.2188681474090252e-05, "loss": 0.5322, "step": 31362 }, { "epoch": 0.8611477210323998, "grad_norm": 0.44588354229927063, "learning_rate": 1.2188260050136595e-05, "loss": 0.56, "step": 31363 }, { "epoch": 0.8611751784733663, "grad_norm": 0.35238221287727356, "learning_rate": 1.2187838622101133e-05, "loss": 0.5407, "step": 31364 }, { "epoch": 0.8612026359143328, "grad_norm": 0.4253321588039398, "learning_rate": 1.2187417189984658e-05, "loss": 0.5041, "step": 31365 }, { "epoch": 0.8612300933552993, "grad_norm": 0.32900747656822205, "learning_rate": 1.2186995753787949e-05, "loss": 0.4104, "step": 31366 }, { "epoch": 0.8612575507962658, "grad_norm": 0.38761138916015625, "learning_rate": 1.2186574313511797e-05, "loss": 0.4695, "step": 31367 }, { "epoch": 0.8612850082372323, "grad_norm": 0.3965902328491211, "learning_rate": 1.2186152869156984e-05, "loss": 0.4557, "step": 31368 }, { "epoch": 0.8613124656781987, "grad_norm": 0.3813440799713135, "learning_rate": 1.2185731420724298e-05, "loss": 0.4777, "step": 31369 }, { "epoch": 0.8613399231191653, "grad_norm": 0.433554083108902, "learning_rate": 1.2185309968214527e-05, "loss": 0.5805, "step": 31370 }, { "epoch": 0.8613673805601318, "grad_norm": 0.38253292441368103, "learning_rate": 1.2184888511628454e-05, "loss": 0.4606, "step": 31371 }, { "epoch": 0.8613948380010983, "grad_norm": 0.4328339993953705, "learning_rate": 1.2184467050966867e-05, "loss": 0.4754, "step": 31372 }, { "epoch": 0.8614222954420648, "grad_norm": 0.4179971516132355, "learning_rate": 1.2184045586230553e-05, "loss": 0.4852, "step": 31373 }, { "epoch": 0.8614497528830313, "grad_norm": 0.4774787127971649, "learning_rate": 1.2183624117420293e-05, "loss": 0.5838, "step": 31374 }, { "epoch": 0.8614772103239978, "grad_norm": 0.3898458480834961, "learning_rate": 1.2183202644536883e-05, "loss": 0.417, "step": 31375 }, { "epoch": 0.8615046677649643, "grad_norm": 0.3487231135368347, "learning_rate": 1.21827811675811e-05, "loss": 0.4454, "step": 31376 }, { "epoch": 0.8615321252059308, "grad_norm": 0.36818331480026245, "learning_rate": 1.2182359686553733e-05, "loss": 0.4492, "step": 31377 }, { "epoch": 0.8615595826468974, "grad_norm": 0.3885168433189392, "learning_rate": 1.218193820145557e-05, "loss": 0.5046, "step": 31378 }, { "epoch": 0.8615870400878638, "grad_norm": 0.36676642298698425, "learning_rate": 1.2181516712287396e-05, "loss": 0.474, "step": 31379 }, { "epoch": 0.8616144975288303, "grad_norm": 0.477789044380188, "learning_rate": 1.2181095219049993e-05, "loss": 0.5248, "step": 31380 }, { "epoch": 0.8616419549697968, "grad_norm": 0.4089867174625397, "learning_rate": 1.2180673721744157e-05, "loss": 0.5086, "step": 31381 }, { "epoch": 0.8616694124107633, "grad_norm": 0.38655516505241394, "learning_rate": 1.2180252220370666e-05, "loss": 0.5095, "step": 31382 }, { "epoch": 0.8616968698517298, "grad_norm": 0.38183602690696716, "learning_rate": 1.2179830714930309e-05, "loss": 0.4519, "step": 31383 }, { "epoch": 0.8617243272926963, "grad_norm": 0.4732474088668823, "learning_rate": 1.2179409205423874e-05, "loss": 0.4665, "step": 31384 }, { "epoch": 0.8617517847336629, "grad_norm": 0.4348851144313812, "learning_rate": 1.217898769185214e-05, "loss": 0.5202, "step": 31385 }, { "epoch": 0.8617792421746293, "grad_norm": 0.3864341974258423, "learning_rate": 1.2178566174215903e-05, "loss": 0.5081, "step": 31386 }, { "epoch": 0.8618066996155959, "grad_norm": 0.8861841559410095, "learning_rate": 1.2178144652515942e-05, "loss": 0.6036, "step": 31387 }, { "epoch": 0.8618341570565623, "grad_norm": 0.3373664617538452, "learning_rate": 1.2177723126753048e-05, "loss": 0.5018, "step": 31388 }, { "epoch": 0.8618616144975288, "grad_norm": 0.3849511742591858, "learning_rate": 1.2177301596928005e-05, "loss": 0.46, "step": 31389 }, { "epoch": 0.8618890719384953, "grad_norm": 0.39529773592948914, "learning_rate": 1.2176880063041598e-05, "loss": 0.4532, "step": 31390 }, { "epoch": 0.8619165293794618, "grad_norm": 0.3211962878704071, "learning_rate": 1.2176458525094617e-05, "loss": 0.4742, "step": 31391 }, { "epoch": 0.8619439868204284, "grad_norm": 0.43557047843933105, "learning_rate": 1.2176036983087847e-05, "loss": 0.5279, "step": 31392 }, { "epoch": 0.8619714442613948, "grad_norm": 0.44129088521003723, "learning_rate": 1.217561543702207e-05, "loss": 0.555, "step": 31393 }, { "epoch": 0.8619989017023614, "grad_norm": 0.40078625082969666, "learning_rate": 1.2175193886898078e-05, "loss": 0.4874, "step": 31394 }, { "epoch": 0.8620263591433278, "grad_norm": 0.3765333592891693, "learning_rate": 1.2174772332716653e-05, "loss": 0.4652, "step": 31395 }, { "epoch": 0.8620538165842944, "grad_norm": 0.4323311150074005, "learning_rate": 1.2174350774478587e-05, "loss": 0.4366, "step": 31396 }, { "epoch": 0.8620812740252608, "grad_norm": 0.378772109746933, "learning_rate": 1.2173929212184663e-05, "loss": 0.5, "step": 31397 }, { "epoch": 0.8621087314662274, "grad_norm": 0.36059001088142395, "learning_rate": 1.2173507645835663e-05, "loss": 0.4136, "step": 31398 }, { "epoch": 0.8621361889071939, "grad_norm": 0.37629786133766174, "learning_rate": 1.217308607543238e-05, "loss": 0.5004, "step": 31399 }, { "epoch": 0.8621636463481603, "grad_norm": 0.41888654232025146, "learning_rate": 1.2172664500975598e-05, "loss": 0.4398, "step": 31400 }, { "epoch": 0.8621911037891269, "grad_norm": 0.44094330072402954, "learning_rate": 1.2172242922466105e-05, "loss": 0.5817, "step": 31401 }, { "epoch": 0.8622185612300933, "grad_norm": 0.43093687295913696, "learning_rate": 1.2171821339904684e-05, "loss": 0.5633, "step": 31402 }, { "epoch": 0.8622460186710599, "grad_norm": 0.3847319483757019, "learning_rate": 1.2171399753292122e-05, "loss": 0.4926, "step": 31403 }, { "epoch": 0.8622734761120263, "grad_norm": 0.3456861972808838, "learning_rate": 1.2170978162629208e-05, "loss": 0.482, "step": 31404 }, { "epoch": 0.8623009335529929, "grad_norm": 0.4453994333744049, "learning_rate": 1.2170556567916726e-05, "loss": 0.4596, "step": 31405 }, { "epoch": 0.8623283909939594, "grad_norm": 0.43211716413497925, "learning_rate": 1.2170134969155463e-05, "loss": 0.5339, "step": 31406 }, { "epoch": 0.8623558484349259, "grad_norm": 0.5397331714630127, "learning_rate": 1.2169713366346205e-05, "loss": 0.4743, "step": 31407 }, { "epoch": 0.8623833058758924, "grad_norm": 0.40424805879592896, "learning_rate": 1.216929175948974e-05, "loss": 0.4974, "step": 31408 }, { "epoch": 0.8624107633168588, "grad_norm": 0.4279015064239502, "learning_rate": 1.2168870148586853e-05, "loss": 0.5002, "step": 31409 }, { "epoch": 0.8624382207578254, "grad_norm": 0.38152948021888733, "learning_rate": 1.216844853363833e-05, "loss": 0.4829, "step": 31410 }, { "epoch": 0.8624656781987918, "grad_norm": 0.3795414865016937, "learning_rate": 1.216802691464496e-05, "loss": 0.4102, "step": 31411 }, { "epoch": 0.8624931356397584, "grad_norm": 0.357448548078537, "learning_rate": 1.216760529160753e-05, "loss": 0.4431, "step": 31412 }, { "epoch": 0.8625205930807249, "grad_norm": 0.47998157143592834, "learning_rate": 1.216718366452682e-05, "loss": 0.508, "step": 31413 }, { "epoch": 0.8625480505216914, "grad_norm": 0.37344714999198914, "learning_rate": 1.2166762033403625e-05, "loss": 0.4725, "step": 31414 }, { "epoch": 0.8625755079626579, "grad_norm": 0.4621744751930237, "learning_rate": 1.2166340398238724e-05, "loss": 0.6038, "step": 31415 }, { "epoch": 0.8626029654036244, "grad_norm": 0.40091249346733093, "learning_rate": 1.2165918759032908e-05, "loss": 0.5908, "step": 31416 }, { "epoch": 0.8626304228445909, "grad_norm": 0.3997773230075836, "learning_rate": 1.2165497115786962e-05, "loss": 0.4863, "step": 31417 }, { "epoch": 0.8626578802855573, "grad_norm": 0.42403993010520935, "learning_rate": 1.2165075468501673e-05, "loss": 0.5829, "step": 31418 }, { "epoch": 0.8626853377265239, "grad_norm": 0.4686851501464844, "learning_rate": 1.2164653817177828e-05, "loss": 0.556, "step": 31419 }, { "epoch": 0.8627127951674904, "grad_norm": 0.3703324794769287, "learning_rate": 1.2164232161816209e-05, "loss": 0.4879, "step": 31420 }, { "epoch": 0.8627402526084569, "grad_norm": 0.3716850280761719, "learning_rate": 1.2163810502417612e-05, "loss": 0.49, "step": 31421 }, { "epoch": 0.8627677100494234, "grad_norm": 0.35182061791419983, "learning_rate": 1.2163388838982812e-05, "loss": 0.4717, "step": 31422 }, { "epoch": 0.8627951674903899, "grad_norm": 0.39828890562057495, "learning_rate": 1.2162967171512605e-05, "loss": 0.5181, "step": 31423 }, { "epoch": 0.8628226249313564, "grad_norm": 0.3596118688583374, "learning_rate": 1.2162545500007774e-05, "loss": 0.4178, "step": 31424 }, { "epoch": 0.8628500823723229, "grad_norm": 0.3950846791267395, "learning_rate": 1.2162123824469104e-05, "loss": 0.5353, "step": 31425 }, { "epoch": 0.8628775398132894, "grad_norm": 0.3694448471069336, "learning_rate": 1.2161702144897384e-05, "loss": 0.4533, "step": 31426 }, { "epoch": 0.862904997254256, "grad_norm": 0.3870159387588501, "learning_rate": 1.21612804612934e-05, "loss": 0.4242, "step": 31427 }, { "epoch": 0.8629324546952224, "grad_norm": 0.41604816913604736, "learning_rate": 1.2160858773657935e-05, "loss": 0.553, "step": 31428 }, { "epoch": 0.862959912136189, "grad_norm": 0.3952045738697052, "learning_rate": 1.216043708199178e-05, "loss": 0.4611, "step": 31429 }, { "epoch": 0.8629873695771554, "grad_norm": 0.43566423654556274, "learning_rate": 1.2160015386295723e-05, "loss": 0.5012, "step": 31430 }, { "epoch": 0.8630148270181219, "grad_norm": 0.4323706328868866, "learning_rate": 1.2159593686570548e-05, "loss": 0.5523, "step": 31431 }, { "epoch": 0.8630422844590884, "grad_norm": 0.4292236864566803, "learning_rate": 1.2159171982817038e-05, "loss": 0.6514, "step": 31432 }, { "epoch": 0.8630697419000549, "grad_norm": 0.4094109535217285, "learning_rate": 1.2158750275035987e-05, "loss": 0.4716, "step": 31433 }, { "epoch": 0.8630971993410215, "grad_norm": 0.3770277202129364, "learning_rate": 1.2158328563228172e-05, "loss": 0.461, "step": 31434 }, { "epoch": 0.8631246567819879, "grad_norm": 0.3735966980457306, "learning_rate": 1.2157906847394389e-05, "loss": 0.5388, "step": 31435 }, { "epoch": 0.8631521142229545, "grad_norm": 0.5216818451881409, "learning_rate": 1.2157485127535424e-05, "loss": 0.5796, "step": 31436 }, { "epoch": 0.8631795716639209, "grad_norm": 0.4204326868057251, "learning_rate": 1.2157063403652056e-05, "loss": 0.5361, "step": 31437 }, { "epoch": 0.8632070291048874, "grad_norm": 0.42258837819099426, "learning_rate": 1.2156641675745078e-05, "loss": 0.547, "step": 31438 }, { "epoch": 0.8632344865458539, "grad_norm": 0.5095781087875366, "learning_rate": 1.2156219943815275e-05, "loss": 0.4845, "step": 31439 }, { "epoch": 0.8632619439868204, "grad_norm": 0.3613291084766388, "learning_rate": 1.2155798207863435e-05, "loss": 0.4895, "step": 31440 }, { "epoch": 0.863289401427787, "grad_norm": 0.38864246010780334, "learning_rate": 1.2155376467890341e-05, "loss": 0.5144, "step": 31441 }, { "epoch": 0.8633168588687534, "grad_norm": 0.4082963168621063, "learning_rate": 1.2154954723896782e-05, "loss": 0.5443, "step": 31442 }, { "epoch": 0.86334431630972, "grad_norm": 0.3520066440105438, "learning_rate": 1.2154532975883546e-05, "loss": 0.4908, "step": 31443 }, { "epoch": 0.8633717737506864, "grad_norm": 0.4259317219257355, "learning_rate": 1.2154111223851417e-05, "loss": 0.4568, "step": 31444 }, { "epoch": 0.863399231191653, "grad_norm": 0.5068135857582092, "learning_rate": 1.2153689467801184e-05, "loss": 0.5592, "step": 31445 }, { "epoch": 0.8634266886326194, "grad_norm": 0.37814968824386597, "learning_rate": 1.2153267707733633e-05, "loss": 0.3706, "step": 31446 }, { "epoch": 0.863454146073586, "grad_norm": 0.41941264271736145, "learning_rate": 1.215284594364955e-05, "loss": 0.4669, "step": 31447 }, { "epoch": 0.8634816035145525, "grad_norm": 0.44661736488342285, "learning_rate": 1.2152424175549721e-05, "loss": 0.4946, "step": 31448 }, { "epoch": 0.8635090609555189, "grad_norm": 0.37812310457229614, "learning_rate": 1.2152002403434936e-05, "loss": 0.5358, "step": 31449 }, { "epoch": 0.8635365183964855, "grad_norm": 0.4027478098869324, "learning_rate": 1.215158062730598e-05, "loss": 0.4536, "step": 31450 }, { "epoch": 0.8635639758374519, "grad_norm": 0.42484593391418457, "learning_rate": 1.2151158847163638e-05, "loss": 0.4465, "step": 31451 }, { "epoch": 0.8635914332784185, "grad_norm": 0.36470314860343933, "learning_rate": 1.2150737063008697e-05, "loss": 0.3849, "step": 31452 }, { "epoch": 0.8636188907193849, "grad_norm": 0.3893754184246063, "learning_rate": 1.2150315274841946e-05, "loss": 0.5107, "step": 31453 }, { "epoch": 0.8636463481603515, "grad_norm": 0.5707032680511475, "learning_rate": 1.2149893482664172e-05, "loss": 0.5775, "step": 31454 }, { "epoch": 0.863673805601318, "grad_norm": 0.4218432605266571, "learning_rate": 1.214947168647616e-05, "loss": 0.4253, "step": 31455 }, { "epoch": 0.8637012630422845, "grad_norm": 0.38254213333129883, "learning_rate": 1.2149049886278697e-05, "loss": 0.5186, "step": 31456 }, { "epoch": 0.863728720483251, "grad_norm": 0.3997059166431427, "learning_rate": 1.2148628082072569e-05, "loss": 0.5987, "step": 31457 }, { "epoch": 0.8637561779242174, "grad_norm": 0.4094833433628082, "learning_rate": 1.2148206273858566e-05, "loss": 0.4981, "step": 31458 }, { "epoch": 0.863783635365184, "grad_norm": 0.5090482831001282, "learning_rate": 1.2147784461637471e-05, "loss": 0.5123, "step": 31459 }, { "epoch": 0.8638110928061504, "grad_norm": 0.37398484349250793, "learning_rate": 1.2147362645410072e-05, "loss": 0.438, "step": 31460 }, { "epoch": 0.863838550247117, "grad_norm": 0.3715219795703888, "learning_rate": 1.2146940825177158e-05, "loss": 0.4766, "step": 31461 }, { "epoch": 0.8638660076880835, "grad_norm": 0.41074997186660767, "learning_rate": 1.2146519000939512e-05, "loss": 0.4347, "step": 31462 }, { "epoch": 0.86389346512905, "grad_norm": 0.3914940655231476, "learning_rate": 1.2146097172697926e-05, "loss": 0.4674, "step": 31463 }, { "epoch": 0.8639209225700165, "grad_norm": 0.7171555161476135, "learning_rate": 1.2145675340453182e-05, "loss": 0.4632, "step": 31464 }, { "epoch": 0.863948380010983, "grad_norm": 0.3693390488624573, "learning_rate": 1.2145253504206069e-05, "loss": 0.4087, "step": 31465 }, { "epoch": 0.8639758374519495, "grad_norm": 0.3633575737476349, "learning_rate": 1.2144831663957373e-05, "loss": 0.4664, "step": 31466 }, { "epoch": 0.8640032948929159, "grad_norm": 0.42450740933418274, "learning_rate": 1.2144409819707881e-05, "loss": 0.5124, "step": 31467 }, { "epoch": 0.8640307523338825, "grad_norm": 0.4466194808483124, "learning_rate": 1.2143987971458383e-05, "loss": 0.479, "step": 31468 }, { "epoch": 0.8640582097748489, "grad_norm": 0.4256153404712677, "learning_rate": 1.2143566119209662e-05, "loss": 0.4286, "step": 31469 }, { "epoch": 0.8640856672158155, "grad_norm": 0.33586785197257996, "learning_rate": 1.2143144262962503e-05, "loss": 0.4501, "step": 31470 }, { "epoch": 0.864113124656782, "grad_norm": 0.38954028487205505, "learning_rate": 1.21427224027177e-05, "loss": 0.4896, "step": 31471 }, { "epoch": 0.8641405820977485, "grad_norm": 0.3689498007297516, "learning_rate": 1.2142300538476035e-05, "loss": 0.4662, "step": 31472 }, { "epoch": 0.864168039538715, "grad_norm": 0.4973452389240265, "learning_rate": 1.2141878670238292e-05, "loss": 0.4828, "step": 31473 }, { "epoch": 0.8641954969796815, "grad_norm": 0.4002431035041809, "learning_rate": 1.2141456798005266e-05, "loss": 0.5521, "step": 31474 }, { "epoch": 0.864222954420648, "grad_norm": 0.4470595419406891, "learning_rate": 1.2141034921777737e-05, "loss": 0.5132, "step": 31475 }, { "epoch": 0.8642504118616144, "grad_norm": 0.37496358156204224, "learning_rate": 1.2140613041556498e-05, "loss": 0.525, "step": 31476 }, { "epoch": 0.864277869302581, "grad_norm": 0.35777318477630615, "learning_rate": 1.214019115734233e-05, "loss": 0.5076, "step": 31477 }, { "epoch": 0.8643053267435475, "grad_norm": 0.4308715760707855, "learning_rate": 1.2139769269136022e-05, "loss": 0.634, "step": 31478 }, { "epoch": 0.864332784184514, "grad_norm": 0.37373873591423035, "learning_rate": 1.2139347376938362e-05, "loss": 0.4654, "step": 31479 }, { "epoch": 0.8643602416254805, "grad_norm": 0.4608699381351471, "learning_rate": 1.2138925480750134e-05, "loss": 0.5339, "step": 31480 }, { "epoch": 0.864387699066447, "grad_norm": 0.400287926197052, "learning_rate": 1.2138503580572129e-05, "loss": 0.5658, "step": 31481 }, { "epoch": 0.8644151565074135, "grad_norm": 0.3666137456893921, "learning_rate": 1.2138081676405136e-05, "loss": 0.423, "step": 31482 }, { "epoch": 0.86444261394838, "grad_norm": 0.4233371615409851, "learning_rate": 1.2137659768249932e-05, "loss": 0.5233, "step": 31483 }, { "epoch": 0.8644700713893465, "grad_norm": 0.4288518726825714, "learning_rate": 1.2137237856107315e-05, "loss": 0.4913, "step": 31484 }, { "epoch": 0.8644975288303131, "grad_norm": 0.3871913552284241, "learning_rate": 1.2136815939978065e-05, "loss": 0.5189, "step": 31485 }, { "epoch": 0.8645249862712795, "grad_norm": 0.3665926158428192, "learning_rate": 1.2136394019862972e-05, "loss": 0.5191, "step": 31486 }, { "epoch": 0.864552443712246, "grad_norm": 0.3975391089916229, "learning_rate": 1.2135972095762823e-05, "loss": 0.3726, "step": 31487 }, { "epoch": 0.8645799011532125, "grad_norm": 0.3710477352142334, "learning_rate": 1.2135550167678403e-05, "loss": 0.4698, "step": 31488 }, { "epoch": 0.864607358594179, "grad_norm": 0.38806837797164917, "learning_rate": 1.21351282356105e-05, "loss": 0.4098, "step": 31489 }, { "epoch": 0.8646348160351455, "grad_norm": 0.3590100407600403, "learning_rate": 1.2134706299559904e-05, "loss": 0.4222, "step": 31490 }, { "epoch": 0.864662273476112, "grad_norm": 0.4225962162017822, "learning_rate": 1.2134284359527398e-05, "loss": 0.5022, "step": 31491 }, { "epoch": 0.8646897309170786, "grad_norm": 0.39680561423301697, "learning_rate": 1.2133862415513771e-05, "loss": 0.4953, "step": 31492 }, { "epoch": 0.864717188358045, "grad_norm": 0.3813709318637848, "learning_rate": 1.2133440467519806e-05, "loss": 0.4481, "step": 31493 }, { "epoch": 0.8647446457990116, "grad_norm": 0.39235228300094604, "learning_rate": 1.2133018515546298e-05, "loss": 0.5381, "step": 31494 }, { "epoch": 0.864772103239978, "grad_norm": 0.41417133808135986, "learning_rate": 1.2132596559594028e-05, "loss": 0.4217, "step": 31495 }, { "epoch": 0.8647995606809445, "grad_norm": 0.36216551065444946, "learning_rate": 1.2132174599663785e-05, "loss": 0.3984, "step": 31496 }, { "epoch": 0.864827018121911, "grad_norm": 0.4111325442790985, "learning_rate": 1.2131752635756355e-05, "loss": 0.5119, "step": 31497 }, { "epoch": 0.8648544755628775, "grad_norm": 0.7796735763549805, "learning_rate": 1.2131330667872525e-05, "loss": 0.6074, "step": 31498 }, { "epoch": 0.8648819330038441, "grad_norm": 0.37634190917015076, "learning_rate": 1.2130908696013086e-05, "loss": 0.4669, "step": 31499 }, { "epoch": 0.8649093904448105, "grad_norm": 0.38186725974082947, "learning_rate": 1.2130486720178822e-05, "loss": 0.4915, "step": 31500 }, { "epoch": 0.8649368478857771, "grad_norm": 0.3454498052597046, "learning_rate": 1.2130064740370517e-05, "loss": 0.3978, "step": 31501 }, { "epoch": 0.8649643053267435, "grad_norm": 0.42750146985054016, "learning_rate": 1.2129642756588964e-05, "loss": 0.5078, "step": 31502 }, { "epoch": 0.8649917627677101, "grad_norm": 0.3775571286678314, "learning_rate": 1.2129220768834949e-05, "loss": 0.5707, "step": 31503 }, { "epoch": 0.8650192202086765, "grad_norm": 0.416988343000412, "learning_rate": 1.2128798777109254e-05, "loss": 0.5396, "step": 31504 }, { "epoch": 0.865046677649643, "grad_norm": 0.39961788058280945, "learning_rate": 1.2128376781412671e-05, "loss": 0.4827, "step": 31505 }, { "epoch": 0.8650741350906096, "grad_norm": 0.378293514251709, "learning_rate": 1.2127954781745988e-05, "loss": 0.4811, "step": 31506 }, { "epoch": 0.865101592531576, "grad_norm": 0.3830195963382721, "learning_rate": 1.212753277810999e-05, "loss": 0.5018, "step": 31507 }, { "epoch": 0.8651290499725426, "grad_norm": 0.4913994073867798, "learning_rate": 1.2127110770505464e-05, "loss": 0.5553, "step": 31508 }, { "epoch": 0.865156507413509, "grad_norm": 0.3871981203556061, "learning_rate": 1.2126688758933194e-05, "loss": 0.4635, "step": 31509 }, { "epoch": 0.8651839648544756, "grad_norm": 0.45018842816352844, "learning_rate": 1.2126266743393975e-05, "loss": 0.5197, "step": 31510 }, { "epoch": 0.865211422295442, "grad_norm": 0.3617717921733856, "learning_rate": 1.2125844723888587e-05, "loss": 0.5081, "step": 31511 }, { "epoch": 0.8652388797364086, "grad_norm": 0.3449403941631317, "learning_rate": 1.2125422700417822e-05, "loss": 0.4334, "step": 31512 }, { "epoch": 0.8652663371773751, "grad_norm": 0.39613163471221924, "learning_rate": 1.2125000672982466e-05, "loss": 0.5539, "step": 31513 }, { "epoch": 0.8652937946183415, "grad_norm": 0.4431510269641876, "learning_rate": 1.2124578641583302e-05, "loss": 0.4888, "step": 31514 }, { "epoch": 0.8653212520593081, "grad_norm": 0.4131149649620056, "learning_rate": 1.2124156606221124e-05, "loss": 0.5194, "step": 31515 }, { "epoch": 0.8653487095002745, "grad_norm": 0.40295642614364624, "learning_rate": 1.2123734566896717e-05, "loss": 0.4524, "step": 31516 }, { "epoch": 0.8653761669412411, "grad_norm": 0.41537362337112427, "learning_rate": 1.2123312523610861e-05, "loss": 0.4668, "step": 31517 }, { "epoch": 0.8654036243822075, "grad_norm": 0.4315067231655121, "learning_rate": 1.2122890476364358e-05, "loss": 0.5617, "step": 31518 }, { "epoch": 0.8654310818231741, "grad_norm": 0.3924228549003601, "learning_rate": 1.212246842515798e-05, "loss": 0.5282, "step": 31519 }, { "epoch": 0.8654585392641406, "grad_norm": 0.3875119388103485, "learning_rate": 1.2122046369992524e-05, "loss": 0.4927, "step": 31520 }, { "epoch": 0.8654859967051071, "grad_norm": 0.4284254014492035, "learning_rate": 1.2121624310868774e-05, "loss": 0.4301, "step": 31521 }, { "epoch": 0.8655134541460736, "grad_norm": 0.3712712824344635, "learning_rate": 1.2121202247787516e-05, "loss": 0.4229, "step": 31522 }, { "epoch": 0.86554091158704, "grad_norm": 0.3869176506996155, "learning_rate": 1.2120780180749541e-05, "loss": 0.4862, "step": 31523 }, { "epoch": 0.8655683690280066, "grad_norm": 0.3753916621208191, "learning_rate": 1.212035810975563e-05, "loss": 0.5269, "step": 31524 }, { "epoch": 0.865595826468973, "grad_norm": 0.39775538444519043, "learning_rate": 1.2119936034806578e-05, "loss": 0.4823, "step": 31525 }, { "epoch": 0.8656232839099396, "grad_norm": 0.38884565234184265, "learning_rate": 1.211951395590317e-05, "loss": 0.5467, "step": 31526 }, { "epoch": 0.8656507413509061, "grad_norm": 0.3829613924026489, "learning_rate": 1.2119091873046189e-05, "loss": 0.5603, "step": 31527 }, { "epoch": 0.8656781987918726, "grad_norm": 0.38568374514579773, "learning_rate": 1.2118669786236427e-05, "loss": 0.4822, "step": 31528 }, { "epoch": 0.8657056562328391, "grad_norm": 0.36596959829330444, "learning_rate": 1.2118247695474667e-05, "loss": 0.4371, "step": 31529 }, { "epoch": 0.8657331136738056, "grad_norm": 0.40159446001052856, "learning_rate": 1.2117825600761704e-05, "loss": 0.5116, "step": 31530 }, { "epoch": 0.8657605711147721, "grad_norm": 0.3983573019504547, "learning_rate": 1.2117403502098317e-05, "loss": 0.512, "step": 31531 }, { "epoch": 0.8657880285557386, "grad_norm": 0.35656166076660156, "learning_rate": 1.2116981399485294e-05, "loss": 0.3709, "step": 31532 }, { "epoch": 0.8658154859967051, "grad_norm": 0.41709575057029724, "learning_rate": 1.211655929292343e-05, "loss": 0.4763, "step": 31533 }, { "epoch": 0.8658429434376717, "grad_norm": 0.35590896010398865, "learning_rate": 1.2116137182413507e-05, "loss": 0.3706, "step": 31534 }, { "epoch": 0.8658704008786381, "grad_norm": 0.3634921908378601, "learning_rate": 1.211571506795631e-05, "loss": 0.498, "step": 31535 }, { "epoch": 0.8658978583196046, "grad_norm": 0.3729120194911957, "learning_rate": 1.211529294955263e-05, "loss": 0.4172, "step": 31536 }, { "epoch": 0.8659253157605711, "grad_norm": 0.40334153175354004, "learning_rate": 1.2114870827203253e-05, "loss": 0.4092, "step": 31537 }, { "epoch": 0.8659527732015376, "grad_norm": 0.4208989441394806, "learning_rate": 1.211444870090897e-05, "loss": 0.4897, "step": 31538 }, { "epoch": 0.8659802306425041, "grad_norm": 0.35903212428092957, "learning_rate": 1.2114026570670563e-05, "loss": 0.4842, "step": 31539 }, { "epoch": 0.8660076880834706, "grad_norm": 0.33998242020606995, "learning_rate": 1.211360443648882e-05, "loss": 0.4815, "step": 31540 }, { "epoch": 0.8660351455244372, "grad_norm": 0.38330650329589844, "learning_rate": 1.2113182298364534e-05, "loss": 0.5357, "step": 31541 }, { "epoch": 0.8660626029654036, "grad_norm": 0.4054710268974304, "learning_rate": 1.2112760156298484e-05, "loss": 0.5663, "step": 31542 }, { "epoch": 0.8660900604063702, "grad_norm": 0.39296719431877136, "learning_rate": 1.2112338010291467e-05, "loss": 0.4467, "step": 31543 }, { "epoch": 0.8661175178473366, "grad_norm": 0.4181499183177948, "learning_rate": 1.2111915860344263e-05, "loss": 0.4614, "step": 31544 }, { "epoch": 0.8661449752883031, "grad_norm": 0.3998241722583771, "learning_rate": 1.211149370645766e-05, "loss": 0.5443, "step": 31545 }, { "epoch": 0.8661724327292696, "grad_norm": 0.33989381790161133, "learning_rate": 1.211107154863245e-05, "loss": 0.4426, "step": 31546 }, { "epoch": 0.8661998901702361, "grad_norm": 0.4103005528450012, "learning_rate": 1.2110649386869418e-05, "loss": 0.5189, "step": 31547 }, { "epoch": 0.8662273476112027, "grad_norm": 0.33803364634513855, "learning_rate": 1.2110227221169349e-05, "loss": 0.3899, "step": 31548 }, { "epoch": 0.8662548050521691, "grad_norm": 0.4022403061389923, "learning_rate": 1.2109805051533035e-05, "loss": 0.4425, "step": 31549 }, { "epoch": 0.8662822624931357, "grad_norm": 0.36575186252593994, "learning_rate": 1.2109382877961258e-05, "loss": 0.4922, "step": 31550 }, { "epoch": 0.8663097199341021, "grad_norm": 0.3922678828239441, "learning_rate": 1.2108960700454813e-05, "loss": 0.5032, "step": 31551 }, { "epoch": 0.8663371773750687, "grad_norm": 0.42850300669670105, "learning_rate": 1.2108538519014482e-05, "loss": 0.5394, "step": 31552 }, { "epoch": 0.8663646348160351, "grad_norm": 0.3467021584510803, "learning_rate": 1.2108116333641049e-05, "loss": 0.4516, "step": 31553 }, { "epoch": 0.8663920922570016, "grad_norm": 0.35923898220062256, "learning_rate": 1.2107694144335312e-05, "loss": 0.4788, "step": 31554 }, { "epoch": 0.8664195496979682, "grad_norm": 0.3970898389816284, "learning_rate": 1.2107271951098049e-05, "loss": 0.5069, "step": 31555 }, { "epoch": 0.8664470071389346, "grad_norm": 0.6505720019340515, "learning_rate": 1.2106849753930055e-05, "loss": 0.5785, "step": 31556 }, { "epoch": 0.8664744645799012, "grad_norm": 0.40575045347213745, "learning_rate": 1.2106427552832111e-05, "loss": 0.5232, "step": 31557 }, { "epoch": 0.8665019220208676, "grad_norm": 0.43006595969200134, "learning_rate": 1.2106005347805007e-05, "loss": 0.4939, "step": 31558 }, { "epoch": 0.8665293794618342, "grad_norm": 0.36968427896499634, "learning_rate": 1.2105583138849532e-05, "loss": 0.4161, "step": 31559 }, { "epoch": 0.8665568369028006, "grad_norm": 0.3751338720321655, "learning_rate": 1.210516092596647e-05, "loss": 0.5197, "step": 31560 }, { "epoch": 0.8665842943437672, "grad_norm": 0.3933388590812683, "learning_rate": 1.2104738709156616e-05, "loss": 0.4871, "step": 31561 }, { "epoch": 0.8666117517847337, "grad_norm": 0.37276193499565125, "learning_rate": 1.2104316488420752e-05, "loss": 0.434, "step": 31562 }, { "epoch": 0.8666392092257001, "grad_norm": 0.37470871210098267, "learning_rate": 1.2103894263759661e-05, "loss": 0.4636, "step": 31563 }, { "epoch": 0.8666666666666667, "grad_norm": 0.3823563754558563, "learning_rate": 1.210347203517414e-05, "loss": 0.4376, "step": 31564 }, { "epoch": 0.8666941241076331, "grad_norm": 0.5199916958808899, "learning_rate": 1.2103049802664972e-05, "loss": 0.4092, "step": 31565 }, { "epoch": 0.8667215815485997, "grad_norm": 0.40976542234420776, "learning_rate": 1.210262756623294e-05, "loss": 0.5279, "step": 31566 }, { "epoch": 0.8667490389895661, "grad_norm": 0.38259872794151306, "learning_rate": 1.2102205325878841e-05, "loss": 0.4864, "step": 31567 }, { "epoch": 0.8667764964305327, "grad_norm": 0.5102532505989075, "learning_rate": 1.2101783081603457e-05, "loss": 0.5466, "step": 31568 }, { "epoch": 0.8668039538714992, "grad_norm": 0.4776012897491455, "learning_rate": 1.2101360833407578e-05, "loss": 0.5881, "step": 31569 }, { "epoch": 0.8668314113124657, "grad_norm": 0.3758331537246704, "learning_rate": 1.2100938581291991e-05, "loss": 0.424, "step": 31570 }, { "epoch": 0.8668588687534322, "grad_norm": 0.38166308403015137, "learning_rate": 1.2100516325257481e-05, "loss": 0.4312, "step": 31571 }, { "epoch": 0.8668863261943986, "grad_norm": 0.33474355936050415, "learning_rate": 1.2100094065304839e-05, "loss": 0.5266, "step": 31572 }, { "epoch": 0.8669137836353652, "grad_norm": 0.42058178782463074, "learning_rate": 1.2099671801434849e-05, "loss": 0.5597, "step": 31573 }, { "epoch": 0.8669412410763316, "grad_norm": 0.3597318232059479, "learning_rate": 1.2099249533648302e-05, "loss": 0.5304, "step": 31574 }, { "epoch": 0.8669686985172982, "grad_norm": 0.37091323733329773, "learning_rate": 1.2098827261945987e-05, "loss": 0.5641, "step": 31575 }, { "epoch": 0.8669961559582647, "grad_norm": 0.4377965033054352, "learning_rate": 1.2098404986328686e-05, "loss": 0.4881, "step": 31576 }, { "epoch": 0.8670236133992312, "grad_norm": 0.395074725151062, "learning_rate": 1.2097982706797194e-05, "loss": 0.5025, "step": 31577 }, { "epoch": 0.8670510708401977, "grad_norm": 0.3801513612270355, "learning_rate": 1.2097560423352293e-05, "loss": 0.4958, "step": 31578 }, { "epoch": 0.8670785282811642, "grad_norm": 0.4642842411994934, "learning_rate": 1.2097138135994769e-05, "loss": 0.4992, "step": 31579 }, { "epoch": 0.8671059857221307, "grad_norm": 0.41331765055656433, "learning_rate": 1.2096715844725416e-05, "loss": 0.4299, "step": 31580 }, { "epoch": 0.8671334431630972, "grad_norm": 0.5631094574928284, "learning_rate": 1.2096293549545017e-05, "loss": 0.5708, "step": 31581 }, { "epoch": 0.8671609006040637, "grad_norm": 0.3748146891593933, "learning_rate": 1.2095871250454362e-05, "loss": 0.4804, "step": 31582 }, { "epoch": 0.8671883580450302, "grad_norm": 0.431368887424469, "learning_rate": 1.209544894745424e-05, "loss": 0.4656, "step": 31583 }, { "epoch": 0.8672158154859967, "grad_norm": 0.42296621203422546, "learning_rate": 1.2095026640545435e-05, "loss": 0.4988, "step": 31584 }, { "epoch": 0.8672432729269632, "grad_norm": 0.42643114924430847, "learning_rate": 1.2094604329728739e-05, "loss": 0.5359, "step": 31585 }, { "epoch": 0.8672707303679297, "grad_norm": 0.40635889768600464, "learning_rate": 1.2094182015004934e-05, "loss": 0.4588, "step": 31586 }, { "epoch": 0.8672981878088962, "grad_norm": 0.351245254278183, "learning_rate": 1.2093759696374814e-05, "loss": 0.4838, "step": 31587 }, { "epoch": 0.8673256452498627, "grad_norm": 0.4385960102081299, "learning_rate": 1.2093337373839163e-05, "loss": 0.4629, "step": 31588 }, { "epoch": 0.8673531026908292, "grad_norm": 0.4271770417690277, "learning_rate": 1.209291504739877e-05, "loss": 0.456, "step": 31589 }, { "epoch": 0.8673805601317958, "grad_norm": 0.40859419107437134, "learning_rate": 1.2092492717054423e-05, "loss": 0.5214, "step": 31590 }, { "epoch": 0.8674080175727622, "grad_norm": 0.3935602605342865, "learning_rate": 1.2092070382806907e-05, "loss": 0.4853, "step": 31591 }, { "epoch": 0.8674354750137288, "grad_norm": 0.43324166536331177, "learning_rate": 1.2091648044657011e-05, "loss": 0.4953, "step": 31592 }, { "epoch": 0.8674629324546952, "grad_norm": 0.4370555877685547, "learning_rate": 1.2091225702605528e-05, "loss": 0.5115, "step": 31593 }, { "epoch": 0.8674903898956617, "grad_norm": 0.4666878879070282, "learning_rate": 1.2090803356653237e-05, "loss": 0.4956, "step": 31594 }, { "epoch": 0.8675178473366282, "grad_norm": 0.4186277687549591, "learning_rate": 1.2090381006800936e-05, "loss": 0.5788, "step": 31595 }, { "epoch": 0.8675453047775947, "grad_norm": 0.3628820776939392, "learning_rate": 1.2089958653049403e-05, "loss": 0.446, "step": 31596 }, { "epoch": 0.8675727622185613, "grad_norm": 0.3588583171367645, "learning_rate": 1.2089536295399434e-05, "loss": 0.4834, "step": 31597 }, { "epoch": 0.8676002196595277, "grad_norm": 0.38723406195640564, "learning_rate": 1.2089113933851808e-05, "loss": 0.4784, "step": 31598 }, { "epoch": 0.8676276771004943, "grad_norm": 0.4281860589981079, "learning_rate": 1.2088691568407321e-05, "loss": 0.501, "step": 31599 }, { "epoch": 0.8676551345414607, "grad_norm": 0.35443490743637085, "learning_rate": 1.2088269199066759e-05, "loss": 0.4412, "step": 31600 }, { "epoch": 0.8676825919824273, "grad_norm": 0.3788078725337982, "learning_rate": 1.2087846825830902e-05, "loss": 0.5097, "step": 31601 }, { "epoch": 0.8677100494233937, "grad_norm": 0.35570192337036133, "learning_rate": 1.2087424448700552e-05, "loss": 0.3861, "step": 31602 }, { "epoch": 0.8677375068643602, "grad_norm": 0.386441171169281, "learning_rate": 1.2087002067676482e-05, "loss": 0.4651, "step": 31603 }, { "epoch": 0.8677649643053268, "grad_norm": 0.38152217864990234, "learning_rate": 1.2086579682759492e-05, "loss": 0.45, "step": 31604 }, { "epoch": 0.8677924217462932, "grad_norm": 0.3883955478668213, "learning_rate": 1.2086157293950365e-05, "loss": 0.4526, "step": 31605 }, { "epoch": 0.8678198791872598, "grad_norm": 0.39036470651626587, "learning_rate": 1.2085734901249886e-05, "loss": 0.438, "step": 31606 }, { "epoch": 0.8678473366282262, "grad_norm": 0.3728494346141815, "learning_rate": 1.2085312504658847e-05, "loss": 0.4843, "step": 31607 }, { "epoch": 0.8678747940691928, "grad_norm": 0.5006009936332703, "learning_rate": 1.2084890104178035e-05, "loss": 0.5828, "step": 31608 }, { "epoch": 0.8679022515101592, "grad_norm": 0.6260147094726562, "learning_rate": 1.2084467699808237e-05, "loss": 0.5412, "step": 31609 }, { "epoch": 0.8679297089511258, "grad_norm": 0.5722406506538391, "learning_rate": 1.2084045291550241e-05, "loss": 0.4462, "step": 31610 }, { "epoch": 0.8679571663920923, "grad_norm": 0.45947232842445374, "learning_rate": 1.2083622879404836e-05, "loss": 0.4391, "step": 31611 }, { "epoch": 0.8679846238330587, "grad_norm": 0.37008780241012573, "learning_rate": 1.208320046337281e-05, "loss": 0.5021, "step": 31612 }, { "epoch": 0.8680120812740253, "grad_norm": 0.4158608317375183, "learning_rate": 1.208277804345495e-05, "loss": 0.4626, "step": 31613 }, { "epoch": 0.8680395387149917, "grad_norm": 0.42814093828201294, "learning_rate": 1.2082355619652045e-05, "loss": 0.4537, "step": 31614 }, { "epoch": 0.8680669961559583, "grad_norm": 0.3351251780986786, "learning_rate": 1.2081933191964878e-05, "loss": 0.4105, "step": 31615 }, { "epoch": 0.8680944535969247, "grad_norm": 0.3913279175758362, "learning_rate": 1.2081510760394247e-05, "loss": 0.5129, "step": 31616 }, { "epoch": 0.8681219110378913, "grad_norm": 0.4175621569156647, "learning_rate": 1.208108832494093e-05, "loss": 0.4563, "step": 31617 }, { "epoch": 0.8681493684788578, "grad_norm": 0.37003374099731445, "learning_rate": 1.208066588560572e-05, "loss": 0.4768, "step": 31618 }, { "epoch": 0.8681768259198243, "grad_norm": 0.43162956833839417, "learning_rate": 1.2080243442389405e-05, "loss": 0.4827, "step": 31619 }, { "epoch": 0.8682042833607908, "grad_norm": 0.4078165292739868, "learning_rate": 1.207982099529277e-05, "loss": 0.4889, "step": 31620 }, { "epoch": 0.8682317408017572, "grad_norm": 0.3562571704387665, "learning_rate": 1.2079398544316609e-05, "loss": 0.4858, "step": 31621 }, { "epoch": 0.8682591982427238, "grad_norm": 0.43014103174209595, "learning_rate": 1.2078976089461702e-05, "loss": 0.5299, "step": 31622 }, { "epoch": 0.8682866556836902, "grad_norm": 0.43100211024284363, "learning_rate": 1.2078553630728843e-05, "loss": 0.5587, "step": 31623 }, { "epoch": 0.8683141131246568, "grad_norm": 0.4150688648223877, "learning_rate": 1.2078131168118818e-05, "loss": 0.556, "step": 31624 }, { "epoch": 0.8683415705656233, "grad_norm": 0.4101172089576721, "learning_rate": 1.2077708701632413e-05, "loss": 0.5061, "step": 31625 }, { "epoch": 0.8683690280065898, "grad_norm": 0.40778496861457825, "learning_rate": 1.2077286231270422e-05, "loss": 0.4271, "step": 31626 }, { "epoch": 0.8683964854475563, "grad_norm": 0.4044745862483978, "learning_rate": 1.2076863757033628e-05, "loss": 0.5393, "step": 31627 }, { "epoch": 0.8684239428885228, "grad_norm": 0.42857328057289124, "learning_rate": 1.2076441278922819e-05, "loss": 0.5259, "step": 31628 }, { "epoch": 0.8684514003294893, "grad_norm": 0.40724214911460876, "learning_rate": 1.2076018796938784e-05, "loss": 0.5912, "step": 31629 }, { "epoch": 0.8684788577704557, "grad_norm": 0.3855232000350952, "learning_rate": 1.2075596311082312e-05, "loss": 0.4715, "step": 31630 }, { "epoch": 0.8685063152114223, "grad_norm": 0.5054461359977722, "learning_rate": 1.2075173821354192e-05, "loss": 0.5461, "step": 31631 }, { "epoch": 0.8685337726523888, "grad_norm": 0.4077126681804657, "learning_rate": 1.207475132775521e-05, "loss": 0.5305, "step": 31632 }, { "epoch": 0.8685612300933553, "grad_norm": 0.4125198423862457, "learning_rate": 1.2074328830286151e-05, "loss": 0.4885, "step": 31633 }, { "epoch": 0.8685886875343218, "grad_norm": 0.4084867238998413, "learning_rate": 1.2073906328947811e-05, "loss": 0.4732, "step": 31634 }, { "epoch": 0.8686161449752883, "grad_norm": 0.39985111355781555, "learning_rate": 1.207348382374097e-05, "loss": 0.4424, "step": 31635 }, { "epoch": 0.8686436024162548, "grad_norm": 0.4138895571231842, "learning_rate": 1.2073061314666424e-05, "loss": 0.5922, "step": 31636 }, { "epoch": 0.8686710598572213, "grad_norm": 0.33902326226234436, "learning_rate": 1.2072638801724954e-05, "loss": 0.3394, "step": 31637 }, { "epoch": 0.8686985172981878, "grad_norm": 0.3559619188308716, "learning_rate": 1.2072216284917352e-05, "loss": 0.4498, "step": 31638 }, { "epoch": 0.8687259747391544, "grad_norm": 0.36807939410209656, "learning_rate": 1.2071793764244405e-05, "loss": 0.4829, "step": 31639 }, { "epoch": 0.8687534321801208, "grad_norm": 0.3842988908290863, "learning_rate": 1.2071371239706902e-05, "loss": 0.4294, "step": 31640 }, { "epoch": 0.8687808896210873, "grad_norm": 0.4467127025127411, "learning_rate": 1.207094871130563e-05, "loss": 0.4416, "step": 31641 }, { "epoch": 0.8688083470620538, "grad_norm": 0.3984508216381073, "learning_rate": 1.2070526179041378e-05, "loss": 0.557, "step": 31642 }, { "epoch": 0.8688358045030203, "grad_norm": 0.36687788367271423, "learning_rate": 1.2070103642914932e-05, "loss": 0.457, "step": 31643 }, { "epoch": 0.8688632619439868, "grad_norm": 0.35939621925354004, "learning_rate": 1.2069681102927084e-05, "loss": 0.4402, "step": 31644 }, { "epoch": 0.8688907193849533, "grad_norm": 0.3983241021633148, "learning_rate": 1.2069258559078623e-05, "loss": 0.4559, "step": 31645 }, { "epoch": 0.8689181768259199, "grad_norm": 0.45618686079978943, "learning_rate": 1.2068836011370329e-05, "loss": 0.5201, "step": 31646 }, { "epoch": 0.8689456342668863, "grad_norm": 0.4162209630012512, "learning_rate": 1.2068413459803e-05, "loss": 0.5551, "step": 31647 }, { "epoch": 0.8689730917078529, "grad_norm": 0.41182029247283936, "learning_rate": 1.2067990904377416e-05, "loss": 0.5245, "step": 31648 }, { "epoch": 0.8690005491488193, "grad_norm": 0.4035487473011017, "learning_rate": 1.2067568345094372e-05, "loss": 0.4688, "step": 31649 }, { "epoch": 0.8690280065897859, "grad_norm": 0.4340803027153015, "learning_rate": 1.206714578195465e-05, "loss": 0.4614, "step": 31650 }, { "epoch": 0.8690554640307523, "grad_norm": 0.4075888395309448, "learning_rate": 1.2066723214959043e-05, "loss": 0.4762, "step": 31651 }, { "epoch": 0.8690829214717188, "grad_norm": 0.38181158900260925, "learning_rate": 1.2066300644108337e-05, "loss": 0.4386, "step": 31652 }, { "epoch": 0.8691103789126854, "grad_norm": 0.5001672506332397, "learning_rate": 1.206587806940332e-05, "loss": 0.5021, "step": 31653 }, { "epoch": 0.8691378363536518, "grad_norm": 0.38483431935310364, "learning_rate": 1.2065455490844783e-05, "loss": 0.5922, "step": 31654 }, { "epoch": 0.8691652937946184, "grad_norm": 0.3772445619106293, "learning_rate": 1.2065032908433513e-05, "loss": 0.4808, "step": 31655 }, { "epoch": 0.8691927512355848, "grad_norm": 0.44232377409935, "learning_rate": 1.2064610322170296e-05, "loss": 0.567, "step": 31656 }, { "epoch": 0.8692202086765514, "grad_norm": 0.353787362575531, "learning_rate": 1.2064187732055922e-05, "loss": 0.5463, "step": 31657 }, { "epoch": 0.8692476661175178, "grad_norm": 0.3550264239311218, "learning_rate": 1.206376513809118e-05, "loss": 0.4545, "step": 31658 }, { "epoch": 0.8692751235584844, "grad_norm": 0.44025999307632446, "learning_rate": 1.2063342540276857e-05, "loss": 0.5337, "step": 31659 }, { "epoch": 0.8693025809994509, "grad_norm": 0.33932697772979736, "learning_rate": 1.206291993861374e-05, "loss": 0.4143, "step": 31660 }, { "epoch": 0.8693300384404173, "grad_norm": 0.39654868841171265, "learning_rate": 1.206249733310262e-05, "loss": 0.4421, "step": 31661 }, { "epoch": 0.8693574958813839, "grad_norm": 0.42295345664024353, "learning_rate": 1.2062074723744286e-05, "loss": 0.4093, "step": 31662 }, { "epoch": 0.8693849533223503, "grad_norm": 0.4290679097175598, "learning_rate": 1.2061652110539521e-05, "loss": 0.5029, "step": 31663 }, { "epoch": 0.8694124107633169, "grad_norm": 0.4308621287345886, "learning_rate": 1.2061229493489117e-05, "loss": 0.5603, "step": 31664 }, { "epoch": 0.8694398682042833, "grad_norm": 0.39272138476371765, "learning_rate": 1.2060806872593867e-05, "loss": 0.4509, "step": 31665 }, { "epoch": 0.8694673256452499, "grad_norm": 0.37769007682800293, "learning_rate": 1.2060384247854548e-05, "loss": 0.4607, "step": 31666 }, { "epoch": 0.8694947830862164, "grad_norm": 0.3870627284049988, "learning_rate": 1.205996161927196e-05, "loss": 0.5334, "step": 31667 }, { "epoch": 0.8695222405271829, "grad_norm": 0.41003304719924927, "learning_rate": 1.2059538986846883e-05, "loss": 0.5362, "step": 31668 }, { "epoch": 0.8695496979681494, "grad_norm": 0.4000462591648102, "learning_rate": 1.205911635058011e-05, "loss": 0.5203, "step": 31669 }, { "epoch": 0.8695771554091158, "grad_norm": 0.3935049772262573, "learning_rate": 1.2058693710472426e-05, "loss": 0.5042, "step": 31670 }, { "epoch": 0.8696046128500824, "grad_norm": 0.407703161239624, "learning_rate": 1.2058271066524623e-05, "loss": 0.5413, "step": 31671 }, { "epoch": 0.8696320702910488, "grad_norm": 0.3974623680114746, "learning_rate": 1.2057848418737488e-05, "loss": 0.6216, "step": 31672 }, { "epoch": 0.8696595277320154, "grad_norm": 0.3803424835205078, "learning_rate": 1.2057425767111808e-05, "loss": 0.5081, "step": 31673 }, { "epoch": 0.8696869851729819, "grad_norm": 0.3655150532722473, "learning_rate": 1.2057003111648371e-05, "loss": 0.4824, "step": 31674 }, { "epoch": 0.8697144426139484, "grad_norm": 0.38135477900505066, "learning_rate": 1.205658045234797e-05, "loss": 0.4725, "step": 31675 }, { "epoch": 0.8697419000549149, "grad_norm": 0.37456902861595154, "learning_rate": 1.2056157789211387e-05, "loss": 0.5208, "step": 31676 }, { "epoch": 0.8697693574958814, "grad_norm": 0.4727851450443268, "learning_rate": 1.2055735122239414e-05, "loss": 0.4935, "step": 31677 }, { "epoch": 0.8697968149368479, "grad_norm": 0.40783873200416565, "learning_rate": 1.2055312451432839e-05, "loss": 0.4766, "step": 31678 }, { "epoch": 0.8698242723778143, "grad_norm": 0.4057944416999817, "learning_rate": 1.205488977679245e-05, "loss": 0.5568, "step": 31679 }, { "epoch": 0.8698517298187809, "grad_norm": 0.46521854400634766, "learning_rate": 1.2054467098319039e-05, "loss": 0.4948, "step": 31680 }, { "epoch": 0.8698791872597474, "grad_norm": 0.40025267004966736, "learning_rate": 1.2054044416013389e-05, "loss": 0.5377, "step": 31681 }, { "epoch": 0.8699066447007139, "grad_norm": 0.37527114152908325, "learning_rate": 1.2053621729876288e-05, "loss": 0.5174, "step": 31682 }, { "epoch": 0.8699341021416804, "grad_norm": 0.3960643410682678, "learning_rate": 1.2053199039908529e-05, "loss": 0.4681, "step": 31683 }, { "epoch": 0.8699615595826469, "grad_norm": 0.43398910760879517, "learning_rate": 1.2052776346110897e-05, "loss": 0.5391, "step": 31684 }, { "epoch": 0.8699890170236134, "grad_norm": 0.38053351640701294, "learning_rate": 1.2052353648484186e-05, "loss": 0.4534, "step": 31685 }, { "epoch": 0.8700164744645799, "grad_norm": 0.3521953821182251, "learning_rate": 1.2051930947029177e-05, "loss": 0.5431, "step": 31686 }, { "epoch": 0.8700439319055464, "grad_norm": 0.400277316570282, "learning_rate": 1.205150824174666e-05, "loss": 0.4657, "step": 31687 }, { "epoch": 0.870071389346513, "grad_norm": 0.38259997963905334, "learning_rate": 1.2051085532637432e-05, "loss": 0.4427, "step": 31688 }, { "epoch": 0.8700988467874794, "grad_norm": 0.4148651957511902, "learning_rate": 1.2050662819702273e-05, "loss": 0.6388, "step": 31689 }, { "epoch": 0.870126304228446, "grad_norm": 0.3767339289188385, "learning_rate": 1.2050240102941969e-05, "loss": 0.5227, "step": 31690 }, { "epoch": 0.8701537616694124, "grad_norm": 0.3606727719306946, "learning_rate": 1.2049817382357316e-05, "loss": 0.4668, "step": 31691 }, { "epoch": 0.8701812191103789, "grad_norm": 0.41241946816444397, "learning_rate": 1.2049394657949097e-05, "loss": 0.498, "step": 31692 }, { "epoch": 0.8702086765513454, "grad_norm": 0.4232375919818878, "learning_rate": 1.2048971929718105e-05, "loss": 0.4542, "step": 31693 }, { "epoch": 0.8702361339923119, "grad_norm": 0.37773871421813965, "learning_rate": 1.2048549197665127e-05, "loss": 0.4297, "step": 31694 }, { "epoch": 0.8702635914332785, "grad_norm": 0.36337199807167053, "learning_rate": 1.2048126461790948e-05, "loss": 0.4949, "step": 31695 }, { "epoch": 0.8702910488742449, "grad_norm": 0.39970192313194275, "learning_rate": 1.2047703722096361e-05, "loss": 0.4353, "step": 31696 }, { "epoch": 0.8703185063152115, "grad_norm": 0.4104253351688385, "learning_rate": 1.2047280978582152e-05, "loss": 0.5033, "step": 31697 }, { "epoch": 0.8703459637561779, "grad_norm": 0.3614622950553894, "learning_rate": 1.2046858231249114e-05, "loss": 0.4581, "step": 31698 }, { "epoch": 0.8703734211971444, "grad_norm": 0.36246806383132935, "learning_rate": 1.2046435480098032e-05, "loss": 0.469, "step": 31699 }, { "epoch": 0.8704008786381109, "grad_norm": 0.4749883711338043, "learning_rate": 1.2046012725129689e-05, "loss": 0.5537, "step": 31700 }, { "epoch": 0.8704283360790774, "grad_norm": 0.44101083278656006, "learning_rate": 1.2045589966344884e-05, "loss": 0.553, "step": 31701 }, { "epoch": 0.870455793520044, "grad_norm": 0.41452717781066895, "learning_rate": 1.20451672037444e-05, "loss": 0.5425, "step": 31702 }, { "epoch": 0.8704832509610104, "grad_norm": 0.43778014183044434, "learning_rate": 1.2044744437329025e-05, "loss": 0.5215, "step": 31703 }, { "epoch": 0.870510708401977, "grad_norm": 0.4183540940284729, "learning_rate": 1.2044321667099553e-05, "loss": 0.5694, "step": 31704 }, { "epoch": 0.8705381658429434, "grad_norm": 0.43823695182800293, "learning_rate": 1.2043898893056763e-05, "loss": 0.545, "step": 31705 }, { "epoch": 0.87056562328391, "grad_norm": 0.4044095575809479, "learning_rate": 1.2043476115201453e-05, "loss": 0.4808, "step": 31706 }, { "epoch": 0.8705930807248764, "grad_norm": 0.3744589686393738, "learning_rate": 1.2043053333534407e-05, "loss": 0.4152, "step": 31707 }, { "epoch": 0.870620538165843, "grad_norm": 0.35266631841659546, "learning_rate": 1.2042630548056413e-05, "loss": 0.4343, "step": 31708 }, { "epoch": 0.8706479956068095, "grad_norm": 0.43429768085479736, "learning_rate": 1.2042207758768262e-05, "loss": 0.4344, "step": 31709 }, { "epoch": 0.8706754530477759, "grad_norm": 0.4127954840660095, "learning_rate": 1.2041784965670742e-05, "loss": 0.469, "step": 31710 }, { "epoch": 0.8707029104887425, "grad_norm": 0.44362592697143555, "learning_rate": 1.2041362168764642e-05, "loss": 0.5701, "step": 31711 }, { "epoch": 0.8707303679297089, "grad_norm": 0.3784850537776947, "learning_rate": 1.2040939368050752e-05, "loss": 0.4446, "step": 31712 }, { "epoch": 0.8707578253706755, "grad_norm": 0.399278461933136, "learning_rate": 1.2040516563529854e-05, "loss": 0.4874, "step": 31713 }, { "epoch": 0.8707852828116419, "grad_norm": 0.398638516664505, "learning_rate": 1.2040093755202744e-05, "loss": 0.5665, "step": 31714 }, { "epoch": 0.8708127402526085, "grad_norm": 0.3658207058906555, "learning_rate": 1.2039670943070207e-05, "loss": 0.4541, "step": 31715 }, { "epoch": 0.870840197693575, "grad_norm": 0.3910529613494873, "learning_rate": 1.2039248127133035e-05, "loss": 0.4453, "step": 31716 }, { "epoch": 0.8708676551345415, "grad_norm": 0.40921375155448914, "learning_rate": 1.2038825307392013e-05, "loss": 0.5132, "step": 31717 }, { "epoch": 0.870895112575508, "grad_norm": 0.35969141125679016, "learning_rate": 1.2038402483847929e-05, "loss": 0.4535, "step": 31718 }, { "epoch": 0.8709225700164744, "grad_norm": 0.3709227740764618, "learning_rate": 1.2037979656501578e-05, "loss": 0.467, "step": 31719 }, { "epoch": 0.870950027457441, "grad_norm": 0.3793713450431824, "learning_rate": 1.2037556825353741e-05, "loss": 0.5671, "step": 31720 }, { "epoch": 0.8709774848984074, "grad_norm": 0.37220171093940735, "learning_rate": 1.203713399040521e-05, "loss": 0.4686, "step": 31721 }, { "epoch": 0.871004942339374, "grad_norm": 0.4014543890953064, "learning_rate": 1.2036711151656776e-05, "loss": 0.5563, "step": 31722 }, { "epoch": 0.8710323997803405, "grad_norm": 0.43236851692199707, "learning_rate": 1.2036288309109223e-05, "loss": 0.4504, "step": 31723 }, { "epoch": 0.871059857221307, "grad_norm": 0.38676249980926514, "learning_rate": 1.2035865462763346e-05, "loss": 0.4525, "step": 31724 }, { "epoch": 0.8710873146622735, "grad_norm": 0.4216510057449341, "learning_rate": 1.203544261261993e-05, "loss": 0.4786, "step": 31725 }, { "epoch": 0.87111477210324, "grad_norm": 0.3941477835178375, "learning_rate": 1.203501975867976e-05, "loss": 0.4438, "step": 31726 }, { "epoch": 0.8711422295442065, "grad_norm": 0.45727089047431946, "learning_rate": 1.2034596900943632e-05, "loss": 0.525, "step": 31727 }, { "epoch": 0.8711696869851729, "grad_norm": 0.40201425552368164, "learning_rate": 1.2034174039412329e-05, "loss": 0.4528, "step": 31728 }, { "epoch": 0.8711971444261395, "grad_norm": 0.4060031473636627, "learning_rate": 1.2033751174086646e-05, "loss": 0.5113, "step": 31729 }, { "epoch": 0.871224601867106, "grad_norm": 0.43756580352783203, "learning_rate": 1.2033328304967364e-05, "loss": 0.4966, "step": 31730 }, { "epoch": 0.8712520593080725, "grad_norm": 0.36767688393592834, "learning_rate": 1.2032905432055277e-05, "loss": 0.4636, "step": 31731 }, { "epoch": 0.871279516749039, "grad_norm": 0.39948758482933044, "learning_rate": 1.2032482555351173e-05, "loss": 0.4617, "step": 31732 }, { "epoch": 0.8713069741900055, "grad_norm": 0.3776066303253174, "learning_rate": 1.2032059674855837e-05, "loss": 0.4366, "step": 31733 }, { "epoch": 0.871334431630972, "grad_norm": 0.349153995513916, "learning_rate": 1.2031636790570067e-05, "loss": 0.4652, "step": 31734 }, { "epoch": 0.8713618890719385, "grad_norm": 0.39699193835258484, "learning_rate": 1.2031213902494643e-05, "loss": 0.5631, "step": 31735 }, { "epoch": 0.871389346512905, "grad_norm": 0.4125523269176483, "learning_rate": 1.2030791010630355e-05, "loss": 0.4715, "step": 31736 }, { "epoch": 0.8714168039538714, "grad_norm": 0.4020121693611145, "learning_rate": 1.2030368114977997e-05, "loss": 0.6101, "step": 31737 }, { "epoch": 0.871444261394838, "grad_norm": 0.34408894181251526, "learning_rate": 1.2029945215538355e-05, "loss": 0.4675, "step": 31738 }, { "epoch": 0.8714717188358045, "grad_norm": 0.36047104001045227, "learning_rate": 1.2029522312312214e-05, "loss": 0.4509, "step": 31739 }, { "epoch": 0.871499176276771, "grad_norm": 0.34066787362098694, "learning_rate": 1.2029099405300368e-05, "loss": 0.4866, "step": 31740 }, { "epoch": 0.8715266337177375, "grad_norm": 0.3502885103225708, "learning_rate": 1.2028676494503601e-05, "loss": 0.5312, "step": 31741 }, { "epoch": 0.871554091158704, "grad_norm": 0.3771030008792877, "learning_rate": 1.2028253579922708e-05, "loss": 0.4811, "step": 31742 }, { "epoch": 0.8715815485996705, "grad_norm": 0.4103029668331146, "learning_rate": 1.2027830661558477e-05, "loss": 0.5636, "step": 31743 }, { "epoch": 0.871609006040637, "grad_norm": 0.39628323912620544, "learning_rate": 1.2027407739411689e-05, "loss": 0.4536, "step": 31744 }, { "epoch": 0.8716364634816035, "grad_norm": 0.39756056666374207, "learning_rate": 1.2026984813483143e-05, "loss": 0.4943, "step": 31745 }, { "epoch": 0.8716639209225701, "grad_norm": 0.3826403021812439, "learning_rate": 1.2026561883773619e-05, "loss": 0.5134, "step": 31746 }, { "epoch": 0.8716913783635365, "grad_norm": 0.3825899064540863, "learning_rate": 1.2026138950283914e-05, "loss": 0.5769, "step": 31747 }, { "epoch": 0.871718835804503, "grad_norm": 0.36443251371383667, "learning_rate": 1.2025716013014815e-05, "loss": 0.4457, "step": 31748 }, { "epoch": 0.8717462932454695, "grad_norm": 0.7360069155693054, "learning_rate": 1.2025293071967103e-05, "loss": 0.4813, "step": 31749 }, { "epoch": 0.871773750686436, "grad_norm": 0.44219285249710083, "learning_rate": 1.2024870127141577e-05, "loss": 0.4925, "step": 31750 }, { "epoch": 0.8718012081274025, "grad_norm": 0.47277334332466125, "learning_rate": 1.2024447178539022e-05, "loss": 0.5051, "step": 31751 }, { "epoch": 0.871828665568369, "grad_norm": 0.34962978959083557, "learning_rate": 1.2024024226160224e-05, "loss": 0.5175, "step": 31752 }, { "epoch": 0.8718561230093356, "grad_norm": 0.4262475371360779, "learning_rate": 1.2023601270005978e-05, "loss": 0.5245, "step": 31753 }, { "epoch": 0.871883580450302, "grad_norm": 0.33628734946250916, "learning_rate": 1.2023178310077068e-05, "loss": 0.4373, "step": 31754 }, { "epoch": 0.8719110378912686, "grad_norm": 0.3383757174015045, "learning_rate": 1.2022755346374286e-05, "loss": 0.3956, "step": 31755 }, { "epoch": 0.871938495332235, "grad_norm": 0.38651955127716064, "learning_rate": 1.202233237889842e-05, "loss": 0.4943, "step": 31756 }, { "epoch": 0.8719659527732015, "grad_norm": 0.44454750418663025, "learning_rate": 1.2021909407650256e-05, "loss": 0.5704, "step": 31757 }, { "epoch": 0.871993410214168, "grad_norm": 0.36933431029319763, "learning_rate": 1.2021486432630589e-05, "loss": 0.4243, "step": 31758 }, { "epoch": 0.8720208676551345, "grad_norm": 0.41504836082458496, "learning_rate": 1.20210634538402e-05, "loss": 0.5056, "step": 31759 }, { "epoch": 0.8720483250961011, "grad_norm": 0.4241590201854706, "learning_rate": 1.2020640471279888e-05, "loss": 0.5428, "step": 31760 }, { "epoch": 0.8720757825370675, "grad_norm": 0.41125398874282837, "learning_rate": 1.2020217484950434e-05, "loss": 0.5272, "step": 31761 }, { "epoch": 0.8721032399780341, "grad_norm": 0.3474518358707428, "learning_rate": 1.2019794494852628e-05, "loss": 0.4178, "step": 31762 }, { "epoch": 0.8721306974190005, "grad_norm": 0.40074214339256287, "learning_rate": 1.2019371500987265e-05, "loss": 0.445, "step": 31763 }, { "epoch": 0.8721581548599671, "grad_norm": 0.40242859721183777, "learning_rate": 1.2018948503355125e-05, "loss": 0.3684, "step": 31764 }, { "epoch": 0.8721856123009335, "grad_norm": 0.39941275119781494, "learning_rate": 1.2018525501957004e-05, "loss": 0.5571, "step": 31765 }, { "epoch": 0.8722130697419, "grad_norm": 0.36917898058891296, "learning_rate": 1.2018102496793688e-05, "loss": 0.4642, "step": 31766 }, { "epoch": 0.8722405271828666, "grad_norm": 0.3781525790691376, "learning_rate": 1.2017679487865967e-05, "loss": 0.5826, "step": 31767 }, { "epoch": 0.872267984623833, "grad_norm": 0.40221431851387024, "learning_rate": 1.201725647517463e-05, "loss": 0.4623, "step": 31768 }, { "epoch": 0.8722954420647996, "grad_norm": 0.36389055848121643, "learning_rate": 1.2016833458720467e-05, "loss": 0.5253, "step": 31769 }, { "epoch": 0.872322899505766, "grad_norm": 0.39639490842819214, "learning_rate": 1.2016410438504264e-05, "loss": 0.4685, "step": 31770 }, { "epoch": 0.8723503569467326, "grad_norm": 0.3534793555736542, "learning_rate": 1.2015987414526812e-05, "loss": 0.5573, "step": 31771 }, { "epoch": 0.872377814387699, "grad_norm": 0.35887348651885986, "learning_rate": 1.20155643867889e-05, "loss": 0.5285, "step": 31772 }, { "epoch": 0.8724052718286656, "grad_norm": 0.38705700635910034, "learning_rate": 1.201514135529132e-05, "loss": 0.5019, "step": 31773 }, { "epoch": 0.8724327292696321, "grad_norm": 0.3917185366153717, "learning_rate": 1.2014718320034854e-05, "loss": 0.5326, "step": 31774 }, { "epoch": 0.8724601867105986, "grad_norm": 0.3924156725406647, "learning_rate": 1.2014295281020298e-05, "loss": 0.4267, "step": 31775 }, { "epoch": 0.8724876441515651, "grad_norm": 0.4275032877922058, "learning_rate": 1.2013872238248439e-05, "loss": 0.5094, "step": 31776 }, { "epoch": 0.8725151015925315, "grad_norm": 0.3948640525341034, "learning_rate": 1.2013449191720063e-05, "loss": 0.4906, "step": 31777 }, { "epoch": 0.8725425590334981, "grad_norm": 0.3737691342830658, "learning_rate": 1.2013026141435963e-05, "loss": 0.5471, "step": 31778 }, { "epoch": 0.8725700164744645, "grad_norm": 0.39559319615364075, "learning_rate": 1.2012603087396926e-05, "loss": 0.5294, "step": 31779 }, { "epoch": 0.8725974739154311, "grad_norm": 0.3751121163368225, "learning_rate": 1.2012180029603744e-05, "loss": 0.5673, "step": 31780 }, { "epoch": 0.8726249313563976, "grad_norm": 0.5202319025993347, "learning_rate": 1.2011756968057202e-05, "loss": 0.4837, "step": 31781 }, { "epoch": 0.8726523887973641, "grad_norm": 0.3846253752708435, "learning_rate": 1.2011333902758091e-05, "loss": 0.4025, "step": 31782 }, { "epoch": 0.8726798462383306, "grad_norm": 0.44339439272880554, "learning_rate": 1.20109108337072e-05, "loss": 0.5921, "step": 31783 }, { "epoch": 0.872707303679297, "grad_norm": 0.41370755434036255, "learning_rate": 1.201048776090532e-05, "loss": 0.5265, "step": 31784 }, { "epoch": 0.8727347611202636, "grad_norm": 0.4096059501171112, "learning_rate": 1.201006468435324e-05, "loss": 0.4412, "step": 31785 }, { "epoch": 0.87276221856123, "grad_norm": 0.4163645803928375, "learning_rate": 1.2009641604051744e-05, "loss": 0.4613, "step": 31786 }, { "epoch": 0.8727896760021966, "grad_norm": 0.40264567732810974, "learning_rate": 1.2009218520001627e-05, "loss": 0.4443, "step": 31787 }, { "epoch": 0.8728171334431631, "grad_norm": 0.34742844104766846, "learning_rate": 1.2008795432203676e-05, "loss": 0.4747, "step": 31788 }, { "epoch": 0.8728445908841296, "grad_norm": 0.3947164714336395, "learning_rate": 1.2008372340658681e-05, "loss": 0.4916, "step": 31789 }, { "epoch": 0.8728720483250961, "grad_norm": 0.3626787066459656, "learning_rate": 1.2007949245367432e-05, "loss": 0.4809, "step": 31790 }, { "epoch": 0.8728995057660626, "grad_norm": 0.378290057182312, "learning_rate": 1.2007526146330713e-05, "loss": 0.4274, "step": 31791 }, { "epoch": 0.8729269632070291, "grad_norm": 0.3793698251247406, "learning_rate": 1.200710304354932e-05, "loss": 0.4539, "step": 31792 }, { "epoch": 0.8729544206479956, "grad_norm": 0.3822493851184845, "learning_rate": 1.2006679937024039e-05, "loss": 0.482, "step": 31793 }, { "epoch": 0.8729818780889621, "grad_norm": 0.4084101915359497, "learning_rate": 1.2006256826755658e-05, "loss": 0.4954, "step": 31794 }, { "epoch": 0.8730093355299287, "grad_norm": 0.385445773601532, "learning_rate": 1.2005833712744967e-05, "loss": 0.3683, "step": 31795 }, { "epoch": 0.8730367929708951, "grad_norm": 0.5624918937683105, "learning_rate": 1.2005410594992757e-05, "loss": 0.4051, "step": 31796 }, { "epoch": 0.8730642504118616, "grad_norm": 0.40518176555633545, "learning_rate": 1.2004987473499819e-05, "loss": 0.5506, "step": 31797 }, { "epoch": 0.8730917078528281, "grad_norm": 0.37118038535118103, "learning_rate": 1.2004564348266936e-05, "loss": 0.389, "step": 31798 }, { "epoch": 0.8731191652937946, "grad_norm": 0.38809269666671753, "learning_rate": 1.20041412192949e-05, "loss": 0.5495, "step": 31799 }, { "epoch": 0.8731466227347611, "grad_norm": 0.516579270362854, "learning_rate": 1.2003718086584503e-05, "loss": 0.5189, "step": 31800 }, { "epoch": 0.8731740801757276, "grad_norm": 0.40232235193252563, "learning_rate": 1.2003294950136533e-05, "loss": 0.5378, "step": 31801 }, { "epoch": 0.8732015376166942, "grad_norm": 0.4514601230621338, "learning_rate": 1.2002871809951777e-05, "loss": 0.5082, "step": 31802 }, { "epoch": 0.8732289950576606, "grad_norm": 0.3800548017024994, "learning_rate": 1.2002448666031026e-05, "loss": 0.5034, "step": 31803 }, { "epoch": 0.8732564524986272, "grad_norm": 0.4707348644733429, "learning_rate": 1.200202551837507e-05, "loss": 0.4754, "step": 31804 }, { "epoch": 0.8732839099395936, "grad_norm": 0.4020514190196991, "learning_rate": 1.2001602366984698e-05, "loss": 0.4607, "step": 31805 }, { "epoch": 0.8733113673805601, "grad_norm": 0.44606342911720276, "learning_rate": 1.2001179211860696e-05, "loss": 0.4007, "step": 31806 }, { "epoch": 0.8733388248215266, "grad_norm": 0.43890783190727234, "learning_rate": 1.200075605300386e-05, "loss": 0.55, "step": 31807 }, { "epoch": 0.8733662822624931, "grad_norm": 0.48364341259002686, "learning_rate": 1.200033289041497e-05, "loss": 0.5266, "step": 31808 }, { "epoch": 0.8733937397034597, "grad_norm": 0.40394335985183716, "learning_rate": 1.1999909724094826e-05, "loss": 0.429, "step": 31809 }, { "epoch": 0.8734211971444261, "grad_norm": 0.4339142441749573, "learning_rate": 1.1999486554044208e-05, "loss": 0.5589, "step": 31810 }, { "epoch": 0.8734486545853927, "grad_norm": 0.3961137533187866, "learning_rate": 1.199906338026391e-05, "loss": 0.5134, "step": 31811 }, { "epoch": 0.8734761120263591, "grad_norm": 0.385503351688385, "learning_rate": 1.1998640202754724e-05, "loss": 0.4957, "step": 31812 }, { "epoch": 0.8735035694673257, "grad_norm": 0.39601874351501465, "learning_rate": 1.1998217021517435e-05, "loss": 0.5431, "step": 31813 }, { "epoch": 0.8735310269082921, "grad_norm": 0.42424455285072327, "learning_rate": 1.1997793836552832e-05, "loss": 0.4573, "step": 31814 }, { "epoch": 0.8735584843492586, "grad_norm": 0.40475431084632874, "learning_rate": 1.1997370647861706e-05, "loss": 0.5796, "step": 31815 }, { "epoch": 0.8735859417902252, "grad_norm": 0.4121479094028473, "learning_rate": 1.1996947455444846e-05, "loss": 0.5259, "step": 31816 }, { "epoch": 0.8736133992311916, "grad_norm": 0.437285453081131, "learning_rate": 1.1996524259303043e-05, "loss": 0.5095, "step": 31817 }, { "epoch": 0.8736408566721582, "grad_norm": 0.37723395228385925, "learning_rate": 1.1996101059437086e-05, "loss": 0.4836, "step": 31818 }, { "epoch": 0.8736683141131246, "grad_norm": 0.4225607216358185, "learning_rate": 1.199567785584776e-05, "loss": 0.5352, "step": 31819 }, { "epoch": 0.8736957715540912, "grad_norm": 0.3953564465045929, "learning_rate": 1.1995254648535861e-05, "loss": 0.5044, "step": 31820 }, { "epoch": 0.8737232289950576, "grad_norm": 0.4088812470436096, "learning_rate": 1.1994831437502172e-05, "loss": 0.5352, "step": 31821 }, { "epoch": 0.8737506864360242, "grad_norm": 0.42399170994758606, "learning_rate": 1.1994408222747488e-05, "loss": 0.5104, "step": 31822 }, { "epoch": 0.8737781438769907, "grad_norm": 0.42973560094833374, "learning_rate": 1.1993985004272596e-05, "loss": 0.4954, "step": 31823 }, { "epoch": 0.8738056013179571, "grad_norm": 0.39414912462234497, "learning_rate": 1.1993561782078286e-05, "loss": 0.5415, "step": 31824 }, { "epoch": 0.8738330587589237, "grad_norm": 0.353997141122818, "learning_rate": 1.1993138556165346e-05, "loss": 0.4179, "step": 31825 }, { "epoch": 0.8738605161998901, "grad_norm": 0.5182406306266785, "learning_rate": 1.1992715326534568e-05, "loss": 0.4721, "step": 31826 }, { "epoch": 0.8738879736408567, "grad_norm": 0.4468846321105957, "learning_rate": 1.199229209318674e-05, "loss": 0.5967, "step": 31827 }, { "epoch": 0.8739154310818231, "grad_norm": 0.4158179759979248, "learning_rate": 1.1991868856122651e-05, "loss": 0.4877, "step": 31828 }, { "epoch": 0.8739428885227897, "grad_norm": 0.34615716338157654, "learning_rate": 1.1991445615343087e-05, "loss": 0.4801, "step": 31829 }, { "epoch": 0.8739703459637562, "grad_norm": 0.49149075150489807, "learning_rate": 1.1991022370848845e-05, "loss": 0.5378, "step": 31830 }, { "epoch": 0.8739978034047227, "grad_norm": 0.5359712243080139, "learning_rate": 1.1990599122640712e-05, "loss": 0.4586, "step": 31831 }, { "epoch": 0.8740252608456892, "grad_norm": 0.3539796471595764, "learning_rate": 1.1990175870719472e-05, "loss": 0.5256, "step": 31832 }, { "epoch": 0.8740527182866556, "grad_norm": 0.378316193819046, "learning_rate": 1.1989752615085923e-05, "loss": 0.501, "step": 31833 }, { "epoch": 0.8740801757276222, "grad_norm": 0.42206141352653503, "learning_rate": 1.1989329355740846e-05, "loss": 0.4576, "step": 31834 }, { "epoch": 0.8741076331685886, "grad_norm": 0.5431830883026123, "learning_rate": 1.198890609268504e-05, "loss": 0.573, "step": 31835 }, { "epoch": 0.8741350906095552, "grad_norm": 0.39128991961479187, "learning_rate": 1.1988482825919287e-05, "loss": 0.579, "step": 31836 }, { "epoch": 0.8741625480505217, "grad_norm": 0.437341570854187, "learning_rate": 1.1988059555444378e-05, "loss": 0.4741, "step": 31837 }, { "epoch": 0.8741900054914882, "grad_norm": 0.4063326418399811, "learning_rate": 1.1987636281261104e-05, "loss": 0.5454, "step": 31838 }, { "epoch": 0.8742174629324547, "grad_norm": 0.3886168301105499, "learning_rate": 1.1987213003370253e-05, "loss": 0.5148, "step": 31839 }, { "epoch": 0.8742449203734212, "grad_norm": 0.3732737600803375, "learning_rate": 1.1986789721772617e-05, "loss": 0.5207, "step": 31840 }, { "epoch": 0.8742723778143877, "grad_norm": 0.4126232862472534, "learning_rate": 1.1986366436468986e-05, "loss": 0.5164, "step": 31841 }, { "epoch": 0.8742998352553542, "grad_norm": 0.3686927556991577, "learning_rate": 1.1985943147460141e-05, "loss": 0.4398, "step": 31842 }, { "epoch": 0.8743272926963207, "grad_norm": 0.4349934458732605, "learning_rate": 1.1985519854746886e-05, "loss": 0.4949, "step": 31843 }, { "epoch": 0.8743547501372873, "grad_norm": 0.4629543423652649, "learning_rate": 1.1985096558329998e-05, "loss": 0.4784, "step": 31844 }, { "epoch": 0.8743822075782537, "grad_norm": 0.3778669834136963, "learning_rate": 1.198467325821027e-05, "loss": 0.5128, "step": 31845 }, { "epoch": 0.8744096650192202, "grad_norm": 0.40083444118499756, "learning_rate": 1.1984249954388499e-05, "loss": 0.4755, "step": 31846 }, { "epoch": 0.8744371224601867, "grad_norm": 0.3765396475791931, "learning_rate": 1.1983826646865463e-05, "loss": 0.467, "step": 31847 }, { "epoch": 0.8744645799011532, "grad_norm": 0.4069221615791321, "learning_rate": 1.1983403335641961e-05, "loss": 0.5675, "step": 31848 }, { "epoch": 0.8744920373421197, "grad_norm": 0.6716517210006714, "learning_rate": 1.1982980020718777e-05, "loss": 0.508, "step": 31849 }, { "epoch": 0.8745194947830862, "grad_norm": 0.359580934047699, "learning_rate": 1.1982556702096702e-05, "loss": 0.4604, "step": 31850 }, { "epoch": 0.8745469522240528, "grad_norm": 0.4504145681858063, "learning_rate": 1.1982133379776526e-05, "loss": 0.4924, "step": 31851 }, { "epoch": 0.8745744096650192, "grad_norm": 0.38685372471809387, "learning_rate": 1.1981710053759036e-05, "loss": 0.5172, "step": 31852 }, { "epoch": 0.8746018671059858, "grad_norm": 0.39003244042396545, "learning_rate": 1.1981286724045029e-05, "loss": 0.5653, "step": 31853 }, { "epoch": 0.8746293245469522, "grad_norm": 0.4088445007801056, "learning_rate": 1.198086339063529e-05, "loss": 0.4287, "step": 31854 }, { "epoch": 0.8746567819879187, "grad_norm": 0.4351009130477905, "learning_rate": 1.1980440053530604e-05, "loss": 0.4778, "step": 31855 }, { "epoch": 0.8746842394288852, "grad_norm": 0.4301143288612366, "learning_rate": 1.198001671273177e-05, "loss": 0.4576, "step": 31856 }, { "epoch": 0.8747116968698517, "grad_norm": 0.3633374273777008, "learning_rate": 1.1979593368239568e-05, "loss": 0.4628, "step": 31857 }, { "epoch": 0.8747391543108183, "grad_norm": 0.3858073651790619, "learning_rate": 1.1979170020054798e-05, "loss": 0.4506, "step": 31858 }, { "epoch": 0.8747666117517847, "grad_norm": 0.3935641050338745, "learning_rate": 1.197874666817824e-05, "loss": 0.5347, "step": 31859 }, { "epoch": 0.8747940691927513, "grad_norm": 0.39750561118125916, "learning_rate": 1.1978323312610689e-05, "loss": 0.5519, "step": 31860 }, { "epoch": 0.8748215266337177, "grad_norm": 0.4369578957557678, "learning_rate": 1.1977899953352936e-05, "loss": 0.5132, "step": 31861 }, { "epoch": 0.8748489840746843, "grad_norm": 0.41695836186408997, "learning_rate": 1.1977476590405765e-05, "loss": 0.4952, "step": 31862 }, { "epoch": 0.8748764415156507, "grad_norm": 0.3689553439617157, "learning_rate": 1.197705322376997e-05, "loss": 0.54, "step": 31863 }, { "epoch": 0.8749038989566172, "grad_norm": 0.37761154770851135, "learning_rate": 1.1976629853446343e-05, "loss": 0.4975, "step": 31864 }, { "epoch": 0.8749313563975838, "grad_norm": 0.362316370010376, "learning_rate": 1.1976206479435666e-05, "loss": 0.4671, "step": 31865 }, { "epoch": 0.8749588138385502, "grad_norm": 0.37033727765083313, "learning_rate": 1.1975783101738733e-05, "loss": 0.4277, "step": 31866 }, { "epoch": 0.8749862712795168, "grad_norm": 0.40566325187683105, "learning_rate": 1.1975359720356336e-05, "loss": 0.4495, "step": 31867 }, { "epoch": 0.8750137287204832, "grad_norm": 0.360006719827652, "learning_rate": 1.1974936335289262e-05, "loss": 0.5112, "step": 31868 }, { "epoch": 0.8750411861614498, "grad_norm": 0.4962717890739441, "learning_rate": 1.1974512946538303e-05, "loss": 0.4344, "step": 31869 }, { "epoch": 0.8750686436024162, "grad_norm": 0.4417881369590759, "learning_rate": 1.1974089554104244e-05, "loss": 0.5199, "step": 31870 }, { "epoch": 0.8750961010433828, "grad_norm": 0.4432356357574463, "learning_rate": 1.1973666157987883e-05, "loss": 0.5189, "step": 31871 }, { "epoch": 0.8751235584843493, "grad_norm": 0.3841994106769562, "learning_rate": 1.197324275819e-05, "loss": 0.5048, "step": 31872 }, { "epoch": 0.8751510159253157, "grad_norm": 0.44454407691955566, "learning_rate": 1.1972819354711391e-05, "loss": 0.5119, "step": 31873 }, { "epoch": 0.8751784733662823, "grad_norm": 0.37630370259284973, "learning_rate": 1.1972395947552845e-05, "loss": 0.4179, "step": 31874 }, { "epoch": 0.8752059308072487, "grad_norm": 0.5090975165367126, "learning_rate": 1.1971972536715151e-05, "loss": 0.5399, "step": 31875 }, { "epoch": 0.8752333882482153, "grad_norm": 0.380250483751297, "learning_rate": 1.1971549122199098e-05, "loss": 0.4997, "step": 31876 }, { "epoch": 0.8752608456891817, "grad_norm": 0.3578014671802521, "learning_rate": 1.1971125704005476e-05, "loss": 0.5191, "step": 31877 }, { "epoch": 0.8752883031301483, "grad_norm": 0.34112077951431274, "learning_rate": 1.1970702282135076e-05, "loss": 0.4376, "step": 31878 }, { "epoch": 0.8753157605711148, "grad_norm": 0.41724973917007446, "learning_rate": 1.1970278856588688e-05, "loss": 0.5152, "step": 31879 }, { "epoch": 0.8753432180120813, "grad_norm": 0.43110817670822144, "learning_rate": 1.1969855427367101e-05, "loss": 0.5546, "step": 31880 }, { "epoch": 0.8753706754530478, "grad_norm": 0.3579998314380646, "learning_rate": 1.1969431994471103e-05, "loss": 0.4564, "step": 31881 }, { "epoch": 0.8753981328940142, "grad_norm": 0.390323668718338, "learning_rate": 1.1969008557901488e-05, "loss": 0.5075, "step": 31882 }, { "epoch": 0.8754255903349808, "grad_norm": 0.4457523822784424, "learning_rate": 1.1968585117659041e-05, "loss": 0.4842, "step": 31883 }, { "epoch": 0.8754530477759472, "grad_norm": 0.35011765360832214, "learning_rate": 1.1968161673744558e-05, "loss": 0.4315, "step": 31884 }, { "epoch": 0.8754805052169138, "grad_norm": 0.3927701711654663, "learning_rate": 1.1967738226158825e-05, "loss": 0.4838, "step": 31885 }, { "epoch": 0.8755079626578803, "grad_norm": 0.40069547295570374, "learning_rate": 1.196731477490263e-05, "loss": 0.5077, "step": 31886 }, { "epoch": 0.8755354200988468, "grad_norm": 0.3680017590522766, "learning_rate": 1.1966891319976766e-05, "loss": 0.4628, "step": 31887 }, { "epoch": 0.8755628775398133, "grad_norm": 0.4282234013080597, "learning_rate": 1.196646786138202e-05, "loss": 0.4738, "step": 31888 }, { "epoch": 0.8755903349807798, "grad_norm": 0.40885573625564575, "learning_rate": 1.1966044399119186e-05, "loss": 0.6013, "step": 31889 }, { "epoch": 0.8756177924217463, "grad_norm": 0.4058675467967987, "learning_rate": 1.1965620933189053e-05, "loss": 0.4697, "step": 31890 }, { "epoch": 0.8756452498627127, "grad_norm": 0.36844050884246826, "learning_rate": 1.1965197463592406e-05, "loss": 0.4418, "step": 31891 }, { "epoch": 0.8756727073036793, "grad_norm": 0.3595139682292938, "learning_rate": 1.1964773990330041e-05, "loss": 0.5022, "step": 31892 }, { "epoch": 0.8757001647446458, "grad_norm": 0.4897362291812897, "learning_rate": 1.1964350513402748e-05, "loss": 0.5345, "step": 31893 }, { "epoch": 0.8757276221856123, "grad_norm": 0.3723672032356262, "learning_rate": 1.1963927032811308e-05, "loss": 0.4599, "step": 31894 }, { "epoch": 0.8757550796265788, "grad_norm": 0.42442867159843445, "learning_rate": 1.196350354855652e-05, "loss": 0.6202, "step": 31895 }, { "epoch": 0.8757825370675453, "grad_norm": 0.41181549429893494, "learning_rate": 1.1963080060639171e-05, "loss": 0.556, "step": 31896 }, { "epoch": 0.8758099945085118, "grad_norm": 0.3460538983345032, "learning_rate": 1.1962656569060054e-05, "loss": 0.4509, "step": 31897 }, { "epoch": 0.8758374519494783, "grad_norm": 0.36512717604637146, "learning_rate": 1.1962233073819954e-05, "loss": 0.4704, "step": 31898 }, { "epoch": 0.8758649093904448, "grad_norm": 0.41690772771835327, "learning_rate": 1.1961809574919663e-05, "loss": 0.4956, "step": 31899 }, { "epoch": 0.8758923668314114, "grad_norm": 0.3404284119606018, "learning_rate": 1.196138607235997e-05, "loss": 0.4156, "step": 31900 }, { "epoch": 0.8759198242723778, "grad_norm": 0.35257238149642944, "learning_rate": 1.1960962566141667e-05, "loss": 0.5304, "step": 31901 }, { "epoch": 0.8759472817133444, "grad_norm": 0.4191863238811493, "learning_rate": 1.1960539056265543e-05, "loss": 0.4897, "step": 31902 }, { "epoch": 0.8759747391543108, "grad_norm": 0.385280579328537, "learning_rate": 1.1960115542732389e-05, "loss": 0.4782, "step": 31903 }, { "epoch": 0.8760021965952773, "grad_norm": 0.34118834137916565, "learning_rate": 1.195969202554299e-05, "loss": 0.481, "step": 31904 }, { "epoch": 0.8760296540362438, "grad_norm": 0.344910204410553, "learning_rate": 1.1959268504698144e-05, "loss": 0.39, "step": 31905 }, { "epoch": 0.8760571114772103, "grad_norm": 0.5103614926338196, "learning_rate": 1.1958844980198636e-05, "loss": 0.4709, "step": 31906 }, { "epoch": 0.8760845689181769, "grad_norm": 0.4079245626926422, "learning_rate": 1.1958421452045256e-05, "loss": 0.5053, "step": 31907 }, { "epoch": 0.8761120263591433, "grad_norm": 0.35297486186027527, "learning_rate": 1.1957997920238794e-05, "loss": 0.4438, "step": 31908 }, { "epoch": 0.8761394838001099, "grad_norm": 0.4198410212993622, "learning_rate": 1.1957574384780044e-05, "loss": 0.4567, "step": 31909 }, { "epoch": 0.8761669412410763, "grad_norm": 0.4119419753551483, "learning_rate": 1.1957150845669791e-05, "loss": 0.5006, "step": 31910 }, { "epoch": 0.8761943986820429, "grad_norm": 0.40050578117370605, "learning_rate": 1.1956727302908829e-05, "loss": 0.5428, "step": 31911 }, { "epoch": 0.8762218561230093, "grad_norm": 0.40592026710510254, "learning_rate": 1.1956303756497942e-05, "loss": 0.4595, "step": 31912 }, { "epoch": 0.8762493135639758, "grad_norm": 0.49376603960990906, "learning_rate": 1.1955880206437928e-05, "loss": 0.5527, "step": 31913 }, { "epoch": 0.8762767710049424, "grad_norm": 0.4548511505126953, "learning_rate": 1.1955456652729572e-05, "loss": 0.5078, "step": 31914 }, { "epoch": 0.8763042284459088, "grad_norm": 0.3654191792011261, "learning_rate": 1.1955033095373665e-05, "loss": 0.4309, "step": 31915 }, { "epoch": 0.8763316858868754, "grad_norm": 0.3512386381626129, "learning_rate": 1.1954609534370998e-05, "loss": 0.5061, "step": 31916 }, { "epoch": 0.8763591433278418, "grad_norm": 0.39778560400009155, "learning_rate": 1.1954185969722358e-05, "loss": 0.4544, "step": 31917 }, { "epoch": 0.8763866007688084, "grad_norm": 0.37687769532203674, "learning_rate": 1.195376240142854e-05, "loss": 0.4895, "step": 31918 }, { "epoch": 0.8764140582097748, "grad_norm": 0.41235029697418213, "learning_rate": 1.195333882949033e-05, "loss": 0.4582, "step": 31919 }, { "epoch": 0.8764415156507414, "grad_norm": 0.42908975481987, "learning_rate": 1.1952915253908522e-05, "loss": 0.5547, "step": 31920 }, { "epoch": 0.8764689730917079, "grad_norm": 0.4723831117153168, "learning_rate": 1.19524916746839e-05, "loss": 0.4697, "step": 31921 }, { "epoch": 0.8764964305326743, "grad_norm": 0.37824997305870056, "learning_rate": 1.195206809181726e-05, "loss": 0.4845, "step": 31922 }, { "epoch": 0.8765238879736409, "grad_norm": 0.3811679482460022, "learning_rate": 1.195164450530939e-05, "loss": 0.4681, "step": 31923 }, { "epoch": 0.8765513454146073, "grad_norm": 0.42048579454421997, "learning_rate": 1.1951220915161081e-05, "loss": 0.5278, "step": 31924 }, { "epoch": 0.8765788028555739, "grad_norm": 0.3896344304084778, "learning_rate": 1.195079732137312e-05, "loss": 0.4346, "step": 31925 }, { "epoch": 0.8766062602965403, "grad_norm": 0.4703982472419739, "learning_rate": 1.1950373723946303e-05, "loss": 0.506, "step": 31926 }, { "epoch": 0.8766337177375069, "grad_norm": 1.3505984544754028, "learning_rate": 1.1949950122881413e-05, "loss": 0.5346, "step": 31927 }, { "epoch": 0.8766611751784734, "grad_norm": 0.3653607964515686, "learning_rate": 1.1949526518179244e-05, "loss": 0.4245, "step": 31928 }, { "epoch": 0.8766886326194399, "grad_norm": 0.4055114686489105, "learning_rate": 1.1949102909840589e-05, "loss": 0.5356, "step": 31929 }, { "epoch": 0.8767160900604064, "grad_norm": 0.4203973412513733, "learning_rate": 1.194867929786623e-05, "loss": 0.4289, "step": 31930 }, { "epoch": 0.8767435475013728, "grad_norm": 0.5237071514129639, "learning_rate": 1.1948255682256966e-05, "loss": 0.4813, "step": 31931 }, { "epoch": 0.8767710049423394, "grad_norm": 0.3563280403614044, "learning_rate": 1.194783206301358e-05, "loss": 0.5408, "step": 31932 }, { "epoch": 0.8767984623833058, "grad_norm": 0.39450258016586304, "learning_rate": 1.194740844013687e-05, "loss": 0.446, "step": 31933 }, { "epoch": 0.8768259198242724, "grad_norm": 0.35435187816619873, "learning_rate": 1.194698481362762e-05, "loss": 0.4901, "step": 31934 }, { "epoch": 0.8768533772652389, "grad_norm": 0.3812824785709381, "learning_rate": 1.194656118348662e-05, "loss": 0.5197, "step": 31935 }, { "epoch": 0.8768808347062054, "grad_norm": 0.3906446695327759, "learning_rate": 1.1946137549714662e-05, "loss": 0.4534, "step": 31936 }, { "epoch": 0.8769082921471719, "grad_norm": 0.3830597698688507, "learning_rate": 1.194571391231254e-05, "loss": 0.4638, "step": 31937 }, { "epoch": 0.8769357495881384, "grad_norm": 0.3929833173751831, "learning_rate": 1.1945290271281038e-05, "loss": 0.5269, "step": 31938 }, { "epoch": 0.8769632070291049, "grad_norm": 0.3906625807285309, "learning_rate": 1.194486662662095e-05, "loss": 0.5639, "step": 31939 }, { "epoch": 0.8769906644700713, "grad_norm": 0.35075417160987854, "learning_rate": 1.1944442978333061e-05, "loss": 0.4676, "step": 31940 }, { "epoch": 0.8770181219110379, "grad_norm": 0.41575199365615845, "learning_rate": 1.194401932641817e-05, "loss": 0.5629, "step": 31941 }, { "epoch": 0.8770455793520044, "grad_norm": 0.4342697262763977, "learning_rate": 1.1943595670877062e-05, "loss": 0.5081, "step": 31942 }, { "epoch": 0.8770730367929709, "grad_norm": 0.4022100567817688, "learning_rate": 1.1943172011710525e-05, "loss": 0.5192, "step": 31943 }, { "epoch": 0.8771004942339374, "grad_norm": 0.4607600271701813, "learning_rate": 1.1942748348919356e-05, "loss": 0.5565, "step": 31944 }, { "epoch": 0.8771279516749039, "grad_norm": 0.4278750717639923, "learning_rate": 1.1942324682504336e-05, "loss": 0.5147, "step": 31945 }, { "epoch": 0.8771554091158704, "grad_norm": 0.37528759241104126, "learning_rate": 1.1941901012466264e-05, "loss": 0.4868, "step": 31946 }, { "epoch": 0.8771828665568369, "grad_norm": 0.3850431740283966, "learning_rate": 1.1941477338805926e-05, "loss": 0.5302, "step": 31947 }, { "epoch": 0.8772103239978034, "grad_norm": 0.3976559042930603, "learning_rate": 1.1941053661524114e-05, "loss": 0.4954, "step": 31948 }, { "epoch": 0.87723778143877, "grad_norm": 0.7980462312698364, "learning_rate": 1.1940629980621617e-05, "loss": 0.547, "step": 31949 }, { "epoch": 0.8772652388797364, "grad_norm": 0.4159824252128601, "learning_rate": 1.1940206296099224e-05, "loss": 0.4845, "step": 31950 }, { "epoch": 0.877292696320703, "grad_norm": 0.4210914969444275, "learning_rate": 1.1939782607957731e-05, "loss": 0.5759, "step": 31951 }, { "epoch": 0.8773201537616694, "grad_norm": 0.491599440574646, "learning_rate": 1.193935891619792e-05, "loss": 0.6394, "step": 31952 }, { "epoch": 0.8773476112026359, "grad_norm": 0.38474002480506897, "learning_rate": 1.1938935220820587e-05, "loss": 0.5074, "step": 31953 }, { "epoch": 0.8773750686436024, "grad_norm": 0.5116145014762878, "learning_rate": 1.1938511521826524e-05, "loss": 0.4148, "step": 31954 }, { "epoch": 0.8774025260845689, "grad_norm": 0.37616270780563354, "learning_rate": 1.1938087819216514e-05, "loss": 0.5131, "step": 31955 }, { "epoch": 0.8774299835255355, "grad_norm": 0.42467406392097473, "learning_rate": 1.1937664112991353e-05, "loss": 0.6235, "step": 31956 }, { "epoch": 0.8774574409665019, "grad_norm": 0.36354827880859375, "learning_rate": 1.1937240403151832e-05, "loss": 0.552, "step": 31957 }, { "epoch": 0.8774848984074685, "grad_norm": 0.4360863268375397, "learning_rate": 1.1936816689698737e-05, "loss": 0.6094, "step": 31958 }, { "epoch": 0.8775123558484349, "grad_norm": 0.3619670569896698, "learning_rate": 1.1936392972632862e-05, "loss": 0.4734, "step": 31959 }, { "epoch": 0.8775398132894014, "grad_norm": 0.44548746943473816, "learning_rate": 1.1935969251954992e-05, "loss": 0.5201, "step": 31960 }, { "epoch": 0.8775672707303679, "grad_norm": 0.35143551230430603, "learning_rate": 1.1935545527665928e-05, "loss": 0.4345, "step": 31961 }, { "epoch": 0.8775947281713344, "grad_norm": 0.3826131224632263, "learning_rate": 1.1935121799766452e-05, "loss": 0.4947, "step": 31962 }, { "epoch": 0.877622185612301, "grad_norm": 0.4370388686656952, "learning_rate": 1.1934698068257353e-05, "loss": 0.5267, "step": 31963 }, { "epoch": 0.8776496430532674, "grad_norm": 0.4482690393924713, "learning_rate": 1.1934274333139428e-05, "loss": 0.5257, "step": 31964 }, { "epoch": 0.877677100494234, "grad_norm": 0.43024206161499023, "learning_rate": 1.193385059441346e-05, "loss": 0.4191, "step": 31965 }, { "epoch": 0.8777045579352004, "grad_norm": 0.4049198627471924, "learning_rate": 1.1933426852080248e-05, "loss": 0.5261, "step": 31966 }, { "epoch": 0.877732015376167, "grad_norm": 0.36380627751350403, "learning_rate": 1.1933003106140574e-05, "loss": 0.424, "step": 31967 }, { "epoch": 0.8777594728171334, "grad_norm": 0.48806729912757874, "learning_rate": 1.1932579356595235e-05, "loss": 0.5013, "step": 31968 }, { "epoch": 0.8777869302581, "grad_norm": 0.3901720941066742, "learning_rate": 1.1932155603445019e-05, "loss": 0.5414, "step": 31969 }, { "epoch": 0.8778143876990665, "grad_norm": 0.3796052634716034, "learning_rate": 1.1931731846690715e-05, "loss": 0.4619, "step": 31970 }, { "epoch": 0.8778418451400329, "grad_norm": 0.47389957308769226, "learning_rate": 1.1931308086333116e-05, "loss": 0.4857, "step": 31971 }, { "epoch": 0.8778693025809995, "grad_norm": 0.45830944180488586, "learning_rate": 1.1930884322373007e-05, "loss": 0.6025, "step": 31972 }, { "epoch": 0.8778967600219659, "grad_norm": 0.3966934084892273, "learning_rate": 1.1930460554811188e-05, "loss": 0.4284, "step": 31973 }, { "epoch": 0.8779242174629325, "grad_norm": 0.40891844034194946, "learning_rate": 1.193003678364844e-05, "loss": 0.4911, "step": 31974 }, { "epoch": 0.8779516749038989, "grad_norm": 0.35183870792388916, "learning_rate": 1.1929613008885556e-05, "loss": 0.4971, "step": 31975 }, { "epoch": 0.8779791323448655, "grad_norm": 0.4106239080429077, "learning_rate": 1.1929189230523333e-05, "loss": 0.5669, "step": 31976 }, { "epoch": 0.878006589785832, "grad_norm": 0.3904968500137329, "learning_rate": 1.1928765448562551e-05, "loss": 0.4856, "step": 31977 }, { "epoch": 0.8780340472267985, "grad_norm": 0.33858057856559753, "learning_rate": 1.192834166300401e-05, "loss": 0.4417, "step": 31978 }, { "epoch": 0.878061504667765, "grad_norm": 0.5419690012931824, "learning_rate": 1.1927917873848495e-05, "loss": 0.5512, "step": 31979 }, { "epoch": 0.8780889621087314, "grad_norm": 0.37872421741485596, "learning_rate": 1.1927494081096797e-05, "loss": 0.4445, "step": 31980 }, { "epoch": 0.878116419549698, "grad_norm": 0.5295234322547913, "learning_rate": 1.192707028474971e-05, "loss": 0.4313, "step": 31981 }, { "epoch": 0.8781438769906644, "grad_norm": 0.3854120969772339, "learning_rate": 1.1926646484808018e-05, "loss": 0.4798, "step": 31982 }, { "epoch": 0.878171334431631, "grad_norm": 0.3849250376224518, "learning_rate": 1.1926222681272519e-05, "loss": 0.4058, "step": 31983 }, { "epoch": 0.8781987918725975, "grad_norm": 0.38627946376800537, "learning_rate": 1.1925798874143997e-05, "loss": 0.4198, "step": 31984 }, { "epoch": 0.878226249313564, "grad_norm": 0.48482489585876465, "learning_rate": 1.1925375063423248e-05, "loss": 0.6001, "step": 31985 }, { "epoch": 0.8782537067545305, "grad_norm": 0.44988325238227844, "learning_rate": 1.192495124911106e-05, "loss": 0.5519, "step": 31986 }, { "epoch": 0.878281164195497, "grad_norm": 0.4350222945213318, "learning_rate": 1.1924527431208218e-05, "loss": 0.523, "step": 31987 }, { "epoch": 0.8783086216364635, "grad_norm": 0.4062343239784241, "learning_rate": 1.1924103609715525e-05, "loss": 0.476, "step": 31988 }, { "epoch": 0.8783360790774299, "grad_norm": 0.4025411903858185, "learning_rate": 1.1923679784633761e-05, "loss": 0.5272, "step": 31989 }, { "epoch": 0.8783635365183965, "grad_norm": 0.3779144883155823, "learning_rate": 1.1923255955963722e-05, "loss": 0.3976, "step": 31990 }, { "epoch": 0.878390993959363, "grad_norm": 0.45494818687438965, "learning_rate": 1.1922832123706197e-05, "loss": 0.5027, "step": 31991 }, { "epoch": 0.8784184514003295, "grad_norm": 0.34485360980033875, "learning_rate": 1.1922408287861973e-05, "loss": 0.4355, "step": 31992 }, { "epoch": 0.878445908841296, "grad_norm": 0.38893452286720276, "learning_rate": 1.192198444843185e-05, "loss": 0.5133, "step": 31993 }, { "epoch": 0.8784733662822625, "grad_norm": 0.3244379162788391, "learning_rate": 1.1921560605416606e-05, "loss": 0.472, "step": 31994 }, { "epoch": 0.878500823723229, "grad_norm": 0.3989643156528473, "learning_rate": 1.1921136758817043e-05, "loss": 0.527, "step": 31995 }, { "epoch": 0.8785282811641955, "grad_norm": 0.3782273828983307, "learning_rate": 1.1920712908633944e-05, "loss": 0.521, "step": 31996 }, { "epoch": 0.878555738605162, "grad_norm": 0.9833260774612427, "learning_rate": 1.1920289054868104e-05, "loss": 0.461, "step": 31997 }, { "epoch": 0.8785831960461286, "grad_norm": 0.40176069736480713, "learning_rate": 1.1919865197520311e-05, "loss": 0.3898, "step": 31998 }, { "epoch": 0.878610653487095, "grad_norm": 0.46752214431762695, "learning_rate": 1.1919441336591358e-05, "loss": 0.5511, "step": 31999 }, { "epoch": 0.8786381109280615, "grad_norm": 0.4041384756565094, "learning_rate": 1.1919017472082033e-05, "loss": 0.5061, "step": 32000 }, { "epoch": 0.878665568369028, "grad_norm": 0.38660281896591187, "learning_rate": 1.1918593603993129e-05, "loss": 0.5524, "step": 32001 }, { "epoch": 0.8786930258099945, "grad_norm": 0.5176632404327393, "learning_rate": 1.1918169732325434e-05, "loss": 0.4718, "step": 32002 }, { "epoch": 0.878720483250961, "grad_norm": 0.40639564394950867, "learning_rate": 1.1917745857079741e-05, "loss": 0.5305, "step": 32003 }, { "epoch": 0.8787479406919275, "grad_norm": 0.35734692215919495, "learning_rate": 1.191732197825684e-05, "loss": 0.3743, "step": 32004 }, { "epoch": 0.878775398132894, "grad_norm": 0.3844582438468933, "learning_rate": 1.1916898095857522e-05, "loss": 0.4675, "step": 32005 }, { "epoch": 0.8788028555738605, "grad_norm": 0.37682437896728516, "learning_rate": 1.1916474209882578e-05, "loss": 0.4173, "step": 32006 }, { "epoch": 0.8788303130148271, "grad_norm": 0.43074601888656616, "learning_rate": 1.1916050320332795e-05, "loss": 0.5054, "step": 32007 }, { "epoch": 0.8788577704557935, "grad_norm": 0.45952683687210083, "learning_rate": 1.1915626427208969e-05, "loss": 0.521, "step": 32008 }, { "epoch": 0.87888522789676, "grad_norm": 0.44202834367752075, "learning_rate": 1.191520253051189e-05, "loss": 0.4322, "step": 32009 }, { "epoch": 0.8789126853377265, "grad_norm": 0.3344026803970337, "learning_rate": 1.1914778630242343e-05, "loss": 0.4618, "step": 32010 }, { "epoch": 0.878940142778693, "grad_norm": 0.3713780343532562, "learning_rate": 1.1914354726401125e-05, "loss": 0.4625, "step": 32011 }, { "epoch": 0.8789676002196595, "grad_norm": 0.37654852867126465, "learning_rate": 1.1913930818989021e-05, "loss": 0.5066, "step": 32012 }, { "epoch": 0.878995057660626, "grad_norm": 0.39898210763931274, "learning_rate": 1.191350690800683e-05, "loss": 0.5013, "step": 32013 }, { "epoch": 0.8790225151015926, "grad_norm": 0.3703964948654175, "learning_rate": 1.1913082993455335e-05, "loss": 0.4914, "step": 32014 }, { "epoch": 0.879049972542559, "grad_norm": 0.35936257243156433, "learning_rate": 1.191265907533533e-05, "loss": 0.4473, "step": 32015 }, { "epoch": 0.8790774299835256, "grad_norm": 0.4618874788284302, "learning_rate": 1.1912235153647606e-05, "loss": 0.5335, "step": 32016 }, { "epoch": 0.879104887424492, "grad_norm": 0.39840376377105713, "learning_rate": 1.1911811228392956e-05, "loss": 0.4734, "step": 32017 }, { "epoch": 0.8791323448654585, "grad_norm": 0.37391307950019836, "learning_rate": 1.1911387299572164e-05, "loss": 0.4965, "step": 32018 }, { "epoch": 0.879159802306425, "grad_norm": 0.4246211647987366, "learning_rate": 1.1910963367186025e-05, "loss": 0.5253, "step": 32019 }, { "epoch": 0.8791872597473915, "grad_norm": 0.36970582604408264, "learning_rate": 1.1910539431235331e-05, "loss": 0.5115, "step": 32020 }, { "epoch": 0.8792147171883581, "grad_norm": 0.3908650279045105, "learning_rate": 1.191011549172087e-05, "loss": 0.4835, "step": 32021 }, { "epoch": 0.8792421746293245, "grad_norm": 0.375203400850296, "learning_rate": 1.1909691548643436e-05, "loss": 0.4093, "step": 32022 }, { "epoch": 0.8792696320702911, "grad_norm": 0.4199284315109253, "learning_rate": 1.1909267602003813e-05, "loss": 0.5706, "step": 32023 }, { "epoch": 0.8792970895112575, "grad_norm": 0.3806227147579193, "learning_rate": 1.1908843651802801e-05, "loss": 0.527, "step": 32024 }, { "epoch": 0.8793245469522241, "grad_norm": 0.33185502886772156, "learning_rate": 1.1908419698041183e-05, "loss": 0.4983, "step": 32025 }, { "epoch": 0.8793520043931905, "grad_norm": 0.43008968234062195, "learning_rate": 1.1907995740719755e-05, "loss": 0.4392, "step": 32026 }, { "epoch": 0.879379461834157, "grad_norm": 0.36459285020828247, "learning_rate": 1.190757177983931e-05, "loss": 0.4528, "step": 32027 }, { "epoch": 0.8794069192751236, "grad_norm": 0.42241615056991577, "learning_rate": 1.1907147815400627e-05, "loss": 0.461, "step": 32028 }, { "epoch": 0.87943437671609, "grad_norm": 0.3753504753112793, "learning_rate": 1.190672384740451e-05, "loss": 0.5343, "step": 32029 }, { "epoch": 0.8794618341570566, "grad_norm": 0.41857773065567017, "learning_rate": 1.1906299875851741e-05, "loss": 0.5097, "step": 32030 }, { "epoch": 0.879489291598023, "grad_norm": 0.34342458844184875, "learning_rate": 1.1905875900743117e-05, "loss": 0.4013, "step": 32031 }, { "epoch": 0.8795167490389896, "grad_norm": 0.3791235387325287, "learning_rate": 1.1905451922079427e-05, "loss": 0.4209, "step": 32032 }, { "epoch": 0.879544206479956, "grad_norm": 0.4058675467967987, "learning_rate": 1.1905027939861457e-05, "loss": 0.5139, "step": 32033 }, { "epoch": 0.8795716639209226, "grad_norm": 0.4845820665359497, "learning_rate": 1.1904603954090007e-05, "loss": 0.4481, "step": 32034 }, { "epoch": 0.8795991213618891, "grad_norm": 0.39102426171302795, "learning_rate": 1.1904179964765861e-05, "loss": 0.4713, "step": 32035 }, { "epoch": 0.8796265788028556, "grad_norm": 0.4286566376686096, "learning_rate": 1.190375597188981e-05, "loss": 0.5997, "step": 32036 }, { "epoch": 0.8796540362438221, "grad_norm": 0.3659784197807312, "learning_rate": 1.1903331975462646e-05, "loss": 0.4866, "step": 32037 }, { "epoch": 0.8796814936847885, "grad_norm": 0.4067437946796417, "learning_rate": 1.190290797548516e-05, "loss": 0.4845, "step": 32038 }, { "epoch": 0.8797089511257551, "grad_norm": 0.4310038983821869, "learning_rate": 1.1902483971958147e-05, "loss": 0.5331, "step": 32039 }, { "epoch": 0.8797364085667215, "grad_norm": 0.3957591950893402, "learning_rate": 1.1902059964882393e-05, "loss": 0.4667, "step": 32040 }, { "epoch": 0.8797638660076881, "grad_norm": 0.46333348751068115, "learning_rate": 1.1901635954258688e-05, "loss": 0.558, "step": 32041 }, { "epoch": 0.8797913234486546, "grad_norm": 0.3651861548423767, "learning_rate": 1.1901211940087826e-05, "loss": 0.4411, "step": 32042 }, { "epoch": 0.8798187808896211, "grad_norm": 0.3846897780895233, "learning_rate": 1.1900787922370596e-05, "loss": 0.4918, "step": 32043 }, { "epoch": 0.8798462383305876, "grad_norm": 0.42736342549324036, "learning_rate": 1.1900363901107792e-05, "loss": 0.6054, "step": 32044 }, { "epoch": 0.879873695771554, "grad_norm": 0.3596585988998413, "learning_rate": 1.1899939876300203e-05, "loss": 0.4639, "step": 32045 }, { "epoch": 0.8799011532125206, "grad_norm": 0.42206114530563354, "learning_rate": 1.1899515847948616e-05, "loss": 0.4657, "step": 32046 }, { "epoch": 0.879928610653487, "grad_norm": 0.408849835395813, "learning_rate": 1.189909181605383e-05, "loss": 0.5279, "step": 32047 }, { "epoch": 0.8799560680944536, "grad_norm": 0.40449389815330505, "learning_rate": 1.1898667780616631e-05, "loss": 0.4696, "step": 32048 }, { "epoch": 0.8799835255354201, "grad_norm": 0.3709248900413513, "learning_rate": 1.1898243741637807e-05, "loss": 0.3951, "step": 32049 }, { "epoch": 0.8800109829763866, "grad_norm": 0.6570876240730286, "learning_rate": 1.1897819699118155e-05, "loss": 0.5076, "step": 32050 }, { "epoch": 0.8800384404173531, "grad_norm": 0.3638741672039032, "learning_rate": 1.1897395653058464e-05, "loss": 0.4618, "step": 32051 }, { "epoch": 0.8800658978583196, "grad_norm": 0.34563493728637695, "learning_rate": 1.1896971603459523e-05, "loss": 0.4946, "step": 32052 }, { "epoch": 0.8800933552992861, "grad_norm": 0.3757338225841522, "learning_rate": 1.1896547550322128e-05, "loss": 0.4954, "step": 32053 }, { "epoch": 0.8801208127402526, "grad_norm": 0.4365825653076172, "learning_rate": 1.1896123493647062e-05, "loss": 0.5067, "step": 32054 }, { "epoch": 0.8801482701812191, "grad_norm": 0.35737791657447815, "learning_rate": 1.1895699433435124e-05, "loss": 0.5241, "step": 32055 }, { "epoch": 0.8801757276221857, "grad_norm": 0.41004377603530884, "learning_rate": 1.18952753696871e-05, "loss": 0.5033, "step": 32056 }, { "epoch": 0.8802031850631521, "grad_norm": 0.43826383352279663, "learning_rate": 1.1894851302403782e-05, "loss": 0.4883, "step": 32057 }, { "epoch": 0.8802306425041186, "grad_norm": 0.3583780825138092, "learning_rate": 1.1894427231585963e-05, "loss": 0.4975, "step": 32058 }, { "epoch": 0.8802580999450851, "grad_norm": 0.4494962692260742, "learning_rate": 1.1894003157234431e-05, "loss": 0.4885, "step": 32059 }, { "epoch": 0.8802855573860516, "grad_norm": 0.3832293152809143, "learning_rate": 1.1893579079349982e-05, "loss": 0.4636, "step": 32060 }, { "epoch": 0.8803130148270181, "grad_norm": 0.37113338708877563, "learning_rate": 1.1893154997933399e-05, "loss": 0.4885, "step": 32061 }, { "epoch": 0.8803404722679846, "grad_norm": 0.4141026437282562, "learning_rate": 1.189273091298548e-05, "loss": 0.4654, "step": 32062 }, { "epoch": 0.8803679297089512, "grad_norm": 0.36419039964675903, "learning_rate": 1.1892306824507013e-05, "loss": 0.4776, "step": 32063 }, { "epoch": 0.8803953871499176, "grad_norm": 0.39587879180908203, "learning_rate": 1.189188273249879e-05, "loss": 0.5302, "step": 32064 }, { "epoch": 0.8804228445908842, "grad_norm": 0.3792327642440796, "learning_rate": 1.1891458636961603e-05, "loss": 0.4742, "step": 32065 }, { "epoch": 0.8804503020318506, "grad_norm": 0.3839843273162842, "learning_rate": 1.1891034537896242e-05, "loss": 0.4027, "step": 32066 }, { "epoch": 0.8804777594728171, "grad_norm": 0.3863787353038788, "learning_rate": 1.1890610435303498e-05, "loss": 0.5643, "step": 32067 }, { "epoch": 0.8805052169137836, "grad_norm": 0.4238605797290802, "learning_rate": 1.1890186329184161e-05, "loss": 0.4596, "step": 32068 }, { "epoch": 0.8805326743547501, "grad_norm": 0.35735079646110535, "learning_rate": 1.1889762219539022e-05, "loss": 0.4599, "step": 32069 }, { "epoch": 0.8805601317957167, "grad_norm": 0.38202327489852905, "learning_rate": 1.1889338106368875e-05, "loss": 0.4879, "step": 32070 }, { "epoch": 0.8805875892366831, "grad_norm": 0.46469634771347046, "learning_rate": 1.1888913989674511e-05, "loss": 0.4428, "step": 32071 }, { "epoch": 0.8806150466776497, "grad_norm": 0.3900083601474762, "learning_rate": 1.1888489869456716e-05, "loss": 0.5008, "step": 32072 }, { "epoch": 0.8806425041186161, "grad_norm": 0.38179516792297363, "learning_rate": 1.1888065745716289e-05, "loss": 0.4236, "step": 32073 }, { "epoch": 0.8806699615595827, "grad_norm": 0.4124305248260498, "learning_rate": 1.1887641618454012e-05, "loss": 0.5585, "step": 32074 }, { "epoch": 0.8806974190005491, "grad_norm": 0.3727075457572937, "learning_rate": 1.1887217487670683e-05, "loss": 0.4271, "step": 32075 }, { "epoch": 0.8807248764415156, "grad_norm": 0.38836339116096497, "learning_rate": 1.1886793353367094e-05, "loss": 0.4441, "step": 32076 }, { "epoch": 0.8807523338824822, "grad_norm": 0.3936639428138733, "learning_rate": 1.1886369215544029e-05, "loss": 0.5089, "step": 32077 }, { "epoch": 0.8807797913234486, "grad_norm": 0.3877533972263336, "learning_rate": 1.1885945074202286e-05, "loss": 0.4874, "step": 32078 }, { "epoch": 0.8808072487644152, "grad_norm": 0.3711925446987152, "learning_rate": 1.1885520929342654e-05, "loss": 0.4337, "step": 32079 }, { "epoch": 0.8808347062053816, "grad_norm": 0.41186392307281494, "learning_rate": 1.1885096780965919e-05, "loss": 0.4979, "step": 32080 }, { "epoch": 0.8808621636463482, "grad_norm": 0.37694042921066284, "learning_rate": 1.1884672629072883e-05, "loss": 0.5182, "step": 32081 }, { "epoch": 0.8808896210873146, "grad_norm": 0.3601510524749756, "learning_rate": 1.1884248473664326e-05, "loss": 0.5146, "step": 32082 }, { "epoch": 0.8809170785282812, "grad_norm": 0.6628205180168152, "learning_rate": 1.1883824314741047e-05, "loss": 0.4497, "step": 32083 }, { "epoch": 0.8809445359692477, "grad_norm": 0.4313538670539856, "learning_rate": 1.1883400152303833e-05, "loss": 0.5152, "step": 32084 }, { "epoch": 0.8809719934102141, "grad_norm": 0.38718125224113464, "learning_rate": 1.1882975986353475e-05, "loss": 0.4343, "step": 32085 }, { "epoch": 0.8809994508511807, "grad_norm": 0.46678808331489563, "learning_rate": 1.1882551816890771e-05, "loss": 0.5072, "step": 32086 }, { "epoch": 0.8810269082921471, "grad_norm": 0.3905032277107239, "learning_rate": 1.1882127643916501e-05, "loss": 0.4806, "step": 32087 }, { "epoch": 0.8810543657331137, "grad_norm": 0.38409483432769775, "learning_rate": 1.1881703467431468e-05, "loss": 0.439, "step": 32088 }, { "epoch": 0.8810818231740801, "grad_norm": 0.415054589509964, "learning_rate": 1.1881279287436453e-05, "loss": 0.5077, "step": 32089 }, { "epoch": 0.8811092806150467, "grad_norm": 0.41051170229911804, "learning_rate": 1.1880855103932254e-05, "loss": 0.4857, "step": 32090 }, { "epoch": 0.8811367380560132, "grad_norm": 0.39251744747161865, "learning_rate": 1.1880430916919658e-05, "loss": 0.4663, "step": 32091 }, { "epoch": 0.8811641954969797, "grad_norm": 0.9403777122497559, "learning_rate": 1.188000672639946e-05, "loss": 0.5023, "step": 32092 }, { "epoch": 0.8811916529379462, "grad_norm": 0.3428557515144348, "learning_rate": 1.1879582532372448e-05, "loss": 0.4217, "step": 32093 }, { "epoch": 0.8812191103789127, "grad_norm": 0.3441402316093445, "learning_rate": 1.1879158334839419e-05, "loss": 0.4689, "step": 32094 }, { "epoch": 0.8812465678198792, "grad_norm": 0.36560502648353577, "learning_rate": 1.1878734133801153e-05, "loss": 0.4158, "step": 32095 }, { "epoch": 0.8812740252608456, "grad_norm": 0.4121265411376953, "learning_rate": 1.1878309929258453e-05, "loss": 0.5806, "step": 32096 }, { "epoch": 0.8813014827018122, "grad_norm": 0.32833656668663025, "learning_rate": 1.1877885721212106e-05, "loss": 0.3955, "step": 32097 }, { "epoch": 0.8813289401427787, "grad_norm": 0.3870598375797272, "learning_rate": 1.18774615096629e-05, "loss": 0.5002, "step": 32098 }, { "epoch": 0.8813563975837452, "grad_norm": 0.4454132318496704, "learning_rate": 1.1877037294611631e-05, "loss": 0.4773, "step": 32099 }, { "epoch": 0.8813838550247117, "grad_norm": 0.4635816216468811, "learning_rate": 1.1876613076059084e-05, "loss": 0.5264, "step": 32100 }, { "epoch": 0.8814113124656782, "grad_norm": 0.40398865938186646, "learning_rate": 1.1876188854006062e-05, "loss": 0.5328, "step": 32101 }, { "epoch": 0.8814387699066447, "grad_norm": 0.43785589933395386, "learning_rate": 1.1875764628453343e-05, "loss": 0.4867, "step": 32102 }, { "epoch": 0.8814662273476112, "grad_norm": 0.4817052483558655, "learning_rate": 1.1875340399401728e-05, "loss": 0.5601, "step": 32103 }, { "epoch": 0.8814936847885777, "grad_norm": 0.46695569157600403, "learning_rate": 1.1874916166852002e-05, "loss": 0.5405, "step": 32104 }, { "epoch": 0.8815211422295443, "grad_norm": 0.4183516502380371, "learning_rate": 1.187449193080496e-05, "loss": 0.5866, "step": 32105 }, { "epoch": 0.8815485996705107, "grad_norm": 0.38090187311172485, "learning_rate": 1.1874067691261393e-05, "loss": 0.596, "step": 32106 }, { "epoch": 0.8815760571114772, "grad_norm": 0.40867894887924194, "learning_rate": 1.1873643448222092e-05, "loss": 0.4901, "step": 32107 }, { "epoch": 0.8816035145524437, "grad_norm": 0.3976020812988281, "learning_rate": 1.1873219201687844e-05, "loss": 0.4582, "step": 32108 }, { "epoch": 0.8816309719934102, "grad_norm": 0.43231889605522156, "learning_rate": 1.1872794951659447e-05, "loss": 0.4598, "step": 32109 }, { "epoch": 0.8816584294343767, "grad_norm": 0.3727076053619385, "learning_rate": 1.1872370698137692e-05, "loss": 0.4633, "step": 32110 }, { "epoch": 0.8816858868753432, "grad_norm": 0.39448443055152893, "learning_rate": 1.1871946441123366e-05, "loss": 0.5077, "step": 32111 }, { "epoch": 0.8817133443163098, "grad_norm": 0.41087251901626587, "learning_rate": 1.187152218061726e-05, "loss": 0.5077, "step": 32112 }, { "epoch": 0.8817408017572762, "grad_norm": 0.39023974537849426, "learning_rate": 1.187109791662017e-05, "loss": 0.4947, "step": 32113 }, { "epoch": 0.8817682591982428, "grad_norm": 0.41291481256484985, "learning_rate": 1.1870673649132887e-05, "loss": 0.4661, "step": 32114 }, { "epoch": 0.8817957166392092, "grad_norm": 0.5143707990646362, "learning_rate": 1.18702493781562e-05, "loss": 0.5273, "step": 32115 }, { "epoch": 0.8818231740801757, "grad_norm": 0.3820711374282837, "learning_rate": 1.1869825103690898e-05, "loss": 0.7375, "step": 32116 }, { "epoch": 0.8818506315211422, "grad_norm": 0.5015829801559448, "learning_rate": 1.1869400825737779e-05, "loss": 0.4869, "step": 32117 }, { "epoch": 0.8818780889621087, "grad_norm": 0.4504486918449402, "learning_rate": 1.1868976544297628e-05, "loss": 0.4907, "step": 32118 }, { "epoch": 0.8819055464030753, "grad_norm": 0.34252864122390747, "learning_rate": 1.186855225937124e-05, "loss": 0.4316, "step": 32119 }, { "epoch": 0.8819330038440417, "grad_norm": 0.46135252714157104, "learning_rate": 1.1868127970959407e-05, "loss": 0.4842, "step": 32120 }, { "epoch": 0.8819604612850083, "grad_norm": 0.3694794774055481, "learning_rate": 1.1867703679062917e-05, "loss": 0.5145, "step": 32121 }, { "epoch": 0.8819879187259747, "grad_norm": 0.4088258743286133, "learning_rate": 1.1867279383682566e-05, "loss": 0.4841, "step": 32122 }, { "epoch": 0.8820153761669413, "grad_norm": 0.3882768750190735, "learning_rate": 1.186685508481914e-05, "loss": 0.4991, "step": 32123 }, { "epoch": 0.8820428336079077, "grad_norm": 0.3928532302379608, "learning_rate": 1.1866430782473437e-05, "loss": 0.5007, "step": 32124 }, { "epoch": 0.8820702910488742, "grad_norm": 0.38079574704170227, "learning_rate": 1.1866006476646244e-05, "loss": 0.5797, "step": 32125 }, { "epoch": 0.8820977484898408, "grad_norm": 0.3903767466545105, "learning_rate": 1.186558216733835e-05, "loss": 0.4285, "step": 32126 }, { "epoch": 0.8821252059308072, "grad_norm": 0.4241892993450165, "learning_rate": 1.1865157854550553e-05, "loss": 0.5415, "step": 32127 }, { "epoch": 0.8821526633717738, "grad_norm": 0.3793124258518219, "learning_rate": 1.1864733538283641e-05, "loss": 0.4305, "step": 32128 }, { "epoch": 0.8821801208127402, "grad_norm": 0.4295221269130707, "learning_rate": 1.1864309218538406e-05, "loss": 0.5671, "step": 32129 }, { "epoch": 0.8822075782537068, "grad_norm": 0.3773331642150879, "learning_rate": 1.1863884895315637e-05, "loss": 0.4569, "step": 32130 }, { "epoch": 0.8822350356946732, "grad_norm": 0.4088532030582428, "learning_rate": 1.186346056861613e-05, "loss": 0.4807, "step": 32131 }, { "epoch": 0.8822624931356398, "grad_norm": 0.41398003697395325, "learning_rate": 1.1863036238440672e-05, "loss": 0.4587, "step": 32132 }, { "epoch": 0.8822899505766063, "grad_norm": 0.3913275897502899, "learning_rate": 1.186261190479006e-05, "loss": 0.4039, "step": 32133 }, { "epoch": 0.8823174080175727, "grad_norm": 0.4251953363418579, "learning_rate": 1.186218756766508e-05, "loss": 0.4913, "step": 32134 }, { "epoch": 0.8823448654585393, "grad_norm": 0.4061933159828186, "learning_rate": 1.1861763227066527e-05, "loss": 0.5518, "step": 32135 }, { "epoch": 0.8823723228995057, "grad_norm": 0.4285753667354584, "learning_rate": 1.186133888299519e-05, "loss": 0.5017, "step": 32136 }, { "epoch": 0.8823997803404723, "grad_norm": 0.39799264073371887, "learning_rate": 1.1860914535451864e-05, "loss": 0.5507, "step": 32137 }, { "epoch": 0.8824272377814387, "grad_norm": 0.3841661214828491, "learning_rate": 1.1860490184437339e-05, "loss": 0.4859, "step": 32138 }, { "epoch": 0.8824546952224053, "grad_norm": 0.3925778269767761, "learning_rate": 1.1860065829952404e-05, "loss": 0.3974, "step": 32139 }, { "epoch": 0.8824821526633718, "grad_norm": 0.35641440749168396, "learning_rate": 1.185964147199785e-05, "loss": 0.5035, "step": 32140 }, { "epoch": 0.8825096101043383, "grad_norm": 0.40499305725097656, "learning_rate": 1.1859217110574474e-05, "loss": 0.476, "step": 32141 }, { "epoch": 0.8825370675453048, "grad_norm": 0.3896653950214386, "learning_rate": 1.1858792745683067e-05, "loss": 0.5192, "step": 32142 }, { "epoch": 0.8825645249862712, "grad_norm": 0.3816666901111603, "learning_rate": 1.1858368377324417e-05, "loss": 0.4383, "step": 32143 }, { "epoch": 0.8825919824272378, "grad_norm": 0.3993043005466461, "learning_rate": 1.1857944005499315e-05, "loss": 0.461, "step": 32144 }, { "epoch": 0.8826194398682042, "grad_norm": 0.5778219103813171, "learning_rate": 1.1857519630208555e-05, "loss": 0.4686, "step": 32145 }, { "epoch": 0.8826468973091708, "grad_norm": 0.3837796449661255, "learning_rate": 1.1857095251452928e-05, "loss": 0.5388, "step": 32146 }, { "epoch": 0.8826743547501373, "grad_norm": 0.47597536444664, "learning_rate": 1.1856670869233226e-05, "loss": 0.5216, "step": 32147 }, { "epoch": 0.8827018121911038, "grad_norm": 0.41416996717453003, "learning_rate": 1.185624648355024e-05, "loss": 0.5847, "step": 32148 }, { "epoch": 0.8827292696320703, "grad_norm": 0.3612534701824188, "learning_rate": 1.1855822094404762e-05, "loss": 0.522, "step": 32149 }, { "epoch": 0.8827567270730368, "grad_norm": 0.4231935739517212, "learning_rate": 1.1855397701797585e-05, "loss": 0.4988, "step": 32150 }, { "epoch": 0.8827841845140033, "grad_norm": 0.5280308127403259, "learning_rate": 1.1854973305729495e-05, "loss": 0.5315, "step": 32151 }, { "epoch": 0.8828116419549698, "grad_norm": 0.41743096709251404, "learning_rate": 1.1854548906201293e-05, "loss": 0.4701, "step": 32152 }, { "epoch": 0.8828390993959363, "grad_norm": 0.4033883512020111, "learning_rate": 1.1854124503213763e-05, "loss": 0.4447, "step": 32153 }, { "epoch": 0.8828665568369028, "grad_norm": 0.35017630457878113, "learning_rate": 1.1853700096767698e-05, "loss": 0.5249, "step": 32154 }, { "epoch": 0.8828940142778693, "grad_norm": 0.38255488872528076, "learning_rate": 1.1853275686863893e-05, "loss": 0.4747, "step": 32155 }, { "epoch": 0.8829214717188358, "grad_norm": 0.4302094578742981, "learning_rate": 1.1852851273503134e-05, "loss": 0.531, "step": 32156 }, { "epoch": 0.8829489291598023, "grad_norm": 0.3728107213973999, "learning_rate": 1.1852426856686219e-05, "loss": 0.53, "step": 32157 }, { "epoch": 0.8829763866007688, "grad_norm": 0.42744162678718567, "learning_rate": 1.1852002436413934e-05, "loss": 0.5456, "step": 32158 }, { "epoch": 0.8830038440417353, "grad_norm": 0.4061581790447235, "learning_rate": 1.1851578012687078e-05, "loss": 0.4842, "step": 32159 }, { "epoch": 0.8830313014827018, "grad_norm": 0.3831661343574524, "learning_rate": 1.1851153585506435e-05, "loss": 0.537, "step": 32160 }, { "epoch": 0.8830587589236684, "grad_norm": 0.4399864077568054, "learning_rate": 1.1850729154872798e-05, "loss": 0.5373, "step": 32161 }, { "epoch": 0.8830862163646348, "grad_norm": 0.391813188791275, "learning_rate": 1.1850304720786962e-05, "loss": 0.5289, "step": 32162 }, { "epoch": 0.8831136738056014, "grad_norm": 0.4477876126766205, "learning_rate": 1.1849880283249717e-05, "loss": 0.5337, "step": 32163 }, { "epoch": 0.8831411312465678, "grad_norm": 0.4569886326789856, "learning_rate": 1.1849455842261857e-05, "loss": 0.6615, "step": 32164 }, { "epoch": 0.8831685886875343, "grad_norm": 0.41136762499809265, "learning_rate": 1.1849031397824166e-05, "loss": 0.5329, "step": 32165 }, { "epoch": 0.8831960461285008, "grad_norm": 0.4011352062225342, "learning_rate": 1.1848606949937447e-05, "loss": 0.5884, "step": 32166 }, { "epoch": 0.8832235035694673, "grad_norm": 0.41272225975990295, "learning_rate": 1.1848182498602485e-05, "loss": 0.5466, "step": 32167 }, { "epoch": 0.8832509610104339, "grad_norm": 0.43644753098487854, "learning_rate": 1.1847758043820071e-05, "loss": 0.4938, "step": 32168 }, { "epoch": 0.8832784184514003, "grad_norm": 0.39141562581062317, "learning_rate": 1.1847333585591001e-05, "loss": 0.5147, "step": 32169 }, { "epoch": 0.8833058758923669, "grad_norm": 0.34249141812324524, "learning_rate": 1.1846909123916061e-05, "loss": 0.4756, "step": 32170 }, { "epoch": 0.8833333333333333, "grad_norm": 0.4218800365924835, "learning_rate": 1.1846484658796048e-05, "loss": 0.522, "step": 32171 }, { "epoch": 0.8833607907742999, "grad_norm": 0.4212571680545807, "learning_rate": 1.1846060190231752e-05, "loss": 0.5223, "step": 32172 }, { "epoch": 0.8833882482152663, "grad_norm": 0.3780130445957184, "learning_rate": 1.1845635718223962e-05, "loss": 0.4909, "step": 32173 }, { "epoch": 0.8834157056562328, "grad_norm": 0.3961924910545349, "learning_rate": 1.1845211242773476e-05, "loss": 0.5045, "step": 32174 }, { "epoch": 0.8834431630971994, "grad_norm": 0.4031975269317627, "learning_rate": 1.184478676388108e-05, "loss": 0.4923, "step": 32175 }, { "epoch": 0.8834706205381658, "grad_norm": 0.3795796036720276, "learning_rate": 1.1844362281547567e-05, "loss": 0.5248, "step": 32176 }, { "epoch": 0.8834980779791324, "grad_norm": 0.3640701472759247, "learning_rate": 1.1843937795773735e-05, "loss": 0.3851, "step": 32177 }, { "epoch": 0.8835255354200988, "grad_norm": 0.37172266840934753, "learning_rate": 1.1843513306560363e-05, "loss": 0.5061, "step": 32178 }, { "epoch": 0.8835529928610654, "grad_norm": 0.42259564995765686, "learning_rate": 1.1843088813908257e-05, "loss": 0.5187, "step": 32179 }, { "epoch": 0.8835804503020318, "grad_norm": 0.4077850878238678, "learning_rate": 1.1842664317818196e-05, "loss": 0.4437, "step": 32180 }, { "epoch": 0.8836079077429984, "grad_norm": 0.46389225125312805, "learning_rate": 1.1842239818290981e-05, "loss": 0.4818, "step": 32181 }, { "epoch": 0.8836353651839649, "grad_norm": 0.37171807885169983, "learning_rate": 1.1841815315327403e-05, "loss": 0.4769, "step": 32182 }, { "epoch": 0.8836628226249313, "grad_norm": 0.3757531940937042, "learning_rate": 1.1841390808928248e-05, "loss": 0.4652, "step": 32183 }, { "epoch": 0.8836902800658979, "grad_norm": 0.39977747201919556, "learning_rate": 1.1840966299094314e-05, "loss": 0.5045, "step": 32184 }, { "epoch": 0.8837177375068643, "grad_norm": 0.34406939148902893, "learning_rate": 1.1840541785826387e-05, "loss": 0.4514, "step": 32185 }, { "epoch": 0.8837451949478309, "grad_norm": 0.3807407021522522, "learning_rate": 1.1840117269125265e-05, "loss": 0.5179, "step": 32186 }, { "epoch": 0.8837726523887973, "grad_norm": 0.36314013600349426, "learning_rate": 1.1839692748991737e-05, "loss": 0.5465, "step": 32187 }, { "epoch": 0.8838001098297639, "grad_norm": 0.5790812373161316, "learning_rate": 1.1839268225426592e-05, "loss": 0.5139, "step": 32188 }, { "epoch": 0.8838275672707304, "grad_norm": 0.42763590812683105, "learning_rate": 1.1838843698430629e-05, "loss": 0.4964, "step": 32189 }, { "epoch": 0.8838550247116969, "grad_norm": 0.3995521366596222, "learning_rate": 1.1838419168004633e-05, "loss": 0.444, "step": 32190 }, { "epoch": 0.8838824821526634, "grad_norm": 0.41822493076324463, "learning_rate": 1.18379946341494e-05, "loss": 0.5329, "step": 32191 }, { "epoch": 0.8839099395936298, "grad_norm": 0.4079596996307373, "learning_rate": 1.1837570096865718e-05, "loss": 0.4618, "step": 32192 }, { "epoch": 0.8839373970345964, "grad_norm": 0.39923998713493347, "learning_rate": 1.1837145556154381e-05, "loss": 0.5101, "step": 32193 }, { "epoch": 0.8839648544755628, "grad_norm": 0.3490218222141266, "learning_rate": 1.1836721012016184e-05, "loss": 0.4736, "step": 32194 }, { "epoch": 0.8839923119165294, "grad_norm": 0.3912433385848999, "learning_rate": 1.1836296464451915e-05, "loss": 0.5242, "step": 32195 }, { "epoch": 0.8840197693574959, "grad_norm": 0.36327850818634033, "learning_rate": 1.1835871913462367e-05, "loss": 0.4737, "step": 32196 }, { "epoch": 0.8840472267984624, "grad_norm": 0.3886588513851166, "learning_rate": 1.183544735904833e-05, "loss": 0.5638, "step": 32197 }, { "epoch": 0.8840746842394289, "grad_norm": 0.4627472460269928, "learning_rate": 1.18350228012106e-05, "loss": 0.4521, "step": 32198 }, { "epoch": 0.8841021416803954, "grad_norm": 0.3643319606781006, "learning_rate": 1.1834598239949968e-05, "loss": 0.5694, "step": 32199 }, { "epoch": 0.8841295991213619, "grad_norm": 0.36182764172554016, "learning_rate": 1.1834173675267224e-05, "loss": 0.532, "step": 32200 }, { "epoch": 0.8841570565623283, "grad_norm": 0.38726598024368286, "learning_rate": 1.1833749107163157e-05, "loss": 0.4638, "step": 32201 }, { "epoch": 0.8841845140032949, "grad_norm": 0.41657403111457825, "learning_rate": 1.1833324535638568e-05, "loss": 0.3977, "step": 32202 }, { "epoch": 0.8842119714442614, "grad_norm": 0.36668258905410767, "learning_rate": 1.1832899960694242e-05, "loss": 0.4848, "step": 32203 }, { "epoch": 0.8842394288852279, "grad_norm": 0.3804365396499634, "learning_rate": 1.1832475382330971e-05, "loss": 0.4361, "step": 32204 }, { "epoch": 0.8842668863261944, "grad_norm": 0.4318682849407196, "learning_rate": 1.1832050800549549e-05, "loss": 0.4455, "step": 32205 }, { "epoch": 0.8842943437671609, "grad_norm": 0.411655068397522, "learning_rate": 1.1831626215350766e-05, "loss": 0.4367, "step": 32206 }, { "epoch": 0.8843218012081274, "grad_norm": 0.39048585295677185, "learning_rate": 1.1831201626735418e-05, "loss": 0.3904, "step": 32207 }, { "epoch": 0.8843492586490939, "grad_norm": 0.33986759185791016, "learning_rate": 1.1830777034704294e-05, "loss": 0.4661, "step": 32208 }, { "epoch": 0.8843767160900604, "grad_norm": 0.39313238859176636, "learning_rate": 1.1830352439258186e-05, "loss": 0.4511, "step": 32209 }, { "epoch": 0.884404173531027, "grad_norm": 0.4647328555583954, "learning_rate": 1.1829927840397888e-05, "loss": 0.4766, "step": 32210 }, { "epoch": 0.8844316309719934, "grad_norm": 0.44786790013313293, "learning_rate": 1.1829503238124188e-05, "loss": 0.4907, "step": 32211 }, { "epoch": 0.88445908841296, "grad_norm": 0.4534197747707367, "learning_rate": 1.1829078632437883e-05, "loss": 0.5093, "step": 32212 }, { "epoch": 0.8844865458539264, "grad_norm": 0.4146195352077484, "learning_rate": 1.1828654023339762e-05, "loss": 0.5246, "step": 32213 }, { "epoch": 0.8845140032948929, "grad_norm": 0.3602127730846405, "learning_rate": 1.1828229410830616e-05, "loss": 0.4229, "step": 32214 }, { "epoch": 0.8845414607358594, "grad_norm": 0.3959024250507355, "learning_rate": 1.1827804794911239e-05, "loss": 0.4203, "step": 32215 }, { "epoch": 0.8845689181768259, "grad_norm": 0.38194435834884644, "learning_rate": 1.1827380175582423e-05, "loss": 0.5425, "step": 32216 }, { "epoch": 0.8845963756177925, "grad_norm": 0.4104350209236145, "learning_rate": 1.1826955552844961e-05, "loss": 0.5609, "step": 32217 }, { "epoch": 0.8846238330587589, "grad_norm": 0.4221123158931732, "learning_rate": 1.1826530926699645e-05, "loss": 0.4843, "step": 32218 }, { "epoch": 0.8846512904997255, "grad_norm": 0.35375499725341797, "learning_rate": 1.1826106297147262e-05, "loss": 0.4644, "step": 32219 }, { "epoch": 0.8846787479406919, "grad_norm": 0.372004896402359, "learning_rate": 1.1825681664188611e-05, "loss": 0.4379, "step": 32220 }, { "epoch": 0.8847062053816585, "grad_norm": 0.463944673538208, "learning_rate": 1.1825257027824481e-05, "loss": 0.4815, "step": 32221 }, { "epoch": 0.8847336628226249, "grad_norm": 0.31470710039138794, "learning_rate": 1.1824832388055661e-05, "loss": 0.4445, "step": 32222 }, { "epoch": 0.8847611202635914, "grad_norm": 0.3491107225418091, "learning_rate": 1.1824407744882948e-05, "loss": 0.487, "step": 32223 }, { "epoch": 0.884788577704558, "grad_norm": 0.39206451177597046, "learning_rate": 1.1823983098307133e-05, "loss": 0.5246, "step": 32224 }, { "epoch": 0.8848160351455244, "grad_norm": 0.44791409373283386, "learning_rate": 1.1823558448329006e-05, "loss": 0.5721, "step": 32225 }, { "epoch": 0.884843492586491, "grad_norm": 0.3837636411190033, "learning_rate": 1.1823133794949361e-05, "loss": 0.5349, "step": 32226 }, { "epoch": 0.8848709500274574, "grad_norm": 0.3785567581653595, "learning_rate": 1.1822709138168991e-05, "loss": 0.4714, "step": 32227 }, { "epoch": 0.884898407468424, "grad_norm": 0.38533204793930054, "learning_rate": 1.1822284477988686e-05, "loss": 0.4955, "step": 32228 }, { "epoch": 0.8849258649093904, "grad_norm": 0.3779972195625305, "learning_rate": 1.1821859814409238e-05, "loss": 0.4595, "step": 32229 }, { "epoch": 0.884953322350357, "grad_norm": 0.43550172448158264, "learning_rate": 1.1821435147431438e-05, "loss": 0.5219, "step": 32230 }, { "epoch": 0.8849807797913235, "grad_norm": 0.40131136775016785, "learning_rate": 1.1821010477056084e-05, "loss": 0.451, "step": 32231 }, { "epoch": 0.8850082372322899, "grad_norm": 0.4214985966682434, "learning_rate": 1.1820585803283962e-05, "loss": 0.4996, "step": 32232 }, { "epoch": 0.8850356946732565, "grad_norm": 0.37786179780960083, "learning_rate": 1.1820161126115868e-05, "loss": 0.55, "step": 32233 }, { "epoch": 0.8850631521142229, "grad_norm": 0.3775619566440582, "learning_rate": 1.1819736445552591e-05, "loss": 0.4858, "step": 32234 }, { "epoch": 0.8850906095551895, "grad_norm": 0.4334871768951416, "learning_rate": 1.1819311761594927e-05, "loss": 0.5804, "step": 32235 }, { "epoch": 0.8851180669961559, "grad_norm": 0.4062921106815338, "learning_rate": 1.1818887074243665e-05, "loss": 0.4861, "step": 32236 }, { "epoch": 0.8851455244371225, "grad_norm": 0.4451112747192383, "learning_rate": 1.1818462383499596e-05, "loss": 0.4682, "step": 32237 }, { "epoch": 0.885172981878089, "grad_norm": 0.3192233443260193, "learning_rate": 1.1818037689363514e-05, "loss": 0.5007, "step": 32238 }, { "epoch": 0.8852004393190555, "grad_norm": 0.38611313700675964, "learning_rate": 1.1817612991836216e-05, "loss": 0.4691, "step": 32239 }, { "epoch": 0.885227896760022, "grad_norm": 0.3647097647190094, "learning_rate": 1.1817188290918487e-05, "loss": 0.553, "step": 32240 }, { "epoch": 0.8852553542009884, "grad_norm": 0.371381551027298, "learning_rate": 1.1816763586611121e-05, "loss": 0.5702, "step": 32241 }, { "epoch": 0.885282811641955, "grad_norm": 0.48543012142181396, "learning_rate": 1.181633887891491e-05, "loss": 0.5819, "step": 32242 }, { "epoch": 0.8853102690829214, "grad_norm": 0.4578612148761749, "learning_rate": 1.181591416783065e-05, "loss": 0.4723, "step": 32243 }, { "epoch": 0.885337726523888, "grad_norm": 0.3944311738014221, "learning_rate": 1.181548945335913e-05, "loss": 0.4929, "step": 32244 }, { "epoch": 0.8853651839648545, "grad_norm": 0.35601234436035156, "learning_rate": 1.1815064735501142e-05, "loss": 0.4683, "step": 32245 }, { "epoch": 0.885392641405821, "grad_norm": 0.40630781650543213, "learning_rate": 1.1814640014257479e-05, "loss": 0.4764, "step": 32246 }, { "epoch": 0.8854200988467875, "grad_norm": 0.4339616000652313, "learning_rate": 1.1814215289628934e-05, "loss": 0.5842, "step": 32247 }, { "epoch": 0.885447556287754, "grad_norm": 0.4239129424095154, "learning_rate": 1.1813790561616298e-05, "loss": 0.524, "step": 32248 }, { "epoch": 0.8854750137287205, "grad_norm": 0.4122961759567261, "learning_rate": 1.1813365830220365e-05, "loss": 0.4637, "step": 32249 }, { "epoch": 0.8855024711696869, "grad_norm": 0.3633846044540405, "learning_rate": 1.1812941095441923e-05, "loss": 0.501, "step": 32250 }, { "epoch": 0.8855299286106535, "grad_norm": 0.38852912187576294, "learning_rate": 1.181251635728177e-05, "loss": 0.4841, "step": 32251 }, { "epoch": 0.88555738605162, "grad_norm": 0.3883962035179138, "learning_rate": 1.1812091615740694e-05, "loss": 0.5001, "step": 32252 }, { "epoch": 0.8855848434925865, "grad_norm": 0.3708323538303375, "learning_rate": 1.1811666870819489e-05, "loss": 0.4761, "step": 32253 }, { "epoch": 0.885612300933553, "grad_norm": 0.37440213561058044, "learning_rate": 1.1811242122518946e-05, "loss": 0.4884, "step": 32254 }, { "epoch": 0.8856397583745195, "grad_norm": 0.374057412147522, "learning_rate": 1.181081737083986e-05, "loss": 0.466, "step": 32255 }, { "epoch": 0.885667215815486, "grad_norm": 0.41832435131073, "learning_rate": 1.1810392615783021e-05, "loss": 0.5287, "step": 32256 }, { "epoch": 0.8856946732564525, "grad_norm": 0.44175073504447937, "learning_rate": 1.1809967857349223e-05, "loss": 0.5712, "step": 32257 }, { "epoch": 0.885722130697419, "grad_norm": 0.3755776882171631, "learning_rate": 1.1809543095539255e-05, "loss": 0.4574, "step": 32258 }, { "epoch": 0.8857495881383856, "grad_norm": 0.34833279252052307, "learning_rate": 1.1809118330353914e-05, "loss": 0.4279, "step": 32259 }, { "epoch": 0.885777045579352, "grad_norm": 0.4183677136898041, "learning_rate": 1.1808693561793987e-05, "loss": 0.5081, "step": 32260 }, { "epoch": 0.8858045030203185, "grad_norm": 0.3653058111667633, "learning_rate": 1.1808268789860274e-05, "loss": 0.4472, "step": 32261 }, { "epoch": 0.885831960461285, "grad_norm": 0.42404279112815857, "learning_rate": 1.180784401455356e-05, "loss": 0.5389, "step": 32262 }, { "epoch": 0.8858594179022515, "grad_norm": 0.3922504782676697, "learning_rate": 1.1807419235874637e-05, "loss": 0.4702, "step": 32263 }, { "epoch": 0.885886875343218, "grad_norm": 0.46520131826400757, "learning_rate": 1.1806994453824306e-05, "loss": 0.4426, "step": 32264 }, { "epoch": 0.8859143327841845, "grad_norm": 0.39741405844688416, "learning_rate": 1.180656966840335e-05, "loss": 0.4639, "step": 32265 }, { "epoch": 0.8859417902251511, "grad_norm": 0.3872389793395996, "learning_rate": 1.1806144879612564e-05, "loss": 0.5258, "step": 32266 }, { "epoch": 0.8859692476661175, "grad_norm": 0.43221431970596313, "learning_rate": 1.1805720087452743e-05, "loss": 0.5147, "step": 32267 }, { "epoch": 0.8859967051070841, "grad_norm": 0.37292519211769104, "learning_rate": 1.1805295291924677e-05, "loss": 0.4525, "step": 32268 }, { "epoch": 0.8860241625480505, "grad_norm": 0.3899640142917633, "learning_rate": 1.1804870493029161e-05, "loss": 0.6363, "step": 32269 }, { "epoch": 0.886051619989017, "grad_norm": 0.4001234471797943, "learning_rate": 1.1804445690766983e-05, "loss": 0.5041, "step": 32270 }, { "epoch": 0.8860790774299835, "grad_norm": 0.3919003903865814, "learning_rate": 1.1804020885138937e-05, "loss": 0.5335, "step": 32271 }, { "epoch": 0.88610653487095, "grad_norm": 0.4145071804523468, "learning_rate": 1.180359607614582e-05, "loss": 0.4394, "step": 32272 }, { "epoch": 0.8861339923119165, "grad_norm": 0.39130929112434387, "learning_rate": 1.1803171263788418e-05, "loss": 0.424, "step": 32273 }, { "epoch": 0.886161449752883, "grad_norm": 0.3803912401199341, "learning_rate": 1.1802746448067528e-05, "loss": 0.4773, "step": 32274 }, { "epoch": 0.8861889071938496, "grad_norm": 0.346413791179657, "learning_rate": 1.1802321628983942e-05, "loss": 0.477, "step": 32275 }, { "epoch": 0.886216364634816, "grad_norm": 0.41806361079216003, "learning_rate": 1.1801896806538447e-05, "loss": 0.4713, "step": 32276 }, { "epoch": 0.8862438220757826, "grad_norm": 0.3451319634914398, "learning_rate": 1.180147198073184e-05, "loss": 0.4306, "step": 32277 }, { "epoch": 0.886271279516749, "grad_norm": 0.38805362582206726, "learning_rate": 1.1801047151564912e-05, "loss": 0.6115, "step": 32278 }, { "epoch": 0.8862987369577155, "grad_norm": 0.3937864303588867, "learning_rate": 1.180062231903846e-05, "loss": 0.5451, "step": 32279 }, { "epoch": 0.886326194398682, "grad_norm": 0.41906753182411194, "learning_rate": 1.1800197483153271e-05, "loss": 0.5249, "step": 32280 }, { "epoch": 0.8863536518396485, "grad_norm": 0.3743286430835724, "learning_rate": 1.1799772643910137e-05, "loss": 0.4593, "step": 32281 }, { "epoch": 0.8863811092806151, "grad_norm": 0.4307684600353241, "learning_rate": 1.1799347801309856e-05, "loss": 0.4923, "step": 32282 }, { "epoch": 0.8864085667215815, "grad_norm": 0.3971366882324219, "learning_rate": 1.1798922955353216e-05, "loss": 0.4684, "step": 32283 }, { "epoch": 0.8864360241625481, "grad_norm": 0.4436178207397461, "learning_rate": 1.1798498106041007e-05, "loss": 0.4828, "step": 32284 }, { "epoch": 0.8864634816035145, "grad_norm": 0.40856921672821045, "learning_rate": 1.1798073253374032e-05, "loss": 0.4458, "step": 32285 }, { "epoch": 0.8864909390444811, "grad_norm": 0.38338544964790344, "learning_rate": 1.179764839735307e-05, "loss": 0.4774, "step": 32286 }, { "epoch": 0.8865183964854475, "grad_norm": 0.4247235059738159, "learning_rate": 1.1797223537978923e-05, "loss": 0.4788, "step": 32287 }, { "epoch": 0.886545853926414, "grad_norm": 1.4680722951889038, "learning_rate": 1.1796798675252382e-05, "loss": 0.4618, "step": 32288 }, { "epoch": 0.8865733113673806, "grad_norm": 0.3966628611087799, "learning_rate": 1.1796373809174233e-05, "loss": 0.4407, "step": 32289 }, { "epoch": 0.886600768808347, "grad_norm": 0.4033275842666626, "learning_rate": 1.1795948939745277e-05, "loss": 0.5064, "step": 32290 }, { "epoch": 0.8866282262493136, "grad_norm": 0.3833080530166626, "learning_rate": 1.1795524066966303e-05, "loss": 0.5277, "step": 32291 }, { "epoch": 0.88665568369028, "grad_norm": 0.4392809569835663, "learning_rate": 1.1795099190838102e-05, "loss": 0.4858, "step": 32292 }, { "epoch": 0.8866831411312466, "grad_norm": 0.3940228819847107, "learning_rate": 1.1794674311361471e-05, "loss": 0.5114, "step": 32293 }, { "epoch": 0.886710598572213, "grad_norm": 0.44981974363327026, "learning_rate": 1.1794249428537197e-05, "loss": 0.5166, "step": 32294 }, { "epoch": 0.8867380560131796, "grad_norm": 0.4200170636177063, "learning_rate": 1.1793824542366075e-05, "loss": 0.5414, "step": 32295 }, { "epoch": 0.8867655134541461, "grad_norm": 0.43664661049842834, "learning_rate": 1.1793399652848901e-05, "loss": 0.5494, "step": 32296 }, { "epoch": 0.8867929708951126, "grad_norm": 0.3656286299228668, "learning_rate": 1.179297475998646e-05, "loss": 0.4719, "step": 32297 }, { "epoch": 0.8868204283360791, "grad_norm": 0.3872431218624115, "learning_rate": 1.179254986377955e-05, "loss": 0.4738, "step": 32298 }, { "epoch": 0.8868478857770455, "grad_norm": 0.38341832160949707, "learning_rate": 1.1792124964228964e-05, "loss": 0.4458, "step": 32299 }, { "epoch": 0.8868753432180121, "grad_norm": 0.3539150655269623, "learning_rate": 1.1791700061335491e-05, "loss": 0.4317, "step": 32300 }, { "epoch": 0.8869028006589785, "grad_norm": 0.4034280776977539, "learning_rate": 1.1791275155099929e-05, "loss": 0.4997, "step": 32301 }, { "epoch": 0.8869302580999451, "grad_norm": 0.43483179807662964, "learning_rate": 1.1790850245523063e-05, "loss": 0.5214, "step": 32302 }, { "epoch": 0.8869577155409116, "grad_norm": 0.3797767460346222, "learning_rate": 1.1790425332605691e-05, "loss": 0.4729, "step": 32303 }, { "epoch": 0.8869851729818781, "grad_norm": 0.373826801776886, "learning_rate": 1.1790000416348604e-05, "loss": 0.4954, "step": 32304 }, { "epoch": 0.8870126304228446, "grad_norm": 0.4002823829650879, "learning_rate": 1.1789575496752597e-05, "loss": 0.5375, "step": 32305 }, { "epoch": 0.887040087863811, "grad_norm": 0.39299434423446655, "learning_rate": 1.1789150573818459e-05, "loss": 0.5001, "step": 32306 }, { "epoch": 0.8870675453047776, "grad_norm": 0.39473775029182434, "learning_rate": 1.1788725647546984e-05, "loss": 0.4947, "step": 32307 }, { "epoch": 0.887095002745744, "grad_norm": 0.42034900188446045, "learning_rate": 1.1788300717938965e-05, "loss": 0.5281, "step": 32308 }, { "epoch": 0.8871224601867106, "grad_norm": 0.3666178584098816, "learning_rate": 1.1787875784995193e-05, "loss": 0.4568, "step": 32309 }, { "epoch": 0.8871499176276771, "grad_norm": 0.3575306534767151, "learning_rate": 1.1787450848716464e-05, "loss": 0.4807, "step": 32310 }, { "epoch": 0.8871773750686436, "grad_norm": 0.3794727027416229, "learning_rate": 1.1787025909103568e-05, "loss": 0.5429, "step": 32311 }, { "epoch": 0.8872048325096101, "grad_norm": 0.42677199840545654, "learning_rate": 1.1786600966157298e-05, "loss": 0.5397, "step": 32312 }, { "epoch": 0.8872322899505766, "grad_norm": 0.3842495381832123, "learning_rate": 1.1786176019878449e-05, "loss": 0.4824, "step": 32313 }, { "epoch": 0.8872597473915431, "grad_norm": 0.41714659333229065, "learning_rate": 1.178575107026781e-05, "loss": 0.4555, "step": 32314 }, { "epoch": 0.8872872048325096, "grad_norm": 0.35265809297561646, "learning_rate": 1.1785326117326174e-05, "loss": 0.4254, "step": 32315 }, { "epoch": 0.8873146622734761, "grad_norm": 0.4360281527042389, "learning_rate": 1.1784901161054339e-05, "loss": 0.5909, "step": 32316 }, { "epoch": 0.8873421197144427, "grad_norm": 0.596787691116333, "learning_rate": 1.178447620145309e-05, "loss": 0.5033, "step": 32317 }, { "epoch": 0.8873695771554091, "grad_norm": 0.44819721579551697, "learning_rate": 1.1784051238523225e-05, "loss": 0.5593, "step": 32318 }, { "epoch": 0.8873970345963756, "grad_norm": 0.36871597170829773, "learning_rate": 1.1783626272265534e-05, "loss": 0.4819, "step": 32319 }, { "epoch": 0.8874244920373421, "grad_norm": 0.3932063579559326, "learning_rate": 1.1783201302680812e-05, "loss": 0.4944, "step": 32320 }, { "epoch": 0.8874519494783086, "grad_norm": 0.4390850067138672, "learning_rate": 1.178277632976985e-05, "loss": 0.4879, "step": 32321 }, { "epoch": 0.8874794069192751, "grad_norm": 0.41895365715026855, "learning_rate": 1.178235135353344e-05, "loss": 0.5454, "step": 32322 }, { "epoch": 0.8875068643602416, "grad_norm": 0.35101598501205444, "learning_rate": 1.1781926373972377e-05, "loss": 0.4813, "step": 32323 }, { "epoch": 0.8875343218012082, "grad_norm": 0.43347662687301636, "learning_rate": 1.1781501391087451e-05, "loss": 0.5112, "step": 32324 }, { "epoch": 0.8875617792421746, "grad_norm": 0.45791372656822205, "learning_rate": 1.1781076404879457e-05, "loss": 0.5788, "step": 32325 }, { "epoch": 0.8875892366831412, "grad_norm": 0.3925175368785858, "learning_rate": 1.1780651415349189e-05, "loss": 0.529, "step": 32326 }, { "epoch": 0.8876166941241076, "grad_norm": 0.41011446714401245, "learning_rate": 1.1780226422497436e-05, "loss": 0.5168, "step": 32327 }, { "epoch": 0.8876441515650741, "grad_norm": 0.42135438323020935, "learning_rate": 1.1779801426324992e-05, "loss": 0.4773, "step": 32328 }, { "epoch": 0.8876716090060406, "grad_norm": 0.44236207008361816, "learning_rate": 1.1779376426832649e-05, "loss": 0.5122, "step": 32329 }, { "epoch": 0.8876990664470071, "grad_norm": 0.36410778760910034, "learning_rate": 1.1778951424021205e-05, "loss": 0.4803, "step": 32330 }, { "epoch": 0.8877265238879737, "grad_norm": 0.38060396909713745, "learning_rate": 1.177852641789145e-05, "loss": 0.5327, "step": 32331 }, { "epoch": 0.8877539813289401, "grad_norm": 0.4074549973011017, "learning_rate": 1.1778101408444169e-05, "loss": 0.4457, "step": 32332 }, { "epoch": 0.8877814387699067, "grad_norm": 0.4042617082595825, "learning_rate": 1.1777676395680166e-05, "loss": 0.5639, "step": 32333 }, { "epoch": 0.8878088962108731, "grad_norm": 0.3944231867790222, "learning_rate": 1.1777251379600227e-05, "loss": 0.5142, "step": 32334 }, { "epoch": 0.8878363536518397, "grad_norm": 0.347436785697937, "learning_rate": 1.1776826360205148e-05, "loss": 0.4646, "step": 32335 }, { "epoch": 0.8878638110928061, "grad_norm": 0.3979927599430084, "learning_rate": 1.1776401337495725e-05, "loss": 0.5431, "step": 32336 }, { "epoch": 0.8878912685337726, "grad_norm": 0.4092268943786621, "learning_rate": 1.1775976311472739e-05, "loss": 0.5415, "step": 32337 }, { "epoch": 0.8879187259747392, "grad_norm": 0.456389844417572, "learning_rate": 1.1775551282136997e-05, "loss": 0.4889, "step": 32338 }, { "epoch": 0.8879461834157056, "grad_norm": 0.4914087653160095, "learning_rate": 1.177512624948928e-05, "loss": 0.4732, "step": 32339 }, { "epoch": 0.8879736408566722, "grad_norm": 0.3956902325153351, "learning_rate": 1.1774701213530388e-05, "loss": 0.3794, "step": 32340 }, { "epoch": 0.8880010982976386, "grad_norm": 0.5093299150466919, "learning_rate": 1.1774276174261113e-05, "loss": 0.4938, "step": 32341 }, { "epoch": 0.8880285557386052, "grad_norm": 0.48273202776908875, "learning_rate": 1.1773851131682244e-05, "loss": 0.4972, "step": 32342 }, { "epoch": 0.8880560131795716, "grad_norm": 0.38012975454330444, "learning_rate": 1.1773426085794579e-05, "loss": 0.4958, "step": 32343 }, { "epoch": 0.8880834706205382, "grad_norm": 0.4162265956401825, "learning_rate": 1.1773001036598906e-05, "loss": 0.5059, "step": 32344 }, { "epoch": 0.8881109280615047, "grad_norm": 0.42823150753974915, "learning_rate": 1.1772575984096022e-05, "loss": 0.5195, "step": 32345 }, { "epoch": 0.8881383855024712, "grad_norm": 0.4016713798046112, "learning_rate": 1.1772150928286716e-05, "loss": 0.469, "step": 32346 }, { "epoch": 0.8881658429434377, "grad_norm": 0.4203815758228302, "learning_rate": 1.1771725869171786e-05, "loss": 0.5084, "step": 32347 }, { "epoch": 0.8881933003844041, "grad_norm": 0.35750606656074524, "learning_rate": 1.177130080675202e-05, "loss": 0.4488, "step": 32348 }, { "epoch": 0.8882207578253707, "grad_norm": 0.34417724609375, "learning_rate": 1.1770875741028212e-05, "loss": 0.499, "step": 32349 }, { "epoch": 0.8882482152663371, "grad_norm": 0.5740039944648743, "learning_rate": 1.1770450672001156e-05, "loss": 0.5557, "step": 32350 }, { "epoch": 0.8882756727073037, "grad_norm": 0.39526838064193726, "learning_rate": 1.1770025599671643e-05, "loss": 0.4459, "step": 32351 }, { "epoch": 0.8883031301482702, "grad_norm": 0.3914460241794586, "learning_rate": 1.1769600524040472e-05, "loss": 0.5408, "step": 32352 }, { "epoch": 0.8883305875892367, "grad_norm": 0.4597156047821045, "learning_rate": 1.176917544510843e-05, "loss": 0.5075, "step": 32353 }, { "epoch": 0.8883580450302032, "grad_norm": 0.4138011336326599, "learning_rate": 1.1768750362876307e-05, "loss": 0.499, "step": 32354 }, { "epoch": 0.8883855024711697, "grad_norm": 0.37115558981895447, "learning_rate": 1.1768325277344903e-05, "loss": 0.4782, "step": 32355 }, { "epoch": 0.8884129599121362, "grad_norm": 0.8096694946289062, "learning_rate": 1.1767900188515007e-05, "loss": 0.4704, "step": 32356 }, { "epoch": 0.8884404173531026, "grad_norm": 0.38582080602645874, "learning_rate": 1.1767475096387413e-05, "loss": 0.4534, "step": 32357 }, { "epoch": 0.8884678747940692, "grad_norm": 0.4118908643722534, "learning_rate": 1.1767050000962917e-05, "loss": 0.463, "step": 32358 }, { "epoch": 0.8884953322350357, "grad_norm": 0.4188452959060669, "learning_rate": 1.1766624902242303e-05, "loss": 0.5489, "step": 32359 }, { "epoch": 0.8885227896760022, "grad_norm": 0.385082870721817, "learning_rate": 1.1766199800226374e-05, "loss": 0.5048, "step": 32360 }, { "epoch": 0.8885502471169687, "grad_norm": 0.40431004762649536, "learning_rate": 1.1765774694915917e-05, "loss": 0.5361, "step": 32361 }, { "epoch": 0.8885777045579352, "grad_norm": 0.38908153772354126, "learning_rate": 1.1765349586311727e-05, "loss": 0.5182, "step": 32362 }, { "epoch": 0.8886051619989017, "grad_norm": 0.3730350136756897, "learning_rate": 1.17649244744146e-05, "loss": 0.4621, "step": 32363 }, { "epoch": 0.8886326194398682, "grad_norm": 0.4007670283317566, "learning_rate": 1.1764499359225319e-05, "loss": 0.4809, "step": 32364 }, { "epoch": 0.8886600768808347, "grad_norm": 0.4085747003555298, "learning_rate": 1.1764074240744687e-05, "loss": 0.5064, "step": 32365 }, { "epoch": 0.8886875343218013, "grad_norm": 0.4137759506702423, "learning_rate": 1.1763649118973493e-05, "loss": 0.492, "step": 32366 }, { "epoch": 0.8887149917627677, "grad_norm": 0.44986778497695923, "learning_rate": 1.1763223993912532e-05, "loss": 0.4696, "step": 32367 }, { "epoch": 0.8887424492037342, "grad_norm": 0.39771515130996704, "learning_rate": 1.1762798865562594e-05, "loss": 0.5037, "step": 32368 }, { "epoch": 0.8887699066447007, "grad_norm": 0.37728968262672424, "learning_rate": 1.1762373733924473e-05, "loss": 0.4566, "step": 32369 }, { "epoch": 0.8887973640856672, "grad_norm": 0.41342806816101074, "learning_rate": 1.1761948598998964e-05, "loss": 0.5826, "step": 32370 }, { "epoch": 0.8888248215266337, "grad_norm": 0.40776675939559937, "learning_rate": 1.1761523460786857e-05, "loss": 0.5533, "step": 32371 }, { "epoch": 0.8888522789676002, "grad_norm": 0.4128161072731018, "learning_rate": 1.1761098319288945e-05, "loss": 0.4897, "step": 32372 }, { "epoch": 0.8888797364085668, "grad_norm": 0.3733905851840973, "learning_rate": 1.1760673174506028e-05, "loss": 0.497, "step": 32373 }, { "epoch": 0.8889071938495332, "grad_norm": 0.3621194064617157, "learning_rate": 1.176024802643889e-05, "loss": 0.537, "step": 32374 }, { "epoch": 0.8889346512904998, "grad_norm": 0.38264036178588867, "learning_rate": 1.1759822875088327e-05, "loss": 0.4505, "step": 32375 }, { "epoch": 0.8889621087314662, "grad_norm": 0.42262452840805054, "learning_rate": 1.1759397720455135e-05, "loss": 0.5588, "step": 32376 }, { "epoch": 0.8889895661724327, "grad_norm": 0.3561718165874481, "learning_rate": 1.1758972562540104e-05, "loss": 0.4528, "step": 32377 }, { "epoch": 0.8890170236133992, "grad_norm": 0.3670588433742523, "learning_rate": 1.1758547401344027e-05, "loss": 0.511, "step": 32378 }, { "epoch": 0.8890444810543657, "grad_norm": 0.41049832105636597, "learning_rate": 1.1758122236867697e-05, "loss": 0.4169, "step": 32379 }, { "epoch": 0.8890719384953323, "grad_norm": 0.47716331481933594, "learning_rate": 1.1757697069111908e-05, "loss": 0.5833, "step": 32380 }, { "epoch": 0.8890993959362987, "grad_norm": 0.41590675711631775, "learning_rate": 1.1757271898077456e-05, "loss": 0.545, "step": 32381 }, { "epoch": 0.8891268533772653, "grad_norm": 0.4433037340641022, "learning_rate": 1.1756846723765127e-05, "loss": 0.569, "step": 32382 }, { "epoch": 0.8891543108182317, "grad_norm": 0.3423425853252411, "learning_rate": 1.1756421546175722e-05, "loss": 0.4544, "step": 32383 }, { "epoch": 0.8891817682591983, "grad_norm": 0.4555296003818512, "learning_rate": 1.1755996365310028e-05, "loss": 0.531, "step": 32384 }, { "epoch": 0.8892092257001647, "grad_norm": 0.44451913237571716, "learning_rate": 1.1755571181168842e-05, "loss": 0.6125, "step": 32385 }, { "epoch": 0.8892366831411312, "grad_norm": 0.38836273550987244, "learning_rate": 1.1755145993752954e-05, "loss": 0.5076, "step": 32386 }, { "epoch": 0.8892641405820978, "grad_norm": 0.41401177644729614, "learning_rate": 1.1754720803063158e-05, "loss": 0.4525, "step": 32387 }, { "epoch": 0.8892915980230642, "grad_norm": 0.439525842666626, "learning_rate": 1.1754295609100249e-05, "loss": 0.6742, "step": 32388 }, { "epoch": 0.8893190554640308, "grad_norm": 0.3925476372241974, "learning_rate": 1.1753870411865016e-05, "loss": 0.454, "step": 32389 }, { "epoch": 0.8893465129049972, "grad_norm": 0.394071489572525, "learning_rate": 1.1753445211358259e-05, "loss": 0.4827, "step": 32390 }, { "epoch": 0.8893739703459638, "grad_norm": 0.3309151828289032, "learning_rate": 1.1753020007580764e-05, "loss": 0.4259, "step": 32391 }, { "epoch": 0.8894014277869302, "grad_norm": 0.3527893126010895, "learning_rate": 1.1752594800533327e-05, "loss": 0.4615, "step": 32392 }, { "epoch": 0.8894288852278968, "grad_norm": 0.32082241773605347, "learning_rate": 1.1752169590216745e-05, "loss": 0.4563, "step": 32393 }, { "epoch": 0.8894563426688633, "grad_norm": 0.4568484127521515, "learning_rate": 1.1751744376631806e-05, "loss": 0.5384, "step": 32394 }, { "epoch": 0.8894838001098297, "grad_norm": 0.392188161611557, "learning_rate": 1.1751319159779302e-05, "loss": 0.5142, "step": 32395 }, { "epoch": 0.8895112575507963, "grad_norm": 0.39252617955207825, "learning_rate": 1.175089393966003e-05, "loss": 0.5056, "step": 32396 }, { "epoch": 0.8895387149917627, "grad_norm": 0.3725234270095825, "learning_rate": 1.1750468716274782e-05, "loss": 0.5134, "step": 32397 }, { "epoch": 0.8895661724327293, "grad_norm": 0.3647273778915405, "learning_rate": 1.1750043489624351e-05, "loss": 0.414, "step": 32398 }, { "epoch": 0.8895936298736957, "grad_norm": 0.4010416567325592, "learning_rate": 1.1749618259709534e-05, "loss": 0.5629, "step": 32399 }, { "epoch": 0.8896210873146623, "grad_norm": 0.39836594462394714, "learning_rate": 1.1749193026531117e-05, "loss": 0.4913, "step": 32400 }, { "epoch": 0.8896485447556288, "grad_norm": 0.41681885719299316, "learning_rate": 1.1748767790089897e-05, "loss": 0.4631, "step": 32401 }, { "epoch": 0.8896760021965953, "grad_norm": 0.38705354928970337, "learning_rate": 1.1748342550386667e-05, "loss": 0.4992, "step": 32402 }, { "epoch": 0.8897034596375618, "grad_norm": 0.4186708927154541, "learning_rate": 1.174791730742222e-05, "loss": 0.4924, "step": 32403 }, { "epoch": 0.8897309170785282, "grad_norm": 0.42460423707962036, "learning_rate": 1.174749206119735e-05, "loss": 0.4569, "step": 32404 }, { "epoch": 0.8897583745194948, "grad_norm": 0.4063410758972168, "learning_rate": 1.1747066811712849e-05, "loss": 0.509, "step": 32405 }, { "epoch": 0.8897858319604612, "grad_norm": 0.41847217082977295, "learning_rate": 1.1746641558969511e-05, "loss": 0.4928, "step": 32406 }, { "epoch": 0.8898132894014278, "grad_norm": 0.41622909903526306, "learning_rate": 1.174621630296813e-05, "loss": 0.5318, "step": 32407 }, { "epoch": 0.8898407468423943, "grad_norm": 0.3933540880680084, "learning_rate": 1.1745791043709496e-05, "loss": 0.4907, "step": 32408 }, { "epoch": 0.8898682042833608, "grad_norm": 0.4651888310909271, "learning_rate": 1.1745365781194406e-05, "loss": 0.4883, "step": 32409 }, { "epoch": 0.8898956617243273, "grad_norm": 0.4539794921875, "learning_rate": 1.174494051542365e-05, "loss": 0.6052, "step": 32410 }, { "epoch": 0.8899231191652938, "grad_norm": 0.5250715613365173, "learning_rate": 1.1744515246398027e-05, "loss": 0.5026, "step": 32411 }, { "epoch": 0.8899505766062603, "grad_norm": 0.384137362241745, "learning_rate": 1.1744089974118325e-05, "loss": 0.4773, "step": 32412 }, { "epoch": 0.8899780340472268, "grad_norm": 0.4267968237400055, "learning_rate": 1.1743664698585335e-05, "loss": 0.4936, "step": 32413 }, { "epoch": 0.8900054914881933, "grad_norm": 0.8176286816596985, "learning_rate": 1.1743239419799859e-05, "loss": 0.5174, "step": 32414 }, { "epoch": 0.8900329489291599, "grad_norm": 0.39010506868362427, "learning_rate": 1.1742814137762679e-05, "loss": 0.4746, "step": 32415 }, { "epoch": 0.8900604063701263, "grad_norm": 0.4035094678401947, "learning_rate": 1.17423888524746e-05, "loss": 0.4107, "step": 32416 }, { "epoch": 0.8900878638110928, "grad_norm": 0.3504658043384552, "learning_rate": 1.1741963563936407e-05, "loss": 0.4646, "step": 32417 }, { "epoch": 0.8901153212520593, "grad_norm": 0.39697760343551636, "learning_rate": 1.1741538272148896e-05, "loss": 0.5116, "step": 32418 }, { "epoch": 0.8901427786930258, "grad_norm": 0.39515557885169983, "learning_rate": 1.1741112977112862e-05, "loss": 0.5875, "step": 32419 }, { "epoch": 0.8901702361339923, "grad_norm": 0.50847989320755, "learning_rate": 1.1740687678829094e-05, "loss": 0.6076, "step": 32420 }, { "epoch": 0.8901976935749588, "grad_norm": 0.4187418818473816, "learning_rate": 1.174026237729839e-05, "loss": 0.5485, "step": 32421 }, { "epoch": 0.8902251510159254, "grad_norm": 0.38995087146759033, "learning_rate": 1.173983707252154e-05, "loss": 0.519, "step": 32422 }, { "epoch": 0.8902526084568918, "grad_norm": 0.4382348358631134, "learning_rate": 1.173941176449934e-05, "loss": 0.4671, "step": 32423 }, { "epoch": 0.8902800658978584, "grad_norm": 0.36309435963630676, "learning_rate": 1.1738986453232582e-05, "loss": 0.4533, "step": 32424 }, { "epoch": 0.8903075233388248, "grad_norm": 0.3785346448421478, "learning_rate": 1.1738561138722058e-05, "loss": 0.4326, "step": 32425 }, { "epoch": 0.8903349807797913, "grad_norm": 0.4069986045360565, "learning_rate": 1.1738135820968562e-05, "loss": 0.5193, "step": 32426 }, { "epoch": 0.8903624382207578, "grad_norm": 0.40305399894714355, "learning_rate": 1.173771049997289e-05, "loss": 0.5261, "step": 32427 }, { "epoch": 0.8903898956617243, "grad_norm": 0.41060611605644226, "learning_rate": 1.173728517573583e-05, "loss": 0.5317, "step": 32428 }, { "epoch": 0.8904173531026909, "grad_norm": 0.351764976978302, "learning_rate": 1.1736859848258183e-05, "loss": 0.4578, "step": 32429 }, { "epoch": 0.8904448105436573, "grad_norm": 0.699099600315094, "learning_rate": 1.1736434517540735e-05, "loss": 0.3935, "step": 32430 }, { "epoch": 0.8904722679846239, "grad_norm": 0.40426191687583923, "learning_rate": 1.1736009183584283e-05, "loss": 0.4564, "step": 32431 }, { "epoch": 0.8904997254255903, "grad_norm": 0.4306930601596832, "learning_rate": 1.173558384638962e-05, "loss": 0.5574, "step": 32432 }, { "epoch": 0.8905271828665569, "grad_norm": 0.41826358437538147, "learning_rate": 1.1735158505957537e-05, "loss": 0.5673, "step": 32433 }, { "epoch": 0.8905546403075233, "grad_norm": 0.3304488956928253, "learning_rate": 1.1734733162288834e-05, "loss": 0.4091, "step": 32434 }, { "epoch": 0.8905820977484898, "grad_norm": 0.48887091875076294, "learning_rate": 1.1734307815384298e-05, "loss": 0.446, "step": 32435 }, { "epoch": 0.8906095551894564, "grad_norm": 0.38786226511001587, "learning_rate": 1.1733882465244724e-05, "loss": 0.5353, "step": 32436 }, { "epoch": 0.8906370126304228, "grad_norm": 0.37554997205734253, "learning_rate": 1.1733457111870905e-05, "loss": 0.5003, "step": 32437 }, { "epoch": 0.8906644700713894, "grad_norm": 0.35488516092300415, "learning_rate": 1.1733031755263637e-05, "loss": 0.4615, "step": 32438 }, { "epoch": 0.8906919275123558, "grad_norm": 0.3990750312805176, "learning_rate": 1.1732606395423711e-05, "loss": 0.5104, "step": 32439 }, { "epoch": 0.8907193849533224, "grad_norm": 0.398849755525589, "learning_rate": 1.1732181032351921e-05, "loss": 0.4935, "step": 32440 }, { "epoch": 0.8907468423942888, "grad_norm": 0.46101969480514526, "learning_rate": 1.1731755666049058e-05, "loss": 0.453, "step": 32441 }, { "epoch": 0.8907742998352554, "grad_norm": 0.36402982473373413, "learning_rate": 1.1731330296515924e-05, "loss": 0.4695, "step": 32442 }, { "epoch": 0.8908017572762219, "grad_norm": 0.4021591246128082, "learning_rate": 1.1730904923753301e-05, "loss": 0.5031, "step": 32443 }, { "epoch": 0.8908292147171883, "grad_norm": 0.4736441373825073, "learning_rate": 1.173047954776199e-05, "loss": 0.4975, "step": 32444 }, { "epoch": 0.8908566721581549, "grad_norm": 0.4110836386680603, "learning_rate": 1.1730054168542784e-05, "loss": 0.5047, "step": 32445 }, { "epoch": 0.8908841295991213, "grad_norm": 0.3443698287010193, "learning_rate": 1.1729628786096469e-05, "loss": 0.4938, "step": 32446 }, { "epoch": 0.8909115870400879, "grad_norm": 0.39684832096099854, "learning_rate": 1.172920340042385e-05, "loss": 0.5027, "step": 32447 }, { "epoch": 0.8909390444810543, "grad_norm": 0.37833935022354126, "learning_rate": 1.1728778011525715e-05, "loss": 0.5145, "step": 32448 }, { "epoch": 0.8909665019220209, "grad_norm": 0.39248815178871155, "learning_rate": 1.1728352619402854e-05, "loss": 0.5254, "step": 32449 }, { "epoch": 0.8909939593629874, "grad_norm": 0.38658639788627625, "learning_rate": 1.1727927224056063e-05, "loss": 0.4603, "step": 32450 }, { "epoch": 0.8910214168039539, "grad_norm": 0.39409154653549194, "learning_rate": 1.1727501825486139e-05, "loss": 0.5641, "step": 32451 }, { "epoch": 0.8910488742449204, "grad_norm": 0.35562965273857117, "learning_rate": 1.1727076423693872e-05, "loss": 0.478, "step": 32452 }, { "epoch": 0.8910763316858868, "grad_norm": 0.41538628935813904, "learning_rate": 1.1726651018680058e-05, "loss": 0.5032, "step": 32453 }, { "epoch": 0.8911037891268534, "grad_norm": 0.3801780641078949, "learning_rate": 1.1726225610445486e-05, "loss": 0.4434, "step": 32454 }, { "epoch": 0.8911312465678198, "grad_norm": 0.33724385499954224, "learning_rate": 1.1725800198990954e-05, "loss": 0.3648, "step": 32455 }, { "epoch": 0.8911587040087864, "grad_norm": 0.4919099807739258, "learning_rate": 1.1725374784317254e-05, "loss": 0.603, "step": 32456 }, { "epoch": 0.8911861614497529, "grad_norm": 0.3811601996421814, "learning_rate": 1.1724949366425175e-05, "loss": 0.4982, "step": 32457 }, { "epoch": 0.8912136188907194, "grad_norm": 0.4133003056049347, "learning_rate": 1.172452394531552e-05, "loss": 0.6423, "step": 32458 }, { "epoch": 0.8912410763316859, "grad_norm": 0.4586585760116577, "learning_rate": 1.1724098520989076e-05, "loss": 0.4948, "step": 32459 }, { "epoch": 0.8912685337726524, "grad_norm": 0.39781269431114197, "learning_rate": 1.1723673093446637e-05, "loss": 0.5143, "step": 32460 }, { "epoch": 0.8912959912136189, "grad_norm": 0.43534761667251587, "learning_rate": 1.1723247662689e-05, "loss": 0.4383, "step": 32461 }, { "epoch": 0.8913234486545853, "grad_norm": 0.3441343903541565, "learning_rate": 1.1722822228716952e-05, "loss": 0.5113, "step": 32462 }, { "epoch": 0.8913509060955519, "grad_norm": 0.3912842571735382, "learning_rate": 1.1722396791531293e-05, "loss": 0.5308, "step": 32463 }, { "epoch": 0.8913783635365184, "grad_norm": 0.43969395756721497, "learning_rate": 1.1721971351132815e-05, "loss": 0.4847, "step": 32464 }, { "epoch": 0.8914058209774849, "grad_norm": 0.4065682888031006, "learning_rate": 1.1721545907522312e-05, "loss": 0.4496, "step": 32465 }, { "epoch": 0.8914332784184514, "grad_norm": 0.46845540404319763, "learning_rate": 1.1721120460700575e-05, "loss": 0.502, "step": 32466 }, { "epoch": 0.8914607358594179, "grad_norm": 0.3653510510921478, "learning_rate": 1.1720695010668398e-05, "loss": 0.3692, "step": 32467 }, { "epoch": 0.8914881933003844, "grad_norm": 0.38543039560317993, "learning_rate": 1.1720269557426576e-05, "loss": 0.4764, "step": 32468 }, { "epoch": 0.8915156507413509, "grad_norm": 0.35894346237182617, "learning_rate": 1.1719844100975903e-05, "loss": 0.5105, "step": 32469 }, { "epoch": 0.8915431081823174, "grad_norm": 0.3557935357093811, "learning_rate": 1.1719418641317168e-05, "loss": 0.4714, "step": 32470 }, { "epoch": 0.891570565623284, "grad_norm": 0.3557232916355133, "learning_rate": 1.1718993178451175e-05, "loss": 0.5171, "step": 32471 }, { "epoch": 0.8915980230642504, "grad_norm": 0.3811531662940979, "learning_rate": 1.1718567712378706e-05, "loss": 0.4417, "step": 32472 }, { "epoch": 0.891625480505217, "grad_norm": 0.37903130054473877, "learning_rate": 1.171814224310056e-05, "loss": 0.4459, "step": 32473 }, { "epoch": 0.8916529379461834, "grad_norm": 0.3766954839229584, "learning_rate": 1.1717716770617536e-05, "loss": 0.5145, "step": 32474 }, { "epoch": 0.8916803953871499, "grad_norm": 0.4396255910396576, "learning_rate": 1.1717291294930415e-05, "loss": 0.4645, "step": 32475 }, { "epoch": 0.8917078528281164, "grad_norm": 0.382417231798172, "learning_rate": 1.1716865816040002e-05, "loss": 0.5129, "step": 32476 }, { "epoch": 0.8917353102690829, "grad_norm": 0.4136011600494385, "learning_rate": 1.1716440333947083e-05, "loss": 0.5576, "step": 32477 }, { "epoch": 0.8917627677100495, "grad_norm": 0.40323665738105774, "learning_rate": 1.1716014848652457e-05, "loss": 0.4371, "step": 32478 }, { "epoch": 0.8917902251510159, "grad_norm": 0.3513175845146179, "learning_rate": 1.1715589360156918e-05, "loss": 0.4489, "step": 32479 }, { "epoch": 0.8918176825919825, "grad_norm": 0.45766788721084595, "learning_rate": 1.1715163868461253e-05, "loss": 0.532, "step": 32480 }, { "epoch": 0.8918451400329489, "grad_norm": 0.35563552379608154, "learning_rate": 1.1714738373566262e-05, "loss": 0.5176, "step": 32481 }, { "epoch": 0.8918725974739155, "grad_norm": 0.42652571201324463, "learning_rate": 1.1714312875472736e-05, "loss": 0.5213, "step": 32482 }, { "epoch": 0.8919000549148819, "grad_norm": 0.3864377737045288, "learning_rate": 1.1713887374181469e-05, "loss": 0.5289, "step": 32483 }, { "epoch": 0.8919275123558484, "grad_norm": 0.34865570068359375, "learning_rate": 1.1713461869693256e-05, "loss": 0.4575, "step": 32484 }, { "epoch": 0.891954969796815, "grad_norm": 0.3778305649757385, "learning_rate": 1.171303636200889e-05, "loss": 0.4684, "step": 32485 }, { "epoch": 0.8919824272377814, "grad_norm": 0.42127057909965515, "learning_rate": 1.1712610851129162e-05, "loss": 0.5192, "step": 32486 }, { "epoch": 0.892009884678748, "grad_norm": 0.4252839982509613, "learning_rate": 1.1712185337054874e-05, "loss": 0.5792, "step": 32487 }, { "epoch": 0.8920373421197144, "grad_norm": 0.334799587726593, "learning_rate": 1.1711759819786808e-05, "loss": 0.4278, "step": 32488 }, { "epoch": 0.892064799560681, "grad_norm": 0.39321208000183105, "learning_rate": 1.1711334299325765e-05, "loss": 0.4736, "step": 32489 }, { "epoch": 0.8920922570016474, "grad_norm": 0.42462244629859924, "learning_rate": 1.1710908775672536e-05, "loss": 0.4465, "step": 32490 }, { "epoch": 0.892119714442614, "grad_norm": 0.4046567380428314, "learning_rate": 1.171048324882792e-05, "loss": 0.483, "step": 32491 }, { "epoch": 0.8921471718835805, "grad_norm": 0.3680742383003235, "learning_rate": 1.1710057718792705e-05, "loss": 0.4484, "step": 32492 }, { "epoch": 0.8921746293245469, "grad_norm": 0.3571147620677948, "learning_rate": 1.1709632185567685e-05, "loss": 0.4912, "step": 32493 }, { "epoch": 0.8922020867655135, "grad_norm": 0.4114469587802887, "learning_rate": 1.1709206649153656e-05, "loss": 0.4984, "step": 32494 }, { "epoch": 0.8922295442064799, "grad_norm": 0.4509887397289276, "learning_rate": 1.1708781109551413e-05, "loss": 0.4454, "step": 32495 }, { "epoch": 0.8922570016474465, "grad_norm": 0.42692023515701294, "learning_rate": 1.1708355566761747e-05, "loss": 0.4705, "step": 32496 }, { "epoch": 0.8922844590884129, "grad_norm": 0.43807896971702576, "learning_rate": 1.1707930020785452e-05, "loss": 0.5619, "step": 32497 }, { "epoch": 0.8923119165293795, "grad_norm": 0.4062166213989258, "learning_rate": 1.170750447162332e-05, "loss": 0.5549, "step": 32498 }, { "epoch": 0.892339373970346, "grad_norm": 0.3357721269130707, "learning_rate": 1.1707078919276153e-05, "loss": 0.4754, "step": 32499 }, { "epoch": 0.8923668314113125, "grad_norm": 0.34605303406715393, "learning_rate": 1.1706653363744736e-05, "loss": 0.4792, "step": 32500 }, { "epoch": 0.892394288852279, "grad_norm": 0.39949265122413635, "learning_rate": 1.1706227805029864e-05, "loss": 0.434, "step": 32501 }, { "epoch": 0.8924217462932454, "grad_norm": 0.42568159103393555, "learning_rate": 1.1705802243132337e-05, "loss": 0.4876, "step": 32502 }, { "epoch": 0.892449203734212, "grad_norm": 0.38674062490463257, "learning_rate": 1.1705376678052938e-05, "loss": 0.5222, "step": 32503 }, { "epoch": 0.8924766611751784, "grad_norm": 0.40564924478530884, "learning_rate": 1.1704951109792473e-05, "loss": 0.4667, "step": 32504 }, { "epoch": 0.892504118616145, "grad_norm": 0.41106119751930237, "learning_rate": 1.1704525538351728e-05, "loss": 0.485, "step": 32505 }, { "epoch": 0.8925315760571115, "grad_norm": 0.4139018654823303, "learning_rate": 1.1704099963731496e-05, "loss": 0.5707, "step": 32506 }, { "epoch": 0.892559033498078, "grad_norm": 0.5302929282188416, "learning_rate": 1.1703674385932578e-05, "loss": 0.5028, "step": 32507 }, { "epoch": 0.8925864909390445, "grad_norm": 0.36119917035102844, "learning_rate": 1.170324880495576e-05, "loss": 0.566, "step": 32508 }, { "epoch": 0.892613948380011, "grad_norm": 0.4036698639392853, "learning_rate": 1.1702823220801842e-05, "loss": 0.6021, "step": 32509 }, { "epoch": 0.8926414058209775, "grad_norm": 0.39405354857444763, "learning_rate": 1.1702397633471615e-05, "loss": 0.4292, "step": 32510 }, { "epoch": 0.892668863261944, "grad_norm": 0.40856075286865234, "learning_rate": 1.1701972042965872e-05, "loss": 0.5068, "step": 32511 }, { "epoch": 0.8926963207029105, "grad_norm": 0.4929540455341339, "learning_rate": 1.170154644928541e-05, "loss": 0.4838, "step": 32512 }, { "epoch": 0.892723778143877, "grad_norm": 0.3865600526332855, "learning_rate": 1.1701120852431016e-05, "loss": 0.5091, "step": 32513 }, { "epoch": 0.8927512355848435, "grad_norm": 0.3650461435317993, "learning_rate": 1.1700695252403493e-05, "loss": 0.4434, "step": 32514 }, { "epoch": 0.89277869302581, "grad_norm": 0.3840435743331909, "learning_rate": 1.1700269649203628e-05, "loss": 0.573, "step": 32515 }, { "epoch": 0.8928061504667765, "grad_norm": 0.4811302721500397, "learning_rate": 1.169984404283222e-05, "loss": 0.5171, "step": 32516 }, { "epoch": 0.892833607907743, "grad_norm": 0.397836834192276, "learning_rate": 1.169941843329006e-05, "loss": 0.4936, "step": 32517 }, { "epoch": 0.8928610653487095, "grad_norm": 0.4077151417732239, "learning_rate": 1.169899282057794e-05, "loss": 0.5473, "step": 32518 }, { "epoch": 0.892888522789676, "grad_norm": 0.4028734862804413, "learning_rate": 1.169856720469666e-05, "loss": 0.4753, "step": 32519 }, { "epoch": 0.8929159802306426, "grad_norm": 0.38280826807022095, "learning_rate": 1.1698141585647006e-05, "loss": 0.5099, "step": 32520 }, { "epoch": 0.892943437671609, "grad_norm": 0.4188937544822693, "learning_rate": 1.1697715963429776e-05, "loss": 0.4877, "step": 32521 }, { "epoch": 0.8929708951125755, "grad_norm": 0.45384013652801514, "learning_rate": 1.1697290338045765e-05, "loss": 0.5166, "step": 32522 }, { "epoch": 0.892998352553542, "grad_norm": 0.34766751527786255, "learning_rate": 1.1696864709495767e-05, "loss": 0.4288, "step": 32523 }, { "epoch": 0.8930258099945085, "grad_norm": 0.406495600938797, "learning_rate": 1.1696439077780574e-05, "loss": 0.5127, "step": 32524 }, { "epoch": 0.893053267435475, "grad_norm": 0.3738694489002228, "learning_rate": 1.1696013442900982e-05, "loss": 0.4391, "step": 32525 }, { "epoch": 0.8930807248764415, "grad_norm": 0.492237389087677, "learning_rate": 1.1695587804857782e-05, "loss": 0.4348, "step": 32526 }, { "epoch": 0.8931081823174081, "grad_norm": 0.34916797280311584, "learning_rate": 1.1695162163651767e-05, "loss": 0.5332, "step": 32527 }, { "epoch": 0.8931356397583745, "grad_norm": 0.3778017461299896, "learning_rate": 1.1694736519283738e-05, "loss": 0.4467, "step": 32528 }, { "epoch": 0.8931630971993411, "grad_norm": 0.5276066660881042, "learning_rate": 1.1694310871754485e-05, "loss": 0.5121, "step": 32529 }, { "epoch": 0.8931905546403075, "grad_norm": 0.34471598267555237, "learning_rate": 1.1693885221064798e-05, "loss": 0.3963, "step": 32530 }, { "epoch": 0.893218012081274, "grad_norm": 0.39737752079963684, "learning_rate": 1.1693459567215478e-05, "loss": 0.4895, "step": 32531 }, { "epoch": 0.8932454695222405, "grad_norm": 0.40448781847953796, "learning_rate": 1.169303391020731e-05, "loss": 0.4668, "step": 32532 }, { "epoch": 0.893272926963207, "grad_norm": 0.41511252522468567, "learning_rate": 1.1692608250041098e-05, "loss": 0.4714, "step": 32533 }, { "epoch": 0.8933003844041736, "grad_norm": 0.43033212423324585, "learning_rate": 1.169218258671763e-05, "loss": 0.4783, "step": 32534 }, { "epoch": 0.89332784184514, "grad_norm": 0.4088570773601532, "learning_rate": 1.16917569202377e-05, "loss": 0.4476, "step": 32535 }, { "epoch": 0.8933552992861066, "grad_norm": 0.38479411602020264, "learning_rate": 1.1691331250602106e-05, "loss": 0.4298, "step": 32536 }, { "epoch": 0.893382756727073, "grad_norm": 0.3611641526222229, "learning_rate": 1.1690905577811638e-05, "loss": 0.4685, "step": 32537 }, { "epoch": 0.8934102141680396, "grad_norm": 0.40367981791496277, "learning_rate": 1.169047990186709e-05, "loss": 0.5107, "step": 32538 }, { "epoch": 0.893437671609006, "grad_norm": 0.4271829426288605, "learning_rate": 1.1690054222769258e-05, "loss": 0.5252, "step": 32539 }, { "epoch": 0.8934651290499726, "grad_norm": 0.3868961036205292, "learning_rate": 1.1689628540518938e-05, "loss": 0.4547, "step": 32540 }, { "epoch": 0.893492586490939, "grad_norm": 0.40087834000587463, "learning_rate": 1.1689202855116922e-05, "loss": 0.5829, "step": 32541 }, { "epoch": 0.8935200439319055, "grad_norm": 0.37643304467201233, "learning_rate": 1.1688777166564e-05, "loss": 0.4517, "step": 32542 }, { "epoch": 0.8935475013728721, "grad_norm": 0.4646138846874237, "learning_rate": 1.1688351474860972e-05, "loss": 0.4663, "step": 32543 }, { "epoch": 0.8935749588138385, "grad_norm": 0.41672787070274353, "learning_rate": 1.1687925780008626e-05, "loss": 0.5313, "step": 32544 }, { "epoch": 0.8936024162548051, "grad_norm": 0.3525448739528656, "learning_rate": 1.1687500082007766e-05, "loss": 0.4867, "step": 32545 }, { "epoch": 0.8936298736957715, "grad_norm": 0.35519030690193176, "learning_rate": 1.1687074380859175e-05, "loss": 0.4652, "step": 32546 }, { "epoch": 0.8936573311367381, "grad_norm": 0.35632357001304626, "learning_rate": 1.1686648676563652e-05, "loss": 0.5282, "step": 32547 }, { "epoch": 0.8936847885777045, "grad_norm": 0.4244459867477417, "learning_rate": 1.1686222969121995e-05, "loss": 0.4843, "step": 32548 }, { "epoch": 0.893712246018671, "grad_norm": 0.37945252656936646, "learning_rate": 1.168579725853499e-05, "loss": 0.4921, "step": 32549 }, { "epoch": 0.8937397034596376, "grad_norm": 0.41924500465393066, "learning_rate": 1.1685371544803434e-05, "loss": 0.5497, "step": 32550 }, { "epoch": 0.893767160900604, "grad_norm": 0.3704057037830353, "learning_rate": 1.1684945827928126e-05, "loss": 0.4594, "step": 32551 }, { "epoch": 0.8937946183415706, "grad_norm": 0.3957259953022003, "learning_rate": 1.1684520107909854e-05, "loss": 0.4715, "step": 32552 }, { "epoch": 0.893822075782537, "grad_norm": 0.3904044032096863, "learning_rate": 1.1684094384749415e-05, "loss": 0.4949, "step": 32553 }, { "epoch": 0.8938495332235036, "grad_norm": 0.4437530040740967, "learning_rate": 1.1683668658447603e-05, "loss": 0.5827, "step": 32554 }, { "epoch": 0.89387699066447, "grad_norm": 0.381779283285141, "learning_rate": 1.168324292900521e-05, "loss": 0.5405, "step": 32555 }, { "epoch": 0.8939044481054366, "grad_norm": 0.38041165471076965, "learning_rate": 1.1682817196423033e-05, "loss": 0.482, "step": 32556 }, { "epoch": 0.8939319055464031, "grad_norm": 0.3589346408843994, "learning_rate": 1.1682391460701863e-05, "loss": 0.4728, "step": 32557 }, { "epoch": 0.8939593629873696, "grad_norm": 0.4665050804615021, "learning_rate": 1.16819657218425e-05, "loss": 0.4561, "step": 32558 }, { "epoch": 0.8939868204283361, "grad_norm": 0.3728574216365814, "learning_rate": 1.1681539979845731e-05, "loss": 0.46, "step": 32559 }, { "epoch": 0.8940142778693025, "grad_norm": 0.37752634286880493, "learning_rate": 1.1681114234712353e-05, "loss": 0.4666, "step": 32560 }, { "epoch": 0.8940417353102691, "grad_norm": 0.38963356614112854, "learning_rate": 1.1680688486443161e-05, "loss": 0.4949, "step": 32561 }, { "epoch": 0.8940691927512355, "grad_norm": 0.4223586320877075, "learning_rate": 1.1680262735038949e-05, "loss": 0.5086, "step": 32562 }, { "epoch": 0.8940966501922021, "grad_norm": 0.3784710168838501, "learning_rate": 1.167983698050051e-05, "loss": 0.4964, "step": 32563 }, { "epoch": 0.8941241076331686, "grad_norm": 0.41445478796958923, "learning_rate": 1.1679411222828641e-05, "loss": 0.4874, "step": 32564 }, { "epoch": 0.8941515650741351, "grad_norm": 0.41318026185035706, "learning_rate": 1.167898546202413e-05, "loss": 0.4654, "step": 32565 }, { "epoch": 0.8941790225151016, "grad_norm": 0.38548168540000916, "learning_rate": 1.1678559698087778e-05, "loss": 0.4939, "step": 32566 }, { "epoch": 0.8942064799560681, "grad_norm": 0.3867526650428772, "learning_rate": 1.1678133931020377e-05, "loss": 0.5269, "step": 32567 }, { "epoch": 0.8942339373970346, "grad_norm": 0.382984459400177, "learning_rate": 1.1677708160822719e-05, "loss": 0.5331, "step": 32568 }, { "epoch": 0.894261394838001, "grad_norm": 0.37195590138435364, "learning_rate": 1.1677282387495601e-05, "loss": 0.4826, "step": 32569 }, { "epoch": 0.8942888522789676, "grad_norm": 0.4366418123245239, "learning_rate": 1.1676856611039815e-05, "loss": 0.5392, "step": 32570 }, { "epoch": 0.8943163097199341, "grad_norm": 0.3595702648162842, "learning_rate": 1.1676430831456158e-05, "loss": 0.4419, "step": 32571 }, { "epoch": 0.8943437671609006, "grad_norm": 0.34699302911758423, "learning_rate": 1.167600504874542e-05, "loss": 0.4392, "step": 32572 }, { "epoch": 0.8943712246018671, "grad_norm": 0.3654663562774658, "learning_rate": 1.1675579262908398e-05, "loss": 0.4035, "step": 32573 }, { "epoch": 0.8943986820428336, "grad_norm": 0.3672502934932709, "learning_rate": 1.1675153473945889e-05, "loss": 0.4208, "step": 32574 }, { "epoch": 0.8944261394838001, "grad_norm": 0.3843938112258911, "learning_rate": 1.1674727681858679e-05, "loss": 0.518, "step": 32575 }, { "epoch": 0.8944535969247666, "grad_norm": 0.3730832636356354, "learning_rate": 1.1674301886647572e-05, "loss": 0.5056, "step": 32576 }, { "epoch": 0.8944810543657331, "grad_norm": 0.416843980550766, "learning_rate": 1.1673876088313355e-05, "loss": 0.4883, "step": 32577 }, { "epoch": 0.8945085118066997, "grad_norm": 0.470184862613678, "learning_rate": 1.1673450286856826e-05, "loss": 0.4605, "step": 32578 }, { "epoch": 0.8945359692476661, "grad_norm": 0.34935232996940613, "learning_rate": 1.1673024482278778e-05, "loss": 0.4627, "step": 32579 }, { "epoch": 0.8945634266886326, "grad_norm": 0.3633621633052826, "learning_rate": 1.1672598674580007e-05, "loss": 0.4584, "step": 32580 }, { "epoch": 0.8945908841295991, "grad_norm": 0.395826518535614, "learning_rate": 1.1672172863761302e-05, "loss": 0.549, "step": 32581 }, { "epoch": 0.8946183415705656, "grad_norm": 0.35172855854034424, "learning_rate": 1.1671747049823465e-05, "loss": 0.4927, "step": 32582 }, { "epoch": 0.8946457990115321, "grad_norm": 0.39305880665779114, "learning_rate": 1.1671321232767282e-05, "loss": 0.5254, "step": 32583 }, { "epoch": 0.8946732564524986, "grad_norm": 0.44602951407432556, "learning_rate": 1.1670895412593555e-05, "loss": 0.5525, "step": 32584 }, { "epoch": 0.8947007138934652, "grad_norm": 0.3906934857368469, "learning_rate": 1.1670469589303073e-05, "loss": 0.4984, "step": 32585 }, { "epoch": 0.8947281713344316, "grad_norm": 0.3662193715572357, "learning_rate": 1.167004376289663e-05, "loss": 0.4205, "step": 32586 }, { "epoch": 0.8947556287753982, "grad_norm": 0.38284143805503845, "learning_rate": 1.1669617933375026e-05, "loss": 0.4211, "step": 32587 }, { "epoch": 0.8947830862163646, "grad_norm": 0.4032760560512543, "learning_rate": 1.1669192100739048e-05, "loss": 0.4761, "step": 32588 }, { "epoch": 0.8948105436573311, "grad_norm": 0.5354055166244507, "learning_rate": 1.16687662649895e-05, "loss": 0.4796, "step": 32589 }, { "epoch": 0.8948380010982976, "grad_norm": 0.4171702563762665, "learning_rate": 1.1668340426127167e-05, "loss": 0.5242, "step": 32590 }, { "epoch": 0.8948654585392641, "grad_norm": 0.4452265202999115, "learning_rate": 1.1667914584152844e-05, "loss": 0.4928, "step": 32591 }, { "epoch": 0.8948929159802307, "grad_norm": 0.38703668117523193, "learning_rate": 1.166748873906733e-05, "loss": 0.519, "step": 32592 }, { "epoch": 0.8949203734211971, "grad_norm": 0.429293155670166, "learning_rate": 1.1667062890871419e-05, "loss": 0.4956, "step": 32593 }, { "epoch": 0.8949478308621637, "grad_norm": 0.3767801523208618, "learning_rate": 1.16666370395659e-05, "loss": 0.5358, "step": 32594 }, { "epoch": 0.8949752883031301, "grad_norm": 0.5724570751190186, "learning_rate": 1.1666211185151575e-05, "loss": 0.5134, "step": 32595 }, { "epoch": 0.8950027457440967, "grad_norm": 0.43475452065467834, "learning_rate": 1.166578532762923e-05, "loss": 0.4879, "step": 32596 }, { "epoch": 0.8950302031850631, "grad_norm": 0.4025103449821472, "learning_rate": 1.1665359466999669e-05, "loss": 0.5131, "step": 32597 }, { "epoch": 0.8950576606260296, "grad_norm": 0.3740403950214386, "learning_rate": 1.166493360326368e-05, "loss": 0.4808, "step": 32598 }, { "epoch": 0.8950851180669962, "grad_norm": 0.388345867395401, "learning_rate": 1.1664507736422053e-05, "loss": 0.4733, "step": 32599 }, { "epoch": 0.8951125755079626, "grad_norm": 0.42624130845069885, "learning_rate": 1.1664081866475595e-05, "loss": 0.4941, "step": 32600 }, { "epoch": 0.8951400329489292, "grad_norm": 0.37275442481040955, "learning_rate": 1.1663655993425087e-05, "loss": 0.4224, "step": 32601 }, { "epoch": 0.8951674903898956, "grad_norm": 0.45872315764427185, "learning_rate": 1.1663230117271333e-05, "loss": 0.506, "step": 32602 }, { "epoch": 0.8951949478308622, "grad_norm": 0.38736942410469055, "learning_rate": 1.1662804238015124e-05, "loss": 0.5161, "step": 32603 }, { "epoch": 0.8952224052718286, "grad_norm": 0.4123973250389099, "learning_rate": 1.1662378355657251e-05, "loss": 0.484, "step": 32604 }, { "epoch": 0.8952498627127952, "grad_norm": 0.36702993512153625, "learning_rate": 1.1661952470198516e-05, "loss": 0.4973, "step": 32605 }, { "epoch": 0.8952773201537617, "grad_norm": 0.39501234889030457, "learning_rate": 1.1661526581639706e-05, "loss": 0.5544, "step": 32606 }, { "epoch": 0.8953047775947282, "grad_norm": 0.39999252557754517, "learning_rate": 1.1661100689981622e-05, "loss": 0.5208, "step": 32607 }, { "epoch": 0.8953322350356947, "grad_norm": 0.3906855285167694, "learning_rate": 1.1660674795225053e-05, "loss": 0.5247, "step": 32608 }, { "epoch": 0.8953596924766611, "grad_norm": 0.4163754880428314, "learning_rate": 1.1660248897370796e-05, "loss": 0.5775, "step": 32609 }, { "epoch": 0.8953871499176277, "grad_norm": 0.3981000483036041, "learning_rate": 1.1659822996419645e-05, "loss": 0.5837, "step": 32610 }, { "epoch": 0.8954146073585941, "grad_norm": 0.3910103738307953, "learning_rate": 1.1659397092372396e-05, "loss": 0.4698, "step": 32611 }, { "epoch": 0.8954420647995607, "grad_norm": 0.36317509412765503, "learning_rate": 1.1658971185229838e-05, "loss": 0.4652, "step": 32612 }, { "epoch": 0.8954695222405272, "grad_norm": 0.3790709972381592, "learning_rate": 1.1658545274992772e-05, "loss": 0.5216, "step": 32613 }, { "epoch": 0.8954969796814937, "grad_norm": 0.42623037099838257, "learning_rate": 1.1658119361661989e-05, "loss": 0.4903, "step": 32614 }, { "epoch": 0.8955244371224602, "grad_norm": 0.3793802261352539, "learning_rate": 1.1657693445238282e-05, "loss": 0.5134, "step": 32615 }, { "epoch": 0.8955518945634267, "grad_norm": 0.33462151885032654, "learning_rate": 1.1657267525722453e-05, "loss": 0.4622, "step": 32616 }, { "epoch": 0.8955793520043932, "grad_norm": 0.43516841530799866, "learning_rate": 1.1656841603115286e-05, "loss": 0.4306, "step": 32617 }, { "epoch": 0.8956068094453596, "grad_norm": 0.43572551012039185, "learning_rate": 1.1656415677417583e-05, "loss": 0.5001, "step": 32618 }, { "epoch": 0.8956342668863262, "grad_norm": 0.4188925623893738, "learning_rate": 1.1655989748630134e-05, "loss": 0.5366, "step": 32619 }, { "epoch": 0.8956617243272927, "grad_norm": 0.3632935583591461, "learning_rate": 1.1655563816753738e-05, "loss": 0.5301, "step": 32620 }, { "epoch": 0.8956891817682592, "grad_norm": 0.44817492365837097, "learning_rate": 1.1655137881789187e-05, "loss": 0.439, "step": 32621 }, { "epoch": 0.8957166392092257, "grad_norm": 0.36852961778640747, "learning_rate": 1.1654711943737273e-05, "loss": 0.4187, "step": 32622 }, { "epoch": 0.8957440966501922, "grad_norm": 0.42394590377807617, "learning_rate": 1.1654286002598797e-05, "loss": 0.4922, "step": 32623 }, { "epoch": 0.8957715540911587, "grad_norm": 0.40181198716163635, "learning_rate": 1.1653860058374549e-05, "loss": 0.4384, "step": 32624 }, { "epoch": 0.8957990115321252, "grad_norm": 0.34506380558013916, "learning_rate": 1.165343411106532e-05, "loss": 0.4096, "step": 32625 }, { "epoch": 0.8958264689730917, "grad_norm": 0.4283783435821533, "learning_rate": 1.1653008160671913e-05, "loss": 0.5172, "step": 32626 }, { "epoch": 0.8958539264140583, "grad_norm": 0.38553136587142944, "learning_rate": 1.1652582207195116e-05, "loss": 0.4793, "step": 32627 }, { "epoch": 0.8958813838550247, "grad_norm": 0.35982194542884827, "learning_rate": 1.1652156250635727e-05, "loss": 0.4765, "step": 32628 }, { "epoch": 0.8959088412959912, "grad_norm": 0.37828367948532104, "learning_rate": 1.1651730290994538e-05, "loss": 0.4661, "step": 32629 }, { "epoch": 0.8959362987369577, "grad_norm": 0.4036012887954712, "learning_rate": 1.1651304328272346e-05, "loss": 0.491, "step": 32630 }, { "epoch": 0.8959637561779242, "grad_norm": 0.40595516562461853, "learning_rate": 1.1650878362469944e-05, "loss": 0.5271, "step": 32631 }, { "epoch": 0.8959912136188907, "grad_norm": 0.49353137612342834, "learning_rate": 1.1650452393588125e-05, "loss": 0.5215, "step": 32632 }, { "epoch": 0.8960186710598572, "grad_norm": 0.43085089325904846, "learning_rate": 1.1650026421627686e-05, "loss": 0.4605, "step": 32633 }, { "epoch": 0.8960461285008238, "grad_norm": 0.3422888517379761, "learning_rate": 1.1649600446589427e-05, "loss": 0.4872, "step": 32634 }, { "epoch": 0.8960735859417902, "grad_norm": 0.34748417139053345, "learning_rate": 1.1649174468474129e-05, "loss": 0.4674, "step": 32635 }, { "epoch": 0.8961010433827568, "grad_norm": 0.41219890117645264, "learning_rate": 1.16487484872826e-05, "loss": 0.5229, "step": 32636 }, { "epoch": 0.8961285008237232, "grad_norm": 0.36417707800865173, "learning_rate": 1.1648322503015623e-05, "loss": 0.4627, "step": 32637 }, { "epoch": 0.8961559582646897, "grad_norm": 0.4020203649997711, "learning_rate": 1.1647896515674002e-05, "loss": 0.5303, "step": 32638 }, { "epoch": 0.8961834157056562, "grad_norm": 0.39101287722587585, "learning_rate": 1.1647470525258528e-05, "loss": 0.4826, "step": 32639 }, { "epoch": 0.8962108731466227, "grad_norm": 0.40349602699279785, "learning_rate": 1.1647044531769996e-05, "loss": 0.4245, "step": 32640 }, { "epoch": 0.8962383305875893, "grad_norm": 0.4252493381500244, "learning_rate": 1.16466185352092e-05, "loss": 0.5964, "step": 32641 }, { "epoch": 0.8962657880285557, "grad_norm": 0.3932085335254669, "learning_rate": 1.1646192535576934e-05, "loss": 0.4328, "step": 32642 }, { "epoch": 0.8962932454695223, "grad_norm": 0.37804657220840454, "learning_rate": 1.1645766532873994e-05, "loss": 0.4187, "step": 32643 }, { "epoch": 0.8963207029104887, "grad_norm": 0.4784456491470337, "learning_rate": 1.1645340527101174e-05, "loss": 0.5875, "step": 32644 }, { "epoch": 0.8963481603514553, "grad_norm": 0.3721034824848175, "learning_rate": 1.164491451825927e-05, "loss": 0.556, "step": 32645 }, { "epoch": 0.8963756177924217, "grad_norm": 0.4007091224193573, "learning_rate": 1.1644488506349075e-05, "loss": 0.4644, "step": 32646 }, { "epoch": 0.8964030752333882, "grad_norm": 0.3999159634113312, "learning_rate": 1.1644062491371382e-05, "loss": 0.5254, "step": 32647 }, { "epoch": 0.8964305326743548, "grad_norm": 0.38134151697158813, "learning_rate": 1.164363647332699e-05, "loss": 0.4093, "step": 32648 }, { "epoch": 0.8964579901153212, "grad_norm": 0.4016670882701874, "learning_rate": 1.164321045221669e-05, "loss": 0.4601, "step": 32649 }, { "epoch": 0.8964854475562878, "grad_norm": 0.5225759148597717, "learning_rate": 1.1642784428041279e-05, "loss": 0.4775, "step": 32650 }, { "epoch": 0.8965129049972542, "grad_norm": 0.3884890675544739, "learning_rate": 1.164235840080155e-05, "loss": 0.4666, "step": 32651 }, { "epoch": 0.8965403624382208, "grad_norm": 0.37912389636039734, "learning_rate": 1.1641932370498298e-05, "loss": 0.5009, "step": 32652 }, { "epoch": 0.8965678198791872, "grad_norm": 0.4009639620780945, "learning_rate": 1.164150633713232e-05, "loss": 0.4635, "step": 32653 }, { "epoch": 0.8965952773201538, "grad_norm": 0.3754901885986328, "learning_rate": 1.1641080300704409e-05, "loss": 0.553, "step": 32654 }, { "epoch": 0.8966227347611203, "grad_norm": 0.37446075677871704, "learning_rate": 1.1640654261215358e-05, "loss": 0.4297, "step": 32655 }, { "epoch": 0.8966501922020867, "grad_norm": 0.42605385184288025, "learning_rate": 1.164022821866596e-05, "loss": 0.4921, "step": 32656 }, { "epoch": 0.8966776496430533, "grad_norm": 0.3782831132411957, "learning_rate": 1.1639802173057017e-05, "loss": 0.4404, "step": 32657 }, { "epoch": 0.8967051070840197, "grad_norm": 0.43848246335983276, "learning_rate": 1.163937612438932e-05, "loss": 0.4914, "step": 32658 }, { "epoch": 0.8967325645249863, "grad_norm": 0.38480740785598755, "learning_rate": 1.1638950072663661e-05, "loss": 0.5043, "step": 32659 }, { "epoch": 0.8967600219659527, "grad_norm": 0.38077038526535034, "learning_rate": 1.163852401788084e-05, "loss": 0.5339, "step": 32660 }, { "epoch": 0.8967874794069193, "grad_norm": 0.3458142876625061, "learning_rate": 1.1638097960041645e-05, "loss": 0.4727, "step": 32661 }, { "epoch": 0.8968149368478858, "grad_norm": 0.3869684636592865, "learning_rate": 1.1637671899146876e-05, "loss": 0.5566, "step": 32662 }, { "epoch": 0.8968423942888523, "grad_norm": 0.4756930470466614, "learning_rate": 1.1637245835197326e-05, "loss": 0.4799, "step": 32663 }, { "epoch": 0.8968698517298188, "grad_norm": 0.3847728669643402, "learning_rate": 1.1636819768193793e-05, "loss": 0.5204, "step": 32664 }, { "epoch": 0.8968973091707853, "grad_norm": 0.41690969467163086, "learning_rate": 1.1636393698137066e-05, "loss": 0.5024, "step": 32665 }, { "epoch": 0.8969247666117518, "grad_norm": 0.42319172620773315, "learning_rate": 1.1635967625027943e-05, "loss": 0.5077, "step": 32666 }, { "epoch": 0.8969522240527182, "grad_norm": 0.36244162917137146, "learning_rate": 1.1635541548867217e-05, "loss": 0.4654, "step": 32667 }, { "epoch": 0.8969796814936848, "grad_norm": 0.43067094683647156, "learning_rate": 1.1635115469655682e-05, "loss": 0.4428, "step": 32668 }, { "epoch": 0.8970071389346513, "grad_norm": 0.35173308849334717, "learning_rate": 1.1634689387394141e-05, "loss": 0.5053, "step": 32669 }, { "epoch": 0.8970345963756178, "grad_norm": 0.4097588062286377, "learning_rate": 1.163426330208338e-05, "loss": 0.4719, "step": 32670 }, { "epoch": 0.8970620538165843, "grad_norm": 0.36013802886009216, "learning_rate": 1.1633837213724193e-05, "loss": 0.4676, "step": 32671 }, { "epoch": 0.8970895112575508, "grad_norm": 0.3869297206401825, "learning_rate": 1.1633411122317381e-05, "loss": 0.4442, "step": 32672 }, { "epoch": 0.8971169686985173, "grad_norm": 0.416865736246109, "learning_rate": 1.1632985027863738e-05, "loss": 0.4441, "step": 32673 }, { "epoch": 0.8971444261394838, "grad_norm": 0.3334949016571045, "learning_rate": 1.1632558930364053e-05, "loss": 0.4544, "step": 32674 }, { "epoch": 0.8971718835804503, "grad_norm": 0.5066133141517639, "learning_rate": 1.1632132829819128e-05, "loss": 0.4912, "step": 32675 }, { "epoch": 0.8971993410214169, "grad_norm": 0.42233145236968994, "learning_rate": 1.163170672622975e-05, "loss": 0.4664, "step": 32676 }, { "epoch": 0.8972267984623833, "grad_norm": 0.37307772040367126, "learning_rate": 1.163128061959672e-05, "loss": 0.5135, "step": 32677 }, { "epoch": 0.8972542559033498, "grad_norm": 0.3963410258293152, "learning_rate": 1.1630854509920832e-05, "loss": 0.5571, "step": 32678 }, { "epoch": 0.8972817133443163, "grad_norm": 0.4539443254470825, "learning_rate": 1.163042839720288e-05, "loss": 0.5114, "step": 32679 }, { "epoch": 0.8973091707852828, "grad_norm": 0.39364922046661377, "learning_rate": 1.1630002281443657e-05, "loss": 0.5033, "step": 32680 }, { "epoch": 0.8973366282262493, "grad_norm": 0.4868185818195343, "learning_rate": 1.162957616264396e-05, "loss": 0.4811, "step": 32681 }, { "epoch": 0.8973640856672158, "grad_norm": 0.44907325506210327, "learning_rate": 1.1629150040804584e-05, "loss": 0.5717, "step": 32682 }, { "epoch": 0.8973915431081824, "grad_norm": 0.4163096249103546, "learning_rate": 1.1628723915926323e-05, "loss": 0.5255, "step": 32683 }, { "epoch": 0.8974190005491488, "grad_norm": 0.35299837589263916, "learning_rate": 1.1628297788009972e-05, "loss": 0.3841, "step": 32684 }, { "epoch": 0.8974464579901154, "grad_norm": 0.3902292847633362, "learning_rate": 1.1627871657056327e-05, "loss": 0.4343, "step": 32685 }, { "epoch": 0.8974739154310818, "grad_norm": 0.4132979214191437, "learning_rate": 1.1627445523066179e-05, "loss": 0.4605, "step": 32686 }, { "epoch": 0.8975013728720483, "grad_norm": 0.38919907808303833, "learning_rate": 1.162701938604033e-05, "loss": 0.4933, "step": 32687 }, { "epoch": 0.8975288303130148, "grad_norm": 0.42582741379737854, "learning_rate": 1.1626593245979568e-05, "loss": 0.4339, "step": 32688 }, { "epoch": 0.8975562877539813, "grad_norm": 0.39474114775657654, "learning_rate": 1.162616710288469e-05, "loss": 0.5058, "step": 32689 }, { "epoch": 0.8975837451949479, "grad_norm": 0.3754110038280487, "learning_rate": 1.1625740956756493e-05, "loss": 0.4989, "step": 32690 }, { "epoch": 0.8976112026359143, "grad_norm": 0.41280537843704224, "learning_rate": 1.1625314807595766e-05, "loss": 0.4455, "step": 32691 }, { "epoch": 0.8976386600768809, "grad_norm": 0.39651426672935486, "learning_rate": 1.1624888655403312e-05, "loss": 0.4889, "step": 32692 }, { "epoch": 0.8976661175178473, "grad_norm": 0.3628430664539337, "learning_rate": 1.1624462500179923e-05, "loss": 0.4682, "step": 32693 }, { "epoch": 0.8976935749588139, "grad_norm": 0.41252532601356506, "learning_rate": 1.162403634192639e-05, "loss": 0.479, "step": 32694 }, { "epoch": 0.8977210323997803, "grad_norm": 0.4267137348651886, "learning_rate": 1.1623610180643512e-05, "loss": 0.5091, "step": 32695 }, { "epoch": 0.8977484898407468, "grad_norm": 0.41550132632255554, "learning_rate": 1.1623184016332083e-05, "loss": 0.4648, "step": 32696 }, { "epoch": 0.8977759472817134, "grad_norm": 0.4152181148529053, "learning_rate": 1.1622757848992897e-05, "loss": 0.4424, "step": 32697 }, { "epoch": 0.8978034047226798, "grad_norm": 0.3918894827365875, "learning_rate": 1.1622331678626753e-05, "loss": 0.5132, "step": 32698 }, { "epoch": 0.8978308621636464, "grad_norm": 0.39079949259757996, "learning_rate": 1.1621905505234438e-05, "loss": 0.4565, "step": 32699 }, { "epoch": 0.8978583196046128, "grad_norm": 0.402592271566391, "learning_rate": 1.1621479328816753e-05, "loss": 0.5014, "step": 32700 }, { "epoch": 0.8978857770455794, "grad_norm": 0.4398922026157379, "learning_rate": 1.1621053149374493e-05, "loss": 0.4765, "step": 32701 }, { "epoch": 0.8979132344865458, "grad_norm": 0.3655937612056732, "learning_rate": 1.162062696690845e-05, "loss": 0.526, "step": 32702 }, { "epoch": 0.8979406919275124, "grad_norm": 0.477766752243042, "learning_rate": 1.1620200781419421e-05, "loss": 0.5944, "step": 32703 }, { "epoch": 0.8979681493684789, "grad_norm": 0.4063863456249237, "learning_rate": 1.1619774592908202e-05, "loss": 0.5049, "step": 32704 }, { "epoch": 0.8979956068094453, "grad_norm": 0.3810517489910126, "learning_rate": 1.1619348401375585e-05, "loss": 0.507, "step": 32705 }, { "epoch": 0.8980230642504119, "grad_norm": 0.43409061431884766, "learning_rate": 1.1618922206822366e-05, "loss": 0.5374, "step": 32706 }, { "epoch": 0.8980505216913783, "grad_norm": 0.33770066499710083, "learning_rate": 1.161849600924934e-05, "loss": 0.424, "step": 32707 }, { "epoch": 0.8980779791323449, "grad_norm": 0.3927633464336395, "learning_rate": 1.1618069808657301e-05, "loss": 0.4806, "step": 32708 }, { "epoch": 0.8981054365733113, "grad_norm": 0.4736536145210266, "learning_rate": 1.161764360504705e-05, "loss": 0.5636, "step": 32709 }, { "epoch": 0.8981328940142779, "grad_norm": 0.5429362058639526, "learning_rate": 1.1617217398419376e-05, "loss": 0.5601, "step": 32710 }, { "epoch": 0.8981603514552444, "grad_norm": 0.3783341944217682, "learning_rate": 1.1616791188775074e-05, "loss": 0.5182, "step": 32711 }, { "epoch": 0.8981878088962109, "grad_norm": 0.3604768216609955, "learning_rate": 1.161636497611494e-05, "loss": 0.4873, "step": 32712 }, { "epoch": 0.8982152663371774, "grad_norm": 0.38956743478775024, "learning_rate": 1.1615938760439769e-05, "loss": 0.5089, "step": 32713 }, { "epoch": 0.8982427237781438, "grad_norm": 0.3936191201210022, "learning_rate": 1.1615512541750357e-05, "loss": 0.5062, "step": 32714 }, { "epoch": 0.8982701812191104, "grad_norm": 0.39461660385131836, "learning_rate": 1.1615086320047501e-05, "loss": 0.4796, "step": 32715 }, { "epoch": 0.8982976386600768, "grad_norm": 5.1553850173950195, "learning_rate": 1.1614660095331991e-05, "loss": 0.4927, "step": 32716 }, { "epoch": 0.8983250961010434, "grad_norm": 0.37210798263549805, "learning_rate": 1.1614233867604625e-05, "loss": 0.4139, "step": 32717 }, { "epoch": 0.8983525535420099, "grad_norm": 0.37070798873901367, "learning_rate": 1.1613807636866197e-05, "loss": 0.4741, "step": 32718 }, { "epoch": 0.8983800109829764, "grad_norm": 0.4042204022407532, "learning_rate": 1.1613381403117506e-05, "loss": 0.462, "step": 32719 }, { "epoch": 0.8984074684239429, "grad_norm": 0.4323727786540985, "learning_rate": 1.1612955166359338e-05, "loss": 0.5619, "step": 32720 }, { "epoch": 0.8984349258649094, "grad_norm": 0.35440149903297424, "learning_rate": 1.1612528926592498e-05, "loss": 0.4523, "step": 32721 }, { "epoch": 0.8984623833058759, "grad_norm": 0.40436050295829773, "learning_rate": 1.1612102683817778e-05, "loss": 0.4753, "step": 32722 }, { "epoch": 0.8984898407468423, "grad_norm": 0.34313881397247314, "learning_rate": 1.1611676438035969e-05, "loss": 0.4749, "step": 32723 }, { "epoch": 0.8985172981878089, "grad_norm": 0.4020698070526123, "learning_rate": 1.1611250189247869e-05, "loss": 0.6204, "step": 32724 }, { "epoch": 0.8985447556287754, "grad_norm": 0.35529419779777527, "learning_rate": 1.1610823937454274e-05, "loss": 0.4861, "step": 32725 }, { "epoch": 0.8985722130697419, "grad_norm": 0.5045909285545349, "learning_rate": 1.161039768265598e-05, "loss": 0.5748, "step": 32726 }, { "epoch": 0.8985996705107084, "grad_norm": 0.3488752543926239, "learning_rate": 1.1609971424853779e-05, "loss": 0.4657, "step": 32727 }, { "epoch": 0.8986271279516749, "grad_norm": 0.37972691655158997, "learning_rate": 1.1609545164048465e-05, "loss": 0.5077, "step": 32728 }, { "epoch": 0.8986545853926414, "grad_norm": 0.3996376693248749, "learning_rate": 1.160911890024084e-05, "loss": 0.4433, "step": 32729 }, { "epoch": 0.8986820428336079, "grad_norm": 0.35644859075546265, "learning_rate": 1.1608692633431691e-05, "loss": 0.4958, "step": 32730 }, { "epoch": 0.8987095002745744, "grad_norm": 0.40972837805747986, "learning_rate": 1.1608266363621819e-05, "loss": 0.4489, "step": 32731 }, { "epoch": 0.898736957715541, "grad_norm": 0.41396191716194153, "learning_rate": 1.1607840090812017e-05, "loss": 0.5268, "step": 32732 }, { "epoch": 0.8987644151565074, "grad_norm": 0.37637242674827576, "learning_rate": 1.1607413815003076e-05, "loss": 0.4525, "step": 32733 }, { "epoch": 0.898791872597474, "grad_norm": 0.3816293776035309, "learning_rate": 1.1606987536195802e-05, "loss": 0.4843, "step": 32734 }, { "epoch": 0.8988193300384404, "grad_norm": 0.41885194182395935, "learning_rate": 1.160656125439098e-05, "loss": 0.4069, "step": 32735 }, { "epoch": 0.8988467874794069, "grad_norm": 0.37986546754837036, "learning_rate": 1.1606134969589406e-05, "loss": 0.4711, "step": 32736 }, { "epoch": 0.8988742449203734, "grad_norm": 1.3919717073440552, "learning_rate": 1.1605708681791881e-05, "loss": 0.5011, "step": 32737 }, { "epoch": 0.8989017023613399, "grad_norm": 0.36967402696609497, "learning_rate": 1.1605282390999195e-05, "loss": 0.4047, "step": 32738 }, { "epoch": 0.8989291598023065, "grad_norm": 0.422403484582901, "learning_rate": 1.1604856097212148e-05, "loss": 0.5106, "step": 32739 }, { "epoch": 0.8989566172432729, "grad_norm": 0.40169787406921387, "learning_rate": 1.1604429800431528e-05, "loss": 0.4472, "step": 32740 }, { "epoch": 0.8989840746842395, "grad_norm": 0.3655209243297577, "learning_rate": 1.1604003500658137e-05, "loss": 0.5138, "step": 32741 }, { "epoch": 0.8990115321252059, "grad_norm": 0.41175416111946106, "learning_rate": 1.1603577197892769e-05, "loss": 0.5225, "step": 32742 }, { "epoch": 0.8990389895661725, "grad_norm": 0.4078075587749481, "learning_rate": 1.1603150892136213e-05, "loss": 0.4891, "step": 32743 }, { "epoch": 0.8990664470071389, "grad_norm": 0.41250982880592346, "learning_rate": 1.1602724583389272e-05, "loss": 0.5571, "step": 32744 }, { "epoch": 0.8990939044481054, "grad_norm": 0.4074331521987915, "learning_rate": 1.1602298271652738e-05, "loss": 0.4976, "step": 32745 }, { "epoch": 0.899121361889072, "grad_norm": 0.47156479954719543, "learning_rate": 1.1601871956927404e-05, "loss": 0.5694, "step": 32746 }, { "epoch": 0.8991488193300384, "grad_norm": 0.3842099606990814, "learning_rate": 1.1601445639214071e-05, "loss": 0.4886, "step": 32747 }, { "epoch": 0.899176276771005, "grad_norm": 0.38151857256889343, "learning_rate": 1.160101931851353e-05, "loss": 0.5246, "step": 32748 }, { "epoch": 0.8992037342119714, "grad_norm": 0.4067126214504242, "learning_rate": 1.1600592994826576e-05, "loss": 0.5173, "step": 32749 }, { "epoch": 0.899231191652938, "grad_norm": 0.40469205379486084, "learning_rate": 1.1600166668154005e-05, "loss": 0.5535, "step": 32750 }, { "epoch": 0.8992586490939044, "grad_norm": 0.40603646636009216, "learning_rate": 1.1599740338496612e-05, "loss": 0.5167, "step": 32751 }, { "epoch": 0.899286106534871, "grad_norm": 0.4026154577732086, "learning_rate": 1.1599314005855196e-05, "loss": 0.4483, "step": 32752 }, { "epoch": 0.8993135639758375, "grad_norm": 0.3870466649532318, "learning_rate": 1.1598887670230545e-05, "loss": 0.538, "step": 32753 }, { "epoch": 0.8993410214168039, "grad_norm": 0.409155011177063, "learning_rate": 1.159846133162346e-05, "loss": 0.5344, "step": 32754 }, { "epoch": 0.8993684788577705, "grad_norm": 0.387613981962204, "learning_rate": 1.1598034990034734e-05, "loss": 0.5011, "step": 32755 }, { "epoch": 0.8993959362987369, "grad_norm": 0.40535223484039307, "learning_rate": 1.1597608645465161e-05, "loss": 0.515, "step": 32756 }, { "epoch": 0.8994233937397035, "grad_norm": 0.3798578083515167, "learning_rate": 1.1597182297915542e-05, "loss": 0.5122, "step": 32757 }, { "epoch": 0.8994508511806699, "grad_norm": 0.41384249925613403, "learning_rate": 1.1596755947386667e-05, "loss": 0.4713, "step": 32758 }, { "epoch": 0.8994783086216365, "grad_norm": 0.37009114027023315, "learning_rate": 1.159632959387933e-05, "loss": 0.4991, "step": 32759 }, { "epoch": 0.899505766062603, "grad_norm": 0.393777459859848, "learning_rate": 1.1595903237394333e-05, "loss": 0.4499, "step": 32760 }, { "epoch": 0.8995332235035695, "grad_norm": 0.5197467803955078, "learning_rate": 1.159547687793246e-05, "loss": 0.5145, "step": 32761 }, { "epoch": 0.899560680944536, "grad_norm": 0.35659635066986084, "learning_rate": 1.1595050515494521e-05, "loss": 0.4204, "step": 32762 }, { "epoch": 0.8995881383855024, "grad_norm": 0.45321881771087646, "learning_rate": 1.1594624150081302e-05, "loss": 0.494, "step": 32763 }, { "epoch": 0.899615595826469, "grad_norm": 0.3872327506542206, "learning_rate": 1.1594197781693597e-05, "loss": 0.5201, "step": 32764 }, { "epoch": 0.8996430532674354, "grad_norm": 0.4133322536945343, "learning_rate": 1.1593771410332206e-05, "loss": 0.4897, "step": 32765 }, { "epoch": 0.899670510708402, "grad_norm": 0.3595501780509949, "learning_rate": 1.1593345035997926e-05, "loss": 0.4461, "step": 32766 }, { "epoch": 0.8996979681493685, "grad_norm": 0.48478463292121887, "learning_rate": 1.1592918658691542e-05, "loss": 0.4321, "step": 32767 }, { "epoch": 0.899725425590335, "grad_norm": 0.38440829515457153, "learning_rate": 1.159249227841386e-05, "loss": 0.3926, "step": 32768 }, { "epoch": 0.8997528830313015, "grad_norm": 0.34093913435935974, "learning_rate": 1.1592065895165672e-05, "loss": 0.4699, "step": 32769 }, { "epoch": 0.899780340472268, "grad_norm": 0.37332603335380554, "learning_rate": 1.1591639508947772e-05, "loss": 0.5199, "step": 32770 }, { "epoch": 0.8998077979132345, "grad_norm": 0.3840343952178955, "learning_rate": 1.1591213119760957e-05, "loss": 0.4396, "step": 32771 }, { "epoch": 0.899835255354201, "grad_norm": 0.35701295733451843, "learning_rate": 1.1590786727606022e-05, "loss": 0.515, "step": 32772 }, { "epoch": 0.8998627127951675, "grad_norm": 0.4172215163707733, "learning_rate": 1.1590360332483763e-05, "loss": 0.5122, "step": 32773 }, { "epoch": 0.899890170236134, "grad_norm": 0.4152337908744812, "learning_rate": 1.1589933934394971e-05, "loss": 0.4626, "step": 32774 }, { "epoch": 0.8999176276771005, "grad_norm": 0.4170018434524536, "learning_rate": 1.1589507533340447e-05, "loss": 0.4433, "step": 32775 }, { "epoch": 0.899945085118067, "grad_norm": 0.3722338378429413, "learning_rate": 1.1589081129320984e-05, "loss": 0.41, "step": 32776 }, { "epoch": 0.8999725425590335, "grad_norm": 0.37589994072914124, "learning_rate": 1.1588654722337379e-05, "loss": 0.5707, "step": 32777 }, { "epoch": 0.9, "grad_norm": 0.3769027292728424, "learning_rate": 1.1588228312390423e-05, "loss": 0.5549, "step": 32778 }, { "epoch": 0.9000274574409665, "grad_norm": 0.3728613555431366, "learning_rate": 1.1587801899480914e-05, "loss": 0.4893, "step": 32779 }, { "epoch": 0.900054914881933, "grad_norm": 0.35271498560905457, "learning_rate": 1.1587375483609651e-05, "loss": 0.4424, "step": 32780 }, { "epoch": 0.9000823723228996, "grad_norm": 0.48649755120277405, "learning_rate": 1.1586949064777424e-05, "loss": 0.4966, "step": 32781 }, { "epoch": 0.900109829763866, "grad_norm": 0.37999382615089417, "learning_rate": 1.1586522642985031e-05, "loss": 0.4792, "step": 32782 }, { "epoch": 0.9001372872048325, "grad_norm": 0.7673463225364685, "learning_rate": 1.1586096218233266e-05, "loss": 0.4988, "step": 32783 }, { "epoch": 0.900164744645799, "grad_norm": 0.38595688343048096, "learning_rate": 1.1585669790522929e-05, "loss": 0.4628, "step": 32784 }, { "epoch": 0.9001922020867655, "grad_norm": 0.41306746006011963, "learning_rate": 1.1585243359854807e-05, "loss": 0.4877, "step": 32785 }, { "epoch": 0.900219659527732, "grad_norm": 0.5601764917373657, "learning_rate": 1.1584816926229703e-05, "loss": 0.4857, "step": 32786 }, { "epoch": 0.9002471169686985, "grad_norm": 0.44512608647346497, "learning_rate": 1.1584390489648406e-05, "loss": 0.5227, "step": 32787 }, { "epoch": 0.9002745744096651, "grad_norm": 0.36789053678512573, "learning_rate": 1.158396405011172e-05, "loss": 0.4684, "step": 32788 }, { "epoch": 0.9003020318506315, "grad_norm": 0.3996233642101288, "learning_rate": 1.1583537607620435e-05, "loss": 0.4917, "step": 32789 }, { "epoch": 0.9003294892915981, "grad_norm": 0.4338568150997162, "learning_rate": 1.1583111162175341e-05, "loss": 0.5649, "step": 32790 }, { "epoch": 0.9003569467325645, "grad_norm": 0.40710774064064026, "learning_rate": 1.1582684713777246e-05, "loss": 0.5483, "step": 32791 }, { "epoch": 0.900384404173531, "grad_norm": 0.40178051590919495, "learning_rate": 1.1582258262426936e-05, "loss": 0.4952, "step": 32792 }, { "epoch": 0.9004118616144975, "grad_norm": 0.389490008354187, "learning_rate": 1.158183180812521e-05, "loss": 0.5928, "step": 32793 }, { "epoch": 0.900439319055464, "grad_norm": 0.4393548369407654, "learning_rate": 1.1581405350872862e-05, "loss": 0.5137, "step": 32794 }, { "epoch": 0.9004667764964306, "grad_norm": 0.462162584066391, "learning_rate": 1.1580978890670688e-05, "loss": 0.55, "step": 32795 }, { "epoch": 0.900494233937397, "grad_norm": 0.4594816267490387, "learning_rate": 1.1580552427519486e-05, "loss": 0.5041, "step": 32796 }, { "epoch": 0.9005216913783636, "grad_norm": 0.41510316729545593, "learning_rate": 1.158012596142005e-05, "loss": 0.4955, "step": 32797 }, { "epoch": 0.90054914881933, "grad_norm": 0.3712053596973419, "learning_rate": 1.157969949237317e-05, "loss": 0.4897, "step": 32798 }, { "epoch": 0.9005766062602966, "grad_norm": 0.3646470010280609, "learning_rate": 1.157927302037965e-05, "loss": 0.4872, "step": 32799 }, { "epoch": 0.900604063701263, "grad_norm": 0.3846958577632904, "learning_rate": 1.157884654544028e-05, "loss": 0.4434, "step": 32800 }, { "epoch": 0.9006315211422296, "grad_norm": 0.6313398480415344, "learning_rate": 1.157842006755586e-05, "loss": 0.4735, "step": 32801 }, { "epoch": 0.9006589785831961, "grad_norm": 0.38445010781288147, "learning_rate": 1.157799358672718e-05, "loss": 0.5022, "step": 32802 }, { "epoch": 0.9006864360241625, "grad_norm": 0.33676573634147644, "learning_rate": 1.157756710295504e-05, "loss": 0.4022, "step": 32803 }, { "epoch": 0.9007138934651291, "grad_norm": 0.4172518849372864, "learning_rate": 1.1577140616240233e-05, "loss": 0.4789, "step": 32804 }, { "epoch": 0.9007413509060955, "grad_norm": 0.35785675048828125, "learning_rate": 1.1576714126583555e-05, "loss": 0.4307, "step": 32805 }, { "epoch": 0.9007688083470621, "grad_norm": 0.42172878980636597, "learning_rate": 1.1576287633985803e-05, "loss": 0.534, "step": 32806 }, { "epoch": 0.9007962657880285, "grad_norm": 0.42864108085632324, "learning_rate": 1.1575861138447771e-05, "loss": 0.5053, "step": 32807 }, { "epoch": 0.9008237232289951, "grad_norm": 0.4352286756038666, "learning_rate": 1.1575434639970254e-05, "loss": 0.5569, "step": 32808 }, { "epoch": 0.9008511806699615, "grad_norm": 0.39410310983657837, "learning_rate": 1.157500813855405e-05, "loss": 0.4693, "step": 32809 }, { "epoch": 0.900878638110928, "grad_norm": 0.3816404640674591, "learning_rate": 1.157458163419995e-05, "loss": 0.4723, "step": 32810 }, { "epoch": 0.9009060955518946, "grad_norm": 0.3467515707015991, "learning_rate": 1.1574155126908756e-05, "loss": 0.4513, "step": 32811 }, { "epoch": 0.900933552992861, "grad_norm": 0.5754850506782532, "learning_rate": 1.157372861668126e-05, "loss": 0.5277, "step": 32812 }, { "epoch": 0.9009610104338276, "grad_norm": 0.42997023463249207, "learning_rate": 1.1573302103518258e-05, "loss": 0.4688, "step": 32813 }, { "epoch": 0.900988467874794, "grad_norm": 0.4190651476383209, "learning_rate": 1.1572875587420544e-05, "loss": 0.4537, "step": 32814 }, { "epoch": 0.9010159253157606, "grad_norm": 0.4189777374267578, "learning_rate": 1.1572449068388919e-05, "loss": 0.3822, "step": 32815 }, { "epoch": 0.901043382756727, "grad_norm": 0.4212717115879059, "learning_rate": 1.1572022546424168e-05, "loss": 0.5259, "step": 32816 }, { "epoch": 0.9010708401976936, "grad_norm": 0.3902416527271271, "learning_rate": 1.1571596021527099e-05, "loss": 0.4915, "step": 32817 }, { "epoch": 0.9010982976386601, "grad_norm": 0.3393566906452179, "learning_rate": 1.1571169493698497e-05, "loss": 0.3896, "step": 32818 }, { "epoch": 0.9011257550796266, "grad_norm": 0.38766077160835266, "learning_rate": 1.1570742962939168e-05, "loss": 0.511, "step": 32819 }, { "epoch": 0.9011532125205931, "grad_norm": 0.36420729756355286, "learning_rate": 1.15703164292499e-05, "loss": 0.4633, "step": 32820 }, { "epoch": 0.9011806699615595, "grad_norm": 0.4324902296066284, "learning_rate": 1.1569889892631488e-05, "loss": 0.4692, "step": 32821 }, { "epoch": 0.9012081274025261, "grad_norm": 0.44995298981666565, "learning_rate": 1.156946335308473e-05, "loss": 0.5115, "step": 32822 }, { "epoch": 0.9012355848434925, "grad_norm": 0.34962910413742065, "learning_rate": 1.1569036810610426e-05, "loss": 0.3849, "step": 32823 }, { "epoch": 0.9012630422844591, "grad_norm": 0.4184189736843109, "learning_rate": 1.1568610265209365e-05, "loss": 0.4699, "step": 32824 }, { "epoch": 0.9012904997254256, "grad_norm": 0.4056295156478882, "learning_rate": 1.1568183716882346e-05, "loss": 0.4749, "step": 32825 }, { "epoch": 0.9013179571663921, "grad_norm": 0.43671050667762756, "learning_rate": 1.1567757165630163e-05, "loss": 0.4831, "step": 32826 }, { "epoch": 0.9013454146073586, "grad_norm": 0.43156298995018005, "learning_rate": 1.1567330611453614e-05, "loss": 0.4349, "step": 32827 }, { "epoch": 0.9013728720483251, "grad_norm": 0.41672977805137634, "learning_rate": 1.1566904054353491e-05, "loss": 0.4308, "step": 32828 }, { "epoch": 0.9014003294892916, "grad_norm": 0.4665127694606781, "learning_rate": 1.1566477494330592e-05, "loss": 0.5467, "step": 32829 }, { "epoch": 0.901427786930258, "grad_norm": 0.4095206558704376, "learning_rate": 1.1566050931385715e-05, "loss": 0.5159, "step": 32830 }, { "epoch": 0.9014552443712246, "grad_norm": 0.4141923189163208, "learning_rate": 1.1565624365519652e-05, "loss": 0.4757, "step": 32831 }, { "epoch": 0.9014827018121911, "grad_norm": 0.4334023594856262, "learning_rate": 1.1565197796733201e-05, "loss": 0.567, "step": 32832 }, { "epoch": 0.9015101592531576, "grad_norm": 0.3434852659702301, "learning_rate": 1.1564771225027156e-05, "loss": 0.5069, "step": 32833 }, { "epoch": 0.9015376166941241, "grad_norm": 0.4589049518108368, "learning_rate": 1.156434465040231e-05, "loss": 0.5403, "step": 32834 }, { "epoch": 0.9015650741350906, "grad_norm": 0.4140706956386566, "learning_rate": 1.1563918072859466e-05, "loss": 0.4555, "step": 32835 }, { "epoch": 0.9015925315760571, "grad_norm": 0.44501492381095886, "learning_rate": 1.1563491492399413e-05, "loss": 0.369, "step": 32836 }, { "epoch": 0.9016199890170236, "grad_norm": 0.4057570695877075, "learning_rate": 1.156306490902295e-05, "loss": 0.4657, "step": 32837 }, { "epoch": 0.9016474464579901, "grad_norm": 0.3512583374977112, "learning_rate": 1.1562638322730875e-05, "loss": 0.3969, "step": 32838 }, { "epoch": 0.9016749038989567, "grad_norm": 0.6557492017745972, "learning_rate": 1.1562211733523976e-05, "loss": 0.481, "step": 32839 }, { "epoch": 0.9017023613399231, "grad_norm": 0.39824166893959045, "learning_rate": 1.1561785141403057e-05, "loss": 0.4728, "step": 32840 }, { "epoch": 0.9017298187808896, "grad_norm": 0.39168581366539, "learning_rate": 1.1561358546368907e-05, "loss": 0.4645, "step": 32841 }, { "epoch": 0.9017572762218561, "grad_norm": 0.3670452833175659, "learning_rate": 1.156093194842233e-05, "loss": 0.3855, "step": 32842 }, { "epoch": 0.9017847336628226, "grad_norm": 0.4192328155040741, "learning_rate": 1.1560505347564113e-05, "loss": 0.4502, "step": 32843 }, { "epoch": 0.9018121911037891, "grad_norm": 0.4151935279369354, "learning_rate": 1.1560078743795055e-05, "loss": 0.5038, "step": 32844 }, { "epoch": 0.9018396485447556, "grad_norm": 0.5091087222099304, "learning_rate": 1.1559652137115953e-05, "loss": 0.5163, "step": 32845 }, { "epoch": 0.9018671059857222, "grad_norm": 0.4174017310142517, "learning_rate": 1.1559225527527604e-05, "loss": 0.5316, "step": 32846 }, { "epoch": 0.9018945634266886, "grad_norm": 0.42163360118865967, "learning_rate": 1.15587989150308e-05, "loss": 0.5449, "step": 32847 }, { "epoch": 0.9019220208676552, "grad_norm": 0.3756042718887329, "learning_rate": 1.1558372299626339e-05, "loss": 0.4562, "step": 32848 }, { "epoch": 0.9019494783086216, "grad_norm": 0.4711083471775055, "learning_rate": 1.1557945681315015e-05, "loss": 0.4328, "step": 32849 }, { "epoch": 0.9019769357495881, "grad_norm": 0.39452067017555237, "learning_rate": 1.1557519060097626e-05, "loss": 0.4663, "step": 32850 }, { "epoch": 0.9020043931905546, "grad_norm": 0.4209335446357727, "learning_rate": 1.1557092435974968e-05, "loss": 0.5205, "step": 32851 }, { "epoch": 0.9020318506315211, "grad_norm": 0.3873645067214966, "learning_rate": 1.1556665808947834e-05, "loss": 0.456, "step": 32852 }, { "epoch": 0.9020593080724877, "grad_norm": 0.3915815055370331, "learning_rate": 1.1556239179017021e-05, "loss": 0.5009, "step": 32853 }, { "epoch": 0.9020867655134541, "grad_norm": 0.5327669978141785, "learning_rate": 1.1555812546183324e-05, "loss": 0.5084, "step": 32854 }, { "epoch": 0.9021142229544207, "grad_norm": 0.4577653110027313, "learning_rate": 1.1555385910447545e-05, "loss": 0.5065, "step": 32855 }, { "epoch": 0.9021416803953871, "grad_norm": 0.4147915542125702, "learning_rate": 1.1554959271810474e-05, "loss": 0.4882, "step": 32856 }, { "epoch": 0.9021691378363537, "grad_norm": 0.44013750553131104, "learning_rate": 1.1554532630272903e-05, "loss": 0.5654, "step": 32857 }, { "epoch": 0.9021965952773201, "grad_norm": 0.41192954778671265, "learning_rate": 1.1554105985835635e-05, "loss": 0.5268, "step": 32858 }, { "epoch": 0.9022240527182867, "grad_norm": 0.38591504096984863, "learning_rate": 1.1553679338499467e-05, "loss": 0.5358, "step": 32859 }, { "epoch": 0.9022515101592532, "grad_norm": 0.3745200037956238, "learning_rate": 1.1553252688265183e-05, "loss": 0.4868, "step": 32860 }, { "epoch": 0.9022789676002196, "grad_norm": 0.3922201991081238, "learning_rate": 1.1552826035133595e-05, "loss": 0.4588, "step": 32861 }, { "epoch": 0.9023064250411862, "grad_norm": 0.42202863097190857, "learning_rate": 1.1552399379105486e-05, "loss": 0.4834, "step": 32862 }, { "epoch": 0.9023338824821526, "grad_norm": 0.3742457330226898, "learning_rate": 1.155197272018166e-05, "loss": 0.5273, "step": 32863 }, { "epoch": 0.9023613399231192, "grad_norm": 0.4507403075695038, "learning_rate": 1.1551546058362908e-05, "loss": 0.5198, "step": 32864 }, { "epoch": 0.9023887973640856, "grad_norm": 0.3837156593799591, "learning_rate": 1.1551119393650025e-05, "loss": 0.4969, "step": 32865 }, { "epoch": 0.9024162548050522, "grad_norm": 0.37271416187286377, "learning_rate": 1.1550692726043812e-05, "loss": 0.4556, "step": 32866 }, { "epoch": 0.9024437122460187, "grad_norm": 0.4330938458442688, "learning_rate": 1.155026605554506e-05, "loss": 0.5093, "step": 32867 }, { "epoch": 0.9024711696869852, "grad_norm": 0.6025408506393433, "learning_rate": 1.1549839382154572e-05, "loss": 0.5015, "step": 32868 }, { "epoch": 0.9024986271279517, "grad_norm": 0.37133339047431946, "learning_rate": 1.1549412705873136e-05, "loss": 0.4664, "step": 32869 }, { "epoch": 0.9025260845689181, "grad_norm": 0.43310031294822693, "learning_rate": 1.1548986026701549e-05, "loss": 0.5061, "step": 32870 }, { "epoch": 0.9025535420098847, "grad_norm": 0.39530283212661743, "learning_rate": 1.154855934464061e-05, "loss": 0.4724, "step": 32871 }, { "epoch": 0.9025809994508511, "grad_norm": 0.353142648935318, "learning_rate": 1.1548132659691114e-05, "loss": 0.4358, "step": 32872 }, { "epoch": 0.9026084568918177, "grad_norm": 0.35216620564460754, "learning_rate": 1.1547705971853855e-05, "loss": 0.4821, "step": 32873 }, { "epoch": 0.9026359143327842, "grad_norm": 0.3865971565246582, "learning_rate": 1.1547279281129633e-05, "loss": 0.519, "step": 32874 }, { "epoch": 0.9026633717737507, "grad_norm": 0.48260316252708435, "learning_rate": 1.154685258751924e-05, "loss": 0.489, "step": 32875 }, { "epoch": 0.9026908292147172, "grad_norm": 0.4394557774066925, "learning_rate": 1.1546425891023471e-05, "loss": 0.4473, "step": 32876 }, { "epoch": 0.9027182866556837, "grad_norm": 0.45452961325645447, "learning_rate": 1.1545999191643127e-05, "loss": 0.5361, "step": 32877 }, { "epoch": 0.9027457440966502, "grad_norm": 0.40256431698799133, "learning_rate": 1.1545572489379e-05, "loss": 0.5123, "step": 32878 }, { "epoch": 0.9027732015376166, "grad_norm": 0.3568284809589386, "learning_rate": 1.1545145784231889e-05, "loss": 0.4567, "step": 32879 }, { "epoch": 0.9028006589785832, "grad_norm": 0.39027783274650574, "learning_rate": 1.1544719076202584e-05, "loss": 0.5149, "step": 32880 }, { "epoch": 0.9028281164195497, "grad_norm": 0.42398491501808167, "learning_rate": 1.154429236529189e-05, "loss": 0.477, "step": 32881 }, { "epoch": 0.9028555738605162, "grad_norm": 0.37882938981056213, "learning_rate": 1.1543865651500593e-05, "loss": 0.4288, "step": 32882 }, { "epoch": 0.9028830313014827, "grad_norm": 0.4132899045944214, "learning_rate": 1.1543438934829495e-05, "loss": 0.5506, "step": 32883 }, { "epoch": 0.9029104887424492, "grad_norm": 0.3987480401992798, "learning_rate": 1.154301221527939e-05, "loss": 0.5557, "step": 32884 }, { "epoch": 0.9029379461834157, "grad_norm": 0.5017302632331848, "learning_rate": 1.1542585492851076e-05, "loss": 0.483, "step": 32885 }, { "epoch": 0.9029654036243822, "grad_norm": 0.34274178743362427, "learning_rate": 1.154215876754535e-05, "loss": 0.4593, "step": 32886 }, { "epoch": 0.9029928610653487, "grad_norm": 0.48922625184059143, "learning_rate": 1.1541732039363003e-05, "loss": 0.4626, "step": 32887 }, { "epoch": 0.9030203185063153, "grad_norm": 0.39497941732406616, "learning_rate": 1.1541305308304835e-05, "loss": 0.4256, "step": 32888 }, { "epoch": 0.9030477759472817, "grad_norm": 0.43135184049606323, "learning_rate": 1.1540878574371639e-05, "loss": 0.4747, "step": 32889 }, { "epoch": 0.9030752333882482, "grad_norm": 0.3749432861804962, "learning_rate": 1.1540451837564215e-05, "loss": 0.4414, "step": 32890 }, { "epoch": 0.9031026908292147, "grad_norm": 0.3845439553260803, "learning_rate": 1.1540025097883356e-05, "loss": 0.4969, "step": 32891 }, { "epoch": 0.9031301482701812, "grad_norm": 0.3509191572666168, "learning_rate": 1.1539598355329857e-05, "loss": 0.4094, "step": 32892 }, { "epoch": 0.9031576057111477, "grad_norm": 0.42373889684677124, "learning_rate": 1.1539171609904519e-05, "loss": 0.523, "step": 32893 }, { "epoch": 0.9031850631521142, "grad_norm": 0.5177987217903137, "learning_rate": 1.1538744861608132e-05, "loss": 0.4096, "step": 32894 }, { "epoch": 0.9032125205930808, "grad_norm": 0.40102165937423706, "learning_rate": 1.1538318110441494e-05, "loss": 0.5546, "step": 32895 }, { "epoch": 0.9032399780340472, "grad_norm": 0.3825879395008087, "learning_rate": 1.1537891356405403e-05, "loss": 0.5111, "step": 32896 }, { "epoch": 0.9032674354750138, "grad_norm": 0.38967806100845337, "learning_rate": 1.1537464599500654e-05, "loss": 0.4187, "step": 32897 }, { "epoch": 0.9032948929159802, "grad_norm": 0.3750987648963928, "learning_rate": 1.1537037839728043e-05, "loss": 0.4756, "step": 32898 }, { "epoch": 0.9033223503569467, "grad_norm": 0.4920770227909088, "learning_rate": 1.1536611077088364e-05, "loss": 0.4632, "step": 32899 }, { "epoch": 0.9033498077979132, "grad_norm": 0.40388619899749756, "learning_rate": 1.1536184311582417e-05, "loss": 0.4709, "step": 32900 }, { "epoch": 0.9033772652388797, "grad_norm": 0.39074090123176575, "learning_rate": 1.1535757543210995e-05, "loss": 0.4544, "step": 32901 }, { "epoch": 0.9034047226798463, "grad_norm": 0.3648351728916168, "learning_rate": 1.1535330771974895e-05, "loss": 0.5118, "step": 32902 }, { "epoch": 0.9034321801208127, "grad_norm": 0.3560994565486908, "learning_rate": 1.1534903997874916e-05, "loss": 0.4601, "step": 32903 }, { "epoch": 0.9034596375617793, "grad_norm": 0.3732188045978546, "learning_rate": 1.1534477220911846e-05, "loss": 0.5302, "step": 32904 }, { "epoch": 0.9034870950027457, "grad_norm": 0.36562395095825195, "learning_rate": 1.153405044108649e-05, "loss": 0.3998, "step": 32905 }, { "epoch": 0.9035145524437123, "grad_norm": 0.48745468258857727, "learning_rate": 1.1533623658399637e-05, "loss": 0.4606, "step": 32906 }, { "epoch": 0.9035420098846787, "grad_norm": 0.3853543698787689, "learning_rate": 1.1533196872852091e-05, "loss": 0.4803, "step": 32907 }, { "epoch": 0.9035694673256452, "grad_norm": 0.40514639019966125, "learning_rate": 1.153277008444464e-05, "loss": 0.5204, "step": 32908 }, { "epoch": 0.9035969247666118, "grad_norm": 0.3845973014831543, "learning_rate": 1.1532343293178084e-05, "loss": 0.5801, "step": 32909 }, { "epoch": 0.9036243822075782, "grad_norm": 0.37984955310821533, "learning_rate": 1.153191649905322e-05, "loss": 0.4962, "step": 32910 }, { "epoch": 0.9036518396485448, "grad_norm": 0.3460088074207306, "learning_rate": 1.1531489702070842e-05, "loss": 0.4586, "step": 32911 }, { "epoch": 0.9036792970895112, "grad_norm": 0.3737717568874359, "learning_rate": 1.1531062902231746e-05, "loss": 0.4167, "step": 32912 }, { "epoch": 0.9037067545304778, "grad_norm": 0.4000113308429718, "learning_rate": 1.1530636099536732e-05, "loss": 0.5129, "step": 32913 }, { "epoch": 0.9037342119714442, "grad_norm": 0.4116954207420349, "learning_rate": 1.1530209293986589e-05, "loss": 0.4977, "step": 32914 }, { "epoch": 0.9037616694124108, "grad_norm": 0.3910031318664551, "learning_rate": 1.1529782485582122e-05, "loss": 0.4666, "step": 32915 }, { "epoch": 0.9037891268533773, "grad_norm": 0.41079604625701904, "learning_rate": 1.1529355674324117e-05, "loss": 0.4753, "step": 32916 }, { "epoch": 0.9038165842943438, "grad_norm": 0.3570103049278259, "learning_rate": 1.152892886021338e-05, "loss": 0.4743, "step": 32917 }, { "epoch": 0.9038440417353103, "grad_norm": 0.42445775866508484, "learning_rate": 1.1528502043250702e-05, "loss": 0.5181, "step": 32918 }, { "epoch": 0.9038714991762767, "grad_norm": 0.5094816088676453, "learning_rate": 1.1528075223436877e-05, "loss": 0.5479, "step": 32919 }, { "epoch": 0.9038989566172433, "grad_norm": 0.42870694398880005, "learning_rate": 1.1527648400772708e-05, "loss": 0.49, "step": 32920 }, { "epoch": 0.9039264140582097, "grad_norm": 0.39834684133529663, "learning_rate": 1.1527221575258985e-05, "loss": 0.5414, "step": 32921 }, { "epoch": 0.9039538714991763, "grad_norm": 0.34285876154899597, "learning_rate": 1.1526794746896505e-05, "loss": 0.4484, "step": 32922 }, { "epoch": 0.9039813289401428, "grad_norm": 0.4397629499435425, "learning_rate": 1.1526367915686067e-05, "loss": 0.4998, "step": 32923 }, { "epoch": 0.9040087863811093, "grad_norm": 0.42304080724716187, "learning_rate": 1.1525941081628467e-05, "loss": 0.452, "step": 32924 }, { "epoch": 0.9040362438220758, "grad_norm": 0.379070520401001, "learning_rate": 1.1525514244724498e-05, "loss": 0.5259, "step": 32925 }, { "epoch": 0.9040637012630423, "grad_norm": 0.40844911336898804, "learning_rate": 1.152508740497496e-05, "loss": 0.4711, "step": 32926 }, { "epoch": 0.9040911587040088, "grad_norm": 0.36439648270606995, "learning_rate": 1.1524660562380645e-05, "loss": 0.4361, "step": 32927 }, { "epoch": 0.9041186161449752, "grad_norm": 0.4355107545852661, "learning_rate": 1.1524233716942352e-05, "loss": 0.5593, "step": 32928 }, { "epoch": 0.9041460735859418, "grad_norm": 0.3967171013355255, "learning_rate": 1.1523806868660876e-05, "loss": 0.5241, "step": 32929 }, { "epoch": 0.9041735310269083, "grad_norm": 0.3897009491920471, "learning_rate": 1.1523380017537017e-05, "loss": 0.4738, "step": 32930 }, { "epoch": 0.9042009884678748, "grad_norm": 0.4444959759712219, "learning_rate": 1.1522953163571564e-05, "loss": 0.5064, "step": 32931 }, { "epoch": 0.9042284459088413, "grad_norm": 0.3896864354610443, "learning_rate": 1.1522526306765318e-05, "loss": 0.5227, "step": 32932 }, { "epoch": 0.9042559033498078, "grad_norm": 0.37202221155166626, "learning_rate": 1.1522099447119077e-05, "loss": 0.5338, "step": 32933 }, { "epoch": 0.9042833607907743, "grad_norm": 0.42200562357902527, "learning_rate": 1.1521672584633631e-05, "loss": 0.5341, "step": 32934 }, { "epoch": 0.9043108182317408, "grad_norm": 0.4063025414943695, "learning_rate": 1.1521245719309783e-05, "loss": 0.4567, "step": 32935 }, { "epoch": 0.9043382756727073, "grad_norm": 0.3777836859226227, "learning_rate": 1.1520818851148327e-05, "loss": 0.4761, "step": 32936 }, { "epoch": 0.9043657331136739, "grad_norm": 0.40472519397735596, "learning_rate": 1.1520391980150056e-05, "loss": 0.5333, "step": 32937 }, { "epoch": 0.9043931905546403, "grad_norm": 0.43095266819000244, "learning_rate": 1.1519965106315771e-05, "loss": 0.5037, "step": 32938 }, { "epoch": 0.9044206479956068, "grad_norm": 0.37650883197784424, "learning_rate": 1.1519538229646266e-05, "loss": 0.4781, "step": 32939 }, { "epoch": 0.9044481054365733, "grad_norm": 0.404582679271698, "learning_rate": 1.1519111350142334e-05, "loss": 0.4849, "step": 32940 }, { "epoch": 0.9044755628775398, "grad_norm": 0.41306978464126587, "learning_rate": 1.1518684467804777e-05, "loss": 0.5527, "step": 32941 }, { "epoch": 0.9045030203185063, "grad_norm": 0.37526121735572815, "learning_rate": 1.1518257582634386e-05, "loss": 0.4938, "step": 32942 }, { "epoch": 0.9045304777594728, "grad_norm": 0.39458754658699036, "learning_rate": 1.1517830694631964e-05, "loss": 0.4854, "step": 32943 }, { "epoch": 0.9045579352004394, "grad_norm": 0.41725480556488037, "learning_rate": 1.1517403803798301e-05, "loss": 0.4677, "step": 32944 }, { "epoch": 0.9045853926414058, "grad_norm": 0.3576461970806122, "learning_rate": 1.1516976910134197e-05, "loss": 0.4947, "step": 32945 }, { "epoch": 0.9046128500823724, "grad_norm": 0.36625936627388, "learning_rate": 1.1516550013640447e-05, "loss": 0.397, "step": 32946 }, { "epoch": 0.9046403075233388, "grad_norm": 0.49743062257766724, "learning_rate": 1.1516123114317843e-05, "loss": 0.5405, "step": 32947 }, { "epoch": 0.9046677649643053, "grad_norm": 0.4104597866535187, "learning_rate": 1.1515696212167189e-05, "loss": 0.5287, "step": 32948 }, { "epoch": 0.9046952224052718, "grad_norm": 0.43194350600242615, "learning_rate": 1.1515269307189278e-05, "loss": 0.4088, "step": 32949 }, { "epoch": 0.9047226798462383, "grad_norm": 0.4039981961250305, "learning_rate": 1.1514842399384905e-05, "loss": 0.548, "step": 32950 }, { "epoch": 0.9047501372872049, "grad_norm": 0.4241907298564911, "learning_rate": 1.1514415488754869e-05, "loss": 0.538, "step": 32951 }, { "epoch": 0.9047775947281713, "grad_norm": 0.3832662105560303, "learning_rate": 1.1513988575299966e-05, "loss": 0.5216, "step": 32952 }, { "epoch": 0.9048050521691379, "grad_norm": 0.3564370572566986, "learning_rate": 1.1513561659020987e-05, "loss": 0.5423, "step": 32953 }, { "epoch": 0.9048325096101043, "grad_norm": 0.4619560241699219, "learning_rate": 1.1513134739918734e-05, "loss": 0.5836, "step": 32954 }, { "epoch": 0.9048599670510709, "grad_norm": 0.4107954800128937, "learning_rate": 1.1512707817994001e-05, "loss": 0.4686, "step": 32955 }, { "epoch": 0.9048874244920373, "grad_norm": 0.42462560534477234, "learning_rate": 1.1512280893247587e-05, "loss": 0.5381, "step": 32956 }, { "epoch": 0.9049148819330038, "grad_norm": 0.5784880518913269, "learning_rate": 1.1511853965680287e-05, "loss": 0.5949, "step": 32957 }, { "epoch": 0.9049423393739704, "grad_norm": 0.3743045926094055, "learning_rate": 1.1511427035292895e-05, "loss": 0.4741, "step": 32958 }, { "epoch": 0.9049697968149368, "grad_norm": 0.38537949323654175, "learning_rate": 1.1511000102086209e-05, "loss": 0.548, "step": 32959 }, { "epoch": 0.9049972542559034, "grad_norm": 0.46489158272743225, "learning_rate": 1.1510573166061025e-05, "loss": 0.5181, "step": 32960 }, { "epoch": 0.9050247116968698, "grad_norm": 0.4267701506614685, "learning_rate": 1.1510146227218143e-05, "loss": 0.5751, "step": 32961 }, { "epoch": 0.9050521691378364, "grad_norm": 0.40937256813049316, "learning_rate": 1.1509719285558357e-05, "loss": 0.4276, "step": 32962 }, { "epoch": 0.9050796265788028, "grad_norm": 0.4053786098957062, "learning_rate": 1.1509292341082456e-05, "loss": 0.4718, "step": 32963 }, { "epoch": 0.9051070840197694, "grad_norm": 0.6522948145866394, "learning_rate": 1.1508865393791248e-05, "loss": 0.5772, "step": 32964 }, { "epoch": 0.9051345414607359, "grad_norm": 0.4114609360694885, "learning_rate": 1.1508438443685523e-05, "loss": 0.4913, "step": 32965 }, { "epoch": 0.9051619989017023, "grad_norm": 0.4251357316970825, "learning_rate": 1.150801149076608e-05, "loss": 0.5286, "step": 32966 }, { "epoch": 0.9051894563426689, "grad_norm": 0.4173600971698761, "learning_rate": 1.1507584535033715e-05, "loss": 0.4648, "step": 32967 }, { "epoch": 0.9052169137836353, "grad_norm": 0.4281451404094696, "learning_rate": 1.150715757648922e-05, "loss": 0.5118, "step": 32968 }, { "epoch": 0.9052443712246019, "grad_norm": 0.42779749631881714, "learning_rate": 1.1506730615133402e-05, "loss": 0.4871, "step": 32969 }, { "epoch": 0.9052718286655683, "grad_norm": 0.38229596614837646, "learning_rate": 1.1506303650967046e-05, "loss": 0.4882, "step": 32970 }, { "epoch": 0.9052992861065349, "grad_norm": 0.3938492238521576, "learning_rate": 1.150587668399095e-05, "loss": 0.4867, "step": 32971 }, { "epoch": 0.9053267435475014, "grad_norm": 0.3812597990036011, "learning_rate": 1.1505449714205918e-05, "loss": 0.5283, "step": 32972 }, { "epoch": 0.9053542009884679, "grad_norm": 0.4416571259498596, "learning_rate": 1.1505022741612739e-05, "loss": 0.5223, "step": 32973 }, { "epoch": 0.9053816584294344, "grad_norm": 0.42086246609687805, "learning_rate": 1.1504595766212214e-05, "loss": 0.5976, "step": 32974 }, { "epoch": 0.9054091158704008, "grad_norm": 0.36024028062820435, "learning_rate": 1.1504168788005139e-05, "loss": 0.4152, "step": 32975 }, { "epoch": 0.9054365733113674, "grad_norm": 0.4255084693431854, "learning_rate": 1.1503741806992306e-05, "loss": 0.526, "step": 32976 }, { "epoch": 0.9054640307523338, "grad_norm": 0.4079798758029938, "learning_rate": 1.1503314823174516e-05, "loss": 0.4006, "step": 32977 }, { "epoch": 0.9054914881933004, "grad_norm": 0.41032978892326355, "learning_rate": 1.1502887836552564e-05, "loss": 0.4013, "step": 32978 }, { "epoch": 0.9055189456342669, "grad_norm": 0.3880426585674286, "learning_rate": 1.1502460847127245e-05, "loss": 0.5364, "step": 32979 }, { "epoch": 0.9055464030752334, "grad_norm": 0.38841116428375244, "learning_rate": 1.1502033854899359e-05, "loss": 0.4574, "step": 32980 }, { "epoch": 0.9055738605161999, "grad_norm": 0.4404136538505554, "learning_rate": 1.1501606859869701e-05, "loss": 0.5541, "step": 32981 }, { "epoch": 0.9056013179571664, "grad_norm": 0.4262426793575287, "learning_rate": 1.1501179862039064e-05, "loss": 0.4072, "step": 32982 }, { "epoch": 0.9056287753981329, "grad_norm": 0.3924223482608795, "learning_rate": 1.150075286140825e-05, "loss": 0.4274, "step": 32983 }, { "epoch": 0.9056562328390994, "grad_norm": 0.41102784872055054, "learning_rate": 1.1500325857978052e-05, "loss": 0.4257, "step": 32984 }, { "epoch": 0.9056836902800659, "grad_norm": 0.4040723443031311, "learning_rate": 1.149989885174927e-05, "loss": 0.558, "step": 32985 }, { "epoch": 0.9057111477210325, "grad_norm": 0.49618181586265564, "learning_rate": 1.1499471842722694e-05, "loss": 0.5083, "step": 32986 }, { "epoch": 0.9057386051619989, "grad_norm": 0.4482768774032593, "learning_rate": 1.1499044830899126e-05, "loss": 0.5094, "step": 32987 }, { "epoch": 0.9057660626029654, "grad_norm": 0.37323126196861267, "learning_rate": 1.1498617816279363e-05, "loss": 0.4034, "step": 32988 }, { "epoch": 0.9057935200439319, "grad_norm": 0.39099809527397156, "learning_rate": 1.1498190798864195e-05, "loss": 0.4955, "step": 32989 }, { "epoch": 0.9058209774848984, "grad_norm": 0.38071009516716003, "learning_rate": 1.1497763778654428e-05, "loss": 0.4645, "step": 32990 }, { "epoch": 0.9058484349258649, "grad_norm": 0.3178936541080475, "learning_rate": 1.1497336755650848e-05, "loss": 0.3868, "step": 32991 }, { "epoch": 0.9058758923668314, "grad_norm": 0.41623392701148987, "learning_rate": 1.149690972985426e-05, "loss": 0.4889, "step": 32992 }, { "epoch": 0.905903349807798, "grad_norm": 0.379284530878067, "learning_rate": 1.1496482701265458e-05, "loss": 0.5209, "step": 32993 }, { "epoch": 0.9059308072487644, "grad_norm": 0.3935716152191162, "learning_rate": 1.1496055669885237e-05, "loss": 0.4199, "step": 32994 }, { "epoch": 0.905958264689731, "grad_norm": 0.45475319027900696, "learning_rate": 1.1495628635714397e-05, "loss": 0.5454, "step": 32995 }, { "epoch": 0.9059857221306974, "grad_norm": 0.3842626214027405, "learning_rate": 1.1495201598753729e-05, "loss": 0.5389, "step": 32996 }, { "epoch": 0.9060131795716639, "grad_norm": 0.4071104824542999, "learning_rate": 1.1494774559004036e-05, "loss": 0.5542, "step": 32997 }, { "epoch": 0.9060406370126304, "grad_norm": 0.41031256318092346, "learning_rate": 1.1494347516466111e-05, "loss": 0.5407, "step": 32998 }, { "epoch": 0.9060680944535969, "grad_norm": 0.4070524275302887, "learning_rate": 1.1493920471140747e-05, "loss": 0.497, "step": 32999 }, { "epoch": 0.9060955518945635, "grad_norm": 0.3654446303844452, "learning_rate": 1.149349342302875e-05, "loss": 0.5342, "step": 33000 }, { "epoch": 0.9061230093355299, "grad_norm": 0.4330393075942993, "learning_rate": 1.1493066372130907e-05, "loss": 0.4364, "step": 33001 }, { "epoch": 0.9061504667764965, "grad_norm": 0.3173982799053192, "learning_rate": 1.149263931844802e-05, "loss": 0.3231, "step": 33002 }, { "epoch": 0.9061779242174629, "grad_norm": 0.41309812664985657, "learning_rate": 1.1492212261980886e-05, "loss": 0.5254, "step": 33003 }, { "epoch": 0.9062053816584295, "grad_norm": 0.3755654990673065, "learning_rate": 1.1491785202730297e-05, "loss": 0.4709, "step": 33004 }, { "epoch": 0.9062328390993959, "grad_norm": 0.3582751154899597, "learning_rate": 1.1491358140697053e-05, "loss": 0.5126, "step": 33005 }, { "epoch": 0.9062602965403624, "grad_norm": 0.42414960265159607, "learning_rate": 1.1490931075881953e-05, "loss": 0.3944, "step": 33006 }, { "epoch": 0.906287753981329, "grad_norm": 0.44700804352760315, "learning_rate": 1.1490504008285785e-05, "loss": 0.485, "step": 33007 }, { "epoch": 0.9063152114222954, "grad_norm": 0.36089470982551575, "learning_rate": 1.1490076937909355e-05, "loss": 0.5075, "step": 33008 }, { "epoch": 0.906342668863262, "grad_norm": 0.42052751779556274, "learning_rate": 1.1489649864753456e-05, "loss": 0.3861, "step": 33009 }, { "epoch": 0.9063701263042284, "grad_norm": 0.3929709792137146, "learning_rate": 1.1489222788818885e-05, "loss": 0.5151, "step": 33010 }, { "epoch": 0.906397583745195, "grad_norm": 0.3185974359512329, "learning_rate": 1.148879571010644e-05, "loss": 0.3736, "step": 33011 }, { "epoch": 0.9064250411861614, "grad_norm": 0.365153431892395, "learning_rate": 1.148836862861691e-05, "loss": 0.5556, "step": 33012 }, { "epoch": 0.906452498627128, "grad_norm": 0.4031786024570465, "learning_rate": 1.1487941544351102e-05, "loss": 0.5241, "step": 33013 }, { "epoch": 0.9064799560680945, "grad_norm": 0.3949236571788788, "learning_rate": 1.1487514457309808e-05, "loss": 0.4355, "step": 33014 }, { "epoch": 0.9065074135090609, "grad_norm": 0.4149945080280304, "learning_rate": 1.1487087367493823e-05, "loss": 0.5168, "step": 33015 }, { "epoch": 0.9065348709500275, "grad_norm": 0.4180516004562378, "learning_rate": 1.1486660274903947e-05, "loss": 0.4564, "step": 33016 }, { "epoch": 0.9065623283909939, "grad_norm": 0.4485757648944855, "learning_rate": 1.1486233179540973e-05, "loss": 0.4751, "step": 33017 }, { "epoch": 0.9065897858319605, "grad_norm": 0.3575478792190552, "learning_rate": 1.1485806081405701e-05, "loss": 0.4638, "step": 33018 }, { "epoch": 0.9066172432729269, "grad_norm": 0.4810888469219208, "learning_rate": 1.1485378980498928e-05, "loss": 0.5429, "step": 33019 }, { "epoch": 0.9066447007138935, "grad_norm": 0.4262485206127167, "learning_rate": 1.1484951876821446e-05, "loss": 0.5169, "step": 33020 }, { "epoch": 0.90667215815486, "grad_norm": 0.4077582359313965, "learning_rate": 1.1484524770374057e-05, "loss": 0.4346, "step": 33021 }, { "epoch": 0.9066996155958265, "grad_norm": 0.4152623116970062, "learning_rate": 1.1484097661157555e-05, "loss": 0.5829, "step": 33022 }, { "epoch": 0.906727073036793, "grad_norm": 0.3998049199581146, "learning_rate": 1.1483670549172737e-05, "loss": 0.5453, "step": 33023 }, { "epoch": 0.9067545304777594, "grad_norm": 0.4217193126678467, "learning_rate": 1.14832434344204e-05, "loss": 0.4469, "step": 33024 }, { "epoch": 0.906781987918726, "grad_norm": 0.44294941425323486, "learning_rate": 1.148281631690134e-05, "loss": 0.5336, "step": 33025 }, { "epoch": 0.9068094453596924, "grad_norm": 0.38631778955459595, "learning_rate": 1.1482389196616354e-05, "loss": 0.5107, "step": 33026 }, { "epoch": 0.906836902800659, "grad_norm": 0.4077947735786438, "learning_rate": 1.148196207356624e-05, "loss": 0.5287, "step": 33027 }, { "epoch": 0.9068643602416255, "grad_norm": 0.47865787148475647, "learning_rate": 1.1481534947751793e-05, "loss": 0.5445, "step": 33028 }, { "epoch": 0.906891817682592, "grad_norm": 0.4410680830478668, "learning_rate": 1.1481107819173813e-05, "loss": 0.5032, "step": 33029 }, { "epoch": 0.9069192751235585, "grad_norm": 0.3489164412021637, "learning_rate": 1.1480680687833092e-05, "loss": 0.5227, "step": 33030 }, { "epoch": 0.906946732564525, "grad_norm": 0.4052288234233856, "learning_rate": 1.1480253553730429e-05, "loss": 0.5352, "step": 33031 }, { "epoch": 0.9069741900054915, "grad_norm": 0.3693874776363373, "learning_rate": 1.1479826416866622e-05, "loss": 0.4897, "step": 33032 }, { "epoch": 0.907001647446458, "grad_norm": 0.4372846484184265, "learning_rate": 1.1479399277242465e-05, "loss": 0.4229, "step": 33033 }, { "epoch": 0.9070291048874245, "grad_norm": 0.35235658288002014, "learning_rate": 1.1478972134858756e-05, "loss": 0.4621, "step": 33034 }, { "epoch": 0.907056562328391, "grad_norm": 0.4636097848415375, "learning_rate": 1.1478544989716292e-05, "loss": 0.5149, "step": 33035 }, { "epoch": 0.9070840197693575, "grad_norm": 0.4334105849266052, "learning_rate": 1.147811784181587e-05, "loss": 0.478, "step": 33036 }, { "epoch": 0.907111477210324, "grad_norm": 0.5043298602104187, "learning_rate": 1.1477690691158288e-05, "loss": 0.4831, "step": 33037 }, { "epoch": 0.9071389346512905, "grad_norm": 0.3736336827278137, "learning_rate": 1.1477263537744338e-05, "loss": 0.4796, "step": 33038 }, { "epoch": 0.907166392092257, "grad_norm": 0.3924731910228729, "learning_rate": 1.1476836381574823e-05, "loss": 0.4515, "step": 33039 }, { "epoch": 0.9071938495332235, "grad_norm": 0.44168519973754883, "learning_rate": 1.1476409222650535e-05, "loss": 0.5674, "step": 33040 }, { "epoch": 0.90722130697419, "grad_norm": 0.48789212107658386, "learning_rate": 1.1475982060972273e-05, "loss": 0.4636, "step": 33041 }, { "epoch": 0.9072487644151566, "grad_norm": 0.4664080739021301, "learning_rate": 1.1475554896540835e-05, "loss": 0.5362, "step": 33042 }, { "epoch": 0.907276221856123, "grad_norm": 0.4528812766075134, "learning_rate": 1.1475127729357015e-05, "loss": 0.5751, "step": 33043 }, { "epoch": 0.9073036792970895, "grad_norm": 0.37311992049217224, "learning_rate": 1.147470055942161e-05, "loss": 0.4323, "step": 33044 }, { "epoch": 0.907331136738056, "grad_norm": 0.48098689317703247, "learning_rate": 1.1474273386735419e-05, "loss": 0.6199, "step": 33045 }, { "epoch": 0.9073585941790225, "grad_norm": 0.3694537878036499, "learning_rate": 1.1473846211299237e-05, "loss": 0.454, "step": 33046 }, { "epoch": 0.907386051619989, "grad_norm": 0.39207932353019714, "learning_rate": 1.1473419033113862e-05, "loss": 0.4812, "step": 33047 }, { "epoch": 0.9074135090609555, "grad_norm": 0.39676544070243835, "learning_rate": 1.1472991852180089e-05, "loss": 0.4986, "step": 33048 }, { "epoch": 0.9074409665019221, "grad_norm": 0.34334975481033325, "learning_rate": 1.1472564668498718e-05, "loss": 0.388, "step": 33049 }, { "epoch": 0.9074684239428885, "grad_norm": 0.3459285497665405, "learning_rate": 1.1472137482070544e-05, "loss": 0.47, "step": 33050 }, { "epoch": 0.9074958813838551, "grad_norm": 0.4609956443309784, "learning_rate": 1.1471710292896361e-05, "loss": 0.4975, "step": 33051 }, { "epoch": 0.9075233388248215, "grad_norm": 0.35907086730003357, "learning_rate": 1.1471283100976971e-05, "loss": 0.5185, "step": 33052 }, { "epoch": 0.907550796265788, "grad_norm": 0.41893360018730164, "learning_rate": 1.1470855906313166e-05, "loss": 0.545, "step": 33053 }, { "epoch": 0.9075782537067545, "grad_norm": 0.4535546898841858, "learning_rate": 1.1470428708905747e-05, "loss": 0.4447, "step": 33054 }, { "epoch": 0.907605711147721, "grad_norm": 0.38038602471351624, "learning_rate": 1.1470001508755511e-05, "loss": 0.5353, "step": 33055 }, { "epoch": 0.9076331685886876, "grad_norm": 0.41682732105255127, "learning_rate": 1.146957430586325e-05, "loss": 0.5267, "step": 33056 }, { "epoch": 0.907660626029654, "grad_norm": 0.39049237966537476, "learning_rate": 1.1469147100229765e-05, "loss": 0.521, "step": 33057 }, { "epoch": 0.9076880834706206, "grad_norm": 0.4202724099159241, "learning_rate": 1.146871989185585e-05, "loss": 0.4754, "step": 33058 }, { "epoch": 0.907715540911587, "grad_norm": 0.42585301399230957, "learning_rate": 1.1468292680742305e-05, "loss": 0.4963, "step": 33059 }, { "epoch": 0.9077429983525536, "grad_norm": 0.36066341400146484, "learning_rate": 1.1467865466889929e-05, "loss": 0.4135, "step": 33060 }, { "epoch": 0.90777045579352, "grad_norm": 0.33537542819976807, "learning_rate": 1.146743825029951e-05, "loss": 0.4249, "step": 33061 }, { "epoch": 0.9077979132344866, "grad_norm": 0.40968504548072815, "learning_rate": 1.1467011030971853e-05, "loss": 0.5412, "step": 33062 }, { "epoch": 0.9078253706754531, "grad_norm": 0.47990649938583374, "learning_rate": 1.1466583808907752e-05, "loss": 0.4929, "step": 33063 }, { "epoch": 0.9078528281164195, "grad_norm": 0.40321773290634155, "learning_rate": 1.1466156584108003e-05, "loss": 0.5787, "step": 33064 }, { "epoch": 0.9078802855573861, "grad_norm": 0.3833484649658203, "learning_rate": 1.1465729356573405e-05, "loss": 0.4618, "step": 33065 }, { "epoch": 0.9079077429983525, "grad_norm": 0.3453899025917053, "learning_rate": 1.1465302126304754e-05, "loss": 0.3459, "step": 33066 }, { "epoch": 0.9079352004393191, "grad_norm": 0.4606780409812927, "learning_rate": 1.1464874893302849e-05, "loss": 0.5134, "step": 33067 }, { "epoch": 0.9079626578802855, "grad_norm": 0.41391780972480774, "learning_rate": 1.146444765756848e-05, "loss": 0.4312, "step": 33068 }, { "epoch": 0.9079901153212521, "grad_norm": 0.4663473963737488, "learning_rate": 1.1464020419102453e-05, "loss": 0.6139, "step": 33069 }, { "epoch": 0.9080175727622186, "grad_norm": 0.36777111887931824, "learning_rate": 1.1463593177905556e-05, "loss": 0.424, "step": 33070 }, { "epoch": 0.908045030203185, "grad_norm": 0.35914888978004456, "learning_rate": 1.1463165933978596e-05, "loss": 0.483, "step": 33071 }, { "epoch": 0.9080724876441516, "grad_norm": 0.3916766047477722, "learning_rate": 1.1462738687322361e-05, "loss": 0.4851, "step": 33072 }, { "epoch": 0.908099945085118, "grad_norm": 0.5756719708442688, "learning_rate": 1.1462311437937652e-05, "loss": 0.5248, "step": 33073 }, { "epoch": 0.9081274025260846, "grad_norm": 0.43903452157974243, "learning_rate": 1.1461884185825267e-05, "loss": 0.5006, "step": 33074 }, { "epoch": 0.908154859967051, "grad_norm": 0.37008246779441833, "learning_rate": 1.1461456930986e-05, "loss": 0.5273, "step": 33075 }, { "epoch": 0.9081823174080176, "grad_norm": 0.38046038150787354, "learning_rate": 1.146102967342065e-05, "loss": 0.4714, "step": 33076 }, { "epoch": 0.908209774848984, "grad_norm": 0.3595348000526428, "learning_rate": 1.1460602413130014e-05, "loss": 0.4793, "step": 33077 }, { "epoch": 0.9082372322899506, "grad_norm": 0.40073370933532715, "learning_rate": 1.1460175150114888e-05, "loss": 0.4496, "step": 33078 }, { "epoch": 0.9082646897309171, "grad_norm": 0.35275787115097046, "learning_rate": 1.1459747884376067e-05, "loss": 0.4755, "step": 33079 }, { "epoch": 0.9082921471718836, "grad_norm": 0.38199031352996826, "learning_rate": 1.1459320615914351e-05, "loss": 0.4718, "step": 33080 }, { "epoch": 0.9083196046128501, "grad_norm": 0.699432909488678, "learning_rate": 1.145889334473054e-05, "loss": 0.481, "step": 33081 }, { "epoch": 0.9083470620538165, "grad_norm": 0.36426064372062683, "learning_rate": 1.1458466070825423e-05, "loss": 0.4945, "step": 33082 }, { "epoch": 0.9083745194947831, "grad_norm": 0.39134448766708374, "learning_rate": 1.1458038794199804e-05, "loss": 0.495, "step": 33083 }, { "epoch": 0.9084019769357495, "grad_norm": 0.4984046518802643, "learning_rate": 1.1457611514854476e-05, "loss": 0.4985, "step": 33084 }, { "epoch": 0.9084294343767161, "grad_norm": 0.4959951341152191, "learning_rate": 1.1457184232790237e-05, "loss": 0.4877, "step": 33085 }, { "epoch": 0.9084568918176826, "grad_norm": 0.37100088596343994, "learning_rate": 1.1456756948007885e-05, "loss": 0.5471, "step": 33086 }, { "epoch": 0.9084843492586491, "grad_norm": 0.4096302092075348, "learning_rate": 1.1456329660508213e-05, "loss": 0.4722, "step": 33087 }, { "epoch": 0.9085118066996156, "grad_norm": 0.42612117528915405, "learning_rate": 1.1455902370292027e-05, "loss": 0.5493, "step": 33088 }, { "epoch": 0.9085392641405821, "grad_norm": 0.39556312561035156, "learning_rate": 1.1455475077360116e-05, "loss": 0.4611, "step": 33089 }, { "epoch": 0.9085667215815486, "grad_norm": 0.3768816590309143, "learning_rate": 1.145504778171328e-05, "loss": 0.4938, "step": 33090 }, { "epoch": 0.908594179022515, "grad_norm": 0.3724977374076843, "learning_rate": 1.1454620483352314e-05, "loss": 0.4276, "step": 33091 }, { "epoch": 0.9086216364634816, "grad_norm": 0.37662702798843384, "learning_rate": 1.1454193182278018e-05, "loss": 0.4884, "step": 33092 }, { "epoch": 0.9086490939044481, "grad_norm": 0.3518172800540924, "learning_rate": 1.1453765878491188e-05, "loss": 0.4056, "step": 33093 }, { "epoch": 0.9086765513454146, "grad_norm": 0.3737923800945282, "learning_rate": 1.145333857199262e-05, "loss": 0.4392, "step": 33094 }, { "epoch": 0.9087040087863811, "grad_norm": 0.42448192834854126, "learning_rate": 1.145291126278311e-05, "loss": 0.4812, "step": 33095 }, { "epoch": 0.9087314662273476, "grad_norm": 0.38830214738845825, "learning_rate": 1.145248395086346e-05, "loss": 0.4852, "step": 33096 }, { "epoch": 0.9087589236683141, "grad_norm": 0.41828227043151855, "learning_rate": 1.1452056636234461e-05, "loss": 0.5038, "step": 33097 }, { "epoch": 0.9087863811092806, "grad_norm": 0.3983025550842285, "learning_rate": 1.1451629318896913e-05, "loss": 0.5138, "step": 33098 }, { "epoch": 0.9088138385502471, "grad_norm": 0.3972274363040924, "learning_rate": 1.1451201998851615e-05, "loss": 0.4733, "step": 33099 }, { "epoch": 0.9088412959912137, "grad_norm": 0.406499445438385, "learning_rate": 1.1450774676099362e-05, "loss": 0.5821, "step": 33100 }, { "epoch": 0.9088687534321801, "grad_norm": 0.3908696472644806, "learning_rate": 1.1450347350640948e-05, "loss": 0.4955, "step": 33101 }, { "epoch": 0.9088962108731466, "grad_norm": 0.36769968271255493, "learning_rate": 1.1449920022477176e-05, "loss": 0.4736, "step": 33102 }, { "epoch": 0.9089236683141131, "grad_norm": 0.38542038202285767, "learning_rate": 1.1449492691608841e-05, "loss": 0.465, "step": 33103 }, { "epoch": 0.9089511257550796, "grad_norm": 0.41010528802871704, "learning_rate": 1.144906535803674e-05, "loss": 0.4709, "step": 33104 }, { "epoch": 0.9089785831960461, "grad_norm": 0.39042380452156067, "learning_rate": 1.1448638021761667e-05, "loss": 0.411, "step": 33105 }, { "epoch": 0.9090060406370126, "grad_norm": 0.41408607363700867, "learning_rate": 1.1448210682784425e-05, "loss": 0.4852, "step": 33106 }, { "epoch": 0.9090334980779792, "grad_norm": 0.4114986062049866, "learning_rate": 1.1447783341105803e-05, "loss": 0.5183, "step": 33107 }, { "epoch": 0.9090609555189456, "grad_norm": 0.3943922221660614, "learning_rate": 1.1447355996726605e-05, "loss": 0.4033, "step": 33108 }, { "epoch": 0.9090884129599122, "grad_norm": 0.3915098011493683, "learning_rate": 1.1446928649647628e-05, "loss": 0.4571, "step": 33109 }, { "epoch": 0.9091158704008786, "grad_norm": 0.36691197752952576, "learning_rate": 1.1446501299869666e-05, "loss": 0.4208, "step": 33110 }, { "epoch": 0.9091433278418452, "grad_norm": 0.3648339807987213, "learning_rate": 1.1446073947393517e-05, "loss": 0.4578, "step": 33111 }, { "epoch": 0.9091707852828116, "grad_norm": 0.429741770029068, "learning_rate": 1.1445646592219982e-05, "loss": 0.5627, "step": 33112 }, { "epoch": 0.9091982427237781, "grad_norm": 0.521229088306427, "learning_rate": 1.144521923434985e-05, "loss": 0.6492, "step": 33113 }, { "epoch": 0.9092257001647447, "grad_norm": 0.37684911489486694, "learning_rate": 1.1444791873783924e-05, "loss": 0.5183, "step": 33114 }, { "epoch": 0.9092531576057111, "grad_norm": 0.4135623872280121, "learning_rate": 1.1444364510523e-05, "loss": 0.4603, "step": 33115 }, { "epoch": 0.9092806150466777, "grad_norm": 0.350619912147522, "learning_rate": 1.1443937144567875e-05, "loss": 0.4815, "step": 33116 }, { "epoch": 0.9093080724876441, "grad_norm": 0.3928729295730591, "learning_rate": 1.144350977591935e-05, "loss": 0.4593, "step": 33117 }, { "epoch": 0.9093355299286107, "grad_norm": 0.34866589307785034, "learning_rate": 1.1443082404578214e-05, "loss": 0.4963, "step": 33118 }, { "epoch": 0.9093629873695771, "grad_norm": 0.44498178362846375, "learning_rate": 1.1442655030545272e-05, "loss": 0.5314, "step": 33119 }, { "epoch": 0.9093904448105437, "grad_norm": 0.40811270475387573, "learning_rate": 1.1442227653821316e-05, "loss": 0.4644, "step": 33120 }, { "epoch": 0.9094179022515102, "grad_norm": 0.39578723907470703, "learning_rate": 1.1441800274407146e-05, "loss": 0.507, "step": 33121 }, { "epoch": 0.9094453596924766, "grad_norm": 0.3812606930732727, "learning_rate": 1.1441372892303559e-05, "loss": 0.4041, "step": 33122 }, { "epoch": 0.9094728171334432, "grad_norm": 0.41026046872138977, "learning_rate": 1.144094550751135e-05, "loss": 0.5135, "step": 33123 }, { "epoch": 0.9095002745744096, "grad_norm": 0.4123243987560272, "learning_rate": 1.1440518120031317e-05, "loss": 0.5179, "step": 33124 }, { "epoch": 0.9095277320153762, "grad_norm": 0.38606804609298706, "learning_rate": 1.1440090729864259e-05, "loss": 0.5389, "step": 33125 }, { "epoch": 0.9095551894563426, "grad_norm": 0.33387258648872375, "learning_rate": 1.1439663337010973e-05, "loss": 0.4215, "step": 33126 }, { "epoch": 0.9095826468973092, "grad_norm": 0.42763251066207886, "learning_rate": 1.1439235941472255e-05, "loss": 0.5469, "step": 33127 }, { "epoch": 0.9096101043382757, "grad_norm": 0.36759066581726074, "learning_rate": 1.14388085432489e-05, "loss": 0.4486, "step": 33128 }, { "epoch": 0.9096375617792422, "grad_norm": 0.3896561861038208, "learning_rate": 1.1438381142341712e-05, "loss": 0.4949, "step": 33129 }, { "epoch": 0.9096650192202087, "grad_norm": 0.42469435930252075, "learning_rate": 1.1437953738751484e-05, "loss": 0.5195, "step": 33130 }, { "epoch": 0.9096924766611751, "grad_norm": 0.400480180978775, "learning_rate": 1.143752633247901e-05, "loss": 0.4695, "step": 33131 }, { "epoch": 0.9097199341021417, "grad_norm": 0.3548296391963959, "learning_rate": 1.1437098923525092e-05, "loss": 0.4624, "step": 33132 }, { "epoch": 0.9097473915431081, "grad_norm": 0.3553624451160431, "learning_rate": 1.1436671511890525e-05, "loss": 0.4312, "step": 33133 }, { "epoch": 0.9097748489840747, "grad_norm": 0.4570797383785248, "learning_rate": 1.1436244097576108e-05, "loss": 0.4984, "step": 33134 }, { "epoch": 0.9098023064250412, "grad_norm": 0.34384825825691223, "learning_rate": 1.1435816680582639e-05, "loss": 0.4069, "step": 33135 }, { "epoch": 0.9098297638660077, "grad_norm": 0.3956947922706604, "learning_rate": 1.1435389260910912e-05, "loss": 0.4894, "step": 33136 }, { "epoch": 0.9098572213069742, "grad_norm": 0.42510855197906494, "learning_rate": 1.1434961838561726e-05, "loss": 0.5387, "step": 33137 }, { "epoch": 0.9098846787479407, "grad_norm": 0.6288581490516663, "learning_rate": 1.1434534413535876e-05, "loss": 0.5392, "step": 33138 }, { "epoch": 0.9099121361889072, "grad_norm": 0.4366309344768524, "learning_rate": 1.1434106985834165e-05, "loss": 0.5184, "step": 33139 }, { "epoch": 0.9099395936298736, "grad_norm": 0.4251192808151245, "learning_rate": 1.1433679555457386e-05, "loss": 0.5746, "step": 33140 }, { "epoch": 0.9099670510708402, "grad_norm": 0.45134472846984863, "learning_rate": 1.1433252122406335e-05, "loss": 0.515, "step": 33141 }, { "epoch": 0.9099945085118067, "grad_norm": 0.3993982970714569, "learning_rate": 1.1432824686681813e-05, "loss": 0.4802, "step": 33142 }, { "epoch": 0.9100219659527732, "grad_norm": 0.3716014325618744, "learning_rate": 1.1432397248284618e-05, "loss": 0.439, "step": 33143 }, { "epoch": 0.9100494233937397, "grad_norm": 0.3878976106643677, "learning_rate": 1.1431969807215541e-05, "loss": 0.4659, "step": 33144 }, { "epoch": 0.9100768808347062, "grad_norm": 0.37241220474243164, "learning_rate": 1.1431542363475386e-05, "loss": 0.523, "step": 33145 }, { "epoch": 0.9101043382756727, "grad_norm": 0.39115074276924133, "learning_rate": 1.1431114917064945e-05, "loss": 0.551, "step": 33146 }, { "epoch": 0.9101317957166392, "grad_norm": 0.4192158877849579, "learning_rate": 1.143068746798502e-05, "loss": 0.517, "step": 33147 }, { "epoch": 0.9101592531576057, "grad_norm": 0.3560570180416107, "learning_rate": 1.1430260016236405e-05, "loss": 0.4375, "step": 33148 }, { "epoch": 0.9101867105985723, "grad_norm": 0.39886757731437683, "learning_rate": 1.1429832561819897e-05, "loss": 0.521, "step": 33149 }, { "epoch": 0.9102141680395387, "grad_norm": 0.4082705080509186, "learning_rate": 1.14294051047363e-05, "loss": 0.5029, "step": 33150 }, { "epoch": 0.9102416254805052, "grad_norm": 0.5149462819099426, "learning_rate": 1.1428977644986401e-05, "loss": 0.4814, "step": 33151 }, { "epoch": 0.9102690829214717, "grad_norm": 0.41780000925064087, "learning_rate": 1.1428550182571005e-05, "loss": 0.4784, "step": 33152 }, { "epoch": 0.9102965403624382, "grad_norm": 0.3567448556423187, "learning_rate": 1.1428122717490908e-05, "loss": 0.5316, "step": 33153 }, { "epoch": 0.9103239978034047, "grad_norm": 0.3563809394836426, "learning_rate": 1.1427695249746904e-05, "loss": 0.4572, "step": 33154 }, { "epoch": 0.9103514552443712, "grad_norm": 0.39646559953689575, "learning_rate": 1.1427267779339795e-05, "loss": 0.462, "step": 33155 }, { "epoch": 0.9103789126853378, "grad_norm": 1.426276683807373, "learning_rate": 1.1426840306270375e-05, "loss": 0.5162, "step": 33156 }, { "epoch": 0.9104063701263042, "grad_norm": 0.40234455466270447, "learning_rate": 1.1426412830539441e-05, "loss": 0.4827, "step": 33157 }, { "epoch": 0.9104338275672708, "grad_norm": 0.4022219181060791, "learning_rate": 1.1425985352147794e-05, "loss": 0.4799, "step": 33158 }, { "epoch": 0.9104612850082372, "grad_norm": 0.39916443824768066, "learning_rate": 1.1425557871096227e-05, "loss": 0.4613, "step": 33159 }, { "epoch": 0.9104887424492037, "grad_norm": 0.36313945055007935, "learning_rate": 1.1425130387385543e-05, "loss": 0.3822, "step": 33160 }, { "epoch": 0.9105161998901702, "grad_norm": 0.3736327886581421, "learning_rate": 1.1424702901016534e-05, "loss": 0.4125, "step": 33161 }, { "epoch": 0.9105436573311367, "grad_norm": 0.4331866502761841, "learning_rate": 1.1424275411989998e-05, "loss": 0.4915, "step": 33162 }, { "epoch": 0.9105711147721033, "grad_norm": 0.36839228868484497, "learning_rate": 1.1423847920306736e-05, "loss": 0.4627, "step": 33163 }, { "epoch": 0.9105985722130697, "grad_norm": 0.3644300401210785, "learning_rate": 1.1423420425967542e-05, "loss": 0.4745, "step": 33164 }, { "epoch": 0.9106260296540363, "grad_norm": 0.40666380524635315, "learning_rate": 1.1422992928973216e-05, "loss": 0.451, "step": 33165 }, { "epoch": 0.9106534870950027, "grad_norm": 0.428646445274353, "learning_rate": 1.1422565429324552e-05, "loss": 0.6008, "step": 33166 }, { "epoch": 0.9106809445359693, "grad_norm": 0.35679861903190613, "learning_rate": 1.142213792702235e-05, "loss": 0.4887, "step": 33167 }, { "epoch": 0.9107084019769357, "grad_norm": 0.37931734323501587, "learning_rate": 1.1421710422067407e-05, "loss": 0.482, "step": 33168 }, { "epoch": 0.9107358594179022, "grad_norm": 0.340728759765625, "learning_rate": 1.1421282914460522e-05, "loss": 0.4521, "step": 33169 }, { "epoch": 0.9107633168588688, "grad_norm": 0.4264187812805176, "learning_rate": 1.1420855404202489e-05, "loss": 0.5635, "step": 33170 }, { "epoch": 0.9107907742998352, "grad_norm": 0.41844844818115234, "learning_rate": 1.1420427891294107e-05, "loss": 0.4753, "step": 33171 }, { "epoch": 0.9108182317408018, "grad_norm": 0.4013041853904724, "learning_rate": 1.1420000375736173e-05, "loss": 0.4856, "step": 33172 }, { "epoch": 0.9108456891817682, "grad_norm": 0.4040120244026184, "learning_rate": 1.1419572857529486e-05, "loss": 0.4956, "step": 33173 }, { "epoch": 0.9108731466227348, "grad_norm": 0.36364638805389404, "learning_rate": 1.1419145336674844e-05, "loss": 0.3901, "step": 33174 }, { "epoch": 0.9109006040637012, "grad_norm": 0.3928261995315552, "learning_rate": 1.1418717813173042e-05, "loss": 0.474, "step": 33175 }, { "epoch": 0.9109280615046678, "grad_norm": 0.4124365746974945, "learning_rate": 1.1418290287024878e-05, "loss": 0.3647, "step": 33176 }, { "epoch": 0.9109555189456343, "grad_norm": 0.41284263134002686, "learning_rate": 1.1417862758231149e-05, "loss": 0.4631, "step": 33177 }, { "epoch": 0.9109829763866008, "grad_norm": 0.39145007729530334, "learning_rate": 1.1417435226792654e-05, "loss": 0.5445, "step": 33178 }, { "epoch": 0.9110104338275673, "grad_norm": 0.4044134318828583, "learning_rate": 1.1417007692710194e-05, "loss": 0.4594, "step": 33179 }, { "epoch": 0.9110378912685337, "grad_norm": 0.36351510882377625, "learning_rate": 1.1416580155984555e-05, "loss": 0.4532, "step": 33180 }, { "epoch": 0.9110653487095003, "grad_norm": 0.39040520787239075, "learning_rate": 1.1416152616616548e-05, "loss": 0.5043, "step": 33181 }, { "epoch": 0.9110928061504667, "grad_norm": 0.45861244201660156, "learning_rate": 1.141572507460696e-05, "loss": 0.4347, "step": 33182 }, { "epoch": 0.9111202635914333, "grad_norm": 0.343658983707428, "learning_rate": 1.1415297529956598e-05, "loss": 0.378, "step": 33183 }, { "epoch": 0.9111477210323998, "grad_norm": 0.42099499702453613, "learning_rate": 1.141486998266625e-05, "loss": 0.524, "step": 33184 }, { "epoch": 0.9111751784733663, "grad_norm": 0.3802451193332672, "learning_rate": 1.1414442432736719e-05, "loss": 0.534, "step": 33185 }, { "epoch": 0.9112026359143328, "grad_norm": 0.40459051728248596, "learning_rate": 1.1414014880168803e-05, "loss": 0.5352, "step": 33186 }, { "epoch": 0.9112300933552993, "grad_norm": 0.4014900326728821, "learning_rate": 1.1413587324963297e-05, "loss": 0.5115, "step": 33187 }, { "epoch": 0.9112575507962658, "grad_norm": 0.39440059661865234, "learning_rate": 1.1413159767120998e-05, "loss": 0.5525, "step": 33188 }, { "epoch": 0.9112850082372322, "grad_norm": 0.3816300928592682, "learning_rate": 1.1412732206642708e-05, "loss": 0.5211, "step": 33189 }, { "epoch": 0.9113124656781988, "grad_norm": 0.3885083496570587, "learning_rate": 1.1412304643529218e-05, "loss": 0.3763, "step": 33190 }, { "epoch": 0.9113399231191653, "grad_norm": 0.34326091408729553, "learning_rate": 1.141187707778133e-05, "loss": 0.4163, "step": 33191 }, { "epoch": 0.9113673805601318, "grad_norm": 0.3493137061595917, "learning_rate": 1.1411449509399844e-05, "loss": 0.4517, "step": 33192 }, { "epoch": 0.9113948380010983, "grad_norm": 0.3930020332336426, "learning_rate": 1.141102193838555e-05, "loss": 0.5476, "step": 33193 }, { "epoch": 0.9114222954420648, "grad_norm": 0.4072372615337372, "learning_rate": 1.1410594364739252e-05, "loss": 0.4332, "step": 33194 }, { "epoch": 0.9114497528830313, "grad_norm": 0.41620534658432007, "learning_rate": 1.1410166788461742e-05, "loss": 0.5207, "step": 33195 }, { "epoch": 0.9114772103239978, "grad_norm": 0.4529667794704437, "learning_rate": 1.1409739209553825e-05, "loss": 0.5028, "step": 33196 }, { "epoch": 0.9115046677649643, "grad_norm": 0.3901026248931885, "learning_rate": 1.1409311628016295e-05, "loss": 0.505, "step": 33197 }, { "epoch": 0.9115321252059309, "grad_norm": 0.48599866032600403, "learning_rate": 1.1408884043849947e-05, "loss": 0.5226, "step": 33198 }, { "epoch": 0.9115595826468973, "grad_norm": 0.4142121374607086, "learning_rate": 1.1408456457055579e-05, "loss": 0.5065, "step": 33199 }, { "epoch": 0.9115870400878638, "grad_norm": 0.40359431505203247, "learning_rate": 1.1408028867633991e-05, "loss": 0.4704, "step": 33200 }, { "epoch": 0.9116144975288303, "grad_norm": 0.43472006916999817, "learning_rate": 1.1407601275585983e-05, "loss": 0.5448, "step": 33201 }, { "epoch": 0.9116419549697968, "grad_norm": 0.3834371268749237, "learning_rate": 1.1407173680912347e-05, "loss": 0.4486, "step": 33202 }, { "epoch": 0.9116694124107633, "grad_norm": 0.402173787355423, "learning_rate": 1.1406746083613883e-05, "loss": 0.5357, "step": 33203 }, { "epoch": 0.9116968698517298, "grad_norm": 0.895419180393219, "learning_rate": 1.1406318483691387e-05, "loss": 0.5253, "step": 33204 }, { "epoch": 0.9117243272926964, "grad_norm": 0.35169127583503723, "learning_rate": 1.140589088114566e-05, "loss": 0.503, "step": 33205 }, { "epoch": 0.9117517847336628, "grad_norm": 0.42632538080215454, "learning_rate": 1.1405463275977498e-05, "loss": 0.4935, "step": 33206 }, { "epoch": 0.9117792421746294, "grad_norm": 0.35165640711784363, "learning_rate": 1.1405035668187698e-05, "loss": 0.4133, "step": 33207 }, { "epoch": 0.9118066996155958, "grad_norm": 0.4498632550239563, "learning_rate": 1.1404608057777055e-05, "loss": 0.3807, "step": 33208 }, { "epoch": 0.9118341570565623, "grad_norm": 0.4246140718460083, "learning_rate": 1.1404180444746374e-05, "loss": 0.5027, "step": 33209 }, { "epoch": 0.9118616144975288, "grad_norm": 0.4031001329421997, "learning_rate": 1.1403752829096448e-05, "loss": 0.507, "step": 33210 }, { "epoch": 0.9118890719384953, "grad_norm": 0.3955107629299164, "learning_rate": 1.1403325210828074e-05, "loss": 0.4631, "step": 33211 }, { "epoch": 0.9119165293794619, "grad_norm": 0.40645620226860046, "learning_rate": 1.140289758994205e-05, "loss": 0.5691, "step": 33212 }, { "epoch": 0.9119439868204283, "grad_norm": 0.4885683059692383, "learning_rate": 1.1402469966439173e-05, "loss": 0.4177, "step": 33213 }, { "epoch": 0.9119714442613949, "grad_norm": 0.3716363310813904, "learning_rate": 1.1402042340320245e-05, "loss": 0.4417, "step": 33214 }, { "epoch": 0.9119989017023613, "grad_norm": 0.4086598753929138, "learning_rate": 1.140161471158606e-05, "loss": 0.4599, "step": 33215 }, { "epoch": 0.9120263591433279, "grad_norm": 0.36536434292793274, "learning_rate": 1.1401187080237413e-05, "loss": 0.3802, "step": 33216 }, { "epoch": 0.9120538165842943, "grad_norm": 0.39356744289398193, "learning_rate": 1.140075944627511e-05, "loss": 0.4777, "step": 33217 }, { "epoch": 0.9120812740252608, "grad_norm": 0.4061787724494934, "learning_rate": 1.1400331809699941e-05, "loss": 0.4756, "step": 33218 }, { "epoch": 0.9121087314662274, "grad_norm": 0.4425891637802124, "learning_rate": 1.1399904170512703e-05, "loss": 0.5082, "step": 33219 }, { "epoch": 0.9121361889071938, "grad_norm": 0.7024703621864319, "learning_rate": 1.1399476528714202e-05, "loss": 0.3965, "step": 33220 }, { "epoch": 0.9121636463481604, "grad_norm": 0.43580636382102966, "learning_rate": 1.1399048884305226e-05, "loss": 0.5464, "step": 33221 }, { "epoch": 0.9121911037891268, "grad_norm": 0.38742151856422424, "learning_rate": 1.1398621237286582e-05, "loss": 0.4095, "step": 33222 }, { "epoch": 0.9122185612300934, "grad_norm": 0.4103691875934601, "learning_rate": 1.1398193587659061e-05, "loss": 0.4926, "step": 33223 }, { "epoch": 0.9122460186710598, "grad_norm": 0.42901286482810974, "learning_rate": 1.1397765935423462e-05, "loss": 0.5242, "step": 33224 }, { "epoch": 0.9122734761120264, "grad_norm": 0.3242127597332001, "learning_rate": 1.1397338280580586e-05, "loss": 0.3945, "step": 33225 }, { "epoch": 0.9123009335529929, "grad_norm": 0.3531312048435211, "learning_rate": 1.1396910623131223e-05, "loss": 0.48, "step": 33226 }, { "epoch": 0.9123283909939593, "grad_norm": 0.39910373091697693, "learning_rate": 1.139648296307618e-05, "loss": 0.4583, "step": 33227 }, { "epoch": 0.9123558484349259, "grad_norm": 0.4098438620567322, "learning_rate": 1.1396055300416252e-05, "loss": 0.4681, "step": 33228 }, { "epoch": 0.9123833058758923, "grad_norm": 0.6536081433296204, "learning_rate": 1.1395627635152231e-05, "loss": 0.4699, "step": 33229 }, { "epoch": 0.9124107633168589, "grad_norm": 0.4102379381656647, "learning_rate": 1.139519996728492e-05, "loss": 0.4892, "step": 33230 }, { "epoch": 0.9124382207578253, "grad_norm": 0.3981364369392395, "learning_rate": 1.1394772296815115e-05, "loss": 0.5093, "step": 33231 }, { "epoch": 0.9124656781987919, "grad_norm": 0.3571736514568329, "learning_rate": 1.1394344623743618e-05, "loss": 0.4622, "step": 33232 }, { "epoch": 0.9124931356397584, "grad_norm": 0.37555983662605286, "learning_rate": 1.1393916948071221e-05, "loss": 0.4773, "step": 33233 }, { "epoch": 0.9125205930807249, "grad_norm": 0.5989987850189209, "learning_rate": 1.1393489269798725e-05, "loss": 0.5654, "step": 33234 }, { "epoch": 0.9125480505216914, "grad_norm": 0.36564117670059204, "learning_rate": 1.1393061588926925e-05, "loss": 0.4865, "step": 33235 }, { "epoch": 0.9125755079626579, "grad_norm": 0.3931315243244171, "learning_rate": 1.1392633905456625e-05, "loss": 0.4216, "step": 33236 }, { "epoch": 0.9126029654036244, "grad_norm": 0.4013591706752777, "learning_rate": 1.1392206219388613e-05, "loss": 0.5932, "step": 33237 }, { "epoch": 0.9126304228445908, "grad_norm": 0.37055855989456177, "learning_rate": 1.1391778530723694e-05, "loss": 0.4976, "step": 33238 }, { "epoch": 0.9126578802855574, "grad_norm": 0.609919548034668, "learning_rate": 1.1391350839462662e-05, "loss": 0.425, "step": 33239 }, { "epoch": 0.9126853377265239, "grad_norm": 0.4533447325229645, "learning_rate": 1.1390923145606321e-05, "loss": 0.5435, "step": 33240 }, { "epoch": 0.9127127951674904, "grad_norm": 0.37283217906951904, "learning_rate": 1.1390495449155461e-05, "loss": 0.4633, "step": 33241 }, { "epoch": 0.9127402526084569, "grad_norm": 0.4493156969547272, "learning_rate": 1.1390067750110885e-05, "loss": 0.5318, "step": 33242 }, { "epoch": 0.9127677100494234, "grad_norm": 0.3815068006515503, "learning_rate": 1.1389640048473388e-05, "loss": 0.5023, "step": 33243 }, { "epoch": 0.9127951674903899, "grad_norm": 0.3648495078086853, "learning_rate": 1.1389212344243767e-05, "loss": 0.4297, "step": 33244 }, { "epoch": 0.9128226249313564, "grad_norm": 0.5694277286529541, "learning_rate": 1.1388784637422825e-05, "loss": 0.5709, "step": 33245 }, { "epoch": 0.9128500823723229, "grad_norm": 0.7918714880943298, "learning_rate": 1.1388356928011352e-05, "loss": 0.5502, "step": 33246 }, { "epoch": 0.9128775398132895, "grad_norm": 0.42878714203834534, "learning_rate": 1.1387929216010153e-05, "loss": 0.4994, "step": 33247 }, { "epoch": 0.9129049972542559, "grad_norm": 0.36424633860588074, "learning_rate": 1.1387501501420027e-05, "loss": 0.4685, "step": 33248 }, { "epoch": 0.9129324546952224, "grad_norm": 0.35172098875045776, "learning_rate": 1.1387073784241762e-05, "loss": 0.4075, "step": 33249 }, { "epoch": 0.9129599121361889, "grad_norm": 0.38967764377593994, "learning_rate": 1.1386646064476163e-05, "loss": 0.4615, "step": 33250 }, { "epoch": 0.9129873695771554, "grad_norm": 0.3774595558643341, "learning_rate": 1.1386218342124026e-05, "loss": 0.5283, "step": 33251 }, { "epoch": 0.9130148270181219, "grad_norm": 0.41250696778297424, "learning_rate": 1.1385790617186152e-05, "loss": 0.5552, "step": 33252 }, { "epoch": 0.9130422844590884, "grad_norm": 0.3871211111545563, "learning_rate": 1.1385362889663337e-05, "loss": 0.4794, "step": 33253 }, { "epoch": 0.913069741900055, "grad_norm": 0.3958478271961212, "learning_rate": 1.1384935159556375e-05, "loss": 0.4888, "step": 33254 }, { "epoch": 0.9130971993410214, "grad_norm": 0.38556355237960815, "learning_rate": 1.1384507426866067e-05, "loss": 0.5029, "step": 33255 }, { "epoch": 0.913124656781988, "grad_norm": 0.37854525446891785, "learning_rate": 1.138407969159321e-05, "loss": 0.4649, "step": 33256 }, { "epoch": 0.9131521142229544, "grad_norm": 0.4754444658756256, "learning_rate": 1.1383651953738606e-05, "loss": 0.4898, "step": 33257 }, { "epoch": 0.9131795716639209, "grad_norm": 0.4187973141670227, "learning_rate": 1.1383224213303046e-05, "loss": 0.5086, "step": 33258 }, { "epoch": 0.9132070291048874, "grad_norm": 0.34257519245147705, "learning_rate": 1.1382796470287332e-05, "loss": 0.329, "step": 33259 }, { "epoch": 0.9132344865458539, "grad_norm": 0.38011637330055237, "learning_rate": 1.1382368724692262e-05, "loss": 0.527, "step": 33260 }, { "epoch": 0.9132619439868205, "grad_norm": 0.38932064175605774, "learning_rate": 1.1381940976518635e-05, "loss": 0.5154, "step": 33261 }, { "epoch": 0.9132894014277869, "grad_norm": 0.3985004425048828, "learning_rate": 1.1381513225767245e-05, "loss": 0.5086, "step": 33262 }, { "epoch": 0.9133168588687535, "grad_norm": 0.3966637849807739, "learning_rate": 1.138108547243889e-05, "loss": 0.4992, "step": 33263 }, { "epoch": 0.9133443163097199, "grad_norm": 0.42909038066864014, "learning_rate": 1.1380657716534374e-05, "loss": 0.5458, "step": 33264 }, { "epoch": 0.9133717737506865, "grad_norm": 0.5516573786735535, "learning_rate": 1.138022995805449e-05, "loss": 0.4232, "step": 33265 }, { "epoch": 0.9133992311916529, "grad_norm": 0.3379818797111511, "learning_rate": 1.1379802197000034e-05, "loss": 0.4589, "step": 33266 }, { "epoch": 0.9134266886326194, "grad_norm": 0.37583765387535095, "learning_rate": 1.1379374433371807e-05, "loss": 0.5003, "step": 33267 }, { "epoch": 0.913454146073586, "grad_norm": 0.5642133355140686, "learning_rate": 1.1378946667170606e-05, "loss": 0.501, "step": 33268 }, { "epoch": 0.9134816035145524, "grad_norm": 0.4027680456638336, "learning_rate": 1.1378518898397232e-05, "loss": 0.5164, "step": 33269 }, { "epoch": 0.913509060955519, "grad_norm": 0.3852553069591522, "learning_rate": 1.1378091127052481e-05, "loss": 0.4956, "step": 33270 }, { "epoch": 0.9135365183964854, "grad_norm": 0.4264400601387024, "learning_rate": 1.1377663353137146e-05, "loss": 0.5565, "step": 33271 }, { "epoch": 0.913563975837452, "grad_norm": 0.376498818397522, "learning_rate": 1.1377235576652032e-05, "loss": 0.453, "step": 33272 }, { "epoch": 0.9135914332784184, "grad_norm": 0.37323880195617676, "learning_rate": 1.1376807797597933e-05, "loss": 0.5102, "step": 33273 }, { "epoch": 0.913618890719385, "grad_norm": 0.3864535689353943, "learning_rate": 1.137638001597565e-05, "loss": 0.5491, "step": 33274 }, { "epoch": 0.9136463481603515, "grad_norm": 0.3389412462711334, "learning_rate": 1.1375952231785976e-05, "loss": 0.4043, "step": 33275 }, { "epoch": 0.913673805601318, "grad_norm": 0.3731346130371094, "learning_rate": 1.1375524445029713e-05, "loss": 0.5637, "step": 33276 }, { "epoch": 0.9137012630422845, "grad_norm": 0.3787975609302521, "learning_rate": 1.137509665570766e-05, "loss": 0.4392, "step": 33277 }, { "epoch": 0.9137287204832509, "grad_norm": 0.40640127658843994, "learning_rate": 1.137466886382061e-05, "loss": 0.48, "step": 33278 }, { "epoch": 0.9137561779242175, "grad_norm": 0.39419639110565186, "learning_rate": 1.1374241069369365e-05, "loss": 0.4102, "step": 33279 }, { "epoch": 0.9137836353651839, "grad_norm": 0.4369790554046631, "learning_rate": 1.1373813272354723e-05, "loss": 0.4657, "step": 33280 }, { "epoch": 0.9138110928061505, "grad_norm": 0.3530455231666565, "learning_rate": 1.1373385472777478e-05, "loss": 0.481, "step": 33281 }, { "epoch": 0.913838550247117, "grad_norm": 0.43074536323547363, "learning_rate": 1.1372957670638434e-05, "loss": 0.5782, "step": 33282 }, { "epoch": 0.9138660076880835, "grad_norm": 0.39321398735046387, "learning_rate": 1.1372529865938381e-05, "loss": 0.4834, "step": 33283 }, { "epoch": 0.91389346512905, "grad_norm": 0.43572160601615906, "learning_rate": 1.1372102058678127e-05, "loss": 0.473, "step": 33284 }, { "epoch": 0.9139209225700164, "grad_norm": 0.39124253392219543, "learning_rate": 1.1371674248858462e-05, "loss": 0.4502, "step": 33285 }, { "epoch": 0.913948380010983, "grad_norm": 0.37752214074134827, "learning_rate": 1.1371246436480185e-05, "loss": 0.4985, "step": 33286 }, { "epoch": 0.9139758374519494, "grad_norm": 0.408319890499115, "learning_rate": 1.13708186215441e-05, "loss": 0.5272, "step": 33287 }, { "epoch": 0.914003294892916, "grad_norm": 0.490293025970459, "learning_rate": 1.1370390804050997e-05, "loss": 0.5763, "step": 33288 }, { "epoch": 0.9140307523338825, "grad_norm": 0.38309866189956665, "learning_rate": 1.136996298400168e-05, "loss": 0.4547, "step": 33289 }, { "epoch": 0.914058209774849, "grad_norm": 0.38430216908454895, "learning_rate": 1.1369535161396947e-05, "loss": 0.5118, "step": 33290 }, { "epoch": 0.9140856672158155, "grad_norm": 0.5054970979690552, "learning_rate": 1.1369107336237587e-05, "loss": 0.5558, "step": 33291 }, { "epoch": 0.914113124656782, "grad_norm": 0.393523633480072, "learning_rate": 1.1368679508524412e-05, "loss": 0.4609, "step": 33292 }, { "epoch": 0.9141405820977485, "grad_norm": 0.37956392765045166, "learning_rate": 1.1368251678258209e-05, "loss": 0.4152, "step": 33293 }, { "epoch": 0.914168039538715, "grad_norm": 0.45804134011268616, "learning_rate": 1.1367823845439781e-05, "loss": 0.5968, "step": 33294 }, { "epoch": 0.9141954969796815, "grad_norm": 0.3888421654701233, "learning_rate": 1.1367396010069927e-05, "loss": 0.5077, "step": 33295 }, { "epoch": 0.914222954420648, "grad_norm": 0.4175409972667694, "learning_rate": 1.1366968172149437e-05, "loss": 0.4433, "step": 33296 }, { "epoch": 0.9142504118616145, "grad_norm": 0.3859711289405823, "learning_rate": 1.1366540331679122e-05, "loss": 0.5088, "step": 33297 }, { "epoch": 0.914277869302581, "grad_norm": 0.37746697664260864, "learning_rate": 1.136611248865977e-05, "loss": 0.5056, "step": 33298 }, { "epoch": 0.9143053267435475, "grad_norm": 0.37914538383483887, "learning_rate": 1.136568464309218e-05, "loss": 0.5299, "step": 33299 }, { "epoch": 0.914332784184514, "grad_norm": 0.38242048025131226, "learning_rate": 1.1365256794977156e-05, "loss": 0.4841, "step": 33300 }, { "epoch": 0.9143602416254805, "grad_norm": 0.3920416235923767, "learning_rate": 1.136482894431549e-05, "loss": 0.5374, "step": 33301 }, { "epoch": 0.914387699066447, "grad_norm": 0.5411343574523926, "learning_rate": 1.1364401091107984e-05, "loss": 0.5014, "step": 33302 }, { "epoch": 0.9144151565074136, "grad_norm": 0.4681203365325928, "learning_rate": 1.1363973235355434e-05, "loss": 0.5132, "step": 33303 }, { "epoch": 0.91444261394838, "grad_norm": 0.46322840452194214, "learning_rate": 1.1363545377058636e-05, "loss": 0.5653, "step": 33304 }, { "epoch": 0.9144700713893466, "grad_norm": 0.4795876145362854, "learning_rate": 1.1363117516218393e-05, "loss": 0.5017, "step": 33305 }, { "epoch": 0.914497528830313, "grad_norm": 0.3977147936820984, "learning_rate": 1.1362689652835501e-05, "loss": 0.4927, "step": 33306 }, { "epoch": 0.9145249862712795, "grad_norm": 0.40267637372016907, "learning_rate": 1.1362261786910758e-05, "loss": 0.5577, "step": 33307 }, { "epoch": 0.914552443712246, "grad_norm": 0.4260820746421814, "learning_rate": 1.1361833918444962e-05, "loss": 0.5044, "step": 33308 }, { "epoch": 0.9145799011532125, "grad_norm": 0.3954068720340729, "learning_rate": 1.136140604743891e-05, "loss": 0.464, "step": 33309 }, { "epoch": 0.9146073585941791, "grad_norm": 0.3910139501094818, "learning_rate": 1.1360978173893403e-05, "loss": 0.624, "step": 33310 }, { "epoch": 0.9146348160351455, "grad_norm": 0.5402277708053589, "learning_rate": 1.1360550297809236e-05, "loss": 0.5854, "step": 33311 }, { "epoch": 0.9146622734761121, "grad_norm": 0.3928172290325165, "learning_rate": 1.1360122419187207e-05, "loss": 0.4608, "step": 33312 }, { "epoch": 0.9146897309170785, "grad_norm": 0.544653594493866, "learning_rate": 1.1359694538028119e-05, "loss": 0.502, "step": 33313 }, { "epoch": 0.914717188358045, "grad_norm": 0.40395694971084595, "learning_rate": 1.1359266654332764e-05, "loss": 0.4432, "step": 33314 }, { "epoch": 0.9147446457990115, "grad_norm": 0.3679070770740509, "learning_rate": 1.1358838768101946e-05, "loss": 0.4824, "step": 33315 }, { "epoch": 0.914772103239978, "grad_norm": 0.35480719804763794, "learning_rate": 1.1358410879336456e-05, "loss": 0.49, "step": 33316 }, { "epoch": 0.9147995606809446, "grad_norm": 0.3823293447494507, "learning_rate": 1.1357982988037097e-05, "loss": 0.4662, "step": 33317 }, { "epoch": 0.914827018121911, "grad_norm": 0.3486495912075043, "learning_rate": 1.1357555094204668e-05, "loss": 0.3936, "step": 33318 }, { "epoch": 0.9148544755628776, "grad_norm": 0.3745483458042145, "learning_rate": 1.1357127197839965e-05, "loss": 0.4649, "step": 33319 }, { "epoch": 0.914881933003844, "grad_norm": 0.3733184039592743, "learning_rate": 1.1356699298943787e-05, "loss": 0.4638, "step": 33320 }, { "epoch": 0.9149093904448106, "grad_norm": 0.4469298720359802, "learning_rate": 1.1356271397516932e-05, "loss": 0.438, "step": 33321 }, { "epoch": 0.914936847885777, "grad_norm": 0.39146697521209717, "learning_rate": 1.1355843493560196e-05, "loss": 0.5252, "step": 33322 }, { "epoch": 0.9149643053267436, "grad_norm": 0.42951714992523193, "learning_rate": 1.1355415587074381e-05, "loss": 0.5603, "step": 33323 }, { "epoch": 0.9149917627677101, "grad_norm": 0.4340181350708008, "learning_rate": 1.1354987678060281e-05, "loss": 0.5098, "step": 33324 }, { "epoch": 0.9150192202086765, "grad_norm": 0.41008666157722473, "learning_rate": 1.13545597665187e-05, "loss": 0.4528, "step": 33325 }, { "epoch": 0.9150466776496431, "grad_norm": 0.386633962392807, "learning_rate": 1.1354131852450431e-05, "loss": 0.4582, "step": 33326 }, { "epoch": 0.9150741350906095, "grad_norm": 0.48039302229881287, "learning_rate": 1.1353703935856272e-05, "loss": 0.4647, "step": 33327 }, { "epoch": 0.9151015925315761, "grad_norm": 0.3808883726596832, "learning_rate": 1.1353276016737027e-05, "loss": 0.4408, "step": 33328 }, { "epoch": 0.9151290499725425, "grad_norm": 0.41079777479171753, "learning_rate": 1.135284809509349e-05, "loss": 0.5378, "step": 33329 }, { "epoch": 0.9151565074135091, "grad_norm": 0.4387950599193573, "learning_rate": 1.1352420170926454e-05, "loss": 0.5315, "step": 33330 }, { "epoch": 0.9151839648544756, "grad_norm": 0.3882382810115814, "learning_rate": 1.135199224423673e-05, "loss": 0.5075, "step": 33331 }, { "epoch": 0.9152114222954421, "grad_norm": 0.5053169131278992, "learning_rate": 1.1351564315025104e-05, "loss": 0.5215, "step": 33332 }, { "epoch": 0.9152388797364086, "grad_norm": 0.3969508409500122, "learning_rate": 1.135113638329238e-05, "loss": 0.4355, "step": 33333 }, { "epoch": 0.915266337177375, "grad_norm": 0.3586532175540924, "learning_rate": 1.1350708449039357e-05, "loss": 0.5047, "step": 33334 }, { "epoch": 0.9152937946183416, "grad_norm": 0.3917367458343506, "learning_rate": 1.135028051226683e-05, "loss": 0.4978, "step": 33335 }, { "epoch": 0.915321252059308, "grad_norm": 0.3701058030128479, "learning_rate": 1.13498525729756e-05, "loss": 0.5167, "step": 33336 }, { "epoch": 0.9153487095002746, "grad_norm": 0.5030088424682617, "learning_rate": 1.134942463116646e-05, "loss": 0.4383, "step": 33337 }, { "epoch": 0.9153761669412411, "grad_norm": 0.4423559904098511, "learning_rate": 1.134899668684022e-05, "loss": 0.5634, "step": 33338 }, { "epoch": 0.9154036243822076, "grad_norm": 0.4709896445274353, "learning_rate": 1.1348568739997667e-05, "loss": 0.522, "step": 33339 }, { "epoch": 0.9154310818231741, "grad_norm": 0.4995152950286865, "learning_rate": 1.1348140790639601e-05, "loss": 0.5774, "step": 33340 }, { "epoch": 0.9154585392641406, "grad_norm": 0.3408285677433014, "learning_rate": 1.1347712838766825e-05, "loss": 0.4756, "step": 33341 }, { "epoch": 0.9154859967051071, "grad_norm": 0.401239275932312, "learning_rate": 1.1347284884380134e-05, "loss": 0.4874, "step": 33342 }, { "epoch": 0.9155134541460735, "grad_norm": 0.3945220112800598, "learning_rate": 1.1346856927480323e-05, "loss": 0.4712, "step": 33343 }, { "epoch": 0.9155409115870401, "grad_norm": 0.36609897017478943, "learning_rate": 1.13464289680682e-05, "loss": 0.4436, "step": 33344 }, { "epoch": 0.9155683690280065, "grad_norm": 0.39063096046447754, "learning_rate": 1.1346001006144552e-05, "loss": 0.4698, "step": 33345 }, { "epoch": 0.9155958264689731, "grad_norm": 0.410415917634964, "learning_rate": 1.1345573041710186e-05, "loss": 0.474, "step": 33346 }, { "epoch": 0.9156232839099396, "grad_norm": 0.3661254346370697, "learning_rate": 1.1345145074765895e-05, "loss": 0.4799, "step": 33347 }, { "epoch": 0.9156507413509061, "grad_norm": 0.3786481022834778, "learning_rate": 1.1344717105312477e-05, "loss": 0.5218, "step": 33348 }, { "epoch": 0.9156781987918726, "grad_norm": 0.41108810901641846, "learning_rate": 1.1344289133350737e-05, "loss": 0.5067, "step": 33349 }, { "epoch": 0.9157056562328391, "grad_norm": 0.4208597242832184, "learning_rate": 1.1343861158881466e-05, "loss": 0.4616, "step": 33350 }, { "epoch": 0.9157331136738056, "grad_norm": 0.37221208214759827, "learning_rate": 1.1343433181905466e-05, "loss": 0.5027, "step": 33351 }, { "epoch": 0.915760571114772, "grad_norm": 0.4566466510295868, "learning_rate": 1.1343005202423534e-05, "loss": 0.4878, "step": 33352 }, { "epoch": 0.9157880285557386, "grad_norm": 0.40486958622932434, "learning_rate": 1.1342577220436468e-05, "loss": 0.4784, "step": 33353 }, { "epoch": 0.9158154859967051, "grad_norm": 0.41305986046791077, "learning_rate": 1.1342149235945068e-05, "loss": 0.4748, "step": 33354 }, { "epoch": 0.9158429434376716, "grad_norm": 0.32712265849113464, "learning_rate": 1.134172124895013e-05, "loss": 0.4158, "step": 33355 }, { "epoch": 0.9158704008786381, "grad_norm": 0.4024880528450012, "learning_rate": 1.1341293259452455e-05, "loss": 0.4429, "step": 33356 }, { "epoch": 0.9158978583196046, "grad_norm": 0.5985063314437866, "learning_rate": 1.1340865267452841e-05, "loss": 0.5642, "step": 33357 }, { "epoch": 0.9159253157605711, "grad_norm": 0.43173718452453613, "learning_rate": 1.1340437272952083e-05, "loss": 0.5793, "step": 33358 }, { "epoch": 0.9159527732015376, "grad_norm": 0.4314613938331604, "learning_rate": 1.1340009275950982e-05, "loss": 0.574, "step": 33359 }, { "epoch": 0.9159802306425041, "grad_norm": 0.4476182162761688, "learning_rate": 1.133958127645034e-05, "loss": 0.4807, "step": 33360 }, { "epoch": 0.9160076880834707, "grad_norm": 0.3915746808052063, "learning_rate": 1.1339153274450945e-05, "loss": 0.546, "step": 33361 }, { "epoch": 0.9160351455244371, "grad_norm": 0.3582809865474701, "learning_rate": 1.1338725269953605e-05, "loss": 0.4877, "step": 33362 }, { "epoch": 0.9160626029654036, "grad_norm": 0.43572449684143066, "learning_rate": 1.1338297262959115e-05, "loss": 0.4609, "step": 33363 }, { "epoch": 0.9160900604063701, "grad_norm": 0.3788011968135834, "learning_rate": 1.1337869253468274e-05, "loss": 0.5154, "step": 33364 }, { "epoch": 0.9161175178473366, "grad_norm": 0.31946220993995667, "learning_rate": 1.133744124148188e-05, "loss": 0.4591, "step": 33365 }, { "epoch": 0.9161449752883031, "grad_norm": 0.4189729690551758, "learning_rate": 1.1337013227000727e-05, "loss": 0.5374, "step": 33366 }, { "epoch": 0.9161724327292696, "grad_norm": 0.4191599488258362, "learning_rate": 1.1336585210025624e-05, "loss": 0.4532, "step": 33367 }, { "epoch": 0.9161998901702362, "grad_norm": 0.3796944320201874, "learning_rate": 1.1336157190557357e-05, "loss": 0.4485, "step": 33368 }, { "epoch": 0.9162273476112026, "grad_norm": 0.34947705268859863, "learning_rate": 1.1335729168596736e-05, "loss": 0.4781, "step": 33369 }, { "epoch": 0.9162548050521692, "grad_norm": 0.3946007788181305, "learning_rate": 1.1335301144144551e-05, "loss": 0.4554, "step": 33370 }, { "epoch": 0.9162822624931356, "grad_norm": 0.4053274393081665, "learning_rate": 1.1334873117201601e-05, "loss": 0.4609, "step": 33371 }, { "epoch": 0.9163097199341022, "grad_norm": 0.39245977997779846, "learning_rate": 1.1334445087768691e-05, "loss": 0.4162, "step": 33372 }, { "epoch": 0.9163371773750686, "grad_norm": 0.41766420006752014, "learning_rate": 1.1334017055846614e-05, "loss": 0.5275, "step": 33373 }, { "epoch": 0.9163646348160351, "grad_norm": 0.417843759059906, "learning_rate": 1.1333589021436167e-05, "loss": 0.4748, "step": 33374 }, { "epoch": 0.9163920922570017, "grad_norm": 0.3954229950904846, "learning_rate": 1.1333160984538152e-05, "loss": 0.4482, "step": 33375 }, { "epoch": 0.9164195496979681, "grad_norm": 0.3804294466972351, "learning_rate": 1.1332732945153366e-05, "loss": 0.427, "step": 33376 }, { "epoch": 0.9164470071389347, "grad_norm": 0.4269979000091553, "learning_rate": 1.1332304903282609e-05, "loss": 0.4944, "step": 33377 }, { "epoch": 0.9164744645799011, "grad_norm": 0.43120691180229187, "learning_rate": 1.1331876858926677e-05, "loss": 0.5462, "step": 33378 }, { "epoch": 0.9165019220208677, "grad_norm": 0.4055705666542053, "learning_rate": 1.133144881208637e-05, "loss": 0.498, "step": 33379 }, { "epoch": 0.9165293794618341, "grad_norm": 0.3698061406612396, "learning_rate": 1.1331020762762487e-05, "loss": 0.4683, "step": 33380 }, { "epoch": 0.9165568369028007, "grad_norm": 0.4034044146537781, "learning_rate": 1.1330592710955823e-05, "loss": 0.4935, "step": 33381 }, { "epoch": 0.9165842943437672, "grad_norm": 0.5911890268325806, "learning_rate": 1.133016465666718e-05, "loss": 0.4225, "step": 33382 }, { "epoch": 0.9166117517847336, "grad_norm": 0.35995280742645264, "learning_rate": 1.1329736599897356e-05, "loss": 0.4425, "step": 33383 }, { "epoch": 0.9166392092257002, "grad_norm": 0.46822601556777954, "learning_rate": 1.1329308540647148e-05, "loss": 0.4848, "step": 33384 }, { "epoch": 0.9166666666666666, "grad_norm": 0.44043850898742676, "learning_rate": 1.1328880478917356e-05, "loss": 0.4833, "step": 33385 }, { "epoch": 0.9166941241076332, "grad_norm": 0.384834885597229, "learning_rate": 1.1328452414708776e-05, "loss": 0.5434, "step": 33386 }, { "epoch": 0.9167215815485996, "grad_norm": 0.34816452860832214, "learning_rate": 1.132802434802221e-05, "loss": 0.4968, "step": 33387 }, { "epoch": 0.9167490389895662, "grad_norm": 0.4711938500404358, "learning_rate": 1.1327596278858458e-05, "loss": 0.5106, "step": 33388 }, { "epoch": 0.9167764964305327, "grad_norm": 0.3905113935470581, "learning_rate": 1.132716820721831e-05, "loss": 0.5243, "step": 33389 }, { "epoch": 0.9168039538714992, "grad_norm": 0.5038570165634155, "learning_rate": 1.1326740133102572e-05, "loss": 0.5167, "step": 33390 }, { "epoch": 0.9168314113124657, "grad_norm": 0.3778712749481201, "learning_rate": 1.132631205651204e-05, "loss": 0.4911, "step": 33391 }, { "epoch": 0.9168588687534321, "grad_norm": 0.37944290041923523, "learning_rate": 1.1325883977447511e-05, "loss": 0.4005, "step": 33392 }, { "epoch": 0.9168863261943987, "grad_norm": 0.39735177159309387, "learning_rate": 1.1325455895909788e-05, "loss": 0.5391, "step": 33393 }, { "epoch": 0.9169137836353651, "grad_norm": 0.4089530110359192, "learning_rate": 1.1325027811899665e-05, "loss": 0.4677, "step": 33394 }, { "epoch": 0.9169412410763317, "grad_norm": 0.36829906702041626, "learning_rate": 1.1324599725417941e-05, "loss": 0.4666, "step": 33395 }, { "epoch": 0.9169686985172982, "grad_norm": 0.37514519691467285, "learning_rate": 1.132417163646542e-05, "loss": 0.4571, "step": 33396 }, { "epoch": 0.9169961559582647, "grad_norm": 0.37303340435028076, "learning_rate": 1.1323743545042892e-05, "loss": 0.4497, "step": 33397 }, { "epoch": 0.9170236133992312, "grad_norm": 0.40318402647972107, "learning_rate": 1.1323315451151162e-05, "loss": 0.4872, "step": 33398 }, { "epoch": 0.9170510708401977, "grad_norm": 0.408075749874115, "learning_rate": 1.1322887354791023e-05, "loss": 0.4356, "step": 33399 }, { "epoch": 0.9170785282811642, "grad_norm": 0.36907851696014404, "learning_rate": 1.1322459255963284e-05, "loss": 0.5218, "step": 33400 }, { "epoch": 0.9171059857221306, "grad_norm": 0.4453272223472595, "learning_rate": 1.132203115466873e-05, "loss": 0.5288, "step": 33401 }, { "epoch": 0.9171334431630972, "grad_norm": 0.41873544454574585, "learning_rate": 1.1321603050908168e-05, "loss": 0.4758, "step": 33402 }, { "epoch": 0.9171609006040637, "grad_norm": 0.3583988845348358, "learning_rate": 1.1321174944682393e-05, "loss": 0.4866, "step": 33403 }, { "epoch": 0.9171883580450302, "grad_norm": 0.39775407314300537, "learning_rate": 1.1320746835992209e-05, "loss": 0.4739, "step": 33404 }, { "epoch": 0.9172158154859967, "grad_norm": 0.3745875358581543, "learning_rate": 1.1320318724838406e-05, "loss": 0.4637, "step": 33405 }, { "epoch": 0.9172432729269632, "grad_norm": 0.45228666067123413, "learning_rate": 1.1319890611221791e-05, "loss": 0.5564, "step": 33406 }, { "epoch": 0.9172707303679297, "grad_norm": 0.42812275886535645, "learning_rate": 1.1319462495143157e-05, "loss": 0.5017, "step": 33407 }, { "epoch": 0.9172981878088962, "grad_norm": 0.3478068709373474, "learning_rate": 1.1319034376603304e-05, "loss": 0.5026, "step": 33408 }, { "epoch": 0.9173256452498627, "grad_norm": 0.3622744083404541, "learning_rate": 1.131860625560303e-05, "loss": 0.4602, "step": 33409 }, { "epoch": 0.9173531026908293, "grad_norm": 0.3829619586467743, "learning_rate": 1.1318178132143136e-05, "loss": 0.5416, "step": 33410 }, { "epoch": 0.9173805601317957, "grad_norm": 0.5032811760902405, "learning_rate": 1.131775000622442e-05, "loss": 0.4844, "step": 33411 }, { "epoch": 0.9174080175727622, "grad_norm": 0.4044209420681, "learning_rate": 1.1317321877847677e-05, "loss": 0.4953, "step": 33412 }, { "epoch": 0.9174354750137287, "grad_norm": 0.36343812942504883, "learning_rate": 1.131689374701371e-05, "loss": 0.492, "step": 33413 }, { "epoch": 0.9174629324546952, "grad_norm": 0.39200693368911743, "learning_rate": 1.1316465613723318e-05, "loss": 0.535, "step": 33414 }, { "epoch": 0.9174903898956617, "grad_norm": 0.4196426570415497, "learning_rate": 1.1316037477977293e-05, "loss": 0.4337, "step": 33415 }, { "epoch": 0.9175178473366282, "grad_norm": 0.3903834819793701, "learning_rate": 1.1315609339776442e-05, "loss": 0.4928, "step": 33416 }, { "epoch": 0.9175453047775948, "grad_norm": 0.4253719449043274, "learning_rate": 1.1315181199121555e-05, "loss": 0.5044, "step": 33417 }, { "epoch": 0.9175727622185612, "grad_norm": 0.3729749023914337, "learning_rate": 1.1314753056013439e-05, "loss": 0.452, "step": 33418 }, { "epoch": 0.9176002196595278, "grad_norm": 0.3898993730545044, "learning_rate": 1.131432491045289e-05, "loss": 0.5351, "step": 33419 }, { "epoch": 0.9176276771004942, "grad_norm": 0.3984692394733429, "learning_rate": 1.1313896762440701e-05, "loss": 0.5305, "step": 33420 }, { "epoch": 0.9176551345414607, "grad_norm": 0.41409561038017273, "learning_rate": 1.1313468611977678e-05, "loss": 0.491, "step": 33421 }, { "epoch": 0.9176825919824272, "grad_norm": 0.3614175319671631, "learning_rate": 1.1313040459064614e-05, "loss": 0.4762, "step": 33422 }, { "epoch": 0.9177100494233937, "grad_norm": 0.4203396737575531, "learning_rate": 1.1312612303702315e-05, "loss": 0.4798, "step": 33423 }, { "epoch": 0.9177375068643603, "grad_norm": 0.3898731768131256, "learning_rate": 1.1312184145891575e-05, "loss": 0.5486, "step": 33424 }, { "epoch": 0.9177649643053267, "grad_norm": 0.39737266302108765, "learning_rate": 1.131175598563319e-05, "loss": 0.5299, "step": 33425 }, { "epoch": 0.9177924217462933, "grad_norm": 0.3494638204574585, "learning_rate": 1.1311327822927962e-05, "loss": 0.4162, "step": 33426 }, { "epoch": 0.9178198791872597, "grad_norm": 0.40365323424339294, "learning_rate": 1.1310899657776689e-05, "loss": 0.5354, "step": 33427 }, { "epoch": 0.9178473366282263, "grad_norm": 0.42206481099128723, "learning_rate": 1.131047149018017e-05, "loss": 0.5162, "step": 33428 }, { "epoch": 0.9178747940691927, "grad_norm": 0.41986334323883057, "learning_rate": 1.1310043320139205e-05, "loss": 0.4878, "step": 33429 }, { "epoch": 0.9179022515101593, "grad_norm": 0.3610078692436218, "learning_rate": 1.1309615147654589e-05, "loss": 0.473, "step": 33430 }, { "epoch": 0.9179297089511258, "grad_norm": 0.38934507966041565, "learning_rate": 1.1309186972727124e-05, "loss": 0.4662, "step": 33431 }, { "epoch": 0.9179571663920922, "grad_norm": 0.40797901153564453, "learning_rate": 1.1308758795357606e-05, "loss": 0.5193, "step": 33432 }, { "epoch": 0.9179846238330588, "grad_norm": 0.4180936813354492, "learning_rate": 1.1308330615546837e-05, "loss": 0.513, "step": 33433 }, { "epoch": 0.9180120812740252, "grad_norm": 0.49750569462776184, "learning_rate": 1.1307902433295614e-05, "loss": 0.517, "step": 33434 }, { "epoch": 0.9180395387149918, "grad_norm": 0.3997799754142761, "learning_rate": 1.1307474248604733e-05, "loss": 0.5241, "step": 33435 }, { "epoch": 0.9180669961559582, "grad_norm": 0.372850626707077, "learning_rate": 1.1307046061474998e-05, "loss": 0.4812, "step": 33436 }, { "epoch": 0.9180944535969248, "grad_norm": 0.4008029103279114, "learning_rate": 1.1306617871907202e-05, "loss": 0.5255, "step": 33437 }, { "epoch": 0.9181219110378913, "grad_norm": 0.459622859954834, "learning_rate": 1.130618967990215e-05, "loss": 0.547, "step": 33438 }, { "epoch": 0.9181493684788578, "grad_norm": 0.3851218521595001, "learning_rate": 1.1305761485460633e-05, "loss": 0.4473, "step": 33439 }, { "epoch": 0.9181768259198243, "grad_norm": 0.41296154260635376, "learning_rate": 1.1305333288583458e-05, "loss": 0.4465, "step": 33440 }, { "epoch": 0.9182042833607907, "grad_norm": 0.41113805770874023, "learning_rate": 1.1304905089271419e-05, "loss": 0.5198, "step": 33441 }, { "epoch": 0.9182317408017573, "grad_norm": 0.39154544472694397, "learning_rate": 1.1304476887525313e-05, "loss": 0.4781, "step": 33442 }, { "epoch": 0.9182591982427237, "grad_norm": 0.38461583852767944, "learning_rate": 1.1304048683345945e-05, "loss": 0.4653, "step": 33443 }, { "epoch": 0.9182866556836903, "grad_norm": 0.3371267020702362, "learning_rate": 1.1303620476734108e-05, "loss": 0.5302, "step": 33444 }, { "epoch": 0.9183141131246568, "grad_norm": 0.35716357827186584, "learning_rate": 1.1303192267690604e-05, "loss": 0.4777, "step": 33445 }, { "epoch": 0.9183415705656233, "grad_norm": 0.43677666783332825, "learning_rate": 1.130276405621623e-05, "loss": 0.4496, "step": 33446 }, { "epoch": 0.9183690280065898, "grad_norm": 0.5293024778366089, "learning_rate": 1.1302335842311783e-05, "loss": 0.5157, "step": 33447 }, { "epoch": 0.9183964854475563, "grad_norm": 0.47139036655426025, "learning_rate": 1.1301907625978067e-05, "loss": 0.5269, "step": 33448 }, { "epoch": 0.9184239428885228, "grad_norm": 0.46659836173057556, "learning_rate": 1.1301479407215874e-05, "loss": 0.5411, "step": 33449 }, { "epoch": 0.9184514003294892, "grad_norm": 0.3640977442264557, "learning_rate": 1.1301051186026011e-05, "loss": 0.4557, "step": 33450 }, { "epoch": 0.9184788577704558, "grad_norm": 0.37758591771125793, "learning_rate": 1.1300622962409272e-05, "loss": 0.5494, "step": 33451 }, { "epoch": 0.9185063152114223, "grad_norm": 0.41483551263809204, "learning_rate": 1.1300194736366452e-05, "loss": 0.4968, "step": 33452 }, { "epoch": 0.9185337726523888, "grad_norm": 0.38981249928474426, "learning_rate": 1.1299766507898358e-05, "loss": 0.4954, "step": 33453 }, { "epoch": 0.9185612300933553, "grad_norm": 0.3855620324611664, "learning_rate": 1.129933827700578e-05, "loss": 0.5208, "step": 33454 }, { "epoch": 0.9185886875343218, "grad_norm": 0.4112050235271454, "learning_rate": 1.1298910043689526e-05, "loss": 0.4459, "step": 33455 }, { "epoch": 0.9186161449752883, "grad_norm": 0.5465002655982971, "learning_rate": 1.1298481807950386e-05, "loss": 0.5222, "step": 33456 }, { "epoch": 0.9186436024162548, "grad_norm": 0.413661390542984, "learning_rate": 1.1298053569789166e-05, "loss": 0.4244, "step": 33457 }, { "epoch": 0.9186710598572213, "grad_norm": 0.4459654986858368, "learning_rate": 1.1297625329206663e-05, "loss": 0.4601, "step": 33458 }, { "epoch": 0.9186985172981879, "grad_norm": 0.40080907940864563, "learning_rate": 1.129719708620367e-05, "loss": 0.5517, "step": 33459 }, { "epoch": 0.9187259747391543, "grad_norm": 0.38507068157196045, "learning_rate": 1.1296768840780996e-05, "loss": 0.5813, "step": 33460 }, { "epoch": 0.9187534321801208, "grad_norm": 0.3660943806171417, "learning_rate": 1.129634059293943e-05, "loss": 0.4672, "step": 33461 }, { "epoch": 0.9187808896210873, "grad_norm": 0.4039769172668457, "learning_rate": 1.1295912342679778e-05, "loss": 0.5615, "step": 33462 }, { "epoch": 0.9188083470620538, "grad_norm": 0.4475967586040497, "learning_rate": 1.1295484090002834e-05, "loss": 0.5334, "step": 33463 }, { "epoch": 0.9188358045030203, "grad_norm": 0.4221562445163727, "learning_rate": 1.1295055834909398e-05, "loss": 0.5382, "step": 33464 }, { "epoch": 0.9188632619439868, "grad_norm": 0.46099525690078735, "learning_rate": 1.1294627577400272e-05, "loss": 0.5086, "step": 33465 }, { "epoch": 0.9188907193849534, "grad_norm": 0.40127599239349365, "learning_rate": 1.129419931747625e-05, "loss": 0.4864, "step": 33466 }, { "epoch": 0.9189181768259198, "grad_norm": 0.3171643316745758, "learning_rate": 1.1293771055138136e-05, "loss": 0.3604, "step": 33467 }, { "epoch": 0.9189456342668864, "grad_norm": 0.40399375557899475, "learning_rate": 1.1293342790386723e-05, "loss": 0.5067, "step": 33468 }, { "epoch": 0.9189730917078528, "grad_norm": 0.4128081202507019, "learning_rate": 1.1292914523222814e-05, "loss": 0.5054, "step": 33469 }, { "epoch": 0.9190005491488193, "grad_norm": 0.36236825585365295, "learning_rate": 1.1292486253647208e-05, "loss": 0.4375, "step": 33470 }, { "epoch": 0.9190280065897858, "grad_norm": 0.3873863220214844, "learning_rate": 1.1292057981660702e-05, "loss": 0.4811, "step": 33471 }, { "epoch": 0.9190554640307523, "grad_norm": 0.42211177945137024, "learning_rate": 1.1291629707264093e-05, "loss": 0.5194, "step": 33472 }, { "epoch": 0.9190829214717189, "grad_norm": 0.4072783589363098, "learning_rate": 1.1291201430458187e-05, "loss": 0.6063, "step": 33473 }, { "epoch": 0.9191103789126853, "grad_norm": 0.34950846433639526, "learning_rate": 1.1290773151243775e-05, "loss": 0.482, "step": 33474 }, { "epoch": 0.9191378363536519, "grad_norm": 0.38999101519584656, "learning_rate": 1.1290344869621659e-05, "loss": 0.4876, "step": 33475 }, { "epoch": 0.9191652937946183, "grad_norm": 0.38662028312683105, "learning_rate": 1.1289916585592639e-05, "loss": 0.4591, "step": 33476 }, { "epoch": 0.9191927512355849, "grad_norm": 0.359781414270401, "learning_rate": 1.1289488299157512e-05, "loss": 0.4328, "step": 33477 }, { "epoch": 0.9192202086765513, "grad_norm": 0.39228641986846924, "learning_rate": 1.128906001031708e-05, "loss": 0.496, "step": 33478 }, { "epoch": 0.9192476661175178, "grad_norm": 0.4122947156429291, "learning_rate": 1.1288631719072136e-05, "loss": 0.5148, "step": 33479 }, { "epoch": 0.9192751235584844, "grad_norm": 0.4801008999347687, "learning_rate": 1.1288203425423487e-05, "loss": 0.5098, "step": 33480 }, { "epoch": 0.9193025809994508, "grad_norm": 0.3764784038066864, "learning_rate": 1.1287775129371925e-05, "loss": 0.4949, "step": 33481 }, { "epoch": 0.9193300384404174, "grad_norm": 0.4128127694129944, "learning_rate": 1.128734683091825e-05, "loss": 0.5328, "step": 33482 }, { "epoch": 0.9193574958813838, "grad_norm": 0.4790257215499878, "learning_rate": 1.1286918530063264e-05, "loss": 0.4603, "step": 33483 }, { "epoch": 0.9193849533223504, "grad_norm": 0.4688652455806732, "learning_rate": 1.1286490226807766e-05, "loss": 0.5365, "step": 33484 }, { "epoch": 0.9194124107633168, "grad_norm": 0.39019814133644104, "learning_rate": 1.1286061921152549e-05, "loss": 0.414, "step": 33485 }, { "epoch": 0.9194398682042834, "grad_norm": 0.36712315678596497, "learning_rate": 1.1285633613098418e-05, "loss": 0.4494, "step": 33486 }, { "epoch": 0.9194673256452499, "grad_norm": 0.4300134778022766, "learning_rate": 1.1285205302646172e-05, "loss": 0.5066, "step": 33487 }, { "epoch": 0.9194947830862163, "grad_norm": 0.34062299132347107, "learning_rate": 1.1284776989796607e-05, "loss": 0.3781, "step": 33488 }, { "epoch": 0.9195222405271829, "grad_norm": 0.4115528166294098, "learning_rate": 1.1284348674550523e-05, "loss": 0.5562, "step": 33489 }, { "epoch": 0.9195496979681493, "grad_norm": 0.37328290939331055, "learning_rate": 1.1283920356908715e-05, "loss": 0.4737, "step": 33490 }, { "epoch": 0.9195771554091159, "grad_norm": 0.3767518699169159, "learning_rate": 1.128349203687199e-05, "loss": 0.4985, "step": 33491 }, { "epoch": 0.9196046128500823, "grad_norm": 0.4659891128540039, "learning_rate": 1.128306371444114e-05, "loss": 0.5271, "step": 33492 }, { "epoch": 0.9196320702910489, "grad_norm": 0.427915096282959, "learning_rate": 1.1282635389616967e-05, "loss": 0.5196, "step": 33493 }, { "epoch": 0.9196595277320154, "grad_norm": 0.409264475107193, "learning_rate": 1.1282207062400273e-05, "loss": 0.5095, "step": 33494 }, { "epoch": 0.9196869851729819, "grad_norm": 0.36344513297080994, "learning_rate": 1.1281778732791848e-05, "loss": 0.4092, "step": 33495 }, { "epoch": 0.9197144426139484, "grad_norm": 0.3781796395778656, "learning_rate": 1.12813504007925e-05, "loss": 0.468, "step": 33496 }, { "epoch": 0.9197419000549149, "grad_norm": 0.3973129093647003, "learning_rate": 1.1280922066403023e-05, "loss": 0.5297, "step": 33497 }, { "epoch": 0.9197693574958814, "grad_norm": 0.35214295983314514, "learning_rate": 1.1280493729624222e-05, "loss": 0.4751, "step": 33498 }, { "epoch": 0.9197968149368478, "grad_norm": 0.349069744348526, "learning_rate": 1.128006539045689e-05, "loss": 0.4378, "step": 33499 }, { "epoch": 0.9198242723778144, "grad_norm": 0.382144033908844, "learning_rate": 1.1279637048901822e-05, "loss": 0.4571, "step": 33500 }, { "epoch": 0.9198517298187809, "grad_norm": 0.46528345346450806, "learning_rate": 1.1279208704959827e-05, "loss": 0.5267, "step": 33501 }, { "epoch": 0.9198791872597474, "grad_norm": 0.37248528003692627, "learning_rate": 1.12787803586317e-05, "loss": 0.5013, "step": 33502 }, { "epoch": 0.9199066447007139, "grad_norm": 0.41320520639419556, "learning_rate": 1.1278352009918236e-05, "loss": 0.5511, "step": 33503 }, { "epoch": 0.9199341021416804, "grad_norm": 0.38980379700660706, "learning_rate": 1.1277923658820243e-05, "loss": 0.52, "step": 33504 }, { "epoch": 0.9199615595826469, "grad_norm": 0.40667998790740967, "learning_rate": 1.1277495305338508e-05, "loss": 0.4823, "step": 33505 }, { "epoch": 0.9199890170236134, "grad_norm": 0.3956132233142853, "learning_rate": 1.127706694947384e-05, "loss": 0.5076, "step": 33506 }, { "epoch": 0.9200164744645799, "grad_norm": 0.4420919120311737, "learning_rate": 1.1276638591227036e-05, "loss": 0.4476, "step": 33507 }, { "epoch": 0.9200439319055465, "grad_norm": 0.42444223165512085, "learning_rate": 1.1276210230598892e-05, "loss": 0.483, "step": 33508 }, { "epoch": 0.9200713893465129, "grad_norm": 0.3794093430042267, "learning_rate": 1.1275781867590208e-05, "loss": 0.5105, "step": 33509 }, { "epoch": 0.9200988467874794, "grad_norm": 0.3961133658885956, "learning_rate": 1.1275353502201782e-05, "loss": 0.5031, "step": 33510 }, { "epoch": 0.9201263042284459, "grad_norm": 0.406255304813385, "learning_rate": 1.1274925134434418e-05, "loss": 0.4303, "step": 33511 }, { "epoch": 0.9201537616694124, "grad_norm": 0.3567756116390228, "learning_rate": 1.1274496764288912e-05, "loss": 0.4447, "step": 33512 }, { "epoch": 0.9201812191103789, "grad_norm": 0.38023263216018677, "learning_rate": 1.127406839176606e-05, "loss": 0.4664, "step": 33513 }, { "epoch": 0.9202086765513454, "grad_norm": 0.39149847626686096, "learning_rate": 1.1273640016866665e-05, "loss": 0.4465, "step": 33514 }, { "epoch": 0.920236133992312, "grad_norm": 0.4221875071525574, "learning_rate": 1.1273211639591527e-05, "loss": 0.4736, "step": 33515 }, { "epoch": 0.9202635914332784, "grad_norm": 0.38551434874534607, "learning_rate": 1.127278325994144e-05, "loss": 0.4871, "step": 33516 }, { "epoch": 0.920291048874245, "grad_norm": 0.39533379673957825, "learning_rate": 1.1272354877917207e-05, "loss": 0.5403, "step": 33517 }, { "epoch": 0.9203185063152114, "grad_norm": 0.39006808400154114, "learning_rate": 1.1271926493519624e-05, "loss": 0.552, "step": 33518 }, { "epoch": 0.9203459637561779, "grad_norm": 0.4117191433906555, "learning_rate": 1.1271498106749497e-05, "loss": 0.4498, "step": 33519 }, { "epoch": 0.9203734211971444, "grad_norm": 0.4306970536708832, "learning_rate": 1.1271069717607617e-05, "loss": 0.5553, "step": 33520 }, { "epoch": 0.9204008786381109, "grad_norm": 0.3882336914539337, "learning_rate": 1.1270641326094785e-05, "loss": 0.4652, "step": 33521 }, { "epoch": 0.9204283360790775, "grad_norm": 0.4410727620124817, "learning_rate": 1.1270212932211804e-05, "loss": 0.4949, "step": 33522 }, { "epoch": 0.9204557935200439, "grad_norm": 0.366113543510437, "learning_rate": 1.1269784535959467e-05, "loss": 0.5247, "step": 33523 }, { "epoch": 0.9204832509610105, "grad_norm": 0.4300064742565155, "learning_rate": 1.126935613733858e-05, "loss": 0.546, "step": 33524 }, { "epoch": 0.9205107084019769, "grad_norm": 0.4883978068828583, "learning_rate": 1.126892773634994e-05, "loss": 0.5061, "step": 33525 }, { "epoch": 0.9205381658429435, "grad_norm": 1.0996953248977661, "learning_rate": 1.126849933299434e-05, "loss": 0.5013, "step": 33526 }, { "epoch": 0.9205656232839099, "grad_norm": 0.36610347032546997, "learning_rate": 1.1268070927272588e-05, "loss": 0.5125, "step": 33527 }, { "epoch": 0.9205930807248764, "grad_norm": 0.4561777412891388, "learning_rate": 1.1267642519185476e-05, "loss": 0.5618, "step": 33528 }, { "epoch": 0.920620538165843, "grad_norm": 0.3861973285675049, "learning_rate": 1.1267214108733809e-05, "loss": 0.5092, "step": 33529 }, { "epoch": 0.9206479956068094, "grad_norm": 0.3433125913143158, "learning_rate": 1.126678569591838e-05, "loss": 0.4671, "step": 33530 }, { "epoch": 0.920675453047776, "grad_norm": 0.4042072594165802, "learning_rate": 1.1266357280739992e-05, "loss": 0.5009, "step": 33531 }, { "epoch": 0.9207029104887424, "grad_norm": 0.4350067973136902, "learning_rate": 1.1265928863199447e-05, "loss": 0.4858, "step": 33532 }, { "epoch": 0.920730367929709, "grad_norm": 0.49120238423347473, "learning_rate": 1.126550044329754e-05, "loss": 0.4963, "step": 33533 }, { "epoch": 0.9207578253706754, "grad_norm": 0.36439359188079834, "learning_rate": 1.1265072021035065e-05, "loss": 0.5197, "step": 33534 }, { "epoch": 0.920785282811642, "grad_norm": 0.3887249231338501, "learning_rate": 1.1264643596412834e-05, "loss": 0.4786, "step": 33535 }, { "epoch": 0.9208127402526085, "grad_norm": 0.39717593789100647, "learning_rate": 1.1264215169431634e-05, "loss": 0.5339, "step": 33536 }, { "epoch": 0.920840197693575, "grad_norm": 0.3658789396286011, "learning_rate": 1.126378674009227e-05, "loss": 0.5187, "step": 33537 }, { "epoch": 0.9208676551345415, "grad_norm": 0.3783651888370514, "learning_rate": 1.1263358308395544e-05, "loss": 0.5213, "step": 33538 }, { "epoch": 0.9208951125755079, "grad_norm": 0.3881324529647827, "learning_rate": 1.1262929874342248e-05, "loss": 0.4964, "step": 33539 }, { "epoch": 0.9209225700164745, "grad_norm": 0.5824635624885559, "learning_rate": 1.1262501437933187e-05, "loss": 0.4346, "step": 33540 }, { "epoch": 0.9209500274574409, "grad_norm": 0.41080838441848755, "learning_rate": 1.1262072999169155e-05, "loss": 0.5333, "step": 33541 }, { "epoch": 0.9209774848984075, "grad_norm": 0.43444502353668213, "learning_rate": 1.1261644558050957e-05, "loss": 0.6005, "step": 33542 }, { "epoch": 0.921004942339374, "grad_norm": 0.3883773386478424, "learning_rate": 1.1261216114579387e-05, "loss": 0.5352, "step": 33543 }, { "epoch": 0.9210323997803405, "grad_norm": 0.4077460467815399, "learning_rate": 1.1260787668755246e-05, "loss": 0.5099, "step": 33544 }, { "epoch": 0.921059857221307, "grad_norm": 0.37841638922691345, "learning_rate": 1.1260359220579337e-05, "loss": 0.4623, "step": 33545 }, { "epoch": 0.9210873146622734, "grad_norm": 0.3361000716686249, "learning_rate": 1.1259930770052454e-05, "loss": 0.3612, "step": 33546 }, { "epoch": 0.92111477210324, "grad_norm": 0.4301963150501251, "learning_rate": 1.1259502317175397e-05, "loss": 0.6004, "step": 33547 }, { "epoch": 0.9211422295442064, "grad_norm": 0.39785677194595337, "learning_rate": 1.1259073861948969e-05, "loss": 0.5126, "step": 33548 }, { "epoch": 0.921169686985173, "grad_norm": 0.39541199803352356, "learning_rate": 1.1258645404373962e-05, "loss": 0.4653, "step": 33549 }, { "epoch": 0.9211971444261395, "grad_norm": 0.4626021683216095, "learning_rate": 1.1258216944451183e-05, "loss": 0.5293, "step": 33550 }, { "epoch": 0.921224601867106, "grad_norm": 0.4504675567150116, "learning_rate": 1.1257788482181426e-05, "loss": 0.3701, "step": 33551 }, { "epoch": 0.9212520593080725, "grad_norm": 0.3415599763393402, "learning_rate": 1.1257360017565494e-05, "loss": 0.4642, "step": 33552 }, { "epoch": 0.921279516749039, "grad_norm": 0.34440863132476807, "learning_rate": 1.1256931550604183e-05, "loss": 0.3549, "step": 33553 }, { "epoch": 0.9213069741900055, "grad_norm": 0.42272457480430603, "learning_rate": 1.1256503081298291e-05, "loss": 0.5225, "step": 33554 }, { "epoch": 0.921334431630972, "grad_norm": 0.37755006551742554, "learning_rate": 1.1256074609648624e-05, "loss": 0.4905, "step": 33555 }, { "epoch": 0.9213618890719385, "grad_norm": 0.4045681357383728, "learning_rate": 1.1255646135655976e-05, "loss": 0.5122, "step": 33556 }, { "epoch": 0.921389346512905, "grad_norm": 0.39848488569259644, "learning_rate": 1.1255217659321144e-05, "loss": 0.4962, "step": 33557 }, { "epoch": 0.9214168039538715, "grad_norm": 0.40182968974113464, "learning_rate": 1.1254789180644935e-05, "loss": 0.47, "step": 33558 }, { "epoch": 0.921444261394838, "grad_norm": 0.3683414161205292, "learning_rate": 1.125436069962814e-05, "loss": 0.4621, "step": 33559 }, { "epoch": 0.9214717188358045, "grad_norm": 0.34530675411224365, "learning_rate": 1.1253932216271565e-05, "loss": 0.5285, "step": 33560 }, { "epoch": 0.921499176276771, "grad_norm": 0.4015193283557892, "learning_rate": 1.1253503730576005e-05, "loss": 0.4516, "step": 33561 }, { "epoch": 0.9215266337177375, "grad_norm": 0.4156322479248047, "learning_rate": 1.1253075242542259e-05, "loss": 0.4732, "step": 33562 }, { "epoch": 0.921554091158704, "grad_norm": 0.38956162333488464, "learning_rate": 1.1252646752171131e-05, "loss": 0.474, "step": 33563 }, { "epoch": 0.9215815485996706, "grad_norm": 0.40862521529197693, "learning_rate": 1.1252218259463415e-05, "loss": 0.4503, "step": 33564 }, { "epoch": 0.921609006040637, "grad_norm": 0.40014010667800903, "learning_rate": 1.125178976441991e-05, "loss": 0.5492, "step": 33565 }, { "epoch": 0.9216364634816036, "grad_norm": 0.416425496339798, "learning_rate": 1.1251361267041421e-05, "loss": 0.513, "step": 33566 }, { "epoch": 0.92166392092257, "grad_norm": 0.4537571370601654, "learning_rate": 1.1250932767328741e-05, "loss": 0.5258, "step": 33567 }, { "epoch": 0.9216913783635365, "grad_norm": 0.40234842896461487, "learning_rate": 1.1250504265282675e-05, "loss": 0.3747, "step": 33568 }, { "epoch": 0.921718835804503, "grad_norm": 0.4015073776245117, "learning_rate": 1.125007576090402e-05, "loss": 0.4262, "step": 33569 }, { "epoch": 0.9217462932454695, "grad_norm": 0.40984046459198, "learning_rate": 1.1249647254193573e-05, "loss": 0.5113, "step": 33570 }, { "epoch": 0.9217737506864361, "grad_norm": 0.48574188351631165, "learning_rate": 1.1249218745152134e-05, "loss": 0.606, "step": 33571 }, { "epoch": 0.9218012081274025, "grad_norm": 0.39738237857818604, "learning_rate": 1.1248790233780503e-05, "loss": 0.4859, "step": 33572 }, { "epoch": 0.9218286655683691, "grad_norm": 0.39110249280929565, "learning_rate": 1.1248361720079482e-05, "loss": 0.5017, "step": 33573 }, { "epoch": 0.9218561230093355, "grad_norm": 5.463809013366699, "learning_rate": 1.1247933204049867e-05, "loss": 0.3788, "step": 33574 }, { "epoch": 0.921883580450302, "grad_norm": 0.3050614297389984, "learning_rate": 1.1247504685692459e-05, "loss": 0.4238, "step": 33575 }, { "epoch": 0.9219110378912685, "grad_norm": 0.3856457769870758, "learning_rate": 1.1247076165008055e-05, "loss": 0.3857, "step": 33576 }, { "epoch": 0.921938495332235, "grad_norm": 0.41746076941490173, "learning_rate": 1.1246647641997457e-05, "loss": 0.5095, "step": 33577 }, { "epoch": 0.9219659527732016, "grad_norm": 0.38316890597343445, "learning_rate": 1.1246219116661463e-05, "loss": 0.4982, "step": 33578 }, { "epoch": 0.921993410214168, "grad_norm": 0.3629477918148041, "learning_rate": 1.1245790589000873e-05, "loss": 0.4627, "step": 33579 }, { "epoch": 0.9220208676551346, "grad_norm": 0.38064074516296387, "learning_rate": 1.1245362059016485e-05, "loss": 0.4444, "step": 33580 }, { "epoch": 0.922048325096101, "grad_norm": 0.3856874704360962, "learning_rate": 1.1244933526709102e-05, "loss": 0.4453, "step": 33581 }, { "epoch": 0.9220757825370676, "grad_norm": 0.5371312499046326, "learning_rate": 1.1244504992079516e-05, "loss": 0.5265, "step": 33582 }, { "epoch": 0.922103239978034, "grad_norm": 0.3926154375076294, "learning_rate": 1.1244076455128535e-05, "loss": 0.5041, "step": 33583 }, { "epoch": 0.9221306974190006, "grad_norm": 0.40004971623420715, "learning_rate": 1.1243647915856953e-05, "loss": 0.5302, "step": 33584 }, { "epoch": 0.9221581548599671, "grad_norm": 0.40102419257164, "learning_rate": 1.1243219374265568e-05, "loss": 0.5301, "step": 33585 }, { "epoch": 0.9221856123009335, "grad_norm": 0.3785547912120819, "learning_rate": 1.1242790830355186e-05, "loss": 0.5245, "step": 33586 }, { "epoch": 0.9222130697419001, "grad_norm": 0.4194906949996948, "learning_rate": 1.1242362284126603e-05, "loss": 0.5051, "step": 33587 }, { "epoch": 0.9222405271828665, "grad_norm": 0.4297010898590088, "learning_rate": 1.1241933735580613e-05, "loss": 0.4791, "step": 33588 }, { "epoch": 0.9222679846238331, "grad_norm": 0.3865632712841034, "learning_rate": 1.1241505184718024e-05, "loss": 0.4313, "step": 33589 }, { "epoch": 0.9222954420647995, "grad_norm": 0.38926079869270325, "learning_rate": 1.124107663153963e-05, "loss": 0.6159, "step": 33590 }, { "epoch": 0.9223228995057661, "grad_norm": 0.3743076026439667, "learning_rate": 1.1240648076046232e-05, "loss": 0.5165, "step": 33591 }, { "epoch": 0.9223503569467326, "grad_norm": 0.3619472086429596, "learning_rate": 1.1240219518238631e-05, "loss": 0.4953, "step": 33592 }, { "epoch": 0.9223778143876991, "grad_norm": 0.40082281827926636, "learning_rate": 1.1239790958117622e-05, "loss": 0.5064, "step": 33593 }, { "epoch": 0.9224052718286656, "grad_norm": 0.44288012385368347, "learning_rate": 1.123936239568401e-05, "loss": 0.5128, "step": 33594 }, { "epoch": 0.922432729269632, "grad_norm": 0.3568223714828491, "learning_rate": 1.1238933830938591e-05, "loss": 0.4861, "step": 33595 }, { "epoch": 0.9224601867105986, "grad_norm": 0.40344366431236267, "learning_rate": 1.1238505263882164e-05, "loss": 0.4914, "step": 33596 }, { "epoch": 0.922487644151565, "grad_norm": 0.35338926315307617, "learning_rate": 1.1238076694515532e-05, "loss": 0.4207, "step": 33597 }, { "epoch": 0.9225151015925316, "grad_norm": 0.41320326924324036, "learning_rate": 1.1237648122839488e-05, "loss": 0.5127, "step": 33598 }, { "epoch": 0.9225425590334981, "grad_norm": 0.38725799322128296, "learning_rate": 1.1237219548854839e-05, "loss": 0.5229, "step": 33599 }, { "epoch": 0.9225700164744646, "grad_norm": 0.3916696012020111, "learning_rate": 1.123679097256238e-05, "loss": 0.4373, "step": 33600 }, { "epoch": 0.9225974739154311, "grad_norm": 0.39490580558776855, "learning_rate": 1.1236362393962907e-05, "loss": 0.4639, "step": 33601 }, { "epoch": 0.9226249313563976, "grad_norm": 0.5567611455917358, "learning_rate": 1.1235933813057226e-05, "loss": 0.5732, "step": 33602 }, { "epoch": 0.9226523887973641, "grad_norm": 0.34825384616851807, "learning_rate": 1.1235505229846134e-05, "loss": 0.416, "step": 33603 }, { "epoch": 0.9226798462383305, "grad_norm": 0.39929965138435364, "learning_rate": 1.123507664433043e-05, "loss": 0.5219, "step": 33604 }, { "epoch": 0.9227073036792971, "grad_norm": 0.39863914251327515, "learning_rate": 1.1234648056510917e-05, "loss": 0.5215, "step": 33605 }, { "epoch": 0.9227347611202636, "grad_norm": 0.4329266846179962, "learning_rate": 1.1234219466388388e-05, "loss": 0.3729, "step": 33606 }, { "epoch": 0.9227622185612301, "grad_norm": 0.42079782485961914, "learning_rate": 1.1233790873963647e-05, "loss": 0.5132, "step": 33607 }, { "epoch": 0.9227896760021966, "grad_norm": 0.4082830846309662, "learning_rate": 1.123336227923749e-05, "loss": 0.5151, "step": 33608 }, { "epoch": 0.9228171334431631, "grad_norm": 0.4498313367366791, "learning_rate": 1.1232933682210721e-05, "loss": 0.549, "step": 33609 }, { "epoch": 0.9228445908841296, "grad_norm": 0.4211365580558777, "learning_rate": 1.1232505082884137e-05, "loss": 0.5022, "step": 33610 }, { "epoch": 0.9228720483250961, "grad_norm": 0.4276675283908844, "learning_rate": 1.1232076481258537e-05, "loss": 0.4958, "step": 33611 }, { "epoch": 0.9228995057660626, "grad_norm": 0.47678443789482117, "learning_rate": 1.1231647877334722e-05, "loss": 0.4522, "step": 33612 }, { "epoch": 0.922926963207029, "grad_norm": 0.3725253641605377, "learning_rate": 1.123121927111349e-05, "loss": 0.5073, "step": 33613 }, { "epoch": 0.9229544206479956, "grad_norm": 0.374092161655426, "learning_rate": 1.1230790662595642e-05, "loss": 0.5128, "step": 33614 }, { "epoch": 0.9229818780889621, "grad_norm": 0.37868228554725647, "learning_rate": 1.1230362051781976e-05, "loss": 0.5053, "step": 33615 }, { "epoch": 0.9230093355299286, "grad_norm": 0.40199270844459534, "learning_rate": 1.122993343867329e-05, "loss": 0.5052, "step": 33616 }, { "epoch": 0.9230367929708951, "grad_norm": 0.391030877828598, "learning_rate": 1.1229504823270389e-05, "loss": 0.5132, "step": 33617 }, { "epoch": 0.9230642504118616, "grad_norm": 0.3649752736091614, "learning_rate": 1.1229076205574066e-05, "loss": 0.4619, "step": 33618 }, { "epoch": 0.9230917078528281, "grad_norm": 0.3879531919956207, "learning_rate": 1.1228647585585127e-05, "loss": 0.4552, "step": 33619 }, { "epoch": 0.9231191652937946, "grad_norm": 0.3533819317817688, "learning_rate": 1.1228218963304365e-05, "loss": 0.4025, "step": 33620 }, { "epoch": 0.9231466227347611, "grad_norm": 0.38680699467658997, "learning_rate": 1.1227790338732584e-05, "loss": 0.5288, "step": 33621 }, { "epoch": 0.9231740801757277, "grad_norm": 0.4144706428050995, "learning_rate": 1.1227361711870584e-05, "loss": 0.5602, "step": 33622 }, { "epoch": 0.9232015376166941, "grad_norm": 0.36380189657211304, "learning_rate": 1.1226933082719158e-05, "loss": 0.4701, "step": 33623 }, { "epoch": 0.9232289950576607, "grad_norm": 0.3585013747215271, "learning_rate": 1.1226504451279114e-05, "loss": 0.3908, "step": 33624 }, { "epoch": 0.9232564524986271, "grad_norm": 0.36456671357154846, "learning_rate": 1.1226075817551245e-05, "loss": 0.4361, "step": 33625 }, { "epoch": 0.9232839099395936, "grad_norm": 0.38512855768203735, "learning_rate": 1.1225647181536357e-05, "loss": 0.427, "step": 33626 }, { "epoch": 0.9233113673805601, "grad_norm": 0.42477795481681824, "learning_rate": 1.1225218543235243e-05, "loss": 0.4212, "step": 33627 }, { "epoch": 0.9233388248215266, "grad_norm": 0.3935146629810333, "learning_rate": 1.1224789902648705e-05, "loss": 0.5103, "step": 33628 }, { "epoch": 0.9233662822624932, "grad_norm": 0.3841167390346527, "learning_rate": 1.1224361259777545e-05, "loss": 0.4974, "step": 33629 }, { "epoch": 0.9233937397034596, "grad_norm": 0.5502886176109314, "learning_rate": 1.1223932614622558e-05, "loss": 0.4713, "step": 33630 }, { "epoch": 0.9234211971444262, "grad_norm": 0.4251832664012909, "learning_rate": 1.122350396718455e-05, "loss": 0.5076, "step": 33631 }, { "epoch": 0.9234486545853926, "grad_norm": 0.3963448703289032, "learning_rate": 1.1223075317464316e-05, "loss": 0.4181, "step": 33632 }, { "epoch": 0.9234761120263592, "grad_norm": 0.3985423147678375, "learning_rate": 1.122264666546265e-05, "loss": 0.4625, "step": 33633 }, { "epoch": 0.9235035694673256, "grad_norm": 0.3526234030723572, "learning_rate": 1.1222218011180366e-05, "loss": 0.3928, "step": 33634 }, { "epoch": 0.9235310269082921, "grad_norm": 0.39430487155914307, "learning_rate": 1.122178935461825e-05, "loss": 0.5714, "step": 33635 }, { "epoch": 0.9235584843492587, "grad_norm": 0.3298690617084503, "learning_rate": 1.1221360695777108e-05, "loss": 0.4407, "step": 33636 }, { "epoch": 0.9235859417902251, "grad_norm": 0.45379438996315, "learning_rate": 1.1220932034657736e-05, "loss": 0.5513, "step": 33637 }, { "epoch": 0.9236133992311917, "grad_norm": 0.385770320892334, "learning_rate": 1.1220503371260943e-05, "loss": 0.527, "step": 33638 }, { "epoch": 0.9236408566721581, "grad_norm": 0.3812829852104187, "learning_rate": 1.1220074705587519e-05, "loss": 0.5202, "step": 33639 }, { "epoch": 0.9236683141131247, "grad_norm": 0.3865065276622772, "learning_rate": 1.121964603763826e-05, "loss": 0.5284, "step": 33640 }, { "epoch": 0.9236957715540911, "grad_norm": 0.4120789170265198, "learning_rate": 1.1219217367413979e-05, "loss": 0.4651, "step": 33641 }, { "epoch": 0.9237232289950577, "grad_norm": 0.3702388405799866, "learning_rate": 1.1218788694915466e-05, "loss": 0.4325, "step": 33642 }, { "epoch": 0.9237506864360242, "grad_norm": 0.46189385652542114, "learning_rate": 1.1218360020143524e-05, "loss": 0.4568, "step": 33643 }, { "epoch": 0.9237781438769906, "grad_norm": 0.4478963017463684, "learning_rate": 1.1217931343098952e-05, "loss": 0.4875, "step": 33644 }, { "epoch": 0.9238056013179572, "grad_norm": 0.4301338791847229, "learning_rate": 1.1217502663782547e-05, "loss": 0.4901, "step": 33645 }, { "epoch": 0.9238330587589236, "grad_norm": 0.35821956396102905, "learning_rate": 1.1217073982195112e-05, "loss": 0.4788, "step": 33646 }, { "epoch": 0.9238605161998902, "grad_norm": 0.3755975067615509, "learning_rate": 1.1216645298337445e-05, "loss": 0.5667, "step": 33647 }, { "epoch": 0.9238879736408566, "grad_norm": 0.4175121784210205, "learning_rate": 1.1216216612210349e-05, "loss": 0.4209, "step": 33648 }, { "epoch": 0.9239154310818232, "grad_norm": 0.3758852481842041, "learning_rate": 1.1215787923814618e-05, "loss": 0.4178, "step": 33649 }, { "epoch": 0.9239428885227897, "grad_norm": 0.39671725034713745, "learning_rate": 1.1215359233151056e-05, "loss": 0.6035, "step": 33650 }, { "epoch": 0.9239703459637562, "grad_norm": 0.4684582054615021, "learning_rate": 1.1214930540220462e-05, "loss": 0.4977, "step": 33651 }, { "epoch": 0.9239978034047227, "grad_norm": 0.4517892301082611, "learning_rate": 1.1214501845023631e-05, "loss": 0.555, "step": 33652 }, { "epoch": 0.9240252608456891, "grad_norm": 0.5156253576278687, "learning_rate": 1.1214073147561369e-05, "loss": 0.5429, "step": 33653 }, { "epoch": 0.9240527182866557, "grad_norm": 0.46190550923347473, "learning_rate": 1.1213644447834474e-05, "loss": 0.5137, "step": 33654 }, { "epoch": 0.9240801757276221, "grad_norm": 0.4488618075847626, "learning_rate": 1.1213215745843743e-05, "loss": 0.511, "step": 33655 }, { "epoch": 0.9241076331685887, "grad_norm": 0.3764214515686035, "learning_rate": 1.1212787041589978e-05, "loss": 0.5374, "step": 33656 }, { "epoch": 0.9241350906095552, "grad_norm": 0.40162402391433716, "learning_rate": 1.1212358335073979e-05, "loss": 0.4172, "step": 33657 }, { "epoch": 0.9241625480505217, "grad_norm": 0.3412168622016907, "learning_rate": 1.1211929626296542e-05, "loss": 0.4621, "step": 33658 }, { "epoch": 0.9241900054914882, "grad_norm": 0.38725045323371887, "learning_rate": 1.1211500915258472e-05, "loss": 0.4513, "step": 33659 }, { "epoch": 0.9242174629324547, "grad_norm": 0.4163564443588257, "learning_rate": 1.1211072201960565e-05, "loss": 0.5021, "step": 33660 }, { "epoch": 0.9242449203734212, "grad_norm": 0.38413307070732117, "learning_rate": 1.1210643486403623e-05, "loss": 0.4716, "step": 33661 }, { "epoch": 0.9242723778143876, "grad_norm": 0.40503302216529846, "learning_rate": 1.1210214768588445e-05, "loss": 0.6095, "step": 33662 }, { "epoch": 0.9242998352553542, "grad_norm": 0.38034528493881226, "learning_rate": 1.1209786048515827e-05, "loss": 0.5355, "step": 33663 }, { "epoch": 0.9243272926963207, "grad_norm": 0.46727293729782104, "learning_rate": 1.1209357326186576e-05, "loss": 0.5375, "step": 33664 }, { "epoch": 0.9243547501372872, "grad_norm": 0.34815046191215515, "learning_rate": 1.1208928601601483e-05, "loss": 0.4239, "step": 33665 }, { "epoch": 0.9243822075782537, "grad_norm": 0.3737133741378784, "learning_rate": 1.1208499874761355e-05, "loss": 0.4795, "step": 33666 }, { "epoch": 0.9244096650192202, "grad_norm": 0.5055474042892456, "learning_rate": 1.1208071145666993e-05, "loss": 0.514, "step": 33667 }, { "epoch": 0.9244371224601867, "grad_norm": 0.38395246863365173, "learning_rate": 1.1207642414319186e-05, "loss": 0.4731, "step": 33668 }, { "epoch": 0.9244645799011532, "grad_norm": 0.44666358828544617, "learning_rate": 1.1207213680718742e-05, "loss": 0.5773, "step": 33669 }, { "epoch": 0.9244920373421197, "grad_norm": 0.38091903924942017, "learning_rate": 1.1206784944866463e-05, "loss": 0.4236, "step": 33670 }, { "epoch": 0.9245194947830863, "grad_norm": 0.5094394683837891, "learning_rate": 1.120635620676314e-05, "loss": 0.418, "step": 33671 }, { "epoch": 0.9245469522240527, "grad_norm": 0.4583567678928375, "learning_rate": 1.1205927466409581e-05, "loss": 0.5887, "step": 33672 }, { "epoch": 0.9245744096650192, "grad_norm": 0.42334020137786865, "learning_rate": 1.120549872380658e-05, "loss": 0.6005, "step": 33673 }, { "epoch": 0.9246018671059857, "grad_norm": 0.43318885564804077, "learning_rate": 1.1205069978954941e-05, "loss": 0.5211, "step": 33674 }, { "epoch": 0.9246293245469522, "grad_norm": 0.3873750865459442, "learning_rate": 1.1204641231855462e-05, "loss": 0.4964, "step": 33675 }, { "epoch": 0.9246567819879187, "grad_norm": 0.39056259393692017, "learning_rate": 1.120421248250894e-05, "loss": 0.4828, "step": 33676 }, { "epoch": 0.9246842394288852, "grad_norm": 0.3860107958316803, "learning_rate": 1.120378373091618e-05, "loss": 0.5072, "step": 33677 }, { "epoch": 0.9247116968698518, "grad_norm": 0.3684338331222534, "learning_rate": 1.1203354977077978e-05, "loss": 0.4948, "step": 33678 }, { "epoch": 0.9247391543108182, "grad_norm": 0.5751407146453857, "learning_rate": 1.1202926220995136e-05, "loss": 0.529, "step": 33679 }, { "epoch": 0.9247666117517848, "grad_norm": 0.3608757257461548, "learning_rate": 1.1202497462668454e-05, "loss": 0.4246, "step": 33680 }, { "epoch": 0.9247940691927512, "grad_norm": 0.41896677017211914, "learning_rate": 1.1202068702098725e-05, "loss": 0.4965, "step": 33681 }, { "epoch": 0.9248215266337177, "grad_norm": 0.3959046006202698, "learning_rate": 1.120163993928676e-05, "loss": 0.5508, "step": 33682 }, { "epoch": 0.9248489840746842, "grad_norm": 0.42785173654556274, "learning_rate": 1.1201211174233348e-05, "loss": 0.5252, "step": 33683 }, { "epoch": 0.9248764415156507, "grad_norm": 0.41327404975891113, "learning_rate": 1.1200782406939297e-05, "loss": 0.4862, "step": 33684 }, { "epoch": 0.9249038989566173, "grad_norm": 0.3773987293243408, "learning_rate": 1.1200353637405405e-05, "loss": 0.4606, "step": 33685 }, { "epoch": 0.9249313563975837, "grad_norm": 0.4350607693195343, "learning_rate": 1.1199924865632466e-05, "loss": 0.5487, "step": 33686 }, { "epoch": 0.9249588138385503, "grad_norm": 0.36752796173095703, "learning_rate": 1.1199496091621286e-05, "loss": 0.5359, "step": 33687 }, { "epoch": 0.9249862712795167, "grad_norm": 0.4487992525100708, "learning_rate": 1.1199067315372668e-05, "loss": 0.497, "step": 33688 }, { "epoch": 0.9250137287204833, "grad_norm": 0.40307527780532837, "learning_rate": 1.1198638536887399e-05, "loss": 0.4288, "step": 33689 }, { "epoch": 0.9250411861614497, "grad_norm": 1.5735903978347778, "learning_rate": 1.1198209756166289e-05, "loss": 0.52, "step": 33690 }, { "epoch": 0.9250686436024163, "grad_norm": 0.35398319363594055, "learning_rate": 1.1197780973210138e-05, "loss": 0.4699, "step": 33691 }, { "epoch": 0.9250961010433828, "grad_norm": 0.3859114944934845, "learning_rate": 1.1197352188019741e-05, "loss": 0.4535, "step": 33692 }, { "epoch": 0.9251235584843492, "grad_norm": 0.3988330662250519, "learning_rate": 1.11969234005959e-05, "loss": 0.4977, "step": 33693 }, { "epoch": 0.9251510159253158, "grad_norm": 0.39542096853256226, "learning_rate": 1.1196494610939414e-05, "loss": 0.5189, "step": 33694 }, { "epoch": 0.9251784733662822, "grad_norm": 0.35293254256248474, "learning_rate": 1.1196065819051085e-05, "loss": 0.5145, "step": 33695 }, { "epoch": 0.9252059308072488, "grad_norm": 0.37011709809303284, "learning_rate": 1.1195637024931711e-05, "loss": 0.3979, "step": 33696 }, { "epoch": 0.9252333882482152, "grad_norm": 0.3985292315483093, "learning_rate": 1.1195208228582093e-05, "loss": 0.5932, "step": 33697 }, { "epoch": 0.9252608456891818, "grad_norm": 0.3876800835132599, "learning_rate": 1.119477943000303e-05, "loss": 0.5276, "step": 33698 }, { "epoch": 0.9252883031301483, "grad_norm": 0.3846152126789093, "learning_rate": 1.119435062919532e-05, "loss": 0.5021, "step": 33699 }, { "epoch": 0.9253157605711148, "grad_norm": 0.3558667302131653, "learning_rate": 1.119392182615977e-05, "loss": 0.4635, "step": 33700 }, { "epoch": 0.9253432180120813, "grad_norm": 0.442457914352417, "learning_rate": 1.1193493020897173e-05, "loss": 0.5351, "step": 33701 }, { "epoch": 0.9253706754530477, "grad_norm": 0.4747838079929352, "learning_rate": 1.1193064213408326e-05, "loss": 0.4875, "step": 33702 }, { "epoch": 0.9253981328940143, "grad_norm": 0.41236746311187744, "learning_rate": 1.1192635403694038e-05, "loss": 0.5509, "step": 33703 }, { "epoch": 0.9254255903349807, "grad_norm": 0.386768102645874, "learning_rate": 1.1192206591755102e-05, "loss": 0.3994, "step": 33704 }, { "epoch": 0.9254530477759473, "grad_norm": 0.3642443120479584, "learning_rate": 1.1191777777592321e-05, "loss": 0.4538, "step": 33705 }, { "epoch": 0.9254805052169138, "grad_norm": 0.3774529993534088, "learning_rate": 1.1191348961206496e-05, "loss": 0.4885, "step": 33706 }, { "epoch": 0.9255079626578803, "grad_norm": 0.3954784870147705, "learning_rate": 1.119092014259842e-05, "loss": 0.4429, "step": 33707 }, { "epoch": 0.9255354200988468, "grad_norm": 0.3751177489757538, "learning_rate": 1.1190491321768903e-05, "loss": 0.5509, "step": 33708 }, { "epoch": 0.9255628775398133, "grad_norm": 0.38177812099456787, "learning_rate": 1.1190062498718736e-05, "loss": 0.4489, "step": 33709 }, { "epoch": 0.9255903349807798, "grad_norm": 0.3579184114933014, "learning_rate": 1.1189633673448726e-05, "loss": 0.4666, "step": 33710 }, { "epoch": 0.9256177924217462, "grad_norm": 0.37936633825302124, "learning_rate": 1.1189204845959669e-05, "loss": 0.456, "step": 33711 }, { "epoch": 0.9256452498627128, "grad_norm": 0.35279977321624756, "learning_rate": 1.1188776016252363e-05, "loss": 0.4518, "step": 33712 }, { "epoch": 0.9256727073036793, "grad_norm": 0.654310941696167, "learning_rate": 1.1188347184327613e-05, "loss": 0.4764, "step": 33713 }, { "epoch": 0.9257001647446458, "grad_norm": 0.37166836857795715, "learning_rate": 1.1187918350186213e-05, "loss": 0.5291, "step": 33714 }, { "epoch": 0.9257276221856123, "grad_norm": 0.5453556180000305, "learning_rate": 1.1187489513828969e-05, "loss": 0.5095, "step": 33715 }, { "epoch": 0.9257550796265788, "grad_norm": 0.4025064706802368, "learning_rate": 1.1187060675256679e-05, "loss": 0.506, "step": 33716 }, { "epoch": 0.9257825370675453, "grad_norm": 0.40015554428100586, "learning_rate": 1.1186631834470136e-05, "loss": 0.3935, "step": 33717 }, { "epoch": 0.9258099945085118, "grad_norm": 0.3717177212238312, "learning_rate": 1.1186202991470153e-05, "loss": 0.4678, "step": 33718 }, { "epoch": 0.9258374519494783, "grad_norm": 0.4700607657432556, "learning_rate": 1.118577414625752e-05, "loss": 0.4678, "step": 33719 }, { "epoch": 0.9258649093904449, "grad_norm": 0.3869458734989166, "learning_rate": 1.1185345298833037e-05, "loss": 0.573, "step": 33720 }, { "epoch": 0.9258923668314113, "grad_norm": 0.43616998195648193, "learning_rate": 1.1184916449197509e-05, "loss": 0.5071, "step": 33721 }, { "epoch": 0.9259198242723778, "grad_norm": 0.40419280529022217, "learning_rate": 1.1184487597351735e-05, "loss": 0.4879, "step": 33722 }, { "epoch": 0.9259472817133443, "grad_norm": 0.45413118600845337, "learning_rate": 1.118405874329651e-05, "loss": 0.5514, "step": 33723 }, { "epoch": 0.9259747391543108, "grad_norm": 0.3713375926017761, "learning_rate": 1.1183629887032643e-05, "loss": 0.4473, "step": 33724 }, { "epoch": 0.9260021965952773, "grad_norm": 0.3848397433757782, "learning_rate": 1.1183201028560924e-05, "loss": 0.488, "step": 33725 }, { "epoch": 0.9260296540362438, "grad_norm": 0.38258564472198486, "learning_rate": 1.1182772167882158e-05, "loss": 0.4143, "step": 33726 }, { "epoch": 0.9260571114772104, "grad_norm": 0.36616215109825134, "learning_rate": 1.1182343304997146e-05, "loss": 0.5755, "step": 33727 }, { "epoch": 0.9260845689181768, "grad_norm": 0.379401296377182, "learning_rate": 1.1181914439906685e-05, "loss": 0.5442, "step": 33728 }, { "epoch": 0.9261120263591434, "grad_norm": 0.35111039876937866, "learning_rate": 1.1181485572611579e-05, "loss": 0.4614, "step": 33729 }, { "epoch": 0.9261394838001098, "grad_norm": 0.38678061962127686, "learning_rate": 1.118105670311262e-05, "loss": 0.4746, "step": 33730 }, { "epoch": 0.9261669412410763, "grad_norm": 0.34825825691223145, "learning_rate": 1.118062783141062e-05, "loss": 0.4912, "step": 33731 }, { "epoch": 0.9261943986820428, "grad_norm": 0.4604443907737732, "learning_rate": 1.1180198957506367e-05, "loss": 0.4953, "step": 33732 }, { "epoch": 0.9262218561230093, "grad_norm": 0.37800613045692444, "learning_rate": 1.1179770081400666e-05, "loss": 0.4462, "step": 33733 }, { "epoch": 0.9262493135639759, "grad_norm": 0.40627744793891907, "learning_rate": 1.1179341203094323e-05, "loss": 0.5028, "step": 33734 }, { "epoch": 0.9262767710049423, "grad_norm": 0.3819639980792999, "learning_rate": 1.1178912322588127e-05, "loss": 0.4146, "step": 33735 }, { "epoch": 0.9263042284459089, "grad_norm": 0.48233404755592346, "learning_rate": 1.1178483439882885e-05, "loss": 0.5592, "step": 33736 }, { "epoch": 0.9263316858868753, "grad_norm": 0.39878585934638977, "learning_rate": 1.1178054554979397e-05, "loss": 0.4754, "step": 33737 }, { "epoch": 0.9263591433278419, "grad_norm": 0.4042608141899109, "learning_rate": 1.1177625667878458e-05, "loss": 0.4607, "step": 33738 }, { "epoch": 0.9263866007688083, "grad_norm": 0.41842836141586304, "learning_rate": 1.1177196778580872e-05, "loss": 0.5571, "step": 33739 }, { "epoch": 0.9264140582097748, "grad_norm": 0.40282154083251953, "learning_rate": 1.1176767887087439e-05, "loss": 0.466, "step": 33740 }, { "epoch": 0.9264415156507414, "grad_norm": 0.4075777232646942, "learning_rate": 1.1176338993398959e-05, "loss": 0.4561, "step": 33741 }, { "epoch": 0.9264689730917078, "grad_norm": 0.39593324065208435, "learning_rate": 1.1175910097516232e-05, "loss": 0.4649, "step": 33742 }, { "epoch": 0.9264964305326744, "grad_norm": 0.3583887815475464, "learning_rate": 1.1175481199440055e-05, "loss": 0.4336, "step": 33743 }, { "epoch": 0.9265238879736408, "grad_norm": 0.36977893114089966, "learning_rate": 1.1175052299171232e-05, "loss": 0.4772, "step": 33744 }, { "epoch": 0.9265513454146074, "grad_norm": 0.348395973443985, "learning_rate": 1.117462339671056e-05, "loss": 0.4498, "step": 33745 }, { "epoch": 0.9265788028555738, "grad_norm": 0.40515995025634766, "learning_rate": 1.1174194492058841e-05, "loss": 0.4831, "step": 33746 }, { "epoch": 0.9266062602965404, "grad_norm": 0.43619462847709656, "learning_rate": 1.1173765585216876e-05, "loss": 0.4644, "step": 33747 }, { "epoch": 0.9266337177375069, "grad_norm": 0.39459624886512756, "learning_rate": 1.117333667618546e-05, "loss": 0.4698, "step": 33748 }, { "epoch": 0.9266611751784734, "grad_norm": 0.5384092926979065, "learning_rate": 1.1172907764965402e-05, "loss": 0.439, "step": 33749 }, { "epoch": 0.9266886326194399, "grad_norm": 0.4248936176300049, "learning_rate": 1.1172478851557494e-05, "loss": 0.4991, "step": 33750 }, { "epoch": 0.9267160900604063, "grad_norm": 0.5084558725357056, "learning_rate": 1.1172049935962536e-05, "loss": 0.5107, "step": 33751 }, { "epoch": 0.9267435475013729, "grad_norm": 0.5645139813423157, "learning_rate": 1.1171621018181334e-05, "loss": 0.5247, "step": 33752 }, { "epoch": 0.9267710049423393, "grad_norm": 0.37131965160369873, "learning_rate": 1.1171192098214682e-05, "loss": 0.4313, "step": 33753 }, { "epoch": 0.9267984623833059, "grad_norm": 0.4022738039493561, "learning_rate": 1.1170763176063386e-05, "loss": 0.4684, "step": 33754 }, { "epoch": 0.9268259198242724, "grad_norm": 0.403987854719162, "learning_rate": 1.1170334251728243e-05, "loss": 0.5059, "step": 33755 }, { "epoch": 0.9268533772652389, "grad_norm": 0.3477700650691986, "learning_rate": 1.1169905325210052e-05, "loss": 0.4283, "step": 33756 }, { "epoch": 0.9268808347062054, "grad_norm": 0.3964722454547882, "learning_rate": 1.1169476396509614e-05, "loss": 0.4483, "step": 33757 }, { "epoch": 0.9269082921471719, "grad_norm": 0.3661794662475586, "learning_rate": 1.116904746562773e-05, "loss": 0.4664, "step": 33758 }, { "epoch": 0.9269357495881384, "grad_norm": 0.4023735225200653, "learning_rate": 1.1168618532565199e-05, "loss": 0.5469, "step": 33759 }, { "epoch": 0.9269632070291048, "grad_norm": 0.36157602071762085, "learning_rate": 1.116818959732282e-05, "loss": 0.4258, "step": 33760 }, { "epoch": 0.9269906644700714, "grad_norm": 0.4121299982070923, "learning_rate": 1.1167760659901396e-05, "loss": 0.5207, "step": 33761 }, { "epoch": 0.9270181219110379, "grad_norm": 0.38283249735832214, "learning_rate": 1.1167331720301726e-05, "loss": 0.5539, "step": 33762 }, { "epoch": 0.9270455793520044, "grad_norm": 0.4154175817966461, "learning_rate": 1.1166902778524611e-05, "loss": 0.5222, "step": 33763 }, { "epoch": 0.9270730367929709, "grad_norm": 0.4225829541683197, "learning_rate": 1.1166473834570846e-05, "loss": 0.5536, "step": 33764 }, { "epoch": 0.9271004942339374, "grad_norm": 0.4028390347957611, "learning_rate": 1.1166044888441239e-05, "loss": 0.4487, "step": 33765 }, { "epoch": 0.9271279516749039, "grad_norm": 0.41038772463798523, "learning_rate": 1.1165615940136582e-05, "loss": 0.5245, "step": 33766 }, { "epoch": 0.9271554091158704, "grad_norm": 0.4044465720653534, "learning_rate": 1.1165186989657684e-05, "loss": 0.5709, "step": 33767 }, { "epoch": 0.9271828665568369, "grad_norm": 0.4244092106819153, "learning_rate": 1.1164758037005338e-05, "loss": 0.5198, "step": 33768 }, { "epoch": 0.9272103239978035, "grad_norm": 0.4254436194896698, "learning_rate": 1.1164329082180345e-05, "loss": 0.5044, "step": 33769 }, { "epoch": 0.9272377814387699, "grad_norm": 0.3905060589313507, "learning_rate": 1.1163900125183509e-05, "loss": 0.3832, "step": 33770 }, { "epoch": 0.9272652388797364, "grad_norm": 0.3727901577949524, "learning_rate": 1.1163471166015625e-05, "loss": 0.5081, "step": 33771 }, { "epoch": 0.9272926963207029, "grad_norm": 0.3684048354625702, "learning_rate": 1.1163042204677498e-05, "loss": 0.5439, "step": 33772 }, { "epoch": 0.9273201537616694, "grad_norm": 0.3536275029182434, "learning_rate": 1.1162613241169929e-05, "loss": 0.4175, "step": 33773 }, { "epoch": 0.9273476112026359, "grad_norm": 0.36262378096580505, "learning_rate": 1.1162184275493711e-05, "loss": 0.5014, "step": 33774 }, { "epoch": 0.9273750686436024, "grad_norm": 0.4231962263584137, "learning_rate": 1.1161755307649648e-05, "loss": 0.5499, "step": 33775 }, { "epoch": 0.927402526084569, "grad_norm": 0.39009636640548706, "learning_rate": 1.1161326337638543e-05, "loss": 0.5345, "step": 33776 }, { "epoch": 0.9274299835255354, "grad_norm": 0.3861329257488251, "learning_rate": 1.1160897365461195e-05, "loss": 0.4999, "step": 33777 }, { "epoch": 0.927457440966502, "grad_norm": 0.43086180090904236, "learning_rate": 1.11604683911184e-05, "loss": 0.5434, "step": 33778 }, { "epoch": 0.9274848984074684, "grad_norm": 0.4597713053226471, "learning_rate": 1.1160039414610959e-05, "loss": 0.5626, "step": 33779 }, { "epoch": 0.9275123558484349, "grad_norm": 0.39543578028678894, "learning_rate": 1.1159610435939679e-05, "loss": 0.5107, "step": 33780 }, { "epoch": 0.9275398132894014, "grad_norm": 0.3632591962814331, "learning_rate": 1.1159181455105354e-05, "loss": 0.4892, "step": 33781 }, { "epoch": 0.9275672707303679, "grad_norm": 0.39018383622169495, "learning_rate": 1.1158752472108785e-05, "loss": 0.5837, "step": 33782 }, { "epoch": 0.9275947281713345, "grad_norm": 0.3927360475063324, "learning_rate": 1.1158323486950774e-05, "loss": 0.4742, "step": 33783 }, { "epoch": 0.9276221856123009, "grad_norm": 0.39533814787864685, "learning_rate": 1.1157894499632115e-05, "loss": 0.5651, "step": 33784 }, { "epoch": 0.9276496430532675, "grad_norm": 0.5675268173217773, "learning_rate": 1.115746551015362e-05, "loss": 0.5023, "step": 33785 }, { "epoch": 0.9276771004942339, "grad_norm": 0.3743692636489868, "learning_rate": 1.1157036518516081e-05, "loss": 0.4384, "step": 33786 }, { "epoch": 0.9277045579352005, "grad_norm": 0.36857888102531433, "learning_rate": 1.1156607524720295e-05, "loss": 0.4245, "step": 33787 }, { "epoch": 0.9277320153761669, "grad_norm": 0.4698542654514313, "learning_rate": 1.1156178528767071e-05, "loss": 0.5483, "step": 33788 }, { "epoch": 0.9277594728171334, "grad_norm": 0.42110148072242737, "learning_rate": 1.1155749530657203e-05, "loss": 0.4558, "step": 33789 }, { "epoch": 0.9277869302581, "grad_norm": 0.35499307513237, "learning_rate": 1.1155320530391496e-05, "loss": 0.4726, "step": 33790 }, { "epoch": 0.9278143876990664, "grad_norm": 0.44440871477127075, "learning_rate": 1.1154891527970746e-05, "loss": 0.5533, "step": 33791 }, { "epoch": 0.927841845140033, "grad_norm": 0.34942129254341125, "learning_rate": 1.1154462523395753e-05, "loss": 0.5547, "step": 33792 }, { "epoch": 0.9278693025809994, "grad_norm": 0.45745357871055603, "learning_rate": 1.1154033516667322e-05, "loss": 0.6138, "step": 33793 }, { "epoch": 0.927896760021966, "grad_norm": 0.9136677980422974, "learning_rate": 1.1153604507786247e-05, "loss": 0.4854, "step": 33794 }, { "epoch": 0.9279242174629324, "grad_norm": 0.44959521293640137, "learning_rate": 1.1153175496753334e-05, "loss": 0.5357, "step": 33795 }, { "epoch": 0.927951674903899, "grad_norm": 0.36974024772644043, "learning_rate": 1.1152746483569382e-05, "loss": 0.4779, "step": 33796 }, { "epoch": 0.9279791323448655, "grad_norm": 0.4513631761074066, "learning_rate": 1.1152317468235185e-05, "loss": 0.5342, "step": 33797 }, { "epoch": 0.928006589785832, "grad_norm": 0.3882017135620117, "learning_rate": 1.115188845075155e-05, "loss": 0.4443, "step": 33798 }, { "epoch": 0.9280340472267985, "grad_norm": 0.46073058247566223, "learning_rate": 1.1151459431119276e-05, "loss": 0.4752, "step": 33799 }, { "epoch": 0.9280615046677649, "grad_norm": 0.3850788176059723, "learning_rate": 1.1151030409339162e-05, "loss": 0.4809, "step": 33800 }, { "epoch": 0.9280889621087315, "grad_norm": 0.3836579918861389, "learning_rate": 1.115060138541201e-05, "loss": 0.4349, "step": 33801 }, { "epoch": 0.9281164195496979, "grad_norm": 0.41672343015670776, "learning_rate": 1.115017235933862e-05, "loss": 0.4594, "step": 33802 }, { "epoch": 0.9281438769906645, "grad_norm": 0.3871751129627228, "learning_rate": 1.114974333111979e-05, "loss": 0.4863, "step": 33803 }, { "epoch": 0.928171334431631, "grad_norm": 0.49239087104797363, "learning_rate": 1.1149314300756322e-05, "loss": 0.4479, "step": 33804 }, { "epoch": 0.9281987918725975, "grad_norm": 0.5685789585113525, "learning_rate": 1.1148885268249017e-05, "loss": 0.5479, "step": 33805 }, { "epoch": 0.928226249313564, "grad_norm": 0.40041565895080566, "learning_rate": 1.114845623359867e-05, "loss": 0.475, "step": 33806 }, { "epoch": 0.9282537067545304, "grad_norm": 0.43159371614456177, "learning_rate": 1.114802719680609e-05, "loss": 0.5594, "step": 33807 }, { "epoch": 0.928281164195497, "grad_norm": 0.38588353991508484, "learning_rate": 1.1147598157872072e-05, "loss": 0.5371, "step": 33808 }, { "epoch": 0.9283086216364634, "grad_norm": 0.4354581832885742, "learning_rate": 1.1147169116797416e-05, "loss": 0.5771, "step": 33809 }, { "epoch": 0.92833607907743, "grad_norm": 0.4921262860298157, "learning_rate": 1.1146740073582927e-05, "loss": 0.4894, "step": 33810 }, { "epoch": 0.9283635365183965, "grad_norm": 0.4122196137905121, "learning_rate": 1.1146311028229398e-05, "loss": 0.5269, "step": 33811 }, { "epoch": 0.928390993959363, "grad_norm": 0.3391551077365875, "learning_rate": 1.1145881980737634e-05, "loss": 0.4253, "step": 33812 }, { "epoch": 0.9284184514003295, "grad_norm": 0.4376925528049469, "learning_rate": 1.1145452931108435e-05, "loss": 0.5566, "step": 33813 }, { "epoch": 0.928445908841296, "grad_norm": 0.36611905694007874, "learning_rate": 1.1145023879342601e-05, "loss": 0.4264, "step": 33814 }, { "epoch": 0.9284733662822625, "grad_norm": 0.3687635362148285, "learning_rate": 1.114459482544093e-05, "loss": 0.3909, "step": 33815 }, { "epoch": 0.928500823723229, "grad_norm": 0.39441466331481934, "learning_rate": 1.1144165769404224e-05, "loss": 0.5287, "step": 33816 }, { "epoch": 0.9285282811641955, "grad_norm": 0.39531370997428894, "learning_rate": 1.1143736711233288e-05, "loss": 0.4482, "step": 33817 }, { "epoch": 0.928555738605162, "grad_norm": 0.36510592699050903, "learning_rate": 1.1143307650928914e-05, "loss": 0.4813, "step": 33818 }, { "epoch": 0.9285831960461285, "grad_norm": 0.44977667927742004, "learning_rate": 1.1142878588491908e-05, "loss": 0.5056, "step": 33819 }, { "epoch": 0.928610653487095, "grad_norm": 0.4156414568424225, "learning_rate": 1.1142449523923069e-05, "loss": 0.5385, "step": 33820 }, { "epoch": 0.9286381109280615, "grad_norm": 0.35752302408218384, "learning_rate": 1.1142020457223195e-05, "loss": 0.4431, "step": 33821 }, { "epoch": 0.928665568369028, "grad_norm": 0.5257189869880676, "learning_rate": 1.1141591388393091e-05, "loss": 0.4534, "step": 33822 }, { "epoch": 0.9286930258099945, "grad_norm": 0.39845407009124756, "learning_rate": 1.1141162317433551e-05, "loss": 0.5232, "step": 33823 }, { "epoch": 0.928720483250961, "grad_norm": 0.49510690569877625, "learning_rate": 1.1140733244345383e-05, "loss": 0.5124, "step": 33824 }, { "epoch": 0.9287479406919276, "grad_norm": 0.43215081095695496, "learning_rate": 1.1140304169129383e-05, "loss": 0.4653, "step": 33825 }, { "epoch": 0.928775398132894, "grad_norm": 0.34577497839927673, "learning_rate": 1.113987509178635e-05, "loss": 0.4415, "step": 33826 }, { "epoch": 0.9288028555738606, "grad_norm": 0.46081218123435974, "learning_rate": 1.1139446012317085e-05, "loss": 0.5002, "step": 33827 }, { "epoch": 0.928830313014827, "grad_norm": 0.41249343752861023, "learning_rate": 1.1139016930722391e-05, "loss": 0.4091, "step": 33828 }, { "epoch": 0.9288577704557935, "grad_norm": 0.4276910424232483, "learning_rate": 1.1138587847003069e-05, "loss": 0.521, "step": 33829 }, { "epoch": 0.92888522789676, "grad_norm": 0.4002867341041565, "learning_rate": 1.1138158761159917e-05, "loss": 0.5203, "step": 33830 }, { "epoch": 0.9289126853377265, "grad_norm": 0.3752381205558777, "learning_rate": 1.113772967319373e-05, "loss": 0.484, "step": 33831 }, { "epoch": 0.9289401427786931, "grad_norm": 0.42566466331481934, "learning_rate": 1.1137300583105322e-05, "loss": 0.5363, "step": 33832 }, { "epoch": 0.9289676002196595, "grad_norm": 0.41964268684387207, "learning_rate": 1.113687149089548e-05, "loss": 0.4418, "step": 33833 }, { "epoch": 0.9289950576606261, "grad_norm": 0.4044785499572754, "learning_rate": 1.1136442396565012e-05, "loss": 0.4846, "step": 33834 }, { "epoch": 0.9290225151015925, "grad_norm": 0.441012442111969, "learning_rate": 1.1136013300114717e-05, "loss": 0.4762, "step": 33835 }, { "epoch": 0.929049972542559, "grad_norm": 0.397538423538208, "learning_rate": 1.1135584201545391e-05, "loss": 0.5129, "step": 33836 }, { "epoch": 0.9290774299835255, "grad_norm": 0.4115763008594513, "learning_rate": 1.1135155100857841e-05, "loss": 0.4733, "step": 33837 }, { "epoch": 0.929104887424492, "grad_norm": 0.39494460821151733, "learning_rate": 1.1134725998052862e-05, "loss": 0.4582, "step": 33838 }, { "epoch": 0.9291323448654586, "grad_norm": 0.4547971189022064, "learning_rate": 1.113429689313126e-05, "loss": 0.4234, "step": 33839 }, { "epoch": 0.929159802306425, "grad_norm": 0.487589567899704, "learning_rate": 1.1133867786093833e-05, "loss": 0.4136, "step": 33840 }, { "epoch": 0.9291872597473916, "grad_norm": 0.4620743691921234, "learning_rate": 1.1133438676941376e-05, "loss": 0.5002, "step": 33841 }, { "epoch": 0.929214717188358, "grad_norm": 0.3997475802898407, "learning_rate": 1.1133009565674697e-05, "loss": 0.4955, "step": 33842 }, { "epoch": 0.9292421746293246, "grad_norm": 0.3674972653388977, "learning_rate": 1.1132580452294595e-05, "loss": 0.4933, "step": 33843 }, { "epoch": 0.929269632070291, "grad_norm": 0.46377331018447876, "learning_rate": 1.1132151336801866e-05, "loss": 0.5189, "step": 33844 }, { "epoch": 0.9292970895112576, "grad_norm": 0.5363406538963318, "learning_rate": 1.1131722219197315e-05, "loss": 0.5663, "step": 33845 }, { "epoch": 0.9293245469522241, "grad_norm": 0.40679609775543213, "learning_rate": 1.113129309948174e-05, "loss": 0.5147, "step": 33846 }, { "epoch": 0.9293520043931905, "grad_norm": 0.34440159797668457, "learning_rate": 1.1130863977655944e-05, "loss": 0.4205, "step": 33847 }, { "epoch": 0.9293794618341571, "grad_norm": 0.39028841257095337, "learning_rate": 1.1130434853720724e-05, "loss": 0.4779, "step": 33848 }, { "epoch": 0.9294069192751235, "grad_norm": 0.368453711271286, "learning_rate": 1.113000572767688e-05, "loss": 0.4592, "step": 33849 }, { "epoch": 0.9294343767160901, "grad_norm": 0.3854160010814667, "learning_rate": 1.1129576599525221e-05, "loss": 0.4851, "step": 33850 }, { "epoch": 0.9294618341570565, "grad_norm": 0.44401976466178894, "learning_rate": 1.1129147469266534e-05, "loss": 0.5356, "step": 33851 }, { "epoch": 0.9294892915980231, "grad_norm": 0.4123310148715973, "learning_rate": 1.1128718336901632e-05, "loss": 0.4402, "step": 33852 }, { "epoch": 0.9295167490389896, "grad_norm": 0.4242653250694275, "learning_rate": 1.1128289202431308e-05, "loss": 0.4488, "step": 33853 }, { "epoch": 0.9295442064799561, "grad_norm": 0.3958868384361267, "learning_rate": 1.1127860065856364e-05, "loss": 0.4932, "step": 33854 }, { "epoch": 0.9295716639209226, "grad_norm": 0.33216503262519836, "learning_rate": 1.1127430927177602e-05, "loss": 0.3584, "step": 33855 }, { "epoch": 0.929599121361889, "grad_norm": 0.36972710490226746, "learning_rate": 1.1127001786395822e-05, "loss": 0.5546, "step": 33856 }, { "epoch": 0.9296265788028556, "grad_norm": 0.37614285945892334, "learning_rate": 1.112657264351182e-05, "loss": 0.493, "step": 33857 }, { "epoch": 0.929654036243822, "grad_norm": 0.38602781295776367, "learning_rate": 1.1126143498526407e-05, "loss": 0.4987, "step": 33858 }, { "epoch": 0.9296814936847886, "grad_norm": 0.38770607113838196, "learning_rate": 1.1125714351440373e-05, "loss": 0.5444, "step": 33859 }, { "epoch": 0.9297089511257551, "grad_norm": 0.46298202872276306, "learning_rate": 1.1125285202254523e-05, "loss": 0.5073, "step": 33860 }, { "epoch": 0.9297364085667216, "grad_norm": 0.5163595676422119, "learning_rate": 1.1124856050969658e-05, "loss": 0.4022, "step": 33861 }, { "epoch": 0.9297638660076881, "grad_norm": 0.41576531529426575, "learning_rate": 1.1124426897586575e-05, "loss": 0.5659, "step": 33862 }, { "epoch": 0.9297913234486546, "grad_norm": 0.3902250826358795, "learning_rate": 1.112399774210608e-05, "loss": 0.4423, "step": 33863 }, { "epoch": 0.9298187808896211, "grad_norm": 0.3809581995010376, "learning_rate": 1.1123568584528967e-05, "loss": 0.4702, "step": 33864 }, { "epoch": 0.9298462383305875, "grad_norm": 0.4223405420780182, "learning_rate": 1.1123139424856041e-05, "loss": 0.5005, "step": 33865 }, { "epoch": 0.9298736957715541, "grad_norm": 0.4074265658855438, "learning_rate": 1.1122710263088103e-05, "loss": 0.5337, "step": 33866 }, { "epoch": 0.9299011532125206, "grad_norm": 0.4387895464897156, "learning_rate": 1.112228109922595e-05, "loss": 0.5343, "step": 33867 }, { "epoch": 0.9299286106534871, "grad_norm": 0.3715851306915283, "learning_rate": 1.1121851933270388e-05, "loss": 0.4628, "step": 33868 }, { "epoch": 0.9299560680944536, "grad_norm": 0.38258302211761475, "learning_rate": 1.1121422765222213e-05, "loss": 0.5047, "step": 33869 }, { "epoch": 0.9299835255354201, "grad_norm": 0.449955016374588, "learning_rate": 1.1120993595082223e-05, "loss": 0.5602, "step": 33870 }, { "epoch": 0.9300109829763866, "grad_norm": 0.38227182626724243, "learning_rate": 1.1120564422851227e-05, "loss": 0.4758, "step": 33871 }, { "epoch": 0.9300384404173531, "grad_norm": 0.38537880778312683, "learning_rate": 1.1120135248530017e-05, "loss": 0.4663, "step": 33872 }, { "epoch": 0.9300658978583196, "grad_norm": 0.39640843868255615, "learning_rate": 1.11197060721194e-05, "loss": 0.5013, "step": 33873 }, { "epoch": 0.9300933552992862, "grad_norm": 0.37811675667762756, "learning_rate": 1.1119276893620174e-05, "loss": 0.5478, "step": 33874 }, { "epoch": 0.9301208127402526, "grad_norm": 0.3421507477760315, "learning_rate": 1.1118847713033136e-05, "loss": 0.4359, "step": 33875 }, { "epoch": 0.9301482701812192, "grad_norm": 0.37096068263053894, "learning_rate": 1.1118418530359091e-05, "loss": 0.4946, "step": 33876 }, { "epoch": 0.9301757276221856, "grad_norm": 0.3866284489631653, "learning_rate": 1.111798934559884e-05, "loss": 0.4707, "step": 33877 }, { "epoch": 0.9302031850631521, "grad_norm": 0.5095987915992737, "learning_rate": 1.1117560158753182e-05, "loss": 0.533, "step": 33878 }, { "epoch": 0.9302306425041186, "grad_norm": 0.37969622015953064, "learning_rate": 1.1117130969822919e-05, "loss": 0.453, "step": 33879 }, { "epoch": 0.9302580999450851, "grad_norm": 0.42900756001472473, "learning_rate": 1.1116701778808845e-05, "loss": 0.4888, "step": 33880 }, { "epoch": 0.9302855573860516, "grad_norm": 0.5101460218429565, "learning_rate": 1.111627258571177e-05, "loss": 0.5881, "step": 33881 }, { "epoch": 0.9303130148270181, "grad_norm": 0.39419567584991455, "learning_rate": 1.1115843390532488e-05, "loss": 0.5334, "step": 33882 }, { "epoch": 0.9303404722679847, "grad_norm": 0.39638853073120117, "learning_rate": 1.1115414193271806e-05, "loss": 0.4994, "step": 33883 }, { "epoch": 0.9303679297089511, "grad_norm": 0.41686609387397766, "learning_rate": 1.1114984993930518e-05, "loss": 0.4689, "step": 33884 }, { "epoch": 0.9303953871499177, "grad_norm": 0.3739151656627655, "learning_rate": 1.1114555792509425e-05, "loss": 0.4686, "step": 33885 }, { "epoch": 0.9304228445908841, "grad_norm": 0.40521782636642456, "learning_rate": 1.1114126589009332e-05, "loss": 0.5613, "step": 33886 }, { "epoch": 0.9304503020318506, "grad_norm": 0.42222103476524353, "learning_rate": 1.1113697383431038e-05, "loss": 0.4861, "step": 33887 }, { "epoch": 0.9304777594728171, "grad_norm": 26.999393463134766, "learning_rate": 1.111326817577534e-05, "loss": 0.5188, "step": 33888 }, { "epoch": 0.9305052169137836, "grad_norm": 0.4805575907230377, "learning_rate": 1.1112838966043045e-05, "loss": 0.5503, "step": 33889 }, { "epoch": 0.9305326743547502, "grad_norm": 0.37578314542770386, "learning_rate": 1.1112409754234947e-05, "loss": 0.4546, "step": 33890 }, { "epoch": 0.9305601317957166, "grad_norm": 0.4563201069831848, "learning_rate": 1.1111980540351851e-05, "loss": 0.4951, "step": 33891 }, { "epoch": 0.9305875892366832, "grad_norm": 0.4154123365879059, "learning_rate": 1.1111551324394559e-05, "loss": 0.5337, "step": 33892 }, { "epoch": 0.9306150466776496, "grad_norm": 0.891053318977356, "learning_rate": 1.1111122106363865e-05, "loss": 0.445, "step": 33893 }, { "epoch": 0.9306425041186162, "grad_norm": 0.3605031669139862, "learning_rate": 1.1110692886260576e-05, "loss": 0.4976, "step": 33894 }, { "epoch": 0.9306699615595826, "grad_norm": 0.40836673974990845, "learning_rate": 1.111026366408549e-05, "loss": 0.4951, "step": 33895 }, { "epoch": 0.9306974190005491, "grad_norm": 0.41237586736679077, "learning_rate": 1.1109834439839408e-05, "loss": 0.4365, "step": 33896 }, { "epoch": 0.9307248764415157, "grad_norm": 0.45696115493774414, "learning_rate": 1.110940521352313e-05, "loss": 0.5467, "step": 33897 }, { "epoch": 0.9307523338824821, "grad_norm": 0.3778960704803467, "learning_rate": 1.1108975985137458e-05, "loss": 0.4981, "step": 33898 }, { "epoch": 0.9307797913234487, "grad_norm": 0.3816961348056793, "learning_rate": 1.1108546754683191e-05, "loss": 0.5561, "step": 33899 }, { "epoch": 0.9308072487644151, "grad_norm": 0.47894003987312317, "learning_rate": 1.1108117522161129e-05, "loss": 0.4996, "step": 33900 }, { "epoch": 0.9308347062053817, "grad_norm": 0.44234177470207214, "learning_rate": 1.1107688287572076e-05, "loss": 0.5434, "step": 33901 }, { "epoch": 0.9308621636463481, "grad_norm": 0.38688674569129944, "learning_rate": 1.1107259050916832e-05, "loss": 0.4291, "step": 33902 }, { "epoch": 0.9308896210873147, "grad_norm": 0.3314630687236786, "learning_rate": 1.1106829812196194e-05, "loss": 0.3991, "step": 33903 }, { "epoch": 0.9309170785282812, "grad_norm": 0.3693886399269104, "learning_rate": 1.1106400571410967e-05, "loss": 0.4634, "step": 33904 }, { "epoch": 0.9309445359692476, "grad_norm": 0.49525579810142517, "learning_rate": 1.1105971328561952e-05, "loss": 0.442, "step": 33905 }, { "epoch": 0.9309719934102142, "grad_norm": 0.3619783818721771, "learning_rate": 1.110554208364994e-05, "loss": 0.3978, "step": 33906 }, { "epoch": 0.9309994508511806, "grad_norm": 0.48886600136756897, "learning_rate": 1.1105112836675746e-05, "loss": 0.4939, "step": 33907 }, { "epoch": 0.9310269082921472, "grad_norm": 0.3621227443218231, "learning_rate": 1.110468358764016e-05, "loss": 0.482, "step": 33908 }, { "epoch": 0.9310543657331136, "grad_norm": 0.3662101924419403, "learning_rate": 1.110425433654399e-05, "loss": 0.4986, "step": 33909 }, { "epoch": 0.9310818231740802, "grad_norm": 0.3719158172607422, "learning_rate": 1.110382508338803e-05, "loss": 0.5122, "step": 33910 }, { "epoch": 0.9311092806150467, "grad_norm": 0.3949300944805145, "learning_rate": 1.1103395828173085e-05, "loss": 0.5311, "step": 33911 }, { "epoch": 0.9311367380560132, "grad_norm": 0.35517632961273193, "learning_rate": 1.1102966570899956e-05, "loss": 0.4186, "step": 33912 }, { "epoch": 0.9311641954969797, "grad_norm": 0.4252881407737732, "learning_rate": 1.1102537311569439e-05, "loss": 0.4513, "step": 33913 }, { "epoch": 0.9311916529379461, "grad_norm": 1.6019890308380127, "learning_rate": 1.1102108050182343e-05, "loss": 0.4907, "step": 33914 }, { "epoch": 0.9312191103789127, "grad_norm": 0.4416545629501343, "learning_rate": 1.110167878673946e-05, "loss": 0.4711, "step": 33915 }, { "epoch": 0.9312465678198791, "grad_norm": 0.9622458219528198, "learning_rate": 1.1101249521241594e-05, "loss": 0.493, "step": 33916 }, { "epoch": 0.9312740252608457, "grad_norm": 0.36465930938720703, "learning_rate": 1.1100820253689548e-05, "loss": 0.4796, "step": 33917 }, { "epoch": 0.9313014827018122, "grad_norm": 0.4122917354106903, "learning_rate": 1.1100390984084119e-05, "loss": 0.4735, "step": 33918 }, { "epoch": 0.9313289401427787, "grad_norm": 0.43964335322380066, "learning_rate": 1.1099961712426112e-05, "loss": 0.5396, "step": 33919 }, { "epoch": 0.9313563975837452, "grad_norm": 0.4035888910293579, "learning_rate": 1.1099532438716328e-05, "loss": 0.4925, "step": 33920 }, { "epoch": 0.9313838550247117, "grad_norm": 0.41078150272369385, "learning_rate": 1.1099103162955558e-05, "loss": 0.4373, "step": 33921 }, { "epoch": 0.9314113124656782, "grad_norm": 0.38426533341407776, "learning_rate": 1.1098673885144616e-05, "loss": 0.4941, "step": 33922 }, { "epoch": 0.9314387699066446, "grad_norm": 0.38748496770858765, "learning_rate": 1.1098244605284295e-05, "loss": 0.5191, "step": 33923 }, { "epoch": 0.9314662273476112, "grad_norm": 0.5053889751434326, "learning_rate": 1.1097815323375394e-05, "loss": 0.485, "step": 33924 }, { "epoch": 0.9314936847885777, "grad_norm": 0.40390557050704956, "learning_rate": 1.1097386039418718e-05, "loss": 0.5811, "step": 33925 }, { "epoch": 0.9315211422295442, "grad_norm": 0.4088670611381531, "learning_rate": 1.1096956753415066e-05, "loss": 0.5318, "step": 33926 }, { "epoch": 0.9315485996705107, "grad_norm": 0.4980161190032959, "learning_rate": 1.1096527465365243e-05, "loss": 0.5016, "step": 33927 }, { "epoch": 0.9315760571114772, "grad_norm": 0.36377063393592834, "learning_rate": 1.1096098175270046e-05, "loss": 0.511, "step": 33928 }, { "epoch": 0.9316035145524437, "grad_norm": 0.513690710067749, "learning_rate": 1.1095668883130275e-05, "loss": 0.4711, "step": 33929 }, { "epoch": 0.9316309719934102, "grad_norm": 0.3738226592540741, "learning_rate": 1.1095239588946731e-05, "loss": 0.4798, "step": 33930 }, { "epoch": 0.9316584294343767, "grad_norm": 0.37531641125679016, "learning_rate": 1.1094810292720216e-05, "loss": 0.4493, "step": 33931 }, { "epoch": 0.9316858868753433, "grad_norm": 0.3708914518356323, "learning_rate": 1.1094380994451532e-05, "loss": 0.4586, "step": 33932 }, { "epoch": 0.9317133443163097, "grad_norm": 0.39712953567504883, "learning_rate": 1.1093951694141478e-05, "loss": 0.5125, "step": 33933 }, { "epoch": 0.9317408017572762, "grad_norm": 0.37745749950408936, "learning_rate": 1.1093522391790851e-05, "loss": 0.4628, "step": 33934 }, { "epoch": 0.9317682591982427, "grad_norm": 0.3886372745037079, "learning_rate": 1.1093093087400459e-05, "loss": 0.4504, "step": 33935 }, { "epoch": 0.9317957166392092, "grad_norm": 0.4136604070663452, "learning_rate": 1.10926637809711e-05, "loss": 0.4652, "step": 33936 }, { "epoch": 0.9318231740801757, "grad_norm": 0.4370853900909424, "learning_rate": 1.1092234472503574e-05, "loss": 0.4487, "step": 33937 }, { "epoch": 0.9318506315211422, "grad_norm": 0.39789655804634094, "learning_rate": 1.1091805161998681e-05, "loss": 0.5612, "step": 33938 }, { "epoch": 0.9318780889621088, "grad_norm": 0.5252246260643005, "learning_rate": 1.1091375849457222e-05, "loss": 0.5116, "step": 33939 }, { "epoch": 0.9319055464030752, "grad_norm": 0.3528762459754944, "learning_rate": 1.109094653488e-05, "loss": 0.3937, "step": 33940 }, { "epoch": 0.9319330038440418, "grad_norm": 0.4359288513660431, "learning_rate": 1.1090517218267819e-05, "loss": 0.4634, "step": 33941 }, { "epoch": 0.9319604612850082, "grad_norm": 0.395525187253952, "learning_rate": 1.1090087899621467e-05, "loss": 0.4477, "step": 33942 }, { "epoch": 0.9319879187259748, "grad_norm": 0.4318186640739441, "learning_rate": 1.1089658578941758e-05, "loss": 0.4712, "step": 33943 }, { "epoch": 0.9320153761669412, "grad_norm": 0.3591901957988739, "learning_rate": 1.1089229256229486e-05, "loss": 0.4313, "step": 33944 }, { "epoch": 0.9320428336079077, "grad_norm": 0.4308105707168579, "learning_rate": 1.1088799931485456e-05, "loss": 0.5312, "step": 33945 }, { "epoch": 0.9320702910488743, "grad_norm": 0.43616336584091187, "learning_rate": 1.1088370604710465e-05, "loss": 0.4165, "step": 33946 }, { "epoch": 0.9320977484898407, "grad_norm": 0.3756295442581177, "learning_rate": 1.1087941275905316e-05, "loss": 0.5139, "step": 33947 }, { "epoch": 0.9321252059308073, "grad_norm": 0.394901305437088, "learning_rate": 1.1087511945070811e-05, "loss": 0.513, "step": 33948 }, { "epoch": 0.9321526633717737, "grad_norm": 0.36664795875549316, "learning_rate": 1.1087082612207745e-05, "loss": 0.5158, "step": 33949 }, { "epoch": 0.9321801208127403, "grad_norm": 0.4760986268520355, "learning_rate": 1.1086653277316925e-05, "loss": 0.5935, "step": 33950 }, { "epoch": 0.9322075782537067, "grad_norm": 0.398243248462677, "learning_rate": 1.1086223940399153e-05, "loss": 0.4619, "step": 33951 }, { "epoch": 0.9322350356946733, "grad_norm": 0.3957809507846832, "learning_rate": 1.1085794601455223e-05, "loss": 0.4999, "step": 33952 }, { "epoch": 0.9322624931356398, "grad_norm": 0.3805885314941406, "learning_rate": 1.1085365260485941e-05, "loss": 0.4863, "step": 33953 }, { "epoch": 0.9322899505766062, "grad_norm": 0.449421226978302, "learning_rate": 1.1084935917492106e-05, "loss": 0.4666, "step": 33954 }, { "epoch": 0.9323174080175728, "grad_norm": 0.40944716334342957, "learning_rate": 1.1084506572474516e-05, "loss": 0.5331, "step": 33955 }, { "epoch": 0.9323448654585392, "grad_norm": 0.39124244451522827, "learning_rate": 1.108407722543398e-05, "loss": 0.4781, "step": 33956 }, { "epoch": 0.9323723228995058, "grad_norm": 0.40173661708831787, "learning_rate": 1.108364787637129e-05, "loss": 0.4633, "step": 33957 }, { "epoch": 0.9323997803404722, "grad_norm": 0.37273404002189636, "learning_rate": 1.1083218525287254e-05, "loss": 0.4449, "step": 33958 }, { "epoch": 0.9324272377814388, "grad_norm": 0.3965570628643036, "learning_rate": 1.108278917218267e-05, "loss": 0.4455, "step": 33959 }, { "epoch": 0.9324546952224053, "grad_norm": 0.39474308490753174, "learning_rate": 1.1082359817058335e-05, "loss": 0.5068, "step": 33960 }, { "epoch": 0.9324821526633718, "grad_norm": 0.3441825211048126, "learning_rate": 1.1081930459915058e-05, "loss": 0.4438, "step": 33961 }, { "epoch": 0.9325096101043383, "grad_norm": 0.343740314245224, "learning_rate": 1.108150110075363e-05, "loss": 0.4, "step": 33962 }, { "epoch": 0.9325370675453047, "grad_norm": 0.41003623604774475, "learning_rate": 1.1081071739574863e-05, "loss": 0.5445, "step": 33963 }, { "epoch": 0.9325645249862713, "grad_norm": 0.3499546945095062, "learning_rate": 1.108064237637955e-05, "loss": 0.4333, "step": 33964 }, { "epoch": 0.9325919824272377, "grad_norm": 0.4043712019920349, "learning_rate": 1.1080213011168492e-05, "loss": 0.5794, "step": 33965 }, { "epoch": 0.9326194398682043, "grad_norm": 0.4591403901576996, "learning_rate": 1.1079783643942494e-05, "loss": 0.4861, "step": 33966 }, { "epoch": 0.9326468973091708, "grad_norm": 0.32576507329940796, "learning_rate": 1.1079354274702356e-05, "loss": 0.4081, "step": 33967 }, { "epoch": 0.9326743547501373, "grad_norm": 0.3941132426261902, "learning_rate": 1.1078924903448876e-05, "loss": 0.446, "step": 33968 }, { "epoch": 0.9327018121911038, "grad_norm": 0.3657786250114441, "learning_rate": 1.1078495530182855e-05, "loss": 0.4306, "step": 33969 }, { "epoch": 0.9327292696320703, "grad_norm": 0.39685970544815063, "learning_rate": 1.1078066154905098e-05, "loss": 0.4987, "step": 33970 }, { "epoch": 0.9327567270730368, "grad_norm": 0.4271356463432312, "learning_rate": 1.1077636777616402e-05, "loss": 0.5007, "step": 33971 }, { "epoch": 0.9327841845140032, "grad_norm": 0.4209103286266327, "learning_rate": 1.1077207398317573e-05, "loss": 0.4904, "step": 33972 }, { "epoch": 0.9328116419549698, "grad_norm": 0.40894943475723267, "learning_rate": 1.1076778017009404e-05, "loss": 0.453, "step": 33973 }, { "epoch": 0.9328390993959363, "grad_norm": 0.5085077285766602, "learning_rate": 1.1076348633692704e-05, "loss": 0.4341, "step": 33974 }, { "epoch": 0.9328665568369028, "grad_norm": 0.37718498706817627, "learning_rate": 1.1075919248368268e-05, "loss": 0.4726, "step": 33975 }, { "epoch": 0.9328940142778693, "grad_norm": 0.43744465708732605, "learning_rate": 1.1075489861036898e-05, "loss": 0.4749, "step": 33976 }, { "epoch": 0.9329214717188358, "grad_norm": 0.3458517789840698, "learning_rate": 1.1075060471699401e-05, "loss": 0.4704, "step": 33977 }, { "epoch": 0.9329489291598023, "grad_norm": 0.4000091552734375, "learning_rate": 1.1074631080356568e-05, "loss": 0.4959, "step": 33978 }, { "epoch": 0.9329763866007688, "grad_norm": 1.368048906326294, "learning_rate": 1.1074201687009207e-05, "loss": 0.5002, "step": 33979 }, { "epoch": 0.9330038440417353, "grad_norm": 0.4064599573612213, "learning_rate": 1.1073772291658118e-05, "loss": 0.553, "step": 33980 }, { "epoch": 0.9330313014827019, "grad_norm": 0.4002901315689087, "learning_rate": 1.10733428943041e-05, "loss": 0.5453, "step": 33981 }, { "epoch": 0.9330587589236683, "grad_norm": 0.3875720500946045, "learning_rate": 1.1072913494947954e-05, "loss": 0.455, "step": 33982 }, { "epoch": 0.9330862163646348, "grad_norm": 0.3715759217739105, "learning_rate": 1.1072484093590484e-05, "loss": 0.4912, "step": 33983 }, { "epoch": 0.9331136738056013, "grad_norm": 0.47581928968429565, "learning_rate": 1.1072054690232488e-05, "loss": 0.5731, "step": 33984 }, { "epoch": 0.9331411312465678, "grad_norm": 0.371591180562973, "learning_rate": 1.1071625284874765e-05, "loss": 0.4458, "step": 33985 }, { "epoch": 0.9331685886875343, "grad_norm": 0.3524874448776245, "learning_rate": 1.1071195877518123e-05, "loss": 0.5685, "step": 33986 }, { "epoch": 0.9331960461285008, "grad_norm": 0.3871762752532959, "learning_rate": 1.1070766468163356e-05, "loss": 0.4401, "step": 33987 }, { "epoch": 0.9332235035694674, "grad_norm": 0.38296979665756226, "learning_rate": 1.1070337056811272e-05, "loss": 0.5333, "step": 33988 }, { "epoch": 0.9332509610104338, "grad_norm": 0.3662991523742676, "learning_rate": 1.1069907643462662e-05, "loss": 0.5472, "step": 33989 }, { "epoch": 0.9332784184514004, "grad_norm": 0.3898472487926483, "learning_rate": 1.1069478228118335e-05, "loss": 0.4795, "step": 33990 }, { "epoch": 0.9333058758923668, "grad_norm": 0.4285678267478943, "learning_rate": 1.1069048810779093e-05, "loss": 0.4859, "step": 33991 }, { "epoch": 0.9333333333333333, "grad_norm": 0.38592728972435, "learning_rate": 1.1068619391445729e-05, "loss": 0.4416, "step": 33992 }, { "epoch": 0.9333607907742998, "grad_norm": 0.39011332392692566, "learning_rate": 1.1068189970119052e-05, "loss": 0.4918, "step": 33993 }, { "epoch": 0.9333882482152663, "grad_norm": 0.39472246170043945, "learning_rate": 1.1067760546799857e-05, "loss": 0.4644, "step": 33994 }, { "epoch": 0.9334157056562329, "grad_norm": 0.400867223739624, "learning_rate": 1.1067331121488951e-05, "loss": 0.4224, "step": 33995 }, { "epoch": 0.9334431630971993, "grad_norm": 1.5188970565795898, "learning_rate": 1.1066901694187131e-05, "loss": 0.4582, "step": 33996 }, { "epoch": 0.9334706205381659, "grad_norm": 0.3935699760913849, "learning_rate": 1.1066472264895195e-05, "loss": 0.496, "step": 33997 }, { "epoch": 0.9334980779791323, "grad_norm": 0.34767046570777893, "learning_rate": 1.1066042833613952e-05, "loss": 0.5268, "step": 33998 }, { "epoch": 0.9335255354200989, "grad_norm": 0.4188593626022339, "learning_rate": 1.1065613400344197e-05, "loss": 0.4764, "step": 33999 }, { "epoch": 0.9335529928610653, "grad_norm": 0.41617467999458313, "learning_rate": 1.1065183965086735e-05, "loss": 0.4645, "step": 34000 }, { "epoch": 0.9335804503020319, "grad_norm": 0.3855729401111603, "learning_rate": 1.1064754527842364e-05, "loss": 0.5222, "step": 34001 }, { "epoch": 0.9336079077429984, "grad_norm": 0.41145092248916626, "learning_rate": 1.1064325088611884e-05, "loss": 0.5324, "step": 34002 }, { "epoch": 0.9336353651839648, "grad_norm": 0.3413155674934387, "learning_rate": 1.1063895647396101e-05, "loss": 0.4994, "step": 34003 }, { "epoch": 0.9336628226249314, "grad_norm": 0.3970710039138794, "learning_rate": 1.106346620419581e-05, "loss": 0.528, "step": 34004 }, { "epoch": 0.9336902800658978, "grad_norm": 0.39236852526664734, "learning_rate": 1.1063036759011818e-05, "loss": 0.4915, "step": 34005 }, { "epoch": 0.9337177375068644, "grad_norm": 0.3829016387462616, "learning_rate": 1.1062607311844924e-05, "loss": 0.4606, "step": 34006 }, { "epoch": 0.9337451949478308, "grad_norm": 0.39378589391708374, "learning_rate": 1.1062177862695924e-05, "loss": 0.4749, "step": 34007 }, { "epoch": 0.9337726523887974, "grad_norm": 0.42544811964035034, "learning_rate": 1.1061748411565627e-05, "loss": 0.4165, "step": 34008 }, { "epoch": 0.9338001098297639, "grad_norm": 0.6946243643760681, "learning_rate": 1.1061318958454827e-05, "loss": 0.4912, "step": 34009 }, { "epoch": 0.9338275672707304, "grad_norm": 0.3871946334838867, "learning_rate": 1.1060889503364331e-05, "loss": 0.582, "step": 34010 }, { "epoch": 0.9338550247116969, "grad_norm": 0.5361049771308899, "learning_rate": 1.1060460046294934e-05, "loss": 0.4617, "step": 34011 }, { "epoch": 0.9338824821526633, "grad_norm": 0.5163166522979736, "learning_rate": 1.1060030587247446e-05, "loss": 0.5956, "step": 34012 }, { "epoch": 0.9339099395936299, "grad_norm": 0.3849034011363983, "learning_rate": 1.1059601126222659e-05, "loss": 0.5147, "step": 34013 }, { "epoch": 0.9339373970345963, "grad_norm": 0.43492400646209717, "learning_rate": 1.1059171663221378e-05, "loss": 0.6157, "step": 34014 }, { "epoch": 0.9339648544755629, "grad_norm": 0.3841484785079956, "learning_rate": 1.1058742198244405e-05, "loss": 0.5126, "step": 34015 }, { "epoch": 0.9339923119165294, "grad_norm": 0.363029420375824, "learning_rate": 1.1058312731292539e-05, "loss": 0.4801, "step": 34016 }, { "epoch": 0.9340197693574959, "grad_norm": 0.36999085545539856, "learning_rate": 1.1057883262366582e-05, "loss": 0.4638, "step": 34017 }, { "epoch": 0.9340472267984624, "grad_norm": 0.39883914589881897, "learning_rate": 1.1057453791467337e-05, "loss": 0.5429, "step": 34018 }, { "epoch": 0.9340746842394289, "grad_norm": 0.4001128673553467, "learning_rate": 1.10570243185956e-05, "loss": 0.4807, "step": 34019 }, { "epoch": 0.9341021416803954, "grad_norm": 0.3562025725841522, "learning_rate": 1.1056594843752177e-05, "loss": 0.4578, "step": 34020 }, { "epoch": 0.9341295991213618, "grad_norm": 0.37068548798561096, "learning_rate": 1.1056165366937868e-05, "loss": 0.5466, "step": 34021 }, { "epoch": 0.9341570565623284, "grad_norm": 0.3746839761734009, "learning_rate": 1.1055735888153472e-05, "loss": 0.4901, "step": 34022 }, { "epoch": 0.9341845140032949, "grad_norm": 0.4139995872974396, "learning_rate": 1.1055306407399794e-05, "loss": 0.5109, "step": 34023 }, { "epoch": 0.9342119714442614, "grad_norm": 0.4165017902851105, "learning_rate": 1.105487692467763e-05, "loss": 0.5985, "step": 34024 }, { "epoch": 0.9342394288852279, "grad_norm": 0.38847458362579346, "learning_rate": 1.1054447439987785e-05, "loss": 0.5263, "step": 34025 }, { "epoch": 0.9342668863261944, "grad_norm": 0.4455024302005768, "learning_rate": 1.105401795333106e-05, "loss": 0.5066, "step": 34026 }, { "epoch": 0.9342943437671609, "grad_norm": 0.38331758975982666, "learning_rate": 1.105358846470825e-05, "loss": 0.4463, "step": 34027 }, { "epoch": 0.9343218012081274, "grad_norm": 0.3850729763507843, "learning_rate": 1.1053158974120168e-05, "loss": 0.4737, "step": 34028 }, { "epoch": 0.9343492586490939, "grad_norm": 0.41613122820854187, "learning_rate": 1.1052729481567605e-05, "loss": 0.4437, "step": 34029 }, { "epoch": 0.9343767160900605, "grad_norm": 0.41709765791893005, "learning_rate": 1.1052299987051368e-05, "loss": 0.4271, "step": 34030 }, { "epoch": 0.9344041735310269, "grad_norm": 0.35880815982818604, "learning_rate": 1.1051870490572253e-05, "loss": 0.4031, "step": 34031 }, { "epoch": 0.9344316309719934, "grad_norm": 0.46162381768226624, "learning_rate": 1.1051440992131063e-05, "loss": 0.5287, "step": 34032 }, { "epoch": 0.9344590884129599, "grad_norm": 0.42325034737586975, "learning_rate": 1.1051011491728603e-05, "loss": 0.4789, "step": 34033 }, { "epoch": 0.9344865458539264, "grad_norm": 0.38202816247940063, "learning_rate": 1.105058198936567e-05, "loss": 0.4761, "step": 34034 }, { "epoch": 0.9345140032948929, "grad_norm": 0.4627980887889862, "learning_rate": 1.1050152485043065e-05, "loss": 0.5546, "step": 34035 }, { "epoch": 0.9345414607358594, "grad_norm": 0.3907657265663147, "learning_rate": 1.1049722978761592e-05, "loss": 0.5408, "step": 34036 }, { "epoch": 0.934568918176826, "grad_norm": 0.387103796005249, "learning_rate": 1.1049293470522049e-05, "loss": 0.531, "step": 34037 }, { "epoch": 0.9345963756177924, "grad_norm": 0.3866257667541504, "learning_rate": 1.104886396032524e-05, "loss": 0.452, "step": 34038 }, { "epoch": 0.934623833058759, "grad_norm": 0.35728907585144043, "learning_rate": 1.1048434448171967e-05, "loss": 0.4624, "step": 34039 }, { "epoch": 0.9346512904997254, "grad_norm": 0.3859559893608093, "learning_rate": 1.1048004934063025e-05, "loss": 0.4516, "step": 34040 }, { "epoch": 0.934678747940692, "grad_norm": 0.4023617208003998, "learning_rate": 1.1047575417999222e-05, "loss": 0.5199, "step": 34041 }, { "epoch": 0.9347062053816584, "grad_norm": 0.38834381103515625, "learning_rate": 1.1047145899981356e-05, "loss": 0.4903, "step": 34042 }, { "epoch": 0.9347336628226249, "grad_norm": 0.35221409797668457, "learning_rate": 1.1046716380010228e-05, "loss": 0.5066, "step": 34043 }, { "epoch": 0.9347611202635915, "grad_norm": 0.37664228677749634, "learning_rate": 1.104628685808664e-05, "loss": 0.4035, "step": 34044 }, { "epoch": 0.9347885777045579, "grad_norm": 0.42759084701538086, "learning_rate": 1.1045857334211394e-05, "loss": 0.5176, "step": 34045 }, { "epoch": 0.9348160351455245, "grad_norm": 0.4385153353214264, "learning_rate": 1.104542780838529e-05, "loss": 0.5129, "step": 34046 }, { "epoch": 0.9348434925864909, "grad_norm": 0.36124637722969055, "learning_rate": 1.104499828060913e-05, "loss": 0.3727, "step": 34047 }, { "epoch": 0.9348709500274575, "grad_norm": 0.44429200887680054, "learning_rate": 1.1044568750883713e-05, "loss": 0.5255, "step": 34048 }, { "epoch": 0.9348984074684239, "grad_norm": 0.43668457865715027, "learning_rate": 1.1044139219209841e-05, "loss": 0.4886, "step": 34049 }, { "epoch": 0.9349258649093904, "grad_norm": 0.40713199973106384, "learning_rate": 1.1043709685588317e-05, "loss": 0.4534, "step": 34050 }, { "epoch": 0.934953322350357, "grad_norm": 0.5649691820144653, "learning_rate": 1.1043280150019943e-05, "loss": 0.4552, "step": 34051 }, { "epoch": 0.9349807797913234, "grad_norm": 0.4544326364994049, "learning_rate": 1.104285061250552e-05, "loss": 0.5296, "step": 34052 }, { "epoch": 0.93500823723229, "grad_norm": 0.3777252733707428, "learning_rate": 1.1042421073045843e-05, "loss": 0.4184, "step": 34053 }, { "epoch": 0.9350356946732564, "grad_norm": 0.37462204694747925, "learning_rate": 1.1041991531641723e-05, "loss": 0.4721, "step": 34054 }, { "epoch": 0.935063152114223, "grad_norm": 0.43971070647239685, "learning_rate": 1.1041561988293951e-05, "loss": 0.4915, "step": 34055 }, { "epoch": 0.9350906095551894, "grad_norm": 0.4311192035675049, "learning_rate": 1.1041132443003337e-05, "loss": 0.4813, "step": 34056 }, { "epoch": 0.935118066996156, "grad_norm": 0.41088420152664185, "learning_rate": 1.1040702895770679e-05, "loss": 0.4913, "step": 34057 }, { "epoch": 0.9351455244371225, "grad_norm": 0.38271889090538025, "learning_rate": 1.1040273346596774e-05, "loss": 0.4999, "step": 34058 }, { "epoch": 0.935172981878089, "grad_norm": 0.47614309191703796, "learning_rate": 1.1039843795482432e-05, "loss": 0.4872, "step": 34059 }, { "epoch": 0.9352004393190555, "grad_norm": 0.3648003041744232, "learning_rate": 1.1039414242428447e-05, "loss": 0.4188, "step": 34060 }, { "epoch": 0.9352278967600219, "grad_norm": 0.360645592212677, "learning_rate": 1.1038984687435623e-05, "loss": 0.4178, "step": 34061 }, { "epoch": 0.9352553542009885, "grad_norm": 0.3796738088130951, "learning_rate": 1.1038555130504761e-05, "loss": 0.4733, "step": 34062 }, { "epoch": 0.9352828116419549, "grad_norm": 0.3753422498703003, "learning_rate": 1.1038125571636661e-05, "loss": 0.4421, "step": 34063 }, { "epoch": 0.9353102690829215, "grad_norm": 0.381559818983078, "learning_rate": 1.1037696010832128e-05, "loss": 0.4723, "step": 34064 }, { "epoch": 0.935337726523888, "grad_norm": 0.40872710943222046, "learning_rate": 1.1037266448091961e-05, "loss": 0.4775, "step": 34065 }, { "epoch": 0.9353651839648545, "grad_norm": 0.41046789288520813, "learning_rate": 1.1036836883416959e-05, "loss": 0.5231, "step": 34066 }, { "epoch": 0.935392641405821, "grad_norm": 0.37612971663475037, "learning_rate": 1.1036407316807926e-05, "loss": 0.3919, "step": 34067 }, { "epoch": 0.9354200988467875, "grad_norm": 0.3987826108932495, "learning_rate": 1.1035977748265661e-05, "loss": 0.4033, "step": 34068 }, { "epoch": 0.935447556287754, "grad_norm": 0.3781321942806244, "learning_rate": 1.103554817779097e-05, "loss": 0.4447, "step": 34069 }, { "epoch": 0.9354750137287204, "grad_norm": 0.41998839378356934, "learning_rate": 1.1035118605384649e-05, "loss": 0.5554, "step": 34070 }, { "epoch": 0.935502471169687, "grad_norm": 0.36286255717277527, "learning_rate": 1.1034689031047501e-05, "loss": 0.3956, "step": 34071 }, { "epoch": 0.9355299286106535, "grad_norm": 0.3930697441101074, "learning_rate": 1.103425945478033e-05, "loss": 0.4851, "step": 34072 }, { "epoch": 0.93555738605162, "grad_norm": 0.37034595012664795, "learning_rate": 1.1033829876583933e-05, "loss": 0.5221, "step": 34073 }, { "epoch": 0.9355848434925865, "grad_norm": 0.3654637932777405, "learning_rate": 1.1033400296459114e-05, "loss": 0.5642, "step": 34074 }, { "epoch": 0.935612300933553, "grad_norm": 0.38869455456733704, "learning_rate": 1.1032970714406675e-05, "loss": 0.4977, "step": 34075 }, { "epoch": 0.9356397583745195, "grad_norm": 0.36387914419174194, "learning_rate": 1.1032541130427413e-05, "loss": 0.4434, "step": 34076 }, { "epoch": 0.935667215815486, "grad_norm": 0.377834290266037, "learning_rate": 1.1032111544522132e-05, "loss": 0.4759, "step": 34077 }, { "epoch": 0.9356946732564525, "grad_norm": 0.34992319345474243, "learning_rate": 1.1031681956691638e-05, "loss": 0.503, "step": 34078 }, { "epoch": 0.935722130697419, "grad_norm": 0.3653177320957184, "learning_rate": 1.1031252366936722e-05, "loss": 0.4447, "step": 34079 }, { "epoch": 0.9357495881383855, "grad_norm": 0.41903015971183777, "learning_rate": 1.1030822775258197e-05, "loss": 0.5556, "step": 34080 }, { "epoch": 0.935777045579352, "grad_norm": 0.3756762742996216, "learning_rate": 1.1030393181656854e-05, "loss": 0.5458, "step": 34081 }, { "epoch": 0.9358045030203185, "grad_norm": 0.4192802906036377, "learning_rate": 1.1029963586133501e-05, "loss": 0.4368, "step": 34082 }, { "epoch": 0.935831960461285, "grad_norm": 0.36061546206474304, "learning_rate": 1.1029533988688939e-05, "loss": 0.4756, "step": 34083 }, { "epoch": 0.9358594179022515, "grad_norm": 0.3724822402000427, "learning_rate": 1.1029104389323963e-05, "loss": 0.5181, "step": 34084 }, { "epoch": 0.935886875343218, "grad_norm": 0.4200810194015503, "learning_rate": 1.1028674788039382e-05, "loss": 0.4702, "step": 34085 }, { "epoch": 0.9359143327841846, "grad_norm": 0.3883519172668457, "learning_rate": 1.1028245184835993e-05, "loss": 0.4691, "step": 34086 }, { "epoch": 0.935941790225151, "grad_norm": 0.3525243401527405, "learning_rate": 1.10278155797146e-05, "loss": 0.4656, "step": 34087 }, { "epoch": 0.9359692476661176, "grad_norm": 0.38445210456848145, "learning_rate": 1.1027385972676002e-05, "loss": 0.4809, "step": 34088 }, { "epoch": 0.935996705107084, "grad_norm": 0.4525030255317688, "learning_rate": 1.1026956363721e-05, "loss": 0.5228, "step": 34089 }, { "epoch": 0.9360241625480505, "grad_norm": 0.38695308566093445, "learning_rate": 1.10265267528504e-05, "loss": 0.5057, "step": 34090 }, { "epoch": 0.936051619989017, "grad_norm": 0.4670450985431671, "learning_rate": 1.1026097140064997e-05, "loss": 0.549, "step": 34091 }, { "epoch": 0.9360790774299835, "grad_norm": 0.35982105135917664, "learning_rate": 1.1025667525365596e-05, "loss": 0.4496, "step": 34092 }, { "epoch": 0.9361065348709501, "grad_norm": 0.4590197205543518, "learning_rate": 1.1025237908752998e-05, "loss": 0.5536, "step": 34093 }, { "epoch": 0.9361339923119165, "grad_norm": 0.4020620584487915, "learning_rate": 1.1024808290228001e-05, "loss": 0.527, "step": 34094 }, { "epoch": 0.9361614497528831, "grad_norm": 0.3984156548976898, "learning_rate": 1.1024378669791414e-05, "loss": 0.5438, "step": 34095 }, { "epoch": 0.9361889071938495, "grad_norm": 0.4083051383495331, "learning_rate": 1.1023949047444033e-05, "loss": 0.558, "step": 34096 }, { "epoch": 0.9362163646348161, "grad_norm": 0.41695886850357056, "learning_rate": 1.102351942318666e-05, "loss": 0.431, "step": 34097 }, { "epoch": 0.9362438220757825, "grad_norm": 0.3727676570415497, "learning_rate": 1.1023089797020095e-05, "loss": 0.4698, "step": 34098 }, { "epoch": 0.936271279516749, "grad_norm": 0.4032125771045685, "learning_rate": 1.102266016894514e-05, "loss": 0.4804, "step": 34099 }, { "epoch": 0.9362987369577156, "grad_norm": 0.38922083377838135, "learning_rate": 1.1022230538962602e-05, "loss": 0.473, "step": 34100 }, { "epoch": 0.936326194398682, "grad_norm": 0.48115548491477966, "learning_rate": 1.1021800907073275e-05, "loss": 0.4194, "step": 34101 }, { "epoch": 0.9363536518396486, "grad_norm": 0.3662664592266083, "learning_rate": 1.1021371273277964e-05, "loss": 0.4936, "step": 34102 }, { "epoch": 0.936381109280615, "grad_norm": 0.4099118411540985, "learning_rate": 1.1020941637577466e-05, "loss": 0.4077, "step": 34103 }, { "epoch": 0.9364085667215816, "grad_norm": 0.41471174359321594, "learning_rate": 1.102051199997259e-05, "loss": 0.5543, "step": 34104 }, { "epoch": 0.936436024162548, "grad_norm": 0.3946733772754669, "learning_rate": 1.1020082360464133e-05, "loss": 0.4788, "step": 34105 }, { "epoch": 0.9364634816035146, "grad_norm": 0.3789057433605194, "learning_rate": 1.1019652719052899e-05, "loss": 0.4923, "step": 34106 }, { "epoch": 0.9364909390444811, "grad_norm": 0.37840285897254944, "learning_rate": 1.1019223075739683e-05, "loss": 0.5341, "step": 34107 }, { "epoch": 0.9365183964854475, "grad_norm": 0.38380861282348633, "learning_rate": 1.1018793430525291e-05, "loss": 0.4722, "step": 34108 }, { "epoch": 0.9365458539264141, "grad_norm": 0.3561320900917053, "learning_rate": 1.1018363783410525e-05, "loss": 0.4453, "step": 34109 }, { "epoch": 0.9365733113673805, "grad_norm": 0.4056204557418823, "learning_rate": 1.1017934134396186e-05, "loss": 0.4526, "step": 34110 }, { "epoch": 0.9366007688083471, "grad_norm": 0.4097292423248291, "learning_rate": 1.1017504483483074e-05, "loss": 0.5305, "step": 34111 }, { "epoch": 0.9366282262493135, "grad_norm": 0.47967031598091125, "learning_rate": 1.1017074830671991e-05, "loss": 0.4992, "step": 34112 }, { "epoch": 0.9366556836902801, "grad_norm": 0.3612501621246338, "learning_rate": 1.1016645175963743e-05, "loss": 0.3997, "step": 34113 }, { "epoch": 0.9366831411312466, "grad_norm": 0.3712849020957947, "learning_rate": 1.1016215519359123e-05, "loss": 0.5492, "step": 34114 }, { "epoch": 0.9367105985722131, "grad_norm": 0.40944692492485046, "learning_rate": 1.1015785860858937e-05, "loss": 0.4759, "step": 34115 }, { "epoch": 0.9367380560131796, "grad_norm": 0.36818262934684753, "learning_rate": 1.1015356200463989e-05, "loss": 0.5602, "step": 34116 }, { "epoch": 0.936765513454146, "grad_norm": 0.4300045073032379, "learning_rate": 1.1014926538175073e-05, "loss": 0.5892, "step": 34117 }, { "epoch": 0.9367929708951126, "grad_norm": 0.41327106952667236, "learning_rate": 1.1014496873993e-05, "loss": 0.4864, "step": 34118 }, { "epoch": 0.936820428336079, "grad_norm": 0.4379746615886688, "learning_rate": 1.1014067207918563e-05, "loss": 0.5722, "step": 34119 }, { "epoch": 0.9368478857770456, "grad_norm": 0.4393318295478821, "learning_rate": 1.1013637539952569e-05, "loss": 0.4582, "step": 34120 }, { "epoch": 0.9368753432180121, "grad_norm": 0.389230877161026, "learning_rate": 1.1013207870095817e-05, "loss": 0.5665, "step": 34121 }, { "epoch": 0.9369028006589786, "grad_norm": 0.3755532503128052, "learning_rate": 1.101277819834911e-05, "loss": 0.5639, "step": 34122 }, { "epoch": 0.9369302580999451, "grad_norm": 0.41278600692749023, "learning_rate": 1.1012348524713246e-05, "loss": 0.4977, "step": 34123 }, { "epoch": 0.9369577155409116, "grad_norm": 0.46407657861709595, "learning_rate": 1.1011918849189029e-05, "loss": 0.4606, "step": 34124 }, { "epoch": 0.9369851729818781, "grad_norm": 0.40687209367752075, "learning_rate": 1.1011489171777261e-05, "loss": 0.5541, "step": 34125 }, { "epoch": 0.9370126304228446, "grad_norm": 0.3833810091018677, "learning_rate": 1.1011059492478743e-05, "loss": 0.4878, "step": 34126 }, { "epoch": 0.9370400878638111, "grad_norm": 0.4015711545944214, "learning_rate": 1.1010629811294278e-05, "loss": 0.5272, "step": 34127 }, { "epoch": 0.9370675453047776, "grad_norm": 0.37204477190971375, "learning_rate": 1.1010200128224664e-05, "loss": 0.5155, "step": 34128 }, { "epoch": 0.9370950027457441, "grad_norm": 0.4277205169200897, "learning_rate": 1.1009770443270707e-05, "loss": 0.5034, "step": 34129 }, { "epoch": 0.9371224601867106, "grad_norm": 0.4123181402683258, "learning_rate": 1.10093407564332e-05, "loss": 0.4962, "step": 34130 }, { "epoch": 0.9371499176276771, "grad_norm": 0.4069671034812927, "learning_rate": 1.1008911067712955e-05, "loss": 0.5003, "step": 34131 }, { "epoch": 0.9371773750686436, "grad_norm": 0.4363509714603424, "learning_rate": 1.1008481377110768e-05, "loss": 0.5021, "step": 34132 }, { "epoch": 0.9372048325096101, "grad_norm": 0.39486226439476013, "learning_rate": 1.100805168462744e-05, "loss": 0.5632, "step": 34133 }, { "epoch": 0.9372322899505766, "grad_norm": 0.456407368183136, "learning_rate": 1.1007621990263778e-05, "loss": 0.4583, "step": 34134 }, { "epoch": 0.9372597473915432, "grad_norm": 0.40210938453674316, "learning_rate": 1.1007192294020574e-05, "loss": 0.5345, "step": 34135 }, { "epoch": 0.9372872048325096, "grad_norm": 0.40387752652168274, "learning_rate": 1.100676259589864e-05, "loss": 0.5218, "step": 34136 }, { "epoch": 0.9373146622734762, "grad_norm": 0.40913301706314087, "learning_rate": 1.100633289589877e-05, "loss": 0.5435, "step": 34137 }, { "epoch": 0.9373421197144426, "grad_norm": 0.396953284740448, "learning_rate": 1.1005903194021766e-05, "loss": 0.4255, "step": 34138 }, { "epoch": 0.9373695771554091, "grad_norm": 0.43009939789772034, "learning_rate": 1.1005473490268434e-05, "loss": 0.4633, "step": 34139 }, { "epoch": 0.9373970345963756, "grad_norm": 0.3662225604057312, "learning_rate": 1.1005043784639573e-05, "loss": 0.5021, "step": 34140 }, { "epoch": 0.9374244920373421, "grad_norm": 0.435918390750885, "learning_rate": 1.1004614077135982e-05, "loss": 0.508, "step": 34141 }, { "epoch": 0.9374519494783087, "grad_norm": 0.4059765040874481, "learning_rate": 1.1004184367758468e-05, "loss": 0.5146, "step": 34142 }, { "epoch": 0.9374794069192751, "grad_norm": 0.4313197731971741, "learning_rate": 1.1003754656507828e-05, "loss": 0.5451, "step": 34143 }, { "epoch": 0.9375068643602417, "grad_norm": 0.3622271418571472, "learning_rate": 1.1003324943384865e-05, "loss": 0.3988, "step": 34144 }, { "epoch": 0.9375343218012081, "grad_norm": 0.358955055475235, "learning_rate": 1.1002895228390385e-05, "loss": 0.464, "step": 34145 }, { "epoch": 0.9375617792421747, "grad_norm": 0.3763884902000427, "learning_rate": 1.100246551152518e-05, "loss": 0.468, "step": 34146 }, { "epoch": 0.9375892366831411, "grad_norm": 0.40837815403938293, "learning_rate": 1.100203579279006e-05, "loss": 0.4669, "step": 34147 }, { "epoch": 0.9376166941241076, "grad_norm": 0.3665204346179962, "learning_rate": 1.100160607218582e-05, "loss": 0.4162, "step": 34148 }, { "epoch": 0.9376441515650741, "grad_norm": 0.3869040906429291, "learning_rate": 1.1001176349713268e-05, "loss": 0.4353, "step": 34149 }, { "epoch": 0.9376716090060406, "grad_norm": 0.39645808935165405, "learning_rate": 1.1000746625373202e-05, "loss": 0.5253, "step": 34150 }, { "epoch": 0.9376990664470072, "grad_norm": 0.44957008957862854, "learning_rate": 1.1000316899166423e-05, "loss": 0.5459, "step": 34151 }, { "epoch": 0.9377265238879736, "grad_norm": 0.39621973037719727, "learning_rate": 1.0999887171093736e-05, "loss": 0.4841, "step": 34152 }, { "epoch": 0.9377539813289402, "grad_norm": 0.4622866213321686, "learning_rate": 1.0999457441155941e-05, "loss": 0.5601, "step": 34153 }, { "epoch": 0.9377814387699066, "grad_norm": 0.42490309476852417, "learning_rate": 1.0999027709353833e-05, "loss": 0.4931, "step": 34154 }, { "epoch": 0.9378088962108732, "grad_norm": 0.640728235244751, "learning_rate": 1.0998597975688226e-05, "loss": 0.5319, "step": 34155 }, { "epoch": 0.9378363536518396, "grad_norm": 0.5038220882415771, "learning_rate": 1.099816824015991e-05, "loss": 0.5998, "step": 34156 }, { "epoch": 0.9378638110928061, "grad_norm": 0.42235735058784485, "learning_rate": 1.0997738502769694e-05, "loss": 0.421, "step": 34157 }, { "epoch": 0.9378912685337727, "grad_norm": 0.3779776692390442, "learning_rate": 1.0997308763518378e-05, "loss": 0.4672, "step": 34158 }, { "epoch": 0.9379187259747391, "grad_norm": 0.3780769109725952, "learning_rate": 1.0996879022406763e-05, "loss": 0.4722, "step": 34159 }, { "epoch": 0.9379461834157057, "grad_norm": 0.6078518033027649, "learning_rate": 1.099644927943565e-05, "loss": 0.4404, "step": 34160 }, { "epoch": 0.9379736408566721, "grad_norm": 0.4144550561904907, "learning_rate": 1.0996019534605839e-05, "loss": 0.5264, "step": 34161 }, { "epoch": 0.9380010982976387, "grad_norm": 0.4366423785686493, "learning_rate": 1.0995589787918137e-05, "loss": 0.4385, "step": 34162 }, { "epoch": 0.9380285557386051, "grad_norm": 0.43204960227012634, "learning_rate": 1.0995160039373339e-05, "loss": 0.4902, "step": 34163 }, { "epoch": 0.9380560131795717, "grad_norm": 0.3818499743938446, "learning_rate": 1.0994730288972253e-05, "loss": 0.4383, "step": 34164 }, { "epoch": 0.9380834706205382, "grad_norm": 0.3784787058830261, "learning_rate": 1.0994300536715677e-05, "loss": 0.5286, "step": 34165 }, { "epoch": 0.9381109280615046, "grad_norm": 0.38721731305122375, "learning_rate": 1.099387078260441e-05, "loss": 0.489, "step": 34166 }, { "epoch": 0.9381383855024712, "grad_norm": 0.3442370295524597, "learning_rate": 1.099344102663926e-05, "loss": 0.4599, "step": 34167 }, { "epoch": 0.9381658429434376, "grad_norm": 0.3921426236629486, "learning_rate": 1.0993011268821023e-05, "loss": 0.5967, "step": 34168 }, { "epoch": 0.9381933003844042, "grad_norm": 0.39721623063087463, "learning_rate": 1.0992581509150507e-05, "loss": 0.5402, "step": 34169 }, { "epoch": 0.9382207578253706, "grad_norm": 0.4162483513355255, "learning_rate": 1.0992151747628505e-05, "loss": 0.5028, "step": 34170 }, { "epoch": 0.9382482152663372, "grad_norm": 0.3767414689064026, "learning_rate": 1.0991721984255825e-05, "loss": 0.5194, "step": 34171 }, { "epoch": 0.9382756727073037, "grad_norm": 0.39086925983428955, "learning_rate": 1.099129221903327e-05, "loss": 0.5738, "step": 34172 }, { "epoch": 0.9383031301482702, "grad_norm": 0.39010483026504517, "learning_rate": 1.0990862451961632e-05, "loss": 0.3546, "step": 34173 }, { "epoch": 0.9383305875892367, "grad_norm": 0.3959309756755829, "learning_rate": 1.0990432683041726e-05, "loss": 0.4614, "step": 34174 }, { "epoch": 0.9383580450302031, "grad_norm": 0.38558048009872437, "learning_rate": 1.0990002912274342e-05, "loss": 0.478, "step": 34175 }, { "epoch": 0.9383855024711697, "grad_norm": 0.41956856846809387, "learning_rate": 1.098957313966029e-05, "loss": 0.508, "step": 34176 }, { "epoch": 0.9384129599121361, "grad_norm": 0.3705075979232788, "learning_rate": 1.0989143365200368e-05, "loss": 0.4939, "step": 34177 }, { "epoch": 0.9384404173531027, "grad_norm": 0.38121771812438965, "learning_rate": 1.0988713588895377e-05, "loss": 0.5324, "step": 34178 }, { "epoch": 0.9384678747940692, "grad_norm": 0.40335068106651306, "learning_rate": 1.0988283810746118e-05, "loss": 0.4643, "step": 34179 }, { "epoch": 0.9384953322350357, "grad_norm": 0.9911664128303528, "learning_rate": 1.0987854030753396e-05, "loss": 0.4532, "step": 34180 }, { "epoch": 0.9385227896760022, "grad_norm": 0.42643287777900696, "learning_rate": 1.0987424248918013e-05, "loss": 0.5873, "step": 34181 }, { "epoch": 0.9385502471169687, "grad_norm": 0.3522125482559204, "learning_rate": 1.0986994465240766e-05, "loss": 0.4877, "step": 34182 }, { "epoch": 0.9385777045579352, "grad_norm": 0.38926488161087036, "learning_rate": 1.098656467972246e-05, "loss": 0.4501, "step": 34183 }, { "epoch": 0.9386051619989016, "grad_norm": 0.3781294524669647, "learning_rate": 1.0986134892363895e-05, "loss": 0.5551, "step": 34184 }, { "epoch": 0.9386326194398682, "grad_norm": 0.4107251763343811, "learning_rate": 1.0985705103165873e-05, "loss": 0.5202, "step": 34185 }, { "epoch": 0.9386600768808347, "grad_norm": 0.3944930136203766, "learning_rate": 1.09852753121292e-05, "loss": 0.4275, "step": 34186 }, { "epoch": 0.9386875343218012, "grad_norm": 0.37033870816230774, "learning_rate": 1.0984845519254671e-05, "loss": 0.5067, "step": 34187 }, { "epoch": 0.9387149917627677, "grad_norm": 0.4689740538597107, "learning_rate": 1.0984415724543091e-05, "loss": 0.5684, "step": 34188 }, { "epoch": 0.9387424492037342, "grad_norm": 0.4010498523712158, "learning_rate": 1.0983985927995264e-05, "loss": 0.5387, "step": 34189 }, { "epoch": 0.9387699066447007, "grad_norm": 0.41086992621421814, "learning_rate": 1.0983556129611984e-05, "loss": 0.4914, "step": 34190 }, { "epoch": 0.9387973640856672, "grad_norm": 0.38165196776390076, "learning_rate": 1.0983126329394065e-05, "loss": 0.4495, "step": 34191 }, { "epoch": 0.9388248215266337, "grad_norm": 0.432559072971344, "learning_rate": 1.0982696527342296e-05, "loss": 0.4522, "step": 34192 }, { "epoch": 0.9388522789676003, "grad_norm": 0.3728601336479187, "learning_rate": 1.0982266723457486e-05, "loss": 0.3997, "step": 34193 }, { "epoch": 0.9388797364085667, "grad_norm": 0.38037121295928955, "learning_rate": 1.0981836917740438e-05, "loss": 0.4486, "step": 34194 }, { "epoch": 0.9389071938495333, "grad_norm": 0.379643976688385, "learning_rate": 1.0981407110191946e-05, "loss": 0.4863, "step": 34195 }, { "epoch": 0.9389346512904997, "grad_norm": 0.3734078109264374, "learning_rate": 1.098097730081282e-05, "loss": 0.4143, "step": 34196 }, { "epoch": 0.9389621087314662, "grad_norm": 0.3700469136238098, "learning_rate": 1.0980547489603854e-05, "loss": 0.46, "step": 34197 }, { "epoch": 0.9389895661724327, "grad_norm": 0.40587592124938965, "learning_rate": 1.0980117676565857e-05, "loss": 0.56, "step": 34198 }, { "epoch": 0.9390170236133992, "grad_norm": 0.45718255639076233, "learning_rate": 1.097968786169963e-05, "loss": 0.3858, "step": 34199 }, { "epoch": 0.9390444810543658, "grad_norm": 0.4867539405822754, "learning_rate": 1.097925804500597e-05, "loss": 0.5575, "step": 34200 }, { "epoch": 0.9390719384953322, "grad_norm": 0.46798643469810486, "learning_rate": 1.097882822648568e-05, "loss": 0.4891, "step": 34201 }, { "epoch": 0.9390993959362988, "grad_norm": 0.4035952389240265, "learning_rate": 1.0978398406139565e-05, "loss": 0.541, "step": 34202 }, { "epoch": 0.9391268533772652, "grad_norm": 0.383949339389801, "learning_rate": 1.0977968583968424e-05, "loss": 0.518, "step": 34203 }, { "epoch": 0.9391543108182318, "grad_norm": 0.39391234517097473, "learning_rate": 1.0977538759973061e-05, "loss": 0.4706, "step": 34204 }, { "epoch": 0.9391817682591982, "grad_norm": 0.42308664321899414, "learning_rate": 1.0977108934154273e-05, "loss": 0.4839, "step": 34205 }, { "epoch": 0.9392092257001647, "grad_norm": 0.4556795060634613, "learning_rate": 1.0976679106512867e-05, "loss": 0.5768, "step": 34206 }, { "epoch": 0.9392366831411313, "grad_norm": 0.5138306021690369, "learning_rate": 1.0976249277049643e-05, "loss": 0.5105, "step": 34207 }, { "epoch": 0.9392641405820977, "grad_norm": 0.47561928629875183, "learning_rate": 1.0975819445765401e-05, "loss": 0.4394, "step": 34208 }, { "epoch": 0.9392915980230643, "grad_norm": 0.3677525818347931, "learning_rate": 1.0975389612660945e-05, "loss": 0.4439, "step": 34209 }, { "epoch": 0.9393190554640307, "grad_norm": 0.3644654154777527, "learning_rate": 1.0974959777737075e-05, "loss": 0.472, "step": 34210 }, { "epoch": 0.9393465129049973, "grad_norm": 0.42126184701919556, "learning_rate": 1.0974529940994596e-05, "loss": 0.4983, "step": 34211 }, { "epoch": 0.9393739703459637, "grad_norm": 0.4833011031150818, "learning_rate": 1.0974100102434309e-05, "loss": 0.4433, "step": 34212 }, { "epoch": 0.9394014277869303, "grad_norm": 0.42344996333122253, "learning_rate": 1.097367026205701e-05, "loss": 0.558, "step": 34213 }, { "epoch": 0.9394288852278968, "grad_norm": 0.3769623041152954, "learning_rate": 1.0973240419863506e-05, "loss": 0.4736, "step": 34214 }, { "epoch": 0.9394563426688632, "grad_norm": 0.42467862367630005, "learning_rate": 1.09728105758546e-05, "loss": 0.5147, "step": 34215 }, { "epoch": 0.9394838001098298, "grad_norm": 0.36711591482162476, "learning_rate": 1.097238073003109e-05, "loss": 0.5192, "step": 34216 }, { "epoch": 0.9395112575507962, "grad_norm": 0.38382554054260254, "learning_rate": 1.0971950882393782e-05, "loss": 0.4458, "step": 34217 }, { "epoch": 0.9395387149917628, "grad_norm": 0.3765406608581543, "learning_rate": 1.0971521032943474e-05, "loss": 0.4324, "step": 34218 }, { "epoch": 0.9395661724327292, "grad_norm": 0.39666980504989624, "learning_rate": 1.097109118168097e-05, "loss": 0.448, "step": 34219 }, { "epoch": 0.9395936298736958, "grad_norm": 0.3724711239337921, "learning_rate": 1.097066132860707e-05, "loss": 0.4523, "step": 34220 }, { "epoch": 0.9396210873146623, "grad_norm": 0.4666835069656372, "learning_rate": 1.0970231473722576e-05, "loss": 0.488, "step": 34221 }, { "epoch": 0.9396485447556288, "grad_norm": 0.5439835786819458, "learning_rate": 1.0969801617028293e-05, "loss": 0.5327, "step": 34222 }, { "epoch": 0.9396760021965953, "grad_norm": 0.45710933208465576, "learning_rate": 1.0969371758525019e-05, "loss": 0.5808, "step": 34223 }, { "epoch": 0.9397034596375617, "grad_norm": 0.747260332107544, "learning_rate": 1.0968941898213556e-05, "loss": 0.5605, "step": 34224 }, { "epoch": 0.9397309170785283, "grad_norm": 0.39793887734413147, "learning_rate": 1.0968512036094711e-05, "loss": 0.4759, "step": 34225 }, { "epoch": 0.9397583745194947, "grad_norm": 0.4846106767654419, "learning_rate": 1.0968082172169279e-05, "loss": 0.504, "step": 34226 }, { "epoch": 0.9397858319604613, "grad_norm": 0.49286824464797974, "learning_rate": 1.0967652306438066e-05, "loss": 0.5315, "step": 34227 }, { "epoch": 0.9398132894014278, "grad_norm": 0.43346303701400757, "learning_rate": 1.096722243890187e-05, "loss": 0.5133, "step": 34228 }, { "epoch": 0.9398407468423943, "grad_norm": 0.4159921705722809, "learning_rate": 1.0966792569561498e-05, "loss": 0.5071, "step": 34229 }, { "epoch": 0.9398682042833608, "grad_norm": 0.3812239170074463, "learning_rate": 1.0966362698417749e-05, "loss": 0.5256, "step": 34230 }, { "epoch": 0.9398956617243273, "grad_norm": 0.34175023436546326, "learning_rate": 1.0965932825471424e-05, "loss": 0.4965, "step": 34231 }, { "epoch": 0.9399231191652938, "grad_norm": 0.4412270784378052, "learning_rate": 1.0965502950723328e-05, "loss": 0.505, "step": 34232 }, { "epoch": 0.9399505766062602, "grad_norm": 0.3378070890903473, "learning_rate": 1.0965073074174259e-05, "loss": 0.3658, "step": 34233 }, { "epoch": 0.9399780340472268, "grad_norm": 0.3936670124530792, "learning_rate": 1.096464319582502e-05, "loss": 0.535, "step": 34234 }, { "epoch": 0.9400054914881933, "grad_norm": 0.38661515712738037, "learning_rate": 1.0964213315676418e-05, "loss": 0.4207, "step": 34235 }, { "epoch": 0.9400329489291598, "grad_norm": 0.42031341791152954, "learning_rate": 1.0963783433729245e-05, "loss": 0.4327, "step": 34236 }, { "epoch": 0.9400604063701263, "grad_norm": 0.3859327733516693, "learning_rate": 1.096335354998431e-05, "loss": 0.4522, "step": 34237 }, { "epoch": 0.9400878638110928, "grad_norm": 0.41897261142730713, "learning_rate": 1.0962923664442415e-05, "loss": 0.4092, "step": 34238 }, { "epoch": 0.9401153212520593, "grad_norm": 0.370349645614624, "learning_rate": 1.0962493777104356e-05, "loss": 0.4446, "step": 34239 }, { "epoch": 0.9401427786930258, "grad_norm": 0.4193271994590759, "learning_rate": 1.0962063887970944e-05, "loss": 0.4652, "step": 34240 }, { "epoch": 0.9401702361339923, "grad_norm": 0.43900418281555176, "learning_rate": 1.0961633997042972e-05, "loss": 0.562, "step": 34241 }, { "epoch": 0.9401976935749589, "grad_norm": 0.3703499138355255, "learning_rate": 1.0961204104321247e-05, "loss": 0.447, "step": 34242 }, { "epoch": 0.9402251510159253, "grad_norm": 0.4241028130054474, "learning_rate": 1.0960774209806572e-05, "loss": 0.5169, "step": 34243 }, { "epoch": 0.9402526084568918, "grad_norm": 0.3693733215332031, "learning_rate": 1.0960344313499743e-05, "loss": 0.4872, "step": 34244 }, { "epoch": 0.9402800658978583, "grad_norm": 0.5232754945755005, "learning_rate": 1.0959914415401567e-05, "loss": 0.4656, "step": 34245 }, { "epoch": 0.9403075233388248, "grad_norm": 0.3382304012775421, "learning_rate": 1.0959484515512844e-05, "loss": 0.5292, "step": 34246 }, { "epoch": 0.9403349807797913, "grad_norm": 0.4224871098995209, "learning_rate": 1.0959054613834374e-05, "loss": 0.5505, "step": 34247 }, { "epoch": 0.9403624382207578, "grad_norm": 0.40477797389030457, "learning_rate": 1.0958624710366966e-05, "loss": 0.5574, "step": 34248 }, { "epoch": 0.9403898956617244, "grad_norm": 0.4357987642288208, "learning_rate": 1.0958194805111413e-05, "loss": 0.5254, "step": 34249 }, { "epoch": 0.9404173531026908, "grad_norm": 0.3562454581260681, "learning_rate": 1.095776489806852e-05, "loss": 0.4557, "step": 34250 }, { "epoch": 0.9404448105436574, "grad_norm": 0.371247261762619, "learning_rate": 1.0957334989239095e-05, "loss": 0.4838, "step": 34251 }, { "epoch": 0.9404722679846238, "grad_norm": 0.42819562554359436, "learning_rate": 1.095690507862393e-05, "loss": 0.4581, "step": 34252 }, { "epoch": 0.9404997254255903, "grad_norm": 0.3985109329223633, "learning_rate": 1.0956475166223832e-05, "loss": 0.5126, "step": 34253 }, { "epoch": 0.9405271828665568, "grad_norm": 0.3684120178222656, "learning_rate": 1.0956045252039601e-05, "loss": 0.4032, "step": 34254 }, { "epoch": 0.9405546403075233, "grad_norm": 0.47738027572631836, "learning_rate": 1.0955615336072045e-05, "loss": 0.4891, "step": 34255 }, { "epoch": 0.9405820977484899, "grad_norm": 0.4095081090927124, "learning_rate": 1.0955185418321961e-05, "loss": 0.495, "step": 34256 }, { "epoch": 0.9406095551894563, "grad_norm": 0.3756501078605652, "learning_rate": 1.0954755498790148e-05, "loss": 0.5013, "step": 34257 }, { "epoch": 0.9406370126304229, "grad_norm": 0.36128130555152893, "learning_rate": 1.0954325577477413e-05, "loss": 0.4673, "step": 34258 }, { "epoch": 0.9406644700713893, "grad_norm": 0.3968930244445801, "learning_rate": 1.0953895654384556e-05, "loss": 0.4882, "step": 34259 }, { "epoch": 0.9406919275123559, "grad_norm": 0.3544952869415283, "learning_rate": 1.095346572951238e-05, "loss": 0.4747, "step": 34260 }, { "epoch": 0.9407193849533223, "grad_norm": 0.42824578285217285, "learning_rate": 1.0953035802861688e-05, "loss": 0.5112, "step": 34261 }, { "epoch": 0.9407468423942889, "grad_norm": 0.39310553669929504, "learning_rate": 1.0952605874433275e-05, "loss": 0.5193, "step": 34262 }, { "epoch": 0.9407742998352554, "grad_norm": 0.6414165496826172, "learning_rate": 1.095217594422795e-05, "loss": 0.4967, "step": 34263 }, { "epoch": 0.9408017572762218, "grad_norm": 0.39318719506263733, "learning_rate": 1.0951746012246516e-05, "loss": 0.5287, "step": 34264 }, { "epoch": 0.9408292147171884, "grad_norm": 0.3813847303390503, "learning_rate": 1.0951316078489766e-05, "loss": 0.5626, "step": 34265 }, { "epoch": 0.9408566721581548, "grad_norm": 0.4017113745212555, "learning_rate": 1.0950886142958513e-05, "loss": 0.5897, "step": 34266 }, { "epoch": 0.9408841295991214, "grad_norm": 0.4142880141735077, "learning_rate": 1.0950456205653551e-05, "loss": 0.5404, "step": 34267 }, { "epoch": 0.9409115870400878, "grad_norm": 0.4135507345199585, "learning_rate": 1.0950026266575686e-05, "loss": 0.4316, "step": 34268 }, { "epoch": 0.9409390444810544, "grad_norm": 0.38454189896583557, "learning_rate": 1.094959632572572e-05, "loss": 0.4793, "step": 34269 }, { "epoch": 0.9409665019220209, "grad_norm": 0.46356332302093506, "learning_rate": 1.0949166383104451e-05, "loss": 0.4791, "step": 34270 }, { "epoch": 0.9409939593629874, "grad_norm": 0.38309699296951294, "learning_rate": 1.0948736438712686e-05, "loss": 0.5198, "step": 34271 }, { "epoch": 0.9410214168039539, "grad_norm": 0.38631248474121094, "learning_rate": 1.0948306492551224e-05, "loss": 0.4869, "step": 34272 }, { "epoch": 0.9410488742449203, "grad_norm": 0.49224069714546204, "learning_rate": 1.0947876544620869e-05, "loss": 0.4848, "step": 34273 }, { "epoch": 0.9410763316858869, "grad_norm": 0.3964882493019104, "learning_rate": 1.094744659492242e-05, "loss": 0.4255, "step": 34274 }, { "epoch": 0.9411037891268533, "grad_norm": 0.4052979350090027, "learning_rate": 1.094701664345668e-05, "loss": 0.461, "step": 34275 }, { "epoch": 0.9411312465678199, "grad_norm": 0.33843037486076355, "learning_rate": 1.0946586690224455e-05, "loss": 0.4049, "step": 34276 }, { "epoch": 0.9411587040087864, "grad_norm": 0.37387216091156006, "learning_rate": 1.0946156735226538e-05, "loss": 0.5377, "step": 34277 }, { "epoch": 0.9411861614497529, "grad_norm": 0.3806493580341339, "learning_rate": 1.0945726778463742e-05, "loss": 0.4464, "step": 34278 }, { "epoch": 0.9412136188907194, "grad_norm": 0.4058409333229065, "learning_rate": 1.0945296819936862e-05, "loss": 0.5457, "step": 34279 }, { "epoch": 0.9412410763316859, "grad_norm": 0.4173944592475891, "learning_rate": 1.09448668596467e-05, "loss": 0.4558, "step": 34280 }, { "epoch": 0.9412685337726524, "grad_norm": 0.39042699337005615, "learning_rate": 1.0944436897594063e-05, "loss": 0.5066, "step": 34281 }, { "epoch": 0.9412959912136188, "grad_norm": 0.40072834491729736, "learning_rate": 1.094400693377975e-05, "loss": 0.4732, "step": 34282 }, { "epoch": 0.9413234486545854, "grad_norm": 0.3889920711517334, "learning_rate": 1.0943576968204558e-05, "loss": 0.4397, "step": 34283 }, { "epoch": 0.9413509060955519, "grad_norm": 0.3991536498069763, "learning_rate": 1.0943147000869299e-05, "loss": 0.4608, "step": 34284 }, { "epoch": 0.9413783635365184, "grad_norm": 0.4085448682308197, "learning_rate": 1.0942717031774765e-05, "loss": 0.4417, "step": 34285 }, { "epoch": 0.9414058209774849, "grad_norm": 0.3954102694988251, "learning_rate": 1.0942287060921769e-05, "loss": 0.4841, "step": 34286 }, { "epoch": 0.9414332784184514, "grad_norm": 0.38158246874809265, "learning_rate": 1.0941857088311103e-05, "loss": 0.4609, "step": 34287 }, { "epoch": 0.9414607358594179, "grad_norm": 0.3868821859359741, "learning_rate": 1.0941427113943572e-05, "loss": 0.5054, "step": 34288 }, { "epoch": 0.9414881933003844, "grad_norm": 0.5771820545196533, "learning_rate": 1.094099713781998e-05, "loss": 0.4897, "step": 34289 }, { "epoch": 0.9415156507413509, "grad_norm": 0.3376457095146179, "learning_rate": 1.0940567159941126e-05, "loss": 0.418, "step": 34290 }, { "epoch": 0.9415431081823175, "grad_norm": 0.5123295783996582, "learning_rate": 1.0940137180307819e-05, "loss": 0.5313, "step": 34291 }, { "epoch": 0.9415705656232839, "grad_norm": 0.3634682893753052, "learning_rate": 1.0939707198920855e-05, "loss": 0.4437, "step": 34292 }, { "epoch": 0.9415980230642504, "grad_norm": 0.4365377426147461, "learning_rate": 1.0939277215781033e-05, "loss": 0.5429, "step": 34293 }, { "epoch": 0.9416254805052169, "grad_norm": 0.4119139015674591, "learning_rate": 1.0938847230889162e-05, "loss": 0.5333, "step": 34294 }, { "epoch": 0.9416529379461834, "grad_norm": 0.41945919394493103, "learning_rate": 1.0938417244246042e-05, "loss": 0.5041, "step": 34295 }, { "epoch": 0.9416803953871499, "grad_norm": 0.40743187069892883, "learning_rate": 1.0937987255852474e-05, "loss": 0.465, "step": 34296 }, { "epoch": 0.9417078528281164, "grad_norm": 0.37929290533065796, "learning_rate": 1.093755726570926e-05, "loss": 0.4893, "step": 34297 }, { "epoch": 0.941735310269083, "grad_norm": 0.43689969182014465, "learning_rate": 1.0937127273817201e-05, "loss": 0.5104, "step": 34298 }, { "epoch": 0.9417627677100494, "grad_norm": 0.3844442069530487, "learning_rate": 1.0936697280177103e-05, "loss": 0.4626, "step": 34299 }, { "epoch": 0.941790225151016, "grad_norm": 0.4615590274333954, "learning_rate": 1.0936267284789763e-05, "loss": 0.5386, "step": 34300 }, { "epoch": 0.9418176825919824, "grad_norm": 0.46896442770957947, "learning_rate": 1.0935837287655986e-05, "loss": 0.5425, "step": 34301 }, { "epoch": 0.941845140032949, "grad_norm": 0.4365326166152954, "learning_rate": 1.0935407288776576e-05, "loss": 0.4838, "step": 34302 }, { "epoch": 0.9418725974739154, "grad_norm": 0.359025776386261, "learning_rate": 1.0934977288152331e-05, "loss": 0.4457, "step": 34303 }, { "epoch": 0.9419000549148819, "grad_norm": 0.37281250953674316, "learning_rate": 1.0934547285784056e-05, "loss": 0.463, "step": 34304 }, { "epoch": 0.9419275123558485, "grad_norm": 0.43920400738716125, "learning_rate": 1.0934117281672554e-05, "loss": 0.5551, "step": 34305 }, { "epoch": 0.9419549697968149, "grad_norm": 0.36720436811447144, "learning_rate": 1.0933687275818621e-05, "loss": 0.4559, "step": 34306 }, { "epoch": 0.9419824272377815, "grad_norm": 0.4490230977535248, "learning_rate": 1.0933257268223066e-05, "loss": 0.5115, "step": 34307 }, { "epoch": 0.9420098846787479, "grad_norm": 0.37182432413101196, "learning_rate": 1.0932827258886684e-05, "loss": 0.4932, "step": 34308 }, { "epoch": 0.9420373421197145, "grad_norm": 0.3384484648704529, "learning_rate": 1.0932397247810287e-05, "loss": 0.4854, "step": 34309 }, { "epoch": 0.9420647995606809, "grad_norm": 0.45778846740722656, "learning_rate": 1.0931967234994669e-05, "loss": 0.4618, "step": 34310 }, { "epoch": 0.9420922570016474, "grad_norm": 0.396276593208313, "learning_rate": 1.0931537220440636e-05, "loss": 0.5317, "step": 34311 }, { "epoch": 0.942119714442614, "grad_norm": 0.35565200448036194, "learning_rate": 1.0931107204148988e-05, "loss": 0.4509, "step": 34312 }, { "epoch": 0.9421471718835804, "grad_norm": 0.3821839690208435, "learning_rate": 1.0930677186120529e-05, "loss": 0.4731, "step": 34313 }, { "epoch": 0.942174629324547, "grad_norm": 0.4325721859931946, "learning_rate": 1.0930247166356056e-05, "loss": 0.5405, "step": 34314 }, { "epoch": 0.9422020867655134, "grad_norm": 0.36549898982048035, "learning_rate": 1.0929817144856378e-05, "loss": 0.4802, "step": 34315 }, { "epoch": 0.94222954420648, "grad_norm": 0.41403937339782715, "learning_rate": 1.0929387121622295e-05, "loss": 0.5059, "step": 34316 }, { "epoch": 0.9422570016474464, "grad_norm": 0.3875843584537506, "learning_rate": 1.0928957096654607e-05, "loss": 0.5124, "step": 34317 }, { "epoch": 0.942284459088413, "grad_norm": 0.49516770243644714, "learning_rate": 1.092852706995412e-05, "loss": 0.4322, "step": 34318 }, { "epoch": 0.9423119165293795, "grad_norm": 0.4186944365501404, "learning_rate": 1.0928097041521631e-05, "loss": 0.473, "step": 34319 }, { "epoch": 0.942339373970346, "grad_norm": 0.42165496945381165, "learning_rate": 1.0927667011357945e-05, "loss": 0.516, "step": 34320 }, { "epoch": 0.9423668314113125, "grad_norm": 0.37026122212409973, "learning_rate": 1.0927236979463863e-05, "loss": 0.4752, "step": 34321 }, { "epoch": 0.9423942888522789, "grad_norm": 0.4103066027164459, "learning_rate": 1.0926806945840191e-05, "loss": 0.4909, "step": 34322 }, { "epoch": 0.9424217462932455, "grad_norm": 0.35498419404029846, "learning_rate": 1.0926376910487728e-05, "loss": 0.4712, "step": 34323 }, { "epoch": 0.9424492037342119, "grad_norm": 0.3811432719230652, "learning_rate": 1.0925946873407274e-05, "loss": 0.5211, "step": 34324 }, { "epoch": 0.9424766611751785, "grad_norm": 0.4976426661014557, "learning_rate": 1.0925516834599635e-05, "loss": 0.4688, "step": 34325 }, { "epoch": 0.942504118616145, "grad_norm": 0.3707168698310852, "learning_rate": 1.0925086794065612e-05, "loss": 0.45, "step": 34326 }, { "epoch": 0.9425315760571115, "grad_norm": 0.41980940103530884, "learning_rate": 1.0924656751806004e-05, "loss": 0.492, "step": 34327 }, { "epoch": 0.942559033498078, "grad_norm": 0.39333900809288025, "learning_rate": 1.092422670782162e-05, "loss": 0.4943, "step": 34328 }, { "epoch": 0.9425864909390445, "grad_norm": 0.3975180387496948, "learning_rate": 1.0923796662113255e-05, "loss": 0.5146, "step": 34329 }, { "epoch": 0.942613948380011, "grad_norm": 0.39977720379829407, "learning_rate": 1.0923366614681716e-05, "loss": 0.4842, "step": 34330 }, { "epoch": 0.9426414058209774, "grad_norm": 0.3974045515060425, "learning_rate": 1.0922936565527807e-05, "loss": 0.4486, "step": 34331 }, { "epoch": 0.942668863261944, "grad_norm": 0.4272530972957611, "learning_rate": 1.092250651465232e-05, "loss": 0.5364, "step": 34332 }, { "epoch": 0.9426963207029105, "grad_norm": 0.42066338658332825, "learning_rate": 1.0922076462056069e-05, "loss": 0.5035, "step": 34333 }, { "epoch": 0.942723778143877, "grad_norm": 0.35441818833351135, "learning_rate": 1.0921646407739848e-05, "loss": 0.4076, "step": 34334 }, { "epoch": 0.9427512355848435, "grad_norm": 0.40083596110343933, "learning_rate": 1.0921216351704465e-05, "loss": 0.4796, "step": 34335 }, { "epoch": 0.94277869302581, "grad_norm": 0.3732988238334656, "learning_rate": 1.0920786293950718e-05, "loss": 0.468, "step": 34336 }, { "epoch": 0.9428061504667765, "grad_norm": 0.46196866035461426, "learning_rate": 1.092035623447941e-05, "loss": 0.5268, "step": 34337 }, { "epoch": 0.942833607907743, "grad_norm": 0.387746661901474, "learning_rate": 1.0919926173291345e-05, "loss": 0.4524, "step": 34338 }, { "epoch": 0.9428610653487095, "grad_norm": 0.3928948938846588, "learning_rate": 1.0919496110387323e-05, "loss": 0.5138, "step": 34339 }, { "epoch": 0.942888522789676, "grad_norm": 0.3738824129104614, "learning_rate": 1.0919066045768147e-05, "loss": 0.4436, "step": 34340 }, { "epoch": 0.9429159802306425, "grad_norm": 0.35403409600257874, "learning_rate": 1.0918635979434622e-05, "loss": 0.512, "step": 34341 }, { "epoch": 0.942943437671609, "grad_norm": 0.48494502902030945, "learning_rate": 1.0918205911387545e-05, "loss": 0.5569, "step": 34342 }, { "epoch": 0.9429708951125755, "grad_norm": 0.4632241129875183, "learning_rate": 1.0917775841627722e-05, "loss": 0.5595, "step": 34343 }, { "epoch": 0.942998352553542, "grad_norm": 0.3741208612918854, "learning_rate": 1.0917345770155953e-05, "loss": 0.5038, "step": 34344 }, { "epoch": 0.9430258099945085, "grad_norm": 0.45713791251182556, "learning_rate": 1.0916915696973044e-05, "loss": 0.4361, "step": 34345 }, { "epoch": 0.943053267435475, "grad_norm": 0.43308812379837036, "learning_rate": 1.0916485622079794e-05, "loss": 0.458, "step": 34346 }, { "epoch": 0.9430807248764416, "grad_norm": 0.35965099930763245, "learning_rate": 1.0916055545477004e-05, "loss": 0.4716, "step": 34347 }, { "epoch": 0.943108182317408, "grad_norm": 0.3790360987186432, "learning_rate": 1.091562546716548e-05, "loss": 0.4769, "step": 34348 }, { "epoch": 0.9431356397583746, "grad_norm": 0.464787095785141, "learning_rate": 1.091519538714602e-05, "loss": 0.558, "step": 34349 }, { "epoch": 0.943163097199341, "grad_norm": 0.3460358679294586, "learning_rate": 1.091476530541943e-05, "loss": 0.502, "step": 34350 }, { "epoch": 0.9431905546403075, "grad_norm": 0.4164578914642334, "learning_rate": 1.0914335221986511e-05, "loss": 0.4873, "step": 34351 }, { "epoch": 0.943218012081274, "grad_norm": 0.4739196300506592, "learning_rate": 1.0913905136848064e-05, "loss": 0.6225, "step": 34352 }, { "epoch": 0.9432454695222405, "grad_norm": 0.4238758981227875, "learning_rate": 1.0913475050004893e-05, "loss": 0.5059, "step": 34353 }, { "epoch": 0.9432729269632071, "grad_norm": 0.36139002442359924, "learning_rate": 1.0913044961457799e-05, "loss": 0.4647, "step": 34354 }, { "epoch": 0.9433003844041735, "grad_norm": 0.38505256175994873, "learning_rate": 1.0912614871207586e-05, "loss": 0.5057, "step": 34355 }, { "epoch": 0.9433278418451401, "grad_norm": 0.3610764443874359, "learning_rate": 1.0912184779255053e-05, "loss": 0.5094, "step": 34356 }, { "epoch": 0.9433552992861065, "grad_norm": 0.4792734384536743, "learning_rate": 1.0911754685601006e-05, "loss": 0.5365, "step": 34357 }, { "epoch": 0.9433827567270731, "grad_norm": 0.3834724724292755, "learning_rate": 1.0911324590246243e-05, "loss": 0.4327, "step": 34358 }, { "epoch": 0.9434102141680395, "grad_norm": 0.4602283537387848, "learning_rate": 1.091089449319157e-05, "loss": 0.4416, "step": 34359 }, { "epoch": 0.943437671609006, "grad_norm": 0.3879586160182953, "learning_rate": 1.091046439443779e-05, "loss": 0.5875, "step": 34360 }, { "epoch": 0.9434651290499726, "grad_norm": 0.3724091947078705, "learning_rate": 1.09100342939857e-05, "loss": 0.4451, "step": 34361 }, { "epoch": 0.943492586490939, "grad_norm": 0.45191749930381775, "learning_rate": 1.0909604191836109e-05, "loss": 0.4995, "step": 34362 }, { "epoch": 0.9435200439319056, "grad_norm": 0.3331415057182312, "learning_rate": 1.0909174087989816e-05, "loss": 0.3892, "step": 34363 }, { "epoch": 0.943547501372872, "grad_norm": 0.3681741952896118, "learning_rate": 1.090874398244762e-05, "loss": 0.4977, "step": 34364 }, { "epoch": 0.9435749588138386, "grad_norm": 0.4593007266521454, "learning_rate": 1.0908313875210327e-05, "loss": 0.5419, "step": 34365 }, { "epoch": 0.943602416254805, "grad_norm": 0.4323198199272156, "learning_rate": 1.090788376627874e-05, "loss": 0.4281, "step": 34366 }, { "epoch": 0.9436298736957716, "grad_norm": 0.3505270183086395, "learning_rate": 1.0907453655653659e-05, "loss": 0.4224, "step": 34367 }, { "epoch": 0.9436573311367381, "grad_norm": 0.3718193769454956, "learning_rate": 1.0907023543335891e-05, "loss": 0.4709, "step": 34368 }, { "epoch": 0.9436847885777045, "grad_norm": 0.41437041759490967, "learning_rate": 1.090659342932623e-05, "loss": 0.5635, "step": 34369 }, { "epoch": 0.9437122460186711, "grad_norm": 0.4005882740020752, "learning_rate": 1.0906163313625485e-05, "loss": 0.4437, "step": 34370 }, { "epoch": 0.9437397034596375, "grad_norm": 0.3827400505542755, "learning_rate": 1.0905733196234454e-05, "loss": 0.4833, "step": 34371 }, { "epoch": 0.9437671609006041, "grad_norm": 0.4193068742752075, "learning_rate": 1.0905303077153944e-05, "loss": 0.469, "step": 34372 }, { "epoch": 0.9437946183415705, "grad_norm": 0.3999626636505127, "learning_rate": 1.0904872956384753e-05, "loss": 0.4577, "step": 34373 }, { "epoch": 0.9438220757825371, "grad_norm": 0.3991954028606415, "learning_rate": 1.0904442833927686e-05, "loss": 0.5321, "step": 34374 }, { "epoch": 0.9438495332235036, "grad_norm": 0.4415748417377472, "learning_rate": 1.0904012709783547e-05, "loss": 0.5468, "step": 34375 }, { "epoch": 0.9438769906644701, "grad_norm": 0.5460970997810364, "learning_rate": 1.0903582583953132e-05, "loss": 0.5767, "step": 34376 }, { "epoch": 0.9439044481054366, "grad_norm": 0.3361993730068207, "learning_rate": 1.0903152456437249e-05, "loss": 0.493, "step": 34377 }, { "epoch": 0.943931905546403, "grad_norm": 0.48174232244491577, "learning_rate": 1.0902722327236697e-05, "loss": 0.5659, "step": 34378 }, { "epoch": 0.9439593629873696, "grad_norm": 0.6249967813491821, "learning_rate": 1.090229219635228e-05, "loss": 0.4965, "step": 34379 }, { "epoch": 0.943986820428336, "grad_norm": 0.36034998297691345, "learning_rate": 1.0901862063784802e-05, "loss": 0.4334, "step": 34380 }, { "epoch": 0.9440142778693026, "grad_norm": 0.38194742798805237, "learning_rate": 1.090143192953506e-05, "loss": 0.4557, "step": 34381 }, { "epoch": 0.9440417353102691, "grad_norm": 0.38510337471961975, "learning_rate": 1.0901001793603863e-05, "loss": 0.5041, "step": 34382 }, { "epoch": 0.9440691927512356, "grad_norm": 0.38397330045700073, "learning_rate": 1.0900571655992007e-05, "loss": 0.4619, "step": 34383 }, { "epoch": 0.9440966501922021, "grad_norm": 0.3415156900882721, "learning_rate": 1.09001415167003e-05, "loss": 0.4541, "step": 34384 }, { "epoch": 0.9441241076331686, "grad_norm": 0.37674885988235474, "learning_rate": 1.0899711375729543e-05, "loss": 0.4509, "step": 34385 }, { "epoch": 0.9441515650741351, "grad_norm": 0.40644222497940063, "learning_rate": 1.0899281233080535e-05, "loss": 0.4911, "step": 34386 }, { "epoch": 0.9441790225151016, "grad_norm": 0.5093401074409485, "learning_rate": 1.0898851088754082e-05, "loss": 0.5369, "step": 34387 }, { "epoch": 0.9442064799560681, "grad_norm": 0.41910773515701294, "learning_rate": 1.0898420942750982e-05, "loss": 0.5758, "step": 34388 }, { "epoch": 0.9442339373970347, "grad_norm": 0.4840419590473175, "learning_rate": 1.0897990795072042e-05, "loss": 0.5092, "step": 34389 }, { "epoch": 0.9442613948380011, "grad_norm": 0.443649023771286, "learning_rate": 1.0897560645718064e-05, "loss": 0.4409, "step": 34390 }, { "epoch": 0.9442888522789676, "grad_norm": 0.4136585593223572, "learning_rate": 1.0897130494689845e-05, "loss": 0.5634, "step": 34391 }, { "epoch": 0.9443163097199341, "grad_norm": 0.37525373697280884, "learning_rate": 1.0896700341988194e-05, "loss": 0.4666, "step": 34392 }, { "epoch": 0.9443437671609006, "grad_norm": 0.43561896681785583, "learning_rate": 1.089627018761391e-05, "loss": 0.5693, "step": 34393 }, { "epoch": 0.9443712246018671, "grad_norm": 0.3583577871322632, "learning_rate": 1.0895840031567798e-05, "loss": 0.5314, "step": 34394 }, { "epoch": 0.9443986820428336, "grad_norm": 0.4119987189769745, "learning_rate": 1.0895409873850654e-05, "loss": 0.4923, "step": 34395 }, { "epoch": 0.9444261394838002, "grad_norm": 0.40650293231010437, "learning_rate": 1.0894979714463288e-05, "loss": 0.4935, "step": 34396 }, { "epoch": 0.9444535969247666, "grad_norm": 0.3826306462287903, "learning_rate": 1.0894549553406499e-05, "loss": 0.5094, "step": 34397 }, { "epoch": 0.9444810543657332, "grad_norm": 0.42364734411239624, "learning_rate": 1.089411939068109e-05, "loss": 0.5203, "step": 34398 }, { "epoch": 0.9445085118066996, "grad_norm": 0.42609867453575134, "learning_rate": 1.0893689226287863e-05, "loss": 0.4878, "step": 34399 }, { "epoch": 0.9445359692476661, "grad_norm": 0.39209553599357605, "learning_rate": 1.089325906022762e-05, "loss": 0.5345, "step": 34400 }, { "epoch": 0.9445634266886326, "grad_norm": 0.4364708960056305, "learning_rate": 1.0892828892501161e-05, "loss": 0.4793, "step": 34401 }, { "epoch": 0.9445908841295991, "grad_norm": 0.41138479113578796, "learning_rate": 1.0892398723109295e-05, "loss": 0.5339, "step": 34402 }, { "epoch": 0.9446183415705657, "grad_norm": 0.49692976474761963, "learning_rate": 1.0891968552052821e-05, "loss": 0.5082, "step": 34403 }, { "epoch": 0.9446457990115321, "grad_norm": 0.393375039100647, "learning_rate": 1.0891538379332536e-05, "loss": 0.4356, "step": 34404 }, { "epoch": 0.9446732564524987, "grad_norm": 0.39509040117263794, "learning_rate": 1.0891108204949252e-05, "loss": 0.4949, "step": 34405 }, { "epoch": 0.9447007138934651, "grad_norm": 0.36306121945381165, "learning_rate": 1.0890678028903765e-05, "loss": 0.4716, "step": 34406 }, { "epoch": 0.9447281713344317, "grad_norm": 0.4500408470630646, "learning_rate": 1.0890247851196878e-05, "loss": 0.4417, "step": 34407 }, { "epoch": 0.9447556287753981, "grad_norm": 0.4679945707321167, "learning_rate": 1.0889817671829397e-05, "loss": 0.5257, "step": 34408 }, { "epoch": 0.9447830862163646, "grad_norm": 0.4572781026363373, "learning_rate": 1.0889387490802122e-05, "loss": 0.514, "step": 34409 }, { "epoch": 0.9448105436573312, "grad_norm": 0.42408308386802673, "learning_rate": 1.0888957308115855e-05, "loss": 0.5051, "step": 34410 }, { "epoch": 0.9448380010982976, "grad_norm": 0.41350463032722473, "learning_rate": 1.0888527123771398e-05, "loss": 0.5055, "step": 34411 }, { "epoch": 0.9448654585392642, "grad_norm": 0.41754159331321716, "learning_rate": 1.0888096937769555e-05, "loss": 0.4847, "step": 34412 }, { "epoch": 0.9448929159802306, "grad_norm": 0.41462400555610657, "learning_rate": 1.0887666750111127e-05, "loss": 0.5183, "step": 34413 }, { "epoch": 0.9449203734211972, "grad_norm": 0.4070112109184265, "learning_rate": 1.0887236560796916e-05, "loss": 0.4307, "step": 34414 }, { "epoch": 0.9449478308621636, "grad_norm": 0.40161454677581787, "learning_rate": 1.088680636982773e-05, "loss": 0.5281, "step": 34415 }, { "epoch": 0.9449752883031302, "grad_norm": 0.3873355984687805, "learning_rate": 1.0886376177204365e-05, "loss": 0.5526, "step": 34416 }, { "epoch": 0.9450027457440966, "grad_norm": 0.3865630030632019, "learning_rate": 1.0885945982927623e-05, "loss": 0.4048, "step": 34417 }, { "epoch": 0.9450302031850631, "grad_norm": 0.4034617245197296, "learning_rate": 1.088551578699831e-05, "loss": 0.5144, "step": 34418 }, { "epoch": 0.9450576606260297, "grad_norm": 0.38899025321006775, "learning_rate": 1.088508558941723e-05, "loss": 0.447, "step": 34419 }, { "epoch": 0.9450851180669961, "grad_norm": 0.43000245094299316, "learning_rate": 1.0884655390185181e-05, "loss": 0.4331, "step": 34420 }, { "epoch": 0.9451125755079627, "grad_norm": 0.4323467016220093, "learning_rate": 1.0884225189302968e-05, "loss": 0.4963, "step": 34421 }, { "epoch": 0.9451400329489291, "grad_norm": 0.4045164883136749, "learning_rate": 1.0883794986771392e-05, "loss": 0.4927, "step": 34422 }, { "epoch": 0.9451674903898957, "grad_norm": 0.3759559690952301, "learning_rate": 1.0883364782591256e-05, "loss": 0.4532, "step": 34423 }, { "epoch": 0.9451949478308621, "grad_norm": 0.3807232975959778, "learning_rate": 1.0882934576763367e-05, "loss": 0.5352, "step": 34424 }, { "epoch": 0.9452224052718287, "grad_norm": 0.42324236035346985, "learning_rate": 1.0882504369288519e-05, "loss": 0.5165, "step": 34425 }, { "epoch": 0.9452498627127952, "grad_norm": 0.4306406080722809, "learning_rate": 1.0882074160167518e-05, "loss": 0.483, "step": 34426 }, { "epoch": 0.9452773201537616, "grad_norm": 0.5811260938644409, "learning_rate": 1.0881643949401169e-05, "loss": 0.4887, "step": 34427 }, { "epoch": 0.9453047775947282, "grad_norm": 0.39248019456863403, "learning_rate": 1.088121373699027e-05, "loss": 0.5391, "step": 34428 }, { "epoch": 0.9453322350356946, "grad_norm": 0.3769646883010864, "learning_rate": 1.0880783522935632e-05, "loss": 0.475, "step": 34429 }, { "epoch": 0.9453596924766612, "grad_norm": 0.43171361088752747, "learning_rate": 1.0880353307238045e-05, "loss": 0.4915, "step": 34430 }, { "epoch": 0.9453871499176276, "grad_norm": 0.40790534019470215, "learning_rate": 1.0879923089898322e-05, "loss": 0.4766, "step": 34431 }, { "epoch": 0.9454146073585942, "grad_norm": 0.43836453557014465, "learning_rate": 1.0879492870917258e-05, "loss": 0.4892, "step": 34432 }, { "epoch": 0.9454420647995607, "grad_norm": 0.3763732612133026, "learning_rate": 1.0879062650295664e-05, "loss": 0.4055, "step": 34433 }, { "epoch": 0.9454695222405272, "grad_norm": 0.39247700572013855, "learning_rate": 1.0878632428034338e-05, "loss": 0.5451, "step": 34434 }, { "epoch": 0.9454969796814937, "grad_norm": 0.36050668358802795, "learning_rate": 1.0878202204134075e-05, "loss": 0.4928, "step": 34435 }, { "epoch": 0.9455244371224601, "grad_norm": 0.38116422295570374, "learning_rate": 1.0877771978595692e-05, "loss": 0.4784, "step": 34436 }, { "epoch": 0.9455518945634267, "grad_norm": 0.4020107090473175, "learning_rate": 1.087734175141998e-05, "loss": 0.4782, "step": 34437 }, { "epoch": 0.9455793520043931, "grad_norm": 0.35953962802886963, "learning_rate": 1.0876911522607747e-05, "loss": 0.4318, "step": 34438 }, { "epoch": 0.9456068094453597, "grad_norm": 0.38134926557540894, "learning_rate": 1.0876481292159795e-05, "loss": 0.4545, "step": 34439 }, { "epoch": 0.9456342668863262, "grad_norm": 0.36404499411582947, "learning_rate": 1.0876051060076921e-05, "loss": 0.433, "step": 34440 }, { "epoch": 0.9456617243272927, "grad_norm": 0.3868614137172699, "learning_rate": 1.0875620826359939e-05, "loss": 0.5758, "step": 34441 }, { "epoch": 0.9456891817682592, "grad_norm": 0.39338114857673645, "learning_rate": 1.0875190591009641e-05, "loss": 0.5147, "step": 34442 }, { "epoch": 0.9457166392092257, "grad_norm": 0.36203157901763916, "learning_rate": 1.0874760354026833e-05, "loss": 0.5015, "step": 34443 }, { "epoch": 0.9457440966501922, "grad_norm": 0.412622332572937, "learning_rate": 1.0874330115412319e-05, "loss": 0.5225, "step": 34444 }, { "epoch": 0.9457715540911587, "grad_norm": 0.4001764953136444, "learning_rate": 1.0873899875166898e-05, "loss": 0.5224, "step": 34445 }, { "epoch": 0.9457990115321252, "grad_norm": 0.4492122232913971, "learning_rate": 1.0873469633291378e-05, "loss": 0.4715, "step": 34446 }, { "epoch": 0.9458264689730917, "grad_norm": 0.3963572680950165, "learning_rate": 1.0873039389786557e-05, "loss": 0.5476, "step": 34447 }, { "epoch": 0.9458539264140582, "grad_norm": 0.38389208912849426, "learning_rate": 1.0872609144653239e-05, "loss": 0.4593, "step": 34448 }, { "epoch": 0.9458813838550247, "grad_norm": 0.4157109260559082, "learning_rate": 1.0872178897892228e-05, "loss": 0.505, "step": 34449 }, { "epoch": 0.9459088412959912, "grad_norm": 0.35179412364959717, "learning_rate": 1.0871748649504321e-05, "loss": 0.5026, "step": 34450 }, { "epoch": 0.9459362987369577, "grad_norm": 0.3608151078224182, "learning_rate": 1.0871318399490328e-05, "loss": 0.4851, "step": 34451 }, { "epoch": 0.9459637561779242, "grad_norm": 0.38519468903541565, "learning_rate": 1.0870888147851046e-05, "loss": 0.5254, "step": 34452 }, { "epoch": 0.9459912136188907, "grad_norm": 0.3617476224899292, "learning_rate": 1.0870457894587279e-05, "loss": 0.5086, "step": 34453 }, { "epoch": 0.9460186710598573, "grad_norm": 0.44527456164360046, "learning_rate": 1.0870027639699834e-05, "loss": 0.5188, "step": 34454 }, { "epoch": 0.9460461285008237, "grad_norm": 0.39935028553009033, "learning_rate": 1.0869597383189508e-05, "loss": 0.4663, "step": 34455 }, { "epoch": 0.9460735859417903, "grad_norm": 0.3997553884983063, "learning_rate": 1.0869167125057104e-05, "loss": 0.4284, "step": 34456 }, { "epoch": 0.9461010433827567, "grad_norm": 0.4336923658847809, "learning_rate": 1.0868736865303426e-05, "loss": 0.4941, "step": 34457 }, { "epoch": 0.9461285008237232, "grad_norm": 0.3632863461971283, "learning_rate": 1.0868306603929276e-05, "loss": 0.5155, "step": 34458 }, { "epoch": 0.9461559582646897, "grad_norm": 0.4428729712963104, "learning_rate": 1.086787634093546e-05, "loss": 0.4743, "step": 34459 }, { "epoch": 0.9461834157056562, "grad_norm": 0.4982559382915497, "learning_rate": 1.0867446076322776e-05, "loss": 0.5912, "step": 34460 }, { "epoch": 0.9462108731466228, "grad_norm": 0.4042717516422272, "learning_rate": 1.0867015810092025e-05, "loss": 0.4501, "step": 34461 }, { "epoch": 0.9462383305875892, "grad_norm": 0.3389650881290436, "learning_rate": 1.0866585542244017e-05, "loss": 0.4894, "step": 34462 }, { "epoch": 0.9462657880285558, "grad_norm": 0.41241028904914856, "learning_rate": 1.0866155272779549e-05, "loss": 0.5685, "step": 34463 }, { "epoch": 0.9462932454695222, "grad_norm": 0.5016523003578186, "learning_rate": 1.0865725001699426e-05, "loss": 0.5448, "step": 34464 }, { "epoch": 0.9463207029104888, "grad_norm": 0.4708002209663391, "learning_rate": 1.086529472900445e-05, "loss": 0.5423, "step": 34465 }, { "epoch": 0.9463481603514552, "grad_norm": 0.39178112149238586, "learning_rate": 1.086486445469542e-05, "loss": 0.4177, "step": 34466 }, { "epoch": 0.9463756177924217, "grad_norm": 0.4410799443721771, "learning_rate": 1.0864434178773144e-05, "loss": 0.5192, "step": 34467 }, { "epoch": 0.9464030752333883, "grad_norm": 0.37554121017456055, "learning_rate": 1.0864003901238422e-05, "loss": 0.4589, "step": 34468 }, { "epoch": 0.9464305326743547, "grad_norm": 0.4408629536628723, "learning_rate": 1.0863573622092055e-05, "loss": 0.5092, "step": 34469 }, { "epoch": 0.9464579901153213, "grad_norm": 0.3760436773300171, "learning_rate": 1.0863143341334852e-05, "loss": 0.5242, "step": 34470 }, { "epoch": 0.9464854475562877, "grad_norm": 0.3909115493297577, "learning_rate": 1.0862713058967609e-05, "loss": 0.5372, "step": 34471 }, { "epoch": 0.9465129049972543, "grad_norm": 0.391248881816864, "learning_rate": 1.0862282774991132e-05, "loss": 0.5147, "step": 34472 }, { "epoch": 0.9465403624382207, "grad_norm": 0.3821565806865692, "learning_rate": 1.086185248940622e-05, "loss": 0.4907, "step": 34473 }, { "epoch": 0.9465678198791873, "grad_norm": 0.3716680109500885, "learning_rate": 1.0861422202213679e-05, "loss": 0.4957, "step": 34474 }, { "epoch": 0.9465952773201538, "grad_norm": 0.37615591287612915, "learning_rate": 1.086099191341431e-05, "loss": 0.4607, "step": 34475 }, { "epoch": 0.9466227347611202, "grad_norm": 0.39856359362602234, "learning_rate": 1.0860561623008917e-05, "loss": 0.4648, "step": 34476 }, { "epoch": 0.9466501922020868, "grad_norm": 0.37461385130882263, "learning_rate": 1.0860131330998303e-05, "loss": 0.4865, "step": 34477 }, { "epoch": 0.9466776496430532, "grad_norm": 0.39435747265815735, "learning_rate": 1.0859701037383269e-05, "loss": 0.5698, "step": 34478 }, { "epoch": 0.9467051070840198, "grad_norm": 0.4208971858024597, "learning_rate": 1.0859270742164616e-05, "loss": 0.4537, "step": 34479 }, { "epoch": 0.9467325645249862, "grad_norm": 0.37801435589790344, "learning_rate": 1.085884044534315e-05, "loss": 0.4694, "step": 34480 }, { "epoch": 0.9467600219659528, "grad_norm": 0.3790411949157715, "learning_rate": 1.0858410146919674e-05, "loss": 0.4308, "step": 34481 }, { "epoch": 0.9467874794069193, "grad_norm": 0.37484246492385864, "learning_rate": 1.0857979846894985e-05, "loss": 0.478, "step": 34482 }, { "epoch": 0.9468149368478858, "grad_norm": 0.3938000202178955, "learning_rate": 1.0857549545269897e-05, "loss": 0.4706, "step": 34483 }, { "epoch": 0.9468423942888523, "grad_norm": 0.37731337547302246, "learning_rate": 1.0857119242045198e-05, "loss": 0.5506, "step": 34484 }, { "epoch": 0.9468698517298187, "grad_norm": 0.481862872838974, "learning_rate": 1.0856688937221701e-05, "loss": 0.4481, "step": 34485 }, { "epoch": 0.9468973091707853, "grad_norm": 0.3912605941295624, "learning_rate": 1.0856258630800208e-05, "loss": 0.5244, "step": 34486 }, { "epoch": 0.9469247666117517, "grad_norm": 0.35409078001976013, "learning_rate": 1.0855828322781517e-05, "loss": 0.4708, "step": 34487 }, { "epoch": 0.9469522240527183, "grad_norm": 0.37956178188323975, "learning_rate": 1.0855398013166433e-05, "loss": 0.4735, "step": 34488 }, { "epoch": 0.9469796814936848, "grad_norm": 0.35397493839263916, "learning_rate": 1.0854967701955758e-05, "loss": 0.467, "step": 34489 }, { "epoch": 0.9470071389346513, "grad_norm": 0.3804638087749481, "learning_rate": 1.0854537389150298e-05, "loss": 0.359, "step": 34490 }, { "epoch": 0.9470345963756178, "grad_norm": 0.3835681080818176, "learning_rate": 1.085410707475085e-05, "loss": 0.4271, "step": 34491 }, { "epoch": 0.9470620538165843, "grad_norm": 0.37310591340065, "learning_rate": 1.085367675875822e-05, "loss": 0.471, "step": 34492 }, { "epoch": 0.9470895112575508, "grad_norm": 0.39213475584983826, "learning_rate": 1.0853246441173213e-05, "loss": 0.5181, "step": 34493 }, { "epoch": 0.9471169686985172, "grad_norm": 0.4634478688240051, "learning_rate": 1.0852816121996625e-05, "loss": 0.5628, "step": 34494 }, { "epoch": 0.9471444261394838, "grad_norm": 0.37715181708335876, "learning_rate": 1.0852385801229268e-05, "loss": 0.5188, "step": 34495 }, { "epoch": 0.9471718835804503, "grad_norm": 0.37696731090545654, "learning_rate": 1.0851955478871936e-05, "loss": 0.4275, "step": 34496 }, { "epoch": 0.9471993410214168, "grad_norm": 0.35155847668647766, "learning_rate": 1.0851525154925436e-05, "loss": 0.4431, "step": 34497 }, { "epoch": 0.9472267984623833, "grad_norm": 0.3785460889339447, "learning_rate": 1.085109482939057e-05, "loss": 0.4827, "step": 34498 }, { "epoch": 0.9472542559033498, "grad_norm": 0.4514460563659668, "learning_rate": 1.085066450226814e-05, "loss": 0.5107, "step": 34499 }, { "epoch": 0.9472817133443163, "grad_norm": 0.3865106403827667, "learning_rate": 1.085023417355895e-05, "loss": 0.4409, "step": 34500 }, { "epoch": 0.9473091707852828, "grad_norm": 0.40282949805259705, "learning_rate": 1.0849803843263802e-05, "loss": 0.5318, "step": 34501 }, { "epoch": 0.9473366282262493, "grad_norm": 0.5125645399093628, "learning_rate": 1.0849373511383497e-05, "loss": 0.5474, "step": 34502 }, { "epoch": 0.9473640856672159, "grad_norm": 0.3417424261569977, "learning_rate": 1.084894317791884e-05, "loss": 0.4402, "step": 34503 }, { "epoch": 0.9473915431081823, "grad_norm": 0.3710900843143463, "learning_rate": 1.0848512842870633e-05, "loss": 0.4719, "step": 34504 }, { "epoch": 0.9474190005491488, "grad_norm": 0.42811399698257446, "learning_rate": 1.084808250623968e-05, "loss": 0.5406, "step": 34505 }, { "epoch": 0.9474464579901153, "grad_norm": 0.37875255942344666, "learning_rate": 1.0847652168026781e-05, "loss": 0.4712, "step": 34506 }, { "epoch": 0.9474739154310818, "grad_norm": 0.4103841483592987, "learning_rate": 1.0847221828232738e-05, "loss": 0.5172, "step": 34507 }, { "epoch": 0.9475013728720483, "grad_norm": 0.4241507053375244, "learning_rate": 1.084679148685836e-05, "loss": 0.5843, "step": 34508 }, { "epoch": 0.9475288303130148, "grad_norm": 0.46203652024269104, "learning_rate": 1.0846361143904445e-05, "loss": 0.4832, "step": 34509 }, { "epoch": 0.9475562877539814, "grad_norm": 0.36751148104667664, "learning_rate": 1.0845930799371793e-05, "loss": 0.5307, "step": 34510 }, { "epoch": 0.9475837451949478, "grad_norm": 0.3751312792301178, "learning_rate": 1.0845500453261213e-05, "loss": 0.5214, "step": 34511 }, { "epoch": 0.9476112026359144, "grad_norm": 0.3261478543281555, "learning_rate": 1.0845070105573502e-05, "loss": 0.3886, "step": 34512 }, { "epoch": 0.9476386600768808, "grad_norm": 0.5468853116035461, "learning_rate": 1.084463975630947e-05, "loss": 0.5087, "step": 34513 }, { "epoch": 0.9476661175178474, "grad_norm": 0.4350268840789795, "learning_rate": 1.0844209405469915e-05, "loss": 0.5036, "step": 34514 }, { "epoch": 0.9476935749588138, "grad_norm": 0.5203311443328857, "learning_rate": 1.0843779053055637e-05, "loss": 0.4999, "step": 34515 }, { "epoch": 0.9477210323997803, "grad_norm": 0.41822293400764465, "learning_rate": 1.0843348699067441e-05, "loss": 0.4987, "step": 34516 }, { "epoch": 0.9477484898407469, "grad_norm": 0.39376115798950195, "learning_rate": 1.0842918343506135e-05, "loss": 0.4959, "step": 34517 }, { "epoch": 0.9477759472817133, "grad_norm": 0.3882836699485779, "learning_rate": 1.0842487986372514e-05, "loss": 0.4225, "step": 34518 }, { "epoch": 0.9478034047226799, "grad_norm": 0.4086126387119293, "learning_rate": 1.0842057627667384e-05, "loss": 0.5251, "step": 34519 }, { "epoch": 0.9478308621636463, "grad_norm": 0.41174447536468506, "learning_rate": 1.0841627267391547e-05, "loss": 0.5245, "step": 34520 }, { "epoch": 0.9478583196046129, "grad_norm": 0.40575921535491943, "learning_rate": 1.084119690554581e-05, "loss": 0.4772, "step": 34521 }, { "epoch": 0.9478857770455793, "grad_norm": 0.3806719481945038, "learning_rate": 1.0840766542130971e-05, "loss": 0.5257, "step": 34522 }, { "epoch": 0.9479132344865459, "grad_norm": 0.42496222257614136, "learning_rate": 1.0840336177147832e-05, "loss": 0.5608, "step": 34523 }, { "epoch": 0.9479406919275124, "grad_norm": 0.3672117590904236, "learning_rate": 1.08399058105972e-05, "loss": 0.4846, "step": 34524 }, { "epoch": 0.9479681493684788, "grad_norm": 0.42602258920669556, "learning_rate": 1.0839475442479873e-05, "loss": 0.5212, "step": 34525 }, { "epoch": 0.9479956068094454, "grad_norm": 0.5399554371833801, "learning_rate": 1.0839045072796658e-05, "loss": 0.4987, "step": 34526 }, { "epoch": 0.9480230642504118, "grad_norm": 0.926740288734436, "learning_rate": 1.0838614701548357e-05, "loss": 0.541, "step": 34527 }, { "epoch": 0.9480505216913784, "grad_norm": 0.411208838224411, "learning_rate": 1.083818432873577e-05, "loss": 0.4973, "step": 34528 }, { "epoch": 0.9480779791323448, "grad_norm": 0.4116266965866089, "learning_rate": 1.0837753954359704e-05, "loss": 0.4954, "step": 34529 }, { "epoch": 0.9481054365733114, "grad_norm": 0.4084867835044861, "learning_rate": 1.0837323578420958e-05, "loss": 0.5325, "step": 34530 }, { "epoch": 0.9481328940142779, "grad_norm": 0.3790270686149597, "learning_rate": 1.0836893200920337e-05, "loss": 0.4421, "step": 34531 }, { "epoch": 0.9481603514552444, "grad_norm": 0.33989396691322327, "learning_rate": 1.0836462821858645e-05, "loss": 0.5026, "step": 34532 }, { "epoch": 0.9481878088962109, "grad_norm": 0.4065544307231903, "learning_rate": 1.0836032441236677e-05, "loss": 0.4216, "step": 34533 }, { "epoch": 0.9482152663371773, "grad_norm": 0.388295978307724, "learning_rate": 1.0835602059055248e-05, "loss": 0.5044, "step": 34534 }, { "epoch": 0.9482427237781439, "grad_norm": 0.35684844851493835, "learning_rate": 1.083517167531515e-05, "loss": 0.3939, "step": 34535 }, { "epoch": 0.9482701812191103, "grad_norm": 0.38088706135749817, "learning_rate": 1.0834741290017192e-05, "loss": 0.5185, "step": 34536 }, { "epoch": 0.9482976386600769, "grad_norm": 0.4406750500202179, "learning_rate": 1.0834310903162173e-05, "loss": 0.4802, "step": 34537 }, { "epoch": 0.9483250961010434, "grad_norm": 0.35414063930511475, "learning_rate": 1.08338805147509e-05, "loss": 0.3912, "step": 34538 }, { "epoch": 0.9483525535420099, "grad_norm": 0.393359899520874, "learning_rate": 1.0833450124784174e-05, "loss": 0.468, "step": 34539 }, { "epoch": 0.9483800109829764, "grad_norm": 0.4828825294971466, "learning_rate": 1.0833019733262797e-05, "loss": 0.4854, "step": 34540 }, { "epoch": 0.9484074684239429, "grad_norm": 0.4260897934436798, "learning_rate": 1.0832589340187573e-05, "loss": 0.497, "step": 34541 }, { "epoch": 0.9484349258649094, "grad_norm": 0.41364359855651855, "learning_rate": 1.0832158945559304e-05, "loss": 0.4826, "step": 34542 }, { "epoch": 0.9484623833058758, "grad_norm": 0.401533305644989, "learning_rate": 1.0831728549378793e-05, "loss": 0.4692, "step": 34543 }, { "epoch": 0.9484898407468424, "grad_norm": 0.512458324432373, "learning_rate": 1.0831298151646843e-05, "loss": 0.4722, "step": 34544 }, { "epoch": 0.9485172981878089, "grad_norm": 0.39818015694618225, "learning_rate": 1.0830867752364255e-05, "loss": 0.5466, "step": 34545 }, { "epoch": 0.9485447556287754, "grad_norm": 0.4256613850593567, "learning_rate": 1.0830437351531834e-05, "loss": 0.594, "step": 34546 }, { "epoch": 0.9485722130697419, "grad_norm": 0.36860036849975586, "learning_rate": 1.0830006949150382e-05, "loss": 0.4862, "step": 34547 }, { "epoch": 0.9485996705107084, "grad_norm": 0.43739053606987, "learning_rate": 1.0829576545220703e-05, "loss": 0.5095, "step": 34548 }, { "epoch": 0.9486271279516749, "grad_norm": 0.37709423899650574, "learning_rate": 1.0829146139743601e-05, "loss": 0.5352, "step": 34549 }, { "epoch": 0.9486545853926414, "grad_norm": 0.4190336763858795, "learning_rate": 1.0828715732719874e-05, "loss": 0.4322, "step": 34550 }, { "epoch": 0.9486820428336079, "grad_norm": 0.34288254380226135, "learning_rate": 1.0828285324150329e-05, "loss": 0.4426, "step": 34551 }, { "epoch": 0.9487095002745745, "grad_norm": 0.38621872663497925, "learning_rate": 1.0827854914035765e-05, "loss": 0.5352, "step": 34552 }, { "epoch": 0.9487369577155409, "grad_norm": 0.3894032835960388, "learning_rate": 1.082742450237699e-05, "loss": 0.5133, "step": 34553 }, { "epoch": 0.9487644151565074, "grad_norm": 0.34158602356910706, "learning_rate": 1.0826994089174802e-05, "loss": 0.4923, "step": 34554 }, { "epoch": 0.9487918725974739, "grad_norm": 0.41160640120506287, "learning_rate": 1.0826563674430009e-05, "loss": 0.4881, "step": 34555 }, { "epoch": 0.9488193300384404, "grad_norm": 0.40888187289237976, "learning_rate": 1.082613325814341e-05, "loss": 0.5437, "step": 34556 }, { "epoch": 0.9488467874794069, "grad_norm": 0.39908212423324585, "learning_rate": 1.0825702840315807e-05, "loss": 0.5945, "step": 34557 }, { "epoch": 0.9488742449203734, "grad_norm": 0.3669831454753876, "learning_rate": 1.0825272420948007e-05, "loss": 0.4953, "step": 34558 }, { "epoch": 0.94890170236134, "grad_norm": 0.38587436079978943, "learning_rate": 1.0824842000040807e-05, "loss": 0.4642, "step": 34559 }, { "epoch": 0.9489291598023064, "grad_norm": 1.354231834411621, "learning_rate": 1.0824411577595017e-05, "loss": 0.5105, "step": 34560 }, { "epoch": 0.948956617243273, "grad_norm": 0.4161090850830078, "learning_rate": 1.0823981153611437e-05, "loss": 0.5473, "step": 34561 }, { "epoch": 0.9489840746842394, "grad_norm": 0.5999234914779663, "learning_rate": 1.0823550728090865e-05, "loss": 0.5215, "step": 34562 }, { "epoch": 0.949011532125206, "grad_norm": 0.4099833071231842, "learning_rate": 1.0823120301034111e-05, "loss": 0.5335, "step": 34563 }, { "epoch": 0.9490389895661724, "grad_norm": 0.4114846885204315, "learning_rate": 1.0822689872441974e-05, "loss": 0.4973, "step": 34564 }, { "epoch": 0.9490664470071389, "grad_norm": 0.42816853523254395, "learning_rate": 1.082225944231526e-05, "loss": 0.4918, "step": 34565 }, { "epoch": 0.9490939044481055, "grad_norm": 0.4268355369567871, "learning_rate": 1.0821829010654768e-05, "loss": 0.5144, "step": 34566 }, { "epoch": 0.9491213618890719, "grad_norm": 0.37574535608291626, "learning_rate": 1.0821398577461302e-05, "loss": 0.4993, "step": 34567 }, { "epoch": 0.9491488193300385, "grad_norm": 0.37180137634277344, "learning_rate": 1.0820968142735666e-05, "loss": 0.519, "step": 34568 }, { "epoch": 0.9491762767710049, "grad_norm": 0.37404707074165344, "learning_rate": 1.082053770647866e-05, "loss": 0.4734, "step": 34569 }, { "epoch": 0.9492037342119715, "grad_norm": 0.4269319772720337, "learning_rate": 1.0820107268691093e-05, "loss": 0.5173, "step": 34570 }, { "epoch": 0.9492311916529379, "grad_norm": 0.37533214688301086, "learning_rate": 1.0819676829373764e-05, "loss": 0.5355, "step": 34571 }, { "epoch": 0.9492586490939044, "grad_norm": 0.3804558217525482, "learning_rate": 1.0819246388527473e-05, "loss": 0.4381, "step": 34572 }, { "epoch": 0.949286106534871, "grad_norm": 0.5545331239700317, "learning_rate": 1.0818815946153029e-05, "loss": 0.5632, "step": 34573 }, { "epoch": 0.9493135639758374, "grad_norm": 0.3963298499584198, "learning_rate": 1.0818385502251229e-05, "loss": 0.4793, "step": 34574 }, { "epoch": 0.949341021416804, "grad_norm": 0.40409693121910095, "learning_rate": 1.0817955056822882e-05, "loss": 0.5401, "step": 34575 }, { "epoch": 0.9493684788577704, "grad_norm": 0.38733068108558655, "learning_rate": 1.0817524609868787e-05, "loss": 0.489, "step": 34576 }, { "epoch": 0.949395936298737, "grad_norm": 0.3964945375919342, "learning_rate": 1.0817094161389747e-05, "loss": 0.5126, "step": 34577 }, { "epoch": 0.9494233937397034, "grad_norm": 0.4345715045928955, "learning_rate": 1.0816663711386565e-05, "loss": 0.5729, "step": 34578 }, { "epoch": 0.94945085118067, "grad_norm": 0.41696643829345703, "learning_rate": 1.0816233259860047e-05, "loss": 0.5911, "step": 34579 }, { "epoch": 0.9494783086216365, "grad_norm": 0.4535013735294342, "learning_rate": 1.0815802806810991e-05, "loss": 0.5009, "step": 34580 }, { "epoch": 0.949505766062603, "grad_norm": 0.35992076992988586, "learning_rate": 1.0815372352240203e-05, "loss": 0.4447, "step": 34581 }, { "epoch": 0.9495332235035695, "grad_norm": 0.3586016893386841, "learning_rate": 1.0814941896148486e-05, "loss": 0.4528, "step": 34582 }, { "epoch": 0.9495606809445359, "grad_norm": 0.40384766459465027, "learning_rate": 1.0814511438536641e-05, "loss": 0.4937, "step": 34583 }, { "epoch": 0.9495881383855025, "grad_norm": 0.4180490970611572, "learning_rate": 1.0814080979405475e-05, "loss": 0.5094, "step": 34584 }, { "epoch": 0.9496155958264689, "grad_norm": 0.5167869329452515, "learning_rate": 1.0813650518755784e-05, "loss": 0.511, "step": 34585 }, { "epoch": 0.9496430532674355, "grad_norm": 0.40917059779167175, "learning_rate": 1.0813220056588379e-05, "loss": 0.5148, "step": 34586 }, { "epoch": 0.949670510708402, "grad_norm": 0.44848203659057617, "learning_rate": 1.0812789592904053e-05, "loss": 0.4817, "step": 34587 }, { "epoch": 0.9496979681493685, "grad_norm": 0.4109098017215729, "learning_rate": 1.0812359127703621e-05, "loss": 0.527, "step": 34588 }, { "epoch": 0.949725425590335, "grad_norm": 0.4482218325138092, "learning_rate": 1.081192866098788e-05, "loss": 0.4546, "step": 34589 }, { "epoch": 0.9497528830313015, "grad_norm": 0.38739490509033203, "learning_rate": 1.081149819275763e-05, "loss": 0.5531, "step": 34590 }, { "epoch": 0.949780340472268, "grad_norm": 0.43590620160102844, "learning_rate": 1.0811067723013677e-05, "loss": 0.5458, "step": 34591 }, { "epoch": 0.9498077979132344, "grad_norm": 0.41062605381011963, "learning_rate": 1.0810637251756826e-05, "loss": 0.5591, "step": 34592 }, { "epoch": 0.949835255354201, "grad_norm": 0.3849201500415802, "learning_rate": 1.0810206778987875e-05, "loss": 0.4577, "step": 34593 }, { "epoch": 0.9498627127951675, "grad_norm": 0.37505054473876953, "learning_rate": 1.0809776304707633e-05, "loss": 0.4927, "step": 34594 }, { "epoch": 0.949890170236134, "grad_norm": 0.3688153922557831, "learning_rate": 1.0809345828916898e-05, "loss": 0.4504, "step": 34595 }, { "epoch": 0.9499176276771005, "grad_norm": 0.3523881137371063, "learning_rate": 1.0808915351616473e-05, "loss": 0.4086, "step": 34596 }, { "epoch": 0.949945085118067, "grad_norm": 0.6429381370544434, "learning_rate": 1.0808484872807165e-05, "loss": 0.5521, "step": 34597 }, { "epoch": 0.9499725425590335, "grad_norm": 0.40183520317077637, "learning_rate": 1.0808054392489775e-05, "loss": 0.4554, "step": 34598 }, { "epoch": 0.95, "grad_norm": 0.42293375730514526, "learning_rate": 1.0807623910665106e-05, "loss": 0.521, "step": 34599 }, { "epoch": 0.9500274574409665, "grad_norm": 0.5955321192741394, "learning_rate": 1.0807193427333958e-05, "loss": 0.5488, "step": 34600 }, { "epoch": 0.950054914881933, "grad_norm": 0.36651739478111267, "learning_rate": 1.0806762942497137e-05, "loss": 0.5514, "step": 34601 }, { "epoch": 0.9500823723228995, "grad_norm": 0.41866442561149597, "learning_rate": 1.0806332456155448e-05, "loss": 0.4887, "step": 34602 }, { "epoch": 0.950109829763866, "grad_norm": 0.4281187355518341, "learning_rate": 1.0805901968309688e-05, "loss": 0.5025, "step": 34603 }, { "epoch": 0.9501372872048325, "grad_norm": 0.4019439220428467, "learning_rate": 1.0805471478960668e-05, "loss": 0.5312, "step": 34604 }, { "epoch": 0.950164744645799, "grad_norm": 0.35646089911460876, "learning_rate": 1.0805040988109183e-05, "loss": 0.3947, "step": 34605 }, { "epoch": 0.9501922020867655, "grad_norm": 0.367939829826355, "learning_rate": 1.0804610495756043e-05, "loss": 0.4399, "step": 34606 }, { "epoch": 0.950219659527732, "grad_norm": 0.4174533188343048, "learning_rate": 1.0804180001902047e-05, "loss": 0.4466, "step": 34607 }, { "epoch": 0.9502471169686986, "grad_norm": 0.3951793909072876, "learning_rate": 1.0803749506547997e-05, "loss": 0.5332, "step": 34608 }, { "epoch": 0.950274574409665, "grad_norm": 0.44432249665260315, "learning_rate": 1.0803319009694698e-05, "loss": 0.5475, "step": 34609 }, { "epoch": 0.9503020318506316, "grad_norm": 0.37314918637275696, "learning_rate": 1.0802888511342953e-05, "loss": 0.4531, "step": 34610 }, { "epoch": 0.950329489291598, "grad_norm": 0.4050174653530121, "learning_rate": 1.0802458011493563e-05, "loss": 0.5284, "step": 34611 }, { "epoch": 0.9503569467325645, "grad_norm": 0.41504377126693726, "learning_rate": 1.0802027510147336e-05, "loss": 0.477, "step": 34612 }, { "epoch": 0.950384404173531, "grad_norm": 0.6143400073051453, "learning_rate": 1.0801597007305066e-05, "loss": 0.5294, "step": 34613 }, { "epoch": 0.9504118616144975, "grad_norm": 0.3837282061576843, "learning_rate": 1.0801166502967569e-05, "loss": 0.4892, "step": 34614 }, { "epoch": 0.9504393190554641, "grad_norm": 0.38294512033462524, "learning_rate": 1.0800735997135636e-05, "loss": 0.4197, "step": 34615 }, { "epoch": 0.9504667764964305, "grad_norm": 0.37327584624290466, "learning_rate": 1.0800305489810077e-05, "loss": 0.5724, "step": 34616 }, { "epoch": 0.9504942339373971, "grad_norm": 0.380982905626297, "learning_rate": 1.0799874980991691e-05, "loss": 0.4297, "step": 34617 }, { "epoch": 0.9505216913783635, "grad_norm": 0.3949768543243408, "learning_rate": 1.0799444470681285e-05, "loss": 0.4403, "step": 34618 }, { "epoch": 0.9505491488193301, "grad_norm": 0.46822884678840637, "learning_rate": 1.0799013958879658e-05, "loss": 0.4757, "step": 34619 }, { "epoch": 0.9505766062602965, "grad_norm": 0.3878385126590729, "learning_rate": 1.0798583445587617e-05, "loss": 0.5409, "step": 34620 }, { "epoch": 0.950604063701263, "grad_norm": 0.3857860863208771, "learning_rate": 1.0798152930805959e-05, "loss": 0.4436, "step": 34621 }, { "epoch": 0.9506315211422296, "grad_norm": 0.40292465686798096, "learning_rate": 1.0797722414535494e-05, "loss": 0.483, "step": 34622 }, { "epoch": 0.950658978583196, "grad_norm": 0.40507784485816956, "learning_rate": 1.0797291896777025e-05, "loss": 0.5562, "step": 34623 }, { "epoch": 0.9506864360241626, "grad_norm": 0.5070155262947083, "learning_rate": 1.0796861377531346e-05, "loss": 0.4017, "step": 34624 }, { "epoch": 0.950713893465129, "grad_norm": 0.41590598225593567, "learning_rate": 1.0796430856799272e-05, "loss": 0.5457, "step": 34625 }, { "epoch": 0.9507413509060956, "grad_norm": 0.38884925842285156, "learning_rate": 1.0796000334581595e-05, "loss": 0.4489, "step": 34626 }, { "epoch": 0.950768808347062, "grad_norm": 0.3932308256626129, "learning_rate": 1.0795569810879125e-05, "loss": 0.5175, "step": 34627 }, { "epoch": 0.9507962657880286, "grad_norm": 0.3416912257671356, "learning_rate": 1.0795139285692666e-05, "loss": 0.4722, "step": 34628 }, { "epoch": 0.9508237232289951, "grad_norm": 0.36683398485183716, "learning_rate": 1.0794708759023016e-05, "loss": 0.4641, "step": 34629 }, { "epoch": 0.9508511806699615, "grad_norm": 0.42926549911499023, "learning_rate": 1.079427823087098e-05, "loss": 0.5298, "step": 34630 }, { "epoch": 0.9508786381109281, "grad_norm": 0.38897180557250977, "learning_rate": 1.0793847701237364e-05, "loss": 0.4809, "step": 34631 }, { "epoch": 0.9509060955518945, "grad_norm": 0.4393659830093384, "learning_rate": 1.0793417170122966e-05, "loss": 0.5766, "step": 34632 }, { "epoch": 0.9509335529928611, "grad_norm": 0.38501331210136414, "learning_rate": 1.0792986637528596e-05, "loss": 0.4315, "step": 34633 }, { "epoch": 0.9509610104338275, "grad_norm": 0.4359462559223175, "learning_rate": 1.0792556103455048e-05, "loss": 0.5829, "step": 34634 }, { "epoch": 0.9509884678747941, "grad_norm": 0.40449580550193787, "learning_rate": 1.079212556790313e-05, "loss": 0.5016, "step": 34635 }, { "epoch": 0.9510159253157606, "grad_norm": 0.38746634125709534, "learning_rate": 1.0791695030873648e-05, "loss": 0.4953, "step": 34636 }, { "epoch": 0.9510433827567271, "grad_norm": 0.37471598386764526, "learning_rate": 1.0791264492367402e-05, "loss": 0.478, "step": 34637 }, { "epoch": 0.9510708401976936, "grad_norm": 0.4485347867012024, "learning_rate": 1.0790833952385196e-05, "loss": 0.505, "step": 34638 }, { "epoch": 0.95109829763866, "grad_norm": 0.40403518080711365, "learning_rate": 1.0790403410927828e-05, "loss": 0.4795, "step": 34639 }, { "epoch": 0.9511257550796266, "grad_norm": 0.4250563085079193, "learning_rate": 1.0789972867996108e-05, "loss": 0.5395, "step": 34640 }, { "epoch": 0.951153212520593, "grad_norm": 0.647186815738678, "learning_rate": 1.0789542323590839e-05, "loss": 0.5047, "step": 34641 }, { "epoch": 0.9511806699615596, "grad_norm": 0.5865503549575806, "learning_rate": 1.0789111777712816e-05, "loss": 0.4569, "step": 34642 }, { "epoch": 0.9512081274025261, "grad_norm": 0.6084676384925842, "learning_rate": 1.0788681230362852e-05, "loss": 0.4093, "step": 34643 }, { "epoch": 0.9512355848434926, "grad_norm": 0.4614333510398865, "learning_rate": 1.0788250681541743e-05, "loss": 0.4772, "step": 34644 }, { "epoch": 0.9512630422844591, "grad_norm": 3.5255706310272217, "learning_rate": 1.0787820131250298e-05, "loss": 0.5459, "step": 34645 }, { "epoch": 0.9512904997254256, "grad_norm": 0.3893234431743622, "learning_rate": 1.0787389579489313e-05, "loss": 0.462, "step": 34646 }, { "epoch": 0.9513179571663921, "grad_norm": 0.4093390107154846, "learning_rate": 1.0786959026259598e-05, "loss": 0.4802, "step": 34647 }, { "epoch": 0.9513454146073586, "grad_norm": 0.3936988115310669, "learning_rate": 1.0786528471561952e-05, "loss": 0.5192, "step": 34648 }, { "epoch": 0.9513728720483251, "grad_norm": 0.40760713815689087, "learning_rate": 1.078609791539718e-05, "loss": 0.5061, "step": 34649 }, { "epoch": 0.9514003294892917, "grad_norm": 0.41084179282188416, "learning_rate": 1.0785667357766085e-05, "loss": 0.4805, "step": 34650 }, { "epoch": 0.9514277869302581, "grad_norm": 0.46504950523376465, "learning_rate": 1.078523679866947e-05, "loss": 0.5393, "step": 34651 }, { "epoch": 0.9514552443712246, "grad_norm": 0.3490103483200073, "learning_rate": 1.0784806238108134e-05, "loss": 0.4689, "step": 34652 }, { "epoch": 0.9514827018121911, "grad_norm": 0.4992573857307434, "learning_rate": 1.0784375676082887e-05, "loss": 0.4846, "step": 34653 }, { "epoch": 0.9515101592531576, "grad_norm": 0.45588934421539307, "learning_rate": 1.0783945112594531e-05, "loss": 0.5361, "step": 34654 }, { "epoch": 0.9515376166941241, "grad_norm": 0.6914562582969666, "learning_rate": 1.0783514547643862e-05, "loss": 0.5089, "step": 34655 }, { "epoch": 0.9515650741350906, "grad_norm": 0.4092187285423279, "learning_rate": 1.0783083981231694e-05, "loss": 0.546, "step": 34656 }, { "epoch": 0.9515925315760572, "grad_norm": 0.42216941714286804, "learning_rate": 1.078265341335882e-05, "loss": 0.5202, "step": 34657 }, { "epoch": 0.9516199890170236, "grad_norm": 0.42972487211227417, "learning_rate": 1.078222284402605e-05, "loss": 0.4927, "step": 34658 }, { "epoch": 0.9516474464579902, "grad_norm": 0.4035871922969818, "learning_rate": 1.0781792273234184e-05, "loss": 0.5409, "step": 34659 }, { "epoch": 0.9516749038989566, "grad_norm": 0.4005240797996521, "learning_rate": 1.0781361700984025e-05, "loss": 0.544, "step": 34660 }, { "epoch": 0.9517023613399231, "grad_norm": 0.38408926129341125, "learning_rate": 1.0780931127276379e-05, "loss": 0.3526, "step": 34661 }, { "epoch": 0.9517298187808896, "grad_norm": 0.4006398022174835, "learning_rate": 1.0780500552112046e-05, "loss": 0.4808, "step": 34662 }, { "epoch": 0.9517572762218561, "grad_norm": 0.4167468249797821, "learning_rate": 1.0780069975491832e-05, "loss": 0.5772, "step": 34663 }, { "epoch": 0.9517847336628227, "grad_norm": 0.40460366010665894, "learning_rate": 1.0779639397416537e-05, "loss": 0.4344, "step": 34664 }, { "epoch": 0.9518121911037891, "grad_norm": 0.3931076228618622, "learning_rate": 1.0779208817886964e-05, "loss": 0.4909, "step": 34665 }, { "epoch": 0.9518396485447557, "grad_norm": 0.37698519229888916, "learning_rate": 1.077877823690392e-05, "loss": 0.5221, "step": 34666 }, { "epoch": 0.9518671059857221, "grad_norm": 0.39005914330482483, "learning_rate": 1.0778347654468204e-05, "loss": 0.4571, "step": 34667 }, { "epoch": 0.9518945634266887, "grad_norm": 0.45187491178512573, "learning_rate": 1.0777917070580623e-05, "loss": 0.4705, "step": 34668 }, { "epoch": 0.9519220208676551, "grad_norm": 0.362453430891037, "learning_rate": 1.0777486485241978e-05, "loss": 0.4763, "step": 34669 }, { "epoch": 0.9519494783086216, "grad_norm": 0.8331220746040344, "learning_rate": 1.0777055898453072e-05, "loss": 0.5448, "step": 34670 }, { "epoch": 0.9519769357495882, "grad_norm": 0.38890767097473145, "learning_rate": 1.0776625310214712e-05, "loss": 0.4775, "step": 34671 }, { "epoch": 0.9520043931905546, "grad_norm": 0.3732020854949951, "learning_rate": 1.0776194720527694e-05, "loss": 0.4215, "step": 34672 }, { "epoch": 0.9520318506315212, "grad_norm": 0.357217937707901, "learning_rate": 1.0775764129392827e-05, "loss": 0.4668, "step": 34673 }, { "epoch": 0.9520593080724876, "grad_norm": 0.3584648668766022, "learning_rate": 1.0775333536810912e-05, "loss": 0.455, "step": 34674 }, { "epoch": 0.9520867655134542, "grad_norm": 0.5152673125267029, "learning_rate": 1.0774902942782752e-05, "loss": 0.5568, "step": 34675 }, { "epoch": 0.9521142229544206, "grad_norm": 0.40125662088394165, "learning_rate": 1.077447234730915e-05, "loss": 0.5002, "step": 34676 }, { "epoch": 0.9521416803953872, "grad_norm": 0.3741495609283447, "learning_rate": 1.0774041750390914e-05, "loss": 0.5062, "step": 34677 }, { "epoch": 0.9521691378363537, "grad_norm": 0.3984069526195526, "learning_rate": 1.0773611152028838e-05, "loss": 0.4728, "step": 34678 }, { "epoch": 0.9521965952773201, "grad_norm": 0.3916851282119751, "learning_rate": 1.0773180552223732e-05, "loss": 0.5101, "step": 34679 }, { "epoch": 0.9522240527182867, "grad_norm": 0.39156991243362427, "learning_rate": 1.0772749950976397e-05, "loss": 0.4236, "step": 34680 }, { "epoch": 0.9522515101592531, "grad_norm": 0.39904576539993286, "learning_rate": 1.077231934828764e-05, "loss": 0.5385, "step": 34681 }, { "epoch": 0.9522789676002197, "grad_norm": 0.36566048860549927, "learning_rate": 1.0771888744158256e-05, "loss": 0.4054, "step": 34682 }, { "epoch": 0.9523064250411861, "grad_norm": 0.4323814809322357, "learning_rate": 1.0771458138589054e-05, "loss": 0.5066, "step": 34683 }, { "epoch": 0.9523338824821527, "grad_norm": 0.3970227539539337, "learning_rate": 1.077102753158084e-05, "loss": 0.5013, "step": 34684 }, { "epoch": 0.9523613399231191, "grad_norm": 0.3950727880001068, "learning_rate": 1.0770596923134413e-05, "loss": 0.4348, "step": 34685 }, { "epoch": 0.9523887973640857, "grad_norm": 0.3963001072406769, "learning_rate": 1.0770166313250574e-05, "loss": 0.5456, "step": 34686 }, { "epoch": 0.9524162548050522, "grad_norm": 0.40517473220825195, "learning_rate": 1.076973570193013e-05, "loss": 0.6088, "step": 34687 }, { "epoch": 0.9524437122460186, "grad_norm": 0.35962149500846863, "learning_rate": 1.0769305089173883e-05, "loss": 0.4579, "step": 34688 }, { "epoch": 0.9524711696869852, "grad_norm": 0.3653494417667389, "learning_rate": 1.0768874474982638e-05, "loss": 0.4521, "step": 34689 }, { "epoch": 0.9524986271279516, "grad_norm": 0.3759037256240845, "learning_rate": 1.0768443859357196e-05, "loss": 0.4815, "step": 34690 }, { "epoch": 0.9525260845689182, "grad_norm": 0.37586599588394165, "learning_rate": 1.076801324229836e-05, "loss": 0.4908, "step": 34691 }, { "epoch": 0.9525535420098846, "grad_norm": 0.4187792241573334, "learning_rate": 1.0767582623806935e-05, "loss": 0.4505, "step": 34692 }, { "epoch": 0.9525809994508512, "grad_norm": 0.3643229007720947, "learning_rate": 1.0767152003883724e-05, "loss": 0.4856, "step": 34693 }, { "epoch": 0.9526084568918177, "grad_norm": 0.3992065489292145, "learning_rate": 1.0766721382529527e-05, "loss": 0.5307, "step": 34694 }, { "epoch": 0.9526359143327842, "grad_norm": 0.3604598343372345, "learning_rate": 1.0766290759745153e-05, "loss": 0.4063, "step": 34695 }, { "epoch": 0.9526633717737507, "grad_norm": 0.3935020864009857, "learning_rate": 1.0765860135531401e-05, "loss": 0.5994, "step": 34696 }, { "epoch": 0.9526908292147171, "grad_norm": 0.41195449233055115, "learning_rate": 1.0765429509889077e-05, "loss": 0.4959, "step": 34697 }, { "epoch": 0.9527182866556837, "grad_norm": 0.39934781193733215, "learning_rate": 1.0764998882818977e-05, "loss": 0.5226, "step": 34698 }, { "epoch": 0.9527457440966501, "grad_norm": 0.40157195925712585, "learning_rate": 1.0764568254321916e-05, "loss": 0.4989, "step": 34699 }, { "epoch": 0.9527732015376167, "grad_norm": 0.36365458369255066, "learning_rate": 1.076413762439869e-05, "loss": 0.3707, "step": 34700 }, { "epoch": 0.9528006589785832, "grad_norm": 0.38507965207099915, "learning_rate": 1.07637069930501e-05, "loss": 0.5191, "step": 34701 }, { "epoch": 0.9528281164195497, "grad_norm": 0.37922927737236023, "learning_rate": 1.0763276360276957e-05, "loss": 0.4894, "step": 34702 }, { "epoch": 0.9528555738605162, "grad_norm": 0.4038503170013428, "learning_rate": 1.076284572608006e-05, "loss": 0.446, "step": 34703 }, { "epoch": 0.9528830313014827, "grad_norm": 0.3981234133243561, "learning_rate": 1.076241509046021e-05, "loss": 0.4322, "step": 34704 }, { "epoch": 0.9529104887424492, "grad_norm": 0.36783644556999207, "learning_rate": 1.0761984453418213e-05, "loss": 0.4191, "step": 34705 }, { "epoch": 0.9529379461834157, "grad_norm": 0.3733910024166107, "learning_rate": 1.0761553814954871e-05, "loss": 0.3462, "step": 34706 }, { "epoch": 0.9529654036243822, "grad_norm": 0.3762595057487488, "learning_rate": 1.0761123175070989e-05, "loss": 0.4605, "step": 34707 }, { "epoch": 0.9529928610653488, "grad_norm": 0.4503692090511322, "learning_rate": 1.076069253376737e-05, "loss": 0.5273, "step": 34708 }, { "epoch": 0.9530203185063152, "grad_norm": 0.38910171389579773, "learning_rate": 1.0760261891044814e-05, "loss": 0.465, "step": 34709 }, { "epoch": 0.9530477759472817, "grad_norm": 0.4605788290500641, "learning_rate": 1.075983124690413e-05, "loss": 0.5668, "step": 34710 }, { "epoch": 0.9530752333882482, "grad_norm": 0.38245296478271484, "learning_rate": 1.0759400601346117e-05, "loss": 0.4487, "step": 34711 }, { "epoch": 0.9531026908292147, "grad_norm": 0.38746464252471924, "learning_rate": 1.0758969954371578e-05, "loss": 0.3899, "step": 34712 }, { "epoch": 0.9531301482701812, "grad_norm": 0.3762000799179077, "learning_rate": 1.0758539305981322e-05, "loss": 0.4369, "step": 34713 }, { "epoch": 0.9531576057111477, "grad_norm": 0.4149077236652374, "learning_rate": 1.0758108656176143e-05, "loss": 0.4594, "step": 34714 }, { "epoch": 0.9531850631521143, "grad_norm": 0.39852505922317505, "learning_rate": 1.0757678004956852e-05, "loss": 0.4671, "step": 34715 }, { "epoch": 0.9532125205930807, "grad_norm": 0.3510734438896179, "learning_rate": 1.075724735232425e-05, "loss": 0.4414, "step": 34716 }, { "epoch": 0.9532399780340473, "grad_norm": 0.42359659075737, "learning_rate": 1.075681669827914e-05, "loss": 0.5653, "step": 34717 }, { "epoch": 0.9532674354750137, "grad_norm": 0.44597098231315613, "learning_rate": 1.0756386042822324e-05, "loss": 0.5209, "step": 34718 }, { "epoch": 0.9532948929159802, "grad_norm": 0.41036367416381836, "learning_rate": 1.0755955385954608e-05, "loss": 0.5195, "step": 34719 }, { "epoch": 0.9533223503569467, "grad_norm": 0.3762610852718353, "learning_rate": 1.0755524727676794e-05, "loss": 0.4536, "step": 34720 }, { "epoch": 0.9533498077979132, "grad_norm": 0.4061617851257324, "learning_rate": 1.0755094067989685e-05, "loss": 0.5099, "step": 34721 }, { "epoch": 0.9533772652388798, "grad_norm": 0.3931017518043518, "learning_rate": 1.0754663406894083e-05, "loss": 0.4892, "step": 34722 }, { "epoch": 0.9534047226798462, "grad_norm": 0.3837074339389801, "learning_rate": 1.0754232744390793e-05, "loss": 0.4752, "step": 34723 }, { "epoch": 0.9534321801208128, "grad_norm": 0.5086297988891602, "learning_rate": 1.075380208048062e-05, "loss": 0.494, "step": 34724 }, { "epoch": 0.9534596375617792, "grad_norm": 0.4013034701347351, "learning_rate": 1.0753371415164366e-05, "loss": 0.4861, "step": 34725 }, { "epoch": 0.9534870950027458, "grad_norm": 0.41513222455978394, "learning_rate": 1.0752940748442833e-05, "loss": 0.5575, "step": 34726 }, { "epoch": 0.9535145524437122, "grad_norm": 0.4832100570201874, "learning_rate": 1.0752510080316824e-05, "loss": 0.4796, "step": 34727 }, { "epoch": 0.9535420098846787, "grad_norm": 0.3570649325847626, "learning_rate": 1.0752079410787145e-05, "loss": 0.4733, "step": 34728 }, { "epoch": 0.9535694673256453, "grad_norm": 0.4189216196537018, "learning_rate": 1.0751648739854597e-05, "loss": 0.4836, "step": 34729 }, { "epoch": 0.9535969247666117, "grad_norm": 0.47783076763153076, "learning_rate": 1.0751218067519983e-05, "loss": 0.449, "step": 34730 }, { "epoch": 0.9536243822075783, "grad_norm": 0.6438360214233398, "learning_rate": 1.075078739378411e-05, "loss": 0.5261, "step": 34731 }, { "epoch": 0.9536518396485447, "grad_norm": 0.37879130244255066, "learning_rate": 1.0750356718647778e-05, "loss": 0.523, "step": 34732 }, { "epoch": 0.9536792970895113, "grad_norm": 0.37147605419158936, "learning_rate": 1.0749926042111791e-05, "loss": 0.5151, "step": 34733 }, { "epoch": 0.9537067545304777, "grad_norm": 0.4015268087387085, "learning_rate": 1.0749495364176954e-05, "loss": 0.4552, "step": 34734 }, { "epoch": 0.9537342119714443, "grad_norm": 0.4174935221672058, "learning_rate": 1.0749064684844066e-05, "loss": 0.4657, "step": 34735 }, { "epoch": 0.9537616694124108, "grad_norm": 0.38166555762290955, "learning_rate": 1.0748634004113936e-05, "loss": 0.5, "step": 34736 }, { "epoch": 0.9537891268533772, "grad_norm": 0.40984830260276794, "learning_rate": 1.0748203321987365e-05, "loss": 0.4693, "step": 34737 }, { "epoch": 0.9538165842943438, "grad_norm": 0.41083550453186035, "learning_rate": 1.0747772638465152e-05, "loss": 0.5232, "step": 34738 }, { "epoch": 0.9538440417353102, "grad_norm": 0.4378269612789154, "learning_rate": 1.074734195354811e-05, "loss": 0.4738, "step": 34739 }, { "epoch": 0.9538714991762768, "grad_norm": 0.3730698823928833, "learning_rate": 1.074691126723703e-05, "loss": 0.4676, "step": 34740 }, { "epoch": 0.9538989566172432, "grad_norm": 0.372438907623291, "learning_rate": 1.0746480579532727e-05, "loss": 0.5168, "step": 34741 }, { "epoch": 0.9539264140582098, "grad_norm": 0.4215037226676941, "learning_rate": 1.0746049890435999e-05, "loss": 0.4673, "step": 34742 }, { "epoch": 0.9539538714991763, "grad_norm": 0.3262975513935089, "learning_rate": 1.0745619199947648e-05, "loss": 0.4282, "step": 34743 }, { "epoch": 0.9539813289401428, "grad_norm": 0.3862096667289734, "learning_rate": 1.0745188508068482e-05, "loss": 0.4854, "step": 34744 }, { "epoch": 0.9540087863811093, "grad_norm": 0.37320539355278015, "learning_rate": 1.0744757814799298e-05, "loss": 0.476, "step": 34745 }, { "epoch": 0.9540362438220757, "grad_norm": 0.39405685663223267, "learning_rate": 1.0744327120140907e-05, "loss": 0.4945, "step": 34746 }, { "epoch": 0.9540637012630423, "grad_norm": 0.3867015242576599, "learning_rate": 1.0743896424094107e-05, "loss": 0.4906, "step": 34747 }, { "epoch": 0.9540911587040087, "grad_norm": 0.37037187814712524, "learning_rate": 1.0743465726659702e-05, "loss": 0.5063, "step": 34748 }, { "epoch": 0.9541186161449753, "grad_norm": 0.35702237486839294, "learning_rate": 1.0743035027838496e-05, "loss": 0.4612, "step": 34749 }, { "epoch": 0.9541460735859418, "grad_norm": 0.3745707869529724, "learning_rate": 1.0742604327631289e-05, "loss": 0.3941, "step": 34750 }, { "epoch": 0.9541735310269083, "grad_norm": 0.4496639668941498, "learning_rate": 1.0742173626038895e-05, "loss": 0.5241, "step": 34751 }, { "epoch": 0.9542009884678748, "grad_norm": 0.38597381114959717, "learning_rate": 1.0741742923062107e-05, "loss": 0.455, "step": 34752 }, { "epoch": 0.9542284459088413, "grad_norm": 0.4154333174228668, "learning_rate": 1.074131221870173e-05, "loss": 0.571, "step": 34753 }, { "epoch": 0.9542559033498078, "grad_norm": 0.41185128688812256, "learning_rate": 1.0740881512958573e-05, "loss": 0.4914, "step": 34754 }, { "epoch": 0.9542833607907742, "grad_norm": 0.3850076198577881, "learning_rate": 1.074045080583343e-05, "loss": 0.4266, "step": 34755 }, { "epoch": 0.9543108182317408, "grad_norm": 0.5379679203033447, "learning_rate": 1.0740020097327115e-05, "loss": 0.4525, "step": 34756 }, { "epoch": 0.9543382756727073, "grad_norm": 0.43927571177482605, "learning_rate": 1.0739589387440425e-05, "loss": 0.5382, "step": 34757 }, { "epoch": 0.9543657331136738, "grad_norm": 0.3820250928401947, "learning_rate": 1.0739158676174161e-05, "loss": 0.5428, "step": 34758 }, { "epoch": 0.9543931905546403, "grad_norm": 0.4412820637226105, "learning_rate": 1.0738727963529136e-05, "loss": 0.4866, "step": 34759 }, { "epoch": 0.9544206479956068, "grad_norm": 0.3923195004463196, "learning_rate": 1.0738297249506145e-05, "loss": 0.4643, "step": 34760 }, { "epoch": 0.9544481054365733, "grad_norm": 0.41386234760284424, "learning_rate": 1.0737866534105994e-05, "loss": 0.4449, "step": 34761 }, { "epoch": 0.9544755628775398, "grad_norm": 0.38445335626602173, "learning_rate": 1.0737435817329486e-05, "loss": 0.4712, "step": 34762 }, { "epoch": 0.9545030203185063, "grad_norm": 0.3935084939002991, "learning_rate": 1.0737005099177424e-05, "loss": 0.5247, "step": 34763 }, { "epoch": 0.9545304777594729, "grad_norm": 0.43227770924568176, "learning_rate": 1.0736574379650615e-05, "loss": 0.5012, "step": 34764 }, { "epoch": 0.9545579352004393, "grad_norm": 0.37367716431617737, "learning_rate": 1.073614365874986e-05, "loss": 0.4458, "step": 34765 }, { "epoch": 0.9545853926414058, "grad_norm": 0.3809497356414795, "learning_rate": 1.0735712936475958e-05, "loss": 0.5056, "step": 34766 }, { "epoch": 0.9546128500823723, "grad_norm": 0.3848358392715454, "learning_rate": 1.0735282212829721e-05, "loss": 0.5126, "step": 34767 }, { "epoch": 0.9546403075233388, "grad_norm": 0.42073187232017517, "learning_rate": 1.0734851487811943e-05, "loss": 0.4787, "step": 34768 }, { "epoch": 0.9546677649643053, "grad_norm": 0.4400480389595032, "learning_rate": 1.0734420761423437e-05, "loss": 0.4682, "step": 34769 }, { "epoch": 0.9546952224052718, "grad_norm": 0.3849635124206543, "learning_rate": 1.0733990033665001e-05, "loss": 0.5302, "step": 34770 }, { "epoch": 0.9547226798462384, "grad_norm": 0.3995858132839203, "learning_rate": 1.0733559304537438e-05, "loss": 0.5, "step": 34771 }, { "epoch": 0.9547501372872048, "grad_norm": 0.4570026993751526, "learning_rate": 1.0733128574041554e-05, "loss": 0.5658, "step": 34772 }, { "epoch": 0.9547775947281714, "grad_norm": 0.4237518906593323, "learning_rate": 1.0732697842178147e-05, "loss": 0.5569, "step": 34773 }, { "epoch": 0.9548050521691378, "grad_norm": 0.3687550723552704, "learning_rate": 1.0732267108948031e-05, "loss": 0.4124, "step": 34774 }, { "epoch": 0.9548325096101044, "grad_norm": 0.4449837803840637, "learning_rate": 1.0731836374352001e-05, "loss": 0.5949, "step": 34775 }, { "epoch": 0.9548599670510708, "grad_norm": 0.3826906383037567, "learning_rate": 1.0731405638390859e-05, "loss": 0.4732, "step": 34776 }, { "epoch": 0.9548874244920373, "grad_norm": 0.4545523524284363, "learning_rate": 1.0730974901065416e-05, "loss": 0.4883, "step": 34777 }, { "epoch": 0.9549148819330039, "grad_norm": 0.3914506733417511, "learning_rate": 1.0730544162376472e-05, "loss": 0.5674, "step": 34778 }, { "epoch": 0.9549423393739703, "grad_norm": 0.41449588537216187, "learning_rate": 1.0730113422324828e-05, "loss": 0.4166, "step": 34779 }, { "epoch": 0.9549697968149369, "grad_norm": 0.3721112012863159, "learning_rate": 1.0729682680911291e-05, "loss": 0.4684, "step": 34780 }, { "epoch": 0.9549972542559033, "grad_norm": 0.3512680232524872, "learning_rate": 1.072925193813666e-05, "loss": 0.4774, "step": 34781 }, { "epoch": 0.9550247116968699, "grad_norm": 0.412949800491333, "learning_rate": 1.0728821194001743e-05, "loss": 0.4603, "step": 34782 }, { "epoch": 0.9550521691378363, "grad_norm": 0.41856303811073303, "learning_rate": 1.0728390448507344e-05, "loss": 0.5, "step": 34783 }, { "epoch": 0.9550796265788029, "grad_norm": 0.3839171826839447, "learning_rate": 1.072795970165426e-05, "loss": 0.4395, "step": 34784 }, { "epoch": 0.9551070840197694, "grad_norm": 0.38706550002098083, "learning_rate": 1.0727528953443303e-05, "loss": 0.4543, "step": 34785 }, { "epoch": 0.9551345414607358, "grad_norm": 0.36836111545562744, "learning_rate": 1.0727098203875268e-05, "loss": 0.4543, "step": 34786 }, { "epoch": 0.9551619989017024, "grad_norm": 0.401315301656723, "learning_rate": 1.0726667452950967e-05, "loss": 0.4767, "step": 34787 }, { "epoch": 0.9551894563426688, "grad_norm": 0.41283655166625977, "learning_rate": 1.0726236700671198e-05, "loss": 0.4993, "step": 34788 }, { "epoch": 0.9552169137836354, "grad_norm": 0.396470308303833, "learning_rate": 1.0725805947036764e-05, "loss": 0.4736, "step": 34789 }, { "epoch": 0.9552443712246018, "grad_norm": 0.41168782114982605, "learning_rate": 1.0725375192048472e-05, "loss": 0.4901, "step": 34790 }, { "epoch": 0.9552718286655684, "grad_norm": 0.4027193784713745, "learning_rate": 1.0724944435707123e-05, "loss": 0.5488, "step": 34791 }, { "epoch": 0.9552992861065349, "grad_norm": 0.3457355201244354, "learning_rate": 1.0724513678013522e-05, "loss": 0.4108, "step": 34792 }, { "epoch": 0.9553267435475014, "grad_norm": 0.35651129484176636, "learning_rate": 1.0724082918968473e-05, "loss": 0.4476, "step": 34793 }, { "epoch": 0.9553542009884679, "grad_norm": 0.43909981846809387, "learning_rate": 1.0723652158572774e-05, "loss": 0.4891, "step": 34794 }, { "epoch": 0.9553816584294343, "grad_norm": 0.3919883072376251, "learning_rate": 1.0723221396827236e-05, "loss": 0.4685, "step": 34795 }, { "epoch": 0.9554091158704009, "grad_norm": 0.3805553615093231, "learning_rate": 1.0722790633732661e-05, "loss": 0.4198, "step": 34796 }, { "epoch": 0.9554365733113673, "grad_norm": 0.4319494962692261, "learning_rate": 1.0722359869289846e-05, "loss": 0.4865, "step": 34797 }, { "epoch": 0.9554640307523339, "grad_norm": 0.4117768406867981, "learning_rate": 1.0721929103499604e-05, "loss": 0.5326, "step": 34798 }, { "epoch": 0.9554914881933004, "grad_norm": 0.3864593505859375, "learning_rate": 1.072149833636273e-05, "loss": 0.4621, "step": 34799 }, { "epoch": 0.9555189456342669, "grad_norm": 0.4172755777835846, "learning_rate": 1.0721067567880032e-05, "loss": 0.4923, "step": 34800 }, { "epoch": 0.9555464030752334, "grad_norm": 0.3540259897708893, "learning_rate": 1.0720636798052315e-05, "loss": 0.3603, "step": 34801 }, { "epoch": 0.9555738605161999, "grad_norm": 0.4306371808052063, "learning_rate": 1.0720206026880378e-05, "loss": 0.4626, "step": 34802 }, { "epoch": 0.9556013179571664, "grad_norm": 0.3914659917354584, "learning_rate": 1.0719775254365031e-05, "loss": 0.5021, "step": 34803 }, { "epoch": 0.9556287753981328, "grad_norm": 0.3891967236995697, "learning_rate": 1.0719344480507067e-05, "loss": 0.4491, "step": 34804 }, { "epoch": 0.9556562328390994, "grad_norm": 0.38523900508880615, "learning_rate": 1.0718913705307302e-05, "loss": 0.5565, "step": 34805 }, { "epoch": 0.955683690280066, "grad_norm": 0.4280299246311188, "learning_rate": 1.0718482928766529e-05, "loss": 0.5774, "step": 34806 }, { "epoch": 0.9557111477210324, "grad_norm": 0.38938504457473755, "learning_rate": 1.0718052150885559e-05, "loss": 0.52, "step": 34807 }, { "epoch": 0.9557386051619989, "grad_norm": 0.5108791589736938, "learning_rate": 1.071762137166519e-05, "loss": 0.5308, "step": 34808 }, { "epoch": 0.9557660626029654, "grad_norm": 0.4434411823749542, "learning_rate": 1.0717190591106232e-05, "loss": 0.4761, "step": 34809 }, { "epoch": 0.9557935200439319, "grad_norm": 0.42142194509506226, "learning_rate": 1.071675980920948e-05, "loss": 0.4864, "step": 34810 }, { "epoch": 0.9558209774848984, "grad_norm": 0.3614460527896881, "learning_rate": 1.0716329025975745e-05, "loss": 0.453, "step": 34811 }, { "epoch": 0.9558484349258649, "grad_norm": 0.5181878209114075, "learning_rate": 1.0715898241405826e-05, "loss": 0.4137, "step": 34812 }, { "epoch": 0.9558758923668315, "grad_norm": 0.48544925451278687, "learning_rate": 1.071546745550053e-05, "loss": 0.481, "step": 34813 }, { "epoch": 0.9559033498077979, "grad_norm": 0.40290987491607666, "learning_rate": 1.0715036668260659e-05, "loss": 0.5476, "step": 34814 }, { "epoch": 0.9559308072487644, "grad_norm": 0.4516204297542572, "learning_rate": 1.0714605879687014e-05, "loss": 0.4418, "step": 34815 }, { "epoch": 0.9559582646897309, "grad_norm": 0.387788325548172, "learning_rate": 1.0714175089780405e-05, "loss": 0.4073, "step": 34816 }, { "epoch": 0.9559857221306974, "grad_norm": 0.37849318981170654, "learning_rate": 1.0713744298541627e-05, "loss": 0.5072, "step": 34817 }, { "epoch": 0.9560131795716639, "grad_norm": 0.4947929084300995, "learning_rate": 1.0713313505971492e-05, "loss": 0.4438, "step": 34818 }, { "epoch": 0.9560406370126304, "grad_norm": 0.4259732663631439, "learning_rate": 1.0712882712070798e-05, "loss": 0.459, "step": 34819 }, { "epoch": 0.956068094453597, "grad_norm": 0.4463706612586975, "learning_rate": 1.071245191684035e-05, "loss": 0.5479, "step": 34820 }, { "epoch": 0.9560955518945634, "grad_norm": 0.401046484708786, "learning_rate": 1.0712021120280951e-05, "loss": 0.4624, "step": 34821 }, { "epoch": 0.95612300933553, "grad_norm": 0.3881068825721741, "learning_rate": 1.0711590322393406e-05, "loss": 0.4595, "step": 34822 }, { "epoch": 0.9561504667764964, "grad_norm": 0.434304416179657, "learning_rate": 1.0711159523178519e-05, "loss": 0.4379, "step": 34823 }, { "epoch": 0.956177924217463, "grad_norm": 0.42218950390815735, "learning_rate": 1.0710728722637091e-05, "loss": 0.4977, "step": 34824 }, { "epoch": 0.9562053816584294, "grad_norm": 0.3998181223869324, "learning_rate": 1.0710297920769928e-05, "loss": 0.4608, "step": 34825 }, { "epoch": 0.9562328390993959, "grad_norm": 0.3457197844982147, "learning_rate": 1.0709867117577833e-05, "loss": 0.5315, "step": 34826 }, { "epoch": 0.9562602965403625, "grad_norm": 0.4187474250793457, "learning_rate": 1.070943631306161e-05, "loss": 0.6175, "step": 34827 }, { "epoch": 0.9562877539813289, "grad_norm": 0.4373100697994232, "learning_rate": 1.0709005507222059e-05, "loss": 0.5093, "step": 34828 }, { "epoch": 0.9563152114222955, "grad_norm": 0.3753771185874939, "learning_rate": 1.0708574700059988e-05, "loss": 0.4248, "step": 34829 }, { "epoch": 0.9563426688632619, "grad_norm": 0.3930647671222687, "learning_rate": 1.0708143891576197e-05, "loss": 0.4954, "step": 34830 }, { "epoch": 0.9563701263042285, "grad_norm": 0.4742387533187866, "learning_rate": 1.0707713081771497e-05, "loss": 0.506, "step": 34831 }, { "epoch": 0.9563975837451949, "grad_norm": 0.388192355632782, "learning_rate": 1.0707282270646682e-05, "loss": 0.4136, "step": 34832 }, { "epoch": 0.9564250411861615, "grad_norm": 0.4137936234474182, "learning_rate": 1.070685145820256e-05, "loss": 0.4913, "step": 34833 }, { "epoch": 0.956452498627128, "grad_norm": 0.33610060811042786, "learning_rate": 1.0706420644439935e-05, "loss": 0.4579, "step": 34834 }, { "epoch": 0.9564799560680944, "grad_norm": 2.745361566543579, "learning_rate": 1.0705989829359609e-05, "loss": 0.4532, "step": 34835 }, { "epoch": 0.956507413509061, "grad_norm": 0.36817431449890137, "learning_rate": 1.0705559012962388e-05, "loss": 0.4346, "step": 34836 }, { "epoch": 0.9565348709500274, "grad_norm": 0.3892417550086975, "learning_rate": 1.0705128195249075e-05, "loss": 0.5056, "step": 34837 }, { "epoch": 0.956562328390994, "grad_norm": 0.4678867757320404, "learning_rate": 1.0704697376220472e-05, "loss": 0.4908, "step": 34838 }, { "epoch": 0.9565897858319604, "grad_norm": 0.438326358795166, "learning_rate": 1.0704266555877383e-05, "loss": 0.5581, "step": 34839 }, { "epoch": 0.956617243272927, "grad_norm": 0.40344902873039246, "learning_rate": 1.0703835734220613e-05, "loss": 0.4517, "step": 34840 }, { "epoch": 0.9566447007138935, "grad_norm": 0.43591317534446716, "learning_rate": 1.0703404911250963e-05, "loss": 0.512, "step": 34841 }, { "epoch": 0.95667215815486, "grad_norm": 0.39156022667884827, "learning_rate": 1.0702974086969243e-05, "loss": 0.5534, "step": 34842 }, { "epoch": 0.9566996155958265, "grad_norm": 0.40702155232429504, "learning_rate": 1.0702543261376247e-05, "loss": 0.5487, "step": 34843 }, { "epoch": 0.9567270730367929, "grad_norm": 0.5510063767433167, "learning_rate": 1.0702112434472784e-05, "loss": 0.5157, "step": 34844 }, { "epoch": 0.9567545304777595, "grad_norm": 0.5046184659004211, "learning_rate": 1.0701681606259659e-05, "loss": 0.5603, "step": 34845 }, { "epoch": 0.9567819879187259, "grad_norm": 0.4307788610458374, "learning_rate": 1.0701250776737673e-05, "loss": 0.5312, "step": 34846 }, { "epoch": 0.9568094453596925, "grad_norm": 0.37545645236968994, "learning_rate": 1.070081994590763e-05, "loss": 0.5139, "step": 34847 }, { "epoch": 0.956836902800659, "grad_norm": 0.39865291118621826, "learning_rate": 1.0700389113770335e-05, "loss": 0.4783, "step": 34848 }, { "epoch": 0.9568643602416255, "grad_norm": 0.3642352819442749, "learning_rate": 1.069995828032659e-05, "loss": 0.4745, "step": 34849 }, { "epoch": 0.956891817682592, "grad_norm": 0.43077394366264343, "learning_rate": 1.0699527445577203e-05, "loss": 0.5855, "step": 34850 }, { "epoch": 0.9569192751235585, "grad_norm": 0.3709624707698822, "learning_rate": 1.0699096609522967e-05, "loss": 0.5048, "step": 34851 }, { "epoch": 0.956946732564525, "grad_norm": 0.35273241996765137, "learning_rate": 1.0698665772164698e-05, "loss": 0.4708, "step": 34852 }, { "epoch": 0.9569741900054914, "grad_norm": 0.3982519805431366, "learning_rate": 1.0698234933503193e-05, "loss": 0.5059, "step": 34853 }, { "epoch": 0.957001647446458, "grad_norm": 0.3662751019001007, "learning_rate": 1.0697804093539257e-05, "loss": 0.4868, "step": 34854 }, { "epoch": 0.9570291048874245, "grad_norm": 0.37330344319343567, "learning_rate": 1.0697373252273694e-05, "loss": 0.5255, "step": 34855 }, { "epoch": 0.957056562328391, "grad_norm": 0.34841397404670715, "learning_rate": 1.0696942409707306e-05, "loss": 0.4455, "step": 34856 }, { "epoch": 0.9570840197693575, "grad_norm": 0.45505964756011963, "learning_rate": 1.06965115658409e-05, "loss": 0.5799, "step": 34857 }, { "epoch": 0.957111477210324, "grad_norm": 0.3779584765434265, "learning_rate": 1.0696080720675279e-05, "loss": 0.5103, "step": 34858 }, { "epoch": 0.9571389346512905, "grad_norm": 0.38087788224220276, "learning_rate": 1.069564987421124e-05, "loss": 0.4767, "step": 34859 }, { "epoch": 0.957166392092257, "grad_norm": 0.6527154445648193, "learning_rate": 1.0695219026449597e-05, "loss": 0.4805, "step": 34860 }, { "epoch": 0.9571938495332235, "grad_norm": 0.4667859673500061, "learning_rate": 1.0694788177391144e-05, "loss": 0.4675, "step": 34861 }, { "epoch": 0.9572213069741901, "grad_norm": 0.4071952998638153, "learning_rate": 1.0694357327036693e-05, "loss": 0.4492, "step": 34862 }, { "epoch": 0.9572487644151565, "grad_norm": 0.3688203990459442, "learning_rate": 1.0693926475387044e-05, "loss": 0.4331, "step": 34863 }, { "epoch": 0.957276221856123, "grad_norm": 0.35221922397613525, "learning_rate": 1.0693495622442998e-05, "loss": 0.4752, "step": 34864 }, { "epoch": 0.9573036792970895, "grad_norm": 0.4402889609336853, "learning_rate": 1.0693064768205363e-05, "loss": 0.4076, "step": 34865 }, { "epoch": 0.957331136738056, "grad_norm": 0.43967124819755554, "learning_rate": 1.0692633912674941e-05, "loss": 0.5201, "step": 34866 }, { "epoch": 0.9573585941790225, "grad_norm": 0.3597395420074463, "learning_rate": 1.0692203055852535e-05, "loss": 0.5286, "step": 34867 }, { "epoch": 0.957386051619989, "grad_norm": 0.4124549329280853, "learning_rate": 1.0691772197738952e-05, "loss": 0.4498, "step": 34868 }, { "epoch": 0.9574135090609556, "grad_norm": 0.4075019955635071, "learning_rate": 1.0691341338334989e-05, "loss": 0.5651, "step": 34869 }, { "epoch": 0.957440966501922, "grad_norm": 0.4862573742866516, "learning_rate": 1.0690910477641456e-05, "loss": 0.5161, "step": 34870 }, { "epoch": 0.9574684239428886, "grad_norm": 0.34486445784568787, "learning_rate": 1.0690479615659153e-05, "loss": 0.4252, "step": 34871 }, { "epoch": 0.957495881383855, "grad_norm": 0.3771526515483856, "learning_rate": 1.0690048752388886e-05, "loss": 0.5074, "step": 34872 }, { "epoch": 0.9575233388248215, "grad_norm": 0.34537914395332336, "learning_rate": 1.068961788783146e-05, "loss": 0.4176, "step": 34873 }, { "epoch": 0.957550796265788, "grad_norm": 0.39992812275886536, "learning_rate": 1.0689187021987673e-05, "loss": 0.457, "step": 34874 }, { "epoch": 0.9575782537067545, "grad_norm": 0.43850424885749817, "learning_rate": 1.0688756154858336e-05, "loss": 0.5059, "step": 34875 }, { "epoch": 0.9576057111477211, "grad_norm": 0.42774802446365356, "learning_rate": 1.0688325286444248e-05, "loss": 0.4925, "step": 34876 }, { "epoch": 0.9576331685886875, "grad_norm": 0.37670019268989563, "learning_rate": 1.0687894416746209e-05, "loss": 0.4339, "step": 34877 }, { "epoch": 0.9576606260296541, "grad_norm": 0.5222710967063904, "learning_rate": 1.0687463545765033e-05, "loss": 0.3915, "step": 34878 }, { "epoch": 0.9576880834706205, "grad_norm": 0.35696977376937866, "learning_rate": 1.0687032673501514e-05, "loss": 0.5217, "step": 34879 }, { "epoch": 0.9577155409115871, "grad_norm": 0.3915060758590698, "learning_rate": 1.0686601799956462e-05, "loss": 0.4881, "step": 34880 }, { "epoch": 0.9577429983525535, "grad_norm": 0.4251434803009033, "learning_rate": 1.0686170925130678e-05, "loss": 0.4153, "step": 34881 }, { "epoch": 0.95777045579352, "grad_norm": 0.3931671679019928, "learning_rate": 1.0685740049024967e-05, "loss": 0.496, "step": 34882 }, { "epoch": 0.9577979132344866, "grad_norm": 0.44885900616645813, "learning_rate": 1.0685309171640133e-05, "loss": 0.4612, "step": 34883 }, { "epoch": 0.957825370675453, "grad_norm": 0.4375171959400177, "learning_rate": 1.0684878292976975e-05, "loss": 0.4914, "step": 34884 }, { "epoch": 0.9578528281164196, "grad_norm": 0.370837926864624, "learning_rate": 1.0684447413036303e-05, "loss": 0.4798, "step": 34885 }, { "epoch": 0.957880285557386, "grad_norm": 0.41526880860328674, "learning_rate": 1.068401653181892e-05, "loss": 0.4982, "step": 34886 }, { "epoch": 0.9579077429983526, "grad_norm": 0.4293258488178253, "learning_rate": 1.0683585649325623e-05, "loss": 0.5682, "step": 34887 }, { "epoch": 0.957935200439319, "grad_norm": 0.357089638710022, "learning_rate": 1.0683154765557223e-05, "loss": 0.488, "step": 34888 }, { "epoch": 0.9579626578802856, "grad_norm": 0.43192946910858154, "learning_rate": 1.0682723880514523e-05, "loss": 0.4487, "step": 34889 }, { "epoch": 0.9579901153212521, "grad_norm": 0.3651573657989502, "learning_rate": 1.0682292994198323e-05, "loss": 0.4809, "step": 34890 }, { "epoch": 0.9580175727622185, "grad_norm": 0.3752998411655426, "learning_rate": 1.0681862106609428e-05, "loss": 0.5925, "step": 34891 }, { "epoch": 0.9580450302031851, "grad_norm": 0.4575667977333069, "learning_rate": 1.0681431217748644e-05, "loss": 0.4078, "step": 34892 }, { "epoch": 0.9580724876441515, "grad_norm": 0.3816748559474945, "learning_rate": 1.0681000327616772e-05, "loss": 0.4645, "step": 34893 }, { "epoch": 0.9580999450851181, "grad_norm": 0.4466944634914398, "learning_rate": 1.0680569436214618e-05, "loss": 0.51, "step": 34894 }, { "epoch": 0.9581274025260845, "grad_norm": 0.370303750038147, "learning_rate": 1.0680138543542986e-05, "loss": 0.4647, "step": 34895 }, { "epoch": 0.9581548599670511, "grad_norm": 0.5035239458084106, "learning_rate": 1.0679707649602677e-05, "loss": 0.4758, "step": 34896 }, { "epoch": 0.9581823174080176, "grad_norm": 0.3818584084510803, "learning_rate": 1.0679276754394496e-05, "loss": 0.4845, "step": 34897 }, { "epoch": 0.9582097748489841, "grad_norm": 0.3622732162475586, "learning_rate": 1.0678845857919248e-05, "loss": 0.5461, "step": 34898 }, { "epoch": 0.9582372322899506, "grad_norm": 0.3862021267414093, "learning_rate": 1.0678414960177734e-05, "loss": 0.4556, "step": 34899 }, { "epoch": 0.958264689730917, "grad_norm": 0.4323843717575073, "learning_rate": 1.0677984061170762e-05, "loss": 0.6047, "step": 34900 }, { "epoch": 0.9582921471718836, "grad_norm": 0.49893391132354736, "learning_rate": 1.0677553160899135e-05, "loss": 0.5005, "step": 34901 }, { "epoch": 0.95831960461285, "grad_norm": 0.406418114900589, "learning_rate": 1.067712225936365e-05, "loss": 0.4594, "step": 34902 }, { "epoch": 0.9583470620538166, "grad_norm": 0.39285388588905334, "learning_rate": 1.0676691356565119e-05, "loss": 0.4661, "step": 34903 }, { "epoch": 0.9583745194947831, "grad_norm": 0.4982675313949585, "learning_rate": 1.0676260452504342e-05, "loss": 0.4923, "step": 34904 }, { "epoch": 0.9584019769357496, "grad_norm": 0.44003966450691223, "learning_rate": 1.0675829547182123e-05, "loss": 0.5194, "step": 34905 }, { "epoch": 0.9584294343767161, "grad_norm": 0.4342060983181, "learning_rate": 1.0675398640599269e-05, "loss": 0.4561, "step": 34906 }, { "epoch": 0.9584568918176826, "grad_norm": 0.3819846212863922, "learning_rate": 1.0674967732756576e-05, "loss": 0.5379, "step": 34907 }, { "epoch": 0.9584843492586491, "grad_norm": 0.36857354640960693, "learning_rate": 1.0674536823654856e-05, "loss": 0.459, "step": 34908 }, { "epoch": 0.9585118066996156, "grad_norm": 0.47545304894447327, "learning_rate": 1.0674105913294907e-05, "loss": 0.5873, "step": 34909 }, { "epoch": 0.9585392641405821, "grad_norm": 0.4128669202327728, "learning_rate": 1.067367500167754e-05, "loss": 0.4599, "step": 34910 }, { "epoch": 0.9585667215815487, "grad_norm": 0.36019137501716614, "learning_rate": 1.067324408880355e-05, "loss": 0.4215, "step": 34911 }, { "epoch": 0.9585941790225151, "grad_norm": 0.4185422658920288, "learning_rate": 1.0672813174673746e-05, "loss": 0.4546, "step": 34912 }, { "epoch": 0.9586216364634816, "grad_norm": 0.4084666073322296, "learning_rate": 1.0672382259288932e-05, "loss": 0.5096, "step": 34913 }, { "epoch": 0.9586490939044481, "grad_norm": 0.4329037666320801, "learning_rate": 1.067195134264991e-05, "loss": 0.5479, "step": 34914 }, { "epoch": 0.9586765513454146, "grad_norm": 0.39354780316352844, "learning_rate": 1.0671520424757483e-05, "loss": 0.4783, "step": 34915 }, { "epoch": 0.9587040087863811, "grad_norm": 0.38188549876213074, "learning_rate": 1.0671089505612455e-05, "loss": 0.4903, "step": 34916 }, { "epoch": 0.9587314662273476, "grad_norm": 0.4154590666294098, "learning_rate": 1.0670658585215634e-05, "loss": 0.4761, "step": 34917 }, { "epoch": 0.9587589236683142, "grad_norm": 0.39028167724609375, "learning_rate": 1.067022766356782e-05, "loss": 0.5124, "step": 34918 }, { "epoch": 0.9587863811092806, "grad_norm": 0.3643805980682373, "learning_rate": 1.0669796740669815e-05, "loss": 0.4813, "step": 34919 }, { "epoch": 0.9588138385502472, "grad_norm": 0.3959442973136902, "learning_rate": 1.0669365816522428e-05, "loss": 0.516, "step": 34920 }, { "epoch": 0.9588412959912136, "grad_norm": 0.41326841711997986, "learning_rate": 1.0668934891126459e-05, "loss": 0.501, "step": 34921 }, { "epoch": 0.9588687534321801, "grad_norm": 0.4199479818344116, "learning_rate": 1.0668503964482715e-05, "loss": 0.5567, "step": 34922 }, { "epoch": 0.9588962108731466, "grad_norm": 0.3919658660888672, "learning_rate": 1.0668073036591994e-05, "loss": 0.4702, "step": 34923 }, { "epoch": 0.9589236683141131, "grad_norm": 0.3695708215236664, "learning_rate": 1.0667642107455105e-05, "loss": 0.4946, "step": 34924 }, { "epoch": 0.9589511257550797, "grad_norm": 0.40789997577667236, "learning_rate": 1.0667211177072854e-05, "loss": 0.5156, "step": 34925 }, { "epoch": 0.9589785831960461, "grad_norm": 0.38275912404060364, "learning_rate": 1.0666780245446036e-05, "loss": 0.4477, "step": 34926 }, { "epoch": 0.9590060406370127, "grad_norm": 0.5019537806510925, "learning_rate": 1.0666349312575463e-05, "loss": 0.5935, "step": 34927 }, { "epoch": 0.9590334980779791, "grad_norm": 0.42169657349586487, "learning_rate": 1.0665918378461932e-05, "loss": 0.4904, "step": 34928 }, { "epoch": 0.9590609555189457, "grad_norm": 0.39031538367271423, "learning_rate": 1.0665487443106254e-05, "loss": 0.4719, "step": 34929 }, { "epoch": 0.9590884129599121, "grad_norm": 0.40948596596717834, "learning_rate": 1.0665056506509233e-05, "loss": 0.3821, "step": 34930 }, { "epoch": 0.9591158704008786, "grad_norm": 0.45648789405822754, "learning_rate": 1.0664625568671662e-05, "loss": 0.5657, "step": 34931 }, { "epoch": 0.9591433278418452, "grad_norm": 0.3506152927875519, "learning_rate": 1.0664194629594357e-05, "loss": 0.506, "step": 34932 }, { "epoch": 0.9591707852828116, "grad_norm": 0.47944191098213196, "learning_rate": 1.0663763689278114e-05, "loss": 0.4945, "step": 34933 }, { "epoch": 0.9591982427237782, "grad_norm": 0.4017898738384247, "learning_rate": 1.0663332747723744e-05, "loss": 0.4702, "step": 34934 }, { "epoch": 0.9592257001647446, "grad_norm": 0.3807298243045807, "learning_rate": 1.0662901804932044e-05, "loss": 0.4459, "step": 34935 }, { "epoch": 0.9592531576057112, "grad_norm": 0.4639369547367096, "learning_rate": 1.0662470860903821e-05, "loss": 0.4881, "step": 34936 }, { "epoch": 0.9592806150466776, "grad_norm": 0.3568728566169739, "learning_rate": 1.0662039915639879e-05, "loss": 0.4172, "step": 34937 }, { "epoch": 0.9593080724876442, "grad_norm": 0.36428168416023254, "learning_rate": 1.0661608969141022e-05, "loss": 0.4858, "step": 34938 }, { "epoch": 0.9593355299286107, "grad_norm": 0.39018502831459045, "learning_rate": 1.066117802140805e-05, "loss": 0.4766, "step": 34939 }, { "epoch": 0.9593629873695771, "grad_norm": 0.4842870533466339, "learning_rate": 1.0660747072441772e-05, "loss": 0.4549, "step": 34940 }, { "epoch": 0.9593904448105437, "grad_norm": 0.4068223834037781, "learning_rate": 1.066031612224299e-05, "loss": 0.4995, "step": 34941 }, { "epoch": 0.9594179022515101, "grad_norm": 0.4166308641433716, "learning_rate": 1.0659885170812507e-05, "loss": 0.5024, "step": 34942 }, { "epoch": 0.9594453596924767, "grad_norm": 0.4557074010372162, "learning_rate": 1.065945421815113e-05, "loss": 0.4655, "step": 34943 }, { "epoch": 0.9594728171334431, "grad_norm": 0.38663241267204285, "learning_rate": 1.0659023264259655e-05, "loss": 0.4498, "step": 34944 }, { "epoch": 0.9595002745744097, "grad_norm": 0.36388587951660156, "learning_rate": 1.0658592309138895e-05, "loss": 0.4899, "step": 34945 }, { "epoch": 0.9595277320153762, "grad_norm": 0.40665486454963684, "learning_rate": 1.0658161352789648e-05, "loss": 0.5525, "step": 34946 }, { "epoch": 0.9595551894563427, "grad_norm": 0.4209216833114624, "learning_rate": 1.0657730395212722e-05, "loss": 0.5322, "step": 34947 }, { "epoch": 0.9595826468973092, "grad_norm": 0.431682288646698, "learning_rate": 1.0657299436408921e-05, "loss": 0.4703, "step": 34948 }, { "epoch": 0.9596101043382756, "grad_norm": 0.33075401186943054, "learning_rate": 1.0656868476379043e-05, "loss": 0.4127, "step": 34949 }, { "epoch": 0.9596375617792422, "grad_norm": 0.7805789113044739, "learning_rate": 1.0656437515123896e-05, "loss": 0.5576, "step": 34950 }, { "epoch": 0.9596650192202086, "grad_norm": 0.36675503849983215, "learning_rate": 1.0656006552644287e-05, "loss": 0.4508, "step": 34951 }, { "epoch": 0.9596924766611752, "grad_norm": 0.398968368768692, "learning_rate": 1.0655575588941012e-05, "loss": 0.5475, "step": 34952 }, { "epoch": 0.9597199341021416, "grad_norm": 0.3925941288471222, "learning_rate": 1.0655144624014882e-05, "loss": 0.4807, "step": 34953 }, { "epoch": 0.9597473915431082, "grad_norm": 0.37263157963752747, "learning_rate": 1.0654713657866696e-05, "loss": 0.4413, "step": 34954 }, { "epoch": 0.9597748489840747, "grad_norm": 0.35503727197647095, "learning_rate": 1.0654282690497262e-05, "loss": 0.4794, "step": 34955 }, { "epoch": 0.9598023064250412, "grad_norm": 0.34454530477523804, "learning_rate": 1.0653851721907383e-05, "loss": 0.4516, "step": 34956 }, { "epoch": 0.9598297638660077, "grad_norm": 0.5871736407279968, "learning_rate": 1.0653420752097859e-05, "loss": 0.4686, "step": 34957 }, { "epoch": 0.9598572213069742, "grad_norm": 0.3943098783493042, "learning_rate": 1.0652989781069499e-05, "loss": 0.469, "step": 34958 }, { "epoch": 0.9598846787479407, "grad_norm": 0.36203649640083313, "learning_rate": 1.0652558808823102e-05, "loss": 0.4276, "step": 34959 }, { "epoch": 0.9599121361889071, "grad_norm": 0.4105449318885803, "learning_rate": 1.0652127835359476e-05, "loss": 0.5127, "step": 34960 }, { "epoch": 0.9599395936298737, "grad_norm": 0.45714834332466125, "learning_rate": 1.0651696860679426e-05, "loss": 0.5721, "step": 34961 }, { "epoch": 0.9599670510708402, "grad_norm": 0.42296817898750305, "learning_rate": 1.0651265884783747e-05, "loss": 0.5012, "step": 34962 }, { "epoch": 0.9599945085118067, "grad_norm": 0.35250380635261536, "learning_rate": 1.0650834907673255e-05, "loss": 0.45, "step": 34963 }, { "epoch": 0.9600219659527732, "grad_norm": 0.36401161551475525, "learning_rate": 1.0650403929348745e-05, "loss": 0.4194, "step": 34964 }, { "epoch": 0.9600494233937397, "grad_norm": 0.42181822657585144, "learning_rate": 1.0649972949811027e-05, "loss": 0.4733, "step": 34965 }, { "epoch": 0.9600768808347062, "grad_norm": 0.4121161103248596, "learning_rate": 1.0649541969060902e-05, "loss": 0.4471, "step": 34966 }, { "epoch": 0.9601043382756727, "grad_norm": 0.41662853956222534, "learning_rate": 1.064911098709917e-05, "loss": 0.5157, "step": 34967 }, { "epoch": 0.9601317957166392, "grad_norm": 0.7949236631393433, "learning_rate": 1.0648680003926642e-05, "loss": 0.4322, "step": 34968 }, { "epoch": 0.9601592531576058, "grad_norm": 0.38606762886047363, "learning_rate": 1.064824901954412e-05, "loss": 0.5529, "step": 34969 }, { "epoch": 0.9601867105985722, "grad_norm": 0.43026822805404663, "learning_rate": 1.0647818033952402e-05, "loss": 0.5097, "step": 34970 }, { "epoch": 0.9602141680395387, "grad_norm": 0.37150654196739197, "learning_rate": 1.06473870471523e-05, "loss": 0.5578, "step": 34971 }, { "epoch": 0.9602416254805052, "grad_norm": 0.5877676606178284, "learning_rate": 1.0646956059144614e-05, "loss": 0.5176, "step": 34972 }, { "epoch": 0.9602690829214717, "grad_norm": 0.41161447763442993, "learning_rate": 1.0646525069930146e-05, "loss": 0.46, "step": 34973 }, { "epoch": 0.9602965403624382, "grad_norm": 0.9766450524330139, "learning_rate": 1.0646094079509709e-05, "loss": 0.5327, "step": 34974 }, { "epoch": 0.9603239978034047, "grad_norm": 0.38749200105667114, "learning_rate": 1.0645663087884092e-05, "loss": 0.4959, "step": 34975 }, { "epoch": 0.9603514552443713, "grad_norm": 0.47836920619010925, "learning_rate": 1.0645232095054114e-05, "loss": 0.471, "step": 34976 }, { "epoch": 0.9603789126853377, "grad_norm": 0.354869544506073, "learning_rate": 1.0644801101020567e-05, "loss": 0.4321, "step": 34977 }, { "epoch": 0.9604063701263043, "grad_norm": 0.45461058616638184, "learning_rate": 1.0644370105784263e-05, "loss": 0.4931, "step": 34978 }, { "epoch": 0.9604338275672707, "grad_norm": 0.4363061785697937, "learning_rate": 1.0643939109346004e-05, "loss": 0.5794, "step": 34979 }, { "epoch": 0.9604612850082372, "grad_norm": 0.39782387018203735, "learning_rate": 1.064350811170659e-05, "loss": 0.4233, "step": 34980 }, { "epoch": 0.9604887424492037, "grad_norm": 0.4188481867313385, "learning_rate": 1.064307711286683e-05, "loss": 0.544, "step": 34981 }, { "epoch": 0.9605161998901702, "grad_norm": 0.47666648030281067, "learning_rate": 1.0642646112827527e-05, "loss": 0.4657, "step": 34982 }, { "epoch": 0.9605436573311368, "grad_norm": 0.4199239909648895, "learning_rate": 1.0642215111589483e-05, "loss": 0.5658, "step": 34983 }, { "epoch": 0.9605711147721032, "grad_norm": 0.3540232479572296, "learning_rate": 1.0641784109153504e-05, "loss": 0.4458, "step": 34984 }, { "epoch": 0.9605985722130698, "grad_norm": 0.3639153838157654, "learning_rate": 1.0641353105520389e-05, "loss": 0.4236, "step": 34985 }, { "epoch": 0.9606260296540362, "grad_norm": 0.4380652904510498, "learning_rate": 1.0640922100690949e-05, "loss": 0.409, "step": 34986 }, { "epoch": 0.9606534870950028, "grad_norm": 0.3747994005680084, "learning_rate": 1.0640491094665984e-05, "loss": 0.4352, "step": 34987 }, { "epoch": 0.9606809445359692, "grad_norm": 0.43306323885917664, "learning_rate": 1.0640060087446298e-05, "loss": 0.5169, "step": 34988 }, { "epoch": 0.9607084019769357, "grad_norm": 0.43436717987060547, "learning_rate": 1.0639629079032695e-05, "loss": 0.4293, "step": 34989 }, { "epoch": 0.9607358594179023, "grad_norm": 0.40504080057144165, "learning_rate": 1.063919806942598e-05, "loss": 0.5101, "step": 34990 }, { "epoch": 0.9607633168588687, "grad_norm": 0.40714162588119507, "learning_rate": 1.0638767058626957e-05, "loss": 0.4652, "step": 34991 }, { "epoch": 0.9607907742998353, "grad_norm": 0.4359044134616852, "learning_rate": 1.0638336046636433e-05, "loss": 0.4639, "step": 34992 }, { "epoch": 0.9608182317408017, "grad_norm": 0.4037801921367645, "learning_rate": 1.0637905033455203e-05, "loss": 0.4759, "step": 34993 }, { "epoch": 0.9608456891817683, "grad_norm": 0.4182799458503723, "learning_rate": 1.0637474019084079e-05, "loss": 0.3785, "step": 34994 }, { "epoch": 0.9608731466227347, "grad_norm": 0.40907973051071167, "learning_rate": 1.0637043003523861e-05, "loss": 0.5232, "step": 34995 }, { "epoch": 0.9609006040637013, "grad_norm": 0.3561069965362549, "learning_rate": 1.0636611986775358e-05, "loss": 0.4887, "step": 34996 }, { "epoch": 0.9609280615046678, "grad_norm": 0.35056272149086, "learning_rate": 1.0636180968839368e-05, "loss": 0.4124, "step": 34997 }, { "epoch": 0.9609555189456342, "grad_norm": 0.45574283599853516, "learning_rate": 1.0635749949716698e-05, "loss": 0.5439, "step": 34998 }, { "epoch": 0.9609829763866008, "grad_norm": 0.40283820033073425, "learning_rate": 1.0635318929408152e-05, "loss": 0.4661, "step": 34999 }, { "epoch": 0.9610104338275672, "grad_norm": 0.38372161984443665, "learning_rate": 1.0634887907914533e-05, "loss": 0.4803, "step": 35000 }, { "epoch": 0.9610378912685338, "grad_norm": 0.37232908606529236, "learning_rate": 1.0634456885236643e-05, "loss": 0.4327, "step": 35001 }, { "epoch": 0.9610653487095002, "grad_norm": 0.4131854176521301, "learning_rate": 1.0634025861375292e-05, "loss": 0.5196, "step": 35002 }, { "epoch": 0.9610928061504668, "grad_norm": 0.38301974534988403, "learning_rate": 1.0633594836331278e-05, "loss": 0.4645, "step": 35003 }, { "epoch": 0.9611202635914333, "grad_norm": 0.3789653778076172, "learning_rate": 1.063316381010541e-05, "loss": 0.5257, "step": 35004 }, { "epoch": 0.9611477210323998, "grad_norm": 0.4055803120136261, "learning_rate": 1.0632732782698487e-05, "loss": 0.5063, "step": 35005 }, { "epoch": 0.9611751784733663, "grad_norm": 0.3278353810310364, "learning_rate": 1.0632301754111315e-05, "loss": 0.4184, "step": 35006 }, { "epoch": 0.9612026359143327, "grad_norm": 0.3692885935306549, "learning_rate": 1.0631870724344699e-05, "loss": 0.4538, "step": 35007 }, { "epoch": 0.9612300933552993, "grad_norm": 0.37450674176216125, "learning_rate": 1.0631439693399444e-05, "loss": 0.5461, "step": 35008 }, { "epoch": 0.9612575507962657, "grad_norm": 0.42019468545913696, "learning_rate": 1.0631008661276352e-05, "loss": 0.5064, "step": 35009 }, { "epoch": 0.9612850082372323, "grad_norm": 0.3656025826931, "learning_rate": 1.0630577627976226e-05, "loss": 0.4422, "step": 35010 }, { "epoch": 0.9613124656781988, "grad_norm": 0.3657659590244293, "learning_rate": 1.0630146593499873e-05, "loss": 0.3879, "step": 35011 }, { "epoch": 0.9613399231191653, "grad_norm": 0.37266483902931213, "learning_rate": 1.0629715557848095e-05, "loss": 0.3717, "step": 35012 }, { "epoch": 0.9613673805601318, "grad_norm": 0.3925817608833313, "learning_rate": 1.0629284521021697e-05, "loss": 0.5479, "step": 35013 }, { "epoch": 0.9613948380010983, "grad_norm": 0.39141324162483215, "learning_rate": 1.0628853483021479e-05, "loss": 0.455, "step": 35014 }, { "epoch": 0.9614222954420648, "grad_norm": 0.44831138849258423, "learning_rate": 1.0628422443848254e-05, "loss": 0.4602, "step": 35015 }, { "epoch": 0.9614497528830313, "grad_norm": 0.4011302590370178, "learning_rate": 1.0627991403502816e-05, "loss": 0.5449, "step": 35016 }, { "epoch": 0.9614772103239978, "grad_norm": 0.39860227704048157, "learning_rate": 1.0627560361985976e-05, "loss": 0.4135, "step": 35017 }, { "epoch": 0.9615046677649643, "grad_norm": 0.4384708106517792, "learning_rate": 1.0627129319298537e-05, "loss": 0.5327, "step": 35018 }, { "epoch": 0.9615321252059308, "grad_norm": 0.42406749725341797, "learning_rate": 1.0626698275441298e-05, "loss": 0.5896, "step": 35019 }, { "epoch": 0.9615595826468973, "grad_norm": 0.5301697850227356, "learning_rate": 1.0626267230415071e-05, "loss": 0.5204, "step": 35020 }, { "epoch": 0.9615870400878638, "grad_norm": 0.3955535590648651, "learning_rate": 1.062583618422065e-05, "loss": 0.576, "step": 35021 }, { "epoch": 0.9616144975288303, "grad_norm": 0.4350195527076721, "learning_rate": 1.0625405136858849e-05, "loss": 0.4935, "step": 35022 }, { "epoch": 0.9616419549697968, "grad_norm": 0.4172796905040741, "learning_rate": 1.0624974088330469e-05, "loss": 0.4573, "step": 35023 }, { "epoch": 0.9616694124107633, "grad_norm": 0.3821723461151123, "learning_rate": 1.0624543038636307e-05, "loss": 0.4419, "step": 35024 }, { "epoch": 0.9616968698517299, "grad_norm": 0.4648793935775757, "learning_rate": 1.0624111987777179e-05, "loss": 0.5604, "step": 35025 }, { "epoch": 0.9617243272926963, "grad_norm": 0.3987381160259247, "learning_rate": 1.062368093575388e-05, "loss": 0.5309, "step": 35026 }, { "epoch": 0.9617517847336629, "grad_norm": 0.36720743775367737, "learning_rate": 1.0623249882567217e-05, "loss": 0.4276, "step": 35027 }, { "epoch": 0.9617792421746293, "grad_norm": 0.4243626296520233, "learning_rate": 1.0622818828217996e-05, "loss": 0.5121, "step": 35028 }, { "epoch": 0.9618066996155958, "grad_norm": 0.4060686528682709, "learning_rate": 1.0622387772707017e-05, "loss": 0.4381, "step": 35029 }, { "epoch": 0.9618341570565623, "grad_norm": 0.38944101333618164, "learning_rate": 1.0621956716035086e-05, "loss": 0.4591, "step": 35030 }, { "epoch": 0.9618616144975288, "grad_norm": 0.4655141830444336, "learning_rate": 1.0621525658203011e-05, "loss": 0.4465, "step": 35031 }, { "epoch": 0.9618890719384954, "grad_norm": 0.40796688199043274, "learning_rate": 1.0621094599211587e-05, "loss": 0.5322, "step": 35032 }, { "epoch": 0.9619165293794618, "grad_norm": 0.3869318962097168, "learning_rate": 1.0620663539061626e-05, "loss": 0.454, "step": 35033 }, { "epoch": 0.9619439868204284, "grad_norm": 0.352305144071579, "learning_rate": 1.0620232477753929e-05, "loss": 0.5454, "step": 35034 }, { "epoch": 0.9619714442613948, "grad_norm": 0.429841548204422, "learning_rate": 1.06198014152893e-05, "loss": 0.5174, "step": 35035 }, { "epoch": 0.9619989017023614, "grad_norm": 0.39434313774108887, "learning_rate": 1.0619370351668545e-05, "loss": 0.5338, "step": 35036 }, { "epoch": 0.9620263591433278, "grad_norm": 0.41424140334129333, "learning_rate": 1.0618939286892464e-05, "loss": 0.5293, "step": 35037 }, { "epoch": 0.9620538165842943, "grad_norm": 0.40168675780296326, "learning_rate": 1.0618508220961867e-05, "loss": 0.5343, "step": 35038 }, { "epoch": 0.9620812740252609, "grad_norm": 0.4102764427661896, "learning_rate": 1.0618077153877553e-05, "loss": 0.5074, "step": 35039 }, { "epoch": 0.9621087314662273, "grad_norm": 0.34754207730293274, "learning_rate": 1.0617646085640329e-05, "loss": 0.3947, "step": 35040 }, { "epoch": 0.9621361889071939, "grad_norm": 0.40536993741989136, "learning_rate": 1.0617215016250996e-05, "loss": 0.4384, "step": 35041 }, { "epoch": 0.9621636463481603, "grad_norm": 0.429420530796051, "learning_rate": 1.0616783945710361e-05, "loss": 0.4603, "step": 35042 }, { "epoch": 0.9621911037891269, "grad_norm": 0.37821823358535767, "learning_rate": 1.061635287401923e-05, "loss": 0.49, "step": 35043 }, { "epoch": 0.9622185612300933, "grad_norm": 0.4219171702861786, "learning_rate": 1.0615921801178399e-05, "loss": 0.5034, "step": 35044 }, { "epoch": 0.9622460186710599, "grad_norm": 0.5050997138023376, "learning_rate": 1.061549072718868e-05, "loss": 0.5042, "step": 35045 }, { "epoch": 0.9622734761120264, "grad_norm": 0.41063323616981506, "learning_rate": 1.0615059652050874e-05, "loss": 0.5285, "step": 35046 }, { "epoch": 0.9623009335529928, "grad_norm": 0.3942725658416748, "learning_rate": 1.0614628575765784e-05, "loss": 0.4923, "step": 35047 }, { "epoch": 0.9623283909939594, "grad_norm": 0.37701573967933655, "learning_rate": 1.0614197498334217e-05, "loss": 0.3617, "step": 35048 }, { "epoch": 0.9623558484349258, "grad_norm": 0.38888129591941833, "learning_rate": 1.0613766419756977e-05, "loss": 0.4445, "step": 35049 }, { "epoch": 0.9623833058758924, "grad_norm": 0.39639461040496826, "learning_rate": 1.0613335340034864e-05, "loss": 0.4896, "step": 35050 }, { "epoch": 0.9624107633168588, "grad_norm": 0.46504074335098267, "learning_rate": 1.0612904259168686e-05, "loss": 0.5968, "step": 35051 }, { "epoch": 0.9624382207578254, "grad_norm": 0.3956160545349121, "learning_rate": 1.0612473177159246e-05, "loss": 0.5043, "step": 35052 }, { "epoch": 0.9624656781987919, "grad_norm": 0.35144513845443726, "learning_rate": 1.061204209400735e-05, "loss": 0.3847, "step": 35053 }, { "epoch": 0.9624931356397584, "grad_norm": 0.38380151987075806, "learning_rate": 1.0611611009713798e-05, "loss": 0.5894, "step": 35054 }, { "epoch": 0.9625205930807249, "grad_norm": 0.395063579082489, "learning_rate": 1.0611179924279396e-05, "loss": 0.517, "step": 35055 }, { "epoch": 0.9625480505216913, "grad_norm": 0.3883573114871979, "learning_rate": 1.0610748837704947e-05, "loss": 0.5131, "step": 35056 }, { "epoch": 0.9625755079626579, "grad_norm": 0.36344966292381287, "learning_rate": 1.061031774999126e-05, "loss": 0.4635, "step": 35057 }, { "epoch": 0.9626029654036243, "grad_norm": 0.6213597059249878, "learning_rate": 1.0609886661139132e-05, "loss": 0.5232, "step": 35058 }, { "epoch": 0.9626304228445909, "grad_norm": 0.39458954334259033, "learning_rate": 1.0609455571149374e-05, "loss": 0.5304, "step": 35059 }, { "epoch": 0.9626578802855574, "grad_norm": 0.39464592933654785, "learning_rate": 1.0609024480022785e-05, "loss": 0.4731, "step": 35060 }, { "epoch": 0.9626853377265239, "grad_norm": 0.36968302726745605, "learning_rate": 1.0608593387760172e-05, "loss": 0.4203, "step": 35061 }, { "epoch": 0.9627127951674904, "grad_norm": 0.3899702727794647, "learning_rate": 1.0608162294362339e-05, "loss": 0.5298, "step": 35062 }, { "epoch": 0.9627402526084569, "grad_norm": 0.5001720786094666, "learning_rate": 1.0607731199830087e-05, "loss": 0.4226, "step": 35063 }, { "epoch": 0.9627677100494234, "grad_norm": 0.40031319856643677, "learning_rate": 1.0607300104164221e-05, "loss": 0.5354, "step": 35064 }, { "epoch": 0.9627951674903898, "grad_norm": 0.38719120621681213, "learning_rate": 1.060686900736555e-05, "loss": 0.4466, "step": 35065 }, { "epoch": 0.9628226249313564, "grad_norm": 0.46716395020484924, "learning_rate": 1.0606437909434873e-05, "loss": 0.5345, "step": 35066 }, { "epoch": 0.962850082372323, "grad_norm": 0.39573055505752563, "learning_rate": 1.0606006810372997e-05, "loss": 0.4312, "step": 35067 }, { "epoch": 0.9628775398132894, "grad_norm": 0.35435113310813904, "learning_rate": 1.0605575710180723e-05, "loss": 0.4688, "step": 35068 }, { "epoch": 0.9629049972542559, "grad_norm": 0.3999537527561188, "learning_rate": 1.0605144608858858e-05, "loss": 0.4927, "step": 35069 }, { "epoch": 0.9629324546952224, "grad_norm": 0.37166735529899597, "learning_rate": 1.0604713506408205e-05, "loss": 0.471, "step": 35070 }, { "epoch": 0.9629599121361889, "grad_norm": 0.35434702038764954, "learning_rate": 1.060428240282957e-05, "loss": 0.4971, "step": 35071 }, { "epoch": 0.9629873695771554, "grad_norm": 0.3562745153903961, "learning_rate": 1.0603851298123754e-05, "loss": 0.4535, "step": 35072 }, { "epoch": 0.9630148270181219, "grad_norm": 0.3910257816314697, "learning_rate": 1.060342019229156e-05, "loss": 0.4734, "step": 35073 }, { "epoch": 0.9630422844590885, "grad_norm": 0.38433828949928284, "learning_rate": 1.0602989085333797e-05, "loss": 0.4947, "step": 35074 }, { "epoch": 0.9630697419000549, "grad_norm": 0.3929502069950104, "learning_rate": 1.0602557977251266e-05, "loss": 0.4886, "step": 35075 }, { "epoch": 0.9630971993410214, "grad_norm": 0.6152403354644775, "learning_rate": 1.0602126868044776e-05, "loss": 0.5205, "step": 35076 }, { "epoch": 0.9631246567819879, "grad_norm": 0.3721361458301544, "learning_rate": 1.0601695757715122e-05, "loss": 0.436, "step": 35077 }, { "epoch": 0.9631521142229544, "grad_norm": 0.3764816224575043, "learning_rate": 1.0601264646263116e-05, "loss": 0.5671, "step": 35078 }, { "epoch": 0.9631795716639209, "grad_norm": 0.4254549443721771, "learning_rate": 1.060083353368956e-05, "loss": 0.5363, "step": 35079 }, { "epoch": 0.9632070291048874, "grad_norm": 0.5082731246948242, "learning_rate": 1.0600402419995254e-05, "loss": 0.4796, "step": 35080 }, { "epoch": 0.963234486545854, "grad_norm": 0.3443826735019684, "learning_rate": 1.0599971305181012e-05, "loss": 0.3866, "step": 35081 }, { "epoch": 0.9632619439868204, "grad_norm": 0.43061211705207825, "learning_rate": 1.0599540189247628e-05, "loss": 0.4774, "step": 35082 }, { "epoch": 0.963289401427787, "grad_norm": 0.7771103978157043, "learning_rate": 1.0599109072195911e-05, "loss": 0.5787, "step": 35083 }, { "epoch": 0.9633168588687534, "grad_norm": 0.428404837846756, "learning_rate": 1.0598677954026664e-05, "loss": 0.5589, "step": 35084 }, { "epoch": 0.96334431630972, "grad_norm": 0.3395729064941406, "learning_rate": 1.059824683474069e-05, "loss": 0.3543, "step": 35085 }, { "epoch": 0.9633717737506864, "grad_norm": 0.4284113645553589, "learning_rate": 1.05978157143388e-05, "loss": 0.5227, "step": 35086 }, { "epoch": 0.9633992311916529, "grad_norm": 0.4060446619987488, "learning_rate": 1.059738459282179e-05, "loss": 0.5498, "step": 35087 }, { "epoch": 0.9634266886326195, "grad_norm": 0.3957080543041229, "learning_rate": 1.0596953470190466e-05, "loss": 0.535, "step": 35088 }, { "epoch": 0.9634541460735859, "grad_norm": 0.3488420844078064, "learning_rate": 1.0596522346445633e-05, "loss": 0.4056, "step": 35089 }, { "epoch": 0.9634816035145525, "grad_norm": 0.3663260042667389, "learning_rate": 1.0596091221588094e-05, "loss": 0.411, "step": 35090 }, { "epoch": 0.9635090609555189, "grad_norm": 0.3592241108417511, "learning_rate": 1.0595660095618659e-05, "loss": 0.4567, "step": 35091 }, { "epoch": 0.9635365183964855, "grad_norm": 0.4094405472278595, "learning_rate": 1.0595228968538124e-05, "loss": 0.5345, "step": 35092 }, { "epoch": 0.9635639758374519, "grad_norm": 0.39761242270469666, "learning_rate": 1.05947978403473e-05, "loss": 0.5087, "step": 35093 }, { "epoch": 0.9635914332784185, "grad_norm": 0.41071638464927673, "learning_rate": 1.0594366711046988e-05, "loss": 0.5127, "step": 35094 }, { "epoch": 0.963618890719385, "grad_norm": 0.40036240220069885, "learning_rate": 1.0593935580637988e-05, "loss": 0.4668, "step": 35095 }, { "epoch": 0.9636463481603514, "grad_norm": 0.3595679998397827, "learning_rate": 1.0593504449121115e-05, "loss": 0.4405, "step": 35096 }, { "epoch": 0.963673805601318, "grad_norm": 0.3773963153362274, "learning_rate": 1.0593073316497162e-05, "loss": 0.4912, "step": 35097 }, { "epoch": 0.9637012630422844, "grad_norm": 0.43004322052001953, "learning_rate": 1.0592642182766939e-05, "loss": 0.5251, "step": 35098 }, { "epoch": 0.963728720483251, "grad_norm": 0.40358099341392517, "learning_rate": 1.0592211047931252e-05, "loss": 0.5691, "step": 35099 }, { "epoch": 0.9637561779242174, "grad_norm": 0.39737117290496826, "learning_rate": 1.0591779911990898e-05, "loss": 0.508, "step": 35100 }, { "epoch": 0.963783635365184, "grad_norm": 0.44219550490379333, "learning_rate": 1.0591348774946689e-05, "loss": 0.5063, "step": 35101 }, { "epoch": 0.9638110928061505, "grad_norm": 0.45346418023109436, "learning_rate": 1.0590917636799423e-05, "loss": 0.5213, "step": 35102 }, { "epoch": 0.963838550247117, "grad_norm": 0.41193434596061707, "learning_rate": 1.059048649754991e-05, "loss": 0.5503, "step": 35103 }, { "epoch": 0.9638660076880835, "grad_norm": 0.40078970789909363, "learning_rate": 1.059005535719895e-05, "loss": 0.4897, "step": 35104 }, { "epoch": 0.9638934651290499, "grad_norm": 0.4443824589252472, "learning_rate": 1.0589624215747347e-05, "loss": 0.5466, "step": 35105 }, { "epoch": 0.9639209225700165, "grad_norm": 0.39751601219177246, "learning_rate": 1.058919307319591e-05, "loss": 0.4454, "step": 35106 }, { "epoch": 0.9639483800109829, "grad_norm": 0.41033726930618286, "learning_rate": 1.0588761929545435e-05, "loss": 0.5007, "step": 35107 }, { "epoch": 0.9639758374519495, "grad_norm": 0.8612298965454102, "learning_rate": 1.0588330784796734e-05, "loss": 0.5292, "step": 35108 }, { "epoch": 0.964003294892916, "grad_norm": 0.36748945713043213, "learning_rate": 1.0587899638950608e-05, "loss": 0.4952, "step": 35109 }, { "epoch": 0.9640307523338825, "grad_norm": 0.35057544708251953, "learning_rate": 1.0587468492007862e-05, "loss": 0.4472, "step": 35110 }, { "epoch": 0.964058209774849, "grad_norm": 0.4168172776699066, "learning_rate": 1.05870373439693e-05, "loss": 0.5443, "step": 35111 }, { "epoch": 0.9640856672158155, "grad_norm": 0.5152775049209595, "learning_rate": 1.0586606194835724e-05, "loss": 0.5767, "step": 35112 }, { "epoch": 0.964113124656782, "grad_norm": 0.35513848066329956, "learning_rate": 1.0586175044607942e-05, "loss": 0.5042, "step": 35113 }, { "epoch": 0.9641405820977484, "grad_norm": 0.5160991549491882, "learning_rate": 1.0585743893286755e-05, "loss": 0.5442, "step": 35114 }, { "epoch": 0.964168039538715, "grad_norm": 0.4300702214241028, "learning_rate": 1.0585312740872973e-05, "loss": 0.4969, "step": 35115 }, { "epoch": 0.9641954969796815, "grad_norm": 0.41512665152549744, "learning_rate": 1.0584881587367393e-05, "loss": 0.5251, "step": 35116 }, { "epoch": 0.964222954420648, "grad_norm": 0.4042361080646515, "learning_rate": 1.058445043277082e-05, "loss": 0.5582, "step": 35117 }, { "epoch": 0.9642504118616145, "grad_norm": 0.5207343101501465, "learning_rate": 1.0584019277084063e-05, "loss": 0.443, "step": 35118 }, { "epoch": 0.964277869302581, "grad_norm": 0.40139713883399963, "learning_rate": 1.0583588120307922e-05, "loss": 0.4428, "step": 35119 }, { "epoch": 0.9643053267435475, "grad_norm": 0.38012877106666565, "learning_rate": 1.0583156962443204e-05, "loss": 0.4526, "step": 35120 }, { "epoch": 0.964332784184514, "grad_norm": 0.3876039385795593, "learning_rate": 1.0582725803490715e-05, "loss": 0.5092, "step": 35121 }, { "epoch": 0.9643602416254805, "grad_norm": 0.36622312664985657, "learning_rate": 1.058229464345125e-05, "loss": 0.4824, "step": 35122 }, { "epoch": 0.9643876990664471, "grad_norm": 0.39014801383018494, "learning_rate": 1.0581863482325625e-05, "loss": 0.4996, "step": 35123 }, { "epoch": 0.9644151565074135, "grad_norm": 0.3894929885864258, "learning_rate": 1.0581432320114638e-05, "loss": 0.4653, "step": 35124 }, { "epoch": 0.96444261394838, "grad_norm": 0.3953203856945038, "learning_rate": 1.058100115681909e-05, "loss": 0.5198, "step": 35125 }, { "epoch": 0.9644700713893465, "grad_norm": 0.4546075463294983, "learning_rate": 1.0580569992439795e-05, "loss": 0.5523, "step": 35126 }, { "epoch": 0.964497528830313, "grad_norm": 0.3914923071861267, "learning_rate": 1.0580138826977546e-05, "loss": 0.47, "step": 35127 }, { "epoch": 0.9645249862712795, "grad_norm": 0.3735159635543823, "learning_rate": 1.0579707660433158e-05, "loss": 0.4768, "step": 35128 }, { "epoch": 0.964552443712246, "grad_norm": 0.4204539656639099, "learning_rate": 1.057927649280743e-05, "loss": 0.4671, "step": 35129 }, { "epoch": 0.9645799011532126, "grad_norm": 0.40397655963897705, "learning_rate": 1.0578845324101163e-05, "loss": 0.493, "step": 35130 }, { "epoch": 0.964607358594179, "grad_norm": 0.4602471590042114, "learning_rate": 1.0578414154315166e-05, "loss": 0.5096, "step": 35131 }, { "epoch": 0.9646348160351456, "grad_norm": 0.4990037977695465, "learning_rate": 1.0577982983450241e-05, "loss": 0.4683, "step": 35132 }, { "epoch": 0.964662273476112, "grad_norm": 0.4145471751689911, "learning_rate": 1.0577551811507196e-05, "loss": 0.4563, "step": 35133 }, { "epoch": 0.9646897309170785, "grad_norm": 0.4233947992324829, "learning_rate": 1.0577120638486833e-05, "loss": 0.4518, "step": 35134 }, { "epoch": 0.964717188358045, "grad_norm": 0.39893364906311035, "learning_rate": 1.0576689464389953e-05, "loss": 0.4232, "step": 35135 }, { "epoch": 0.9647446457990115, "grad_norm": 0.3843309283256531, "learning_rate": 1.0576258289217364e-05, "loss": 0.4624, "step": 35136 }, { "epoch": 0.9647721032399781, "grad_norm": 0.4080811142921448, "learning_rate": 1.0575827112969872e-05, "loss": 0.5289, "step": 35137 }, { "epoch": 0.9647995606809445, "grad_norm": 0.7142015695571899, "learning_rate": 1.0575395935648274e-05, "loss": 0.4281, "step": 35138 }, { "epoch": 0.9648270181219111, "grad_norm": 0.33257949352264404, "learning_rate": 1.057496475725338e-05, "loss": 0.3195, "step": 35139 }, { "epoch": 0.9648544755628775, "grad_norm": 0.44068193435668945, "learning_rate": 1.0574533577785995e-05, "loss": 0.5046, "step": 35140 }, { "epoch": 0.9648819330038441, "grad_norm": 0.43139567971229553, "learning_rate": 1.0574102397246922e-05, "loss": 0.517, "step": 35141 }, { "epoch": 0.9649093904448105, "grad_norm": 0.4764642119407654, "learning_rate": 1.0573671215636964e-05, "loss": 0.4783, "step": 35142 }, { "epoch": 0.964936847885777, "grad_norm": 0.4366532862186432, "learning_rate": 1.0573240032956924e-05, "loss": 0.5374, "step": 35143 }, { "epoch": 0.9649643053267436, "grad_norm": 0.4292571544647217, "learning_rate": 1.0572808849207611e-05, "loss": 0.5128, "step": 35144 }, { "epoch": 0.96499176276771, "grad_norm": 0.40761685371398926, "learning_rate": 1.0572377664389826e-05, "loss": 0.4853, "step": 35145 }, { "epoch": 0.9650192202086766, "grad_norm": 0.3948383033275604, "learning_rate": 1.0571946478504374e-05, "loss": 0.5176, "step": 35146 }, { "epoch": 0.965046677649643, "grad_norm": 0.452436625957489, "learning_rate": 1.0571515291552059e-05, "loss": 0.5091, "step": 35147 }, { "epoch": 0.9650741350906096, "grad_norm": 0.398105263710022, "learning_rate": 1.0571084103533685e-05, "loss": 0.4739, "step": 35148 }, { "epoch": 0.965101592531576, "grad_norm": 0.3942793905735016, "learning_rate": 1.0570652914450059e-05, "loss": 0.5113, "step": 35149 }, { "epoch": 0.9651290499725426, "grad_norm": 0.43880605697631836, "learning_rate": 1.057022172430198e-05, "loss": 0.502, "step": 35150 }, { "epoch": 0.9651565074135091, "grad_norm": 0.3824721872806549, "learning_rate": 1.0569790533090258e-05, "loss": 0.4859, "step": 35151 }, { "epoch": 0.9651839648544756, "grad_norm": 0.40516743063926697, "learning_rate": 1.0569359340815694e-05, "loss": 0.4933, "step": 35152 }, { "epoch": 0.9652114222954421, "grad_norm": 0.3819863200187683, "learning_rate": 1.0568928147479091e-05, "loss": 0.5001, "step": 35153 }, { "epoch": 0.9652388797364085, "grad_norm": 0.39252305030822754, "learning_rate": 1.056849695308126e-05, "loss": 0.4421, "step": 35154 }, { "epoch": 0.9652663371773751, "grad_norm": 0.35672587156295776, "learning_rate": 1.0568065757622998e-05, "loss": 0.4695, "step": 35155 }, { "epoch": 0.9652937946183415, "grad_norm": 0.4153802990913391, "learning_rate": 1.056763456110511e-05, "loss": 0.4696, "step": 35156 }, { "epoch": 0.9653212520593081, "grad_norm": 0.4669193625450134, "learning_rate": 1.0567203363528407e-05, "loss": 0.5195, "step": 35157 }, { "epoch": 0.9653487095002746, "grad_norm": 0.4475369155406952, "learning_rate": 1.0566772164893686e-05, "loss": 0.4907, "step": 35158 }, { "epoch": 0.9653761669412411, "grad_norm": 0.42069876194000244, "learning_rate": 1.0566340965201753e-05, "loss": 0.4239, "step": 35159 }, { "epoch": 0.9654036243822076, "grad_norm": 0.41112130880355835, "learning_rate": 1.0565909764453418e-05, "loss": 0.4425, "step": 35160 }, { "epoch": 0.965431081823174, "grad_norm": 0.3801395893096924, "learning_rate": 1.0565478562649476e-05, "loss": 0.4993, "step": 35161 }, { "epoch": 0.9654585392641406, "grad_norm": 0.392825186252594, "learning_rate": 1.0565047359790736e-05, "loss": 0.4294, "step": 35162 }, { "epoch": 0.965485996705107, "grad_norm": 0.38481423258781433, "learning_rate": 1.0564616155878005e-05, "loss": 0.4969, "step": 35163 }, { "epoch": 0.9655134541460736, "grad_norm": 0.33402401208877563, "learning_rate": 1.0564184950912084e-05, "loss": 0.4137, "step": 35164 }, { "epoch": 0.9655409115870401, "grad_norm": 0.37079188227653503, "learning_rate": 1.0563753744893776e-05, "loss": 0.4682, "step": 35165 }, { "epoch": 0.9655683690280066, "grad_norm": 0.4097210764884949, "learning_rate": 1.0563322537823888e-05, "loss": 0.5084, "step": 35166 }, { "epoch": 0.9655958264689731, "grad_norm": 0.36722564697265625, "learning_rate": 1.0562891329703227e-05, "loss": 0.496, "step": 35167 }, { "epoch": 0.9656232839099396, "grad_norm": 0.4203089773654938, "learning_rate": 1.0562460120532592e-05, "loss": 0.4919, "step": 35168 }, { "epoch": 0.9656507413509061, "grad_norm": 0.4010378420352936, "learning_rate": 1.0562028910312786e-05, "loss": 0.5215, "step": 35169 }, { "epoch": 0.9656781987918726, "grad_norm": 0.3728638291358948, "learning_rate": 1.0561597699044622e-05, "loss": 0.5531, "step": 35170 }, { "epoch": 0.9657056562328391, "grad_norm": 0.3910488784313202, "learning_rate": 1.0561166486728896e-05, "loss": 0.5017, "step": 35171 }, { "epoch": 0.9657331136738057, "grad_norm": 0.4086301624774933, "learning_rate": 1.0560735273366415e-05, "loss": 0.4937, "step": 35172 }, { "epoch": 0.9657605711147721, "grad_norm": 0.3600023686885834, "learning_rate": 1.0560304058957986e-05, "loss": 0.386, "step": 35173 }, { "epoch": 0.9657880285557386, "grad_norm": 0.4839244484901428, "learning_rate": 1.0559872843504408e-05, "loss": 0.4936, "step": 35174 }, { "epoch": 0.9658154859967051, "grad_norm": 0.42023321986198425, "learning_rate": 1.0559441627006493e-05, "loss": 0.5811, "step": 35175 }, { "epoch": 0.9658429434376716, "grad_norm": 0.45257657766342163, "learning_rate": 1.0559010409465036e-05, "loss": 0.4665, "step": 35176 }, { "epoch": 0.9658704008786381, "grad_norm": 0.36065351963043213, "learning_rate": 1.0558579190880848e-05, "loss": 0.4051, "step": 35177 }, { "epoch": 0.9658978583196046, "grad_norm": 0.4357685446739197, "learning_rate": 1.0558147971254734e-05, "loss": 0.4756, "step": 35178 }, { "epoch": 0.9659253157605712, "grad_norm": 0.48981696367263794, "learning_rate": 1.0557716750587492e-05, "loss": 0.4548, "step": 35179 }, { "epoch": 0.9659527732015376, "grad_norm": 0.4535842537879944, "learning_rate": 1.0557285528879932e-05, "loss": 0.4248, "step": 35180 }, { "epoch": 0.9659802306425042, "grad_norm": 0.3603714108467102, "learning_rate": 1.0556854306132855e-05, "loss": 0.4412, "step": 35181 }, { "epoch": 0.9660076880834706, "grad_norm": 0.3603188395500183, "learning_rate": 1.055642308234707e-05, "loss": 0.4841, "step": 35182 }, { "epoch": 0.9660351455244371, "grad_norm": 0.40588152408599854, "learning_rate": 1.0555991857523378e-05, "loss": 0.4654, "step": 35183 }, { "epoch": 0.9660626029654036, "grad_norm": 0.47059527039527893, "learning_rate": 1.055556063166258e-05, "loss": 0.5162, "step": 35184 }, { "epoch": 0.9660900604063701, "grad_norm": 0.4862455129623413, "learning_rate": 1.0555129404765488e-05, "loss": 0.5763, "step": 35185 }, { "epoch": 0.9661175178473367, "grad_norm": 0.35452792048454285, "learning_rate": 1.0554698176832902e-05, "loss": 0.47, "step": 35186 }, { "epoch": 0.9661449752883031, "grad_norm": 0.4169101417064667, "learning_rate": 1.0554266947865625e-05, "loss": 0.5494, "step": 35187 }, { "epoch": 0.9661724327292697, "grad_norm": 0.4525775611400604, "learning_rate": 1.0553835717864464e-05, "loss": 0.495, "step": 35188 }, { "epoch": 0.9661998901702361, "grad_norm": 0.3601551651954651, "learning_rate": 1.0553404486830224e-05, "loss": 0.4646, "step": 35189 }, { "epoch": 0.9662273476112027, "grad_norm": 0.38217419385910034, "learning_rate": 1.0552973254763707e-05, "loss": 0.5612, "step": 35190 }, { "epoch": 0.9662548050521691, "grad_norm": 0.40226560831069946, "learning_rate": 1.055254202166572e-05, "loss": 0.468, "step": 35191 }, { "epoch": 0.9662822624931356, "grad_norm": 0.4273045063018799, "learning_rate": 1.0552110787537063e-05, "loss": 0.4992, "step": 35192 }, { "epoch": 0.9663097199341022, "grad_norm": 0.4824993908405304, "learning_rate": 1.0551679552378544e-05, "loss": 0.4656, "step": 35193 }, { "epoch": 0.9663371773750686, "grad_norm": 0.35287633538246155, "learning_rate": 1.0551248316190965e-05, "loss": 0.4669, "step": 35194 }, { "epoch": 0.9663646348160352, "grad_norm": 0.4394061863422394, "learning_rate": 1.0550817078975135e-05, "loss": 0.447, "step": 35195 }, { "epoch": 0.9663920922570016, "grad_norm": 0.4234198033809662, "learning_rate": 1.0550385840731856e-05, "loss": 0.5361, "step": 35196 }, { "epoch": 0.9664195496979682, "grad_norm": 0.3381689488887787, "learning_rate": 1.0549954601461927e-05, "loss": 0.5038, "step": 35197 }, { "epoch": 0.9664470071389346, "grad_norm": 0.38648736476898193, "learning_rate": 1.054952336116616e-05, "loss": 0.5509, "step": 35198 }, { "epoch": 0.9664744645799012, "grad_norm": 0.3836428225040436, "learning_rate": 1.0549092119845359e-05, "loss": 0.4694, "step": 35199 }, { "epoch": 0.9665019220208677, "grad_norm": 0.3683060109615326, "learning_rate": 1.0548660877500323e-05, "loss": 0.5108, "step": 35200 }, { "epoch": 0.9665293794618341, "grad_norm": 0.40123680233955383, "learning_rate": 1.0548229634131859e-05, "loss": 0.5055, "step": 35201 }, { "epoch": 0.9665568369028007, "grad_norm": 0.42846211791038513, "learning_rate": 1.0547798389740773e-05, "loss": 0.5492, "step": 35202 }, { "epoch": 0.9665842943437671, "grad_norm": 0.4050486981868744, "learning_rate": 1.0547367144327866e-05, "loss": 0.4686, "step": 35203 }, { "epoch": 0.9666117517847337, "grad_norm": 0.547946035861969, "learning_rate": 1.0546935897893948e-05, "loss": 0.597, "step": 35204 }, { "epoch": 0.9666392092257001, "grad_norm": 0.39013683795928955, "learning_rate": 1.0546504650439818e-05, "loss": 0.5224, "step": 35205 }, { "epoch": 0.9666666666666667, "grad_norm": 0.3873266279697418, "learning_rate": 1.0546073401966283e-05, "loss": 0.4872, "step": 35206 }, { "epoch": 0.9666941241076332, "grad_norm": 0.8980872631072998, "learning_rate": 1.0545642152474148e-05, "loss": 0.5151, "step": 35207 }, { "epoch": 0.9667215815485997, "grad_norm": 0.48618969321250916, "learning_rate": 1.0545210901964214e-05, "loss": 0.4614, "step": 35208 }, { "epoch": 0.9667490389895662, "grad_norm": 0.37211450934410095, "learning_rate": 1.0544779650437288e-05, "loss": 0.4552, "step": 35209 }, { "epoch": 0.9667764964305327, "grad_norm": 0.42884454131126404, "learning_rate": 1.0544348397894176e-05, "loss": 0.5103, "step": 35210 }, { "epoch": 0.9668039538714992, "grad_norm": 0.42203205823898315, "learning_rate": 1.0543917144335679e-05, "loss": 0.4653, "step": 35211 }, { "epoch": 0.9668314113124656, "grad_norm": 0.38505497574806213, "learning_rate": 1.05434858897626e-05, "loss": 0.481, "step": 35212 }, { "epoch": 0.9668588687534322, "grad_norm": 0.36655324697494507, "learning_rate": 1.0543054634175751e-05, "loss": 0.497, "step": 35213 }, { "epoch": 0.9668863261943987, "grad_norm": 0.39480558037757874, "learning_rate": 1.0542623377575932e-05, "loss": 0.4578, "step": 35214 }, { "epoch": 0.9669137836353652, "grad_norm": 0.41965165734291077, "learning_rate": 1.0542192119963943e-05, "loss": 0.5423, "step": 35215 }, { "epoch": 0.9669412410763317, "grad_norm": 0.382213294506073, "learning_rate": 1.0541760861340596e-05, "loss": 0.5494, "step": 35216 }, { "epoch": 0.9669686985172982, "grad_norm": 0.4118514657020569, "learning_rate": 1.0541329601706691e-05, "loss": 0.4941, "step": 35217 }, { "epoch": 0.9669961559582647, "grad_norm": 0.39539626240730286, "learning_rate": 1.0540898341063033e-05, "loss": 0.4422, "step": 35218 }, { "epoch": 0.9670236133992312, "grad_norm": 0.35536989569664, "learning_rate": 1.0540467079410428e-05, "loss": 0.4879, "step": 35219 }, { "epoch": 0.9670510708401977, "grad_norm": 0.4473486542701721, "learning_rate": 1.0540035816749678e-05, "loss": 0.5038, "step": 35220 }, { "epoch": 0.9670785282811641, "grad_norm": 0.4342859983444214, "learning_rate": 1.053960455308159e-05, "loss": 0.5208, "step": 35221 }, { "epoch": 0.9671059857221307, "grad_norm": 0.39077940583229065, "learning_rate": 1.053917328840697e-05, "loss": 0.4907, "step": 35222 }, { "epoch": 0.9671334431630972, "grad_norm": 0.3444693684577942, "learning_rate": 1.0538742022726613e-05, "loss": 0.4775, "step": 35223 }, { "epoch": 0.9671609006040637, "grad_norm": 0.3865779638290405, "learning_rate": 1.0538310756041337e-05, "loss": 0.4334, "step": 35224 }, { "epoch": 0.9671883580450302, "grad_norm": 0.5065836906433105, "learning_rate": 1.0537879488351933e-05, "loss": 0.4743, "step": 35225 }, { "epoch": 0.9672158154859967, "grad_norm": 0.3807596266269684, "learning_rate": 1.053744821965922e-05, "loss": 0.4907, "step": 35226 }, { "epoch": 0.9672432729269632, "grad_norm": 0.3936893343925476, "learning_rate": 1.053701694996399e-05, "loss": 0.4417, "step": 35227 }, { "epoch": 0.9672707303679297, "grad_norm": 0.36001309752464294, "learning_rate": 1.0536585679267051e-05, "loss": 0.3984, "step": 35228 }, { "epoch": 0.9672981878088962, "grad_norm": 0.3549315929412842, "learning_rate": 1.053615440756921e-05, "loss": 0.4323, "step": 35229 }, { "epoch": 0.9673256452498628, "grad_norm": 0.434817910194397, "learning_rate": 1.0535723134871267e-05, "loss": 0.5069, "step": 35230 }, { "epoch": 0.9673531026908292, "grad_norm": 0.47934338450431824, "learning_rate": 1.0535291861174033e-05, "loss": 0.4308, "step": 35231 }, { "epoch": 0.9673805601317957, "grad_norm": 0.344530314207077, "learning_rate": 1.053486058647831e-05, "loss": 0.3591, "step": 35232 }, { "epoch": 0.9674080175727622, "grad_norm": 0.4028472900390625, "learning_rate": 1.0534429310784897e-05, "loss": 0.5576, "step": 35233 }, { "epoch": 0.9674354750137287, "grad_norm": 0.38141193985939026, "learning_rate": 1.0533998034094607e-05, "loss": 0.5124, "step": 35234 }, { "epoch": 0.9674629324546952, "grad_norm": 0.42573490738868713, "learning_rate": 1.0533566756408237e-05, "loss": 0.4404, "step": 35235 }, { "epoch": 0.9674903898956617, "grad_norm": 0.4179020822048187, "learning_rate": 1.0533135477726594e-05, "loss": 0.4503, "step": 35236 }, { "epoch": 0.9675178473366283, "grad_norm": 0.35687190294265747, "learning_rate": 1.0532704198050487e-05, "loss": 0.4617, "step": 35237 }, { "epoch": 0.9675453047775947, "grad_norm": 0.3918001651763916, "learning_rate": 1.0532272917380713e-05, "loss": 0.5092, "step": 35238 }, { "epoch": 0.9675727622185613, "grad_norm": 0.4563337564468384, "learning_rate": 1.0531841635718083e-05, "loss": 0.453, "step": 35239 }, { "epoch": 0.9676002196595277, "grad_norm": 0.36510682106018066, "learning_rate": 1.0531410353063398e-05, "loss": 0.5322, "step": 35240 }, { "epoch": 0.9676276771004942, "grad_norm": 0.3872332274913788, "learning_rate": 1.0530979069417463e-05, "loss": 0.5008, "step": 35241 }, { "epoch": 0.9676551345414607, "grad_norm": 0.45500314235687256, "learning_rate": 1.0530547784781083e-05, "loss": 0.5574, "step": 35242 }, { "epoch": 0.9676825919824272, "grad_norm": 0.37396931648254395, "learning_rate": 1.0530116499155059e-05, "loss": 0.4345, "step": 35243 }, { "epoch": 0.9677100494233938, "grad_norm": 0.327628493309021, "learning_rate": 1.0529685212540202e-05, "loss": 0.4362, "step": 35244 }, { "epoch": 0.9677375068643602, "grad_norm": 0.42485496401786804, "learning_rate": 1.0529253924937311e-05, "loss": 0.4356, "step": 35245 }, { "epoch": 0.9677649643053268, "grad_norm": 0.3608970046043396, "learning_rate": 1.0528822636347191e-05, "loss": 0.4904, "step": 35246 }, { "epoch": 0.9677924217462932, "grad_norm": 0.44611889123916626, "learning_rate": 1.0528391346770653e-05, "loss": 0.6327, "step": 35247 }, { "epoch": 0.9678198791872598, "grad_norm": 0.45783281326293945, "learning_rate": 1.0527960056208494e-05, "loss": 0.5318, "step": 35248 }, { "epoch": 0.9678473366282262, "grad_norm": 0.34609296917915344, "learning_rate": 1.052752876466152e-05, "loss": 0.4791, "step": 35249 }, { "epoch": 0.9678747940691927, "grad_norm": 0.4316384196281433, "learning_rate": 1.0527097472130537e-05, "loss": 0.4989, "step": 35250 }, { "epoch": 0.9679022515101593, "grad_norm": 0.4224710166454315, "learning_rate": 1.0526666178616347e-05, "loss": 0.4596, "step": 35251 }, { "epoch": 0.9679297089511257, "grad_norm": 0.41421663761138916, "learning_rate": 1.0526234884119759e-05, "loss": 0.423, "step": 35252 }, { "epoch": 0.9679571663920923, "grad_norm": 0.5299590826034546, "learning_rate": 1.0525803588641576e-05, "loss": 0.4575, "step": 35253 }, { "epoch": 0.9679846238330587, "grad_norm": 0.3687589764595032, "learning_rate": 1.0525372292182598e-05, "loss": 0.4962, "step": 35254 }, { "epoch": 0.9680120812740253, "grad_norm": 0.39064204692840576, "learning_rate": 1.0524940994743637e-05, "loss": 0.4771, "step": 35255 }, { "epoch": 0.9680395387149917, "grad_norm": 0.3631851375102997, "learning_rate": 1.0524509696325491e-05, "loss": 0.4643, "step": 35256 }, { "epoch": 0.9680669961559583, "grad_norm": 0.5421507358551025, "learning_rate": 1.0524078396928968e-05, "loss": 0.4116, "step": 35257 }, { "epoch": 0.9680944535969248, "grad_norm": 0.46866410970687866, "learning_rate": 1.0523647096554872e-05, "loss": 0.4194, "step": 35258 }, { "epoch": 0.9681219110378912, "grad_norm": 0.5175915956497192, "learning_rate": 1.0523215795204005e-05, "loss": 0.4652, "step": 35259 }, { "epoch": 0.9681493684788578, "grad_norm": 0.356082558631897, "learning_rate": 1.0522784492877176e-05, "loss": 0.5102, "step": 35260 }, { "epoch": 0.9681768259198242, "grad_norm": 0.44077247381210327, "learning_rate": 1.0522353189575183e-05, "loss": 0.4829, "step": 35261 }, { "epoch": 0.9682042833607908, "grad_norm": 0.4087403118610382, "learning_rate": 1.0521921885298839e-05, "loss": 0.5354, "step": 35262 }, { "epoch": 0.9682317408017572, "grad_norm": 0.4179949462413788, "learning_rate": 1.0521490580048943e-05, "loss": 0.4846, "step": 35263 }, { "epoch": 0.9682591982427238, "grad_norm": 0.36364561319351196, "learning_rate": 1.05210592738263e-05, "loss": 0.4447, "step": 35264 }, { "epoch": 0.9682866556836903, "grad_norm": 0.4151965081691742, "learning_rate": 1.0520627966631715e-05, "loss": 0.4686, "step": 35265 }, { "epoch": 0.9683141131246568, "grad_norm": 0.37374067306518555, "learning_rate": 1.052019665846599e-05, "loss": 0.4936, "step": 35266 }, { "epoch": 0.9683415705656233, "grad_norm": 0.49310266971588135, "learning_rate": 1.0519765349329938e-05, "loss": 0.5591, "step": 35267 }, { "epoch": 0.9683690280065897, "grad_norm": 0.37933409214019775, "learning_rate": 1.0519334039224357e-05, "loss": 0.4465, "step": 35268 }, { "epoch": 0.9683964854475563, "grad_norm": 0.3648313581943512, "learning_rate": 1.0518902728150046e-05, "loss": 0.439, "step": 35269 }, { "epoch": 0.9684239428885227, "grad_norm": 0.38375306129455566, "learning_rate": 1.0518471416107822e-05, "loss": 0.6307, "step": 35270 }, { "epoch": 0.9684514003294893, "grad_norm": 0.3378183841705322, "learning_rate": 1.051804010309848e-05, "loss": 0.4507, "step": 35271 }, { "epoch": 0.9684788577704558, "grad_norm": 0.4398214817047119, "learning_rate": 1.0517608789122829e-05, "loss": 0.4443, "step": 35272 }, { "epoch": 0.9685063152114223, "grad_norm": 0.3931901156902313, "learning_rate": 1.0517177474181672e-05, "loss": 0.4875, "step": 35273 }, { "epoch": 0.9685337726523888, "grad_norm": 0.37506967782974243, "learning_rate": 1.0516746158275814e-05, "loss": 0.4567, "step": 35274 }, { "epoch": 0.9685612300933553, "grad_norm": 0.38987135887145996, "learning_rate": 1.0516314841406061e-05, "loss": 0.5107, "step": 35275 }, { "epoch": 0.9685886875343218, "grad_norm": 0.3943865895271301, "learning_rate": 1.0515883523573213e-05, "loss": 0.4887, "step": 35276 }, { "epoch": 0.9686161449752883, "grad_norm": 0.39142268896102905, "learning_rate": 1.0515452204778082e-05, "loss": 0.4862, "step": 35277 }, { "epoch": 0.9686436024162548, "grad_norm": 0.4039900302886963, "learning_rate": 1.0515020885021465e-05, "loss": 0.5247, "step": 35278 }, { "epoch": 0.9686710598572214, "grad_norm": 0.3889152407646179, "learning_rate": 1.051458956430417e-05, "loss": 0.5058, "step": 35279 }, { "epoch": 0.9686985172981878, "grad_norm": 0.5194513201713562, "learning_rate": 1.0514158242627004e-05, "loss": 0.5264, "step": 35280 }, { "epoch": 0.9687259747391543, "grad_norm": 0.4398387372493744, "learning_rate": 1.0513726919990763e-05, "loss": 0.5597, "step": 35281 }, { "epoch": 0.9687534321801208, "grad_norm": 0.3662051856517792, "learning_rate": 1.0513295596396262e-05, "loss": 0.4112, "step": 35282 }, { "epoch": 0.9687808896210873, "grad_norm": 0.37442800402641296, "learning_rate": 1.0512864271844298e-05, "loss": 0.4814, "step": 35283 }, { "epoch": 0.9688083470620538, "grad_norm": 0.40492838621139526, "learning_rate": 1.0512432946335682e-05, "loss": 0.4686, "step": 35284 }, { "epoch": 0.9688358045030203, "grad_norm": 0.3571101427078247, "learning_rate": 1.0512001619871213e-05, "loss": 0.4343, "step": 35285 }, { "epoch": 0.9688632619439869, "grad_norm": 0.41798287630081177, "learning_rate": 1.0511570292451695e-05, "loss": 0.6321, "step": 35286 }, { "epoch": 0.9688907193849533, "grad_norm": 0.3878006637096405, "learning_rate": 1.0511138964077941e-05, "loss": 0.456, "step": 35287 }, { "epoch": 0.9689181768259199, "grad_norm": 0.4293641746044159, "learning_rate": 1.0510707634750746e-05, "loss": 0.4226, "step": 35288 }, { "epoch": 0.9689456342668863, "grad_norm": 0.38315150141716003, "learning_rate": 1.0510276304470919e-05, "loss": 0.5005, "step": 35289 }, { "epoch": 0.9689730917078528, "grad_norm": 0.3696073889732361, "learning_rate": 1.050984497323926e-05, "loss": 0.4421, "step": 35290 }, { "epoch": 0.9690005491488193, "grad_norm": 0.3762882947921753, "learning_rate": 1.0509413641056583e-05, "loss": 0.5326, "step": 35291 }, { "epoch": 0.9690280065897858, "grad_norm": 0.3689131736755371, "learning_rate": 1.0508982307923686e-05, "loss": 0.4537, "step": 35292 }, { "epoch": 0.9690554640307524, "grad_norm": 0.46575266122817993, "learning_rate": 1.0508550973841372e-05, "loss": 0.6473, "step": 35293 }, { "epoch": 0.9690829214717188, "grad_norm": 0.40208199620246887, "learning_rate": 1.050811963881045e-05, "loss": 0.5051, "step": 35294 }, { "epoch": 0.9691103789126854, "grad_norm": 0.35573604702949524, "learning_rate": 1.050768830283172e-05, "loss": 0.4128, "step": 35295 }, { "epoch": 0.9691378363536518, "grad_norm": 0.5084167718887329, "learning_rate": 1.0507256965905995e-05, "loss": 0.4701, "step": 35296 }, { "epoch": 0.9691652937946184, "grad_norm": 0.37014779448509216, "learning_rate": 1.050682562803407e-05, "loss": 0.5334, "step": 35297 }, { "epoch": 0.9691927512355848, "grad_norm": 0.3922448754310608, "learning_rate": 1.0506394289216752e-05, "loss": 0.4568, "step": 35298 }, { "epoch": 0.9692202086765513, "grad_norm": 0.40157178044319153, "learning_rate": 1.0505962949454852e-05, "loss": 0.4894, "step": 35299 }, { "epoch": 0.9692476661175179, "grad_norm": 0.3696901798248291, "learning_rate": 1.0505531608749162e-05, "loss": 0.4434, "step": 35300 }, { "epoch": 0.9692751235584843, "grad_norm": 0.3998410105705261, "learning_rate": 1.05051002671005e-05, "loss": 0.41, "step": 35301 }, { "epoch": 0.9693025809994509, "grad_norm": 0.40089818835258484, "learning_rate": 1.0504668924509664e-05, "loss": 0.524, "step": 35302 }, { "epoch": 0.9693300384404173, "grad_norm": 0.4077780246734619, "learning_rate": 1.0504237580977459e-05, "loss": 0.532, "step": 35303 }, { "epoch": 0.9693574958813839, "grad_norm": 0.571808397769928, "learning_rate": 1.050380623650469e-05, "loss": 0.5187, "step": 35304 }, { "epoch": 0.9693849533223503, "grad_norm": 0.39623314142227173, "learning_rate": 1.050337489109216e-05, "loss": 0.4226, "step": 35305 }, { "epoch": 0.9694124107633169, "grad_norm": 0.41927003860473633, "learning_rate": 1.0502943544740678e-05, "loss": 0.5073, "step": 35306 }, { "epoch": 0.9694398682042834, "grad_norm": 0.42365169525146484, "learning_rate": 1.0502512197451045e-05, "loss": 0.4597, "step": 35307 }, { "epoch": 0.9694673256452498, "grad_norm": 0.39856570959091187, "learning_rate": 1.0502080849224063e-05, "loss": 0.483, "step": 35308 }, { "epoch": 0.9694947830862164, "grad_norm": 0.4248426556587219, "learning_rate": 1.0501649500060544e-05, "loss": 0.4632, "step": 35309 }, { "epoch": 0.9695222405271828, "grad_norm": 0.3828819692134857, "learning_rate": 1.050121814996129e-05, "loss": 0.4544, "step": 35310 }, { "epoch": 0.9695496979681494, "grad_norm": 0.4011395275592804, "learning_rate": 1.0500786798927098e-05, "loss": 0.5924, "step": 35311 }, { "epoch": 0.9695771554091158, "grad_norm": 0.46283507347106934, "learning_rate": 1.0500355446958784e-05, "loss": 0.6021, "step": 35312 }, { "epoch": 0.9696046128500824, "grad_norm": 0.5221994519233704, "learning_rate": 1.0499924094057144e-05, "loss": 0.4555, "step": 35313 }, { "epoch": 0.9696320702910489, "grad_norm": 0.45823419094085693, "learning_rate": 1.049949274022299e-05, "loss": 0.4055, "step": 35314 }, { "epoch": 0.9696595277320154, "grad_norm": 0.36934179067611694, "learning_rate": 1.049906138545712e-05, "loss": 0.4038, "step": 35315 }, { "epoch": 0.9696869851729819, "grad_norm": 0.3776303827762604, "learning_rate": 1.049863002976034e-05, "loss": 0.5189, "step": 35316 }, { "epoch": 0.9697144426139483, "grad_norm": 0.42835167050361633, "learning_rate": 1.049819867313346e-05, "loss": 0.4966, "step": 35317 }, { "epoch": 0.9697419000549149, "grad_norm": 0.377970814704895, "learning_rate": 1.0497767315577274e-05, "loss": 0.4941, "step": 35318 }, { "epoch": 0.9697693574958813, "grad_norm": 0.4862080216407776, "learning_rate": 1.0497335957092598e-05, "loss": 0.5093, "step": 35319 }, { "epoch": 0.9697968149368479, "grad_norm": 0.3646729290485382, "learning_rate": 1.0496904597680233e-05, "loss": 0.465, "step": 35320 }, { "epoch": 0.9698242723778144, "grad_norm": 0.4391828179359436, "learning_rate": 1.0496473237340978e-05, "loss": 0.483, "step": 35321 }, { "epoch": 0.9698517298187809, "grad_norm": 0.38260188698768616, "learning_rate": 1.0496041876075645e-05, "loss": 0.5498, "step": 35322 }, { "epoch": 0.9698791872597474, "grad_norm": 0.3941715359687805, "learning_rate": 1.0495610513885034e-05, "loss": 0.4861, "step": 35323 }, { "epoch": 0.9699066447007139, "grad_norm": 0.39455848932266235, "learning_rate": 1.0495179150769952e-05, "loss": 0.4654, "step": 35324 }, { "epoch": 0.9699341021416804, "grad_norm": 0.38729339838027954, "learning_rate": 1.0494747786731203e-05, "loss": 0.547, "step": 35325 }, { "epoch": 0.9699615595826468, "grad_norm": 0.41798803210258484, "learning_rate": 1.0494316421769591e-05, "loss": 0.5022, "step": 35326 }, { "epoch": 0.9699890170236134, "grad_norm": 0.4365840256214142, "learning_rate": 1.049388505588592e-05, "loss": 0.5822, "step": 35327 }, { "epoch": 0.97001647446458, "grad_norm": 0.38248059153556824, "learning_rate": 1.0493453689081e-05, "loss": 0.4805, "step": 35328 }, { "epoch": 0.9700439319055464, "grad_norm": 0.3681471049785614, "learning_rate": 1.0493022321355624e-05, "loss": 0.4746, "step": 35329 }, { "epoch": 0.9700713893465129, "grad_norm": 0.353360652923584, "learning_rate": 1.049259095271061e-05, "loss": 0.4517, "step": 35330 }, { "epoch": 0.9700988467874794, "grad_norm": 0.40918052196502686, "learning_rate": 1.0492159583146753e-05, "loss": 0.4803, "step": 35331 }, { "epoch": 0.9701263042284459, "grad_norm": 0.447299063205719, "learning_rate": 1.0491728212664865e-05, "loss": 0.5057, "step": 35332 }, { "epoch": 0.9701537616694124, "grad_norm": 0.37964752316474915, "learning_rate": 1.0491296841265745e-05, "loss": 0.4548, "step": 35333 }, { "epoch": 0.9701812191103789, "grad_norm": 0.9088616967201233, "learning_rate": 1.04908654689502e-05, "loss": 0.5351, "step": 35334 }, { "epoch": 0.9702086765513455, "grad_norm": 0.43339815735816956, "learning_rate": 1.0490434095719033e-05, "loss": 0.5219, "step": 35335 }, { "epoch": 0.9702361339923119, "grad_norm": 0.3695591986179352, "learning_rate": 1.049000272157305e-05, "loss": 0.3895, "step": 35336 }, { "epoch": 0.9702635914332784, "grad_norm": 0.35835427045822144, "learning_rate": 1.0489571346513057e-05, "loss": 0.4565, "step": 35337 }, { "epoch": 0.9702910488742449, "grad_norm": 0.4142257571220398, "learning_rate": 1.0489139970539859e-05, "loss": 0.4117, "step": 35338 }, { "epoch": 0.9703185063152114, "grad_norm": 0.44836097955703735, "learning_rate": 1.0488708593654253e-05, "loss": 0.5355, "step": 35339 }, { "epoch": 0.9703459637561779, "grad_norm": 0.39956575632095337, "learning_rate": 1.0488277215857054e-05, "loss": 0.4669, "step": 35340 }, { "epoch": 0.9703734211971444, "grad_norm": 0.3683744966983795, "learning_rate": 1.0487845837149062e-05, "loss": 0.4581, "step": 35341 }, { "epoch": 0.970400878638111, "grad_norm": 0.4864336848258972, "learning_rate": 1.048741445753108e-05, "loss": 0.5039, "step": 35342 }, { "epoch": 0.9704283360790774, "grad_norm": 0.37478581070899963, "learning_rate": 1.0486983077003916e-05, "loss": 0.4951, "step": 35343 }, { "epoch": 0.970455793520044, "grad_norm": 0.3930501937866211, "learning_rate": 1.0486551695568372e-05, "loss": 0.4844, "step": 35344 }, { "epoch": 0.9704832509610104, "grad_norm": 0.4828488826751709, "learning_rate": 1.0486120313225255e-05, "loss": 0.4417, "step": 35345 }, { "epoch": 0.970510708401977, "grad_norm": 0.44286757707595825, "learning_rate": 1.0485688929975368e-05, "loss": 0.5165, "step": 35346 }, { "epoch": 0.9705381658429434, "grad_norm": 0.5261597037315369, "learning_rate": 1.0485257545819514e-05, "loss": 0.4865, "step": 35347 }, { "epoch": 0.9705656232839099, "grad_norm": 0.4111712574958801, "learning_rate": 1.0484826160758503e-05, "loss": 0.5037, "step": 35348 }, { "epoch": 0.9705930807248765, "grad_norm": 0.5316330194473267, "learning_rate": 1.0484394774793134e-05, "loss": 0.5375, "step": 35349 }, { "epoch": 0.9706205381658429, "grad_norm": 0.35026755928993225, "learning_rate": 1.0483963387924216e-05, "loss": 0.4055, "step": 35350 }, { "epoch": 0.9706479956068095, "grad_norm": 0.41622406244277954, "learning_rate": 1.0483532000152552e-05, "loss": 0.5322, "step": 35351 }, { "epoch": 0.9706754530477759, "grad_norm": 0.41895249485969543, "learning_rate": 1.0483100611478944e-05, "loss": 0.5284, "step": 35352 }, { "epoch": 0.9707029104887425, "grad_norm": 0.3625195324420929, "learning_rate": 1.0482669221904203e-05, "loss": 0.415, "step": 35353 }, { "epoch": 0.9707303679297089, "grad_norm": 0.4268939793109894, "learning_rate": 1.0482237831429125e-05, "loss": 0.4894, "step": 35354 }, { "epoch": 0.9707578253706755, "grad_norm": 0.3716507852077484, "learning_rate": 1.0481806440054522e-05, "loss": 0.4924, "step": 35355 }, { "epoch": 0.970785282811642, "grad_norm": 0.8084063529968262, "learning_rate": 1.0481375047781199e-05, "loss": 0.576, "step": 35356 }, { "epoch": 0.9708127402526084, "grad_norm": 0.48432406783103943, "learning_rate": 1.0480943654609954e-05, "loss": 0.4939, "step": 35357 }, { "epoch": 0.970840197693575, "grad_norm": 0.41810891032218933, "learning_rate": 1.0480512260541597e-05, "loss": 0.4876, "step": 35358 }, { "epoch": 0.9708676551345414, "grad_norm": 0.37597423791885376, "learning_rate": 1.0480080865576931e-05, "loss": 0.4973, "step": 35359 }, { "epoch": 0.970895112575508, "grad_norm": 0.390659362077713, "learning_rate": 1.0479649469716762e-05, "loss": 0.4367, "step": 35360 }, { "epoch": 0.9709225700164744, "grad_norm": 0.3939586281776428, "learning_rate": 1.0479218072961892e-05, "loss": 0.4013, "step": 35361 }, { "epoch": 0.970950027457441, "grad_norm": 0.4134536683559418, "learning_rate": 1.0478786675313128e-05, "loss": 0.4821, "step": 35362 }, { "epoch": 0.9709774848984075, "grad_norm": 0.40027040243148804, "learning_rate": 1.0478355276771275e-05, "loss": 0.4297, "step": 35363 }, { "epoch": 0.971004942339374, "grad_norm": 0.4115735590457916, "learning_rate": 1.0477923877337135e-05, "loss": 0.4577, "step": 35364 }, { "epoch": 0.9710323997803405, "grad_norm": 0.427608847618103, "learning_rate": 1.0477492477011514e-05, "loss": 0.5014, "step": 35365 }, { "epoch": 0.9710598572213069, "grad_norm": 0.36481326818466187, "learning_rate": 1.047706107579522e-05, "loss": 0.5228, "step": 35366 }, { "epoch": 0.9710873146622735, "grad_norm": 0.3926599621772766, "learning_rate": 1.0476629673689052e-05, "loss": 0.5393, "step": 35367 }, { "epoch": 0.9711147721032399, "grad_norm": 0.37156835198402405, "learning_rate": 1.047619827069382e-05, "loss": 0.5211, "step": 35368 }, { "epoch": 0.9711422295442065, "grad_norm": 0.3369171917438507, "learning_rate": 1.0475766866810326e-05, "loss": 0.4539, "step": 35369 }, { "epoch": 0.971169686985173, "grad_norm": 0.3951667547225952, "learning_rate": 1.0475335462039373e-05, "loss": 0.4646, "step": 35370 }, { "epoch": 0.9711971444261395, "grad_norm": 0.46727484464645386, "learning_rate": 1.047490405638177e-05, "loss": 0.4513, "step": 35371 }, { "epoch": 0.971224601867106, "grad_norm": 0.44600722193717957, "learning_rate": 1.0474472649838318e-05, "loss": 0.46, "step": 35372 }, { "epoch": 0.9712520593080725, "grad_norm": 0.3673432171344757, "learning_rate": 1.047404124240982e-05, "loss": 0.4174, "step": 35373 }, { "epoch": 0.971279516749039, "grad_norm": 0.44221073389053345, "learning_rate": 1.047360983409709e-05, "loss": 0.5845, "step": 35374 }, { "epoch": 0.9713069741900054, "grad_norm": 0.39678990840911865, "learning_rate": 1.047317842490092e-05, "loss": 0.4527, "step": 35375 }, { "epoch": 0.971334431630972, "grad_norm": 0.4277998208999634, "learning_rate": 1.0472747014822127e-05, "loss": 0.4347, "step": 35376 }, { "epoch": 0.9713618890719385, "grad_norm": 0.3877805769443512, "learning_rate": 1.0472315603861507e-05, "loss": 0.4699, "step": 35377 }, { "epoch": 0.971389346512905, "grad_norm": 0.36032816767692566, "learning_rate": 1.0471884192019867e-05, "loss": 0.4557, "step": 35378 }, { "epoch": 0.9714168039538715, "grad_norm": 0.40084442496299744, "learning_rate": 1.0471452779298015e-05, "loss": 0.4652, "step": 35379 }, { "epoch": 0.971444261394838, "grad_norm": 0.3588332533836365, "learning_rate": 1.047102136569675e-05, "loss": 0.4735, "step": 35380 }, { "epoch": 0.9714717188358045, "grad_norm": 0.3769461512565613, "learning_rate": 1.0470589951216883e-05, "loss": 0.4502, "step": 35381 }, { "epoch": 0.971499176276771, "grad_norm": 0.41019707918167114, "learning_rate": 1.0470158535859214e-05, "loss": 0.4452, "step": 35382 }, { "epoch": 0.9715266337177375, "grad_norm": 0.3764742910861969, "learning_rate": 1.0469727119624548e-05, "loss": 0.4747, "step": 35383 }, { "epoch": 0.9715540911587041, "grad_norm": 0.38040587306022644, "learning_rate": 1.0469295702513693e-05, "loss": 0.4518, "step": 35384 }, { "epoch": 0.9715815485996705, "grad_norm": 0.38592129945755005, "learning_rate": 1.0468864284527451e-05, "loss": 0.4655, "step": 35385 }, { "epoch": 0.971609006040637, "grad_norm": 0.35410210490226746, "learning_rate": 1.0468432865666627e-05, "loss": 0.4987, "step": 35386 }, { "epoch": 0.9716364634816035, "grad_norm": 0.621239960193634, "learning_rate": 1.0468001445932028e-05, "loss": 0.4679, "step": 35387 }, { "epoch": 0.97166392092257, "grad_norm": 0.40800392627716064, "learning_rate": 1.0467570025324455e-05, "loss": 0.4619, "step": 35388 }, { "epoch": 0.9716913783635365, "grad_norm": 0.4141329824924469, "learning_rate": 1.0467138603844715e-05, "loss": 0.468, "step": 35389 }, { "epoch": 0.971718835804503, "grad_norm": 0.41259297728538513, "learning_rate": 1.0466707181493614e-05, "loss": 0.4847, "step": 35390 }, { "epoch": 0.9717462932454696, "grad_norm": 0.41452136635780334, "learning_rate": 1.0466275758271953e-05, "loss": 0.5279, "step": 35391 }, { "epoch": 0.971773750686436, "grad_norm": 0.3620988726615906, "learning_rate": 1.046584433418054e-05, "loss": 0.4825, "step": 35392 }, { "epoch": 0.9718012081274026, "grad_norm": 0.4282592833042145, "learning_rate": 1.0465412909220176e-05, "loss": 0.4838, "step": 35393 }, { "epoch": 0.971828665568369, "grad_norm": 0.3774849474430084, "learning_rate": 1.0464981483391671e-05, "loss": 0.4821, "step": 35394 }, { "epoch": 0.9718561230093355, "grad_norm": 0.42511358857154846, "learning_rate": 1.046455005669583e-05, "loss": 0.5358, "step": 35395 }, { "epoch": 0.971883580450302, "grad_norm": 0.4402100741863251, "learning_rate": 1.046411862913345e-05, "loss": 0.5471, "step": 35396 }, { "epoch": 0.9719110378912685, "grad_norm": 0.4291347563266754, "learning_rate": 1.0463687200705343e-05, "loss": 0.4387, "step": 35397 }, { "epoch": 0.9719384953322351, "grad_norm": 0.3888815939426422, "learning_rate": 1.0463255771412311e-05, "loss": 0.5291, "step": 35398 }, { "epoch": 0.9719659527732015, "grad_norm": 0.3810955286026001, "learning_rate": 1.0462824341255158e-05, "loss": 0.5237, "step": 35399 }, { "epoch": 0.9719934102141681, "grad_norm": 0.38539084792137146, "learning_rate": 1.0462392910234692e-05, "loss": 0.5618, "step": 35400 }, { "epoch": 0.9720208676551345, "grad_norm": 0.4059462249279022, "learning_rate": 1.0461961478351712e-05, "loss": 0.4944, "step": 35401 }, { "epoch": 0.9720483250961011, "grad_norm": 0.40650659799575806, "learning_rate": 1.046153004560703e-05, "loss": 0.5987, "step": 35402 }, { "epoch": 0.9720757825370675, "grad_norm": 0.44722166657447815, "learning_rate": 1.0461098612001449e-05, "loss": 0.4993, "step": 35403 }, { "epoch": 0.972103239978034, "grad_norm": 0.4423178732395172, "learning_rate": 1.0460667177535766e-05, "loss": 0.4215, "step": 35404 }, { "epoch": 0.9721306974190006, "grad_norm": 0.47665658593177795, "learning_rate": 1.0460235742210797e-05, "loss": 0.4136, "step": 35405 }, { "epoch": 0.972158154859967, "grad_norm": 0.4222513437271118, "learning_rate": 1.0459804306027338e-05, "loss": 0.5032, "step": 35406 }, { "epoch": 0.9721856123009336, "grad_norm": 0.4050971567630768, "learning_rate": 1.0459372868986197e-05, "loss": 0.4598, "step": 35407 }, { "epoch": 0.9722130697419, "grad_norm": 0.391973614692688, "learning_rate": 1.045894143108818e-05, "loss": 0.5774, "step": 35408 }, { "epoch": 0.9722405271828666, "grad_norm": 0.5274940133094788, "learning_rate": 1.0458509992334092e-05, "loss": 0.5961, "step": 35409 }, { "epoch": 0.972267984623833, "grad_norm": 0.5191110372543335, "learning_rate": 1.0458078552724735e-05, "loss": 0.4117, "step": 35410 }, { "epoch": 0.9722954420647996, "grad_norm": 0.382641464471817, "learning_rate": 1.0457647112260915e-05, "loss": 0.4512, "step": 35411 }, { "epoch": 0.9723228995057661, "grad_norm": 0.4201924502849579, "learning_rate": 1.0457215670943439e-05, "loss": 0.4548, "step": 35412 }, { "epoch": 0.9723503569467326, "grad_norm": 0.398602694272995, "learning_rate": 1.0456784228773109e-05, "loss": 0.4804, "step": 35413 }, { "epoch": 0.9723778143876991, "grad_norm": 0.3306635320186615, "learning_rate": 1.045635278575073e-05, "loss": 0.4602, "step": 35414 }, { "epoch": 0.9724052718286655, "grad_norm": 0.3934793174266815, "learning_rate": 1.0455921341877109e-05, "loss": 0.5047, "step": 35415 }, { "epoch": 0.9724327292696321, "grad_norm": 0.3212246298789978, "learning_rate": 1.0455489897153045e-05, "loss": 0.3287, "step": 35416 }, { "epoch": 0.9724601867105985, "grad_norm": 0.4272630214691162, "learning_rate": 1.0455058451579352e-05, "loss": 0.4695, "step": 35417 }, { "epoch": 0.9724876441515651, "grad_norm": 0.38015344738960266, "learning_rate": 1.0454627005156828e-05, "loss": 0.5063, "step": 35418 }, { "epoch": 0.9725151015925316, "grad_norm": 0.41025152802467346, "learning_rate": 1.0454195557886277e-05, "loss": 0.4949, "step": 35419 }, { "epoch": 0.9725425590334981, "grad_norm": 0.3468673825263977, "learning_rate": 1.0453764109768509e-05, "loss": 0.4876, "step": 35420 }, { "epoch": 0.9725700164744646, "grad_norm": 0.41677606105804443, "learning_rate": 1.0453332660804327e-05, "loss": 0.5154, "step": 35421 }, { "epoch": 0.972597473915431, "grad_norm": 0.4540795385837555, "learning_rate": 1.0452901210994532e-05, "loss": 0.6024, "step": 35422 }, { "epoch": 0.9726249313563976, "grad_norm": 0.35282301902770996, "learning_rate": 1.0452469760339935e-05, "loss": 0.4304, "step": 35423 }, { "epoch": 0.972652388797364, "grad_norm": 0.3482348322868347, "learning_rate": 1.0452038308841335e-05, "loss": 0.5325, "step": 35424 }, { "epoch": 0.9726798462383306, "grad_norm": 0.4009857177734375, "learning_rate": 1.0451606856499541e-05, "loss": 0.4892, "step": 35425 }, { "epoch": 0.9727073036792971, "grad_norm": 0.3899390399456024, "learning_rate": 1.0451175403315355e-05, "loss": 0.5077, "step": 35426 }, { "epoch": 0.9727347611202636, "grad_norm": 0.38577014207839966, "learning_rate": 1.0450743949289581e-05, "loss": 0.5133, "step": 35427 }, { "epoch": 0.9727622185612301, "grad_norm": 0.5092644095420837, "learning_rate": 1.045031249442303e-05, "loss": 0.5407, "step": 35428 }, { "epoch": 0.9727896760021966, "grad_norm": 0.3680839240550995, "learning_rate": 1.0449881038716498e-05, "loss": 0.4855, "step": 35429 }, { "epoch": 0.9728171334431631, "grad_norm": 0.34423157572746277, "learning_rate": 1.0449449582170796e-05, "loss": 0.4315, "step": 35430 }, { "epoch": 0.9728445908841296, "grad_norm": 0.4277671277523041, "learning_rate": 1.0449018124786729e-05, "loss": 0.4939, "step": 35431 }, { "epoch": 0.9728720483250961, "grad_norm": 0.37755700945854187, "learning_rate": 1.0448586666565097e-05, "loss": 0.3719, "step": 35432 }, { "epoch": 0.9728995057660627, "grad_norm": 0.45846495032310486, "learning_rate": 1.0448155207506709e-05, "loss": 0.4881, "step": 35433 }, { "epoch": 0.9729269632070291, "grad_norm": 0.4090428054332733, "learning_rate": 1.044772374761237e-05, "loss": 0.4847, "step": 35434 }, { "epoch": 0.9729544206479956, "grad_norm": 0.39559710025787354, "learning_rate": 1.0447292286882882e-05, "loss": 0.4499, "step": 35435 }, { "epoch": 0.9729818780889621, "grad_norm": 0.3551698327064514, "learning_rate": 1.0446860825319051e-05, "loss": 0.4736, "step": 35436 }, { "epoch": 0.9730093355299286, "grad_norm": 0.3916212022304535, "learning_rate": 1.0446429362921681e-05, "loss": 0.4328, "step": 35437 }, { "epoch": 0.9730367929708951, "grad_norm": 0.37326064705848694, "learning_rate": 1.044599789969158e-05, "loss": 0.472, "step": 35438 }, { "epoch": 0.9730642504118616, "grad_norm": 0.42463308572769165, "learning_rate": 1.044556643562955e-05, "loss": 0.4637, "step": 35439 }, { "epoch": 0.9730917078528282, "grad_norm": 4.449949264526367, "learning_rate": 1.0445134970736394e-05, "loss": 0.4832, "step": 35440 }, { "epoch": 0.9731191652937946, "grad_norm": 0.4110054075717926, "learning_rate": 1.044470350501292e-05, "loss": 0.4848, "step": 35441 }, { "epoch": 0.9731466227347612, "grad_norm": 0.40774405002593994, "learning_rate": 1.0444272038459933e-05, "loss": 0.5706, "step": 35442 }, { "epoch": 0.9731740801757276, "grad_norm": 0.3579520285129547, "learning_rate": 1.0443840571078238e-05, "loss": 0.4399, "step": 35443 }, { "epoch": 0.9732015376166941, "grad_norm": 0.41052699089050293, "learning_rate": 1.0443409102868638e-05, "loss": 0.4693, "step": 35444 }, { "epoch": 0.9732289950576606, "grad_norm": 0.5138821601867676, "learning_rate": 1.0442977633831937e-05, "loss": 0.5236, "step": 35445 }, { "epoch": 0.9732564524986271, "grad_norm": 0.4262307584285736, "learning_rate": 1.0442546163968942e-05, "loss": 0.531, "step": 35446 }, { "epoch": 0.9732839099395937, "grad_norm": 0.3664458990097046, "learning_rate": 1.0442114693280456e-05, "loss": 0.4321, "step": 35447 }, { "epoch": 0.9733113673805601, "grad_norm": 0.4016006886959076, "learning_rate": 1.0441683221767287e-05, "loss": 0.4855, "step": 35448 }, { "epoch": 0.9733388248215267, "grad_norm": 0.3579981029033661, "learning_rate": 1.0441251749430239e-05, "loss": 0.4541, "step": 35449 }, { "epoch": 0.9733662822624931, "grad_norm": 0.3864275813102722, "learning_rate": 1.0440820276270112e-05, "loss": 0.4886, "step": 35450 }, { "epoch": 0.9733937397034597, "grad_norm": 0.3712902069091797, "learning_rate": 1.0440388802287718e-05, "loss": 0.5047, "step": 35451 }, { "epoch": 0.9734211971444261, "grad_norm": 0.4008288085460663, "learning_rate": 1.0439957327483855e-05, "loss": 0.5125, "step": 35452 }, { "epoch": 0.9734486545853926, "grad_norm": 0.3411776125431061, "learning_rate": 1.0439525851859333e-05, "loss": 0.4019, "step": 35453 }, { "epoch": 0.9734761120263592, "grad_norm": 0.41069066524505615, "learning_rate": 1.0439094375414953e-05, "loss": 0.5073, "step": 35454 }, { "epoch": 0.9735035694673256, "grad_norm": 0.3839914798736572, "learning_rate": 1.0438662898151526e-05, "loss": 0.4277, "step": 35455 }, { "epoch": 0.9735310269082922, "grad_norm": 0.4457632601261139, "learning_rate": 1.0438231420069852e-05, "loss": 0.5826, "step": 35456 }, { "epoch": 0.9735584843492586, "grad_norm": 0.369057297706604, "learning_rate": 1.0437799941170731e-05, "loss": 0.4477, "step": 35457 }, { "epoch": 0.9735859417902252, "grad_norm": 0.3898696303367615, "learning_rate": 1.043736846145498e-05, "loss": 0.4929, "step": 35458 }, { "epoch": 0.9736133992311916, "grad_norm": 0.4229927361011505, "learning_rate": 1.0436936980923393e-05, "loss": 0.4832, "step": 35459 }, { "epoch": 0.9736408566721582, "grad_norm": 0.4249531030654907, "learning_rate": 1.043650549957678e-05, "loss": 0.6034, "step": 35460 }, { "epoch": 0.9736683141131247, "grad_norm": 0.4411903917789459, "learning_rate": 1.0436074017415947e-05, "loss": 0.4941, "step": 35461 }, { "epoch": 0.9736957715540911, "grad_norm": 0.6471553444862366, "learning_rate": 1.0435642534441695e-05, "loss": 0.5333, "step": 35462 }, { "epoch": 0.9737232289950577, "grad_norm": 0.4284100830554962, "learning_rate": 1.0435211050654832e-05, "loss": 0.5512, "step": 35463 }, { "epoch": 0.9737506864360241, "grad_norm": 0.3816041052341461, "learning_rate": 1.0434779566056159e-05, "loss": 0.5308, "step": 35464 }, { "epoch": 0.9737781438769907, "grad_norm": 0.4088088572025299, "learning_rate": 1.0434348080646485e-05, "loss": 0.5234, "step": 35465 }, { "epoch": 0.9738056013179571, "grad_norm": 0.3623411953449249, "learning_rate": 1.0433916594426612e-05, "loss": 0.4233, "step": 35466 }, { "epoch": 0.9738330587589237, "grad_norm": 0.3964916467666626, "learning_rate": 1.0433485107397347e-05, "loss": 0.4772, "step": 35467 }, { "epoch": 0.9738605161998902, "grad_norm": 0.3861945569515228, "learning_rate": 1.0433053619559496e-05, "loss": 0.5036, "step": 35468 }, { "epoch": 0.9738879736408567, "grad_norm": 1.1954463720321655, "learning_rate": 1.0432622130913859e-05, "loss": 0.5544, "step": 35469 }, { "epoch": 0.9739154310818232, "grad_norm": 0.3810594081878662, "learning_rate": 1.0432190641461243e-05, "loss": 0.4936, "step": 35470 }, { "epoch": 0.9739428885227897, "grad_norm": 1.0837926864624023, "learning_rate": 1.0431759151202454e-05, "loss": 0.4146, "step": 35471 }, { "epoch": 0.9739703459637562, "grad_norm": 0.4835358262062073, "learning_rate": 1.0431327660138299e-05, "loss": 0.5289, "step": 35472 }, { "epoch": 0.9739978034047226, "grad_norm": 0.6596868634223938, "learning_rate": 1.043089616826958e-05, "loss": 0.499, "step": 35473 }, { "epoch": 0.9740252608456892, "grad_norm": 0.37806734442710876, "learning_rate": 1.0430464675597098e-05, "loss": 0.4721, "step": 35474 }, { "epoch": 0.9740527182866557, "grad_norm": 0.4226413071155548, "learning_rate": 1.0430033182121666e-05, "loss": 0.4962, "step": 35475 }, { "epoch": 0.9740801757276222, "grad_norm": 0.35174471139907837, "learning_rate": 1.0429601687844083e-05, "loss": 0.4699, "step": 35476 }, { "epoch": 0.9741076331685887, "grad_norm": 0.36453482508659363, "learning_rate": 1.0429170192765157e-05, "loss": 0.4962, "step": 35477 }, { "epoch": 0.9741350906095552, "grad_norm": 0.4542052745819092, "learning_rate": 1.0428738696885691e-05, "loss": 0.5278, "step": 35478 }, { "epoch": 0.9741625480505217, "grad_norm": 0.4061007499694824, "learning_rate": 1.0428307200206488e-05, "loss": 0.5051, "step": 35479 }, { "epoch": 0.9741900054914882, "grad_norm": 0.46456584334373474, "learning_rate": 1.0427875702728361e-05, "loss": 0.4781, "step": 35480 }, { "epoch": 0.9742174629324547, "grad_norm": 0.3900653123855591, "learning_rate": 1.0427444204452105e-05, "loss": 0.5493, "step": 35481 }, { "epoch": 0.9742449203734213, "grad_norm": 0.39813268184661865, "learning_rate": 1.042701270537853e-05, "loss": 0.6131, "step": 35482 }, { "epoch": 0.9742723778143877, "grad_norm": 0.40616950392723083, "learning_rate": 1.0426581205508442e-05, "loss": 0.489, "step": 35483 }, { "epoch": 0.9742998352553542, "grad_norm": 0.3851638436317444, "learning_rate": 1.0426149704842638e-05, "loss": 0.4657, "step": 35484 }, { "epoch": 0.9743272926963207, "grad_norm": 0.3745310604572296, "learning_rate": 1.0425718203381937e-05, "loss": 0.5329, "step": 35485 }, { "epoch": 0.9743547501372872, "grad_norm": 0.4325345456600189, "learning_rate": 1.042528670112713e-05, "loss": 0.4653, "step": 35486 }, { "epoch": 0.9743822075782537, "grad_norm": 0.40713319182395935, "learning_rate": 1.0424855198079028e-05, "loss": 0.5605, "step": 35487 }, { "epoch": 0.9744096650192202, "grad_norm": 0.45896539092063904, "learning_rate": 1.0424423694238437e-05, "loss": 0.5148, "step": 35488 }, { "epoch": 0.9744371224601867, "grad_norm": 0.4434948265552521, "learning_rate": 1.042399218960616e-05, "loss": 0.5338, "step": 35489 }, { "epoch": 0.9744645799011532, "grad_norm": 0.4000347852706909, "learning_rate": 1.0423560684183003e-05, "loss": 0.466, "step": 35490 }, { "epoch": 0.9744920373421198, "grad_norm": 0.46256157755851746, "learning_rate": 1.0423129177969767e-05, "loss": 0.5897, "step": 35491 }, { "epoch": 0.9745194947830862, "grad_norm": 0.44732221961021423, "learning_rate": 1.0422697670967264e-05, "loss": 0.5327, "step": 35492 }, { "epoch": 0.9745469522240527, "grad_norm": 0.3578735888004303, "learning_rate": 1.0422266163176293e-05, "loss": 0.4635, "step": 35493 }, { "epoch": 0.9745744096650192, "grad_norm": 0.42395085096359253, "learning_rate": 1.042183465459766e-05, "loss": 0.4467, "step": 35494 }, { "epoch": 0.9746018671059857, "grad_norm": 0.35897156596183777, "learning_rate": 1.0421403145232172e-05, "loss": 0.3549, "step": 35495 }, { "epoch": 0.9746293245469522, "grad_norm": 0.39451852440834045, "learning_rate": 1.0420971635080635e-05, "loss": 0.4561, "step": 35496 }, { "epoch": 0.9746567819879187, "grad_norm": 0.39227989315986633, "learning_rate": 1.0420540124143848e-05, "loss": 0.5153, "step": 35497 }, { "epoch": 0.9746842394288853, "grad_norm": 0.4376016855239868, "learning_rate": 1.042010861242262e-05, "loss": 0.4188, "step": 35498 }, { "epoch": 0.9747116968698517, "grad_norm": 0.4387200176715851, "learning_rate": 1.0419677099917755e-05, "loss": 0.5042, "step": 35499 }, { "epoch": 0.9747391543108183, "grad_norm": 0.3767997920513153, "learning_rate": 1.041924558663006e-05, "loss": 0.4181, "step": 35500 }, { "epoch": 0.9747666117517847, "grad_norm": 0.3659318685531616, "learning_rate": 1.0418814072560336e-05, "loss": 0.4667, "step": 35501 }, { "epoch": 0.9747940691927512, "grad_norm": 0.4739714562892914, "learning_rate": 1.0418382557709392e-05, "loss": 0.5227, "step": 35502 }, { "epoch": 0.9748215266337177, "grad_norm": 0.392075777053833, "learning_rate": 1.0417951042078032e-05, "loss": 0.5335, "step": 35503 }, { "epoch": 0.9748489840746842, "grad_norm": 0.37919655442237854, "learning_rate": 1.0417519525667055e-05, "loss": 0.4502, "step": 35504 }, { "epoch": 0.9748764415156508, "grad_norm": 0.40832674503326416, "learning_rate": 1.0417088008477276e-05, "loss": 0.5082, "step": 35505 }, { "epoch": 0.9749038989566172, "grad_norm": 0.40492209792137146, "learning_rate": 1.0416656490509493e-05, "loss": 0.4666, "step": 35506 }, { "epoch": 0.9749313563975838, "grad_norm": 0.37874260544776917, "learning_rate": 1.0416224971764511e-05, "loss": 0.4996, "step": 35507 }, { "epoch": 0.9749588138385502, "grad_norm": 0.4097778797149658, "learning_rate": 1.0415793452243139e-05, "loss": 0.4006, "step": 35508 }, { "epoch": 0.9749862712795168, "grad_norm": 0.36957693099975586, "learning_rate": 1.0415361931946178e-05, "loss": 0.4718, "step": 35509 }, { "epoch": 0.9750137287204832, "grad_norm": 0.4080786406993866, "learning_rate": 1.0414930410874435e-05, "loss": 0.4539, "step": 35510 }, { "epoch": 0.9750411861614497, "grad_norm": 0.38348156213760376, "learning_rate": 1.0414498889028715e-05, "loss": 0.4384, "step": 35511 }, { "epoch": 0.9750686436024163, "grad_norm": 0.43673014640808105, "learning_rate": 1.0414067366409821e-05, "loss": 0.5203, "step": 35512 }, { "epoch": 0.9750961010433827, "grad_norm": 0.7365438342094421, "learning_rate": 1.0413635843018562e-05, "loss": 0.4814, "step": 35513 }, { "epoch": 0.9751235584843493, "grad_norm": 0.3693685829639435, "learning_rate": 1.0413204318855736e-05, "loss": 0.4945, "step": 35514 }, { "epoch": 0.9751510159253157, "grad_norm": 0.3943171203136444, "learning_rate": 1.0412772793922155e-05, "loss": 0.4798, "step": 35515 }, { "epoch": 0.9751784733662823, "grad_norm": 0.3828733265399933, "learning_rate": 1.041234126821862e-05, "loss": 0.514, "step": 35516 }, { "epoch": 0.9752059308072487, "grad_norm": 0.3972199559211731, "learning_rate": 1.0411909741745936e-05, "loss": 0.4664, "step": 35517 }, { "epoch": 0.9752333882482153, "grad_norm": 0.367360919713974, "learning_rate": 1.041147821450491e-05, "loss": 0.5323, "step": 35518 }, { "epoch": 0.9752608456891818, "grad_norm": 0.35214126110076904, "learning_rate": 1.0411046686496346e-05, "loss": 0.3837, "step": 35519 }, { "epoch": 0.9752883031301482, "grad_norm": 0.356715589761734, "learning_rate": 1.0410615157721048e-05, "loss": 0.4585, "step": 35520 }, { "epoch": 0.9753157605711148, "grad_norm": 0.41109955310821533, "learning_rate": 1.0410183628179821e-05, "loss": 0.4898, "step": 35521 }, { "epoch": 0.9753432180120812, "grad_norm": 0.3209027349948883, "learning_rate": 1.0409752097873471e-05, "loss": 0.4247, "step": 35522 }, { "epoch": 0.9753706754530478, "grad_norm": 0.42218342423439026, "learning_rate": 1.0409320566802805e-05, "loss": 0.5165, "step": 35523 }, { "epoch": 0.9753981328940142, "grad_norm": 0.4721704125404358, "learning_rate": 1.0408889034968625e-05, "loss": 0.5376, "step": 35524 }, { "epoch": 0.9754255903349808, "grad_norm": 0.37771743535995483, "learning_rate": 1.0408457502371734e-05, "loss": 0.4214, "step": 35525 }, { "epoch": 0.9754530477759473, "grad_norm": 0.3869120478630066, "learning_rate": 1.040802596901294e-05, "loss": 0.5191, "step": 35526 }, { "epoch": 0.9754805052169138, "grad_norm": 0.3876940608024597, "learning_rate": 1.040759443489305e-05, "loss": 0.515, "step": 35527 }, { "epoch": 0.9755079626578803, "grad_norm": 0.3877614438533783, "learning_rate": 1.0407162900012861e-05, "loss": 0.4541, "step": 35528 }, { "epoch": 0.9755354200988468, "grad_norm": 0.4480344355106354, "learning_rate": 1.0406731364373189e-05, "loss": 0.4391, "step": 35529 }, { "epoch": 0.9755628775398133, "grad_norm": 0.4219900667667389, "learning_rate": 1.040629982797483e-05, "loss": 0.4997, "step": 35530 }, { "epoch": 0.9755903349807797, "grad_norm": 0.3863013684749603, "learning_rate": 1.0405868290818592e-05, "loss": 0.5303, "step": 35531 }, { "epoch": 0.9756177924217463, "grad_norm": 0.39891764521598816, "learning_rate": 1.0405436752905283e-05, "loss": 0.5131, "step": 35532 }, { "epoch": 0.9756452498627128, "grad_norm": 0.3717018663883209, "learning_rate": 1.0405005214235701e-05, "loss": 0.5029, "step": 35533 }, { "epoch": 0.9756727073036793, "grad_norm": 0.3987565338611603, "learning_rate": 1.0404573674810659e-05, "loss": 0.4924, "step": 35534 }, { "epoch": 0.9757001647446458, "grad_norm": 0.3658978044986725, "learning_rate": 1.0404142134630953e-05, "loss": 0.5103, "step": 35535 }, { "epoch": 0.9757276221856123, "grad_norm": 0.3738052248954773, "learning_rate": 1.0403710593697396e-05, "loss": 0.4949, "step": 35536 }, { "epoch": 0.9757550796265788, "grad_norm": 0.3871084153652191, "learning_rate": 1.0403279052010791e-05, "loss": 0.5325, "step": 35537 }, { "epoch": 0.9757825370675453, "grad_norm": 0.45978736877441406, "learning_rate": 1.0402847509571941e-05, "loss": 0.4831, "step": 35538 }, { "epoch": 0.9758099945085118, "grad_norm": 0.4115150272846222, "learning_rate": 1.0402415966381651e-05, "loss": 0.4754, "step": 35539 }, { "epoch": 0.9758374519494784, "grad_norm": 0.375669926404953, "learning_rate": 1.0401984422440726e-05, "loss": 0.4943, "step": 35540 }, { "epoch": 0.9758649093904448, "grad_norm": 0.43864935636520386, "learning_rate": 1.0401552877749975e-05, "loss": 0.4812, "step": 35541 }, { "epoch": 0.9758923668314113, "grad_norm": 0.4595482647418976, "learning_rate": 1.0401121332310197e-05, "loss": 0.4688, "step": 35542 }, { "epoch": 0.9759198242723778, "grad_norm": 0.3689318001270294, "learning_rate": 1.04006897861222e-05, "loss": 0.4288, "step": 35543 }, { "epoch": 0.9759472817133443, "grad_norm": 0.3703960180282593, "learning_rate": 1.040025823918679e-05, "loss": 0.4995, "step": 35544 }, { "epoch": 0.9759747391543108, "grad_norm": 0.4074922204017639, "learning_rate": 1.039982669150477e-05, "loss": 0.4971, "step": 35545 }, { "epoch": 0.9760021965952773, "grad_norm": 0.416729599237442, "learning_rate": 1.0399395143076942e-05, "loss": 0.531, "step": 35546 }, { "epoch": 0.9760296540362439, "grad_norm": 0.43740129470825195, "learning_rate": 1.039896359390412e-05, "loss": 0.5094, "step": 35547 }, { "epoch": 0.9760571114772103, "grad_norm": 0.4539346396923065, "learning_rate": 1.03985320439871e-05, "loss": 0.4478, "step": 35548 }, { "epoch": 0.9760845689181769, "grad_norm": 0.3982706367969513, "learning_rate": 1.0398100493326693e-05, "loss": 0.5407, "step": 35549 }, { "epoch": 0.9761120263591433, "grad_norm": 0.3888145983219147, "learning_rate": 1.0397668941923702e-05, "loss": 0.4899, "step": 35550 }, { "epoch": 0.9761394838001098, "grad_norm": 0.4596226215362549, "learning_rate": 1.039723738977893e-05, "loss": 0.5656, "step": 35551 }, { "epoch": 0.9761669412410763, "grad_norm": 0.4190945625305176, "learning_rate": 1.0396805836893182e-05, "loss": 0.4811, "step": 35552 }, { "epoch": 0.9761943986820428, "grad_norm": 0.3958438038825989, "learning_rate": 1.0396374283267265e-05, "loss": 0.533, "step": 35553 }, { "epoch": 0.9762218561230094, "grad_norm": 0.5106979608535767, "learning_rate": 1.0395942728901985e-05, "loss": 0.4534, "step": 35554 }, { "epoch": 0.9762493135639758, "grad_norm": 0.4133162796497345, "learning_rate": 1.0395511173798144e-05, "loss": 0.4746, "step": 35555 }, { "epoch": 0.9762767710049424, "grad_norm": 0.38522419333457947, "learning_rate": 1.039507961795655e-05, "loss": 0.4734, "step": 35556 }, { "epoch": 0.9763042284459088, "grad_norm": 0.4371401071548462, "learning_rate": 1.0394648061378005e-05, "loss": 0.5741, "step": 35557 }, { "epoch": 0.9763316858868754, "grad_norm": 0.351757675409317, "learning_rate": 1.0394216504063318e-05, "loss": 0.4048, "step": 35558 }, { "epoch": 0.9763591433278418, "grad_norm": 0.42419517040252686, "learning_rate": 1.0393784946013287e-05, "loss": 0.5217, "step": 35559 }, { "epoch": 0.9763866007688083, "grad_norm": 0.3822602927684784, "learning_rate": 1.0393353387228726e-05, "loss": 0.4415, "step": 35560 }, { "epoch": 0.9764140582097749, "grad_norm": 0.4374276101589203, "learning_rate": 1.039292182771043e-05, "loss": 0.6365, "step": 35561 }, { "epoch": 0.9764415156507413, "grad_norm": 0.3865448832511902, "learning_rate": 1.0392490267459217e-05, "loss": 0.4044, "step": 35562 }, { "epoch": 0.9764689730917079, "grad_norm": 0.4057079553604126, "learning_rate": 1.039205870647588e-05, "loss": 0.5168, "step": 35563 }, { "epoch": 0.9764964305326743, "grad_norm": 0.4008456766605377, "learning_rate": 1.0391627144761227e-05, "loss": 0.6055, "step": 35564 }, { "epoch": 0.9765238879736409, "grad_norm": 0.369427353143692, "learning_rate": 1.0391195582316067e-05, "loss": 0.474, "step": 35565 }, { "epoch": 0.9765513454146073, "grad_norm": 0.41058972477912903, "learning_rate": 1.03907640191412e-05, "loss": 0.5387, "step": 35566 }, { "epoch": 0.9765788028555739, "grad_norm": 0.4455486238002777, "learning_rate": 1.0390332455237436e-05, "loss": 0.4876, "step": 35567 }, { "epoch": 0.9766062602965404, "grad_norm": 0.5007911920547485, "learning_rate": 1.0389900890605576e-05, "loss": 0.543, "step": 35568 }, { "epoch": 0.9766337177375068, "grad_norm": 0.38961565494537354, "learning_rate": 1.0389469325246425e-05, "loss": 0.4965, "step": 35569 }, { "epoch": 0.9766611751784734, "grad_norm": 0.37068209052085876, "learning_rate": 1.0389037759160792e-05, "loss": 0.4495, "step": 35570 }, { "epoch": 0.9766886326194398, "grad_norm": 0.48533377051353455, "learning_rate": 1.0388606192349475e-05, "loss": 0.4638, "step": 35571 }, { "epoch": 0.9767160900604064, "grad_norm": 0.3933269679546356, "learning_rate": 1.0388174624813288e-05, "loss": 0.5139, "step": 35572 }, { "epoch": 0.9767435475013728, "grad_norm": 0.441095769405365, "learning_rate": 1.0387743056553032e-05, "loss": 0.4521, "step": 35573 }, { "epoch": 0.9767710049423394, "grad_norm": 0.40632206201553345, "learning_rate": 1.0387311487569506e-05, "loss": 0.5453, "step": 35574 }, { "epoch": 0.9767984623833059, "grad_norm": 0.35889536142349243, "learning_rate": 1.0386879917863526e-05, "loss": 0.4354, "step": 35575 }, { "epoch": 0.9768259198242724, "grad_norm": 0.4335116744041443, "learning_rate": 1.0386448347435892e-05, "loss": 0.4763, "step": 35576 }, { "epoch": 0.9768533772652389, "grad_norm": 0.3621806502342224, "learning_rate": 1.0386016776287403e-05, "loss": 0.4217, "step": 35577 }, { "epoch": 0.9768808347062053, "grad_norm": 0.3836539685726166, "learning_rate": 1.0385585204418872e-05, "loss": 0.4908, "step": 35578 }, { "epoch": 0.9769082921471719, "grad_norm": 0.42796656489372253, "learning_rate": 1.0385153631831101e-05, "loss": 0.4895, "step": 35579 }, { "epoch": 0.9769357495881383, "grad_norm": 0.5622440576553345, "learning_rate": 1.0384722058524899e-05, "loss": 0.4463, "step": 35580 }, { "epoch": 0.9769632070291049, "grad_norm": 0.48616674542427063, "learning_rate": 1.0384290484501064e-05, "loss": 0.4955, "step": 35581 }, { "epoch": 0.9769906644700714, "grad_norm": 0.43931734561920166, "learning_rate": 1.0383858909760405e-05, "loss": 0.4869, "step": 35582 }, { "epoch": 0.9770181219110379, "grad_norm": 0.40451472997665405, "learning_rate": 1.0383427334303728e-05, "loss": 0.4634, "step": 35583 }, { "epoch": 0.9770455793520044, "grad_norm": 0.4192011058330536, "learning_rate": 1.0382995758131835e-05, "loss": 0.496, "step": 35584 }, { "epoch": 0.9770730367929709, "grad_norm": 0.3665729761123657, "learning_rate": 1.0382564181245536e-05, "loss": 0.4304, "step": 35585 }, { "epoch": 0.9771004942339374, "grad_norm": 0.39660534262657166, "learning_rate": 1.0382132603645629e-05, "loss": 0.3989, "step": 35586 }, { "epoch": 0.9771279516749038, "grad_norm": 0.50513756275177, "learning_rate": 1.0381701025332924e-05, "loss": 0.455, "step": 35587 }, { "epoch": 0.9771554091158704, "grad_norm": 0.7616722583770752, "learning_rate": 1.0381269446308226e-05, "loss": 0.5142, "step": 35588 }, { "epoch": 0.977182866556837, "grad_norm": 0.4265362024307251, "learning_rate": 1.0380837866572336e-05, "loss": 0.4531, "step": 35589 }, { "epoch": 0.9772103239978034, "grad_norm": 0.37749770283699036, "learning_rate": 1.0380406286126064e-05, "loss": 0.4863, "step": 35590 }, { "epoch": 0.9772377814387699, "grad_norm": 0.37879884243011475, "learning_rate": 1.0379974704970212e-05, "loss": 0.5376, "step": 35591 }, { "epoch": 0.9772652388797364, "grad_norm": 0.5268937945365906, "learning_rate": 1.0379543123105587e-05, "loss": 0.4774, "step": 35592 }, { "epoch": 0.9772926963207029, "grad_norm": 0.4176996350288391, "learning_rate": 1.0379111540532991e-05, "loss": 0.4892, "step": 35593 }, { "epoch": 0.9773201537616694, "grad_norm": 0.4020625054836273, "learning_rate": 1.0378679957253235e-05, "loss": 0.5698, "step": 35594 }, { "epoch": 0.9773476112026359, "grad_norm": 0.38314497470855713, "learning_rate": 1.0378248373267114e-05, "loss": 0.4778, "step": 35595 }, { "epoch": 0.9773750686436025, "grad_norm": 0.46012088656425476, "learning_rate": 1.0377816788575444e-05, "loss": 0.4958, "step": 35596 }, { "epoch": 0.9774025260845689, "grad_norm": 0.3905937969684601, "learning_rate": 1.0377385203179022e-05, "loss": 0.4959, "step": 35597 }, { "epoch": 0.9774299835255355, "grad_norm": 0.4023892283439636, "learning_rate": 1.0376953617078658e-05, "loss": 0.4735, "step": 35598 }, { "epoch": 0.9774574409665019, "grad_norm": 0.36518847942352295, "learning_rate": 1.0376522030275156e-05, "loss": 0.4581, "step": 35599 }, { "epoch": 0.9774848984074684, "grad_norm": 0.4208669662475586, "learning_rate": 1.0376090442769317e-05, "loss": 0.4651, "step": 35600 }, { "epoch": 0.9775123558484349, "grad_norm": 0.6033281087875366, "learning_rate": 1.0375658854561952e-05, "loss": 0.5903, "step": 35601 }, { "epoch": 0.9775398132894014, "grad_norm": 0.4088839888572693, "learning_rate": 1.0375227265653861e-05, "loss": 0.4659, "step": 35602 }, { "epoch": 0.977567270730368, "grad_norm": 0.48979827761650085, "learning_rate": 1.0374795676045854e-05, "loss": 0.4842, "step": 35603 }, { "epoch": 0.9775947281713344, "grad_norm": 0.37280383706092834, "learning_rate": 1.0374364085738733e-05, "loss": 0.4397, "step": 35604 }, { "epoch": 0.977622185612301, "grad_norm": 0.4246499538421631, "learning_rate": 1.0373932494733301e-05, "loss": 0.5531, "step": 35605 }, { "epoch": 0.9776496430532674, "grad_norm": 0.3848366141319275, "learning_rate": 1.037350090303037e-05, "loss": 0.4805, "step": 35606 }, { "epoch": 0.977677100494234, "grad_norm": 0.4599844217300415, "learning_rate": 1.0373069310630737e-05, "loss": 0.55, "step": 35607 }, { "epoch": 0.9777045579352004, "grad_norm": 0.3777739107608795, "learning_rate": 1.037263771753521e-05, "loss": 0.4257, "step": 35608 }, { "epoch": 0.9777320153761669, "grad_norm": 0.37875357270240784, "learning_rate": 1.0372206123744599e-05, "loss": 0.5005, "step": 35609 }, { "epoch": 0.9777594728171335, "grad_norm": 0.7079125046730042, "learning_rate": 1.03717745292597e-05, "loss": 0.5929, "step": 35610 }, { "epoch": 0.9777869302580999, "grad_norm": 0.40653476119041443, "learning_rate": 1.0371342934081326e-05, "loss": 0.4161, "step": 35611 }, { "epoch": 0.9778143876990665, "grad_norm": 0.39858266711235046, "learning_rate": 1.0370911338210279e-05, "loss": 0.5063, "step": 35612 }, { "epoch": 0.9778418451400329, "grad_norm": 0.4415059983730316, "learning_rate": 1.0370479741647362e-05, "loss": 0.476, "step": 35613 }, { "epoch": 0.9778693025809995, "grad_norm": 0.37019920349121094, "learning_rate": 1.0370048144393383e-05, "loss": 0.4135, "step": 35614 }, { "epoch": 0.9778967600219659, "grad_norm": 0.3642041087150574, "learning_rate": 1.0369616546449144e-05, "loss": 0.4655, "step": 35615 }, { "epoch": 0.9779242174629325, "grad_norm": 0.3798244595527649, "learning_rate": 1.0369184947815457e-05, "loss": 0.4259, "step": 35616 }, { "epoch": 0.977951674903899, "grad_norm": 0.43585699796676636, "learning_rate": 1.0368753348493118e-05, "loss": 0.5485, "step": 35617 }, { "epoch": 0.9779791323448654, "grad_norm": 0.3948533535003662, "learning_rate": 1.0368321748482937e-05, "loss": 0.5907, "step": 35618 }, { "epoch": 0.978006589785832, "grad_norm": 0.39542654156684875, "learning_rate": 1.036789014778572e-05, "loss": 0.5484, "step": 35619 }, { "epoch": 0.9780340472267984, "grad_norm": 0.42235615849494934, "learning_rate": 1.0367458546402269e-05, "loss": 0.4998, "step": 35620 }, { "epoch": 0.978061504667765, "grad_norm": 0.3504272401332855, "learning_rate": 1.036702694433339e-05, "loss": 0.4655, "step": 35621 }, { "epoch": 0.9780889621087314, "grad_norm": 0.39316079020500183, "learning_rate": 1.0366595341579893e-05, "loss": 0.5527, "step": 35622 }, { "epoch": 0.978116419549698, "grad_norm": 0.35629552602767944, "learning_rate": 1.0366163738142574e-05, "loss": 0.3595, "step": 35623 }, { "epoch": 0.9781438769906645, "grad_norm": 0.36042025685310364, "learning_rate": 1.0365732134022244e-05, "loss": 0.5045, "step": 35624 }, { "epoch": 0.978171334431631, "grad_norm": 0.420318603515625, "learning_rate": 1.0365300529219708e-05, "loss": 0.542, "step": 35625 }, { "epoch": 0.9781987918725975, "grad_norm": 0.40186813473701477, "learning_rate": 1.0364868923735769e-05, "loss": 0.5343, "step": 35626 }, { "epoch": 0.978226249313564, "grad_norm": 0.3955312967300415, "learning_rate": 1.0364437317571232e-05, "loss": 0.5413, "step": 35627 }, { "epoch": 0.9782537067545305, "grad_norm": 0.6639621257781982, "learning_rate": 1.0364005710726902e-05, "loss": 0.5103, "step": 35628 }, { "epoch": 0.9782811641954969, "grad_norm": 0.3867313861846924, "learning_rate": 1.0363574103203591e-05, "loss": 0.506, "step": 35629 }, { "epoch": 0.9783086216364635, "grad_norm": 0.3850685954093933, "learning_rate": 1.0363142495002093e-05, "loss": 0.5106, "step": 35630 }, { "epoch": 0.97833607907743, "grad_norm": 0.40667724609375, "learning_rate": 1.036271088612322e-05, "loss": 0.5129, "step": 35631 }, { "epoch": 0.9783635365183965, "grad_norm": 0.3990814685821533, "learning_rate": 1.0362279276567775e-05, "loss": 0.5063, "step": 35632 }, { "epoch": 0.978390993959363, "grad_norm": 0.44331759214401245, "learning_rate": 1.0361847666336564e-05, "loss": 0.4797, "step": 35633 }, { "epoch": 0.9784184514003295, "grad_norm": 0.357075572013855, "learning_rate": 1.0361416055430391e-05, "loss": 0.4954, "step": 35634 }, { "epoch": 0.978445908841296, "grad_norm": 0.40576261281967163, "learning_rate": 1.0360984443850061e-05, "loss": 0.4499, "step": 35635 }, { "epoch": 0.9784733662822624, "grad_norm": 0.3910224139690399, "learning_rate": 1.0360552831596382e-05, "loss": 0.5163, "step": 35636 }, { "epoch": 0.978500823723229, "grad_norm": 0.3894016742706299, "learning_rate": 1.0360121218670156e-05, "loss": 0.5221, "step": 35637 }, { "epoch": 0.9785282811641955, "grad_norm": 0.3835654854774475, "learning_rate": 1.0359689605072187e-05, "loss": 0.5276, "step": 35638 }, { "epoch": 0.978555738605162, "grad_norm": 0.40052521228790283, "learning_rate": 1.0359257990803287e-05, "loss": 0.5099, "step": 35639 }, { "epoch": 0.9785831960461285, "grad_norm": 0.44491758942604065, "learning_rate": 1.0358826375864251e-05, "loss": 0.4935, "step": 35640 }, { "epoch": 0.978610653487095, "grad_norm": 0.40891605615615845, "learning_rate": 1.0358394760255891e-05, "loss": 0.5685, "step": 35641 }, { "epoch": 0.9786381109280615, "grad_norm": 0.5101825594902039, "learning_rate": 1.0357963143979013e-05, "loss": 0.5444, "step": 35642 }, { "epoch": 0.978665568369028, "grad_norm": 0.43782755732536316, "learning_rate": 1.0357531527034415e-05, "loss": 0.492, "step": 35643 }, { "epoch": 0.9786930258099945, "grad_norm": 0.40683406591415405, "learning_rate": 1.0357099909422909e-05, "loss": 0.5724, "step": 35644 }, { "epoch": 0.9787204832509611, "grad_norm": 0.41675204038619995, "learning_rate": 1.0356668291145298e-05, "loss": 0.4927, "step": 35645 }, { "epoch": 0.9787479406919275, "grad_norm": 0.38973888754844666, "learning_rate": 1.0356236672202387e-05, "loss": 0.4508, "step": 35646 }, { "epoch": 0.978775398132894, "grad_norm": 0.48215413093566895, "learning_rate": 1.0355805052594978e-05, "loss": 0.5227, "step": 35647 }, { "epoch": 0.9788028555738605, "grad_norm": 0.3846275806427002, "learning_rate": 1.0355373432323884e-05, "loss": 0.4509, "step": 35648 }, { "epoch": 0.978830313014827, "grad_norm": 0.3776712119579315, "learning_rate": 1.0354941811389903e-05, "loss": 0.5, "step": 35649 }, { "epoch": 0.9788577704557935, "grad_norm": 0.5428509712219238, "learning_rate": 1.0354510189793839e-05, "loss": 0.5673, "step": 35650 }, { "epoch": 0.97888522789676, "grad_norm": 0.4153428077697754, "learning_rate": 1.0354078567536503e-05, "loss": 0.5188, "step": 35651 }, { "epoch": 0.9789126853377266, "grad_norm": 0.40773656964302063, "learning_rate": 1.0353646944618696e-05, "loss": 0.5352, "step": 35652 }, { "epoch": 0.978940142778693, "grad_norm": 0.37280815839767456, "learning_rate": 1.0353215321041227e-05, "loss": 0.452, "step": 35653 }, { "epoch": 0.9789676002196596, "grad_norm": 0.3427037000656128, "learning_rate": 1.03527836968049e-05, "loss": 0.4406, "step": 35654 }, { "epoch": 0.978995057660626, "grad_norm": 0.38776543736457825, "learning_rate": 1.0352352071910513e-05, "loss": 0.3819, "step": 35655 }, { "epoch": 0.9790225151015925, "grad_norm": 0.37816688418388367, "learning_rate": 1.0351920446358883e-05, "loss": 0.4466, "step": 35656 }, { "epoch": 0.979049972542559, "grad_norm": 0.8049741983413696, "learning_rate": 1.0351488820150806e-05, "loss": 0.4766, "step": 35657 }, { "epoch": 0.9790774299835255, "grad_norm": 0.41739746928215027, "learning_rate": 1.035105719328709e-05, "loss": 0.5074, "step": 35658 }, { "epoch": 0.9791048874244921, "grad_norm": 0.5432574152946472, "learning_rate": 1.0350625565768541e-05, "loss": 0.4318, "step": 35659 }, { "epoch": 0.9791323448654585, "grad_norm": 0.36295273900032043, "learning_rate": 1.0350193937595962e-05, "loss": 0.4025, "step": 35660 }, { "epoch": 0.9791598023064251, "grad_norm": 0.36984434723854065, "learning_rate": 1.0349762308770163e-05, "loss": 0.5202, "step": 35661 }, { "epoch": 0.9791872597473915, "grad_norm": 0.3596680164337158, "learning_rate": 1.0349330679291943e-05, "loss": 0.4842, "step": 35662 }, { "epoch": 0.9792147171883581, "grad_norm": 0.4076487123966217, "learning_rate": 1.0348899049162111e-05, "loss": 0.5257, "step": 35663 }, { "epoch": 0.9792421746293245, "grad_norm": 0.4103064239025116, "learning_rate": 1.0348467418381468e-05, "loss": 0.4525, "step": 35664 }, { "epoch": 0.979269632070291, "grad_norm": 0.37104830145835876, "learning_rate": 1.0348035786950826e-05, "loss": 0.4073, "step": 35665 }, { "epoch": 0.9792970895112576, "grad_norm": 0.40959396958351135, "learning_rate": 1.0347604154870986e-05, "loss": 0.5085, "step": 35666 }, { "epoch": 0.979324546952224, "grad_norm": 0.420642226934433, "learning_rate": 1.034717252214275e-05, "loss": 0.5385, "step": 35667 }, { "epoch": 0.9793520043931906, "grad_norm": 0.385292649269104, "learning_rate": 1.0346740888766928e-05, "loss": 0.4741, "step": 35668 }, { "epoch": 0.979379461834157, "grad_norm": 0.4030839800834656, "learning_rate": 1.0346309254744325e-05, "loss": 0.4411, "step": 35669 }, { "epoch": 0.9794069192751236, "grad_norm": 0.37368035316467285, "learning_rate": 1.0345877620075742e-05, "loss": 0.4393, "step": 35670 }, { "epoch": 0.97943437671609, "grad_norm": 0.39844584465026855, "learning_rate": 1.0345445984761987e-05, "loss": 0.46, "step": 35671 }, { "epoch": 0.9794618341570566, "grad_norm": 0.4679358899593353, "learning_rate": 1.0345014348803868e-05, "loss": 0.4443, "step": 35672 }, { "epoch": 0.9794892915980231, "grad_norm": 0.4076031744480133, "learning_rate": 1.0344582712202185e-05, "loss": 0.5208, "step": 35673 }, { "epoch": 0.9795167490389896, "grad_norm": 0.4593210220336914, "learning_rate": 1.0344151074957746e-05, "loss": 0.5319, "step": 35674 }, { "epoch": 0.9795442064799561, "grad_norm": 0.3903653621673584, "learning_rate": 1.0343719437071356e-05, "loss": 0.4399, "step": 35675 }, { "epoch": 0.9795716639209225, "grad_norm": 0.3877254128456116, "learning_rate": 1.0343287798543817e-05, "loss": 0.5079, "step": 35676 }, { "epoch": 0.9795991213618891, "grad_norm": 0.4152837097644806, "learning_rate": 1.0342856159375937e-05, "loss": 0.5958, "step": 35677 }, { "epoch": 0.9796265788028555, "grad_norm": 0.39961931109428406, "learning_rate": 1.0342424519568524e-05, "loss": 0.4691, "step": 35678 }, { "epoch": 0.9796540362438221, "grad_norm": 0.3945414423942566, "learning_rate": 1.034199287912238e-05, "loss": 0.4748, "step": 35679 }, { "epoch": 0.9796814936847886, "grad_norm": 0.38891908526420593, "learning_rate": 1.0341561238038307e-05, "loss": 0.5076, "step": 35680 }, { "epoch": 0.9797089511257551, "grad_norm": 0.3681289255619049, "learning_rate": 1.0341129596317115e-05, "loss": 0.4399, "step": 35681 }, { "epoch": 0.9797364085667216, "grad_norm": 0.35155782103538513, "learning_rate": 1.0340697953959606e-05, "loss": 0.4553, "step": 35682 }, { "epoch": 0.9797638660076881, "grad_norm": 0.4182716906070709, "learning_rate": 1.034026631096659e-05, "loss": 0.4122, "step": 35683 }, { "epoch": 0.9797913234486546, "grad_norm": 0.37505003809928894, "learning_rate": 1.0339834667338867e-05, "loss": 0.4164, "step": 35684 }, { "epoch": 0.979818780889621, "grad_norm": 0.4123368263244629, "learning_rate": 1.033940302307724e-05, "loss": 0.5581, "step": 35685 }, { "epoch": 0.9798462383305876, "grad_norm": 0.38026320934295654, "learning_rate": 1.0338971378182523e-05, "loss": 0.4095, "step": 35686 }, { "epoch": 0.9798736957715541, "grad_norm": 0.41206711530685425, "learning_rate": 1.0338539732655514e-05, "loss": 0.499, "step": 35687 }, { "epoch": 0.9799011532125206, "grad_norm": 0.3769885003566742, "learning_rate": 1.0338108086497021e-05, "loss": 0.4718, "step": 35688 }, { "epoch": 0.9799286106534871, "grad_norm": 0.3917432129383087, "learning_rate": 1.033767643970785e-05, "loss": 0.4843, "step": 35689 }, { "epoch": 0.9799560680944536, "grad_norm": 0.38277876377105713, "learning_rate": 1.0337244792288802e-05, "loss": 0.4921, "step": 35690 }, { "epoch": 0.9799835255354201, "grad_norm": 0.4479775130748749, "learning_rate": 1.0336813144240685e-05, "loss": 0.528, "step": 35691 }, { "epoch": 0.9800109829763866, "grad_norm": 0.3771226108074188, "learning_rate": 1.0336381495564305e-05, "loss": 0.4445, "step": 35692 }, { "epoch": 0.9800384404173531, "grad_norm": 0.42708665132522583, "learning_rate": 1.0335949846260465e-05, "loss": 0.5931, "step": 35693 }, { "epoch": 0.9800658978583197, "grad_norm": 0.3876267969608307, "learning_rate": 1.0335518196329973e-05, "loss": 0.4353, "step": 35694 }, { "epoch": 0.9800933552992861, "grad_norm": 0.43144622445106506, "learning_rate": 1.033508654577363e-05, "loss": 0.4513, "step": 35695 }, { "epoch": 0.9801208127402526, "grad_norm": 0.3793286681175232, "learning_rate": 1.0334654894592247e-05, "loss": 0.5133, "step": 35696 }, { "epoch": 0.9801482701812191, "grad_norm": 0.44197043776512146, "learning_rate": 1.0334223242786625e-05, "loss": 0.4542, "step": 35697 }, { "epoch": 0.9801757276221856, "grad_norm": 0.42545467615127563, "learning_rate": 1.0333791590357568e-05, "loss": 0.4293, "step": 35698 }, { "epoch": 0.9802031850631521, "grad_norm": 0.3774081766605377, "learning_rate": 1.0333359937305885e-05, "loss": 0.5179, "step": 35699 }, { "epoch": 0.9802306425041186, "grad_norm": 0.5611757040023804, "learning_rate": 1.033292828363238e-05, "loss": 0.5272, "step": 35700 }, { "epoch": 0.9802580999450852, "grad_norm": 0.41780737042427063, "learning_rate": 1.0332496629337854e-05, "loss": 0.4865, "step": 35701 }, { "epoch": 0.9802855573860516, "grad_norm": 0.351351797580719, "learning_rate": 1.033206497442312e-05, "loss": 0.422, "step": 35702 }, { "epoch": 0.9803130148270182, "grad_norm": 0.46179863810539246, "learning_rate": 1.0331633318888973e-05, "loss": 0.5166, "step": 35703 }, { "epoch": 0.9803404722679846, "grad_norm": 0.3404046893119812, "learning_rate": 1.033120166273623e-05, "loss": 0.4469, "step": 35704 }, { "epoch": 0.9803679297089511, "grad_norm": 0.5359817743301392, "learning_rate": 1.0330770005965688e-05, "loss": 0.5063, "step": 35705 }, { "epoch": 0.9803953871499176, "grad_norm": 0.44622230529785156, "learning_rate": 1.0330338348578153e-05, "loss": 0.5078, "step": 35706 }, { "epoch": 0.9804228445908841, "grad_norm": 0.40354377031326294, "learning_rate": 1.0329906690574434e-05, "loss": 0.5105, "step": 35707 }, { "epoch": 0.9804503020318507, "grad_norm": 0.3503013253211975, "learning_rate": 1.032947503195533e-05, "loss": 0.4871, "step": 35708 }, { "epoch": 0.9804777594728171, "grad_norm": 0.35856467485427856, "learning_rate": 1.0329043372721654e-05, "loss": 0.4272, "step": 35709 }, { "epoch": 0.9805052169137837, "grad_norm": 0.41298797726631165, "learning_rate": 1.0328611712874206e-05, "loss": 0.4742, "step": 35710 }, { "epoch": 0.9805326743547501, "grad_norm": 0.38325366377830505, "learning_rate": 1.032818005241379e-05, "loss": 0.4976, "step": 35711 }, { "epoch": 0.9805601317957167, "grad_norm": 0.4280915856361389, "learning_rate": 1.0327748391341216e-05, "loss": 0.5781, "step": 35712 }, { "epoch": 0.9805875892366831, "grad_norm": 0.41334372758865356, "learning_rate": 1.0327316729657283e-05, "loss": 0.4822, "step": 35713 }, { "epoch": 0.9806150466776496, "grad_norm": 0.4031146466732025, "learning_rate": 1.0326885067362806e-05, "loss": 0.4859, "step": 35714 }, { "epoch": 0.9806425041186162, "grad_norm": 0.3922255337238312, "learning_rate": 1.0326453404458581e-05, "loss": 0.5559, "step": 35715 }, { "epoch": 0.9806699615595826, "grad_norm": 0.4018302857875824, "learning_rate": 1.0326021740945416e-05, "loss": 0.472, "step": 35716 }, { "epoch": 0.9806974190005492, "grad_norm": 0.42052239179611206, "learning_rate": 1.0325590076824117e-05, "loss": 0.4971, "step": 35717 }, { "epoch": 0.9807248764415156, "grad_norm": 0.36463338136672974, "learning_rate": 1.0325158412095489e-05, "loss": 0.4983, "step": 35718 }, { "epoch": 0.9807523338824822, "grad_norm": 0.3915511965751648, "learning_rate": 1.0324726746760334e-05, "loss": 0.5701, "step": 35719 }, { "epoch": 0.9807797913234486, "grad_norm": 0.40413761138916016, "learning_rate": 1.0324295080819462e-05, "loss": 0.4923, "step": 35720 }, { "epoch": 0.9808072487644152, "grad_norm": 0.39829888939857483, "learning_rate": 1.0323863414273674e-05, "loss": 0.491, "step": 35721 }, { "epoch": 0.9808347062053817, "grad_norm": 0.3947107195854187, "learning_rate": 1.032343174712378e-05, "loss": 0.5053, "step": 35722 }, { "epoch": 0.9808621636463482, "grad_norm": 0.4124845862388611, "learning_rate": 1.0323000079370581e-05, "loss": 0.5228, "step": 35723 }, { "epoch": 0.9808896210873147, "grad_norm": 0.4729115664958954, "learning_rate": 1.0322568411014887e-05, "loss": 0.5812, "step": 35724 }, { "epoch": 0.9809170785282811, "grad_norm": 0.43015727400779724, "learning_rate": 1.0322136742057495e-05, "loss": 0.4521, "step": 35725 }, { "epoch": 0.9809445359692477, "grad_norm": 0.4306129515171051, "learning_rate": 1.0321705072499217e-05, "loss": 0.4745, "step": 35726 }, { "epoch": 0.9809719934102141, "grad_norm": 0.41342827677726746, "learning_rate": 1.0321273402340858e-05, "loss": 0.5033, "step": 35727 }, { "epoch": 0.9809994508511807, "grad_norm": 0.4113723039627075, "learning_rate": 1.0320841731583222e-05, "loss": 0.5204, "step": 35728 }, { "epoch": 0.9810269082921472, "grad_norm": 0.3630935251712799, "learning_rate": 1.032041006022711e-05, "loss": 0.4625, "step": 35729 }, { "epoch": 0.9810543657331137, "grad_norm": 0.34441471099853516, "learning_rate": 1.0319978388273334e-05, "loss": 0.4239, "step": 35730 }, { "epoch": 0.9810818231740802, "grad_norm": 0.4040548503398895, "learning_rate": 1.0319546715722697e-05, "loss": 0.5595, "step": 35731 }, { "epoch": 0.9811092806150467, "grad_norm": 0.39866748452186584, "learning_rate": 1.0319115042576003e-05, "loss": 0.4412, "step": 35732 }, { "epoch": 0.9811367380560132, "grad_norm": 0.3829689621925354, "learning_rate": 1.0318683368834055e-05, "loss": 0.459, "step": 35733 }, { "epoch": 0.9811641954969796, "grad_norm": 0.37972235679626465, "learning_rate": 1.031825169449766e-05, "loss": 0.401, "step": 35734 }, { "epoch": 0.9811916529379462, "grad_norm": 0.4352414309978485, "learning_rate": 1.0317820019567628e-05, "loss": 0.5672, "step": 35735 }, { "epoch": 0.9812191103789127, "grad_norm": 0.4058671295642853, "learning_rate": 1.031738834404476e-05, "loss": 0.4709, "step": 35736 }, { "epoch": 0.9812465678198792, "grad_norm": 0.5926852226257324, "learning_rate": 1.0316956667929858e-05, "loss": 0.4901, "step": 35737 }, { "epoch": 0.9812740252608457, "grad_norm": 0.3611038029193878, "learning_rate": 1.0316524991223736e-05, "loss": 0.4567, "step": 35738 }, { "epoch": 0.9813014827018122, "grad_norm": 0.40065011382102966, "learning_rate": 1.0316093313927187e-05, "loss": 0.523, "step": 35739 }, { "epoch": 0.9813289401427787, "grad_norm": 0.5626040101051331, "learning_rate": 1.0315661636041028e-05, "loss": 0.4703, "step": 35740 }, { "epoch": 0.9813563975837452, "grad_norm": 0.3746773302555084, "learning_rate": 1.031522995756606e-05, "loss": 0.4648, "step": 35741 }, { "epoch": 0.9813838550247117, "grad_norm": 0.3581002354621887, "learning_rate": 1.0314798278503082e-05, "loss": 0.463, "step": 35742 }, { "epoch": 0.9814113124656783, "grad_norm": 0.41637828946113586, "learning_rate": 1.031436659885291e-05, "loss": 0.4215, "step": 35743 }, { "epoch": 0.9814387699066447, "grad_norm": 0.5147737860679626, "learning_rate": 1.031393491861634e-05, "loss": 0.5295, "step": 35744 }, { "epoch": 0.9814662273476112, "grad_norm": 0.35682910680770874, "learning_rate": 1.0313503237794185e-05, "loss": 0.4571, "step": 35745 }, { "epoch": 0.9814936847885777, "grad_norm": 0.4223479926586151, "learning_rate": 1.0313071556387246e-05, "loss": 0.5104, "step": 35746 }, { "epoch": 0.9815211422295442, "grad_norm": 0.3948824405670166, "learning_rate": 1.0312639874396325e-05, "loss": 0.4993, "step": 35747 }, { "epoch": 0.9815485996705107, "grad_norm": 0.47979676723480225, "learning_rate": 1.0312208191822235e-05, "loss": 0.4884, "step": 35748 }, { "epoch": 0.9815760571114772, "grad_norm": 0.38390734791755676, "learning_rate": 1.0311776508665776e-05, "loss": 0.4779, "step": 35749 }, { "epoch": 0.9816035145524438, "grad_norm": 0.3709476888179779, "learning_rate": 1.0311344824927752e-05, "loss": 0.4721, "step": 35750 }, { "epoch": 0.9816309719934102, "grad_norm": 0.42721185088157654, "learning_rate": 1.0310913140608974e-05, "loss": 0.5721, "step": 35751 }, { "epoch": 0.9816584294343768, "grad_norm": 0.40163007378578186, "learning_rate": 1.031048145571024e-05, "loss": 0.4355, "step": 35752 }, { "epoch": 0.9816858868753432, "grad_norm": 0.4352487027645111, "learning_rate": 1.031004977023236e-05, "loss": 0.5441, "step": 35753 }, { "epoch": 0.9817133443163097, "grad_norm": 0.40151605010032654, "learning_rate": 1.030961808417614e-05, "loss": 0.5592, "step": 35754 }, { "epoch": 0.9817408017572762, "grad_norm": 0.37884339690208435, "learning_rate": 1.030918639754238e-05, "loss": 0.4996, "step": 35755 }, { "epoch": 0.9817682591982427, "grad_norm": 0.37323030829429626, "learning_rate": 1.0308754710331893e-05, "loss": 0.5175, "step": 35756 }, { "epoch": 0.9817957166392092, "grad_norm": 0.6492051482200623, "learning_rate": 1.0308323022545476e-05, "loss": 0.4916, "step": 35757 }, { "epoch": 0.9818231740801757, "grad_norm": 0.40114280581474304, "learning_rate": 1.0307891334183944e-05, "loss": 0.4317, "step": 35758 }, { "epoch": 0.9818506315211423, "grad_norm": 0.44006869196891785, "learning_rate": 1.0307459645248092e-05, "loss": 0.5343, "step": 35759 }, { "epoch": 0.9818780889621087, "grad_norm": 0.35076451301574707, "learning_rate": 1.0307027955738728e-05, "loss": 0.5254, "step": 35760 }, { "epoch": 0.9819055464030753, "grad_norm": 0.3872314989566803, "learning_rate": 1.0306596265656662e-05, "loss": 0.3372, "step": 35761 }, { "epoch": 0.9819330038440417, "grad_norm": 0.4234462082386017, "learning_rate": 1.0306164575002697e-05, "loss": 0.4356, "step": 35762 }, { "epoch": 0.9819604612850082, "grad_norm": 0.4075971245765686, "learning_rate": 1.0305732883777634e-05, "loss": 0.5204, "step": 35763 }, { "epoch": 0.9819879187259747, "grad_norm": 0.3958650827407837, "learning_rate": 1.0305301191982285e-05, "loss": 0.4831, "step": 35764 }, { "epoch": 0.9820153761669412, "grad_norm": 0.38426199555397034, "learning_rate": 1.030486949961745e-05, "loss": 0.5056, "step": 35765 }, { "epoch": 0.9820428336079078, "grad_norm": 0.3639121651649475, "learning_rate": 1.0304437806683935e-05, "loss": 0.452, "step": 35766 }, { "epoch": 0.9820702910488742, "grad_norm": 0.3581767976284027, "learning_rate": 1.030400611318255e-05, "loss": 0.388, "step": 35767 }, { "epoch": 0.9820977484898408, "grad_norm": 0.3984135687351227, "learning_rate": 1.030357441911409e-05, "loss": 0.4601, "step": 35768 }, { "epoch": 0.9821252059308072, "grad_norm": 0.39617398381233215, "learning_rate": 1.0303142724479373e-05, "loss": 0.4566, "step": 35769 }, { "epoch": 0.9821526633717738, "grad_norm": 0.3833697736263275, "learning_rate": 1.0302711029279195e-05, "loss": 0.4872, "step": 35770 }, { "epoch": 0.9821801208127402, "grad_norm": 0.3621375560760498, "learning_rate": 1.0302279333514367e-05, "loss": 0.4709, "step": 35771 }, { "epoch": 0.9822075782537067, "grad_norm": 0.42509233951568604, "learning_rate": 1.0301847637185688e-05, "loss": 0.4968, "step": 35772 }, { "epoch": 0.9822350356946733, "grad_norm": 0.507575273513794, "learning_rate": 1.0301415940293969e-05, "loss": 0.6043, "step": 35773 }, { "epoch": 0.9822624931356397, "grad_norm": 0.399944931268692, "learning_rate": 1.0300984242840012e-05, "loss": 0.4707, "step": 35774 }, { "epoch": 0.9822899505766063, "grad_norm": 0.400417685508728, "learning_rate": 1.0300552544824625e-05, "loss": 0.515, "step": 35775 }, { "epoch": 0.9823174080175727, "grad_norm": 0.38797062635421753, "learning_rate": 1.0300120846248611e-05, "loss": 0.4972, "step": 35776 }, { "epoch": 0.9823448654585393, "grad_norm": 0.38659703731536865, "learning_rate": 1.0299689147112778e-05, "loss": 0.5407, "step": 35777 }, { "epoch": 0.9823723228995057, "grad_norm": 0.36106589436531067, "learning_rate": 1.0299257447417923e-05, "loss": 0.4499, "step": 35778 }, { "epoch": 0.9823997803404723, "grad_norm": 0.39751997590065, "learning_rate": 1.0298825747164863e-05, "loss": 0.4729, "step": 35779 }, { "epoch": 0.9824272377814388, "grad_norm": 0.3831535577774048, "learning_rate": 1.0298394046354396e-05, "loss": 0.5205, "step": 35780 }, { "epoch": 0.9824546952224052, "grad_norm": 0.39097270369529724, "learning_rate": 1.0297962344987327e-05, "loss": 0.5616, "step": 35781 }, { "epoch": 0.9824821526633718, "grad_norm": 0.36214596033096313, "learning_rate": 1.0297530643064464e-05, "loss": 0.4269, "step": 35782 }, { "epoch": 0.9825096101043382, "grad_norm": 0.39185577630996704, "learning_rate": 1.0297098940586611e-05, "loss": 0.4696, "step": 35783 }, { "epoch": 0.9825370675453048, "grad_norm": 0.45833882689476013, "learning_rate": 1.0296667237554577e-05, "loss": 0.5139, "step": 35784 }, { "epoch": 0.9825645249862712, "grad_norm": 0.3687548339366913, "learning_rate": 1.029623553396916e-05, "loss": 0.5075, "step": 35785 }, { "epoch": 0.9825919824272378, "grad_norm": 0.4375058710575104, "learning_rate": 1.029580382983117e-05, "loss": 0.4792, "step": 35786 }, { "epoch": 0.9826194398682043, "grad_norm": 0.3713165521621704, "learning_rate": 1.0295372125141415e-05, "loss": 0.4913, "step": 35787 }, { "epoch": 0.9826468973091708, "grad_norm": 0.41743677854537964, "learning_rate": 1.029494041990069e-05, "loss": 0.5358, "step": 35788 }, { "epoch": 0.9826743547501373, "grad_norm": 0.550894021987915, "learning_rate": 1.0294508714109811e-05, "loss": 0.5423, "step": 35789 }, { "epoch": 0.9827018121911038, "grad_norm": 0.4720502495765686, "learning_rate": 1.029407700776958e-05, "loss": 0.4958, "step": 35790 }, { "epoch": 0.9827292696320703, "grad_norm": 0.4038867652416229, "learning_rate": 1.0293645300880801e-05, "loss": 0.4347, "step": 35791 }, { "epoch": 0.9827567270730367, "grad_norm": 0.39271071553230286, "learning_rate": 1.029321359344428e-05, "loss": 0.4234, "step": 35792 }, { "epoch": 0.9827841845140033, "grad_norm": 0.41609352827072144, "learning_rate": 1.029278188546082e-05, "loss": 0.5538, "step": 35793 }, { "epoch": 0.9828116419549698, "grad_norm": 0.5017848610877991, "learning_rate": 1.029235017693123e-05, "loss": 0.5566, "step": 35794 }, { "epoch": 0.9828390993959363, "grad_norm": 0.3779248893260956, "learning_rate": 1.0291918467856314e-05, "loss": 0.4961, "step": 35795 }, { "epoch": 0.9828665568369028, "grad_norm": 0.4280809462070465, "learning_rate": 1.0291486758236875e-05, "loss": 0.4466, "step": 35796 }, { "epoch": 0.9828940142778693, "grad_norm": 0.3619506359100342, "learning_rate": 1.0291055048073722e-05, "loss": 0.4035, "step": 35797 }, { "epoch": 0.9829214717188358, "grad_norm": 0.356212317943573, "learning_rate": 1.029062333736766e-05, "loss": 0.4435, "step": 35798 }, { "epoch": 0.9829489291598023, "grad_norm": 0.4051038920879364, "learning_rate": 1.0290191626119488e-05, "loss": 0.4586, "step": 35799 }, { "epoch": 0.9829763866007688, "grad_norm": 0.42294174432754517, "learning_rate": 1.028975991433002e-05, "loss": 0.5095, "step": 35800 }, { "epoch": 0.9830038440417354, "grad_norm": 0.4230377674102783, "learning_rate": 1.0289328202000055e-05, "loss": 0.4434, "step": 35801 }, { "epoch": 0.9830313014827018, "grad_norm": 0.40009137988090515, "learning_rate": 1.0288896489130402e-05, "loss": 0.5154, "step": 35802 }, { "epoch": 0.9830587589236683, "grad_norm": 0.42640841007232666, "learning_rate": 1.0288464775721865e-05, "loss": 0.5104, "step": 35803 }, { "epoch": 0.9830862163646348, "grad_norm": 0.44533705711364746, "learning_rate": 1.0288033061775247e-05, "loss": 0.5079, "step": 35804 }, { "epoch": 0.9831136738056013, "grad_norm": 0.40413016080856323, "learning_rate": 1.0287601347291358e-05, "loss": 0.3989, "step": 35805 }, { "epoch": 0.9831411312465678, "grad_norm": 0.38781237602233887, "learning_rate": 1.0287169632270997e-05, "loss": 0.5816, "step": 35806 }, { "epoch": 0.9831685886875343, "grad_norm": 0.3577538728713989, "learning_rate": 1.0286737916714977e-05, "loss": 0.366, "step": 35807 }, { "epoch": 0.9831960461285009, "grad_norm": 0.4122081398963928, "learning_rate": 1.0286306200624099e-05, "loss": 0.4865, "step": 35808 }, { "epoch": 0.9832235035694673, "grad_norm": 0.41420477628707886, "learning_rate": 1.0285874483999166e-05, "loss": 0.4973, "step": 35809 }, { "epoch": 0.9832509610104339, "grad_norm": 0.4152459502220154, "learning_rate": 1.0285442766840987e-05, "loss": 0.5331, "step": 35810 }, { "epoch": 0.9832784184514003, "grad_norm": 0.4097081124782562, "learning_rate": 1.0285011049150367e-05, "loss": 0.4536, "step": 35811 }, { "epoch": 0.9833058758923668, "grad_norm": 0.6296291947364807, "learning_rate": 1.0284579330928108e-05, "loss": 0.4552, "step": 35812 }, { "epoch": 0.9833333333333333, "grad_norm": 0.4035436809062958, "learning_rate": 1.0284147612175021e-05, "loss": 0.5453, "step": 35813 }, { "epoch": 0.9833607907742998, "grad_norm": 0.3731517493724823, "learning_rate": 1.0283715892891906e-05, "loss": 0.4227, "step": 35814 }, { "epoch": 0.9833882482152664, "grad_norm": 0.36442381143569946, "learning_rate": 1.0283284173079571e-05, "loss": 0.3977, "step": 35815 }, { "epoch": 0.9834157056562328, "grad_norm": 0.42816275358200073, "learning_rate": 1.0282852452738819e-05, "loss": 0.5991, "step": 35816 }, { "epoch": 0.9834431630971994, "grad_norm": 0.3720617890357971, "learning_rate": 1.028242073187046e-05, "loss": 0.4241, "step": 35817 }, { "epoch": 0.9834706205381658, "grad_norm": 0.4244568943977356, "learning_rate": 1.0281989010475294e-05, "loss": 0.4836, "step": 35818 }, { "epoch": 0.9834980779791324, "grad_norm": 0.3819500505924225, "learning_rate": 1.0281557288554127e-05, "loss": 0.4071, "step": 35819 }, { "epoch": 0.9835255354200988, "grad_norm": 0.4255686104297638, "learning_rate": 1.0281125566107771e-05, "loss": 0.5734, "step": 35820 }, { "epoch": 0.9835529928610653, "grad_norm": 0.4852124750614166, "learning_rate": 1.028069384313702e-05, "loss": 0.522, "step": 35821 }, { "epoch": 0.9835804503020319, "grad_norm": 0.4581727087497711, "learning_rate": 1.028026211964269e-05, "loss": 0.5145, "step": 35822 }, { "epoch": 0.9836079077429983, "grad_norm": 0.39095133543014526, "learning_rate": 1.0279830395625581e-05, "loss": 0.4989, "step": 35823 }, { "epoch": 0.9836353651839649, "grad_norm": 0.43285563588142395, "learning_rate": 1.0279398671086496e-05, "loss": 0.5299, "step": 35824 }, { "epoch": 0.9836628226249313, "grad_norm": 0.5313538908958435, "learning_rate": 1.0278966946026245e-05, "loss": 0.5356, "step": 35825 }, { "epoch": 0.9836902800658979, "grad_norm": 0.3843991160392761, "learning_rate": 1.0278535220445631e-05, "loss": 0.532, "step": 35826 }, { "epoch": 0.9837177375068643, "grad_norm": 0.4050960838794708, "learning_rate": 1.027810349434546e-05, "loss": 0.5208, "step": 35827 }, { "epoch": 0.9837451949478309, "grad_norm": 0.9955090880393982, "learning_rate": 1.0277671767726539e-05, "loss": 0.5487, "step": 35828 }, { "epoch": 0.9837726523887974, "grad_norm": 0.3841996192932129, "learning_rate": 1.0277240040589671e-05, "loss": 0.4299, "step": 35829 }, { "epoch": 0.9838001098297638, "grad_norm": 0.41949111223220825, "learning_rate": 1.0276808312935661e-05, "loss": 0.4351, "step": 35830 }, { "epoch": 0.9838275672707304, "grad_norm": 0.3774500787258148, "learning_rate": 1.0276376584765316e-05, "loss": 0.5082, "step": 35831 }, { "epoch": 0.9838550247116968, "grad_norm": 0.48335134983062744, "learning_rate": 1.027594485607944e-05, "loss": 0.4915, "step": 35832 }, { "epoch": 0.9838824821526634, "grad_norm": 0.5616464614868164, "learning_rate": 1.0275513126878838e-05, "loss": 0.576, "step": 35833 }, { "epoch": 0.9839099395936298, "grad_norm": 0.40466129779815674, "learning_rate": 1.0275081397164317e-05, "loss": 0.4787, "step": 35834 }, { "epoch": 0.9839373970345964, "grad_norm": 0.3814612329006195, "learning_rate": 1.0274649666936682e-05, "loss": 0.4833, "step": 35835 }, { "epoch": 0.9839648544755629, "grad_norm": 0.3728581368923187, "learning_rate": 1.0274217936196735e-05, "loss": 0.3849, "step": 35836 }, { "epoch": 0.9839923119165294, "grad_norm": 0.42403820157051086, "learning_rate": 1.0273786204945286e-05, "loss": 0.4774, "step": 35837 }, { "epoch": 0.9840197693574959, "grad_norm": 0.4510997235774994, "learning_rate": 1.0273354473183136e-05, "loss": 0.462, "step": 35838 }, { "epoch": 0.9840472267984623, "grad_norm": 0.38059067726135254, "learning_rate": 1.0272922740911098e-05, "loss": 0.4805, "step": 35839 }, { "epoch": 0.9840746842394289, "grad_norm": 0.43444588780403137, "learning_rate": 1.027249100812997e-05, "loss": 0.4237, "step": 35840 }, { "epoch": 0.9841021416803953, "grad_norm": 0.3787662088871002, "learning_rate": 1.0272059274840555e-05, "loss": 0.3966, "step": 35841 }, { "epoch": 0.9841295991213619, "grad_norm": 0.4093223214149475, "learning_rate": 1.0271627541043668e-05, "loss": 0.5684, "step": 35842 }, { "epoch": 0.9841570565623284, "grad_norm": 0.43822282552719116, "learning_rate": 1.0271195806740105e-05, "loss": 0.4352, "step": 35843 }, { "epoch": 0.9841845140032949, "grad_norm": 0.34668025374412537, "learning_rate": 1.027076407193068e-05, "loss": 0.387, "step": 35844 }, { "epoch": 0.9842119714442614, "grad_norm": 0.36072438955307007, "learning_rate": 1.0270332336616189e-05, "loss": 0.5027, "step": 35845 }, { "epoch": 0.9842394288852279, "grad_norm": 0.5118063688278198, "learning_rate": 1.0269900600797445e-05, "loss": 0.4277, "step": 35846 }, { "epoch": 0.9842668863261944, "grad_norm": 0.39052069187164307, "learning_rate": 1.0269468864475248e-05, "loss": 0.484, "step": 35847 }, { "epoch": 0.9842943437671609, "grad_norm": 0.4438621699810028, "learning_rate": 1.0269037127650405e-05, "loss": 0.5006, "step": 35848 }, { "epoch": 0.9843218012081274, "grad_norm": 0.3682904541492462, "learning_rate": 1.0268605390323725e-05, "loss": 0.5157, "step": 35849 }, { "epoch": 0.984349258649094, "grad_norm": 0.4207058250904083, "learning_rate": 1.0268173652496007e-05, "loss": 0.4588, "step": 35850 }, { "epoch": 0.9843767160900604, "grad_norm": 0.4060781002044678, "learning_rate": 1.026774191416806e-05, "loss": 0.4721, "step": 35851 }, { "epoch": 0.9844041735310269, "grad_norm": 0.4139321446418762, "learning_rate": 1.0267310175340692e-05, "loss": 0.4531, "step": 35852 }, { "epoch": 0.9844316309719934, "grad_norm": 0.46698352694511414, "learning_rate": 1.0266878436014702e-05, "loss": 0.5048, "step": 35853 }, { "epoch": 0.9844590884129599, "grad_norm": 0.40057873725891113, "learning_rate": 1.0266446696190899e-05, "loss": 0.5507, "step": 35854 }, { "epoch": 0.9844865458539264, "grad_norm": 0.36569857597351074, "learning_rate": 1.0266014955870092e-05, "loss": 0.509, "step": 35855 }, { "epoch": 0.9845140032948929, "grad_norm": 0.46380218863487244, "learning_rate": 1.0265583215053077e-05, "loss": 0.5641, "step": 35856 }, { "epoch": 0.9845414607358595, "grad_norm": 0.40535977482795715, "learning_rate": 1.0265151473740666e-05, "loss": 0.4355, "step": 35857 }, { "epoch": 0.9845689181768259, "grad_norm": 0.4088630676269531, "learning_rate": 1.026471973193366e-05, "loss": 0.4834, "step": 35858 }, { "epoch": 0.9845963756177925, "grad_norm": 0.3284760117530823, "learning_rate": 1.0264287989632872e-05, "loss": 0.3934, "step": 35859 }, { "epoch": 0.9846238330587589, "grad_norm": 0.4551207423210144, "learning_rate": 1.0263856246839104e-05, "loss": 0.4903, "step": 35860 }, { "epoch": 0.9846512904997254, "grad_norm": 0.4031596779823303, "learning_rate": 1.0263424503553154e-05, "loss": 0.4841, "step": 35861 }, { "epoch": 0.9846787479406919, "grad_norm": 0.4002750813961029, "learning_rate": 1.0262992759775838e-05, "loss": 0.5253, "step": 35862 }, { "epoch": 0.9847062053816584, "grad_norm": 0.46921223402023315, "learning_rate": 1.0262561015507952e-05, "loss": 0.5081, "step": 35863 }, { "epoch": 0.984733662822625, "grad_norm": 0.3614066243171692, "learning_rate": 1.026212927075031e-05, "loss": 0.4878, "step": 35864 }, { "epoch": 0.9847611202635914, "grad_norm": 0.37360450625419617, "learning_rate": 1.0261697525503713e-05, "loss": 0.4975, "step": 35865 }, { "epoch": 0.984788577704558, "grad_norm": 0.4042102098464966, "learning_rate": 1.0261265779768964e-05, "loss": 0.5485, "step": 35866 }, { "epoch": 0.9848160351455244, "grad_norm": 0.43489503860473633, "learning_rate": 1.0260834033546872e-05, "loss": 0.4649, "step": 35867 }, { "epoch": 0.984843492586491, "grad_norm": 0.3597380816936493, "learning_rate": 1.026040228683824e-05, "loss": 0.4583, "step": 35868 }, { "epoch": 0.9848709500274574, "grad_norm": 0.42621827125549316, "learning_rate": 1.0259970539643877e-05, "loss": 0.4676, "step": 35869 }, { "epoch": 0.9848984074684239, "grad_norm": 0.4464735686779022, "learning_rate": 1.0259538791964587e-05, "loss": 0.5105, "step": 35870 }, { "epoch": 0.9849258649093905, "grad_norm": 0.4546196758747101, "learning_rate": 1.025910704380117e-05, "loss": 0.5113, "step": 35871 }, { "epoch": 0.9849533223503569, "grad_norm": 0.3431130349636078, "learning_rate": 1.0258675295154438e-05, "loss": 0.4771, "step": 35872 }, { "epoch": 0.9849807797913235, "grad_norm": 0.4267568588256836, "learning_rate": 1.0258243546025197e-05, "loss": 0.4783, "step": 35873 }, { "epoch": 0.9850082372322899, "grad_norm": 0.402377724647522, "learning_rate": 1.0257811796414245e-05, "loss": 0.5318, "step": 35874 }, { "epoch": 0.9850356946732565, "grad_norm": 0.3922879099845886, "learning_rate": 1.0257380046322394e-05, "loss": 0.4415, "step": 35875 }, { "epoch": 0.9850631521142229, "grad_norm": 0.4758348762989044, "learning_rate": 1.0256948295750444e-05, "loss": 0.4104, "step": 35876 }, { "epoch": 0.9850906095551895, "grad_norm": 0.3673476576805115, "learning_rate": 1.0256516544699207e-05, "loss": 0.4512, "step": 35877 }, { "epoch": 0.985118066996156, "grad_norm": 0.36103764176368713, "learning_rate": 1.0256084793169485e-05, "loss": 0.4784, "step": 35878 }, { "epoch": 0.9851455244371224, "grad_norm": 0.4713972508907318, "learning_rate": 1.025565304116208e-05, "loss": 0.5344, "step": 35879 }, { "epoch": 0.985172981878089, "grad_norm": 0.430062472820282, "learning_rate": 1.0255221288677804e-05, "loss": 0.5182, "step": 35880 }, { "epoch": 0.9852004393190554, "grad_norm": 0.4730035662651062, "learning_rate": 1.0254789535717455e-05, "loss": 0.5348, "step": 35881 }, { "epoch": 0.985227896760022, "grad_norm": 0.3583771586418152, "learning_rate": 1.0254357782281846e-05, "loss": 0.4436, "step": 35882 }, { "epoch": 0.9852553542009884, "grad_norm": 0.3776409327983856, "learning_rate": 1.0253926028371776e-05, "loss": 0.4663, "step": 35883 }, { "epoch": 0.985282811641955, "grad_norm": 0.3553338348865509, "learning_rate": 1.0253494273988051e-05, "loss": 0.4745, "step": 35884 }, { "epoch": 0.9853102690829215, "grad_norm": 0.39739060401916504, "learning_rate": 1.0253062519131482e-05, "loss": 0.4404, "step": 35885 }, { "epoch": 0.985337726523888, "grad_norm": 0.39368781447410583, "learning_rate": 1.0252630763802871e-05, "loss": 0.4405, "step": 35886 }, { "epoch": 0.9853651839648545, "grad_norm": 0.46305498480796814, "learning_rate": 1.025219900800302e-05, "loss": 0.5276, "step": 35887 }, { "epoch": 0.985392641405821, "grad_norm": 0.37801069021224976, "learning_rate": 1.025176725173274e-05, "loss": 0.4145, "step": 35888 }, { "epoch": 0.9854200988467875, "grad_norm": 0.4386086165904999, "learning_rate": 1.025133549499283e-05, "loss": 0.4771, "step": 35889 }, { "epoch": 0.9854475562877539, "grad_norm": 0.429776132106781, "learning_rate": 1.0250903737784103e-05, "loss": 0.5094, "step": 35890 }, { "epoch": 0.9854750137287205, "grad_norm": 0.4200189411640167, "learning_rate": 1.0250471980107361e-05, "loss": 0.4858, "step": 35891 }, { "epoch": 0.985502471169687, "grad_norm": 0.37305933237075806, "learning_rate": 1.0250040221963406e-05, "loss": 0.4428, "step": 35892 }, { "epoch": 0.9855299286106535, "grad_norm": 0.37603890895843506, "learning_rate": 1.0249608463353048e-05, "loss": 0.3943, "step": 35893 }, { "epoch": 0.98555738605162, "grad_norm": 0.41333645582199097, "learning_rate": 1.0249176704277088e-05, "loss": 0.5184, "step": 35894 }, { "epoch": 0.9855848434925865, "grad_norm": 0.3858993649482727, "learning_rate": 1.0248744944736335e-05, "loss": 0.5299, "step": 35895 }, { "epoch": 0.985612300933553, "grad_norm": 0.41077783703804016, "learning_rate": 1.0248313184731596e-05, "loss": 0.4569, "step": 35896 }, { "epoch": 0.9856397583745194, "grad_norm": 0.4076900780200958, "learning_rate": 1.024788142426367e-05, "loss": 0.467, "step": 35897 }, { "epoch": 0.985667215815486, "grad_norm": 0.40137913823127747, "learning_rate": 1.0247449663333368e-05, "loss": 0.517, "step": 35898 }, { "epoch": 0.9856946732564525, "grad_norm": 0.3849411606788635, "learning_rate": 1.0247017901941492e-05, "loss": 0.4351, "step": 35899 }, { "epoch": 0.985722130697419, "grad_norm": 0.3792320787906647, "learning_rate": 1.024658614008885e-05, "loss": 0.456, "step": 35900 }, { "epoch": 0.9857495881383855, "grad_norm": 0.4087684452533722, "learning_rate": 1.0246154377776247e-05, "loss": 0.4733, "step": 35901 }, { "epoch": 0.985777045579352, "grad_norm": 0.39333704113960266, "learning_rate": 1.0245722615004485e-05, "loss": 0.4731, "step": 35902 }, { "epoch": 0.9858045030203185, "grad_norm": 0.3806110918521881, "learning_rate": 1.0245290851774374e-05, "loss": 0.4651, "step": 35903 }, { "epoch": 0.985831960461285, "grad_norm": 0.3945881724357605, "learning_rate": 1.0244859088086719e-05, "loss": 0.4748, "step": 35904 }, { "epoch": 0.9858594179022515, "grad_norm": 0.47852659225463867, "learning_rate": 1.0244427323942318e-05, "loss": 0.4901, "step": 35905 }, { "epoch": 0.9858868753432181, "grad_norm": 0.4202409088611603, "learning_rate": 1.0243995559341986e-05, "loss": 0.4864, "step": 35906 }, { "epoch": 0.9859143327841845, "grad_norm": 0.439973384141922, "learning_rate": 1.0243563794286524e-05, "loss": 0.4348, "step": 35907 }, { "epoch": 0.985941790225151, "grad_norm": 0.39073365926742554, "learning_rate": 1.0243132028776736e-05, "loss": 0.5319, "step": 35908 }, { "epoch": 0.9859692476661175, "grad_norm": 0.43287861347198486, "learning_rate": 1.0242700262813432e-05, "loss": 0.502, "step": 35909 }, { "epoch": 0.985996705107084, "grad_norm": 0.4187278747558594, "learning_rate": 1.0242268496397412e-05, "loss": 0.4923, "step": 35910 }, { "epoch": 0.9860241625480505, "grad_norm": 0.3760363757610321, "learning_rate": 1.0241836729529487e-05, "loss": 0.4299, "step": 35911 }, { "epoch": 0.986051619989017, "grad_norm": 0.41000309586524963, "learning_rate": 1.0241404962210455e-05, "loss": 0.4652, "step": 35912 }, { "epoch": 0.9860790774299836, "grad_norm": 0.38554006814956665, "learning_rate": 1.0240973194441131e-05, "loss": 0.4703, "step": 35913 }, { "epoch": 0.98610653487095, "grad_norm": 0.3600612282752991, "learning_rate": 1.0240541426222312e-05, "loss": 0.4266, "step": 35914 }, { "epoch": 0.9861339923119166, "grad_norm": 0.46548303961753845, "learning_rate": 1.0240109657554804e-05, "loss": 0.5273, "step": 35915 }, { "epoch": 0.986161449752883, "grad_norm": 0.4601188898086548, "learning_rate": 1.023967788843942e-05, "loss": 0.5782, "step": 35916 }, { "epoch": 0.9861889071938496, "grad_norm": 0.41570940613746643, "learning_rate": 1.0239246118876957e-05, "loss": 0.4601, "step": 35917 }, { "epoch": 0.986216364634816, "grad_norm": 0.4140207767486572, "learning_rate": 1.0238814348868225e-05, "loss": 0.4929, "step": 35918 }, { "epoch": 0.9862438220757825, "grad_norm": 0.3649998605251312, "learning_rate": 1.0238382578414028e-05, "loss": 0.4412, "step": 35919 }, { "epoch": 0.9862712795167491, "grad_norm": 0.3667534291744232, "learning_rate": 1.023795080751517e-05, "loss": 0.4639, "step": 35920 }, { "epoch": 0.9862987369577155, "grad_norm": 0.41861391067504883, "learning_rate": 1.0237519036172459e-05, "loss": 0.5043, "step": 35921 }, { "epoch": 0.9863261943986821, "grad_norm": 0.3923702538013458, "learning_rate": 1.0237087264386698e-05, "loss": 0.5248, "step": 35922 }, { "epoch": 0.9863536518396485, "grad_norm": 0.39076361060142517, "learning_rate": 1.0236655492158694e-05, "loss": 0.5654, "step": 35923 }, { "epoch": 0.9863811092806151, "grad_norm": 0.3658798635005951, "learning_rate": 1.0236223719489254e-05, "loss": 0.5016, "step": 35924 }, { "epoch": 0.9864085667215815, "grad_norm": 0.37370914220809937, "learning_rate": 1.0235791946379177e-05, "loss": 0.5301, "step": 35925 }, { "epoch": 0.986436024162548, "grad_norm": 0.41162198781967163, "learning_rate": 1.0235360172829276e-05, "loss": 0.4456, "step": 35926 }, { "epoch": 0.9864634816035146, "grad_norm": 0.35184410214424133, "learning_rate": 1.0234928398840353e-05, "loss": 0.3996, "step": 35927 }, { "epoch": 0.986490939044481, "grad_norm": 0.41203826665878296, "learning_rate": 1.0234496624413213e-05, "loss": 0.4319, "step": 35928 }, { "epoch": 0.9865183964854476, "grad_norm": 0.38572144508361816, "learning_rate": 1.0234064849548665e-05, "loss": 0.4465, "step": 35929 }, { "epoch": 0.986545853926414, "grad_norm": 0.3804638981819153, "learning_rate": 1.0233633074247505e-05, "loss": 0.4923, "step": 35930 }, { "epoch": 0.9865733113673806, "grad_norm": 0.3943641781806946, "learning_rate": 1.0233201298510548e-05, "loss": 0.5037, "step": 35931 }, { "epoch": 0.986600768808347, "grad_norm": 0.399070680141449, "learning_rate": 1.02327695223386e-05, "loss": 0.5261, "step": 35932 }, { "epoch": 0.9866282262493136, "grad_norm": 0.38374871015548706, "learning_rate": 1.0232337745732457e-05, "loss": 0.3803, "step": 35933 }, { "epoch": 0.9866556836902801, "grad_norm": 0.3979797661304474, "learning_rate": 1.0231905968692933e-05, "loss": 0.4631, "step": 35934 }, { "epoch": 0.9866831411312466, "grad_norm": 0.3888411223888397, "learning_rate": 1.023147419122083e-05, "loss": 0.4597, "step": 35935 }, { "epoch": 0.9867105985722131, "grad_norm": 0.41087114810943604, "learning_rate": 1.0231042413316953e-05, "loss": 0.4773, "step": 35936 }, { "epoch": 0.9867380560131795, "grad_norm": 0.46334725618362427, "learning_rate": 1.0230610634982107e-05, "loss": 0.5158, "step": 35937 }, { "epoch": 0.9867655134541461, "grad_norm": 0.3888024091720581, "learning_rate": 1.0230178856217099e-05, "loss": 0.3919, "step": 35938 }, { "epoch": 0.9867929708951125, "grad_norm": 0.4669295847415924, "learning_rate": 1.0229747077022734e-05, "loss": 0.3931, "step": 35939 }, { "epoch": 0.9868204283360791, "grad_norm": 0.3998545706272125, "learning_rate": 1.022931529739982e-05, "loss": 0.4804, "step": 35940 }, { "epoch": 0.9868478857770456, "grad_norm": 0.3952127695083618, "learning_rate": 1.0228883517349155e-05, "loss": 0.5434, "step": 35941 }, { "epoch": 0.9868753432180121, "grad_norm": 0.3617648780345917, "learning_rate": 1.0228451736871554e-05, "loss": 0.4526, "step": 35942 }, { "epoch": 0.9869028006589786, "grad_norm": 0.40099674463272095, "learning_rate": 1.0228019955967815e-05, "loss": 0.5058, "step": 35943 }, { "epoch": 0.9869302580999451, "grad_norm": 0.44349610805511475, "learning_rate": 1.0227588174638746e-05, "loss": 0.4988, "step": 35944 }, { "epoch": 0.9869577155409116, "grad_norm": 0.3487011790275574, "learning_rate": 1.0227156392885154e-05, "loss": 0.5074, "step": 35945 }, { "epoch": 0.986985172981878, "grad_norm": 0.40472981333732605, "learning_rate": 1.0226724610707842e-05, "loss": 0.4751, "step": 35946 }, { "epoch": 0.9870126304228446, "grad_norm": 0.3828586935997009, "learning_rate": 1.0226292828107616e-05, "loss": 0.4945, "step": 35947 }, { "epoch": 0.9870400878638111, "grad_norm": 0.4858611822128296, "learning_rate": 1.0225861045085282e-05, "loss": 0.4764, "step": 35948 }, { "epoch": 0.9870675453047776, "grad_norm": 0.362066388130188, "learning_rate": 1.0225429261641643e-05, "loss": 0.409, "step": 35949 }, { "epoch": 0.9870950027457441, "grad_norm": 0.43630310893058777, "learning_rate": 1.0224997477777509e-05, "loss": 0.507, "step": 35950 }, { "epoch": 0.9871224601867106, "grad_norm": 0.4106808602809906, "learning_rate": 1.0224565693493682e-05, "loss": 0.5008, "step": 35951 }, { "epoch": 0.9871499176276771, "grad_norm": 0.3993169665336609, "learning_rate": 1.022413390879097e-05, "loss": 0.4903, "step": 35952 }, { "epoch": 0.9871773750686436, "grad_norm": 0.4855341911315918, "learning_rate": 1.0223702123670177e-05, "loss": 0.4535, "step": 35953 }, { "epoch": 0.9872048325096101, "grad_norm": 0.401519238948822, "learning_rate": 1.0223270338132102e-05, "loss": 0.5815, "step": 35954 }, { "epoch": 0.9872322899505767, "grad_norm": 0.46157002449035645, "learning_rate": 1.0222838552177563e-05, "loss": 0.4753, "step": 35955 }, { "epoch": 0.9872597473915431, "grad_norm": 0.3718825876712799, "learning_rate": 1.0222406765807355e-05, "loss": 0.5589, "step": 35956 }, { "epoch": 0.9872872048325096, "grad_norm": 0.41220787167549133, "learning_rate": 1.0221974979022292e-05, "loss": 0.4417, "step": 35957 }, { "epoch": 0.9873146622734761, "grad_norm": 0.42461729049682617, "learning_rate": 1.0221543191823174e-05, "loss": 0.4668, "step": 35958 }, { "epoch": 0.9873421197144426, "grad_norm": 0.4331369996070862, "learning_rate": 1.0221111404210802e-05, "loss": 0.4885, "step": 35959 }, { "epoch": 0.9873695771554091, "grad_norm": 0.396995484828949, "learning_rate": 1.022067961618599e-05, "loss": 0.4971, "step": 35960 }, { "epoch": 0.9873970345963756, "grad_norm": 0.39297986030578613, "learning_rate": 1.022024782774954e-05, "loss": 0.383, "step": 35961 }, { "epoch": 0.9874244920373422, "grad_norm": 0.38801243901252747, "learning_rate": 1.0219816038902259e-05, "loss": 0.5059, "step": 35962 }, { "epoch": 0.9874519494783086, "grad_norm": 0.3468981087207794, "learning_rate": 1.021938424964495e-05, "loss": 0.4669, "step": 35963 }, { "epoch": 0.9874794069192752, "grad_norm": 0.3361624777317047, "learning_rate": 1.0218952459978417e-05, "loss": 0.4266, "step": 35964 }, { "epoch": 0.9875068643602416, "grad_norm": 0.39264172315597534, "learning_rate": 1.021852066990347e-05, "loss": 0.5182, "step": 35965 }, { "epoch": 0.9875343218012081, "grad_norm": 0.45283371210098267, "learning_rate": 1.0218088879420914e-05, "loss": 0.5123, "step": 35966 }, { "epoch": 0.9875617792421746, "grad_norm": 0.39580830931663513, "learning_rate": 1.021765708853155e-05, "loss": 0.4768, "step": 35967 }, { "epoch": 0.9875892366831411, "grad_norm": 0.43125712871551514, "learning_rate": 1.0217225297236187e-05, "loss": 0.5307, "step": 35968 }, { "epoch": 0.9876166941241077, "grad_norm": 0.3786207437515259, "learning_rate": 1.0216793505535627e-05, "loss": 0.4942, "step": 35969 }, { "epoch": 0.9876441515650741, "grad_norm": 0.41749677062034607, "learning_rate": 1.021636171343068e-05, "loss": 0.5383, "step": 35970 }, { "epoch": 0.9876716090060407, "grad_norm": 0.3435963988304138, "learning_rate": 1.0215929920922151e-05, "loss": 0.3971, "step": 35971 }, { "epoch": 0.9876990664470071, "grad_norm": 0.42613476514816284, "learning_rate": 1.0215498128010841e-05, "loss": 0.5299, "step": 35972 }, { "epoch": 0.9877265238879737, "grad_norm": 0.3777581751346588, "learning_rate": 1.0215066334697561e-05, "loss": 0.4164, "step": 35973 }, { "epoch": 0.9877539813289401, "grad_norm": 0.3604992628097534, "learning_rate": 1.0214634540983111e-05, "loss": 0.4832, "step": 35974 }, { "epoch": 0.9877814387699067, "grad_norm": 0.46952196955680847, "learning_rate": 1.0214202746868301e-05, "loss": 0.5319, "step": 35975 }, { "epoch": 0.9878088962108732, "grad_norm": 0.3638250529766083, "learning_rate": 1.0213770952353934e-05, "loss": 0.468, "step": 35976 }, { "epoch": 0.9878363536518396, "grad_norm": 0.4293268918991089, "learning_rate": 1.0213339157440813e-05, "loss": 0.4913, "step": 35977 }, { "epoch": 0.9878638110928062, "grad_norm": 0.353071391582489, "learning_rate": 1.0212907362129752e-05, "loss": 0.4686, "step": 35978 }, { "epoch": 0.9878912685337726, "grad_norm": 0.5254124402999878, "learning_rate": 1.0212475566421544e-05, "loss": 0.4155, "step": 35979 }, { "epoch": 0.9879187259747392, "grad_norm": 0.44358932971954346, "learning_rate": 1.0212043770317006e-05, "loss": 0.4711, "step": 35980 }, { "epoch": 0.9879461834157056, "grad_norm": 0.3561739921569824, "learning_rate": 1.021161197381694e-05, "loss": 0.4329, "step": 35981 }, { "epoch": 0.9879736408566722, "grad_norm": 0.368450790643692, "learning_rate": 1.0211180176922147e-05, "loss": 0.4202, "step": 35982 }, { "epoch": 0.9880010982976387, "grad_norm": 0.35711756348609924, "learning_rate": 1.0210748379633435e-05, "loss": 0.5028, "step": 35983 }, { "epoch": 0.9880285557386052, "grad_norm": 0.38752081990242004, "learning_rate": 1.0210316581951613e-05, "loss": 0.4793, "step": 35984 }, { "epoch": 0.9880560131795717, "grad_norm": 0.4879627525806427, "learning_rate": 1.0209884783877481e-05, "loss": 0.4908, "step": 35985 }, { "epoch": 0.9880834706205381, "grad_norm": 0.4289221167564392, "learning_rate": 1.0209452985411847e-05, "loss": 0.4995, "step": 35986 }, { "epoch": 0.9881109280615047, "grad_norm": 0.3656303882598877, "learning_rate": 1.0209021186555516e-05, "loss": 0.423, "step": 35987 }, { "epoch": 0.9881383855024711, "grad_norm": 0.38101592659950256, "learning_rate": 1.0208589387309295e-05, "loss": 0.41, "step": 35988 }, { "epoch": 0.9881658429434377, "grad_norm": 0.39417845010757446, "learning_rate": 1.020815758767399e-05, "loss": 0.5106, "step": 35989 }, { "epoch": 0.9881933003844042, "grad_norm": 0.39943158626556396, "learning_rate": 1.0207725787650402e-05, "loss": 0.446, "step": 35990 }, { "epoch": 0.9882207578253707, "grad_norm": 0.4329339861869812, "learning_rate": 1.020729398723934e-05, "loss": 0.4945, "step": 35991 }, { "epoch": 0.9882482152663372, "grad_norm": 0.45335114002227783, "learning_rate": 1.0206862186441608e-05, "loss": 0.5441, "step": 35992 }, { "epoch": 0.9882756727073037, "grad_norm": 0.36752524971961975, "learning_rate": 1.0206430385258013e-05, "loss": 0.4126, "step": 35993 }, { "epoch": 0.9883031301482702, "grad_norm": 0.41001251339912415, "learning_rate": 1.020599858368936e-05, "loss": 0.4513, "step": 35994 }, { "epoch": 0.9883305875892366, "grad_norm": 0.42791658639907837, "learning_rate": 1.0205566781736451e-05, "loss": 0.4485, "step": 35995 }, { "epoch": 0.9883580450302032, "grad_norm": 0.42211830615997314, "learning_rate": 1.0205134979400097e-05, "loss": 0.4363, "step": 35996 }, { "epoch": 0.9883855024711697, "grad_norm": 0.43387001752853394, "learning_rate": 1.0204703176681099e-05, "loss": 0.4416, "step": 35997 }, { "epoch": 0.9884129599121362, "grad_norm": 0.42837923765182495, "learning_rate": 1.0204271373580265e-05, "loss": 0.5469, "step": 35998 }, { "epoch": 0.9884404173531027, "grad_norm": 0.40141788125038147, "learning_rate": 1.0203839570098402e-05, "loss": 0.4596, "step": 35999 }, { "epoch": 0.9884678747940692, "grad_norm": 0.38804134726524353, "learning_rate": 1.0203407766236308e-05, "loss": 0.4534, "step": 36000 }, { "epoch": 0.9884953322350357, "grad_norm": 0.4501250088214874, "learning_rate": 1.0202975961994799e-05, "loss": 0.5149, "step": 36001 }, { "epoch": 0.9885227896760022, "grad_norm": 0.45680081844329834, "learning_rate": 1.0202544157374669e-05, "loss": 0.4822, "step": 36002 }, { "epoch": 0.9885502471169687, "grad_norm": 0.40002503991127014, "learning_rate": 1.0202112352376734e-05, "loss": 0.5048, "step": 36003 }, { "epoch": 0.9885777045579353, "grad_norm": 0.3943614363670349, "learning_rate": 1.0201680547001795e-05, "loss": 0.4682, "step": 36004 }, { "epoch": 0.9886051619989017, "grad_norm": 0.3875051736831665, "learning_rate": 1.0201248741250656e-05, "loss": 0.4312, "step": 36005 }, { "epoch": 0.9886326194398682, "grad_norm": 0.4051513969898224, "learning_rate": 1.0200816935124124e-05, "loss": 0.5095, "step": 36006 }, { "epoch": 0.9886600768808347, "grad_norm": 0.3891288638114929, "learning_rate": 1.0200385128623005e-05, "loss": 0.4834, "step": 36007 }, { "epoch": 0.9886875343218012, "grad_norm": 0.42494919896125793, "learning_rate": 1.0199953321748105e-05, "loss": 0.5856, "step": 36008 }, { "epoch": 0.9887149917627677, "grad_norm": 0.40577074885368347, "learning_rate": 1.0199521514500223e-05, "loss": 0.5108, "step": 36009 }, { "epoch": 0.9887424492037342, "grad_norm": 0.3763245642185211, "learning_rate": 1.0199089706880174e-05, "loss": 0.4856, "step": 36010 }, { "epoch": 0.9887699066447008, "grad_norm": 0.37040647864341736, "learning_rate": 1.019865789888876e-05, "loss": 0.4615, "step": 36011 }, { "epoch": 0.9887973640856672, "grad_norm": 0.4971982538700104, "learning_rate": 1.0198226090526785e-05, "loss": 0.4984, "step": 36012 }, { "epoch": 0.9888248215266338, "grad_norm": 0.3937513828277588, "learning_rate": 1.0197794281795052e-05, "loss": 0.538, "step": 36013 }, { "epoch": 0.9888522789676002, "grad_norm": 0.5317143201828003, "learning_rate": 1.0197362472694372e-05, "loss": 0.6008, "step": 36014 }, { "epoch": 0.9888797364085667, "grad_norm": 0.4030527174472809, "learning_rate": 1.0196930663225548e-05, "loss": 0.5253, "step": 36015 }, { "epoch": 0.9889071938495332, "grad_norm": 0.46623173356056213, "learning_rate": 1.0196498853389387e-05, "loss": 0.6136, "step": 36016 }, { "epoch": 0.9889346512904997, "grad_norm": 0.448650985956192, "learning_rate": 1.0196067043186688e-05, "loss": 0.5188, "step": 36017 }, { "epoch": 0.9889621087314663, "grad_norm": 0.3829805552959442, "learning_rate": 1.0195635232618266e-05, "loss": 0.4285, "step": 36018 }, { "epoch": 0.9889895661724327, "grad_norm": 0.40322497487068176, "learning_rate": 1.019520342168492e-05, "loss": 0.3968, "step": 36019 }, { "epoch": 0.9890170236133993, "grad_norm": 0.4361368715763092, "learning_rate": 1.0194771610387458e-05, "loss": 0.4896, "step": 36020 }, { "epoch": 0.9890444810543657, "grad_norm": 0.43198761343955994, "learning_rate": 1.0194339798726685e-05, "loss": 0.5252, "step": 36021 }, { "epoch": 0.9890719384953323, "grad_norm": 0.40842097997665405, "learning_rate": 1.0193907986703405e-05, "loss": 0.5189, "step": 36022 }, { "epoch": 0.9890993959362987, "grad_norm": 0.43640220165252686, "learning_rate": 1.0193476174318426e-05, "loss": 0.505, "step": 36023 }, { "epoch": 0.9891268533772652, "grad_norm": 0.4063473045825958, "learning_rate": 1.019304436157255e-05, "loss": 0.4753, "step": 36024 }, { "epoch": 0.9891543108182317, "grad_norm": 0.49594804644584656, "learning_rate": 1.0192612548466587e-05, "loss": 0.514, "step": 36025 }, { "epoch": 0.9891817682591982, "grad_norm": 0.4195566177368164, "learning_rate": 1.0192180735001338e-05, "loss": 0.5445, "step": 36026 }, { "epoch": 0.9892092257001648, "grad_norm": 0.4353272616863251, "learning_rate": 1.0191748921177613e-05, "loss": 0.5492, "step": 36027 }, { "epoch": 0.9892366831411312, "grad_norm": 0.40935418009757996, "learning_rate": 1.0191317106996214e-05, "loss": 0.5512, "step": 36028 }, { "epoch": 0.9892641405820978, "grad_norm": 0.43198245763778687, "learning_rate": 1.0190885292457949e-05, "loss": 0.5127, "step": 36029 }, { "epoch": 0.9892915980230642, "grad_norm": 0.44679170846939087, "learning_rate": 1.019045347756362e-05, "loss": 0.483, "step": 36030 }, { "epoch": 0.9893190554640308, "grad_norm": 0.4192979037761688, "learning_rate": 1.0190021662314033e-05, "loss": 0.4273, "step": 36031 }, { "epoch": 0.9893465129049972, "grad_norm": 0.36598774790763855, "learning_rate": 1.0189589846709997e-05, "loss": 0.4903, "step": 36032 }, { "epoch": 0.9893739703459637, "grad_norm": 0.41282570362091064, "learning_rate": 1.0189158030752318e-05, "loss": 0.5171, "step": 36033 }, { "epoch": 0.9894014277869303, "grad_norm": 0.3772784471511841, "learning_rate": 1.0188726214441795e-05, "loss": 0.5392, "step": 36034 }, { "epoch": 0.9894288852278967, "grad_norm": 0.4146813452243805, "learning_rate": 1.018829439777924e-05, "loss": 0.4822, "step": 36035 }, { "epoch": 0.9894563426688633, "grad_norm": 0.3918597102165222, "learning_rate": 1.0187862580765452e-05, "loss": 0.5157, "step": 36036 }, { "epoch": 0.9894838001098297, "grad_norm": 0.4126170873641968, "learning_rate": 1.0187430763401245e-05, "loss": 0.4771, "step": 36037 }, { "epoch": 0.9895112575507963, "grad_norm": 0.4012683928012848, "learning_rate": 1.0186998945687417e-05, "loss": 0.5143, "step": 36038 }, { "epoch": 0.9895387149917627, "grad_norm": 0.39591649174690247, "learning_rate": 1.0186567127624776e-05, "loss": 0.4913, "step": 36039 }, { "epoch": 0.9895661724327293, "grad_norm": 0.40440794825553894, "learning_rate": 1.0186135309214129e-05, "loss": 0.4801, "step": 36040 }, { "epoch": 0.9895936298736958, "grad_norm": 0.39094114303588867, "learning_rate": 1.0185703490456281e-05, "loss": 0.6031, "step": 36041 }, { "epoch": 0.9896210873146623, "grad_norm": 0.42453572154045105, "learning_rate": 1.0185271671352035e-05, "loss": 0.5239, "step": 36042 }, { "epoch": 0.9896485447556288, "grad_norm": 0.37585943937301636, "learning_rate": 1.01848398519022e-05, "loss": 0.4269, "step": 36043 }, { "epoch": 0.9896760021965952, "grad_norm": 0.3933779299259186, "learning_rate": 1.0184408032107577e-05, "loss": 0.4831, "step": 36044 }, { "epoch": 0.9897034596375618, "grad_norm": 0.4277130663394928, "learning_rate": 1.0183976211968976e-05, "loss": 0.51, "step": 36045 }, { "epoch": 0.9897309170785282, "grad_norm": 0.37007811665534973, "learning_rate": 1.01835443914872e-05, "loss": 0.4479, "step": 36046 }, { "epoch": 0.9897583745194948, "grad_norm": 0.4101639688014984, "learning_rate": 1.0183112570663056e-05, "loss": 0.4381, "step": 36047 }, { "epoch": 0.9897858319604613, "grad_norm": 0.33831095695495605, "learning_rate": 1.018268074949735e-05, "loss": 0.4583, "step": 36048 }, { "epoch": 0.9898132894014278, "grad_norm": 0.5151986479759216, "learning_rate": 1.0182248927990881e-05, "loss": 0.4936, "step": 36049 }, { "epoch": 0.9898407468423943, "grad_norm": 0.41054007411003113, "learning_rate": 1.0181817106144465e-05, "loss": 0.5571, "step": 36050 }, { "epoch": 0.9898682042833608, "grad_norm": 0.39487960934638977, "learning_rate": 1.01813852839589e-05, "loss": 0.4481, "step": 36051 }, { "epoch": 0.9898956617243273, "grad_norm": 0.3563891351222992, "learning_rate": 1.0180953461434992e-05, "loss": 0.416, "step": 36052 }, { "epoch": 0.9899231191652937, "grad_norm": 0.39248061180114746, "learning_rate": 1.018052163857355e-05, "loss": 0.5432, "step": 36053 }, { "epoch": 0.9899505766062603, "grad_norm": 0.43844056129455566, "learning_rate": 1.0180089815375376e-05, "loss": 0.5391, "step": 36054 }, { "epoch": 0.9899780340472268, "grad_norm": 0.42017874121665955, "learning_rate": 1.0179657991841279e-05, "loss": 0.5749, "step": 36055 }, { "epoch": 0.9900054914881933, "grad_norm": 0.45329874753952026, "learning_rate": 1.0179226167972063e-05, "loss": 0.4759, "step": 36056 }, { "epoch": 0.9900329489291598, "grad_norm": 0.43620213866233826, "learning_rate": 1.017879434376853e-05, "loss": 0.4586, "step": 36057 }, { "epoch": 0.9900604063701263, "grad_norm": 0.4587477445602417, "learning_rate": 1.017836251923149e-05, "loss": 0.4763, "step": 36058 }, { "epoch": 0.9900878638110928, "grad_norm": 0.9359285831451416, "learning_rate": 1.0177930694361748e-05, "loss": 0.5253, "step": 36059 }, { "epoch": 0.9901153212520593, "grad_norm": 0.3340023458003998, "learning_rate": 1.0177498869160106e-05, "loss": 0.379, "step": 36060 }, { "epoch": 0.9901427786930258, "grad_norm": 0.4080970287322998, "learning_rate": 1.0177067043627375e-05, "loss": 0.4721, "step": 36061 }, { "epoch": 0.9901702361339924, "grad_norm": 0.3415207266807556, "learning_rate": 1.0176635217764355e-05, "loss": 0.4366, "step": 36062 }, { "epoch": 0.9901976935749588, "grad_norm": 0.4239104390144348, "learning_rate": 1.0176203391571855e-05, "loss": 0.5726, "step": 36063 }, { "epoch": 0.9902251510159253, "grad_norm": 0.40568044781684875, "learning_rate": 1.017577156505068e-05, "loss": 0.4791, "step": 36064 }, { "epoch": 0.9902526084568918, "grad_norm": 0.49049901962280273, "learning_rate": 1.0175339738201633e-05, "loss": 0.5048, "step": 36065 }, { "epoch": 0.9902800658978583, "grad_norm": 0.4074021875858307, "learning_rate": 1.0174907911025523e-05, "loss": 0.4839, "step": 36066 }, { "epoch": 0.9903075233388248, "grad_norm": 0.37671926617622375, "learning_rate": 1.0174476083523152e-05, "loss": 0.4702, "step": 36067 }, { "epoch": 0.9903349807797913, "grad_norm": 0.4358291029930115, "learning_rate": 1.017404425569533e-05, "loss": 0.5155, "step": 36068 }, { "epoch": 0.9903624382207579, "grad_norm": 0.38325536251068115, "learning_rate": 1.017361242754286e-05, "loss": 0.3921, "step": 36069 }, { "epoch": 0.9903898956617243, "grad_norm": 0.4048998951911926, "learning_rate": 1.0173180599066544e-05, "loss": 0.416, "step": 36070 }, { "epoch": 0.9904173531026909, "grad_norm": 0.4578258991241455, "learning_rate": 1.0172748770267193e-05, "loss": 0.5334, "step": 36071 }, { "epoch": 0.9904448105436573, "grad_norm": 0.3807884454727173, "learning_rate": 1.017231694114561e-05, "loss": 0.4225, "step": 36072 }, { "epoch": 0.9904722679846238, "grad_norm": 0.39607977867126465, "learning_rate": 1.0171885111702604e-05, "loss": 0.4918, "step": 36073 }, { "epoch": 0.9904997254255903, "grad_norm": 0.41109538078308105, "learning_rate": 1.0171453281938974e-05, "loss": 0.5076, "step": 36074 }, { "epoch": 0.9905271828665568, "grad_norm": 0.370983749628067, "learning_rate": 1.017102145185553e-05, "loss": 0.4498, "step": 36075 }, { "epoch": 0.9905546403075234, "grad_norm": 0.35659804940223694, "learning_rate": 1.0170589621453075e-05, "loss": 0.4799, "step": 36076 }, { "epoch": 0.9905820977484898, "grad_norm": 0.36830225586891174, "learning_rate": 1.017015779073242e-05, "loss": 0.4212, "step": 36077 }, { "epoch": 0.9906095551894564, "grad_norm": 0.416180819272995, "learning_rate": 1.016972595969436e-05, "loss": 0.5222, "step": 36078 }, { "epoch": 0.9906370126304228, "grad_norm": 0.4396577477455139, "learning_rate": 1.0169294128339713e-05, "loss": 0.5227, "step": 36079 }, { "epoch": 0.9906644700713894, "grad_norm": 0.4329797029495239, "learning_rate": 1.0168862296669274e-05, "loss": 0.5099, "step": 36080 }, { "epoch": 0.9906919275123558, "grad_norm": 0.4380686283111572, "learning_rate": 1.0168430464683857e-05, "loss": 0.5503, "step": 36081 }, { "epoch": 0.9907193849533223, "grad_norm": 0.3878006935119629, "learning_rate": 1.0167998632384262e-05, "loss": 0.4948, "step": 36082 }, { "epoch": 0.9907468423942889, "grad_norm": 0.4229143261909485, "learning_rate": 1.0167566799771293e-05, "loss": 0.4606, "step": 36083 }, { "epoch": 0.9907742998352553, "grad_norm": 0.43099620938301086, "learning_rate": 1.0167134966845762e-05, "loss": 0.5353, "step": 36084 }, { "epoch": 0.9908017572762219, "grad_norm": 0.7011162042617798, "learning_rate": 1.0166703133608467e-05, "loss": 0.5187, "step": 36085 }, { "epoch": 0.9908292147171883, "grad_norm": 0.36763209104537964, "learning_rate": 1.0166271300060222e-05, "loss": 0.4524, "step": 36086 }, { "epoch": 0.9908566721581549, "grad_norm": 0.42112892866134644, "learning_rate": 1.0165839466201825e-05, "loss": 0.5149, "step": 36087 }, { "epoch": 0.9908841295991213, "grad_norm": 0.34965500235557556, "learning_rate": 1.0165407632034086e-05, "loss": 0.3704, "step": 36088 }, { "epoch": 0.9909115870400879, "grad_norm": 0.38446009159088135, "learning_rate": 1.0164975797557807e-05, "loss": 0.4725, "step": 36089 }, { "epoch": 0.9909390444810544, "grad_norm": 0.4125695824623108, "learning_rate": 1.01645439627738e-05, "loss": 0.4815, "step": 36090 }, { "epoch": 0.9909665019220208, "grad_norm": 0.397901326417923, "learning_rate": 1.0164112127682862e-05, "loss": 0.4501, "step": 36091 }, { "epoch": 0.9909939593629874, "grad_norm": 0.3927576541900635, "learning_rate": 1.0163680292285802e-05, "loss": 0.4966, "step": 36092 }, { "epoch": 0.9910214168039538, "grad_norm": 0.36749890446662903, "learning_rate": 1.0163248456583426e-05, "loss": 0.4357, "step": 36093 }, { "epoch": 0.9910488742449204, "grad_norm": 0.40724971890449524, "learning_rate": 1.0162816620576543e-05, "loss": 0.4818, "step": 36094 }, { "epoch": 0.9910763316858868, "grad_norm": 0.4261913001537323, "learning_rate": 1.0162384784265953e-05, "loss": 0.5018, "step": 36095 }, { "epoch": 0.9911037891268534, "grad_norm": 0.41237130761146545, "learning_rate": 1.0161952947652463e-05, "loss": 0.5238, "step": 36096 }, { "epoch": 0.9911312465678199, "grad_norm": 0.43863511085510254, "learning_rate": 1.0161521110736881e-05, "loss": 0.4836, "step": 36097 }, { "epoch": 0.9911587040087864, "grad_norm": 0.39005979895591736, "learning_rate": 1.0161089273520007e-05, "loss": 0.5476, "step": 36098 }, { "epoch": 0.9911861614497529, "grad_norm": 0.39152541756629944, "learning_rate": 1.0160657436002652e-05, "loss": 0.501, "step": 36099 }, { "epoch": 0.9912136188907194, "grad_norm": 0.38891759514808655, "learning_rate": 1.0160225598185621e-05, "loss": 0.4682, "step": 36100 }, { "epoch": 0.9912410763316859, "grad_norm": 0.33485615253448486, "learning_rate": 1.0159793760069716e-05, "loss": 0.3817, "step": 36101 }, { "epoch": 0.9912685337726523, "grad_norm": 0.4379116892814636, "learning_rate": 1.0159361921655747e-05, "loss": 0.5356, "step": 36102 }, { "epoch": 0.9912959912136189, "grad_norm": 0.36743149161338806, "learning_rate": 1.0158930082944514e-05, "loss": 0.4416, "step": 36103 }, { "epoch": 0.9913234486545854, "grad_norm": 0.39001327753067017, "learning_rate": 1.0158498243936828e-05, "loss": 0.4378, "step": 36104 }, { "epoch": 0.9913509060955519, "grad_norm": 0.36384493112564087, "learning_rate": 1.0158066404633492e-05, "loss": 0.494, "step": 36105 }, { "epoch": 0.9913783635365184, "grad_norm": 0.39939558506011963, "learning_rate": 1.015763456503531e-05, "loss": 0.4873, "step": 36106 }, { "epoch": 0.9914058209774849, "grad_norm": 0.40751340985298157, "learning_rate": 1.0157202725143092e-05, "loss": 0.5455, "step": 36107 }, { "epoch": 0.9914332784184514, "grad_norm": 0.4201103448867798, "learning_rate": 1.0156770884957638e-05, "loss": 0.4509, "step": 36108 }, { "epoch": 0.9914607358594179, "grad_norm": 0.4375629723072052, "learning_rate": 1.0156339044479756e-05, "loss": 0.5482, "step": 36109 }, { "epoch": 0.9914881933003844, "grad_norm": 0.35609275102615356, "learning_rate": 1.0155907203710256e-05, "loss": 0.42, "step": 36110 }, { "epoch": 0.991515650741351, "grad_norm": 0.4169420003890991, "learning_rate": 1.0155475362649936e-05, "loss": 0.548, "step": 36111 }, { "epoch": 0.9915431081823174, "grad_norm": 0.3904082179069519, "learning_rate": 1.0155043521299605e-05, "loss": 0.4788, "step": 36112 }, { "epoch": 0.9915705656232839, "grad_norm": 0.39562007784843445, "learning_rate": 1.015461167966007e-05, "loss": 0.524, "step": 36113 }, { "epoch": 0.9915980230642504, "grad_norm": 0.42130163311958313, "learning_rate": 1.0154179837732132e-05, "loss": 0.4606, "step": 36114 }, { "epoch": 0.9916254805052169, "grad_norm": 0.4702787697315216, "learning_rate": 1.0153747995516602e-05, "loss": 0.486, "step": 36115 }, { "epoch": 0.9916529379461834, "grad_norm": 0.41987180709838867, "learning_rate": 1.0153316153014281e-05, "loss": 0.5096, "step": 36116 }, { "epoch": 0.9916803953871499, "grad_norm": 0.39047518372535706, "learning_rate": 1.0152884310225978e-05, "loss": 0.5243, "step": 36117 }, { "epoch": 0.9917078528281165, "grad_norm": 0.3860674202442169, "learning_rate": 1.0152452467152498e-05, "loss": 0.4906, "step": 36118 }, { "epoch": 0.9917353102690829, "grad_norm": 0.38789063692092896, "learning_rate": 1.0152020623794643e-05, "loss": 0.4972, "step": 36119 }, { "epoch": 0.9917627677100495, "grad_norm": 0.4610002338886261, "learning_rate": 1.0151588780153221e-05, "loss": 0.5421, "step": 36120 }, { "epoch": 0.9917902251510159, "grad_norm": 0.3975159525871277, "learning_rate": 1.015115693622904e-05, "loss": 0.4954, "step": 36121 }, { "epoch": 0.9918176825919824, "grad_norm": 0.4000074565410614, "learning_rate": 1.0150725092022899e-05, "loss": 0.477, "step": 36122 }, { "epoch": 0.9918451400329489, "grad_norm": 0.3887966275215149, "learning_rate": 1.0150293247535612e-05, "loss": 0.4871, "step": 36123 }, { "epoch": 0.9918725974739154, "grad_norm": 0.797031819820404, "learning_rate": 1.0149861402767977e-05, "loss": 0.4577, "step": 36124 }, { "epoch": 0.991900054914882, "grad_norm": 0.38122567534446716, "learning_rate": 1.0149429557720805e-05, "loss": 0.4537, "step": 36125 }, { "epoch": 0.9919275123558484, "grad_norm": 0.5100933313369751, "learning_rate": 1.01489977123949e-05, "loss": 0.5788, "step": 36126 }, { "epoch": 0.991954969796815, "grad_norm": 0.42949825525283813, "learning_rate": 1.0148565866791062e-05, "loss": 0.4646, "step": 36127 }, { "epoch": 0.9919824272377814, "grad_norm": 0.43114349246025085, "learning_rate": 1.0148134020910106e-05, "loss": 0.5511, "step": 36128 }, { "epoch": 0.992009884678748, "grad_norm": 0.43313583731651306, "learning_rate": 1.014770217475283e-05, "loss": 0.4891, "step": 36129 }, { "epoch": 0.9920373421197144, "grad_norm": 0.3908849358558655, "learning_rate": 1.0147270328320043e-05, "loss": 0.4475, "step": 36130 }, { "epoch": 0.9920647995606809, "grad_norm": 0.3966287672519684, "learning_rate": 1.014683848161255e-05, "loss": 0.5204, "step": 36131 }, { "epoch": 0.9920922570016475, "grad_norm": 0.5394250750541687, "learning_rate": 1.0146406634631155e-05, "loss": 0.5024, "step": 36132 }, { "epoch": 0.9921197144426139, "grad_norm": 0.3619430959224701, "learning_rate": 1.0145974787376667e-05, "loss": 0.4722, "step": 36133 }, { "epoch": 0.9921471718835805, "grad_norm": 0.4126207232475281, "learning_rate": 1.0145542939849885e-05, "loss": 0.496, "step": 36134 }, { "epoch": 0.9921746293245469, "grad_norm": 0.3828662931919098, "learning_rate": 1.0145111092051625e-05, "loss": 0.482, "step": 36135 }, { "epoch": 0.9922020867655135, "grad_norm": 0.3600865602493286, "learning_rate": 1.0144679243982684e-05, "loss": 0.484, "step": 36136 }, { "epoch": 0.9922295442064799, "grad_norm": 0.4285546541213989, "learning_rate": 1.014424739564387e-05, "loss": 0.5035, "step": 36137 }, { "epoch": 0.9922570016474465, "grad_norm": 0.47009748220443726, "learning_rate": 1.0143815547035989e-05, "loss": 0.4958, "step": 36138 }, { "epoch": 0.992284459088413, "grad_norm": 0.45229417085647583, "learning_rate": 1.0143383698159848e-05, "loss": 0.4866, "step": 36139 }, { "epoch": 0.9923119165293794, "grad_norm": 0.4015055000782013, "learning_rate": 1.0142951849016246e-05, "loss": 0.5419, "step": 36140 }, { "epoch": 0.992339373970346, "grad_norm": 0.3707212209701538, "learning_rate": 1.0142519999605997e-05, "loss": 0.4325, "step": 36141 }, { "epoch": 0.9923668314113124, "grad_norm": 0.3791162073612213, "learning_rate": 1.01420881499299e-05, "loss": 0.3845, "step": 36142 }, { "epoch": 0.992394288852279, "grad_norm": 0.36106806993484497, "learning_rate": 1.0141656299988764e-05, "loss": 0.3895, "step": 36143 }, { "epoch": 0.9924217462932454, "grad_norm": 0.4328170120716095, "learning_rate": 1.0141224449783395e-05, "loss": 0.5701, "step": 36144 }, { "epoch": 0.992449203734212, "grad_norm": 0.3614431321620941, "learning_rate": 1.0140792599314597e-05, "loss": 0.4342, "step": 36145 }, { "epoch": 0.9924766611751785, "grad_norm": 0.42663243412971497, "learning_rate": 1.0140360748583176e-05, "loss": 0.543, "step": 36146 }, { "epoch": 0.992504118616145, "grad_norm": 0.42506906390190125, "learning_rate": 1.0139928897589937e-05, "loss": 0.6689, "step": 36147 }, { "epoch": 0.9925315760571115, "grad_norm": 0.41736626625061035, "learning_rate": 1.0139497046335687e-05, "loss": 0.4992, "step": 36148 }, { "epoch": 0.992559033498078, "grad_norm": 0.3969409465789795, "learning_rate": 1.0139065194821229e-05, "loss": 0.4601, "step": 36149 }, { "epoch": 0.9925864909390445, "grad_norm": 0.3834052085876465, "learning_rate": 1.0138633343047368e-05, "loss": 0.518, "step": 36150 }, { "epoch": 0.9926139483800109, "grad_norm": 0.36072593927383423, "learning_rate": 1.0138201491014916e-05, "loss": 0.4995, "step": 36151 }, { "epoch": 0.9926414058209775, "grad_norm": 0.7352675199508667, "learning_rate": 1.0137769638724672e-05, "loss": 0.5274, "step": 36152 }, { "epoch": 0.992668863261944, "grad_norm": 0.4256822168827057, "learning_rate": 1.0137337786177443e-05, "loss": 0.4725, "step": 36153 }, { "epoch": 0.9926963207029105, "grad_norm": 0.4190169870853424, "learning_rate": 1.0136905933374038e-05, "loss": 0.4936, "step": 36154 }, { "epoch": 0.992723778143877, "grad_norm": 0.3602798581123352, "learning_rate": 1.0136474080315255e-05, "loss": 0.4094, "step": 36155 }, { "epoch": 0.9927512355848435, "grad_norm": 0.4666096568107605, "learning_rate": 1.0136042227001908e-05, "loss": 0.5055, "step": 36156 }, { "epoch": 0.99277869302581, "grad_norm": 0.3989529311656952, "learning_rate": 1.01356103734348e-05, "loss": 0.5336, "step": 36157 }, { "epoch": 0.9928061504667764, "grad_norm": 0.3854342997074127, "learning_rate": 1.013517851961473e-05, "loss": 0.4392, "step": 36158 }, { "epoch": 0.992833607907743, "grad_norm": 0.46753984689712524, "learning_rate": 1.0134746665542513e-05, "loss": 0.5098, "step": 36159 }, { "epoch": 0.9928610653487095, "grad_norm": 0.43145301938056946, "learning_rate": 1.0134314811218948e-05, "loss": 0.4731, "step": 36160 }, { "epoch": 0.992888522789676, "grad_norm": 0.3265478312969208, "learning_rate": 1.0133882956644847e-05, "loss": 0.3979, "step": 36161 }, { "epoch": 0.9929159802306425, "grad_norm": 0.4319513440132141, "learning_rate": 1.013345110182101e-05, "loss": 0.5442, "step": 36162 }, { "epoch": 0.992943437671609, "grad_norm": 0.4263421297073364, "learning_rate": 1.0133019246748241e-05, "loss": 0.4466, "step": 36163 }, { "epoch": 0.9929708951125755, "grad_norm": 0.32617318630218506, "learning_rate": 1.0132587391427352e-05, "loss": 0.3597, "step": 36164 }, { "epoch": 0.992998352553542, "grad_norm": 0.37918949127197266, "learning_rate": 1.0132155535859142e-05, "loss": 0.4141, "step": 36165 }, { "epoch": 0.9930258099945085, "grad_norm": 0.4149651527404785, "learning_rate": 1.0131723680044423e-05, "loss": 0.5364, "step": 36166 }, { "epoch": 0.9930532674354751, "grad_norm": 0.39559900760650635, "learning_rate": 1.0131291823983998e-05, "loss": 0.4716, "step": 36167 }, { "epoch": 0.9930807248764415, "grad_norm": 0.537512481212616, "learning_rate": 1.0130859967678668e-05, "loss": 0.4492, "step": 36168 }, { "epoch": 0.993108182317408, "grad_norm": 0.47782811522483826, "learning_rate": 1.0130428111129244e-05, "loss": 0.4964, "step": 36169 }, { "epoch": 0.9931356397583745, "grad_norm": 0.4243136942386627, "learning_rate": 1.0129996254336532e-05, "loss": 0.6227, "step": 36170 }, { "epoch": 0.993163097199341, "grad_norm": 0.4034324884414673, "learning_rate": 1.0129564397301333e-05, "loss": 0.4822, "step": 36171 }, { "epoch": 0.9931905546403075, "grad_norm": 0.4176284372806549, "learning_rate": 1.0129132540024456e-05, "loss": 0.4898, "step": 36172 }, { "epoch": 0.993218012081274, "grad_norm": 0.33006346225738525, "learning_rate": 1.0128700682506704e-05, "loss": 0.402, "step": 36173 }, { "epoch": 0.9932454695222406, "grad_norm": 0.4284411668777466, "learning_rate": 1.0128268824748886e-05, "loss": 0.4758, "step": 36174 }, { "epoch": 0.993272926963207, "grad_norm": 0.46221107244491577, "learning_rate": 1.0127836966751808e-05, "loss": 0.5044, "step": 36175 }, { "epoch": 0.9933003844041736, "grad_norm": 0.4254124164581299, "learning_rate": 1.0127405108516268e-05, "loss": 0.4615, "step": 36176 }, { "epoch": 0.99332784184514, "grad_norm": 0.36295586824417114, "learning_rate": 1.012697325004308e-05, "loss": 0.4409, "step": 36177 }, { "epoch": 0.9933552992861066, "grad_norm": 0.42843642830848694, "learning_rate": 1.0126541391333044e-05, "loss": 0.456, "step": 36178 }, { "epoch": 0.993382756727073, "grad_norm": 0.41248202323913574, "learning_rate": 1.0126109532386971e-05, "loss": 0.5275, "step": 36179 }, { "epoch": 0.9934102141680395, "grad_norm": 0.3455396890640259, "learning_rate": 1.0125677673205663e-05, "loss": 0.4798, "step": 36180 }, { "epoch": 0.9934376716090061, "grad_norm": 0.36558184027671814, "learning_rate": 1.0125245813789924e-05, "loss": 0.5049, "step": 36181 }, { "epoch": 0.9934651290499725, "grad_norm": 0.3933556079864502, "learning_rate": 1.0124813954140564e-05, "loss": 0.4592, "step": 36182 }, { "epoch": 0.9934925864909391, "grad_norm": 0.44083645939826965, "learning_rate": 1.0124382094258382e-05, "loss": 0.5139, "step": 36183 }, { "epoch": 0.9935200439319055, "grad_norm": 0.45462659001350403, "learning_rate": 1.0123950234144193e-05, "loss": 0.4965, "step": 36184 }, { "epoch": 0.9935475013728721, "grad_norm": 0.39680859446525574, "learning_rate": 1.0123518373798795e-05, "loss": 0.4676, "step": 36185 }, { "epoch": 0.9935749588138385, "grad_norm": 0.4053981304168701, "learning_rate": 1.0123086513222995e-05, "loss": 0.5513, "step": 36186 }, { "epoch": 0.993602416254805, "grad_norm": 0.4204159677028656, "learning_rate": 1.01226546524176e-05, "loss": 0.4387, "step": 36187 }, { "epoch": 0.9936298736957716, "grad_norm": 0.38572728633880615, "learning_rate": 1.0122222791383414e-05, "loss": 0.4709, "step": 36188 }, { "epoch": 0.993657331136738, "grad_norm": 0.37752124667167664, "learning_rate": 1.0121790930121244e-05, "loss": 0.4311, "step": 36189 }, { "epoch": 0.9936847885777046, "grad_norm": 0.3952014148235321, "learning_rate": 1.0121359068631895e-05, "loss": 0.4829, "step": 36190 }, { "epoch": 0.993712246018671, "grad_norm": 0.4968498945236206, "learning_rate": 1.0120927206916173e-05, "loss": 0.5262, "step": 36191 }, { "epoch": 0.9937397034596376, "grad_norm": 0.3493383824825287, "learning_rate": 1.0120495344974885e-05, "loss": 0.4727, "step": 36192 }, { "epoch": 0.993767160900604, "grad_norm": 0.39503079652786255, "learning_rate": 1.0120063482808831e-05, "loss": 0.4455, "step": 36193 }, { "epoch": 0.9937946183415706, "grad_norm": 0.3911696970462799, "learning_rate": 1.0119631620418823e-05, "loss": 0.4877, "step": 36194 }, { "epoch": 0.9938220757825371, "grad_norm": 0.38590800762176514, "learning_rate": 1.0119199757805662e-05, "loss": 0.4623, "step": 36195 }, { "epoch": 0.9938495332235036, "grad_norm": 0.4091864824295044, "learning_rate": 1.0118767894970159e-05, "loss": 0.5051, "step": 36196 }, { "epoch": 0.9938769906644701, "grad_norm": 0.3638581335544586, "learning_rate": 1.0118336031913113e-05, "loss": 0.4463, "step": 36197 }, { "epoch": 0.9939044481054365, "grad_norm": 0.40671268105506897, "learning_rate": 1.0117904168635331e-05, "loss": 0.5925, "step": 36198 }, { "epoch": 0.9939319055464031, "grad_norm": 0.38568761944770813, "learning_rate": 1.0117472305137623e-05, "loss": 0.4989, "step": 36199 }, { "epoch": 0.9939593629873695, "grad_norm": 0.434415340423584, "learning_rate": 1.011704044142079e-05, "loss": 0.4667, "step": 36200 }, { "epoch": 0.9939868204283361, "grad_norm": 0.5220248699188232, "learning_rate": 1.0116608577485641e-05, "loss": 0.455, "step": 36201 }, { "epoch": 0.9940142778693026, "grad_norm": 0.6947186589241028, "learning_rate": 1.0116176713332977e-05, "loss": 0.5103, "step": 36202 }, { "epoch": 0.9940417353102691, "grad_norm": 0.4247843325138092, "learning_rate": 1.0115744848963609e-05, "loss": 0.4795, "step": 36203 }, { "epoch": 0.9940691927512356, "grad_norm": 0.4232196807861328, "learning_rate": 1.0115312984378341e-05, "loss": 0.5186, "step": 36204 }, { "epoch": 0.9940966501922021, "grad_norm": 0.41762575507164, "learning_rate": 1.0114881119577972e-05, "loss": 0.5122, "step": 36205 }, { "epoch": 0.9941241076331686, "grad_norm": 0.4278082251548767, "learning_rate": 1.011444925456332e-05, "loss": 0.483, "step": 36206 }, { "epoch": 0.994151565074135, "grad_norm": 0.3635543882846832, "learning_rate": 1.0114017389335177e-05, "loss": 0.5355, "step": 36207 }, { "epoch": 0.9941790225151016, "grad_norm": 0.406921923160553, "learning_rate": 1.0113585523894359e-05, "loss": 0.5077, "step": 36208 }, { "epoch": 0.9942064799560681, "grad_norm": 0.44388648867607117, "learning_rate": 1.0113153658241668e-05, "loss": 0.4214, "step": 36209 }, { "epoch": 0.9942339373970346, "grad_norm": 0.3634577691555023, "learning_rate": 1.0112721792377907e-05, "loss": 0.4434, "step": 36210 }, { "epoch": 0.9942613948380011, "grad_norm": 0.37547120451927185, "learning_rate": 1.0112289926303887e-05, "loss": 0.4175, "step": 36211 }, { "epoch": 0.9942888522789676, "grad_norm": 0.3991980254650116, "learning_rate": 1.0111858060020408e-05, "loss": 0.5866, "step": 36212 }, { "epoch": 0.9943163097199341, "grad_norm": 0.3588138520717621, "learning_rate": 1.0111426193528277e-05, "loss": 0.4717, "step": 36213 }, { "epoch": 0.9943437671609006, "grad_norm": 0.40688592195510864, "learning_rate": 1.0110994326828305e-05, "loss": 0.5251, "step": 36214 }, { "epoch": 0.9943712246018671, "grad_norm": 0.40234455466270447, "learning_rate": 1.011056245992129e-05, "loss": 0.4943, "step": 36215 }, { "epoch": 0.9943986820428337, "grad_norm": 0.3514452874660492, "learning_rate": 1.0110130592808041e-05, "loss": 0.4255, "step": 36216 }, { "epoch": 0.9944261394838001, "grad_norm": 0.4270048141479492, "learning_rate": 1.0109698725489361e-05, "loss": 0.4849, "step": 36217 }, { "epoch": 0.9944535969247666, "grad_norm": 0.40148279070854187, "learning_rate": 1.0109266857966063e-05, "loss": 0.5599, "step": 36218 }, { "epoch": 0.9944810543657331, "grad_norm": 0.41165629029273987, "learning_rate": 1.0108834990238944e-05, "loss": 0.5061, "step": 36219 }, { "epoch": 0.9945085118066996, "grad_norm": 0.4055713415145874, "learning_rate": 1.0108403122308813e-05, "loss": 0.4016, "step": 36220 }, { "epoch": 0.9945359692476661, "grad_norm": 0.4241214394569397, "learning_rate": 1.0107971254176476e-05, "loss": 0.4553, "step": 36221 }, { "epoch": 0.9945634266886326, "grad_norm": 0.4091799259185791, "learning_rate": 1.0107539385842738e-05, "loss": 0.5131, "step": 36222 }, { "epoch": 0.9945908841295992, "grad_norm": 0.4368007779121399, "learning_rate": 1.0107107517308406e-05, "loss": 0.5082, "step": 36223 }, { "epoch": 0.9946183415705656, "grad_norm": 0.3887348473072052, "learning_rate": 1.0106675648574284e-05, "loss": 0.4956, "step": 36224 }, { "epoch": 0.9946457990115322, "grad_norm": 0.4033813178539276, "learning_rate": 1.0106243779641176e-05, "loss": 0.5412, "step": 36225 }, { "epoch": 0.9946732564524986, "grad_norm": 0.4595888555049896, "learning_rate": 1.0105811910509894e-05, "loss": 0.3616, "step": 36226 }, { "epoch": 0.9947007138934651, "grad_norm": 0.34764382243156433, "learning_rate": 1.0105380041181233e-05, "loss": 0.4592, "step": 36227 }, { "epoch": 0.9947281713344316, "grad_norm": 0.421186238527298, "learning_rate": 1.0104948171656007e-05, "loss": 0.55, "step": 36228 }, { "epoch": 0.9947556287753981, "grad_norm": 0.4288618564605713, "learning_rate": 1.0104516301935021e-05, "loss": 0.5172, "step": 36229 }, { "epoch": 0.9947830862163647, "grad_norm": 0.3651333749294281, "learning_rate": 1.0104084432019077e-05, "loss": 0.4927, "step": 36230 }, { "epoch": 0.9948105436573311, "grad_norm": 0.4012386202812195, "learning_rate": 1.0103652561908984e-05, "loss": 0.4281, "step": 36231 }, { "epoch": 0.9948380010982977, "grad_norm": 0.4299789071083069, "learning_rate": 1.0103220691605544e-05, "loss": 0.5351, "step": 36232 }, { "epoch": 0.9948654585392641, "grad_norm": 0.36181849241256714, "learning_rate": 1.0102788821109563e-05, "loss": 0.4879, "step": 36233 }, { "epoch": 0.9948929159802307, "grad_norm": 0.3969278633594513, "learning_rate": 1.0102356950421853e-05, "loss": 0.4628, "step": 36234 }, { "epoch": 0.9949203734211971, "grad_norm": 0.44546040892601013, "learning_rate": 1.010192507954321e-05, "loss": 0.4661, "step": 36235 }, { "epoch": 0.9949478308621637, "grad_norm": 0.438734769821167, "learning_rate": 1.0101493208474447e-05, "loss": 0.5115, "step": 36236 }, { "epoch": 0.9949752883031302, "grad_norm": 0.40464189648628235, "learning_rate": 1.0101061337216367e-05, "loss": 0.4514, "step": 36237 }, { "epoch": 0.9950027457440966, "grad_norm": 0.4120367169380188, "learning_rate": 1.010062946576977e-05, "loss": 0.4527, "step": 36238 }, { "epoch": 0.9950302031850632, "grad_norm": 0.39846712350845337, "learning_rate": 1.0100197594135473e-05, "loss": 0.455, "step": 36239 }, { "epoch": 0.9950576606260296, "grad_norm": 0.3654295802116394, "learning_rate": 1.0099765722314271e-05, "loss": 0.5512, "step": 36240 }, { "epoch": 0.9950851180669962, "grad_norm": 0.3982309401035309, "learning_rate": 1.0099333850306979e-05, "loss": 0.5415, "step": 36241 }, { "epoch": 0.9951125755079626, "grad_norm": 0.40375617146492004, "learning_rate": 1.0098901978114395e-05, "loss": 0.445, "step": 36242 }, { "epoch": 0.9951400329489292, "grad_norm": 0.44609084725379944, "learning_rate": 1.0098470105737326e-05, "loss": 0.5173, "step": 36243 }, { "epoch": 0.9951674903898957, "grad_norm": 0.3651413023471832, "learning_rate": 1.009803823317658e-05, "loss": 0.4068, "step": 36244 }, { "epoch": 0.9951949478308622, "grad_norm": 0.40080156922340393, "learning_rate": 1.0097606360432966e-05, "loss": 0.465, "step": 36245 }, { "epoch": 0.9952224052718287, "grad_norm": 0.39775973558425903, "learning_rate": 1.0097174487507278e-05, "loss": 0.4779, "step": 36246 }, { "epoch": 0.9952498627127951, "grad_norm": 0.39224958419799805, "learning_rate": 1.0096742614400332e-05, "loss": 0.5699, "step": 36247 }, { "epoch": 0.9952773201537617, "grad_norm": 0.40632638335227966, "learning_rate": 1.0096310741112927e-05, "loss": 0.4647, "step": 36248 }, { "epoch": 0.9953047775947281, "grad_norm": 0.4188957214355469, "learning_rate": 1.0095878867645876e-05, "loss": 0.4871, "step": 36249 }, { "epoch": 0.9953322350356947, "grad_norm": 0.40434014797210693, "learning_rate": 1.009544699399998e-05, "loss": 0.5657, "step": 36250 }, { "epoch": 0.9953596924766612, "grad_norm": 0.37849530577659607, "learning_rate": 1.0095015120176042e-05, "loss": 0.5226, "step": 36251 }, { "epoch": 0.9953871499176277, "grad_norm": 0.454843133687973, "learning_rate": 1.0094583246174871e-05, "loss": 0.5727, "step": 36252 }, { "epoch": 0.9954146073585942, "grad_norm": 0.4583251178264618, "learning_rate": 1.0094151371997272e-05, "loss": 0.5807, "step": 36253 }, { "epoch": 0.9954420647995607, "grad_norm": 0.4191831648349762, "learning_rate": 1.0093719497644052e-05, "loss": 0.5221, "step": 36254 }, { "epoch": 0.9954695222405272, "grad_norm": 0.38216009736061096, "learning_rate": 1.0093287623116017e-05, "loss": 0.4617, "step": 36255 }, { "epoch": 0.9954969796814936, "grad_norm": 0.45879700779914856, "learning_rate": 1.0092855748413965e-05, "loss": 0.4463, "step": 36256 }, { "epoch": 0.9955244371224602, "grad_norm": 0.40815335512161255, "learning_rate": 1.0092423873538712e-05, "loss": 0.4191, "step": 36257 }, { "epoch": 0.9955518945634267, "grad_norm": 0.4069272577762604, "learning_rate": 1.0091991998491057e-05, "loss": 0.4581, "step": 36258 }, { "epoch": 0.9955793520043932, "grad_norm": 0.3833152949810028, "learning_rate": 1.009156012327181e-05, "loss": 0.483, "step": 36259 }, { "epoch": 0.9956068094453597, "grad_norm": 0.47492069005966187, "learning_rate": 1.0091128247881773e-05, "loss": 0.4847, "step": 36260 }, { "epoch": 0.9956342668863262, "grad_norm": 0.41485142707824707, "learning_rate": 1.009069637232175e-05, "loss": 0.5146, "step": 36261 }, { "epoch": 0.9956617243272927, "grad_norm": 0.40766990184783936, "learning_rate": 1.0090264496592554e-05, "loss": 0.4598, "step": 36262 }, { "epoch": 0.9956891817682592, "grad_norm": 0.32176411151885986, "learning_rate": 1.0089832620694983e-05, "loss": 0.4094, "step": 36263 }, { "epoch": 0.9957166392092257, "grad_norm": 0.42441073060035706, "learning_rate": 1.0089400744629844e-05, "loss": 0.4891, "step": 36264 }, { "epoch": 0.9957440966501923, "grad_norm": 0.38113123178482056, "learning_rate": 1.0088968868397948e-05, "loss": 0.465, "step": 36265 }, { "epoch": 0.9957715540911587, "grad_norm": 0.4230332672595978, "learning_rate": 1.0088536992000094e-05, "loss": 0.4708, "step": 36266 }, { "epoch": 0.9957990115321252, "grad_norm": 0.4343395233154297, "learning_rate": 1.0088105115437092e-05, "loss": 0.4956, "step": 36267 }, { "epoch": 0.9958264689730917, "grad_norm": 0.4333374798297882, "learning_rate": 1.0087673238709746e-05, "loss": 0.4784, "step": 36268 }, { "epoch": 0.9958539264140582, "grad_norm": 0.45585885643959045, "learning_rate": 1.0087241361818857e-05, "loss": 0.5911, "step": 36269 }, { "epoch": 0.9958813838550247, "grad_norm": 0.4042471945285797, "learning_rate": 1.008680948476524e-05, "loss": 0.506, "step": 36270 }, { "epoch": 0.9959088412959912, "grad_norm": 0.41347214579582214, "learning_rate": 1.0086377607549693e-05, "loss": 0.4123, "step": 36271 }, { "epoch": 0.9959362987369578, "grad_norm": 0.414064884185791, "learning_rate": 1.0085945730173025e-05, "loss": 0.4407, "step": 36272 }, { "epoch": 0.9959637561779242, "grad_norm": 0.438445508480072, "learning_rate": 1.0085513852636044e-05, "loss": 0.4554, "step": 36273 }, { "epoch": 0.9959912136188908, "grad_norm": 0.3808782994747162, "learning_rate": 1.0085081974939548e-05, "loss": 0.457, "step": 36274 }, { "epoch": 0.9960186710598572, "grad_norm": 0.40578538179397583, "learning_rate": 1.0084650097084348e-05, "loss": 0.4288, "step": 36275 }, { "epoch": 0.9960461285008237, "grad_norm": 0.40050065517425537, "learning_rate": 1.0084218219071248e-05, "loss": 0.5175, "step": 36276 }, { "epoch": 0.9960735859417902, "grad_norm": 0.3736494183540344, "learning_rate": 1.0083786340901056e-05, "loss": 0.5295, "step": 36277 }, { "epoch": 0.9961010433827567, "grad_norm": 0.38550063967704773, "learning_rate": 1.0083354462574576e-05, "loss": 0.4673, "step": 36278 }, { "epoch": 0.9961285008237233, "grad_norm": 0.3951812982559204, "learning_rate": 1.0082922584092609e-05, "loss": 0.4422, "step": 36279 }, { "epoch": 0.9961559582646897, "grad_norm": 0.3639346659183502, "learning_rate": 1.0082490705455969e-05, "loss": 0.4215, "step": 36280 }, { "epoch": 0.9961834157056563, "grad_norm": 0.5479905605316162, "learning_rate": 1.0082058826665457e-05, "loss": 0.5565, "step": 36281 }, { "epoch": 0.9962108731466227, "grad_norm": 0.3765161633491516, "learning_rate": 1.008162694772188e-05, "loss": 0.4785, "step": 36282 }, { "epoch": 0.9962383305875893, "grad_norm": 0.37133052945137024, "learning_rate": 1.0081195068626039e-05, "loss": 0.4974, "step": 36283 }, { "epoch": 0.9962657880285557, "grad_norm": 0.40959665179252625, "learning_rate": 1.0080763189378744e-05, "loss": 0.4587, "step": 36284 }, { "epoch": 0.9962932454695222, "grad_norm": 0.4250530004501343, "learning_rate": 1.0080331309980801e-05, "loss": 0.4794, "step": 36285 }, { "epoch": 0.9963207029104888, "grad_norm": 0.38587844371795654, "learning_rate": 1.0079899430433018e-05, "loss": 0.4384, "step": 36286 }, { "epoch": 0.9963481603514552, "grad_norm": 0.4024626612663269, "learning_rate": 1.0079467550736193e-05, "loss": 0.5182, "step": 36287 }, { "epoch": 0.9963756177924218, "grad_norm": 0.40284988284111023, "learning_rate": 1.0079035670891137e-05, "loss": 0.4743, "step": 36288 }, { "epoch": 0.9964030752333882, "grad_norm": 0.3785414397716522, "learning_rate": 1.0078603790898651e-05, "loss": 0.4408, "step": 36289 }, { "epoch": 0.9964305326743548, "grad_norm": 0.34609872102737427, "learning_rate": 1.0078171910759549e-05, "loss": 0.3987, "step": 36290 }, { "epoch": 0.9964579901153212, "grad_norm": 0.38752779364585876, "learning_rate": 1.007774003047463e-05, "loss": 0.4381, "step": 36291 }, { "epoch": 0.9964854475562878, "grad_norm": 0.42407986521720886, "learning_rate": 1.0077308150044698e-05, "loss": 0.5102, "step": 36292 }, { "epoch": 0.9965129049972542, "grad_norm": 0.3780902922153473, "learning_rate": 1.0076876269470565e-05, "loss": 0.4051, "step": 36293 }, { "epoch": 0.9965403624382208, "grad_norm": 0.31430232524871826, "learning_rate": 1.0076444388753033e-05, "loss": 0.4139, "step": 36294 }, { "epoch": 0.9965678198791873, "grad_norm": 0.3889758288860321, "learning_rate": 1.0076012507892904e-05, "loss": 0.5418, "step": 36295 }, { "epoch": 0.9965952773201537, "grad_norm": 0.38234633207321167, "learning_rate": 1.0075580626890993e-05, "loss": 0.4656, "step": 36296 }, { "epoch": 0.9966227347611203, "grad_norm": 0.5597728490829468, "learning_rate": 1.0075148745748096e-05, "loss": 0.4174, "step": 36297 }, { "epoch": 0.9966501922020867, "grad_norm": 0.4654043912887573, "learning_rate": 1.0074716864465023e-05, "loss": 0.5544, "step": 36298 }, { "epoch": 0.9966776496430533, "grad_norm": 0.3833499848842621, "learning_rate": 1.0074284983042582e-05, "loss": 0.4713, "step": 36299 }, { "epoch": 0.9967051070840197, "grad_norm": 0.4033399224281311, "learning_rate": 1.0073853101481572e-05, "loss": 0.4941, "step": 36300 }, { "epoch": 0.9967325645249863, "grad_norm": 0.4356842637062073, "learning_rate": 1.0073421219782803e-05, "loss": 0.5201, "step": 36301 }, { "epoch": 0.9967600219659528, "grad_norm": 0.3896835148334503, "learning_rate": 1.007298933794708e-05, "loss": 0.4783, "step": 36302 }, { "epoch": 0.9967874794069193, "grad_norm": 0.5673554539680481, "learning_rate": 1.0072557455975211e-05, "loss": 0.4994, "step": 36303 }, { "epoch": 0.9968149368478858, "grad_norm": 0.484427809715271, "learning_rate": 1.0072125573868e-05, "loss": 0.5901, "step": 36304 }, { "epoch": 0.9968423942888522, "grad_norm": 0.36587032675743103, "learning_rate": 1.0071693691626247e-05, "loss": 0.4668, "step": 36305 }, { "epoch": 0.9968698517298188, "grad_norm": 0.5702548027038574, "learning_rate": 1.0071261809250768e-05, "loss": 0.4935, "step": 36306 }, { "epoch": 0.9968973091707852, "grad_norm": 0.4154992401599884, "learning_rate": 1.0070829926742359e-05, "loss": 0.4088, "step": 36307 }, { "epoch": 0.9969247666117518, "grad_norm": 0.37238091230392456, "learning_rate": 1.007039804410183e-05, "loss": 0.5457, "step": 36308 }, { "epoch": 0.9969522240527183, "grad_norm": 0.3797541558742523, "learning_rate": 1.0069966161329988e-05, "loss": 0.519, "step": 36309 }, { "epoch": 0.9969796814936848, "grad_norm": 0.3842678368091583, "learning_rate": 1.0069534278427635e-05, "loss": 0.4812, "step": 36310 }, { "epoch": 0.9970071389346513, "grad_norm": 0.34366562962532043, "learning_rate": 1.006910239539558e-05, "loss": 0.4585, "step": 36311 }, { "epoch": 0.9970345963756178, "grad_norm": 0.4334462583065033, "learning_rate": 1.0068670512234627e-05, "loss": 0.5304, "step": 36312 }, { "epoch": 0.9970620538165843, "grad_norm": 0.4129803478717804, "learning_rate": 1.0068238628945579e-05, "loss": 0.5031, "step": 36313 }, { "epoch": 0.9970895112575507, "grad_norm": 0.40476194024086, "learning_rate": 1.0067806745529245e-05, "loss": 0.5306, "step": 36314 }, { "epoch": 0.9971169686985173, "grad_norm": 0.41483527421951294, "learning_rate": 1.006737486198643e-05, "loss": 0.5017, "step": 36315 }, { "epoch": 0.9971444261394838, "grad_norm": 0.4459932744503021, "learning_rate": 1.0066942978317941e-05, "loss": 0.5264, "step": 36316 }, { "epoch": 0.9971718835804503, "grad_norm": 0.37012356519699097, "learning_rate": 1.0066511094524582e-05, "loss": 0.4977, "step": 36317 }, { "epoch": 0.9971993410214168, "grad_norm": 0.5116080045700073, "learning_rate": 1.0066079210607156e-05, "loss": 0.4271, "step": 36318 }, { "epoch": 0.9972267984623833, "grad_norm": 0.35533830523490906, "learning_rate": 1.0065647326566473e-05, "loss": 0.5219, "step": 36319 }, { "epoch": 0.9972542559033498, "grad_norm": 0.4461471736431122, "learning_rate": 1.0065215442403336e-05, "loss": 0.5806, "step": 36320 }, { "epoch": 0.9972817133443163, "grad_norm": 0.4027935266494751, "learning_rate": 1.0064783558118551e-05, "loss": 0.5011, "step": 36321 }, { "epoch": 0.9973091707852828, "grad_norm": 0.32781893014907837, "learning_rate": 1.0064351673712927e-05, "loss": 0.457, "step": 36322 }, { "epoch": 0.9973366282262494, "grad_norm": 0.46176549792289734, "learning_rate": 1.0063919789187262e-05, "loss": 0.5338, "step": 36323 }, { "epoch": 0.9973640856672158, "grad_norm": 0.38303858041763306, "learning_rate": 1.006348790454237e-05, "loss": 0.4587, "step": 36324 }, { "epoch": 0.9973915431081823, "grad_norm": 0.39849427342414856, "learning_rate": 1.0063056019779051e-05, "loss": 0.4359, "step": 36325 }, { "epoch": 0.9974190005491488, "grad_norm": 0.3855247497558594, "learning_rate": 1.0062624134898113e-05, "loss": 0.52, "step": 36326 }, { "epoch": 0.9974464579901153, "grad_norm": 0.3778969645500183, "learning_rate": 1.0062192249900362e-05, "loss": 0.4372, "step": 36327 }, { "epoch": 0.9974739154310818, "grad_norm": 0.4757137596607208, "learning_rate": 1.0061760364786599e-05, "loss": 0.4953, "step": 36328 }, { "epoch": 0.9975013728720483, "grad_norm": 0.4316854178905487, "learning_rate": 1.0061328479557637e-05, "loss": 0.4554, "step": 36329 }, { "epoch": 0.9975288303130149, "grad_norm": 0.3986954391002655, "learning_rate": 1.0060896594214278e-05, "loss": 0.4779, "step": 36330 }, { "epoch": 0.9975562877539813, "grad_norm": 0.4001389741897583, "learning_rate": 1.0060464708757322e-05, "loss": 0.5237, "step": 36331 }, { "epoch": 0.9975837451949479, "grad_norm": 0.37973204255104065, "learning_rate": 1.0060032823187587e-05, "loss": 0.4825, "step": 36332 }, { "epoch": 0.9976112026359143, "grad_norm": 0.37401750683784485, "learning_rate": 1.0059600937505867e-05, "loss": 0.4802, "step": 36333 }, { "epoch": 0.9976386600768808, "grad_norm": 0.39543402194976807, "learning_rate": 1.0059169051712973e-05, "loss": 0.53, "step": 36334 }, { "epoch": 0.9976661175178473, "grad_norm": 0.44876113533973694, "learning_rate": 1.0058737165809713e-05, "loss": 0.532, "step": 36335 }, { "epoch": 0.9976935749588138, "grad_norm": 0.36361831426620483, "learning_rate": 1.0058305279796885e-05, "loss": 0.4945, "step": 36336 }, { "epoch": 0.9977210323997804, "grad_norm": 0.442708820104599, "learning_rate": 1.0057873393675301e-05, "loss": 0.4277, "step": 36337 }, { "epoch": 0.9977484898407468, "grad_norm": 0.41339465975761414, "learning_rate": 1.0057441507445765e-05, "loss": 0.4916, "step": 36338 }, { "epoch": 0.9977759472817134, "grad_norm": 0.38590776920318604, "learning_rate": 1.0057009621109082e-05, "loss": 0.4657, "step": 36339 }, { "epoch": 0.9978034047226798, "grad_norm": 0.46026915311813354, "learning_rate": 1.0056577734666058e-05, "loss": 0.4524, "step": 36340 }, { "epoch": 0.9978308621636464, "grad_norm": 0.4422514736652374, "learning_rate": 1.0056145848117498e-05, "loss": 0.3864, "step": 36341 }, { "epoch": 0.9978583196046128, "grad_norm": 0.400076687335968, "learning_rate": 1.0055713961464208e-05, "loss": 0.4821, "step": 36342 }, { "epoch": 0.9978857770455793, "grad_norm": 0.3751876950263977, "learning_rate": 1.0055282074706996e-05, "loss": 0.4784, "step": 36343 }, { "epoch": 0.9979132344865459, "grad_norm": 0.4664754271507263, "learning_rate": 1.005485018784666e-05, "loss": 0.5275, "step": 36344 }, { "epoch": 0.9979406919275123, "grad_norm": 0.44040030241012573, "learning_rate": 1.0054418300884017e-05, "loss": 0.5284, "step": 36345 }, { "epoch": 0.9979681493684789, "grad_norm": 0.38531965017318726, "learning_rate": 1.0053986413819861e-05, "loss": 0.4901, "step": 36346 }, { "epoch": 0.9979956068094453, "grad_norm": 0.5174368619918823, "learning_rate": 1.0053554526655007e-05, "loss": 0.5175, "step": 36347 }, { "epoch": 0.9980230642504119, "grad_norm": 0.41888707876205444, "learning_rate": 1.0053122639390256e-05, "loss": 0.414, "step": 36348 }, { "epoch": 0.9980505216913783, "grad_norm": 0.35921308398246765, "learning_rate": 1.0052690752026412e-05, "loss": 0.4401, "step": 36349 }, { "epoch": 0.9980779791323449, "grad_norm": 0.38348913192749023, "learning_rate": 1.0052258864564287e-05, "loss": 0.4988, "step": 36350 }, { "epoch": 0.9981054365733114, "grad_norm": 0.3870282769203186, "learning_rate": 1.005182697700468e-05, "loss": 0.4242, "step": 36351 }, { "epoch": 0.9981328940142778, "grad_norm": 0.3701067864894867, "learning_rate": 1.00513950893484e-05, "loss": 0.4714, "step": 36352 }, { "epoch": 0.9981603514552444, "grad_norm": 0.3389052748680115, "learning_rate": 1.0050963201596252e-05, "loss": 0.4141, "step": 36353 }, { "epoch": 0.9981878088962108, "grad_norm": 0.48538637161254883, "learning_rate": 1.005053131374904e-05, "loss": 0.607, "step": 36354 }, { "epoch": 0.9982152663371774, "grad_norm": 0.4162866175174713, "learning_rate": 1.0050099425807572e-05, "loss": 0.4125, "step": 36355 }, { "epoch": 0.9982427237781438, "grad_norm": 0.41572287678718567, "learning_rate": 1.0049667537772652e-05, "loss": 0.4966, "step": 36356 }, { "epoch": 0.9982701812191104, "grad_norm": 0.39643585681915283, "learning_rate": 1.0049235649645085e-05, "loss": 0.5507, "step": 36357 }, { "epoch": 0.9982976386600769, "grad_norm": 0.38855525851249695, "learning_rate": 1.0048803761425678e-05, "loss": 0.5656, "step": 36358 }, { "epoch": 0.9983250961010434, "grad_norm": 0.37052029371261597, "learning_rate": 1.0048371873115237e-05, "loss": 0.5124, "step": 36359 }, { "epoch": 0.9983525535420099, "grad_norm": 0.43298953771591187, "learning_rate": 1.0047939984714569e-05, "loss": 0.4407, "step": 36360 }, { "epoch": 0.9983800109829764, "grad_norm": 0.4165111482143402, "learning_rate": 1.0047508096224476e-05, "loss": 0.5203, "step": 36361 }, { "epoch": 0.9984074684239429, "grad_norm": 0.3698457181453705, "learning_rate": 1.0047076207645765e-05, "loss": 0.4831, "step": 36362 }, { "epoch": 0.9984349258649093, "grad_norm": 0.3959658443927765, "learning_rate": 1.0046644318979243e-05, "loss": 0.4941, "step": 36363 }, { "epoch": 0.9984623833058759, "grad_norm": 0.36237844824790955, "learning_rate": 1.0046212430225713e-05, "loss": 0.4455, "step": 36364 }, { "epoch": 0.9984898407468424, "grad_norm": 0.38887614011764526, "learning_rate": 1.0045780541385984e-05, "loss": 0.4703, "step": 36365 }, { "epoch": 0.9985172981878089, "grad_norm": 1.2514432668685913, "learning_rate": 1.0045348652460856e-05, "loss": 0.4946, "step": 36366 }, { "epoch": 0.9985447556287754, "grad_norm": 0.44012251496315, "learning_rate": 1.0044916763451143e-05, "loss": 0.5305, "step": 36367 }, { "epoch": 0.9985722130697419, "grad_norm": 0.4713853597640991, "learning_rate": 1.0044484874357644e-05, "loss": 0.6037, "step": 36368 }, { "epoch": 0.9985996705107084, "grad_norm": 0.3711102604866028, "learning_rate": 1.0044052985181163e-05, "loss": 0.4534, "step": 36369 }, { "epoch": 0.9986271279516749, "grad_norm": 0.3874654173851013, "learning_rate": 1.0043621095922513e-05, "loss": 0.5175, "step": 36370 }, { "epoch": 0.9986545853926414, "grad_norm": 0.46532925963401794, "learning_rate": 1.0043189206582495e-05, "loss": 0.4821, "step": 36371 }, { "epoch": 0.998682042833608, "grad_norm": 0.4072956144809723, "learning_rate": 1.0042757317161914e-05, "loss": 0.5053, "step": 36372 }, { "epoch": 0.9987095002745744, "grad_norm": 0.361330509185791, "learning_rate": 1.004232542766158e-05, "loss": 0.4117, "step": 36373 }, { "epoch": 0.9987369577155409, "grad_norm": 0.36337369680404663, "learning_rate": 1.0041893538082295e-05, "loss": 0.4027, "step": 36374 }, { "epoch": 0.9987644151565074, "grad_norm": 0.45273157954216003, "learning_rate": 1.0041461648424863e-05, "loss": 0.5203, "step": 36375 }, { "epoch": 0.9987918725974739, "grad_norm": 0.40028098225593567, "learning_rate": 1.0041029758690093e-05, "loss": 0.4689, "step": 36376 }, { "epoch": 0.9988193300384404, "grad_norm": 0.3817903399467468, "learning_rate": 1.004059786887879e-05, "loss": 0.4809, "step": 36377 }, { "epoch": 0.9988467874794069, "grad_norm": 0.47052136063575745, "learning_rate": 1.0040165978991758e-05, "loss": 0.4983, "step": 36378 }, { "epoch": 0.9988742449203735, "grad_norm": 0.3913911283016205, "learning_rate": 1.0039734089029803e-05, "loss": 0.4402, "step": 36379 }, { "epoch": 0.9989017023613399, "grad_norm": 0.3843168616294861, "learning_rate": 1.0039302198993733e-05, "loss": 0.4578, "step": 36380 }, { "epoch": 0.9989291598023065, "grad_norm": 0.4350765347480774, "learning_rate": 1.003887030888435e-05, "loss": 0.4994, "step": 36381 }, { "epoch": 0.9989566172432729, "grad_norm": 0.3930009603500366, "learning_rate": 1.0038438418702463e-05, "loss": 0.4973, "step": 36382 }, { "epoch": 0.9989840746842394, "grad_norm": 0.3666399121284485, "learning_rate": 1.0038006528448874e-05, "loss": 0.441, "step": 36383 }, { "epoch": 0.9990115321252059, "grad_norm": 0.3789503276348114, "learning_rate": 1.0037574638124393e-05, "loss": 0.5825, "step": 36384 }, { "epoch": 0.9990389895661724, "grad_norm": 0.44380834698677063, "learning_rate": 1.0037142747729824e-05, "loss": 0.5408, "step": 36385 }, { "epoch": 0.999066447007139, "grad_norm": 0.3423822820186615, "learning_rate": 1.0036710857265968e-05, "loss": 0.4424, "step": 36386 }, { "epoch": 0.9990939044481054, "grad_norm": 0.38936883211135864, "learning_rate": 1.003627896673364e-05, "loss": 0.476, "step": 36387 }, { "epoch": 0.999121361889072, "grad_norm": 0.47787803411483765, "learning_rate": 1.0035847076133634e-05, "loss": 0.4628, "step": 36388 }, { "epoch": 0.9991488193300384, "grad_norm": 0.41769999265670776, "learning_rate": 1.0035415185466766e-05, "loss": 0.5046, "step": 36389 }, { "epoch": 0.999176276771005, "grad_norm": 0.4646648168563843, "learning_rate": 1.0034983294733838e-05, "loss": 0.4007, "step": 36390 }, { "epoch": 0.9992037342119714, "grad_norm": 0.41069120168685913, "learning_rate": 1.003455140393565e-05, "loss": 0.4588, "step": 36391 }, { "epoch": 0.999231191652938, "grad_norm": 0.3705097436904907, "learning_rate": 1.0034119513073018e-05, "loss": 0.4533, "step": 36392 }, { "epoch": 0.9992586490939045, "grad_norm": 0.3991020917892456, "learning_rate": 1.003368762214674e-05, "loss": 0.4932, "step": 36393 }, { "epoch": 0.9992861065348709, "grad_norm": 0.48547086119651794, "learning_rate": 1.0033255731157623e-05, "loss": 0.4951, "step": 36394 }, { "epoch": 0.9993135639758375, "grad_norm": 0.3986724019050598, "learning_rate": 1.0032823840106475e-05, "loss": 0.493, "step": 36395 }, { "epoch": 0.9993410214168039, "grad_norm": 0.3785673975944519, "learning_rate": 1.0032391948994097e-05, "loss": 0.5354, "step": 36396 }, { "epoch": 0.9993684788577705, "grad_norm": 0.4470888078212738, "learning_rate": 1.0031960057821302e-05, "loss": 0.5193, "step": 36397 }, { "epoch": 0.9993959362987369, "grad_norm": 0.4191124141216278, "learning_rate": 1.0031528166588889e-05, "loss": 0.5244, "step": 36398 }, { "epoch": 0.9994233937397035, "grad_norm": 0.36923474073410034, "learning_rate": 1.0031096275297667e-05, "loss": 0.4779, "step": 36399 }, { "epoch": 0.99945085118067, "grad_norm": 0.47648778557777405, "learning_rate": 1.003066438394844e-05, "loss": 0.5157, "step": 36400 }, { "epoch": 0.9994783086216364, "grad_norm": 0.36764925718307495, "learning_rate": 1.0030232492542014e-05, "loss": 0.4237, "step": 36401 }, { "epoch": 0.999505766062603, "grad_norm": 0.44600096344947815, "learning_rate": 1.0029800601079194e-05, "loss": 0.3947, "step": 36402 }, { "epoch": 0.9995332235035694, "grad_norm": 0.43304529786109924, "learning_rate": 1.0029368709560787e-05, "loss": 0.4483, "step": 36403 }, { "epoch": 0.999560680944536, "grad_norm": 0.36925169825553894, "learning_rate": 1.0028936817987598e-05, "loss": 0.4596, "step": 36404 }, { "epoch": 0.9995881383855024, "grad_norm": 0.3833598792552948, "learning_rate": 1.0028504926360434e-05, "loss": 0.4519, "step": 36405 }, { "epoch": 0.999615595826469, "grad_norm": 0.43140727281570435, "learning_rate": 1.0028073034680096e-05, "loss": 0.5082, "step": 36406 }, { "epoch": 0.9996430532674355, "grad_norm": 0.43111392855644226, "learning_rate": 1.0027641142947395e-05, "loss": 0.5655, "step": 36407 }, { "epoch": 0.999670510708402, "grad_norm": 0.48607900738716125, "learning_rate": 1.0027209251163135e-05, "loss": 0.524, "step": 36408 }, { "epoch": 0.9996979681493685, "grad_norm": 0.448324978351593, "learning_rate": 1.002677735932812e-05, "loss": 0.4694, "step": 36409 }, { "epoch": 0.999725425590335, "grad_norm": 0.44446954131126404, "learning_rate": 1.0026345467443157e-05, "loss": 0.5283, "step": 36410 }, { "epoch": 0.9997528830313015, "grad_norm": 0.4085082709789276, "learning_rate": 1.002591357550905e-05, "loss": 0.51, "step": 36411 }, { "epoch": 0.9997803404722679, "grad_norm": 0.4036116600036621, "learning_rate": 1.0025481683526609e-05, "loss": 0.5039, "step": 36412 }, { "epoch": 0.9998077979132345, "grad_norm": 0.4005512297153473, "learning_rate": 1.0025049791496633e-05, "loss": 0.4695, "step": 36413 }, { "epoch": 0.999835255354201, "grad_norm": 0.4023047387599945, "learning_rate": 1.0024617899419933e-05, "loss": 0.4625, "step": 36414 }, { "epoch": 0.9998627127951675, "grad_norm": 0.40903687477111816, "learning_rate": 1.0024186007297314e-05, "loss": 0.424, "step": 36415 }, { "epoch": 0.999890170236134, "grad_norm": 0.3974151313304901, "learning_rate": 1.0023754115129578e-05, "loss": 0.4182, "step": 36416 }, { "epoch": 0.9999176276771005, "grad_norm": 0.4098784923553467, "learning_rate": 1.0023322222917533e-05, "loss": 0.4391, "step": 36417 }, { "epoch": 0.999945085118067, "grad_norm": 0.36589181423187256, "learning_rate": 1.0022890330661987e-05, "loss": 0.3978, "step": 36418 }, { "epoch": 0.9999725425590335, "grad_norm": 0.587020754814148, "learning_rate": 1.0022458438363741e-05, "loss": 0.4859, "step": 36419 }, { "epoch": 1.0, "grad_norm": 0.4352031946182251, "learning_rate": 1.0022026546023604e-05, "loss": 0.5471, "step": 36420 }, { "epoch": 1.0, "eval_loss": 0.22710177302360535, "eval_runtime": 189.9737, "eval_samples_per_second": 126.328, "eval_steps_per_second": 15.792, "step": 36420 } ], "logging_steps": 1, "max_steps": 72840, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.491414303730932e+20, "train_batch_size": 1, "trial_name": null, "trial_params": null }