{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.6031594064145525,
  "eval_steps": 500,
  "global_step": 315,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0019147917663954045,
      "grad_norm": 2.9491562843322754,
      "learning_rate": 0.0,
      "loss": 0.6229,
      "step": 1
    },
    {
      "epoch": 0.003829583532790809,
      "grad_norm": 3.0646867752075195,
      "learning_rate": 3.846153846153847e-07,
      "loss": 0.6119,
      "step": 2
    },
    {
      "epoch": 0.0057443752991862135,
      "grad_norm": 3.0737922191619873,
      "learning_rate": 7.692307692307694e-07,
      "loss": 0.6582,
      "step": 3
    },
    {
      "epoch": 0.007659167065581618,
      "grad_norm": 2.9172728061676025,
      "learning_rate": 1.153846153846154e-06,
      "loss": 0.6209,
      "step": 4
    },
    {
      "epoch": 0.009573958831977022,
      "grad_norm": 2.668588161468506,
      "learning_rate": 1.5384615384615387e-06,
      "loss": 0.5589,
      "step": 5
    },
    {
      "epoch": 0.011488750598372427,
      "grad_norm": 3.2810585498809814,
      "learning_rate": 1.9230769230769234e-06,
      "loss": 0.5968,
      "step": 6
    },
    {
      "epoch": 0.013403542364767831,
      "grad_norm": 2.434365749359131,
      "learning_rate": 2.307692307692308e-06,
      "loss": 0.5636,
      "step": 7
    },
    {
      "epoch": 0.015318334131163236,
      "grad_norm": 2.060615301132202,
      "learning_rate": 2.6923076923076923e-06,
      "loss": 0.5661,
      "step": 8
    },
    {
      "epoch": 0.01723312589755864,
      "grad_norm": 1.8817814588546753,
      "learning_rate": 3.0769230769230774e-06,
      "loss": 0.5817,
      "step": 9
    },
    {
      "epoch": 0.019147917663954045,
      "grad_norm": 1.766438603401184,
      "learning_rate": 3.4615384615384617e-06,
      "loss": 0.5529,
      "step": 10
    },
    {
      "epoch": 0.02106270943034945,
      "grad_norm": 1.5240556001663208,
      "learning_rate": 3.846153846153847e-06,
      "loss": 0.5207,
      "step": 11
    },
    {
      "epoch": 0.022977501196744854,
      "grad_norm": 1.5381622314453125,
      "learning_rate": 4.230769230769231e-06,
      "loss": 0.5171,
      "step": 12
    },
    {
      "epoch": 0.02489229296314026,
      "grad_norm": 1.4144328832626343,
      "learning_rate": 4.615384615384616e-06,
      "loss": 0.5612,
      "step": 13
    },
    {
      "epoch": 0.026807084729535663,
      "grad_norm": 1.282257318496704,
      "learning_rate": 5e-06,
      "loss": 0.493,
      "step": 14
    },
    {
      "epoch": 0.028721876495931067,
      "grad_norm": 1.3273121118545532,
      "learning_rate": 5.384615384615385e-06,
      "loss": 0.4723,
      "step": 15
    },
    {
      "epoch": 0.030636668262326472,
      "grad_norm": 1.1829627752304077,
      "learning_rate": 5.769230769230769e-06,
      "loss": 0.4675,
      "step": 16
    },
    {
      "epoch": 0.032551460028721876,
      "grad_norm": 1.0885576009750366,
      "learning_rate": 6.153846153846155e-06,
      "loss": 0.4275,
      "step": 17
    },
    {
      "epoch": 0.03446625179511728,
      "grad_norm": 0.9974104762077332,
      "learning_rate": 6.538461538461539e-06,
      "loss": 0.4709,
      "step": 18
    },
    {
      "epoch": 0.036381043561512685,
      "grad_norm": 1.0769761800765991,
      "learning_rate": 6.923076923076923e-06,
      "loss": 0.4916,
      "step": 19
    },
    {
      "epoch": 0.03829583532790809,
      "grad_norm": 0.967096745967865,
      "learning_rate": 7.307692307692308e-06,
      "loss": 0.4785,
      "step": 20
    },
    {
      "epoch": 0.040210627094303494,
      "grad_norm": 1.0460747480392456,
      "learning_rate": 7.692307692307694e-06,
      "loss": 0.4653,
      "step": 21
    },
    {
      "epoch": 0.0421254188606989,
      "grad_norm": 1.0114920139312744,
      "learning_rate": 8.076923076923077e-06,
      "loss": 0.4648,
      "step": 22
    },
    {
      "epoch": 0.0440402106270943,
      "grad_norm": 1.1619290113449097,
      "learning_rate": 8.461538461538462e-06,
      "loss": 0.4833,
      "step": 23
    },
    {
      "epoch": 0.04595500239348971,
      "grad_norm": 0.9872665405273438,
      "learning_rate": 8.846153846153847e-06,
      "loss": 0.4545,
      "step": 24
    },
    {
      "epoch": 0.04786979415988511,
      "grad_norm": 0.9702840447425842,
      "learning_rate": 9.230769230769232e-06,
      "loss": 0.4651,
      "step": 25
    },
    {
      "epoch": 0.04978458592628052,
      "grad_norm": 0.9493695497512817,
      "learning_rate": 9.615384615384616e-06,
      "loss": 0.477,
      "step": 26
    },
    {
      "epoch": 0.05169937769267592,
      "grad_norm": 0.9152507185935974,
      "learning_rate": 1e-05,
      "loss": 0.4499,
      "step": 27
    },
    {
      "epoch": 0.053614169459071326,
      "grad_norm": 1.0640617609024048,
      "learning_rate": 9.999899706000774e-06,
      "loss": 0.4853,
      "step": 28
    },
    {
      "epoch": 0.05552896122546673,
      "grad_norm": 0.9641034603118896,
      "learning_rate": 9.999598828026644e-06,
      "loss": 0.475,
      "step": 29
    },
    {
      "epoch": 0.057443752991862135,
      "grad_norm": 0.8927161693572998,
      "learning_rate": 9.999097378148116e-06,
      "loss": 0.4448,
      "step": 30
    },
    {
      "epoch": 0.05935854475825754,
      "grad_norm": 0.881844699382782,
      "learning_rate": 9.998395376482152e-06,
      "loss": 0.4327,
      "step": 31
    },
    {
      "epoch": 0.061273336524652944,
      "grad_norm": 0.8794113993644714,
      "learning_rate": 9.99749285119138e-06,
      "loss": 0.4294,
      "step": 32
    },
    {
      "epoch": 0.06318812829104835,
      "grad_norm": 0.9898825287818909,
      "learning_rate": 9.996389838482942e-06,
      "loss": 0.5294,
      "step": 33
    },
    {
      "epoch": 0.06510292005744375,
      "grad_norm": 0.9184749126434326,
      "learning_rate": 9.995086382607064e-06,
      "loss": 0.4774,
      "step": 34
    },
    {
      "epoch": 0.06701771182383916,
      "grad_norm": 0.9067336320877075,
      "learning_rate": 9.993582535855265e-06,
      "loss": 0.4569,
      "step": 35
    },
    {
      "epoch": 0.06893250359023456,
      "grad_norm": 0.8807307481765747,
      "learning_rate": 9.991878358558267e-06,
      "loss": 0.478,
      "step": 36
    },
    {
      "epoch": 0.07084729535662997,
      "grad_norm": 0.9359887838363647,
      "learning_rate": 9.989973919083576e-06,
      "loss": 0.4659,
      "step": 37
    },
    {
      "epoch": 0.07276208712302537,
      "grad_norm": 0.9008484482765198,
      "learning_rate": 9.987869293832727e-06,
      "loss": 0.4659,
      "step": 38
    },
    {
      "epoch": 0.07467687888942078,
      "grad_norm": 0.8065485954284668,
      "learning_rate": 9.985564567238237e-06,
      "loss": 0.4441,
      "step": 39
    },
    {
      "epoch": 0.07659167065581618,
      "grad_norm": 0.9766021966934204,
      "learning_rate": 9.983059831760205e-06,
      "loss": 0.4834,
      "step": 40
    },
    {
      "epoch": 0.07850646242221158,
      "grad_norm": 0.8222993016242981,
      "learning_rate": 9.980355187882606e-06,
      "loss": 0.443,
      "step": 41
    },
    {
      "epoch": 0.08042125418860699,
      "grad_norm": 0.8215630054473877,
      "learning_rate": 9.977450744109258e-06,
      "loss": 0.4219,
      "step": 42
    },
    {
      "epoch": 0.0823360459550024,
      "grad_norm": 0.8324375748634338,
      "learning_rate": 9.974346616959476e-06,
      "loss": 0.4362,
      "step": 43
    },
    {
      "epoch": 0.0842508377213978,
      "grad_norm": 0.9242782592773438,
      "learning_rate": 9.97104293096339e-06,
      "loss": 0.4738,
      "step": 44
    },
    {
      "epoch": 0.0861656294877932,
      "grad_norm": 0.9275208711624146,
      "learning_rate": 9.967539818656953e-06,
      "loss": 0.4571,
      "step": 45
    },
    {
      "epoch": 0.0880804212541886,
      "grad_norm": 0.876868724822998,
      "learning_rate": 9.96383742057662e-06,
      "loss": 0.5172,
      "step": 46
    },
    {
      "epoch": 0.08999521302058401,
      "grad_norm": 0.8446276783943176,
      "learning_rate": 9.959935885253715e-06,
      "loss": 0.4457,
      "step": 47
    },
    {
      "epoch": 0.09191000478697942,
      "grad_norm": 0.8077015280723572,
      "learning_rate": 9.955835369208475e-06,
      "loss": 0.4234,
      "step": 48
    },
    {
      "epoch": 0.09382479655337482,
      "grad_norm": 0.7882896065711975,
      "learning_rate": 9.951536036943753e-06,
      "loss": 0.4264,
      "step": 49
    },
    {
      "epoch": 0.09573958831977022,
      "grad_norm": 0.8539751768112183,
      "learning_rate": 9.94703806093845e-06,
      "loss": 0.461,
      "step": 50
    },
    {
      "epoch": 0.09765438008616563,
      "grad_norm": 0.8285911679267883,
      "learning_rate": 9.942341621640558e-06,
      "loss": 0.4379,
      "step": 51
    },
    {
      "epoch": 0.09956917185256103,
      "grad_norm": 0.8029133081436157,
      "learning_rate": 9.937446907459954e-06,
      "loss": 0.4565,
      "step": 52
    },
    {
      "epoch": 0.10148396361895644,
      "grad_norm": 0.7964851260185242,
      "learning_rate": 9.932354114760819e-06,
      "loss": 0.4262,
      "step": 53
    },
    {
      "epoch": 0.10339875538535184,
      "grad_norm": 0.9846324920654297,
      "learning_rate": 9.92706344785377e-06,
      "loss": 0.5302,
      "step": 54
    },
    {
      "epoch": 0.10531354715174725,
      "grad_norm": 0.7648650407791138,
      "learning_rate": 9.921575118987672e-06,
      "loss": 0.4066,
      "step": 55
    },
    {
      "epoch": 0.10722833891814265,
      "grad_norm": 0.83173668384552,
      "learning_rate": 9.915889348341098e-06,
      "loss": 0.4438,
      "step": 56
    },
    {
      "epoch": 0.10914313068453806,
      "grad_norm": 0.7968882322311401,
      "learning_rate": 9.910006364013522e-06,
      "loss": 0.407,
      "step": 57
    },
    {
      "epoch": 0.11105792245093346,
      "grad_norm": 0.8423118591308594,
      "learning_rate": 9.903926402016153e-06,
      "loss": 0.4174,
      "step": 58
    },
    {
      "epoch": 0.11297271421732887,
      "grad_norm": 0.9054727554321289,
      "learning_rate": 9.897649706262474e-06,
      "loss": 0.4764,
      "step": 59
    },
    {
      "epoch": 0.11488750598372427,
      "grad_norm": 0.8318431973457336,
      "learning_rate": 9.891176528558451e-06,
      "loss": 0.4326,
      "step": 60
    },
    {
      "epoch": 0.11680229775011967,
      "grad_norm": 0.8409565687179565,
      "learning_rate": 9.884507128592435e-06,
      "loss": 0.4451,
      "step": 61
    },
    {
      "epoch": 0.11871708951651508,
      "grad_norm": 0.8471431136131287,
      "learning_rate": 9.877641773924748e-06,
      "loss": 0.4217,
      "step": 62
    },
    {
      "epoch": 0.12063188128291048,
      "grad_norm": 0.8495103120803833,
      "learning_rate": 9.870580739976936e-06,
      "loss": 0.421,
      "step": 63
    },
    {
      "epoch": 0.12254667304930589,
      "grad_norm": 0.8164567947387695,
      "learning_rate": 9.863324310020735e-06,
      "loss": 0.4266,
      "step": 64
    },
    {
      "epoch": 0.12446146481570129,
      "grad_norm": 0.8732247948646545,
      "learning_rate": 9.855872775166696e-06,
      "loss": 0.4661,
      "step": 65
    },
    {
      "epoch": 0.1263762565820967,
      "grad_norm": 0.8157728910446167,
      "learning_rate": 9.848226434352513e-06,
      "loss": 0.4401,
      "step": 66
    },
    {
      "epoch": 0.12829104834849211,
      "grad_norm": 0.8860891461372375,
      "learning_rate": 9.840385594331022e-06,
      "loss": 0.4748,
      "step": 67
    },
    {
      "epoch": 0.1302058401148875,
      "grad_norm": 0.8987312316894531,
      "learning_rate": 9.83235056965791e-06,
      "loss": 0.4881,
      "step": 68
    },
    {
      "epoch": 0.13212063188128292,
      "grad_norm": 0.8786044716835022,
      "learning_rate": 9.824121682679072e-06,
      "loss": 0.4417,
      "step": 69
    },
    {
      "epoch": 0.13403542364767831,
      "grad_norm": 0.8325650691986084,
      "learning_rate": 9.815699263517712e-06,
      "loss": 0.4377,
      "step": 70
    },
    {
      "epoch": 0.13595021541407373,
      "grad_norm": 0.8149142861366272,
      "learning_rate": 9.807083650061063e-06,
      "loss": 0.4496,
      "step": 71
    },
    {
      "epoch": 0.13786500718046912,
      "grad_norm": 0.8394611477851868,
      "learning_rate": 9.798275187946859e-06,
      "loss": 0.4394,
      "step": 72
    },
    {
      "epoch": 0.13977979894686454,
      "grad_norm": 0.7746449112892151,
      "learning_rate": 9.789274230549456e-06,
      "loss": 0.4039,
      "step": 73
    },
    {
      "epoch": 0.14169459071325993,
      "grad_norm": 0.7592336535453796,
      "learning_rate": 9.780081138965663e-06,
      "loss": 0.3788,
      "step": 74
    },
    {
      "epoch": 0.14360938247965535,
      "grad_norm": 0.9066088199615479,
      "learning_rate": 9.770696282000245e-06,
      "loss": 0.4541,
      "step": 75
    },
    {
      "epoch": 0.14552417424605074,
      "grad_norm": 0.8512394428253174,
      "learning_rate": 9.761120036151138e-06,
      "loss": 0.4217,
      "step": 76
    },
    {
      "epoch": 0.14743896601244616,
      "grad_norm": 0.795378565788269,
      "learning_rate": 9.751352785594337e-06,
      "loss": 0.4014,
      "step": 77
    },
    {
      "epoch": 0.14935375777884155,
      "grad_norm": 0.9467825293540955,
      "learning_rate": 9.741394922168495e-06,
      "loss": 0.4855,
      "step": 78
    },
    {
      "epoch": 0.15126854954523697,
      "grad_norm": 0.7824875712394714,
      "learning_rate": 9.731246845359187e-06,
      "loss": 0.4088,
      "step": 79
    },
    {
      "epoch": 0.15318334131163236,
      "grad_norm": 0.7557615637779236,
      "learning_rate": 9.720908962282893e-06,
      "loss": 0.4023,
      "step": 80
    },
    {
      "epoch": 0.15509813307802778,
      "grad_norm": 0.8093947768211365,
      "learning_rate": 9.710381687670675e-06,
      "loss": 0.4345,
      "step": 81
    },
    {
      "epoch": 0.15701292484442317,
      "grad_norm": 0.8901275396347046,
      "learning_rate": 9.699665443851518e-06,
      "loss": 0.4444,
      "step": 82
    },
    {
      "epoch": 0.1589277166108186,
      "grad_norm": 0.7518415451049805,
      "learning_rate": 9.688760660735403e-06,
      "loss": 0.4024,
      "step": 83
    },
    {
      "epoch": 0.16084250837721398,
      "grad_norm": 0.7495772242546082,
      "learning_rate": 9.677667775796052e-06,
      "loss": 0.4005,
      "step": 84
    },
    {
      "epoch": 0.1627573001436094,
      "grad_norm": 0.8903560638427734,
      "learning_rate": 9.666387234053385e-06,
      "loss": 0.4495,
      "step": 85
    },
    {
      "epoch": 0.1646720919100048,
      "grad_norm": 0.8854427933692932,
      "learning_rate": 9.654919488055656e-06,
      "loss": 0.4381,
      "step": 86
    },
    {
      "epoch": 0.1665868836764002,
      "grad_norm": 0.8393151164054871,
      "learning_rate": 9.643264997861312e-06,
      "loss": 0.4177,
      "step": 87
    },
    {
      "epoch": 0.1685016754427956,
      "grad_norm": 0.8448845148086548,
      "learning_rate": 9.631424231020523e-06,
      "loss": 0.4437,
      "step": 88
    },
    {
      "epoch": 0.170416467209191,
      "grad_norm": 0.8987253904342651,
      "learning_rate": 9.619397662556434e-06,
      "loss": 0.4479,
      "step": 89
    },
    {
      "epoch": 0.1723312589755864,
      "grad_norm": 0.9512760639190674,
      "learning_rate": 9.607185774946106e-06,
      "loss": 0.5188,
      "step": 90
    },
    {
      "epoch": 0.17424605074198182,
      "grad_norm": 0.9057194590568542,
      "learning_rate": 9.594789058101154e-06,
      "loss": 0.4448,
      "step": 91
    },
    {
      "epoch": 0.1761608425083772,
      "grad_norm": 0.8147549033164978,
      "learning_rate": 9.582208009348104e-06,
      "loss": 0.4106,
      "step": 92
    },
    {
      "epoch": 0.17807563427477263,
      "grad_norm": 0.8666926622390747,
      "learning_rate": 9.569443133408434e-06,
      "loss": 0.4558,
      "step": 93
    },
    {
      "epoch": 0.17999042604116802,
      "grad_norm": 0.8677969574928284,
      "learning_rate": 9.556494942378328e-06,
      "loss": 0.4379,
      "step": 94
    },
    {
      "epoch": 0.18190521780756344,
      "grad_norm": 0.8896477222442627,
      "learning_rate": 9.543363955708124e-06,
      "loss": 0.4498,
      "step": 95
    },
    {
      "epoch": 0.18382000957395883,
      "grad_norm": 0.7357858419418335,
      "learning_rate": 9.530050700181499e-06,
      "loss": 0.3666,
      "step": 96
    },
    {
      "epoch": 0.18573480134035425,
      "grad_norm": 0.7851715683937073,
      "learning_rate": 9.5165557098943e-06,
      "loss": 0.411,
      "step": 97
    },
    {
      "epoch": 0.18764959310674964,
      "grad_norm": 0.8098123669624329,
      "learning_rate": 9.502879526233151e-06,
      "loss": 0.4023,
      "step": 98
    },
    {
      "epoch": 0.18956438487314506,
      "grad_norm": 0.8245725631713867,
      "learning_rate": 9.48902269785371e-06,
      "loss": 0.423,
      "step": 99
    },
    {
      "epoch": 0.19147917663954045,
      "grad_norm": 0.8497715592384338,
      "learning_rate": 9.47498578065867e-06,
      "loss": 0.4125,
      "step": 100
    },
    {
      "epoch": 0.19339396840593587,
      "grad_norm": 0.8205481171607971,
      "learning_rate": 9.460769337775461e-06,
      "loss": 0.4312,
      "step": 101
    },
    {
      "epoch": 0.19530876017233126,
      "grad_norm": 0.8062931299209595,
      "learning_rate": 9.446373939533642e-06,
      "loss": 0.3961,
      "step": 102
    },
    {
      "epoch": 0.19722355193872668,
      "grad_norm": 0.8209528923034668,
      "learning_rate": 9.431800163442043e-06,
      "loss": 0.4121,
      "step": 103
    },
    {
      "epoch": 0.19913834370512207,
      "grad_norm": 0.8154571652412415,
      "learning_rate": 9.417048594165572e-06,
      "loss": 0.4475,
      "step": 104
    },
    {
      "epoch": 0.20105313547151749,
      "grad_norm": 0.8546404838562012,
      "learning_rate": 9.402119823501787e-06,
      "loss": 0.4293,
      "step": 105
    },
    {
      "epoch": 0.20296792723791288,
      "grad_norm": 0.8470130562782288,
      "learning_rate": 9.387014450357128e-06,
      "loss": 0.4139,
      "step": 106
    },
    {
      "epoch": 0.2048827190043083,
      "grad_norm": 0.9199275970458984,
      "learning_rate": 9.371733080722911e-06,
      "loss": 0.4825,
      "step": 107
    },
    {
      "epoch": 0.20679751077070369,
      "grad_norm": 0.9049551486968994,
      "learning_rate": 9.356276327651006e-06,
      "loss": 0.4378,
      "step": 108
    },
    {
      "epoch": 0.2087123025370991,
      "grad_norm": 0.8089979887008667,
      "learning_rate": 9.340644811229243e-06,
      "loss": 0.4027,
      "step": 109
    },
    {
      "epoch": 0.2106270943034945,
      "grad_norm": 0.7452864050865173,
      "learning_rate": 9.324839158556542e-06,
      "loss": 0.3795,
      "step": 110
    },
    {
      "epoch": 0.2125418860698899,
      "grad_norm": 0.8286869525909424,
      "learning_rate": 9.308860003717748e-06,
      "loss": 0.4137,
      "step": 111
    },
    {
      "epoch": 0.2144566778362853,
      "grad_norm": 0.8634768724441528,
      "learning_rate": 9.292707987758202e-06,
      "loss": 0.445,
      "step": 112
    },
    {
      "epoch": 0.21637146960268072,
      "grad_norm": 0.8329188227653503,
      "learning_rate": 9.27638375865801e-06,
      "loss": 0.4307,
      "step": 113
    },
    {
      "epoch": 0.2182862613690761,
      "grad_norm": 0.8780718445777893,
      "learning_rate": 9.259887971306064e-06,
      "loss": 0.4863,
      "step": 114
    },
    {
      "epoch": 0.22020105313547153,
      "grad_norm": 0.9007835388183594,
      "learning_rate": 9.243221287473755e-06,
      "loss": 0.4482,
      "step": 115
    },
    {
      "epoch": 0.22211584490186692,
      "grad_norm": 0.8163229823112488,
      "learning_rate": 9.226384375788435e-06,
      "loss": 0.4168,
      "step": 116
    },
    {
      "epoch": 0.22403063666826234,
      "grad_norm": 0.8288677334785461,
      "learning_rate": 9.209377911706585e-06,
      "loss": 0.4038,
      "step": 117
    },
    {
      "epoch": 0.22594542843465773,
      "grad_norm": 0.8035851716995239,
      "learning_rate": 9.192202577486725e-06,
      "loss": 0.3922,
      "step": 118
    },
    {
      "epoch": 0.22786022020105315,
      "grad_norm": 0.8203516006469727,
      "learning_rate": 9.174859062162037e-06,
      "loss": 0.3971,
      "step": 119
    },
    {
      "epoch": 0.22977501196744854,
      "grad_norm": 0.8246352076530457,
      "learning_rate": 9.157348061512728e-06,
      "loss": 0.4433,
      "step": 120
    },
    {
      "epoch": 0.23168980373384396,
      "grad_norm": 0.8655344247817993,
      "learning_rate": 9.139670278038109e-06,
      "loss": 0.4405,
      "step": 121
    },
    {
      "epoch": 0.23360459550023935,
      "grad_norm": 0.7439157366752625,
      "learning_rate": 9.121826420928421e-06,
      "loss": 0.3683,
      "step": 122
    },
    {
      "epoch": 0.23551938726663477,
      "grad_norm": 0.817434549331665,
      "learning_rate": 9.103817206036383e-06,
      "loss": 0.4034,
      "step": 123
    },
    {
      "epoch": 0.23743417903303016,
      "grad_norm": 0.8455221056938171,
      "learning_rate": 9.085643355848468e-06,
      "loss": 0.4418,
      "step": 124
    },
    {
      "epoch": 0.23934897079942558,
      "grad_norm": 0.8356925845146179,
      "learning_rate": 9.06730559945592e-06,
      "loss": 0.4012,
      "step": 125
    },
    {
      "epoch": 0.24126376256582097,
      "grad_norm": 0.8181227445602417,
      "learning_rate": 9.048804672525513e-06,
      "loss": 0.4174,
      "step": 126
    },
    {
      "epoch": 0.24317855433221638,
      "grad_norm": 0.8010542988777161,
      "learning_rate": 9.030141317270026e-06,
      "loss": 0.3952,
      "step": 127
    },
    {
      "epoch": 0.24509334609861178,
      "grad_norm": 0.8500829935073853,
      "learning_rate": 9.011316282418474e-06,
      "loss": 0.4123,
      "step": 128
    },
    {
      "epoch": 0.2470081378650072,
      "grad_norm": 0.8971666693687439,
      "learning_rate": 8.992330323186069e-06,
      "loss": 0.4451,
      "step": 129
    },
    {
      "epoch": 0.24892292963140258,
      "grad_norm": 0.9065473079681396,
      "learning_rate": 8.973184201243922e-06,
      "loss": 0.4821,
      "step": 130
    },
    {
      "epoch": 0.250837721397798,
      "grad_norm": 0.8722876906394958,
      "learning_rate": 8.953878684688492e-06,
      "loss": 0.4204,
      "step": 131
    },
    {
      "epoch": 0.2527525131641934,
      "grad_norm": 0.8343362808227539,
      "learning_rate": 8.934414548010764e-06,
      "loss": 0.408,
      "step": 132
    },
    {
      "epoch": 0.2546673049305888,
      "grad_norm": 0.8162686824798584,
      "learning_rate": 8.914792572065178e-06,
      "loss": 0.416,
      "step": 133
    },
    {
      "epoch": 0.25658209669698423,
      "grad_norm": 0.9116921424865723,
      "learning_rate": 8.89501354403831e-06,
      "loss": 0.4589,
      "step": 134
    },
    {
      "epoch": 0.2584968884633796,
      "grad_norm": 0.9577599763870239,
      "learning_rate": 8.875078257417294e-06,
      "loss": 0.4654,
      "step": 135
    },
    {
      "epoch": 0.260411680229775,
      "grad_norm": 0.8709072470664978,
      "learning_rate": 8.854987511957974e-06,
      "loss": 0.4395,
      "step": 136
    },
    {
      "epoch": 0.26232647199617043,
      "grad_norm": 0.8386030197143555,
      "learning_rate": 8.834742113652835e-06,
      "loss": 0.4281,
      "step": 137
    },
    {
      "epoch": 0.26424126376256585,
      "grad_norm": 0.7646230459213257,
      "learning_rate": 8.81434287469866e-06,
      "loss": 0.3804,
      "step": 138
    },
    {
      "epoch": 0.2661560555289612,
      "grad_norm": 0.8096075057983398,
      "learning_rate": 8.793790613463956e-06,
      "loss": 0.4112,
      "step": 139
    },
    {
      "epoch": 0.26807084729535663,
      "grad_norm": 0.8051929473876953,
      "learning_rate": 8.773086154456106e-06,
      "loss": 0.4172,
      "step": 140
    },
    {
      "epoch": 0.26998563906175205,
      "grad_norm": 0.9208196401596069,
      "learning_rate": 8.752230328288314e-06,
      "loss": 0.4768,
      "step": 141
    },
    {
      "epoch": 0.27190043082814747,
      "grad_norm": 0.7890869975090027,
      "learning_rate": 8.731223971646261e-06,
      "loss": 0.3915,
      "step": 142
    },
    {
      "epoch": 0.27381522259454283,
      "grad_norm": 0.786723792552948,
      "learning_rate": 8.710067927254555e-06,
      "loss": 0.3844,
      "step": 143
    },
    {
      "epoch": 0.27573001436093825,
      "grad_norm": 0.791117250919342,
      "learning_rate": 8.688763043842916e-06,
      "loss": 0.4065,
      "step": 144
    },
    {
      "epoch": 0.27764480612733367,
      "grad_norm": 0.8172312378883362,
      "learning_rate": 8.66731017611213e-06,
      "loss": 0.4337,
      "step": 145
    },
    {
      "epoch": 0.2795595978937291,
      "grad_norm": 0.8335762023925781,
      "learning_rate": 8.645710184699756e-06,
      "loss": 0.4182,
      "step": 146
    },
    {
      "epoch": 0.28147438966012445,
      "grad_norm": 0.8034957051277161,
      "learning_rate": 8.6239639361456e-06,
      "loss": 0.4097,
      "step": 147
    },
    {
      "epoch": 0.28338918142651986,
      "grad_norm": 0.8107390403747559,
      "learning_rate": 8.602072302856961e-06,
      "loss": 0.4055,
      "step": 148
    },
    {
      "epoch": 0.2853039731929153,
      "grad_norm": 0.8442232012748718,
      "learning_rate": 8.580036163073615e-06,
      "loss": 0.4307,
      "step": 149
    },
    {
      "epoch": 0.2872187649593107,
      "grad_norm": 0.8290265202522278,
      "learning_rate": 8.5578564008326e-06,
      "loss": 0.3892,
      "step": 150
    },
    {
      "epoch": 0.28913355672570606,
      "grad_norm": 0.8057438731193542,
      "learning_rate": 8.535533905932739e-06,
      "loss": 0.4042,
      "step": 151
    },
    {
      "epoch": 0.2910483484921015,
      "grad_norm": 0.8582248091697693,
      "learning_rate": 8.513069573898944e-06,
      "loss": 0.4149,
      "step": 152
    },
    {
      "epoch": 0.2929631402584969,
      "grad_norm": 0.8402311205863953,
      "learning_rate": 8.490464305946296e-06,
      "loss": 0.4243,
      "step": 153
    },
    {
      "epoch": 0.2948779320248923,
      "grad_norm": 0.812869668006897,
      "learning_rate": 8.467719008943886e-06,
      "loss": 0.4134,
      "step": 154
    },
    {
      "epoch": 0.2967927237912877,
      "grad_norm": 0.8431028723716736,
      "learning_rate": 8.444834595378434e-06,
      "loss": 0.4185,
      "step": 155
    },
    {
      "epoch": 0.2987075155576831,
      "grad_norm": 0.802760899066925,
      "learning_rate": 8.421811983317682e-06,
      "loss": 0.4011,
      "step": 156
    },
    {
      "epoch": 0.3006223073240785,
      "grad_norm": 0.814274251461029,
      "learning_rate": 8.398652096373566e-06,
      "loss": 0.4194,
      "step": 157
    },
    {
      "epoch": 0.30253709909047394,
      "grad_norm": 0.8286414742469788,
      "learning_rate": 8.375355863665155e-06,
      "loss": 0.4044,
      "step": 158
    },
    {
      "epoch": 0.3044518908568693,
      "grad_norm": 0.8244617581367493,
      "learning_rate": 8.351924219781393e-06,
      "loss": 0.4415,
      "step": 159
    },
    {
      "epoch": 0.3063666826232647,
      "grad_norm": 0.8288456201553345,
      "learning_rate": 8.328358104743588e-06,
      "loss": 0.4143,
      "step": 160
    },
    {
      "epoch": 0.30828147438966014,
      "grad_norm": 0.7895364165306091,
      "learning_rate": 8.304658463967705e-06,
      "loss": 0.4122,
      "step": 161
    },
    {
      "epoch": 0.31019626615605556,
      "grad_norm": 0.7923944592475891,
      "learning_rate": 8.28082624822645e-06,
      "loss": 0.3812,
      "step": 162
    },
    {
      "epoch": 0.3121110579224509,
      "grad_norm": 0.7424578666687012,
      "learning_rate": 8.256862413611113e-06,
      "loss": 0.3883,
      "step": 163
    },
    {
      "epoch": 0.31402584968884634,
      "grad_norm": 0.8261198401451111,
      "learning_rate": 8.232767921493216e-06,
      "loss": 0.432,
      "step": 164
    },
    {
      "epoch": 0.31594064145524176,
      "grad_norm": 0.8710785508155823,
      "learning_rate": 8.20854373848595e-06,
      "loss": 0.4508,
      "step": 165
    },
    {
      "epoch": 0.3178554332216372,
      "grad_norm": 0.7583726048469543,
      "learning_rate": 8.184190836405394e-06,
      "loss": 0.3709,
      "step": 166
    },
    {
      "epoch": 0.31977022498803254,
      "grad_norm": 0.7795834541320801,
      "learning_rate": 8.15971019223152e-06,
      "loss": 0.4055,
      "step": 167
    },
    {
      "epoch": 0.32168501675442795,
      "grad_norm": 0.7580612897872925,
      "learning_rate": 8.135102788069015e-06,
      "loss": 0.3605,
      "step": 168
    },
    {
      "epoch": 0.3235998085208234,
      "grad_norm": 0.7536636590957642,
      "learning_rate": 8.110369611107869e-06,
      "loss": 0.3656,
      "step": 169
    },
    {
      "epoch": 0.3255146002872188,
      "grad_norm": 0.8029680252075195,
      "learning_rate": 8.085511653583772e-06,
      "loss": 0.3819,
      "step": 170
    },
    {
      "epoch": 0.32742939205361415,
      "grad_norm": 0.8548794388771057,
      "learning_rate": 8.060529912738316e-06,
      "loss": 0.4449,
      "step": 171
    },
    {
      "epoch": 0.3293441838200096,
      "grad_norm": 0.877955436706543,
      "learning_rate": 8.035425390778975e-06,
      "loss": 0.4504,
      "step": 172
    },
    {
      "epoch": 0.331258975586405,
      "grad_norm": 0.8173900246620178,
      "learning_rate": 8.010199094838915e-06,
      "loss": 0.4211,
      "step": 173
    },
    {
      "epoch": 0.3331737673528004,
      "grad_norm": 0.8715358972549438,
      "learning_rate": 7.984852036936578e-06,
      "loss": 0.3909,
      "step": 174
    },
    {
      "epoch": 0.3350885591191958,
      "grad_norm": 0.8475743532180786,
      "learning_rate": 7.959385233935087e-06,
      "loss": 0.4416,
      "step": 175
    },
    {
      "epoch": 0.3370033508855912,
      "grad_norm": 0.7483753561973572,
      "learning_rate": 7.933799707501448e-06,
      "loss": 0.351,
      "step": 176
    },
    {
      "epoch": 0.3389181426519866,
      "grad_norm": 0.8065423965454102,
      "learning_rate": 7.908096484065569e-06,
      "loss": 0.4085,
      "step": 177
    },
    {
      "epoch": 0.340832934418382,
      "grad_norm": 0.8215972185134888,
      "learning_rate": 7.88227659477908e-06,
      "loss": 0.4132,
      "step": 178
    },
    {
      "epoch": 0.3427477261847774,
      "grad_norm": 0.7788512706756592,
      "learning_rate": 7.856341075473963e-06,
      "loss": 0.3828,
      "step": 179
    },
    {
      "epoch": 0.3446625179511728,
      "grad_norm": 0.7943012118339539,
      "learning_rate": 7.830290966620997e-06,
      "loss": 0.3737,
      "step": 180
    },
    {
      "epoch": 0.3465773097175682,
      "grad_norm": 0.8680888414382935,
      "learning_rate": 7.804127313288023e-06,
      "loss": 0.4019,
      "step": 181
    },
    {
      "epoch": 0.34849210148396365,
      "grad_norm": 0.8370754718780518,
      "learning_rate": 7.777851165098012e-06,
      "loss": 0.4202,
      "step": 182
    },
    {
      "epoch": 0.350406893250359,
      "grad_norm": 0.7426475882530212,
      "learning_rate": 7.751463576186957e-06,
      "loss": 0.378,
      "step": 183
    },
    {
      "epoch": 0.3523216850167544,
      "grad_norm": 0.827038586139679,
      "learning_rate": 7.72496560516159e-06,
      "loss": 0.415,
      "step": 184
    },
    {
      "epoch": 0.35423647678314985,
      "grad_norm": 0.8714759349822998,
      "learning_rate": 7.6983583150569e-06,
      "loss": 0.4204,
      "step": 185
    },
    {
      "epoch": 0.35615126854954526,
      "grad_norm": 0.8127462863922119,
      "learning_rate": 7.671642773293506e-06,
      "loss": 0.3904,
      "step": 186
    },
    {
      "epoch": 0.3580660603159406,
      "grad_norm": 0.8972522020339966,
      "learning_rate": 7.644820051634813e-06,
      "loss": 0.4168,
      "step": 187
    },
    {
      "epoch": 0.35998085208233604,
      "grad_norm": 0.9051675200462341,
      "learning_rate": 7.617891226144034e-06,
      "loss": 0.4742,
      "step": 188
    },
    {
      "epoch": 0.36189564384873146,
      "grad_norm": 0.8041402101516724,
      "learning_rate": 7.59085737714101e-06,
      "loss": 0.3916,
      "step": 189
    },
    {
      "epoch": 0.3638104356151269,
      "grad_norm": 0.9296969175338745,
      "learning_rate": 7.563719589158874e-06,
      "loss": 0.4198,
      "step": 190
    },
    {
      "epoch": 0.36572522738152224,
      "grad_norm": 0.8441433310508728,
      "learning_rate": 7.536478950900537e-06,
      "loss": 0.4094,
      "step": 191
    },
    {
      "epoch": 0.36764001914791766,
      "grad_norm": 0.8146634101867676,
      "learning_rate": 7.509136555195025e-06,
      "loss": 0.398,
      "step": 192
    },
    {
      "epoch": 0.3695548109143131,
      "grad_norm": 0.8095076680183411,
      "learning_rate": 7.481693498953621e-06,
      "loss": 0.4121,
      "step": 193
    },
    {
      "epoch": 0.3714696026807085,
      "grad_norm": 0.8033435344696045,
      "learning_rate": 7.4541508831258695e-06,
      "loss": 0.3912,
      "step": 194
    },
    {
      "epoch": 0.37338439444710386,
      "grad_norm": 0.7945087552070618,
      "learning_rate": 7.4265098126554065e-06,
      "loss": 0.3784,
      "step": 195
    },
    {
      "epoch": 0.3752991862134993,
      "grad_norm": 0.858241081237793,
      "learning_rate": 7.3987713964356335e-06,
      "loss": 0.451,
      "step": 196
    },
    {
      "epoch": 0.3772139779798947,
      "grad_norm": 0.9208387136459351,
      "learning_rate": 7.370936747265226e-06,
      "loss": 0.4539,
      "step": 197
    },
    {
      "epoch": 0.3791287697462901,
      "grad_norm": 0.775140643119812,
      "learning_rate": 7.3430069818035e-06,
      "loss": 0.3956,
      "step": 198
    },
    {
      "epoch": 0.3810435615126855,
      "grad_norm": 0.7926008105278015,
      "learning_rate": 7.314983220525604e-06,
      "loss": 0.4044,
      "step": 199
    },
    {
      "epoch": 0.3829583532790809,
      "grad_norm": 0.7891693711280823,
      "learning_rate": 7.286866587677576e-06,
      "loss": 0.3881,
      "step": 200
    },
    {
      "epoch": 0.3848731450454763,
      "grad_norm": 0.8547941446304321,
      "learning_rate": 7.2586582112312355e-06,
      "loss": 0.4289,
      "step": 201
    },
    {
      "epoch": 0.38678793681187174,
      "grad_norm": 0.7894405722618103,
      "learning_rate": 7.230359222838939e-06,
      "loss": 0.3886,
      "step": 202
    },
    {
      "epoch": 0.3887027285782671,
      "grad_norm": 0.9024775624275208,
      "learning_rate": 7.201970757788172e-06,
      "loss": 0.4586,
      "step": 203
    },
    {
      "epoch": 0.3906175203446625,
      "grad_norm": 0.7940675616264343,
      "learning_rate": 7.173493954956012e-06,
      "loss": 0.3905,
      "step": 204
    },
    {
      "epoch": 0.39253231211105793,
      "grad_norm": 0.8231476545333862,
      "learning_rate": 7.144929956763438e-06,
      "loss": 0.4044,
      "step": 205
    },
    {
      "epoch": 0.39444710387745335,
      "grad_norm": 0.9094031453132629,
      "learning_rate": 7.116279909129492e-06,
      "loss": 0.4502,
      "step": 206
    },
    {
      "epoch": 0.3963618956438487,
      "grad_norm": 0.843540608882904,
      "learning_rate": 7.087544961425317e-06,
      "loss": 0.4037,
      "step": 207
    },
    {
      "epoch": 0.39827668741024413,
      "grad_norm": 0.8074728846549988,
      "learning_rate": 7.058726266428042e-06,
      "loss": 0.405,
      "step": 208
    },
    {
      "epoch": 0.40019147917663955,
      "grad_norm": 0.7620254755020142,
      "learning_rate": 7.029824980274536e-06,
      "loss": 0.3727,
      "step": 209
    },
    {
      "epoch": 0.40210627094303497,
      "grad_norm": 0.8311992883682251,
      "learning_rate": 7.0008422624150285e-06,
      "loss": 0.4172,
      "step": 210
    },
    {
      "epoch": 0.40402106270943033,
      "grad_norm": 0.8231189846992493,
      "learning_rate": 6.971779275566593e-06,
      "loss": 0.4162,
      "step": 211
    },
    {
      "epoch": 0.40593585447582575,
      "grad_norm": 0.8115664720535278,
      "learning_rate": 6.9426371856665005e-06,
      "loss": 0.4206,
      "step": 212
    },
    {
      "epoch": 0.40785064624222117,
      "grad_norm": 0.8393989205360413,
      "learning_rate": 6.913417161825449e-06,
      "loss": 0.4252,
      "step": 213
    },
    {
      "epoch": 0.4097654380086166,
      "grad_norm": 0.8263347148895264,
      "learning_rate": 6.884120376280658e-06,
      "loss": 0.3983,
      "step": 214
    },
    {
      "epoch": 0.41168022977501195,
      "grad_norm": 0.834690272808075,
      "learning_rate": 6.85474800434884e-06,
      "loss": 0.4285,
      "step": 215
    },
    {
      "epoch": 0.41359502154140737,
      "grad_norm": 0.7867841124534607,
      "learning_rate": 6.8253012243790565e-06,
      "loss": 0.4065,
      "step": 216
    },
    {
      "epoch": 0.4155098133078028,
      "grad_norm": 0.848772406578064,
      "learning_rate": 6.795781217705436e-06,
      "loss": 0.4529,
      "step": 217
    },
    {
      "epoch": 0.4174246050741982,
      "grad_norm": 0.7745128870010376,
      "learning_rate": 6.76618916859979e-06,
      "loss": 0.3631,
      "step": 218
    },
    {
      "epoch": 0.41933939684059357,
      "grad_norm": 0.7742826342582703,
      "learning_rate": 6.736526264224101e-06,
      "loss": 0.3886,
      "step": 219
    },
    {
      "epoch": 0.421254188606989,
      "grad_norm": 0.8211061358451843,
      "learning_rate": 6.706793694582892e-06,
      "loss": 0.3824,
      "step": 220
    },
    {
      "epoch": 0.4231689803733844,
      "grad_norm": 0.824216902256012,
      "learning_rate": 6.676992652475487e-06,
      "loss": 0.4104,
      "step": 221
    },
    {
      "epoch": 0.4250837721397798,
      "grad_norm": 0.7848684191703796,
      "learning_rate": 6.647124333448165e-06,
      "loss": 0.3711,
      "step": 222
    },
    {
      "epoch": 0.4269985639061752,
      "grad_norm": 0.8798813819885254,
      "learning_rate": 6.617189935746191e-06,
      "loss": 0.4083,
      "step": 223
    },
    {
      "epoch": 0.4289133556725706,
      "grad_norm": 0.8364046216011047,
      "learning_rate": 6.587190660265752e-06,
      "loss": 0.4248,
      "step": 224
    },
    {
      "epoch": 0.430828147438966,
      "grad_norm": 0.8487688899040222,
      "learning_rate": 6.55712771050577e-06,
      "loss": 0.4148,
      "step": 225
    },
    {
      "epoch": 0.43274293920536144,
      "grad_norm": 0.7809548377990723,
      "learning_rate": 6.52700229251963e-06,
      "loss": 0.393,
      "step": 226
    },
    {
      "epoch": 0.4346577309717568,
      "grad_norm": 0.9122399091720581,
      "learning_rate": 6.496815614866792e-06,
      "loss": 0.4037,
      "step": 227
    },
    {
      "epoch": 0.4365725227381522,
      "grad_norm": 0.8720874786376953,
      "learning_rate": 6.466568888564303e-06,
      "loss": 0.4581,
      "step": 228
    },
    {
      "epoch": 0.43848731450454764,
      "grad_norm": 0.8561883568763733,
      "learning_rate": 6.436263327038225e-06,
      "loss": 0.4046,
      "step": 229
    },
    {
      "epoch": 0.44040210627094306,
      "grad_norm": 0.8326470255851746,
      "learning_rate": 6.405900146074941e-06,
      "loss": 0.3882,
      "step": 230
    },
    {
      "epoch": 0.4423168980373384,
      "grad_norm": 0.8377370238304138,
      "learning_rate": 6.375480563772391e-06,
      "loss": 0.4368,
      "step": 231
    },
    {
      "epoch": 0.44423168980373384,
      "grad_norm": 0.7525307536125183,
      "learning_rate": 6.3450058004912004e-06,
      "loss": 0.3646,
      "step": 232
    },
    {
      "epoch": 0.44614648157012926,
      "grad_norm": 0.8400733470916748,
      "learning_rate": 6.314477078805724e-06,
      "loss": 0.4002,
      "step": 233
    },
    {
      "epoch": 0.4480612733365247,
      "grad_norm": 0.7522779107093811,
      "learning_rate": 6.283895623454997e-06,
      "loss": 0.3865,
      "step": 234
    },
    {
      "epoch": 0.44997606510292004,
      "grad_norm": 0.8109682202339172,
      "learning_rate": 6.2532626612936035e-06,
      "loss": 0.4089,
      "step": 235
    },
    {
      "epoch": 0.45189085686931546,
      "grad_norm": 0.8554459810256958,
      "learning_rate": 6.2225794212424565e-06,
      "loss": 0.4401,
      "step": 236
    },
    {
      "epoch": 0.4538056486357109,
      "grad_norm": 0.8335216641426086,
      "learning_rate": 6.191847134239496e-06,
      "loss": 0.3995,
      "step": 237
    },
    {
      "epoch": 0.4557204404021063,
      "grad_norm": 0.8365229964256287,
      "learning_rate": 6.161067033190311e-06,
      "loss": 0.402,
      "step": 238
    },
    {
      "epoch": 0.45763523216850166,
      "grad_norm": 0.7727139592170715,
      "learning_rate": 6.130240352918675e-06,
      "loss": 0.3976,
      "step": 239
    },
    {
      "epoch": 0.4595500239348971,
      "grad_norm": 0.8664788603782654,
      "learning_rate": 6.0993683301170046e-06,
      "loss": 0.4347,
      "step": 240
    },
    {
      "epoch": 0.4614648157012925,
      "grad_norm": 0.7788071632385254,
      "learning_rate": 6.068452203296754e-06,
      "loss": 0.3849,
      "step": 241
    },
    {
      "epoch": 0.4633796074676879,
      "grad_norm": 0.7709981203079224,
      "learning_rate": 6.0374932127387234e-06,
      "loss": 0.394,
      "step": 242
    },
    {
      "epoch": 0.4652943992340833,
      "grad_norm": 0.8584897518157959,
      "learning_rate": 6.006492600443301e-06,
      "loss": 0.4013,
      "step": 243
    },
    {
      "epoch": 0.4672091910004787,
      "grad_norm": 0.8466057777404785,
      "learning_rate": 5.975451610080643e-06,
      "loss": 0.382,
      "step": 244
    },
    {
      "epoch": 0.4691239827668741,
      "grad_norm": 0.8147895336151123,
      "learning_rate": 5.944371486940772e-06,
      "loss": 0.3925,
      "step": 245
    },
    {
      "epoch": 0.47103877453326953,
      "grad_norm": 0.9486895203590393,
      "learning_rate": 5.913253477883629e-06,
      "loss": 0.438,
      "step": 246
    },
    {
      "epoch": 0.4729535662996649,
      "grad_norm": 0.8018326163291931,
      "learning_rate": 5.882098831289044e-06,
      "loss": 0.3902,
      "step": 247
    },
    {
      "epoch": 0.4748683580660603,
      "grad_norm": 0.7979179620742798,
      "learning_rate": 5.850908797006656e-06,
      "loss": 0.4001,
      "step": 248
    },
    {
      "epoch": 0.47678314983245573,
      "grad_norm": 0.8484137058258057,
      "learning_rate": 5.819684626305776e-06,
      "loss": 0.4393,
      "step": 249
    },
    {
      "epoch": 0.47869794159885115,
      "grad_norm": 0.812910795211792,
      "learning_rate": 5.788427571825186e-06,
      "loss": 0.3939,
      "step": 250
    },
    {
      "epoch": 0.4806127333652465,
      "grad_norm": 0.8852983117103577,
      "learning_rate": 5.757138887522884e-06,
      "loss": 0.4113,
      "step": 251
    },
    {
      "epoch": 0.48252752513164193,
      "grad_norm": 0.8375086188316345,
      "learning_rate": 5.725819828625782e-06,
      "loss": 0.4132,
      "step": 252
    },
    {
      "epoch": 0.48444231689803735,
      "grad_norm": 0.7939973473548889,
      "learning_rate": 5.694471651579346e-06,
      "loss": 0.4003,
      "step": 253
    },
    {
      "epoch": 0.48635710866443277,
      "grad_norm": 0.7971997857093811,
      "learning_rate": 5.663095613997196e-06,
      "loss": 0.3868,
      "step": 254
    },
    {
      "epoch": 0.48827190043082813,
      "grad_norm": 0.778202474117279,
      "learning_rate": 5.631692974610647e-06,
      "loss": 0.3761,
      "step": 255
    },
    {
      "epoch": 0.49018669219722355,
      "grad_norm": 0.8734095692634583,
      "learning_rate": 5.600264993218215e-06,
      "loss": 0.4105,
      "step": 256
    },
    {
      "epoch": 0.49210148396361897,
      "grad_norm": 0.8606191873550415,
      "learning_rate": 5.568812930635076e-06,
      "loss": 0.396,
      "step": 257
    },
    {
      "epoch": 0.4940162757300144,
      "grad_norm": 0.8600229024887085,
      "learning_rate": 5.537338048642487e-06,
      "loss": 0.4379,
      "step": 258
    },
    {
      "epoch": 0.49593106749640975,
      "grad_norm": 0.8452302813529968,
      "learning_rate": 5.505841609937162e-06,
      "loss": 0.3802,
      "step": 259
    },
    {
      "epoch": 0.49784585926280517,
      "grad_norm": 0.7426350712776184,
      "learning_rate": 5.474324878080623e-06,
      "loss": 0.335,
      "step": 260
    },
    {
      "epoch": 0.4997606510292006,
      "grad_norm": 0.8211168050765991,
      "learning_rate": 5.4427891174485014e-06,
      "loss": 0.387,
      "step": 261
    },
    {
      "epoch": 0.501675442795596,
      "grad_norm": 0.855265200138092,
      "learning_rate": 5.41123559317982e-06,
      "loss": 0.4148,
      "step": 262
    },
    {
      "epoch": 0.5035902345619914,
      "grad_norm": 0.8395704030990601,
      "learning_rate": 5.379665571126232e-06,
      "loss": 0.3774,
      "step": 263
    },
    {
      "epoch": 0.5055050263283868,
      "grad_norm": 0.7473710775375366,
      "learning_rate": 5.348080317801244e-06,
      "loss": 0.3672,
      "step": 264
    },
    {
      "epoch": 0.5074198180947822,
      "grad_norm": 0.9001408815383911,
      "learning_rate": 5.316481100329408e-06,
      "loss": 0.4314,
      "step": 265
    },
    {
      "epoch": 0.5093346098611776,
      "grad_norm": 0.8201159834861755,
      "learning_rate": 5.284869186395478e-06,
      "loss": 0.4166,
      "step": 266
    },
    {
      "epoch": 0.511249401627573,
      "grad_norm": 0.8213218450546265,
      "learning_rate": 5.253245844193564e-06,
      "loss": 0.4087,
      "step": 267
    },
    {
      "epoch": 0.5131641933939685,
      "grad_norm": 0.8229288458824158,
      "learning_rate": 5.22161234237625e-06,
      "loss": 0.4013,
      "step": 268
    },
    {
      "epoch": 0.5150789851603638,
      "grad_norm": 0.8140142560005188,
      "learning_rate": 5.189969950003697e-06,
      "loss": 0.4021,
      "step": 269
    },
    {
      "epoch": 0.5169937769267592,
      "grad_norm": 0.8901419043540955,
      "learning_rate": 5.158319936492736e-06,
      "loss": 0.427,
      "step": 270
    },
    {
      "epoch": 0.5189085686931546,
      "grad_norm": 0.7799863219261169,
      "learning_rate": 5.12666357156594e-06,
      "loss": 0.3872,
      "step": 271
    },
    {
      "epoch": 0.52082336045955,
      "grad_norm": 0.8645293712615967,
      "learning_rate": 5.0950021252006845e-06,
      "loss": 0.4287,
      "step": 272
    },
    {
      "epoch": 0.5227381522259454,
      "grad_norm": 0.8488345146179199,
      "learning_rate": 5.063336867578201e-06,
      "loss": 0.4402,
      "step": 273
    },
    {
      "epoch": 0.5246529439923409,
      "grad_norm": 0.8312931060791016,
      "learning_rate": 5.0316690690326175e-06,
      "loss": 0.3858,
      "step": 274
    },
    {
      "epoch": 0.5265677357587363,
      "grad_norm": 0.8159146308898926,
      "learning_rate": 5e-06,
      "loss": 0.3707,
      "step": 275
    },
    {
      "epoch": 0.5284825275251317,
      "grad_norm": 0.8223234415054321,
      "learning_rate": 4.9683309309673825e-06,
      "loss": 0.3836,
      "step": 276
    },
    {
      "epoch": 0.530397319291527,
      "grad_norm": 0.7489441633224487,
      "learning_rate": 4.936663132421801e-06,
      "loss": 0.3666,
      "step": 277
    },
    {
      "epoch": 0.5323121110579224,
      "grad_norm": 0.7627151012420654,
      "learning_rate": 4.904997874799316e-06,
      "loss": 0.3829,
      "step": 278
    },
    {
      "epoch": 0.5342269028243178,
      "grad_norm": 0.8040624856948853,
      "learning_rate": 4.873336428434062e-06,
      "loss": 0.3864,
      "step": 279
    },
    {
      "epoch": 0.5361416945907133,
      "grad_norm": 0.8104556798934937,
      "learning_rate": 4.841680063507265e-06,
      "loss": 0.4226,
      "step": 280
    },
    {
      "epoch": 0.5380564863571087,
      "grad_norm": 0.8425339460372925,
      "learning_rate": 4.8100300499963045e-06,
      "loss": 0.4126,
      "step": 281
    },
    {
      "epoch": 0.5399712781235041,
      "grad_norm": 0.7799105644226074,
      "learning_rate": 4.778387657623751e-06,
      "loss": 0.3768,
      "step": 282
    },
    {
      "epoch": 0.5418860698898995,
      "grad_norm": 0.8573192954063416,
      "learning_rate": 4.746754155806437e-06,
      "loss": 0.451,
      "step": 283
    },
    {
      "epoch": 0.5438008616562949,
      "grad_norm": 0.8153167366981506,
      "learning_rate": 4.715130813604522e-06,
      "loss": 0.3968,
      "step": 284
    },
    {
      "epoch": 0.5457156534226902,
      "grad_norm": 0.8407420516014099,
      "learning_rate": 4.683518899670594e-06,
      "loss": 0.392,
      "step": 285
    },
    {
      "epoch": 0.5476304451890857,
      "grad_norm": 0.8508596420288086,
      "learning_rate": 4.651919682198756e-06,
      "loss": 0.3945,
      "step": 286
    },
    {
      "epoch": 0.5495452369554811,
      "grad_norm": 0.8226655721664429,
      "learning_rate": 4.62033442887377e-06,
      "loss": 0.3993,
      "step": 287
    },
    {
      "epoch": 0.5514600287218765,
      "grad_norm": 0.8097487688064575,
      "learning_rate": 4.588764406820181e-06,
      "loss": 0.4303,
      "step": 288
    },
    {
      "epoch": 0.5533748204882719,
      "grad_norm": 0.7493626475334167,
      "learning_rate": 4.5572108825515e-06,
      "loss": 0.362,
      "step": 289
    },
    {
      "epoch": 0.5552896122546673,
      "grad_norm": 0.7713648676872253,
      "learning_rate": 4.5256751219193784e-06,
      "loss": 0.3906,
      "step": 290
    },
    {
      "epoch": 0.5572044040210627,
      "grad_norm": 0.8310909867286682,
      "learning_rate": 4.49415839006284e-06,
      "loss": 0.4041,
      "step": 291
    },
    {
      "epoch": 0.5591191957874582,
      "grad_norm": 0.8170990943908691,
      "learning_rate": 4.462661951357515e-06,
      "loss": 0.4054,
      "step": 292
    },
    {
      "epoch": 0.5610339875538535,
      "grad_norm": 0.862368643283844,
      "learning_rate": 4.431187069364927e-06,
      "loss": 0.4107,
      "step": 293
    },
    {
      "epoch": 0.5629487793202489,
      "grad_norm": 0.8069734573364258,
      "learning_rate": 4.3997350067817866e-06,
      "loss": 0.3939,
      "step": 294
    },
    {
      "epoch": 0.5648635710866443,
      "grad_norm": 0.8641298413276672,
      "learning_rate": 4.368307025389355e-06,
      "loss": 0.4182,
      "step": 295
    },
    {
      "epoch": 0.5667783628530397,
      "grad_norm": 0.8040350079536438,
      "learning_rate": 4.336904386002805e-06,
      "loss": 0.3863,
      "step": 296
    },
    {
      "epoch": 0.5686931546194351,
      "grad_norm": 0.8322636485099792,
      "learning_rate": 4.3055283484206565e-06,
      "loss": 0.4228,
      "step": 297
    },
    {
      "epoch": 0.5706079463858306,
      "grad_norm": 0.7918723821640015,
      "learning_rate": 4.27418017137422e-06,
      "loss": 0.3749,
      "step": 298
    },
    {
      "epoch": 0.572522738152226,
      "grad_norm": 0.7878877520561218,
      "learning_rate": 4.2428611124771184e-06,
      "loss": 0.3716,
      "step": 299
    },
    {
      "epoch": 0.5744375299186214,
      "grad_norm": 0.7795090675354004,
      "learning_rate": 4.211572428174816e-06,
      "loss": 0.3614,
      "step": 300
    },
    {
      "epoch": 0.5763523216850167,
      "grad_norm": 0.8057751655578613,
      "learning_rate": 4.180315373694225e-06,
      "loss": 0.4015,
      "step": 301
    },
    {
      "epoch": 0.5782671134514121,
      "grad_norm": 0.8051212430000305,
      "learning_rate": 4.149091202993345e-06,
      "loss": 0.3588,
      "step": 302
    },
    {
      "epoch": 0.5801819052178075,
      "grad_norm": 0.8171245455741882,
      "learning_rate": 4.11790116871096e-06,
      "loss": 0.417,
      "step": 303
    },
    {
      "epoch": 0.582096696984203,
      "grad_norm": 0.8987613320350647,
      "learning_rate": 4.086746522116372e-06,
      "loss": 0.4536,
      "step": 304
    },
    {
      "epoch": 0.5840114887505984,
      "grad_norm": 0.7471241354942322,
      "learning_rate": 4.055628513059231e-06,
      "loss": 0.3866,
      "step": 305
    },
    {
      "epoch": 0.5859262805169938,
      "grad_norm": 0.828220009803772,
      "learning_rate": 4.02454838991936e-06,
      "loss": 0.3778,
      "step": 306
    },
    {
      "epoch": 0.5878410722833892,
      "grad_norm": 0.8547297120094299,
      "learning_rate": 3.993507399556699e-06,
      "loss": 0.4308,
      "step": 307
    },
    {
      "epoch": 0.5897558640497846,
      "grad_norm": 0.8033933043479919,
      "learning_rate": 3.962506787261278e-06,
      "loss": 0.3993,
      "step": 308
    },
    {
      "epoch": 0.59167065581618,
      "grad_norm": 0.7902593612670898,
      "learning_rate": 3.931547796703245e-06,
      "loss": 0.3794,
      "step": 309
    },
    {
      "epoch": 0.5935854475825754,
      "grad_norm": 0.8059898018836975,
      "learning_rate": 3.900631669882996e-06,
      "loss": 0.3936,
      "step": 310
    },
    {
      "epoch": 0.5955002393489708,
      "grad_norm": 0.8180558681488037,
      "learning_rate": 3.869759647081326e-06,
      "loss": 0.3695,
      "step": 311
    },
    {
      "epoch": 0.5974150311153662,
      "grad_norm": 0.7877086400985718,
      "learning_rate": 3.83893296680969e-06,
      "loss": 0.3838,
      "step": 312
    },
    {
      "epoch": 0.5993298228817616,
      "grad_norm": 0.7896502614021301,
      "learning_rate": 3.8081528657605045e-06,
      "loss": 0.376,
      "step": 313
    },
    {
      "epoch": 0.601244614648157,
      "grad_norm": 0.7718030214309692,
      "learning_rate": 3.7774205787575455e-06,
      "loss": 0.388,
      "step": 314
    },
    {
      "epoch": 0.6031594064145525,
      "grad_norm": 0.8119059205055237,
      "learning_rate": 3.7467373387063973e-06,
      "loss": 0.4241,
      "step": 315
    }
  ],
  "logging_steps": 1,
  "max_steps": 522,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 105,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.6526141583628698e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}