{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9940119760479043,
  "eval_steps": 500,
  "global_step": 13500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.022177866489243733,
      "grad_norm": 4.408344745635986,
      "learning_rate": 2.3645320197044334e-06,
      "loss": 1.7171,
      "step": 100
    },
    {
      "epoch": 0.04435573297848747,
      "grad_norm": 0.8857895135879517,
      "learning_rate": 4.8275862068965525e-06,
      "loss": 0.6279,
      "step": 200
    },
    {
      "epoch": 0.0665335994677312,
      "grad_norm": 2.241079807281494,
      "learning_rate": 7.290640394088671e-06,
      "loss": 0.5064,
      "step": 300
    },
    {
      "epoch": 0.08871146595697493,
      "grad_norm": 1.0983695983886719,
      "learning_rate": 9.75369458128079e-06,
      "loss": 0.4626,
      "step": 400
    },
    {
      "epoch": 0.11088933244621868,
      "grad_norm": 1.031287670135498,
      "learning_rate": 1.2216748768472909e-05,
      "loss": 0.423,
      "step": 500
    },
    {
      "epoch": 0.1330671989354624,
      "grad_norm": 1.961317777633667,
      "learning_rate": 1.4679802955665026e-05,
      "loss": 0.4184,
      "step": 600
    },
    {
      "epoch": 0.15524506542470615,
      "grad_norm": 1.3524340391159058,
      "learning_rate": 1.7142857142857142e-05,
      "loss": 0.397,
      "step": 700
    },
    {
      "epoch": 0.17742293191394987,
      "grad_norm": 1.7465412616729736,
      "learning_rate": 1.9605911330049263e-05,
      "loss": 0.3795,
      "step": 800
    },
    {
      "epoch": 0.1996007984031936,
      "grad_norm": 1.1473759412765503,
      "learning_rate": 1.986787259142745e-05,
      "loss": 0.3657,
      "step": 900
    },
    {
      "epoch": 0.22177866489243736,
      "grad_norm": 1.5489747524261475,
      "learning_rate": 1.9710578057412507e-05,
      "loss": 0.3726,
      "step": 1000
    },
    {
      "epoch": 0.24395653138168108,
      "grad_norm": 1.2034003734588623,
      "learning_rate": 1.9553283523397563e-05,
      "loss": 0.3502,
      "step": 1100
    },
    {
      "epoch": 0.2661343978709248,
      "grad_norm": 2.0690724849700928,
      "learning_rate": 1.939598898938262e-05,
      "loss": 0.3518,
      "step": 1200
    },
    {
      "epoch": 0.28831226436016855,
      "grad_norm": 1.9681050777435303,
      "learning_rate": 1.9238694455367677e-05,
      "loss": 0.3361,
      "step": 1300
    },
    {
      "epoch": 0.3104901308494123,
      "grad_norm": 1.3863286972045898,
      "learning_rate": 1.9081399921352733e-05,
      "loss": 0.3397,
      "step": 1400
    },
    {
      "epoch": 0.33266799733865604,
      "grad_norm": 0.9572322964668274,
      "learning_rate": 1.8924105387337793e-05,
      "loss": 0.3339,
      "step": 1500
    },
    {
      "epoch": 0.35484586382789973,
      "grad_norm": 0.892398476600647,
      "learning_rate": 1.8766810853322847e-05,
      "loss": 0.3218,
      "step": 1600
    },
    {
      "epoch": 0.3770237303171435,
      "grad_norm": 1.2381540536880493,
      "learning_rate": 1.8609516319307907e-05,
      "loss": 0.3255,
      "step": 1700
    },
    {
      "epoch": 0.3992015968063872,
      "grad_norm": 0.8742302060127258,
      "learning_rate": 1.8452221785292963e-05,
      "loss": 0.3166,
      "step": 1800
    },
    {
      "epoch": 0.421379463295631,
      "grad_norm": 1.0703165531158447,
      "learning_rate": 1.829492725127802e-05,
      "loss": 0.3098,
      "step": 1900
    },
    {
      "epoch": 0.4435573297848747,
      "grad_norm": 1.6606981754302979,
      "learning_rate": 1.8137632717263076e-05,
      "loss": 0.3102,
      "step": 2000
    },
    {
      "epoch": 0.4657351962741184,
      "grad_norm": 1.0174481868743896,
      "learning_rate": 1.7980338183248133e-05,
      "loss": 0.3061,
      "step": 2100
    },
    {
      "epoch": 0.48791306276336216,
      "grad_norm": 0.9234058856964111,
      "learning_rate": 1.7823043649233193e-05,
      "loss": 0.3023,
      "step": 2200
    },
    {
      "epoch": 0.5100909292526059,
      "grad_norm": 0.8972137570381165,
      "learning_rate": 1.7665749115218246e-05,
      "loss": 0.3062,
      "step": 2300
    },
    {
      "epoch": 0.5322687957418496,
      "grad_norm": 0.7803289890289307,
      "learning_rate": 1.7508454581203306e-05,
      "loss": 0.2996,
      "step": 2400
    },
    {
      "epoch": 0.5544466622310934,
      "grad_norm": 0.879205584526062,
      "learning_rate": 1.7351160047188363e-05,
      "loss": 0.303,
      "step": 2500
    },
    {
      "epoch": 0.5766245287203371,
      "grad_norm": 1.0589395761489868,
      "learning_rate": 1.719386551317342e-05,
      "loss": 0.2876,
      "step": 2600
    },
    {
      "epoch": 0.5988023952095808,
      "grad_norm": 0.9810135960578918,
      "learning_rate": 1.7036570979158476e-05,
      "loss": 0.2841,
      "step": 2700
    },
    {
      "epoch": 0.6209802616988246,
      "grad_norm": 0.835926353931427,
      "learning_rate": 1.6879276445143533e-05,
      "loss": 0.2861,
      "step": 2800
    },
    {
      "epoch": 0.6431581281880683,
      "grad_norm": 0.9618144631385803,
      "learning_rate": 1.672198191112859e-05,
      "loss": 0.2881,
      "step": 2900
    },
    {
      "epoch": 0.6653359946773121,
      "grad_norm": 1.2271337509155273,
      "learning_rate": 1.6564687377113646e-05,
      "loss": 0.2795,
      "step": 3000
    },
    {
      "epoch": 0.6875138611665558,
      "grad_norm": 0.933788537979126,
      "learning_rate": 1.6407392843098702e-05,
      "loss": 0.2758,
      "step": 3100
    },
    {
      "epoch": 0.7096917276557995,
      "grad_norm": 1.3361326456069946,
      "learning_rate": 1.6250098309083762e-05,
      "loss": 0.2755,
      "step": 3200
    },
    {
      "epoch": 0.7318695941450433,
      "grad_norm": 0.9134598970413208,
      "learning_rate": 1.6092803775068816e-05,
      "loss": 0.2693,
      "step": 3300
    },
    {
      "epoch": 0.754047460634287,
      "grad_norm": 0.8436419367790222,
      "learning_rate": 1.5935509241053876e-05,
      "loss": 0.2709,
      "step": 3400
    },
    {
      "epoch": 0.7762253271235308,
      "grad_norm": 0.7325775623321533,
      "learning_rate": 1.5778214707038932e-05,
      "loss": 0.2766,
      "step": 3500
    },
    {
      "epoch": 0.7984031936127745,
      "grad_norm": 0.9576388597488403,
      "learning_rate": 1.562092017302399e-05,
      "loss": 0.2683,
      "step": 3600
    },
    {
      "epoch": 0.8205810601020181,
      "grad_norm": 0.812353789806366,
      "learning_rate": 1.5463625639009045e-05,
      "loss": 0.2643,
      "step": 3700
    },
    {
      "epoch": 0.842758926591262,
      "grad_norm": 1.00551176071167,
      "learning_rate": 1.5306331104994102e-05,
      "loss": 0.2696,
      "step": 3800
    },
    {
      "epoch": 0.8649367930805056,
      "grad_norm": 0.7504218816757202,
      "learning_rate": 1.5149036570979159e-05,
      "loss": 0.262,
      "step": 3900
    },
    {
      "epoch": 0.8871146595697494,
      "grad_norm": 0.6838926076889038,
      "learning_rate": 1.4991742036964217e-05,
      "loss": 0.2582,
      "step": 4000
    },
    {
      "epoch": 0.9092925260589931,
      "grad_norm": 0.9068514108657837,
      "learning_rate": 1.4834447502949274e-05,
      "loss": 0.2613,
      "step": 4100
    },
    {
      "epoch": 0.9314703925482368,
      "grad_norm": 0.8156359791755676,
      "learning_rate": 1.4677152968934332e-05,
      "loss": 0.2575,
      "step": 4200
    },
    {
      "epoch": 0.9536482590374806,
      "grad_norm": 0.8061220049858093,
      "learning_rate": 1.4519858434919387e-05,
      "loss": 0.2512,
      "step": 4300
    },
    {
      "epoch": 0.9758261255267243,
      "grad_norm": 0.7665420174598694,
      "learning_rate": 1.4362563900904445e-05,
      "loss": 0.2551,
      "step": 4400
    },
    {
      "epoch": 0.998003992015968,
      "grad_norm": 1.094953179359436,
      "learning_rate": 1.4205269366889502e-05,
      "loss": 0.2515,
      "step": 4500
    },
    {
      "epoch": 1.0201818585052118,
      "grad_norm": 1.0698802471160889,
      "learning_rate": 1.4047974832874558e-05,
      "loss": 0.2425,
      "step": 4600
    },
    {
      "epoch": 1.0423597249944556,
      "grad_norm": 0.9805143475532532,
      "learning_rate": 1.3890680298859615e-05,
      "loss": 0.2353,
      "step": 4700
    },
    {
      "epoch": 1.0645375914836992,
      "grad_norm": 1.0466519594192505,
      "learning_rate": 1.3733385764844673e-05,
      "loss": 0.2449,
      "step": 4800
    },
    {
      "epoch": 1.086715457972943,
      "grad_norm": 0.9419561624526978,
      "learning_rate": 1.3576091230829728e-05,
      "loss": 0.2362,
      "step": 4900
    },
    {
      "epoch": 1.1088933244621868,
      "grad_norm": 0.9370637536048889,
      "learning_rate": 1.3418796696814786e-05,
      "loss": 0.2327,
      "step": 5000
    },
    {
      "epoch": 1.1310711909514304,
      "grad_norm": 0.7672102451324463,
      "learning_rate": 1.3261502162799845e-05,
      "loss": 0.2337,
      "step": 5100
    },
    {
      "epoch": 1.1532490574406742,
      "grad_norm": 1.0745601654052734,
      "learning_rate": 1.3104207628784901e-05,
      "loss": 0.24,
      "step": 5200
    },
    {
      "epoch": 1.175426923929918,
      "grad_norm": 1.0820897817611694,
      "learning_rate": 1.2946913094769958e-05,
      "loss": 0.2271,
      "step": 5300
    },
    {
      "epoch": 1.1976047904191618,
      "grad_norm": 1.155911922454834,
      "learning_rate": 1.2789618560755015e-05,
      "loss": 0.2361,
      "step": 5400
    },
    {
      "epoch": 1.2197826569084054,
      "grad_norm": 0.9654746651649475,
      "learning_rate": 1.2632324026740073e-05,
      "loss": 0.2389,
      "step": 5500
    },
    {
      "epoch": 1.2419605233976492,
      "grad_norm": 1.0573245286941528,
      "learning_rate": 1.2475029492725128e-05,
      "loss": 0.2264,
      "step": 5600
    },
    {
      "epoch": 1.264138389886893,
      "grad_norm": 1.3749500513076782,
      "learning_rate": 1.2317734958710186e-05,
      "loss": 0.229,
      "step": 5700
    },
    {
      "epoch": 1.2863162563761366,
      "grad_norm": 0.9389622211456299,
      "learning_rate": 1.2160440424695243e-05,
      "loss": 0.2277,
      "step": 5800
    },
    {
      "epoch": 1.3084941228653804,
      "grad_norm": 1.2547938823699951,
      "learning_rate": 1.20031458906803e-05,
      "loss": 0.2265,
      "step": 5900
    },
    {
      "epoch": 1.3306719893546242,
      "grad_norm": 1.1487092971801758,
      "learning_rate": 1.1845851356665356e-05,
      "loss": 0.2266,
      "step": 6000
    },
    {
      "epoch": 1.3528498558438677,
      "grad_norm": 0.6461149454116821,
      "learning_rate": 1.1688556822650414e-05,
      "loss": 0.2235,
      "step": 6100
    },
    {
      "epoch": 1.3750277223331115,
      "grad_norm": 0.8437641859054565,
      "learning_rate": 1.1531262288635473e-05,
      "loss": 0.2266,
      "step": 6200
    },
    {
      "epoch": 1.3972055888223553,
      "grad_norm": 0.8984001278877258,
      "learning_rate": 1.1373967754620527e-05,
      "loss": 0.2195,
      "step": 6300
    },
    {
      "epoch": 1.419383455311599,
      "grad_norm": 1.1755112409591675,
      "learning_rate": 1.1216673220605586e-05,
      "loss": 0.2168,
      "step": 6400
    },
    {
      "epoch": 1.4415613218008427,
      "grad_norm": 1.250999927520752,
      "learning_rate": 1.1059378686590642e-05,
      "loss": 0.2214,
      "step": 6500
    },
    {
      "epoch": 1.4637391882900865,
      "grad_norm": 1.2418690919876099,
      "learning_rate": 1.0902084152575699e-05,
      "loss": 0.2196,
      "step": 6600
    },
    {
      "epoch": 1.4859170547793301,
      "grad_norm": 0.9416905641555786,
      "learning_rate": 1.0744789618560756e-05,
      "loss": 0.2237,
      "step": 6700
    },
    {
      "epoch": 1.508094921268574,
      "grad_norm": 0.9549462199211121,
      "learning_rate": 1.0587495084545814e-05,
      "loss": 0.2231,
      "step": 6800
    },
    {
      "epoch": 1.5302727877578177,
      "grad_norm": 0.9897739291191101,
      "learning_rate": 1.0430200550530869e-05,
      "loss": 0.221,
      "step": 6900
    },
    {
      "epoch": 1.5524506542470613,
      "grad_norm": 1.0174314975738525,
      "learning_rate": 1.0272906016515927e-05,
      "loss": 0.2193,
      "step": 7000
    },
    {
      "epoch": 1.5746285207363053,
      "grad_norm": 0.8986598253250122,
      "learning_rate": 1.0115611482500984e-05,
      "loss": 0.2114,
      "step": 7100
    },
    {
      "epoch": 1.596806387225549,
      "grad_norm": 0.7662016749382019,
      "learning_rate": 9.95831694848604e-06,
      "loss": 0.2162,
      "step": 7200
    },
    {
      "epoch": 1.6189842537147925,
      "grad_norm": 0.875023603439331,
      "learning_rate": 9.801022414471097e-06,
      "loss": 0.2093,
      "step": 7300
    },
    {
      "epoch": 1.6411621202040365,
      "grad_norm": 1.059648036956787,
      "learning_rate": 9.643727880456155e-06,
      "loss": 0.2114,
      "step": 7400
    },
    {
      "epoch": 1.66333998669328,
      "grad_norm": 1.2008799314498901,
      "learning_rate": 9.486433346441212e-06,
      "loss": 0.2129,
      "step": 7500
    },
    {
      "epoch": 1.685517853182524,
      "grad_norm": 1.009397029876709,
      "learning_rate": 9.32913881242627e-06,
      "loss": 0.2069,
      "step": 7600
    },
    {
      "epoch": 1.7076957196717677,
      "grad_norm": 0.9461073875427246,
      "learning_rate": 9.171844278411327e-06,
      "loss": 0.2109,
      "step": 7700
    },
    {
      "epoch": 1.7298735861610113,
      "grad_norm": 0.7946839332580566,
      "learning_rate": 9.014549744396383e-06,
      "loss": 0.2051,
      "step": 7800
    },
    {
      "epoch": 1.752051452650255,
      "grad_norm": 1.0686787366867065,
      "learning_rate": 8.85725521038144e-06,
      "loss": 0.2114,
      "step": 7900
    },
    {
      "epoch": 1.7742293191394989,
      "grad_norm": 1.1309982538223267,
      "learning_rate": 8.699960676366497e-06,
      "loss": 0.2113,
      "step": 8000
    },
    {
      "epoch": 1.7964071856287425,
      "grad_norm": 0.8873094320297241,
      "learning_rate": 8.542666142351555e-06,
      "loss": 0.2032,
      "step": 8100
    },
    {
      "epoch": 1.8185850521179863,
      "grad_norm": 1.1685720682144165,
      "learning_rate": 8.385371608336611e-06,
      "loss": 0.2046,
      "step": 8200
    },
    {
      "epoch": 1.84076291860723,
      "grad_norm": 1.1391305923461914,
      "learning_rate": 8.228077074321668e-06,
      "loss": 0.2059,
      "step": 8300
    },
    {
      "epoch": 1.8629407850964737,
      "grad_norm": 1.0028046369552612,
      "learning_rate": 8.070782540306725e-06,
      "loss": 0.2051,
      "step": 8400
    },
    {
      "epoch": 1.8851186515857175,
      "grad_norm": 1.3470697402954102,
      "learning_rate": 7.913488006291781e-06,
      "loss": 0.2059,
      "step": 8500
    },
    {
      "epoch": 1.9072965180749613,
      "grad_norm": 1.290456771850586,
      "learning_rate": 7.75619347227684e-06,
      "loss": 0.1995,
      "step": 8600
    },
    {
      "epoch": 1.9294743845642048,
      "grad_norm": 0.7506065964698792,
      "learning_rate": 7.598898938261896e-06,
      "loss": 0.2011,
      "step": 8700
    },
    {
      "epoch": 1.9516522510534486,
      "grad_norm": 1.170919418334961,
      "learning_rate": 7.441604404246953e-06,
      "loss": 0.2017,
      "step": 8800
    },
    {
      "epoch": 1.9738301175426924,
      "grad_norm": 1.1888222694396973,
      "learning_rate": 7.28430987023201e-06,
      "loss": 0.1998,
      "step": 8900
    },
    {
      "epoch": 1.996007984031936,
      "grad_norm": 1.1401287317276,
      "learning_rate": 7.127015336217067e-06,
      "loss": 0.1996,
      "step": 9000
    },
    {
      "epoch": 2.01818585052118,
      "grad_norm": 1.0609304904937744,
      "learning_rate": 6.969720802202124e-06,
      "loss": 0.194,
      "step": 9100
    },
    {
      "epoch": 2.0403637170104236,
      "grad_norm": 0.7136222124099731,
      "learning_rate": 6.812426268187181e-06,
      "loss": 0.1907,
      "step": 9200
    },
    {
      "epoch": 2.062541583499667,
      "grad_norm": 0.9201442003250122,
      "learning_rate": 6.6551317341722375e-06,
      "loss": 0.1899,
      "step": 9300
    },
    {
      "epoch": 2.0847194499889112,
      "grad_norm": 1.034180998802185,
      "learning_rate": 6.497837200157295e-06,
      "loss": 0.1905,
      "step": 9400
    },
    {
      "epoch": 2.106897316478155,
      "grad_norm": 1.2538888454437256,
      "learning_rate": 6.340542666142352e-06,
      "loss": 0.1895,
      "step": 9500
    },
    {
      "epoch": 2.1290751829673984,
      "grad_norm": 1.1865867376327515,
      "learning_rate": 6.18324813212741e-06,
      "loss": 0.1903,
      "step": 9600
    },
    {
      "epoch": 2.1512530494566424,
      "grad_norm": 1.1879113912582397,
      "learning_rate": 6.0259535981124665e-06,
      "loss": 0.1827,
      "step": 9700
    },
    {
      "epoch": 2.173430915945886,
      "grad_norm": 0.959338903427124,
      "learning_rate": 5.868659064097523e-06,
      "loss": 0.1871,
      "step": 9800
    },
    {
      "epoch": 2.1956087824351296,
      "grad_norm": 1.0765694379806519,
      "learning_rate": 5.7113645300825806e-06,
      "loss": 0.1904,
      "step": 9900
    },
    {
      "epoch": 2.2177866489243736,
      "grad_norm": 1.1562960147857666,
      "learning_rate": 5.554069996067637e-06,
      "loss": 0.1852,
      "step": 10000
    },
    {
      "epoch": 2.239964515413617,
      "grad_norm": 1.1772807836532593,
      "learning_rate": 5.396775462052695e-06,
      "loss": 0.1875,
      "step": 10100
    },
    {
      "epoch": 2.2621423819028608,
      "grad_norm": 0.9771366715431213,
      "learning_rate": 5.239480928037751e-06,
      "loss": 0.1899,
      "step": 10200
    },
    {
      "epoch": 2.284320248392105,
      "grad_norm": 0.7828590273857117,
      "learning_rate": 5.082186394022808e-06,
      "loss": 0.1846,
      "step": 10300
    },
    {
      "epoch": 2.3064981148813484,
      "grad_norm": 1.0688682794570923,
      "learning_rate": 4.924891860007865e-06,
      "loss": 0.186,
      "step": 10400
    },
    {
      "epoch": 2.3286759813705924,
      "grad_norm": 1.2667362689971924,
      "learning_rate": 4.767597325992922e-06,
      "loss": 0.186,
      "step": 10500
    },
    {
      "epoch": 2.350853847859836,
      "grad_norm": 0.9742441177368164,
      "learning_rate": 4.610302791977979e-06,
      "loss": 0.1822,
      "step": 10600
    },
    {
      "epoch": 2.3730317143490796,
      "grad_norm": 0.8631011843681335,
      "learning_rate": 4.453008257963036e-06,
      "loss": 0.1789,
      "step": 10700
    },
    {
      "epoch": 2.3952095808383236,
      "grad_norm": 0.7579483985900879,
      "learning_rate": 4.2957137239480934e-06,
      "loss": 0.1865,
      "step": 10800
    },
    {
      "epoch": 2.417387447327567,
      "grad_norm": 0.8615408539772034,
      "learning_rate": 4.13841918993315e-06,
      "loss": 0.1805,
      "step": 10900
    },
    {
      "epoch": 2.4395653138168107,
      "grad_norm": 1.0644463300704956,
      "learning_rate": 3.9811246559182075e-06,
      "loss": 0.1849,
      "step": 11000
    },
    {
      "epoch": 2.4617431803060548,
      "grad_norm": 0.9933910965919495,
      "learning_rate": 3.823830121903264e-06,
      "loss": 0.1846,
      "step": 11100
    },
    {
      "epoch": 2.4839210467952983,
      "grad_norm": 1.011958360671997,
      "learning_rate": 3.666535587888321e-06,
      "loss": 0.1863,
      "step": 11200
    },
    {
      "epoch": 2.506098913284542,
      "grad_norm": 1.0306683778762817,
      "learning_rate": 3.5092410538733786e-06,
      "loss": 0.1853,
      "step": 11300
    },
    {
      "epoch": 2.528276779773786,
      "grad_norm": 1.0129719972610474,
      "learning_rate": 3.351946519858435e-06,
      "loss": 0.1855,
      "step": 11400
    },
    {
      "epoch": 2.5504546462630295,
      "grad_norm": 1.0215705633163452,
      "learning_rate": 3.1946519858434922e-06,
      "loss": 0.1867,
      "step": 11500
    },
    {
      "epoch": 2.572632512752273,
      "grad_norm": 1.202038288116455,
      "learning_rate": 3.0373574518285493e-06,
      "loss": 0.1839,
      "step": 11600
    },
    {
      "epoch": 2.594810379241517,
      "grad_norm": 1.19171142578125,
      "learning_rate": 2.8800629178136063e-06,
      "loss": 0.1776,
      "step": 11700
    },
    {
      "epoch": 2.6169882457307607,
      "grad_norm": 1.0898429155349731,
      "learning_rate": 2.7227683837986633e-06,
      "loss": 0.178,
      "step": 11800
    },
    {
      "epoch": 2.6391661122200043,
      "grad_norm": 1.005279779434204,
      "learning_rate": 2.56547384978372e-06,
      "loss": 0.1811,
      "step": 11900
    },
    {
      "epoch": 2.6613439787092483,
      "grad_norm": 1.0780277252197266,
      "learning_rate": 2.408179315768777e-06,
      "loss": 0.1831,
      "step": 12000
    },
    {
      "epoch": 2.683521845198492,
      "grad_norm": 1.318746566772461,
      "learning_rate": 2.252457727093984e-06,
      "loss": 0.1835,
      "step": 12100
    },
    {
      "epoch": 2.7056997116877355,
      "grad_norm": 1.289838433265686,
      "learning_rate": 2.0951631930790405e-06,
      "loss": 0.1813,
      "step": 12200
    },
    {
      "epoch": 2.7278775781769795,
      "grad_norm": 0.806324303150177,
      "learning_rate": 1.9378686590640976e-06,
      "loss": 0.1778,
      "step": 12300
    },
    {
      "epoch": 2.750055444666223,
      "grad_norm": 1.2230814695358276,
      "learning_rate": 1.7805741250491546e-06,
      "loss": 0.1797,
      "step": 12400
    },
    {
      "epoch": 2.7722333111554667,
      "grad_norm": 1.0323050022125244,
      "learning_rate": 1.6232795910342116e-06,
      "loss": 0.1832,
      "step": 12500
    },
    {
      "epoch": 2.7944111776447107,
      "grad_norm": 0.9353643655776978,
      "learning_rate": 1.4659850570192689e-06,
      "loss": 0.1828,
      "step": 12600
    },
    {
      "epoch": 2.8165890441339543,
      "grad_norm": 0.8385490775108337,
      "learning_rate": 1.3086905230043257e-06,
      "loss": 0.1763,
      "step": 12700
    },
    {
      "epoch": 2.838766910623198,
      "grad_norm": 0.9432787299156189,
      "learning_rate": 1.1513959889893827e-06,
      "loss": 0.18,
      "step": 12800
    },
    {
      "epoch": 2.860944777112442,
      "grad_norm": 1.0854963064193726,
      "learning_rate": 9.941014549744397e-07,
      "loss": 0.1786,
      "step": 12900
    },
    {
      "epoch": 2.8831226436016855,
      "grad_norm": 1.0914461612701416,
      "learning_rate": 8.368069209594968e-07,
      "loss": 0.1804,
      "step": 13000
    },
    {
      "epoch": 2.905300510090929,
      "grad_norm": 0.8744707703590393,
      "learning_rate": 6.795123869445537e-07,
      "loss": 0.1776,
      "step": 13100
    },
    {
      "epoch": 2.927478376580173,
      "grad_norm": 1.073390245437622,
      "learning_rate": 5.222178529296107e-07,
      "loss": 0.1797,
      "step": 13200
    },
    {
      "epoch": 2.9496562430694167,
      "grad_norm": 1.0887576341629028,
      "learning_rate": 3.6492331891466777e-07,
      "loss": 0.1791,
      "step": 13300
    },
    {
      "epoch": 2.9718341095586602,
      "grad_norm": 1.3841413259506226,
      "learning_rate": 2.0762878489972477e-07,
      "loss": 0.1792,
      "step": 13400
    },
    {
      "epoch": 2.9940119760479043,
      "grad_norm": 1.0988340377807617,
      "learning_rate": 5.033425088478176e-08,
      "loss": 0.1834,
      "step": 13500
    }
  ],
  "logging_steps": 100,
  "max_steps": 13527,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 8.765106604499366e+16,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}