deberta-v3-base-end2end-absa / trainer_state.json
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9940119760479043,
"eval_steps": 500,
"global_step": 13500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.022177866489243733,
"grad_norm": 4.408344745635986,
"learning_rate": 2.3645320197044334e-06,
"loss": 1.7171,
"step": 100
},
{
"epoch": 0.04435573297848747,
"grad_norm": 0.8857895135879517,
"learning_rate": 4.8275862068965525e-06,
"loss": 0.6279,
"step": 200
},
{
"epoch": 0.0665335994677312,
"grad_norm": 2.241079807281494,
"learning_rate": 7.290640394088671e-06,
"loss": 0.5064,
"step": 300
},
{
"epoch": 0.08871146595697493,
"grad_norm": 1.0983695983886719,
"learning_rate": 9.75369458128079e-06,
"loss": 0.4626,
"step": 400
},
{
"epoch": 0.11088933244621868,
"grad_norm": 1.031287670135498,
"learning_rate": 1.2216748768472909e-05,
"loss": 0.423,
"step": 500
},
{
"epoch": 0.1330671989354624,
"grad_norm": 1.961317777633667,
"learning_rate": 1.4679802955665026e-05,
"loss": 0.4184,
"step": 600
},
{
"epoch": 0.15524506542470615,
"grad_norm": 1.3524340391159058,
"learning_rate": 1.7142857142857142e-05,
"loss": 0.397,
"step": 700
},
{
"epoch": 0.17742293191394987,
"grad_norm": 1.7465412616729736,
"learning_rate": 1.9605911330049263e-05,
"loss": 0.3795,
"step": 800
},
{
"epoch": 0.1996007984031936,
"grad_norm": 1.1473759412765503,
"learning_rate": 1.986787259142745e-05,
"loss": 0.3657,
"step": 900
},
{
"epoch": 0.22177866489243736,
"grad_norm": 1.5489747524261475,
"learning_rate": 1.9710578057412507e-05,
"loss": 0.3726,
"step": 1000
},
{
"epoch": 0.24395653138168108,
"grad_norm": 1.2034003734588623,
"learning_rate": 1.9553283523397563e-05,
"loss": 0.3502,
"step": 1100
},
{
"epoch": 0.2661343978709248,
"grad_norm": 2.0690724849700928,
"learning_rate": 1.939598898938262e-05,
"loss": 0.3518,
"step": 1200
},
{
"epoch": 0.28831226436016855,
"grad_norm": 1.9681050777435303,
"learning_rate": 1.9238694455367677e-05,
"loss": 0.3361,
"step": 1300
},
{
"epoch": 0.3104901308494123,
"grad_norm": 1.3863286972045898,
"learning_rate": 1.9081399921352733e-05,
"loss": 0.3397,
"step": 1400
},
{
"epoch": 0.33266799733865604,
"grad_norm": 0.9572322964668274,
"learning_rate": 1.8924105387337793e-05,
"loss": 0.3339,
"step": 1500
},
{
"epoch": 0.35484586382789973,
"grad_norm": 0.892398476600647,
"learning_rate": 1.8766810853322847e-05,
"loss": 0.3218,
"step": 1600
},
{
"epoch": 0.3770237303171435,
"grad_norm": 1.2381540536880493,
"learning_rate": 1.8609516319307907e-05,
"loss": 0.3255,
"step": 1700
},
{
"epoch": 0.3992015968063872,
"grad_norm": 0.8742302060127258,
"learning_rate": 1.8452221785292963e-05,
"loss": 0.3166,
"step": 1800
},
{
"epoch": 0.421379463295631,
"grad_norm": 1.0703165531158447,
"learning_rate": 1.829492725127802e-05,
"loss": 0.3098,
"step": 1900
},
{
"epoch": 0.4435573297848747,
"grad_norm": 1.6606981754302979,
"learning_rate": 1.8137632717263076e-05,
"loss": 0.3102,
"step": 2000
},
{
"epoch": 0.4657351962741184,
"grad_norm": 1.0174481868743896,
"learning_rate": 1.7980338183248133e-05,
"loss": 0.3061,
"step": 2100
},
{
"epoch": 0.48791306276336216,
"grad_norm": 0.9234058856964111,
"learning_rate": 1.7823043649233193e-05,
"loss": 0.3023,
"step": 2200
},
{
"epoch": 0.5100909292526059,
"grad_norm": 0.8972137570381165,
"learning_rate": 1.7665749115218246e-05,
"loss": 0.3062,
"step": 2300
},
{
"epoch": 0.5322687957418496,
"grad_norm": 0.7803289890289307,
"learning_rate": 1.7508454581203306e-05,
"loss": 0.2996,
"step": 2400
},
{
"epoch": 0.5544466622310934,
"grad_norm": 0.879205584526062,
"learning_rate": 1.7351160047188363e-05,
"loss": 0.303,
"step": 2500
},
{
"epoch": 0.5766245287203371,
"grad_norm": 1.0589395761489868,
"learning_rate": 1.719386551317342e-05,
"loss": 0.2876,
"step": 2600
},
{
"epoch": 0.5988023952095808,
"grad_norm": 0.9810135960578918,
"learning_rate": 1.7036570979158476e-05,
"loss": 0.2841,
"step": 2700
},
{
"epoch": 0.6209802616988246,
"grad_norm": 0.835926353931427,
"learning_rate": 1.6879276445143533e-05,
"loss": 0.2861,
"step": 2800
},
{
"epoch": 0.6431581281880683,
"grad_norm": 0.9618144631385803,
"learning_rate": 1.672198191112859e-05,
"loss": 0.2881,
"step": 2900
},
{
"epoch": 0.6653359946773121,
"grad_norm": 1.2271337509155273,
"learning_rate": 1.6564687377113646e-05,
"loss": 0.2795,
"step": 3000
},
{
"epoch": 0.6875138611665558,
"grad_norm": 0.933788537979126,
"learning_rate": 1.6407392843098702e-05,
"loss": 0.2758,
"step": 3100
},
{
"epoch": 0.7096917276557995,
"grad_norm": 1.3361326456069946,
"learning_rate": 1.6250098309083762e-05,
"loss": 0.2755,
"step": 3200
},
{
"epoch": 0.7318695941450433,
"grad_norm": 0.9134598970413208,
"learning_rate": 1.6092803775068816e-05,
"loss": 0.2693,
"step": 3300
},
{
"epoch": 0.754047460634287,
"grad_norm": 0.8436419367790222,
"learning_rate": 1.5935509241053876e-05,
"loss": 0.2709,
"step": 3400
},
{
"epoch": 0.7762253271235308,
"grad_norm": 0.7325775623321533,
"learning_rate": 1.5778214707038932e-05,
"loss": 0.2766,
"step": 3500
},
{
"epoch": 0.7984031936127745,
"grad_norm": 0.9576388597488403,
"learning_rate": 1.562092017302399e-05,
"loss": 0.2683,
"step": 3600
},
{
"epoch": 0.8205810601020181,
"grad_norm": 0.812353789806366,
"learning_rate": 1.5463625639009045e-05,
"loss": 0.2643,
"step": 3700
},
{
"epoch": 0.842758926591262,
"grad_norm": 1.00551176071167,
"learning_rate": 1.5306331104994102e-05,
"loss": 0.2696,
"step": 3800
},
{
"epoch": 0.8649367930805056,
"grad_norm": 0.7504218816757202,
"learning_rate": 1.5149036570979159e-05,
"loss": 0.262,
"step": 3900
},
{
"epoch": 0.8871146595697494,
"grad_norm": 0.6838926076889038,
"learning_rate": 1.4991742036964217e-05,
"loss": 0.2582,
"step": 4000
},
{
"epoch": 0.9092925260589931,
"grad_norm": 0.9068514108657837,
"learning_rate": 1.4834447502949274e-05,
"loss": 0.2613,
"step": 4100
},
{
"epoch": 0.9314703925482368,
"grad_norm": 0.8156359791755676,
"learning_rate": 1.4677152968934332e-05,
"loss": 0.2575,
"step": 4200
},
{
"epoch": 0.9536482590374806,
"grad_norm": 0.8061220049858093,
"learning_rate": 1.4519858434919387e-05,
"loss": 0.2512,
"step": 4300
},
{
"epoch": 0.9758261255267243,
"grad_norm": 0.7665420174598694,
"learning_rate": 1.4362563900904445e-05,
"loss": 0.2551,
"step": 4400
},
{
"epoch": 0.998003992015968,
"grad_norm": 1.094953179359436,
"learning_rate": 1.4205269366889502e-05,
"loss": 0.2515,
"step": 4500
},
{
"epoch": 1.0201818585052118,
"grad_norm": 1.0698802471160889,
"learning_rate": 1.4047974832874558e-05,
"loss": 0.2425,
"step": 4600
},
{
"epoch": 1.0423597249944556,
"grad_norm": 0.9805143475532532,
"learning_rate": 1.3890680298859615e-05,
"loss": 0.2353,
"step": 4700
},
{
"epoch": 1.0645375914836992,
"grad_norm": 1.0466519594192505,
"learning_rate": 1.3733385764844673e-05,
"loss": 0.2449,
"step": 4800
},
{
"epoch": 1.086715457972943,
"grad_norm": 0.9419561624526978,
"learning_rate": 1.3576091230829728e-05,
"loss": 0.2362,
"step": 4900
},
{
"epoch": 1.1088933244621868,
"grad_norm": 0.9370637536048889,
"learning_rate": 1.3418796696814786e-05,
"loss": 0.2327,
"step": 5000
},
{
"epoch": 1.1310711909514304,
"grad_norm": 0.7672102451324463,
"learning_rate": 1.3261502162799845e-05,
"loss": 0.2337,
"step": 5100
},
{
"epoch": 1.1532490574406742,
"grad_norm": 1.0745601654052734,
"learning_rate": 1.3104207628784901e-05,
"loss": 0.24,
"step": 5200
},
{
"epoch": 1.175426923929918,
"grad_norm": 1.0820897817611694,
"learning_rate": 1.2946913094769958e-05,
"loss": 0.2271,
"step": 5300
},
{
"epoch": 1.1976047904191618,
"grad_norm": 1.155911922454834,
"learning_rate": 1.2789618560755015e-05,
"loss": 0.2361,
"step": 5400
},
{
"epoch": 1.2197826569084054,
"grad_norm": 0.9654746651649475,
"learning_rate": 1.2632324026740073e-05,
"loss": 0.2389,
"step": 5500
},
{
"epoch": 1.2419605233976492,
"grad_norm": 1.0573245286941528,
"learning_rate": 1.2475029492725128e-05,
"loss": 0.2264,
"step": 5600
},
{
"epoch": 1.264138389886893,
"grad_norm": 1.3749500513076782,
"learning_rate": 1.2317734958710186e-05,
"loss": 0.229,
"step": 5700
},
{
"epoch": 1.2863162563761366,
"grad_norm": 0.9389622211456299,
"learning_rate": 1.2160440424695243e-05,
"loss": 0.2277,
"step": 5800
},
{
"epoch": 1.3084941228653804,
"grad_norm": 1.2547938823699951,
"learning_rate": 1.20031458906803e-05,
"loss": 0.2265,
"step": 5900
},
{
"epoch": 1.3306719893546242,
"grad_norm": 1.1487092971801758,
"learning_rate": 1.1845851356665356e-05,
"loss": 0.2266,
"step": 6000
},
{
"epoch": 1.3528498558438677,
"grad_norm": 0.6461149454116821,
"learning_rate": 1.1688556822650414e-05,
"loss": 0.2235,
"step": 6100
},
{
"epoch": 1.3750277223331115,
"grad_norm": 0.8437641859054565,
"learning_rate": 1.1531262288635473e-05,
"loss": 0.2266,
"step": 6200
},
{
"epoch": 1.3972055888223553,
"grad_norm": 0.8984001278877258,
"learning_rate": 1.1373967754620527e-05,
"loss": 0.2195,
"step": 6300
},
{
"epoch": 1.419383455311599,
"grad_norm": 1.1755112409591675,
"learning_rate": 1.1216673220605586e-05,
"loss": 0.2168,
"step": 6400
},
{
"epoch": 1.4415613218008427,
"grad_norm": 1.250999927520752,
"learning_rate": 1.1059378686590642e-05,
"loss": 0.2214,
"step": 6500
},
{
"epoch": 1.4637391882900865,
"grad_norm": 1.2418690919876099,
"learning_rate": 1.0902084152575699e-05,
"loss": 0.2196,
"step": 6600
},
{
"epoch": 1.4859170547793301,
"grad_norm": 0.9416905641555786,
"learning_rate": 1.0744789618560756e-05,
"loss": 0.2237,
"step": 6700
},
{
"epoch": 1.508094921268574,
"grad_norm": 0.9549462199211121,
"learning_rate": 1.0587495084545814e-05,
"loss": 0.2231,
"step": 6800
},
{
"epoch": 1.5302727877578177,
"grad_norm": 0.9897739291191101,
"learning_rate": 1.0430200550530869e-05,
"loss": 0.221,
"step": 6900
},
{
"epoch": 1.5524506542470613,
"grad_norm": 1.0174314975738525,
"learning_rate": 1.0272906016515927e-05,
"loss": 0.2193,
"step": 7000
},
{
"epoch": 1.5746285207363053,
"grad_norm": 0.8986598253250122,
"learning_rate": 1.0115611482500984e-05,
"loss": 0.2114,
"step": 7100
},
{
"epoch": 1.596806387225549,
"grad_norm": 0.7662016749382019,
"learning_rate": 9.95831694848604e-06,
"loss": 0.2162,
"step": 7200
},
{
"epoch": 1.6189842537147925,
"grad_norm": 0.875023603439331,
"learning_rate": 9.801022414471097e-06,
"loss": 0.2093,
"step": 7300
},
{
"epoch": 1.6411621202040365,
"grad_norm": 1.059648036956787,
"learning_rate": 9.643727880456155e-06,
"loss": 0.2114,
"step": 7400
},
{
"epoch": 1.66333998669328,
"grad_norm": 1.2008799314498901,
"learning_rate": 9.486433346441212e-06,
"loss": 0.2129,
"step": 7500
},
{
"epoch": 1.685517853182524,
"grad_norm": 1.009397029876709,
"learning_rate": 9.32913881242627e-06,
"loss": 0.2069,
"step": 7600
},
{
"epoch": 1.7076957196717677,
"grad_norm": 0.9461073875427246,
"learning_rate": 9.171844278411327e-06,
"loss": 0.2109,
"step": 7700
},
{
"epoch": 1.7298735861610113,
"grad_norm": 0.7946839332580566,
"learning_rate": 9.014549744396383e-06,
"loss": 0.2051,
"step": 7800
},
{
"epoch": 1.752051452650255,
"grad_norm": 1.0686787366867065,
"learning_rate": 8.85725521038144e-06,
"loss": 0.2114,
"step": 7900
},
{
"epoch": 1.7742293191394989,
"grad_norm": 1.1309982538223267,
"learning_rate": 8.699960676366497e-06,
"loss": 0.2113,
"step": 8000
},
{
"epoch": 1.7964071856287425,
"grad_norm": 0.8873094320297241,
"learning_rate": 8.542666142351555e-06,
"loss": 0.2032,
"step": 8100
},
{
"epoch": 1.8185850521179863,
"grad_norm": 1.1685720682144165,
"learning_rate": 8.385371608336611e-06,
"loss": 0.2046,
"step": 8200
},
{
"epoch": 1.84076291860723,
"grad_norm": 1.1391305923461914,
"learning_rate": 8.228077074321668e-06,
"loss": 0.2059,
"step": 8300
},
{
"epoch": 1.8629407850964737,
"grad_norm": 1.0028046369552612,
"learning_rate": 8.070782540306725e-06,
"loss": 0.2051,
"step": 8400
},
{
"epoch": 1.8851186515857175,
"grad_norm": 1.3470697402954102,
"learning_rate": 7.913488006291781e-06,
"loss": 0.2059,
"step": 8500
},
{
"epoch": 1.9072965180749613,
"grad_norm": 1.290456771850586,
"learning_rate": 7.75619347227684e-06,
"loss": 0.1995,
"step": 8600
},
{
"epoch": 1.9294743845642048,
"grad_norm": 0.7506065964698792,
"learning_rate": 7.598898938261896e-06,
"loss": 0.2011,
"step": 8700
},
{
"epoch": 1.9516522510534486,
"grad_norm": 1.170919418334961,
"learning_rate": 7.441604404246953e-06,
"loss": 0.2017,
"step": 8800
},
{
"epoch": 1.9738301175426924,
"grad_norm": 1.1888222694396973,
"learning_rate": 7.28430987023201e-06,
"loss": 0.1998,
"step": 8900
},
{
"epoch": 1.996007984031936,
"grad_norm": 1.1401287317276,
"learning_rate": 7.127015336217067e-06,
"loss": 0.1996,
"step": 9000
},
{
"epoch": 2.01818585052118,
"grad_norm": 1.0609304904937744,
"learning_rate": 6.969720802202124e-06,
"loss": 0.194,
"step": 9100
},
{
"epoch": 2.0403637170104236,
"grad_norm": 0.7136222124099731,
"learning_rate": 6.812426268187181e-06,
"loss": 0.1907,
"step": 9200
},
{
"epoch": 2.062541583499667,
"grad_norm": 0.9201442003250122,
"learning_rate": 6.6551317341722375e-06,
"loss": 0.1899,
"step": 9300
},
{
"epoch": 2.0847194499889112,
"grad_norm": 1.034180998802185,
"learning_rate": 6.497837200157295e-06,
"loss": 0.1905,
"step": 9400
},
{
"epoch": 2.106897316478155,
"grad_norm": 1.2538888454437256,
"learning_rate": 6.340542666142352e-06,
"loss": 0.1895,
"step": 9500
},
{
"epoch": 2.1290751829673984,
"grad_norm": 1.1865867376327515,
"learning_rate": 6.18324813212741e-06,
"loss": 0.1903,
"step": 9600
},
{
"epoch": 2.1512530494566424,
"grad_norm": 1.1879113912582397,
"learning_rate": 6.0259535981124665e-06,
"loss": 0.1827,
"step": 9700
},
{
"epoch": 2.173430915945886,
"grad_norm": 0.959338903427124,
"learning_rate": 5.868659064097523e-06,
"loss": 0.1871,
"step": 9800
},
{
"epoch": 2.1956087824351296,
"grad_norm": 1.0765694379806519,
"learning_rate": 5.7113645300825806e-06,
"loss": 0.1904,
"step": 9900
},
{
"epoch": 2.2177866489243736,
"grad_norm": 1.1562960147857666,
"learning_rate": 5.554069996067637e-06,
"loss": 0.1852,
"step": 10000
},
{
"epoch": 2.239964515413617,
"grad_norm": 1.1772807836532593,
"learning_rate": 5.396775462052695e-06,
"loss": 0.1875,
"step": 10100
},
{
"epoch": 2.2621423819028608,
"grad_norm": 0.9771366715431213,
"learning_rate": 5.239480928037751e-06,
"loss": 0.1899,
"step": 10200
},
{
"epoch": 2.284320248392105,
"grad_norm": 0.7828590273857117,
"learning_rate": 5.082186394022808e-06,
"loss": 0.1846,
"step": 10300
},
{
"epoch": 2.3064981148813484,
"grad_norm": 1.0688682794570923,
"learning_rate": 4.924891860007865e-06,
"loss": 0.186,
"step": 10400
},
{
"epoch": 2.3286759813705924,
"grad_norm": 1.2667362689971924,
"learning_rate": 4.767597325992922e-06,
"loss": 0.186,
"step": 10500
},
{
"epoch": 2.350853847859836,
"grad_norm": 0.9742441177368164,
"learning_rate": 4.610302791977979e-06,
"loss": 0.1822,
"step": 10600
},
{
"epoch": 2.3730317143490796,
"grad_norm": 0.8631011843681335,
"learning_rate": 4.453008257963036e-06,
"loss": 0.1789,
"step": 10700
},
{
"epoch": 2.3952095808383236,
"grad_norm": 0.7579483985900879,
"learning_rate": 4.2957137239480934e-06,
"loss": 0.1865,
"step": 10800
},
{
"epoch": 2.417387447327567,
"grad_norm": 0.8615408539772034,
"learning_rate": 4.13841918993315e-06,
"loss": 0.1805,
"step": 10900
},
{
"epoch": 2.4395653138168107,
"grad_norm": 1.0644463300704956,
"learning_rate": 3.9811246559182075e-06,
"loss": 0.1849,
"step": 11000
},
{
"epoch": 2.4617431803060548,
"grad_norm": 0.9933910965919495,
"learning_rate": 3.823830121903264e-06,
"loss": 0.1846,
"step": 11100
},
{
"epoch": 2.4839210467952983,
"grad_norm": 1.011958360671997,
"learning_rate": 3.666535587888321e-06,
"loss": 0.1863,
"step": 11200
},
{
"epoch": 2.506098913284542,
"grad_norm": 1.0306683778762817,
"learning_rate": 3.5092410538733786e-06,
"loss": 0.1853,
"step": 11300
},
{
"epoch": 2.528276779773786,
"grad_norm": 1.0129719972610474,
"learning_rate": 3.351946519858435e-06,
"loss": 0.1855,
"step": 11400
},
{
"epoch": 2.5504546462630295,
"grad_norm": 1.0215705633163452,
"learning_rate": 3.1946519858434922e-06,
"loss": 0.1867,
"step": 11500
},
{
"epoch": 2.572632512752273,
"grad_norm": 1.202038288116455,
"learning_rate": 3.0373574518285493e-06,
"loss": 0.1839,
"step": 11600
},
{
"epoch": 2.594810379241517,
"grad_norm": 1.19171142578125,
"learning_rate": 2.8800629178136063e-06,
"loss": 0.1776,
"step": 11700
},
{
"epoch": 2.6169882457307607,
"grad_norm": 1.0898429155349731,
"learning_rate": 2.7227683837986633e-06,
"loss": 0.178,
"step": 11800
},
{
"epoch": 2.6391661122200043,
"grad_norm": 1.005279779434204,
"learning_rate": 2.56547384978372e-06,
"loss": 0.1811,
"step": 11900
},
{
"epoch": 2.6613439787092483,
"grad_norm": 1.0780277252197266,
"learning_rate": 2.408179315768777e-06,
"loss": 0.1831,
"step": 12000
},
{
"epoch": 2.683521845198492,
"grad_norm": 1.318746566772461,
"learning_rate": 2.252457727093984e-06,
"loss": 0.1835,
"step": 12100
},
{
"epoch": 2.7056997116877355,
"grad_norm": 1.289838433265686,
"learning_rate": 2.0951631930790405e-06,
"loss": 0.1813,
"step": 12200
},
{
"epoch": 2.7278775781769795,
"grad_norm": 0.806324303150177,
"learning_rate": 1.9378686590640976e-06,
"loss": 0.1778,
"step": 12300
},
{
"epoch": 2.750055444666223,
"grad_norm": 1.2230814695358276,
"learning_rate": 1.7805741250491546e-06,
"loss": 0.1797,
"step": 12400
},
{
"epoch": 2.7722333111554667,
"grad_norm": 1.0323050022125244,
"learning_rate": 1.6232795910342116e-06,
"loss": 0.1832,
"step": 12500
},
{
"epoch": 2.7944111776447107,
"grad_norm": 0.9353643655776978,
"learning_rate": 1.4659850570192689e-06,
"loss": 0.1828,
"step": 12600
},
{
"epoch": 2.8165890441339543,
"grad_norm": 0.8385490775108337,
"learning_rate": 1.3086905230043257e-06,
"loss": 0.1763,
"step": 12700
},
{
"epoch": 2.838766910623198,
"grad_norm": 0.9432787299156189,
"learning_rate": 1.1513959889893827e-06,
"loss": 0.18,
"step": 12800
},
{
"epoch": 2.860944777112442,
"grad_norm": 1.0854963064193726,
"learning_rate": 9.941014549744397e-07,
"loss": 0.1786,
"step": 12900
},
{
"epoch": 2.8831226436016855,
"grad_norm": 1.0914461612701416,
"learning_rate": 8.368069209594968e-07,
"loss": 0.1804,
"step": 13000
},
{
"epoch": 2.905300510090929,
"grad_norm": 0.8744707703590393,
"learning_rate": 6.795123869445537e-07,
"loss": 0.1776,
"step": 13100
},
{
"epoch": 2.927478376580173,
"grad_norm": 1.073390245437622,
"learning_rate": 5.222178529296107e-07,
"loss": 0.1797,
"step": 13200
},
{
"epoch": 2.9496562430694167,
"grad_norm": 1.0887576341629028,
"learning_rate": 3.6492331891466777e-07,
"loss": 0.1791,
"step": 13300
},
{
"epoch": 2.9718341095586602,
"grad_norm": 1.3841413259506226,
"learning_rate": 2.0762878489972477e-07,
"loss": 0.1792,
"step": 13400
},
{
"epoch": 2.9940119760479043,
"grad_norm": 1.0988340377807617,
"learning_rate": 5.033425088478176e-08,
"loss": 0.1834,
"step": 13500
}
],
"logging_steps": 100,
"max_steps": 13527,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8.765106604499366e+16,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}
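
A minimal sketch (not part of the uploaded files) of one way to inspect this trainer state: it assumes the file is saved locally as "trainer_state.json" and that matplotlib is installed; the keys used ("step", "loss", "learning_rate") are the ones logged in log_history above.

# Load the trainer state and plot training loss and learning-rate schedule by step.
import json
import matplotlib.pyplot as plt

with open("trainer_state.json") as f:           # assumed local path
    state = json.load(f)

# Keep only log entries that carry a loss value (every 100 steps here).
entries = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in entries]
losses = [e["loss"] for e in entries]
lrs = [e["learning_rate"] for e in entries]

fig, ax_loss = plt.subplots()
ax_loss.plot(steps, losses, label="training loss")
ax_loss.set_xlabel("step")
ax_loss.set_ylabel("loss")

ax_lr = ax_loss.twinx()                          # second axis for the LR schedule
ax_lr.plot(steps, lrs, color="tab:orange", label="learning rate")
ax_lr.set_ylabel("learning rate")

fig.tight_layout()
plt.show()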