{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.1584800741427248,
  "eval_steps": 500,
  "global_step": 2500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004633920296570899,
      "grad_norm": 0.4352174997329712,
      "learning_rate": 0.00029937442075996293,
      "loss": 0.5216,
      "step": 10
    },
    {
      "epoch": 0.009267840593141797,
      "grad_norm": 0.2795519530773163,
      "learning_rate": 0.0002986793327154773,
      "loss": 0.5275,
      "step": 20
    },
    {
      "epoch": 0.013901760889712697,
      "grad_norm": 0.3515735864639282,
      "learning_rate": 0.00029798424467099164,
      "loss": 0.4768,
      "step": 30
    },
    {
      "epoch": 0.018535681186283594,
      "grad_norm": 0.3210294842720032,
      "learning_rate": 0.000297289156626506,
      "loss": 0.4708,
      "step": 40
    },
    {
      "epoch": 0.023169601482854494,
      "grad_norm": 0.3719145953655243,
      "learning_rate": 0.0002965940685820204,
      "loss": 0.4825,
      "step": 50
    },
    {
      "epoch": 0.027803521779425393,
      "grad_norm": 0.41005653142929077,
      "learning_rate": 0.0002958989805375347,
      "loss": 0.5387,
      "step": 60
    },
    {
      "epoch": 0.03243744207599629,
      "grad_norm": 0.44353237748146057,
      "learning_rate": 0.0002952038924930491,
      "loss": 0.517,
      "step": 70
    },
    {
      "epoch": 0.03707136237256719,
      "grad_norm": 0.42356154322624207,
      "learning_rate": 0.00029450880444856345,
      "loss": 0.448,
      "step": 80
    },
    {
      "epoch": 0.04170528266913809,
      "grad_norm": 0.37184569239616394,
      "learning_rate": 0.00029381371640407785,
      "loss": 0.5389,
      "step": 90
    },
    {
      "epoch": 0.04633920296570899,
      "grad_norm": 0.34952738881111145,
      "learning_rate": 0.0002931186283595922,
      "loss": 0.5006,
      "step": 100
    },
    {
      "epoch": 0.05097312326227989,
      "grad_norm": 0.425190806388855,
      "learning_rate": 0.00029242354031510656,
      "loss": 0.5391,
      "step": 110
    },
    {
      "epoch": 0.05560704355885079,
      "grad_norm": 0.522118330001831,
      "learning_rate": 0.0002917284522706209,
      "loss": 0.5485,
      "step": 120
    },
    {
      "epoch": 0.060240963855421686,
      "grad_norm": 0.42207586765289307,
      "learning_rate": 0.0002910333642261353,
      "loss": 0.558,
      "step": 130
    },
    {
      "epoch": 0.06487488415199258,
      "grad_norm": 0.3233225643634796,
      "learning_rate": 0.00029033827618164966,
      "loss": 0.4983,
      "step": 140
    },
    {
      "epoch": 0.06950880444856349,
      "grad_norm": 0.3595481216907501,
      "learning_rate": 0.000289643188137164,
      "loss": 0.4469,
      "step": 150
    },
    {
      "epoch": 0.07414272474513438,
      "grad_norm": 0.4325884282588959,
      "learning_rate": 0.00028894810009267837,
      "loss": 0.5458,
      "step": 160
    },
    {
      "epoch": 0.07877664504170528,
      "grad_norm": 0.352845162153244,
      "learning_rate": 0.0002882530120481927,
      "loss": 0.5034,
      "step": 170
    },
    {
      "epoch": 0.08341056533827618,
      "grad_norm": 0.34022948145866394,
      "learning_rate": 0.0002875579240037071,
      "loss": 0.4987,
      "step": 180
    },
    {
      "epoch": 0.08804448563484708,
      "grad_norm": 0.31629809737205505,
      "learning_rate": 0.0002868628359592215,
      "loss": 0.4883,
      "step": 190
    },
    {
      "epoch": 0.09267840593141798,
      "grad_norm": 0.32983237504959106,
      "learning_rate": 0.0002861677479147359,
      "loss": 0.5043,
      "step": 200
    },
    {
      "epoch": 0.09731232622798888,
      "grad_norm": 0.3827531337738037,
      "learning_rate": 0.0002854726598702502,
      "loss": 0.5171,
      "step": 210
    },
    {
      "epoch": 0.10194624652455977,
      "grad_norm": 0.36385366320610046,
      "learning_rate": 0.0002847775718257646,
      "loss": 0.4355,
      "step": 220
    },
    {
      "epoch": 0.10658016682113068,
      "grad_norm": 0.37122678756713867,
      "learning_rate": 0.00028408248378127893,
      "loss": 0.4366,
      "step": 230
    },
    {
      "epoch": 0.11121408711770157,
      "grad_norm": 0.4853290021419525,
      "learning_rate": 0.0002833873957367933,
      "loss": 0.5453,
      "step": 240
    },
    {
      "epoch": 0.11584800741427248,
      "grad_norm": 0.3827182948589325,
      "learning_rate": 0.00028269230769230764,
      "loss": 0.431,
      "step": 250
    },
    {
      "epoch": 0.12048192771084337,
      "grad_norm": 0.39195120334625244,
      "learning_rate": 0.00028199721964782204,
      "loss": 0.4782,
      "step": 260
    },
    {
      "epoch": 0.12511584800741427,
      "grad_norm": 0.35562747716903687,
      "learning_rate": 0.0002813021316033364,
      "loss": 0.4195,
      "step": 270
    },
    {
      "epoch": 0.12974976830398516,
      "grad_norm": 0.4071958065032959,
      "learning_rate": 0.00028060704355885075,
      "loss": 0.5015,
      "step": 280
    },
    {
      "epoch": 0.13438368860055608,
      "grad_norm": 0.43866515159606934,
      "learning_rate": 0.00027991195551436515,
      "loss": 0.4587,
      "step": 290
    },
    {
      "epoch": 0.13901760889712697,
      "grad_norm": 0.4723324477672577,
      "learning_rate": 0.0002792168674698795,
      "loss": 0.4118,
      "step": 300
    },
    {
      "epoch": 0.14365152919369786,
      "grad_norm": 0.35274723172187805,
      "learning_rate": 0.00027852177942539385,
      "loss": 0.5076,
      "step": 310
    },
    {
      "epoch": 0.14828544949026876,
      "grad_norm": 0.4973192811012268,
      "learning_rate": 0.0002778266913809082,
      "loss": 0.5483,
      "step": 320
    },
    {
      "epoch": 0.15291936978683968,
      "grad_norm": 0.32685619592666626,
      "learning_rate": 0.0002771316033364226,
      "loss": 0.4675,
      "step": 330
    },
    {
      "epoch": 0.15755329008341057,
      "grad_norm": 0.485867977142334,
      "learning_rate": 0.00027643651529193696,
      "loss": 0.4491,
      "step": 340
    },
    {
      "epoch": 0.16218721037998146,
      "grad_norm": 0.3535182774066925,
      "learning_rate": 0.0002757414272474513,
      "loss": 0.4544,
      "step": 350
    },
    {
      "epoch": 0.16682113067655235,
      "grad_norm": 0.41201695799827576,
      "learning_rate": 0.00027504633920296567,
      "loss": 0.4577,
      "step": 360
    },
    {
      "epoch": 0.17145505097312327,
      "grad_norm": 0.399549275636673,
      "learning_rate": 0.00027435125115848007,
      "loss": 0.447,
      "step": 370
    },
    {
      "epoch": 0.17608897126969417,
      "grad_norm": 0.44300174713134766,
      "learning_rate": 0.0002736561631139944,
      "loss": 0.5051,
      "step": 380
    },
    {
      "epoch": 0.18072289156626506,
      "grad_norm": 0.32776978611946106,
      "learning_rate": 0.0002729610750695088,
      "loss": 0.5166,
      "step": 390
    },
    {
      "epoch": 0.18535681186283595,
      "grad_norm": 0.4692281186580658,
      "learning_rate": 0.0002722659870250231,
      "loss": 0.4838,
      "step": 400
    },
    {
      "epoch": 0.18999073215940684,
      "grad_norm": 0.42574265599250793,
      "learning_rate": 0.00027157089898053753,
      "loss": 0.4456,
      "step": 410
    },
    {
      "epoch": 0.19462465245597776,
      "grad_norm": 0.3274458050727844,
      "learning_rate": 0.0002708758109360519,
      "loss": 0.476,
      "step": 420
    },
    {
      "epoch": 0.19925857275254866,
      "grad_norm": 0.38131827116012573,
      "learning_rate": 0.00027018072289156623,
      "loss": 0.4124,
      "step": 430
    },
    {
      "epoch": 0.20389249304911955,
      "grad_norm": 0.3712831139564514,
      "learning_rate": 0.0002694856348470806,
      "loss": 0.4395,
      "step": 440
    },
    {
      "epoch": 0.20852641334569044,
      "grad_norm": 0.43983837962150574,
      "learning_rate": 0.000268790546802595,
      "loss": 0.5287,
      "step": 450
    },
    {
      "epoch": 0.21316033364226136,
      "grad_norm": 0.383878618478775,
      "learning_rate": 0.00026809545875810934,
      "loss": 0.4524,
      "step": 460
    },
    {
      "epoch": 0.21779425393883226,
      "grad_norm": 0.39737293124198914,
      "learning_rate": 0.0002674003707136237,
      "loss": 0.5171,
      "step": 470
    },
    {
      "epoch": 0.22242817423540315,
      "grad_norm": 0.3629433214664459,
      "learning_rate": 0.0002667052826691381,
      "loss": 0.4017,
      "step": 480
    },
    {
      "epoch": 0.22706209453197404,
      "grad_norm": 0.44448867440223694,
      "learning_rate": 0.0002660101946246524,
      "loss": 0.4876,
      "step": 490
    },
    {
      "epoch": 0.23169601482854496,
      "grad_norm": 0.4824674725532532,
      "learning_rate": 0.0002653151065801668,
      "loss": 0.442,
      "step": 500
    },
    {
      "epoch": 0.23632993512511585,
      "grad_norm": 0.3264263868331909,
      "learning_rate": 0.00026462001853568115,
      "loss": 0.4849,
      "step": 510
    },
    {
      "epoch": 0.24096385542168675,
      "grad_norm": 0.3806018531322479,
      "learning_rate": 0.00026392493049119556,
      "loss": 0.4198,
      "step": 520
    },
    {
      "epoch": 0.24559777571825764,
      "grad_norm": 0.4619787931442261,
      "learning_rate": 0.00026322984244670986,
      "loss": 0.4348,
      "step": 530
    },
    {
      "epoch": 0.25023169601482853,
      "grad_norm": 0.3893038034439087,
      "learning_rate": 0.00026253475440222426,
      "loss": 0.4269,
      "step": 540
    },
    {
      "epoch": 0.2548656163113994,
      "grad_norm": 0.5061798095703125,
      "learning_rate": 0.0002618396663577386,
      "loss": 0.4367,
      "step": 550
    },
    {
      "epoch": 0.2594995366079703,
      "grad_norm": 0.3888299763202667,
      "learning_rate": 0.000261144578313253,
      "loss": 0.4703,
      "step": 560
    },
    {
      "epoch": 0.26413345690454126,
      "grad_norm": 0.5040469169616699,
      "learning_rate": 0.00026044949026876737,
      "loss": 0.4383,
      "step": 570
    },
    {
      "epoch": 0.26876737720111216,
      "grad_norm": 0.33014264702796936,
      "learning_rate": 0.0002597544022242817,
      "loss": 0.4622,
      "step": 580
    },
    {
      "epoch": 0.27340129749768305,
      "grad_norm": 0.4775944650173187,
      "learning_rate": 0.00025905931417979607,
      "loss": 0.5177,
      "step": 590
    },
    {
      "epoch": 0.27803521779425394,
      "grad_norm": 0.3613554835319519,
      "learning_rate": 0.0002583642261353104,
      "loss": 0.4683,
      "step": 600
    },
    {
      "epoch": 0.28266913809082483,
      "grad_norm": 0.4591723084449768,
      "learning_rate": 0.00025766913809082483,
      "loss": 0.5064,
      "step": 610
    },
    {
      "epoch": 0.2873030583873957,
      "grad_norm": 0.42493635416030884,
      "learning_rate": 0.0002569740500463392,
      "loss": 0.5226,
      "step": 620
    },
    {
      "epoch": 0.2919369786839666,
      "grad_norm": 0.3522678315639496,
      "learning_rate": 0.0002562789620018536,
      "loss": 0.4427,
      "step": 630
    },
    {
      "epoch": 0.2965708989805375,
      "grad_norm": 0.3232039213180542,
      "learning_rate": 0.0002555838739573679,
      "loss": 0.4115,
      "step": 640
    },
    {
      "epoch": 0.30120481927710846,
      "grad_norm": 0.32238250970840454,
      "learning_rate": 0.0002548887859128823,
      "loss": 0.3853,
      "step": 650
    },
    {
      "epoch": 0.30583873957367935,
      "grad_norm": 0.4618857502937317,
      "learning_rate": 0.00025419369786839664,
      "loss": 0.5639,
      "step": 660
    },
    {
      "epoch": 0.31047265987025024,
      "grad_norm": 0.3681054711341858,
      "learning_rate": 0.000253498609823911,
      "loss": 0.4897,
      "step": 670
    },
    {
      "epoch": 0.31510658016682114,
      "grad_norm": 0.3761281371116638,
      "learning_rate": 0.00025280352177942534,
      "loss": 0.4275,
      "step": 680
    },
    {
      "epoch": 0.31974050046339203,
      "grad_norm": 0.3062303066253662,
      "learning_rate": 0.00025210843373493975,
      "loss": 0.3967,
      "step": 690
    },
    {
      "epoch": 0.3243744207599629,
      "grad_norm": 0.38078251481056213,
      "learning_rate": 0.0002514133456904541,
      "loss": 0.4405,
      "step": 700
    },
    {
      "epoch": 0.3290083410565338,
      "grad_norm": 0.40751275420188904,
      "learning_rate": 0.00025071825764596845,
      "loss": 0.5463,
      "step": 710
    },
    {
      "epoch": 0.3336422613531047,
      "grad_norm": 0.408681720495224,
      "learning_rate": 0.00025002316960148286,
      "loss": 0.4934,
      "step": 720
    },
    {
      "epoch": 0.3382761816496756,
      "grad_norm": 0.46908262372016907,
      "learning_rate": 0.0002493280815569972,
      "loss": 0.445,
      "step": 730
    },
    {
      "epoch": 0.34291010194624655,
      "grad_norm": 0.479021281003952,
      "learning_rate": 0.00024863299351251156,
      "loss": 0.4423,
      "step": 740
    },
    {
      "epoch": 0.34754402224281744,
      "grad_norm": 0.37807953357696533,
      "learning_rate": 0.0002479379054680259,
      "loss": 0.4505,
      "step": 750
    },
    {
      "epoch": 0.35217794253938833,
      "grad_norm": 0.4273180365562439,
      "learning_rate": 0.0002472428174235403,
      "loss": 0.4379,
      "step": 760
    },
    {
      "epoch": 0.3568118628359592,
      "grad_norm": 0.4171128273010254,
      "learning_rate": 0.00024654772937905467,
      "loss": 0.4084,
      "step": 770
    },
    {
      "epoch": 0.3614457831325301,
      "grad_norm": 0.4805515706539154,
      "learning_rate": 0.000245852641334569,
      "loss": 0.4373,
      "step": 780
    },
    {
      "epoch": 0.366079703429101,
      "grad_norm": 0.3882713317871094,
      "learning_rate": 0.00024515755329008337,
      "loss": 0.4425,
      "step": 790
    },
    {
      "epoch": 0.3707136237256719,
      "grad_norm": 0.3893239200115204,
      "learning_rate": 0.0002444624652455978,
      "loss": 0.4829,
      "step": 800
    },
    {
      "epoch": 0.3753475440222428,
      "grad_norm": 0.3848889470100403,
      "learning_rate": 0.00024376737720111213,
      "loss": 0.4629,
      "step": 810
    },
    {
      "epoch": 0.3799814643188137,
      "grad_norm": 0.3595952093601227,
      "learning_rate": 0.00024307228915662648,
      "loss": 0.4623,
      "step": 820
    },
    {
      "epoch": 0.38461538461538464,
      "grad_norm": 0.4663516581058502,
      "learning_rate": 0.00024237720111214086,
      "loss": 0.3994,
      "step": 830
    },
    {
      "epoch": 0.38924930491195553,
      "grad_norm": 0.4669645130634308,
      "learning_rate": 0.00024168211306765524,
      "loss": 0.4537,
      "step": 840
    },
    {
      "epoch": 0.3938832252085264,
      "grad_norm": 0.43069687485694885,
      "learning_rate": 0.00024098702502316956,
      "loss": 0.4325,
      "step": 850
    },
    {
      "epoch": 0.3985171455050973,
      "grad_norm": 0.5129668712615967,
      "learning_rate": 0.00024029193697868394,
      "loss": 0.4251,
      "step": 860
    },
    {
      "epoch": 0.4031510658016682,
      "grad_norm": 0.47785133123397827,
      "learning_rate": 0.00023959684893419832,
      "loss": 0.4534,
      "step": 870
    },
    {
      "epoch": 0.4077849860982391,
      "grad_norm": 0.34385788440704346,
      "learning_rate": 0.0002389017608897127,
      "loss": 0.4258,
      "step": 880
    },
    {
      "epoch": 0.41241890639481,
      "grad_norm": 0.35733455419540405,
      "learning_rate": 0.00023820667284522702,
      "loss": 0.4888,
      "step": 890
    },
    {
      "epoch": 0.4170528266913809,
      "grad_norm": 0.3299333155155182,
      "learning_rate": 0.0002375115848007414,
      "loss": 0.47,
      "step": 900
    },
    {
      "epoch": 0.42168674698795183,
      "grad_norm": 0.34629347920417786,
      "learning_rate": 0.00023681649675625578,
      "loss": 0.4226,
      "step": 910
    },
    {
      "epoch": 0.4263206672845227,
      "grad_norm": 0.4319722354412079,
      "learning_rate": 0.00023612140871177013,
      "loss": 0.4982,
      "step": 920
    },
    {
      "epoch": 0.4309545875810936,
      "grad_norm": 0.3622225522994995,
      "learning_rate": 0.0002354263206672845,
      "loss": 0.4339,
      "step": 930
    },
    {
      "epoch": 0.4355885078776645,
      "grad_norm": 0.4643559157848358,
      "learning_rate": 0.00023473123262279886,
      "loss": 0.4238,
      "step": 940
    },
    {
      "epoch": 0.4402224281742354,
      "grad_norm": 0.3616037368774414,
      "learning_rate": 0.00023403614457831324,
      "loss": 0.4741,
      "step": 950
    },
    {
      "epoch": 0.4448563484708063,
      "grad_norm": 0.40745407342910767,
      "learning_rate": 0.0002333410565338276,
      "loss": 0.4116,
      "step": 960
    },
    {
      "epoch": 0.4494902687673772,
      "grad_norm": 0.4535151720046997,
      "learning_rate": 0.00023264596848934197,
      "loss": 0.4368,
      "step": 970
    },
    {
      "epoch": 0.4541241890639481,
      "grad_norm": 0.41154956817626953,
      "learning_rate": 0.00023195088044485634,
      "loss": 0.4914,
      "step": 980
    },
    {
      "epoch": 0.458758109360519,
      "grad_norm": 0.4555477201938629,
      "learning_rate": 0.00023125579240037072,
      "loss": 0.4229,
      "step": 990
    },
    {
      "epoch": 0.4633920296570899,
      "grad_norm": 0.4146144390106201,
      "learning_rate": 0.00023056070435588505,
      "loss": 0.4097,
      "step": 1000
    },
    {
      "epoch": 0.4680259499536608,
      "grad_norm": 0.36076611280441284,
      "learning_rate": 0.00022986561631139943,
      "loss": 0.4091,
      "step": 1010
    },
    {
      "epoch": 0.4726598702502317,
      "grad_norm": 0.31528130173683167,
      "learning_rate": 0.0002291705282669138,
      "loss": 0.3686,
      "step": 1020
    },
    {
      "epoch": 0.4772937905468026,
      "grad_norm": 0.3774864971637726,
      "learning_rate": 0.00022847544022242813,
      "loss": 0.4923,
      "step": 1030
    },
    {
      "epoch": 0.4819277108433735,
      "grad_norm": 0.41823625564575195,
      "learning_rate": 0.0002277803521779425,
      "loss": 0.4546,
      "step": 1040
    },
    {
      "epoch": 0.4865616311399444,
      "grad_norm": 0.3713241517543793,
      "learning_rate": 0.00022708526413345689,
      "loss": 0.4299,
      "step": 1050
    },
    {
      "epoch": 0.4911955514365153,
      "grad_norm": 0.334870308637619,
      "learning_rate": 0.00022639017608897126,
      "loss": 0.4351,
      "step": 1060
    },
    {
      "epoch": 0.49582947173308617,
      "grad_norm": 0.24805714190006256,
      "learning_rate": 0.00022569508804448562,
      "loss": 0.3929,
      "step": 1070
    },
    {
      "epoch": 0.5004633920296571,
      "grad_norm": 0.529045581817627,
      "learning_rate": 0.000225,
      "loss": 0.4349,
      "step": 1080
    },
    {
      "epoch": 0.505097312326228,
      "grad_norm": 0.5127238631248474,
      "learning_rate": 0.00022430491195551435,
      "loss": 0.444,
      "step": 1090
    },
    {
      "epoch": 0.5097312326227988,
      "grad_norm": 0.4840947687625885,
      "learning_rate": 0.0002236098239110287,
      "loss": 0.4814,
      "step": 1100
    },
    {
      "epoch": 0.5143651529193698,
      "grad_norm": 0.44880053400993347,
      "learning_rate": 0.00022291473586654308,
      "loss": 0.4466,
      "step": 1110
    },
    {
      "epoch": 0.5189990732159406,
      "grad_norm": 0.2730713486671448,
      "learning_rate": 0.00022221964782205745,
      "loss": 0.3927,
      "step": 1120
    },
    {
      "epoch": 0.5236329935125116,
      "grad_norm": 0.33978259563446045,
      "learning_rate": 0.00022152455977757183,
      "loss": 0.4763,
      "step": 1130
    },
    {
      "epoch": 0.5282669138090825,
      "grad_norm": 0.33843424916267395,
      "learning_rate": 0.00022082947173308616,
      "loss": 0.3855,
      "step": 1140
    },
    {
      "epoch": 0.5329008341056534,
      "grad_norm": 0.4196402430534363,
      "learning_rate": 0.00022013438368860053,
      "loss": 0.4703,
      "step": 1150
    },
    {
      "epoch": 0.5375347544022243,
      "grad_norm": 0.39824753999710083,
      "learning_rate": 0.0002194392956441149,
      "loss": 0.4653,
      "step": 1160
    },
    {
      "epoch": 0.5421686746987951,
      "grad_norm": 0.3243648409843445,
      "learning_rate": 0.00021874420759962924,
      "loss": 0.4504,
      "step": 1170
    },
    {
      "epoch": 0.5468025949953661,
      "grad_norm": 0.3930327892303467,
      "learning_rate": 0.00021804911955514362,
      "loss": 0.5203,
      "step": 1180
    },
    {
      "epoch": 0.5514365152919369,
      "grad_norm": 0.406800776720047,
      "learning_rate": 0.000217354031510658,
      "loss": 0.4388,
      "step": 1190
    },
    {
      "epoch": 0.5560704355885079,
      "grad_norm": 0.32722190022468567,
      "learning_rate": 0.00021665894346617237,
      "loss": 0.4484,
      "step": 1200
    },
    {
      "epoch": 0.5607043558850788,
      "grad_norm": 0.40086886286735535,
      "learning_rate": 0.00021596385542168672,
      "loss": 0.3964,
      "step": 1210
    },
    {
      "epoch": 0.5653382761816497,
      "grad_norm": 0.31294673681259155,
      "learning_rate": 0.0002152687673772011,
      "loss": 0.4188,
      "step": 1220
    },
    {
      "epoch": 0.5699721964782206,
      "grad_norm": 0.33032116293907166,
      "learning_rate": 0.00021457367933271545,
      "loss": 0.4595,
      "step": 1230
    },
    {
      "epoch": 0.5746061167747915,
      "grad_norm": 0.3128484785556793,
      "learning_rate": 0.00021387859128822983,
      "loss": 0.4248,
      "step": 1240
    },
    {
      "epoch": 0.5792400370713624,
      "grad_norm": 0.41308367252349854,
      "learning_rate": 0.00021318350324374418,
      "loss": 0.4842,
      "step": 1250
    },
    {
      "epoch": 0.5838739573679332,
      "grad_norm": 0.32540541887283325,
      "learning_rate": 0.00021248841519925856,
      "loss": 0.392,
      "step": 1260
    },
    {
      "epoch": 0.5885078776645042,
      "grad_norm": 0.37712159752845764,
      "learning_rate": 0.00021179332715477294,
      "loss": 0.4385,
      "step": 1270
    },
    {
      "epoch": 0.593141797961075,
      "grad_norm": 0.39498409628868103,
      "learning_rate": 0.00021109823911028727,
      "loss": 0.4388,
      "step": 1280
    },
    {
      "epoch": 0.597775718257646,
      "grad_norm": 0.3992222547531128,
      "learning_rate": 0.00021040315106580164,
      "loss": 0.504,
      "step": 1290
    },
    {
      "epoch": 0.6024096385542169,
      "grad_norm": 0.36806175112724304,
      "learning_rate": 0.00020970806302131602,
      "loss": 0.4143,
      "step": 1300
    },
    {
      "epoch": 0.6070435588507878,
      "grad_norm": 0.48738566040992737,
      "learning_rate": 0.0002090129749768304,
      "loss": 0.4917,
      "step": 1310
    },
    {
      "epoch": 0.6116774791473587,
      "grad_norm": 0.3689660429954529,
      "learning_rate": 0.00020831788693234472,
      "loss": 0.4205,
      "step": 1320
    },
    {
      "epoch": 0.6163113994439295,
      "grad_norm": 0.36636972427368164,
      "learning_rate": 0.0002076227988878591,
      "loss": 0.3859,
      "step": 1330
    },
    {
      "epoch": 0.6209453197405005,
      "grad_norm": 0.355956494808197,
      "learning_rate": 0.00020692771084337348,
      "loss": 0.4223,
      "step": 1340
    },
    {
      "epoch": 0.6255792400370713,
      "grad_norm": 0.355040967464447,
      "learning_rate": 0.00020623262279888783,
      "loss": 0.414,
      "step": 1350
    },
    {
      "epoch": 0.6302131603336423,
      "grad_norm": 0.36592569947242737,
      "learning_rate": 0.0002055375347544022,
      "loss": 0.406,
      "step": 1360
    },
    {
      "epoch": 0.6348470806302131,
      "grad_norm": 0.43309953808784485,
      "learning_rate": 0.00020484244670991656,
      "loss": 0.4529,
      "step": 1370
    },
    {
      "epoch": 0.6394810009267841,
      "grad_norm": 0.30984750390052795,
      "learning_rate": 0.00020414735866543094,
      "loss": 0.3873,
      "step": 1380
    },
    {
      "epoch": 0.644114921223355,
      "grad_norm": 0.33852508664131165,
      "learning_rate": 0.0002034522706209453,
      "loss": 0.4137,
      "step": 1390
    },
    {
      "epoch": 0.6487488415199258,
      "grad_norm": 0.4319971799850464,
      "learning_rate": 0.00020275718257645967,
      "loss": 0.445,
      "step": 1400
    },
    {
      "epoch": 0.6533827618164968,
      "grad_norm": 0.3274412453174591,
      "learning_rate": 0.00020206209453197405,
      "loss": 0.3896,
      "step": 1410
    },
    {
      "epoch": 0.6580166821130676,
      "grad_norm": 0.44836536049842834,
      "learning_rate": 0.0002013670064874884,
      "loss": 0.4647,
      "step": 1420
    },
    {
      "epoch": 0.6626506024096386,
      "grad_norm": 0.45571744441986084,
      "learning_rate": 0.00020067191844300275,
      "loss": 0.4575,
      "step": 1430
    },
    {
      "epoch": 0.6672845227062094,
      "grad_norm": 0.38867196440696716,
      "learning_rate": 0.00019997683039851713,
      "loss": 0.4505,
      "step": 1440
    },
    {
      "epoch": 0.6719184430027804,
      "grad_norm": 0.46751868724823,
      "learning_rate": 0.0001992817423540315,
      "loss": 0.4094,
      "step": 1450
    },
    {
      "epoch": 0.6765523632993512,
      "grad_norm": 0.3465523421764374,
      "learning_rate": 0.00019858665430954583,
      "loss": 0.357,
      "step": 1460
    },
    {
      "epoch": 0.6811862835959221,
      "grad_norm": 0.35008054971694946,
      "learning_rate": 0.0001978915662650602,
      "loss": 0.4235,
      "step": 1470
    },
    {
      "epoch": 0.6858202038924931,
      "grad_norm": 0.44757360219955444,
      "learning_rate": 0.0001971964782205746,
      "loss": 0.4299,
      "step": 1480
    },
    {
      "epoch": 0.6904541241890639,
      "grad_norm": 0.37812456488609314,
      "learning_rate": 0.00019650139017608897,
      "loss": 0.4861,
      "step": 1490
    },
    {
      "epoch": 0.6950880444856349,
      "grad_norm": 0.4172975420951843,
      "learning_rate": 0.00019580630213160332,
      "loss": 0.411,
      "step": 1500
    },
    {
      "epoch": 0.6997219647822057,
      "grad_norm": 0.3557645082473755,
      "learning_rate": 0.00019511121408711767,
      "loss": 0.413,
      "step": 1510
    },
    {
      "epoch": 0.7043558850787767,
      "grad_norm": 0.475146621465683,
      "learning_rate": 0.00019441612604263205,
      "loss": 0.404,
      "step": 1520
    },
    {
      "epoch": 0.7089898053753475,
      "grad_norm": 0.34932583570480347,
      "learning_rate": 0.0001937210379981464,
      "loss": 0.4339,
      "step": 1530
    },
    {
      "epoch": 0.7136237256719185,
      "grad_norm": 0.3841742277145386,
      "learning_rate": 0.00019302594995366078,
      "loss": 0.443,
      "step": 1540
    },
    {
      "epoch": 0.7182576459684893,
      "grad_norm": 0.40557074546813965,
      "learning_rate": 0.00019233086190917516,
      "loss": 0.4584,
      "step": 1550
    },
    {
      "epoch": 0.7228915662650602,
      "grad_norm": 0.46951010823249817,
      "learning_rate": 0.0001916357738646895,
      "loss": 0.4237,
      "step": 1560
    },
    {
      "epoch": 0.7275254865616312,
      "grad_norm": 0.38474270701408386,
      "learning_rate": 0.00019094068582020386,
      "loss": 0.4529,
      "step": 1570
    },
    {
      "epoch": 0.732159406858202,
      "grad_norm": 0.3190203905105591,
      "learning_rate": 0.00019024559777571824,
      "loss": 0.4161,
      "step": 1580
    },
    {
      "epoch": 0.736793327154773,
      "grad_norm": 0.3800548017024994,
      "learning_rate": 0.00018955050973123262,
      "loss": 0.3886,
      "step": 1590
    },
    {
      "epoch": 0.7414272474513438,
      "grad_norm": 0.3625418543815613,
      "learning_rate": 0.00018885542168674694,
      "loss": 0.3909,
      "step": 1600
    },
    {
      "epoch": 0.7460611677479148,
      "grad_norm": 0.5026484131813049,
      "learning_rate": 0.00018816033364226132,
      "loss": 0.5229,
      "step": 1610
    },
    {
      "epoch": 0.7506950880444856,
      "grad_norm": 0.3223924934864044,
      "learning_rate": 0.0001874652455977757,
      "loss": 0.4273,
      "step": 1620
    },
    {
      "epoch": 0.7553290083410565,
      "grad_norm": 0.27654021978378296,
      "learning_rate": 0.00018677015755329008,
      "loss": 0.3905,
      "step": 1630
    },
    {
      "epoch": 0.7599629286376274,
      "grad_norm": 0.42651575803756714,
      "learning_rate": 0.00018607506950880443,
      "loss": 0.4427,
      "step": 1640
    },
    {
      "epoch": 0.7645968489341983,
      "grad_norm": 0.4370901584625244,
      "learning_rate": 0.00018537998146431878,
      "loss": 0.4688,
      "step": 1650
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 0.37819185853004456,
      "learning_rate": 0.00018468489341983316,
      "loss": 0.4231,
      "step": 1660
    },
    {
      "epoch": 0.7738646895273401,
      "grad_norm": 0.3696691691875458,
      "learning_rate": 0.00018398980537534754,
      "loss": 0.4458,
      "step": 1670
    },
    {
      "epoch": 0.7784986098239111,
      "grad_norm": 0.34384486079216003,
      "learning_rate": 0.0001832947173308619,
      "loss": 0.462,
      "step": 1680
    },
    {
      "epoch": 0.7831325301204819,
      "grad_norm": 0.39266905188560486,
      "learning_rate": 0.00018259962928637627,
      "loss": 0.4118,
      "step": 1690
    },
    {
      "epoch": 0.7877664504170528,
      "grad_norm": 0.43815886974334717,
      "learning_rate": 0.00018190454124189065,
      "loss": 0.3836,
      "step": 1700
    },
    {
      "epoch": 0.7924003707136237,
      "grad_norm": 0.4963682293891907,
      "learning_rate": 0.00018120945319740497,
      "loss": 0.3929,
      "step": 1710
    },
    {
      "epoch": 0.7970342910101946,
      "grad_norm": 0.4125339388847351,
      "learning_rate": 0.00018051436515291935,
      "loss": 0.4214,
      "step": 1720
    },
    {
      "epoch": 0.8016682113067656,
      "grad_norm": 0.5118056535720825,
      "learning_rate": 0.00017981927710843373,
      "loss": 0.4582,
      "step": 1730
    },
    {
      "epoch": 0.8063021316033364,
      "grad_norm": 0.44652143120765686,
      "learning_rate": 0.0001791241890639481,
      "loss": 0.4557,
      "step": 1740
    },
    {
      "epoch": 0.8109360518999074,
      "grad_norm": 0.39972642064094543,
      "learning_rate": 0.00017842910101946243,
      "loss": 0.454,
      "step": 1750
    },
    {
      "epoch": 0.8155699721964782,
      "grad_norm": 0.5146743059158325,
      "learning_rate": 0.0001777340129749768,
      "loss": 0.4064,
      "step": 1760
    },
    {
      "epoch": 0.8202038924930491,
      "grad_norm": 0.3463651239871979,
      "learning_rate": 0.0001770389249304912,
      "loss": 0.3826,
      "step": 1770
    },
    {
      "epoch": 0.82483781278962,
      "grad_norm": 0.3422740399837494,
      "learning_rate": 0.00017634383688600554,
      "loss": 0.3838,
      "step": 1780
    },
    {
      "epoch": 0.8294717330861909,
      "grad_norm": 0.3831815719604492,
      "learning_rate": 0.00017564874884151992,
      "loss": 0.4442,
      "step": 1790
    },
    {
      "epoch": 0.8341056533827618,
      "grad_norm": 0.3674461245536804,
      "learning_rate": 0.00017495366079703427,
      "loss": 0.3617,
      "step": 1800
    },
    {
      "epoch": 0.8387395736793327,
      "grad_norm": 0.3935919404029846,
      "learning_rate": 0.00017425857275254865,
      "loss": 0.4578,
      "step": 1810
    },
    {
      "epoch": 0.8433734939759037,
      "grad_norm": 0.4333125650882721,
      "learning_rate": 0.000173563484708063,
      "loss": 0.4431,
      "step": 1820
    },
    {
      "epoch": 0.8480074142724745,
      "grad_norm": 0.42463764548301697,
      "learning_rate": 0.00017286839666357738,
      "loss": 0.4031,
      "step": 1830
    },
    {
      "epoch": 0.8526413345690455,
      "grad_norm": 0.35087305307388306,
      "learning_rate": 0.00017217330861909176,
      "loss": 0.4582,
      "step": 1840
    },
    {
      "epoch": 0.8572752548656163,
      "grad_norm": 0.33768782019615173,
      "learning_rate": 0.0001714782205746061,
      "loss": 0.3978,
      "step": 1850
    },
    {
      "epoch": 0.8619091751621872,
      "grad_norm": 0.3734382390975952,
      "learning_rate": 0.00017078313253012046,
      "loss": 0.4296,
      "step": 1860
    },
    {
      "epoch": 0.8665430954587581,
      "grad_norm": 0.3718160092830658,
      "learning_rate": 0.00017008804448563484,
      "loss": 0.417,
      "step": 1870
    },
    {
      "epoch": 0.871177015755329,
      "grad_norm": 0.37972408533096313,
      "learning_rate": 0.00016939295644114921,
      "loss": 0.411,
      "step": 1880
    },
    {
      "epoch": 0.8758109360518999,
      "grad_norm": 0.43368878960609436,
      "learning_rate": 0.00016869786839666354,
      "loss": 0.506,
      "step": 1890
    },
    {
      "epoch": 0.8804448563484708,
      "grad_norm": 0.41099444031715393,
      "learning_rate": 0.00016800278035217792,
      "loss": 0.3928,
      "step": 1900
    },
    {
      "epoch": 0.8850787766450418,
      "grad_norm": 0.3973136842250824,
      "learning_rate": 0.0001673076923076923,
      "loss": 0.4442,
      "step": 1910
    },
    {
      "epoch": 0.8897126969416126,
      "grad_norm": 0.30687418580055237,
      "learning_rate": 0.00016661260426320667,
      "loss": 0.4209,
      "step": 1920
    },
    {
      "epoch": 0.8943466172381835,
      "grad_norm": 0.3588371276855469,
      "learning_rate": 0.00016591751621872103,
      "loss": 0.3932,
      "step": 1930
    },
    {
      "epoch": 0.8989805375347544,
      "grad_norm": 0.4107860028743744,
      "learning_rate": 0.00016522242817423538,
      "loss": 0.3747,
      "step": 1940
    },
    {
      "epoch": 0.9036144578313253,
      "grad_norm": 0.3935255706310272,
      "learning_rate": 0.00016452734012974976,
      "loss": 0.401,
      "step": 1950
    },
    {
      "epoch": 0.9082483781278962,
      "grad_norm": 0.37768107652664185,
      "learning_rate": 0.0001638322520852641,
      "loss": 0.4125,
      "step": 1960
    },
    {
      "epoch": 0.9128822984244671,
      "grad_norm": 0.34660592675209045,
      "learning_rate": 0.00016313716404077849,
      "loss": 0.4238,
      "step": 1970
    },
    {
      "epoch": 0.917516218721038,
      "grad_norm": 0.325065940618515,
      "learning_rate": 0.00016244207599629286,
      "loss": 0.4469,
      "step": 1980
    },
    {
      "epoch": 0.9221501390176089,
      "grad_norm": 0.5264182686805725,
      "learning_rate": 0.00016174698795180722,
      "loss": 0.4067,
      "step": 1990
    },
    {
      "epoch": 0.9267840593141798,
      "grad_norm": 0.39541614055633545,
      "learning_rate": 0.00016105189990732157,
      "loss": 0.4048,
      "step": 2000
    },
    {
      "epoch": 0.9314179796107507,
      "grad_norm": 0.5899196863174438,
      "learning_rate": 0.00016035681186283595,
      "loss": 0.4598,
      "step": 2010
    },
    {
      "epoch": 0.9360518999073216,
      "grad_norm": 0.4272339642047882,
      "learning_rate": 0.00015966172381835032,
      "loss": 0.5,
      "step": 2020
    },
    {
      "epoch": 0.9406858202038925,
      "grad_norm": 0.5000485777854919,
      "learning_rate": 0.00015896663577386465,
      "loss": 0.4154,
      "step": 2030
    },
    {
      "epoch": 0.9453197405004634,
      "grad_norm": 0.2850925922393799,
      "learning_rate": 0.00015827154772937903,
      "loss": 0.3751,
      "step": 2040
    },
    {
      "epoch": 0.9499536607970342,
      "grad_norm": 0.37266668677330017,
      "learning_rate": 0.0001575764596848934,
      "loss": 0.462,
      "step": 2050
    },
    {
      "epoch": 0.9545875810936052,
      "grad_norm": 0.35389843583106995,
      "learning_rate": 0.00015688137164040778,
      "loss": 0.4325,
      "step": 2060
    },
    {
      "epoch": 0.959221501390176,
      "grad_norm": 0.4005086123943329,
      "learning_rate": 0.00015618628359592213,
      "loss": 0.4063,
      "step": 2070
    },
    {
      "epoch": 0.963855421686747,
      "grad_norm": 0.3318014144897461,
      "learning_rate": 0.00015549119555143649,
      "loss": 0.4005,
      "step": 2080
    },
    {
      "epoch": 0.9684893419833179,
      "grad_norm": 0.47355303168296814,
      "learning_rate": 0.00015479610750695086,
      "loss": 0.5058,
      "step": 2090
    },
    {
      "epoch": 0.9731232622798888,
      "grad_norm": 0.3099556565284729,
      "learning_rate": 0.00015410101946246524,
      "loss": 0.358,
      "step": 2100
    },
    {
      "epoch": 0.9777571825764597,
      "grad_norm": 0.5098932385444641,
      "learning_rate": 0.0001534059314179796,
      "loss": 0.4138,
      "step": 2110
    },
    {
      "epoch": 0.9823911028730306,
      "grad_norm": 0.43745410442352295,
      "learning_rate": 0.00015271084337349397,
      "loss": 0.4537,
      "step": 2120
    },
    {
      "epoch": 0.9870250231696015,
      "grad_norm": 0.4691702127456665,
      "learning_rate": 0.00015201575532900832,
      "loss": 0.4064,
      "step": 2130
    },
    {
      "epoch": 0.9916589434661723,
      "grad_norm": 0.43095409870147705,
      "learning_rate": 0.00015132066728452268,
      "loss": 0.4345,
      "step": 2140
    },
    {
      "epoch": 0.9962928637627433,
      "grad_norm": 0.3165135681629181,
      "learning_rate": 0.00015062557924003705,
      "loss": 0.3751,
      "step": 2150
    },
    {
      "epoch": 1.0009267840593141,
      "grad_norm": 0.38903099298477173,
      "learning_rate": 0.00014993049119555143,
      "loss": 0.4483,
      "step": 2160
    },
    {
      "epoch": 1.0055607043558852,
      "grad_norm": 0.3540801703929901,
      "learning_rate": 0.00014923540315106578,
      "loss": 0.3847,
      "step": 2170
    },
    {
      "epoch": 1.010194624652456,
      "grad_norm": 0.4951782524585724,
      "learning_rate": 0.00014854031510658016,
      "loss": 0.3882,
      "step": 2180
    },
    {
      "epoch": 1.0148285449490269,
      "grad_norm": 0.4157162010669708,
      "learning_rate": 0.00014784522706209451,
      "loss": 0.3674,
      "step": 2190
    },
    {
      "epoch": 1.0194624652455977,
      "grad_norm": 0.3631850481033325,
      "learning_rate": 0.0001471501390176089,
      "loss": 0.341,
      "step": 2200
    },
    {
      "epoch": 1.0240963855421688,
      "grad_norm": 0.35982194542884827,
      "learning_rate": 0.00014645505097312324,
      "loss": 0.381,
      "step": 2210
    },
    {
      "epoch": 1.0287303058387396,
      "grad_norm": 0.3738069236278534,
      "learning_rate": 0.0001457599629286376,
      "loss": 0.395,
      "step": 2220
    },
    {
      "epoch": 1.0333642261353104,
      "grad_norm": 0.4233757257461548,
      "learning_rate": 0.00014506487488415197,
      "loss": 0.4212,
      "step": 2230
    },
    {
      "epoch": 1.0379981464318813,
      "grad_norm": 0.4391021430492401,
      "learning_rate": 0.00014436978683966635,
      "loss": 0.4172,
      "step": 2240
    },
    {
      "epoch": 1.0426320667284523,
      "grad_norm": 0.49776721000671387,
      "learning_rate": 0.0001436746987951807,
      "loss": 0.384,
      "step": 2250
    },
    {
      "epoch": 1.0472659870250232,
      "grad_norm": 0.4383520483970642,
      "learning_rate": 0.00014297961075069508,
      "loss": 0.3707,
      "step": 2260
    },
    {
      "epoch": 1.051899907321594,
      "grad_norm": 0.47047173976898193,
      "learning_rate": 0.00014228452270620946,
      "loss": 0.4059,
      "step": 2270
    },
    {
      "epoch": 1.056533827618165,
      "grad_norm": 0.4196452498435974,
      "learning_rate": 0.0001415894346617238,
      "loss": 0.441,
      "step": 2280
    },
    {
      "epoch": 1.061167747914736,
      "grad_norm": 0.4553895592689514,
      "learning_rate": 0.00014089434661723816,
      "loss": 0.4195,
      "step": 2290
    },
    {
      "epoch": 1.0658016682113067,
      "grad_norm": 0.5043196082115173,
      "learning_rate": 0.00014019925857275254,
      "loss": 0.4113,
      "step": 2300
    },
    {
      "epoch": 1.0704355885078776,
      "grad_norm": 0.4687553942203522,
      "learning_rate": 0.0001395041705282669,
      "loss": 0.4284,
      "step": 2310
    },
    {
      "epoch": 1.0750695088044486,
      "grad_norm": 0.42681458592414856,
      "learning_rate": 0.00013880908248378127,
      "loss": 0.3993,
      "step": 2320
    },
    {
      "epoch": 1.0797034291010195,
      "grad_norm": 0.38211435079574585,
      "learning_rate": 0.00013811399443929562,
      "loss": 0.4264,
      "step": 2330
    },
    {
      "epoch": 1.0843373493975903,
      "grad_norm": 0.4625399112701416,
      "learning_rate": 0.00013741890639481,
      "loss": 0.4464,
      "step": 2340
    },
    {
      "epoch": 1.0889712696941614,
      "grad_norm": 0.4715350568294525,
      "learning_rate": 0.00013672381835032435,
      "loss": 0.3644,
      "step": 2350
    },
    {
      "epoch": 1.0936051899907322,
      "grad_norm": 0.5162579417228699,
      "learning_rate": 0.00013602873030583873,
      "loss": 0.4064,
      "step": 2360
    },
    {
      "epoch": 1.098239110287303,
      "grad_norm": 0.361208438873291,
      "learning_rate": 0.00013533364226135308,
      "loss": 0.3765,
      "step": 2370
    },
    {
      "epoch": 1.1028730305838739,
      "grad_norm": 0.45088571310043335,
      "learning_rate": 0.00013463855421686746,
      "loss": 0.3975,
      "step": 2380
    },
    {
      "epoch": 1.107506950880445,
      "grad_norm": 0.4460572302341461,
      "learning_rate": 0.0001339434661723818,
      "loss": 0.4443,
      "step": 2390
    },
    {
      "epoch": 1.1121408711770158,
      "grad_norm": 0.3629909157752991,
      "learning_rate": 0.0001332483781278962,
      "loss": 0.401,
      "step": 2400
    },
    {
      "epoch": 1.1167747914735866,
      "grad_norm": 0.37849023938179016,
      "learning_rate": 0.00013255329008341057,
      "loss": 0.3476,
      "step": 2410
    },
    {
      "epoch": 1.1214087117701577,
      "grad_norm": 0.423921674489975,
      "learning_rate": 0.00013185820203892492,
      "loss": 0.402,
      "step": 2420
    },
    {
      "epoch": 1.1260426320667285,
      "grad_norm": 0.33255496621131897,
      "learning_rate": 0.0001311631139944393,
      "loss": 0.3431,
      "step": 2430
    },
    {
      "epoch": 1.1306765523632993,
      "grad_norm": 0.34243419766426086,
      "learning_rate": 0.00013046802594995365,
      "loss": 0.4254,
      "step": 2440
    },
    {
      "epoch": 1.1353104726598702,
      "grad_norm": 0.32260575890541077,
      "learning_rate": 0.00012977293790546803,
      "loss": 0.3846,
      "step": 2450
    },
    {
      "epoch": 1.1399443929564412,
      "grad_norm": 0.4910984933376312,
      "learning_rate": 0.00012907784986098238,
      "loss": 0.3692,
      "step": 2460
    },
    {
      "epoch": 1.144578313253012,
      "grad_norm": 0.3949461877346039,
      "learning_rate": 0.00012838276181649673,
      "loss": 0.4011,
      "step": 2470
    },
    {
      "epoch": 1.149212233549583,
      "grad_norm": 0.5482661128044128,
      "learning_rate": 0.0001276876737720111,
      "loss": 0.479,
      "step": 2480
    },
    {
      "epoch": 1.1538461538461537,
      "grad_norm": 0.3622649013996124,
      "learning_rate": 0.00012699258572752546,
      "loss": 0.3742,
      "step": 2490
    },
    {
      "epoch": 1.1584800741427248,
      "grad_norm": 0.45524731278419495,
      "learning_rate": 0.00012629749768303984,
      "loss": 0.4062,
      "step": 2500
    }
  ],
  "logging_steps": 10,
  "max_steps": 4316,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.799325381832704e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}