{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 42.849557522123895,
  "eval_steps": 5000,
  "global_step": 2400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.17699115044247787,
      "grad_norm": 3.7271776676935064,
      "learning_rate": 2.785515320334262e-07,
      "loss": 0.8908,
      "step": 10
    },
    {
      "epoch": 0.35398230088495575,
      "grad_norm": 3.670737329981903,
      "learning_rate": 5.571030640668524e-07,
      "loss": 0.9077,
      "step": 20
    },
    {
      "epoch": 0.5309734513274337,
      "grad_norm": 2.5209641167492456,
      "learning_rate": 8.356545961002786e-07,
      "loss": 0.9111,
      "step": 30
    },
    {
      "epoch": 0.7079646017699115,
      "grad_norm": 1.7589966584950831,
      "learning_rate": 1.1142061281337048e-06,
      "loss": 0.851,
      "step": 40
    },
    {
      "epoch": 0.8849557522123894,
      "grad_norm": 0.9269797048205026,
      "learning_rate": 1.392757660167131e-06,
      "loss": 0.7817,
      "step": 50
    },
    {
      "epoch": 1.0707964601769913,
      "grad_norm": 0.7581016735069944,
      "learning_rate": 1.6713091922005572e-06,
      "loss": 0.8326,
      "step": 60
    },
    {
      "epoch": 1.247787610619469,
      "grad_norm": 0.5609083123432701,
      "learning_rate": 1.9498607242339835e-06,
      "loss": 0.6992,
      "step": 70
    },
    {
      "epoch": 1.424778761061947,
      "grad_norm": 0.44823390625971854,
      "learning_rate": 2.2284122562674097e-06,
      "loss": 0.6624,
      "step": 80
    },
    {
      "epoch": 1.6017699115044248,
      "grad_norm": 0.42125861316536894,
      "learning_rate": 2.506963788300836e-06,
      "loss": 0.6426,
      "step": 90
    },
    {
      "epoch": 1.7787610619469025,
      "grad_norm": 0.35815370826987114,
      "learning_rate": 2.785515320334262e-06,
      "loss": 0.6128,
      "step": 100
    },
    {
      "epoch": 1.9557522123893807,
      "grad_norm": 0.4577270620590399,
      "learning_rate": 3.064066852367688e-06,
      "loss": 0.6093,
      "step": 110
    },
    {
      "epoch": 2.1415929203539825,
      "grad_norm": 0.34913913967622867,
      "learning_rate": 3.3426183844011143e-06,
      "loss": 0.6398,
      "step": 120
    },
    {
      "epoch": 2.3185840707964602,
      "grad_norm": 0.38180716811578924,
      "learning_rate": 3.6211699164345405e-06,
      "loss": 0.5584,
      "step": 130
    },
    {
      "epoch": 2.495575221238938,
      "grad_norm": 0.369627211366021,
      "learning_rate": 3.899721448467967e-06,
      "loss": 0.5709,
      "step": 140
    },
    {
      "epoch": 2.672566371681416,
      "grad_norm": 0.3620391150396922,
      "learning_rate": 4.178272980501394e-06,
      "loss": 0.5367,
      "step": 150
    },
    {
      "epoch": 2.849557522123894,
      "grad_norm": 0.3783946938245177,
      "learning_rate": 4.456824512534819e-06,
      "loss": 0.5493,
      "step": 160
    },
    {
      "epoch": 3.0353982300884956,
      "grad_norm": 0.3583783631314861,
      "learning_rate": 4.735376044568246e-06,
      "loss": 0.5843,
      "step": 170
    },
    {
      "epoch": 3.2123893805309733,
      "grad_norm": 0.3828611223359035,
      "learning_rate": 5.013927576601672e-06,
      "loss": 0.515,
      "step": 180
    },
    {
      "epoch": 3.3893805309734515,
      "grad_norm": 0.3834571050098614,
      "learning_rate": 5.292479108635098e-06,
      "loss": 0.5032,
      "step": 190
    },
    {
      "epoch": 3.566371681415929,
      "grad_norm": 0.4546286324545871,
      "learning_rate": 5.571030640668524e-06,
      "loss": 0.5078,
      "step": 200
    },
    {
      "epoch": 3.7433628318584073,
      "grad_norm": 0.35842721167138686,
      "learning_rate": 5.849582172701951e-06,
      "loss": 0.5045,
      "step": 210
    },
    {
      "epoch": 3.920353982300885,
      "grad_norm": 0.3721307952764144,
      "learning_rate": 6.128133704735376e-06,
      "loss": 0.4889,
      "step": 220
    },
    {
      "epoch": 4.106194690265487,
      "grad_norm": 0.3486734629862048,
      "learning_rate": 6.406685236768803e-06,
      "loss": 0.5256,
      "step": 230
    },
    {
      "epoch": 4.283185840707965,
      "grad_norm": 0.35747967960235744,
      "learning_rate": 6.685236768802229e-06,
      "loss": 0.4733,
      "step": 240
    },
    {
      "epoch": 4.460176991150442,
      "grad_norm": 0.3460872307860064,
      "learning_rate": 6.963788300835655e-06,
      "loss": 0.4636,
      "step": 250
    },
    {
      "epoch": 4.6371681415929205,
      "grad_norm": 0.35573355527709355,
      "learning_rate": 7.242339832869081e-06,
      "loss": 0.4623,
      "step": 260
    },
    {
      "epoch": 4.814159292035399,
      "grad_norm": 0.35666347655438735,
      "learning_rate": 7.5208913649025075e-06,
      "loss": 0.4602,
      "step": 270
    },
    {
      "epoch": 4.991150442477876,
      "grad_norm": 0.3564250144229788,
      "learning_rate": 7.799442896935934e-06,
      "loss": 0.4542,
      "step": 280
    },
    {
      "epoch": 5.176991150442478,
      "grad_norm": 0.4025504652700621,
      "learning_rate": 8.07799442896936e-06,
      "loss": 0.4659,
      "step": 290
    },
    {
      "epoch": 5.353982300884955,
      "grad_norm": 0.5428520516662031,
      "learning_rate": 8.356545961002787e-06,
      "loss": 0.4285,
      "step": 300
    },
    {
      "epoch": 5.530973451327434,
      "grad_norm": 0.3928994965730304,
      "learning_rate": 8.635097493036211e-06,
      "loss": 0.4226,
      "step": 310
    },
    {
      "epoch": 5.707964601769912,
      "grad_norm": 0.3850485302130108,
      "learning_rate": 8.913649025069639e-06,
      "loss": 0.4148,
      "step": 320
    },
    {
      "epoch": 5.88495575221239,
      "grad_norm": 0.37058941523915273,
      "learning_rate": 9.192200557103064e-06,
      "loss": 0.4285,
      "step": 330
    },
    {
      "epoch": 6.070796460176991,
      "grad_norm": 0.41682139676536206,
      "learning_rate": 9.470752089136492e-06,
      "loss": 0.4465,
      "step": 340
    },
    {
      "epoch": 6.247787610619469,
      "grad_norm": 0.3924739843860392,
      "learning_rate": 9.749303621169918e-06,
      "loss": 0.3838,
      "step": 350
    },
    {
      "epoch": 6.424778761061947,
      "grad_norm": 0.35592567356404897,
      "learning_rate": 9.9999976276417e-06,
      "loss": 0.3893,
      "step": 360
    },
    {
      "epoch": 6.601769911504425,
      "grad_norm": 0.4159187900771811,
      "learning_rate": 9.999712947369595e-06,
      "loss": 0.3803,
      "step": 370
    },
    {
      "epoch": 6.778761061946903,
      "grad_norm": 0.3857300466302711,
      "learning_rate": 9.998953826391322e-06,
      "loss": 0.38,
      "step": 380
    },
    {
      "epoch": 6.95575221238938,
      "grad_norm": 0.3886065884171291,
      "learning_rate": 9.997720336742596e-06,
      "loss": 0.376,
      "step": 390
    },
    {
      "epoch": 7.1415929203539825,
      "grad_norm": 0.3475659029068151,
      "learning_rate": 9.996012595473676e-06,
      "loss": 0.3883,
      "step": 400
    },
    {
      "epoch": 7.31858407079646,
      "grad_norm": 0.3540420584120781,
      "learning_rate": 9.993830764638262e-06,
      "loss": 0.3335,
      "step": 410
    },
    {
      "epoch": 7.495575221238938,
      "grad_norm": 0.4449981740267561,
      "learning_rate": 9.991175051278111e-06,
      "loss": 0.3323,
      "step": 420
    },
    {
      "epoch": 7.672566371681416,
      "grad_norm": 0.3696907704775234,
      "learning_rate": 9.988045707403394e-06,
      "loss": 0.3359,
      "step": 430
    },
    {
      "epoch": 7.849557522123893,
      "grad_norm": 0.38034972332442196,
      "learning_rate": 9.984443029968786e-06,
      "loss": 0.3211,
      "step": 440
    },
    {
      "epoch": 8.035398230088495,
      "grad_norm": 0.4056044711686268,
      "learning_rate": 9.980367360845278e-06,
      "loss": 0.3434,
      "step": 450
    },
    {
      "epoch": 8.212389380530974,
      "grad_norm": 0.3627220637163369,
      "learning_rate": 9.975819086787743e-06,
      "loss": 0.2758,
      "step": 460
    },
    {
      "epoch": 8.389380530973451,
      "grad_norm": 0.3775354605167603,
      "learning_rate": 9.970798639398228e-06,
      "loss": 0.2768,
      "step": 470
    },
    {
      "epoch": 8.56637168141593,
      "grad_norm": 0.45650386261945936,
      "learning_rate": 9.965306495085005e-06,
      "loss": 0.2773,
      "step": 480
    },
    {
      "epoch": 8.743362831858407,
      "grad_norm": 0.3307082905256443,
      "learning_rate": 9.959343175017362e-06,
      "loss": 0.2718,
      "step": 490
    },
    {
      "epoch": 8.920353982300885,
      "grad_norm": 0.3563870745711318,
      "learning_rate": 9.952909245076141e-06,
      "loss": 0.2737,
      "step": 500
    },
    {
      "epoch": 9.106194690265486,
      "grad_norm": 0.5312131657692304,
      "learning_rate": 9.946005315800047e-06,
      "loss": 0.259,
      "step": 510
    },
    {
      "epoch": 9.283185840707965,
      "grad_norm": 0.3897340271825655,
      "learning_rate": 9.93863204232771e-06,
      "loss": 0.2069,
      "step": 520
    },
    {
      "epoch": 9.460176991150442,
      "grad_norm": 0.4074984143767453,
      "learning_rate": 9.930790124335511e-06,
      "loss": 0.2071,
      "step": 530
    },
    {
      "epoch": 9.63716814159292,
      "grad_norm": 0.4025232487404969,
      "learning_rate": 9.922480305971193e-06,
      "loss": 0.2098,
      "step": 540
    },
    {
      "epoch": 9.814159292035399,
      "grad_norm": 0.40076309227670925,
      "learning_rate": 9.91370337578325e-06,
      "loss": 0.2206,
      "step": 550
    },
    {
      "epoch": 9.991150442477876,
      "grad_norm": 0.41455073775483275,
      "learning_rate": 9.904460166646084e-06,
      "loss": 0.2127,
      "step": 560
    },
    {
      "epoch": 10.176991150442477,
      "grad_norm": 0.5102932920719749,
      "learning_rate": 9.894751555680988e-06,
      "loss": 0.1734,
      "step": 570
    },
    {
      "epoch": 10.353982300884956,
      "grad_norm": 0.4418337454980142,
      "learning_rate": 9.884578464172901e-06,
      "loss": 0.1449,
      "step": 580
    },
    {
      "epoch": 10.530973451327434,
      "grad_norm": 0.4645857386852471,
      "learning_rate": 9.873941857482988e-06,
      "loss": 0.149,
      "step": 590
    },
    {
      "epoch": 10.70796460176991,
      "grad_norm": 0.44701917165189714,
      "learning_rate": 9.862842744957037e-06,
      "loss": 0.1445,
      "step": 600
    },
    {
      "epoch": 10.88495575221239,
      "grad_norm": 0.4403519944347269,
      "learning_rate": 9.85128217982967e-06,
      "loss": 0.154,
      "step": 610
    },
    {
      "epoch": 11.070796460176991,
      "grad_norm": 0.5432759091413589,
      "learning_rate": 9.8392612591244e-06,
      "loss": 0.1399,
      "step": 620
    },
    {
      "epoch": 11.247787610619469,
      "grad_norm": 0.5393334729442735,
      "learning_rate": 9.826781123549542e-06,
      "loss": 0.0932,
      "step": 630
    },
    {
      "epoch": 11.424778761061948,
      "grad_norm": 0.48955527875730576,
      "learning_rate": 9.813842957389953e-06,
      "loss": 0.1005,
      "step": 640
    },
    {
      "epoch": 11.601769911504425,
      "grad_norm": 0.48039084849076596,
      "learning_rate": 9.800447988394657e-06,
      "loss": 0.0971,
      "step": 650
    },
    {
      "epoch": 11.778761061946902,
      "grad_norm": 0.4790835393900875,
      "learning_rate": 9.786597487660336e-06,
      "loss": 0.1004,
      "step": 660
    },
    {
      "epoch": 11.955752212389381,
      "grad_norm": 0.4287760706346846,
      "learning_rate": 9.772292769510718e-06,
      "loss": 0.098,
      "step": 670
    },
    {
      "epoch": 12.141592920353983,
      "grad_norm": 0.5206942431108207,
      "learning_rate": 9.75753519137185e-06,
      "loss": 0.078,
      "step": 680
    },
    {
      "epoch": 12.31858407079646,
      "grad_norm": 0.5235873880865046,
      "learning_rate": 9.742326153643285e-06,
      "loss": 0.0607,
      "step": 690
    },
    {
      "epoch": 12.495575221238939,
      "grad_norm": 0.37417943883916716,
      "learning_rate": 9.726667099565202e-06,
      "loss": 0.0589,
      "step": 700
    },
    {
      "epoch": 12.672566371681416,
      "grad_norm": 0.46680212830420037,
      "learning_rate": 9.710559515081446e-06,
      "loss": 0.0602,
      "step": 710
    },
    {
      "epoch": 12.849557522123893,
      "grad_norm": 0.44256818209773113,
      "learning_rate": 9.69400492869852e-06,
      "loss": 0.0614,
      "step": 720
    },
    {
      "epoch": 13.035398230088495,
      "grad_norm": 0.3392248898765409,
      "learning_rate": 9.677004911340539e-06,
      "loss": 0.0603,
      "step": 730
    },
    {
      "epoch": 13.212389380530974,
      "grad_norm": 0.5068038205049078,
      "learning_rate": 9.659561076200173e-06,
      "loss": 0.0376,
      "step": 740
    },
    {
      "epoch": 13.389380530973451,
      "grad_norm": 0.3566307973050844,
      "learning_rate": 9.64167507858554e-06,
      "loss": 0.0358,
      "step": 750
    },
    {
      "epoch": 13.56637168141593,
      "grad_norm": 0.4251538821475592,
      "learning_rate": 9.62334861576315e-06,
      "loss": 0.0358,
      "step": 760
    },
    {
      "epoch": 13.743362831858407,
      "grad_norm": 0.3816800016697675,
      "learning_rate": 9.604583426796837e-06,
      "loss": 0.0373,
      "step": 770
    },
    {
      "epoch": 13.920353982300885,
      "grad_norm": 0.372768545921348,
      "learning_rate": 9.585381292382734e-06,
      "loss": 0.036,
      "step": 780
    },
    {
      "epoch": 14.106194690265486,
      "grad_norm": 0.41177583770525134,
      "learning_rate": 9.565744034680291e-06,
      "loss": 0.032,
      "step": 790
    },
    {
      "epoch": 14.283185840707965,
      "grad_norm": 0.330859615956022,
      "learning_rate": 9.545673517139376e-06,
      "loss": 0.022,
      "step": 800
    },
    {
      "epoch": 14.460176991150442,
      "grad_norm": 0.3292330167120311,
      "learning_rate": 9.52517164432343e-06,
      "loss": 0.0216,
      "step": 810
    },
    {
      "epoch": 14.63716814159292,
      "grad_norm": 0.3367773688229168,
      "learning_rate": 9.50424036172875e-06,
      "loss": 0.0217,
      "step": 820
    },
    {
      "epoch": 14.814159292035399,
      "grad_norm": 0.3825032388799719,
      "learning_rate": 9.482881655599867e-06,
      "loss": 0.0232,
      "step": 830
    },
    {
      "epoch": 14.991150442477876,
      "grad_norm": 0.3596880534313284,
      "learning_rate": 9.461097552741065e-06,
      "loss": 0.0213,
      "step": 840
    },
    {
      "epoch": 15.176991150442477,
      "grad_norm": 0.33569920428582833,
      "learning_rate": 9.438890120324049e-06,
      "loss": 0.0162,
      "step": 850
    },
    {
      "epoch": 15.353982300884956,
      "grad_norm": 0.3222473354568032,
      "learning_rate": 9.416261465691786e-06,
      "loss": 0.0148,
      "step": 860
    },
    {
      "epoch": 15.530973451327434,
      "grad_norm": 0.31860277340421633,
      "learning_rate": 9.393213736158532e-06,
      "loss": 0.0138,
      "step": 870
    },
    {
      "epoch": 15.70796460176991,
      "grad_norm": 0.33012842524691205,
      "learning_rate": 9.369749118806063e-06,
      "loss": 0.0139,
      "step": 880
    },
    {
      "epoch": 15.88495575221239,
      "grad_norm": 0.31060024250404394,
      "learning_rate": 9.345869840276138e-06,
      "loss": 0.0139,
      "step": 890
    },
    {
      "epoch": 16.07079646017699,
      "grad_norm": 0.29279639999523854,
      "learning_rate": 9.321578166559202e-06,
      "loss": 0.0134,
      "step": 900
    },
    {
      "epoch": 16.24778761061947,
      "grad_norm": 0.25025457898908915,
      "learning_rate": 9.296876402779357e-06,
      "loss": 0.0094,
      "step": 910
    },
    {
      "epoch": 16.424778761061948,
      "grad_norm": 0.26581112654153966,
      "learning_rate": 9.271766892975632e-06,
      "loss": 0.009,
      "step": 920
    },
    {
      "epoch": 16.601769911504427,
      "grad_norm": 0.28504807326383513,
      "learning_rate": 9.246252019879526e-06,
      "loss": 0.0086,
      "step": 930
    },
    {
      "epoch": 16.778761061946902,
      "grad_norm": 0.26705155273970366,
      "learning_rate": 9.22033420468893e-06,
      "loss": 0.009,
      "step": 940
    },
    {
      "epoch": 16.95575221238938,
      "grad_norm": 0.29212797270505986,
      "learning_rate": 9.194015906838345e-06,
      "loss": 0.0085,
      "step": 950
    },
    {
      "epoch": 17.141592920353983,
      "grad_norm": 0.2832856227572913,
      "learning_rate": 9.167299623765515e-06,
      "loss": 0.0075,
      "step": 960
    },
    {
      "epoch": 17.31858407079646,
      "grad_norm": 0.22059241414450476,
      "learning_rate": 9.14018789067443e-06,
      "loss": 0.006,
      "step": 970
    },
    {
      "epoch": 17.495575221238937,
      "grad_norm": 0.28049908913714794,
      "learning_rate": 9.11268328029475e-06,
      "loss": 0.0063,
      "step": 980
    },
    {
      "epoch": 17.672566371681416,
      "grad_norm": 0.2252426153757288,
      "learning_rate": 9.08478840263767e-06,
      "loss": 0.0061,
      "step": 990
    },
    {
      "epoch": 17.849557522123895,
      "grad_norm": 0.2856905436001316,
      "learning_rate": 9.05650590474825e-06,
      "loss": 0.0058,
      "step": 1000
    },
    {
      "epoch": 18.035398230088497,
      "grad_norm": 0.18171559344391483,
      "learning_rate": 9.027838470454222e-06,
      "loss": 0.0058,
      "step": 1010
    },
    {
      "epoch": 18.212389380530972,
      "grad_norm": 0.17446949191807126,
      "learning_rate": 8.998788820111323e-06,
      "loss": 0.0039,
      "step": 1020
    },
    {
      "epoch": 18.38938053097345,
      "grad_norm": 0.17008467931611163,
      "learning_rate": 8.969359710345132e-06,
      "loss": 0.0044,
      "step": 1030
    },
    {
      "epoch": 18.56637168141593,
      "grad_norm": 0.24666749937310403,
      "learning_rate": 8.939553933789499e-06,
      "loss": 0.0044,
      "step": 1040
    },
    {
      "epoch": 18.743362831858406,
      "grad_norm": 0.20151994276390126,
      "learning_rate": 8.90937431882154e-06,
      "loss": 0.0038,
      "step": 1050
    },
    {
      "epoch": 18.920353982300885,
      "grad_norm": 0.23754785196862227,
      "learning_rate": 8.878823729293238e-06,
      "loss": 0.004,
      "step": 1060
    },
    {
      "epoch": 19.106194690265486,
      "grad_norm": 0.14117052858029483,
      "learning_rate": 8.847905064259683e-06,
      "loss": 0.0037,
      "step": 1070
    },
    {
      "epoch": 19.283185840707965,
      "grad_norm": 0.18847378240235996,
      "learning_rate": 8.816621257703969e-06,
      "loss": 0.0031,
      "step": 1080
    },
    {
      "epoch": 19.460176991150444,
      "grad_norm": 0.17673625219477776,
      "learning_rate": 8.784975278258783e-06,
      "loss": 0.0031,
      "step": 1090
    },
    {
      "epoch": 19.63716814159292,
      "grad_norm": 0.16513937168636283,
      "learning_rate": 8.752970128924696e-06,
      "loss": 0.003,
      "step": 1100
    },
    {
      "epoch": 19.8141592920354,
      "grad_norm": 0.1875769935710693,
      "learning_rate": 8.7206088467852e-06,
      "loss": 0.0035,
      "step": 1110
    },
    {
      "epoch": 19.991150442477878,
      "grad_norm": 0.1942090816977111,
      "learning_rate": 8.687894502718503e-06,
      "loss": 0.003,
      "step": 1120
    },
    {
      "epoch": 20.17699115044248,
      "grad_norm": 0.16849498809305985,
      "learning_rate": 8.654830201106133e-06,
      "loss": 0.0027,
      "step": 1130
    },
    {
      "epoch": 20.353982300884955,
      "grad_norm": 0.15254024942857228,
      "learning_rate": 8.621419079538337e-06,
      "loss": 0.0025,
      "step": 1140
    },
    {
      "epoch": 20.530973451327434,
      "grad_norm": 0.1522681909876635,
      "learning_rate": 8.587664308516361e-06,
      "loss": 0.0024,
      "step": 1150
    },
    {
      "epoch": 20.707964601769913,
      "grad_norm": 0.14801504967498672,
      "learning_rate": 8.553569091151576e-06,
      "loss": 0.0023,
      "step": 1160
    },
    {
      "epoch": 20.884955752212388,
      "grad_norm": 0.13105166934570284,
      "learning_rate": 8.519136662861531e-06,
      "loss": 0.0023,
      "step": 1170
    },
    {
      "epoch": 21.07079646017699,
      "grad_norm": 0.14743879775955487,
      "learning_rate": 8.484370291062927e-06,
      "loss": 0.0025,
      "step": 1180
    },
    {
      "epoch": 21.24778761061947,
      "grad_norm": 0.1638754042647016,
      "learning_rate": 8.449273274861566e-06,
      "loss": 0.0019,
      "step": 1190
    },
    {
      "epoch": 21.424778761061948,
      "grad_norm": 0.1428524275854489,
      "learning_rate": 8.413848944739282e-06,
      "loss": 0.0019,
      "step": 1200
    },
    {
      "epoch": 21.601769911504427,
      "grad_norm": 0.13186832777609797,
      "learning_rate": 8.378100662237904e-06,
      "loss": 0.0019,
      "step": 1210
    },
    {
      "epoch": 21.778761061946902,
      "grad_norm": 0.13394117392395247,
      "learning_rate": 8.342031819640263e-06,
      "loss": 0.002,
      "step": 1220
    },
    {
      "epoch": 21.95575221238938,
      "grad_norm": 0.13314998013772597,
      "learning_rate": 8.305645839648287e-06,
      "loss": 0.002,
      "step": 1230
    },
    {
      "epoch": 22.141592920353983,
      "grad_norm": 0.11946313669563102,
      "learning_rate": 8.268946175058214e-06,
      "loss": 0.0017,
      "step": 1240
    },
    {
      "epoch": 22.31858407079646,
      "grad_norm": 0.13692545965091507,
      "learning_rate": 8.231936308432935e-06,
      "loss": 0.0017,
      "step": 1250
    },
    {
      "epoch": 22.495575221238937,
      "grad_norm": 0.1280253132127409,
      "learning_rate": 8.194619751771527e-06,
      "loss": 0.0016,
      "step": 1260
    },
    {
      "epoch": 22.672566371681416,
      "grad_norm": 0.14323656703506174,
      "learning_rate": 8.157000046175984e-06,
      "loss": 0.0017,
      "step": 1270
    },
    {
      "epoch": 22.849557522123895,
      "grad_norm": 0.132872080833497,
      "learning_rate": 8.119080761515197e-06,
      "loss": 0.0016,
      "step": 1280
    },
    {
      "epoch": 23.035398230088497,
      "grad_norm": 0.09675254792509355,
      "learning_rate": 8.080865496086177e-06,
      "loss": 0.0018,
      "step": 1290
    },
    {
      "epoch": 23.212389380530972,
      "grad_norm": 0.13055871987919138,
      "learning_rate": 8.042357876272626e-06,
      "loss": 0.0014,
      "step": 1300
    },
    {
      "epoch": 23.38938053097345,
      "grad_norm": 0.10202870847439023,
      "learning_rate": 8.003561556200796e-06,
      "loss": 0.0013,
      "step": 1310
    },
    {
      "epoch": 23.56637168141593,
      "grad_norm": 0.10027070765055289,
      "learning_rate": 7.964480217392739e-06,
      "loss": 0.0013,
      "step": 1320
    },
    {
      "epoch": 23.743362831858406,
      "grad_norm": 0.09963074007081689,
      "learning_rate": 7.925117568416966e-06,
      "loss": 0.0013,
      "step": 1330
    },
    {
      "epoch": 23.920353982300885,
      "grad_norm": 0.11679760725818564,
      "learning_rate": 7.885477344536516e-06,
      "loss": 0.0014,
      "step": 1340
    },
    {
      "epoch": 24.106194690265486,
      "grad_norm": 0.0754474545374589,
      "learning_rate": 7.845563307354506e-06,
      "loss": 0.0013,
      "step": 1350
    },
    {
      "epoch": 24.283185840707965,
      "grad_norm": 0.1253093329096803,
      "learning_rate": 7.80537924445718e-06,
      "loss": 0.0011,
      "step": 1360
    },
    {
      "epoch": 24.460176991150444,
      "grad_norm": 0.10972348039200527,
      "learning_rate": 7.764928969054493e-06,
      "loss": 0.0011,
      "step": 1370
    },
    {
      "epoch": 24.63716814159292,
      "grad_norm": 0.34554758886579884,
      "learning_rate": 7.724216319618257e-06,
      "loss": 0.0011,
      "step": 1380
    },
    {
      "epoch": 24.8141592920354,
      "grad_norm": 0.14420268593428043,
      "learning_rate": 7.683245159517903e-06,
      "loss": 0.0011,
      "step": 1390
    },
    {
      "epoch": 24.991150442477878,
      "grad_norm": 0.10272004081139009,
      "learning_rate": 7.642019376653858e-06,
      "loss": 0.0011,
      "step": 1400
    },
    {
      "epoch": 25.17699115044248,
      "grad_norm": 0.10066435427255882,
      "learning_rate": 7.600542883088629e-06,
      "loss": 0.001,
      "step": 1410
    },
    {
      "epoch": 25.353982300884955,
      "grad_norm": 0.10321823577887183,
      "learning_rate": 7.5588196146755526e-06,
      "loss": 0.001,
      "step": 1420
    },
    {
      "epoch": 25.530973451327434,
      "grad_norm": 0.09420357679341983,
      "learning_rate": 7.5168535306853155e-06,
      "loss": 0.001,
      "step": 1430
    },
    {
      "epoch": 25.707964601769913,
      "grad_norm": 0.09068373934441006,
      "learning_rate": 7.474648613430252e-06,
      "loss": 0.0011,
      "step": 1440
    },
    {
      "epoch": 25.884955752212388,
      "grad_norm": 0.09875146708972135,
      "learning_rate": 7.432208867886439e-06,
      "loss": 0.0011,
      "step": 1450
    },
    {
      "epoch": 26.07079646017699,
      "grad_norm": 0.08661242865325204,
      "learning_rate": 7.389538321313652e-06,
      "loss": 0.0011,
      "step": 1460
    },
    {
      "epoch": 26.24778761061947,
      "grad_norm": 0.08825158066362472,
      "learning_rate": 7.346641022873205e-06,
      "loss": 0.0009,
      "step": 1470
    },
    {
      "epoch": 26.424778761061948,
      "grad_norm": 0.10683045227322636,
      "learning_rate": 7.303521043243711e-06,
      "loss": 0.0009,
      "step": 1480
    },
    {
      "epoch": 26.601769911504427,
      "grad_norm": 0.12235791354083214,
      "learning_rate": 7.2601824742347985e-06,
      "loss": 0.0009,
      "step": 1490
    },
    {
      "epoch": 26.778761061946902,
      "grad_norm": 0.08514569373030315,
      "learning_rate": 7.2166294283988315e-06,
      "loss": 0.001,
      "step": 1500
    },
    {
      "epoch": 26.95575221238938,
      "grad_norm": 0.0842159487022361,
      "learning_rate": 7.172866038640644e-06,
      "loss": 0.0009,
      "step": 1510
    },
    {
      "epoch": 27.141592920353983,
      "grad_norm": 0.08674683001185676,
      "learning_rate": 7.128896457825364e-06,
      "loss": 0.0008,
      "step": 1520
    },
    {
      "epoch": 27.31858407079646,
      "grad_norm": 0.07909420218918227,
      "learning_rate": 7.084724858384326e-06,
      "loss": 0.0008,
      "step": 1530
    },
    {
      "epoch": 27.495575221238937,
      "grad_norm": 0.10596594004861332,
      "learning_rate": 7.04035543191914e-06,
      "loss": 0.0008,
      "step": 1540
    },
    {
      "epoch": 27.672566371681416,
      "grad_norm": 0.0840755602282058,
      "learning_rate": 6.995792388803929e-06,
      "loss": 0.001,
      "step": 1550
    },
    {
      "epoch": 27.849557522123895,
      "grad_norm": 0.07853861446661353,
      "learning_rate": 6.9510399577857976e-06,
      "loss": 0.0009,
      "step": 1560
    },
    {
      "epoch": 28.035398230088497,
      "grad_norm": 0.07288196207830411,
      "learning_rate": 6.906102385583548e-06,
      "loss": 0.001,
      "step": 1570
    },
    {
      "epoch": 28.212389380530972,
      "grad_norm": 0.0687447241728727,
      "learning_rate": 6.860983936484689e-06,
      "loss": 0.0007,
      "step": 1580
    },
    {
      "epoch": 28.38938053097345,
      "grad_norm": 0.07585751703066541,
      "learning_rate": 6.815688891940796e-06,
      "loss": 0.0007,
      "step": 1590
    },
    {
      "epoch": 28.56637168141593,
      "grad_norm": 0.06895349895505572,
      "learning_rate": 6.770221550161214e-06,
      "loss": 0.0008,
      "step": 1600
    },
    {
      "epoch": 28.743362831858406,
      "grad_norm": 0.12402782198503459,
      "learning_rate": 6.724586225705191e-06,
      "loss": 0.0008,
      "step": 1610
    },
    {
      "epoch": 28.920353982300885,
      "grad_norm": 0.1527989353563491,
      "learning_rate": 6.678787249072456e-06,
      "loss": 0.0009,
      "step": 1620
    },
    {
      "epoch": 29.106194690265486,
      "grad_norm": 0.06954410595068672,
      "learning_rate": 6.632828966292279e-06,
      "loss": 0.0009,
      "step": 1630
    },
    {
      "epoch": 29.283185840707965,
      "grad_norm": 0.1073267489136229,
      "learning_rate": 6.586715738511067e-06,
      "loss": 0.0009,
      "step": 1640
    },
    {
      "epoch": 29.460176991150444,
      "grad_norm": 0.08313874513099292,
      "learning_rate": 6.540451941578505e-06,
      "loss": 0.0009,
      "step": 1650
    },
    {
      "epoch": 29.63716814159292,
      "grad_norm": 0.12731437056639278,
      "learning_rate": 6.494041965632335e-06,
      "loss": 0.0009,
      "step": 1660
    },
    {
      "epoch": 29.8141592920354,
      "grad_norm": 0.09692619372507454,
      "learning_rate": 6.447490214681742e-06,
      "loss": 0.0009,
      "step": 1670
    },
    {
      "epoch": 29.991150442477878,
      "grad_norm": 0.10737775556639746,
      "learning_rate": 6.400801106189457e-06,
      "loss": 0.0008,
      "step": 1680
    },
    {
      "epoch": 30.17699115044248,
      "grad_norm": 0.10960316406181361,
      "learning_rate": 6.353979070652555e-06,
      "loss": 0.0007,
      "step": 1690
    },
    {
      "epoch": 30.353982300884955,
      "grad_norm": 0.0828957218344359,
      "learning_rate": 6.307028551182041e-06,
      "loss": 0.0009,
      "step": 1700
    },
    {
      "epoch": 30.530973451327434,
      "grad_norm": 0.06197924420139391,
      "learning_rate": 6.259954003081215e-06,
      "loss": 0.0008,
      "step": 1710
    },
    {
      "epoch": 30.707964601769913,
      "grad_norm": 0.13419189832528763,
      "learning_rate": 6.212759893422908e-06,
      "loss": 0.0009,
      "step": 1720
    },
    {
      "epoch": 30.884955752212388,
      "grad_norm": 0.07335797834009149,
      "learning_rate": 6.165450700625565e-06,
      "loss": 0.0009,
      "step": 1730
    },
    {
      "epoch": 31.07079646017699,
      "grad_norm": 0.08616596067290054,
      "learning_rate": 6.118030914028292e-06,
      "loss": 0.0009,
      "step": 1740
    },
    {
      "epoch": 31.24778761061947,
      "grad_norm": 0.11160398222363577,
      "learning_rate": 6.070505033464835e-06,
      "loss": 0.0008,
      "step": 1750
    },
    {
      "epoch": 31.424778761061948,
      "grad_norm": 0.08223375466647835,
      "learning_rate": 6.022877568836579e-06,
      "loss": 0.0008,
      "step": 1760
    },
    {
      "epoch": 31.601769911504427,
      "grad_norm": 0.08274688851558104,
      "learning_rate": 5.975153039684579e-06,
      "loss": 0.0008,
      "step": 1770
    },
    {
      "epoch": 31.778761061946902,
      "grad_norm": 0.0970587549375438,
      "learning_rate": 5.927335974760699e-06,
      "loss": 0.0008,
      "step": 1780
    },
    {
      "epoch": 31.95575221238938,
      "grad_norm": 0.054546179433890875,
      "learning_rate": 5.87943091159785e-06,
      "loss": 0.0007,
      "step": 1790
    },
    {
      "epoch": 32.14159292035398,
      "grad_norm": 0.05969746156171825,
      "learning_rate": 5.831442396079413e-06,
      "loss": 0.0008,
      "step": 1800
    },
    {
      "epoch": 32.31858407079646,
      "grad_norm": 0.05018822775479197,
      "learning_rate": 5.78337498200786e-06,
      "loss": 0.0007,
      "step": 1810
    },
    {
      "epoch": 32.49557522123894,
      "grad_norm": 0.08901463204658712,
      "learning_rate": 5.735233230672636e-06,
      "loss": 0.0007,
      "step": 1820
    },
    {
      "epoch": 32.67256637168141,
      "grad_norm": 0.07922243579859921,
      "learning_rate": 5.687021710417308e-06,
      "loss": 0.0007,
      "step": 1830
    },
    {
      "epoch": 32.849557522123895,
      "grad_norm": 0.07354939715718499,
      "learning_rate": 5.638744996206074e-06,
      "loss": 0.0006,
      "step": 1840
    },
    {
      "epoch": 33.0353982300885,
      "grad_norm": 0.06160878470580236,
      "learning_rate": 5.590407669189612e-06,
      "loss": 0.0006,
      "step": 1850
    },
    {
      "epoch": 33.21238938053097,
      "grad_norm": 0.06166669775060316,
      "learning_rate": 5.542014316270377e-06,
      "loss": 0.0005,
      "step": 1860
    },
    {
      "epoch": 33.389380530973455,
      "grad_norm": 0.052889879306465305,
      "learning_rate": 5.493569529667312e-06,
      "loss": 0.0006,
      "step": 1870
    },
    {
      "epoch": 33.56637168141593,
      "grad_norm": 0.08213350355164716,
      "learning_rate": 5.445077906480095e-06,
      "loss": 0.0006,
      "step": 1880
    },
    {
      "epoch": 33.743362831858406,
      "grad_norm": 0.08160047999287494,
      "learning_rate": 5.396544048252893e-06,
      "loss": 0.0005,
      "step": 1890
    },
    {
      "epoch": 33.92035398230089,
      "grad_norm": 0.07583900802666055,
      "learning_rate": 5.3479725605377065e-06,
      "loss": 0.0005,
      "step": 1900
    },
    {
      "epoch": 34.10619469026549,
      "grad_norm": 0.028472829495580533,
      "learning_rate": 5.299368052457332e-06,
      "loss": 0.0005,
      "step": 1910
    },
    {
      "epoch": 34.283185840707965,
      "grad_norm": 0.05015747871224623,
      "learning_rate": 5.250735136267993e-06,
      "loss": 0.0004,
      "step": 1920
    },
    {
      "epoch": 34.46017699115044,
      "grad_norm": 0.05176601714476982,
      "learning_rate": 5.2020784269216515e-06,
      "loss": 0.0004,
      "step": 1930
    },
    {
      "epoch": 34.63716814159292,
      "grad_norm": 0.037671952173425755,
      "learning_rate": 5.153402541628097e-06,
      "loss": 0.0004,
      "step": 1940
    },
    {
      "epoch": 34.8141592920354,
      "grad_norm": 0.06647847020125087,
      "learning_rate": 5.1047120994167855e-06,
      "loss": 0.0004,
      "step": 1950
    },
    {
      "epoch": 34.991150442477874,
      "grad_norm": 0.049900511058573065,
      "learning_rate": 5.056011720698536e-06,
      "loss": 0.0005,
      "step": 1960
    },
    {
      "epoch": 35.176991150442475,
      "grad_norm": 0.062213088302615654,
      "learning_rate": 5.007306026827076e-06,
      "loss": 0.0004,
      "step": 1970
    },
    {
      "epoch": 35.35398230088496,
      "grad_norm": 0.05221297387988636,
      "learning_rate": 4.958599639660508e-06,
      "loss": 0.0003,
      "step": 1980
    },
    {
      "epoch": 35.530973451327434,
      "grad_norm": 0.025844683052233374,
      "learning_rate": 4.909897181122725e-06,
      "loss": 0.0004,
      "step": 1990
    },
    {
      "epoch": 35.70796460176991,
      "grad_norm": 0.027696973790180324,
      "learning_rate": 4.861203272764813e-06,
      "loss": 0.0004,
      "step": 2000
    },
    {
      "epoch": 35.88495575221239,
      "grad_norm": 0.030687511235047944,
      "learning_rate": 4.8125225353265085e-06,
      "loss": 0.0004,
      "step": 2010
    },
    {
      "epoch": 36.07079646017699,
      "grad_norm": 0.04493009791881542,
      "learning_rate": 4.7638595882977064e-06,
      "loss": 0.0004,
      "step": 2020
    },
    {
      "epoch": 36.24778761061947,
      "grad_norm": 0.01691149961222786,
      "learning_rate": 4.71521904948011e-06,
      "loss": 0.0003,
      "step": 2030
    },
    {
      "epoch": 36.424778761061944,
      "grad_norm": 0.027515076083035817,
      "learning_rate": 4.666605534549021e-06,
      "loss": 0.0003,
      "step": 2040
    },
    {
      "epoch": 36.60176991150443,
      "grad_norm": 0.03559173688865591,
      "learning_rate": 4.618023656615352e-06,
      "loss": 0.0003,
      "step": 2050
    },
    {
      "epoch": 36.7787610619469,
      "grad_norm": 0.04165835396132989,
      "learning_rate": 4.569478025787869e-06,
      "loss": 0.0004,
      "step": 2060
    },
    {
      "epoch": 36.95575221238938,
      "grad_norm": 0.08306198347880518,
      "learning_rate": 4.520973248735715e-06,
      "loss": 0.0004,
      "step": 2070
    },
    {
      "epoch": 37.14159292035398,
      "grad_norm": 0.05411026344168162,
      "learning_rate": 4.472513928251275e-06,
      "loss": 0.0005,
      "step": 2080
    },
    {
      "epoch": 37.31858407079646,
      "grad_norm": 0.05227095996187008,
      "learning_rate": 4.424104662813396e-06,
      "loss": 0.0003,
      "step": 2090
    },
    {
      "epoch": 37.49557522123894,
      "grad_norm": 0.027806388434303872,
      "learning_rate": 4.375750046151023e-06,
      "loss": 0.0003,
      "step": 2100
    },
    {
      "epoch": 37.67256637168141,
      "grad_norm": 0.052658952971337866,
      "learning_rate": 4.3274546668072835e-06,
      "loss": 0.0003,
      "step": 2110
    },
    {
      "epoch": 37.849557522123895,
      "grad_norm": 0.030080205440969073,
      "learning_rate": 4.279223107704058e-06,
      "loss": 0.0005,
      "step": 2120
    },
    {
      "epoch": 38.0353982300885,
      "grad_norm": 0.05339256914519307,
      "learning_rate": 4.2310599457071e-06,
      "loss": 0.0004,
      "step": 2130
    },
    {
      "epoch": 38.21238938053097,
      "grad_norm": 0.04425363914132623,
      "learning_rate": 4.1829697511917146e-06,
      "loss": 0.0003,
      "step": 2140
    },
    {
      "epoch": 38.389380530973455,
      "grad_norm": 0.011312355977019536,
      "learning_rate": 4.134957087609065e-06,
      "loss": 0.0004,
      "step": 2150
    },
    {
      "epoch": 38.56637168141593,
      "grad_norm": 0.029346947515830845,
      "learning_rate": 4.087026511053116e-06,
      "loss": 0.0003,
      "step": 2160
    },
    {
      "epoch": 38.743362831858406,
      "grad_norm": 0.042536157363505335,
      "learning_rate": 4.0391825698283084e-06,
      "loss": 0.0003,
      "step": 2170
    },
    {
      "epoch": 38.92035398230089,
      "grad_norm": 0.03536377347443713,
      "learning_rate": 3.991429804017944e-06,
      "loss": 0.0003,
      "step": 2180
    },
    {
      "epoch": 39.10619469026549,
      "grad_norm": 0.05886812292462864,
      "learning_rate": 3.9437727450533605e-06,
      "loss": 0.0003,
      "step": 2190
    },
    {
      "epoch": 39.283185840707965,
      "grad_norm": 0.01648121399449709,
      "learning_rate": 3.89621591528393e-06,
      "loss": 0.0004,
      "step": 2200
    },
    {
      "epoch": 39.46017699115044,
      "grad_norm": 0.05635714119276726,
      "learning_rate": 3.848763827547915e-06,
      "loss": 0.0003,
      "step": 2210
    },
    {
      "epoch": 39.63716814159292,
      "grad_norm": 0.009383854914388465,
      "learning_rate": 3.8014209847442345e-06,
      "loss": 0.0003,
      "step": 2220
    },
    {
      "epoch": 39.8141592920354,
      "grad_norm": 0.05011834839916184,
      "learning_rate": 3.7541918794051637e-06,
      "loss": 0.0002,
      "step": 2230
    },
    {
      "epoch": 39.991150442477874,
      "grad_norm": 0.05133974792375958,
      "learning_rate": 3.7070809932700134e-06,
      "loss": 0.0003,
      "step": 2240
    },
    {
      "epoch": 40.176991150442475,
      "grad_norm": 0.018588269742905027,
      "learning_rate": 3.6600927968598588e-06,
      "loss": 0.0002,
      "step": 2250
    },
    {
      "epoch": 40.35398230088496,
      "grad_norm": 0.07504151811726231,
      "learning_rate": 3.613231749053304e-06,
      "loss": 0.0003,
      "step": 2260
    },
    {
      "epoch": 40.530973451327434,
      "grad_norm": 0.009247883379305936,
      "learning_rate": 3.5665022966633678e-06,
      "loss": 0.0002,
      "step": 2270
    },
    {
      "epoch": 40.70796460176991,
      "grad_norm": 0.010786624329556564,
      "learning_rate": 3.519908874015501e-06,
      "loss": 0.0002,
      "step": 2280
    },
    {
      "epoch": 40.88495575221239,
      "grad_norm": 0.009033335718842525,
      "learning_rate": 3.473455902526809e-06,
      "loss": 0.0002,
      "step": 2290
    },
    {
      "epoch": 41.07079646017699,
      "grad_norm": 0.006789585965722491,
      "learning_rate": 3.4271477902864836e-06,
      "loss": 0.0003,
      "step": 2300
    },
    {
      "epoch": 41.24778761061947,
      "grad_norm": 0.011629682271040448,
      "learning_rate": 3.3809889316375012e-06,
      "loss": 0.0002,
      "step": 2310
    },
    {
      "epoch": 41.424778761061944,
      "grad_norm": 0.016037900332241763,
      "learning_rate": 3.334983706759627e-06,
      "loss": 0.0002,
      "step": 2320
    },
    {
      "epoch": 41.60176991150443,
      "grad_norm": 0.008455409270455181,
      "learning_rate": 3.2891364812537686e-06,
      "loss": 0.0002,
      "step": 2330
    },
    {
      "epoch": 41.7787610619469,
      "grad_norm": 0.042022335506453394,
      "learning_rate": 3.2434516057277055e-06,
      "loss": 0.0002,
      "step": 2340
    },
    {
      "epoch": 41.95575221238938,
      "grad_norm": 0.022526899592221396,
      "learning_rate": 3.1979334153832486e-06,
      "loss": 0.0002,
      "step": 2350
    },
    {
      "epoch": 42.14159292035398,
      "grad_norm": 0.013477460019272535,
      "learning_rate": 3.1525862296048446e-06,
      "loss": 0.0002,
      "step": 2360
    },
    {
      "epoch": 42.31858407079646,
      "grad_norm": 0.014364454049260516,
      "learning_rate": 3.1074143515497114e-06,
      "loss": 0.0002,
      "step": 2370
    },
    {
      "epoch": 42.49557522123894,
      "grad_norm": 0.02250464749159911,
      "learning_rate": 3.0624220677394854e-06,
      "loss": 0.0002,
      "step": 2380
    },
    {
      "epoch": 42.67256637168141,
      "grad_norm": 0.011861124991493036,
      "learning_rate": 3.017613647653461e-06,
      "loss": 0.0002,
      "step": 2390
    },
    {
      "epoch": 42.849557522123895,
      "grad_norm": 0.006372525086061116,
      "learning_rate": 2.9729933433234402e-06,
      "loss": 0.0002,
      "step": 2400
    }
  ],
  "logging_steps": 10,
  "max_steps": 3584,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 64,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.5164849300121846e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}