{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 16.224986479177932, "eval_steps": 500, "global_step": 60000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0027041644131963224, "grad_norm": 4.492307662963867, "learning_rate": 3.0000000000000004e-07, "loss": 1.1835, "step": 10 }, { "epoch": 0.005408328826392645, "grad_norm": 3.7692346572875977, "learning_rate": 6.333333333333333e-07, "loss": 1.1767, "step": 20 }, { "epoch": 0.008112493239588967, "grad_norm": 3.8657217025756836, "learning_rate": 9.666666666666668e-07, "loss": 1.1743, "step": 30 }, { "epoch": 0.01081665765278529, "grad_norm": 3.1163039207458496, "learning_rate": 1.3e-06, "loss": 1.1328, "step": 40 }, { "epoch": 0.01352082206598161, "grad_norm": 2.025866985321045, "learning_rate": 1.6333333333333333e-06, "loss": 1.0631, "step": 50 }, { "epoch": 0.016224986479177934, "grad_norm": 1.8034666776657104, "learning_rate": 1.9666666666666668e-06, "loss": 1.0168, "step": 60 }, { "epoch": 0.018929150892374257, "grad_norm": 1.8437780141830444, "learning_rate": 2.3e-06, "loss": 0.9645, "step": 70 }, { "epoch": 0.02163331530557058, "grad_norm": 1.3963513374328613, "learning_rate": 2.6333333333333337e-06, "loss": 0.9426, "step": 80 }, { "epoch": 0.024337479718766902, "grad_norm": 1.111611008644104, "learning_rate": 2.966666666666667e-06, "loss": 0.9177, "step": 90 }, { "epoch": 0.02704164413196322, "grad_norm": 0.8480343222618103, "learning_rate": 3.3e-06, "loss": 0.9077, "step": 100 }, { "epoch": 0.029745808545159545, "grad_norm": 0.7570890784263611, "learning_rate": 3.633333333333334e-06, "loss": 0.8992, "step": 110 }, { "epoch": 0.03244997295835587, "grad_norm": 0.5310139060020447, "learning_rate": 3.966666666666667e-06, "loss": 0.8879, "step": 120 }, { "epoch": 0.035154137371552194, "grad_norm": 0.4853365421295166, "learning_rate": 4.2999999999999995e-06, "loss": 0.8816, "step": 130 }, { "epoch": 0.03785830178474851, "grad_norm": 0.45577242970466614, "learning_rate": 4.633333333333334e-06, "loss": 0.8732, "step": 140 }, { "epoch": 0.04056246619794483, "grad_norm": 0.3295237720012665, "learning_rate": 4.966666666666667e-06, "loss": 0.8673, "step": 150 }, { "epoch": 0.04326663061114116, "grad_norm": 0.2890014350414276, "learning_rate": 5.3e-06, "loss": 0.862, "step": 160 }, { "epoch": 0.04597079502433748, "grad_norm": 0.3296661376953125, "learning_rate": 5.633333333333333e-06, "loss": 0.8582, "step": 170 }, { "epoch": 0.048674959437533805, "grad_norm": 0.27999117970466614, "learning_rate": 5.9666666666666666e-06, "loss": 0.8488, "step": 180 }, { "epoch": 0.051379123850730124, "grad_norm": 0.36767151951789856, "learning_rate": 6.300000000000001e-06, "loss": 0.8401, "step": 190 }, { "epoch": 0.05408328826392644, "grad_norm": 0.24689121544361115, "learning_rate": 6.633333333333333e-06, "loss": 0.8251, "step": 200 }, { "epoch": 0.05678745267712277, "grad_norm": 0.5604905486106873, "learning_rate": 6.966666666666667e-06, "loss": 0.8132, "step": 210 }, { "epoch": 0.05949161709031909, "grad_norm": 0.46440842747688293, "learning_rate": 7.2999999999999996e-06, "loss": 0.8068, "step": 220 }, { "epoch": 0.062195781503515415, "grad_norm": 0.3712397515773773, "learning_rate": 7.633333333333334e-06, "loss": 0.7977, "step": 230 }, { "epoch": 0.06489994591671173, "grad_norm": 0.3064053952693939, "learning_rate": 7.966666666666666e-06, "loss": 0.7939, "step": 240 }, { "epoch": 0.06760411032990805, "grad_norm": 0.5376770496368408, "learning_rate": 8.3e-06, "loss": 0.7926, "step": 250 }, { "epoch": 0.07030827474310439, "grad_norm": 0.22228306531906128, "learning_rate": 8.633333333333334e-06, "loss": 0.7842, "step": 260 }, { "epoch": 0.0730124391563007, "grad_norm": 0.16886256635189056, "learning_rate": 8.966666666666668e-06, "loss": 0.7857, "step": 270 }, { "epoch": 0.07571660356949703, "grad_norm": 0.23542286455631256, "learning_rate": 9.3e-06, "loss": 0.7856, "step": 280 }, { "epoch": 0.07842076798269335, "grad_norm": 0.18747013807296753, "learning_rate": 9.633333333333335e-06, "loss": 0.7833, "step": 290 }, { "epoch": 0.08112493239588967, "grad_norm": 0.20969118177890778, "learning_rate": 9.966666666666667e-06, "loss": 0.7813, "step": 300 }, { "epoch": 0.083829096809086, "grad_norm": 0.21486148238182068, "learning_rate": 1.03e-05, "loss": 0.7776, "step": 310 }, { "epoch": 0.08653326122228232, "grad_norm": 0.14918017387390137, "learning_rate": 1.0633333333333334e-05, "loss": 0.7809, "step": 320 }, { "epoch": 0.08923742563547864, "grad_norm": 0.21335196495056152, "learning_rate": 1.0966666666666666e-05, "loss": 0.7776, "step": 330 }, { "epoch": 0.09194159004867496, "grad_norm": 0.2862824499607086, "learning_rate": 1.13e-05, "loss": 0.781, "step": 340 }, { "epoch": 0.09464575446187128, "grad_norm": 0.18271762132644653, "learning_rate": 1.1633333333333334e-05, "loss": 0.7768, "step": 350 }, { "epoch": 0.09734991887506761, "grad_norm": 0.24288998544216156, "learning_rate": 1.1966666666666668e-05, "loss": 0.7747, "step": 360 }, { "epoch": 0.10005408328826393, "grad_norm": 0.18913313746452332, "learning_rate": 1.23e-05, "loss": 0.7743, "step": 370 }, { "epoch": 0.10275824770146025, "grad_norm": 0.1755599081516266, "learning_rate": 1.2633333333333333e-05, "loss": 0.7742, "step": 380 }, { "epoch": 0.10546241211465657, "grad_norm": 0.1690550595521927, "learning_rate": 1.2966666666666669e-05, "loss": 0.7732, "step": 390 }, { "epoch": 0.10816657652785289, "grad_norm": 0.26720109581947327, "learning_rate": 1.3300000000000001e-05, "loss": 0.7728, "step": 400 }, { "epoch": 0.11087074094104922, "grad_norm": 0.3271518051624298, "learning_rate": 1.3633333333333334e-05, "loss": 0.7728, "step": 410 }, { "epoch": 0.11357490535424554, "grad_norm": 0.24122919142246246, "learning_rate": 1.3966666666666666e-05, "loss": 0.77, "step": 420 }, { "epoch": 0.11627906976744186, "grad_norm": 0.47137361764907837, "learning_rate": 1.43e-05, "loss": 0.7696, "step": 430 }, { "epoch": 0.11898323418063818, "grad_norm": 0.5226354598999023, "learning_rate": 1.4633333333333334e-05, "loss": 0.7672, "step": 440 }, { "epoch": 0.12168739859383451, "grad_norm": 0.21189676225185394, "learning_rate": 1.4966666666666668e-05, "loss": 0.7625, "step": 450 }, { "epoch": 0.12439156300703083, "grad_norm": 0.5963268876075745, "learning_rate": 1.53e-05, "loss": 0.7476, "step": 460 }, { "epoch": 0.12709572742022715, "grad_norm": 2.749722957611084, "learning_rate": 1.563333333333333e-05, "loss": 0.7157, "step": 470 }, { "epoch": 0.12979989183342347, "grad_norm": 4.248365879058838, "learning_rate": 1.5966666666666667e-05, "loss": 0.747, "step": 480 }, { "epoch": 0.1325040562466198, "grad_norm": 1.1539766788482666, "learning_rate": 1.63e-05, "loss": 0.7001, "step": 490 }, { "epoch": 0.1352082206598161, "grad_norm": 1.5807900428771973, "learning_rate": 1.6633333333333336e-05, "loss": 0.6845, "step": 500 }, { "epoch": 0.13791238507301243, "grad_norm": 1.3849453926086426, "learning_rate": 1.6966666666666668e-05, "loss": 0.6703, "step": 510 }, { "epoch": 0.14061654948620878, "grad_norm": 0.9323300719261169, "learning_rate": 1.73e-05, "loss": 0.66, "step": 520 }, { "epoch": 0.1433207138994051, "grad_norm": 0.41016846895217896, "learning_rate": 1.7633333333333336e-05, "loss": 0.6524, "step": 530 }, { "epoch": 0.1460248783126014, "grad_norm": 0.7045881152153015, "learning_rate": 1.796666666666667e-05, "loss": 0.6517, "step": 540 }, { "epoch": 0.14872904272579773, "grad_norm": 0.6296499371528625, "learning_rate": 1.83e-05, "loss": 0.6453, "step": 550 }, { "epoch": 0.15143320713899405, "grad_norm": 0.2991262376308441, "learning_rate": 1.8633333333333333e-05, "loss": 0.6422, "step": 560 }, { "epoch": 0.15413737155219037, "grad_norm": 0.35057133436203003, "learning_rate": 1.896666666666667e-05, "loss": 0.6414, "step": 570 }, { "epoch": 0.1568415359653867, "grad_norm": 0.37422627210617065, "learning_rate": 1.93e-05, "loss": 0.6391, "step": 580 }, { "epoch": 0.159545700378583, "grad_norm": 0.30428922176361084, "learning_rate": 1.9633333333333334e-05, "loss": 0.6383, "step": 590 }, { "epoch": 0.16224986479177933, "grad_norm": 0.3561856746673584, "learning_rate": 1.9966666666666666e-05, "loss": 0.637, "step": 600 }, { "epoch": 0.16495402920497565, "grad_norm": 0.7720860838890076, "learning_rate": 2.0300000000000002e-05, "loss": 0.6371, "step": 610 }, { "epoch": 0.167658193618172, "grad_norm": 0.44177770614624023, "learning_rate": 2.0633333333333335e-05, "loss": 0.6368, "step": 620 }, { "epoch": 0.17036235803136832, "grad_norm": 0.3510745167732239, "learning_rate": 2.0966666666666667e-05, "loss": 0.6365, "step": 630 }, { "epoch": 0.17306652244456464, "grad_norm": 0.23940473794937134, "learning_rate": 2.13e-05, "loss": 0.6348, "step": 640 }, { "epoch": 0.17577068685776095, "grad_norm": 0.4234619736671448, "learning_rate": 2.1633333333333332e-05, "loss": 0.6347, "step": 650 }, { "epoch": 0.17847485127095727, "grad_norm": 0.34721970558166504, "learning_rate": 2.1966666666666668e-05, "loss": 0.6319, "step": 660 }, { "epoch": 0.1811790156841536, "grad_norm": 0.348274290561676, "learning_rate": 2.23e-05, "loss": 0.6355, "step": 670 }, { "epoch": 0.1838831800973499, "grad_norm": 0.3324425220489502, "learning_rate": 2.2633333333333336e-05, "loss": 0.6313, "step": 680 }, { "epoch": 0.18658734451054623, "grad_norm": 0.2860121726989746, "learning_rate": 2.2966666666666668e-05, "loss": 0.6312, "step": 690 }, { "epoch": 0.18929150892374255, "grad_norm": 0.32120540738105774, "learning_rate": 2.3300000000000004e-05, "loss": 0.6314, "step": 700 }, { "epoch": 0.1919956733369389, "grad_norm": 0.5201168060302734, "learning_rate": 2.3633333333333336e-05, "loss": 0.6284, "step": 710 }, { "epoch": 0.19469983775013522, "grad_norm": 0.2634330093860626, "learning_rate": 2.396666666666667e-05, "loss": 0.6262, "step": 720 }, { "epoch": 0.19740400216333154, "grad_norm": 0.5905399322509766, "learning_rate": 2.43e-05, "loss": 0.629, "step": 730 }, { "epoch": 0.20010816657652786, "grad_norm": 0.7479327321052551, "learning_rate": 2.4633333333333334e-05, "loss": 0.6245, "step": 740 }, { "epoch": 0.20281233098972418, "grad_norm": 0.4384053647518158, "learning_rate": 2.496666666666667e-05, "loss": 0.6228, "step": 750 }, { "epoch": 0.2055164954029205, "grad_norm": 0.4346765875816345, "learning_rate": 2.5300000000000002e-05, "loss": 0.6188, "step": 760 }, { "epoch": 0.20822065981611682, "grad_norm": 0.6608043909072876, "learning_rate": 2.5633333333333338e-05, "loss": 0.6132, "step": 770 }, { "epoch": 0.21092482422931313, "grad_norm": 2.3593976497650146, "learning_rate": 2.5966666666666667e-05, "loss": 0.5953, "step": 780 }, { "epoch": 0.21362898864250945, "grad_norm": 0.9859963655471802, "learning_rate": 2.6300000000000002e-05, "loss": 0.5826, "step": 790 }, { "epoch": 0.21633315305570577, "grad_norm": 1.2621300220489502, "learning_rate": 2.663333333333333e-05, "loss": 0.572, "step": 800 }, { "epoch": 0.21903731746890212, "grad_norm": 0.7065654993057251, "learning_rate": 2.6966666666666667e-05, "loss": 0.5592, "step": 810 }, { "epoch": 0.22174148188209844, "grad_norm": 0.77850341796875, "learning_rate": 2.7300000000000003e-05, "loss": 0.5446, "step": 820 }, { "epoch": 0.22444564629529476, "grad_norm": 1.214892864227295, "learning_rate": 2.7633333333333332e-05, "loss": 0.5309, "step": 830 }, { "epoch": 0.22714981070849108, "grad_norm": 1.9886802434921265, "learning_rate": 2.7966666666666668e-05, "loss": 0.5224, "step": 840 }, { "epoch": 0.2298539751216874, "grad_norm": 1.1326038837432861, "learning_rate": 2.83e-05, "loss": 0.5056, "step": 850 }, { "epoch": 0.23255813953488372, "grad_norm": 2.7956154346466064, "learning_rate": 2.8633333333333336e-05, "loss": 0.4566, "step": 860 }, { "epoch": 0.23526230394808004, "grad_norm": 2.000739097595215, "learning_rate": 2.8966666666666668e-05, "loss": 0.4289, "step": 870 }, { "epoch": 0.23796646836127636, "grad_norm": 1.8748174905776978, "learning_rate": 2.93e-05, "loss": 0.4127, "step": 880 }, { "epoch": 0.24067063277447268, "grad_norm": 2.1685845851898193, "learning_rate": 2.9633333333333336e-05, "loss": 0.386, "step": 890 }, { "epoch": 0.24337479718766902, "grad_norm": 1.8464926481246948, "learning_rate": 2.9966666666666672e-05, "loss": 0.366, "step": 900 }, { "epoch": 0.24607896160086534, "grad_norm": 6.847977161407471, "learning_rate": 3.03e-05, "loss": 0.3182, "step": 910 }, { "epoch": 0.24878312601406166, "grad_norm": 2.422333002090454, "learning_rate": 3.063333333333334e-05, "loss": 0.2904, "step": 920 }, { "epoch": 0.251487290427258, "grad_norm": 2.1491644382476807, "learning_rate": 3.096666666666666e-05, "loss": 0.2656, "step": 930 }, { "epoch": 0.2541914548404543, "grad_norm": 1.6123061180114746, "learning_rate": 3.13e-05, "loss": 0.2502, "step": 940 }, { "epoch": 0.2568956192536506, "grad_norm": 2.49971342086792, "learning_rate": 3.1633333333333334e-05, "loss": 0.2361, "step": 950 }, { "epoch": 0.25959978366684694, "grad_norm": 3.736366033554077, "learning_rate": 3.196666666666667e-05, "loss": 0.1969, "step": 960 }, { "epoch": 0.26230394808004326, "grad_norm": 3.2414052486419678, "learning_rate": 3.2300000000000006e-05, "loss": 0.1772, "step": 970 }, { "epoch": 0.2650081124932396, "grad_norm": 2.2949421405792236, "learning_rate": 3.263333333333333e-05, "loss": 0.145, "step": 980 }, { "epoch": 0.2677122769064359, "grad_norm": 2.54600191116333, "learning_rate": 3.296666666666667e-05, "loss": 0.1276, "step": 990 }, { "epoch": 0.2704164413196322, "grad_norm": 3.066659450531006, "learning_rate": 3.33e-05, "loss": 0.1194, "step": 1000 }, { "epoch": 0.27312060573282854, "grad_norm": 2.502074718475342, "learning_rate": 3.3633333333333335e-05, "loss": 0.1121, "step": 1010 }, { "epoch": 0.27582477014602486, "grad_norm": 2.0605275630950928, "learning_rate": 3.396666666666667e-05, "loss": 0.1086, "step": 1020 }, { "epoch": 0.2785289345592212, "grad_norm": 3.1915314197540283, "learning_rate": 3.430000000000001e-05, "loss": 0.1031, "step": 1030 }, { "epoch": 0.28123309897241755, "grad_norm": 2.5741214752197266, "learning_rate": 3.463333333333333e-05, "loss": 0.1005, "step": 1040 }, { "epoch": 0.28393726338561387, "grad_norm": 2.7066354751586914, "learning_rate": 3.496666666666667e-05, "loss": 0.0979, "step": 1050 }, { "epoch": 0.2866414277988102, "grad_norm": 2.2230615615844727, "learning_rate": 3.53e-05, "loss": 0.0932, "step": 1060 }, { "epoch": 0.2893455922120065, "grad_norm": 1.7855457067489624, "learning_rate": 3.563333333333334e-05, "loss": 0.0938, "step": 1070 }, { "epoch": 0.2920497566252028, "grad_norm": 1.831788182258606, "learning_rate": 3.596666666666667e-05, "loss": 0.0883, "step": 1080 }, { "epoch": 0.29475392103839915, "grad_norm": 2.1344759464263916, "learning_rate": 3.63e-05, "loss": 0.0904, "step": 1090 }, { "epoch": 0.29745808545159547, "grad_norm": 4.288725852966309, "learning_rate": 3.6633333333333334e-05, "loss": 0.0937, "step": 1100 }, { "epoch": 0.3001622498647918, "grad_norm": 3.200105667114258, "learning_rate": 3.6966666666666666e-05, "loss": 0.0962, "step": 1110 }, { "epoch": 0.3028664142779881, "grad_norm": 3.4650890827178955, "learning_rate": 3.73e-05, "loss": 0.0879, "step": 1120 }, { "epoch": 0.3055705786911844, "grad_norm": 2.473417282104492, "learning_rate": 3.763333333333334e-05, "loss": 0.0905, "step": 1130 }, { "epoch": 0.30827474310438074, "grad_norm": 2.6472647190093994, "learning_rate": 3.796666666666667e-05, "loss": 0.087, "step": 1140 }, { "epoch": 0.31097890751757706, "grad_norm": 4.729209899902344, "learning_rate": 3.83e-05, "loss": 0.0856, "step": 1150 }, { "epoch": 0.3136830719307734, "grad_norm": 4.690682888031006, "learning_rate": 3.8633333333333335e-05, "loss": 0.0974, "step": 1160 }, { "epoch": 0.3163872363439697, "grad_norm": 1.6888327598571777, "learning_rate": 3.896666666666667e-05, "loss": 0.0945, "step": 1170 }, { "epoch": 0.319091400757166, "grad_norm": 2.1726839542388916, "learning_rate": 3.9300000000000007e-05, "loss": 0.0904, "step": 1180 }, { "epoch": 0.32179556517036234, "grad_norm": 1.3107010126113892, "learning_rate": 3.963333333333333e-05, "loss": 0.0858, "step": 1190 }, { "epoch": 0.32449972958355866, "grad_norm": 2.102325916290283, "learning_rate": 3.996666666666667e-05, "loss": 0.0853, "step": 1200 }, { "epoch": 0.327203893996755, "grad_norm": 1.899569034576416, "learning_rate": 4.0300000000000004e-05, "loss": 0.0868, "step": 1210 }, { "epoch": 0.3299080584099513, "grad_norm": 1.9266401529312134, "learning_rate": 4.0633333333333336e-05, "loss": 0.0882, "step": 1220 }, { "epoch": 0.3326122228231477, "grad_norm": 2.6132972240448, "learning_rate": 4.096666666666667e-05, "loss": 0.0829, "step": 1230 }, { "epoch": 0.335316387236344, "grad_norm": 2.6080522537231445, "learning_rate": 4.13e-05, "loss": 0.0836, "step": 1240 }, { "epoch": 0.3380205516495403, "grad_norm": 1.885479211807251, "learning_rate": 4.1633333333333333e-05, "loss": 0.0844, "step": 1250 }, { "epoch": 0.34072471606273663, "grad_norm": 2.126375436782837, "learning_rate": 4.196666666666667e-05, "loss": 0.0817, "step": 1260 }, { "epoch": 0.34342888047593295, "grad_norm": 1.6490424871444702, "learning_rate": 4.23e-05, "loss": 0.079, "step": 1270 }, { "epoch": 0.34613304488912927, "grad_norm": 1.654329538345337, "learning_rate": 4.263333333333334e-05, "loss": 0.0849, "step": 1280 }, { "epoch": 0.3488372093023256, "grad_norm": 0.7547357082366943, "learning_rate": 4.296666666666666e-05, "loss": 0.0786, "step": 1290 }, { "epoch": 0.3515413737155219, "grad_norm": 1.8127481937408447, "learning_rate": 4.33e-05, "loss": 0.0825, "step": 1300 }, { "epoch": 0.35424553812871823, "grad_norm": 1.3285788297653198, "learning_rate": 4.3633333333333335e-05, "loss": 0.0804, "step": 1310 }, { "epoch": 0.35694970254191455, "grad_norm": 1.8146625757217407, "learning_rate": 4.396666666666667e-05, "loss": 0.0784, "step": 1320 }, { "epoch": 0.35965386695511087, "grad_norm": 2.3137218952178955, "learning_rate": 4.43e-05, "loss": 0.079, "step": 1330 }, { "epoch": 0.3623580313683072, "grad_norm": 2.234264612197876, "learning_rate": 4.463333333333334e-05, "loss": 0.0776, "step": 1340 }, { "epoch": 0.3650621957815035, "grad_norm": 2.4362223148345947, "learning_rate": 4.496666666666667e-05, "loss": 0.077, "step": 1350 }, { "epoch": 0.3677663601946998, "grad_norm": 1.4498448371887207, "learning_rate": 4.53e-05, "loss": 0.0745, "step": 1360 }, { "epoch": 0.37047052460789615, "grad_norm": 1.5430989265441895, "learning_rate": 4.5633333333333336e-05, "loss": 0.0777, "step": 1370 }, { "epoch": 0.37317468902109246, "grad_norm": 1.8048264980316162, "learning_rate": 4.596666666666667e-05, "loss": 0.0755, "step": 1380 }, { "epoch": 0.3758788534342888, "grad_norm": 2.1304931640625, "learning_rate": 4.630000000000001e-05, "loss": 0.0824, "step": 1390 }, { "epoch": 0.3785830178474851, "grad_norm": 3.186664581298828, "learning_rate": 4.663333333333333e-05, "loss": 0.0807, "step": 1400 }, { "epoch": 0.3812871822606814, "grad_norm": 2.027336359024048, "learning_rate": 4.696666666666667e-05, "loss": 0.0813, "step": 1410 }, { "epoch": 0.3839913466738778, "grad_norm": 2.030869245529175, "learning_rate": 4.73e-05, "loss": 0.0754, "step": 1420 }, { "epoch": 0.3866955110870741, "grad_norm": 2.012059211730957, "learning_rate": 4.763333333333334e-05, "loss": 0.0786, "step": 1430 }, { "epoch": 0.38939967550027044, "grad_norm": 2.3861606121063232, "learning_rate": 4.796666666666667e-05, "loss": 0.0767, "step": 1440 }, { "epoch": 0.39210383991346676, "grad_norm": 1.9760547876358032, "learning_rate": 4.83e-05, "loss": 0.0794, "step": 1450 }, { "epoch": 0.3948080043266631, "grad_norm": 1.924593210220337, "learning_rate": 4.8633333333333334e-05, "loss": 0.075, "step": 1460 }, { "epoch": 0.3975121687398594, "grad_norm": 2.9090793132781982, "learning_rate": 4.8966666666666667e-05, "loss": 0.0796, "step": 1470 }, { "epoch": 0.4002163331530557, "grad_norm": 2.376525402069092, "learning_rate": 4.93e-05, "loss": 0.0791, "step": 1480 }, { "epoch": 0.40292049756625203, "grad_norm": 2.586555004119873, "learning_rate": 4.963333333333334e-05, "loss": 0.0782, "step": 1490 }, { "epoch": 0.40562466197944835, "grad_norm": 3.2052805423736572, "learning_rate": 4.996666666666667e-05, "loss": 0.0824, "step": 1500 }, { "epoch": 0.4083288263926447, "grad_norm": 1.5561994314193726, "learning_rate": 5.03e-05, "loss": 0.0804, "step": 1510 }, { "epoch": 0.411032990805841, "grad_norm": 2.3665125370025635, "learning_rate": 5.0633333333333335e-05, "loss": 0.0767, "step": 1520 }, { "epoch": 0.4137371552190373, "grad_norm": 1.1073840856552124, "learning_rate": 5.0966666666666674e-05, "loss": 0.0743, "step": 1530 }, { "epoch": 0.41644131963223363, "grad_norm": 1.2054643630981445, "learning_rate": 5.130000000000001e-05, "loss": 0.0736, "step": 1540 }, { "epoch": 0.41914548404542995, "grad_norm": 3.0862021446228027, "learning_rate": 5.163333333333333e-05, "loss": 0.0784, "step": 1550 }, { "epoch": 0.42184964845862627, "grad_norm": 2.545301914215088, "learning_rate": 5.196666666666667e-05, "loss": 0.0811, "step": 1560 }, { "epoch": 0.4245538128718226, "grad_norm": 2.388385534286499, "learning_rate": 5.2300000000000004e-05, "loss": 0.0798, "step": 1570 }, { "epoch": 0.4272579772850189, "grad_norm": 1.5226831436157227, "learning_rate": 5.2633333333333336e-05, "loss": 0.0748, "step": 1580 }, { "epoch": 0.42996214169821523, "grad_norm": 1.1783701181411743, "learning_rate": 5.296666666666666e-05, "loss": 0.0726, "step": 1590 }, { "epoch": 0.43266630611141155, "grad_norm": 1.987740397453308, "learning_rate": 5.330000000000001e-05, "loss": 0.0776, "step": 1600 }, { "epoch": 0.4353704705246079, "grad_norm": 1.6655158996582031, "learning_rate": 5.3633333333333334e-05, "loss": 0.0732, "step": 1610 }, { "epoch": 0.43807463493780424, "grad_norm": 0.724779486656189, "learning_rate": 5.3966666666666666e-05, "loss": 0.071, "step": 1620 }, { "epoch": 0.44077879935100056, "grad_norm": 1.632519245147705, "learning_rate": 5.4300000000000005e-05, "loss": 0.07, "step": 1630 }, { "epoch": 0.4434829637641969, "grad_norm": 1.1951316595077515, "learning_rate": 5.463333333333334e-05, "loss": 0.0753, "step": 1640 }, { "epoch": 0.4461871281773932, "grad_norm": 2.2884228229522705, "learning_rate": 5.496666666666666e-05, "loss": 0.0761, "step": 1650 }, { "epoch": 0.4488912925905895, "grad_norm": 1.4414747953414917, "learning_rate": 5.530000000000001e-05, "loss": 0.0724, "step": 1660 }, { "epoch": 0.45159545700378584, "grad_norm": 1.0846270322799683, "learning_rate": 5.5633333333333335e-05, "loss": 0.0703, "step": 1670 }, { "epoch": 0.45429962141698216, "grad_norm": 1.7768381834030151, "learning_rate": 5.596666666666667e-05, "loss": 0.0703, "step": 1680 }, { "epoch": 0.4570037858301785, "grad_norm": 0.8953157067298889, "learning_rate": 5.63e-05, "loss": 0.0708, "step": 1690 }, { "epoch": 0.4597079502433748, "grad_norm": 1.9036773443222046, "learning_rate": 5.663333333333334e-05, "loss": 0.0713, "step": 1700 }, { "epoch": 0.4624121146565711, "grad_norm": 0.8471760153770447, "learning_rate": 5.696666666666667e-05, "loss": 0.0689, "step": 1710 }, { "epoch": 0.46511627906976744, "grad_norm": 1.4556324481964111, "learning_rate": 5.73e-05, "loss": 0.0738, "step": 1720 }, { "epoch": 0.46782044348296375, "grad_norm": 1.320701003074646, "learning_rate": 5.7633333333333336e-05, "loss": 0.0719, "step": 1730 }, { "epoch": 0.4705246078961601, "grad_norm": 1.2684639692306519, "learning_rate": 5.796666666666667e-05, "loss": 0.0688, "step": 1740 }, { "epoch": 0.4732287723093564, "grad_norm": 1.8521274328231812, "learning_rate": 5.83e-05, "loss": 0.0701, "step": 1750 }, { "epoch": 0.4759329367225527, "grad_norm": 1.137359619140625, "learning_rate": 5.863333333333334e-05, "loss": 0.0704, "step": 1760 }, { "epoch": 0.47863710113574903, "grad_norm": 0.9159125089645386, "learning_rate": 5.896666666666667e-05, "loss": 0.0708, "step": 1770 }, { "epoch": 0.48134126554894535, "grad_norm": 2.293166160583496, "learning_rate": 5.93e-05, "loss": 0.0698, "step": 1780 }, { "epoch": 0.48404542996214167, "grad_norm": 1.671349048614502, "learning_rate": 5.9633333333333344e-05, "loss": 0.073, "step": 1790 }, { "epoch": 0.48674959437533805, "grad_norm": 1.856791615486145, "learning_rate": 5.996666666666667e-05, "loss": 0.0723, "step": 1800 }, { "epoch": 0.48945375878853437, "grad_norm": 1.5166735649108887, "learning_rate": 6.03e-05, "loss": 0.07, "step": 1810 }, { "epoch": 0.4921579232017307, "grad_norm": 1.4505666494369507, "learning_rate": 6.063333333333333e-05, "loss": 0.0695, "step": 1820 }, { "epoch": 0.494862087614927, "grad_norm": 1.407767415046692, "learning_rate": 6.0966666666666674e-05, "loss": 0.0706, "step": 1830 }, { "epoch": 0.4975662520281233, "grad_norm": 1.4427908658981323, "learning_rate": 6.13e-05, "loss": 0.071, "step": 1840 }, { "epoch": 0.5002704164413196, "grad_norm": 1.2354680299758911, "learning_rate": 6.163333333333333e-05, "loss": 0.0715, "step": 1850 }, { "epoch": 0.502974580854516, "grad_norm": 0.970012903213501, "learning_rate": 6.196666666666668e-05, "loss": 0.0699, "step": 1860 }, { "epoch": 0.5056787452677123, "grad_norm": 1.179268717765808, "learning_rate": 6.23e-05, "loss": 0.0696, "step": 1870 }, { "epoch": 0.5083829096809086, "grad_norm": 1.278510332107544, "learning_rate": 6.263333333333333e-05, "loss": 0.0686, "step": 1880 }, { "epoch": 0.5110870740941049, "grad_norm": 1.1253117322921753, "learning_rate": 6.296666666666667e-05, "loss": 0.0683, "step": 1890 }, { "epoch": 0.5137912385073012, "grad_norm": 1.1540309190750122, "learning_rate": 6.330000000000001e-05, "loss": 0.0717, "step": 1900 }, { "epoch": 0.5164954029204976, "grad_norm": 1.2796787023544312, "learning_rate": 6.363333333333334e-05, "loss": 0.0687, "step": 1910 }, { "epoch": 0.5191995673336939, "grad_norm": 0.9578638672828674, "learning_rate": 6.396666666666667e-05, "loss": 0.0695, "step": 1920 }, { "epoch": 0.5219037317468902, "grad_norm": 1.1904412508010864, "learning_rate": 6.43e-05, "loss": 0.067, "step": 1930 }, { "epoch": 0.5246078961600865, "grad_norm": 1.0116466283798218, "learning_rate": 6.463333333333334e-05, "loss": 0.0696, "step": 1940 }, { "epoch": 0.5273120605732828, "grad_norm": 1.3901491165161133, "learning_rate": 6.496666666666667e-05, "loss": 0.0683, "step": 1950 }, { "epoch": 0.5300162249864792, "grad_norm": 1.200476884841919, "learning_rate": 6.53e-05, "loss": 0.0733, "step": 1960 }, { "epoch": 0.5327203893996755, "grad_norm": 1.5967493057250977, "learning_rate": 6.563333333333333e-05, "loss": 0.0677, "step": 1970 }, { "epoch": 0.5354245538128718, "grad_norm": 1.4789810180664062, "learning_rate": 6.596666666666667e-05, "loss": 0.0708, "step": 1980 }, { "epoch": 0.5381287182260681, "grad_norm": 1.2608692646026611, "learning_rate": 6.630000000000001e-05, "loss": 0.0733, "step": 1990 }, { "epoch": 0.5408328826392644, "grad_norm": 1.2001229524612427, "learning_rate": 6.663333333333333e-05, "loss": 0.0712, "step": 2000 }, { "epoch": 0.5435370470524608, "grad_norm": 1.17767333984375, "learning_rate": 6.696666666666666e-05, "loss": 0.0685, "step": 2010 }, { "epoch": 0.5462412114656571, "grad_norm": 1.3543097972869873, "learning_rate": 6.730000000000001e-05, "loss": 0.0684, "step": 2020 }, { "epoch": 0.5489453758788534, "grad_norm": 0.4761888086795807, "learning_rate": 6.763333333333334e-05, "loss": 0.0665, "step": 2030 }, { "epoch": 0.5516495402920497, "grad_norm": 0.8532746434211731, "learning_rate": 6.796666666666666e-05, "loss": 0.0647, "step": 2040 }, { "epoch": 0.554353704705246, "grad_norm": 1.3188414573669434, "learning_rate": 6.83e-05, "loss": 0.0658, "step": 2050 }, { "epoch": 0.5570578691184424, "grad_norm": 1.4216939210891724, "learning_rate": 6.863333333333334e-05, "loss": 0.0662, "step": 2060 }, { "epoch": 0.5597620335316387, "grad_norm": 1.2718783617019653, "learning_rate": 6.896666666666667e-05, "loss": 0.067, "step": 2070 }, { "epoch": 0.5624661979448351, "grad_norm": 1.1641064882278442, "learning_rate": 6.93e-05, "loss": 0.0675, "step": 2080 }, { "epoch": 0.5651703623580314, "grad_norm": 1.4883246421813965, "learning_rate": 6.963333333333334e-05, "loss": 0.066, "step": 2090 }, { "epoch": 0.5678745267712277, "grad_norm": 0.9324827790260315, "learning_rate": 6.996666666666667e-05, "loss": 0.0657, "step": 2100 }, { "epoch": 0.5705786911844241, "grad_norm": 0.7153642177581787, "learning_rate": 7.03e-05, "loss": 0.065, "step": 2110 }, { "epoch": 0.5732828555976204, "grad_norm": 1.0609079599380493, "learning_rate": 7.063333333333333e-05, "loss": 0.0652, "step": 2120 }, { "epoch": 0.5759870200108167, "grad_norm": 1.024787425994873, "learning_rate": 7.096666666666667e-05, "loss": 0.066, "step": 2130 }, { "epoch": 0.578691184424013, "grad_norm": 0.4845007359981537, "learning_rate": 7.13e-05, "loss": 0.0631, "step": 2140 }, { "epoch": 0.5813953488372093, "grad_norm": 0.7082512974739075, "learning_rate": 7.163333333333334e-05, "loss": 0.066, "step": 2150 }, { "epoch": 0.5840995132504057, "grad_norm": 0.6376518607139587, "learning_rate": 7.196666666666668e-05, "loss": 0.0639, "step": 2160 }, { "epoch": 0.586803677663602, "grad_norm": 0.789222002029419, "learning_rate": 7.23e-05, "loss": 0.0658, "step": 2170 }, { "epoch": 0.5895078420767983, "grad_norm": 0.6301352977752686, "learning_rate": 7.263333333333334e-05, "loss": 0.0633, "step": 2180 }, { "epoch": 0.5922120064899946, "grad_norm": 0.5354210138320923, "learning_rate": 7.296666666666667e-05, "loss": 0.066, "step": 2190 }, { "epoch": 0.5949161709031909, "grad_norm": 0.5181793570518494, "learning_rate": 7.33e-05, "loss": 0.0671, "step": 2200 }, { "epoch": 0.5976203353163873, "grad_norm": 0.6891258358955383, "learning_rate": 7.363333333333334e-05, "loss": 0.0661, "step": 2210 }, { "epoch": 0.6003244997295836, "grad_norm": 1.450053095817566, "learning_rate": 7.396666666666667e-05, "loss": 0.0676, "step": 2220 }, { "epoch": 0.6030286641427799, "grad_norm": 1.230047345161438, "learning_rate": 7.43e-05, "loss": 0.066, "step": 2230 }, { "epoch": 0.6057328285559762, "grad_norm": 1.5836573839187622, "learning_rate": 7.463333333333334e-05, "loss": 0.0648, "step": 2240 }, { "epoch": 0.6084369929691725, "grad_norm": 0.8568735718727112, "learning_rate": 7.496666666666667e-05, "loss": 0.0653, "step": 2250 }, { "epoch": 0.6111411573823688, "grad_norm": 0.33937135338783264, "learning_rate": 7.53e-05, "loss": 0.0627, "step": 2260 }, { "epoch": 0.6138453217955652, "grad_norm": 0.719310998916626, "learning_rate": 7.563333333333333e-05, "loss": 0.0645, "step": 2270 }, { "epoch": 0.6165494862087615, "grad_norm": 0.9268049597740173, "learning_rate": 7.596666666666668e-05, "loss": 0.0637, "step": 2280 }, { "epoch": 0.6192536506219578, "grad_norm": 1.0091806650161743, "learning_rate": 7.630000000000001e-05, "loss": 0.0674, "step": 2290 }, { "epoch": 0.6219578150351541, "grad_norm": 1.4328938722610474, "learning_rate": 7.663333333333333e-05, "loss": 0.0675, "step": 2300 }, { "epoch": 0.6246619794483504, "grad_norm": 1.1596894264221191, "learning_rate": 7.696666666666668e-05, "loss": 0.0667, "step": 2310 }, { "epoch": 0.6273661438615468, "grad_norm": 0.8001285791397095, "learning_rate": 7.730000000000001e-05, "loss": 0.0663, "step": 2320 }, { "epoch": 0.6300703082747431, "grad_norm": 0.9381599426269531, "learning_rate": 7.763333333333334e-05, "loss": 0.0657, "step": 2330 }, { "epoch": 0.6327744726879394, "grad_norm": 0.9917824864387512, "learning_rate": 7.796666666666666e-05, "loss": 0.0632, "step": 2340 }, { "epoch": 0.6354786371011357, "grad_norm": 1.2548283338546753, "learning_rate": 7.83e-05, "loss": 0.066, "step": 2350 }, { "epoch": 0.638182801514332, "grad_norm": 1.239151120185852, "learning_rate": 7.863333333333334e-05, "loss": 0.0658, "step": 2360 }, { "epoch": 0.6408869659275284, "grad_norm": 0.48114174604415894, "learning_rate": 7.896666666666667e-05, "loss": 0.0646, "step": 2370 }, { "epoch": 0.6435911303407247, "grad_norm": 0.7380053997039795, "learning_rate": 7.93e-05, "loss": 0.0645, "step": 2380 }, { "epoch": 0.646295294753921, "grad_norm": 0.9844059348106384, "learning_rate": 7.963333333333334e-05, "loss": 0.0648, "step": 2390 }, { "epoch": 0.6489994591671173, "grad_norm": 0.7818728685379028, "learning_rate": 7.996666666666667e-05, "loss": 0.0636, "step": 2400 }, { "epoch": 0.6517036235803136, "grad_norm": 1.329939365386963, "learning_rate": 8.030000000000001e-05, "loss": 0.0652, "step": 2410 }, { "epoch": 0.65440778799351, "grad_norm": 1.0675796270370483, "learning_rate": 8.063333333333333e-05, "loss": 0.0659, "step": 2420 }, { "epoch": 0.6571119524067063, "grad_norm": 0.9987035393714905, "learning_rate": 8.096666666666667e-05, "loss": 0.0637, "step": 2430 }, { "epoch": 0.6598161168199026, "grad_norm": 0.8521026372909546, "learning_rate": 8.13e-05, "loss": 0.0633, "step": 2440 }, { "epoch": 0.6625202812330989, "grad_norm": 0.8678424954414368, "learning_rate": 8.163333333333334e-05, "loss": 0.0635, "step": 2450 }, { "epoch": 0.6652244456462953, "grad_norm": 0.6618300676345825, "learning_rate": 8.196666666666668e-05, "loss": 0.0639, "step": 2460 }, { "epoch": 0.6679286100594917, "grad_norm": 0.6280407905578613, "learning_rate": 8.23e-05, "loss": 0.0662, "step": 2470 }, { "epoch": 0.670632774472688, "grad_norm": 0.767103374004364, "learning_rate": 8.263333333333334e-05, "loss": 0.0661, "step": 2480 }, { "epoch": 0.6733369388858843, "grad_norm": 0.7683860659599304, "learning_rate": 8.296666666666667e-05, "loss": 0.0641, "step": 2490 }, { "epoch": 0.6760411032990806, "grad_norm": 0.7734214663505554, "learning_rate": 8.33e-05, "loss": 0.0664, "step": 2500 }, { "epoch": 0.678745267712277, "grad_norm": 0.8221602439880371, "learning_rate": 8.363333333333334e-05, "loss": 0.0647, "step": 2510 }, { "epoch": 0.6814494321254733, "grad_norm": 1.0952779054641724, "learning_rate": 8.396666666666667e-05, "loss": 0.0629, "step": 2520 }, { "epoch": 0.6841535965386696, "grad_norm": 0.7130374908447266, "learning_rate": 8.43e-05, "loss": 0.0612, "step": 2530 }, { "epoch": 0.6868577609518659, "grad_norm": 0.40036100149154663, "learning_rate": 8.463333333333335e-05, "loss": 0.0604, "step": 2540 }, { "epoch": 0.6895619253650622, "grad_norm": 0.6390863656997681, "learning_rate": 8.496666666666667e-05, "loss": 0.0643, "step": 2550 }, { "epoch": 0.6922660897782585, "grad_norm": 0.5487748384475708, "learning_rate": 8.53e-05, "loss": 0.0635, "step": 2560 }, { "epoch": 0.6949702541914549, "grad_norm": 0.7062770128250122, "learning_rate": 8.563333333333333e-05, "loss": 0.0639, "step": 2570 }, { "epoch": 0.6976744186046512, "grad_norm": 0.8393713235855103, "learning_rate": 8.596666666666668e-05, "loss": 0.0646, "step": 2580 }, { "epoch": 0.7003785830178475, "grad_norm": 0.945492148399353, "learning_rate": 8.63e-05, "loss": 0.0602, "step": 2590 }, { "epoch": 0.7030827474310438, "grad_norm": 0.4470641314983368, "learning_rate": 8.663333333333333e-05, "loss": 0.061, "step": 2600 }, { "epoch": 0.7057869118442401, "grad_norm": 0.49227020144462585, "learning_rate": 8.696666666666668e-05, "loss": 0.0636, "step": 2610 }, { "epoch": 0.7084910762574365, "grad_norm": 0.7292385101318359, "learning_rate": 8.730000000000001e-05, "loss": 0.0626, "step": 2620 }, { "epoch": 0.7111952406706328, "grad_norm": 0.8360095024108887, "learning_rate": 8.763333333333334e-05, "loss": 0.0642, "step": 2630 }, { "epoch": 0.7138994050838291, "grad_norm": 0.9217790961265564, "learning_rate": 8.796666666666667e-05, "loss": 0.0627, "step": 2640 }, { "epoch": 0.7166035694970254, "grad_norm": 0.8764194846153259, "learning_rate": 8.83e-05, "loss": 0.0614, "step": 2650 }, { "epoch": 0.7193077339102217, "grad_norm": 0.6715452671051025, "learning_rate": 8.863333333333334e-05, "loss": 0.0616, "step": 2660 }, { "epoch": 0.722011898323418, "grad_norm": 0.7485305070877075, "learning_rate": 8.896666666666667e-05, "loss": 0.0625, "step": 2670 }, { "epoch": 0.7247160627366144, "grad_norm": 0.6855915188789368, "learning_rate": 8.93e-05, "loss": 0.0616, "step": 2680 }, { "epoch": 0.7274202271498107, "grad_norm": 0.6193075180053711, "learning_rate": 8.963333333333333e-05, "loss": 0.0599, "step": 2690 }, { "epoch": 0.730124391563007, "grad_norm": 0.6844053864479065, "learning_rate": 8.996666666666667e-05, "loss": 0.0621, "step": 2700 }, { "epoch": 0.7328285559762033, "grad_norm": 0.5885890126228333, "learning_rate": 9.030000000000001e-05, "loss": 0.0612, "step": 2710 }, { "epoch": 0.7355327203893997, "grad_norm": 0.7134838700294495, "learning_rate": 9.063333333333333e-05, "loss": 0.0603, "step": 2720 }, { "epoch": 0.738236884802596, "grad_norm": 0.7734204530715942, "learning_rate": 9.096666666666666e-05, "loss": 0.0594, "step": 2730 }, { "epoch": 0.7409410492157923, "grad_norm": 0.5410628318786621, "learning_rate": 9.130000000000001e-05, "loss": 0.0622, "step": 2740 }, { "epoch": 0.7436452136289886, "grad_norm": 0.9717596769332886, "learning_rate": 9.163333333333334e-05, "loss": 0.0659, "step": 2750 }, { "epoch": 0.7463493780421849, "grad_norm": 0.6860097646713257, "learning_rate": 9.196666666666666e-05, "loss": 0.0607, "step": 2760 }, { "epoch": 0.7490535424553812, "grad_norm": 0.7770229578018188, "learning_rate": 9.230000000000001e-05, "loss": 0.0618, "step": 2770 }, { "epoch": 0.7517577068685776, "grad_norm": 0.8556946516036987, "learning_rate": 9.263333333333334e-05, "loss": 0.0627, "step": 2780 }, { "epoch": 0.7544618712817739, "grad_norm": 0.6453386545181274, "learning_rate": 9.296666666666667e-05, "loss": 0.065, "step": 2790 }, { "epoch": 0.7571660356949702, "grad_norm": 0.7829745411872864, "learning_rate": 9.33e-05, "loss": 0.0647, "step": 2800 }, { "epoch": 0.7598702001081665, "grad_norm": 0.675743818283081, "learning_rate": 9.363333333333334e-05, "loss": 0.0654, "step": 2810 }, { "epoch": 0.7625743645213628, "grad_norm": 0.5221177339553833, "learning_rate": 9.396666666666667e-05, "loss": 0.0613, "step": 2820 }, { "epoch": 0.7652785289345592, "grad_norm": 0.31692928075790405, "learning_rate": 9.43e-05, "loss": 0.0609, "step": 2830 }, { "epoch": 0.7679826933477556, "grad_norm": 0.5569954514503479, "learning_rate": 9.463333333333333e-05, "loss": 0.0623, "step": 2840 }, { "epoch": 0.7706868577609519, "grad_norm": 0.8761803507804871, "learning_rate": 9.496666666666667e-05, "loss": 0.0637, "step": 2850 }, { "epoch": 0.7733910221741482, "grad_norm": 0.972436785697937, "learning_rate": 9.53e-05, "loss": 0.0631, "step": 2860 }, { "epoch": 0.7760951865873446, "grad_norm": 0.7537035942077637, "learning_rate": 9.563333333333334e-05, "loss": 0.0627, "step": 2870 }, { "epoch": 0.7787993510005409, "grad_norm": 0.6147890686988831, "learning_rate": 9.596666666666668e-05, "loss": 0.0624, "step": 2880 }, { "epoch": 0.7815035154137372, "grad_norm": 0.4677670896053314, "learning_rate": 9.63e-05, "loss": 0.064, "step": 2890 }, { "epoch": 0.7842076798269335, "grad_norm": 0.6543090343475342, "learning_rate": 9.663333333333334e-05, "loss": 0.0609, "step": 2900 }, { "epoch": 0.7869118442401298, "grad_norm": 0.8539631366729736, "learning_rate": 9.696666666666667e-05, "loss": 0.0628, "step": 2910 }, { "epoch": 0.7896160086533262, "grad_norm": 0.8887631893157959, "learning_rate": 9.730000000000001e-05, "loss": 0.0619, "step": 2920 }, { "epoch": 0.7923201730665225, "grad_norm": 0.6072664856910706, "learning_rate": 9.763333333333334e-05, "loss": 0.0618, "step": 2930 }, { "epoch": 0.7950243374797188, "grad_norm": 0.44579416513442993, "learning_rate": 9.796666666666667e-05, "loss": 0.0614, "step": 2940 }, { "epoch": 0.7977285018929151, "grad_norm": 0.5914490222930908, "learning_rate": 9.83e-05, "loss": 0.062, "step": 2950 }, { "epoch": 0.8004326663061114, "grad_norm": 0.6036466360092163, "learning_rate": 9.863333333333334e-05, "loss": 0.0638, "step": 2960 }, { "epoch": 0.8031368307193077, "grad_norm": 0.5337381958961487, "learning_rate": 9.896666666666667e-05, "loss": 0.0641, "step": 2970 }, { "epoch": 0.8058409951325041, "grad_norm": 0.7043077349662781, "learning_rate": 9.93e-05, "loss": 0.0629, "step": 2980 }, { "epoch": 0.8085451595457004, "grad_norm": 0.5369879603385925, "learning_rate": 9.963333333333333e-05, "loss": 0.0611, "step": 2990 }, { "epoch": 0.8112493239588967, "grad_norm": 0.5168898105621338, "learning_rate": 9.996666666666668e-05, "loss": 0.0604, "step": 3000 }, { "epoch": 0.813953488372093, "grad_norm": 1.0448945760726929, "learning_rate": 9.999999384858465e-05, "loss": 0.0625, "step": 3010 }, { "epoch": 0.8166576527852893, "grad_norm": 0.7644956707954407, "learning_rate": 9.999997258443473e-05, "loss": 0.063, "step": 3020 }, { "epoch": 0.8193618171984857, "grad_norm": 0.34073394536972046, "learning_rate": 9.999993613161331e-05, "loss": 0.0617, "step": 3030 }, { "epoch": 0.822065981611682, "grad_norm": 0.5182754993438721, "learning_rate": 9.999988449013146e-05, "loss": 0.0635, "step": 3040 }, { "epoch": 0.8247701460248783, "grad_norm": 0.5865412950515747, "learning_rate": 9.99998176600049e-05, "loss": 0.0613, "step": 3050 }, { "epoch": 0.8274743104380746, "grad_norm": 0.5206180214881897, "learning_rate": 9.999973564125389e-05, "loss": 0.0635, "step": 3060 }, { "epoch": 0.8301784748512709, "grad_norm": 0.5809818506240845, "learning_rate": 9.999963843390335e-05, "loss": 0.062, "step": 3070 }, { "epoch": 0.8328826392644673, "grad_norm": 0.6340605616569519, "learning_rate": 9.999952603798282e-05, "loss": 0.06, "step": 3080 }, { "epoch": 0.8355868036776636, "grad_norm": 0.6946631669998169, "learning_rate": 9.999939845352646e-05, "loss": 0.0592, "step": 3090 }, { "epoch": 0.8382909680908599, "grad_norm": 0.45597654581069946, "learning_rate": 9.999925568057298e-05, "loss": 0.06, "step": 3100 }, { "epoch": 0.8409951325040562, "grad_norm": 0.9012740850448608, "learning_rate": 9.999909771916578e-05, "loss": 0.0626, "step": 3110 }, { "epoch": 0.8436992969172525, "grad_norm": 0.6257476806640625, "learning_rate": 9.999892456935285e-05, "loss": 0.0609, "step": 3120 }, { "epoch": 0.8464034613304489, "grad_norm": 0.5868301391601562, "learning_rate": 9.999873623118679e-05, "loss": 0.0585, "step": 3130 }, { "epoch": 0.8491076257436452, "grad_norm": 0.5187428593635559, "learning_rate": 9.999853270472479e-05, "loss": 0.0597, "step": 3140 }, { "epoch": 0.8518117901568415, "grad_norm": 0.344025582075119, "learning_rate": 9.999831399002871e-05, "loss": 0.0592, "step": 3150 }, { "epoch": 0.8545159545700378, "grad_norm": 0.6159598231315613, "learning_rate": 9.999808008716494e-05, "loss": 0.0595, "step": 3160 }, { "epoch": 0.8572201189832341, "grad_norm": 0.3736618161201477, "learning_rate": 9.999783099620459e-05, "loss": 0.0597, "step": 3170 }, { "epoch": 0.8599242833964305, "grad_norm": 0.6503064632415771, "learning_rate": 9.999756671722328e-05, "loss": 0.0615, "step": 3180 }, { "epoch": 0.8626284478096268, "grad_norm": 0.3226473927497864, "learning_rate": 9.99972872503013e-05, "loss": 0.0589, "step": 3190 }, { "epoch": 0.8653326122228231, "grad_norm": 0.3998192846775055, "learning_rate": 9.999699259552359e-05, "loss": 0.0622, "step": 3200 }, { "epoch": 0.8680367766360195, "grad_norm": 0.5210559964179993, "learning_rate": 9.99966827529796e-05, "loss": 0.0609, "step": 3210 }, { "epoch": 0.8707409410492158, "grad_norm": 0.49736395478248596, "learning_rate": 9.999635772276348e-05, "loss": 0.0597, "step": 3220 }, { "epoch": 0.8734451054624122, "grad_norm": 0.4633181095123291, "learning_rate": 9.999601750497396e-05, "loss": 0.0587, "step": 3230 }, { "epoch": 0.8761492698756085, "grad_norm": 0.45899420976638794, "learning_rate": 9.99956620997144e-05, "loss": 0.0576, "step": 3240 }, { "epoch": 0.8788534342888048, "grad_norm": 0.5794694423675537, "learning_rate": 9.999529150709275e-05, "loss": 0.0614, "step": 3250 }, { "epoch": 0.8815575987020011, "grad_norm": 0.6844463348388672, "learning_rate": 9.999490572722158e-05, "loss": 0.0611, "step": 3260 }, { "epoch": 0.8842617631151974, "grad_norm": 0.8619941473007202, "learning_rate": 9.99945047602181e-05, "loss": 0.0578, "step": 3270 }, { "epoch": 0.8869659275283938, "grad_norm": 0.7888802289962769, "learning_rate": 9.99940886062041e-05, "loss": 0.0582, "step": 3280 }, { "epoch": 0.8896700919415901, "grad_norm": 0.6046693921089172, "learning_rate": 9.999365726530599e-05, "loss": 0.0599, "step": 3290 }, { "epoch": 0.8923742563547864, "grad_norm": 0.5481600165367126, "learning_rate": 9.999321073765481e-05, "loss": 0.0606, "step": 3300 }, { "epoch": 0.8950784207679827, "grad_norm": 0.5778167247772217, "learning_rate": 9.99927490233862e-05, "loss": 0.0613, "step": 3310 }, { "epoch": 0.897782585181179, "grad_norm": 0.6865261197090149, "learning_rate": 9.999227212264043e-05, "loss": 0.0615, "step": 3320 }, { "epoch": 0.9004867495943754, "grad_norm": 0.42647385597229004, "learning_rate": 9.999178003556236e-05, "loss": 0.0603, "step": 3330 }, { "epoch": 0.9031909140075717, "grad_norm": 0.37475278973579407, "learning_rate": 9.999127276230146e-05, "loss": 0.0575, "step": 3340 }, { "epoch": 0.905895078420768, "grad_norm": 0.6783252954483032, "learning_rate": 9.999075030301184e-05, "loss": 0.0586, "step": 3350 }, { "epoch": 0.9085992428339643, "grad_norm": 0.4450998604297638, "learning_rate": 9.999021265785221e-05, "loss": 0.0598, "step": 3360 }, { "epoch": 0.9113034072471606, "grad_norm": 0.27193471789360046, "learning_rate": 9.998965982698589e-05, "loss": 0.0584, "step": 3370 }, { "epoch": 0.914007571660357, "grad_norm": 0.44452813267707825, "learning_rate": 9.998909181058082e-05, "loss": 0.0603, "step": 3380 }, { "epoch": 0.9167117360735533, "grad_norm": 0.3736054003238678, "learning_rate": 9.998850860880953e-05, "loss": 0.059, "step": 3390 }, { "epoch": 0.9194159004867496, "grad_norm": 0.3906331956386566, "learning_rate": 9.998791022184922e-05, "loss": 0.0601, "step": 3400 }, { "epoch": 0.9221200648999459, "grad_norm": 0.44056349992752075, "learning_rate": 9.99872966498816e-05, "loss": 0.0585, "step": 3410 }, { "epoch": 0.9248242293131422, "grad_norm": 0.5072435736656189, "learning_rate": 9.998666789309313e-05, "loss": 0.0594, "step": 3420 }, { "epoch": 0.9275283937263386, "grad_norm": 0.7710506916046143, "learning_rate": 9.998602395167475e-05, "loss": 0.0568, "step": 3430 }, { "epoch": 0.9302325581395349, "grad_norm": 0.6772772669792175, "learning_rate": 9.998536482582213e-05, "loss": 0.0615, "step": 3440 }, { "epoch": 0.9329367225527312, "grad_norm": 0.8935719132423401, "learning_rate": 9.998469051573544e-05, "loss": 0.0607, "step": 3450 }, { "epoch": 0.9356408869659275, "grad_norm": 0.6497712135314941, "learning_rate": 9.998400102161954e-05, "loss": 0.0601, "step": 3460 }, { "epoch": 0.9383450513791238, "grad_norm": 0.6157494187355042, "learning_rate": 9.998329634368388e-05, "loss": 0.0588, "step": 3470 }, { "epoch": 0.9410492157923201, "grad_norm": 0.6426470279693604, "learning_rate": 9.998257648214253e-05, "loss": 0.0598, "step": 3480 }, { "epoch": 0.9437533802055165, "grad_norm": 0.4677267372608185, "learning_rate": 9.998184143721417e-05, "loss": 0.0602, "step": 3490 }, { "epoch": 0.9464575446187128, "grad_norm": 0.37633904814720154, "learning_rate": 9.998109120912206e-05, "loss": 0.0569, "step": 3500 }, { "epoch": 0.9491617090319091, "grad_norm": 0.5247051119804382, "learning_rate": 9.998032579809411e-05, "loss": 0.0594, "step": 3510 }, { "epoch": 0.9518658734451054, "grad_norm": 0.2701462209224701, "learning_rate": 9.997954520436286e-05, "loss": 0.0577, "step": 3520 }, { "epoch": 0.9545700378583017, "grad_norm": 0.3545607924461365, "learning_rate": 9.997874942816538e-05, "loss": 0.0571, "step": 3530 }, { "epoch": 0.9572742022714981, "grad_norm": 0.4248867332935333, "learning_rate": 9.997793846974345e-05, "loss": 0.0592, "step": 3540 }, { "epoch": 0.9599783666846944, "grad_norm": 0.3683431148529053, "learning_rate": 9.997711232934341e-05, "loss": 0.0581, "step": 3550 }, { "epoch": 0.9626825310978907, "grad_norm": 0.5161431431770325, "learning_rate": 9.99762710072162e-05, "loss": 0.0589, "step": 3560 }, { "epoch": 0.965386695511087, "grad_norm": 0.42300552129745483, "learning_rate": 9.997541450361743e-05, "loss": 0.0574, "step": 3570 }, { "epoch": 0.9680908599242833, "grad_norm": 0.3832260072231293, "learning_rate": 9.997454281880723e-05, "loss": 0.0572, "step": 3580 }, { "epoch": 0.9707950243374798, "grad_norm": 0.4681074321269989, "learning_rate": 9.997365595305044e-05, "loss": 0.059, "step": 3590 }, { "epoch": 0.9734991887506761, "grad_norm": 0.5424578785896301, "learning_rate": 9.997275390661644e-05, "loss": 0.0576, "step": 3600 }, { "epoch": 0.9762033531638724, "grad_norm": 0.30565354228019714, "learning_rate": 9.997183667977926e-05, "loss": 0.0568, "step": 3610 }, { "epoch": 0.9789075175770687, "grad_norm": 0.5046505331993103, "learning_rate": 9.997090427281752e-05, "loss": 0.0599, "step": 3620 }, { "epoch": 0.981611681990265, "grad_norm": 0.2944486737251282, "learning_rate": 9.996995668601448e-05, "loss": 0.0562, "step": 3630 }, { "epoch": 0.9843158464034614, "grad_norm": 0.29041630029678345, "learning_rate": 9.996899391965798e-05, "loss": 0.058, "step": 3640 }, { "epoch": 0.9870200108166577, "grad_norm": 0.2965882420539856, "learning_rate": 9.996801597404048e-05, "loss": 0.0583, "step": 3650 }, { "epoch": 0.989724175229854, "grad_norm": 0.5628537535667419, "learning_rate": 9.996702284945905e-05, "loss": 0.0597, "step": 3660 }, { "epoch": 0.9924283396430503, "grad_norm": 0.17506524920463562, "learning_rate": 9.996601454621539e-05, "loss": 0.0555, "step": 3670 }, { "epoch": 0.9951325040562466, "grad_norm": 0.3278454840183258, "learning_rate": 9.996499106461577e-05, "loss": 0.0595, "step": 3680 }, { "epoch": 0.997836668469443, "grad_norm": 0.34757184982299805, "learning_rate": 9.996395240497112e-05, "loss": 0.0575, "step": 3690 }, { "epoch": 1.0005408328826393, "grad_norm": 0.2435382455587387, "learning_rate": 9.996289856759696e-05, "loss": 0.0564, "step": 3700 }, { "epoch": 1.0032449972958355, "grad_norm": 0.3321845531463623, "learning_rate": 9.996182955281342e-05, "loss": 0.0577, "step": 3710 }, { "epoch": 1.005949161709032, "grad_norm": 0.5668061375617981, "learning_rate": 9.996074536094519e-05, "loss": 0.0572, "step": 3720 }, { "epoch": 1.0086533261222281, "grad_norm": 0.5654910206794739, "learning_rate": 9.995964599232168e-05, "loss": 0.0591, "step": 3730 }, { "epoch": 1.0113574905354246, "grad_norm": 0.3239102065563202, "learning_rate": 9.995853144727683e-05, "loss": 0.0575, "step": 3740 }, { "epoch": 1.0140616549486208, "grad_norm": 0.4057128131389618, "learning_rate": 9.99574017261492e-05, "loss": 0.0572, "step": 3750 }, { "epoch": 1.0167658193618172, "grad_norm": 0.24565257132053375, "learning_rate": 9.995625682928198e-05, "loss": 0.0572, "step": 3760 }, { "epoch": 1.0194699837750134, "grad_norm": 0.45781248807907104, "learning_rate": 9.995509675702295e-05, "loss": 0.0559, "step": 3770 }, { "epoch": 1.0221741481882098, "grad_norm": 0.4175795018672943, "learning_rate": 9.995392150972451e-05, "loss": 0.0577, "step": 3780 }, { "epoch": 1.0248783126014063, "grad_norm": 0.412243127822876, "learning_rate": 9.995273108774366e-05, "loss": 0.0561, "step": 3790 }, { "epoch": 1.0275824770146025, "grad_norm": 0.48802056908607483, "learning_rate": 9.995152549144205e-05, "loss": 0.0581, "step": 3800 }, { "epoch": 1.030286641427799, "grad_norm": 0.5499762296676636, "learning_rate": 9.995030472118587e-05, "loss": 0.0568, "step": 3810 }, { "epoch": 1.0329908058409951, "grad_norm": 0.5566831231117249, "learning_rate": 9.9949068777346e-05, "loss": 0.0571, "step": 3820 }, { "epoch": 1.0356949702541915, "grad_norm": 0.5135595798492432, "learning_rate": 9.994781766029786e-05, "loss": 0.0582, "step": 3830 }, { "epoch": 1.0383991346673878, "grad_norm": 0.37169018387794495, "learning_rate": 9.994655137042151e-05, "loss": 0.0583, "step": 3840 }, { "epoch": 1.0411032990805842, "grad_norm": 0.6045682430267334, "learning_rate": 9.99452699081016e-05, "loss": 0.058, "step": 3850 }, { "epoch": 1.0438074634937804, "grad_norm": 0.4780089259147644, "learning_rate": 9.994397327372743e-05, "loss": 0.0584, "step": 3860 }, { "epoch": 1.0465116279069768, "grad_norm": 0.40922167897224426, "learning_rate": 9.994266146769286e-05, "loss": 0.0577, "step": 3870 }, { "epoch": 1.049215792320173, "grad_norm": 0.37898990511894226, "learning_rate": 9.994133449039642e-05, "loss": 0.0568, "step": 3880 }, { "epoch": 1.0519199567333695, "grad_norm": 0.43074333667755127, "learning_rate": 9.993999234224118e-05, "loss": 0.0605, "step": 3890 }, { "epoch": 1.0546241211465657, "grad_norm": 0.5235444903373718, "learning_rate": 9.993863502363485e-05, "loss": 0.0568, "step": 3900 }, { "epoch": 1.057328285559762, "grad_norm": 0.43175068497657776, "learning_rate": 9.993726253498976e-05, "loss": 0.0554, "step": 3910 }, { "epoch": 1.0600324499729583, "grad_norm": 0.4195246696472168, "learning_rate": 9.993587487672282e-05, "loss": 0.0586, "step": 3920 }, { "epoch": 1.0627366143861547, "grad_norm": 0.6008460521697998, "learning_rate": 9.993447204925558e-05, "loss": 0.0575, "step": 3930 }, { "epoch": 1.065440778799351, "grad_norm": 0.4629196524620056, "learning_rate": 9.993305405301416e-05, "loss": 0.0565, "step": 3940 }, { "epoch": 1.0681449432125474, "grad_norm": 0.4906877279281616, "learning_rate": 9.993162088842935e-05, "loss": 0.0585, "step": 3950 }, { "epoch": 1.0708491076257436, "grad_norm": 0.41541731357574463, "learning_rate": 9.993017255593646e-05, "loss": 0.0574, "step": 3960 }, { "epoch": 1.07355327203894, "grad_norm": 0.3628599941730499, "learning_rate": 9.992870905597548e-05, "loss": 0.0583, "step": 3970 }, { "epoch": 1.0762574364521362, "grad_norm": 0.5712358951568604, "learning_rate": 9.9927230388991e-05, "loss": 0.058, "step": 3980 }, { "epoch": 1.0789616008653327, "grad_norm": 0.3648681938648224, "learning_rate": 9.992573655543215e-05, "loss": 0.058, "step": 3990 }, { "epoch": 1.0816657652785289, "grad_norm": 0.38056454062461853, "learning_rate": 9.992422755575277e-05, "loss": 0.0557, "step": 4000 }, { "epoch": 1.0843699296917253, "grad_norm": 0.3298643231391907, "learning_rate": 9.992270339041123e-05, "loss": 0.0533, "step": 4010 }, { "epoch": 1.0870740941049215, "grad_norm": 0.400408536195755, "learning_rate": 9.992116405987053e-05, "loss": 0.0556, "step": 4020 }, { "epoch": 1.089778258518118, "grad_norm": 0.4535055458545685, "learning_rate": 9.991960956459828e-05, "loss": 0.0559, "step": 4030 }, { "epoch": 1.0924824229313141, "grad_norm": 0.2443137913942337, "learning_rate": 9.991803990506669e-05, "loss": 0.0584, "step": 4040 }, { "epoch": 1.0951865873445106, "grad_norm": 0.520470917224884, "learning_rate": 9.991645508175258e-05, "loss": 0.0572, "step": 4050 }, { "epoch": 1.0978907517577068, "grad_norm": 0.39152243733406067, "learning_rate": 9.99148550951374e-05, "loss": 0.0565, "step": 4060 }, { "epoch": 1.1005949161709032, "grad_norm": 0.3067081868648529, "learning_rate": 9.991323994570716e-05, "loss": 0.0564, "step": 4070 }, { "epoch": 1.1032990805840994, "grad_norm": 0.23453955352306366, "learning_rate": 9.99116096339525e-05, "loss": 0.0557, "step": 4080 }, { "epoch": 1.1060032449972959, "grad_norm": 0.44361427426338196, "learning_rate": 9.990996416036869e-05, "loss": 0.0574, "step": 4090 }, { "epoch": 1.108707409410492, "grad_norm": 0.2594689130783081, "learning_rate": 9.990830352545555e-05, "loss": 0.0563, "step": 4100 }, { "epoch": 1.1114115738236885, "grad_norm": 0.31233692169189453, "learning_rate": 9.990662772971756e-05, "loss": 0.055, "step": 4110 }, { "epoch": 1.1141157382368847, "grad_norm": 0.28770750761032104, "learning_rate": 9.990493677366376e-05, "loss": 0.0571, "step": 4120 }, { "epoch": 1.1168199026500811, "grad_norm": 0.3456209599971771, "learning_rate": 9.990323065780786e-05, "loss": 0.0569, "step": 4130 }, { "epoch": 1.1195240670632773, "grad_norm": 0.34523677825927734, "learning_rate": 9.990150938266808e-05, "loss": 0.0569, "step": 4140 }, { "epoch": 1.1222282314764738, "grad_norm": 0.5861400365829468, "learning_rate": 9.989977294876733e-05, "loss": 0.0571, "step": 4150 }, { "epoch": 1.1249323958896702, "grad_norm": 0.2739264965057373, "learning_rate": 9.989802135663308e-05, "loss": 0.0567, "step": 4160 }, { "epoch": 1.1276365603028664, "grad_norm": 0.39317986369132996, "learning_rate": 9.989625460679743e-05, "loss": 0.0576, "step": 4170 }, { "epoch": 1.1303407247160626, "grad_norm": 0.541141927242279, "learning_rate": 9.989447269979706e-05, "loss": 0.0589, "step": 4180 }, { "epoch": 1.133044889129259, "grad_norm": 0.40537914633750916, "learning_rate": 9.989267563617328e-05, "loss": 0.0542, "step": 4190 }, { "epoch": 1.1357490535424555, "grad_norm": 0.3666684925556183, "learning_rate": 9.989086341647198e-05, "loss": 0.0564, "step": 4200 }, { "epoch": 1.1384532179556517, "grad_norm": 0.41736552119255066, "learning_rate": 9.988903604124366e-05, "loss": 0.0577, "step": 4210 }, { "epoch": 1.1411573823688481, "grad_norm": 0.42437055706977844, "learning_rate": 9.988719351104343e-05, "loss": 0.0551, "step": 4220 }, { "epoch": 1.1438615467820443, "grad_norm": 0.41209498047828674, "learning_rate": 9.9885335826431e-05, "loss": 0.058, "step": 4230 }, { "epoch": 1.1465657111952408, "grad_norm": 0.4161634147167206, "learning_rate": 9.988346298797071e-05, "loss": 0.0557, "step": 4240 }, { "epoch": 1.149269875608437, "grad_norm": 0.48972809314727783, "learning_rate": 9.988157499623146e-05, "loss": 0.0583, "step": 4250 }, { "epoch": 1.1519740400216334, "grad_norm": 0.5864782929420471, "learning_rate": 9.987967185178677e-05, "loss": 0.0552, "step": 4260 }, { "epoch": 1.1546782044348296, "grad_norm": 0.3749200701713562, "learning_rate": 9.987775355521476e-05, "loss": 0.0567, "step": 4270 }, { "epoch": 1.157382368848026, "grad_norm": 0.4902573525905609, "learning_rate": 9.987582010709817e-05, "loss": 0.0572, "step": 4280 }, { "epoch": 1.1600865332612222, "grad_norm": 0.6111738085746765, "learning_rate": 9.987387150802431e-05, "loss": 0.057, "step": 4290 }, { "epoch": 1.1627906976744187, "grad_norm": 0.5149335265159607, "learning_rate": 9.987190775858517e-05, "loss": 0.056, "step": 4300 }, { "epoch": 1.1654948620876149, "grad_norm": 0.32027313113212585, "learning_rate": 9.98699288593772e-05, "loss": 0.0564, "step": 4310 }, { "epoch": 1.1681990265008113, "grad_norm": 0.3590008616447449, "learning_rate": 9.986793481100161e-05, "loss": 0.0585, "step": 4320 }, { "epoch": 1.1709031909140075, "grad_norm": 0.3666689097881317, "learning_rate": 9.986592561406412e-05, "loss": 0.0566, "step": 4330 }, { "epoch": 1.173607355327204, "grad_norm": 0.4035090208053589, "learning_rate": 9.986390126917503e-05, "loss": 0.0565, "step": 4340 }, { "epoch": 1.1763115197404002, "grad_norm": 0.19240006804466248, "learning_rate": 9.986186177694933e-05, "loss": 0.0559, "step": 4350 }, { "epoch": 1.1790156841535966, "grad_norm": 0.3283756673336029, "learning_rate": 9.985980713800656e-05, "loss": 0.0561, "step": 4360 }, { "epoch": 1.1817198485667928, "grad_norm": 0.4631184935569763, "learning_rate": 9.985773735297084e-05, "loss": 0.0572, "step": 4370 }, { "epoch": 1.1844240129799892, "grad_norm": 0.45146939158439636, "learning_rate": 9.985565242247092e-05, "loss": 0.053, "step": 4380 }, { "epoch": 1.1871281773931854, "grad_norm": 0.39644375443458557, "learning_rate": 9.985355234714016e-05, "loss": 0.0576, "step": 4390 }, { "epoch": 1.1898323418063819, "grad_norm": 0.27578988671302795, "learning_rate": 9.985143712761652e-05, "loss": 0.0561, "step": 4400 }, { "epoch": 1.192536506219578, "grad_norm": 0.3779398798942566, "learning_rate": 9.984930676454252e-05, "loss": 0.0554, "step": 4410 }, { "epoch": 1.1952406706327745, "grad_norm": 0.4099121689796448, "learning_rate": 9.984716125856532e-05, "loss": 0.0567, "step": 4420 }, { "epoch": 1.1979448350459707, "grad_norm": 0.4558180272579193, "learning_rate": 9.984500061033667e-05, "loss": 0.0554, "step": 4430 }, { "epoch": 1.2006489994591671, "grad_norm": 0.3547060489654541, "learning_rate": 9.984282482051293e-05, "loss": 0.0554, "step": 4440 }, { "epoch": 1.2033531638723634, "grad_norm": 0.2679702639579773, "learning_rate": 9.9840633889755e-05, "loss": 0.0567, "step": 4450 }, { "epoch": 1.2060573282855598, "grad_norm": 0.22458551824092865, "learning_rate": 9.983842781872848e-05, "loss": 0.0549, "step": 4460 }, { "epoch": 1.208761492698756, "grad_norm": 0.35360217094421387, "learning_rate": 9.98362066081035e-05, "loss": 0.0583, "step": 4470 }, { "epoch": 1.2114656571119524, "grad_norm": 0.47144293785095215, "learning_rate": 9.983397025855479e-05, "loss": 0.0557, "step": 4480 }, { "epoch": 1.2141698215251489, "grad_norm": 0.43901586532592773, "learning_rate": 9.983171877076171e-05, "loss": 0.0561, "step": 4490 }, { "epoch": 1.216873985938345, "grad_norm": 0.22520259022712708, "learning_rate": 9.98294521454082e-05, "loss": 0.0579, "step": 4500 }, { "epoch": 1.2195781503515413, "grad_norm": 0.5502480268478394, "learning_rate": 9.98271703831828e-05, "loss": 0.0583, "step": 4510 }, { "epoch": 1.2222823147647377, "grad_norm": 0.4832879304885864, "learning_rate": 9.982487348477865e-05, "loss": 0.0548, "step": 4520 }, { "epoch": 1.2249864791779341, "grad_norm": 0.6694095134735107, "learning_rate": 9.982256145089347e-05, "loss": 0.0567, "step": 4530 }, { "epoch": 1.2276906435911303, "grad_norm": 0.5291188359260559, "learning_rate": 9.982023428222962e-05, "loss": 0.0566, "step": 4540 }, { "epoch": 1.2303948080043265, "grad_norm": 0.351970911026001, "learning_rate": 9.981789197949403e-05, "loss": 0.0553, "step": 4550 }, { "epoch": 1.233098972417523, "grad_norm": 0.3650189936161041, "learning_rate": 9.98155345433982e-05, "loss": 0.0553, "step": 4560 }, { "epoch": 1.2358031368307194, "grad_norm": 0.4115101099014282, "learning_rate": 9.981316197465831e-05, "loss": 0.0547, "step": 4570 }, { "epoch": 1.2385073012439156, "grad_norm": 0.23543447256088257, "learning_rate": 9.981077427399504e-05, "loss": 0.0556, "step": 4580 }, { "epoch": 1.241211465657112, "grad_norm": 0.3054785132408142, "learning_rate": 9.980837144213371e-05, "loss": 0.0558, "step": 4590 }, { "epoch": 1.2439156300703083, "grad_norm": 0.281286358833313, "learning_rate": 9.980595347980426e-05, "loss": 0.0557, "step": 4600 }, { "epoch": 1.2466197944835047, "grad_norm": 0.30940791964530945, "learning_rate": 9.980352038774119e-05, "loss": 0.0558, "step": 4610 }, { "epoch": 1.249323958896701, "grad_norm": 0.2375911921262741, "learning_rate": 9.98010721666836e-05, "loss": 0.054, "step": 4620 }, { "epoch": 1.2520281233098973, "grad_norm": 0.3011135160923004, "learning_rate": 9.979860881737523e-05, "loss": 0.057, "step": 4630 }, { "epoch": 1.2547322877230935, "grad_norm": 0.3418763279914856, "learning_rate": 9.979613034056434e-05, "loss": 0.0584, "step": 4640 }, { "epoch": 1.25743645213629, "grad_norm": 0.20252323150634766, "learning_rate": 9.979363673700386e-05, "loss": 0.0565, "step": 4650 }, { "epoch": 1.2601406165494862, "grad_norm": 0.2647918164730072, "learning_rate": 9.979112800745124e-05, "loss": 0.0548, "step": 4660 }, { "epoch": 1.2628447809626826, "grad_norm": 0.381572425365448, "learning_rate": 9.978860415266861e-05, "loss": 0.0561, "step": 4670 }, { "epoch": 1.2655489453758788, "grad_norm": 0.35822975635528564, "learning_rate": 9.978606517342262e-05, "loss": 0.056, "step": 4680 }, { "epoch": 1.2682531097890752, "grad_norm": 0.3457561731338501, "learning_rate": 9.978351107048456e-05, "loss": 0.0536, "step": 4690 }, { "epoch": 1.2709572742022714, "grad_norm": 0.31828129291534424, "learning_rate": 9.978094184463029e-05, "loss": 0.0568, "step": 4700 }, { "epoch": 1.2736614386154679, "grad_norm": 0.4482595920562744, "learning_rate": 9.977835749664029e-05, "loss": 0.0555, "step": 4710 }, { "epoch": 1.276365603028664, "grad_norm": 0.4217239320278168, "learning_rate": 9.97757580272996e-05, "loss": 0.056, "step": 4720 }, { "epoch": 1.2790697674418605, "grad_norm": 0.4402405917644501, "learning_rate": 9.977314343739786e-05, "loss": 0.0558, "step": 4730 }, { "epoch": 1.2817739318550567, "grad_norm": 0.46018901467323303, "learning_rate": 9.977051372772934e-05, "loss": 0.0552, "step": 4740 }, { "epoch": 1.2844780962682532, "grad_norm": 0.3903460204601288, "learning_rate": 9.976786889909286e-05, "loss": 0.0533, "step": 4750 }, { "epoch": 1.2871822606814494, "grad_norm": 0.38340288400650024, "learning_rate": 9.976520895229185e-05, "loss": 0.0551, "step": 4760 }, { "epoch": 1.2898864250946458, "grad_norm": 0.33765020966529846, "learning_rate": 9.976253388813433e-05, "loss": 0.0536, "step": 4770 }, { "epoch": 1.292590589507842, "grad_norm": 0.4900326728820801, "learning_rate": 9.975984370743293e-05, "loss": 0.0553, "step": 4780 }, { "epoch": 1.2952947539210384, "grad_norm": 0.31987717747688293, "learning_rate": 9.975713841100485e-05, "loss": 0.0561, "step": 4790 }, { "epoch": 1.2979989183342346, "grad_norm": 0.45070379972457886, "learning_rate": 9.975441799967187e-05, "loss": 0.0563, "step": 4800 }, { "epoch": 1.300703082747431, "grad_norm": 0.3916539251804352, "learning_rate": 9.975168247426039e-05, "loss": 0.0544, "step": 4810 }, { "epoch": 1.3034072471606275, "grad_norm": 0.2948928475379944, "learning_rate": 9.974893183560139e-05, "loss": 0.0543, "step": 4820 }, { "epoch": 1.3061114115738237, "grad_norm": 0.45453622937202454, "learning_rate": 9.974616608453045e-05, "loss": 0.053, "step": 4830 }, { "epoch": 1.30881557598702, "grad_norm": 0.38854822516441345, "learning_rate": 9.974338522188772e-05, "loss": 0.056, "step": 4840 }, { "epoch": 1.3115197404002163, "grad_norm": 0.1618748903274536, "learning_rate": 9.974058924851797e-05, "loss": 0.053, "step": 4850 }, { "epoch": 1.3142239048134128, "grad_norm": 0.35966408252716064, "learning_rate": 9.973777816527051e-05, "loss": 0.0553, "step": 4860 }, { "epoch": 1.316928069226609, "grad_norm": 0.3783445358276367, "learning_rate": 9.973495197299931e-05, "loss": 0.0553, "step": 4870 }, { "epoch": 1.3196322336398052, "grad_norm": 0.27189576625823975, "learning_rate": 9.973211067256287e-05, "loss": 0.0544, "step": 4880 }, { "epoch": 1.3223363980530016, "grad_norm": 0.4011562466621399, "learning_rate": 9.97292542648243e-05, "loss": 0.056, "step": 4890 }, { "epoch": 1.325040562466198, "grad_norm": 0.36941495537757874, "learning_rate": 9.972638275065131e-05, "loss": 0.0547, "step": 4900 }, { "epoch": 1.3277447268793943, "grad_norm": 0.3856217861175537, "learning_rate": 9.972349613091621e-05, "loss": 0.0519, "step": 4910 }, { "epoch": 1.3304488912925905, "grad_norm": 0.24806426465511322, "learning_rate": 9.972059440649584e-05, "loss": 0.0529, "step": 4920 }, { "epoch": 1.333153055705787, "grad_norm": 0.2947964072227478, "learning_rate": 9.971767757827168e-05, "loss": 0.0556, "step": 4930 }, { "epoch": 1.3358572201189833, "grad_norm": 0.3381788432598114, "learning_rate": 9.971474564712982e-05, "loss": 0.0549, "step": 4940 }, { "epoch": 1.3385613845321795, "grad_norm": 0.26930615305900574, "learning_rate": 9.971179861396084e-05, "loss": 0.0546, "step": 4950 }, { "epoch": 1.3412655489453758, "grad_norm": 0.34661203622817993, "learning_rate": 9.970883647966003e-05, "loss": 0.0564, "step": 4960 }, { "epoch": 1.3439697133585722, "grad_norm": 0.27452483773231506, "learning_rate": 9.970585924512717e-05, "loss": 0.0567, "step": 4970 }, { "epoch": 1.3466738777717686, "grad_norm": 0.2873125970363617, "learning_rate": 9.970286691126669e-05, "loss": 0.0551, "step": 4980 }, { "epoch": 1.3493780421849648, "grad_norm": 0.2584156095981598, "learning_rate": 9.969985947898756e-05, "loss": 0.0541, "step": 4990 }, { "epoch": 1.3520822065981613, "grad_norm": 0.2985088527202606, "learning_rate": 9.969683694920337e-05, "loss": 0.0556, "step": 5000 }, { "epoch": 1.3547863710113575, "grad_norm": 0.2722840905189514, "learning_rate": 9.969379932283228e-05, "loss": 0.0541, "step": 5010 }, { "epoch": 1.357490535424554, "grad_norm": 0.2607596814632416, "learning_rate": 9.969074660079704e-05, "loss": 0.0532, "step": 5020 }, { "epoch": 1.36019469983775, "grad_norm": 0.2187862992286682, "learning_rate": 9.968767878402501e-05, "loss": 0.0533, "step": 5030 }, { "epoch": 1.3628988642509465, "grad_norm": 0.25865235924720764, "learning_rate": 9.968459587344808e-05, "loss": 0.056, "step": 5040 }, { "epoch": 1.3656030286641427, "grad_norm": 0.2251492291688919, "learning_rate": 9.968149787000278e-05, "loss": 0.0556, "step": 5050 }, { "epoch": 1.3683071930773392, "grad_norm": 0.24701480567455292, "learning_rate": 9.967838477463018e-05, "loss": 0.0547, "step": 5060 }, { "epoch": 1.3710113574905354, "grad_norm": 0.379973441362381, "learning_rate": 9.967525658827597e-05, "loss": 0.0511, "step": 5070 }, { "epoch": 1.3737155219037318, "grad_norm": 0.3090791404247284, "learning_rate": 9.967211331189042e-05, "loss": 0.0539, "step": 5080 }, { "epoch": 1.376419686316928, "grad_norm": 0.14896750450134277, "learning_rate": 9.966895494642834e-05, "loss": 0.0556, "step": 5090 }, { "epoch": 1.3791238507301244, "grad_norm": 0.24581873416900635, "learning_rate": 9.96657814928492e-05, "loss": 0.0528, "step": 5100 }, { "epoch": 1.3818280151433207, "grad_norm": 0.3260997533798218, "learning_rate": 9.966259295211697e-05, "loss": 0.0534, "step": 5110 }, { "epoch": 1.384532179556517, "grad_norm": 0.22829076647758484, "learning_rate": 9.965938932520028e-05, "loss": 0.0536, "step": 5120 }, { "epoch": 1.3872363439697133, "grad_norm": 0.29032793641090393, "learning_rate": 9.965617061307229e-05, "loss": 0.0549, "step": 5130 }, { "epoch": 1.3899405083829097, "grad_norm": 0.25295573472976685, "learning_rate": 9.965293681671077e-05, "loss": 0.0543, "step": 5140 }, { "epoch": 1.392644672796106, "grad_norm": 0.5620379447937012, "learning_rate": 9.964968793709804e-05, "loss": 0.0552, "step": 5150 }, { "epoch": 1.3953488372093024, "grad_norm": 0.29963693022727966, "learning_rate": 9.964642397522106e-05, "loss": 0.0527, "step": 5160 }, { "epoch": 1.3980530016224986, "grad_norm": 0.29472121596336365, "learning_rate": 9.96431449320713e-05, "loss": 0.0552, "step": 5170 }, { "epoch": 1.400757166035695, "grad_norm": 0.2838699519634247, "learning_rate": 9.963985080864486e-05, "loss": 0.0539, "step": 5180 }, { "epoch": 1.4034613304488914, "grad_norm": 0.2881458103656769, "learning_rate": 9.96365416059424e-05, "loss": 0.0531, "step": 5190 }, { "epoch": 1.4061654948620876, "grad_norm": 0.3477221131324768, "learning_rate": 9.963321732496919e-05, "loss": 0.0544, "step": 5200 }, { "epoch": 1.4088696592752838, "grad_norm": 0.3247677683830261, "learning_rate": 9.962987796673506e-05, "loss": 0.053, "step": 5210 }, { "epoch": 1.4115738236884803, "grad_norm": 0.36600416898727417, "learning_rate": 9.962652353225438e-05, "loss": 0.0518, "step": 5220 }, { "epoch": 1.4142779881016767, "grad_norm": 0.2502504885196686, "learning_rate": 9.962315402254619e-05, "loss": 0.0544, "step": 5230 }, { "epoch": 1.416982152514873, "grad_norm": 0.22923175990581512, "learning_rate": 9.9619769438634e-05, "loss": 0.0533, "step": 5240 }, { "epoch": 1.4196863169280691, "grad_norm": 0.3285161256790161, "learning_rate": 9.9616369781546e-05, "loss": 0.0559, "step": 5250 }, { "epoch": 1.4223904813412656, "grad_norm": 0.23721207678318024, "learning_rate": 9.961295505231491e-05, "loss": 0.0535, "step": 5260 }, { "epoch": 1.425094645754462, "grad_norm": 0.41538864374160767, "learning_rate": 9.960952525197804e-05, "loss": 0.0545, "step": 5270 }, { "epoch": 1.4277988101676582, "grad_norm": 0.31761983036994934, "learning_rate": 9.960608038157724e-05, "loss": 0.0528, "step": 5280 }, { "epoch": 1.4305029745808544, "grad_norm": 0.31706827878952026, "learning_rate": 9.960262044215901e-05, "loss": 0.0539, "step": 5290 }, { "epoch": 1.4332071389940508, "grad_norm": 0.21269728243350983, "learning_rate": 9.959914543477435e-05, "loss": 0.0529, "step": 5300 }, { "epoch": 1.4359113034072473, "grad_norm": 0.28716105222702026, "learning_rate": 9.959565536047892e-05, "loss": 0.0523, "step": 5310 }, { "epoch": 1.4386154678204435, "grad_norm": 0.27380260825157166, "learning_rate": 9.959215022033288e-05, "loss": 0.0528, "step": 5320 }, { "epoch": 1.4413196322336397, "grad_norm": 0.24013245105743408, "learning_rate": 9.9588630015401e-05, "loss": 0.0517, "step": 5330 }, { "epoch": 1.444023796646836, "grad_norm": 0.27135759592056274, "learning_rate": 9.958509474675264e-05, "loss": 0.0541, "step": 5340 }, { "epoch": 1.4467279610600325, "grad_norm": 0.34975290298461914, "learning_rate": 9.958154441546171e-05, "loss": 0.054, "step": 5350 }, { "epoch": 1.4494321254732287, "grad_norm": 0.4933083653450012, "learning_rate": 9.957797902260673e-05, "loss": 0.054, "step": 5360 }, { "epoch": 1.452136289886425, "grad_norm": 0.36120814085006714, "learning_rate": 9.957439856927073e-05, "loss": 0.0532, "step": 5370 }, { "epoch": 1.4548404542996214, "grad_norm": 0.31392908096313477, "learning_rate": 9.957080305654139e-05, "loss": 0.0525, "step": 5380 }, { "epoch": 1.4575446187128178, "grad_norm": 0.28612470626831055, "learning_rate": 9.956719248551092e-05, "loss": 0.0529, "step": 5390 }, { "epoch": 1.460248783126014, "grad_norm": 0.24671319127082825, "learning_rate": 9.956356685727612e-05, "loss": 0.053, "step": 5400 }, { "epoch": 1.4629529475392105, "grad_norm": 0.3476572334766388, "learning_rate": 9.955992617293836e-05, "loss": 0.055, "step": 5410 }, { "epoch": 1.4656571119524067, "grad_norm": 0.26943281292915344, "learning_rate": 9.955627043360358e-05, "loss": 0.0521, "step": 5420 }, { "epoch": 1.468361276365603, "grad_norm": 0.28877121210098267, "learning_rate": 9.955259964038231e-05, "loss": 0.0536, "step": 5430 }, { "epoch": 1.4710654407787993, "grad_norm": 0.3346991240978241, "learning_rate": 9.954891379438962e-05, "loss": 0.0538, "step": 5440 }, { "epoch": 1.4737696051919957, "grad_norm": 0.3432411551475525, "learning_rate": 9.954521289674519e-05, "loss": 0.0552, "step": 5450 }, { "epoch": 1.476473769605192, "grad_norm": 0.40260204672813416, "learning_rate": 9.954149694857325e-05, "loss": 0.0547, "step": 5460 }, { "epoch": 1.4791779340183884, "grad_norm": 0.38721683621406555, "learning_rate": 9.953776595100258e-05, "loss": 0.0526, "step": 5470 }, { "epoch": 1.4818820984315846, "grad_norm": 0.5419077277183533, "learning_rate": 9.95340199051666e-05, "loss": 0.0531, "step": 5480 }, { "epoch": 1.484586262844781, "grad_norm": 0.303691029548645, "learning_rate": 9.953025881220325e-05, "loss": 0.0556, "step": 5490 }, { "epoch": 1.4872904272579772, "grad_norm": 0.33729326725006104, "learning_rate": 9.952648267325504e-05, "loss": 0.056, "step": 5500 }, { "epoch": 1.4899945916711737, "grad_norm": 0.4769758880138397, "learning_rate": 9.952269148946905e-05, "loss": 0.0542, "step": 5510 }, { "epoch": 1.4926987560843699, "grad_norm": 0.42190128564834595, "learning_rate": 9.951888526199697e-05, "loss": 0.0544, "step": 5520 }, { "epoch": 1.4954029204975663, "grad_norm": 0.29972192645072937, "learning_rate": 9.951506399199501e-05, "loss": 0.055, "step": 5530 }, { "epoch": 1.4981070849107625, "grad_norm": 0.34851086139678955, "learning_rate": 9.951122768062399e-05, "loss": 0.0545, "step": 5540 }, { "epoch": 1.500811249323959, "grad_norm": 0.24652864038944244, "learning_rate": 9.950737632904927e-05, "loss": 0.0521, "step": 5550 }, { "epoch": 1.5035154137371554, "grad_norm": 0.378999263048172, "learning_rate": 9.950350993844077e-05, "loss": 0.0536, "step": 5560 }, { "epoch": 1.5062195781503516, "grad_norm": 0.28222012519836426, "learning_rate": 9.949962850997303e-05, "loss": 0.0523, "step": 5570 }, { "epoch": 1.5089237425635478, "grad_norm": 0.44590523838996887, "learning_rate": 9.949573204482512e-05, "loss": 0.0522, "step": 5580 }, { "epoch": 1.5116279069767442, "grad_norm": 0.2384953498840332, "learning_rate": 9.949182054418064e-05, "loss": 0.0538, "step": 5590 }, { "epoch": 1.5143320713899406, "grad_norm": 0.3391299545764923, "learning_rate": 9.948789400922787e-05, "loss": 0.0549, "step": 5600 }, { "epoch": 1.5170362358031368, "grad_norm": 0.23050546646118164, "learning_rate": 9.948395244115953e-05, "loss": 0.0535, "step": 5610 }, { "epoch": 1.519740400216333, "grad_norm": 0.36458519101142883, "learning_rate": 9.9479995841173e-05, "loss": 0.0554, "step": 5620 }, { "epoch": 1.5224445646295295, "grad_norm": 0.4126681983470917, "learning_rate": 9.947602421047017e-05, "loss": 0.0527, "step": 5630 }, { "epoch": 1.525148729042726, "grad_norm": 0.4530881941318512, "learning_rate": 9.947203755025753e-05, "loss": 0.0506, "step": 5640 }, { "epoch": 1.5278528934559221, "grad_norm": 0.3872542381286621, "learning_rate": 9.946803586174611e-05, "loss": 0.0536, "step": 5650 }, { "epoch": 1.5305570578691183, "grad_norm": 0.3814108073711395, "learning_rate": 9.946401914615151e-05, "loss": 0.0524, "step": 5660 }, { "epoch": 1.5332612222823148, "grad_norm": 0.4279074966907501, "learning_rate": 9.945998740469394e-05, "loss": 0.0536, "step": 5670 }, { "epoch": 1.5359653866955112, "grad_norm": 0.3567333519458771, "learning_rate": 9.945594063859809e-05, "loss": 0.054, "step": 5680 }, { "epoch": 1.5386695511087074, "grad_norm": 0.4378107488155365, "learning_rate": 9.94518788490933e-05, "loss": 0.0537, "step": 5690 }, { "epoch": 1.5413737155219036, "grad_norm": 0.5164161324501038, "learning_rate": 9.944780203741341e-05, "loss": 0.0543, "step": 5700 }, { "epoch": 1.5440778799351, "grad_norm": 0.3630194664001465, "learning_rate": 9.944371020479686e-05, "loss": 0.0546, "step": 5710 }, { "epoch": 1.5467820443482965, "grad_norm": 0.23521782457828522, "learning_rate": 9.943960335248662e-05, "loss": 0.0522, "step": 5720 }, { "epoch": 1.5494862087614927, "grad_norm": 0.2814953625202179, "learning_rate": 9.943548148173027e-05, "loss": 0.0533, "step": 5730 }, { "epoch": 1.5521903731746889, "grad_norm": 0.39109405875205994, "learning_rate": 9.943134459377992e-05, "loss": 0.0509, "step": 5740 }, { "epoch": 1.5548945375878853, "grad_norm": 0.3696792423725128, "learning_rate": 9.942719268989222e-05, "loss": 0.0518, "step": 5750 }, { "epoch": 1.5575987020010817, "grad_norm": 0.2586972415447235, "learning_rate": 9.942302577132844e-05, "loss": 0.0525, "step": 5760 }, { "epoch": 1.560302866414278, "grad_norm": 0.3933272063732147, "learning_rate": 9.941884383935438e-05, "loss": 0.0552, "step": 5770 }, { "epoch": 1.5630070308274742, "grad_norm": 0.4271887242794037, "learning_rate": 9.941464689524039e-05, "loss": 0.0528, "step": 5780 }, { "epoch": 1.5657111952406706, "grad_norm": 0.4428408145904541, "learning_rate": 9.941043494026139e-05, "loss": 0.0529, "step": 5790 }, { "epoch": 1.568415359653867, "grad_norm": 0.5112735629081726, "learning_rate": 9.940620797569685e-05, "loss": 0.054, "step": 5800 }, { "epoch": 1.5711195240670632, "grad_norm": 0.413663387298584, "learning_rate": 9.940196600283082e-05, "loss": 0.0542, "step": 5810 }, { "epoch": 1.5738236884802594, "grad_norm": 0.5421071648597717, "learning_rate": 9.939770902295192e-05, "loss": 0.0546, "step": 5820 }, { "epoch": 1.5765278528934559, "grad_norm": 0.2887481153011322, "learning_rate": 9.939343703735329e-05, "loss": 0.0543, "step": 5830 }, { "epoch": 1.5792320173066523, "grad_norm": 0.17130717635154724, "learning_rate": 9.938915004733264e-05, "loss": 0.0521, "step": 5840 }, { "epoch": 1.5819361817198487, "grad_norm": 0.2900141179561615, "learning_rate": 9.938484805419224e-05, "loss": 0.0538, "step": 5850 }, { "epoch": 1.584640346133045, "grad_norm": 0.18816876411437988, "learning_rate": 9.938053105923894e-05, "loss": 0.0512, "step": 5860 }, { "epoch": 1.5873445105462411, "grad_norm": 0.37133026123046875, "learning_rate": 9.937619906378413e-05, "loss": 0.0524, "step": 5870 }, { "epoch": 1.5900486749594376, "grad_norm": 0.2073131799697876, "learning_rate": 9.937185206914374e-05, "loss": 0.0525, "step": 5880 }, { "epoch": 1.592752839372634, "grad_norm": 0.19854390621185303, "learning_rate": 9.936749007663829e-05, "loss": 0.0521, "step": 5890 }, { "epoch": 1.5954570037858302, "grad_norm": 0.2887383699417114, "learning_rate": 9.93631130875928e-05, "loss": 0.0541, "step": 5900 }, { "epoch": 1.5981611681990264, "grad_norm": 0.27385446429252625, "learning_rate": 9.935872110333692e-05, "loss": 0.053, "step": 5910 }, { "epoch": 1.6008653326122229, "grad_norm": 0.5098092555999756, "learning_rate": 9.935431412520484e-05, "loss": 0.053, "step": 5920 }, { "epoch": 1.6035694970254193, "grad_norm": 0.2198186218738556, "learning_rate": 9.934989215453523e-05, "loss": 0.0522, "step": 5930 }, { "epoch": 1.6062736614386155, "grad_norm": 0.30710741877555847, "learning_rate": 9.934545519267139e-05, "loss": 0.0545, "step": 5940 }, { "epoch": 1.6089778258518117, "grad_norm": 0.25574633479118347, "learning_rate": 9.934100324096117e-05, "loss": 0.0526, "step": 5950 }, { "epoch": 1.6116819902650081, "grad_norm": 0.2029321789741516, "learning_rate": 9.933653630075692e-05, "loss": 0.0517, "step": 5960 }, { "epoch": 1.6143861546782046, "grad_norm": 0.2966034412384033, "learning_rate": 9.93320543734156e-05, "loss": 0.0555, "step": 5970 }, { "epoch": 1.6170903190914008, "grad_norm": 0.2671225666999817, "learning_rate": 9.932755746029871e-05, "loss": 0.0524, "step": 5980 }, { "epoch": 1.619794483504597, "grad_norm": 0.29503872990608215, "learning_rate": 9.932304556277228e-05, "loss": 0.0535, "step": 5990 }, { "epoch": 1.6224986479177934, "grad_norm": 0.2605288028717041, "learning_rate": 9.93185186822069e-05, "loss": 0.0515, "step": 6000 }, { "epoch": 1.6252028123309898, "grad_norm": 0.2829229235649109, "learning_rate": 9.931397681997773e-05, "loss": 0.0547, "step": 6010 }, { "epoch": 1.627906976744186, "grad_norm": 0.3834557831287384, "learning_rate": 9.930941997746446e-05, "loss": 0.0525, "step": 6020 }, { "epoch": 1.6306111411573823, "grad_norm": 0.3769620656967163, "learning_rate": 9.930484815605134e-05, "loss": 0.0545, "step": 6030 }, { "epoch": 1.6333153055705787, "grad_norm": 0.2831316292285919, "learning_rate": 9.930026135712717e-05, "loss": 0.0542, "step": 6040 }, { "epoch": 1.6360194699837751, "grad_norm": 0.3556877076625824, "learning_rate": 9.92956595820853e-05, "loss": 0.0519, "step": 6050 }, { "epoch": 1.6387236343969713, "grad_norm": 0.23419450223445892, "learning_rate": 9.929104283232362e-05, "loss": 0.0549, "step": 6060 }, { "epoch": 1.6414277988101675, "grad_norm": 0.23721079528331757, "learning_rate": 9.92864111092446e-05, "loss": 0.0529, "step": 6070 }, { "epoch": 1.644131963223364, "grad_norm": 0.30110204219818115, "learning_rate": 9.92817644142552e-05, "loss": 0.0527, "step": 6080 }, { "epoch": 1.6468361276365604, "grad_norm": 0.2636876404285431, "learning_rate": 9.927710274876698e-05, "loss": 0.0511, "step": 6090 }, { "epoch": 1.6495402920497566, "grad_norm": 0.17786002159118652, "learning_rate": 9.927242611419603e-05, "loss": 0.0525, "step": 6100 }, { "epoch": 1.6522444564629528, "grad_norm": 0.33697378635406494, "learning_rate": 9.926773451196301e-05, "loss": 0.0526, "step": 6110 }, { "epoch": 1.6549486208761492, "grad_norm": 0.4605565667152405, "learning_rate": 9.926302794349306e-05, "loss": 0.0533, "step": 6120 }, { "epoch": 1.6576527852893457, "grad_norm": 0.42012161016464233, "learning_rate": 9.925830641021594e-05, "loss": 0.0524, "step": 6130 }, { "epoch": 1.6603569497025419, "grad_norm": 0.23405145108699799, "learning_rate": 9.925356991356593e-05, "loss": 0.0521, "step": 6140 }, { "epoch": 1.663061114115738, "grad_norm": 0.20473846793174744, "learning_rate": 9.924881845498184e-05, "loss": 0.0509, "step": 6150 }, { "epoch": 1.6657652785289345, "grad_norm": 0.21859662234783173, "learning_rate": 9.924405203590705e-05, "loss": 0.0517, "step": 6160 }, { "epoch": 1.668469442942131, "grad_norm": 0.35776087641716003, "learning_rate": 9.923927065778946e-05, "loss": 0.0497, "step": 6170 }, { "epoch": 1.6711736073553272, "grad_norm": 0.39941737055778503, "learning_rate": 9.923447432208154e-05, "loss": 0.0526, "step": 6180 }, { "epoch": 1.6738777717685234, "grad_norm": 0.36584097146987915, "learning_rate": 9.922966303024027e-05, "loss": 0.0508, "step": 6190 }, { "epoch": 1.6765819361817198, "grad_norm": 0.32039979100227356, "learning_rate": 9.922483678372721e-05, "loss": 0.0522, "step": 6200 }, { "epoch": 1.6792861005949162, "grad_norm": 0.22879885137081146, "learning_rate": 9.921999558400845e-05, "loss": 0.0507, "step": 6210 }, { "epoch": 1.6819902650081124, "grad_norm": 0.23918752372264862, "learning_rate": 9.92151394325546e-05, "loss": 0.0512, "step": 6220 }, { "epoch": 1.6846944294213089, "grad_norm": 0.2581140100955963, "learning_rate": 9.921026833084084e-05, "loss": 0.05, "step": 6230 }, { "epoch": 1.687398593834505, "grad_norm": 0.34125885367393494, "learning_rate": 9.920538228034689e-05, "loss": 0.0524, "step": 6240 }, { "epoch": 1.6901027582477015, "grad_norm": 0.2967666983604431, "learning_rate": 9.920048128255699e-05, "loss": 0.0541, "step": 6250 }, { "epoch": 1.692806922660898, "grad_norm": 0.4681030213832855, "learning_rate": 9.919556533895995e-05, "loss": 0.0538, "step": 6260 }, { "epoch": 1.6955110870740941, "grad_norm": 0.3274814784526825, "learning_rate": 9.919063445104907e-05, "loss": 0.0525, "step": 6270 }, { "epoch": 1.6982152514872904, "grad_norm": 0.35577481985092163, "learning_rate": 9.918568862032227e-05, "loss": 0.0549, "step": 6280 }, { "epoch": 1.7009194159004868, "grad_norm": 0.34000635147094727, "learning_rate": 9.918072784828194e-05, "loss": 0.0536, "step": 6290 }, { "epoch": 1.7036235803136832, "grad_norm": 0.3040684759616852, "learning_rate": 9.917575213643501e-05, "loss": 0.0505, "step": 6300 }, { "epoch": 1.7063277447268794, "grad_norm": 0.2020803838968277, "learning_rate": 9.917076148629302e-05, "loss": 0.0495, "step": 6310 }, { "epoch": 1.7090319091400756, "grad_norm": 0.2711341381072998, "learning_rate": 9.916575589937196e-05, "loss": 0.0517, "step": 6320 }, { "epoch": 1.711736073553272, "grad_norm": 0.32826706767082214, "learning_rate": 9.916073537719239e-05, "loss": 0.0508, "step": 6330 }, { "epoch": 1.7144402379664685, "grad_norm": 0.23592974245548248, "learning_rate": 9.915569992127944e-05, "loss": 0.0513, "step": 6340 }, { "epoch": 1.7171444023796647, "grad_norm": 0.2674313485622406, "learning_rate": 9.915064953316273e-05, "loss": 0.0529, "step": 6350 }, { "epoch": 1.719848566792861, "grad_norm": 0.22593489289283752, "learning_rate": 9.914558421437645e-05, "loss": 0.0525, "step": 6360 }, { "epoch": 1.7225527312060573, "grad_norm": 0.33639463782310486, "learning_rate": 9.914050396645929e-05, "loss": 0.0502, "step": 6370 }, { "epoch": 1.7252568956192538, "grad_norm": 0.31910890340805054, "learning_rate": 9.913540879095452e-05, "loss": 0.0531, "step": 6380 }, { "epoch": 1.72796106003245, "grad_norm": 0.3112967908382416, "learning_rate": 9.913029868940987e-05, "loss": 0.0521, "step": 6390 }, { "epoch": 1.7306652244456462, "grad_norm": 0.2749423682689667, "learning_rate": 9.912517366337772e-05, "loss": 0.0529, "step": 6400 }, { "epoch": 1.7333693888588426, "grad_norm": 0.23224900662899017, "learning_rate": 9.912003371441487e-05, "loss": 0.0524, "step": 6410 }, { "epoch": 1.736073553272039, "grad_norm": 0.2708677649497986, "learning_rate": 9.911487884408271e-05, "loss": 0.0526, "step": 6420 }, { "epoch": 1.7387777176852353, "grad_norm": 0.2558344602584839, "learning_rate": 9.910970905394719e-05, "loss": 0.0528, "step": 6430 }, { "epoch": 1.7414818820984315, "grad_norm": 0.30164146423339844, "learning_rate": 9.91045243455787e-05, "loss": 0.0517, "step": 6440 }, { "epoch": 1.744186046511628, "grad_norm": 0.22091379761695862, "learning_rate": 9.909932472055225e-05, "loss": 0.0522, "step": 6450 }, { "epoch": 1.7468902109248243, "grad_norm": 0.3502056300640106, "learning_rate": 9.909411018044734e-05, "loss": 0.0518, "step": 6460 }, { "epoch": 1.7495943753380205, "grad_norm": 0.26457202434539795, "learning_rate": 9.908888072684802e-05, "loss": 0.0527, "step": 6470 }, { "epoch": 1.7522985397512167, "grad_norm": 0.28929662704467773, "learning_rate": 9.908363636134285e-05, "loss": 0.0524, "step": 6480 }, { "epoch": 1.7550027041644132, "grad_norm": 0.3395238518714905, "learning_rate": 9.907837708552493e-05, "loss": 0.0525, "step": 6490 }, { "epoch": 1.7577068685776096, "grad_norm": 0.2769411504268646, "learning_rate": 9.90731029009919e-05, "loss": 0.0541, "step": 6500 }, { "epoch": 1.7604110329908058, "grad_norm": 0.5995008945465088, "learning_rate": 9.906781380934589e-05, "loss": 0.0542, "step": 6510 }, { "epoch": 1.763115197404002, "grad_norm": 0.27053532004356384, "learning_rate": 9.906250981219362e-05, "loss": 0.0533, "step": 6520 }, { "epoch": 1.7658193618171985, "grad_norm": 0.37482622265815735, "learning_rate": 9.905719091114628e-05, "loss": 0.0528, "step": 6530 }, { "epoch": 1.7685235262303949, "grad_norm": 0.24488064646720886, "learning_rate": 9.905185710781964e-05, "loss": 0.0518, "step": 6540 }, { "epoch": 1.771227690643591, "grad_norm": 0.27584993839263916, "learning_rate": 9.904650840383392e-05, "loss": 0.0508, "step": 6550 }, { "epoch": 1.7739318550567873, "grad_norm": 0.16720294952392578, "learning_rate": 9.904114480081397e-05, "loss": 0.0511, "step": 6560 }, { "epoch": 1.7766360194699837, "grad_norm": 0.2972680330276489, "learning_rate": 9.903576630038906e-05, "loss": 0.05, "step": 6570 }, { "epoch": 1.7793401838831802, "grad_norm": 0.2974444627761841, "learning_rate": 9.903037290419309e-05, "loss": 0.0498, "step": 6580 }, { "epoch": 1.7820443482963764, "grad_norm": 0.21884813904762268, "learning_rate": 9.902496461386439e-05, "loss": 0.0508, "step": 6590 }, { "epoch": 1.7847485127095726, "grad_norm": 0.23984703421592712, "learning_rate": 9.901954143104588e-05, "loss": 0.0507, "step": 6600 }, { "epoch": 1.787452677122769, "grad_norm": 0.32661211490631104, "learning_rate": 9.901410335738496e-05, "loss": 0.0503, "step": 6610 }, { "epoch": 1.7901568415359654, "grad_norm": 0.2617020308971405, "learning_rate": 9.900865039453358e-05, "loss": 0.0503, "step": 6620 }, { "epoch": 1.7928610059491619, "grad_norm": 0.4504227340221405, "learning_rate": 9.900318254414821e-05, "loss": 0.0532, "step": 6630 }, { "epoch": 1.795565170362358, "grad_norm": 0.38664737343788147, "learning_rate": 9.899769980788985e-05, "loss": 0.0514, "step": 6640 }, { "epoch": 1.7982693347755543, "grad_norm": 0.30254384875297546, "learning_rate": 9.899220218742398e-05, "loss": 0.0534, "step": 6650 }, { "epoch": 1.8009734991887507, "grad_norm": 0.31658926606178284, "learning_rate": 9.898668968442066e-05, "loss": 0.0529, "step": 6660 }, { "epoch": 1.8036776636019471, "grad_norm": 0.3325766921043396, "learning_rate": 9.898116230055443e-05, "loss": 0.0523, "step": 6670 }, { "epoch": 1.8063818280151434, "grad_norm": 0.3279995024204254, "learning_rate": 9.897562003750437e-05, "loss": 0.0525, "step": 6680 }, { "epoch": 1.8090859924283396, "grad_norm": 0.3201708197593689, "learning_rate": 9.897006289695407e-05, "loss": 0.0519, "step": 6690 }, { "epoch": 1.811790156841536, "grad_norm": 0.3361349403858185, "learning_rate": 9.896449088059164e-05, "loss": 0.0519, "step": 6700 }, { "epoch": 1.8144943212547324, "grad_norm": 0.35376909375190735, "learning_rate": 9.89589039901097e-05, "loss": 0.0524, "step": 6710 }, { "epoch": 1.8171984856679286, "grad_norm": 0.3019815981388092, "learning_rate": 9.895330222720542e-05, "loss": 0.0533, "step": 6720 }, { "epoch": 1.8199026500811248, "grad_norm": 0.37577158212661743, "learning_rate": 9.894768559358047e-05, "loss": 0.0513, "step": 6730 }, { "epoch": 1.8226068144943213, "grad_norm": 0.44600608944892883, "learning_rate": 9.894205409094101e-05, "loss": 0.0535, "step": 6740 }, { "epoch": 1.8253109789075177, "grad_norm": 0.3268283009529114, "learning_rate": 9.893640772099777e-05, "loss": 0.052, "step": 6750 }, { "epoch": 1.828015143320714, "grad_norm": 0.2828831672668457, "learning_rate": 9.893074648546595e-05, "loss": 0.051, "step": 6760 }, { "epoch": 1.8307193077339101, "grad_norm": 0.3485679030418396, "learning_rate": 9.892507038606528e-05, "loss": 0.0503, "step": 6770 }, { "epoch": 1.8334234721471065, "grad_norm": 0.196170374751091, "learning_rate": 9.891937942452003e-05, "loss": 0.0532, "step": 6780 }, { "epoch": 1.836127636560303, "grad_norm": 0.35622355341911316, "learning_rate": 9.891367360255895e-05, "loss": 0.0505, "step": 6790 }, { "epoch": 1.8388318009734992, "grad_norm": 0.32230260968208313, "learning_rate": 9.890795292191532e-05, "loss": 0.0521, "step": 6800 }, { "epoch": 1.8415359653866954, "grad_norm": 0.2837609648704529, "learning_rate": 9.890221738432694e-05, "loss": 0.0511, "step": 6810 }, { "epoch": 1.8442401297998918, "grad_norm": 0.2789319157600403, "learning_rate": 9.88964669915361e-05, "loss": 0.0515, "step": 6820 }, { "epoch": 1.8469442942130883, "grad_norm": 0.18869704008102417, "learning_rate": 9.889070174528963e-05, "loss": 0.0515, "step": 6830 }, { "epoch": 1.8496484586262845, "grad_norm": 0.3077528476715088, "learning_rate": 9.888492164733883e-05, "loss": 0.0526, "step": 6840 }, { "epoch": 1.8523526230394807, "grad_norm": 0.30399614572525024, "learning_rate": 9.88791266994396e-05, "loss": 0.0526, "step": 6850 }, { "epoch": 1.855056787452677, "grad_norm": 0.15232491493225098, "learning_rate": 9.887331690335223e-05, "loss": 0.0504, "step": 6860 }, { "epoch": 1.8577609518658735, "grad_norm": 0.34954482316970825, "learning_rate": 9.886749226084163e-05, "loss": 0.0539, "step": 6870 }, { "epoch": 1.8604651162790697, "grad_norm": 0.2800314128398895, "learning_rate": 9.886165277367714e-05, "loss": 0.0523, "step": 6880 }, { "epoch": 1.863169280692266, "grad_norm": 0.30027344822883606, "learning_rate": 9.885579844363265e-05, "loss": 0.0484, "step": 6890 }, { "epoch": 1.8658734451054624, "grad_norm": 0.28449922800064087, "learning_rate": 9.884992927248656e-05, "loss": 0.0505, "step": 6900 }, { "epoch": 1.8685776095186588, "grad_norm": 0.3688165247440338, "learning_rate": 9.884404526202178e-05, "loss": 0.0514, "step": 6910 }, { "epoch": 1.871281773931855, "grad_norm": 0.35433128476142883, "learning_rate": 9.883814641402568e-05, "loss": 0.053, "step": 6920 }, { "epoch": 1.8739859383450512, "grad_norm": 0.228667750954628, "learning_rate": 9.88322327302902e-05, "loss": 0.0497, "step": 6930 }, { "epoch": 1.8766901027582477, "grad_norm": 0.2035161554813385, "learning_rate": 9.882630421261176e-05, "loss": 0.0515, "step": 6940 }, { "epoch": 1.879394267171444, "grad_norm": 0.26619842648506165, "learning_rate": 9.88203608627913e-05, "loss": 0.0517, "step": 6950 }, { "epoch": 1.8820984315846403, "grad_norm": 0.21465055644512177, "learning_rate": 9.881440268263422e-05, "loss": 0.0489, "step": 6960 }, { "epoch": 1.8848025959978365, "grad_norm": 0.2729269564151764, "learning_rate": 9.880842967395048e-05, "loss": 0.0489, "step": 6970 }, { "epoch": 1.887506760411033, "grad_norm": 0.3845404088497162, "learning_rate": 9.880244183855452e-05, "loss": 0.0501, "step": 6980 }, { "epoch": 1.8902109248242294, "grad_norm": 0.26533985137939453, "learning_rate": 9.879643917826527e-05, "loss": 0.0516, "step": 6990 }, { "epoch": 1.8929150892374258, "grad_norm": 0.2410067915916443, "learning_rate": 9.87904216949062e-05, "loss": 0.0504, "step": 7000 }, { "epoch": 1.895619253650622, "grad_norm": 0.2037399262189865, "learning_rate": 9.878438939030526e-05, "loss": 0.0507, "step": 7010 }, { "epoch": 1.8983234180638182, "grad_norm": 0.32480940222740173, "learning_rate": 9.877834226629489e-05, "loss": 0.0497, "step": 7020 }, { "epoch": 1.9010275824770146, "grad_norm": 0.2992870807647705, "learning_rate": 9.877228032471206e-05, "loss": 0.0502, "step": 7030 }, { "epoch": 1.903731746890211, "grad_norm": 0.3398742079734802, "learning_rate": 9.876620356739823e-05, "loss": 0.0481, "step": 7040 }, { "epoch": 1.9064359113034073, "grad_norm": 0.23919132351875305, "learning_rate": 9.876011199619935e-05, "loss": 0.05, "step": 7050 }, { "epoch": 1.9091400757166035, "grad_norm": 0.23394091427326202, "learning_rate": 9.875400561296589e-05, "loss": 0.0508, "step": 7060 }, { "epoch": 1.9118442401298, "grad_norm": 0.32539817690849304, "learning_rate": 9.874788441955278e-05, "loss": 0.0514, "step": 7070 }, { "epoch": 1.9145484045429964, "grad_norm": 0.16950073838233948, "learning_rate": 9.874174841781951e-05, "loss": 0.0501, "step": 7080 }, { "epoch": 1.9172525689561926, "grad_norm": 0.15818078815937042, "learning_rate": 9.873559760963003e-05, "loss": 0.051, "step": 7090 }, { "epoch": 1.9199567333693888, "grad_norm": 0.2716408967971802, "learning_rate": 9.872943199685278e-05, "loss": 0.0512, "step": 7100 }, { "epoch": 1.9226608977825852, "grad_norm": 0.25466594099998474, "learning_rate": 9.872325158136071e-05, "loss": 0.0517, "step": 7110 }, { "epoch": 1.9253650621957816, "grad_norm": 0.15985716879367828, "learning_rate": 9.871705636503128e-05, "loss": 0.0513, "step": 7120 }, { "epoch": 1.9280692266089778, "grad_norm": 0.15858811140060425, "learning_rate": 9.871084634974641e-05, "loss": 0.0505, "step": 7130 }, { "epoch": 1.930773391022174, "grad_norm": 0.15513557195663452, "learning_rate": 9.870462153739257e-05, "loss": 0.0507, "step": 7140 }, { "epoch": 1.9334775554353705, "grad_norm": 0.2950281500816345, "learning_rate": 9.869838192986067e-05, "loss": 0.049, "step": 7150 }, { "epoch": 1.936181719848567, "grad_norm": 0.2628280222415924, "learning_rate": 9.869212752904616e-05, "loss": 0.0511, "step": 7160 }, { "epoch": 1.9388858842617631, "grad_norm": 0.48395299911499023, "learning_rate": 9.868585833684894e-05, "loss": 0.049, "step": 7170 }, { "epoch": 1.9415900486749593, "grad_norm": 0.2836771309375763, "learning_rate": 9.867957435517342e-05, "loss": 0.05, "step": 7180 }, { "epoch": 1.9442942130881558, "grad_norm": 0.35829558968544006, "learning_rate": 9.867327558592854e-05, "loss": 0.0506, "step": 7190 }, { "epoch": 1.9469983775013522, "grad_norm": 0.2351144552230835, "learning_rate": 9.866696203102766e-05, "loss": 0.0488, "step": 7200 }, { "epoch": 1.9497025419145484, "grad_norm": 0.27173542976379395, "learning_rate": 9.86606336923887e-05, "loss": 0.0491, "step": 7210 }, { "epoch": 1.9524067063277446, "grad_norm": 0.296769380569458, "learning_rate": 9.865429057193403e-05, "loss": 0.0503, "step": 7220 }, { "epoch": 1.955110870740941, "grad_norm": 0.20839665830135345, "learning_rate": 9.864793267159053e-05, "loss": 0.0509, "step": 7230 }, { "epoch": 1.9578150351541375, "grad_norm": 0.20044784247875214, "learning_rate": 9.864155999328957e-05, "loss": 0.0491, "step": 7240 }, { "epoch": 1.9605191995673337, "grad_norm": 0.16277696192264557, "learning_rate": 9.8635172538967e-05, "loss": 0.0501, "step": 7250 }, { "epoch": 1.9632233639805299, "grad_norm": 0.26536065340042114, "learning_rate": 9.862877031056312e-05, "loss": 0.0494, "step": 7260 }, { "epoch": 1.9659275283937263, "grad_norm": 0.3114403486251831, "learning_rate": 9.862235331002279e-05, "loss": 0.0495, "step": 7270 }, { "epoch": 1.9686316928069227, "grad_norm": 0.28731831908226013, "learning_rate": 9.861592153929533e-05, "loss": 0.0507, "step": 7280 }, { "epoch": 1.971335857220119, "grad_norm": 0.2577991485595703, "learning_rate": 9.860947500033455e-05, "loss": 0.0496, "step": 7290 }, { "epoch": 1.9740400216333152, "grad_norm": 0.22793656587600708, "learning_rate": 9.86030136950987e-05, "loss": 0.0503, "step": 7300 }, { "epoch": 1.9767441860465116, "grad_norm": 0.27249854803085327, "learning_rate": 9.85965376255506e-05, "loss": 0.0495, "step": 7310 }, { "epoch": 1.979448350459708, "grad_norm": 0.22351133823394775, "learning_rate": 9.859004679365747e-05, "loss": 0.0493, "step": 7320 }, { "epoch": 1.9821525148729042, "grad_norm": 0.27374574542045593, "learning_rate": 9.858354120139108e-05, "loss": 0.05, "step": 7330 }, { "epoch": 1.9848566792861004, "grad_norm": 0.2748188376426697, "learning_rate": 9.857702085072764e-05, "loss": 0.0523, "step": 7340 }, { "epoch": 1.9875608436992969, "grad_norm": 0.25111523270606995, "learning_rate": 9.857048574364787e-05, "loss": 0.0514, "step": 7350 }, { "epoch": 1.9902650081124933, "grad_norm": 0.23515571653842926, "learning_rate": 9.856393588213698e-05, "loss": 0.0496, "step": 7360 }, { "epoch": 1.9929691725256897, "grad_norm": 0.2397712618112564, "learning_rate": 9.855737126818458e-05, "loss": 0.0501, "step": 7370 }, { "epoch": 1.995673336938886, "grad_norm": 0.2327512800693512, "learning_rate": 9.855079190378491e-05, "loss": 0.0511, "step": 7380 }, { "epoch": 1.9983775013520821, "grad_norm": 0.20452557504177094, "learning_rate": 9.854419779093655e-05, "loss": 0.0494, "step": 7390 }, { "epoch": 2.0010816657652786, "grad_norm": 0.20018339157104492, "learning_rate": 9.853758893164264e-05, "loss": 0.0516, "step": 7400 }, { "epoch": 2.003785830178475, "grad_norm": 0.24505144357681274, "learning_rate": 9.853096532791078e-05, "loss": 0.0493, "step": 7410 }, { "epoch": 2.006489994591671, "grad_norm": 0.2252325564622879, "learning_rate": 9.852432698175304e-05, "loss": 0.0513, "step": 7420 }, { "epoch": 2.0091941590048674, "grad_norm": 0.3194548487663269, "learning_rate": 9.851767389518597e-05, "loss": 0.0495, "step": 7430 }, { "epoch": 2.011898323418064, "grad_norm": 0.25908708572387695, "learning_rate": 9.85110060702306e-05, "loss": 0.0512, "step": 7440 }, { "epoch": 2.0146024878312603, "grad_norm": 0.2628355622291565, "learning_rate": 9.850432350891245e-05, "loss": 0.0504, "step": 7450 }, { "epoch": 2.0173066522444563, "grad_norm": 0.27904942631721497, "learning_rate": 9.84976262132615e-05, "loss": 0.0502, "step": 7460 }, { "epoch": 2.0200108166576527, "grad_norm": 0.25348401069641113, "learning_rate": 9.849091418531222e-05, "loss": 0.0513, "step": 7470 }, { "epoch": 2.022714981070849, "grad_norm": 0.4108212888240814, "learning_rate": 9.848418742710353e-05, "loss": 0.0507, "step": 7480 }, { "epoch": 2.0254191454840456, "grad_norm": 0.3624875843524933, "learning_rate": 9.847744594067885e-05, "loss": 0.0505, "step": 7490 }, { "epoch": 2.0281233098972415, "grad_norm": 0.29782694578170776, "learning_rate": 9.847068972808607e-05, "loss": 0.0492, "step": 7500 }, { "epoch": 2.030827474310438, "grad_norm": 0.2468615621328354, "learning_rate": 9.846391879137756e-05, "loss": 0.0524, "step": 7510 }, { "epoch": 2.0335316387236344, "grad_norm": 0.280664324760437, "learning_rate": 9.845713313261012e-05, "loss": 0.0488, "step": 7520 }, { "epoch": 2.036235803136831, "grad_norm": 0.18010568618774414, "learning_rate": 9.845033275384505e-05, "loss": 0.0507, "step": 7530 }, { "epoch": 2.038939967550027, "grad_norm": 0.2790665924549103, "learning_rate": 9.844351765714818e-05, "loss": 0.0523, "step": 7540 }, { "epoch": 2.0416441319632233, "grad_norm": 0.22416500747203827, "learning_rate": 9.843668784458971e-05, "loss": 0.0513, "step": 7550 }, { "epoch": 2.0443482963764197, "grad_norm": 0.1641562134027481, "learning_rate": 9.842984331824437e-05, "loss": 0.05, "step": 7560 }, { "epoch": 2.047052460789616, "grad_norm": 0.18079370260238647, "learning_rate": 9.842298408019133e-05, "loss": 0.0497, "step": 7570 }, { "epoch": 2.0497566252028125, "grad_norm": 0.2492232620716095, "learning_rate": 9.841611013251429e-05, "loss": 0.0491, "step": 7580 }, { "epoch": 2.0524607896160085, "grad_norm": 0.29709431529045105, "learning_rate": 9.840922147730133e-05, "loss": 0.0496, "step": 7590 }, { "epoch": 2.055164954029205, "grad_norm": 0.1775466799736023, "learning_rate": 9.840231811664506e-05, "loss": 0.0509, "step": 7600 }, { "epoch": 2.0578691184424014, "grad_norm": 0.25056734681129456, "learning_rate": 9.839540005264252e-05, "loss": 0.0495, "step": 7610 }, { "epoch": 2.060573282855598, "grad_norm": 0.24817520380020142, "learning_rate": 9.838846728739527e-05, "loss": 0.0506, "step": 7620 }, { "epoch": 2.063277447268794, "grad_norm": 0.2927175760269165, "learning_rate": 9.838151982300927e-05, "loss": 0.0497, "step": 7630 }, { "epoch": 2.0659816116819902, "grad_norm": 0.30519163608551025, "learning_rate": 9.8374557661595e-05, "loss": 0.0509, "step": 7640 }, { "epoch": 2.0686857760951867, "grad_norm": 0.27457305788993835, "learning_rate": 9.836758080526735e-05, "loss": 0.0513, "step": 7650 }, { "epoch": 2.071389940508383, "grad_norm": 0.16695399582386017, "learning_rate": 9.836058925614575e-05, "loss": 0.047, "step": 7660 }, { "epoch": 2.074094104921579, "grad_norm": 0.21578854322433472, "learning_rate": 9.8353583016354e-05, "loss": 0.0498, "step": 7670 }, { "epoch": 2.0767982693347755, "grad_norm": 0.2516796886920929, "learning_rate": 9.834656208802044e-05, "loss": 0.0497, "step": 7680 }, { "epoch": 2.079502433747972, "grad_norm": 0.19031840562820435, "learning_rate": 9.833952647327784e-05, "loss": 0.0498, "step": 7690 }, { "epoch": 2.0822065981611684, "grad_norm": 0.2465708702802658, "learning_rate": 9.833247617426342e-05, "loss": 0.0502, "step": 7700 }, { "epoch": 2.0849107625743644, "grad_norm": 0.17530310153961182, "learning_rate": 9.832541119311889e-05, "loss": 0.0489, "step": 7710 }, { "epoch": 2.087614926987561, "grad_norm": 0.20325636863708496, "learning_rate": 9.83183315319904e-05, "loss": 0.0489, "step": 7720 }, { "epoch": 2.0903190914007572, "grad_norm": 0.2008994221687317, "learning_rate": 9.831123719302855e-05, "loss": 0.0474, "step": 7730 }, { "epoch": 2.0930232558139537, "grad_norm": 0.18584032356739044, "learning_rate": 9.830412817838842e-05, "loss": 0.0496, "step": 7740 }, { "epoch": 2.0957274202271496, "grad_norm": 0.18594348430633545, "learning_rate": 9.829700449022956e-05, "loss": 0.0511, "step": 7750 }, { "epoch": 2.098431584640346, "grad_norm": 0.23666128516197205, "learning_rate": 9.828986613071593e-05, "loss": 0.0491, "step": 7760 }, { "epoch": 2.1011357490535425, "grad_norm": 0.1922609955072403, "learning_rate": 9.828271310201601e-05, "loss": 0.05, "step": 7770 }, { "epoch": 2.103839913466739, "grad_norm": 0.22949478030204773, "learning_rate": 9.827554540630268e-05, "loss": 0.0501, "step": 7780 }, { "epoch": 2.106544077879935, "grad_norm": 0.20877620577812195, "learning_rate": 9.826836304575329e-05, "loss": 0.0507, "step": 7790 }, { "epoch": 2.1092482422931313, "grad_norm": 0.14579303562641144, "learning_rate": 9.826116602254966e-05, "loss": 0.0487, "step": 7800 }, { "epoch": 2.111952406706328, "grad_norm": 0.3156311810016632, "learning_rate": 9.825395433887805e-05, "loss": 0.049, "step": 7810 }, { "epoch": 2.114656571119524, "grad_norm": 0.2005610167980194, "learning_rate": 9.824672799692917e-05, "loss": 0.0484, "step": 7820 }, { "epoch": 2.11736073553272, "grad_norm": 0.28255030512809753, "learning_rate": 9.823948699889823e-05, "loss": 0.0493, "step": 7830 }, { "epoch": 2.1200648999459166, "grad_norm": 0.17374461889266968, "learning_rate": 9.823223134698483e-05, "loss": 0.0492, "step": 7840 }, { "epoch": 2.122769064359113, "grad_norm": 0.22803068161010742, "learning_rate": 9.822496104339303e-05, "loss": 0.0489, "step": 7850 }, { "epoch": 2.1254732287723095, "grad_norm": 0.28188711404800415, "learning_rate": 9.821767609033138e-05, "loss": 0.0506, "step": 7860 }, { "epoch": 2.1281773931855055, "grad_norm": 0.2666110098361969, "learning_rate": 9.821037649001284e-05, "loss": 0.0501, "step": 7870 }, { "epoch": 2.130881557598702, "grad_norm": 0.2911272346973419, "learning_rate": 9.820306224465486e-05, "loss": 0.0513, "step": 7880 }, { "epoch": 2.1335857220118983, "grad_norm": 0.3622305393218994, "learning_rate": 9.819573335647928e-05, "loss": 0.0495, "step": 7890 }, { "epoch": 2.1362898864250948, "grad_norm": 0.34167227149009705, "learning_rate": 9.818838982771246e-05, "loss": 0.0527, "step": 7900 }, { "epoch": 2.138994050838291, "grad_norm": 0.26542213559150696, "learning_rate": 9.818103166058514e-05, "loss": 0.0532, "step": 7910 }, { "epoch": 2.141698215251487, "grad_norm": 0.16277378797531128, "learning_rate": 9.817365885733254e-05, "loss": 0.0508, "step": 7920 }, { "epoch": 2.1444023796646836, "grad_norm": 0.20945590734481812, "learning_rate": 9.816627142019434e-05, "loss": 0.0519, "step": 7930 }, { "epoch": 2.14710654407788, "grad_norm": 0.25896215438842773, "learning_rate": 9.815886935141463e-05, "loss": 0.0507, "step": 7940 }, { "epoch": 2.149810708491076, "grad_norm": 0.18288464844226837, "learning_rate": 9.8151452653242e-05, "loss": 0.0501, "step": 7950 }, { "epoch": 2.1525148729042725, "grad_norm": 0.26696059107780457, "learning_rate": 9.814402132792939e-05, "loss": 0.0468, "step": 7960 }, { "epoch": 2.155219037317469, "grad_norm": 0.27333739399909973, "learning_rate": 9.813657537773428e-05, "loss": 0.0489, "step": 7970 }, { "epoch": 2.1579232017306653, "grad_norm": 0.24400590360164642, "learning_rate": 9.812911480491854e-05, "loss": 0.0491, "step": 7980 }, { "epoch": 2.1606273661438617, "grad_norm": 0.2748092710971832, "learning_rate": 9.81216396117485e-05, "loss": 0.0498, "step": 7990 }, { "epoch": 2.1633315305570577, "grad_norm": 0.23992888629436493, "learning_rate": 9.811414980049491e-05, "loss": 0.049, "step": 8000 }, { "epoch": 2.166035694970254, "grad_norm": 0.23095905780792236, "learning_rate": 9.810664537343301e-05, "loss": 0.0494, "step": 8010 }, { "epoch": 2.1687398593834506, "grad_norm": 0.19222372770309448, "learning_rate": 9.809912633284243e-05, "loss": 0.0485, "step": 8020 }, { "epoch": 2.171444023796647, "grad_norm": 0.22846442461013794, "learning_rate": 9.809159268100725e-05, "loss": 0.0498, "step": 8030 }, { "epoch": 2.174148188209843, "grad_norm": 0.17772817611694336, "learning_rate": 9.808404442021599e-05, "loss": 0.0485, "step": 8040 }, { "epoch": 2.1768523526230394, "grad_norm": 0.17437703907489777, "learning_rate": 9.807648155276163e-05, "loss": 0.0498, "step": 8050 }, { "epoch": 2.179556517036236, "grad_norm": 0.2420763224363327, "learning_rate": 9.806890408094156e-05, "loss": 0.0485, "step": 8060 }, { "epoch": 2.1822606814494323, "grad_norm": 0.2132657915353775, "learning_rate": 9.806131200705761e-05, "loss": 0.05, "step": 8070 }, { "epoch": 2.1849648458626283, "grad_norm": 0.2739064693450928, "learning_rate": 9.805370533341605e-05, "loss": 0.0495, "step": 8080 }, { "epoch": 2.1876690102758247, "grad_norm": 0.3580254912376404, "learning_rate": 9.804608406232762e-05, "loss": 0.0498, "step": 8090 }, { "epoch": 2.190373174689021, "grad_norm": 0.22566597163677216, "learning_rate": 9.803844819610741e-05, "loss": 0.0499, "step": 8100 }, { "epoch": 2.1930773391022176, "grad_norm": 0.36828088760375977, "learning_rate": 9.803079773707504e-05, "loss": 0.0495, "step": 8110 }, { "epoch": 2.1957815035154136, "grad_norm": 0.32329487800598145, "learning_rate": 9.802313268755447e-05, "loss": 0.048, "step": 8120 }, { "epoch": 2.19848566792861, "grad_norm": 0.25603777170181274, "learning_rate": 9.801545304987419e-05, "loss": 0.0491, "step": 8130 }, { "epoch": 2.2011898323418064, "grad_norm": 0.1929834932088852, "learning_rate": 9.800775882636704e-05, "loss": 0.0504, "step": 8140 }, { "epoch": 2.203893996755003, "grad_norm": 0.2729528248310089, "learning_rate": 9.800005001937034e-05, "loss": 0.0514, "step": 8150 }, { "epoch": 2.206598161168199, "grad_norm": 0.2685270607471466, "learning_rate": 9.79923266312258e-05, "loss": 0.048, "step": 8160 }, { "epoch": 2.2093023255813953, "grad_norm": 0.25711190700531006, "learning_rate": 9.79845886642796e-05, "loss": 0.0501, "step": 8170 }, { "epoch": 2.2120064899945917, "grad_norm": 0.20491743087768555, "learning_rate": 9.797683612088233e-05, "loss": 0.0483, "step": 8180 }, { "epoch": 2.214710654407788, "grad_norm": 0.1737980991601944, "learning_rate": 9.796906900338898e-05, "loss": 0.0491, "step": 8190 }, { "epoch": 2.217414818820984, "grad_norm": 0.3079661428928375, "learning_rate": 9.796128731415903e-05, "loss": 0.0507, "step": 8200 }, { "epoch": 2.2201189832341806, "grad_norm": 0.24802948534488678, "learning_rate": 9.795349105555634e-05, "loss": 0.0486, "step": 8210 }, { "epoch": 2.222823147647377, "grad_norm": 0.24384374916553497, "learning_rate": 9.794568022994922e-05, "loss": 0.0484, "step": 8220 }, { "epoch": 2.2255273120605734, "grad_norm": 0.2087869942188263, "learning_rate": 9.793785483971034e-05, "loss": 0.0487, "step": 8230 }, { "epoch": 2.2282314764737694, "grad_norm": 0.24070078134536743, "learning_rate": 9.793001488721691e-05, "loss": 0.0483, "step": 8240 }, { "epoch": 2.230935640886966, "grad_norm": 0.2175302356481552, "learning_rate": 9.792216037485047e-05, "loss": 0.0489, "step": 8250 }, { "epoch": 2.2336398053001623, "grad_norm": 0.15270985662937164, "learning_rate": 9.791429130499704e-05, "loss": 0.0483, "step": 8260 }, { "epoch": 2.2363439697133587, "grad_norm": 0.15660075843334198, "learning_rate": 9.790640768004698e-05, "loss": 0.0473, "step": 8270 }, { "epoch": 2.2390481341265547, "grad_norm": 0.19323284924030304, "learning_rate": 9.789850950239518e-05, "loss": 0.0477, "step": 8280 }, { "epoch": 2.241752298539751, "grad_norm": 0.20820297300815582, "learning_rate": 9.789059677444089e-05, "loss": 0.0487, "step": 8290 }, { "epoch": 2.2444564629529475, "grad_norm": 0.17180880904197693, "learning_rate": 9.788266949858776e-05, "loss": 0.0498, "step": 8300 }, { "epoch": 2.247160627366144, "grad_norm": 0.1922573745250702, "learning_rate": 9.787472767724392e-05, "loss": 0.0478, "step": 8310 }, { "epoch": 2.2498647917793404, "grad_norm": 0.16918568313121796, "learning_rate": 9.786677131282185e-05, "loss": 0.0484, "step": 8320 }, { "epoch": 2.2525689561925364, "grad_norm": 0.1825493574142456, "learning_rate": 9.785880040773853e-05, "loss": 0.0496, "step": 8330 }, { "epoch": 2.255273120605733, "grad_norm": 0.2228158712387085, "learning_rate": 9.785081496441527e-05, "loss": 0.0504, "step": 8340 }, { "epoch": 2.2579772850189292, "grad_norm": 0.22824785113334656, "learning_rate": 9.784281498527785e-05, "loss": 0.0503, "step": 8350 }, { "epoch": 2.2606814494321252, "grad_norm": 0.2846182882785797, "learning_rate": 9.783480047275646e-05, "loss": 0.0485, "step": 8360 }, { "epoch": 2.2633856138453217, "grad_norm": 0.3031616508960724, "learning_rate": 9.78267714292857e-05, "loss": 0.0482, "step": 8370 }, { "epoch": 2.266089778258518, "grad_norm": 0.21307483315467834, "learning_rate": 9.781872785730454e-05, "loss": 0.0488, "step": 8380 }, { "epoch": 2.2687939426717145, "grad_norm": 0.17440864443778992, "learning_rate": 9.781066975925646e-05, "loss": 0.0493, "step": 8390 }, { "epoch": 2.271498107084911, "grad_norm": 0.11542808264493942, "learning_rate": 9.780259713758928e-05, "loss": 0.0494, "step": 8400 }, { "epoch": 2.274202271498107, "grad_norm": 0.18058975040912628, "learning_rate": 9.779450999475524e-05, "loss": 0.0501, "step": 8410 }, { "epoch": 2.2769064359113034, "grad_norm": 0.3479918837547302, "learning_rate": 9.7786408333211e-05, "loss": 0.0492, "step": 8420 }, { "epoch": 2.2796106003245, "grad_norm": 0.24835775792598724, "learning_rate": 9.777829215541764e-05, "loss": 0.0465, "step": 8430 }, { "epoch": 2.2823147647376962, "grad_norm": 0.2437605857849121, "learning_rate": 9.777016146384064e-05, "loss": 0.0483, "step": 8440 }, { "epoch": 2.285018929150892, "grad_norm": 0.23190438747406006, "learning_rate": 9.776201626094988e-05, "loss": 0.0497, "step": 8450 }, { "epoch": 2.2877230935640886, "grad_norm": 0.21501994132995605, "learning_rate": 9.775385654921965e-05, "loss": 0.0498, "step": 8460 }, { "epoch": 2.290427257977285, "grad_norm": 0.32258400321006775, "learning_rate": 9.774568233112868e-05, "loss": 0.0475, "step": 8470 }, { "epoch": 2.2931314223904815, "grad_norm": 0.31717681884765625, "learning_rate": 9.773749360916007e-05, "loss": 0.0493, "step": 8480 }, { "epoch": 2.2958355868036775, "grad_norm": 0.2963731288909912, "learning_rate": 9.772929038580134e-05, "loss": 0.0494, "step": 8490 }, { "epoch": 2.298539751216874, "grad_norm": 0.31850314140319824, "learning_rate": 9.772107266354439e-05, "loss": 0.0508, "step": 8500 }, { "epoch": 2.3012439156300704, "grad_norm": 0.1990332454442978, "learning_rate": 9.77128404448856e-05, "loss": 0.0503, "step": 8510 }, { "epoch": 2.303948080043267, "grad_norm": 0.3205680549144745, "learning_rate": 9.770459373232565e-05, "loss": 0.0487, "step": 8520 }, { "epoch": 2.3066522444564628, "grad_norm": 0.18185649812221527, "learning_rate": 9.769633252836969e-05, "loss": 0.0491, "step": 8530 }, { "epoch": 2.309356408869659, "grad_norm": 0.17397455871105194, "learning_rate": 9.768805683552724e-05, "loss": 0.0484, "step": 8540 }, { "epoch": 2.3120605732828556, "grad_norm": 0.24840068817138672, "learning_rate": 9.767976665631228e-05, "loss": 0.0479, "step": 8550 }, { "epoch": 2.314764737696052, "grad_norm": 0.2979651987552643, "learning_rate": 9.767146199324311e-05, "loss": 0.0488, "step": 8560 }, { "epoch": 2.317468902109248, "grad_norm": 0.23151536285877228, "learning_rate": 9.766314284884249e-05, "loss": 0.0501, "step": 8570 }, { "epoch": 2.3201730665224445, "grad_norm": 0.24742509424686432, "learning_rate": 9.765480922563752e-05, "loss": 0.0482, "step": 8580 }, { "epoch": 2.322877230935641, "grad_norm": 0.2530880570411682, "learning_rate": 9.764646112615978e-05, "loss": 0.049, "step": 8590 }, { "epoch": 2.3255813953488373, "grad_norm": 0.21231330931186676, "learning_rate": 9.763809855294517e-05, "loss": 0.0487, "step": 8600 }, { "epoch": 2.3282855597620333, "grad_norm": 0.2298901528120041, "learning_rate": 9.762972150853404e-05, "loss": 0.0497, "step": 8610 }, { "epoch": 2.3309897241752298, "grad_norm": 0.2858811020851135, "learning_rate": 9.762132999547111e-05, "loss": 0.0489, "step": 8620 }, { "epoch": 2.333693888588426, "grad_norm": 0.27100083231925964, "learning_rate": 9.761292401630549e-05, "loss": 0.0505, "step": 8630 }, { "epoch": 2.3363980530016226, "grad_norm": 0.18462030589580536, "learning_rate": 9.76045035735907e-05, "loss": 0.0481, "step": 8640 }, { "epoch": 2.339102217414819, "grad_norm": 0.2245539277791977, "learning_rate": 9.759606866988464e-05, "loss": 0.0492, "step": 8650 }, { "epoch": 2.341806381828015, "grad_norm": 0.21166552603244781, "learning_rate": 9.758761930774963e-05, "loss": 0.0501, "step": 8660 }, { "epoch": 2.3445105462412115, "grad_norm": 0.23070751130580902, "learning_rate": 9.757915548975235e-05, "loss": 0.0488, "step": 8670 }, { "epoch": 2.347214710654408, "grad_norm": 0.16982091963291168, "learning_rate": 9.757067721846389e-05, "loss": 0.0469, "step": 8680 }, { "epoch": 2.349918875067604, "grad_norm": 0.19245974719524384, "learning_rate": 9.756218449645971e-05, "loss": 0.0476, "step": 8690 }, { "epoch": 2.3526230394808003, "grad_norm": 0.25591012835502625, "learning_rate": 9.75536773263197e-05, "loss": 0.0501, "step": 8700 }, { "epoch": 2.3553272038939967, "grad_norm": 0.2564776837825775, "learning_rate": 9.75451557106281e-05, "loss": 0.0471, "step": 8710 }, { "epoch": 2.358031368307193, "grad_norm": 0.17444302141666412, "learning_rate": 9.753661965197354e-05, "loss": 0.0472, "step": 8720 }, { "epoch": 2.3607355327203896, "grad_norm": 0.21744784712791443, "learning_rate": 9.752806915294908e-05, "loss": 0.048, "step": 8730 }, { "epoch": 2.3634396971335856, "grad_norm": 0.13642534613609314, "learning_rate": 9.75195042161521e-05, "loss": 0.0478, "step": 8740 }, { "epoch": 2.366143861546782, "grad_norm": 0.27594295144081116, "learning_rate": 9.751092484418442e-05, "loss": 0.0486, "step": 8750 }, { "epoch": 2.3688480259599785, "grad_norm": 0.2247999906539917, "learning_rate": 9.750233103965224e-05, "loss": 0.0477, "step": 8760 }, { "epoch": 2.3715521903731744, "grad_norm": 0.2576276957988739, "learning_rate": 9.749372280516611e-05, "loss": 0.0489, "step": 8770 }, { "epoch": 2.374256354786371, "grad_norm": 0.2281554937362671, "learning_rate": 9.748510014334097e-05, "loss": 0.0487, "step": 8780 }, { "epoch": 2.3769605191995673, "grad_norm": 0.36352765560150146, "learning_rate": 9.747646305679621e-05, "loss": 0.0507, "step": 8790 }, { "epoch": 2.3796646836127637, "grad_norm": 0.30096879601478577, "learning_rate": 9.74678115481555e-05, "loss": 0.0499, "step": 8800 }, { "epoch": 2.38236884802596, "grad_norm": 0.3011406660079956, "learning_rate": 9.745914562004696e-05, "loss": 0.0468, "step": 8810 }, { "epoch": 2.385073012439156, "grad_norm": 0.19945856928825378, "learning_rate": 9.745046527510307e-05, "loss": 0.0508, "step": 8820 }, { "epoch": 2.3877771768523526, "grad_norm": 0.22636142373085022, "learning_rate": 9.744177051596068e-05, "loss": 0.0499, "step": 8830 }, { "epoch": 2.390481341265549, "grad_norm": 0.18788951635360718, "learning_rate": 9.743306134526105e-05, "loss": 0.0473, "step": 8840 }, { "epoch": 2.3931855056787454, "grad_norm": 0.1263677328824997, "learning_rate": 9.742433776564977e-05, "loss": 0.0485, "step": 8850 }, { "epoch": 2.3958896700919414, "grad_norm": 0.21245871484279633, "learning_rate": 9.741559977977683e-05, "loss": 0.0463, "step": 8860 }, { "epoch": 2.398593834505138, "grad_norm": 0.20890074968338013, "learning_rate": 9.740684739029661e-05, "loss": 0.0482, "step": 8870 }, { "epoch": 2.4012979989183343, "grad_norm": 0.17799268662929535, "learning_rate": 9.739808059986789e-05, "loss": 0.0482, "step": 8880 }, { "epoch": 2.4040021633315307, "grad_norm": 0.3019181191921234, "learning_rate": 9.738929941115373e-05, "loss": 0.0489, "step": 8890 }, { "epoch": 2.4067063277447267, "grad_norm": 0.22618448734283447, "learning_rate": 9.738050382682167e-05, "loss": 0.049, "step": 8900 }, { "epoch": 2.409410492157923, "grad_norm": 0.1964368373155594, "learning_rate": 9.737169384954355e-05, "loss": 0.0491, "step": 8910 }, { "epoch": 2.4121146565711196, "grad_norm": 0.23717163503170013, "learning_rate": 9.736286948199562e-05, "loss": 0.0486, "step": 8920 }, { "epoch": 2.414818820984316, "grad_norm": 0.31374600529670715, "learning_rate": 9.735403072685848e-05, "loss": 0.0495, "step": 8930 }, { "epoch": 2.417522985397512, "grad_norm": 0.16867473721504211, "learning_rate": 9.734517758681712e-05, "loss": 0.0487, "step": 8940 }, { "epoch": 2.4202271498107084, "grad_norm": 0.1565200835466385, "learning_rate": 9.733631006456088e-05, "loss": 0.0472, "step": 8950 }, { "epoch": 2.422931314223905, "grad_norm": 0.1897311508655548, "learning_rate": 9.732742816278348e-05, "loss": 0.0492, "step": 8960 }, { "epoch": 2.4256354786371013, "grad_norm": 0.17615115642547607, "learning_rate": 9.731853188418302e-05, "loss": 0.0478, "step": 8970 }, { "epoch": 2.4283396430502977, "grad_norm": 0.21769677102565765, "learning_rate": 9.730962123146194e-05, "loss": 0.0468, "step": 8980 }, { "epoch": 2.4310438074634937, "grad_norm": 0.2494925707578659, "learning_rate": 9.730069620732709e-05, "loss": 0.0472, "step": 8990 }, { "epoch": 2.43374797187669, "grad_norm": 0.17663145065307617, "learning_rate": 9.72917568144896e-05, "loss": 0.0473, "step": 9000 }, { "epoch": 2.4364521362898865, "grad_norm": 0.26080742478370667, "learning_rate": 9.728280305566509e-05, "loss": 0.0479, "step": 9010 }, { "epoch": 2.4391563007030825, "grad_norm": 0.25975146889686584, "learning_rate": 9.727383493357343e-05, "loss": 0.0472, "step": 9020 }, { "epoch": 2.441860465116279, "grad_norm": 0.1642393320798874, "learning_rate": 9.726485245093891e-05, "loss": 0.0471, "step": 9030 }, { "epoch": 2.4445646295294754, "grad_norm": 0.2557942271232605, "learning_rate": 9.725585561049018e-05, "loss": 0.0488, "step": 9040 }, { "epoch": 2.447268793942672, "grad_norm": 0.282954603433609, "learning_rate": 9.724684441496022e-05, "loss": 0.0473, "step": 9050 }, { "epoch": 2.4499729583558683, "grad_norm": 0.23700135946273804, "learning_rate": 9.72378188670864e-05, "loss": 0.0474, "step": 9060 }, { "epoch": 2.4526771227690642, "grad_norm": 0.21886256337165833, "learning_rate": 9.722877896961047e-05, "loss": 0.0484, "step": 9070 }, { "epoch": 2.4553812871822607, "grad_norm": 0.13453873991966248, "learning_rate": 9.721972472527848e-05, "loss": 0.0464, "step": 9080 }, { "epoch": 2.458085451595457, "grad_norm": 0.27012476325035095, "learning_rate": 9.721065613684089e-05, "loss": 0.0508, "step": 9090 }, { "epoch": 2.460789616008653, "grad_norm": 0.16179421544075012, "learning_rate": 9.72015732070525e-05, "loss": 0.0473, "step": 9100 }, { "epoch": 2.4634937804218495, "grad_norm": 0.19114063680171967, "learning_rate": 9.719247593867244e-05, "loss": 0.0482, "step": 9110 }, { "epoch": 2.466197944835046, "grad_norm": 0.22787757217884064, "learning_rate": 9.718336433446423e-05, "loss": 0.0475, "step": 9120 }, { "epoch": 2.4689021092482424, "grad_norm": 0.2097082883119583, "learning_rate": 9.717423839719574e-05, "loss": 0.0476, "step": 9130 }, { "epoch": 2.471606273661439, "grad_norm": 0.20373870432376862, "learning_rate": 9.71650981296392e-05, "loss": 0.049, "step": 9140 }, { "epoch": 2.474310438074635, "grad_norm": 0.22332663834095, "learning_rate": 9.715594353457118e-05, "loss": 0.049, "step": 9150 }, { "epoch": 2.4770146024878312, "grad_norm": 0.17904236912727356, "learning_rate": 9.714677461477257e-05, "loss": 0.0481, "step": 9160 }, { "epoch": 2.4797187669010277, "grad_norm": 0.19034627079963684, "learning_rate": 9.713759137302869e-05, "loss": 0.0477, "step": 9170 }, { "epoch": 2.482422931314224, "grad_norm": 0.2104157656431198, "learning_rate": 9.712839381212914e-05, "loss": 0.0478, "step": 9180 }, { "epoch": 2.48512709572742, "grad_norm": 0.23209746181964874, "learning_rate": 9.71191819348679e-05, "loss": 0.0491, "step": 9190 }, { "epoch": 2.4878312601406165, "grad_norm": 0.22740750014781952, "learning_rate": 9.710995574404331e-05, "loss": 0.0485, "step": 9200 }, { "epoch": 2.490535424553813, "grad_norm": 0.1666725128889084, "learning_rate": 9.710071524245802e-05, "loss": 0.0484, "step": 9210 }, { "epoch": 2.4932395889670094, "grad_norm": 0.25208553671836853, "learning_rate": 9.709146043291906e-05, "loss": 0.0477, "step": 9220 }, { "epoch": 2.4959437533802054, "grad_norm": 0.28831249475479126, "learning_rate": 9.70821913182378e-05, "loss": 0.048, "step": 9230 }, { "epoch": 2.498647917793402, "grad_norm": 0.19918251037597656, "learning_rate": 9.707290790122995e-05, "loss": 0.0478, "step": 9240 }, { "epoch": 2.501352082206598, "grad_norm": 0.1841312050819397, "learning_rate": 9.706361018471557e-05, "loss": 0.0476, "step": 9250 }, { "epoch": 2.5040562466197946, "grad_norm": 0.17819739878177643, "learning_rate": 9.705429817151906e-05, "loss": 0.0492, "step": 9260 }, { "epoch": 2.5067604110329906, "grad_norm": 0.202997624874115, "learning_rate": 9.704497186446917e-05, "loss": 0.0472, "step": 9270 }, { "epoch": 2.509464575446187, "grad_norm": 0.2790718972682953, "learning_rate": 9.703563126639896e-05, "loss": 0.048, "step": 9280 }, { "epoch": 2.5121687398593835, "grad_norm": 0.35147157311439514, "learning_rate": 9.70262763801459e-05, "loss": 0.0483, "step": 9290 }, { "epoch": 2.51487290427258, "grad_norm": 0.23612695932388306, "learning_rate": 9.701690720855171e-05, "loss": 0.0486, "step": 9300 }, { "epoch": 2.5175770686857764, "grad_norm": 0.2441575825214386, "learning_rate": 9.700752375446253e-05, "loss": 0.0456, "step": 9310 }, { "epoch": 2.5202812330989723, "grad_norm": 0.32952263951301575, "learning_rate": 9.69981260207288e-05, "loss": 0.0473, "step": 9320 }, { "epoch": 2.5229853975121688, "grad_norm": 0.22560228407382965, "learning_rate": 9.698871401020529e-05, "loss": 0.0463, "step": 9330 }, { "epoch": 2.525689561925365, "grad_norm": 0.14853590726852417, "learning_rate": 9.697928772575112e-05, "loss": 0.0465, "step": 9340 }, { "epoch": 2.528393726338561, "grad_norm": 0.2753220498561859, "learning_rate": 9.696984717022976e-05, "loss": 0.0488, "step": 9350 }, { "epoch": 2.5310978907517576, "grad_norm": 0.2112967073917389, "learning_rate": 9.6960392346509e-05, "loss": 0.0482, "step": 9360 }, { "epoch": 2.533802055164954, "grad_norm": 0.18856890499591827, "learning_rate": 9.695092325746097e-05, "loss": 0.0474, "step": 9370 }, { "epoch": 2.5365062195781505, "grad_norm": 0.15081307291984558, "learning_rate": 9.694143990596211e-05, "loss": 0.0478, "step": 9380 }, { "epoch": 2.539210383991347, "grad_norm": 0.273235946893692, "learning_rate": 9.693194229489325e-05, "loss": 0.0497, "step": 9390 }, { "epoch": 2.541914548404543, "grad_norm": 0.23067529499530792, "learning_rate": 9.692243042713944e-05, "loss": 0.0481, "step": 9400 }, { "epoch": 2.5446187128177393, "grad_norm": 0.22763043642044067, "learning_rate": 9.691290430559022e-05, "loss": 0.048, "step": 9410 }, { "epoch": 2.5473228772309358, "grad_norm": 0.21775884926319122, "learning_rate": 9.690336393313932e-05, "loss": 0.0481, "step": 9420 }, { "epoch": 2.5500270416441317, "grad_norm": 0.16271020472049713, "learning_rate": 9.689380931268487e-05, "loss": 0.0469, "step": 9430 }, { "epoch": 2.552731206057328, "grad_norm": 0.20809048414230347, "learning_rate": 9.688424044712932e-05, "loss": 0.0463, "step": 9440 }, { "epoch": 2.5554353704705246, "grad_norm": 0.1806309074163437, "learning_rate": 9.687465733937942e-05, "loss": 0.0464, "step": 9450 }, { "epoch": 2.558139534883721, "grad_norm": 0.34534764289855957, "learning_rate": 9.686505999234627e-05, "loss": 0.0474, "step": 9460 }, { "epoch": 2.5608436992969175, "grad_norm": 0.19025729596614838, "learning_rate": 9.685544840894529e-05, "loss": 0.0467, "step": 9470 }, { "epoch": 2.5635478637101135, "grad_norm": 0.24022570252418518, "learning_rate": 9.684582259209624e-05, "loss": 0.0466, "step": 9480 }, { "epoch": 2.56625202812331, "grad_norm": 0.2593885362148285, "learning_rate": 9.683618254472317e-05, "loss": 0.0497, "step": 9490 }, { "epoch": 2.5689561925365063, "grad_norm": 0.22005484998226166, "learning_rate": 9.682652826975449e-05, "loss": 0.0473, "step": 9500 }, { "epoch": 2.5716603569497023, "grad_norm": 0.3295865058898926, "learning_rate": 9.681685977012291e-05, "loss": 0.046, "step": 9510 }, { "epoch": 2.5743645213628987, "grad_norm": 0.23142056167125702, "learning_rate": 9.680717704876546e-05, "loss": 0.0488, "step": 9520 }, { "epoch": 2.577068685776095, "grad_norm": 0.2808122932910919, "learning_rate": 9.679748010862349e-05, "loss": 0.0489, "step": 9530 }, { "epoch": 2.5797728501892916, "grad_norm": 0.20329256355762482, "learning_rate": 9.678776895264267e-05, "loss": 0.047, "step": 9540 }, { "epoch": 2.582477014602488, "grad_norm": 0.2508153021335602, "learning_rate": 9.6778043583773e-05, "loss": 0.0474, "step": 9550 }, { "epoch": 2.585181179015684, "grad_norm": 0.19177280366420746, "learning_rate": 9.67683040049688e-05, "loss": 0.0481, "step": 9560 }, { "epoch": 2.5878853434288804, "grad_norm": 0.3510109782218933, "learning_rate": 9.675855021918869e-05, "loss": 0.0475, "step": 9570 }, { "epoch": 2.590589507842077, "grad_norm": 0.14584186673164368, "learning_rate": 9.674878222939561e-05, "loss": 0.0476, "step": 9580 }, { "epoch": 2.593293672255273, "grad_norm": 0.24244436621665955, "learning_rate": 9.673900003855681e-05, "loss": 0.047, "step": 9590 }, { "epoch": 2.5959978366684693, "grad_norm": 0.2084505558013916, "learning_rate": 9.672920364964389e-05, "loss": 0.0482, "step": 9600 }, { "epoch": 2.5987020010816657, "grad_norm": 0.23617130517959595, "learning_rate": 9.671939306563269e-05, "loss": 0.048, "step": 9610 }, { "epoch": 2.601406165494862, "grad_norm": 0.12454376369714737, "learning_rate": 9.670956828950345e-05, "loss": 0.0476, "step": 9620 }, { "epoch": 2.6041103299080586, "grad_norm": 0.1951734721660614, "learning_rate": 9.669972932424065e-05, "loss": 0.0484, "step": 9630 }, { "epoch": 2.606814494321255, "grad_norm": 0.29833516478538513, "learning_rate": 9.668987617283312e-05, "loss": 0.047, "step": 9640 }, { "epoch": 2.609518658734451, "grad_norm": 0.24379444122314453, "learning_rate": 9.668000883827397e-05, "loss": 0.0473, "step": 9650 }, { "epoch": 2.6122228231476474, "grad_norm": 0.16364289820194244, "learning_rate": 9.667012732356067e-05, "loss": 0.0486, "step": 9660 }, { "epoch": 2.614926987560844, "grad_norm": 0.21044355630874634, "learning_rate": 9.666023163169493e-05, "loss": 0.0478, "step": 9670 }, { "epoch": 2.61763115197404, "grad_norm": 0.19797709584236145, "learning_rate": 9.665032176568281e-05, "loss": 0.0465, "step": 9680 }, { "epoch": 2.6203353163872363, "grad_norm": 0.22840426862239838, "learning_rate": 9.664039772853469e-05, "loss": 0.0492, "step": 9690 }, { "epoch": 2.6230394808004327, "grad_norm": 0.27606457471847534, "learning_rate": 9.663045952326518e-05, "loss": 0.049, "step": 9700 }, { "epoch": 2.625743645213629, "grad_norm": 0.25470373034477234, "learning_rate": 9.662050715289328e-05, "loss": 0.0492, "step": 9710 }, { "epoch": 2.6284478096268256, "grad_norm": 0.25410252809524536, "learning_rate": 9.661054062044226e-05, "loss": 0.0472, "step": 9720 }, { "epoch": 2.6311519740400215, "grad_norm": 0.20059534907341003, "learning_rate": 9.660055992893968e-05, "loss": 0.0463, "step": 9730 }, { "epoch": 2.633856138453218, "grad_norm": 0.2522803544998169, "learning_rate": 9.659056508141739e-05, "loss": 0.0474, "step": 9740 }, { "epoch": 2.6365603028664144, "grad_norm": 0.14952300488948822, "learning_rate": 9.658055608091161e-05, "loss": 0.0449, "step": 9750 }, { "epoch": 2.6392644672796104, "grad_norm": 0.23719103634357452, "learning_rate": 9.657053293046276e-05, "loss": 0.048, "step": 9760 }, { "epoch": 2.641968631692807, "grad_norm": 0.1258242130279541, "learning_rate": 9.656049563311564e-05, "loss": 0.0465, "step": 9770 }, { "epoch": 2.6446727961060033, "grad_norm": 0.1882343590259552, "learning_rate": 9.655044419191929e-05, "loss": 0.0461, "step": 9780 }, { "epoch": 2.6473769605191997, "grad_norm": 0.20934177935123444, "learning_rate": 9.654037860992711e-05, "loss": 0.047, "step": 9790 }, { "epoch": 2.650081124932396, "grad_norm": 0.1653866022825241, "learning_rate": 9.653029889019672e-05, "loss": 0.047, "step": 9800 }, { "epoch": 2.652785289345592, "grad_norm": 0.20470452308654785, "learning_rate": 9.65202050357901e-05, "loss": 0.0478, "step": 9810 }, { "epoch": 2.6554894537587885, "grad_norm": 0.15337422490119934, "learning_rate": 9.651009704977347e-05, "loss": 0.0459, "step": 9820 }, { "epoch": 2.658193618171985, "grad_norm": 0.3103278577327728, "learning_rate": 9.649997493521738e-05, "loss": 0.0489, "step": 9830 }, { "epoch": 2.660897782585181, "grad_norm": 0.3075762987136841, "learning_rate": 9.64898386951967e-05, "loss": 0.0491, "step": 9840 }, { "epoch": 2.6636019469983774, "grad_norm": 0.14111517369747162, "learning_rate": 9.647968833279049e-05, "loss": 0.0447, "step": 9850 }, { "epoch": 2.666306111411574, "grad_norm": 0.359759122133255, "learning_rate": 9.646952385108218e-05, "loss": 0.0451, "step": 9860 }, { "epoch": 2.6690102758247702, "grad_norm": 0.13428820669651031, "learning_rate": 9.645934525315951e-05, "loss": 0.0461, "step": 9870 }, { "epoch": 2.6717144402379667, "grad_norm": 0.23881272971630096, "learning_rate": 9.644915254211442e-05, "loss": 0.0494, "step": 9880 }, { "epoch": 2.6744186046511627, "grad_norm": 0.2868209481239319, "learning_rate": 9.643894572104321e-05, "loss": 0.046, "step": 9890 }, { "epoch": 2.677122769064359, "grad_norm": 0.24132326245307922, "learning_rate": 9.642872479304644e-05, "loss": 0.0486, "step": 9900 }, { "epoch": 2.6798269334775555, "grad_norm": 0.3531332314014435, "learning_rate": 9.641848976122895e-05, "loss": 0.0459, "step": 9910 }, { "epoch": 2.6825310978907515, "grad_norm": 0.27202486991882324, "learning_rate": 9.64082406286999e-05, "loss": 0.0465, "step": 9920 }, { "epoch": 2.685235262303948, "grad_norm": 0.217219278216362, "learning_rate": 9.639797739857269e-05, "loss": 0.0454, "step": 9930 }, { "epoch": 2.6879394267171444, "grad_norm": 0.19453397393226624, "learning_rate": 9.638770007396498e-05, "loss": 0.0463, "step": 9940 }, { "epoch": 2.690643591130341, "grad_norm": 0.2640365660190582, "learning_rate": 9.63774086579988e-05, "loss": 0.0488, "step": 9950 }, { "epoch": 2.6933477555435372, "grad_norm": 0.29999539256095886, "learning_rate": 9.63671031538004e-05, "loss": 0.0487, "step": 9960 }, { "epoch": 2.696051919956733, "grad_norm": 0.1512221097946167, "learning_rate": 9.635678356450031e-05, "loss": 0.0478, "step": 9970 }, { "epoch": 2.6987560843699296, "grad_norm": 0.3010389804840088, "learning_rate": 9.634644989323336e-05, "loss": 0.0491, "step": 9980 }, { "epoch": 2.701460248783126, "grad_norm": 0.28978344798088074, "learning_rate": 9.633610214313861e-05, "loss": 0.046, "step": 9990 }, { "epoch": 2.7041644131963225, "grad_norm": 0.28232574462890625, "learning_rate": 9.632574031735951e-05, "loss": 0.0473, "step": 10000 }, { "epoch": 2.7068685776095185, "grad_norm": 0.2557836174964905, "learning_rate": 9.631536441904364e-05, "loss": 0.0465, "step": 10010 }, { "epoch": 2.709572742022715, "grad_norm": 0.24802182614803314, "learning_rate": 9.630497445134293e-05, "loss": 0.047, "step": 10020 }, { "epoch": 2.7122769064359114, "grad_norm": 0.2811444401741028, "learning_rate": 9.62945704174136e-05, "loss": 0.046, "step": 10030 }, { "epoch": 2.714981070849108, "grad_norm": 0.20042237639427185, "learning_rate": 9.628415232041612e-05, "loss": 0.0466, "step": 10040 }, { "epoch": 2.717685235262304, "grad_norm": 0.16273514926433563, "learning_rate": 9.627372016351524e-05, "loss": 0.0466, "step": 10050 }, { "epoch": 2.7203893996755, "grad_norm": 0.21864916384220123, "learning_rate": 9.626327394987995e-05, "loss": 0.0465, "step": 10060 }, { "epoch": 2.7230935640886966, "grad_norm": 0.26289522647857666, "learning_rate": 9.625281368268355e-05, "loss": 0.0464, "step": 10070 }, { "epoch": 2.725797728501893, "grad_norm": 0.16085687279701233, "learning_rate": 9.624233936510357e-05, "loss": 0.0485, "step": 10080 }, { "epoch": 2.728501892915089, "grad_norm": 0.17864176630973816, "learning_rate": 9.623185100032187e-05, "loss": 0.0485, "step": 10090 }, { "epoch": 2.7312060573282855, "grad_norm": 0.21371392905712128, "learning_rate": 9.62213485915245e-05, "loss": 0.0467, "step": 10100 }, { "epoch": 2.733910221741482, "grad_norm": 0.23600080609321594, "learning_rate": 9.621083214190186e-05, "loss": 0.047, "step": 10110 }, { "epoch": 2.7366143861546783, "grad_norm": 0.19508789479732513, "learning_rate": 9.62003016546485e-05, "loss": 0.0472, "step": 10120 }, { "epoch": 2.7393185505678748, "grad_norm": 0.1983971893787384, "learning_rate": 9.618975713296339e-05, "loss": 0.0473, "step": 10130 }, { "epoch": 2.7420227149810708, "grad_norm": 0.15265944600105286, "learning_rate": 9.61791985800496e-05, "loss": 0.0468, "step": 10140 }, { "epoch": 2.744726879394267, "grad_norm": 0.210282564163208, "learning_rate": 9.616862599911458e-05, "loss": 0.0479, "step": 10150 }, { "epoch": 2.7474310438074636, "grad_norm": 0.2223467081785202, "learning_rate": 9.615803939337e-05, "loss": 0.0472, "step": 10160 }, { "epoch": 2.7501352082206596, "grad_norm": 0.19485688209533691, "learning_rate": 9.614743876603178e-05, "loss": 0.0467, "step": 10170 }, { "epoch": 2.752839372633856, "grad_norm": 0.28313618898391724, "learning_rate": 9.613682412032013e-05, "loss": 0.0478, "step": 10180 }, { "epoch": 2.7555435370470525, "grad_norm": 0.37367019057273865, "learning_rate": 9.612619545945947e-05, "loss": 0.0506, "step": 10190 }, { "epoch": 2.758247701460249, "grad_norm": 0.20576134324073792, "learning_rate": 9.611555278667852e-05, "loss": 0.0472, "step": 10200 }, { "epoch": 2.7609518658734453, "grad_norm": 0.2295365035533905, "learning_rate": 9.610489610521024e-05, "loss": 0.0471, "step": 10210 }, { "epoch": 2.7636560302866413, "grad_norm": 0.16883809864521027, "learning_rate": 9.609422541829187e-05, "loss": 0.0455, "step": 10220 }, { "epoch": 2.7663601946998377, "grad_norm": 0.1971200406551361, "learning_rate": 9.608354072916486e-05, "loss": 0.047, "step": 10230 }, { "epoch": 2.769064359113034, "grad_norm": 0.24352286756038666, "learning_rate": 9.607284204107493e-05, "loss": 0.0485, "step": 10240 }, { "epoch": 2.77176852352623, "grad_norm": 0.22777912020683289, "learning_rate": 9.606212935727208e-05, "loss": 0.0452, "step": 10250 }, { "epoch": 2.7744726879394266, "grad_norm": 0.15813878178596497, "learning_rate": 9.605140268101052e-05, "loss": 0.0449, "step": 10260 }, { "epoch": 2.777176852352623, "grad_norm": 0.23343868553638458, "learning_rate": 9.604066201554875e-05, "loss": 0.0464, "step": 10270 }, { "epoch": 2.7798810167658194, "grad_norm": 0.146291583776474, "learning_rate": 9.60299073641495e-05, "loss": 0.046, "step": 10280 }, { "epoch": 2.782585181179016, "grad_norm": 0.19628627598285675, "learning_rate": 9.601913873007974e-05, "loss": 0.0488, "step": 10290 }, { "epoch": 2.785289345592212, "grad_norm": 0.26978105306625366, "learning_rate": 9.60083561166107e-05, "loss": 0.0456, "step": 10300 }, { "epoch": 2.7879935100054083, "grad_norm": 0.3549629747867584, "learning_rate": 9.599755952701783e-05, "loss": 0.0458, "step": 10310 }, { "epoch": 2.7906976744186047, "grad_norm": 0.23315328359603882, "learning_rate": 9.598674896458089e-05, "loss": 0.047, "step": 10320 }, { "epoch": 2.7934018388318007, "grad_norm": 0.22213216125965118, "learning_rate": 9.597592443258383e-05, "loss": 0.0468, "step": 10330 }, { "epoch": 2.796106003244997, "grad_norm": 0.13306352496147156, "learning_rate": 9.596508593431483e-05, "loss": 0.046, "step": 10340 }, { "epoch": 2.7988101676581936, "grad_norm": 0.22837838530540466, "learning_rate": 9.59542334730664e-05, "loss": 0.0482, "step": 10350 }, { "epoch": 2.80151433207139, "grad_norm": 0.18392254412174225, "learning_rate": 9.594336705213516e-05, "loss": 0.0495, "step": 10360 }, { "epoch": 2.8042184964845864, "grad_norm": 0.24590738117694855, "learning_rate": 9.593248667482208e-05, "loss": 0.0451, "step": 10370 }, { "epoch": 2.806922660897783, "grad_norm": 0.19253389537334442, "learning_rate": 9.592159234443233e-05, "loss": 0.0467, "step": 10380 }, { "epoch": 2.809626825310979, "grad_norm": 0.28779444098472595, "learning_rate": 9.59106840642753e-05, "loss": 0.0469, "step": 10390 }, { "epoch": 2.8123309897241753, "grad_norm": 0.2013384997844696, "learning_rate": 9.589976183766467e-05, "loss": 0.0458, "step": 10400 }, { "epoch": 2.8150351541373717, "grad_norm": 0.19175952672958374, "learning_rate": 9.58888256679183e-05, "loss": 0.0461, "step": 10410 }, { "epoch": 2.8177393185505677, "grad_norm": 0.232865110039711, "learning_rate": 9.587787555835832e-05, "loss": 0.046, "step": 10420 }, { "epoch": 2.820443482963764, "grad_norm": 0.14644497632980347, "learning_rate": 9.586691151231107e-05, "loss": 0.0481, "step": 10430 }, { "epoch": 2.8231476473769606, "grad_norm": 0.1538984626531601, "learning_rate": 9.585593353310715e-05, "loss": 0.0466, "step": 10440 }, { "epoch": 2.825851811790157, "grad_norm": 0.19556084275245667, "learning_rate": 9.58449416240814e-05, "loss": 0.0478, "step": 10450 }, { "epoch": 2.8285559762033534, "grad_norm": 0.1912926584482193, "learning_rate": 9.583393578857283e-05, "loss": 0.0479, "step": 10460 }, { "epoch": 2.8312601406165494, "grad_norm": 0.18366920948028564, "learning_rate": 9.582291602992474e-05, "loss": 0.0469, "step": 10470 }, { "epoch": 2.833964305029746, "grad_norm": 0.30467063188552856, "learning_rate": 9.581188235148466e-05, "loss": 0.046, "step": 10480 }, { "epoch": 2.8366684694429423, "grad_norm": 0.20774276554584503, "learning_rate": 9.58008347566043e-05, "loss": 0.0461, "step": 10490 }, { "epoch": 2.8393726338561383, "grad_norm": 0.145005002617836, "learning_rate": 9.578977324863965e-05, "loss": 0.0467, "step": 10500 }, { "epoch": 2.8420767982693347, "grad_norm": 0.24847185611724854, "learning_rate": 9.577869783095089e-05, "loss": 0.0471, "step": 10510 }, { "epoch": 2.844780962682531, "grad_norm": 0.13514715433120728, "learning_rate": 9.576760850690245e-05, "loss": 0.0466, "step": 10520 }, { "epoch": 2.8474851270957275, "grad_norm": 0.20341935753822327, "learning_rate": 9.575650527986298e-05, "loss": 0.0441, "step": 10530 }, { "epoch": 2.850189291508924, "grad_norm": 0.1787046641111374, "learning_rate": 9.574538815320531e-05, "loss": 0.0463, "step": 10540 }, { "epoch": 2.85289345592212, "grad_norm": 0.18734213709831238, "learning_rate": 9.573425713030656e-05, "loss": 0.0466, "step": 10550 }, { "epoch": 2.8555976203353164, "grad_norm": 0.18538209795951843, "learning_rate": 9.572311221454806e-05, "loss": 0.0449, "step": 10560 }, { "epoch": 2.858301784748513, "grad_norm": 0.15854988992214203, "learning_rate": 9.57119534093153e-05, "loss": 0.0476, "step": 10570 }, { "epoch": 2.861005949161709, "grad_norm": 0.26688456535339355, "learning_rate": 9.570078071799806e-05, "loss": 0.0464, "step": 10580 }, { "epoch": 2.8637101135749052, "grad_norm": 0.23423966765403748, "learning_rate": 9.568959414399028e-05, "loss": 0.0455, "step": 10590 }, { "epoch": 2.8664142779881017, "grad_norm": 0.1802072525024414, "learning_rate": 9.567839369069018e-05, "loss": 0.0449, "step": 10600 }, { "epoch": 2.869118442401298, "grad_norm": 0.214845672249794, "learning_rate": 9.566717936150013e-05, "loss": 0.046, "step": 10610 }, { "epoch": 2.8718226068144945, "grad_norm": 0.33368980884552, "learning_rate": 9.565595115982678e-05, "loss": 0.0472, "step": 10620 }, { "epoch": 2.8745267712276905, "grad_norm": 0.27854451537132263, "learning_rate": 9.564470908908094e-05, "loss": 0.0445, "step": 10630 }, { "epoch": 2.877230935640887, "grad_norm": 0.28533774614334106, "learning_rate": 9.563345315267764e-05, "loss": 0.046, "step": 10640 }, { "epoch": 2.8799351000540834, "grad_norm": 0.29490113258361816, "learning_rate": 9.562218335403616e-05, "loss": 0.0458, "step": 10650 }, { "epoch": 2.8826392644672794, "grad_norm": 0.24263285100460052, "learning_rate": 9.561089969657999e-05, "loss": 0.0465, "step": 10660 }, { "epoch": 2.885343428880476, "grad_norm": 0.1633581817150116, "learning_rate": 9.559960218373673e-05, "loss": 0.0459, "step": 10670 }, { "epoch": 2.888047593293672, "grad_norm": 0.20334072411060333, "learning_rate": 9.558829081893836e-05, "loss": 0.0456, "step": 10680 }, { "epoch": 2.8907517577068687, "grad_norm": 0.16492711007595062, "learning_rate": 9.55769656056209e-05, "loss": 0.0465, "step": 10690 }, { "epoch": 2.893455922120065, "grad_norm": 0.18200960755348206, "learning_rate": 9.556562654722469e-05, "loss": 0.0464, "step": 10700 }, { "epoch": 2.896160086533261, "grad_norm": 0.17719171941280365, "learning_rate": 9.555427364719422e-05, "loss": 0.0462, "step": 10710 }, { "epoch": 2.8988642509464575, "grad_norm": 0.166691392660141, "learning_rate": 9.55429069089782e-05, "loss": 0.0446, "step": 10720 }, { "epoch": 2.901568415359654, "grad_norm": 0.2724417746067047, "learning_rate": 9.553152633602956e-05, "loss": 0.0451, "step": 10730 }, { "epoch": 2.90427257977285, "grad_norm": 0.19394001364707947, "learning_rate": 9.552013193180543e-05, "loss": 0.0453, "step": 10740 }, { "epoch": 2.9069767441860463, "grad_norm": 0.16756220161914825, "learning_rate": 9.550872369976707e-05, "loss": 0.0457, "step": 10750 }, { "epoch": 2.9096809085992428, "grad_norm": 0.19634293019771576, "learning_rate": 9.549730164338007e-05, "loss": 0.0472, "step": 10760 }, { "epoch": 2.912385073012439, "grad_norm": 0.17532473802566528, "learning_rate": 9.548586576611408e-05, "loss": 0.0453, "step": 10770 }, { "epoch": 2.9150892374256356, "grad_norm": 0.254054456949234, "learning_rate": 9.54744160714431e-05, "loss": 0.0439, "step": 10780 }, { "epoch": 2.917793401838832, "grad_norm": 0.26110976934432983, "learning_rate": 9.546295256284516e-05, "loss": 0.0461, "step": 10790 }, { "epoch": 2.920497566252028, "grad_norm": 0.2312125712633133, "learning_rate": 9.545147524380265e-05, "loss": 0.0466, "step": 10800 }, { "epoch": 2.9232017306652245, "grad_norm": 0.17246036231517792, "learning_rate": 9.543998411780201e-05, "loss": 0.0461, "step": 10810 }, { "epoch": 2.925905895078421, "grad_norm": 0.18683132529258728, "learning_rate": 9.542847918833397e-05, "loss": 0.0468, "step": 10820 }, { "epoch": 2.928610059491617, "grad_norm": 0.13602881133556366, "learning_rate": 9.541696045889343e-05, "loss": 0.0454, "step": 10830 }, { "epoch": 2.9313142239048133, "grad_norm": 0.2077842503786087, "learning_rate": 9.540542793297947e-05, "loss": 0.0459, "step": 10840 }, { "epoch": 2.9340183883180098, "grad_norm": 0.2518240809440613, "learning_rate": 9.539388161409537e-05, "loss": 0.0454, "step": 10850 }, { "epoch": 2.936722552731206, "grad_norm": 0.21493984758853912, "learning_rate": 9.538232150574857e-05, "loss": 0.0459, "step": 10860 }, { "epoch": 2.9394267171444026, "grad_norm": 0.1567727029323578, "learning_rate": 9.537074761145076e-05, "loss": 0.0459, "step": 10870 }, { "epoch": 2.9421308815575986, "grad_norm": 0.14240188896656036, "learning_rate": 9.535915993471778e-05, "loss": 0.0482, "step": 10880 }, { "epoch": 2.944835045970795, "grad_norm": 0.2898014485836029, "learning_rate": 9.534755847906964e-05, "loss": 0.047, "step": 10890 }, { "epoch": 2.9475392103839915, "grad_norm": 0.19490550458431244, "learning_rate": 9.533594324803057e-05, "loss": 0.0454, "step": 10900 }, { "epoch": 2.9502433747971875, "grad_norm": 0.22230017185211182, "learning_rate": 9.532431424512895e-05, "loss": 0.0461, "step": 10910 }, { "epoch": 2.952947539210384, "grad_norm": 0.38307836651802063, "learning_rate": 9.531267147389741e-05, "loss": 0.0466, "step": 10920 }, { "epoch": 2.9556517036235803, "grad_norm": 0.18658237159252167, "learning_rate": 9.530101493787266e-05, "loss": 0.0469, "step": 10930 }, { "epoch": 2.9583558680367767, "grad_norm": 0.2700965404510498, "learning_rate": 9.528934464059571e-05, "loss": 0.0469, "step": 10940 }, { "epoch": 2.961060032449973, "grad_norm": 0.23484280705451965, "learning_rate": 9.527766058561163e-05, "loss": 0.045, "step": 10950 }, { "epoch": 2.963764196863169, "grad_norm": 0.1426543891429901, "learning_rate": 9.526596277646976e-05, "loss": 0.0451, "step": 10960 }, { "epoch": 2.9664683612763656, "grad_norm": 0.22888921201229095, "learning_rate": 9.525425121672358e-05, "loss": 0.046, "step": 10970 }, { "epoch": 2.969172525689562, "grad_norm": 0.26800674200057983, "learning_rate": 9.524252590993074e-05, "loss": 0.0446, "step": 10980 }, { "epoch": 2.971876690102758, "grad_norm": 0.20244978368282318, "learning_rate": 9.523078685965309e-05, "loss": 0.0456, "step": 10990 }, { "epoch": 2.9745808545159544, "grad_norm": 0.14246192574501038, "learning_rate": 9.521903406945664e-05, "loss": 0.0468, "step": 11000 }, { "epoch": 2.977285018929151, "grad_norm": 0.2183072715997696, "learning_rate": 9.520726754291158e-05, "loss": 0.0447, "step": 11010 }, { "epoch": 2.9799891833423473, "grad_norm": 0.16404280066490173, "learning_rate": 9.519548728359227e-05, "loss": 0.0443, "step": 11020 }, { "epoch": 2.9826933477555437, "grad_norm": 0.15821611881256104, "learning_rate": 9.518369329507726e-05, "loss": 0.0439, "step": 11030 }, { "epoch": 2.9853975121687397, "grad_norm": 0.1499229371547699, "learning_rate": 9.51718855809492e-05, "loss": 0.0478, "step": 11040 }, { "epoch": 2.988101676581936, "grad_norm": 0.16599225997924805, "learning_rate": 9.516006414479502e-05, "loss": 0.047, "step": 11050 }, { "epoch": 2.9908058409951326, "grad_norm": 0.2547508180141449, "learning_rate": 9.514822899020572e-05, "loss": 0.0451, "step": 11060 }, { "epoch": 2.9935100054083286, "grad_norm": 0.17597784101963043, "learning_rate": 9.513638012077654e-05, "loss": 0.0463, "step": 11070 }, { "epoch": 2.996214169821525, "grad_norm": 0.2577025890350342, "learning_rate": 9.512451754010683e-05, "loss": 0.0458, "step": 11080 }, { "epoch": 2.9989183342347214, "grad_norm": 0.3299574851989746, "learning_rate": 9.511264125180013e-05, "loss": 0.0464, "step": 11090 }, { "epoch": 3.001622498647918, "grad_norm": 0.24046958982944489, "learning_rate": 9.510075125946414e-05, "loss": 0.0453, "step": 11100 }, { "epoch": 3.0043266630611143, "grad_norm": 0.22882844507694244, "learning_rate": 9.508884756671075e-05, "loss": 0.0468, "step": 11110 }, { "epoch": 3.0070308274743103, "grad_norm": 0.2054901123046875, "learning_rate": 9.507693017715596e-05, "loss": 0.0446, "step": 11120 }, { "epoch": 3.0097349918875067, "grad_norm": 0.16993245482444763, "learning_rate": 9.506499909441997e-05, "loss": 0.0438, "step": 11130 }, { "epoch": 3.012439156300703, "grad_norm": 0.1873202919960022, "learning_rate": 9.505305432212713e-05, "loss": 0.046, "step": 11140 }, { "epoch": 3.0151433207138996, "grad_norm": 0.14795981347560883, "learning_rate": 9.504109586390595e-05, "loss": 0.0457, "step": 11150 }, { "epoch": 3.0178474851270956, "grad_norm": 0.24480094015598297, "learning_rate": 9.502912372338908e-05, "loss": 0.0461, "step": 11160 }, { "epoch": 3.020551649540292, "grad_norm": 0.20598606765270233, "learning_rate": 9.501713790421335e-05, "loss": 0.0472, "step": 11170 }, { "epoch": 3.0232558139534884, "grad_norm": 0.28366219997406006, "learning_rate": 9.500513841001974e-05, "loss": 0.0459, "step": 11180 }, { "epoch": 3.025959978366685, "grad_norm": 0.19550266861915588, "learning_rate": 9.499312524445336e-05, "loss": 0.044, "step": 11190 }, { "epoch": 3.028664142779881, "grad_norm": 0.13693033158779144, "learning_rate": 9.498109841116351e-05, "loss": 0.0466, "step": 11200 }, { "epoch": 3.0313683071930773, "grad_norm": 0.17613862454891205, "learning_rate": 9.496905791380363e-05, "loss": 0.046, "step": 11210 }, { "epoch": 3.0340724716062737, "grad_norm": 0.2013135552406311, "learning_rate": 9.495700375603129e-05, "loss": 0.046, "step": 11220 }, { "epoch": 3.03677663601947, "grad_norm": 0.21265852451324463, "learning_rate": 9.494493594150822e-05, "loss": 0.0476, "step": 11230 }, { "epoch": 3.039480800432666, "grad_norm": 0.16675172746181488, "learning_rate": 9.493285447390032e-05, "loss": 0.0444, "step": 11240 }, { "epoch": 3.0421849648458625, "grad_norm": 0.3498218059539795, "learning_rate": 9.492075935687761e-05, "loss": 0.0448, "step": 11250 }, { "epoch": 3.044889129259059, "grad_norm": 0.2827083468437195, "learning_rate": 9.490865059411427e-05, "loss": 0.0476, "step": 11260 }, { "epoch": 3.0475932936722554, "grad_norm": 0.2489974945783615, "learning_rate": 9.489652818928863e-05, "loss": 0.0451, "step": 11270 }, { "epoch": 3.0502974580854514, "grad_norm": 0.21363352239131927, "learning_rate": 9.488439214608315e-05, "loss": 0.0449, "step": 11280 }, { "epoch": 3.053001622498648, "grad_norm": 0.16189312934875488, "learning_rate": 9.487224246818444e-05, "loss": 0.047, "step": 11290 }, { "epoch": 3.0557057869118442, "grad_norm": 0.199106827378273, "learning_rate": 9.486007915928325e-05, "loss": 0.0453, "step": 11300 }, { "epoch": 3.0584099513250407, "grad_norm": 0.1751253455877304, "learning_rate": 9.484790222307448e-05, "loss": 0.0473, "step": 11310 }, { "epoch": 3.0611141157382367, "grad_norm": 0.19152340292930603, "learning_rate": 9.483571166325716e-05, "loss": 0.0453, "step": 11320 }, { "epoch": 3.063818280151433, "grad_norm": 0.21027322113513947, "learning_rate": 9.482350748353444e-05, "loss": 0.0472, "step": 11330 }, { "epoch": 3.0665224445646295, "grad_norm": 0.1667861044406891, "learning_rate": 9.481128968761363e-05, "loss": 0.0453, "step": 11340 }, { "epoch": 3.069226608977826, "grad_norm": 0.11776404827833176, "learning_rate": 9.479905827920621e-05, "loss": 0.0459, "step": 11350 }, { "epoch": 3.0719307733910224, "grad_norm": 0.13977265357971191, "learning_rate": 9.478681326202773e-05, "loss": 0.0462, "step": 11360 }, { "epoch": 3.0746349378042184, "grad_norm": 0.12984757125377655, "learning_rate": 9.477455463979791e-05, "loss": 0.0454, "step": 11370 }, { "epoch": 3.077339102217415, "grad_norm": 0.1753227859735489, "learning_rate": 9.476228241624059e-05, "loss": 0.0456, "step": 11380 }, { "epoch": 3.0800432666306112, "grad_norm": 0.241938978433609, "learning_rate": 9.474999659508374e-05, "loss": 0.0455, "step": 11390 }, { "epoch": 3.0827474310438077, "grad_norm": 0.19942627847194672, "learning_rate": 9.47376971800595e-05, "loss": 0.0446, "step": 11400 }, { "epoch": 3.0854515954570036, "grad_norm": 0.14683467149734497, "learning_rate": 9.472538417490409e-05, "loss": 0.0444, "step": 11410 }, { "epoch": 3.0881557598702, "grad_norm": 0.17989906668663025, "learning_rate": 9.471305758335784e-05, "loss": 0.0441, "step": 11420 }, { "epoch": 3.0908599242833965, "grad_norm": 0.1580284982919693, "learning_rate": 9.47007174091653e-05, "loss": 0.0427, "step": 11430 }, { "epoch": 3.093564088696593, "grad_norm": 0.3532558083534241, "learning_rate": 9.468836365607507e-05, "loss": 0.0466, "step": 11440 }, { "epoch": 3.096268253109789, "grad_norm": 0.19639348983764648, "learning_rate": 9.467599632783988e-05, "loss": 0.0466, "step": 11450 }, { "epoch": 3.0989724175229854, "grad_norm": 0.14015327394008636, "learning_rate": 9.466361542821662e-05, "loss": 0.0458, "step": 11460 }, { "epoch": 3.101676581936182, "grad_norm": 0.21051903069019318, "learning_rate": 9.465122096096625e-05, "loss": 0.0456, "step": 11470 }, { "epoch": 3.104380746349378, "grad_norm": 0.2521112561225891, "learning_rate": 9.463881292985391e-05, "loss": 0.0458, "step": 11480 }, { "epoch": 3.107084910762574, "grad_norm": 0.23150813579559326, "learning_rate": 9.462639133864881e-05, "loss": 0.0454, "step": 11490 }, { "epoch": 3.1097890751757706, "grad_norm": 0.19462217390537262, "learning_rate": 9.461395619112432e-05, "loss": 0.0455, "step": 11500 }, { "epoch": 3.112493239588967, "grad_norm": 0.11117548495531082, "learning_rate": 9.460150749105791e-05, "loss": 0.0441, "step": 11510 }, { "epoch": 3.1151974040021635, "grad_norm": 0.2023305743932724, "learning_rate": 9.458904524223116e-05, "loss": 0.0463, "step": 11520 }, { "epoch": 3.1179015684153595, "grad_norm": 0.18947605788707733, "learning_rate": 9.457656944842976e-05, "loss": 0.0463, "step": 11530 }, { "epoch": 3.120605732828556, "grad_norm": 0.19732946157455444, "learning_rate": 9.456408011344353e-05, "loss": 0.0439, "step": 11540 }, { "epoch": 3.1233098972417523, "grad_norm": 0.21512258052825928, "learning_rate": 9.455157724106643e-05, "loss": 0.0452, "step": 11550 }, { "epoch": 3.1260140616549488, "grad_norm": 0.1748015433549881, "learning_rate": 9.453906083509647e-05, "loss": 0.0443, "step": 11560 }, { "epoch": 3.1287182260681448, "grad_norm": 0.18187014758586884, "learning_rate": 9.45265308993358e-05, "loss": 0.0437, "step": 11570 }, { "epoch": 3.131422390481341, "grad_norm": 0.21240536868572235, "learning_rate": 9.451398743759071e-05, "loss": 0.0479, "step": 11580 }, { "epoch": 3.1341265548945376, "grad_norm": 0.16948769986629486, "learning_rate": 9.450143045367156e-05, "loss": 0.0446, "step": 11590 }, { "epoch": 3.136830719307734, "grad_norm": 0.20852071046829224, "learning_rate": 9.448885995139283e-05, "loss": 0.0456, "step": 11600 }, { "epoch": 3.13953488372093, "grad_norm": 0.13638189435005188, "learning_rate": 9.44762759345731e-05, "loss": 0.0462, "step": 11610 }, { "epoch": 3.1422390481341265, "grad_norm": 0.10909190773963928, "learning_rate": 9.446367840703509e-05, "loss": 0.0451, "step": 11620 }, { "epoch": 3.144943212547323, "grad_norm": 0.2697785794734955, "learning_rate": 9.445106737260556e-05, "loss": 0.0449, "step": 11630 }, { "epoch": 3.1476473769605193, "grad_norm": 0.234361469745636, "learning_rate": 9.443844283511543e-05, "loss": 0.0455, "step": 11640 }, { "epoch": 3.1503515413737153, "grad_norm": 0.16025878489017487, "learning_rate": 9.442580479839968e-05, "loss": 0.0459, "step": 11650 }, { "epoch": 3.1530557057869117, "grad_norm": 0.18796610832214355, "learning_rate": 9.441315326629745e-05, "loss": 0.0443, "step": 11660 }, { "epoch": 3.155759870200108, "grad_norm": 0.14197666943073273, "learning_rate": 9.44004882426519e-05, "loss": 0.0445, "step": 11670 }, { "epoch": 3.1584640346133046, "grad_norm": 0.17984402179718018, "learning_rate": 9.438780973131037e-05, "loss": 0.0449, "step": 11680 }, { "epoch": 3.161168199026501, "grad_norm": 0.1218152865767479, "learning_rate": 9.437511773612423e-05, "loss": 0.0432, "step": 11690 }, { "epoch": 3.163872363439697, "grad_norm": 0.10909390449523926, "learning_rate": 9.436241226094896e-05, "loss": 0.0446, "step": 11700 }, { "epoch": 3.1665765278528935, "grad_norm": 0.11824561655521393, "learning_rate": 9.434969330964418e-05, "loss": 0.0455, "step": 11710 }, { "epoch": 3.16928069226609, "grad_norm": 0.14578953385353088, "learning_rate": 9.433696088607356e-05, "loss": 0.0454, "step": 11720 }, { "epoch": 3.171984856679286, "grad_norm": 0.16317184269428253, "learning_rate": 9.432421499410486e-05, "loss": 0.0445, "step": 11730 }, { "epoch": 3.1746890210924823, "grad_norm": 0.2177029848098755, "learning_rate": 9.431145563760998e-05, "loss": 0.0463, "step": 11740 }, { "epoch": 3.1773931855056787, "grad_norm": 0.19682176411151886, "learning_rate": 9.429868282046484e-05, "loss": 0.0433, "step": 11750 }, { "epoch": 3.180097349918875, "grad_norm": 0.20067855715751648, "learning_rate": 9.428589654654951e-05, "loss": 0.0465, "step": 11760 }, { "epoch": 3.1828015143320716, "grad_norm": 0.20013417303562164, "learning_rate": 9.42730968197481e-05, "loss": 0.047, "step": 11770 }, { "epoch": 3.1855056787452676, "grad_norm": 0.22545260190963745, "learning_rate": 9.426028364394883e-05, "loss": 0.0437, "step": 11780 }, { "epoch": 3.188209843158464, "grad_norm": 0.30775588750839233, "learning_rate": 9.424745702304402e-05, "loss": 0.0469, "step": 11790 }, { "epoch": 3.1909140075716604, "grad_norm": 0.1671847701072693, "learning_rate": 9.423461696093006e-05, "loss": 0.0454, "step": 11800 }, { "epoch": 3.193618171984857, "grad_norm": 0.14485253393650055, "learning_rate": 9.422176346150741e-05, "loss": 0.0456, "step": 11810 }, { "epoch": 3.196322336398053, "grad_norm": 0.23694759607315063, "learning_rate": 9.420889652868063e-05, "loss": 0.0454, "step": 11820 }, { "epoch": 3.1990265008112493, "grad_norm": 0.19762280583381653, "learning_rate": 9.419601616635836e-05, "loss": 0.047, "step": 11830 }, { "epoch": 3.2017306652244457, "grad_norm": 0.22723335027694702, "learning_rate": 9.418312237845331e-05, "loss": 0.0455, "step": 11840 }, { "epoch": 3.204434829637642, "grad_norm": 0.2157236784696579, "learning_rate": 9.417021516888225e-05, "loss": 0.0443, "step": 11850 }, { "epoch": 3.207138994050838, "grad_norm": 0.19283464550971985, "learning_rate": 9.415729454156608e-05, "loss": 0.0463, "step": 11860 }, { "epoch": 3.2098431584640346, "grad_norm": 0.16516859829425812, "learning_rate": 9.414436050042973e-05, "loss": 0.0451, "step": 11870 }, { "epoch": 3.212547322877231, "grad_norm": 0.19471979141235352, "learning_rate": 9.413141304940223e-05, "loss": 0.0465, "step": 11880 }, { "epoch": 3.2152514872904274, "grad_norm": 0.19036148488521576, "learning_rate": 9.411845219241666e-05, "loss": 0.0441, "step": 11890 }, { "epoch": 3.2179556517036234, "grad_norm": 0.18493911623954773, "learning_rate": 9.410547793341021e-05, "loss": 0.0447, "step": 11900 }, { "epoch": 3.22065981611682, "grad_norm": 0.1670977771282196, "learning_rate": 9.409249027632408e-05, "loss": 0.0449, "step": 11910 }, { "epoch": 3.2233639805300163, "grad_norm": 0.15949265658855438, "learning_rate": 9.407948922510362e-05, "loss": 0.0439, "step": 11920 }, { "epoch": 3.2260681449432127, "grad_norm": 0.16792194545269012, "learning_rate": 9.406647478369817e-05, "loss": 0.0461, "step": 11930 }, { "epoch": 3.2287723093564087, "grad_norm": 0.17451542615890503, "learning_rate": 9.405344695606118e-05, "loss": 0.0434, "step": 11940 }, { "epoch": 3.231476473769605, "grad_norm": 0.2119653820991516, "learning_rate": 9.404040574615018e-05, "loss": 0.0442, "step": 11950 }, { "epoch": 3.2341806381828015, "grad_norm": 0.16399845480918884, "learning_rate": 9.402735115792674e-05, "loss": 0.0436, "step": 11960 }, { "epoch": 3.236884802595998, "grad_norm": 0.19511817395687103, "learning_rate": 9.401428319535649e-05, "loss": 0.0441, "step": 11970 }, { "epoch": 3.239588967009194, "grad_norm": 0.1765781044960022, "learning_rate": 9.400120186240912e-05, "loss": 0.0462, "step": 11980 }, { "epoch": 3.2422931314223904, "grad_norm": 0.2588980197906494, "learning_rate": 9.398810716305844e-05, "loss": 0.0462, "step": 11990 }, { "epoch": 3.244997295835587, "grad_norm": 0.18051499128341675, "learning_rate": 9.397499910128222e-05, "loss": 0.0436, "step": 12000 }, { "epoch": 3.2477014602487833, "grad_norm": 0.17837172746658325, "learning_rate": 9.396187768106237e-05, "loss": 0.0442, "step": 12010 }, { "epoch": 3.2504056246619797, "grad_norm": 0.196793332695961, "learning_rate": 9.394874290638482e-05, "loss": 0.0436, "step": 12020 }, { "epoch": 3.2531097890751757, "grad_norm": 0.29863065481185913, "learning_rate": 9.393559478123959e-05, "loss": 0.0451, "step": 12030 }, { "epoch": 3.255813953488372, "grad_norm": 0.25972843170166016, "learning_rate": 9.39224333096207e-05, "loss": 0.0451, "step": 12040 }, { "epoch": 3.2585181179015685, "grad_norm": 0.12465929239988327, "learning_rate": 9.390925849552629e-05, "loss": 0.0474, "step": 12050 }, { "epoch": 3.2612222823147645, "grad_norm": 0.19863146543502808, "learning_rate": 9.389607034295849e-05, "loss": 0.0442, "step": 12060 }, { "epoch": 3.263926446727961, "grad_norm": 0.2714288830757141, "learning_rate": 9.388286885592355e-05, "loss": 0.0432, "step": 12070 }, { "epoch": 3.2666306111411574, "grad_norm": 0.20712952315807343, "learning_rate": 9.386965403843168e-05, "loss": 0.0448, "step": 12080 }, { "epoch": 3.269334775554354, "grad_norm": 0.11776124686002731, "learning_rate": 9.385642589449726e-05, "loss": 0.0444, "step": 12090 }, { "epoch": 3.2720389399675502, "grad_norm": 0.15303853154182434, "learning_rate": 9.38431844281386e-05, "loss": 0.045, "step": 12100 }, { "epoch": 3.2747431043807462, "grad_norm": 0.15289485454559326, "learning_rate": 9.38299296433781e-05, "loss": 0.0452, "step": 12110 }, { "epoch": 3.2774472687939427, "grad_norm": 0.17201627790927887, "learning_rate": 9.381666154424226e-05, "loss": 0.0433, "step": 12120 }, { "epoch": 3.280151433207139, "grad_norm": 0.23703302443027496, "learning_rate": 9.380338013476157e-05, "loss": 0.0435, "step": 12130 }, { "epoch": 3.282855597620335, "grad_norm": 0.1385125070810318, "learning_rate": 9.379008541897054e-05, "loss": 0.0435, "step": 12140 }, { "epoch": 3.2855597620335315, "grad_norm": 0.17566518485546112, "learning_rate": 9.377677740090777e-05, "loss": 0.0451, "step": 12150 }, { "epoch": 3.288263926446728, "grad_norm": 0.17158743739128113, "learning_rate": 9.376345608461588e-05, "loss": 0.0461, "step": 12160 }, { "epoch": 3.2909680908599244, "grad_norm": 0.19632317125797272, "learning_rate": 9.375012147414155e-05, "loss": 0.0438, "step": 12170 }, { "epoch": 3.293672255273121, "grad_norm": 0.14306537806987762, "learning_rate": 9.373677357353545e-05, "loss": 0.0453, "step": 12180 }, { "epoch": 3.296376419686317, "grad_norm": 0.11550094932317734, "learning_rate": 9.372341238685237e-05, "loss": 0.0447, "step": 12190 }, { "epoch": 3.299080584099513, "grad_norm": 0.15189716219902039, "learning_rate": 9.371003791815102e-05, "loss": 0.0456, "step": 12200 }, { "epoch": 3.3017847485127096, "grad_norm": 0.14284473657608032, "learning_rate": 9.369665017149429e-05, "loss": 0.0465, "step": 12210 }, { "epoch": 3.304488912925906, "grad_norm": 0.18459929525852203, "learning_rate": 9.368324915094895e-05, "loss": 0.0453, "step": 12220 }, { "epoch": 3.307193077339102, "grad_norm": 0.21994972229003906, "learning_rate": 9.366983486058591e-05, "loss": 0.0454, "step": 12230 }, { "epoch": 3.3098972417522985, "grad_norm": 0.18107007443904877, "learning_rate": 9.365640730448009e-05, "loss": 0.0441, "step": 12240 }, { "epoch": 3.312601406165495, "grad_norm": 0.19699475169181824, "learning_rate": 9.36429664867104e-05, "loss": 0.0429, "step": 12250 }, { "epoch": 3.3153055705786914, "grad_norm": 0.20235703885555267, "learning_rate": 9.362951241135982e-05, "loss": 0.0441, "step": 12260 }, { "epoch": 3.3180097349918873, "grad_norm": 0.22157223522663116, "learning_rate": 9.361604508251534e-05, "loss": 0.044, "step": 12270 }, { "epoch": 3.3207138994050838, "grad_norm": 0.15979771316051483, "learning_rate": 9.360256450426799e-05, "loss": 0.0451, "step": 12280 }, { "epoch": 3.32341806381828, "grad_norm": 0.16605761647224426, "learning_rate": 9.358907068071279e-05, "loss": 0.0443, "step": 12290 }, { "epoch": 3.3261222282314766, "grad_norm": 0.1811821460723877, "learning_rate": 9.357556361594882e-05, "loss": 0.0438, "step": 12300 }, { "epoch": 3.3288263926446726, "grad_norm": 0.17867940664291382, "learning_rate": 9.356204331407917e-05, "loss": 0.045, "step": 12310 }, { "epoch": 3.331530557057869, "grad_norm": 0.23275279998779297, "learning_rate": 9.354850977921094e-05, "loss": 0.0439, "step": 12320 }, { "epoch": 3.3342347214710655, "grad_norm": 0.16778075695037842, "learning_rate": 9.353496301545529e-05, "loss": 0.044, "step": 12330 }, { "epoch": 3.336938885884262, "grad_norm": 0.22186020016670227, "learning_rate": 9.352140302692733e-05, "loss": 0.0437, "step": 12340 }, { "epoch": 3.339643050297458, "grad_norm": 0.16301585733890533, "learning_rate": 9.350782981774627e-05, "loss": 0.0441, "step": 12350 }, { "epoch": 3.3423472147106543, "grad_norm": 0.199326753616333, "learning_rate": 9.349424339203526e-05, "loss": 0.0457, "step": 12360 }, { "epoch": 3.3450513791238508, "grad_norm": 0.20813299715518951, "learning_rate": 9.34806437539215e-05, "loss": 0.044, "step": 12370 }, { "epoch": 3.347755543537047, "grad_norm": 0.14477138221263885, "learning_rate": 9.346703090753622e-05, "loss": 0.0438, "step": 12380 }, { "epoch": 3.350459707950243, "grad_norm": 0.12240997701883316, "learning_rate": 9.345340485701461e-05, "loss": 0.0457, "step": 12390 }, { "epoch": 3.3531638723634396, "grad_norm": 0.1775479018688202, "learning_rate": 9.343976560649595e-05, "loss": 0.0453, "step": 12400 }, { "epoch": 3.355868036776636, "grad_norm": 0.20411834120750427, "learning_rate": 9.342611316012344e-05, "loss": 0.0463, "step": 12410 }, { "epoch": 3.3585722011898325, "grad_norm": 0.18682144582271576, "learning_rate": 9.341244752204437e-05, "loss": 0.0425, "step": 12420 }, { "epoch": 3.361276365603029, "grad_norm": 0.19483324885368347, "learning_rate": 9.339876869640995e-05, "loss": 0.0467, "step": 12430 }, { "epoch": 3.363980530016225, "grad_norm": 0.20131662487983704, "learning_rate": 9.33850766873755e-05, "loss": 0.0446, "step": 12440 }, { "epoch": 3.3666846944294213, "grad_norm": 0.15098612010478973, "learning_rate": 9.337137149910028e-05, "loss": 0.0452, "step": 12450 }, { "epoch": 3.3693888588426177, "grad_norm": 0.14519263803958893, "learning_rate": 9.335765313574753e-05, "loss": 0.0459, "step": 12460 }, { "epoch": 3.3720930232558137, "grad_norm": 0.2822754383087158, "learning_rate": 9.334392160148457e-05, "loss": 0.0456, "step": 12470 }, { "epoch": 3.37479718766901, "grad_norm": 0.24856862425804138, "learning_rate": 9.333017690048264e-05, "loss": 0.0456, "step": 12480 }, { "epoch": 3.3775013520822066, "grad_norm": 0.25348570942878723, "learning_rate": 9.331641903691706e-05, "loss": 0.0442, "step": 12490 }, { "epoch": 3.380205516495403, "grad_norm": 0.27022668719291687, "learning_rate": 9.330264801496707e-05, "loss": 0.0452, "step": 12500 }, { "epoch": 3.3829096809085994, "grad_norm": 0.1379816085100174, "learning_rate": 9.328886383881594e-05, "loss": 0.0436, "step": 12510 }, { "epoch": 3.3856138453217954, "grad_norm": 0.1647747904062271, "learning_rate": 9.327506651265095e-05, "loss": 0.0445, "step": 12520 }, { "epoch": 3.388318009734992, "grad_norm": 0.19903665781021118, "learning_rate": 9.326125604066338e-05, "loss": 0.0439, "step": 12530 }, { "epoch": 3.3910221741481883, "grad_norm": 0.17019963264465332, "learning_rate": 9.324743242704847e-05, "loss": 0.044, "step": 12540 }, { "epoch": 3.3937263385613847, "grad_norm": 0.2027595192193985, "learning_rate": 9.323359567600546e-05, "loss": 0.0439, "step": 12550 }, { "epoch": 3.3964305029745807, "grad_norm": 0.15340185165405273, "learning_rate": 9.321974579173761e-05, "loss": 0.0442, "step": 12560 }, { "epoch": 3.399134667387777, "grad_norm": 0.25730761885643005, "learning_rate": 9.320588277845213e-05, "loss": 0.0441, "step": 12570 }, { "epoch": 3.4018388318009736, "grad_norm": 0.25038063526153564, "learning_rate": 9.319200664036026e-05, "loss": 0.045, "step": 12580 }, { "epoch": 3.40454299621417, "grad_norm": 0.2037746161222458, "learning_rate": 9.31781173816772e-05, "loss": 0.0433, "step": 12590 }, { "epoch": 3.407247160627366, "grad_norm": 0.1585015058517456, "learning_rate": 9.316421500662212e-05, "loss": 0.0437, "step": 12600 }, { "epoch": 3.4099513250405624, "grad_norm": 0.18457724153995514, "learning_rate": 9.31502995194182e-05, "loss": 0.0445, "step": 12610 }, { "epoch": 3.412655489453759, "grad_norm": 0.20948933064937592, "learning_rate": 9.31363709242926e-05, "loss": 0.0444, "step": 12620 }, { "epoch": 3.4153596538669553, "grad_norm": 0.2038625329732895, "learning_rate": 9.312242922547647e-05, "loss": 0.0453, "step": 12630 }, { "epoch": 3.4180638182801513, "grad_norm": 0.1559446007013321, "learning_rate": 9.310847442720492e-05, "loss": 0.0454, "step": 12640 }, { "epoch": 3.4207679826933477, "grad_norm": 0.16429762542247772, "learning_rate": 9.309450653371706e-05, "loss": 0.0443, "step": 12650 }, { "epoch": 3.423472147106544, "grad_norm": 0.2552066445350647, "learning_rate": 9.308052554925595e-05, "loss": 0.0443, "step": 12660 }, { "epoch": 3.4261763115197406, "grad_norm": 0.2922823131084442, "learning_rate": 9.306653147806867e-05, "loss": 0.0438, "step": 12670 }, { "epoch": 3.4288804759329365, "grad_norm": 0.24460415542125702, "learning_rate": 9.305252432440622e-05, "loss": 0.0451, "step": 12680 }, { "epoch": 3.431584640346133, "grad_norm": 0.3677883744239807, "learning_rate": 9.303850409252361e-05, "loss": 0.0451, "step": 12690 }, { "epoch": 3.4342888047593294, "grad_norm": 0.18036670982837677, "learning_rate": 9.302447078667985e-05, "loss": 0.0439, "step": 12700 }, { "epoch": 3.436992969172526, "grad_norm": 0.24747909605503082, "learning_rate": 9.301042441113783e-05, "loss": 0.0447, "step": 12710 }, { "epoch": 3.439697133585722, "grad_norm": 0.15742303431034088, "learning_rate": 9.299636497016451e-05, "loss": 0.043, "step": 12720 }, { "epoch": 3.4424012979989183, "grad_norm": 0.18996350467205048, "learning_rate": 9.298229246803076e-05, "loss": 0.0438, "step": 12730 }, { "epoch": 3.4451054624121147, "grad_norm": 0.23367467522621155, "learning_rate": 9.296820690901144e-05, "loss": 0.0457, "step": 12740 }, { "epoch": 3.447809626825311, "grad_norm": 0.19977039098739624, "learning_rate": 9.295410829738539e-05, "loss": 0.0442, "step": 12750 }, { "epoch": 3.4505137912385075, "grad_norm": 0.16559933125972748, "learning_rate": 9.293999663743535e-05, "loss": 0.0424, "step": 12760 }, { "epoch": 3.4532179556517035, "grad_norm": 0.19973120093345642, "learning_rate": 9.292587193344813e-05, "loss": 0.0436, "step": 12770 }, { "epoch": 3.4559221200649, "grad_norm": 0.28003665804862976, "learning_rate": 9.291173418971437e-05, "loss": 0.0436, "step": 12780 }, { "epoch": 3.4586262844780964, "grad_norm": 0.14088855683803558, "learning_rate": 9.28975834105288e-05, "loss": 0.0443, "step": 12790 }, { "epoch": 3.4613304488912924, "grad_norm": 0.205988809466362, "learning_rate": 9.288341960019004e-05, "loss": 0.046, "step": 12800 }, { "epoch": 3.464034613304489, "grad_norm": 0.18900462985038757, "learning_rate": 9.286924276300067e-05, "loss": 0.0445, "step": 12810 }, { "epoch": 3.4667387777176852, "grad_norm": 0.19609516859054565, "learning_rate": 9.285505290326726e-05, "loss": 0.0439, "step": 12820 }, { "epoch": 3.4694429421308817, "grad_norm": 0.17809568345546722, "learning_rate": 9.284085002530027e-05, "loss": 0.0423, "step": 12830 }, { "epoch": 3.472147106544078, "grad_norm": 0.15062987804412842, "learning_rate": 9.282663413341422e-05, "loss": 0.0434, "step": 12840 }, { "epoch": 3.474851270957274, "grad_norm": 0.1638166606426239, "learning_rate": 9.281240523192747e-05, "loss": 0.0444, "step": 12850 }, { "epoch": 3.4775554353704705, "grad_norm": 0.21253393590450287, "learning_rate": 9.279816332516242e-05, "loss": 0.0454, "step": 12860 }, { "epoch": 3.480259599783667, "grad_norm": 0.1502641886472702, "learning_rate": 9.278390841744536e-05, "loss": 0.0439, "step": 12870 }, { "epoch": 3.482963764196863, "grad_norm": 0.15088260173797607, "learning_rate": 9.276964051310658e-05, "loss": 0.0446, "step": 12880 }, { "epoch": 3.4856679286100594, "grad_norm": 0.22488471865653992, "learning_rate": 9.275535961648027e-05, "loss": 0.0452, "step": 12890 }, { "epoch": 3.488372093023256, "grad_norm": 0.2189812809228897, "learning_rate": 9.274106573190459e-05, "loss": 0.0468, "step": 12900 }, { "epoch": 3.4910762574364522, "grad_norm": 0.17171485722064972, "learning_rate": 9.272675886372168e-05, "loss": 0.0427, "step": 12910 }, { "epoch": 3.4937804218496487, "grad_norm": 0.24636560678482056, "learning_rate": 9.271243901627754e-05, "loss": 0.0443, "step": 12920 }, { "epoch": 3.4964845862628446, "grad_norm": 0.13703182339668274, "learning_rate": 9.269810619392219e-05, "loss": 0.0432, "step": 12930 }, { "epoch": 3.499188750676041, "grad_norm": 0.11131767928600311, "learning_rate": 9.268376040100955e-05, "loss": 0.0438, "step": 12940 }, { "epoch": 3.5018929150892375, "grad_norm": 0.17478474974632263, "learning_rate": 9.266940164189752e-05, "loss": 0.0445, "step": 12950 }, { "epoch": 3.5045970795024335, "grad_norm": 0.22391118109226227, "learning_rate": 9.265502992094787e-05, "loss": 0.0434, "step": 12960 }, { "epoch": 3.50730124391563, "grad_norm": 0.16630440950393677, "learning_rate": 9.264064524252638e-05, "loss": 0.0443, "step": 12970 }, { "epoch": 3.5100054083288263, "grad_norm": 0.15698528289794922, "learning_rate": 9.262624761100271e-05, "loss": 0.0443, "step": 12980 }, { "epoch": 3.512709572742023, "grad_norm": 0.22765810787677765, "learning_rate": 9.261183703075051e-05, "loss": 0.0472, "step": 12990 }, { "epoch": 3.515413737155219, "grad_norm": 0.23271825909614563, "learning_rate": 9.259741350614733e-05, "loss": 0.0439, "step": 13000 }, { "epoch": 3.518117901568415, "grad_norm": 0.299001008272171, "learning_rate": 9.258297704157464e-05, "loss": 0.0452, "step": 13010 }, { "epoch": 3.5208220659816116, "grad_norm": 0.20374733209609985, "learning_rate": 9.256852764141786e-05, "loss": 0.0447, "step": 13020 }, { "epoch": 3.523526230394808, "grad_norm": 0.21836544573307037, "learning_rate": 9.255406531006634e-05, "loss": 0.0441, "step": 13030 }, { "epoch": 3.5262303948080045, "grad_norm": 0.22282321751117706, "learning_rate": 9.253959005191335e-05, "loss": 0.0431, "step": 13040 }, { "epoch": 3.5289345592212005, "grad_norm": 0.10681670159101486, "learning_rate": 9.25251018713561e-05, "loss": 0.043, "step": 13050 }, { "epoch": 3.531638723634397, "grad_norm": 0.22261972725391388, "learning_rate": 9.251060077279571e-05, "loss": 0.0438, "step": 13060 }, { "epoch": 3.5343428880475933, "grad_norm": 0.2153923511505127, "learning_rate": 9.249608676063724e-05, "loss": 0.0419, "step": 13070 }, { "epoch": 3.5370470524607898, "grad_norm": 0.17210474610328674, "learning_rate": 9.248155983928964e-05, "loss": 0.044, "step": 13080 }, { "epoch": 3.539751216873986, "grad_norm": 0.17922434210777283, "learning_rate": 9.246702001316583e-05, "loss": 0.0441, "step": 13090 }, { "epoch": 3.542455381287182, "grad_norm": 0.1971305012702942, "learning_rate": 9.245246728668262e-05, "loss": 0.0434, "step": 13100 }, { "epoch": 3.5451595457003786, "grad_norm": 0.21511736512184143, "learning_rate": 9.243790166426073e-05, "loss": 0.0445, "step": 13110 }, { "epoch": 3.547863710113575, "grad_norm": 0.21251922845840454, "learning_rate": 9.242332315032484e-05, "loss": 0.0435, "step": 13120 }, { "epoch": 3.550567874526771, "grad_norm": 0.13437214493751526, "learning_rate": 9.240873174930349e-05, "loss": 0.0453, "step": 13130 }, { "epoch": 3.5532720389399675, "grad_norm": 0.23452559113502502, "learning_rate": 9.239412746562917e-05, "loss": 0.0431, "step": 13140 }, { "epoch": 3.555976203353164, "grad_norm": 0.12029004096984863, "learning_rate": 9.237951030373828e-05, "loss": 0.0431, "step": 13150 }, { "epoch": 3.5586803677663603, "grad_norm": 0.14122320711612701, "learning_rate": 9.236488026807113e-05, "loss": 0.0453, "step": 13160 }, { "epoch": 3.5613845321795568, "grad_norm": 0.2241716831922531, "learning_rate": 9.235023736307193e-05, "loss": 0.0454, "step": 13170 }, { "epoch": 3.5640886965927527, "grad_norm": 0.14715789258480072, "learning_rate": 9.233558159318881e-05, "loss": 0.0418, "step": 13180 }, { "epoch": 3.566792861005949, "grad_norm": 0.19499850273132324, "learning_rate": 9.232091296287382e-05, "loss": 0.0437, "step": 13190 }, { "epoch": 3.5694970254191456, "grad_norm": 0.16550913453102112, "learning_rate": 9.230623147658288e-05, "loss": 0.0467, "step": 13200 }, { "epoch": 3.5722011898323416, "grad_norm": 0.21789558231830597, "learning_rate": 9.229153713877586e-05, "loss": 0.0423, "step": 13210 }, { "epoch": 3.574905354245538, "grad_norm": 0.18777070939540863, "learning_rate": 9.227682995391649e-05, "loss": 0.0439, "step": 13220 }, { "epoch": 3.5776095186587344, "grad_norm": 0.19411399960517883, "learning_rate": 9.226210992647243e-05, "loss": 0.0437, "step": 13230 }, { "epoch": 3.580313683071931, "grad_norm": 0.19304165244102478, "learning_rate": 9.224737706091525e-05, "loss": 0.0435, "step": 13240 }, { "epoch": 3.5830178474851273, "grad_norm": 0.1913703829050064, "learning_rate": 9.223263136172039e-05, "loss": 0.0443, "step": 13250 }, { "epoch": 3.5857220118983233, "grad_norm": 0.10480478405952454, "learning_rate": 9.22178728333672e-05, "loss": 0.043, "step": 13260 }, { "epoch": 3.5884261763115197, "grad_norm": 0.23774541914463043, "learning_rate": 9.220310148033897e-05, "loss": 0.0454, "step": 13270 }, { "epoch": 3.591130340724716, "grad_norm": 0.12866631150245667, "learning_rate": 9.21883173071228e-05, "loss": 0.0431, "step": 13280 }, { "epoch": 3.593834505137912, "grad_norm": 0.14224112033843994, "learning_rate": 9.217352031820976e-05, "loss": 0.0457, "step": 13290 }, { "epoch": 3.5965386695511086, "grad_norm": 0.13304609060287476, "learning_rate": 9.215871051809477e-05, "loss": 0.044, "step": 13300 }, { "epoch": 3.599242833964305, "grad_norm": 0.15073449909687042, "learning_rate": 9.214388791127666e-05, "loss": 0.0431, "step": 13310 }, { "epoch": 3.6019469983775014, "grad_norm": 0.1742781549692154, "learning_rate": 9.212905250225814e-05, "loss": 0.0453, "step": 13320 }, { "epoch": 3.604651162790698, "grad_norm": 0.20765608549118042, "learning_rate": 9.211420429554583e-05, "loss": 0.0446, "step": 13330 }, { "epoch": 3.607355327203894, "grad_norm": 0.17859548330307007, "learning_rate": 9.209934329565022e-05, "loss": 0.042, "step": 13340 }, { "epoch": 3.6100594916170903, "grad_norm": 0.1422380805015564, "learning_rate": 9.208446950708568e-05, "loss": 0.0445, "step": 13350 }, { "epoch": 3.6127636560302867, "grad_norm": 0.1596457064151764, "learning_rate": 9.20695829343705e-05, "loss": 0.0441, "step": 13360 }, { "epoch": 3.6154678204434827, "grad_norm": 0.20147940516471863, "learning_rate": 9.205468358202678e-05, "loss": 0.0442, "step": 13370 }, { "epoch": 3.618171984856679, "grad_norm": 0.24806085228919983, "learning_rate": 9.203977145458059e-05, "loss": 0.0428, "step": 13380 }, { "epoch": 3.6208761492698756, "grad_norm": 0.1206275224685669, "learning_rate": 9.202484655656182e-05, "loss": 0.0435, "step": 13390 }, { "epoch": 3.623580313683072, "grad_norm": 0.1745067536830902, "learning_rate": 9.200990889250427e-05, "loss": 0.045, "step": 13400 }, { "epoch": 3.6262844780962684, "grad_norm": 0.09115767478942871, "learning_rate": 9.19949584669456e-05, "loss": 0.0444, "step": 13410 }, { "epoch": 3.628988642509465, "grad_norm": 0.1335759460926056, "learning_rate": 9.197999528442738e-05, "loss": 0.0441, "step": 13420 }, { "epoch": 3.631692806922661, "grad_norm": 0.19289109110832214, "learning_rate": 9.196501934949499e-05, "loss": 0.045, "step": 13430 }, { "epoch": 3.6343969713358573, "grad_norm": 0.1944129317998886, "learning_rate": 9.195003066669776e-05, "loss": 0.0426, "step": 13440 }, { "epoch": 3.6371011357490537, "grad_norm": 0.16266724467277527, "learning_rate": 9.193502924058884e-05, "loss": 0.0435, "step": 13450 }, { "epoch": 3.6398053001622497, "grad_norm": 0.11242619156837463, "learning_rate": 9.192001507572526e-05, "loss": 0.043, "step": 13460 }, { "epoch": 3.642509464575446, "grad_norm": 0.20592531561851501, "learning_rate": 9.190498817666793e-05, "loss": 0.0435, "step": 13470 }, { "epoch": 3.6452136289886425, "grad_norm": 0.22910858690738678, "learning_rate": 9.188994854798163e-05, "loss": 0.0435, "step": 13480 }, { "epoch": 3.647917793401839, "grad_norm": 0.21644778549671173, "learning_rate": 9.187489619423499e-05, "loss": 0.0432, "step": 13490 }, { "epoch": 3.6506219578150354, "grad_norm": 0.16191741824150085, "learning_rate": 9.185983112000056e-05, "loss": 0.0439, "step": 13500 }, { "epoch": 3.6533261222282314, "grad_norm": 0.13299934566020966, "learning_rate": 9.184475332985464e-05, "loss": 0.0431, "step": 13510 }, { "epoch": 3.656030286641428, "grad_norm": 0.18713848292827606, "learning_rate": 9.182966282837754e-05, "loss": 0.0448, "step": 13520 }, { "epoch": 3.6587344510546242, "grad_norm": 0.1879199743270874, "learning_rate": 9.18145596201533e-05, "loss": 0.0432, "step": 13530 }, { "epoch": 3.6614386154678202, "grad_norm": 0.15207509696483612, "learning_rate": 9.179944370976991e-05, "loss": 0.0449, "step": 13540 }, { "epoch": 3.6641427798810167, "grad_norm": 0.15804405510425568, "learning_rate": 9.178431510181918e-05, "loss": 0.0421, "step": 13550 }, { "epoch": 3.666846944294213, "grad_norm": 0.16486486792564392, "learning_rate": 9.176917380089675e-05, "loss": 0.0428, "step": 13560 }, { "epoch": 3.6695511087074095, "grad_norm": 0.16717685759067535, "learning_rate": 9.175401981160219e-05, "loss": 0.0432, "step": 13570 }, { "epoch": 3.672255273120606, "grad_norm": 0.21063390374183655, "learning_rate": 9.173885313853885e-05, "loss": 0.0425, "step": 13580 }, { "epoch": 3.674959437533802, "grad_norm": 0.1767844706773758, "learning_rate": 9.172367378631398e-05, "loss": 0.0434, "step": 13590 }, { "epoch": 3.6776636019469984, "grad_norm": 0.11828300356864929, "learning_rate": 9.170848175953866e-05, "loss": 0.0424, "step": 13600 }, { "epoch": 3.680367766360195, "grad_norm": 0.20195387303829193, "learning_rate": 9.169327706282784e-05, "loss": 0.0432, "step": 13610 }, { "epoch": 3.683071930773391, "grad_norm": 0.15706110000610352, "learning_rate": 9.167805970080029e-05, "loss": 0.0421, "step": 13620 }, { "epoch": 3.685776095186587, "grad_norm": 0.24226446449756622, "learning_rate": 9.166282967807864e-05, "loss": 0.0441, "step": 13630 }, { "epoch": 3.6884802595997837, "grad_norm": 0.21941934525966644, "learning_rate": 9.16475869992894e-05, "loss": 0.0415, "step": 13640 }, { "epoch": 3.69118442401298, "grad_norm": 0.2118944376707077, "learning_rate": 9.163233166906284e-05, "loss": 0.0468, "step": 13650 }, { "epoch": 3.6938885884261765, "grad_norm": 0.2110217809677124, "learning_rate": 9.161706369203317e-05, "loss": 0.0435, "step": 13660 }, { "epoch": 3.6965927528393725, "grad_norm": 0.25231316685676575, "learning_rate": 9.16017830728384e-05, "loss": 0.0441, "step": 13670 }, { "epoch": 3.699296917252569, "grad_norm": 0.22875094413757324, "learning_rate": 9.158648981612035e-05, "loss": 0.0426, "step": 13680 }, { "epoch": 3.7020010816657654, "grad_norm": 0.1454615294933319, "learning_rate": 9.157118392652472e-05, "loss": 0.0427, "step": 13690 }, { "epoch": 3.7047052460789613, "grad_norm": 0.12420352548360825, "learning_rate": 9.155586540870104e-05, "loss": 0.041, "step": 13700 }, { "epoch": 3.7074094104921578, "grad_norm": 0.18959109485149384, "learning_rate": 9.154053426730267e-05, "loss": 0.0418, "step": 13710 }, { "epoch": 3.710113574905354, "grad_norm": 0.2567480206489563, "learning_rate": 9.15251905069868e-05, "loss": 0.0431, "step": 13720 }, { "epoch": 3.7128177393185506, "grad_norm": 0.1773953139781952, "learning_rate": 9.150983413241446e-05, "loss": 0.0438, "step": 13730 }, { "epoch": 3.715521903731747, "grad_norm": 0.1513749212026596, "learning_rate": 9.149446514825051e-05, "loss": 0.042, "step": 13740 }, { "epoch": 3.718226068144943, "grad_norm": 0.18566757440567017, "learning_rate": 9.147908355916365e-05, "loss": 0.0431, "step": 13750 }, { "epoch": 3.7209302325581395, "grad_norm": 0.15706850588321686, "learning_rate": 9.146368936982642e-05, "loss": 0.0429, "step": 13760 }, { "epoch": 3.723634396971336, "grad_norm": 0.1727975308895111, "learning_rate": 9.144828258491511e-05, "loss": 0.0431, "step": 13770 }, { "epoch": 3.7263385613845323, "grad_norm": 0.15430715680122375, "learning_rate": 9.143286320910996e-05, "loss": 0.0432, "step": 13780 }, { "epoch": 3.7290427257977283, "grad_norm": 0.2729777991771698, "learning_rate": 9.141743124709491e-05, "loss": 0.0428, "step": 13790 }, { "epoch": 3.7317468902109248, "grad_norm": 0.17813487350940704, "learning_rate": 9.140198670355784e-05, "loss": 0.0434, "step": 13800 }, { "epoch": 3.734451054624121, "grad_norm": 0.1941099464893341, "learning_rate": 9.138652958319034e-05, "loss": 0.044, "step": 13810 }, { "epoch": 3.7371552190373176, "grad_norm": 0.1874927431344986, "learning_rate": 9.137105989068791e-05, "loss": 0.0436, "step": 13820 }, { "epoch": 3.739859383450514, "grad_norm": 0.17999231815338135, "learning_rate": 9.135557763074983e-05, "loss": 0.0436, "step": 13830 }, { "epoch": 3.74256354786371, "grad_norm": 0.20721830427646637, "learning_rate": 9.13400828080792e-05, "loss": 0.0421, "step": 13840 }, { "epoch": 3.7452677122769065, "grad_norm": 0.18121302127838135, "learning_rate": 9.132457542738292e-05, "loss": 0.0421, "step": 13850 }, { "epoch": 3.747971876690103, "grad_norm": 0.22586855292320251, "learning_rate": 9.130905549337174e-05, "loss": 0.0456, "step": 13860 }, { "epoch": 3.750676041103299, "grad_norm": 0.22938431799411774, "learning_rate": 9.129352301076021e-05, "loss": 0.0438, "step": 13870 }, { "epoch": 3.7533802055164953, "grad_norm": 0.1434369832277298, "learning_rate": 9.127797798426668e-05, "loss": 0.0438, "step": 13880 }, { "epoch": 3.7560843699296917, "grad_norm": 0.18739400804042816, "learning_rate": 9.126242041861333e-05, "loss": 0.0438, "step": 13890 }, { "epoch": 3.758788534342888, "grad_norm": 0.19218012690544128, "learning_rate": 9.124685031852611e-05, "loss": 0.0446, "step": 13900 }, { "epoch": 3.7614926987560846, "grad_norm": 0.17030611634254456, "learning_rate": 9.123126768873482e-05, "loss": 0.0424, "step": 13910 }, { "epoch": 3.7641968631692806, "grad_norm": 0.2744395434856415, "learning_rate": 9.121567253397308e-05, "loss": 0.042, "step": 13920 }, { "epoch": 3.766901027582477, "grad_norm": 0.16283737123012543, "learning_rate": 9.120006485897824e-05, "loss": 0.041, "step": 13930 }, { "epoch": 3.7696051919956735, "grad_norm": 0.19074185192584991, "learning_rate": 9.118444466849152e-05, "loss": 0.044, "step": 13940 }, { "epoch": 3.7723093564088694, "grad_norm": 0.16514909267425537, "learning_rate": 9.116881196725793e-05, "loss": 0.0439, "step": 13950 }, { "epoch": 3.775013520822066, "grad_norm": 0.19462941586971283, "learning_rate": 9.115316676002627e-05, "loss": 0.0435, "step": 13960 }, { "epoch": 3.7777176852352623, "grad_norm": 0.17247644066810608, "learning_rate": 9.113750905154911e-05, "loss": 0.0415, "step": 13970 }, { "epoch": 3.7804218496484587, "grad_norm": 0.1726096272468567, "learning_rate": 9.112183884658289e-05, "loss": 0.0429, "step": 13980 }, { "epoch": 3.783126014061655, "grad_norm": 0.12307066470384598, "learning_rate": 9.11061561498878e-05, "loss": 0.0432, "step": 13990 }, { "epoch": 3.785830178474851, "grad_norm": 0.11955133825540543, "learning_rate": 9.109046096622779e-05, "loss": 0.0454, "step": 14000 }, { "epoch": 3.7885343428880476, "grad_norm": 0.16055236756801605, "learning_rate": 9.107475330037069e-05, "loss": 0.0437, "step": 14010 }, { "epoch": 3.791238507301244, "grad_norm": 0.17118480801582336, "learning_rate": 9.105903315708806e-05, "loss": 0.0437, "step": 14020 }, { "epoch": 3.79394267171444, "grad_norm": 0.17204910516738892, "learning_rate": 9.104330054115524e-05, "loss": 0.0442, "step": 14030 }, { "epoch": 3.7966468361276364, "grad_norm": 0.20955459773540497, "learning_rate": 9.102755545735141e-05, "loss": 0.0426, "step": 14040 }, { "epoch": 3.799351000540833, "grad_norm": 0.1611611396074295, "learning_rate": 9.10117979104595e-05, "loss": 0.0445, "step": 14050 }, { "epoch": 3.8020551649540293, "grad_norm": 0.2241182029247284, "learning_rate": 9.099602790526624e-05, "loss": 0.045, "step": 14060 }, { "epoch": 3.8047593293672257, "grad_norm": 0.1237950325012207, "learning_rate": 9.098024544656212e-05, "loss": 0.0439, "step": 14070 }, { "epoch": 3.8074634937804217, "grad_norm": 0.1502668410539627, "learning_rate": 9.096445053914148e-05, "loss": 0.0439, "step": 14080 }, { "epoch": 3.810167658193618, "grad_norm": 0.17038194835186005, "learning_rate": 9.094864318780236e-05, "loss": 0.0423, "step": 14090 }, { "epoch": 3.8128718226068146, "grad_norm": 0.2035704255104065, "learning_rate": 9.093282339734663e-05, "loss": 0.0427, "step": 14100 }, { "epoch": 3.8155759870200106, "grad_norm": 0.18530289828777313, "learning_rate": 9.091699117257992e-05, "loss": 0.0422, "step": 14110 }, { "epoch": 3.818280151433207, "grad_norm": 0.18067143857479095, "learning_rate": 9.090114651831163e-05, "loss": 0.0437, "step": 14120 }, { "epoch": 3.8209843158464034, "grad_norm": 0.22479501366615295, "learning_rate": 9.088528943935497e-05, "loss": 0.0451, "step": 14130 }, { "epoch": 3.8236884802596, "grad_norm": 0.16916988790035248, "learning_rate": 9.086941994052689e-05, "loss": 0.0428, "step": 14140 }, { "epoch": 3.8263926446727963, "grad_norm": 0.20568175613880157, "learning_rate": 9.085353802664813e-05, "loss": 0.0414, "step": 14150 }, { "epoch": 3.8290968090859927, "grad_norm": 0.1714010238647461, "learning_rate": 9.08376437025432e-05, "loss": 0.0421, "step": 14160 }, { "epoch": 3.8318009734991887, "grad_norm": 0.24551717936992645, "learning_rate": 9.082173697304035e-05, "loss": 0.0435, "step": 14170 }, { "epoch": 3.834505137912385, "grad_norm": 0.11734412610530853, "learning_rate": 9.080581784297166e-05, "loss": 0.043, "step": 14180 }, { "epoch": 3.8372093023255816, "grad_norm": 0.1272059977054596, "learning_rate": 9.078988631717291e-05, "loss": 0.0427, "step": 14190 }, { "epoch": 3.8399134667387775, "grad_norm": 0.185676708817482, "learning_rate": 9.077394240048369e-05, "loss": 0.0423, "step": 14200 }, { "epoch": 3.842617631151974, "grad_norm": 0.1291651576757431, "learning_rate": 9.075798609774736e-05, "loss": 0.0429, "step": 14210 }, { "epoch": 3.8453217955651704, "grad_norm": 0.1526947021484375, "learning_rate": 9.0742017413811e-05, "loss": 0.0417, "step": 14220 }, { "epoch": 3.848025959978367, "grad_norm": 0.14071780443191528, "learning_rate": 9.072603635352548e-05, "loss": 0.0414, "step": 14230 }, { "epoch": 3.8507301243915633, "grad_norm": 0.1421552300453186, "learning_rate": 9.071004292174541e-05, "loss": 0.0431, "step": 14240 }, { "epoch": 3.8534342888047592, "grad_norm": 0.18650920689105988, "learning_rate": 9.06940371233292e-05, "loss": 0.044, "step": 14250 }, { "epoch": 3.8561384532179557, "grad_norm": 0.21075324714183807, "learning_rate": 9.067801896313898e-05, "loss": 0.0412, "step": 14260 }, { "epoch": 3.858842617631152, "grad_norm": 0.24055972695350647, "learning_rate": 9.066198844604064e-05, "loss": 0.0418, "step": 14270 }, { "epoch": 3.861546782044348, "grad_norm": 0.24313978850841522, "learning_rate": 9.06459455769038e-05, "loss": 0.0438, "step": 14280 }, { "epoch": 3.8642509464575445, "grad_norm": 0.17806187272071838, "learning_rate": 9.062989036060193e-05, "loss": 0.0453, "step": 14290 }, { "epoch": 3.866955110870741, "grad_norm": 0.22348885238170624, "learning_rate": 9.061382280201212e-05, "loss": 0.0423, "step": 14300 }, { "epoch": 3.8696592752839374, "grad_norm": 0.21333378553390503, "learning_rate": 9.059774290601528e-05, "loss": 0.0423, "step": 14310 }, { "epoch": 3.872363439697134, "grad_norm": 0.1785406619310379, "learning_rate": 9.058165067749606e-05, "loss": 0.0437, "step": 14320 }, { "epoch": 3.87506760411033, "grad_norm": 0.1864941269159317, "learning_rate": 9.056554612134288e-05, "loss": 0.0423, "step": 14330 }, { "epoch": 3.8777717685235262, "grad_norm": 0.1935897022485733, "learning_rate": 9.054942924244785e-05, "loss": 0.0438, "step": 14340 }, { "epoch": 3.8804759329367227, "grad_norm": 0.14875119924545288, "learning_rate": 9.053330004570686e-05, "loss": 0.043, "step": 14350 }, { "epoch": 3.8831800973499186, "grad_norm": 0.1437215805053711, "learning_rate": 9.051715853601955e-05, "loss": 0.0428, "step": 14360 }, { "epoch": 3.885884261763115, "grad_norm": 0.19017720222473145, "learning_rate": 9.050100471828926e-05, "loss": 0.0431, "step": 14370 }, { "epoch": 3.8885884261763115, "grad_norm": 0.16863082349300385, "learning_rate": 9.048483859742311e-05, "loss": 0.0441, "step": 14380 }, { "epoch": 3.891292590589508, "grad_norm": 0.19860723614692688, "learning_rate": 9.046866017833193e-05, "loss": 0.0442, "step": 14390 }, { "epoch": 3.8939967550027044, "grad_norm": 0.17301703989505768, "learning_rate": 9.045246946593029e-05, "loss": 0.0423, "step": 14400 }, { "epoch": 3.8967009194159004, "grad_norm": 0.1918916255235672, "learning_rate": 9.043626646513652e-05, "loss": 0.0438, "step": 14410 }, { "epoch": 3.899405083829097, "grad_norm": 0.21052919328212738, "learning_rate": 9.042005118087267e-05, "loss": 0.0427, "step": 14420 }, { "epoch": 3.902109248242293, "grad_norm": 0.22708196938037872, "learning_rate": 9.040382361806448e-05, "loss": 0.0432, "step": 14430 }, { "epoch": 3.904813412655489, "grad_norm": 0.24632954597473145, "learning_rate": 9.038758378164148e-05, "loss": 0.0428, "step": 14440 }, { "epoch": 3.9075175770686856, "grad_norm": 0.19725996255874634, "learning_rate": 9.037133167653691e-05, "loss": 0.0422, "step": 14450 }, { "epoch": 3.910221741481882, "grad_norm": 0.1897476315498352, "learning_rate": 9.035506730768771e-05, "loss": 0.0434, "step": 14460 }, { "epoch": 3.9129259058950785, "grad_norm": 0.16816918551921844, "learning_rate": 9.033879068003458e-05, "loss": 0.042, "step": 14470 }, { "epoch": 3.915630070308275, "grad_norm": 0.1204901710152626, "learning_rate": 9.032250179852193e-05, "loss": 0.0457, "step": 14480 }, { "epoch": 3.918334234721471, "grad_norm": 0.1552884876728058, "learning_rate": 9.030620066809787e-05, "loss": 0.0421, "step": 14490 }, { "epoch": 3.9210383991346673, "grad_norm": 0.25708746910095215, "learning_rate": 9.028988729371428e-05, "loss": 0.0422, "step": 14500 }, { "epoch": 3.9237425635478638, "grad_norm": 0.17253535985946655, "learning_rate": 9.027356168032673e-05, "loss": 0.0423, "step": 14510 }, { "epoch": 3.9264467279610598, "grad_norm": 0.19932113587856293, "learning_rate": 9.02572238328945e-05, "loss": 0.0437, "step": 14520 }, { "epoch": 3.929150892374256, "grad_norm": 0.23330065608024597, "learning_rate": 9.02408737563806e-05, "loss": 0.0441, "step": 14530 }, { "epoch": 3.9318550567874526, "grad_norm": 0.22333213686943054, "learning_rate": 9.022451145575174e-05, "loss": 0.0437, "step": 14540 }, { "epoch": 3.934559221200649, "grad_norm": 0.17214223742485046, "learning_rate": 9.02081369359784e-05, "loss": 0.0429, "step": 14550 }, { "epoch": 3.9372633856138455, "grad_norm": 0.18931443989276886, "learning_rate": 9.019175020203465e-05, "loss": 0.0432, "step": 14560 }, { "epoch": 3.939967550027042, "grad_norm": 0.12328076362609863, "learning_rate": 9.017535125889842e-05, "loss": 0.0422, "step": 14570 }, { "epoch": 3.942671714440238, "grad_norm": 0.13864947855472565, "learning_rate": 9.015894011155124e-05, "loss": 0.046, "step": 14580 }, { "epoch": 3.9453758788534343, "grad_norm": 0.13879473507404327, "learning_rate": 9.014251676497838e-05, "loss": 0.045, "step": 14590 }, { "epoch": 3.9480800432666308, "grad_norm": 0.16625548899173737, "learning_rate": 9.012608122416884e-05, "loss": 0.0416, "step": 14600 }, { "epoch": 3.9507842076798267, "grad_norm": 0.10436610877513885, "learning_rate": 9.010963349411529e-05, "loss": 0.0432, "step": 14610 }, { "epoch": 3.953488372093023, "grad_norm": 0.11758750677108765, "learning_rate": 9.00931735798141e-05, "loss": 0.0415, "step": 14620 }, { "epoch": 3.9561925365062196, "grad_norm": 0.16573293507099152, "learning_rate": 9.00767014862654e-05, "loss": 0.044, "step": 14630 }, { "epoch": 3.958896700919416, "grad_norm": 0.17590229213237762, "learning_rate": 9.006021721847295e-05, "loss": 0.0437, "step": 14640 }, { "epoch": 3.9616008653326125, "grad_norm": 0.1762744039297104, "learning_rate": 9.004372078144423e-05, "loss": 0.0433, "step": 14650 }, { "epoch": 3.9643050297458085, "grad_norm": 0.16339926421642303, "learning_rate": 9.002721218019043e-05, "loss": 0.0424, "step": 14660 }, { "epoch": 3.967009194159005, "grad_norm": 0.12082333862781525, "learning_rate": 9.001069141972642e-05, "loss": 0.0413, "step": 14670 }, { "epoch": 3.9697133585722013, "grad_norm": 0.14983272552490234, "learning_rate": 8.99941585050708e-05, "loss": 0.0428, "step": 14680 }, { "epoch": 3.9724175229853973, "grad_norm": 0.10632647573947906, "learning_rate": 8.997761344124578e-05, "loss": 0.0419, "step": 14690 }, { "epoch": 3.9751216873985937, "grad_norm": 0.1214854046702385, "learning_rate": 8.996105623327737e-05, "loss": 0.0426, "step": 14700 }, { "epoch": 3.97782585181179, "grad_norm": 0.20160366594791412, "learning_rate": 8.994448688619517e-05, "loss": 0.0412, "step": 14710 }, { "epoch": 3.9805300162249866, "grad_norm": 0.12309478223323822, "learning_rate": 8.992790540503253e-05, "loss": 0.0412, "step": 14720 }, { "epoch": 3.983234180638183, "grad_norm": 0.17343643307685852, "learning_rate": 8.991131179482648e-05, "loss": 0.0425, "step": 14730 }, { "epoch": 3.985938345051379, "grad_norm": 0.10687506943941116, "learning_rate": 8.989470606061768e-05, "loss": 0.0438, "step": 14740 }, { "epoch": 3.9886425094645754, "grad_norm": 0.19845935702323914, "learning_rate": 8.987808820745056e-05, "loss": 0.0441, "step": 14750 }, { "epoch": 3.991346673877772, "grad_norm": 0.16287744045257568, "learning_rate": 8.986145824037315e-05, "loss": 0.0421, "step": 14760 }, { "epoch": 3.994050838290968, "grad_norm": 0.2661382853984833, "learning_rate": 8.984481616443721e-05, "loss": 0.0427, "step": 14770 }, { "epoch": 3.9967550027041643, "grad_norm": 0.2636624276638031, "learning_rate": 8.982816198469815e-05, "loss": 0.0428, "step": 14780 }, { "epoch": 3.9994591671173607, "grad_norm": 0.16969388723373413, "learning_rate": 8.98114957062151e-05, "loss": 0.0426, "step": 14790 }, { "epoch": 4.002163331530557, "grad_norm": 0.16447681188583374, "learning_rate": 8.97948173340508e-05, "loss": 0.0425, "step": 14800 }, { "epoch": 4.004867495943754, "grad_norm": 0.14992794394493103, "learning_rate": 8.977812687327172e-05, "loss": 0.0422, "step": 14810 }, { "epoch": 4.00757166035695, "grad_norm": 0.17075732350349426, "learning_rate": 8.976142432894798e-05, "loss": 0.0442, "step": 14820 }, { "epoch": 4.010275824770146, "grad_norm": 0.21440982818603516, "learning_rate": 8.974470970615336e-05, "loss": 0.0422, "step": 14830 }, { "epoch": 4.012979989183342, "grad_norm": 0.20003965497016907, "learning_rate": 8.972798300996534e-05, "loss": 0.0421, "step": 14840 }, { "epoch": 4.015684153596538, "grad_norm": 0.10121801495552063, "learning_rate": 8.971124424546504e-05, "loss": 0.043, "step": 14850 }, { "epoch": 4.018388318009735, "grad_norm": 0.13814550638198853, "learning_rate": 8.969449341773724e-05, "loss": 0.0408, "step": 14860 }, { "epoch": 4.021092482422931, "grad_norm": 0.22093512117862701, "learning_rate": 8.967773053187042e-05, "loss": 0.0408, "step": 14870 }, { "epoch": 4.023796646836128, "grad_norm": 0.3324645161628723, "learning_rate": 8.966095559295668e-05, "loss": 0.0424, "step": 14880 }, { "epoch": 4.026500811249324, "grad_norm": 0.20406076312065125, "learning_rate": 8.964416860609184e-05, "loss": 0.0418, "step": 14890 }, { "epoch": 4.029204975662521, "grad_norm": 0.16666050255298615, "learning_rate": 8.962736957637532e-05, "loss": 0.0429, "step": 14900 }, { "epoch": 4.031909140075717, "grad_norm": 0.1949862390756607, "learning_rate": 8.96105585089102e-05, "loss": 0.0435, "step": 14910 }, { "epoch": 4.0346133044889125, "grad_norm": 0.17071975767612457, "learning_rate": 8.959373540880329e-05, "loss": 0.0425, "step": 14920 }, { "epoch": 4.037317468902109, "grad_norm": 0.22319196164608002, "learning_rate": 8.957690028116495e-05, "loss": 0.0435, "step": 14930 }, { "epoch": 4.040021633315305, "grad_norm": 0.1484655737876892, "learning_rate": 8.956005313110928e-05, "loss": 0.0424, "step": 14940 }, { "epoch": 4.042725797728502, "grad_norm": 0.198496475815773, "learning_rate": 8.9543193963754e-05, "loss": 0.0428, "step": 14950 }, { "epoch": 4.045429962141698, "grad_norm": 0.19399996101856232, "learning_rate": 8.952632278422048e-05, "loss": 0.0438, "step": 14960 }, { "epoch": 4.048134126554895, "grad_norm": 0.15227803587913513, "learning_rate": 8.95094395976337e-05, "loss": 0.0439, "step": 14970 }, { "epoch": 4.050838290968091, "grad_norm": 0.14691473543643951, "learning_rate": 8.949254440912239e-05, "loss": 0.0416, "step": 14980 }, { "epoch": 4.0535424553812875, "grad_norm": 0.13352231681346893, "learning_rate": 8.94756372238188e-05, "loss": 0.0429, "step": 14990 }, { "epoch": 4.056246619794483, "grad_norm": 0.20042908191680908, "learning_rate": 8.945871804685892e-05, "loss": 0.0412, "step": 15000 }, { "epoch": 4.0589507842076795, "grad_norm": 0.1767876148223877, "learning_rate": 8.944178688338236e-05, "loss": 0.043, "step": 15010 }, { "epoch": 4.061654948620876, "grad_norm": 0.16826264560222626, "learning_rate": 8.942484373853233e-05, "loss": 0.0427, "step": 15020 }, { "epoch": 4.064359113034072, "grad_norm": 0.13688290119171143, "learning_rate": 8.940788861745572e-05, "loss": 0.0417, "step": 15030 }, { "epoch": 4.067063277447269, "grad_norm": 0.17488743364810944, "learning_rate": 8.939092152530308e-05, "loss": 0.0435, "step": 15040 }, { "epoch": 4.069767441860465, "grad_norm": 0.19749459624290466, "learning_rate": 8.937394246722853e-05, "loss": 0.045, "step": 15050 }, { "epoch": 4.072471606273662, "grad_norm": 0.19014872610569, "learning_rate": 8.935695144838984e-05, "loss": 0.0429, "step": 15060 }, { "epoch": 4.075175770686858, "grad_norm": 0.18740364909172058, "learning_rate": 8.933994847394849e-05, "loss": 0.0428, "step": 15070 }, { "epoch": 4.077879935100054, "grad_norm": 0.18209616839885712, "learning_rate": 8.932293354906949e-05, "loss": 0.0414, "step": 15080 }, { "epoch": 4.08058409951325, "grad_norm": 0.1319260150194168, "learning_rate": 8.930590667892153e-05, "loss": 0.0423, "step": 15090 }, { "epoch": 4.0832882639264465, "grad_norm": 0.14377297461032867, "learning_rate": 8.928886786867696e-05, "loss": 0.0418, "step": 15100 }, { "epoch": 4.085992428339643, "grad_norm": 0.12252823263406754, "learning_rate": 8.927181712351168e-05, "loss": 0.0411, "step": 15110 }, { "epoch": 4.088696592752839, "grad_norm": 0.2554216682910919, "learning_rate": 8.925475444860527e-05, "loss": 0.041, "step": 15120 }, { "epoch": 4.091400757166036, "grad_norm": 0.2421223223209381, "learning_rate": 8.923767984914092e-05, "loss": 0.0429, "step": 15130 }, { "epoch": 4.094104921579232, "grad_norm": 0.18068727850914001, "learning_rate": 8.922059333030545e-05, "loss": 0.0425, "step": 15140 }, { "epoch": 4.096809085992429, "grad_norm": 0.12521113455295563, "learning_rate": 8.920349489728928e-05, "loss": 0.0424, "step": 15150 }, { "epoch": 4.099513250405625, "grad_norm": 0.13206468522548676, "learning_rate": 8.918638455528646e-05, "loss": 0.0417, "step": 15160 }, { "epoch": 4.102217414818821, "grad_norm": 0.172859787940979, "learning_rate": 8.916926230949468e-05, "loss": 0.0427, "step": 15170 }, { "epoch": 4.104921579232017, "grad_norm": 0.17104457318782806, "learning_rate": 8.915212816511522e-05, "loss": 0.0421, "step": 15180 }, { "epoch": 4.1076257436452135, "grad_norm": 0.15950129926204681, "learning_rate": 8.913498212735296e-05, "loss": 0.0414, "step": 15190 }, { "epoch": 4.11032990805841, "grad_norm": 0.16962087154388428, "learning_rate": 8.911782420141643e-05, "loss": 0.0443, "step": 15200 }, { "epoch": 4.113034072471606, "grad_norm": 0.17952176928520203, "learning_rate": 8.910065439251775e-05, "loss": 0.0445, "step": 15210 }, { "epoch": 4.115738236884803, "grad_norm": 0.13930054008960724, "learning_rate": 8.908347270587268e-05, "loss": 0.0409, "step": 15220 }, { "epoch": 4.118442401297999, "grad_norm": 0.22276447713375092, "learning_rate": 8.906627914670054e-05, "loss": 0.0422, "step": 15230 }, { "epoch": 4.121146565711196, "grad_norm": 0.12707549333572388, "learning_rate": 8.904907372022427e-05, "loss": 0.0419, "step": 15240 }, { "epoch": 4.123850730124391, "grad_norm": 0.22147390246391296, "learning_rate": 8.903185643167042e-05, "loss": 0.0415, "step": 15250 }, { "epoch": 4.126554894537588, "grad_norm": 0.17487935721874237, "learning_rate": 8.901462728626919e-05, "loss": 0.0408, "step": 15260 }, { "epoch": 4.129259058950784, "grad_norm": 0.12275327742099762, "learning_rate": 8.899738628925429e-05, "loss": 0.0426, "step": 15270 }, { "epoch": 4.1319632233639805, "grad_norm": 0.15383903682231903, "learning_rate": 8.898013344586312e-05, "loss": 0.0422, "step": 15280 }, { "epoch": 4.134667387777177, "grad_norm": 0.13081592321395874, "learning_rate": 8.896286876133661e-05, "loss": 0.0411, "step": 15290 }, { "epoch": 4.137371552190373, "grad_norm": 0.17683278024196625, "learning_rate": 8.894559224091933e-05, "loss": 0.0427, "step": 15300 }, { "epoch": 4.14007571660357, "grad_norm": 0.18608617782592773, "learning_rate": 8.892830388985942e-05, "loss": 0.0414, "step": 15310 }, { "epoch": 4.142779881016766, "grad_norm": 0.24724186956882477, "learning_rate": 8.891100371340864e-05, "loss": 0.0412, "step": 15320 }, { "epoch": 4.145484045429962, "grad_norm": 0.11832154542207718, "learning_rate": 8.889369171682231e-05, "loss": 0.0403, "step": 15330 }, { "epoch": 4.148188209843158, "grad_norm": 0.19774720072746277, "learning_rate": 8.887636790535936e-05, "loss": 0.0425, "step": 15340 }, { "epoch": 4.150892374256355, "grad_norm": 0.146718367934227, "learning_rate": 8.885903228428231e-05, "loss": 0.0414, "step": 15350 }, { "epoch": 4.153596538669551, "grad_norm": 0.12523098289966583, "learning_rate": 8.884168485885727e-05, "loss": 0.0409, "step": 15360 }, { "epoch": 4.1563007030827475, "grad_norm": 0.14794215559959412, "learning_rate": 8.882432563435393e-05, "loss": 0.042, "step": 15370 }, { "epoch": 4.159004867495944, "grad_norm": 0.14270928502082825, "learning_rate": 8.880695461604556e-05, "loss": 0.0423, "step": 15380 }, { "epoch": 4.16170903190914, "grad_norm": 0.1472293585538864, "learning_rate": 8.878957180920901e-05, "loss": 0.0409, "step": 15390 }, { "epoch": 4.164413196322337, "grad_norm": 0.13760752975940704, "learning_rate": 8.877217721912473e-05, "loss": 0.0405, "step": 15400 }, { "epoch": 4.167117360735532, "grad_norm": 0.1224634200334549, "learning_rate": 8.875477085107673e-05, "loss": 0.0413, "step": 15410 }, { "epoch": 4.169821525148729, "grad_norm": 0.1937919408082962, "learning_rate": 8.87373527103526e-05, "loss": 0.042, "step": 15420 }, { "epoch": 4.172525689561925, "grad_norm": 0.16981546580791473, "learning_rate": 8.871992280224353e-05, "loss": 0.0407, "step": 15430 }, { "epoch": 4.175229853975122, "grad_norm": 0.16084600985050201, "learning_rate": 8.870248113204422e-05, "loss": 0.0413, "step": 15440 }, { "epoch": 4.177934018388318, "grad_norm": 0.2059153914451599, "learning_rate": 8.868502770505306e-05, "loss": 0.0436, "step": 15450 }, { "epoch": 4.1806381828015144, "grad_norm": 0.13422170281410217, "learning_rate": 8.86675625265719e-05, "loss": 0.041, "step": 15460 }, { "epoch": 4.183342347214711, "grad_norm": 0.14328418672084808, "learning_rate": 8.865008560190618e-05, "loss": 0.0431, "step": 15470 }, { "epoch": 4.186046511627907, "grad_norm": 0.16946078836917877, "learning_rate": 8.863259693636496e-05, "loss": 0.0422, "step": 15480 }, { "epoch": 4.188750676041103, "grad_norm": 0.15485885739326477, "learning_rate": 8.861509653526083e-05, "loss": 0.0404, "step": 15490 }, { "epoch": 4.191454840454299, "grad_norm": 0.14665700495243073, "learning_rate": 8.859758440390993e-05, "loss": 0.0438, "step": 15500 }, { "epoch": 4.194159004867496, "grad_norm": 0.13908647000789642, "learning_rate": 8.858006054763202e-05, "loss": 0.0415, "step": 15510 }, { "epoch": 4.196863169280692, "grad_norm": 0.20718251168727875, "learning_rate": 8.856252497175035e-05, "loss": 0.0424, "step": 15520 }, { "epoch": 4.199567333693889, "grad_norm": 0.17094838619232178, "learning_rate": 8.854497768159178e-05, "loss": 0.042, "step": 15530 }, { "epoch": 4.202271498107085, "grad_norm": 0.1903538703918457, "learning_rate": 8.852741868248671e-05, "loss": 0.0427, "step": 15540 }, { "epoch": 4.204975662520281, "grad_norm": 0.14236029982566833, "learning_rate": 8.85098479797691e-05, "loss": 0.0416, "step": 15550 }, { "epoch": 4.207679826933478, "grad_norm": 0.20135176181793213, "learning_rate": 8.849226557877646e-05, "loss": 0.0437, "step": 15560 }, { "epoch": 4.210383991346674, "grad_norm": 0.1646629273891449, "learning_rate": 8.84746714848499e-05, "loss": 0.0406, "step": 15570 }, { "epoch": 4.21308815575987, "grad_norm": 0.10826192051172256, "learning_rate": 8.845706570333397e-05, "loss": 0.042, "step": 15580 }, { "epoch": 4.215792320173066, "grad_norm": 0.14094266295433044, "learning_rate": 8.84394482395769e-05, "loss": 0.0417, "step": 15590 }, { "epoch": 4.218496484586263, "grad_norm": 0.2384779155254364, "learning_rate": 8.842181909893038e-05, "loss": 0.0445, "step": 15600 }, { "epoch": 4.221200648999459, "grad_norm": 0.1492748111486435, "learning_rate": 8.840417828674969e-05, "loss": 0.0411, "step": 15610 }, { "epoch": 4.223904813412656, "grad_norm": 0.13272839784622192, "learning_rate": 8.838652580839364e-05, "loss": 0.0399, "step": 15620 }, { "epoch": 4.226608977825852, "grad_norm": 0.15558837354183197, "learning_rate": 8.836886166922458e-05, "loss": 0.0412, "step": 15630 }, { "epoch": 4.229313142239048, "grad_norm": 0.17564432322978973, "learning_rate": 8.835118587460844e-05, "loss": 0.0405, "step": 15640 }, { "epoch": 4.232017306652245, "grad_norm": 0.12758515775203705, "learning_rate": 8.83334984299146e-05, "loss": 0.0413, "step": 15650 }, { "epoch": 4.23472147106544, "grad_norm": 0.21974749863147736, "learning_rate": 8.83157993405161e-05, "loss": 0.0418, "step": 15660 }, { "epoch": 4.237425635478637, "grad_norm": 0.20240168273448944, "learning_rate": 8.829808861178943e-05, "loss": 0.0431, "step": 15670 }, { "epoch": 4.240129799891833, "grad_norm": 0.1579856276512146, "learning_rate": 8.828036624911464e-05, "loss": 0.0413, "step": 15680 }, { "epoch": 4.24283396430503, "grad_norm": 0.16276758909225464, "learning_rate": 8.826263225787532e-05, "loss": 0.0425, "step": 15690 }, { "epoch": 4.245538128718226, "grad_norm": 0.24922147393226624, "learning_rate": 8.824488664345858e-05, "loss": 0.0427, "step": 15700 }, { "epoch": 4.2482422931314225, "grad_norm": 0.1730310171842575, "learning_rate": 8.822712941125508e-05, "loss": 0.0426, "step": 15710 }, { "epoch": 4.250946457544619, "grad_norm": 0.15310470759868622, "learning_rate": 8.820936056665898e-05, "loss": 0.0411, "step": 15720 }, { "epoch": 4.253650621957815, "grad_norm": 0.19150085747241974, "learning_rate": 8.819158011506801e-05, "loss": 0.0412, "step": 15730 }, { "epoch": 4.256354786371011, "grad_norm": 0.1650816947221756, "learning_rate": 8.81737880618834e-05, "loss": 0.0418, "step": 15740 }, { "epoch": 4.259058950784207, "grad_norm": 0.22254806756973267, "learning_rate": 8.815598441250987e-05, "loss": 0.0411, "step": 15750 }, { "epoch": 4.261763115197404, "grad_norm": 0.17965026199817657, "learning_rate": 8.813816917235576e-05, "loss": 0.0418, "step": 15760 }, { "epoch": 4.2644672796106, "grad_norm": 0.1357681304216385, "learning_rate": 8.812034234683282e-05, "loss": 0.0416, "step": 15770 }, { "epoch": 4.267171444023797, "grad_norm": 0.1645105630159378, "learning_rate": 8.810250394135637e-05, "loss": 0.0425, "step": 15780 }, { "epoch": 4.269875608436993, "grad_norm": 0.1907026618719101, "learning_rate": 8.808465396134529e-05, "loss": 0.0408, "step": 15790 }, { "epoch": 4.2725797728501895, "grad_norm": 0.15523703396320343, "learning_rate": 8.806679241222189e-05, "loss": 0.0409, "step": 15800 }, { "epoch": 4.275283937263386, "grad_norm": 0.14760848879814148, "learning_rate": 8.804891929941203e-05, "loss": 0.0411, "step": 15810 }, { "epoch": 4.277988101676582, "grad_norm": 0.1923319697380066, "learning_rate": 8.803103462834514e-05, "loss": 0.0409, "step": 15820 }, { "epoch": 4.280692266089778, "grad_norm": 0.19231146574020386, "learning_rate": 8.801313840445408e-05, "loss": 0.0413, "step": 15830 }, { "epoch": 4.283396430502974, "grad_norm": 0.1394631415605545, "learning_rate": 8.799523063317524e-05, "loss": 0.0407, "step": 15840 }, { "epoch": 4.286100594916171, "grad_norm": 0.22076094150543213, "learning_rate": 8.797731131994854e-05, "loss": 0.0408, "step": 15850 }, { "epoch": 4.288804759329367, "grad_norm": 0.21978111565113068, "learning_rate": 8.795938047021739e-05, "loss": 0.0399, "step": 15860 }, { "epoch": 4.291508923742564, "grad_norm": 0.1791050136089325, "learning_rate": 8.794143808942872e-05, "loss": 0.0427, "step": 15870 }, { "epoch": 4.29421308815576, "grad_norm": 0.16014230251312256, "learning_rate": 8.792348418303296e-05, "loss": 0.0417, "step": 15880 }, { "epoch": 4.2969172525689565, "grad_norm": 0.17305737733840942, "learning_rate": 8.790551875648398e-05, "loss": 0.0421, "step": 15890 }, { "epoch": 4.299621416982152, "grad_norm": 0.17347510159015656, "learning_rate": 8.788754181523926e-05, "loss": 0.0419, "step": 15900 }, { "epoch": 4.3023255813953485, "grad_norm": 0.16744692623615265, "learning_rate": 8.78695533647597e-05, "loss": 0.0424, "step": 15910 }, { "epoch": 4.305029745808545, "grad_norm": 0.14041386544704437, "learning_rate": 8.785155341050972e-05, "loss": 0.0409, "step": 15920 }, { "epoch": 4.307733910221741, "grad_norm": 0.1364365667104721, "learning_rate": 8.783354195795721e-05, "loss": 0.0431, "step": 15930 }, { "epoch": 4.310438074634938, "grad_norm": 0.13666820526123047, "learning_rate": 8.78155190125736e-05, "loss": 0.0418, "step": 15940 }, { "epoch": 4.313142239048134, "grad_norm": 0.2108476310968399, "learning_rate": 8.779748457983378e-05, "loss": 0.0439, "step": 15950 }, { "epoch": 4.315846403461331, "grad_norm": 0.17467351257801056, "learning_rate": 8.777943866521612e-05, "loss": 0.0396, "step": 15960 }, { "epoch": 4.318550567874527, "grad_norm": 0.12035959213972092, "learning_rate": 8.77613812742025e-05, "loss": 0.0405, "step": 15970 }, { "epoch": 4.3212547322877235, "grad_norm": 0.1318381130695343, "learning_rate": 8.774331241227829e-05, "loss": 0.0411, "step": 15980 }, { "epoch": 4.323958896700919, "grad_norm": 0.15879254043102264, "learning_rate": 8.772523208493232e-05, "loss": 0.0424, "step": 15990 }, { "epoch": 4.3266630611141155, "grad_norm": 0.14027191698551178, "learning_rate": 8.770714029765692e-05, "loss": 0.0403, "step": 16000 }, { "epoch": 4.329367225527312, "grad_norm": 0.10797111690044403, "learning_rate": 8.768903705594789e-05, "loss": 0.041, "step": 16010 }, { "epoch": 4.332071389940508, "grad_norm": 0.1613101363182068, "learning_rate": 8.767092236530453e-05, "loss": 0.0425, "step": 16020 }, { "epoch": 4.334775554353705, "grad_norm": 0.14487043023109436, "learning_rate": 8.76527962312296e-05, "loss": 0.0418, "step": 16030 }, { "epoch": 4.337479718766901, "grad_norm": 0.1558711975812912, "learning_rate": 8.763465865922934e-05, "loss": 0.0398, "step": 16040 }, { "epoch": 4.340183883180098, "grad_norm": 0.1386823058128357, "learning_rate": 8.761650965481347e-05, "loss": 0.0422, "step": 16050 }, { "epoch": 4.342888047593294, "grad_norm": 0.1698409467935562, "learning_rate": 8.759834922349516e-05, "loss": 0.043, "step": 16060 }, { "epoch": 4.34559221200649, "grad_norm": 0.22715970873832703, "learning_rate": 8.758017737079108e-05, "loss": 0.0415, "step": 16070 }, { "epoch": 4.348296376419686, "grad_norm": 0.12163522094488144, "learning_rate": 8.756199410222137e-05, "loss": 0.0406, "step": 16080 }, { "epoch": 4.3510005408328825, "grad_norm": 0.16618169844150543, "learning_rate": 8.754379942330963e-05, "loss": 0.0412, "step": 16090 }, { "epoch": 4.353704705246079, "grad_norm": 0.21773777902126312, "learning_rate": 8.75255933395829e-05, "loss": 0.0415, "step": 16100 }, { "epoch": 4.356408869659275, "grad_norm": 0.15664711594581604, "learning_rate": 8.750737585657171e-05, "loss": 0.0411, "step": 16110 }, { "epoch": 4.359113034072472, "grad_norm": 0.1300199180841446, "learning_rate": 8.748914697981008e-05, "loss": 0.0409, "step": 16120 }, { "epoch": 4.361817198485668, "grad_norm": 0.13234849274158478, "learning_rate": 8.747090671483542e-05, "loss": 0.0404, "step": 16130 }, { "epoch": 4.364521362898865, "grad_norm": 0.21875004470348358, "learning_rate": 8.745265506718869e-05, "loss": 0.0408, "step": 16140 }, { "epoch": 4.36722552731206, "grad_norm": 0.17478713393211365, "learning_rate": 8.74343920424142e-05, "loss": 0.0428, "step": 16150 }, { "epoch": 4.369929691725257, "grad_norm": 0.13234567642211914, "learning_rate": 8.741611764605982e-05, "loss": 0.042, "step": 16160 }, { "epoch": 4.372633856138453, "grad_norm": 0.16182982921600342, "learning_rate": 8.739783188367682e-05, "loss": 0.0409, "step": 16170 }, { "epoch": 4.375338020551649, "grad_norm": 0.15722672641277313, "learning_rate": 8.737953476081991e-05, "loss": 0.0416, "step": 16180 }, { "epoch": 4.378042184964846, "grad_norm": 0.18752166628837585, "learning_rate": 8.73612262830473e-05, "loss": 0.0414, "step": 16190 }, { "epoch": 4.380746349378042, "grad_norm": 0.21112501621246338, "learning_rate": 8.734290645592061e-05, "loss": 0.0413, "step": 16200 }, { "epoch": 4.383450513791239, "grad_norm": 0.1769542396068573, "learning_rate": 8.732457528500493e-05, "loss": 0.0432, "step": 16210 }, { "epoch": 4.386154678204435, "grad_norm": 0.18709266185760498, "learning_rate": 8.730623277586875e-05, "loss": 0.0413, "step": 16220 }, { "epoch": 4.388858842617632, "grad_norm": 0.1892770677804947, "learning_rate": 8.72878789340841e-05, "loss": 0.0407, "step": 16230 }, { "epoch": 4.391563007030827, "grad_norm": 0.22068588435649872, "learning_rate": 8.726951376522635e-05, "loss": 0.0414, "step": 16240 }, { "epoch": 4.394267171444024, "grad_norm": 0.13508574664592743, "learning_rate": 8.725113727487435e-05, "loss": 0.0427, "step": 16250 }, { "epoch": 4.39697133585722, "grad_norm": 0.12698706984519958, "learning_rate": 8.723274946861042e-05, "loss": 0.0416, "step": 16260 }, { "epoch": 4.399675500270416, "grad_norm": 0.16046160459518433, "learning_rate": 8.721435035202026e-05, "loss": 0.0411, "step": 16270 }, { "epoch": 4.402379664683613, "grad_norm": 0.16429151594638824, "learning_rate": 8.719593993069306e-05, "loss": 0.0412, "step": 16280 }, { "epoch": 4.405083829096809, "grad_norm": 0.20161370933055878, "learning_rate": 8.717751821022139e-05, "loss": 0.0428, "step": 16290 }, { "epoch": 4.407787993510006, "grad_norm": 0.13435675203800201, "learning_rate": 8.715908519620134e-05, "loss": 0.0396, "step": 16300 }, { "epoch": 4.410492157923201, "grad_norm": 0.25239598751068115, "learning_rate": 8.71406408942323e-05, "loss": 0.0409, "step": 16310 }, { "epoch": 4.413196322336398, "grad_norm": 0.21257859468460083, "learning_rate": 8.712218530991723e-05, "loss": 0.0414, "step": 16320 }, { "epoch": 4.415900486749594, "grad_norm": 0.13298679888248444, "learning_rate": 8.710371844886241e-05, "loss": 0.0417, "step": 16330 }, { "epoch": 4.4186046511627906, "grad_norm": 0.20809456706047058, "learning_rate": 8.708524031667758e-05, "loss": 0.0414, "step": 16340 }, { "epoch": 4.421308815575987, "grad_norm": 0.16817578673362732, "learning_rate": 8.706675091897592e-05, "loss": 0.041, "step": 16350 }, { "epoch": 4.424012979989183, "grad_norm": 0.18862946331501007, "learning_rate": 8.704825026137404e-05, "loss": 0.0409, "step": 16360 }, { "epoch": 4.42671714440238, "grad_norm": 0.1534801870584488, "learning_rate": 8.702973834949192e-05, "loss": 0.0409, "step": 16370 }, { "epoch": 4.429421308815576, "grad_norm": 0.23815609514713287, "learning_rate": 8.701121518895301e-05, "loss": 0.0414, "step": 16380 }, { "epoch": 4.432125473228773, "grad_norm": 0.1964552402496338, "learning_rate": 8.699268078538414e-05, "loss": 0.0421, "step": 16390 }, { "epoch": 4.434829637641968, "grad_norm": 0.12690742313861847, "learning_rate": 8.69741351444156e-05, "loss": 0.0405, "step": 16400 }, { "epoch": 4.437533802055165, "grad_norm": 0.22159568965435028, "learning_rate": 8.695557827168101e-05, "loss": 0.0406, "step": 16410 }, { "epoch": 4.440237966468361, "grad_norm": 0.2541653513908386, "learning_rate": 8.693701017281753e-05, "loss": 0.0403, "step": 16420 }, { "epoch": 4.4429421308815575, "grad_norm": 0.15370798110961914, "learning_rate": 8.691843085346563e-05, "loss": 0.0411, "step": 16430 }, { "epoch": 4.445646295294754, "grad_norm": 0.20855116844177246, "learning_rate": 8.689984031926919e-05, "loss": 0.0421, "step": 16440 }, { "epoch": 4.44835045970795, "grad_norm": 0.14136317372322083, "learning_rate": 8.688123857587555e-05, "loss": 0.0422, "step": 16450 }, { "epoch": 4.451054624121147, "grad_norm": 0.15573912858963013, "learning_rate": 8.686262562893544e-05, "loss": 0.042, "step": 16460 }, { "epoch": 4.453758788534343, "grad_norm": 0.18775756657123566, "learning_rate": 8.684400148410294e-05, "loss": 0.041, "step": 16470 }, { "epoch": 4.456462952947539, "grad_norm": 0.2070770114660263, "learning_rate": 8.682536614703562e-05, "loss": 0.0415, "step": 16480 }, { "epoch": 4.459167117360735, "grad_norm": 0.1429271548986435, "learning_rate": 8.680671962339437e-05, "loss": 0.0402, "step": 16490 }, { "epoch": 4.461871281773932, "grad_norm": 0.21253642439842224, "learning_rate": 8.678806191884352e-05, "loss": 0.041, "step": 16500 }, { "epoch": 4.464575446187128, "grad_norm": 0.15350182354450226, "learning_rate": 8.67693930390508e-05, "loss": 0.0402, "step": 16510 }, { "epoch": 4.4672796106003245, "grad_norm": 0.11699886620044708, "learning_rate": 8.67507129896873e-05, "loss": 0.04, "step": 16520 }, { "epoch": 4.469983775013521, "grad_norm": 0.12335149943828583, "learning_rate": 8.673202177642757e-05, "loss": 0.0407, "step": 16530 }, { "epoch": 4.472687939426717, "grad_norm": 0.17046542465686798, "learning_rate": 8.671331940494945e-05, "loss": 0.0403, "step": 16540 }, { "epoch": 4.475392103839914, "grad_norm": 0.1536651849746704, "learning_rate": 8.669460588093427e-05, "loss": 0.0417, "step": 16550 }, { "epoch": 4.478096268253109, "grad_norm": 0.2471996694803238, "learning_rate": 8.667588121006667e-05, "loss": 0.0422, "step": 16560 }, { "epoch": 4.480800432666306, "grad_norm": 0.12481634318828583, "learning_rate": 8.665714539803475e-05, "loss": 0.04, "step": 16570 }, { "epoch": 4.483504597079502, "grad_norm": 0.14239521324634552, "learning_rate": 8.663839845052993e-05, "loss": 0.041, "step": 16580 }, { "epoch": 4.486208761492699, "grad_norm": 0.226005420088768, "learning_rate": 8.661964037324703e-05, "loss": 0.041, "step": 16590 }, { "epoch": 4.488912925905895, "grad_norm": 0.24976325035095215, "learning_rate": 8.660087117188427e-05, "loss": 0.0409, "step": 16600 }, { "epoch": 4.4916170903190915, "grad_norm": 0.16045594215393066, "learning_rate": 8.658209085214325e-05, "loss": 0.0403, "step": 16610 }, { "epoch": 4.494321254732288, "grad_norm": 0.26183393597602844, "learning_rate": 8.656329941972891e-05, "loss": 0.0429, "step": 16620 }, { "epoch": 4.497025419145484, "grad_norm": 0.1691884547472, "learning_rate": 8.654449688034963e-05, "loss": 0.0421, "step": 16630 }, { "epoch": 4.499729583558681, "grad_norm": 0.15335430204868317, "learning_rate": 8.652568323971706e-05, "loss": 0.0422, "step": 16640 }, { "epoch": 4.502433747971876, "grad_norm": 0.1616811603307724, "learning_rate": 8.650685850354636e-05, "loss": 0.0412, "step": 16650 }, { "epoch": 4.505137912385073, "grad_norm": 0.17482630908489227, "learning_rate": 8.648802267755593e-05, "loss": 0.041, "step": 16660 }, { "epoch": 4.507842076798269, "grad_norm": 0.138427272439003, "learning_rate": 8.646917576746764e-05, "loss": 0.0416, "step": 16670 }, { "epoch": 4.510546241211466, "grad_norm": 0.17484620213508606, "learning_rate": 8.645031777900666e-05, "loss": 0.0423, "step": 16680 }, { "epoch": 4.513250405624662, "grad_norm": 0.15755021572113037, "learning_rate": 8.643144871790154e-05, "loss": 0.0403, "step": 16690 }, { "epoch": 4.5159545700378585, "grad_norm": 0.21761652827262878, "learning_rate": 8.641256858988424e-05, "loss": 0.0409, "step": 16700 }, { "epoch": 4.518658734451055, "grad_norm": 0.2178589403629303, "learning_rate": 8.639367740069e-05, "loss": 0.0411, "step": 16710 }, { "epoch": 4.5213628988642505, "grad_norm": 0.175583153963089, "learning_rate": 8.63747751560575e-05, "loss": 0.0418, "step": 16720 }, { "epoch": 4.524067063277447, "grad_norm": 0.12172047793865204, "learning_rate": 8.635586186172871e-05, "loss": 0.0408, "step": 16730 }, { "epoch": 4.526771227690643, "grad_norm": 0.14705535769462585, "learning_rate": 8.633693752344902e-05, "loss": 0.0402, "step": 16740 }, { "epoch": 4.52947539210384, "grad_norm": 0.15105651319026947, "learning_rate": 8.631800214696713e-05, "loss": 0.0408, "step": 16750 }, { "epoch": 4.532179556517036, "grad_norm": 0.1553635150194168, "learning_rate": 8.629905573803511e-05, "loss": 0.0401, "step": 16760 }, { "epoch": 4.534883720930233, "grad_norm": 0.18894800543785095, "learning_rate": 8.628009830240839e-05, "loss": 0.04, "step": 16770 }, { "epoch": 4.537587885343429, "grad_norm": 0.23707862198352814, "learning_rate": 8.626112984584571e-05, "loss": 0.0413, "step": 16780 }, { "epoch": 4.5402920497566255, "grad_norm": 0.20670491456985474, "learning_rate": 8.62421503741092e-05, "loss": 0.0408, "step": 16790 }, { "epoch": 4.542996214169822, "grad_norm": 0.25247612595558167, "learning_rate": 8.622315989296432e-05, "loss": 0.0406, "step": 16800 }, { "epoch": 4.5457003785830175, "grad_norm": 0.12852945923805237, "learning_rate": 8.62041584081799e-05, "loss": 0.0413, "step": 16810 }, { "epoch": 4.548404542996214, "grad_norm": 0.17437058687210083, "learning_rate": 8.618514592552807e-05, "loss": 0.041, "step": 16820 }, { "epoch": 4.55110870740941, "grad_norm": 0.19244347512722015, "learning_rate": 8.616612245078431e-05, "loss": 0.0415, "step": 16830 }, { "epoch": 4.553812871822607, "grad_norm": 0.14991173148155212, "learning_rate": 8.614708798972746e-05, "loss": 0.0396, "step": 16840 }, { "epoch": 4.556517036235803, "grad_norm": 0.1164301186800003, "learning_rate": 8.61280425481397e-05, "loss": 0.0425, "step": 16850 }, { "epoch": 4.559221200649, "grad_norm": 0.18623602390289307, "learning_rate": 8.61089861318065e-05, "loss": 0.0408, "step": 16860 }, { "epoch": 4.561925365062196, "grad_norm": 0.14860829710960388, "learning_rate": 8.608991874651673e-05, "loss": 0.0401, "step": 16870 }, { "epoch": 4.5646295294753925, "grad_norm": 0.14008766412734985, "learning_rate": 8.607084039806255e-05, "loss": 0.0399, "step": 16880 }, { "epoch": 4.567333693888589, "grad_norm": 0.15294605493545532, "learning_rate": 8.605175109223944e-05, "loss": 0.0406, "step": 16890 }, { "epoch": 4.570037858301784, "grad_norm": 0.1426944136619568, "learning_rate": 8.603265083484624e-05, "loss": 0.0405, "step": 16900 }, { "epoch": 4.572742022714981, "grad_norm": 0.1984037458896637, "learning_rate": 8.60135396316851e-05, "loss": 0.0398, "step": 16910 }, { "epoch": 4.575446187128177, "grad_norm": 0.22241497039794922, "learning_rate": 8.599441748856152e-05, "loss": 0.0414, "step": 16920 }, { "epoch": 4.578150351541374, "grad_norm": 0.14659236371517181, "learning_rate": 8.597528441128427e-05, "loss": 0.0403, "step": 16930 }, { "epoch": 4.58085451595457, "grad_norm": 0.1390591561794281, "learning_rate": 8.595614040566549e-05, "loss": 0.0402, "step": 16940 }, { "epoch": 4.583558680367767, "grad_norm": 0.11083206534385681, "learning_rate": 8.593698547752063e-05, "loss": 0.0427, "step": 16950 }, { "epoch": 4.586262844780963, "grad_norm": 0.13852843642234802, "learning_rate": 8.591781963266843e-05, "loss": 0.0394, "step": 16960 }, { "epoch": 4.588967009194159, "grad_norm": 0.17208434641361237, "learning_rate": 8.5898642876931e-05, "loss": 0.0418, "step": 16970 }, { "epoch": 4.591671173607355, "grad_norm": 0.17580698430538177, "learning_rate": 8.587945521613369e-05, "loss": 0.041, "step": 16980 }, { "epoch": 4.594375338020551, "grad_norm": 0.15508918464183807, "learning_rate": 8.586025665610524e-05, "loss": 0.0403, "step": 16990 }, { "epoch": 4.597079502433748, "grad_norm": 0.15430262684822083, "learning_rate": 8.584104720267765e-05, "loss": 0.0395, "step": 17000 }, { "epoch": 4.599783666846944, "grad_norm": 0.19012583792209625, "learning_rate": 8.582182686168625e-05, "loss": 0.0404, "step": 17010 }, { "epoch": 4.602487831260141, "grad_norm": 0.13821464776992798, "learning_rate": 8.580259563896967e-05, "loss": 0.0423, "step": 17020 }, { "epoch": 4.605191995673337, "grad_norm": 0.13277281820774078, "learning_rate": 8.578335354036983e-05, "loss": 0.0411, "step": 17030 }, { "epoch": 4.607896160086534, "grad_norm": 0.1710357367992401, "learning_rate": 8.576410057173201e-05, "loss": 0.0392, "step": 17040 }, { "epoch": 4.61060032449973, "grad_norm": 0.09276096522808075, "learning_rate": 8.574483673890474e-05, "loss": 0.0417, "step": 17050 }, { "epoch": 4.6133044889129255, "grad_norm": 0.14372535049915314, "learning_rate": 8.572556204773983e-05, "loss": 0.0408, "step": 17060 }, { "epoch": 4.616008653326122, "grad_norm": 0.17414487898349762, "learning_rate": 8.570627650409246e-05, "loss": 0.0414, "step": 17070 }, { "epoch": 4.618712817739318, "grad_norm": 0.16027502715587616, "learning_rate": 8.568698011382107e-05, "loss": 0.0408, "step": 17080 }, { "epoch": 4.621416982152515, "grad_norm": 0.2065284699201584, "learning_rate": 8.566767288278738e-05, "loss": 0.0395, "step": 17090 }, { "epoch": 4.624121146565711, "grad_norm": 0.12331859767436981, "learning_rate": 8.56483548168564e-05, "loss": 0.0417, "step": 17100 }, { "epoch": 4.626825310978908, "grad_norm": 0.19777749478816986, "learning_rate": 8.562902592189648e-05, "loss": 0.041, "step": 17110 }, { "epoch": 4.629529475392104, "grad_norm": 0.20553173124790192, "learning_rate": 8.560968620377921e-05, "loss": 0.04, "step": 17120 }, { "epoch": 4.6322336398053, "grad_norm": 0.12390065938234329, "learning_rate": 8.559033566837951e-05, "loss": 0.0397, "step": 17130 }, { "epoch": 4.634937804218496, "grad_norm": 0.13856498897075653, "learning_rate": 8.557097432157551e-05, "loss": 0.0419, "step": 17140 }, { "epoch": 4.6376419686316925, "grad_norm": 0.13661350309848785, "learning_rate": 8.555160216924872e-05, "loss": 0.0414, "step": 17150 }, { "epoch": 4.640346133044889, "grad_norm": 0.19935500621795654, "learning_rate": 8.55322192172839e-05, "loss": 0.0435, "step": 17160 }, { "epoch": 4.643050297458085, "grad_norm": 0.14128701388835907, "learning_rate": 8.551282547156902e-05, "loss": 0.0406, "step": 17170 }, { "epoch": 4.645754461871282, "grad_norm": 0.2524544298648834, "learning_rate": 8.549342093799544e-05, "loss": 0.0428, "step": 17180 }, { "epoch": 4.648458626284478, "grad_norm": 0.18222442269325256, "learning_rate": 8.547400562245773e-05, "loss": 0.0399, "step": 17190 }, { "epoch": 4.651162790697675, "grad_norm": 0.11607924848794937, "learning_rate": 8.545457953085374e-05, "loss": 0.0416, "step": 17200 }, { "epoch": 4.653866955110871, "grad_norm": 0.2111450433731079, "learning_rate": 8.543514266908463e-05, "loss": 0.0418, "step": 17210 }, { "epoch": 4.656571119524067, "grad_norm": 0.19057118892669678, "learning_rate": 8.541569504305478e-05, "loss": 0.0397, "step": 17220 }, { "epoch": 4.659275283937263, "grad_norm": 0.22497209906578064, "learning_rate": 8.539623665867187e-05, "loss": 0.0396, "step": 17230 }, { "epoch": 4.6619794483504595, "grad_norm": 0.13782605528831482, "learning_rate": 8.537676752184685e-05, "loss": 0.0407, "step": 17240 }, { "epoch": 4.664683612763656, "grad_norm": 0.13532686233520508, "learning_rate": 8.53572876384939e-05, "loss": 0.0404, "step": 17250 }, { "epoch": 4.667387777176852, "grad_norm": 0.12351974844932556, "learning_rate": 8.533779701453056e-05, "loss": 0.0393, "step": 17260 }, { "epoch": 4.670091941590049, "grad_norm": 0.11320328712463379, "learning_rate": 8.53182956558775e-05, "loss": 0.0408, "step": 17270 }, { "epoch": 4.672796106003245, "grad_norm": 0.1398524045944214, "learning_rate": 8.529878356845877e-05, "loss": 0.0409, "step": 17280 }, { "epoch": 4.675500270416442, "grad_norm": 0.13972650468349457, "learning_rate": 8.527926075820158e-05, "loss": 0.0399, "step": 17290 }, { "epoch": 4.678204434829638, "grad_norm": 0.16158974170684814, "learning_rate": 8.525972723103648e-05, "loss": 0.042, "step": 17300 }, { "epoch": 4.680908599242834, "grad_norm": 0.1554943025112152, "learning_rate": 8.524018299289722e-05, "loss": 0.0396, "step": 17310 }, { "epoch": 4.68361276365603, "grad_norm": 0.14723233878612518, "learning_rate": 8.522062804972083e-05, "loss": 0.0398, "step": 17320 }, { "epoch": 4.6863169280692265, "grad_norm": 0.14546191692352295, "learning_rate": 8.520106240744759e-05, "loss": 0.0403, "step": 17330 }, { "epoch": 4.689021092482423, "grad_norm": 0.2639559805393219, "learning_rate": 8.518148607202102e-05, "loss": 0.04, "step": 17340 }, { "epoch": 4.691725256895619, "grad_norm": 0.14699451625347137, "learning_rate": 8.51618990493879e-05, "loss": 0.0402, "step": 17350 }, { "epoch": 4.694429421308816, "grad_norm": 0.1381818801164627, "learning_rate": 8.514230134549823e-05, "loss": 0.0389, "step": 17360 }, { "epoch": 4.697133585722012, "grad_norm": 0.1173916757106781, "learning_rate": 8.51226929663053e-05, "loss": 0.0401, "step": 17370 }, { "epoch": 4.699837750135208, "grad_norm": 0.1523219794034958, "learning_rate": 8.51030739177656e-05, "loss": 0.0424, "step": 17380 }, { "epoch": 4.702541914548404, "grad_norm": 0.18548484146595, "learning_rate": 8.508344420583889e-05, "loss": 0.0404, "step": 17390 }, { "epoch": 4.705246078961601, "grad_norm": 0.18918778002262115, "learning_rate": 8.506380383648816e-05, "loss": 0.0396, "step": 17400 }, { "epoch": 4.707950243374797, "grad_norm": 0.19834890961647034, "learning_rate": 8.504415281567963e-05, "loss": 0.0421, "step": 17410 }, { "epoch": 4.7106544077879935, "grad_norm": 0.14094913005828857, "learning_rate": 8.502449114938275e-05, "loss": 0.0398, "step": 17420 }, { "epoch": 4.71335857220119, "grad_norm": 0.17316734790802002, "learning_rate": 8.500481884357025e-05, "loss": 0.0401, "step": 17430 }, { "epoch": 4.716062736614386, "grad_norm": 0.19184304773807526, "learning_rate": 8.498513590421801e-05, "loss": 0.0388, "step": 17440 }, { "epoch": 4.718766901027583, "grad_norm": 0.18290841579437256, "learning_rate": 8.496544233730522e-05, "loss": 0.0406, "step": 17450 }, { "epoch": 4.721471065440779, "grad_norm": 0.20041099190711975, "learning_rate": 8.494573814881426e-05, "loss": 0.0404, "step": 17460 }, { "epoch": 4.724175229853975, "grad_norm": 0.13110916316509247, "learning_rate": 8.492602334473074e-05, "loss": 0.0403, "step": 17470 }, { "epoch": 4.726879394267171, "grad_norm": 0.13074159622192383, "learning_rate": 8.49062979310435e-05, "loss": 0.0402, "step": 17480 }, { "epoch": 4.729583558680368, "grad_norm": 0.14534829556941986, "learning_rate": 8.488656191374458e-05, "loss": 0.0412, "step": 17490 }, { "epoch": 4.732287723093564, "grad_norm": 0.15308509767055511, "learning_rate": 8.48668152988293e-05, "loss": 0.0407, "step": 17500 }, { "epoch": 4.7349918875067605, "grad_norm": 0.1989075392484665, "learning_rate": 8.484705809229612e-05, "loss": 0.0417, "step": 17510 }, { "epoch": 4.737696051919957, "grad_norm": 0.20105858147144318, "learning_rate": 8.482729030014677e-05, "loss": 0.0409, "step": 17520 }, { "epoch": 4.740400216333153, "grad_norm": 0.15163160860538483, "learning_rate": 8.48075119283862e-05, "loss": 0.0403, "step": 17530 }, { "epoch": 4.743104380746349, "grad_norm": 0.19111822545528412, "learning_rate": 8.478772298302254e-05, "loss": 0.0408, "step": 17540 }, { "epoch": 4.745808545159546, "grad_norm": 0.2726028263568878, "learning_rate": 8.476792347006716e-05, "loss": 0.0402, "step": 17550 }, { "epoch": 4.748512709572742, "grad_norm": 0.16361579298973083, "learning_rate": 8.474811339553462e-05, "loss": 0.0403, "step": 17560 }, { "epoch": 4.751216873985938, "grad_norm": 0.19360339641571045, "learning_rate": 8.47282927654427e-05, "loss": 0.0401, "step": 17570 }, { "epoch": 4.753921038399135, "grad_norm": 0.1672658920288086, "learning_rate": 8.470846158581238e-05, "loss": 0.041, "step": 17580 }, { "epoch": 4.756625202812331, "grad_norm": 0.19307072460651398, "learning_rate": 8.468861986266787e-05, "loss": 0.0402, "step": 17590 }, { "epoch": 4.7593293672255275, "grad_norm": 0.17629075050354004, "learning_rate": 8.466876760203654e-05, "loss": 0.0395, "step": 17600 }, { "epoch": 4.762033531638724, "grad_norm": 0.1634126901626587, "learning_rate": 8.464890480994898e-05, "loss": 0.0411, "step": 17610 }, { "epoch": 4.76473769605192, "grad_norm": 0.1844424605369568, "learning_rate": 8.462903149243899e-05, "loss": 0.0421, "step": 17620 }, { "epoch": 4.767441860465116, "grad_norm": 0.14927133917808533, "learning_rate": 8.460914765554357e-05, "loss": 0.0399, "step": 17630 }, { "epoch": 4.770146024878312, "grad_norm": 0.16001592576503754, "learning_rate": 8.458925330530288e-05, "loss": 0.0399, "step": 17640 }, { "epoch": 4.772850189291509, "grad_norm": 0.18920011818408966, "learning_rate": 8.456934844776032e-05, "loss": 0.0397, "step": 17650 }, { "epoch": 4.775554353704705, "grad_norm": 0.140511155128479, "learning_rate": 8.454943308896246e-05, "loss": 0.0396, "step": 17660 }, { "epoch": 4.778258518117902, "grad_norm": 0.2410658597946167, "learning_rate": 8.452950723495905e-05, "loss": 0.0392, "step": 17670 }, { "epoch": 4.780962682531098, "grad_norm": 0.21152964234352112, "learning_rate": 8.450957089180303e-05, "loss": 0.0409, "step": 17680 }, { "epoch": 4.7836668469442944, "grad_norm": 0.18571610748767853, "learning_rate": 8.448962406555055e-05, "loss": 0.0408, "step": 17690 }, { "epoch": 4.786371011357491, "grad_norm": 0.221204936504364, "learning_rate": 8.446966676226093e-05, "loss": 0.0413, "step": 17700 }, { "epoch": 4.789075175770687, "grad_norm": 0.16823004186153412, "learning_rate": 8.444969898799667e-05, "loss": 0.0399, "step": 17710 }, { "epoch": 4.791779340183883, "grad_norm": 0.12777364253997803, "learning_rate": 8.442972074882343e-05, "loss": 0.0411, "step": 17720 }, { "epoch": 4.794483504597079, "grad_norm": 0.16651634871959686, "learning_rate": 8.44097320508101e-05, "loss": 0.0401, "step": 17730 }, { "epoch": 4.797187669010276, "grad_norm": 0.13656185567378998, "learning_rate": 8.43897329000287e-05, "loss": 0.039, "step": 17740 }, { "epoch": 4.799891833423472, "grad_norm": 0.16942189633846283, "learning_rate": 8.436972330255448e-05, "loss": 0.0414, "step": 17750 }, { "epoch": 4.802595997836669, "grad_norm": 0.3046013414859772, "learning_rate": 8.434970326446579e-05, "loss": 0.0409, "step": 17760 }, { "epoch": 4.805300162249865, "grad_norm": 0.16374708712100983, "learning_rate": 8.432967279184418e-05, "loss": 0.0396, "step": 17770 }, { "epoch": 4.808004326663061, "grad_norm": 0.16964232921600342, "learning_rate": 8.430963189077441e-05, "loss": 0.0409, "step": 17780 }, { "epoch": 4.810708491076257, "grad_norm": 0.18116764724254608, "learning_rate": 8.428958056734437e-05, "loss": 0.0398, "step": 17790 }, { "epoch": 4.813412655489453, "grad_norm": 0.1307428777217865, "learning_rate": 8.426951882764513e-05, "loss": 0.0391, "step": 17800 }, { "epoch": 4.81611681990265, "grad_norm": 0.16614298522472382, "learning_rate": 8.424944667777089e-05, "loss": 0.04, "step": 17810 }, { "epoch": 4.818820984315846, "grad_norm": 0.14333827793598175, "learning_rate": 8.422936412381905e-05, "loss": 0.0395, "step": 17820 }, { "epoch": 4.821525148729043, "grad_norm": 0.12909337878227234, "learning_rate": 8.420927117189017e-05, "loss": 0.0393, "step": 17830 }, { "epoch": 4.824229313142239, "grad_norm": 0.09605981409549713, "learning_rate": 8.418916782808795e-05, "loss": 0.0399, "step": 17840 }, { "epoch": 4.826933477555436, "grad_norm": 0.13672514259815216, "learning_rate": 8.416905409851926e-05, "loss": 0.0399, "step": 17850 }, { "epoch": 4.829637641968632, "grad_norm": 0.17300741374492645, "learning_rate": 8.41489299892941e-05, "loss": 0.0399, "step": 17860 }, { "epoch": 4.832341806381828, "grad_norm": 0.18705004453659058, "learning_rate": 8.412879550652566e-05, "loss": 0.04, "step": 17870 }, { "epoch": 4.835045970795024, "grad_norm": 0.2225879430770874, "learning_rate": 8.410865065633029e-05, "loss": 0.0398, "step": 17880 }, { "epoch": 4.83775013520822, "grad_norm": 0.16713455319404602, "learning_rate": 8.408849544482742e-05, "loss": 0.0395, "step": 17890 }, { "epoch": 4.840454299621417, "grad_norm": 0.2325199693441391, "learning_rate": 8.406832987813968e-05, "loss": 0.0403, "step": 17900 }, { "epoch": 4.843158464034613, "grad_norm": 0.1406680941581726, "learning_rate": 8.404815396239286e-05, "loss": 0.0399, "step": 17910 }, { "epoch": 4.84586262844781, "grad_norm": 0.17452369630336761, "learning_rate": 8.402796770371587e-05, "loss": 0.0398, "step": 17920 }, { "epoch": 4.848566792861006, "grad_norm": 0.18749387562274933, "learning_rate": 8.400777110824071e-05, "loss": 0.0397, "step": 17930 }, { "epoch": 4.8512709572742025, "grad_norm": 0.1657656878232956, "learning_rate": 8.398756418210263e-05, "loss": 0.0396, "step": 17940 }, { "epoch": 4.853975121687399, "grad_norm": 0.16234822571277618, "learning_rate": 8.396734693143993e-05, "loss": 0.0408, "step": 17950 }, { "epoch": 4.856679286100595, "grad_norm": 0.1562160849571228, "learning_rate": 8.39471193623941e-05, "loss": 0.0405, "step": 17960 }, { "epoch": 4.859383450513791, "grad_norm": 0.15501295030117035, "learning_rate": 8.392688148110974e-05, "loss": 0.0422, "step": 17970 }, { "epoch": 4.862087614926987, "grad_norm": 0.15027853846549988, "learning_rate": 8.390663329373456e-05, "loss": 0.04, "step": 17980 }, { "epoch": 4.864791779340184, "grad_norm": 0.20623727142810822, "learning_rate": 8.388637480641944e-05, "loss": 0.0413, "step": 17990 }, { "epoch": 4.86749594375338, "grad_norm": 0.18859820067882538, "learning_rate": 8.386610602531837e-05, "loss": 0.0409, "step": 18000 }, { "epoch": 4.870200108166577, "grad_norm": 0.15466764569282532, "learning_rate": 8.384582695658847e-05, "loss": 0.039, "step": 18010 }, { "epoch": 4.872904272579773, "grad_norm": 0.1537114530801773, "learning_rate": 8.382553760638999e-05, "loss": 0.0416, "step": 18020 }, { "epoch": 4.8756084369929695, "grad_norm": 0.1905810385942459, "learning_rate": 8.380523798088631e-05, "loss": 0.0402, "step": 18030 }, { "epoch": 4.878312601406165, "grad_norm": 0.1682923287153244, "learning_rate": 8.378492808624389e-05, "loss": 0.0395, "step": 18040 }, { "epoch": 4.8810167658193615, "grad_norm": 0.17615275084972382, "learning_rate": 8.376460792863237e-05, "loss": 0.0393, "step": 18050 }, { "epoch": 4.883720930232558, "grad_norm": 0.24389035999774933, "learning_rate": 8.374427751422444e-05, "loss": 0.0387, "step": 18060 }, { "epoch": 4.886425094645754, "grad_norm": 0.1788618415594101, "learning_rate": 8.3723936849196e-05, "loss": 0.0386, "step": 18070 }, { "epoch": 4.889129259058951, "grad_norm": 0.1593906432390213, "learning_rate": 8.370358593972595e-05, "loss": 0.0392, "step": 18080 }, { "epoch": 4.891833423472147, "grad_norm": 0.1808042824268341, "learning_rate": 8.36832247919964e-05, "loss": 0.0416, "step": 18090 }, { "epoch": 4.894537587885344, "grad_norm": 0.20379889011383057, "learning_rate": 8.36628534121925e-05, "loss": 0.0397, "step": 18100 }, { "epoch": 4.89724175229854, "grad_norm": 0.14579035341739655, "learning_rate": 8.364247180650254e-05, "loss": 0.0404, "step": 18110 }, { "epoch": 4.8999459167117365, "grad_norm": 0.17961442470550537, "learning_rate": 8.362207998111794e-05, "loss": 0.0397, "step": 18120 }, { "epoch": 4.902650081124932, "grad_norm": 0.18807339668273926, "learning_rate": 8.360167794223318e-05, "loss": 0.0384, "step": 18130 }, { "epoch": 4.9053542455381285, "grad_norm": 0.18327178061008453, "learning_rate": 8.358126569604586e-05, "loss": 0.0421, "step": 18140 }, { "epoch": 4.908058409951325, "grad_norm": 0.14342190325260162, "learning_rate": 8.356084324875668e-05, "loss": 0.0399, "step": 18150 }, { "epoch": 4.910762574364521, "grad_norm": 0.1894017904996872, "learning_rate": 8.354041060656945e-05, "loss": 0.0403, "step": 18160 }, { "epoch": 4.913466738777718, "grad_norm": 0.14749526977539062, "learning_rate": 8.351996777569106e-05, "loss": 0.0392, "step": 18170 }, { "epoch": 4.916170903190914, "grad_norm": 0.1669098287820816, "learning_rate": 8.349951476233148e-05, "loss": 0.0396, "step": 18180 }, { "epoch": 4.918875067604111, "grad_norm": 0.14776860177516937, "learning_rate": 8.347905157270386e-05, "loss": 0.0409, "step": 18190 }, { "epoch": 4.921579232017306, "grad_norm": 0.21645487844944, "learning_rate": 8.345857821302432e-05, "loss": 0.0426, "step": 18200 }, { "epoch": 4.924283396430503, "grad_norm": 0.2107655256986618, "learning_rate": 8.343809468951213e-05, "loss": 0.0406, "step": 18210 }, { "epoch": 4.926987560843699, "grad_norm": 0.20171061158180237, "learning_rate": 8.341760100838965e-05, "loss": 0.0406, "step": 18220 }, { "epoch": 4.9296917252568955, "grad_norm": 0.1550397127866745, "learning_rate": 8.339709717588233e-05, "loss": 0.0405, "step": 18230 }, { "epoch": 4.932395889670092, "grad_norm": 0.16912227869033813, "learning_rate": 8.33765831982187e-05, "loss": 0.0396, "step": 18240 }, { "epoch": 4.935100054083288, "grad_norm": 0.2122403383255005, "learning_rate": 8.335605908163035e-05, "loss": 0.0415, "step": 18250 }, { "epoch": 4.937804218496485, "grad_norm": 0.16204291582107544, "learning_rate": 8.333552483235196e-05, "loss": 0.0398, "step": 18260 }, { "epoch": 4.940508382909681, "grad_norm": 0.10103531181812286, "learning_rate": 8.33149804566213e-05, "loss": 0.0409, "step": 18270 }, { "epoch": 4.943212547322878, "grad_norm": 0.1407576948404312, "learning_rate": 8.329442596067921e-05, "loss": 0.0415, "step": 18280 }, { "epoch": 4.945916711736073, "grad_norm": 0.16426676511764526, "learning_rate": 8.32738613507696e-05, "loss": 0.04, "step": 18290 }, { "epoch": 4.94862087614927, "grad_norm": 0.16130773723125458, "learning_rate": 8.325328663313946e-05, "loss": 0.0403, "step": 18300 }, { "epoch": 4.951325040562466, "grad_norm": 0.17062638700008392, "learning_rate": 8.323270181403884e-05, "loss": 0.0389, "step": 18310 }, { "epoch": 4.9540292049756625, "grad_norm": 0.17173011600971222, "learning_rate": 8.321210689972086e-05, "loss": 0.0409, "step": 18320 }, { "epoch": 4.956733369388859, "grad_norm": 0.1716562658548355, "learning_rate": 8.319150189644174e-05, "loss": 0.0404, "step": 18330 }, { "epoch": 4.959437533802055, "grad_norm": 0.1662561297416687, "learning_rate": 8.31708868104607e-05, "loss": 0.0396, "step": 18340 }, { "epoch": 4.962141698215252, "grad_norm": 0.13302917778491974, "learning_rate": 8.315026164804007e-05, "loss": 0.0392, "step": 18350 }, { "epoch": 4.964845862628448, "grad_norm": 0.15321648120880127, "learning_rate": 8.312962641544524e-05, "loss": 0.0398, "step": 18360 }, { "epoch": 4.967550027041645, "grad_norm": 0.15135347843170166, "learning_rate": 8.310898111894465e-05, "loss": 0.0386, "step": 18370 }, { "epoch": 4.97025419145484, "grad_norm": 0.17195618152618408, "learning_rate": 8.308832576480977e-05, "loss": 0.0392, "step": 18380 }, { "epoch": 4.972958355868037, "grad_norm": 0.2572031021118164, "learning_rate": 8.306766035931519e-05, "loss": 0.0402, "step": 18390 }, { "epoch": 4.975662520281233, "grad_norm": 0.23060686886310577, "learning_rate": 8.304698490873847e-05, "loss": 0.0395, "step": 18400 }, { "epoch": 4.9783666846944294, "grad_norm": 0.178070530295372, "learning_rate": 8.30262994193603e-05, "loss": 0.0397, "step": 18410 }, { "epoch": 4.981070849107626, "grad_norm": 0.19921331107616425, "learning_rate": 8.300560389746438e-05, "loss": 0.0396, "step": 18420 }, { "epoch": 4.983775013520822, "grad_norm": 0.17227064073085785, "learning_rate": 8.298489834933745e-05, "loss": 0.0401, "step": 18430 }, { "epoch": 4.986479177934019, "grad_norm": 0.15391601622104645, "learning_rate": 8.296418278126934e-05, "loss": 0.0409, "step": 18440 }, { "epoch": 4.989183342347214, "grad_norm": 0.17216990888118744, "learning_rate": 8.294345719955284e-05, "loss": 0.0405, "step": 18450 }, { "epoch": 4.991887506760411, "grad_norm": 0.20589447021484375, "learning_rate": 8.29227216104839e-05, "loss": 0.0397, "step": 18460 }, { "epoch": 4.994591671173607, "grad_norm": 0.11231964826583862, "learning_rate": 8.290197602036137e-05, "loss": 0.039, "step": 18470 }, { "epoch": 4.997295835586804, "grad_norm": 0.09228087216615677, "learning_rate": 8.288122043548725e-05, "loss": 0.0397, "step": 18480 }, { "epoch": 5.0, "grad_norm": 0.12927699089050293, "learning_rate": 8.286045486216657e-05, "loss": 0.0389, "step": 18490 }, { "epoch": 5.002704164413196, "grad_norm": 0.1670951247215271, "learning_rate": 8.283967930670733e-05, "loss": 0.0396, "step": 18500 }, { "epoch": 5.005408328826393, "grad_norm": 0.24610869586467743, "learning_rate": 8.281889377542058e-05, "loss": 0.0403, "step": 18510 }, { "epoch": 5.008112493239589, "grad_norm": 0.17951519787311554, "learning_rate": 8.279809827462045e-05, "loss": 0.0403, "step": 18520 }, { "epoch": 5.010816657652786, "grad_norm": 0.2567676603794098, "learning_rate": 8.277729281062402e-05, "loss": 0.0399, "step": 18530 }, { "epoch": 5.013520822065981, "grad_norm": 0.20049124956130981, "learning_rate": 8.27564773897515e-05, "loss": 0.0395, "step": 18540 }, { "epoch": 5.016224986479178, "grad_norm": 0.15767529606819153, "learning_rate": 8.273565201832602e-05, "loss": 0.0392, "step": 18550 }, { "epoch": 5.018929150892374, "grad_norm": 0.17872001230716705, "learning_rate": 8.27148167026738e-05, "loss": 0.0381, "step": 18560 }, { "epoch": 5.0216333153055706, "grad_norm": 0.15470384061336517, "learning_rate": 8.269397144912405e-05, "loss": 0.0391, "step": 18570 }, { "epoch": 5.024337479718767, "grad_norm": 0.1183251366019249, "learning_rate": 8.267311626400899e-05, "loss": 0.039, "step": 18580 }, { "epoch": 5.027041644131963, "grad_norm": 0.11288285255432129, "learning_rate": 8.26522511536639e-05, "loss": 0.0388, "step": 18590 }, { "epoch": 5.02974580854516, "grad_norm": 0.13566720485687256, "learning_rate": 8.263137612442706e-05, "loss": 0.0404, "step": 18600 }, { "epoch": 5.032449972958356, "grad_norm": 0.14256325364112854, "learning_rate": 8.261049118263971e-05, "loss": 0.0392, "step": 18610 }, { "epoch": 5.035154137371552, "grad_norm": 0.1309543251991272, "learning_rate": 8.258959633464619e-05, "loss": 0.0385, "step": 18620 }, { "epoch": 5.037858301784748, "grad_norm": 0.16381974518299103, "learning_rate": 8.256869158679377e-05, "loss": 0.0405, "step": 18630 }, { "epoch": 5.040562466197945, "grad_norm": 0.22769875824451447, "learning_rate": 8.254777694543278e-05, "loss": 0.0387, "step": 18640 }, { "epoch": 5.043266630611141, "grad_norm": 0.17587876319885254, "learning_rate": 8.252685241691651e-05, "loss": 0.0408, "step": 18650 }, { "epoch": 5.0459707950243375, "grad_norm": 0.1218358650803566, "learning_rate": 8.250591800760133e-05, "loss": 0.0391, "step": 18660 }, { "epoch": 5.048674959437534, "grad_norm": 0.19196771085262299, "learning_rate": 8.248497372384649e-05, "loss": 0.0381, "step": 18670 }, { "epoch": 5.05137912385073, "grad_norm": 0.19348843395709991, "learning_rate": 8.246401957201437e-05, "loss": 0.0384, "step": 18680 }, { "epoch": 5.054083288263927, "grad_norm": 0.13179628551006317, "learning_rate": 8.244305555847027e-05, "loss": 0.0397, "step": 18690 }, { "epoch": 5.056787452677122, "grad_norm": 0.13938744366168976, "learning_rate": 8.24220816895825e-05, "loss": 0.0392, "step": 18700 }, { "epoch": 5.059491617090319, "grad_norm": 0.16022878885269165, "learning_rate": 8.240109797172237e-05, "loss": 0.0414, "step": 18710 }, { "epoch": 5.062195781503515, "grad_norm": 0.1124056726694107, "learning_rate": 8.238010441126416e-05, "loss": 0.039, "step": 18720 }, { "epoch": 5.064899945916712, "grad_norm": 0.1590997576713562, "learning_rate": 8.23591010145852e-05, "loss": 0.0403, "step": 18730 }, { "epoch": 5.067604110329908, "grad_norm": 0.21572691202163696, "learning_rate": 8.233808778806571e-05, "loss": 0.0395, "step": 18740 }, { "epoch": 5.0703082747431045, "grad_norm": 0.21307536959648132, "learning_rate": 8.231706473808903e-05, "loss": 0.0398, "step": 18750 }, { "epoch": 5.073012439156301, "grad_norm": 0.20805896818637848, "learning_rate": 8.229603187104133e-05, "loss": 0.0418, "step": 18760 }, { "epoch": 5.075716603569497, "grad_norm": 0.1717902421951294, "learning_rate": 8.22749891933119e-05, "loss": 0.0408, "step": 18770 }, { "epoch": 5.078420767982693, "grad_norm": 0.15812060236930847, "learning_rate": 8.225393671129291e-05, "loss": 0.04, "step": 18780 }, { "epoch": 5.081124932395889, "grad_norm": 0.20622262358665466, "learning_rate": 8.223287443137957e-05, "loss": 0.0397, "step": 18790 }, { "epoch": 5.083829096809086, "grad_norm": 0.19902724027633667, "learning_rate": 8.221180235997004e-05, "loss": 0.041, "step": 18800 }, { "epoch": 5.086533261222282, "grad_norm": 0.17515116930007935, "learning_rate": 8.219072050346544e-05, "loss": 0.039, "step": 18810 }, { "epoch": 5.089237425635479, "grad_norm": 0.1782894730567932, "learning_rate": 8.216962886826992e-05, "loss": 0.0391, "step": 18820 }, { "epoch": 5.091941590048675, "grad_norm": 0.13897323608398438, "learning_rate": 8.214852746079054e-05, "loss": 0.0389, "step": 18830 }, { "epoch": 5.0946457544618715, "grad_norm": 0.1309492141008377, "learning_rate": 8.212741628743732e-05, "loss": 0.0392, "step": 18840 }, { "epoch": 5.097349918875068, "grad_norm": 0.14888136088848114, "learning_rate": 8.210629535462333e-05, "loss": 0.0395, "step": 18850 }, { "epoch": 5.1000540832882635, "grad_norm": 0.1460563838481903, "learning_rate": 8.208516466876453e-05, "loss": 0.0393, "step": 18860 }, { "epoch": 5.10275824770146, "grad_norm": 0.11427439749240875, "learning_rate": 8.206402423627986e-05, "loss": 0.0405, "step": 18870 }, { "epoch": 5.105462412114656, "grad_norm": 0.1494373232126236, "learning_rate": 8.204287406359124e-05, "loss": 0.0387, "step": 18880 }, { "epoch": 5.108166576527853, "grad_norm": 0.2169719934463501, "learning_rate": 8.20217141571235e-05, "loss": 0.0396, "step": 18890 }, { "epoch": 5.110870740941049, "grad_norm": 0.1694410890340805, "learning_rate": 8.200054452330449e-05, "loss": 0.0386, "step": 18900 }, { "epoch": 5.113574905354246, "grad_norm": 0.1929217129945755, "learning_rate": 8.197936516856499e-05, "loss": 0.04, "step": 18910 }, { "epoch": 5.116279069767442, "grad_norm": 0.16045956313610077, "learning_rate": 8.195817609933871e-05, "loss": 0.0407, "step": 18920 }, { "epoch": 5.1189832341806385, "grad_norm": 0.12270423769950867, "learning_rate": 8.193697732206233e-05, "loss": 0.0394, "step": 18930 }, { "epoch": 5.121687398593835, "grad_norm": 0.16401588916778564, "learning_rate": 8.19157688431755e-05, "loss": 0.0412, "step": 18940 }, { "epoch": 5.1243915630070305, "grad_norm": 0.1932261437177658, "learning_rate": 8.189455066912077e-05, "loss": 0.0395, "step": 18950 }, { "epoch": 5.127095727420227, "grad_norm": 0.20198602974414825, "learning_rate": 8.187332280634369e-05, "loss": 0.0396, "step": 18960 }, { "epoch": 5.129799891833423, "grad_norm": 0.15664294362068176, "learning_rate": 8.18520852612927e-05, "loss": 0.0407, "step": 18970 }, { "epoch": 5.13250405624662, "grad_norm": 0.17450352013111115, "learning_rate": 8.183083804041921e-05, "loss": 0.038, "step": 18980 }, { "epoch": 5.135208220659816, "grad_norm": 0.21673554182052612, "learning_rate": 8.180958115017757e-05, "loss": 0.0394, "step": 18990 }, { "epoch": 5.137912385073013, "grad_norm": 0.26756763458251953, "learning_rate": 8.178831459702505e-05, "loss": 0.0393, "step": 19000 }, { "epoch": 5.140616549486209, "grad_norm": 0.15123674273490906, "learning_rate": 8.17670383874219e-05, "loss": 0.0388, "step": 19010 }, { "epoch": 5.1433207138994055, "grad_norm": 0.14491210877895355, "learning_rate": 8.174575252783124e-05, "loss": 0.0387, "step": 19020 }, { "epoch": 5.146024878312601, "grad_norm": 0.10386427491903305, "learning_rate": 8.172445702471914e-05, "loss": 0.0394, "step": 19030 }, { "epoch": 5.1487290427257975, "grad_norm": 0.17848540842533112, "learning_rate": 8.170315188455466e-05, "loss": 0.0413, "step": 19040 }, { "epoch": 5.151433207138994, "grad_norm": 0.12924697995185852, "learning_rate": 8.168183711380969e-05, "loss": 0.0386, "step": 19050 }, { "epoch": 5.15413737155219, "grad_norm": 0.17155516147613525, "learning_rate": 8.166051271895913e-05, "loss": 0.0382, "step": 19060 }, { "epoch": 5.156841535965387, "grad_norm": 0.13258613646030426, "learning_rate": 8.163917870648075e-05, "loss": 0.0384, "step": 19070 }, { "epoch": 5.159545700378583, "grad_norm": 0.1722453087568283, "learning_rate": 8.161783508285526e-05, "loss": 0.0401, "step": 19080 }, { "epoch": 5.16224986479178, "grad_norm": 0.11822031438350677, "learning_rate": 8.159648185456628e-05, "loss": 0.0386, "step": 19090 }, { "epoch": 5.164954029204976, "grad_norm": 0.1695268601179123, "learning_rate": 8.157511902810038e-05, "loss": 0.0398, "step": 19100 }, { "epoch": 5.167658193618172, "grad_norm": 0.14726237952709198, "learning_rate": 8.155374660994701e-05, "loss": 0.0397, "step": 19110 }, { "epoch": 5.170362358031368, "grad_norm": 0.1431266814470291, "learning_rate": 8.153236460659857e-05, "loss": 0.0388, "step": 19120 }, { "epoch": 5.173066522444564, "grad_norm": 0.13025906682014465, "learning_rate": 8.151097302455031e-05, "loss": 0.0384, "step": 19130 }, { "epoch": 5.175770686857761, "grad_norm": 0.18784764409065247, "learning_rate": 8.148957187030044e-05, "loss": 0.0392, "step": 19140 }, { "epoch": 5.178474851270957, "grad_norm": 0.14253385365009308, "learning_rate": 8.146816115035006e-05, "loss": 0.0403, "step": 19150 }, { "epoch": 5.181179015684154, "grad_norm": 0.13585978746414185, "learning_rate": 8.14467408712032e-05, "loss": 0.0391, "step": 19160 }, { "epoch": 5.18388318009735, "grad_norm": 0.16393880546092987, "learning_rate": 8.142531103936678e-05, "loss": 0.0392, "step": 19170 }, { "epoch": 5.186587344510547, "grad_norm": 0.13013778626918793, "learning_rate": 8.14038716613506e-05, "loss": 0.0381, "step": 19180 }, { "epoch": 5.189291508923742, "grad_norm": 0.1584954410791397, "learning_rate": 8.138242274366736e-05, "loss": 0.039, "step": 19190 }, { "epoch": 5.191995673336939, "grad_norm": 0.10979384183883667, "learning_rate": 8.136096429283271e-05, "loss": 0.0394, "step": 19200 }, { "epoch": 5.194699837750135, "grad_norm": 0.11599626392126083, "learning_rate": 8.133949631536515e-05, "loss": 0.0388, "step": 19210 }, { "epoch": 5.197404002163331, "grad_norm": 0.19434553384780884, "learning_rate": 8.131801881778607e-05, "loss": 0.04, "step": 19220 }, { "epoch": 5.200108166576528, "grad_norm": 0.18361233174800873, "learning_rate": 8.129653180661978e-05, "loss": 0.0394, "step": 19230 }, { "epoch": 5.202812330989724, "grad_norm": 0.15270264446735382, "learning_rate": 8.127503528839346e-05, "loss": 0.0397, "step": 19240 }, { "epoch": 5.205516495402921, "grad_norm": 0.1990472376346588, "learning_rate": 8.125352926963721e-05, "loss": 0.0381, "step": 19250 }, { "epoch": 5.208220659816117, "grad_norm": 0.2128181755542755, "learning_rate": 8.123201375688395e-05, "loss": 0.0386, "step": 19260 }, { "epoch": 5.210924824229313, "grad_norm": 0.10993891209363937, "learning_rate": 8.121048875666954e-05, "loss": 0.038, "step": 19270 }, { "epoch": 5.213628988642509, "grad_norm": 0.1709207445383072, "learning_rate": 8.118895427553274e-05, "loss": 0.0401, "step": 19280 }, { "epoch": 5.2163331530557056, "grad_norm": 0.12005055695772171, "learning_rate": 8.116741032001511e-05, "loss": 0.0389, "step": 19290 }, { "epoch": 5.219037317468902, "grad_norm": 0.22846604883670807, "learning_rate": 8.114585689666114e-05, "loss": 0.0402, "step": 19300 }, { "epoch": 5.221741481882098, "grad_norm": 0.23164691030979156, "learning_rate": 8.112429401201821e-05, "loss": 0.0382, "step": 19310 }, { "epoch": 5.224445646295295, "grad_norm": 0.15068311989307404, "learning_rate": 8.110272167263656e-05, "loss": 0.0387, "step": 19320 }, { "epoch": 5.227149810708491, "grad_norm": 0.1340378373861313, "learning_rate": 8.108113988506929e-05, "loss": 0.0388, "step": 19330 }, { "epoch": 5.229853975121688, "grad_norm": 0.1075737401843071, "learning_rate": 8.105954865587235e-05, "loss": 0.0399, "step": 19340 }, { "epoch": 5.232558139534884, "grad_norm": 0.16570113599300385, "learning_rate": 8.103794799160463e-05, "loss": 0.0392, "step": 19350 }, { "epoch": 5.23526230394808, "grad_norm": 0.27121976017951965, "learning_rate": 8.101633789882781e-05, "loss": 0.0377, "step": 19360 }, { "epoch": 5.237966468361276, "grad_norm": 0.15864306688308716, "learning_rate": 8.099471838410648e-05, "loss": 0.0393, "step": 19370 }, { "epoch": 5.2406706327744725, "grad_norm": 0.14702819287776947, "learning_rate": 8.097308945400806e-05, "loss": 0.0403, "step": 19380 }, { "epoch": 5.243374797187669, "grad_norm": 0.20161405205726624, "learning_rate": 8.095145111510288e-05, "loss": 0.0377, "step": 19390 }, { "epoch": 5.246078961600865, "grad_norm": 0.1787712574005127, "learning_rate": 8.092980337396406e-05, "loss": 0.0394, "step": 19400 }, { "epoch": 5.248783126014062, "grad_norm": 0.14890070259571075, "learning_rate": 8.090814623716763e-05, "loss": 0.0382, "step": 19410 }, { "epoch": 5.251487290427258, "grad_norm": 0.1585862934589386, "learning_rate": 8.088647971129246e-05, "loss": 0.0406, "step": 19420 }, { "epoch": 5.254191454840455, "grad_norm": 0.16547654569149017, "learning_rate": 8.086480380292026e-05, "loss": 0.0392, "step": 19430 }, { "epoch": 5.25689561925365, "grad_norm": 0.1291016936302185, "learning_rate": 8.084311851863562e-05, "loss": 0.0412, "step": 19440 }, { "epoch": 5.259599783666847, "grad_norm": 0.11618354171514511, "learning_rate": 8.082142386502591e-05, "loss": 0.0402, "step": 19450 }, { "epoch": 5.262303948080043, "grad_norm": 0.1408640593290329, "learning_rate": 8.079971984868145e-05, "loss": 0.0381, "step": 19460 }, { "epoch": 5.2650081124932395, "grad_norm": 0.15127010643482208, "learning_rate": 8.077800647619532e-05, "loss": 0.0392, "step": 19470 }, { "epoch": 5.267712276906436, "grad_norm": 0.15283659100532532, "learning_rate": 8.075628375416345e-05, "loss": 0.0382, "step": 19480 }, { "epoch": 5.270416441319632, "grad_norm": 0.1307983100414276, "learning_rate": 8.073455168918464e-05, "loss": 0.0374, "step": 19490 }, { "epoch": 5.273120605732829, "grad_norm": 0.19166474044322968, "learning_rate": 8.071281028786055e-05, "loss": 0.0397, "step": 19500 }, { "epoch": 5.275824770146025, "grad_norm": 0.14888927340507507, "learning_rate": 8.069105955679562e-05, "loss": 0.0393, "step": 19510 }, { "epoch": 5.278528934559221, "grad_norm": 0.14774447679519653, "learning_rate": 8.066929950259713e-05, "loss": 0.0385, "step": 19520 }, { "epoch": 5.281233098972417, "grad_norm": 0.11945530772209167, "learning_rate": 8.064753013187522e-05, "loss": 0.0387, "step": 19530 }, { "epoch": 5.283937263385614, "grad_norm": 0.16084086894989014, "learning_rate": 8.062575145124289e-05, "loss": 0.0375, "step": 19540 }, { "epoch": 5.28664142779881, "grad_norm": 0.11317597329616547, "learning_rate": 8.060396346731587e-05, "loss": 0.0384, "step": 19550 }, { "epoch": 5.2893455922120065, "grad_norm": 0.13911616802215576, "learning_rate": 8.058216618671281e-05, "loss": 0.0385, "step": 19560 }, { "epoch": 5.292049756625203, "grad_norm": 0.20568865537643433, "learning_rate": 8.056035961605514e-05, "loss": 0.0381, "step": 19570 }, { "epoch": 5.294753921038399, "grad_norm": 0.1358155757188797, "learning_rate": 8.05385437619671e-05, "loss": 0.0391, "step": 19580 }, { "epoch": 5.297458085451596, "grad_norm": 0.18618382513523102, "learning_rate": 8.05167186310758e-05, "loss": 0.0405, "step": 19590 }, { "epoch": 5.300162249864792, "grad_norm": 0.14239904284477234, "learning_rate": 8.049488423001113e-05, "loss": 0.0389, "step": 19600 }, { "epoch": 5.302866414277988, "grad_norm": 0.10553202778100967, "learning_rate": 8.047304056540581e-05, "loss": 0.0401, "step": 19610 }, { "epoch": 5.305570578691184, "grad_norm": 0.11644337326288223, "learning_rate": 8.045118764389534e-05, "loss": 0.0377, "step": 19620 }, { "epoch": 5.308274743104381, "grad_norm": 0.15743117034435272, "learning_rate": 8.042932547211809e-05, "loss": 0.0404, "step": 19630 }, { "epoch": 5.310978907517577, "grad_norm": 0.14822044968605042, "learning_rate": 8.04074540567152e-05, "loss": 0.0385, "step": 19640 }, { "epoch": 5.3136830719307735, "grad_norm": 0.17082245647907257, "learning_rate": 8.038557340433063e-05, "loss": 0.0367, "step": 19650 }, { "epoch": 5.31638723634397, "grad_norm": 0.19421054422855377, "learning_rate": 8.036368352161115e-05, "loss": 0.0413, "step": 19660 }, { "epoch": 5.319091400757166, "grad_norm": 0.14745649695396423, "learning_rate": 8.034178441520633e-05, "loss": 0.039, "step": 19670 }, { "epoch": 5.321795565170362, "grad_norm": 0.22827038168907166, "learning_rate": 8.031987609176852e-05, "loss": 0.0402, "step": 19680 }, { "epoch": 5.324499729583558, "grad_norm": 0.18989352881908417, "learning_rate": 8.02979585579529e-05, "loss": 0.0406, "step": 19690 }, { "epoch": 5.327203893996755, "grad_norm": 0.14542236924171448, "learning_rate": 8.027603182041745e-05, "loss": 0.0387, "step": 19700 }, { "epoch": 5.329908058409951, "grad_norm": 0.1481267213821411, "learning_rate": 8.025409588582292e-05, "loss": 0.0395, "step": 19710 }, { "epoch": 5.332612222823148, "grad_norm": 0.08872464299201965, "learning_rate": 8.023215076083288e-05, "loss": 0.0375, "step": 19720 }, { "epoch": 5.335316387236344, "grad_norm": 0.21393902599811554, "learning_rate": 8.021019645211367e-05, "loss": 0.0398, "step": 19730 }, { "epoch": 5.3380205516495405, "grad_norm": 0.18363675475120544, "learning_rate": 8.018823296633441e-05, "loss": 0.0378, "step": 19740 }, { "epoch": 5.340724716062737, "grad_norm": 0.29736989736557007, "learning_rate": 8.016626031016708e-05, "loss": 0.0382, "step": 19750 }, { "epoch": 5.343428880475933, "grad_norm": 0.16391430795192719, "learning_rate": 8.014427849028636e-05, "loss": 0.039, "step": 19760 }, { "epoch": 5.346133044889129, "grad_norm": 0.15217696130275726, "learning_rate": 8.012228751336974e-05, "loss": 0.0401, "step": 19770 }, { "epoch": 5.348837209302325, "grad_norm": 0.15877602994441986, "learning_rate": 8.01002873860975e-05, "loss": 0.0384, "step": 19780 }, { "epoch": 5.351541373715522, "grad_norm": 0.1953132599592209, "learning_rate": 8.00782781151527e-05, "loss": 0.0408, "step": 19790 }, { "epoch": 5.354245538128718, "grad_norm": 0.17530569434165955, "learning_rate": 8.005625970722119e-05, "loss": 0.0389, "step": 19800 }, { "epoch": 5.356949702541915, "grad_norm": 0.12130407243967056, "learning_rate": 8.003423216899158e-05, "loss": 0.0391, "step": 19810 }, { "epoch": 5.359653866955111, "grad_norm": 0.22971956431865692, "learning_rate": 8.001219550715522e-05, "loss": 0.0374, "step": 19820 }, { "epoch": 5.3623580313683075, "grad_norm": 0.23955276608467102, "learning_rate": 7.999014972840632e-05, "loss": 0.0387, "step": 19830 }, { "epoch": 5.365062195781504, "grad_norm": 0.26871681213378906, "learning_rate": 7.996809483944174e-05, "loss": 0.0388, "step": 19840 }, { "epoch": 5.367766360194699, "grad_norm": 0.10524957627058029, "learning_rate": 7.994603084696124e-05, "loss": 0.0411, "step": 19850 }, { "epoch": 5.370470524607896, "grad_norm": 0.1485617309808731, "learning_rate": 7.992395775766724e-05, "loss": 0.0385, "step": 19860 }, { "epoch": 5.373174689021092, "grad_norm": 0.1862977296113968, "learning_rate": 7.990187557826497e-05, "loss": 0.0386, "step": 19870 }, { "epoch": 5.375878853434289, "grad_norm": 0.10728713124990463, "learning_rate": 7.987978431546242e-05, "loss": 0.0376, "step": 19880 }, { "epoch": 5.378583017847485, "grad_norm": 0.11994235962629318, "learning_rate": 7.985768397597031e-05, "loss": 0.0397, "step": 19890 }, { "epoch": 5.381287182260682, "grad_norm": 0.17149750888347626, "learning_rate": 7.983557456650216e-05, "loss": 0.0391, "step": 19900 }, { "epoch": 5.383991346673878, "grad_norm": 0.1938135027885437, "learning_rate": 7.981345609377422e-05, "loss": 0.0393, "step": 19910 }, { "epoch": 5.3866955110870745, "grad_norm": 0.15469810366630554, "learning_rate": 7.97913285645055e-05, "loss": 0.0377, "step": 19920 }, { "epoch": 5.38939967550027, "grad_norm": 0.1800260692834854, "learning_rate": 7.976919198541776e-05, "loss": 0.0374, "step": 19930 }, { "epoch": 5.392103839913466, "grad_norm": 0.1721997857093811, "learning_rate": 7.974704636323548e-05, "loss": 0.0387, "step": 19940 }, { "epoch": 5.394808004326663, "grad_norm": 0.21649177372455597, "learning_rate": 7.972489170468597e-05, "loss": 0.0392, "step": 19950 }, { "epoch": 5.397512168739859, "grad_norm": 0.15653236210346222, "learning_rate": 7.970272801649918e-05, "loss": 0.0383, "step": 19960 }, { "epoch": 5.400216333153056, "grad_norm": 0.30716267228126526, "learning_rate": 7.96805553054079e-05, "loss": 0.0377, "step": 19970 }, { "epoch": 5.402920497566252, "grad_norm": 0.12541911005973816, "learning_rate": 7.965837357814756e-05, "loss": 0.0377, "step": 19980 }, { "epoch": 5.405624661979449, "grad_norm": 0.1274174451828003, "learning_rate": 7.963618284145643e-05, "loss": 0.0394, "step": 19990 }, { "epoch": 5.408328826392645, "grad_norm": 0.160974383354187, "learning_rate": 7.961398310207544e-05, "loss": 0.0388, "step": 20000 }, { "epoch": 5.411032990805841, "grad_norm": 0.16076691448688507, "learning_rate": 7.95917743667483e-05, "loss": 0.039, "step": 20010 }, { "epoch": 5.413737155219037, "grad_norm": 0.18078868091106415, "learning_rate": 7.956955664222144e-05, "loss": 0.0405, "step": 20020 }, { "epoch": 5.416441319632233, "grad_norm": 0.18447251617908478, "learning_rate": 7.954732993524399e-05, "loss": 0.0392, "step": 20030 }, { "epoch": 5.41914548404543, "grad_norm": 0.18813090026378632, "learning_rate": 7.952509425256786e-05, "loss": 0.0409, "step": 20040 }, { "epoch": 5.421849648458626, "grad_norm": 0.18579645454883575, "learning_rate": 7.950284960094767e-05, "loss": 0.0405, "step": 20050 }, { "epoch": 5.424553812871823, "grad_norm": 0.20404300093650818, "learning_rate": 7.948059598714076e-05, "loss": 0.0376, "step": 20060 }, { "epoch": 5.427257977285019, "grad_norm": 0.12692883610725403, "learning_rate": 7.945833341790717e-05, "loss": 0.0383, "step": 20070 }, { "epoch": 5.429962141698216, "grad_norm": 0.1347617506980896, "learning_rate": 7.94360619000097e-05, "loss": 0.0399, "step": 20080 }, { "epoch": 5.432666306111411, "grad_norm": 0.16970521211624146, "learning_rate": 7.941378144021381e-05, "loss": 0.0368, "step": 20090 }, { "epoch": 5.4353704705246075, "grad_norm": 0.1554202437400818, "learning_rate": 7.939149204528777e-05, "loss": 0.0384, "step": 20100 }, { "epoch": 5.438074634937804, "grad_norm": 0.17209528386592865, "learning_rate": 7.936919372200246e-05, "loss": 0.0378, "step": 20110 }, { "epoch": 5.440778799351, "grad_norm": 0.15631656348705292, "learning_rate": 7.934688647713158e-05, "loss": 0.0374, "step": 20120 }, { "epoch": 5.443482963764197, "grad_norm": 0.17750662565231323, "learning_rate": 7.932457031745143e-05, "loss": 0.0394, "step": 20130 }, { "epoch": 5.446187128177393, "grad_norm": 0.1605028510093689, "learning_rate": 7.930224524974108e-05, "loss": 0.037, "step": 20140 }, { "epoch": 5.44889129259059, "grad_norm": 0.17955780029296875, "learning_rate": 7.927991128078232e-05, "loss": 0.0395, "step": 20150 }, { "epoch": 5.451595457003786, "grad_norm": 0.17690464854240417, "learning_rate": 7.925756841735958e-05, "loss": 0.04, "step": 20160 }, { "epoch": 5.4542996214169825, "grad_norm": 0.14366452395915985, "learning_rate": 7.923521666626008e-05, "loss": 0.0373, "step": 20170 }, { "epoch": 5.457003785830178, "grad_norm": 0.15516544878482819, "learning_rate": 7.921285603427366e-05, "loss": 0.0389, "step": 20180 }, { "epoch": 5.4597079502433745, "grad_norm": 0.1827796846628189, "learning_rate": 7.91904865281929e-05, "loss": 0.0389, "step": 20190 }, { "epoch": 5.462412114656571, "grad_norm": 0.18848097324371338, "learning_rate": 7.916810815481307e-05, "loss": 0.0378, "step": 20200 }, { "epoch": 5.465116279069767, "grad_norm": 0.17013858258724213, "learning_rate": 7.914572092093211e-05, "loss": 0.0384, "step": 20210 }, { "epoch": 5.467820443482964, "grad_norm": 0.13889525830745697, "learning_rate": 7.912332483335068e-05, "loss": 0.0377, "step": 20220 }, { "epoch": 5.47052460789616, "grad_norm": 0.09583434462547302, "learning_rate": 7.910091989887213e-05, "loss": 0.037, "step": 20230 }, { "epoch": 5.473228772309357, "grad_norm": 0.24691973626613617, "learning_rate": 7.907850612430248e-05, "loss": 0.0378, "step": 20240 }, { "epoch": 5.475932936722553, "grad_norm": 0.15121062099933624, "learning_rate": 7.905608351645044e-05, "loss": 0.0386, "step": 20250 }, { "epoch": 5.478637101135749, "grad_norm": 0.10122504085302353, "learning_rate": 7.90336520821274e-05, "loss": 0.0384, "step": 20260 }, { "epoch": 5.481341265548945, "grad_norm": 0.1255245953798294, "learning_rate": 7.901121182814746e-05, "loss": 0.0381, "step": 20270 }, { "epoch": 5.4840454299621415, "grad_norm": 0.1441972255706787, "learning_rate": 7.898876276132736e-05, "loss": 0.0382, "step": 20280 }, { "epoch": 5.486749594375338, "grad_norm": 0.1310892105102539, "learning_rate": 7.896630488848654e-05, "loss": 0.0395, "step": 20290 }, { "epoch": 5.489453758788534, "grad_norm": 0.14579957723617554, "learning_rate": 7.89438382164471e-05, "loss": 0.0377, "step": 20300 }, { "epoch": 5.492157923201731, "grad_norm": 0.14979827404022217, "learning_rate": 7.892136275203383e-05, "loss": 0.0381, "step": 20310 }, { "epoch": 5.494862087614927, "grad_norm": 0.17504091560840607, "learning_rate": 7.889887850207418e-05, "loss": 0.0386, "step": 20320 }, { "epoch": 5.497566252028124, "grad_norm": 0.15707635879516602, "learning_rate": 7.887638547339827e-05, "loss": 0.0377, "step": 20330 }, { "epoch": 5.500270416441319, "grad_norm": 0.14087453484535217, "learning_rate": 7.885388367283891e-05, "loss": 0.0386, "step": 20340 }, { "epoch": 5.502974580854516, "grad_norm": 0.18525174260139465, "learning_rate": 7.88313731072315e-05, "loss": 0.0383, "step": 20350 }, { "epoch": 5.505678745267712, "grad_norm": 0.14180968701839447, "learning_rate": 7.88088537834142e-05, "loss": 0.0386, "step": 20360 }, { "epoch": 5.5083829096809085, "grad_norm": 0.16164927184581757, "learning_rate": 7.878632570822778e-05, "loss": 0.0394, "step": 20370 }, { "epoch": 5.511087074094105, "grad_norm": 0.12491539865732193, "learning_rate": 7.876378888851567e-05, "loss": 0.0386, "step": 20380 }, { "epoch": 5.513791238507301, "grad_norm": 0.22695162892341614, "learning_rate": 7.874124333112396e-05, "loss": 0.0388, "step": 20390 }, { "epoch": 5.516495402920498, "grad_norm": 0.14111138880252838, "learning_rate": 7.871868904290138e-05, "loss": 0.0384, "step": 20400 }, { "epoch": 5.519199567333694, "grad_norm": 0.1767643541097641, "learning_rate": 7.869612603069935e-05, "loss": 0.0386, "step": 20410 }, { "epoch": 5.521903731746891, "grad_norm": 0.19428510963916779, "learning_rate": 7.867355430137192e-05, "loss": 0.0385, "step": 20420 }, { "epoch": 5.524607896160086, "grad_norm": 0.1552596390247345, "learning_rate": 7.865097386177577e-05, "loss": 0.0377, "step": 20430 }, { "epoch": 5.527312060573283, "grad_norm": 0.1881042867898941, "learning_rate": 7.862838471877023e-05, "loss": 0.0384, "step": 20440 }, { "epoch": 5.530016224986479, "grad_norm": 0.11251015216112137, "learning_rate": 7.860578687921731e-05, "loss": 0.0395, "step": 20450 }, { "epoch": 5.5327203893996755, "grad_norm": 0.2001129388809204, "learning_rate": 7.858318034998164e-05, "loss": 0.0368, "step": 20460 }, { "epoch": 5.535424553812872, "grad_norm": 0.123389832675457, "learning_rate": 7.856056513793046e-05, "loss": 0.0376, "step": 20470 }, { "epoch": 5.538128718226068, "grad_norm": 0.11791729927062988, "learning_rate": 7.85379412499337e-05, "loss": 0.0382, "step": 20480 }, { "epoch": 5.540832882639265, "grad_norm": 0.1223895251750946, "learning_rate": 7.851530869286389e-05, "loss": 0.0395, "step": 20490 }, { "epoch": 5.54353704705246, "grad_norm": 0.12247535586357117, "learning_rate": 7.849266747359619e-05, "loss": 0.0378, "step": 20500 }, { "epoch": 5.546241211465657, "grad_norm": 0.12355975061655045, "learning_rate": 7.847001759900843e-05, "loss": 0.0375, "step": 20510 }, { "epoch": 5.548945375878853, "grad_norm": 0.11700727045536041, "learning_rate": 7.844735907598102e-05, "loss": 0.039, "step": 20520 }, { "epoch": 5.55164954029205, "grad_norm": 0.14669503271579742, "learning_rate": 7.842469191139703e-05, "loss": 0.0387, "step": 20530 }, { "epoch": 5.554353704705246, "grad_norm": 0.16585923731327057, "learning_rate": 7.840201611214215e-05, "loss": 0.0371, "step": 20540 }, { "epoch": 5.5570578691184425, "grad_norm": 0.19198067486286163, "learning_rate": 7.837933168510469e-05, "loss": 0.0381, "step": 20550 }, { "epoch": 5.559762033531639, "grad_norm": 0.2328639179468155, "learning_rate": 7.835663863717559e-05, "loss": 0.0412, "step": 20560 }, { "epoch": 5.562466197944835, "grad_norm": 0.19546610116958618, "learning_rate": 7.833393697524838e-05, "loss": 0.038, "step": 20570 }, { "epoch": 5.565170362358032, "grad_norm": 0.22606785595417023, "learning_rate": 7.831122670621922e-05, "loss": 0.0376, "step": 20580 }, { "epoch": 5.567874526771227, "grad_norm": 0.3021584451198578, "learning_rate": 7.82885078369869e-05, "loss": 0.0391, "step": 20590 }, { "epoch": 5.570578691184424, "grad_norm": 0.21891942620277405, "learning_rate": 7.826578037445283e-05, "loss": 0.0388, "step": 20600 }, { "epoch": 5.57328285559762, "grad_norm": 0.19768419861793518, "learning_rate": 7.824304432552097e-05, "loss": 0.0388, "step": 20610 }, { "epoch": 5.575987020010817, "grad_norm": 0.21026524901390076, "learning_rate": 7.822029969709798e-05, "loss": 0.0375, "step": 20620 }, { "epoch": 5.578691184424013, "grad_norm": 0.18776492774486542, "learning_rate": 7.819754649609306e-05, "loss": 0.0387, "step": 20630 }, { "epoch": 5.5813953488372094, "grad_norm": 0.13995467126369476, "learning_rate": 7.817478472941802e-05, "loss": 0.0379, "step": 20640 }, { "epoch": 5.584099513250406, "grad_norm": 0.1261570006608963, "learning_rate": 7.815201440398727e-05, "loss": 0.0395, "step": 20650 }, { "epoch": 5.586803677663602, "grad_norm": 0.186034694314003, "learning_rate": 7.812923552671789e-05, "loss": 0.0387, "step": 20660 }, { "epoch": 5.589507842076799, "grad_norm": 0.11741123348474503, "learning_rate": 7.810644810452945e-05, "loss": 0.0383, "step": 20670 }, { "epoch": 5.592212006489994, "grad_norm": 0.1270875781774521, "learning_rate": 7.808365214434417e-05, "loss": 0.0379, "step": 20680 }, { "epoch": 5.594916170903191, "grad_norm": 0.17052336037158966, "learning_rate": 7.80608476530869e-05, "loss": 0.039, "step": 20690 }, { "epoch": 5.597620335316387, "grad_norm": 0.13014927506446838, "learning_rate": 7.8038034637685e-05, "loss": 0.04, "step": 20700 }, { "epoch": 5.600324499729584, "grad_norm": 0.17106260359287262, "learning_rate": 7.801521310506848e-05, "loss": 0.0363, "step": 20710 }, { "epoch": 5.60302866414278, "grad_norm": 0.155898779630661, "learning_rate": 7.799238306216994e-05, "loss": 0.0381, "step": 20720 }, { "epoch": 5.605732828555976, "grad_norm": 0.15032432973384857, "learning_rate": 7.796954451592448e-05, "loss": 0.0368, "step": 20730 }, { "epoch": 5.608436992969173, "grad_norm": 0.17017769813537598, "learning_rate": 7.794669747326992e-05, "loss": 0.0388, "step": 20740 }, { "epoch": 5.611141157382368, "grad_norm": 0.13931995630264282, "learning_rate": 7.792384194114654e-05, "loss": 0.0382, "step": 20750 }, { "epoch": 5.613845321795565, "grad_norm": 0.1960311233997345, "learning_rate": 7.790097792649729e-05, "loss": 0.0372, "step": 20760 }, { "epoch": 5.616549486208761, "grad_norm": 0.19786274433135986, "learning_rate": 7.787810543626762e-05, "loss": 0.0375, "step": 20770 }, { "epoch": 5.619253650621958, "grad_norm": 0.14056968688964844, "learning_rate": 7.785522447740558e-05, "loss": 0.0384, "step": 20780 }, { "epoch": 5.621957815035154, "grad_norm": 0.11061259359121323, "learning_rate": 7.783233505686182e-05, "loss": 0.0381, "step": 20790 }, { "epoch": 5.624661979448351, "grad_norm": 0.1248687133193016, "learning_rate": 7.780943718158955e-05, "loss": 0.0374, "step": 20800 }, { "epoch": 5.627366143861547, "grad_norm": 0.1680372804403305, "learning_rate": 7.778653085854453e-05, "loss": 0.0381, "step": 20810 }, { "epoch": 5.630070308274743, "grad_norm": 0.2040892392396927, "learning_rate": 7.77636160946851e-05, "loss": 0.0376, "step": 20820 }, { "epoch": 5.63277447268794, "grad_norm": 0.15647375583648682, "learning_rate": 7.774069289697215e-05, "loss": 0.0389, "step": 20830 }, { "epoch": 5.635478637101135, "grad_norm": 0.20718172192573547, "learning_rate": 7.771776127236913e-05, "loss": 0.0376, "step": 20840 }, { "epoch": 5.638182801514332, "grad_norm": 0.20718266069889069, "learning_rate": 7.769482122784212e-05, "loss": 0.0382, "step": 20850 }, { "epoch": 5.640886965927528, "grad_norm": 0.1857406347990036, "learning_rate": 7.767187277035963e-05, "loss": 0.0376, "step": 20860 }, { "epoch": 5.643591130340725, "grad_norm": 0.10998596251010895, "learning_rate": 7.764891590689285e-05, "loss": 0.0373, "step": 20870 }, { "epoch": 5.646295294753921, "grad_norm": 0.13595204055309296, "learning_rate": 7.762595064441542e-05, "loss": 0.0378, "step": 20880 }, { "epoch": 5.6489994591671175, "grad_norm": 0.19982858002185822, "learning_rate": 7.760297698990362e-05, "loss": 0.0398, "step": 20890 }, { "epoch": 5.651703623580314, "grad_norm": 0.18907351791858673, "learning_rate": 7.757999495033623e-05, "loss": 0.0376, "step": 20900 }, { "epoch": 5.6544077879935095, "grad_norm": 0.12718240916728973, "learning_rate": 7.755700453269456e-05, "loss": 0.038, "step": 20910 }, { "epoch": 5.657111952406706, "grad_norm": 0.16805033385753632, "learning_rate": 7.753400574396254e-05, "loss": 0.0386, "step": 20920 }, { "epoch": 5.659816116819902, "grad_norm": 0.27219733595848083, "learning_rate": 7.751099859112655e-05, "loss": 0.0381, "step": 20930 }, { "epoch": 5.662520281233099, "grad_norm": 0.1262529194355011, "learning_rate": 7.748798308117557e-05, "loss": 0.0393, "step": 20940 }, { "epoch": 5.665224445646295, "grad_norm": 0.14993348717689514, "learning_rate": 7.746495922110112e-05, "loss": 0.0389, "step": 20950 }, { "epoch": 5.667928610059492, "grad_norm": 0.22597581148147583, "learning_rate": 7.744192701789723e-05, "loss": 0.0374, "step": 20960 }, { "epoch": 5.670632774472688, "grad_norm": 0.1312713623046875, "learning_rate": 7.741888647856046e-05, "loss": 0.0368, "step": 20970 }, { "epoch": 5.6733369388858845, "grad_norm": 0.16329820454120636, "learning_rate": 7.739583761008994e-05, "loss": 0.0383, "step": 20980 }, { "epoch": 5.676041103299081, "grad_norm": 0.21780513226985931, "learning_rate": 7.73727804194873e-05, "loss": 0.0386, "step": 20990 }, { "epoch": 5.6787452677122765, "grad_norm": 0.14820027351379395, "learning_rate": 7.734971491375671e-05, "loss": 0.0354, "step": 21000 }, { "epoch": 5.681449432125473, "grad_norm": 0.14052222669124603, "learning_rate": 7.732664109990485e-05, "loss": 0.0375, "step": 21010 }, { "epoch": 5.684153596538669, "grad_norm": 0.197843998670578, "learning_rate": 7.730355898494095e-05, "loss": 0.036, "step": 21020 }, { "epoch": 5.686857760951866, "grad_norm": 0.15730901062488556, "learning_rate": 7.728046857587673e-05, "loss": 0.0377, "step": 21030 }, { "epoch": 5.689561925365062, "grad_norm": 0.15983346104621887, "learning_rate": 7.725736987972647e-05, "loss": 0.0404, "step": 21040 }, { "epoch": 5.692266089778259, "grad_norm": 0.2081548273563385, "learning_rate": 7.723426290350691e-05, "loss": 0.0368, "step": 21050 }, { "epoch": 5.694970254191455, "grad_norm": 0.16243049502372742, "learning_rate": 7.721114765423736e-05, "loss": 0.0379, "step": 21060 }, { "epoch": 5.6976744186046515, "grad_norm": 0.1438361406326294, "learning_rate": 7.718802413893963e-05, "loss": 0.0378, "step": 21070 }, { "epoch": 5.700378583017848, "grad_norm": 0.16974109411239624, "learning_rate": 7.716489236463802e-05, "loss": 0.0382, "step": 21080 }, { "epoch": 5.7030827474310435, "grad_norm": 0.19191086292266846, "learning_rate": 7.714175233835936e-05, "loss": 0.0383, "step": 21090 }, { "epoch": 5.70578691184424, "grad_norm": 0.16128171980381012, "learning_rate": 7.711860406713299e-05, "loss": 0.0372, "step": 21100 }, { "epoch": 5.708491076257436, "grad_norm": 0.1192069724202156, "learning_rate": 7.70954475579907e-05, "loss": 0.0379, "step": 21110 }, { "epoch": 5.711195240670633, "grad_norm": 0.12440460175275803, "learning_rate": 7.707228281796688e-05, "loss": 0.0367, "step": 21120 }, { "epoch": 5.713899405083829, "grad_norm": 0.15060938894748688, "learning_rate": 7.704910985409833e-05, "loss": 0.0381, "step": 21130 }, { "epoch": 5.716603569497026, "grad_norm": 0.13297921419143677, "learning_rate": 7.702592867342439e-05, "loss": 0.0379, "step": 21140 }, { "epoch": 5.719307733910222, "grad_norm": 0.14333423972129822, "learning_rate": 7.700273928298691e-05, "loss": 0.0386, "step": 21150 }, { "epoch": 5.722011898323418, "grad_norm": 0.12516120076179504, "learning_rate": 7.697954168983021e-05, "loss": 0.0372, "step": 21160 }, { "epoch": 5.724716062736614, "grad_norm": 0.16046467423439026, "learning_rate": 7.695633590100109e-05, "loss": 0.0378, "step": 21170 }, { "epoch": 5.7274202271498105, "grad_norm": 0.1567661315202713, "learning_rate": 7.693312192354886e-05, "loss": 0.0383, "step": 21180 }, { "epoch": 5.730124391563007, "grad_norm": 0.2435140758752823, "learning_rate": 7.690989976452532e-05, "loss": 0.0396, "step": 21190 }, { "epoch": 5.732828555976203, "grad_norm": 0.18506188690662384, "learning_rate": 7.688666943098475e-05, "loss": 0.0383, "step": 21200 }, { "epoch": 5.7355327203894, "grad_norm": 0.1752341091632843, "learning_rate": 7.686343092998389e-05, "loss": 0.039, "step": 21210 }, { "epoch": 5.738236884802596, "grad_norm": 0.18676885962486267, "learning_rate": 7.684018426858202e-05, "loss": 0.0392, "step": 21220 }, { "epoch": 5.740941049215793, "grad_norm": 0.11638158559799194, "learning_rate": 7.681692945384084e-05, "loss": 0.0366, "step": 21230 }, { "epoch": 5.743645213628989, "grad_norm": 0.19799602031707764, "learning_rate": 7.679366649282456e-05, "loss": 0.0394, "step": 21240 }, { "epoch": 5.746349378042185, "grad_norm": 0.15900559723377228, "learning_rate": 7.677039539259983e-05, "loss": 0.0379, "step": 21250 }, { "epoch": 5.749053542455381, "grad_norm": 0.19481876492500305, "learning_rate": 7.674711616023581e-05, "loss": 0.0376, "step": 21260 }, { "epoch": 5.7517577068685775, "grad_norm": 0.274161159992218, "learning_rate": 7.672382880280413e-05, "loss": 0.0378, "step": 21270 }, { "epoch": 5.754461871281774, "grad_norm": 0.18661391735076904, "learning_rate": 7.670053332737885e-05, "loss": 0.039, "step": 21280 }, { "epoch": 5.75716603569497, "grad_norm": 0.1641707867383957, "learning_rate": 7.667722974103654e-05, "loss": 0.0381, "step": 21290 }, { "epoch": 5.759870200108167, "grad_norm": 0.17445217072963715, "learning_rate": 7.66539180508562e-05, "loss": 0.0373, "step": 21300 }, { "epoch": 5.762574364521363, "grad_norm": 0.14126551151275635, "learning_rate": 7.663059826391932e-05, "loss": 0.0369, "step": 21310 }, { "epoch": 5.765278528934559, "grad_norm": 0.11148462444543839, "learning_rate": 7.660727038730981e-05, "loss": 0.0392, "step": 21320 }, { "epoch": 5.767982693347756, "grad_norm": 0.11601770669221878, "learning_rate": 7.65839344281141e-05, "loss": 0.0385, "step": 21330 }, { "epoch": 5.770686857760952, "grad_norm": 0.10951288044452667, "learning_rate": 7.656059039342101e-05, "loss": 0.0383, "step": 21340 }, { "epoch": 5.773391022174148, "grad_norm": 0.2182612419128418, "learning_rate": 7.653723829032187e-05, "loss": 0.0381, "step": 21350 }, { "epoch": 5.776095186587344, "grad_norm": 0.1159781739115715, "learning_rate": 7.65138781259104e-05, "loss": 0.0371, "step": 21360 }, { "epoch": 5.778799351000541, "grad_norm": 0.16429002583026886, "learning_rate": 7.649050990728279e-05, "loss": 0.0379, "step": 21370 }, { "epoch": 5.781503515413737, "grad_norm": 0.14881666004657745, "learning_rate": 7.646713364153774e-05, "loss": 0.0382, "step": 21380 }, { "epoch": 5.784207679826934, "grad_norm": 0.14604173600673676, "learning_rate": 7.64437493357763e-05, "loss": 0.0375, "step": 21390 }, { "epoch": 5.78691184424013, "grad_norm": 0.1553943157196045, "learning_rate": 7.642035699710202e-05, "loss": 0.0375, "step": 21400 }, { "epoch": 5.789616008653326, "grad_norm": 0.1653861552476883, "learning_rate": 7.639695663262089e-05, "loss": 0.0376, "step": 21410 }, { "epoch": 5.792320173066522, "grad_norm": 0.1532559096813202, "learning_rate": 7.637354824944128e-05, "loss": 0.0376, "step": 21420 }, { "epoch": 5.795024337479719, "grad_norm": 0.14176808297634125, "learning_rate": 7.635013185467408e-05, "loss": 0.0387, "step": 21430 }, { "epoch": 5.797728501892915, "grad_norm": 0.1301908642053604, "learning_rate": 7.632670745543256e-05, "loss": 0.037, "step": 21440 }, { "epoch": 5.800432666306111, "grad_norm": 0.11504638940095901, "learning_rate": 7.630327505883242e-05, "loss": 0.0385, "step": 21450 }, { "epoch": 5.803136830719308, "grad_norm": 0.14361856877803802, "learning_rate": 7.627983467199182e-05, "loss": 0.0379, "step": 21460 }, { "epoch": 5.805840995132504, "grad_norm": 0.16550381481647491, "learning_rate": 7.625638630203132e-05, "loss": 0.0366, "step": 21470 }, { "epoch": 5.808545159545701, "grad_norm": 0.1793106347322464, "learning_rate": 7.623292995607394e-05, "loss": 0.0366, "step": 21480 }, { "epoch": 5.811249323958897, "grad_norm": 0.10648420453071594, "learning_rate": 7.620946564124507e-05, "loss": 0.0374, "step": 21490 }, { "epoch": 5.813953488372093, "grad_norm": 0.14174839854240417, "learning_rate": 7.618599336467256e-05, "loss": 0.0366, "step": 21500 }, { "epoch": 5.816657652785289, "grad_norm": 0.12147820740938187, "learning_rate": 7.616251313348666e-05, "loss": 0.0378, "step": 21510 }, { "epoch": 5.8193618171984856, "grad_norm": 0.25743749737739563, "learning_rate": 7.613902495482005e-05, "loss": 0.0374, "step": 21520 }, { "epoch": 5.822065981611682, "grad_norm": 0.12687641382217407, "learning_rate": 7.611552883580784e-05, "loss": 0.0371, "step": 21530 }, { "epoch": 5.824770146024878, "grad_norm": 0.17885157465934753, "learning_rate": 7.609202478358748e-05, "loss": 0.0385, "step": 21540 }, { "epoch": 5.827474310438075, "grad_norm": 0.17508162558078766, "learning_rate": 7.606851280529895e-05, "loss": 0.0384, "step": 21550 }, { "epoch": 5.830178474851271, "grad_norm": 0.10044002532958984, "learning_rate": 7.604499290808449e-05, "loss": 0.0373, "step": 21560 }, { "epoch": 5.832882639264467, "grad_norm": 0.12643404304981232, "learning_rate": 7.602146509908888e-05, "loss": 0.0372, "step": 21570 }, { "epoch": 5.835586803677663, "grad_norm": 0.14764149487018585, "learning_rate": 7.599792938545921e-05, "loss": 0.039, "step": 21580 }, { "epoch": 5.83829096809086, "grad_norm": 0.15095330774784088, "learning_rate": 7.597438577434506e-05, "loss": 0.039, "step": 21590 }, { "epoch": 5.840995132504056, "grad_norm": 0.14459313452243805, "learning_rate": 7.595083427289831e-05, "loss": 0.0394, "step": 21600 }, { "epoch": 5.8436992969172525, "grad_norm": 0.18055343627929688, "learning_rate": 7.59272748882733e-05, "loss": 0.0372, "step": 21610 }, { "epoch": 5.846403461330449, "grad_norm": 0.1681176722049713, "learning_rate": 7.590370762762675e-05, "loss": 0.0394, "step": 21620 }, { "epoch": 5.849107625743645, "grad_norm": 0.17865446209907532, "learning_rate": 7.588013249811777e-05, "loss": 0.0377, "step": 21630 }, { "epoch": 5.851811790156842, "grad_norm": 0.14773645997047424, "learning_rate": 7.585654950690786e-05, "loss": 0.0386, "step": 21640 }, { "epoch": 5.854515954570038, "grad_norm": 0.2224685549736023, "learning_rate": 7.583295866116091e-05, "loss": 0.0379, "step": 21650 }, { "epoch": 5.857220118983234, "grad_norm": 0.08858471363782883, "learning_rate": 7.580935996804321e-05, "loss": 0.0373, "step": 21660 }, { "epoch": 5.85992428339643, "grad_norm": 0.2244926542043686, "learning_rate": 7.57857534347234e-05, "loss": 0.0381, "step": 21670 }, { "epoch": 5.862628447809627, "grad_norm": 0.17922209203243256, "learning_rate": 7.576213906837254e-05, "loss": 0.0371, "step": 21680 }, { "epoch": 5.865332612222823, "grad_norm": 0.1875215321779251, "learning_rate": 7.573851687616403e-05, "loss": 0.0376, "step": 21690 }, { "epoch": 5.8680367766360195, "grad_norm": 0.13131192326545715, "learning_rate": 7.571488686527368e-05, "loss": 0.0358, "step": 21700 }, { "epoch": 5.870740941049216, "grad_norm": 0.15947791934013367, "learning_rate": 7.569124904287968e-05, "loss": 0.0369, "step": 21710 }, { "epoch": 5.873445105462412, "grad_norm": 0.1569156050682068, "learning_rate": 7.566760341616254e-05, "loss": 0.037, "step": 21720 }, { "epoch": 5.876149269875609, "grad_norm": 0.13240790367126465, "learning_rate": 7.564394999230519e-05, "loss": 0.0368, "step": 21730 }, { "epoch": 5.878853434288805, "grad_norm": 0.11729367077350616, "learning_rate": 7.562028877849294e-05, "loss": 0.0374, "step": 21740 }, { "epoch": 5.881557598702001, "grad_norm": 0.19173841178417206, "learning_rate": 7.559661978191341e-05, "loss": 0.038, "step": 21750 }, { "epoch": 5.884261763115197, "grad_norm": 0.16687382757663727, "learning_rate": 7.557294300975664e-05, "loss": 0.0379, "step": 21760 }, { "epoch": 5.886965927528394, "grad_norm": 0.1465488225221634, "learning_rate": 7.554925846921499e-05, "loss": 0.0364, "step": 21770 }, { "epoch": 5.88967009194159, "grad_norm": 0.1749396175146103, "learning_rate": 7.552556616748321e-05, "loss": 0.0389, "step": 21780 }, { "epoch": 5.8923742563547865, "grad_norm": 0.1530492901802063, "learning_rate": 7.550186611175838e-05, "loss": 0.0376, "step": 21790 }, { "epoch": 5.895078420767983, "grad_norm": 0.11455587297677994, "learning_rate": 7.547815830923998e-05, "loss": 0.0392, "step": 21800 }, { "epoch": 5.897782585181179, "grad_norm": 0.18384110927581787, "learning_rate": 7.54544427671298e-05, "loss": 0.0379, "step": 21810 }, { "epoch": 5.900486749594375, "grad_norm": 0.1875440776348114, "learning_rate": 7.543071949263198e-05, "loss": 0.037, "step": 21820 }, { "epoch": 5.903190914007571, "grad_norm": 0.09485138207674026, "learning_rate": 7.540698849295305e-05, "loss": 0.0374, "step": 21830 }, { "epoch": 5.905895078420768, "grad_norm": 0.09919480979442596, "learning_rate": 7.538324977530183e-05, "loss": 0.0376, "step": 21840 }, { "epoch": 5.908599242833964, "grad_norm": 0.09468437731266022, "learning_rate": 7.535950334688955e-05, "loss": 0.037, "step": 21850 }, { "epoch": 5.911303407247161, "grad_norm": 0.13908638060092926, "learning_rate": 7.533574921492972e-05, "loss": 0.0376, "step": 21860 }, { "epoch": 5.914007571660357, "grad_norm": 0.12476350367069244, "learning_rate": 7.531198738663824e-05, "loss": 0.0367, "step": 21870 }, { "epoch": 5.9167117360735535, "grad_norm": 0.20821541547775269, "learning_rate": 7.528821786923333e-05, "loss": 0.0384, "step": 21880 }, { "epoch": 5.91941590048675, "grad_norm": 0.13659721612930298, "learning_rate": 7.52644406699355e-05, "loss": 0.0377, "step": 21890 }, { "epoch": 5.922120064899946, "grad_norm": 0.15881606936454773, "learning_rate": 7.524065579596766e-05, "loss": 0.0381, "step": 21900 }, { "epoch": 5.924824229313142, "grad_norm": 0.1429748237133026, "learning_rate": 7.521686325455506e-05, "loss": 0.0372, "step": 21910 }, { "epoch": 5.927528393726338, "grad_norm": 0.12635537981987, "learning_rate": 7.51930630529252e-05, "loss": 0.0376, "step": 21920 }, { "epoch": 5.930232558139535, "grad_norm": 0.1270180344581604, "learning_rate": 7.516925519830797e-05, "loss": 0.0385, "step": 21930 }, { "epoch": 5.932936722552731, "grad_norm": 0.16034457087516785, "learning_rate": 7.514543969793557e-05, "loss": 0.038, "step": 21940 }, { "epoch": 5.935640886965928, "grad_norm": 0.1467713564634323, "learning_rate": 7.512161655904251e-05, "loss": 0.0369, "step": 21950 }, { "epoch": 5.938345051379124, "grad_norm": 0.11084144562482834, "learning_rate": 7.509778578886563e-05, "loss": 0.0379, "step": 21960 }, { "epoch": 5.9410492157923205, "grad_norm": 0.2056582123041153, "learning_rate": 7.507394739464412e-05, "loss": 0.0365, "step": 21970 }, { "epoch": 5.943753380205516, "grad_norm": 0.1458335667848587, "learning_rate": 7.50501013836194e-05, "loss": 0.0375, "step": 21980 }, { "epoch": 5.9464575446187125, "grad_norm": 0.14781343936920166, "learning_rate": 7.50262477630353e-05, "loss": 0.0381, "step": 21990 }, { "epoch": 5.949161709031909, "grad_norm": 0.09574303776025772, "learning_rate": 7.500238654013794e-05, "loss": 0.0368, "step": 22000 }, { "epoch": 5.951865873445105, "grad_norm": 0.08492299914360046, "learning_rate": 7.497851772217566e-05, "loss": 0.0367, "step": 22010 }, { "epoch": 5.954570037858302, "grad_norm": 0.19976010918617249, "learning_rate": 7.495464131639924e-05, "loss": 0.0382, "step": 22020 }, { "epoch": 5.957274202271498, "grad_norm": 0.15155962109565735, "learning_rate": 7.493075733006166e-05, "loss": 0.0371, "step": 22030 }, { "epoch": 5.959978366684695, "grad_norm": 0.18058720231056213, "learning_rate": 7.490686577041828e-05, "loss": 0.037, "step": 22040 }, { "epoch": 5.962682531097891, "grad_norm": 0.13406801223754883, "learning_rate": 7.488296664472668e-05, "loss": 0.038, "step": 22050 }, { "epoch": 5.9653866955110875, "grad_norm": 0.15251436829566956, "learning_rate": 7.485905996024682e-05, "loss": 0.0375, "step": 22060 }, { "epoch": 5.968090859924283, "grad_norm": 0.14817067980766296, "learning_rate": 7.483514572424093e-05, "loss": 0.0368, "step": 22070 }, { "epoch": 5.970795024337479, "grad_norm": 0.14520230889320374, "learning_rate": 7.481122394397349e-05, "loss": 0.0392, "step": 22080 }, { "epoch": 5.973499188750676, "grad_norm": 0.1382662057876587, "learning_rate": 7.478729462671131e-05, "loss": 0.0373, "step": 22090 }, { "epoch": 5.976203353163872, "grad_norm": 0.14611411094665527, "learning_rate": 7.47633577797235e-05, "loss": 0.0367, "step": 22100 }, { "epoch": 5.978907517577069, "grad_norm": 0.12168391793966293, "learning_rate": 7.473941341028144e-05, "loss": 0.037, "step": 22110 }, { "epoch": 5.981611681990265, "grad_norm": 0.14272071421146393, "learning_rate": 7.471546152565879e-05, "loss": 0.0384, "step": 22120 }, { "epoch": 5.984315846403462, "grad_norm": 0.15327566862106323, "learning_rate": 7.46915021331315e-05, "loss": 0.0388, "step": 22130 }, { "epoch": 5.987020010816658, "grad_norm": 0.18653157353401184, "learning_rate": 7.466753523997778e-05, "loss": 0.0388, "step": 22140 }, { "epoch": 5.9897241752298545, "grad_norm": 0.1418362557888031, "learning_rate": 7.464356085347819e-05, "loss": 0.0363, "step": 22150 }, { "epoch": 5.99242833964305, "grad_norm": 0.15246225893497467, "learning_rate": 7.461957898091548e-05, "loss": 0.0373, "step": 22160 }, { "epoch": 5.995132504056246, "grad_norm": 0.161077618598938, "learning_rate": 7.459558962957473e-05, "loss": 0.0368, "step": 22170 }, { "epoch": 5.997836668469443, "grad_norm": 0.18936045467853546, "learning_rate": 7.457159280674326e-05, "loss": 0.0372, "step": 22180 }, { "epoch": 6.000540832882639, "grad_norm": 0.22035710513591766, "learning_rate": 7.454758851971066e-05, "loss": 0.0369, "step": 22190 }, { "epoch": 6.003244997295836, "grad_norm": 0.21244217455387115, "learning_rate": 7.45235767757688e-05, "loss": 0.0377, "step": 22200 }, { "epoch": 6.005949161709032, "grad_norm": 0.15766501426696777, "learning_rate": 7.449955758221183e-05, "loss": 0.0385, "step": 22210 }, { "epoch": 6.008653326122229, "grad_norm": 0.2276957929134369, "learning_rate": 7.447553094633615e-05, "loss": 0.037, "step": 22220 }, { "epoch": 6.011357490535424, "grad_norm": 0.11420471966266632, "learning_rate": 7.445149687544039e-05, "loss": 0.0382, "step": 22230 }, { "epoch": 6.0140616549486205, "grad_norm": 0.177988201379776, "learning_rate": 7.44274553768255e-05, "loss": 0.0384, "step": 22240 }, { "epoch": 6.016765819361817, "grad_norm": 0.17514798045158386, "learning_rate": 7.440340645779464e-05, "loss": 0.0366, "step": 22250 }, { "epoch": 6.019469983775013, "grad_norm": 0.14305266737937927, "learning_rate": 7.437935012565322e-05, "loss": 0.0361, "step": 22260 }, { "epoch": 6.02217414818821, "grad_norm": 0.12727133929729462, "learning_rate": 7.435528638770893e-05, "loss": 0.038, "step": 22270 }, { "epoch": 6.024878312601406, "grad_norm": 0.17132675647735596, "learning_rate": 7.433121525127171e-05, "loss": 0.0367, "step": 22280 }, { "epoch": 6.027582477014603, "grad_norm": 0.16263264417648315, "learning_rate": 7.430713672365371e-05, "loss": 0.0389, "step": 22290 }, { "epoch": 6.030286641427799, "grad_norm": 0.18848863244056702, "learning_rate": 7.428305081216938e-05, "loss": 0.0359, "step": 22300 }, { "epoch": 6.032990805840996, "grad_norm": 0.12060252577066422, "learning_rate": 7.425895752413536e-05, "loss": 0.035, "step": 22310 }, { "epoch": 6.035694970254191, "grad_norm": 0.13828061521053314, "learning_rate": 7.423485686687057e-05, "loss": 0.0357, "step": 22320 }, { "epoch": 6.0383991346673875, "grad_norm": 0.18469174206256866, "learning_rate": 7.421074884769616e-05, "loss": 0.0378, "step": 22330 }, { "epoch": 6.041103299080584, "grad_norm": 0.11647859960794449, "learning_rate": 7.418663347393548e-05, "loss": 0.0375, "step": 22340 }, { "epoch": 6.04380746349378, "grad_norm": 0.13386547565460205, "learning_rate": 7.416251075291418e-05, "loss": 0.038, "step": 22350 }, { "epoch": 6.046511627906977, "grad_norm": 0.22177663445472717, "learning_rate": 7.413838069196007e-05, "loss": 0.0387, "step": 22360 }, { "epoch": 6.049215792320173, "grad_norm": 0.20649099349975586, "learning_rate": 7.411424329840324e-05, "loss": 0.0371, "step": 22370 }, { "epoch": 6.05191995673337, "grad_norm": 0.1910533457994461, "learning_rate": 7.409009857957601e-05, "loss": 0.0374, "step": 22380 }, { "epoch": 6.054624121146566, "grad_norm": 0.1522020548582077, "learning_rate": 7.40659465428129e-05, "loss": 0.0362, "step": 22390 }, { "epoch": 6.057328285559762, "grad_norm": 0.2645544707775116, "learning_rate": 7.404178719545063e-05, "loss": 0.0359, "step": 22400 }, { "epoch": 6.060032449972958, "grad_norm": 0.1652093529701233, "learning_rate": 7.401762054482822e-05, "loss": 0.0387, "step": 22410 }, { "epoch": 6.0627366143861545, "grad_norm": 0.12236011773347855, "learning_rate": 7.39934465982868e-05, "loss": 0.0373, "step": 22420 }, { "epoch": 6.065440778799351, "grad_norm": 0.21128685772418976, "learning_rate": 7.396926536316984e-05, "loss": 0.0366, "step": 22430 }, { "epoch": 6.068144943212547, "grad_norm": 0.13972978293895721, "learning_rate": 7.394507684682293e-05, "loss": 0.0364, "step": 22440 }, { "epoch": 6.070849107625744, "grad_norm": 0.09495452791452408, "learning_rate": 7.392088105659393e-05, "loss": 0.0372, "step": 22450 }, { "epoch": 6.07355327203894, "grad_norm": 0.13130143284797668, "learning_rate": 7.389667799983284e-05, "loss": 0.0381, "step": 22460 }, { "epoch": 6.076257436452137, "grad_norm": 0.15564480423927307, "learning_rate": 7.387246768389193e-05, "loss": 0.037, "step": 22470 }, { "epoch": 6.078961600865332, "grad_norm": 0.14750516414642334, "learning_rate": 7.384825011612563e-05, "loss": 0.0359, "step": 22480 }, { "epoch": 6.081665765278529, "grad_norm": 0.12281301617622375, "learning_rate": 7.382402530389066e-05, "loss": 0.0369, "step": 22490 }, { "epoch": 6.084369929691725, "grad_norm": 0.18947899341583252, "learning_rate": 7.379979325454582e-05, "loss": 0.0363, "step": 22500 }, { "epoch": 6.0870740941049215, "grad_norm": 0.17256762087345123, "learning_rate": 7.37755539754522e-05, "loss": 0.0387, "step": 22510 }, { "epoch": 6.089778258518118, "grad_norm": 0.1676339954137802, "learning_rate": 7.375130747397302e-05, "loss": 0.0375, "step": 22520 }, { "epoch": 6.092482422931314, "grad_norm": 0.08276750892400742, "learning_rate": 7.372705375747377e-05, "loss": 0.0374, "step": 22530 }, { "epoch": 6.095186587344511, "grad_norm": 0.09536480903625488, "learning_rate": 7.370279283332205e-05, "loss": 0.0379, "step": 22540 }, { "epoch": 6.097890751757707, "grad_norm": 0.16374631226062775, "learning_rate": 7.36785247088877e-05, "loss": 0.0377, "step": 22550 }, { "epoch": 6.100594916170903, "grad_norm": 0.13221751153469086, "learning_rate": 7.365424939154275e-05, "loss": 0.037, "step": 22560 }, { "epoch": 6.103299080584099, "grad_norm": 0.1405823975801468, "learning_rate": 7.362996688866138e-05, "loss": 0.0367, "step": 22570 }, { "epoch": 6.106003244997296, "grad_norm": 0.14564314484596252, "learning_rate": 7.360567720761999e-05, "loss": 0.0358, "step": 22580 }, { "epoch": 6.108707409410492, "grad_norm": 0.12171824276447296, "learning_rate": 7.358138035579711e-05, "loss": 0.0359, "step": 22590 }, { "epoch": 6.1114115738236885, "grad_norm": 0.16816313564777374, "learning_rate": 7.355707634057354e-05, "loss": 0.0372, "step": 22600 }, { "epoch": 6.114115738236885, "grad_norm": 0.13128232955932617, "learning_rate": 7.353276516933215e-05, "loss": 0.0369, "step": 22610 }, { "epoch": 6.116819902650081, "grad_norm": 0.12030956894159317, "learning_rate": 7.350844684945806e-05, "loss": 0.0358, "step": 22620 }, { "epoch": 6.119524067063278, "grad_norm": 0.10828165709972382, "learning_rate": 7.348412138833851e-05, "loss": 0.0372, "step": 22630 }, { "epoch": 6.122228231476473, "grad_norm": 0.12547667324543, "learning_rate": 7.345978879336295e-05, "loss": 0.0376, "step": 22640 }, { "epoch": 6.12493239588967, "grad_norm": 0.16865618526935577, "learning_rate": 7.343544907192296e-05, "loss": 0.038, "step": 22650 }, { "epoch": 6.127636560302866, "grad_norm": 0.19377997517585754, "learning_rate": 7.341110223141235e-05, "loss": 0.0381, "step": 22660 }, { "epoch": 6.130340724716063, "grad_norm": 0.15254434943199158, "learning_rate": 7.3386748279227e-05, "loss": 0.0363, "step": 22670 }, { "epoch": 6.133044889129259, "grad_norm": 0.1444394886493683, "learning_rate": 7.336238722276501e-05, "loss": 0.0357, "step": 22680 }, { "epoch": 6.1357490535424555, "grad_norm": 0.1513212025165558, "learning_rate": 7.333801906942663e-05, "loss": 0.0366, "step": 22690 }, { "epoch": 6.138453217955652, "grad_norm": 0.21014954149723053, "learning_rate": 7.331364382661428e-05, "loss": 0.0363, "step": 22700 }, { "epoch": 6.141157382368848, "grad_norm": 0.16845019161701202, "learning_rate": 7.328926150173248e-05, "loss": 0.0384, "step": 22710 }, { "epoch": 6.143861546782045, "grad_norm": 0.16943222284317017, "learning_rate": 7.326487210218795e-05, "loss": 0.0369, "step": 22720 }, { "epoch": 6.14656571119524, "grad_norm": 0.16383376717567444, "learning_rate": 7.324047563538955e-05, "loss": 0.0385, "step": 22730 }, { "epoch": 6.149269875608437, "grad_norm": 0.26006653904914856, "learning_rate": 7.321607210874828e-05, "loss": 0.0376, "step": 22740 }, { "epoch": 6.151974040021633, "grad_norm": 0.10886461287736893, "learning_rate": 7.31916615296773e-05, "loss": 0.0365, "step": 22750 }, { "epoch": 6.15467820443483, "grad_norm": 0.16850373148918152, "learning_rate": 7.316724390559188e-05, "loss": 0.0382, "step": 22760 }, { "epoch": 6.157382368848026, "grad_norm": 0.22192391753196716, "learning_rate": 7.314281924390946e-05, "loss": 0.0366, "step": 22770 }, { "epoch": 6.1600865332612225, "grad_norm": 0.10949735343456268, "learning_rate": 7.311838755204959e-05, "loss": 0.0372, "step": 22780 }, { "epoch": 6.162790697674419, "grad_norm": 0.12259552627801895, "learning_rate": 7.3093948837434e-05, "loss": 0.0351, "step": 22790 }, { "epoch": 6.165494862087615, "grad_norm": 0.2139790952205658, "learning_rate": 7.306950310748651e-05, "loss": 0.0368, "step": 22800 }, { "epoch": 6.168199026500811, "grad_norm": 0.14546923339366913, "learning_rate": 7.304505036963311e-05, "loss": 0.0357, "step": 22810 }, { "epoch": 6.170903190914007, "grad_norm": 0.14547833800315857, "learning_rate": 7.302059063130186e-05, "loss": 0.0375, "step": 22820 }, { "epoch": 6.173607355327204, "grad_norm": 0.11334849148988724, "learning_rate": 7.2996123899923e-05, "loss": 0.0362, "step": 22830 }, { "epoch": 6.1763115197404, "grad_norm": 0.09860286861658096, "learning_rate": 7.297165018292886e-05, "loss": 0.0366, "step": 22840 }, { "epoch": 6.179015684153597, "grad_norm": 0.1759021282196045, "learning_rate": 7.294716948775396e-05, "loss": 0.0364, "step": 22850 }, { "epoch": 6.181719848566793, "grad_norm": 0.16947205364704132, "learning_rate": 7.292268182183484e-05, "loss": 0.0367, "step": 22860 }, { "epoch": 6.1844240129799894, "grad_norm": 0.1761256754398346, "learning_rate": 7.28981871926102e-05, "loss": 0.0362, "step": 22870 }, { "epoch": 6.187128177393186, "grad_norm": 0.1514870524406433, "learning_rate": 7.28736856075209e-05, "loss": 0.0384, "step": 22880 }, { "epoch": 6.189832341806381, "grad_norm": 0.1182810515165329, "learning_rate": 7.284917707400985e-05, "loss": 0.0375, "step": 22890 }, { "epoch": 6.192536506219578, "grad_norm": 0.1500759869813919, "learning_rate": 7.282466159952212e-05, "loss": 0.0365, "step": 22900 }, { "epoch": 6.195240670632774, "grad_norm": 0.18703119456768036, "learning_rate": 7.280013919150483e-05, "loss": 0.0373, "step": 22910 }, { "epoch": 6.197944835045971, "grad_norm": 0.1529865562915802, "learning_rate": 7.277560985740728e-05, "loss": 0.0359, "step": 22920 }, { "epoch": 6.200648999459167, "grad_norm": 0.14901752769947052, "learning_rate": 7.275107360468079e-05, "loss": 0.0361, "step": 22930 }, { "epoch": 6.203353163872364, "grad_norm": 0.18015716969966888, "learning_rate": 7.272653044077885e-05, "loss": 0.0383, "step": 22940 }, { "epoch": 6.20605732828556, "grad_norm": 0.13967369496822357, "learning_rate": 7.270198037315703e-05, "loss": 0.0372, "step": 22950 }, { "epoch": 6.208761492698756, "grad_norm": 0.13668254017829895, "learning_rate": 7.267742340927297e-05, "loss": 0.0372, "step": 22960 }, { "epoch": 6.211465657111952, "grad_norm": 0.1198643147945404, "learning_rate": 7.265285955658645e-05, "loss": 0.0366, "step": 22970 }, { "epoch": 6.214169821525148, "grad_norm": 0.11090222746133804, "learning_rate": 7.26282888225593e-05, "loss": 0.0362, "step": 22980 }, { "epoch": 6.216873985938345, "grad_norm": 0.15598468482494354, "learning_rate": 7.260371121465548e-05, "loss": 0.0384, "step": 22990 }, { "epoch": 6.219578150351541, "grad_norm": 0.1937537044286728, "learning_rate": 7.2579126740341e-05, "loss": 0.0373, "step": 23000 }, { "epoch": 6.222282314764738, "grad_norm": 0.17896100878715515, "learning_rate": 7.2554535407084e-05, "loss": 0.0361, "step": 23010 }, { "epoch": 6.224986479177934, "grad_norm": 0.13024267554283142, "learning_rate": 7.252993722235464e-05, "loss": 0.0375, "step": 23020 }, { "epoch": 6.227690643591131, "grad_norm": 0.12720844149589539, "learning_rate": 7.250533219362523e-05, "loss": 0.0366, "step": 23030 }, { "epoch": 6.230394808004327, "grad_norm": 0.180471733212471, "learning_rate": 7.248072032837012e-05, "loss": 0.0353, "step": 23040 }, { "epoch": 6.2330989724175225, "grad_norm": 0.21499422192573547, "learning_rate": 7.245610163406575e-05, "loss": 0.0349, "step": 23050 }, { "epoch": 6.235803136830719, "grad_norm": 0.13004350662231445, "learning_rate": 7.243147611819061e-05, "loss": 0.036, "step": 23060 }, { "epoch": 6.238507301243915, "grad_norm": 0.19100409746170044, "learning_rate": 7.240684378822531e-05, "loss": 0.0348, "step": 23070 }, { "epoch": 6.241211465657112, "grad_norm": 0.20147643983364105, "learning_rate": 7.238220465165248e-05, "loss": 0.0374, "step": 23080 }, { "epoch": 6.243915630070308, "grad_norm": 0.16797971725463867, "learning_rate": 7.235755871595684e-05, "loss": 0.038, "step": 23090 }, { "epoch": 6.246619794483505, "grad_norm": 0.14049990475177765, "learning_rate": 7.233290598862517e-05, "loss": 0.0354, "step": 23100 }, { "epoch": 6.249323958896701, "grad_norm": 0.1253790706396103, "learning_rate": 7.230824647714635e-05, "loss": 0.0358, "step": 23110 }, { "epoch": 6.2520281233098975, "grad_norm": 0.16798153519630432, "learning_rate": 7.228358018901124e-05, "loss": 0.0367, "step": 23120 }, { "epoch": 6.254732287723094, "grad_norm": 0.16045518219470978, "learning_rate": 7.225890713171286e-05, "loss": 0.037, "step": 23130 }, { "epoch": 6.2574364521362895, "grad_norm": 0.19936899840831757, "learning_rate": 7.223422731274618e-05, "loss": 0.0369, "step": 23140 }, { "epoch": 6.260140616549486, "grad_norm": 0.1125766783952713, "learning_rate": 7.220954073960832e-05, "loss": 0.0379, "step": 23150 }, { "epoch": 6.262844780962682, "grad_norm": 0.22566354274749756, "learning_rate": 7.218484741979838e-05, "loss": 0.0358, "step": 23160 }, { "epoch": 6.265548945375879, "grad_norm": 0.11653165519237518, "learning_rate": 7.216014736081756e-05, "loss": 0.036, "step": 23170 }, { "epoch": 6.268253109789075, "grad_norm": 0.141583651304245, "learning_rate": 7.213544057016906e-05, "loss": 0.0354, "step": 23180 }, { "epoch": 6.270957274202272, "grad_norm": 0.13145728409290314, "learning_rate": 7.211072705535819e-05, "loss": 0.037, "step": 23190 }, { "epoch": 6.273661438615468, "grad_norm": 0.14784637093544006, "learning_rate": 7.208600682389224e-05, "loss": 0.0363, "step": 23200 }, { "epoch": 6.2763656030286645, "grad_norm": 0.20145142078399658, "learning_rate": 7.206127988328055e-05, "loss": 0.0377, "step": 23210 }, { "epoch": 6.27906976744186, "grad_norm": 0.12655505537986755, "learning_rate": 7.203654624103453e-05, "loss": 0.0362, "step": 23220 }, { "epoch": 6.2817739318550565, "grad_norm": 0.14689956605434418, "learning_rate": 7.201180590466761e-05, "loss": 0.0371, "step": 23230 }, { "epoch": 6.284478096268253, "grad_norm": 0.11318440735340118, "learning_rate": 7.198705888169523e-05, "loss": 0.0382, "step": 23240 }, { "epoch": 6.287182260681449, "grad_norm": 0.10265595465898514, "learning_rate": 7.196230517963491e-05, "loss": 0.0351, "step": 23250 }, { "epoch": 6.289886425094646, "grad_norm": 0.15176701545715332, "learning_rate": 7.193754480600615e-05, "loss": 0.0346, "step": 23260 }, { "epoch": 6.292590589507842, "grad_norm": 0.16651351749897003, "learning_rate": 7.19127777683305e-05, "loss": 0.0373, "step": 23270 }, { "epoch": 6.295294753921039, "grad_norm": 0.24071918427944183, "learning_rate": 7.188800407413156e-05, "loss": 0.037, "step": 23280 }, { "epoch": 6.297998918334235, "grad_norm": 0.13064102828502655, "learning_rate": 7.186322373093489e-05, "loss": 0.037, "step": 23290 }, { "epoch": 6.300703082747431, "grad_norm": 0.11015281081199646, "learning_rate": 7.18384367462681e-05, "loss": 0.0374, "step": 23300 }, { "epoch": 6.303407247160627, "grad_norm": 0.13653384149074554, "learning_rate": 7.181364312766085e-05, "loss": 0.036, "step": 23310 }, { "epoch": 6.3061114115738235, "grad_norm": 0.225728839635849, "learning_rate": 7.178884288264477e-05, "loss": 0.0361, "step": 23320 }, { "epoch": 6.30881557598702, "grad_norm": 0.21288859844207764, "learning_rate": 7.176403601875353e-05, "loss": 0.0367, "step": 23330 }, { "epoch": 6.311519740400216, "grad_norm": 0.13702742755413055, "learning_rate": 7.173922254352279e-05, "loss": 0.0374, "step": 23340 }, { "epoch": 6.314223904813413, "grad_norm": 0.1692623347043991, "learning_rate": 7.171440246449024e-05, "loss": 0.0351, "step": 23350 }, { "epoch": 6.316928069226609, "grad_norm": 0.12618914246559143, "learning_rate": 7.168957578919555e-05, "loss": 0.0372, "step": 23360 }, { "epoch": 6.319632233639806, "grad_norm": 0.12626436352729797, "learning_rate": 7.16647425251804e-05, "loss": 0.0366, "step": 23370 }, { "epoch": 6.322336398053002, "grad_norm": 0.20605280995368958, "learning_rate": 7.163990267998852e-05, "loss": 0.0369, "step": 23380 }, { "epoch": 6.325040562466198, "grad_norm": 0.21091704070568085, "learning_rate": 7.161505626116556e-05, "loss": 0.0369, "step": 23390 }, { "epoch": 6.327744726879394, "grad_norm": 0.18073804676532745, "learning_rate": 7.159020327625923e-05, "loss": 0.0359, "step": 23400 }, { "epoch": 6.3304488912925905, "grad_norm": 0.16616809368133545, "learning_rate": 7.15653437328192e-05, "loss": 0.0368, "step": 23410 }, { "epoch": 6.333153055705787, "grad_norm": 0.13501094281673431, "learning_rate": 7.154047763839713e-05, "loss": 0.0367, "step": 23420 }, { "epoch": 6.335857220118983, "grad_norm": 0.19085554778575897, "learning_rate": 7.15156050005467e-05, "loss": 0.0348, "step": 23430 }, { "epoch": 6.33856138453218, "grad_norm": 0.2068476378917694, "learning_rate": 7.149072582682358e-05, "loss": 0.0364, "step": 23440 }, { "epoch": 6.341265548945376, "grad_norm": 0.1088905781507492, "learning_rate": 7.146584012478535e-05, "loss": 0.038, "step": 23450 }, { "epoch": 6.343969713358572, "grad_norm": 0.14064480364322662, "learning_rate": 7.144094790199169e-05, "loss": 0.0387, "step": 23460 }, { "epoch": 6.346673877771768, "grad_norm": 0.12939579784870148, "learning_rate": 7.141604916600415e-05, "loss": 0.0363, "step": 23470 }, { "epoch": 6.349378042184965, "grad_norm": 0.13687090575695038, "learning_rate": 7.139114392438635e-05, "loss": 0.0365, "step": 23480 }, { "epoch": 6.352082206598161, "grad_norm": 0.13184383511543274, "learning_rate": 7.136623218470382e-05, "loss": 0.0366, "step": 23490 }, { "epoch": 6.3547863710113575, "grad_norm": 0.2018529772758484, "learning_rate": 7.13413139545241e-05, "loss": 0.0386, "step": 23500 }, { "epoch": 6.357490535424554, "grad_norm": 0.10247929394245148, "learning_rate": 7.131638924141668e-05, "loss": 0.0364, "step": 23510 }, { "epoch": 6.36019469983775, "grad_norm": 0.1683139204978943, "learning_rate": 7.129145805295304e-05, "loss": 0.0351, "step": 23520 }, { "epoch": 6.362898864250947, "grad_norm": 0.19998662173748016, "learning_rate": 7.126652039670661e-05, "loss": 0.0345, "step": 23530 }, { "epoch": 6.365603028664143, "grad_norm": 0.1307964026927948, "learning_rate": 7.124157628025278e-05, "loss": 0.037, "step": 23540 }, { "epoch": 6.368307193077339, "grad_norm": 0.26806640625, "learning_rate": 7.121662571116894e-05, "loss": 0.0368, "step": 23550 }, { "epoch": 6.371011357490535, "grad_norm": 0.10148759931325912, "learning_rate": 7.119166869703441e-05, "loss": 0.035, "step": 23560 }, { "epoch": 6.373715521903732, "grad_norm": 0.1469566822052002, "learning_rate": 7.116670524543044e-05, "loss": 0.0363, "step": 23570 }, { "epoch": 6.376419686316928, "grad_norm": 0.2023547887802124, "learning_rate": 7.114173536394032e-05, "loss": 0.036, "step": 23580 }, { "epoch": 6.3791238507301244, "grad_norm": 0.1756598949432373, "learning_rate": 7.111675906014917e-05, "loss": 0.0364, "step": 23590 }, { "epoch": 6.381828015143321, "grad_norm": 0.14128144085407257, "learning_rate": 7.109177634164421e-05, "loss": 0.0359, "step": 23600 }, { "epoch": 6.384532179556517, "grad_norm": 0.12309995293617249, "learning_rate": 7.106678721601449e-05, "loss": 0.0363, "step": 23610 }, { "epoch": 6.387236343969714, "grad_norm": 0.15597784519195557, "learning_rate": 7.104179169085103e-05, "loss": 0.0368, "step": 23620 }, { "epoch": 6.389940508382909, "grad_norm": 0.15173359215259552, "learning_rate": 7.101678977374683e-05, "loss": 0.0353, "step": 23630 }, { "epoch": 6.392644672796106, "grad_norm": 0.2022669017314911, "learning_rate": 7.099178147229685e-05, "loss": 0.0379, "step": 23640 }, { "epoch": 6.395348837209302, "grad_norm": 0.16102291643619537, "learning_rate": 7.096676679409789e-05, "loss": 0.0353, "step": 23650 }, { "epoch": 6.398053001622499, "grad_norm": 0.24970319867134094, "learning_rate": 7.094174574674877e-05, "loss": 0.0353, "step": 23660 }, { "epoch": 6.400757166035695, "grad_norm": 0.11615010350942612, "learning_rate": 7.091671833785025e-05, "loss": 0.0354, "step": 23670 }, { "epoch": 6.403461330448891, "grad_norm": 0.10315459966659546, "learning_rate": 7.089168457500493e-05, "loss": 0.0364, "step": 23680 }, { "epoch": 6.406165494862088, "grad_norm": 0.13086220622062683, "learning_rate": 7.086664446581747e-05, "loss": 0.0371, "step": 23690 }, { "epoch": 6.408869659275284, "grad_norm": 0.1350151002407074, "learning_rate": 7.084159801789438e-05, "loss": 0.0371, "step": 23700 }, { "epoch": 6.41157382368848, "grad_norm": 0.1420426070690155, "learning_rate": 7.081654523884411e-05, "loss": 0.0357, "step": 23710 }, { "epoch": 6.414277988101676, "grad_norm": 0.13627135753631592, "learning_rate": 7.0791486136277e-05, "loss": 0.0384, "step": 23720 }, { "epoch": 6.416982152514873, "grad_norm": 0.11405161768198013, "learning_rate": 7.07664207178054e-05, "loss": 0.0353, "step": 23730 }, { "epoch": 6.419686316928069, "grad_norm": 0.17959971725940704, "learning_rate": 7.074134899104345e-05, "loss": 0.0366, "step": 23740 }, { "epoch": 6.4223904813412656, "grad_norm": 0.13157223165035248, "learning_rate": 7.071627096360735e-05, "loss": 0.0367, "step": 23750 }, { "epoch": 6.425094645754462, "grad_norm": 0.13612425327301025, "learning_rate": 7.069118664311511e-05, "loss": 0.0366, "step": 23760 }, { "epoch": 6.427798810167658, "grad_norm": 0.21831931173801422, "learning_rate": 7.06660960371867e-05, "loss": 0.0359, "step": 23770 }, { "epoch": 6.430502974580855, "grad_norm": 0.29502496123313904, "learning_rate": 7.064099915344396e-05, "loss": 0.0378, "step": 23780 }, { "epoch": 6.433207138994051, "grad_norm": 0.14337056875228882, "learning_rate": 7.061589599951066e-05, "loss": 0.0372, "step": 23790 }, { "epoch": 6.435911303407247, "grad_norm": 0.11358760297298431, "learning_rate": 7.05907865830125e-05, "loss": 0.0385, "step": 23800 }, { "epoch": 6.438615467820443, "grad_norm": 0.16660098731517792, "learning_rate": 7.056567091157703e-05, "loss": 0.0363, "step": 23810 }, { "epoch": 6.44131963223364, "grad_norm": 0.1822713017463684, "learning_rate": 7.054054899283375e-05, "loss": 0.0357, "step": 23820 }, { "epoch": 6.444023796646836, "grad_norm": 0.15399911999702454, "learning_rate": 7.051542083441403e-05, "loss": 0.0358, "step": 23830 }, { "epoch": 6.4467279610600325, "grad_norm": 0.14986678957939148, "learning_rate": 7.049028644395113e-05, "loss": 0.0376, "step": 23840 }, { "epoch": 6.449432125473229, "grad_norm": 0.15056075155735016, "learning_rate": 7.046514582908024e-05, "loss": 0.0368, "step": 23850 }, { "epoch": 6.452136289886425, "grad_norm": 0.15464049577713013, "learning_rate": 7.043999899743838e-05, "loss": 0.0359, "step": 23860 }, { "epoch": 6.454840454299622, "grad_norm": 0.18651866912841797, "learning_rate": 7.041484595666451e-05, "loss": 0.0366, "step": 23870 }, { "epoch": 6.457544618712817, "grad_norm": 0.16530033946037292, "learning_rate": 7.038968671439948e-05, "loss": 0.0354, "step": 23880 }, { "epoch": 6.460248783126014, "grad_norm": 0.13762734830379486, "learning_rate": 7.036452127828596e-05, "loss": 0.0356, "step": 23890 }, { "epoch": 6.46295294753921, "grad_norm": 0.1573653221130371, "learning_rate": 7.033934965596859e-05, "loss": 0.0363, "step": 23900 }, { "epoch": 6.465657111952407, "grad_norm": 0.12300077080726624, "learning_rate": 7.031417185509381e-05, "loss": 0.0368, "step": 23910 }, { "epoch": 6.468361276365603, "grad_norm": 0.12955032289028168, "learning_rate": 7.028898788331e-05, "loss": 0.036, "step": 23920 }, { "epoch": 6.4710654407787995, "grad_norm": 0.1462220400571823, "learning_rate": 7.026379774826736e-05, "loss": 0.0356, "step": 23930 }, { "epoch": 6.473769605191996, "grad_norm": 0.15115638077259064, "learning_rate": 7.0238601457618e-05, "loss": 0.0365, "step": 23940 }, { "epoch": 6.476473769605192, "grad_norm": 0.12030498683452606, "learning_rate": 7.02133990190159e-05, "loss": 0.0384, "step": 23950 }, { "epoch": 6.479177934018388, "grad_norm": 0.16741187870502472, "learning_rate": 7.018819044011687e-05, "loss": 0.0363, "step": 23960 }, { "epoch": 6.481882098431584, "grad_norm": 0.19839271903038025, "learning_rate": 7.016297572857863e-05, "loss": 0.0366, "step": 23970 }, { "epoch": 6.484586262844781, "grad_norm": 0.15689362585544586, "learning_rate": 7.013775489206072e-05, "loss": 0.0367, "step": 23980 }, { "epoch": 6.487290427257977, "grad_norm": 0.15316052734851837, "learning_rate": 7.01125279382246e-05, "loss": 0.0373, "step": 23990 }, { "epoch": 6.489994591671174, "grad_norm": 0.13344666361808777, "learning_rate": 7.008729487473351e-05, "loss": 0.0354, "step": 24000 }, { "epoch": 6.49269875608437, "grad_norm": 0.13001465797424316, "learning_rate": 7.006205570925263e-05, "loss": 0.0361, "step": 24010 }, { "epoch": 6.4954029204975665, "grad_norm": 0.10760396718978882, "learning_rate": 7.003681044944892e-05, "loss": 0.035, "step": 24020 }, { "epoch": 6.498107084910763, "grad_norm": 0.161954864859581, "learning_rate": 7.001155910299126e-05, "loss": 0.0366, "step": 24030 }, { "epoch": 6.500811249323959, "grad_norm": 0.1526464968919754, "learning_rate": 6.99863016775503e-05, "loss": 0.0361, "step": 24040 }, { "epoch": 6.503515413737155, "grad_norm": 0.10103194415569305, "learning_rate": 6.996103818079859e-05, "loss": 0.0347, "step": 24050 }, { "epoch": 6.506219578150351, "grad_norm": 0.22134821116924286, "learning_rate": 6.993576862041054e-05, "loss": 0.0365, "step": 24060 }, { "epoch": 6.508923742563548, "grad_norm": 0.14807257056236267, "learning_rate": 6.991049300406235e-05, "loss": 0.0356, "step": 24070 }, { "epoch": 6.511627906976744, "grad_norm": 0.1625940352678299, "learning_rate": 6.988521133943209e-05, "loss": 0.037, "step": 24080 }, { "epoch": 6.514332071389941, "grad_norm": 0.17019198834896088, "learning_rate": 6.985992363419966e-05, "loss": 0.0363, "step": 24090 }, { "epoch": 6.517036235803137, "grad_norm": 0.18081852793693542, "learning_rate": 6.983462989604682e-05, "loss": 0.0357, "step": 24100 }, { "epoch": 6.5197404002163335, "grad_norm": 0.21649563312530518, "learning_rate": 6.980933013265709e-05, "loss": 0.0361, "step": 24110 }, { "epoch": 6.522444564629529, "grad_norm": 0.1394544541835785, "learning_rate": 6.978402435171592e-05, "loss": 0.0356, "step": 24120 }, { "epoch": 6.5251487290427255, "grad_norm": 0.2310725301504135, "learning_rate": 6.975871256091052e-05, "loss": 0.037, "step": 24130 }, { "epoch": 6.527852893455922, "grad_norm": 0.2189953774213791, "learning_rate": 6.973339476792995e-05, "loss": 0.0364, "step": 24140 }, { "epoch": 6.530557057869118, "grad_norm": 0.14267921447753906, "learning_rate": 6.970807098046505e-05, "loss": 0.0365, "step": 24150 }, { "epoch": 6.533261222282315, "grad_norm": 0.17955732345581055, "learning_rate": 6.968274120620858e-05, "loss": 0.0359, "step": 24160 }, { "epoch": 6.535965386695511, "grad_norm": 0.15259401500225067, "learning_rate": 6.965740545285499e-05, "loss": 0.0344, "step": 24170 }, { "epoch": 6.538669551108708, "grad_norm": 0.19462516903877258, "learning_rate": 6.963206372810068e-05, "loss": 0.0366, "step": 24180 }, { "epoch": 6.541373715521904, "grad_norm": 0.17884185910224915, "learning_rate": 6.960671603964375e-05, "loss": 0.0363, "step": 24190 }, { "epoch": 6.5440778799351005, "grad_norm": 0.26292628049850464, "learning_rate": 6.958136239518418e-05, "loss": 0.0351, "step": 24200 }, { "epoch": 6.546782044348296, "grad_norm": 0.15920239686965942, "learning_rate": 6.955600280242371e-05, "loss": 0.0354, "step": 24210 }, { "epoch": 6.5494862087614925, "grad_norm": 0.1418861597776413, "learning_rate": 6.953063726906596e-05, "loss": 0.0358, "step": 24220 }, { "epoch": 6.552190373174689, "grad_norm": 0.15354669094085693, "learning_rate": 6.950526580281626e-05, "loss": 0.0361, "step": 24230 }, { "epoch": 6.554894537587885, "grad_norm": 0.14140623807907104, "learning_rate": 6.947988841138184e-05, "loss": 0.0357, "step": 24240 }, { "epoch": 6.557598702001082, "grad_norm": 0.16392149031162262, "learning_rate": 6.945450510247165e-05, "loss": 0.0358, "step": 24250 }, { "epoch": 6.560302866414278, "grad_norm": 0.1619793176651001, "learning_rate": 6.942911588379647e-05, "loss": 0.0369, "step": 24260 }, { "epoch": 6.563007030827475, "grad_norm": 0.15174533426761627, "learning_rate": 6.940372076306888e-05, "loss": 0.0343, "step": 24270 }, { "epoch": 6.56571119524067, "grad_norm": 0.18213042616844177, "learning_rate": 6.937831974800326e-05, "loss": 0.0378, "step": 24280 }, { "epoch": 6.568415359653867, "grad_norm": 0.18782155215740204, "learning_rate": 6.935291284631574e-05, "loss": 0.0374, "step": 24290 }, { "epoch": 6.571119524067063, "grad_norm": 0.18335993587970734, "learning_rate": 6.932750006572428e-05, "loss": 0.0358, "step": 24300 }, { "epoch": 6.573823688480259, "grad_norm": 0.1100623682141304, "learning_rate": 6.930208141394863e-05, "loss": 0.0373, "step": 24310 }, { "epoch": 6.576527852893456, "grad_norm": 0.1805780977010727, "learning_rate": 6.927665689871026e-05, "loss": 0.0376, "step": 24320 }, { "epoch": 6.579232017306652, "grad_norm": 0.13066589832305908, "learning_rate": 6.925122652773253e-05, "loss": 0.037, "step": 24330 }, { "epoch": 6.581936181719849, "grad_norm": 0.2250438779592514, "learning_rate": 6.922579030874046e-05, "loss": 0.035, "step": 24340 }, { "epoch": 6.584640346133045, "grad_norm": 0.11660876125097275, "learning_rate": 6.920034824946093e-05, "loss": 0.036, "step": 24350 }, { "epoch": 6.587344510546242, "grad_norm": 0.13256803154945374, "learning_rate": 6.917490035762255e-05, "loss": 0.0347, "step": 24360 }, { "epoch": 6.590048674959437, "grad_norm": 0.11905408650636673, "learning_rate": 6.914944664095573e-05, "loss": 0.0363, "step": 24370 }, { "epoch": 6.592752839372634, "grad_norm": 0.15456083416938782, "learning_rate": 6.912398710719264e-05, "loss": 0.0358, "step": 24380 }, { "epoch": 6.59545700378583, "grad_norm": 0.35268065333366394, "learning_rate": 6.90985217640672e-05, "loss": 0.0377, "step": 24390 }, { "epoch": 6.598161168199026, "grad_norm": 0.20973387360572815, "learning_rate": 6.90730506193151e-05, "loss": 0.0383, "step": 24400 }, { "epoch": 6.600865332612223, "grad_norm": 0.19039298593997955, "learning_rate": 6.904757368067384e-05, "loss": 0.037, "step": 24410 }, { "epoch": 6.603569497025419, "grad_norm": 0.12044451385736465, "learning_rate": 6.90220909558826e-05, "loss": 0.0366, "step": 24420 }, { "epoch": 6.606273661438616, "grad_norm": 0.197166308760643, "learning_rate": 6.899660245268237e-05, "loss": 0.0356, "step": 24430 }, { "epoch": 6.608977825851812, "grad_norm": 0.11415687203407288, "learning_rate": 6.897110817881592e-05, "loss": 0.0361, "step": 24440 }, { "epoch": 6.611681990265009, "grad_norm": 0.10916023701429367, "learning_rate": 6.894560814202769e-05, "loss": 0.0353, "step": 24450 }, { "epoch": 6.614386154678204, "grad_norm": 0.10175147652626038, "learning_rate": 6.892010235006394e-05, "loss": 0.0347, "step": 24460 }, { "epoch": 6.6170903190914006, "grad_norm": 0.1693791300058365, "learning_rate": 6.889459081067264e-05, "loss": 0.0362, "step": 24470 }, { "epoch": 6.619794483504597, "grad_norm": 0.22257952392101288, "learning_rate": 6.886907353160356e-05, "loss": 0.0372, "step": 24480 }, { "epoch": 6.622498647917793, "grad_norm": 0.18571776151657104, "learning_rate": 6.884355052060814e-05, "loss": 0.0357, "step": 24490 }, { "epoch": 6.62520281233099, "grad_norm": 0.1279212236404419, "learning_rate": 6.88180217854396e-05, "loss": 0.035, "step": 24500 }, { "epoch": 6.627906976744186, "grad_norm": 0.23145219683647156, "learning_rate": 6.87924873338529e-05, "loss": 0.038, "step": 24510 }, { "epoch": 6.630611141157383, "grad_norm": 0.10847480595111847, "learning_rate": 6.876694717360475e-05, "loss": 0.0351, "step": 24520 }, { "epoch": 6.633315305570578, "grad_norm": 0.146787628531456, "learning_rate": 6.874140131245355e-05, "loss": 0.037, "step": 24530 }, { "epoch": 6.636019469983775, "grad_norm": 0.11550679057836533, "learning_rate": 6.871584975815948e-05, "loss": 0.0352, "step": 24540 }, { "epoch": 6.638723634396971, "grad_norm": 0.212397038936615, "learning_rate": 6.86902925184844e-05, "loss": 0.0354, "step": 24550 }, { "epoch": 6.6414277988101675, "grad_norm": 0.13401849567890167, "learning_rate": 6.866472960119195e-05, "loss": 0.0354, "step": 24560 }, { "epoch": 6.644131963223364, "grad_norm": 0.17678026854991913, "learning_rate": 6.863916101404748e-05, "loss": 0.0341, "step": 24570 }, { "epoch": 6.64683612763656, "grad_norm": 0.16972675919532776, "learning_rate": 6.8613586764818e-05, "loss": 0.0358, "step": 24580 }, { "epoch": 6.649540292049757, "grad_norm": 0.12414856255054474, "learning_rate": 6.858800686127233e-05, "loss": 0.0358, "step": 24590 }, { "epoch": 6.652244456462953, "grad_norm": 0.18136999011039734, "learning_rate": 6.856242131118097e-05, "loss": 0.0363, "step": 24600 }, { "epoch": 6.65494862087615, "grad_norm": 0.14164206385612488, "learning_rate": 6.853683012231614e-05, "loss": 0.0358, "step": 24610 }, { "epoch": 6.657652785289345, "grad_norm": 0.1709221452474594, "learning_rate": 6.851123330245173e-05, "loss": 0.0365, "step": 24620 }, { "epoch": 6.660356949702542, "grad_norm": 0.13484013080596924, "learning_rate": 6.848563085936343e-05, "loss": 0.0354, "step": 24630 }, { "epoch": 6.663061114115738, "grad_norm": 0.1776314228773117, "learning_rate": 6.846002280082853e-05, "loss": 0.0358, "step": 24640 }, { "epoch": 6.6657652785289345, "grad_norm": 0.17367631196975708, "learning_rate": 6.843440913462614e-05, "loss": 0.0369, "step": 24650 }, { "epoch": 6.668469442942131, "grad_norm": 0.16723762452602386, "learning_rate": 6.840878986853698e-05, "loss": 0.0342, "step": 24660 }, { "epoch": 6.671173607355327, "grad_norm": 0.1837444007396698, "learning_rate": 6.838316501034352e-05, "loss": 0.0362, "step": 24670 }, { "epoch": 6.673877771768524, "grad_norm": 0.16828423738479614, "learning_rate": 6.83575345678299e-05, "loss": 0.0345, "step": 24680 }, { "epoch": 6.676581936181719, "grad_norm": 0.16872787475585938, "learning_rate": 6.833189854878196e-05, "loss": 0.0359, "step": 24690 }, { "epoch": 6.679286100594916, "grad_norm": 0.24568672478199005, "learning_rate": 6.83062569609873e-05, "loss": 0.0345, "step": 24700 }, { "epoch": 6.681990265008112, "grad_norm": 0.19444195926189423, "learning_rate": 6.828060981223512e-05, "loss": 0.0351, "step": 24710 }, { "epoch": 6.684694429421309, "grad_norm": 0.13927757740020752, "learning_rate": 6.825495711031634e-05, "loss": 0.036, "step": 24720 }, { "epoch": 6.687398593834505, "grad_norm": 0.20156468451023102, "learning_rate": 6.822929886302359e-05, "loss": 0.0352, "step": 24730 }, { "epoch": 6.6901027582477015, "grad_norm": 0.15990014374256134, "learning_rate": 6.820363507815116e-05, "loss": 0.0361, "step": 24740 }, { "epoch": 6.692806922660898, "grad_norm": 0.16563978791236877, "learning_rate": 6.817796576349501e-05, "loss": 0.036, "step": 24750 }, { "epoch": 6.695511087074094, "grad_norm": 0.12304297089576721, "learning_rate": 6.815229092685285e-05, "loss": 0.0347, "step": 24760 }, { "epoch": 6.698215251487291, "grad_norm": 0.2040354162454605, "learning_rate": 6.812661057602399e-05, "loss": 0.0354, "step": 24770 }, { "epoch": 6.700919415900486, "grad_norm": 0.15901848673820496, "learning_rate": 6.810092471880943e-05, "loss": 0.0369, "step": 24780 }, { "epoch": 6.703623580313683, "grad_norm": 0.17767181992530823, "learning_rate": 6.807523336301187e-05, "loss": 0.0359, "step": 24790 }, { "epoch": 6.706327744726879, "grad_norm": 0.12644512951374054, "learning_rate": 6.804953651643566e-05, "loss": 0.0366, "step": 24800 }, { "epoch": 6.709031909140076, "grad_norm": 0.11488664895296097, "learning_rate": 6.802383418688685e-05, "loss": 0.0352, "step": 24810 }, { "epoch": 6.711736073553272, "grad_norm": 0.15514495968818665, "learning_rate": 6.799812638217309e-05, "loss": 0.0364, "step": 24820 }, { "epoch": 6.7144402379664685, "grad_norm": 0.14535845816135406, "learning_rate": 6.797241311010373e-05, "loss": 0.0352, "step": 24830 }, { "epoch": 6.717144402379665, "grad_norm": 0.14950042963027954, "learning_rate": 6.794669437848982e-05, "loss": 0.0355, "step": 24840 }, { "epoch": 6.719848566792861, "grad_norm": 0.15418533980846405, "learning_rate": 6.792097019514402e-05, "loss": 0.0363, "step": 24850 }, { "epoch": 6.722552731206058, "grad_norm": 0.12567396461963654, "learning_rate": 6.789524056788064e-05, "loss": 0.0348, "step": 24860 }, { "epoch": 6.725256895619253, "grad_norm": 0.18386147916316986, "learning_rate": 6.786950550451567e-05, "loss": 0.0351, "step": 24870 }, { "epoch": 6.72796106003245, "grad_norm": 0.1763991266489029, "learning_rate": 6.784376501286676e-05, "loss": 0.0361, "step": 24880 }, { "epoch": 6.730665224445646, "grad_norm": 0.12373289465904236, "learning_rate": 6.781801910075316e-05, "loss": 0.0349, "step": 24890 }, { "epoch": 6.733369388858843, "grad_norm": 0.2070089876651764, "learning_rate": 6.779226777599581e-05, "loss": 0.0364, "step": 24900 }, { "epoch": 6.736073553272039, "grad_norm": 0.10300008952617645, "learning_rate": 6.776651104641729e-05, "loss": 0.0365, "step": 24910 }, { "epoch": 6.7387777176852355, "grad_norm": 0.11542832106351852, "learning_rate": 6.774074891984183e-05, "loss": 0.0366, "step": 24920 }, { "epoch": 6.741481882098432, "grad_norm": 0.13113611936569214, "learning_rate": 6.771498140409526e-05, "loss": 0.0369, "step": 24930 }, { "epoch": 6.7441860465116275, "grad_norm": 0.14633303880691528, "learning_rate": 6.768920850700506e-05, "loss": 0.0349, "step": 24940 }, { "epoch": 6.746890210924824, "grad_norm": 0.15309058129787445, "learning_rate": 6.766343023640039e-05, "loss": 0.0356, "step": 24950 }, { "epoch": 6.74959437533802, "grad_norm": 0.11917783319950104, "learning_rate": 6.763764660011198e-05, "loss": 0.0361, "step": 24960 }, { "epoch": 6.752298539751217, "grad_norm": 0.22802124917507172, "learning_rate": 6.761185760597223e-05, "loss": 0.0367, "step": 24970 }, { "epoch": 6.755002704164413, "grad_norm": 0.13341201841831207, "learning_rate": 6.758606326181515e-05, "loss": 0.0356, "step": 24980 }, { "epoch": 6.75770686857761, "grad_norm": 0.11277052760124207, "learning_rate": 6.75602635754764e-05, "loss": 0.0347, "step": 24990 }, { "epoch": 6.760411032990806, "grad_norm": 0.152080699801445, "learning_rate": 6.75344585547932e-05, "loss": 0.0349, "step": 25000 }, { "epoch": 6.7631151974040025, "grad_norm": 0.19093665480613708, "learning_rate": 6.750864820760449e-05, "loss": 0.0352, "step": 25010 }, { "epoch": 6.765819361817199, "grad_norm": 0.19045354425907135, "learning_rate": 6.748283254175072e-05, "loss": 0.0351, "step": 25020 }, { "epoch": 6.768523526230394, "grad_norm": 0.19268444180488586, "learning_rate": 6.745701156507404e-05, "loss": 0.0355, "step": 25030 }, { "epoch": 6.771227690643591, "grad_norm": 0.1472110152244568, "learning_rate": 6.743118528541818e-05, "loss": 0.0369, "step": 25040 }, { "epoch": 6.773931855056787, "grad_norm": 0.2106289267539978, "learning_rate": 6.740535371062846e-05, "loss": 0.0351, "step": 25050 }, { "epoch": 6.776636019469984, "grad_norm": 0.15084630250930786, "learning_rate": 6.737951684855185e-05, "loss": 0.0348, "step": 25060 }, { "epoch": 6.77934018388318, "grad_norm": 0.25516918301582336, "learning_rate": 6.735367470703691e-05, "loss": 0.0349, "step": 25070 }, { "epoch": 6.782044348296377, "grad_norm": 0.17049728333950043, "learning_rate": 6.732782729393379e-05, "loss": 0.0366, "step": 25080 }, { "epoch": 6.784748512709573, "grad_norm": 0.15030965209007263, "learning_rate": 6.730197461709425e-05, "loss": 0.0362, "step": 25090 }, { "epoch": 6.7874526771227695, "grad_norm": 0.23926784098148346, "learning_rate": 6.727611668437164e-05, "loss": 0.0369, "step": 25100 }, { "epoch": 6.790156841535966, "grad_norm": 0.16463449597358704, "learning_rate": 6.725025350362094e-05, "loss": 0.0348, "step": 25110 }, { "epoch": 6.792861005949161, "grad_norm": 0.1996287852525711, "learning_rate": 6.72243850826987e-05, "loss": 0.034, "step": 25120 }, { "epoch": 6.795565170362358, "grad_norm": 0.1245148777961731, "learning_rate": 6.719851142946305e-05, "loss": 0.0338, "step": 25130 }, { "epoch": 6.798269334775554, "grad_norm": 0.15659165382385254, "learning_rate": 6.717263255177372e-05, "loss": 0.0363, "step": 25140 }, { "epoch": 6.800973499188751, "grad_norm": 0.14502806961536407, "learning_rate": 6.714674845749205e-05, "loss": 0.0357, "step": 25150 }, { "epoch": 6.803677663601947, "grad_norm": 0.12011600285768509, "learning_rate": 6.712085915448092e-05, "loss": 0.0345, "step": 25160 }, { "epoch": 6.806381828015144, "grad_norm": 0.30717113614082336, "learning_rate": 6.709496465060486e-05, "loss": 0.0354, "step": 25170 }, { "epoch": 6.80908599242834, "grad_norm": 0.12526065111160278, "learning_rate": 6.706906495372987e-05, "loss": 0.0331, "step": 25180 }, { "epoch": 6.8117901568415355, "grad_norm": 0.16316354274749756, "learning_rate": 6.704316007172365e-05, "loss": 0.0358, "step": 25190 }, { "epoch": 6.814494321254732, "grad_norm": 0.13421247899532318, "learning_rate": 6.701725001245539e-05, "loss": 0.0348, "step": 25200 }, { "epoch": 6.817198485667928, "grad_norm": 0.14460764825344086, "learning_rate": 6.699133478379588e-05, "loss": 0.0364, "step": 25210 }, { "epoch": 6.819902650081125, "grad_norm": 0.14204041659832, "learning_rate": 6.69654143936175e-05, "loss": 0.0348, "step": 25220 }, { "epoch": 6.822606814494321, "grad_norm": 0.11747163534164429, "learning_rate": 6.693948884979419e-05, "loss": 0.0369, "step": 25230 }, { "epoch": 6.825310978907518, "grad_norm": 0.16545677185058594, "learning_rate": 6.691355816020142e-05, "loss": 0.0366, "step": 25240 }, { "epoch": 6.828015143320714, "grad_norm": 0.1344936639070511, "learning_rate": 6.688762233271624e-05, "loss": 0.0346, "step": 25250 }, { "epoch": 6.830719307733911, "grad_norm": 0.10785269737243652, "learning_rate": 6.68616813752173e-05, "loss": 0.0359, "step": 25260 }, { "epoch": 6.833423472147107, "grad_norm": 0.18469493091106415, "learning_rate": 6.683573529558477e-05, "loss": 0.0345, "step": 25270 }, { "epoch": 6.8361276365603025, "grad_norm": 0.11806050688028336, "learning_rate": 6.680978410170037e-05, "loss": 0.0356, "step": 25280 }, { "epoch": 6.838831800973499, "grad_norm": 0.13857293128967285, "learning_rate": 6.678382780144741e-05, "loss": 0.0347, "step": 25290 }, { "epoch": 6.841535965386695, "grad_norm": 0.2005484700202942, "learning_rate": 6.675786640271071e-05, "loss": 0.0362, "step": 25300 }, { "epoch": 6.844240129799892, "grad_norm": 0.2036876678466797, "learning_rate": 6.673189991337665e-05, "loss": 0.0373, "step": 25310 }, { "epoch": 6.846944294213088, "grad_norm": 0.1351352483034134, "learning_rate": 6.670592834133317e-05, "loss": 0.0366, "step": 25320 }, { "epoch": 6.849648458626285, "grad_norm": 0.30854007601737976, "learning_rate": 6.667995169446979e-05, "loss": 0.0352, "step": 25330 }, { "epoch": 6.852352623039481, "grad_norm": 0.15804478526115417, "learning_rate": 6.665396998067747e-05, "loss": 0.0349, "step": 25340 }, { "epoch": 6.855056787452677, "grad_norm": 0.11823894828557968, "learning_rate": 6.66279832078488e-05, "loss": 0.035, "step": 25350 }, { "epoch": 6.857760951865873, "grad_norm": 0.14075717329978943, "learning_rate": 6.660199138387786e-05, "loss": 0.0365, "step": 25360 }, { "epoch": 6.8604651162790695, "grad_norm": 0.1746664047241211, "learning_rate": 6.65759945166603e-05, "loss": 0.0349, "step": 25370 }, { "epoch": 6.863169280692266, "grad_norm": 0.1637141853570938, "learning_rate": 6.654999261409326e-05, "loss": 0.0349, "step": 25380 }, { "epoch": 6.865873445105462, "grad_norm": 0.12780101597309113, "learning_rate": 6.652398568407544e-05, "loss": 0.0365, "step": 25390 }, { "epoch": 6.868577609518659, "grad_norm": 0.1722751408815384, "learning_rate": 6.649797373450707e-05, "loss": 0.0362, "step": 25400 }, { "epoch": 6.871281773931855, "grad_norm": 0.2454620748758316, "learning_rate": 6.647195677328988e-05, "loss": 0.035, "step": 25410 }, { "epoch": 6.873985938345052, "grad_norm": 0.15002720057964325, "learning_rate": 6.644593480832712e-05, "loss": 0.0355, "step": 25420 }, { "epoch": 6.876690102758248, "grad_norm": 0.129243403673172, "learning_rate": 6.641990784752363e-05, "loss": 0.0354, "step": 25430 }, { "epoch": 6.879394267171444, "grad_norm": 0.12297334522008896, "learning_rate": 6.639387589878566e-05, "loss": 0.0347, "step": 25440 }, { "epoch": 6.88209843158464, "grad_norm": 0.13351640105247498, "learning_rate": 6.636783897002103e-05, "loss": 0.0337, "step": 25450 }, { "epoch": 6.8848025959978365, "grad_norm": 0.18827340006828308, "learning_rate": 6.63417970691391e-05, "loss": 0.0355, "step": 25460 }, { "epoch": 6.887506760411033, "grad_norm": 0.1631304919719696, "learning_rate": 6.63157502040507e-05, "loss": 0.0351, "step": 25470 }, { "epoch": 6.890210924824229, "grad_norm": 0.1064891368150711, "learning_rate": 6.628969838266819e-05, "loss": 0.0348, "step": 25480 }, { "epoch": 6.892915089237426, "grad_norm": 0.15298806130886078, "learning_rate": 6.626364161290541e-05, "loss": 0.0366, "step": 25490 }, { "epoch": 6.895619253650622, "grad_norm": 0.22932711243629456, "learning_rate": 6.623757990267774e-05, "loss": 0.0352, "step": 25500 }, { "epoch": 6.898323418063819, "grad_norm": 0.14479133486747742, "learning_rate": 6.621151325990201e-05, "loss": 0.0345, "step": 25510 }, { "epoch": 6.901027582477015, "grad_norm": 0.12091386318206787, "learning_rate": 6.618544169249657e-05, "loss": 0.0355, "step": 25520 }, { "epoch": 6.903731746890211, "grad_norm": 0.1904667764902115, "learning_rate": 6.615936520838133e-05, "loss": 0.0367, "step": 25530 }, { "epoch": 6.906435911303407, "grad_norm": 0.13344460725784302, "learning_rate": 6.613328381547759e-05, "loss": 0.0356, "step": 25540 }, { "epoch": 6.9091400757166035, "grad_norm": 0.14572882652282715, "learning_rate": 6.610719752170821e-05, "loss": 0.0349, "step": 25550 }, { "epoch": 6.9118442401298, "grad_norm": 0.16568982601165771, "learning_rate": 6.60811063349975e-05, "loss": 0.0344, "step": 25560 }, { "epoch": 6.914548404542996, "grad_norm": 0.09487439692020416, "learning_rate": 6.605501026327127e-05, "loss": 0.0342, "step": 25570 }, { "epoch": 6.917252568956193, "grad_norm": 0.12093014270067215, "learning_rate": 6.602890931445685e-05, "loss": 0.0347, "step": 25580 }, { "epoch": 6.919956733369389, "grad_norm": 0.1527995616197586, "learning_rate": 6.6002803496483e-05, "loss": 0.037, "step": 25590 }, { "epoch": 6.922660897782585, "grad_norm": 0.1917172074317932, "learning_rate": 6.597669281727997e-05, "loss": 0.0346, "step": 25600 }, { "epoch": 6.925365062195781, "grad_norm": 0.16395051777362823, "learning_rate": 6.595057728477949e-05, "loss": 0.0348, "step": 25610 }, { "epoch": 6.928069226608978, "grad_norm": 0.12832336127758026, "learning_rate": 6.59244569069148e-05, "loss": 0.0357, "step": 25620 }, { "epoch": 6.930773391022174, "grad_norm": 0.09848114848136902, "learning_rate": 6.589833169162054e-05, "loss": 0.0355, "step": 25630 }, { "epoch": 6.9334775554353705, "grad_norm": 0.13207970559597015, "learning_rate": 6.587220164683291e-05, "loss": 0.0345, "step": 25640 }, { "epoch": 6.936181719848567, "grad_norm": 0.12078934907913208, "learning_rate": 6.58460667804895e-05, "loss": 0.0342, "step": 25650 }, { "epoch": 6.938885884261763, "grad_norm": 0.10369155555963516, "learning_rate": 6.581992710052938e-05, "loss": 0.0344, "step": 25660 }, { "epoch": 6.94159004867496, "grad_norm": 0.11772937327623367, "learning_rate": 6.579378261489311e-05, "loss": 0.0358, "step": 25670 }, { "epoch": 6.944294213088156, "grad_norm": 0.14649222791194916, "learning_rate": 6.576763333152268e-05, "loss": 0.0344, "step": 25680 }, { "epoch": 6.946998377501352, "grad_norm": 0.09739526361227036, "learning_rate": 6.574147925836159e-05, "loss": 0.0354, "step": 25690 }, { "epoch": 6.949702541914548, "grad_norm": 0.13395453989505768, "learning_rate": 6.571532040335472e-05, "loss": 0.0344, "step": 25700 }, { "epoch": 6.952406706327745, "grad_norm": 0.12464452534914017, "learning_rate": 6.568915677444845e-05, "loss": 0.0346, "step": 25710 }, { "epoch": 6.955110870740941, "grad_norm": 0.12199530750513077, "learning_rate": 6.56629883795906e-05, "loss": 0.035, "step": 25720 }, { "epoch": 6.9578150351541375, "grad_norm": 0.16238225996494293, "learning_rate": 6.563681522673043e-05, "loss": 0.0346, "step": 25730 }, { "epoch": 6.960519199567334, "grad_norm": 0.14261938631534576, "learning_rate": 6.561063732381867e-05, "loss": 0.0349, "step": 25740 }, { "epoch": 6.96322336398053, "grad_norm": 0.10690496861934662, "learning_rate": 6.558445467880745e-05, "loss": 0.0349, "step": 25750 }, { "epoch": 6.965927528393726, "grad_norm": 0.12272815406322479, "learning_rate": 6.55582672996504e-05, "loss": 0.0341, "step": 25760 }, { "epoch": 6.968631692806922, "grad_norm": 0.11939441412687302, "learning_rate": 6.553207519430253e-05, "loss": 0.0355, "step": 25770 }, { "epoch": 6.971335857220119, "grad_norm": 0.1362302601337433, "learning_rate": 6.550587837072032e-05, "loss": 0.0344, "step": 25780 }, { "epoch": 6.974040021633315, "grad_norm": 0.1536915898323059, "learning_rate": 6.547967683686166e-05, "loss": 0.0355, "step": 25790 }, { "epoch": 6.976744186046512, "grad_norm": 0.09670581668615341, "learning_rate": 6.545347060068591e-05, "loss": 0.0348, "step": 25800 }, { "epoch": 6.979448350459708, "grad_norm": 0.1305541694164276, "learning_rate": 6.542725967015382e-05, "loss": 0.0358, "step": 25810 }, { "epoch": 6.9821525148729044, "grad_norm": 0.1806962788105011, "learning_rate": 6.540104405322757e-05, "loss": 0.0363, "step": 25820 }, { "epoch": 6.984856679286101, "grad_norm": 0.19234292209148407, "learning_rate": 6.537482375787077e-05, "loss": 0.0359, "step": 25830 }, { "epoch": 6.987560843699297, "grad_norm": 0.13714300096035004, "learning_rate": 6.534859879204845e-05, "loss": 0.036, "step": 25840 }, { "epoch": 6.990265008112493, "grad_norm": 0.14539968967437744, "learning_rate": 6.532236916372709e-05, "loss": 0.0349, "step": 25850 }, { "epoch": 6.992969172525689, "grad_norm": 0.1626463681459427, "learning_rate": 6.529613488087454e-05, "loss": 0.0349, "step": 25860 }, { "epoch": 6.995673336938886, "grad_norm": 0.15750707685947418, "learning_rate": 6.526989595146009e-05, "loss": 0.0349, "step": 25870 }, { "epoch": 6.998377501352082, "grad_norm": 0.14401507377624512, "learning_rate": 6.524365238345441e-05, "loss": 0.0344, "step": 25880 }, { "epoch": 7.001081665765279, "grad_norm": 0.18599815666675568, "learning_rate": 6.521740418482964e-05, "loss": 0.0338, "step": 25890 }, { "epoch": 7.003785830178475, "grad_norm": 0.21322353184223175, "learning_rate": 6.519115136355925e-05, "loss": 0.0349, "step": 25900 }, { "epoch": 7.006489994591671, "grad_norm": 0.14494407176971436, "learning_rate": 6.51648939276182e-05, "loss": 0.0342, "step": 25910 }, { "epoch": 7.009194159004868, "grad_norm": 0.14624015986919403, "learning_rate": 6.513863188498277e-05, "loss": 0.0349, "step": 25920 }, { "epoch": 7.011898323418063, "grad_norm": 0.1488078087568283, "learning_rate": 6.511236524363068e-05, "loss": 0.0365, "step": 25930 }, { "epoch": 7.01460248783126, "grad_norm": 0.21145758032798767, "learning_rate": 6.508609401154104e-05, "loss": 0.036, "step": 25940 }, { "epoch": 7.017306652244456, "grad_norm": 0.11567443609237671, "learning_rate": 6.505981819669439e-05, "loss": 0.035, "step": 25950 }, { "epoch": 7.020010816657653, "grad_norm": 0.2264845371246338, "learning_rate": 6.503353780707258e-05, "loss": 0.0334, "step": 25960 }, { "epoch": 7.022714981070849, "grad_norm": 0.2301587015390396, "learning_rate": 6.500725285065895e-05, "loss": 0.0347, "step": 25970 }, { "epoch": 7.025419145484046, "grad_norm": 0.1123528853058815, "learning_rate": 6.498096333543813e-05, "loss": 0.0342, "step": 25980 }, { "epoch": 7.028123309897242, "grad_norm": 0.18923702836036682, "learning_rate": 6.49546692693962e-05, "loss": 0.0357, "step": 25990 }, { "epoch": 7.030827474310438, "grad_norm": 0.16421392560005188, "learning_rate": 6.492837066052059e-05, "loss": 0.0357, "step": 26000 }, { "epoch": 7.033531638723634, "grad_norm": 0.13460782170295715, "learning_rate": 6.490206751680014e-05, "loss": 0.0333, "step": 26010 }, { "epoch": 7.03623580313683, "grad_norm": 0.18096838891506195, "learning_rate": 6.487575984622505e-05, "loss": 0.0345, "step": 26020 }, { "epoch": 7.038939967550027, "grad_norm": 0.2019367516040802, "learning_rate": 6.484944765678689e-05, "loss": 0.035, "step": 26030 }, { "epoch": 7.041644131963223, "grad_norm": 0.15963086485862732, "learning_rate": 6.482313095647861e-05, "loss": 0.0338, "step": 26040 }, { "epoch": 7.04434829637642, "grad_norm": 0.18961581587791443, "learning_rate": 6.479680975329451e-05, "loss": 0.0341, "step": 26050 }, { "epoch": 7.047052460789616, "grad_norm": 0.13702024519443512, "learning_rate": 6.477048405523031e-05, "loss": 0.0339, "step": 26060 }, { "epoch": 7.0497566252028125, "grad_norm": 0.10502581298351288, "learning_rate": 6.474415387028304e-05, "loss": 0.0339, "step": 26070 }, { "epoch": 7.052460789616009, "grad_norm": 0.1273152381181717, "learning_rate": 6.471781920645114e-05, "loss": 0.0342, "step": 26080 }, { "epoch": 7.055164954029205, "grad_norm": 0.2059641033411026, "learning_rate": 6.469148007173434e-05, "loss": 0.0345, "step": 26090 }, { "epoch": 7.057869118442401, "grad_norm": 0.14573998749256134, "learning_rate": 6.466513647413381e-05, "loss": 0.0336, "step": 26100 }, { "epoch": 7.060573282855597, "grad_norm": 0.12630507349967957, "learning_rate": 6.463878842165203e-05, "loss": 0.0351, "step": 26110 }, { "epoch": 7.063277447268794, "grad_norm": 0.13521447777748108, "learning_rate": 6.461243592229286e-05, "loss": 0.0346, "step": 26120 }, { "epoch": 7.06598161168199, "grad_norm": 0.16443729400634766, "learning_rate": 6.458607898406146e-05, "loss": 0.0337, "step": 26130 }, { "epoch": 7.068685776095187, "grad_norm": 0.17278191447257996, "learning_rate": 6.455971761496439e-05, "loss": 0.0338, "step": 26140 }, { "epoch": 7.071389940508383, "grad_norm": 0.1832512468099594, "learning_rate": 6.453335182300953e-05, "loss": 0.035, "step": 26150 }, { "epoch": 7.0740941049215795, "grad_norm": 0.22960972785949707, "learning_rate": 6.450698161620612e-05, "loss": 0.0345, "step": 26160 }, { "epoch": 7.076798269334776, "grad_norm": 0.15583784878253937, "learning_rate": 6.448060700256473e-05, "loss": 0.0342, "step": 26170 }, { "epoch": 7.0795024337479715, "grad_norm": 0.10797715187072754, "learning_rate": 6.445422799009726e-05, "loss": 0.0346, "step": 26180 }, { "epoch": 7.082206598161168, "grad_norm": 0.14454255998134613, "learning_rate": 6.442784458681699e-05, "loss": 0.0338, "step": 26190 }, { "epoch": 7.084910762574364, "grad_norm": 0.16330453753471375, "learning_rate": 6.440145680073847e-05, "loss": 0.0346, "step": 26200 }, { "epoch": 7.087614926987561, "grad_norm": 0.148514986038208, "learning_rate": 6.437506463987762e-05, "loss": 0.0355, "step": 26210 }, { "epoch": 7.090319091400757, "grad_norm": 0.17608048021793365, "learning_rate": 6.434866811225168e-05, "loss": 0.0344, "step": 26220 }, { "epoch": 7.093023255813954, "grad_norm": 0.15983761847019196, "learning_rate": 6.432226722587923e-05, "loss": 0.0353, "step": 26230 }, { "epoch": 7.09572742022715, "grad_norm": 0.12660202383995056, "learning_rate": 6.429586198878015e-05, "loss": 0.0351, "step": 26240 }, { "epoch": 7.0984315846403465, "grad_norm": 0.1691170334815979, "learning_rate": 6.426945240897566e-05, "loss": 0.035, "step": 26250 }, { "epoch": 7.101135749053542, "grad_norm": 0.13975496590137482, "learning_rate": 6.424303849448829e-05, "loss": 0.0351, "step": 26260 }, { "epoch": 7.1038399134667385, "grad_norm": 0.13903826475143433, "learning_rate": 6.42166202533419e-05, "loss": 0.0341, "step": 26270 }, { "epoch": 7.106544077879935, "grad_norm": 0.10579592734575272, "learning_rate": 6.419019769356164e-05, "loss": 0.0346, "step": 26280 }, { "epoch": 7.109248242293131, "grad_norm": 0.15024331212043762, "learning_rate": 6.416377082317398e-05, "loss": 0.0348, "step": 26290 }, { "epoch": 7.111952406706328, "grad_norm": 0.12991555035114288, "learning_rate": 6.413733965020674e-05, "loss": 0.0352, "step": 26300 }, { "epoch": 7.114656571119524, "grad_norm": 0.15458069741725922, "learning_rate": 6.411090418268896e-05, "loss": 0.0346, "step": 26310 }, { "epoch": 7.117360735532721, "grad_norm": 0.22101101279258728, "learning_rate": 6.408446442865109e-05, "loss": 0.0359, "step": 26320 }, { "epoch": 7.120064899945917, "grad_norm": 0.22691726684570312, "learning_rate": 6.405802039612479e-05, "loss": 0.0343, "step": 26330 }, { "epoch": 7.122769064359113, "grad_norm": 0.17975273728370667, "learning_rate": 6.403157209314308e-05, "loss": 0.0366, "step": 26340 }, { "epoch": 7.125473228772309, "grad_norm": 0.19208592176437378, "learning_rate": 6.400511952774024e-05, "loss": 0.0341, "step": 26350 }, { "epoch": 7.1281773931855055, "grad_norm": 0.2419619858264923, "learning_rate": 6.397866270795187e-05, "loss": 0.035, "step": 26360 }, { "epoch": 7.130881557598702, "grad_norm": 0.13775894045829773, "learning_rate": 6.395220164181489e-05, "loss": 0.0347, "step": 26370 }, { "epoch": 7.133585722011898, "grad_norm": 0.25390899181365967, "learning_rate": 6.39257363373674e-05, "loss": 0.0345, "step": 26380 }, { "epoch": 7.136289886425095, "grad_norm": 0.19864150881767273, "learning_rate": 6.389926680264892e-05, "loss": 0.0337, "step": 26390 }, { "epoch": 7.138994050838291, "grad_norm": 0.12009132653474808, "learning_rate": 6.387279304570017e-05, "loss": 0.0353, "step": 26400 }, { "epoch": 7.141698215251488, "grad_norm": 0.114985391497612, "learning_rate": 6.384631507456319e-05, "loss": 0.0352, "step": 26410 }, { "epoch": 7.144402379664683, "grad_norm": 0.1767280250787735, "learning_rate": 6.381983289728126e-05, "loss": 0.0337, "step": 26420 }, { "epoch": 7.14710654407788, "grad_norm": 0.1133209615945816, "learning_rate": 6.3793346521899e-05, "loss": 0.0342, "step": 26430 }, { "epoch": 7.149810708491076, "grad_norm": 0.1322399079799652, "learning_rate": 6.376685595646226e-05, "loss": 0.034, "step": 26440 }, { "epoch": 7.1525148729042725, "grad_norm": 0.1334003359079361, "learning_rate": 6.374036120901816e-05, "loss": 0.0349, "step": 26450 }, { "epoch": 7.155219037317469, "grad_norm": 0.10589784383773804, "learning_rate": 6.371386228761514e-05, "loss": 0.0332, "step": 26460 }, { "epoch": 7.157923201730665, "grad_norm": 0.1665395051240921, "learning_rate": 6.368735920030283e-05, "loss": 0.036, "step": 26470 }, { "epoch": 7.160627366143862, "grad_norm": 0.12872357666492462, "learning_rate": 6.366085195513218e-05, "loss": 0.0336, "step": 26480 }, { "epoch": 7.163331530557058, "grad_norm": 0.09737611562013626, "learning_rate": 6.363434056015543e-05, "loss": 0.0333, "step": 26490 }, { "epoch": 7.166035694970255, "grad_norm": 0.13742676377296448, "learning_rate": 6.360782502342599e-05, "loss": 0.0345, "step": 26500 }, { "epoch": 7.16873985938345, "grad_norm": 0.21579474210739136, "learning_rate": 6.358130535299862e-05, "loss": 0.0339, "step": 26510 }, { "epoch": 7.171444023796647, "grad_norm": 0.17222729325294495, "learning_rate": 6.355478155692926e-05, "loss": 0.0362, "step": 26520 }, { "epoch": 7.174148188209843, "grad_norm": 0.3696175813674927, "learning_rate": 6.352825364327517e-05, "loss": 0.0347, "step": 26530 }, { "epoch": 7.176852352623039, "grad_norm": 0.12712641060352325, "learning_rate": 6.350172162009482e-05, "loss": 0.0353, "step": 26540 }, { "epoch": 7.179556517036236, "grad_norm": 0.19724616408348083, "learning_rate": 6.347518549544793e-05, "loss": 0.0343, "step": 26550 }, { "epoch": 7.182260681449432, "grad_norm": 0.13477696478366852, "learning_rate": 6.344864527739547e-05, "loss": 0.0356, "step": 26560 }, { "epoch": 7.184964845862629, "grad_norm": 0.20264403522014618, "learning_rate": 6.342210097399966e-05, "loss": 0.0346, "step": 26570 }, { "epoch": 7.187669010275825, "grad_norm": 0.10319358110427856, "learning_rate": 6.339555259332398e-05, "loss": 0.034, "step": 26580 }, { "epoch": 7.190373174689021, "grad_norm": 0.108866386115551, "learning_rate": 6.33690001434331e-05, "loss": 0.0339, "step": 26590 }, { "epoch": 7.193077339102217, "grad_norm": 0.127651646733284, "learning_rate": 6.334244363239296e-05, "loss": 0.035, "step": 26600 }, { "epoch": 7.195781503515414, "grad_norm": 0.11363023519515991, "learning_rate": 6.331588306827073e-05, "loss": 0.0337, "step": 26610 }, { "epoch": 7.19848566792861, "grad_norm": 0.16345959901809692, "learning_rate": 6.328931845913483e-05, "loss": 0.0349, "step": 26620 }, { "epoch": 7.201189832341806, "grad_norm": 0.21246857941150665, "learning_rate": 6.326274981305484e-05, "loss": 0.0346, "step": 26630 }, { "epoch": 7.203893996755003, "grad_norm": 0.17084595561027527, "learning_rate": 6.323617713810166e-05, "loss": 0.0348, "step": 26640 }, { "epoch": 7.206598161168199, "grad_norm": 0.12522824108600616, "learning_rate": 6.320960044234734e-05, "loss": 0.0346, "step": 26650 }, { "epoch": 7.209302325581396, "grad_norm": 0.15637287497520447, "learning_rate": 6.318301973386518e-05, "loss": 0.0375, "step": 26660 }, { "epoch": 7.212006489994591, "grad_norm": 0.20902015268802643, "learning_rate": 6.315643502072971e-05, "loss": 0.0346, "step": 26670 }, { "epoch": 7.214710654407788, "grad_norm": 0.15317511558532715, "learning_rate": 6.312984631101667e-05, "loss": 0.0355, "step": 26680 }, { "epoch": 7.217414818820984, "grad_norm": 0.17503201961517334, "learning_rate": 6.310325361280297e-05, "loss": 0.0347, "step": 26690 }, { "epoch": 7.2201189832341806, "grad_norm": 0.12505926191806793, "learning_rate": 6.30766569341668e-05, "loss": 0.0343, "step": 26700 }, { "epoch": 7.222823147647377, "grad_norm": 0.14277009665966034, "learning_rate": 6.305005628318753e-05, "loss": 0.0338, "step": 26710 }, { "epoch": 7.225527312060573, "grad_norm": 0.11557959020137787, "learning_rate": 6.302345166794572e-05, "loss": 0.0331, "step": 26720 }, { "epoch": 7.22823147647377, "grad_norm": 0.12335234135389328, "learning_rate": 6.299684309652316e-05, "loss": 0.0341, "step": 26730 }, { "epoch": 7.230935640886966, "grad_norm": 0.12983432412147522, "learning_rate": 6.297023057700283e-05, "loss": 0.0341, "step": 26740 }, { "epoch": 7.233639805300163, "grad_norm": 0.1530138999223709, "learning_rate": 6.294361411746891e-05, "loss": 0.0337, "step": 26750 }, { "epoch": 7.236343969713358, "grad_norm": 0.16904903948307037, "learning_rate": 6.291699372600677e-05, "loss": 0.0359, "step": 26760 }, { "epoch": 7.239048134126555, "grad_norm": 0.16490782797336578, "learning_rate": 6.2890369410703e-05, "loss": 0.033, "step": 26770 }, { "epoch": 7.241752298539751, "grad_norm": 0.09100961685180664, "learning_rate": 6.286374117964534e-05, "loss": 0.0352, "step": 26780 }, { "epoch": 7.2444564629529475, "grad_norm": 0.26833823323249817, "learning_rate": 6.283710904092277e-05, "loss": 0.0346, "step": 26790 }, { "epoch": 7.247160627366144, "grad_norm": 0.10756178200244904, "learning_rate": 6.281047300262542e-05, "loss": 0.034, "step": 26800 }, { "epoch": 7.24986479177934, "grad_norm": 0.13504524528980255, "learning_rate": 6.278383307284461e-05, "loss": 0.034, "step": 26810 }, { "epoch": 7.252568956192537, "grad_norm": 0.15647736191749573, "learning_rate": 6.275718925967284e-05, "loss": 0.034, "step": 26820 }, { "epoch": 7.255273120605732, "grad_norm": 0.15039658546447754, "learning_rate": 6.273054157120382e-05, "loss": 0.0333, "step": 26830 }, { "epoch": 7.257977285018929, "grad_norm": 0.09394800662994385, "learning_rate": 6.270389001553238e-05, "loss": 0.0357, "step": 26840 }, { "epoch": 7.260681449432125, "grad_norm": 0.17443116009235382, "learning_rate": 6.26772346007546e-05, "loss": 0.0354, "step": 26850 }, { "epoch": 7.263385613845322, "grad_norm": 0.11548230797052383, "learning_rate": 6.265057533496767e-05, "loss": 0.035, "step": 26860 }, { "epoch": 7.266089778258518, "grad_norm": 0.1584966778755188, "learning_rate": 6.262391222626997e-05, "loss": 0.0344, "step": 26870 }, { "epoch": 7.2687939426717145, "grad_norm": 0.17102226614952087, "learning_rate": 6.259724528276106e-05, "loss": 0.0343, "step": 26880 }, { "epoch": 7.271498107084911, "grad_norm": 0.13228118419647217, "learning_rate": 6.257057451254162e-05, "loss": 0.0345, "step": 26890 }, { "epoch": 7.274202271498107, "grad_norm": 0.11393505334854126, "learning_rate": 6.254389992371357e-05, "loss": 0.0347, "step": 26900 }, { "epoch": 7.276906435911304, "grad_norm": 0.13318152725696564, "learning_rate": 6.25172215243799e-05, "loss": 0.0345, "step": 26910 }, { "epoch": 7.279610600324499, "grad_norm": 0.2005952149629593, "learning_rate": 6.249053932264486e-05, "loss": 0.0332, "step": 26920 }, { "epoch": 7.282314764737696, "grad_norm": 0.17434799671173096, "learning_rate": 6.246385332661376e-05, "loss": 0.034, "step": 26930 }, { "epoch": 7.285018929150892, "grad_norm": 0.12770865857601166, "learning_rate": 6.24371635443931e-05, "loss": 0.0346, "step": 26940 }, { "epoch": 7.287723093564089, "grad_norm": 0.1610664278268814, "learning_rate": 6.241046998409054e-05, "loss": 0.0348, "step": 26950 }, { "epoch": 7.290427257977285, "grad_norm": 0.12856359779834747, "learning_rate": 6.238377265381489e-05, "loss": 0.0338, "step": 26960 }, { "epoch": 7.2931314223904815, "grad_norm": 0.13308168947696686, "learning_rate": 6.235707156167607e-05, "loss": 0.0354, "step": 26970 }, { "epoch": 7.295835586803678, "grad_norm": 0.1727822870016098, "learning_rate": 6.233036671578519e-05, "loss": 0.0345, "step": 26980 }, { "epoch": 7.298539751216874, "grad_norm": 0.14430595934391022, "learning_rate": 6.230365812425445e-05, "loss": 0.0363, "step": 26990 }, { "epoch": 7.30124391563007, "grad_norm": 0.10607773065567017, "learning_rate": 6.227694579519724e-05, "loss": 0.0344, "step": 27000 }, { "epoch": 7.303948080043266, "grad_norm": 0.1329430490732193, "learning_rate": 6.225022973672805e-05, "loss": 0.034, "step": 27010 }, { "epoch": 7.306652244456463, "grad_norm": 0.12379197776317596, "learning_rate": 6.222350995696253e-05, "loss": 0.0345, "step": 27020 }, { "epoch": 7.309356408869659, "grad_norm": 0.14118053019046783, "learning_rate": 6.21967864640174e-05, "loss": 0.0336, "step": 27030 }, { "epoch": 7.312060573282856, "grad_norm": 0.14958345890045166, "learning_rate": 6.217005926601059e-05, "loss": 0.033, "step": 27040 }, { "epoch": 7.314764737696052, "grad_norm": 0.19332215189933777, "learning_rate": 6.214332837106111e-05, "loss": 0.0346, "step": 27050 }, { "epoch": 7.3174689021092485, "grad_norm": 0.11768855899572372, "learning_rate": 6.21165937872891e-05, "loss": 0.0358, "step": 27060 }, { "epoch": 7.320173066522445, "grad_norm": 0.18032655119895935, "learning_rate": 6.208985552281582e-05, "loss": 0.0343, "step": 27070 }, { "epoch": 7.3228772309356405, "grad_norm": 0.17456656694412231, "learning_rate": 6.206311358576364e-05, "loss": 0.0337, "step": 27080 }, { "epoch": 7.325581395348837, "grad_norm": 0.16614209115505219, "learning_rate": 6.203636798425608e-05, "loss": 0.034, "step": 27090 }, { "epoch": 7.328285559762033, "grad_norm": 0.17095476388931274, "learning_rate": 6.20096187264177e-05, "loss": 0.0332, "step": 27100 }, { "epoch": 7.33098972417523, "grad_norm": 0.22179698944091797, "learning_rate": 6.198286582037425e-05, "loss": 0.0341, "step": 27110 }, { "epoch": 7.333693888588426, "grad_norm": 0.12990999221801758, "learning_rate": 6.195610927425256e-05, "loss": 0.0338, "step": 27120 }, { "epoch": 7.336398053001623, "grad_norm": 0.18064789474010468, "learning_rate": 6.192934909618056e-05, "loss": 0.0337, "step": 27130 }, { "epoch": 7.339102217414819, "grad_norm": 0.11952347308397293, "learning_rate": 6.190258529428728e-05, "loss": 0.0335, "step": 27140 }, { "epoch": 7.3418063818280155, "grad_norm": 0.17608225345611572, "learning_rate": 6.187581787670285e-05, "loss": 0.0343, "step": 27150 }, { "epoch": 7.344510546241212, "grad_norm": 0.15456165373325348, "learning_rate": 6.184904685155852e-05, "loss": 0.0338, "step": 27160 }, { "epoch": 7.3472147106544075, "grad_norm": 0.12458112835884094, "learning_rate": 6.18222722269866e-05, "loss": 0.0341, "step": 27170 }, { "epoch": 7.349918875067604, "grad_norm": 0.13885962963104248, "learning_rate": 6.179549401112053e-05, "loss": 0.0352, "step": 27180 }, { "epoch": 7.3526230394808, "grad_norm": 0.1341477483510971, "learning_rate": 6.176871221209482e-05, "loss": 0.0341, "step": 27190 }, { "epoch": 7.355327203893997, "grad_norm": 0.15243849158287048, "learning_rate": 6.174192683804508e-05, "loss": 0.0341, "step": 27200 }, { "epoch": 7.358031368307193, "grad_norm": 0.13228009641170502, "learning_rate": 6.1715137897108e-05, "loss": 0.0346, "step": 27210 }, { "epoch": 7.36073553272039, "grad_norm": 0.11299564689397812, "learning_rate": 6.168834539742134e-05, "loss": 0.0336, "step": 27220 }, { "epoch": 7.363439697133586, "grad_norm": 0.2034130096435547, "learning_rate": 6.166154934712397e-05, "loss": 0.0343, "step": 27230 }, { "epoch": 7.366143861546782, "grad_norm": 0.250804603099823, "learning_rate": 6.163474975435581e-05, "loss": 0.0347, "step": 27240 }, { "epoch": 7.368848025959978, "grad_norm": 0.13546858727931976, "learning_rate": 6.160794662725787e-05, "loss": 0.0332, "step": 27250 }, { "epoch": 7.371552190373174, "grad_norm": 0.14265024662017822, "learning_rate": 6.158113997397222e-05, "loss": 0.034, "step": 27260 }, { "epoch": 7.374256354786371, "grad_norm": 0.22177821397781372, "learning_rate": 6.155432980264205e-05, "loss": 0.0351, "step": 27270 }, { "epoch": 7.376960519199567, "grad_norm": 0.17520150542259216, "learning_rate": 6.152751612141156e-05, "loss": 0.035, "step": 27280 }, { "epoch": 7.379664683612764, "grad_norm": 0.1345958560705185, "learning_rate": 6.150069893842602e-05, "loss": 0.0324, "step": 27290 }, { "epoch": 7.38236884802596, "grad_norm": 0.17960801720619202, "learning_rate": 6.147387826183182e-05, "loss": 0.0345, "step": 27300 }, { "epoch": 7.385073012439157, "grad_norm": 0.11327622085809708, "learning_rate": 6.144705409977635e-05, "loss": 0.0339, "step": 27310 }, { "epoch": 7.387777176852353, "grad_norm": 0.1512054204940796, "learning_rate": 6.142022646040808e-05, "loss": 0.0341, "step": 27320 }, { "epoch": 7.390481341265549, "grad_norm": 0.14200226962566376, "learning_rate": 6.139339535187653e-05, "loss": 0.0335, "step": 27330 }, { "epoch": 7.393185505678745, "grad_norm": 0.1349688172340393, "learning_rate": 6.136656078233232e-05, "loss": 0.0342, "step": 27340 }, { "epoch": 7.395889670091941, "grad_norm": 0.23429563641548157, "learning_rate": 6.133972275992707e-05, "loss": 0.0332, "step": 27350 }, { "epoch": 7.398593834505138, "grad_norm": 0.20226003229618073, "learning_rate": 6.131288129281342e-05, "loss": 0.0334, "step": 27360 }, { "epoch": 7.401297998918334, "grad_norm": 0.2099316120147705, "learning_rate": 6.128603638914516e-05, "loss": 0.0336, "step": 27370 }, { "epoch": 7.404002163331531, "grad_norm": 0.08934267610311508, "learning_rate": 6.125918805707704e-05, "loss": 0.035, "step": 27380 }, { "epoch": 7.406706327744727, "grad_norm": 0.18383830785751343, "learning_rate": 6.123233630476485e-05, "loss": 0.0341, "step": 27390 }, { "epoch": 7.409410492157924, "grad_norm": 0.11098603159189224, "learning_rate": 6.120548114036547e-05, "loss": 0.0329, "step": 27400 }, { "epoch": 7.412114656571119, "grad_norm": 0.15532618761062622, "learning_rate": 6.117862257203679e-05, "loss": 0.0338, "step": 27410 }, { "epoch": 7.4148188209843156, "grad_norm": 0.1329253613948822, "learning_rate": 6.115176060793771e-05, "loss": 0.0357, "step": 27420 }, { "epoch": 7.417522985397512, "grad_norm": 0.12030947953462601, "learning_rate": 6.112489525622822e-05, "loss": 0.0342, "step": 27430 }, { "epoch": 7.420227149810708, "grad_norm": 0.1318376660346985, "learning_rate": 6.109802652506928e-05, "loss": 0.0345, "step": 27440 }, { "epoch": 7.422931314223905, "grad_norm": 0.16024136543273926, "learning_rate": 6.107115442262291e-05, "loss": 0.0357, "step": 27450 }, { "epoch": 7.425635478637101, "grad_norm": 0.1294722706079483, "learning_rate": 6.104427895705214e-05, "loss": 0.0343, "step": 27460 }, { "epoch": 7.428339643050298, "grad_norm": 0.18246421217918396, "learning_rate": 6.101740013652103e-05, "loss": 0.0326, "step": 27470 }, { "epoch": 7.431043807463494, "grad_norm": 0.1150176152586937, "learning_rate": 6.099051796919465e-05, "loss": 0.0343, "step": 27480 }, { "epoch": 7.43374797187669, "grad_norm": 0.09500481188297272, "learning_rate": 6.096363246323911e-05, "loss": 0.0335, "step": 27490 }, { "epoch": 7.436452136289886, "grad_norm": 0.1068766638636589, "learning_rate": 6.0936743626821504e-05, "loss": 0.034, "step": 27500 }, { "epoch": 7.4391563007030825, "grad_norm": 0.14531876146793365, "learning_rate": 6.090985146810996e-05, "loss": 0.034, "step": 27510 }, { "epoch": 7.441860465116279, "grad_norm": 0.1566598117351532, "learning_rate": 6.088295599527357e-05, "loss": 0.0337, "step": 27520 }, { "epoch": 7.444564629529475, "grad_norm": 0.19329385459423065, "learning_rate": 6.085605721648252e-05, "loss": 0.0337, "step": 27530 }, { "epoch": 7.447268793942672, "grad_norm": 0.23403556644916534, "learning_rate": 6.082915513990792e-05, "loss": 0.0346, "step": 27540 }, { "epoch": 7.449972958355868, "grad_norm": 0.11087597906589508, "learning_rate": 6.080224977372192e-05, "loss": 0.0343, "step": 27550 }, { "epoch": 7.452677122769065, "grad_norm": 0.20468047261238098, "learning_rate": 6.0775341126097666e-05, "loss": 0.0341, "step": 27560 }, { "epoch": 7.455381287182261, "grad_norm": 0.19709767401218414, "learning_rate": 6.074842920520926e-05, "loss": 0.033, "step": 27570 }, { "epoch": 7.458085451595457, "grad_norm": 0.16228218376636505, "learning_rate": 6.072151401923186e-05, "loss": 0.0334, "step": 27580 }, { "epoch": 7.460789616008653, "grad_norm": 0.14575447142124176, "learning_rate": 6.069459557634159e-05, "loss": 0.0344, "step": 27590 }, { "epoch": 7.4634937804218495, "grad_norm": 0.14245149493217468, "learning_rate": 6.066767388471557e-05, "loss": 0.0344, "step": 27600 }, { "epoch": 7.466197944835046, "grad_norm": 0.17773684859275818, "learning_rate": 6.064074895253188e-05, "loss": 0.0331, "step": 27610 }, { "epoch": 7.468902109248242, "grad_norm": 0.11736723780632019, "learning_rate": 6.061382078796961e-05, "loss": 0.0338, "step": 27620 }, { "epoch": 7.471606273661439, "grad_norm": 0.1439940631389618, "learning_rate": 6.0586889399208814e-05, "loss": 0.0339, "step": 27630 }, { "epoch": 7.474310438074635, "grad_norm": 0.16057740151882172, "learning_rate": 6.0559954794430565e-05, "loss": 0.0343, "step": 27640 }, { "epoch": 7.477014602487832, "grad_norm": 0.12675917148590088, "learning_rate": 6.053301698181687e-05, "loss": 0.034, "step": 27650 }, { "epoch": 7.479718766901027, "grad_norm": 0.23473204672336578, "learning_rate": 6.0506075969550725e-05, "loss": 0.0334, "step": 27660 }, { "epoch": 7.482422931314224, "grad_norm": 0.13465049862861633, "learning_rate": 6.047913176581609e-05, "loss": 0.0347, "step": 27670 }, { "epoch": 7.48512709572742, "grad_norm": 0.17047925293445587, "learning_rate": 6.0452184378797904e-05, "loss": 0.0343, "step": 27680 }, { "epoch": 7.4878312601406165, "grad_norm": 0.1736418604850769, "learning_rate": 6.042523381668209e-05, "loss": 0.0336, "step": 27690 }, { "epoch": 7.490535424553813, "grad_norm": 0.13930369913578033, "learning_rate": 6.03982800876555e-05, "loss": 0.0346, "step": 27700 }, { "epoch": 7.493239588967009, "grad_norm": 0.11684223264455795, "learning_rate": 6.0371323199905975e-05, "loss": 0.0343, "step": 27710 }, { "epoch": 7.495943753380206, "grad_norm": 0.1733824610710144, "learning_rate": 6.03443631616223e-05, "loss": 0.0328, "step": 27720 }, { "epoch": 7.498647917793402, "grad_norm": 0.14008037745952606, "learning_rate": 6.031739998099421e-05, "loss": 0.0336, "step": 27730 }, { "epoch": 7.501352082206598, "grad_norm": 0.1408211588859558, "learning_rate": 6.029043366621243e-05, "loss": 0.0339, "step": 27740 }, { "epoch": 7.504056246619794, "grad_norm": 0.1518973559141159, "learning_rate": 6.0263464225468615e-05, "loss": 0.0346, "step": 27750 }, { "epoch": 7.506760411032991, "grad_norm": 0.17153315246105194, "learning_rate": 6.023649166695534e-05, "loss": 0.0342, "step": 27760 }, { "epoch": 7.509464575446187, "grad_norm": 0.1125929206609726, "learning_rate": 6.0209515998866186e-05, "loss": 0.0331, "step": 27770 }, { "epoch": 7.5121687398593835, "grad_norm": 0.12283899635076523, "learning_rate": 6.018253722939563e-05, "loss": 0.0331, "step": 27780 }, { "epoch": 7.51487290427258, "grad_norm": 0.17802023887634277, "learning_rate": 6.015555536673914e-05, "loss": 0.035, "step": 27790 }, { "epoch": 7.517577068685776, "grad_norm": 0.16389335691928864, "learning_rate": 6.0128570419093054e-05, "loss": 0.0337, "step": 27800 }, { "epoch": 7.520281233098973, "grad_norm": 0.16036894917488098, "learning_rate": 6.010158239465471e-05, "loss": 0.0358, "step": 27810 }, { "epoch": 7.522985397512169, "grad_norm": 0.12581415474414825, "learning_rate": 6.007459130162235e-05, "loss": 0.0326, "step": 27820 }, { "epoch": 7.525689561925365, "grad_norm": 0.22350206971168518, "learning_rate": 6.004759714819516e-05, "loss": 0.0334, "step": 27830 }, { "epoch": 7.528393726338561, "grad_norm": 0.1511598825454712, "learning_rate": 6.002059994257323e-05, "loss": 0.035, "step": 27840 }, { "epoch": 7.531097890751758, "grad_norm": 0.16008445620536804, "learning_rate": 5.999359969295764e-05, "loss": 0.0334, "step": 27850 }, { "epoch": 7.533802055164954, "grad_norm": 0.18486373126506805, "learning_rate": 5.9966596407550314e-05, "loss": 0.0322, "step": 27860 }, { "epoch": 7.5365062195781505, "grad_norm": 0.12086580693721771, "learning_rate": 5.993959009455416e-05, "loss": 0.0349, "step": 27870 }, { "epoch": 7.539210383991347, "grad_norm": 0.10922875255346298, "learning_rate": 5.991258076217298e-05, "loss": 0.034, "step": 27880 }, { "epoch": 7.541914548404543, "grad_norm": 0.14818942546844482, "learning_rate": 5.988556841861147e-05, "loss": 0.0337, "step": 27890 }, { "epoch": 7.544618712817739, "grad_norm": 0.13844577968120575, "learning_rate": 5.985855307207531e-05, "loss": 0.0338, "step": 27900 }, { "epoch": 7.547322877230935, "grad_norm": 0.19273878633975983, "learning_rate": 5.9831534730771e-05, "loss": 0.0329, "step": 27910 }, { "epoch": 7.550027041644132, "grad_norm": 0.11972258239984512, "learning_rate": 5.980451340290605e-05, "loss": 0.0341, "step": 27920 }, { "epoch": 7.552731206057328, "grad_norm": 0.17082726955413818, "learning_rate": 5.97774890966888e-05, "loss": 0.0329, "step": 27930 }, { "epoch": 7.555435370470525, "grad_norm": 0.11202441900968552, "learning_rate": 5.975046182032851e-05, "loss": 0.0345, "step": 27940 }, { "epoch": 7.558139534883721, "grad_norm": 0.17259612679481506, "learning_rate": 5.972343158203537e-05, "loss": 0.0322, "step": 27950 }, { "epoch": 7.5608436992969175, "grad_norm": 0.1714528203010559, "learning_rate": 5.969639839002045e-05, "loss": 0.0339, "step": 27960 }, { "epoch": 7.563547863710114, "grad_norm": 0.17534537613391876, "learning_rate": 5.966936225249572e-05, "loss": 0.0342, "step": 27970 }, { "epoch": 7.56625202812331, "grad_norm": 0.14882872998714447, "learning_rate": 5.9642323177674044e-05, "loss": 0.0338, "step": 27980 }, { "epoch": 7.568956192536506, "grad_norm": 0.17728957533836365, "learning_rate": 5.9615281173769154e-05, "loss": 0.0333, "step": 27990 }, { "epoch": 7.571660356949702, "grad_norm": 0.16935740411281586, "learning_rate": 5.958823624899574e-05, "loss": 0.0336, "step": 28000 }, { "epoch": 7.574364521362899, "grad_norm": 0.12214040756225586, "learning_rate": 5.956118841156933e-05, "loss": 0.0341, "step": 28010 }, { "epoch": 7.577068685776095, "grad_norm": 0.14396221935749054, "learning_rate": 5.953413766970631e-05, "loss": 0.035, "step": 28020 }, { "epoch": 7.579772850189292, "grad_norm": 0.1642797887325287, "learning_rate": 5.9507084031624e-05, "loss": 0.0337, "step": 28030 }, { "epoch": 7.582477014602488, "grad_norm": 0.09735124558210373, "learning_rate": 5.948002750554058e-05, "loss": 0.034, "step": 28040 }, { "epoch": 7.5851811790156844, "grad_norm": 0.12307817488908768, "learning_rate": 5.9452968099675124e-05, "loss": 0.0339, "step": 28050 }, { "epoch": 7.58788534342888, "grad_norm": 0.14674244821071625, "learning_rate": 5.9425905822247527e-05, "loss": 0.0339, "step": 28060 }, { "epoch": 7.590589507842076, "grad_norm": 0.08952483534812927, "learning_rate": 5.939884068147864e-05, "loss": 0.0354, "step": 28070 }, { "epoch": 7.593293672255273, "grad_norm": 0.15228842198848724, "learning_rate": 5.937177268559011e-05, "loss": 0.0334, "step": 28080 }, { "epoch": 7.595997836668469, "grad_norm": 0.20435911417007446, "learning_rate": 5.934470184280448e-05, "loss": 0.0344, "step": 28090 }, { "epoch": 7.598702001081666, "grad_norm": 0.14247220754623413, "learning_rate": 5.931762816134516e-05, "loss": 0.0328, "step": 28100 }, { "epoch": 7.601406165494862, "grad_norm": 0.16724559664726257, "learning_rate": 5.9290551649436434e-05, "loss": 0.0347, "step": 28110 }, { "epoch": 7.604110329908059, "grad_norm": 0.107323057949543, "learning_rate": 5.9263472315303416e-05, "loss": 0.0334, "step": 28120 }, { "epoch": 7.606814494321255, "grad_norm": 0.10622744262218475, "learning_rate": 5.9236390167172096e-05, "loss": 0.0333, "step": 28130 }, { "epoch": 7.609518658734451, "grad_norm": 0.14311698079109192, "learning_rate": 5.920930521326932e-05, "loss": 0.0339, "step": 28140 }, { "epoch": 7.612222823147647, "grad_norm": 0.1151074692606926, "learning_rate": 5.918221746182276e-05, "loss": 0.0336, "step": 28150 }, { "epoch": 7.614926987560843, "grad_norm": 0.09298308193683624, "learning_rate": 5.9155126921061e-05, "loss": 0.0349, "step": 28160 }, { "epoch": 7.61763115197404, "grad_norm": 0.161377415060997, "learning_rate": 5.91280335992134e-05, "loss": 0.0339, "step": 28170 }, { "epoch": 7.620335316387236, "grad_norm": 0.25275707244873047, "learning_rate": 5.91009375045102e-05, "loss": 0.0347, "step": 28180 }, { "epoch": 7.623039480800433, "grad_norm": 0.1657915562391281, "learning_rate": 5.9073838645182476e-05, "loss": 0.0335, "step": 28190 }, { "epoch": 7.625743645213629, "grad_norm": 0.20728431642055511, "learning_rate": 5.904673702946217e-05, "loss": 0.0347, "step": 28200 }, { "epoch": 7.628447809626826, "grad_norm": 0.19583730399608612, "learning_rate": 5.9019632665582004e-05, "loss": 0.0336, "step": 28210 }, { "epoch": 7.631151974040022, "grad_norm": 0.127980574965477, "learning_rate": 5.899252556177559e-05, "loss": 0.0346, "step": 28220 }, { "epoch": 7.633856138453218, "grad_norm": 0.09167162328958511, "learning_rate": 5.896541572627735e-05, "loss": 0.0343, "step": 28230 }, { "epoch": 7.636560302866414, "grad_norm": 0.16287793219089508, "learning_rate": 5.893830316732253e-05, "loss": 0.0329, "step": 28240 }, { "epoch": 7.63926446727961, "grad_norm": 0.1933658868074417, "learning_rate": 5.8911187893147214e-05, "loss": 0.0327, "step": 28250 }, { "epoch": 7.641968631692807, "grad_norm": 0.15333543717861176, "learning_rate": 5.888406991198828e-05, "loss": 0.0336, "step": 28260 }, { "epoch": 7.644672796106003, "grad_norm": 0.13896358013153076, "learning_rate": 5.885694923208349e-05, "loss": 0.0336, "step": 28270 }, { "epoch": 7.6473769605192, "grad_norm": 0.1546355038881302, "learning_rate": 5.882982586167138e-05, "loss": 0.0324, "step": 28280 }, { "epoch": 7.650081124932396, "grad_norm": 0.20858441293239594, "learning_rate": 5.880269980899131e-05, "loss": 0.033, "step": 28290 }, { "epoch": 7.6527852893455925, "grad_norm": 0.17081402242183685, "learning_rate": 5.8775571082283465e-05, "loss": 0.0326, "step": 28300 }, { "epoch": 7.655489453758788, "grad_norm": 0.11358178406953812, "learning_rate": 5.8748439689788824e-05, "loss": 0.0332, "step": 28310 }, { "epoch": 7.6581936181719845, "grad_norm": 0.11965499818325043, "learning_rate": 5.87213056397492e-05, "loss": 0.0341, "step": 28320 }, { "epoch": 7.660897782585181, "grad_norm": 0.1894199699163437, "learning_rate": 5.869416894040719e-05, "loss": 0.0343, "step": 28330 }, { "epoch": 7.663601946998377, "grad_norm": 0.08758696168661118, "learning_rate": 5.866702960000621e-05, "loss": 0.0327, "step": 28340 }, { "epoch": 7.666306111411574, "grad_norm": 0.16723303496837616, "learning_rate": 5.863988762679048e-05, "loss": 0.0329, "step": 28350 }, { "epoch": 7.66901027582477, "grad_norm": 0.1738133281469345, "learning_rate": 5.8612743029005e-05, "loss": 0.0332, "step": 28360 }, { "epoch": 7.671714440237967, "grad_norm": 0.1465829312801361, "learning_rate": 5.858559581489561e-05, "loss": 0.0332, "step": 28370 }, { "epoch": 7.674418604651163, "grad_norm": 0.17267991602420807, "learning_rate": 5.85584459927089e-05, "loss": 0.0337, "step": 28380 }, { "epoch": 7.6771227690643595, "grad_norm": 0.13406546413898468, "learning_rate": 5.853129357069227e-05, "loss": 0.0331, "step": 28390 }, { "epoch": 7.679826933477555, "grad_norm": 0.12895463407039642, "learning_rate": 5.8504138557093913e-05, "loss": 0.0335, "step": 28400 }, { "epoch": 7.6825310978907515, "grad_norm": 0.17636020481586456, "learning_rate": 5.8476980960162784e-05, "loss": 0.0335, "step": 28410 }, { "epoch": 7.685235262303948, "grad_norm": 0.1516740322113037, "learning_rate": 5.844982078814868e-05, "loss": 0.0344, "step": 28420 }, { "epoch": 7.687939426717144, "grad_norm": 0.14841234683990479, "learning_rate": 5.842265804930211e-05, "loss": 0.033, "step": 28430 }, { "epoch": 7.690643591130341, "grad_norm": 0.16566342115402222, "learning_rate": 5.8395492751874425e-05, "loss": 0.0349, "step": 28440 }, { "epoch": 7.693347755543537, "grad_norm": 0.1319410353899002, "learning_rate": 5.836832490411771e-05, "loss": 0.0327, "step": 28450 }, { "epoch": 7.696051919956734, "grad_norm": 0.11865943670272827, "learning_rate": 5.834115451428485e-05, "loss": 0.0329, "step": 28460 }, { "epoch": 7.698756084369929, "grad_norm": 0.12358401715755463, "learning_rate": 5.831398159062946e-05, "loss": 0.0344, "step": 28470 }, { "epoch": 7.701460248783126, "grad_norm": 0.10282787680625916, "learning_rate": 5.828680614140599e-05, "loss": 0.0336, "step": 28480 }, { "epoch": 7.704164413196322, "grad_norm": 0.12483703345060349, "learning_rate": 5.825962817486962e-05, "loss": 0.0337, "step": 28490 }, { "epoch": 7.7068685776095185, "grad_norm": 0.1591951549053192, "learning_rate": 5.823244769927629e-05, "loss": 0.0324, "step": 28500 }, { "epoch": 7.709572742022715, "grad_norm": 0.10543885827064514, "learning_rate": 5.8205264722882716e-05, "loss": 0.0325, "step": 28510 }, { "epoch": 7.712276906435911, "grad_norm": 0.2899112403392792, "learning_rate": 5.817807925394636e-05, "loss": 0.0338, "step": 28520 }, { "epoch": 7.714981070849108, "grad_norm": 0.1837235689163208, "learning_rate": 5.815089130072546e-05, "loss": 0.0325, "step": 28530 }, { "epoch": 7.717685235262304, "grad_norm": 0.17063714563846588, "learning_rate": 5.8123700871479e-05, "loss": 0.0333, "step": 28540 }, { "epoch": 7.720389399675501, "grad_norm": 0.21489760279655457, "learning_rate": 5.809650797446671e-05, "loss": 0.0335, "step": 28550 }, { "epoch": 7.723093564088696, "grad_norm": 0.08781804889440536, "learning_rate": 5.806931261794907e-05, "loss": 0.033, "step": 28560 }, { "epoch": 7.725797728501893, "grad_norm": 0.128851056098938, "learning_rate": 5.804211481018731e-05, "loss": 0.0334, "step": 28570 }, { "epoch": 7.728501892915089, "grad_norm": 0.17771176993846893, "learning_rate": 5.801491455944341e-05, "loss": 0.0342, "step": 28580 }, { "epoch": 7.7312060573282855, "grad_norm": 0.21915310621261597, "learning_rate": 5.79877118739801e-05, "loss": 0.0345, "step": 28590 }, { "epoch": 7.733910221741482, "grad_norm": 0.1337273120880127, "learning_rate": 5.7960506762060816e-05, "loss": 0.0334, "step": 28600 }, { "epoch": 7.736614386154678, "grad_norm": 0.18092107772827148, "learning_rate": 5.793329923194977e-05, "loss": 0.0339, "step": 28610 }, { "epoch": 7.739318550567875, "grad_norm": 0.17242519557476044, "learning_rate": 5.790608929191187e-05, "loss": 0.0351, "step": 28620 }, { "epoch": 7.742022714981071, "grad_norm": 0.11619862914085388, "learning_rate": 5.78788769502128e-05, "loss": 0.0342, "step": 28630 }, { "epoch": 7.744726879394268, "grad_norm": 0.14666707813739777, "learning_rate": 5.785166221511894e-05, "loss": 0.0332, "step": 28640 }, { "epoch": 7.747431043807463, "grad_norm": 0.13121631741523743, "learning_rate": 5.7824445094897415e-05, "loss": 0.0322, "step": 28650 }, { "epoch": 7.75013520822066, "grad_norm": 0.10423087328672409, "learning_rate": 5.7797225597816065e-05, "loss": 0.0335, "step": 28660 }, { "epoch": 7.752839372633856, "grad_norm": 0.14070899784564972, "learning_rate": 5.777000373214345e-05, "loss": 0.0337, "step": 28670 }, { "epoch": 7.7555435370470525, "grad_norm": 0.16766168177127838, "learning_rate": 5.774277950614885e-05, "loss": 0.0352, "step": 28680 }, { "epoch": 7.758247701460249, "grad_norm": 0.14280065894126892, "learning_rate": 5.771555292810227e-05, "loss": 0.035, "step": 28690 }, { "epoch": 7.760951865873445, "grad_norm": 0.20112010836601257, "learning_rate": 5.768832400627444e-05, "loss": 0.0338, "step": 28700 }, { "epoch": 7.763656030286642, "grad_norm": 0.117180235683918, "learning_rate": 5.7661092748936775e-05, "loss": 0.0341, "step": 28710 }, { "epoch": 7.766360194699837, "grad_norm": 0.17429865896701813, "learning_rate": 5.76338591643614e-05, "loss": 0.0335, "step": 28720 }, { "epoch": 7.769064359113034, "grad_norm": 0.12510788440704346, "learning_rate": 5.760662326082118e-05, "loss": 0.0341, "step": 28730 }, { "epoch": 7.77176852352623, "grad_norm": 0.13887526094913483, "learning_rate": 5.757938504658965e-05, "loss": 0.0331, "step": 28740 }, { "epoch": 7.774472687939427, "grad_norm": 0.18865293264389038, "learning_rate": 5.755214452994107e-05, "loss": 0.0321, "step": 28750 }, { "epoch": 7.777176852352623, "grad_norm": 0.16992512345314026, "learning_rate": 5.752490171915039e-05, "loss": 0.0322, "step": 28760 }, { "epoch": 7.7798810167658194, "grad_norm": 0.18888716399669647, "learning_rate": 5.749765662249324e-05, "loss": 0.0328, "step": 28770 }, { "epoch": 7.782585181179016, "grad_norm": 0.11024155467748642, "learning_rate": 5.747040924824596e-05, "loss": 0.0337, "step": 28780 }, { "epoch": 7.785289345592212, "grad_norm": 0.21784444153308868, "learning_rate": 5.7443159604685613e-05, "loss": 0.0326, "step": 28790 }, { "epoch": 7.787993510005409, "grad_norm": 0.11335673183202744, "learning_rate": 5.74159077000899e-05, "loss": 0.0334, "step": 28800 }, { "epoch": 7.790697674418604, "grad_norm": 0.14988484978675842, "learning_rate": 5.7388653542737235e-05, "loss": 0.0334, "step": 28810 }, { "epoch": 7.793401838831801, "grad_norm": 0.15358465909957886, "learning_rate": 5.736139714090672e-05, "loss": 0.034, "step": 28820 }, { "epoch": 7.796106003244997, "grad_norm": 0.11088978499174118, "learning_rate": 5.73341385028781e-05, "loss": 0.0319, "step": 28830 }, { "epoch": 7.798810167658194, "grad_norm": 0.15091973543167114, "learning_rate": 5.7306877636931855e-05, "loss": 0.0336, "step": 28840 }, { "epoch": 7.80151433207139, "grad_norm": 0.12701404094696045, "learning_rate": 5.7279614551349125e-05, "loss": 0.0339, "step": 28850 }, { "epoch": 7.804218496484586, "grad_norm": 0.1099219024181366, "learning_rate": 5.725234925441169e-05, "loss": 0.0317, "step": 28860 }, { "epoch": 7.806922660897783, "grad_norm": 0.12871667742729187, "learning_rate": 5.7225081754402044e-05, "loss": 0.0335, "step": 28870 }, { "epoch": 7.809626825310979, "grad_norm": 0.1970154494047165, "learning_rate": 5.7197812059603326e-05, "loss": 0.0336, "step": 28880 }, { "epoch": 7.812330989724176, "grad_norm": 0.1191922128200531, "learning_rate": 5.717054017829934e-05, "loss": 0.0328, "step": 28890 }, { "epoch": 7.815035154137371, "grad_norm": 0.12594935297966003, "learning_rate": 5.7143266118774584e-05, "loss": 0.0331, "step": 28900 }, { "epoch": 7.817739318550568, "grad_norm": 0.10895612090826035, "learning_rate": 5.711598988931418e-05, "loss": 0.0316, "step": 28910 }, { "epoch": 7.820443482963764, "grad_norm": 0.28914469480514526, "learning_rate": 5.7088711498203954e-05, "loss": 0.0335, "step": 28920 }, { "epoch": 7.823147647376961, "grad_norm": 0.16663624346256256, "learning_rate": 5.706143095373033e-05, "loss": 0.0339, "step": 28930 }, { "epoch": 7.825851811790157, "grad_norm": 0.14477893710136414, "learning_rate": 5.703414826418042e-05, "loss": 0.032, "step": 28940 }, { "epoch": 7.828555976203353, "grad_norm": 0.16516990959644318, "learning_rate": 5.7006863437842007e-05, "loss": 0.0343, "step": 28950 }, { "epoch": 7.83126014061655, "grad_norm": 0.10166098177433014, "learning_rate": 5.697957648300348e-05, "loss": 0.0326, "step": 28960 }, { "epoch": 7.833964305029745, "grad_norm": 0.10974273085594177, "learning_rate": 5.695228740795391e-05, "loss": 0.0319, "step": 28970 }, { "epoch": 7.836668469442942, "grad_norm": 0.1392280012369156, "learning_rate": 5.6924996220982985e-05, "loss": 0.0317, "step": 28980 }, { "epoch": 7.839372633856138, "grad_norm": 0.21182216703891754, "learning_rate": 5.6897702930381045e-05, "loss": 0.0322, "step": 28990 }, { "epoch": 7.842076798269335, "grad_norm": 0.17261891067028046, "learning_rate": 5.687040754443908e-05, "loss": 0.0327, "step": 29000 }, { "epoch": 7.844780962682531, "grad_norm": 0.11773280054330826, "learning_rate": 5.6843110071448725e-05, "loss": 0.0334, "step": 29010 }, { "epoch": 7.8474851270957275, "grad_norm": 0.1798238456249237, "learning_rate": 5.6815810519702194e-05, "loss": 0.0322, "step": 29020 }, { "epoch": 7.850189291508924, "grad_norm": 0.1774798482656479, "learning_rate": 5.6788508897492396e-05, "loss": 0.0341, "step": 29030 }, { "epoch": 7.85289345592212, "grad_norm": 0.10887517780065536, "learning_rate": 5.676120521311282e-05, "loss": 0.0344, "step": 29040 }, { "epoch": 7.855597620335317, "grad_norm": 0.13631361722946167, "learning_rate": 5.6733899474857634e-05, "loss": 0.0326, "step": 29050 }, { "epoch": 7.858301784748512, "grad_norm": 0.09392847120761871, "learning_rate": 5.670659169102157e-05, "loss": 0.0321, "step": 29060 }, { "epoch": 7.861005949161709, "grad_norm": 0.1364153027534485, "learning_rate": 5.6679281869900044e-05, "loss": 0.0332, "step": 29070 }, { "epoch": 7.863710113574905, "grad_norm": 0.20084629952907562, "learning_rate": 5.6651970019789045e-05, "loss": 0.0337, "step": 29080 }, { "epoch": 7.866414277988102, "grad_norm": 0.11862487345933914, "learning_rate": 5.662465614898519e-05, "loss": 0.0315, "step": 29090 }, { "epoch": 7.869118442401298, "grad_norm": 0.12851035594940186, "learning_rate": 5.6597340265785695e-05, "loss": 0.0342, "step": 29100 }, { "epoch": 7.8718226068144945, "grad_norm": 0.18830980360507965, "learning_rate": 5.657002237848843e-05, "loss": 0.0334, "step": 29110 }, { "epoch": 7.874526771227691, "grad_norm": 0.14967986941337585, "learning_rate": 5.654270249539183e-05, "loss": 0.033, "step": 29120 }, { "epoch": 7.8772309356408865, "grad_norm": 0.12490998208522797, "learning_rate": 5.651538062479498e-05, "loss": 0.0327, "step": 29130 }, { "epoch": 7.879935100054083, "grad_norm": 0.16990450024604797, "learning_rate": 5.648805677499751e-05, "loss": 0.032, "step": 29140 }, { "epoch": 7.882639264467279, "grad_norm": 0.13459855318069458, "learning_rate": 5.646073095429969e-05, "loss": 0.0333, "step": 29150 }, { "epoch": 7.885343428880476, "grad_norm": 0.20116093754768372, "learning_rate": 5.643340317100241e-05, "loss": 0.0325, "step": 29160 }, { "epoch": 7.888047593293672, "grad_norm": 0.1286282241344452, "learning_rate": 5.64060734334071e-05, "loss": 0.0327, "step": 29170 }, { "epoch": 7.890751757706869, "grad_norm": 0.1038326844573021, "learning_rate": 5.637874174981583e-05, "loss": 0.0334, "step": 29180 }, { "epoch": 7.893455922120065, "grad_norm": 0.16044290363788605, "learning_rate": 5.635140812853124e-05, "loss": 0.0329, "step": 29190 }, { "epoch": 7.8961600865332615, "grad_norm": 0.20786403119564056, "learning_rate": 5.6324072577856544e-05, "loss": 0.0328, "step": 29200 }, { "epoch": 7.898864250946458, "grad_norm": 0.28381165862083435, "learning_rate": 5.629673510609559e-05, "loss": 0.0326, "step": 29210 }, { "epoch": 7.9015684153596535, "grad_norm": 0.1598021388053894, "learning_rate": 5.626939572155276e-05, "loss": 0.0332, "step": 29220 }, { "epoch": 7.90427257977285, "grad_norm": 0.19537803530693054, "learning_rate": 5.6242054432533054e-05, "loss": 0.0317, "step": 29230 }, { "epoch": 7.906976744186046, "grad_norm": 0.1400481015443802, "learning_rate": 5.621471124734201e-05, "loss": 0.0335, "step": 29240 }, { "epoch": 7.909680908599243, "grad_norm": 0.2076166868209839, "learning_rate": 5.6187366174285794e-05, "loss": 0.0323, "step": 29250 }, { "epoch": 7.912385073012439, "grad_norm": 0.1286526620388031, "learning_rate": 5.616001922167109e-05, "loss": 0.033, "step": 29260 }, { "epoch": 7.915089237425636, "grad_norm": 0.19820982217788696, "learning_rate": 5.61326703978052e-05, "loss": 0.0334, "step": 29270 }, { "epoch": 7.917793401838832, "grad_norm": 0.1307637095451355, "learning_rate": 5.6105319710995964e-05, "loss": 0.0325, "step": 29280 }, { "epoch": 7.9204975662520285, "grad_norm": 0.08350186049938202, "learning_rate": 5.60779671695518e-05, "loss": 0.0334, "step": 29290 }, { "epoch": 7.923201730665225, "grad_norm": 0.1338197886943817, "learning_rate": 5.6050612781781684e-05, "loss": 0.0321, "step": 29300 }, { "epoch": 7.9259058950784205, "grad_norm": 0.10579405725002289, "learning_rate": 5.602325655599516e-05, "loss": 0.033, "step": 29310 }, { "epoch": 7.928610059491617, "grad_norm": 0.17884555459022522, "learning_rate": 5.599589850050234e-05, "loss": 0.0327, "step": 29320 }, { "epoch": 7.931314223904813, "grad_norm": 0.15454989671707153, "learning_rate": 5.5968538623613874e-05, "loss": 0.0322, "step": 29330 }, { "epoch": 7.93401838831801, "grad_norm": 0.13855770230293274, "learning_rate": 5.594117693364095e-05, "loss": 0.0341, "step": 29340 }, { "epoch": 7.936722552731206, "grad_norm": 0.17949336767196655, "learning_rate": 5.591381343889535e-05, "loss": 0.0328, "step": 29350 }, { "epoch": 7.939426717144403, "grad_norm": 0.12392537295818329, "learning_rate": 5.5886448147689355e-05, "loss": 0.0336, "step": 29360 }, { "epoch": 7.942130881557599, "grad_norm": 0.11127841472625732, "learning_rate": 5.585908106833585e-05, "loss": 0.0333, "step": 29370 }, { "epoch": 7.944835045970795, "grad_norm": 0.10107088088989258, "learning_rate": 5.5831712209148226e-05, "loss": 0.0326, "step": 29380 }, { "epoch": 7.947539210383991, "grad_norm": 0.16071607172489166, "learning_rate": 5.58043415784404e-05, "loss": 0.0329, "step": 29390 }, { "epoch": 7.9502433747971875, "grad_norm": 0.138338103890419, "learning_rate": 5.577696918452686e-05, "loss": 0.0341, "step": 29400 }, { "epoch": 7.952947539210384, "grad_norm": 0.16478490829467773, "learning_rate": 5.5749595035722604e-05, "loss": 0.033, "step": 29410 }, { "epoch": 7.95565170362358, "grad_norm": 0.17418169975280762, "learning_rate": 5.5722219140343193e-05, "loss": 0.0334, "step": 29420 }, { "epoch": 7.958355868036777, "grad_norm": 0.14446260035037994, "learning_rate": 5.56948415067047e-05, "loss": 0.0323, "step": 29430 }, { "epoch": 7.961060032449973, "grad_norm": 0.1400604546070099, "learning_rate": 5.5667462143123704e-05, "loss": 0.0322, "step": 29440 }, { "epoch": 7.96376419686317, "grad_norm": 0.165761336684227, "learning_rate": 5.564008105791737e-05, "loss": 0.0338, "step": 29450 }, { "epoch": 7.966468361276366, "grad_norm": 0.1743849813938141, "learning_rate": 5.5612698259403316e-05, "loss": 0.0322, "step": 29460 }, { "epoch": 7.969172525689562, "grad_norm": 0.13766293227672577, "learning_rate": 5.5585313755899724e-05, "loss": 0.0336, "step": 29470 }, { "epoch": 7.971876690102758, "grad_norm": 0.11097204685211182, "learning_rate": 5.5557927555725285e-05, "loss": 0.0335, "step": 29480 }, { "epoch": 7.974580854515954, "grad_norm": 0.20402948558330536, "learning_rate": 5.55305396671992e-05, "loss": 0.0311, "step": 29490 }, { "epoch": 7.977285018929151, "grad_norm": 0.16187742352485657, "learning_rate": 5.55031500986412e-05, "loss": 0.0317, "step": 29500 }, { "epoch": 7.979989183342347, "grad_norm": 0.19609375298023224, "learning_rate": 5.547575885837149e-05, "loss": 0.0327, "step": 29510 }, { "epoch": 7.982693347755544, "grad_norm": 0.1716754138469696, "learning_rate": 5.5448365954710825e-05, "loss": 0.0332, "step": 29520 }, { "epoch": 7.98539751216874, "grad_norm": 0.1638871282339096, "learning_rate": 5.5420971395980446e-05, "loss": 0.034, "step": 29530 }, { "epoch": 7.988101676581936, "grad_norm": 0.12058732658624649, "learning_rate": 5.539357519050209e-05, "loss": 0.0338, "step": 29540 }, { "epoch": 7.990805840995132, "grad_norm": 0.1168409064412117, "learning_rate": 5.536617734659799e-05, "loss": 0.032, "step": 29550 }, { "epoch": 7.993510005408329, "grad_norm": 0.13223668932914734, "learning_rate": 5.533877787259091e-05, "loss": 0.0309, "step": 29560 }, { "epoch": 7.996214169821525, "grad_norm": 0.13222870230674744, "learning_rate": 5.5311376776804044e-05, "loss": 0.0332, "step": 29570 }, { "epoch": 7.998918334234721, "grad_norm": 0.09625998139381409, "learning_rate": 5.528397406756118e-05, "loss": 0.0325, "step": 29580 }, { "epoch": 8.001622498647917, "grad_norm": 0.11766723543405533, "learning_rate": 5.525656975318652e-05, "loss": 0.034, "step": 29590 }, { "epoch": 8.004326663061114, "grad_norm": 0.13976986706256866, "learning_rate": 5.522916384200474e-05, "loss": 0.0349, "step": 29600 }, { "epoch": 8.00703082747431, "grad_norm": 0.20231562852859497, "learning_rate": 5.520175634234106e-05, "loss": 0.0329, "step": 29610 }, { "epoch": 8.009734991887507, "grad_norm": 0.16184911131858826, "learning_rate": 5.517434726252113e-05, "loss": 0.0324, "step": 29620 }, { "epoch": 8.012439156300703, "grad_norm": 0.17187172174453735, "learning_rate": 5.514693661087113e-05, "loss": 0.0325, "step": 29630 }, { "epoch": 8.0151433207139, "grad_norm": 0.10875377058982849, "learning_rate": 5.511952439571769e-05, "loss": 0.0329, "step": 29640 }, { "epoch": 8.017847485127096, "grad_norm": 0.16192160546779633, "learning_rate": 5.509211062538791e-05, "loss": 0.0335, "step": 29650 }, { "epoch": 8.020551649540293, "grad_norm": 0.161725252866745, "learning_rate": 5.506469530820939e-05, "loss": 0.0321, "step": 29660 }, { "epoch": 8.023255813953488, "grad_norm": 0.13781587779521942, "learning_rate": 5.503727845251014e-05, "loss": 0.0325, "step": 29670 }, { "epoch": 8.025959978366684, "grad_norm": 0.15296615660190582, "learning_rate": 5.50098600666187e-05, "loss": 0.0333, "step": 29680 }, { "epoch": 8.028664142779881, "grad_norm": 0.1750694066286087, "learning_rate": 5.498244015886406e-05, "loss": 0.0318, "step": 29690 }, { "epoch": 8.031368307193077, "grad_norm": 0.19238150119781494, "learning_rate": 5.495501873757565e-05, "loss": 0.0317, "step": 29700 }, { "epoch": 8.034072471606274, "grad_norm": 0.11145007610321045, "learning_rate": 5.492759581108336e-05, "loss": 0.0331, "step": 29710 }, { "epoch": 8.03677663601947, "grad_norm": 0.18390707671642303, "learning_rate": 5.490017138771759e-05, "loss": 0.033, "step": 29720 }, { "epoch": 8.039480800432667, "grad_norm": 0.17391082644462585, "learning_rate": 5.487274547580912e-05, "loss": 0.0311, "step": 29730 }, { "epoch": 8.042184964845863, "grad_norm": 0.19079354405403137, "learning_rate": 5.484531808368923e-05, "loss": 0.0337, "step": 29740 }, { "epoch": 8.044889129259058, "grad_norm": 0.10190176218748093, "learning_rate": 5.4817889219689656e-05, "loss": 0.0325, "step": 29750 }, { "epoch": 8.047593293672255, "grad_norm": 0.18710874021053314, "learning_rate": 5.4790458892142536e-05, "loss": 0.0327, "step": 29760 }, { "epoch": 8.050297458085451, "grad_norm": 0.19119514524936676, "learning_rate": 5.476302710938048e-05, "loss": 0.0336, "step": 29770 }, { "epoch": 8.053001622498648, "grad_norm": 0.1478215456008911, "learning_rate": 5.473559387973657e-05, "loss": 0.0334, "step": 29780 }, { "epoch": 8.055705786911844, "grad_norm": 0.18539270758628845, "learning_rate": 5.470815921154425e-05, "loss": 0.0341, "step": 29790 }, { "epoch": 8.058409951325041, "grad_norm": 0.10239000618457794, "learning_rate": 5.468072311313749e-05, "loss": 0.0325, "step": 29800 }, { "epoch": 8.061114115738237, "grad_norm": 0.10228639096021652, "learning_rate": 5.465328559285063e-05, "loss": 0.0336, "step": 29810 }, { "epoch": 8.063818280151434, "grad_norm": 0.4617862403392792, "learning_rate": 5.462584665901849e-05, "loss": 0.0321, "step": 29820 }, { "epoch": 8.06652244456463, "grad_norm": 0.13714459538459778, "learning_rate": 5.4598406319976235e-05, "loss": 0.0336, "step": 29830 }, { "epoch": 8.069226608977825, "grad_norm": 0.13607394695281982, "learning_rate": 5.457096458405958e-05, "loss": 0.0325, "step": 29840 }, { "epoch": 8.071930773391022, "grad_norm": 0.11658037453889847, "learning_rate": 5.454352145960457e-05, "loss": 0.0318, "step": 29850 }, { "epoch": 8.074634937804218, "grad_norm": 0.12987318634986877, "learning_rate": 5.4516076954947715e-05, "loss": 0.0321, "step": 29860 }, { "epoch": 8.077339102217415, "grad_norm": 0.1845444142818451, "learning_rate": 5.448863107842591e-05, "loss": 0.0315, "step": 29870 }, { "epoch": 8.08004326663061, "grad_norm": 0.1026078388094902, "learning_rate": 5.446118383837651e-05, "loss": 0.0317, "step": 29880 }, { "epoch": 8.082747431043808, "grad_norm": 0.1375764012336731, "learning_rate": 5.443373524313722e-05, "loss": 0.0327, "step": 29890 }, { "epoch": 8.085451595457004, "grad_norm": 0.1682383269071579, "learning_rate": 5.440628530104626e-05, "loss": 0.0316, "step": 29900 }, { "epoch": 8.088155759870201, "grad_norm": 0.17769372463226318, "learning_rate": 5.4378834020442146e-05, "loss": 0.0334, "step": 29910 }, { "epoch": 8.090859924283397, "grad_norm": 0.12917278707027435, "learning_rate": 5.4351381409663884e-05, "loss": 0.0329, "step": 29920 }, { "epoch": 8.093564088696592, "grad_norm": 0.14671017229557037, "learning_rate": 5.432392747705084e-05, "loss": 0.0333, "step": 29930 }, { "epoch": 8.09626825310979, "grad_norm": 0.08945149183273315, "learning_rate": 5.429647223094278e-05, "loss": 0.0326, "step": 29940 }, { "epoch": 8.098972417522985, "grad_norm": 0.18160946667194366, "learning_rate": 5.4269015679679924e-05, "loss": 0.0336, "step": 29950 }, { "epoch": 8.101676581936182, "grad_norm": 0.10816147923469543, "learning_rate": 5.424155783160281e-05, "loss": 0.0333, "step": 29960 }, { "epoch": 8.104380746349378, "grad_norm": 0.1086786538362503, "learning_rate": 5.4214098695052415e-05, "loss": 0.0315, "step": 29970 }, { "epoch": 8.107084910762575, "grad_norm": 0.13075609505176544, "learning_rate": 5.418663827837012e-05, "loss": 0.0328, "step": 29980 }, { "epoch": 8.10978907517577, "grad_norm": 0.09791641682386398, "learning_rate": 5.415917658989763e-05, "loss": 0.0319, "step": 29990 }, { "epoch": 8.112493239588966, "grad_norm": 0.15226206183433533, "learning_rate": 5.413171363797713e-05, "loss": 0.0319, "step": 30000 }, { "epoch": 8.115197404002163, "grad_norm": 0.11154383420944214, "learning_rate": 5.4104249430951116e-05, "loss": 0.0333, "step": 30010 }, { "epoch": 8.117901568415359, "grad_norm": 0.11519784480333328, "learning_rate": 5.4076783977162494e-05, "loss": 0.0326, "step": 30020 }, { "epoch": 8.120605732828556, "grad_norm": 0.15219910442829132, "learning_rate": 5.4049317284954525e-05, "loss": 0.0324, "step": 30030 }, { "epoch": 8.123309897241752, "grad_norm": 0.17946118116378784, "learning_rate": 5.4021849362670884e-05, "loss": 0.033, "step": 30040 }, { "epoch": 8.12601406165495, "grad_norm": 0.14496779441833496, "learning_rate": 5.3994380218655604e-05, "loss": 0.0323, "step": 30050 }, { "epoch": 8.128718226068145, "grad_norm": 0.11986522376537323, "learning_rate": 5.396690986125309e-05, "loss": 0.033, "step": 30060 }, { "epoch": 8.131422390481342, "grad_norm": 0.10182053595781326, "learning_rate": 5.3939438298808075e-05, "loss": 0.0334, "step": 30070 }, { "epoch": 8.134126554894538, "grad_norm": 0.18183192610740662, "learning_rate": 5.3911965539665744e-05, "loss": 0.0311, "step": 30080 }, { "epoch": 8.136830719307733, "grad_norm": 0.14225181937217712, "learning_rate": 5.388449159217156e-05, "loss": 0.0337, "step": 30090 }, { "epoch": 8.13953488372093, "grad_norm": 0.11822816729545593, "learning_rate": 5.3857016464671385e-05, "loss": 0.0319, "step": 30100 }, { "epoch": 8.142239048134126, "grad_norm": 0.10620876401662827, "learning_rate": 5.382954016551146e-05, "loss": 0.0337, "step": 30110 }, { "epoch": 8.144943212547323, "grad_norm": 0.1719164401292801, "learning_rate": 5.380206270303835e-05, "loss": 0.033, "step": 30120 }, { "epoch": 8.147647376960519, "grad_norm": 0.1296004354953766, "learning_rate": 5.377458408559897e-05, "loss": 0.0334, "step": 30130 }, { "epoch": 8.150351541373716, "grad_norm": 0.13791413605213165, "learning_rate": 5.374710432154061e-05, "loss": 0.0322, "step": 30140 }, { "epoch": 8.153055705786912, "grad_norm": 0.11509237438440323, "learning_rate": 5.3719623419210886e-05, "loss": 0.0312, "step": 30150 }, { "epoch": 8.155759870200107, "grad_norm": 0.1329469382762909, "learning_rate": 5.3692141386957786e-05, "loss": 0.032, "step": 30160 }, { "epoch": 8.158464034613305, "grad_norm": 0.10774441808462143, "learning_rate": 5.3664658233129616e-05, "loss": 0.0321, "step": 30170 }, { "epoch": 8.1611681990265, "grad_norm": 0.1700413078069687, "learning_rate": 5.363717396607504e-05, "loss": 0.0327, "step": 30180 }, { "epoch": 8.163872363439697, "grad_norm": 0.17567960917949677, "learning_rate": 5.360968859414305e-05, "loss": 0.0338, "step": 30190 }, { "epoch": 8.166576527852893, "grad_norm": 0.3716004490852356, "learning_rate": 5.358220212568295e-05, "loss": 0.0316, "step": 30200 }, { "epoch": 8.16928069226609, "grad_norm": 0.11133453249931335, "learning_rate": 5.355471456904444e-05, "loss": 0.0321, "step": 30210 }, { "epoch": 8.171984856679286, "grad_norm": 0.13741755485534668, "learning_rate": 5.3527225932577495e-05, "loss": 0.0332, "step": 30220 }, { "epoch": 8.174689021092483, "grad_norm": 0.13222116231918335, "learning_rate": 5.349973622463246e-05, "loss": 0.0323, "step": 30230 }, { "epoch": 8.177393185505679, "grad_norm": 0.14900454878807068, "learning_rate": 5.3472245453559956e-05, "loss": 0.0325, "step": 30240 }, { "epoch": 8.180097349918874, "grad_norm": 0.1705726534128189, "learning_rate": 5.3444753627710955e-05, "loss": 0.0321, "step": 30250 }, { "epoch": 8.182801514332072, "grad_norm": 0.10358226299285889, "learning_rate": 5.341726075543676e-05, "loss": 0.0347, "step": 30260 }, { "epoch": 8.185505678745267, "grad_norm": 0.1325259953737259, "learning_rate": 5.338976684508898e-05, "loss": 0.0318, "step": 30270 }, { "epoch": 8.188209843158464, "grad_norm": 0.16865627467632294, "learning_rate": 5.336227190501953e-05, "loss": 0.0325, "step": 30280 }, { "epoch": 8.19091400757166, "grad_norm": 0.11861611157655716, "learning_rate": 5.3334775943580664e-05, "loss": 0.0342, "step": 30290 }, { "epoch": 8.193618171984857, "grad_norm": 0.12329159677028656, "learning_rate": 5.330727896912491e-05, "loss": 0.0316, "step": 30300 }, { "epoch": 8.196322336398053, "grad_norm": 0.16526731848716736, "learning_rate": 5.327978099000511e-05, "loss": 0.0335, "step": 30310 }, { "epoch": 8.19902650081125, "grad_norm": 0.1633514165878296, "learning_rate": 5.3252282014574465e-05, "loss": 0.0326, "step": 30320 }, { "epoch": 8.201730665224446, "grad_norm": 0.17117196321487427, "learning_rate": 5.322478205118641e-05, "loss": 0.0327, "step": 30330 }, { "epoch": 8.204434829637641, "grad_norm": 0.12257176637649536, "learning_rate": 5.3197281108194704e-05, "loss": 0.0318, "step": 30340 }, { "epoch": 8.207138994050839, "grad_norm": 0.1047990545630455, "learning_rate": 5.316977919395342e-05, "loss": 0.031, "step": 30350 }, { "epoch": 8.209843158464034, "grad_norm": 0.11756663024425507, "learning_rate": 5.314227631681691e-05, "loss": 0.0331, "step": 30360 }, { "epoch": 8.212547322877231, "grad_norm": 0.1296829879283905, "learning_rate": 5.311477248513982e-05, "loss": 0.0325, "step": 30370 }, { "epoch": 8.215251487290427, "grad_norm": 0.1525518000125885, "learning_rate": 5.30872677072771e-05, "loss": 0.0339, "step": 30380 }, { "epoch": 8.217955651703624, "grad_norm": 0.17409023642539978, "learning_rate": 5.3059761991583954e-05, "loss": 0.0326, "step": 30390 }, { "epoch": 8.22065981611682, "grad_norm": 0.15605078637599945, "learning_rate": 5.303225534641592e-05, "loss": 0.0328, "step": 30400 }, { "epoch": 8.223363980530015, "grad_norm": 0.14955571293830872, "learning_rate": 5.300474778012875e-05, "loss": 0.0323, "step": 30410 }, { "epoch": 8.226068144943213, "grad_norm": 0.11835924535989761, "learning_rate": 5.297723930107855e-05, "loss": 0.032, "step": 30420 }, { "epoch": 8.228772309356408, "grad_norm": 0.12121506780385971, "learning_rate": 5.294972991762167e-05, "loss": 0.0321, "step": 30430 }, { "epoch": 8.231476473769606, "grad_norm": 0.12980973720550537, "learning_rate": 5.292221963811472e-05, "loss": 0.0314, "step": 30440 }, { "epoch": 8.234180638182801, "grad_norm": 0.14496763050556183, "learning_rate": 5.28947084709146e-05, "loss": 0.0333, "step": 30450 }, { "epoch": 8.236884802595998, "grad_norm": 0.1414526104927063, "learning_rate": 5.2867196424378465e-05, "loss": 0.0325, "step": 30460 }, { "epoch": 8.239588967009194, "grad_norm": 0.12538716197013855, "learning_rate": 5.2839683506863765e-05, "loss": 0.0321, "step": 30470 }, { "epoch": 8.242293131422391, "grad_norm": 0.13458319008350372, "learning_rate": 5.281216972672821e-05, "loss": 0.0325, "step": 30480 }, { "epoch": 8.244997295835587, "grad_norm": 0.11720346659421921, "learning_rate": 5.278465509232973e-05, "loss": 0.0313, "step": 30490 }, { "epoch": 8.247701460248782, "grad_norm": 0.17626428604125977, "learning_rate": 5.275713961202655e-05, "loss": 0.0314, "step": 30500 }, { "epoch": 8.25040562466198, "grad_norm": 0.14011137187480927, "learning_rate": 5.2729623294177165e-05, "loss": 0.0326, "step": 30510 }, { "epoch": 8.253109789075175, "grad_norm": 0.12360792607069016, "learning_rate": 5.270210614714028e-05, "loss": 0.033, "step": 30520 }, { "epoch": 8.255813953488373, "grad_norm": 0.1837388277053833, "learning_rate": 5.267458817927491e-05, "loss": 0.0324, "step": 30530 }, { "epoch": 8.258518117901568, "grad_norm": 0.16277259588241577, "learning_rate": 5.264706939894026e-05, "loss": 0.032, "step": 30540 }, { "epoch": 8.261222282314765, "grad_norm": 0.22835411131381989, "learning_rate": 5.261954981449584e-05, "loss": 0.0314, "step": 30550 }, { "epoch": 8.263926446727961, "grad_norm": 0.15311118960380554, "learning_rate": 5.2592029434301324e-05, "loss": 0.0315, "step": 30560 }, { "epoch": 8.266630611141156, "grad_norm": 0.14035026729106903, "learning_rate": 5.256450826671672e-05, "loss": 0.0312, "step": 30570 }, { "epoch": 8.269334775554354, "grad_norm": 0.14211204648017883, "learning_rate": 5.253698632010221e-05, "loss": 0.0327, "step": 30580 }, { "epoch": 8.27203893996755, "grad_norm": 0.14172370731830597, "learning_rate": 5.2509463602818246e-05, "loss": 0.0336, "step": 30590 }, { "epoch": 8.274743104380747, "grad_norm": 0.18235833942890167, "learning_rate": 5.248194012322549e-05, "loss": 0.0307, "step": 30600 }, { "epoch": 8.277447268793942, "grad_norm": 0.20567446947097778, "learning_rate": 5.245441588968486e-05, "loss": 0.0331, "step": 30610 }, { "epoch": 8.28015143320714, "grad_norm": 0.1331825852394104, "learning_rate": 5.242689091055748e-05, "loss": 0.0318, "step": 30620 }, { "epoch": 8.282855597620335, "grad_norm": 0.15715579688549042, "learning_rate": 5.239936519420473e-05, "loss": 0.0322, "step": 30630 }, { "epoch": 8.285559762033532, "grad_norm": 0.12738540768623352, "learning_rate": 5.2371838748988175e-05, "loss": 0.0324, "step": 30640 }, { "epoch": 8.288263926446728, "grad_norm": 0.25916945934295654, "learning_rate": 5.234431158326965e-05, "loss": 0.0344, "step": 30650 }, { "epoch": 8.290968090859923, "grad_norm": 0.16929195821285248, "learning_rate": 5.231678370541115e-05, "loss": 0.0308, "step": 30660 }, { "epoch": 8.29367225527312, "grad_norm": 0.2505860924720764, "learning_rate": 5.228925512377495e-05, "loss": 0.0324, "step": 30670 }, { "epoch": 8.296376419686316, "grad_norm": 0.13027599453926086, "learning_rate": 5.2261725846723465e-05, "loss": 0.0326, "step": 30680 }, { "epoch": 8.299080584099514, "grad_norm": 0.18854308128356934, "learning_rate": 5.22341958826194e-05, "loss": 0.0318, "step": 30690 }, { "epoch": 8.30178474851271, "grad_norm": 0.14549744129180908, "learning_rate": 5.22066652398256e-05, "loss": 0.0323, "step": 30700 }, { "epoch": 8.304488912925907, "grad_norm": 0.21751759946346283, "learning_rate": 5.2179133926705185e-05, "loss": 0.0313, "step": 30710 }, { "epoch": 8.307193077339102, "grad_norm": 0.13850203156471252, "learning_rate": 5.215160195162141e-05, "loss": 0.0325, "step": 30720 }, { "epoch": 8.3098972417523, "grad_norm": 0.2106867879629135, "learning_rate": 5.212406932293776e-05, "loss": 0.0329, "step": 30730 }, { "epoch": 8.312601406165495, "grad_norm": 0.15918725728988647, "learning_rate": 5.209653604901795e-05, "loss": 0.032, "step": 30740 }, { "epoch": 8.31530557057869, "grad_norm": 0.12682674825191498, "learning_rate": 5.206900213822584e-05, "loss": 0.0327, "step": 30750 }, { "epoch": 8.318009734991888, "grad_norm": 0.1763242781162262, "learning_rate": 5.204146759892551e-05, "loss": 0.0316, "step": 30760 }, { "epoch": 8.320713899405083, "grad_norm": 0.1712590605020523, "learning_rate": 5.2013932439481216e-05, "loss": 0.0322, "step": 30770 }, { "epoch": 8.32341806381828, "grad_norm": 0.15317513048648834, "learning_rate": 5.198639666825743e-05, "loss": 0.0309, "step": 30780 }, { "epoch": 8.326122228231476, "grad_norm": 0.13907259702682495, "learning_rate": 5.195886029361877e-05, "loss": 0.0324, "step": 30790 }, { "epoch": 8.328826392644674, "grad_norm": 0.2070748209953308, "learning_rate": 5.193132332393009e-05, "loss": 0.0315, "step": 30800 }, { "epoch": 8.331530557057869, "grad_norm": 0.16666848957538605, "learning_rate": 5.1903785767556376e-05, "loss": 0.0322, "step": 30810 }, { "epoch": 8.334234721471065, "grad_norm": 0.21429850161075592, "learning_rate": 5.187624763286282e-05, "loss": 0.0314, "step": 30820 }, { "epoch": 8.336938885884262, "grad_norm": 0.1379304826259613, "learning_rate": 5.184870892821475e-05, "loss": 0.0316, "step": 30830 }, { "epoch": 8.339643050297457, "grad_norm": 0.16144199669361115, "learning_rate": 5.182116966197773e-05, "loss": 0.0326, "step": 30840 }, { "epoch": 8.342347214710655, "grad_norm": 0.09254145622253418, "learning_rate": 5.1793629842517466e-05, "loss": 0.0315, "step": 30850 }, { "epoch": 8.34505137912385, "grad_norm": 0.2703838348388672, "learning_rate": 5.17660894781998e-05, "loss": 0.0325, "step": 30860 }, { "epoch": 8.347755543537048, "grad_norm": 0.12392333149909973, "learning_rate": 5.173854857739079e-05, "loss": 0.032, "step": 30870 }, { "epoch": 8.350459707950243, "grad_norm": 0.1579626500606537, "learning_rate": 5.171100714845661e-05, "loss": 0.0336, "step": 30880 }, { "epoch": 8.35316387236344, "grad_norm": 0.17001821100711823, "learning_rate": 5.1683465199763646e-05, "loss": 0.032, "step": 30890 }, { "epoch": 8.355868036776636, "grad_norm": 0.13660234212875366, "learning_rate": 5.16559227396784e-05, "loss": 0.032, "step": 30900 }, { "epoch": 8.358572201189832, "grad_norm": 0.19228367507457733, "learning_rate": 5.1628379776567556e-05, "loss": 0.0335, "step": 30910 }, { "epoch": 8.361276365603029, "grad_norm": 0.10313932597637177, "learning_rate": 5.160083631879792e-05, "loss": 0.0334, "step": 30920 }, { "epoch": 8.363980530016224, "grad_norm": 0.1446819305419922, "learning_rate": 5.1573292374736484e-05, "loss": 0.0316, "step": 30930 }, { "epoch": 8.366684694429422, "grad_norm": 0.18070177733898163, "learning_rate": 5.1545747952750356e-05, "loss": 0.0323, "step": 30940 }, { "epoch": 8.369388858842617, "grad_norm": 0.16232748329639435, "learning_rate": 5.151820306120682e-05, "loss": 0.0326, "step": 30950 }, { "epoch": 8.372093023255815, "grad_norm": 0.16156157851219177, "learning_rate": 5.149065770847328e-05, "loss": 0.0309, "step": 30960 }, { "epoch": 8.37479718766901, "grad_norm": 0.14628329873085022, "learning_rate": 5.1463111902917297e-05, "loss": 0.032, "step": 30970 }, { "epoch": 8.377501352082206, "grad_norm": 0.15091174840927124, "learning_rate": 5.143556565290654e-05, "loss": 0.032, "step": 30980 }, { "epoch": 8.380205516495403, "grad_norm": 0.12947170436382294, "learning_rate": 5.140801896680882e-05, "loss": 0.0308, "step": 30990 }, { "epoch": 8.382909680908599, "grad_norm": 0.1296461671590805, "learning_rate": 5.1380471852992144e-05, "loss": 0.0311, "step": 31000 }, { "epoch": 8.385613845321796, "grad_norm": 0.12946605682373047, "learning_rate": 5.135292431982457e-05, "loss": 0.0316, "step": 31010 }, { "epoch": 8.388318009734991, "grad_norm": 0.1643412709236145, "learning_rate": 5.1325376375674294e-05, "loss": 0.0326, "step": 31020 }, { "epoch": 8.391022174148189, "grad_norm": 0.10407891869544983, "learning_rate": 5.129782802890968e-05, "loss": 0.0321, "step": 31030 }, { "epoch": 8.393726338561384, "grad_norm": 0.08440513163805008, "learning_rate": 5.127027928789916e-05, "loss": 0.0325, "step": 31040 }, { "epoch": 8.396430502974582, "grad_norm": 0.26984286308288574, "learning_rate": 5.124273016101135e-05, "loss": 0.0309, "step": 31050 }, { "epoch": 8.399134667387777, "grad_norm": 0.1717967987060547, "learning_rate": 5.121518065661492e-05, "loss": 0.0311, "step": 31060 }, { "epoch": 8.401838831800973, "grad_norm": 0.1465306282043457, "learning_rate": 5.11876307830787e-05, "loss": 0.031, "step": 31070 }, { "epoch": 8.40454299621417, "grad_norm": 0.09548085182905197, "learning_rate": 5.1160080548771596e-05, "loss": 0.0316, "step": 31080 }, { "epoch": 8.407247160627366, "grad_norm": 0.12113367021083832, "learning_rate": 5.1132529962062656e-05, "loss": 0.031, "step": 31090 }, { "epoch": 8.409951325040563, "grad_norm": 0.15174058079719543, "learning_rate": 5.110497903132101e-05, "loss": 0.0323, "step": 31100 }, { "epoch": 8.412655489453758, "grad_norm": 0.14349140226840973, "learning_rate": 5.107742776491592e-05, "loss": 0.0321, "step": 31110 }, { "epoch": 8.415359653866956, "grad_norm": 0.14075714349746704, "learning_rate": 5.104987617121673e-05, "loss": 0.0329, "step": 31120 }, { "epoch": 8.418063818280151, "grad_norm": 0.20070132613182068, "learning_rate": 5.102232425859287e-05, "loss": 0.0316, "step": 31130 }, { "epoch": 8.420767982693349, "grad_norm": 0.18078377842903137, "learning_rate": 5.09947720354139e-05, "loss": 0.0321, "step": 31140 }, { "epoch": 8.423472147106544, "grad_norm": 0.1686558723449707, "learning_rate": 5.096721951004942e-05, "loss": 0.0329, "step": 31150 }, { "epoch": 8.42617631151974, "grad_norm": 0.11665314435958862, "learning_rate": 5.0939666690869227e-05, "loss": 0.0309, "step": 31160 }, { "epoch": 8.428880475932937, "grad_norm": 0.09694632142782211, "learning_rate": 5.0912113586243096e-05, "loss": 0.0333, "step": 31170 }, { "epoch": 8.431584640346133, "grad_norm": 0.15806415677070618, "learning_rate": 5.0884560204540935e-05, "loss": 0.0317, "step": 31180 }, { "epoch": 8.43428880475933, "grad_norm": 0.17410330474376678, "learning_rate": 5.0857006554132736e-05, "loss": 0.0327, "step": 31190 }, { "epoch": 8.436992969172525, "grad_norm": 0.13764788210391998, "learning_rate": 5.0829452643388575e-05, "loss": 0.0327, "step": 31200 }, { "epoch": 8.439697133585723, "grad_norm": 0.18653756380081177, "learning_rate": 5.08018984806786e-05, "loss": 0.0312, "step": 31210 }, { "epoch": 8.442401297998918, "grad_norm": 0.15547601878643036, "learning_rate": 5.0774344074373036e-05, "loss": 0.0322, "step": 31220 }, { "epoch": 8.445105462412116, "grad_norm": 0.13700084388256073, "learning_rate": 5.07467894328422e-05, "loss": 0.0344, "step": 31230 }, { "epoch": 8.447809626825311, "grad_norm": 0.14347721636295319, "learning_rate": 5.0719234564456454e-05, "loss": 0.0305, "step": 31240 }, { "epoch": 8.450513791238507, "grad_norm": 0.128265842795372, "learning_rate": 5.0691679477586216e-05, "loss": 0.0322, "step": 31250 }, { "epoch": 8.453217955651704, "grad_norm": 0.11887374520301819, "learning_rate": 5.0664124180602035e-05, "loss": 0.0317, "step": 31260 }, { "epoch": 8.4559221200649, "grad_norm": 0.1118822768330574, "learning_rate": 5.063656868187447e-05, "loss": 0.0319, "step": 31270 }, { "epoch": 8.458626284478097, "grad_norm": 0.18121415376663208, "learning_rate": 5.060901298977413e-05, "loss": 0.0324, "step": 31280 }, { "epoch": 8.461330448891292, "grad_norm": 0.10535044223070145, "learning_rate": 5.0581457112671725e-05, "loss": 0.0309, "step": 31290 }, { "epoch": 8.46403461330449, "grad_norm": 0.13284486532211304, "learning_rate": 5.0553901058938016e-05, "loss": 0.0313, "step": 31300 }, { "epoch": 8.466738777717685, "grad_norm": 0.12864957749843597, "learning_rate": 5.052634483694377e-05, "loss": 0.0333, "step": 31310 }, { "epoch": 8.46944294213088, "grad_norm": 0.22239090502262115, "learning_rate": 5.049878845505988e-05, "loss": 0.0326, "step": 31320 }, { "epoch": 8.472147106544078, "grad_norm": 0.13786055147647858, "learning_rate": 5.047123192165721e-05, "loss": 0.0309, "step": 31330 }, { "epoch": 8.474851270957274, "grad_norm": 0.11174280196428299, "learning_rate": 5.0443675245106735e-05, "loss": 0.0337, "step": 31340 }, { "epoch": 8.477555435370471, "grad_norm": 0.27595603466033936, "learning_rate": 5.0416118433779426e-05, "loss": 0.032, "step": 31350 }, { "epoch": 8.480259599783667, "grad_norm": 0.12149256467819214, "learning_rate": 5.038856149604633e-05, "loss": 0.0338, "step": 31360 }, { "epoch": 8.482963764196864, "grad_norm": 0.17517459392547607, "learning_rate": 5.03610044402785e-05, "loss": 0.0322, "step": 31370 }, { "epoch": 8.48566792861006, "grad_norm": 0.12526902556419373, "learning_rate": 5.033344727484707e-05, "loss": 0.0332, "step": 31380 }, { "epoch": 8.488372093023255, "grad_norm": 0.19234123826026917, "learning_rate": 5.030589000812315e-05, "loss": 0.0331, "step": 31390 }, { "epoch": 8.491076257436452, "grad_norm": 0.2028733491897583, "learning_rate": 5.027833264847793e-05, "loss": 0.0336, "step": 31400 }, { "epoch": 8.493780421849648, "grad_norm": 0.16324976086616516, "learning_rate": 5.025077520428258e-05, "loss": 0.0319, "step": 31410 }, { "epoch": 8.496484586262845, "grad_norm": 0.20044293999671936, "learning_rate": 5.022321768390837e-05, "loss": 0.0318, "step": 31420 }, { "epoch": 8.49918875067604, "grad_norm": 0.14150716364383698, "learning_rate": 5.0195660095726516e-05, "loss": 0.0325, "step": 31430 }, { "epoch": 8.501892915089238, "grad_norm": 0.17620545625686646, "learning_rate": 5.016810244810829e-05, "loss": 0.0325, "step": 31440 }, { "epoch": 8.504597079502433, "grad_norm": 0.12972363829612732, "learning_rate": 5.0140544749424976e-05, "loss": 0.0317, "step": 31450 }, { "epoch": 8.50730124391563, "grad_norm": 0.11770113557577133, "learning_rate": 5.0112987008047874e-05, "loss": 0.0319, "step": 31460 }, { "epoch": 8.510005408328826, "grad_norm": 0.18609485030174255, "learning_rate": 5.008542923234831e-05, "loss": 0.0309, "step": 31470 }, { "epoch": 8.512709572742022, "grad_norm": 0.20389875769615173, "learning_rate": 5.00578714306976e-05, "loss": 0.0328, "step": 31480 }, { "epoch": 8.51541373715522, "grad_norm": 0.1289699524641037, "learning_rate": 5.0030313611467084e-05, "loss": 0.0314, "step": 31490 }, { "epoch": 8.518117901568415, "grad_norm": 0.12197376787662506, "learning_rate": 5.0002755783028074e-05, "loss": 0.0329, "step": 31500 }, { "epoch": 8.520822065981612, "grad_norm": 0.13079750537872314, "learning_rate": 4.997519795375194e-05, "loss": 0.0336, "step": 31510 }, { "epoch": 8.523526230394808, "grad_norm": 0.10360211879014969, "learning_rate": 4.9947640132010016e-05, "loss": 0.032, "step": 31520 }, { "epoch": 8.526230394808005, "grad_norm": 0.11785919219255447, "learning_rate": 4.9920082326173625e-05, "loss": 0.031, "step": 31530 }, { "epoch": 8.5289345592212, "grad_norm": 0.13944804668426514, "learning_rate": 4.9892524544614114e-05, "loss": 0.0312, "step": 31540 }, { "epoch": 8.531638723634398, "grad_norm": 0.13612103462219238, "learning_rate": 4.986496679570283e-05, "loss": 0.032, "step": 31550 }, { "epoch": 8.534342888047593, "grad_norm": 0.16877084970474243, "learning_rate": 4.983740908781105e-05, "loss": 0.0313, "step": 31560 }, { "epoch": 8.537047052460789, "grad_norm": 0.13431012630462646, "learning_rate": 4.9809851429310116e-05, "loss": 0.0325, "step": 31570 }, { "epoch": 8.539751216873986, "grad_norm": 0.20146064460277557, "learning_rate": 4.9782293828571275e-05, "loss": 0.0323, "step": 31580 }, { "epoch": 8.542455381287182, "grad_norm": 0.16754426062107086, "learning_rate": 4.9754736293965846e-05, "loss": 0.0317, "step": 31590 }, { "epoch": 8.545159545700379, "grad_norm": 0.1942499876022339, "learning_rate": 4.972717883386502e-05, "loss": 0.0318, "step": 31600 }, { "epoch": 8.547863710113575, "grad_norm": 0.2130538523197174, "learning_rate": 4.9699621456640075e-05, "loss": 0.031, "step": 31610 }, { "epoch": 8.550567874526772, "grad_norm": 0.2496216893196106, "learning_rate": 4.9672064170662214e-05, "loss": 0.0322, "step": 31620 }, { "epoch": 8.553272038939967, "grad_norm": 0.2067725509405136, "learning_rate": 4.9644506984302583e-05, "loss": 0.0322, "step": 31630 }, { "epoch": 8.555976203353165, "grad_norm": 0.171837717294693, "learning_rate": 4.9616949905932356e-05, "loss": 0.0316, "step": 31640 }, { "epoch": 8.55868036776636, "grad_norm": 0.10072899609804153, "learning_rate": 4.9589392943922615e-05, "loss": 0.032, "step": 31650 }, { "epoch": 8.561384532179556, "grad_norm": 0.14276117086410522, "learning_rate": 4.956183610664447e-05, "loss": 0.0315, "step": 31660 }, { "epoch": 8.564088696592753, "grad_norm": 0.14111188054084778, "learning_rate": 4.9534279402468945e-05, "loss": 0.0305, "step": 31670 }, { "epoch": 8.566792861005949, "grad_norm": 0.17169250547885895, "learning_rate": 4.9506722839767036e-05, "loss": 0.0334, "step": 31680 }, { "epoch": 8.569497025419146, "grad_norm": 0.138363778591156, "learning_rate": 4.947916642690972e-05, "loss": 0.0322, "step": 31690 }, { "epoch": 8.572201189832342, "grad_norm": 0.20901985466480255, "learning_rate": 4.9451610172267874e-05, "loss": 0.0327, "step": 31700 }, { "epoch": 8.574905354245539, "grad_norm": 0.20249906182289124, "learning_rate": 4.9424054084212376e-05, "loss": 0.0312, "step": 31710 }, { "epoch": 8.577609518658734, "grad_norm": 0.1356746107339859, "learning_rate": 4.939649817111407e-05, "loss": 0.031, "step": 31720 }, { "epoch": 8.58031368307193, "grad_norm": 0.31019464135169983, "learning_rate": 4.936894244134365e-05, "loss": 0.0313, "step": 31730 }, { "epoch": 8.583017847485127, "grad_norm": 0.14323312044143677, "learning_rate": 4.9341386903271886e-05, "loss": 0.0314, "step": 31740 }, { "epoch": 8.585722011898323, "grad_norm": 0.15198417007923126, "learning_rate": 4.931383156526936e-05, "loss": 0.0311, "step": 31750 }, { "epoch": 8.58842617631152, "grad_norm": 0.16350166499614716, "learning_rate": 4.92862764357067e-05, "loss": 0.0313, "step": 31760 }, { "epoch": 8.591130340724716, "grad_norm": 0.135198175907135, "learning_rate": 4.925872152295443e-05, "loss": 0.031, "step": 31770 }, { "epoch": 8.593834505137913, "grad_norm": 0.1359197199344635, "learning_rate": 4.923116683538296e-05, "loss": 0.0311, "step": 31780 }, { "epoch": 8.596538669551109, "grad_norm": 0.17616486549377441, "learning_rate": 4.920361238136273e-05, "loss": 0.0313, "step": 31790 }, { "epoch": 8.599242833964304, "grad_norm": 0.17561349272727966, "learning_rate": 4.9176058169264014e-05, "loss": 0.0314, "step": 31800 }, { "epoch": 8.601946998377501, "grad_norm": 0.10073836147785187, "learning_rate": 4.9148504207457074e-05, "loss": 0.0313, "step": 31810 }, { "epoch": 8.604651162790697, "grad_norm": 0.1295670121908188, "learning_rate": 4.912095050431208e-05, "loss": 0.0314, "step": 31820 }, { "epoch": 8.607355327203894, "grad_norm": 0.11620132625102997, "learning_rate": 4.909339706819911e-05, "loss": 0.0323, "step": 31830 }, { "epoch": 8.61005949161709, "grad_norm": 0.12964580953121185, "learning_rate": 4.906584390748819e-05, "loss": 0.0317, "step": 31840 }, { "epoch": 8.612763656030287, "grad_norm": 0.14276975393295288, "learning_rate": 4.9038291030549195e-05, "loss": 0.0327, "step": 31850 }, { "epoch": 8.615467820443483, "grad_norm": 0.11225498467683792, "learning_rate": 4.9010738445751995e-05, "loss": 0.0312, "step": 31860 }, { "epoch": 8.61817198485668, "grad_norm": 0.23120668530464172, "learning_rate": 4.8983186161466364e-05, "loss": 0.0328, "step": 31870 }, { "epoch": 8.620876149269876, "grad_norm": 0.13320454955101013, "learning_rate": 4.89556341860619e-05, "loss": 0.0325, "step": 31880 }, { "epoch": 8.623580313683071, "grad_norm": 0.14451083540916443, "learning_rate": 4.892808252790822e-05, "loss": 0.031, "step": 31890 }, { "epoch": 8.626284478096268, "grad_norm": 0.12728869915008545, "learning_rate": 4.890053119537475e-05, "loss": 0.0316, "step": 31900 }, { "epoch": 8.628988642509464, "grad_norm": 0.16262903809547424, "learning_rate": 4.887298019683087e-05, "loss": 0.0323, "step": 31910 }, { "epoch": 8.631692806922661, "grad_norm": 0.18348045647144318, "learning_rate": 4.884542954064587e-05, "loss": 0.0313, "step": 31920 }, { "epoch": 8.634396971335857, "grad_norm": 0.13331584632396698, "learning_rate": 4.881787923518887e-05, "loss": 0.0325, "step": 31930 }, { "epoch": 8.637101135749054, "grad_norm": 0.11096447706222534, "learning_rate": 4.879032928882896e-05, "loss": 0.0312, "step": 31940 }, { "epoch": 8.63980530016225, "grad_norm": 0.20090873539447784, "learning_rate": 4.876277970993505e-05, "loss": 0.0316, "step": 31950 }, { "epoch": 8.642509464575447, "grad_norm": 0.11029121279716492, "learning_rate": 4.873523050687602e-05, "loss": 0.0327, "step": 31960 }, { "epoch": 8.645213628988643, "grad_norm": 0.15670040249824524, "learning_rate": 4.870768168802056e-05, "loss": 0.0308, "step": 31970 }, { "epoch": 8.647917793401838, "grad_norm": 0.3152758479118347, "learning_rate": 4.868013326173728e-05, "loss": 0.0308, "step": 31980 }, { "epoch": 8.650621957815035, "grad_norm": 0.1515311598777771, "learning_rate": 4.865258523639468e-05, "loss": 0.0324, "step": 31990 }, { "epoch": 8.653326122228231, "grad_norm": 0.14031721651554108, "learning_rate": 4.862503762036109e-05, "loss": 0.0326, "step": 32000 }, { "epoch": 8.656030286641428, "grad_norm": 0.1501978635787964, "learning_rate": 4.859749042200478e-05, "loss": 0.0324, "step": 32010 }, { "epoch": 8.658734451054624, "grad_norm": 0.17795003950595856, "learning_rate": 4.856994364969384e-05, "loss": 0.0313, "step": 32020 }, { "epoch": 8.661438615467821, "grad_norm": 0.24568213522434235, "learning_rate": 4.854239731179625e-05, "loss": 0.0328, "step": 32030 }, { "epoch": 8.664142779881017, "grad_norm": 0.11817548424005508, "learning_rate": 4.85148514166799e-05, "loss": 0.0304, "step": 32040 }, { "epoch": 8.666846944294214, "grad_norm": 0.148188054561615, "learning_rate": 4.8487305972712456e-05, "loss": 0.0309, "step": 32050 }, { "epoch": 8.66955110870741, "grad_norm": 0.1518218070268631, "learning_rate": 4.8459760988261526e-05, "loss": 0.0311, "step": 32060 }, { "epoch": 8.672255273120605, "grad_norm": 0.1365707814693451, "learning_rate": 4.843221647169453e-05, "loss": 0.0305, "step": 32070 }, { "epoch": 8.674959437533802, "grad_norm": 0.10737889260053635, "learning_rate": 4.840467243137878e-05, "loss": 0.0325, "step": 32080 }, { "epoch": 8.677663601946998, "grad_norm": 0.14596809446811676, "learning_rate": 4.837712887568143e-05, "loss": 0.032, "step": 32090 }, { "epoch": 8.680367766360195, "grad_norm": 0.15066058933734894, "learning_rate": 4.8349585812969464e-05, "loss": 0.0308, "step": 32100 }, { "epoch": 8.68307193077339, "grad_norm": 0.19396629929542542, "learning_rate": 4.8322043251609775e-05, "loss": 0.0312, "step": 32110 }, { "epoch": 8.685776095186588, "grad_norm": 0.137438103556633, "learning_rate": 4.8294501199969015e-05, "loss": 0.0314, "step": 32120 }, { "epoch": 8.688480259599784, "grad_norm": 0.15866699814796448, "learning_rate": 4.826695966641376e-05, "loss": 0.0303, "step": 32130 }, { "epoch": 8.69118442401298, "grad_norm": 0.15150059759616852, "learning_rate": 4.823941865931043e-05, "loss": 0.0316, "step": 32140 }, { "epoch": 8.693888588426177, "grad_norm": 0.16505233943462372, "learning_rate": 4.82118781870252e-05, "loss": 0.0311, "step": 32150 }, { "epoch": 8.696592752839372, "grad_norm": 0.11091844737529755, "learning_rate": 4.8184338257924185e-05, "loss": 0.0316, "step": 32160 }, { "epoch": 8.69929691725257, "grad_norm": 0.14752596616744995, "learning_rate": 4.815679888037324e-05, "loss": 0.0312, "step": 32170 }, { "epoch": 8.702001081665765, "grad_norm": 0.13078321516513824, "learning_rate": 4.8129260062738135e-05, "loss": 0.0315, "step": 32180 }, { "epoch": 8.704705246078962, "grad_norm": 0.12482307106256485, "learning_rate": 4.810172181338445e-05, "loss": 0.0322, "step": 32190 }, { "epoch": 8.707409410492158, "grad_norm": 0.15617971122264862, "learning_rate": 4.807418414067753e-05, "loss": 0.0323, "step": 32200 }, { "epoch": 8.710113574905353, "grad_norm": 0.2274923026561737, "learning_rate": 4.804664705298264e-05, "loss": 0.0314, "step": 32210 }, { "epoch": 8.71281773931855, "grad_norm": 0.12205307185649872, "learning_rate": 4.80191105586648e-05, "loss": 0.0306, "step": 32220 }, { "epoch": 8.715521903731746, "grad_norm": 0.11253892630338669, "learning_rate": 4.799157466608886e-05, "loss": 0.0308, "step": 32230 }, { "epoch": 8.718226068144943, "grad_norm": 0.11304585635662079, "learning_rate": 4.796403938361951e-05, "loss": 0.031, "step": 32240 }, { "epoch": 8.720930232558139, "grad_norm": 0.2370137870311737, "learning_rate": 4.793650471962123e-05, "loss": 0.0305, "step": 32250 }, { "epoch": 8.723634396971336, "grad_norm": 0.15522843599319458, "learning_rate": 4.790897068245835e-05, "loss": 0.0306, "step": 32260 }, { "epoch": 8.726338561384532, "grad_norm": 0.1738128811120987, "learning_rate": 4.7881437280494954e-05, "loss": 0.0314, "step": 32270 }, { "epoch": 8.72904272579773, "grad_norm": 0.19673678278923035, "learning_rate": 4.7853904522094965e-05, "loss": 0.0312, "step": 32280 }, { "epoch": 8.731746890210925, "grad_norm": 0.2085784375667572, "learning_rate": 4.782637241562215e-05, "loss": 0.0309, "step": 32290 }, { "epoch": 8.73445105462412, "grad_norm": 0.8044289946556091, "learning_rate": 4.779884096943997e-05, "loss": 0.0318, "step": 32300 }, { "epoch": 8.737155219037318, "grad_norm": 0.16589227318763733, "learning_rate": 4.777131019191182e-05, "loss": 0.0322, "step": 32310 }, { "epoch": 8.739859383450513, "grad_norm": 0.14058417081832886, "learning_rate": 4.774378009140076e-05, "loss": 0.0308, "step": 32320 }, { "epoch": 8.74256354786371, "grad_norm": 0.15815666317939758, "learning_rate": 4.7716250676269735e-05, "loss": 0.0306, "step": 32330 }, { "epoch": 8.745267712276906, "grad_norm": 0.13046596944332123, "learning_rate": 4.7688721954881485e-05, "loss": 0.0319, "step": 32340 }, { "epoch": 8.747971876690103, "grad_norm": 0.10561424493789673, "learning_rate": 4.7661193935598446e-05, "loss": 0.0317, "step": 32350 }, { "epoch": 8.750676041103299, "grad_norm": 0.11616621911525726, "learning_rate": 4.763366662678296e-05, "loss": 0.0316, "step": 32360 }, { "epoch": 8.753380205516496, "grad_norm": 0.22908280789852142, "learning_rate": 4.7606140036797064e-05, "loss": 0.0306, "step": 32370 }, { "epoch": 8.756084369929692, "grad_norm": 0.14152446389198303, "learning_rate": 4.7578614174002614e-05, "loss": 0.0318, "step": 32380 }, { "epoch": 8.758788534342887, "grad_norm": 0.19043199717998505, "learning_rate": 4.755108904676125e-05, "loss": 0.0314, "step": 32390 }, { "epoch": 8.761492698756085, "grad_norm": 0.10866372287273407, "learning_rate": 4.752356466343436e-05, "loss": 0.0321, "step": 32400 }, { "epoch": 8.76419686316928, "grad_norm": 0.22041811048984528, "learning_rate": 4.7496041032383174e-05, "loss": 0.0313, "step": 32410 }, { "epoch": 8.766901027582477, "grad_norm": 0.12521842122077942, "learning_rate": 4.746851816196858e-05, "loss": 0.0309, "step": 32420 }, { "epoch": 8.769605191995673, "grad_norm": 0.16393287479877472, "learning_rate": 4.744099606055135e-05, "loss": 0.0314, "step": 32430 }, { "epoch": 8.77230935640887, "grad_norm": 0.22147998213768005, "learning_rate": 4.741347473649193e-05, "loss": 0.0307, "step": 32440 }, { "epoch": 8.775013520822066, "grad_norm": 0.13894391059875488, "learning_rate": 4.738595419815058e-05, "loss": 0.0294, "step": 32450 }, { "epoch": 8.777717685235263, "grad_norm": 0.13159413635730743, "learning_rate": 4.7358434453887365e-05, "loss": 0.0301, "step": 32460 }, { "epoch": 8.780421849648459, "grad_norm": 0.14845295250415802, "learning_rate": 4.7330915512061976e-05, "loss": 0.03, "step": 32470 }, { "epoch": 8.783126014061654, "grad_norm": 0.1756245195865631, "learning_rate": 4.730339738103402e-05, "loss": 0.0302, "step": 32480 }, { "epoch": 8.785830178474852, "grad_norm": 0.17003153264522552, "learning_rate": 4.727588006916271e-05, "loss": 0.0315, "step": 32490 }, { "epoch": 8.788534342888047, "grad_norm": 0.12930847704410553, "learning_rate": 4.724836358480711e-05, "loss": 0.0321, "step": 32500 }, { "epoch": 8.791238507301244, "grad_norm": 0.10901582986116409, "learning_rate": 4.722084793632601e-05, "loss": 0.0322, "step": 32510 }, { "epoch": 8.79394267171444, "grad_norm": 0.10985924303531647, "learning_rate": 4.719333313207792e-05, "loss": 0.0318, "step": 32520 }, { "epoch": 8.796646836127637, "grad_norm": 0.10337602347135544, "learning_rate": 4.716581918042114e-05, "loss": 0.0306, "step": 32530 }, { "epoch": 8.799351000540833, "grad_norm": 0.14581280946731567, "learning_rate": 4.7138306089713636e-05, "loss": 0.032, "step": 32540 }, { "epoch": 8.802055164954028, "grad_norm": 0.14937752485275269, "learning_rate": 4.7110793868313183e-05, "loss": 0.0308, "step": 32550 }, { "epoch": 8.804759329367226, "grad_norm": 0.15413890779018402, "learning_rate": 4.708328252457729e-05, "loss": 0.0306, "step": 32560 }, { "epoch": 8.807463493780421, "grad_norm": 0.21935904026031494, "learning_rate": 4.7055772066863135e-05, "loss": 0.0321, "step": 32570 }, { "epoch": 8.810167658193619, "grad_norm": 0.18724936246871948, "learning_rate": 4.702826250352771e-05, "loss": 0.0301, "step": 32580 }, { "epoch": 8.812871822606814, "grad_norm": 0.13357651233673096, "learning_rate": 4.7000753842927653e-05, "loss": 0.0308, "step": 32590 }, { "epoch": 8.815575987020011, "grad_norm": 0.13368181884288788, "learning_rate": 4.6973246093419384e-05, "loss": 0.0322, "step": 32600 }, { "epoch": 8.818280151433207, "grad_norm": 0.10904044657945633, "learning_rate": 4.694573926335906e-05, "loss": 0.0307, "step": 32610 }, { "epoch": 8.820984315846403, "grad_norm": 0.1305881142616272, "learning_rate": 4.6918233361102476e-05, "loss": 0.0313, "step": 32620 }, { "epoch": 8.8236884802596, "grad_norm": 0.1285891979932785, "learning_rate": 4.689072839500525e-05, "loss": 0.0326, "step": 32630 }, { "epoch": 8.826392644672795, "grad_norm": 0.11553065478801727, "learning_rate": 4.6863224373422635e-05, "loss": 0.0318, "step": 32640 }, { "epoch": 8.829096809085993, "grad_norm": 0.15169702470302582, "learning_rate": 4.683572130470962e-05, "loss": 0.0327, "step": 32650 }, { "epoch": 8.831800973499188, "grad_norm": 0.1700732558965683, "learning_rate": 4.680821919722094e-05, "loss": 0.0321, "step": 32660 }, { "epoch": 8.834505137912386, "grad_norm": 0.16413544118404388, "learning_rate": 4.6780718059310975e-05, "loss": 0.0312, "step": 32670 }, { "epoch": 8.837209302325581, "grad_norm": 0.13900084793567657, "learning_rate": 4.675321789933389e-05, "loss": 0.0314, "step": 32680 }, { "epoch": 8.839913466738778, "grad_norm": 0.13997943699359894, "learning_rate": 4.6725718725643464e-05, "loss": 0.0309, "step": 32690 }, { "epoch": 8.842617631151974, "grad_norm": 0.09859199076890945, "learning_rate": 4.669822054659323e-05, "loss": 0.0305, "step": 32700 }, { "epoch": 8.84532179556517, "grad_norm": 0.15525929629802704, "learning_rate": 4.667072337053644e-05, "loss": 0.033, "step": 32710 }, { "epoch": 8.848025959978367, "grad_norm": 0.22656773030757904, "learning_rate": 4.6643227205825965e-05, "loss": 0.0301, "step": 32720 }, { "epoch": 8.850730124391562, "grad_norm": 0.20520912110805511, "learning_rate": 4.6615732060814454e-05, "loss": 0.0314, "step": 32730 }, { "epoch": 8.85343428880476, "grad_norm": 0.14479774236679077, "learning_rate": 4.658823794385417e-05, "loss": 0.0318, "step": 32740 }, { "epoch": 8.856138453217955, "grad_norm": 0.2601306736469269, "learning_rate": 4.6560744863297115e-05, "loss": 0.0307, "step": 32750 }, { "epoch": 8.858842617631153, "grad_norm": 0.20628948509693146, "learning_rate": 4.653325282749498e-05, "loss": 0.0309, "step": 32760 }, { "epoch": 8.861546782044348, "grad_norm": 0.16333138942718506, "learning_rate": 4.6505761844799075e-05, "loss": 0.0315, "step": 32770 }, { "epoch": 8.864250946457545, "grad_norm": 0.15788666903972626, "learning_rate": 4.647827192356048e-05, "loss": 0.0303, "step": 32780 }, { "epoch": 8.866955110870741, "grad_norm": 0.16768109798431396, "learning_rate": 4.645078307212989e-05, "loss": 0.0304, "step": 32790 }, { "epoch": 8.869659275283936, "grad_norm": 0.17896278202533722, "learning_rate": 4.642329529885768e-05, "loss": 0.0301, "step": 32800 }, { "epoch": 8.872363439697134, "grad_norm": 0.13510026037693024, "learning_rate": 4.639580861209393e-05, "loss": 0.0302, "step": 32810 }, { "epoch": 8.87506760411033, "grad_norm": 0.15658794343471527, "learning_rate": 4.636832302018835e-05, "loss": 0.0328, "step": 32820 }, { "epoch": 8.877771768523527, "grad_norm": 0.12806956470012665, "learning_rate": 4.6340838531490365e-05, "loss": 0.0299, "step": 32830 }, { "epoch": 8.880475932936722, "grad_norm": 0.1104060709476471, "learning_rate": 4.6313355154349e-05, "loss": 0.0312, "step": 32840 }, { "epoch": 8.88318009734992, "grad_norm": 0.15185540914535522, "learning_rate": 4.6285872897113025e-05, "loss": 0.0305, "step": 32850 }, { "epoch": 8.885884261763115, "grad_norm": 0.14973734319210052, "learning_rate": 4.625839176813077e-05, "loss": 0.0315, "step": 32860 }, { "epoch": 8.888588426176312, "grad_norm": 0.10565654188394547, "learning_rate": 4.623091177575031e-05, "loss": 0.0289, "step": 32870 }, { "epoch": 8.891292590589508, "grad_norm": 0.3496229946613312, "learning_rate": 4.620343292831936e-05, "loss": 0.0325, "step": 32880 }, { "epoch": 8.893996755002703, "grad_norm": 0.2317401021718979, "learning_rate": 4.6175955234185206e-05, "loss": 0.0329, "step": 32890 }, { "epoch": 8.8967009194159, "grad_norm": 0.11746802181005478, "learning_rate": 4.614847870169492e-05, "loss": 0.0314, "step": 32900 }, { "epoch": 8.899405083829096, "grad_norm": 0.1309763789176941, "learning_rate": 4.612100333919509e-05, "loss": 0.0306, "step": 32910 }, { "epoch": 8.902109248242294, "grad_norm": 0.1675502061843872, "learning_rate": 4.609352915503202e-05, "loss": 0.0308, "step": 32920 }, { "epoch": 8.90481341265549, "grad_norm": 0.15633538365364075, "learning_rate": 4.606605615755166e-05, "loss": 0.0298, "step": 32930 }, { "epoch": 8.907517577068687, "grad_norm": 0.14592653512954712, "learning_rate": 4.6038584355099576e-05, "loss": 0.0308, "step": 32940 }, { "epoch": 8.910221741481882, "grad_norm": 0.1551789939403534, "learning_rate": 4.6011113756020964e-05, "loss": 0.0316, "step": 32950 }, { "epoch": 8.912925905895078, "grad_norm": 0.16251815855503082, "learning_rate": 4.598364436866066e-05, "loss": 0.0317, "step": 32960 }, { "epoch": 8.915630070308275, "grad_norm": 0.12110387533903122, "learning_rate": 4.595617620136316e-05, "loss": 0.0305, "step": 32970 }, { "epoch": 8.91833423472147, "grad_norm": 0.1613999307155609, "learning_rate": 4.592870926247257e-05, "loss": 0.0311, "step": 32980 }, { "epoch": 8.921038399134668, "grad_norm": 0.14585821330547333, "learning_rate": 4.5901243560332594e-05, "loss": 0.0319, "step": 32990 }, { "epoch": 8.923742563547863, "grad_norm": 0.11672261357307434, "learning_rate": 4.587377910328662e-05, "loss": 0.0319, "step": 33000 }, { "epoch": 8.92644672796106, "grad_norm": 0.1355646550655365, "learning_rate": 4.5846315899677586e-05, "loss": 0.0308, "step": 33010 }, { "epoch": 8.929150892374256, "grad_norm": 0.14112500846385956, "learning_rate": 4.5818853957848114e-05, "loss": 0.031, "step": 33020 }, { "epoch": 8.931855056787454, "grad_norm": 0.10851430147886276, "learning_rate": 4.579139328614043e-05, "loss": 0.0314, "step": 33030 }, { "epoch": 8.934559221200649, "grad_norm": 0.11736653745174408, "learning_rate": 4.576393389289633e-05, "loss": 0.0302, "step": 33040 }, { "epoch": 8.937263385613845, "grad_norm": 0.13539515435695648, "learning_rate": 4.573647578645728e-05, "loss": 0.031, "step": 33050 }, { "epoch": 8.939967550027042, "grad_norm": 0.15109162032604218, "learning_rate": 4.57090189751643e-05, "loss": 0.0316, "step": 33060 }, { "epoch": 8.942671714440237, "grad_norm": 0.20246370136737823, "learning_rate": 4.568156346735806e-05, "loss": 0.0316, "step": 33070 }, { "epoch": 8.945375878853435, "grad_norm": 0.13217882812023163, "learning_rate": 4.565410927137882e-05, "loss": 0.0316, "step": 33080 }, { "epoch": 8.94808004326663, "grad_norm": 0.16133035719394684, "learning_rate": 4.562665639556644e-05, "loss": 0.0307, "step": 33090 }, { "epoch": 8.950784207679828, "grad_norm": 0.12297417223453522, "learning_rate": 4.559920484826037e-05, "loss": 0.0307, "step": 33100 }, { "epoch": 8.953488372093023, "grad_norm": 0.1815636307001114, "learning_rate": 4.5571754637799665e-05, "loss": 0.0307, "step": 33110 }, { "epoch": 8.956192536506219, "grad_norm": 0.16183893382549286, "learning_rate": 4.554430577252298e-05, "loss": 0.0309, "step": 33120 }, { "epoch": 8.958896700919416, "grad_norm": 0.16528701782226562, "learning_rate": 4.551685826076858e-05, "loss": 0.0313, "step": 33130 }, { "epoch": 8.961600865332612, "grad_norm": 0.15588317811489105, "learning_rate": 4.5489412110874246e-05, "loss": 0.0312, "step": 33140 }, { "epoch": 8.964305029745809, "grad_norm": 0.16636474430561066, "learning_rate": 4.5461967331177444e-05, "loss": 0.0306, "step": 33150 }, { "epoch": 8.967009194159004, "grad_norm": 0.13690342009067535, "learning_rate": 4.5434523930015115e-05, "loss": 0.0306, "step": 33160 }, { "epoch": 8.969713358572202, "grad_norm": 0.13687360286712646, "learning_rate": 4.540708191572388e-05, "loss": 0.0301, "step": 33170 }, { "epoch": 8.972417522985397, "grad_norm": 0.19946523010730743, "learning_rate": 4.537964129663991e-05, "loss": 0.0307, "step": 33180 }, { "epoch": 8.975121687398595, "grad_norm": 0.19099240005016327, "learning_rate": 4.535220208109889e-05, "loss": 0.0304, "step": 33190 }, { "epoch": 8.97782585181179, "grad_norm": 0.12012044340372086, "learning_rate": 4.5324764277436194e-05, "loss": 0.0317, "step": 33200 }, { "epoch": 8.980530016224986, "grad_norm": 0.13889452815055847, "learning_rate": 4.529732789398664e-05, "loss": 0.0297, "step": 33210 }, { "epoch": 8.983234180638183, "grad_norm": 0.11969882249832153, "learning_rate": 4.526989293908472e-05, "loss": 0.0303, "step": 33220 }, { "epoch": 8.985938345051379, "grad_norm": 0.11943478137254715, "learning_rate": 4.524245942106442e-05, "loss": 0.0313, "step": 33230 }, { "epoch": 8.988642509464576, "grad_norm": 0.2511564791202545, "learning_rate": 4.5215027348259345e-05, "loss": 0.0318, "step": 33240 }, { "epoch": 8.991346673877771, "grad_norm": 0.14269046485424042, "learning_rate": 4.5187596729002616e-05, "loss": 0.029, "step": 33250 }, { "epoch": 8.994050838290969, "grad_norm": 0.13654349744319916, "learning_rate": 4.516016757162693e-05, "loss": 0.0313, "step": 33260 }, { "epoch": 8.996755002704164, "grad_norm": 0.12993457913398743, "learning_rate": 4.513273988446457e-05, "loss": 0.0306, "step": 33270 }, { "epoch": 8.999459167117362, "grad_norm": 0.12172793596982956, "learning_rate": 4.5105313675847296e-05, "loss": 0.0316, "step": 33280 }, { "epoch": 9.002163331530557, "grad_norm": 0.12919242680072784, "learning_rate": 4.5077888954106495e-05, "loss": 0.0302, "step": 33290 }, { "epoch": 9.004867495943753, "grad_norm": 0.1553681343793869, "learning_rate": 4.505046572757309e-05, "loss": 0.0309, "step": 33300 }, { "epoch": 9.00757166035695, "grad_norm": 0.16563859581947327, "learning_rate": 4.502304400457749e-05, "loss": 0.0302, "step": 33310 }, { "epoch": 9.010275824770146, "grad_norm": 0.1140923872590065, "learning_rate": 4.499562379344973e-05, "loss": 0.0326, "step": 33320 }, { "epoch": 9.012979989183343, "grad_norm": 0.1888066977262497, "learning_rate": 4.4968205102519306e-05, "loss": 0.0316, "step": 33330 }, { "epoch": 9.015684153596538, "grad_norm": 0.14540259540081024, "learning_rate": 4.494078794011532e-05, "loss": 0.0304, "step": 33340 }, { "epoch": 9.018388318009736, "grad_norm": 0.14688292145729065, "learning_rate": 4.491337231456639e-05, "loss": 0.0297, "step": 33350 }, { "epoch": 9.021092482422931, "grad_norm": 0.16023550927639008, "learning_rate": 4.4885958234200634e-05, "loss": 0.0294, "step": 33360 }, { "epoch": 9.023796646836127, "grad_norm": 0.1605517864227295, "learning_rate": 4.485854570734575e-05, "loss": 0.0312, "step": 33370 }, { "epoch": 9.026500811249324, "grad_norm": 0.12424629926681519, "learning_rate": 4.483113474232891e-05, "loss": 0.0306, "step": 33380 }, { "epoch": 9.02920497566252, "grad_norm": 0.10365438461303711, "learning_rate": 4.480372534747688e-05, "loss": 0.0301, "step": 33390 }, { "epoch": 9.031909140075717, "grad_norm": 0.1497647911310196, "learning_rate": 4.477631753111588e-05, "loss": 0.0305, "step": 33400 }, { "epoch": 9.034613304488913, "grad_norm": 0.1460219919681549, "learning_rate": 4.4748911301571686e-05, "loss": 0.0306, "step": 33410 }, { "epoch": 9.03731746890211, "grad_norm": 0.10818216949701309, "learning_rate": 4.472150666716961e-05, "loss": 0.0312, "step": 33420 }, { "epoch": 9.040021633315305, "grad_norm": 0.1655355989933014, "learning_rate": 4.469410363623442e-05, "loss": 0.0295, "step": 33430 }, { "epoch": 9.042725797728503, "grad_norm": 0.12711544334888458, "learning_rate": 4.466670221709044e-05, "loss": 0.0298, "step": 33440 }, { "epoch": 9.045429962141698, "grad_norm": 0.11398632079362869, "learning_rate": 4.463930241806154e-05, "loss": 0.0312, "step": 33450 }, { "epoch": 9.048134126554894, "grad_norm": 0.21591657400131226, "learning_rate": 4.4611904247471006e-05, "loss": 0.0311, "step": 33460 }, { "epoch": 9.050838290968091, "grad_norm": 0.1841614693403244, "learning_rate": 4.458450771364171e-05, "loss": 0.031, "step": 33470 }, { "epoch": 9.053542455381287, "grad_norm": 0.12552078068256378, "learning_rate": 4.4557112824895965e-05, "loss": 0.0307, "step": 33480 }, { "epoch": 9.056246619794484, "grad_norm": 0.181365504860878, "learning_rate": 4.452971958955563e-05, "loss": 0.0308, "step": 33490 }, { "epoch": 9.05895078420768, "grad_norm": 0.17228636145591736, "learning_rate": 4.450232801594208e-05, "loss": 0.0323, "step": 33500 }, { "epoch": 9.061654948620877, "grad_norm": 0.18136557936668396, "learning_rate": 4.447493811237609e-05, "loss": 0.0301, "step": 33510 }, { "epoch": 9.064359113034072, "grad_norm": 0.11591968685388565, "learning_rate": 4.444754988717804e-05, "loss": 0.0291, "step": 33520 }, { "epoch": 9.067063277447268, "grad_norm": 0.13345743715763092, "learning_rate": 4.442016334866771e-05, "loss": 0.0295, "step": 33530 }, { "epoch": 9.069767441860465, "grad_norm": 0.25787150859832764, "learning_rate": 4.4392778505164445e-05, "loss": 0.0309, "step": 33540 }, { "epoch": 9.07247160627366, "grad_norm": 0.13132444024085999, "learning_rate": 4.436539536498702e-05, "loss": 0.0295, "step": 33550 }, { "epoch": 9.075175770686858, "grad_norm": 0.1389426589012146, "learning_rate": 4.433801393645369e-05, "loss": 0.0313, "step": 33560 }, { "epoch": 9.077879935100054, "grad_norm": 0.17804837226867676, "learning_rate": 4.431063422788226e-05, "loss": 0.0323, "step": 33570 }, { "epoch": 9.080584099513251, "grad_norm": 0.1804257333278656, "learning_rate": 4.428325624758991e-05, "loss": 0.0311, "step": 33580 }, { "epoch": 9.083288263926447, "grad_norm": 0.22957351803779602, "learning_rate": 4.4255880003893366e-05, "loss": 0.0314, "step": 33590 }, { "epoch": 9.085992428339644, "grad_norm": 0.14289778470993042, "learning_rate": 4.422850550510884e-05, "loss": 0.0298, "step": 33600 }, { "epoch": 9.08869659275284, "grad_norm": 0.16307801008224487, "learning_rate": 4.4201132759551934e-05, "loss": 0.031, "step": 33610 }, { "epoch": 9.091400757166035, "grad_norm": 0.1430259644985199, "learning_rate": 4.4173761775537804e-05, "loss": 0.0305, "step": 33620 }, { "epoch": 9.094104921579232, "grad_norm": 0.10409197211265564, "learning_rate": 4.414639256138099e-05, "loss": 0.0303, "step": 33630 }, { "epoch": 9.096809085992428, "grad_norm": 0.18043284118175507, "learning_rate": 4.411902512539557e-05, "loss": 0.0312, "step": 33640 }, { "epoch": 9.099513250405625, "grad_norm": 0.18351761996746063, "learning_rate": 4.4091659475895044e-05, "loss": 0.0304, "step": 33650 }, { "epoch": 9.10221741481882, "grad_norm": 0.12132767587900162, "learning_rate": 4.406429562119235e-05, "loss": 0.0308, "step": 33660 }, { "epoch": 9.104921579232018, "grad_norm": 0.21518616378307343, "learning_rate": 4.4036933569599945e-05, "loss": 0.0304, "step": 33670 }, { "epoch": 9.107625743645213, "grad_norm": 0.11869625747203827, "learning_rate": 4.400957332942965e-05, "loss": 0.0301, "step": 33680 }, { "epoch": 9.11032990805841, "grad_norm": 0.0805620551109314, "learning_rate": 4.3982214908992844e-05, "loss": 0.0307, "step": 33690 }, { "epoch": 9.113034072471606, "grad_norm": 0.20456218719482422, "learning_rate": 4.3954858316600235e-05, "loss": 0.0295, "step": 33700 }, { "epoch": 9.115738236884802, "grad_norm": 0.15819083154201508, "learning_rate": 4.392750356056205e-05, "loss": 0.0299, "step": 33710 }, { "epoch": 9.118442401298, "grad_norm": 0.15857958793640137, "learning_rate": 4.390015064918798e-05, "loss": 0.032, "step": 33720 }, { "epoch": 9.121146565711195, "grad_norm": 0.11200198531150818, "learning_rate": 4.387279959078705e-05, "loss": 0.0317, "step": 33730 }, { "epoch": 9.123850730124392, "grad_norm": 0.1803526133298874, "learning_rate": 4.384545039366786e-05, "loss": 0.0304, "step": 33740 }, { "epoch": 9.126554894537588, "grad_norm": 0.11385159194469452, "learning_rate": 4.381810306613831e-05, "loss": 0.0294, "step": 33750 }, { "epoch": 9.129259058950785, "grad_norm": 0.14004144072532654, "learning_rate": 4.3790757616505826e-05, "loss": 0.0309, "step": 33760 }, { "epoch": 9.13196322336398, "grad_norm": 0.1290319859981537, "learning_rate": 4.376341405307725e-05, "loss": 0.0308, "step": 33770 }, { "epoch": 9.134667387777176, "grad_norm": 0.20928803086280823, "learning_rate": 4.37360723841588e-05, "loss": 0.0306, "step": 33780 }, { "epoch": 9.137371552190373, "grad_norm": 0.18075807392597198, "learning_rate": 4.370873261805619e-05, "loss": 0.031, "step": 33790 }, { "epoch": 9.140075716603569, "grad_norm": 0.18141914904117584, "learning_rate": 4.368139476307449e-05, "loss": 0.0307, "step": 33800 }, { "epoch": 9.142779881016766, "grad_norm": 0.12377659231424332, "learning_rate": 4.365405882751822e-05, "loss": 0.0323, "step": 33810 }, { "epoch": 9.145484045429962, "grad_norm": 0.14233458042144775, "learning_rate": 4.3626724819691326e-05, "loss": 0.0293, "step": 33820 }, { "epoch": 9.148188209843159, "grad_norm": 0.14391452074050903, "learning_rate": 4.359939274789715e-05, "loss": 0.0306, "step": 33830 }, { "epoch": 9.150892374256355, "grad_norm": 0.16364224255084991, "learning_rate": 4.357206262043848e-05, "loss": 0.031, "step": 33840 }, { "epoch": 9.153596538669552, "grad_norm": 0.12715427577495575, "learning_rate": 4.354473444561745e-05, "loss": 0.0306, "step": 33850 }, { "epoch": 9.156300703082747, "grad_norm": 0.13291461765766144, "learning_rate": 4.3517408231735644e-05, "loss": 0.0301, "step": 33860 }, { "epoch": 9.159004867495943, "grad_norm": 0.12346246838569641, "learning_rate": 4.3490083987094086e-05, "loss": 0.0324, "step": 33870 }, { "epoch": 9.16170903190914, "grad_norm": 0.21000684797763824, "learning_rate": 4.34627617199931e-05, "loss": 0.0301, "step": 33880 }, { "epoch": 9.164413196322336, "grad_norm": 0.19329337775707245, "learning_rate": 4.3435441438732526e-05, "loss": 0.0299, "step": 33890 }, { "epoch": 9.167117360735533, "grad_norm": 0.1226242184638977, "learning_rate": 4.340812315161149e-05, "loss": 0.0301, "step": 33900 }, { "epoch": 9.169821525148729, "grad_norm": 0.20032009482383728, "learning_rate": 4.338080686692859e-05, "loss": 0.0309, "step": 33910 }, { "epoch": 9.172525689561926, "grad_norm": 0.12640230357646942, "learning_rate": 4.3353492592981816e-05, "loss": 0.0306, "step": 33920 }, { "epoch": 9.175229853975122, "grad_norm": 0.12061465531587601, "learning_rate": 4.3326180338068485e-05, "loss": 0.0301, "step": 33930 }, { "epoch": 9.177934018388317, "grad_norm": 0.14890342950820923, "learning_rate": 4.3298870110485356e-05, "loss": 0.029, "step": 33940 }, { "epoch": 9.180638182801514, "grad_norm": 0.11902555823326111, "learning_rate": 4.3271561918528567e-05, "loss": 0.033, "step": 33950 }, { "epoch": 9.18334234721471, "grad_norm": 0.13505518436431885, "learning_rate": 4.324425577049359e-05, "loss": 0.0312, "step": 33960 }, { "epoch": 9.186046511627907, "grad_norm": 0.23588348925113678, "learning_rate": 4.321695167467535e-05, "loss": 0.0328, "step": 33970 }, { "epoch": 9.188750676041103, "grad_norm": 0.165298193693161, "learning_rate": 4.3189649639368093e-05, "loss": 0.0317, "step": 33980 }, { "epoch": 9.1914548404543, "grad_norm": 0.14421720802783966, "learning_rate": 4.316234967286547e-05, "loss": 0.0308, "step": 33990 }, { "epoch": 9.194159004867496, "grad_norm": 0.13356123864650726, "learning_rate": 4.313505178346046e-05, "loss": 0.0301, "step": 34000 }, { "epoch": 9.196863169280693, "grad_norm": 0.20991437137126923, "learning_rate": 4.3107755979445465e-05, "loss": 0.0308, "step": 34010 }, { "epoch": 9.199567333693889, "grad_norm": 0.1277727335691452, "learning_rate": 4.308046226911224e-05, "loss": 0.029, "step": 34020 }, { "epoch": 9.202271498107084, "grad_norm": 0.150956392288208, "learning_rate": 4.305317066075185e-05, "loss": 0.0316, "step": 34030 }, { "epoch": 9.204975662520281, "grad_norm": 0.13836655020713806, "learning_rate": 4.302588116265482e-05, "loss": 0.0301, "step": 34040 }, { "epoch": 9.207679826933477, "grad_norm": 0.229384645819664, "learning_rate": 4.299859378311094e-05, "loss": 0.0314, "step": 34050 }, { "epoch": 9.210383991346674, "grad_norm": 0.11543145030736923, "learning_rate": 4.2971308530409424e-05, "loss": 0.0309, "step": 34060 }, { "epoch": 9.21308815575987, "grad_norm": 0.18190638720989227, "learning_rate": 4.2944025412838765e-05, "loss": 0.0301, "step": 34070 }, { "epoch": 9.215792320173067, "grad_norm": 0.1475914567708969, "learning_rate": 4.291674443868689e-05, "loss": 0.0315, "step": 34080 }, { "epoch": 9.218496484586263, "grad_norm": 0.1638585776090622, "learning_rate": 4.288946561624104e-05, "loss": 0.0329, "step": 34090 }, { "epoch": 9.22120064899946, "grad_norm": 0.24211816489696503, "learning_rate": 4.2862188953787794e-05, "loss": 0.0318, "step": 34100 }, { "epoch": 9.223904813412656, "grad_norm": 0.13369232416152954, "learning_rate": 4.283491445961308e-05, "loss": 0.0306, "step": 34110 }, { "epoch": 9.226608977825851, "grad_norm": 0.13145777583122253, "learning_rate": 4.2807642142002155e-05, "loss": 0.0297, "step": 34120 }, { "epoch": 9.229313142239048, "grad_norm": 0.1579483151435852, "learning_rate": 4.278037200923966e-05, "loss": 0.0292, "step": 34130 }, { "epoch": 9.232017306652244, "grad_norm": 0.12752363085746765, "learning_rate": 4.275310406960953e-05, "loss": 0.0299, "step": 34140 }, { "epoch": 9.234721471065441, "grad_norm": 0.15229803323745728, "learning_rate": 4.272583833139502e-05, "loss": 0.0307, "step": 34150 }, { "epoch": 9.237425635478637, "grad_norm": 0.14037184417247772, "learning_rate": 4.2698574802878794e-05, "loss": 0.0312, "step": 34160 }, { "epoch": 9.240129799891834, "grad_norm": 0.1529819816350937, "learning_rate": 4.2671313492342734e-05, "loss": 0.0299, "step": 34170 }, { "epoch": 9.24283396430503, "grad_norm": 0.21817688643932343, "learning_rate": 4.264405440806813e-05, "loss": 0.0304, "step": 34180 }, { "epoch": 9.245538128718225, "grad_norm": 0.1550525724887848, "learning_rate": 4.26167975583356e-05, "loss": 0.03, "step": 34190 }, { "epoch": 9.248242293131423, "grad_norm": 0.21078941226005554, "learning_rate": 4.2589542951425e-05, "loss": 0.0295, "step": 34200 }, { "epoch": 9.250946457544618, "grad_norm": 0.13242043554782867, "learning_rate": 4.2562290595615615e-05, "loss": 0.0316, "step": 34210 }, { "epoch": 9.253650621957815, "grad_norm": 0.14549528062343597, "learning_rate": 4.2535040499185946e-05, "loss": 0.0308, "step": 34220 }, { "epoch": 9.256354786371011, "grad_norm": 0.1297001838684082, "learning_rate": 4.250779267041387e-05, "loss": 0.0311, "step": 34230 }, { "epoch": 9.259058950784208, "grad_norm": 0.21890202164649963, "learning_rate": 4.248054711757657e-05, "loss": 0.0302, "step": 34240 }, { "epoch": 9.261763115197404, "grad_norm": 0.1569453477859497, "learning_rate": 4.245330384895052e-05, "loss": 0.0297, "step": 34250 }, { "epoch": 9.264467279610601, "grad_norm": 0.15572160482406616, "learning_rate": 4.242606287281151e-05, "loss": 0.0311, "step": 34260 }, { "epoch": 9.267171444023797, "grad_norm": 0.2578448951244354, "learning_rate": 4.2398824197434595e-05, "loss": 0.0297, "step": 34270 }, { "epoch": 9.269875608436992, "grad_norm": 0.17312532663345337, "learning_rate": 4.23715878310942e-05, "loss": 0.0298, "step": 34280 }, { "epoch": 9.27257977285019, "grad_norm": 0.13434728980064392, "learning_rate": 4.234435378206402e-05, "loss": 0.0301, "step": 34290 }, { "epoch": 9.275283937263385, "grad_norm": 0.0880606472492218, "learning_rate": 4.2317122058617006e-05, "loss": 0.0306, "step": 34300 }, { "epoch": 9.277988101676582, "grad_norm": 0.19310924410820007, "learning_rate": 4.2289892669025485e-05, "loss": 0.0307, "step": 34310 }, { "epoch": 9.280692266089778, "grad_norm": 0.10076360404491425, "learning_rate": 4.226266562156097e-05, "loss": 0.0294, "step": 34320 }, { "epoch": 9.283396430502975, "grad_norm": 0.13504737615585327, "learning_rate": 4.223544092449435e-05, "loss": 0.0298, "step": 34330 }, { "epoch": 9.28610059491617, "grad_norm": 0.1468765288591385, "learning_rate": 4.2208218586095784e-05, "loss": 0.0296, "step": 34340 }, { "epoch": 9.288804759329366, "grad_norm": 0.16717156767845154, "learning_rate": 4.218099861463466e-05, "loss": 0.0298, "step": 34350 }, { "epoch": 9.291508923742564, "grad_norm": 0.11145814508199692, "learning_rate": 4.215378101837972e-05, "loss": 0.0289, "step": 34360 }, { "epoch": 9.29421308815576, "grad_norm": 0.12361709773540497, "learning_rate": 4.2126565805598937e-05, "loss": 0.0317, "step": 34370 }, { "epoch": 9.296917252568957, "grad_norm": 0.10178151726722717, "learning_rate": 4.209935298455957e-05, "loss": 0.0305, "step": 34380 }, { "epoch": 9.299621416982152, "grad_norm": 0.18351474404335022, "learning_rate": 4.207214256352817e-05, "loss": 0.0292, "step": 34390 }, { "epoch": 9.30232558139535, "grad_norm": 0.2088705152273178, "learning_rate": 4.2044934550770524e-05, "loss": 0.0311, "step": 34400 }, { "epoch": 9.305029745808545, "grad_norm": 0.12198004126548767, "learning_rate": 4.201772895455174e-05, "loss": 0.0289, "step": 34410 }, { "epoch": 9.307733910221742, "grad_norm": 0.1342250108718872, "learning_rate": 4.199052578313613e-05, "loss": 0.031, "step": 34420 }, { "epoch": 9.310438074634938, "grad_norm": 0.11175811290740967, "learning_rate": 4.1963325044787294e-05, "loss": 0.0309, "step": 34430 }, { "epoch": 9.313142239048133, "grad_norm": 0.13834288716316223, "learning_rate": 4.193612674776814e-05, "loss": 0.031, "step": 34440 }, { "epoch": 9.31584640346133, "grad_norm": 0.1495915800333023, "learning_rate": 4.1908930900340745e-05, "loss": 0.0286, "step": 34450 }, { "epoch": 9.318550567874526, "grad_norm": 0.1606711447238922, "learning_rate": 4.1881737510766536e-05, "loss": 0.03, "step": 34460 }, { "epoch": 9.321254732287723, "grad_norm": 0.1167595311999321, "learning_rate": 4.185454658730609e-05, "loss": 0.0302, "step": 34470 }, { "epoch": 9.323958896700919, "grad_norm": 0.13072258234024048, "learning_rate": 4.1827358138219355e-05, "loss": 0.0299, "step": 34480 }, { "epoch": 9.326663061114116, "grad_norm": 0.17515218257904053, "learning_rate": 4.1800172171765404e-05, "loss": 0.0294, "step": 34490 }, { "epoch": 9.329367225527312, "grad_norm": 0.3249594569206238, "learning_rate": 4.177298869620264e-05, "loss": 0.0321, "step": 34500 }, { "epoch": 9.33207138994051, "grad_norm": 0.1547686904668808, "learning_rate": 4.1745807719788705e-05, "loss": 0.0314, "step": 34510 }, { "epoch": 9.334775554353705, "grad_norm": 0.15639086067676544, "learning_rate": 4.1718629250780445e-05, "loss": 0.0307, "step": 34520 }, { "epoch": 9.3374797187669, "grad_norm": 0.1298055797815323, "learning_rate": 4.1691453297433956e-05, "loss": 0.0311, "step": 34530 }, { "epoch": 9.340183883180098, "grad_norm": 0.10354844480752945, "learning_rate": 4.166427986800457e-05, "loss": 0.0296, "step": 34540 }, { "epoch": 9.342888047593293, "grad_norm": 0.1492297649383545, "learning_rate": 4.163710897074688e-05, "loss": 0.0302, "step": 34550 }, { "epoch": 9.34559221200649, "grad_norm": 0.13216382265090942, "learning_rate": 4.1609940613914686e-05, "loss": 0.0292, "step": 34560 }, { "epoch": 9.348296376419686, "grad_norm": 0.14676910638809204, "learning_rate": 4.1582774805760996e-05, "loss": 0.0315, "step": 34570 }, { "epoch": 9.351000540832883, "grad_norm": 0.12770791351795197, "learning_rate": 4.155561155453809e-05, "loss": 0.0313, "step": 34580 }, { "epoch": 9.353704705246079, "grad_norm": 0.13625212013721466, "learning_rate": 4.15284508684974e-05, "loss": 0.0309, "step": 34590 }, { "epoch": 9.356408869659274, "grad_norm": 0.1729152500629425, "learning_rate": 4.1501292755889675e-05, "loss": 0.0306, "step": 34600 }, { "epoch": 9.359113034072472, "grad_norm": 0.14219477772712708, "learning_rate": 4.1474137224964833e-05, "loss": 0.0303, "step": 34610 }, { "epoch": 9.361817198485667, "grad_norm": 0.16538996994495392, "learning_rate": 4.144698428397197e-05, "loss": 0.0303, "step": 34620 }, { "epoch": 9.364521362898865, "grad_norm": 0.13050267100334167, "learning_rate": 4.1419833941159466e-05, "loss": 0.0307, "step": 34630 }, { "epoch": 9.36722552731206, "grad_norm": 0.13745617866516113, "learning_rate": 4.1392686204774846e-05, "loss": 0.0303, "step": 34640 }, { "epoch": 9.369929691725257, "grad_norm": 0.11005986481904984, "learning_rate": 4.13655410830649e-05, "loss": 0.0299, "step": 34650 }, { "epoch": 9.372633856138453, "grad_norm": 0.16395695507526398, "learning_rate": 4.1338398584275594e-05, "loss": 0.0294, "step": 34660 }, { "epoch": 9.37533802055165, "grad_norm": 0.1701594442129135, "learning_rate": 4.1311258716652104e-05, "loss": 0.0317, "step": 34670 }, { "epoch": 9.378042184964846, "grad_norm": 0.1286555379629135, "learning_rate": 4.128412148843881e-05, "loss": 0.0296, "step": 34680 }, { "epoch": 9.380746349378041, "grad_norm": 0.2607525587081909, "learning_rate": 4.125698690787926e-05, "loss": 0.0302, "step": 34690 }, { "epoch": 9.383450513791239, "grad_norm": 0.16334480047225952, "learning_rate": 4.1229854983216245e-05, "loss": 0.0303, "step": 34700 }, { "epoch": 9.386154678204434, "grad_norm": 0.19357258081436157, "learning_rate": 4.120272572269175e-05, "loss": 0.0305, "step": 34710 }, { "epoch": 9.388858842617632, "grad_norm": 0.15384994447231293, "learning_rate": 4.117559913454687e-05, "loss": 0.0312, "step": 34720 }, { "epoch": 9.391563007030827, "grad_norm": 0.20599398016929626, "learning_rate": 4.114847522702201e-05, "loss": 0.0305, "step": 34730 }, { "epoch": 9.394267171444024, "grad_norm": 0.21984341740608215, "learning_rate": 4.112135400835664e-05, "loss": 0.0302, "step": 34740 }, { "epoch": 9.39697133585722, "grad_norm": 0.17893333733081818, "learning_rate": 4.109423548678949e-05, "loss": 0.03, "step": 34750 }, { "epoch": 9.399675500270416, "grad_norm": 0.17908020317554474, "learning_rate": 4.106711967055848e-05, "loss": 0.0316, "step": 34760 }, { "epoch": 9.402379664683613, "grad_norm": 0.22155295312404633, "learning_rate": 4.1040006567900636e-05, "loss": 0.0306, "step": 34770 }, { "epoch": 9.405083829096808, "grad_norm": 0.1274992823600769, "learning_rate": 4.101289618705224e-05, "loss": 0.0312, "step": 34780 }, { "epoch": 9.407787993510006, "grad_norm": 0.17795909941196442, "learning_rate": 4.0985788536248675e-05, "loss": 0.0305, "step": 34790 }, { "epoch": 9.410492157923201, "grad_norm": 0.13910558819770813, "learning_rate": 4.095868362372454e-05, "loss": 0.0292, "step": 34800 }, { "epoch": 9.413196322336399, "grad_norm": 0.1315208524465561, "learning_rate": 4.0931581457713614e-05, "loss": 0.0306, "step": 34810 }, { "epoch": 9.415900486749594, "grad_norm": 0.12668851017951965, "learning_rate": 4.0904482046448805e-05, "loss": 0.0297, "step": 34820 }, { "epoch": 9.418604651162791, "grad_norm": 0.11683882772922516, "learning_rate": 4.087738539816219e-05, "loss": 0.0302, "step": 34830 }, { "epoch": 9.421308815575987, "grad_norm": 0.11969058960676193, "learning_rate": 4.085029152108501e-05, "loss": 0.0283, "step": 34840 }, { "epoch": 9.424012979989183, "grad_norm": 0.11752182990312576, "learning_rate": 4.0823200423447714e-05, "loss": 0.0306, "step": 34850 }, { "epoch": 9.42671714440238, "grad_norm": 0.1008090078830719, "learning_rate": 4.079611211347981e-05, "loss": 0.0307, "step": 34860 }, { "epoch": 9.429421308815575, "grad_norm": 0.18592889606952667, "learning_rate": 4.076902659941002e-05, "loss": 0.0313, "step": 34870 }, { "epoch": 9.432125473228773, "grad_norm": 0.18185380101203918, "learning_rate": 4.074194388946624e-05, "loss": 0.0305, "step": 34880 }, { "epoch": 9.434829637641968, "grad_norm": 0.2254135012626648, "learning_rate": 4.071486399187545e-05, "loss": 0.029, "step": 34890 }, { "epoch": 9.437533802055166, "grad_norm": 0.1485694944858551, "learning_rate": 4.0687786914863836e-05, "loss": 0.0297, "step": 34900 }, { "epoch": 9.440237966468361, "grad_norm": 0.15014483034610748, "learning_rate": 4.0660712666656666e-05, "loss": 0.0289, "step": 34910 }, { "epoch": 9.442942130881558, "grad_norm": 0.16107387840747833, "learning_rate": 4.0633641255478394e-05, "loss": 0.0304, "step": 34920 }, { "epoch": 9.445646295294754, "grad_norm": 0.10952658951282501, "learning_rate": 4.0606572689552624e-05, "loss": 0.03, "step": 34930 }, { "epoch": 9.44835045970795, "grad_norm": 0.20939280092716217, "learning_rate": 4.0579506977102036e-05, "loss": 0.0311, "step": 34940 }, { "epoch": 9.451054624121147, "grad_norm": 0.13745473325252533, "learning_rate": 4.055244412634849e-05, "loss": 0.03, "step": 34950 }, { "epoch": 9.453758788534342, "grad_norm": 0.16628296673297882, "learning_rate": 4.052538414551298e-05, "loss": 0.0302, "step": 34960 }, { "epoch": 9.45646295294754, "grad_norm": 0.1315547525882721, "learning_rate": 4.0498327042815596e-05, "loss": 0.0308, "step": 34970 }, { "epoch": 9.459167117360735, "grad_norm": 0.12434953451156616, "learning_rate": 4.047127282647559e-05, "loss": 0.0305, "step": 34980 }, { "epoch": 9.461871281773933, "grad_norm": 0.18596956133842468, "learning_rate": 4.04442215047113e-05, "loss": 0.0307, "step": 34990 }, { "epoch": 9.464575446187128, "grad_norm": 0.13355444371700287, "learning_rate": 4.041717308574023e-05, "loss": 0.0306, "step": 35000 }, { "epoch": 9.467279610600325, "grad_norm": 0.11742154508829117, "learning_rate": 4.039012757777893e-05, "loss": 0.0285, "step": 35010 }, { "epoch": 9.469983775013521, "grad_norm": 0.12477119266986847, "learning_rate": 4.036308498904314e-05, "loss": 0.0301, "step": 35020 }, { "epoch": 9.472687939426716, "grad_norm": 0.15195688605308533, "learning_rate": 4.033604532774771e-05, "loss": 0.0292, "step": 35030 }, { "epoch": 9.475392103839914, "grad_norm": 0.16518929600715637, "learning_rate": 4.030900860210652e-05, "loss": 0.0298, "step": 35040 }, { "epoch": 9.47809626825311, "grad_norm": 0.1365971863269806, "learning_rate": 4.028197482033266e-05, "loss": 0.0301, "step": 35050 }, { "epoch": 9.480800432666307, "grad_norm": 0.13055722415447235, "learning_rate": 4.0254943990638246e-05, "loss": 0.029, "step": 35060 }, { "epoch": 9.483504597079502, "grad_norm": 0.12535174190998077, "learning_rate": 4.022791612123454e-05, "loss": 0.0299, "step": 35070 }, { "epoch": 9.4862087614927, "grad_norm": 0.16065526008605957, "learning_rate": 4.020089122033192e-05, "loss": 0.0318, "step": 35080 }, { "epoch": 9.488912925905895, "grad_norm": 0.12793119251728058, "learning_rate": 4.0173869296139795e-05, "loss": 0.0299, "step": 35090 }, { "epoch": 9.49161709031909, "grad_norm": 0.15187573432922363, "learning_rate": 4.014685035686675e-05, "loss": 0.0293, "step": 35100 }, { "epoch": 9.494321254732288, "grad_norm": 0.20739945769309998, "learning_rate": 4.011983441072039e-05, "loss": 0.0323, "step": 35110 }, { "epoch": 9.497025419145483, "grad_norm": 0.16246967017650604, "learning_rate": 4.0092821465907485e-05, "loss": 0.0318, "step": 35120 }, { "epoch": 9.49972958355868, "grad_norm": 0.16493411362171173, "learning_rate": 4.006581153063383e-05, "loss": 0.0304, "step": 35130 }, { "epoch": 9.502433747971876, "grad_norm": 0.21655279397964478, "learning_rate": 4.003880461310432e-05, "loss": 0.0298, "step": 35140 }, { "epoch": 9.505137912385074, "grad_norm": 0.13494160771369934, "learning_rate": 4.001180072152298e-05, "loss": 0.029, "step": 35150 }, { "epoch": 9.50784207679827, "grad_norm": 0.22847987711429596, "learning_rate": 3.998479986409285e-05, "loss": 0.0307, "step": 35160 }, { "epoch": 9.510546241211465, "grad_norm": 0.13591963052749634, "learning_rate": 3.995780204901607e-05, "loss": 0.0291, "step": 35170 }, { "epoch": 9.513250405624662, "grad_norm": 0.21231727302074432, "learning_rate": 3.993080728449391e-05, "loss": 0.0306, "step": 35180 }, { "epoch": 9.515954570037858, "grad_norm": 0.13356250524520874, "learning_rate": 3.990381557872661e-05, "loss": 0.0292, "step": 35190 }, { "epoch": 9.518658734451055, "grad_norm": 0.12346730381250381, "learning_rate": 3.987682693991359e-05, "loss": 0.0303, "step": 35200 }, { "epoch": 9.52136289886425, "grad_norm": 0.12644024193286896, "learning_rate": 3.9849841376253226e-05, "loss": 0.0287, "step": 35210 }, { "epoch": 9.524067063277448, "grad_norm": 0.22898708283901215, "learning_rate": 3.982285889594306e-05, "loss": 0.0313, "step": 35220 }, { "epoch": 9.526771227690643, "grad_norm": 0.27309390902519226, "learning_rate": 3.9795879507179665e-05, "loss": 0.0293, "step": 35230 }, { "epoch": 9.52947539210384, "grad_norm": 0.18115289509296417, "learning_rate": 3.9768903218158634e-05, "loss": 0.0323, "step": 35240 }, { "epoch": 9.532179556517036, "grad_norm": 0.11533223092556, "learning_rate": 3.974193003707468e-05, "loss": 0.0296, "step": 35250 }, { "epoch": 9.534883720930232, "grad_norm": 0.10398174822330475, "learning_rate": 3.971495997212152e-05, "loss": 0.0287, "step": 35260 }, { "epoch": 9.537587885343429, "grad_norm": 0.15092280507087708, "learning_rate": 3.9687993031491985e-05, "loss": 0.0298, "step": 35270 }, { "epoch": 9.540292049756625, "grad_norm": 0.20501747727394104, "learning_rate": 3.966102922337787e-05, "loss": 0.0297, "step": 35280 }, { "epoch": 9.542996214169822, "grad_norm": 0.1068427711725235, "learning_rate": 3.963406855597009e-05, "loss": 0.0291, "step": 35290 }, { "epoch": 9.545700378583017, "grad_norm": 0.11557155102491379, "learning_rate": 3.960711103745861e-05, "loss": 0.0308, "step": 35300 }, { "epoch": 9.548404542996215, "grad_norm": 0.12852269411087036, "learning_rate": 3.958015667603237e-05, "loss": 0.0303, "step": 35310 }, { "epoch": 9.55110870740941, "grad_norm": 0.11343104392290115, "learning_rate": 3.955320547987943e-05, "loss": 0.0297, "step": 35320 }, { "epoch": 9.553812871822608, "grad_norm": 0.1704171895980835, "learning_rate": 3.952625745718681e-05, "loss": 0.03, "step": 35330 }, { "epoch": 9.556517036235803, "grad_norm": 0.2774423658847809, "learning_rate": 3.949931261614064e-05, "loss": 0.0306, "step": 35340 }, { "epoch": 9.559221200648999, "grad_norm": 0.15280485153198242, "learning_rate": 3.947237096492605e-05, "loss": 0.0287, "step": 35350 }, { "epoch": 9.561925365062196, "grad_norm": 0.15195709466934204, "learning_rate": 3.944543251172719e-05, "loss": 0.0312, "step": 35360 }, { "epoch": 9.564629529475392, "grad_norm": 0.1765635460615158, "learning_rate": 3.941849726472725e-05, "loss": 0.0297, "step": 35370 }, { "epoch": 9.567333693888589, "grad_norm": 0.2537723779678345, "learning_rate": 3.939156523210846e-05, "loss": 0.0301, "step": 35380 }, { "epoch": 9.570037858301784, "grad_norm": 0.12974418699741364, "learning_rate": 3.9364636422052046e-05, "loss": 0.0295, "step": 35390 }, { "epoch": 9.572742022714982, "grad_norm": 0.11177525669336319, "learning_rate": 3.933771084273828e-05, "loss": 0.0292, "step": 35400 }, { "epoch": 9.575446187128177, "grad_norm": 0.19396403431892395, "learning_rate": 3.931078850234643e-05, "loss": 0.0292, "step": 35410 }, { "epoch": 9.578150351541375, "grad_norm": 0.14207185804843903, "learning_rate": 3.928386940905483e-05, "loss": 0.0294, "step": 35420 }, { "epoch": 9.58085451595457, "grad_norm": 0.1377444565296173, "learning_rate": 3.925695357104073e-05, "loss": 0.0288, "step": 35430 }, { "epoch": 9.583558680367766, "grad_norm": 0.18450221419334412, "learning_rate": 3.923004099648049e-05, "loss": 0.0295, "step": 35440 }, { "epoch": 9.586262844780963, "grad_norm": 0.1278035044670105, "learning_rate": 3.920313169354944e-05, "loss": 0.0286, "step": 35450 }, { "epoch": 9.588967009194159, "grad_norm": 0.14733745157718658, "learning_rate": 3.9176225670421897e-05, "loss": 0.0298, "step": 35460 }, { "epoch": 9.591671173607356, "grad_norm": 0.1726154237985611, "learning_rate": 3.9149322935271224e-05, "loss": 0.029, "step": 35470 }, { "epoch": 9.594375338020551, "grad_norm": 0.1903313845396042, "learning_rate": 3.9122423496269725e-05, "loss": 0.0302, "step": 35480 }, { "epoch": 9.597079502433749, "grad_norm": 0.15155379474163055, "learning_rate": 3.909552736158877e-05, "loss": 0.0291, "step": 35490 }, { "epoch": 9.599783666846944, "grad_norm": 0.15244628489017487, "learning_rate": 3.90686345393987e-05, "loss": 0.0289, "step": 35500 }, { "epoch": 9.60248783126014, "grad_norm": 0.18469589948654175, "learning_rate": 3.9041745037868816e-05, "loss": 0.0283, "step": 35510 }, { "epoch": 9.605191995673337, "grad_norm": 0.19988186657428741, "learning_rate": 3.9014858865167465e-05, "loss": 0.0288, "step": 35520 }, { "epoch": 9.607896160086533, "grad_norm": 0.2119951993227005, "learning_rate": 3.8987976029461935e-05, "loss": 0.0288, "step": 35530 }, { "epoch": 9.61060032449973, "grad_norm": 0.11542757600545883, "learning_rate": 3.896109653891853e-05, "loss": 0.0302, "step": 35540 }, { "epoch": 9.613304488912926, "grad_norm": 0.1266658753156662, "learning_rate": 3.893422040170254e-05, "loss": 0.03, "step": 35550 }, { "epoch": 9.616008653326123, "grad_norm": 0.19923105835914612, "learning_rate": 3.8907347625978207e-05, "loss": 0.03, "step": 35560 }, { "epoch": 9.618712817739318, "grad_norm": 0.21832461655139923, "learning_rate": 3.88804782199088e-05, "loss": 0.0302, "step": 35570 }, { "epoch": 9.621416982152514, "grad_norm": 0.11735330522060394, "learning_rate": 3.8853612191656495e-05, "loss": 0.0282, "step": 35580 }, { "epoch": 9.624121146565711, "grad_norm": 0.2616835832595825, "learning_rate": 3.88267495493825e-05, "loss": 0.0312, "step": 35590 }, { "epoch": 9.626825310978907, "grad_norm": 0.14649473130702972, "learning_rate": 3.8799890301247004e-05, "loss": 0.0296, "step": 35600 }, { "epoch": 9.629529475392104, "grad_norm": 0.2549547553062439, "learning_rate": 3.8773034455409096e-05, "loss": 0.0286, "step": 35610 }, { "epoch": 9.6322336398053, "grad_norm": 0.19170616567134857, "learning_rate": 3.8746182020026904e-05, "loss": 0.0299, "step": 35620 }, { "epoch": 9.634937804218497, "grad_norm": 0.14691407978534698, "learning_rate": 3.871933300325745e-05, "loss": 0.0303, "step": 35630 }, { "epoch": 9.637641968631693, "grad_norm": 0.14475375413894653, "learning_rate": 3.869248741325679e-05, "loss": 0.0284, "step": 35640 }, { "epoch": 9.64034613304489, "grad_norm": 0.10424117743968964, "learning_rate": 3.866564525817992e-05, "loss": 0.0288, "step": 35650 }, { "epoch": 9.643050297458085, "grad_norm": 0.21868683397769928, "learning_rate": 3.8638806546180725e-05, "loss": 0.0287, "step": 35660 }, { "epoch": 9.645754461871281, "grad_norm": 0.08953484147787094, "learning_rate": 3.861197128541213e-05, "loss": 0.0288, "step": 35670 }, { "epoch": 9.648458626284478, "grad_norm": 0.14494234323501587, "learning_rate": 3.858513948402599e-05, "loss": 0.0293, "step": 35680 }, { "epoch": 9.651162790697674, "grad_norm": 0.1308528631925583, "learning_rate": 3.8558311150173077e-05, "loss": 0.0312, "step": 35690 }, { "epoch": 9.653866955110871, "grad_norm": 0.1554020345211029, "learning_rate": 3.853148629200312e-05, "loss": 0.0304, "step": 35700 }, { "epoch": 9.656571119524067, "grad_norm": 0.15324367582798004, "learning_rate": 3.850466491766482e-05, "loss": 0.0313, "step": 35710 }, { "epoch": 9.659275283937264, "grad_norm": 0.14316284656524658, "learning_rate": 3.847784703530583e-05, "loss": 0.0291, "step": 35720 }, { "epoch": 9.66197944835046, "grad_norm": 0.12874220311641693, "learning_rate": 3.845103265307266e-05, "loss": 0.0294, "step": 35730 }, { "epoch": 9.664683612763657, "grad_norm": 0.10973517596721649, "learning_rate": 3.842422177911086e-05, "loss": 0.0283, "step": 35740 }, { "epoch": 9.667387777176852, "grad_norm": 0.0982057973742485, "learning_rate": 3.8397414421564826e-05, "loss": 0.0288, "step": 35750 }, { "epoch": 9.670091941590048, "grad_norm": 0.23045669496059418, "learning_rate": 3.8370610588577935e-05, "loss": 0.0296, "step": 35760 }, { "epoch": 9.672796106003245, "grad_norm": 0.11014352738857269, "learning_rate": 3.834381028829251e-05, "loss": 0.0291, "step": 35770 }, { "epoch": 9.67550027041644, "grad_norm": 0.13548307120800018, "learning_rate": 3.8317013528849745e-05, "loss": 0.0299, "step": 35780 }, { "epoch": 9.678204434829638, "grad_norm": 0.11580085754394531, "learning_rate": 3.8290220318389815e-05, "loss": 0.0288, "step": 35790 }, { "epoch": 9.680908599242834, "grad_norm": 0.1351076364517212, "learning_rate": 3.8263430665051746e-05, "loss": 0.029, "step": 35800 }, { "epoch": 9.683612763656031, "grad_norm": 0.19005712866783142, "learning_rate": 3.8236644576973554e-05, "loss": 0.0281, "step": 35810 }, { "epoch": 9.686316928069227, "grad_norm": 0.11426740139722824, "learning_rate": 3.820986206229217e-05, "loss": 0.0298, "step": 35820 }, { "epoch": 9.689021092482424, "grad_norm": 0.12103912979364395, "learning_rate": 3.8183083129143384e-05, "loss": 0.0293, "step": 35830 }, { "epoch": 9.69172525689562, "grad_norm": 0.17140288650989532, "learning_rate": 3.815630778566193e-05, "loss": 0.0292, "step": 35840 }, { "epoch": 9.694429421308815, "grad_norm": 0.1348866969347, "learning_rate": 3.812953603998145e-05, "loss": 0.0293, "step": 35850 }, { "epoch": 9.697133585722012, "grad_norm": 0.1777459681034088, "learning_rate": 3.8102767900234504e-05, "loss": 0.0302, "step": 35860 }, { "epoch": 9.699837750135208, "grad_norm": 0.11309327930212021, "learning_rate": 3.807600337455256e-05, "loss": 0.0301, "step": 35870 }, { "epoch": 9.702541914548405, "grad_norm": 0.1773826628923416, "learning_rate": 3.804924247106593e-05, "loss": 0.0286, "step": 35880 }, { "epoch": 9.7052460789616, "grad_norm": 0.2518218457698822, "learning_rate": 3.8022485197903925e-05, "loss": 0.0293, "step": 35890 }, { "epoch": 9.707950243374798, "grad_norm": 0.17197923362255096, "learning_rate": 3.799573156319464e-05, "loss": 0.0287, "step": 35900 }, { "epoch": 9.710654407787993, "grad_norm": 0.1890021562576294, "learning_rate": 3.796898157506515e-05, "loss": 0.029, "step": 35910 }, { "epoch": 9.713358572201189, "grad_norm": 0.15981872379779816, "learning_rate": 3.794223524164143e-05, "loss": 0.0295, "step": 35920 }, { "epoch": 9.716062736614386, "grad_norm": 0.13983507454395294, "learning_rate": 3.7915492571048245e-05, "loss": 0.0295, "step": 35930 }, { "epoch": 9.718766901027582, "grad_norm": 0.16024383902549744, "learning_rate": 3.788875357140937e-05, "loss": 0.0306, "step": 35940 }, { "epoch": 9.72147106544078, "grad_norm": 0.12541721761226654, "learning_rate": 3.786201825084736e-05, "loss": 0.0292, "step": 35950 }, { "epoch": 9.724175229853975, "grad_norm": 0.14723335206508636, "learning_rate": 3.783528661748372e-05, "loss": 0.0286, "step": 35960 }, { "epoch": 9.726879394267172, "grad_norm": 0.10545969009399414, "learning_rate": 3.780855867943882e-05, "loss": 0.0291, "step": 35970 }, { "epoch": 9.729583558680368, "grad_norm": 0.1597718596458435, "learning_rate": 3.778183444483189e-05, "loss": 0.031, "step": 35980 }, { "epoch": 9.732287723093563, "grad_norm": 0.21500633656978607, "learning_rate": 3.775511392178108e-05, "loss": 0.0305, "step": 35990 }, { "epoch": 9.73499188750676, "grad_norm": 0.1087627187371254, "learning_rate": 3.772839711840332e-05, "loss": 0.0302, "step": 36000 }, { "epoch": 9.737696051919956, "grad_norm": 0.12332139909267426, "learning_rate": 3.7701684042814515e-05, "loss": 0.0315, "step": 36010 }, { "epoch": 9.740400216333153, "grad_norm": 0.16188636422157288, "learning_rate": 3.76749747031294e-05, "loss": 0.0284, "step": 36020 }, { "epoch": 9.743104380746349, "grad_norm": 0.16056503355503082, "learning_rate": 3.764826910746152e-05, "loss": 0.0298, "step": 36030 }, { "epoch": 9.745808545159546, "grad_norm": 0.15271101891994476, "learning_rate": 3.762156726392338e-05, "loss": 0.03, "step": 36040 }, { "epoch": 9.748512709572742, "grad_norm": 0.12699268758296967, "learning_rate": 3.759486918062625e-05, "loss": 0.0294, "step": 36050 }, { "epoch": 9.751216873985939, "grad_norm": 0.11888224631547928, "learning_rate": 3.756817486568033e-05, "loss": 0.0303, "step": 36060 }, { "epoch": 9.753921038399135, "grad_norm": 0.1283298134803772, "learning_rate": 3.7541484327194654e-05, "loss": 0.0284, "step": 36070 }, { "epoch": 9.75662520281233, "grad_norm": 0.12137361615896225, "learning_rate": 3.751479757327707e-05, "loss": 0.0292, "step": 36080 }, { "epoch": 9.759329367225527, "grad_norm": 0.22637644410133362, "learning_rate": 3.7488114612034345e-05, "loss": 0.0291, "step": 36090 }, { "epoch": 9.762033531638723, "grad_norm": 0.12586621940135956, "learning_rate": 3.7461435451572044e-05, "loss": 0.0292, "step": 36100 }, { "epoch": 9.76473769605192, "grad_norm": 0.1887347251176834, "learning_rate": 3.743476009999459e-05, "loss": 0.0291, "step": 36110 }, { "epoch": 9.767441860465116, "grad_norm": 0.19008924067020416, "learning_rate": 3.7408088565405245e-05, "loss": 0.0302, "step": 36120 }, { "epoch": 9.770146024878313, "grad_norm": 0.21723046898841858, "learning_rate": 3.738142085590612e-05, "loss": 0.0302, "step": 36130 }, { "epoch": 9.772850189291509, "grad_norm": 0.16817642748355865, "learning_rate": 3.7354756979598194e-05, "loss": 0.0285, "step": 36140 }, { "epoch": 9.775554353704706, "grad_norm": 0.17592507600784302, "learning_rate": 3.7328096944581187e-05, "loss": 0.0281, "step": 36150 }, { "epoch": 9.778258518117902, "grad_norm": 0.14833039045333862, "learning_rate": 3.730144075895377e-05, "loss": 0.029, "step": 36160 }, { "epoch": 9.780962682531097, "grad_norm": 0.12223771959543228, "learning_rate": 3.727478843081335e-05, "loss": 0.0299, "step": 36170 }, { "epoch": 9.783666846944294, "grad_norm": 0.12821544706821442, "learning_rate": 3.72481399682562e-05, "loss": 0.0277, "step": 36180 }, { "epoch": 9.78637101135749, "grad_norm": 0.13969773054122925, "learning_rate": 3.722149537937747e-05, "loss": 0.0304, "step": 36190 }, { "epoch": 9.789075175770687, "grad_norm": 0.2490498125553131, "learning_rate": 3.7194854672271015e-05, "loss": 0.0301, "step": 36200 }, { "epoch": 9.791779340183883, "grad_norm": 0.22939227521419525, "learning_rate": 3.7168217855029644e-05, "loss": 0.0302, "step": 36210 }, { "epoch": 9.79448350459708, "grad_norm": 0.2732827663421631, "learning_rate": 3.7141584935744856e-05, "loss": 0.0303, "step": 36220 }, { "epoch": 9.797187669010276, "grad_norm": 0.163569837808609, "learning_rate": 3.7114955922507055e-05, "loss": 0.0303, "step": 36230 }, { "epoch": 9.799891833423473, "grad_norm": 0.10201866924762726, "learning_rate": 3.708833082340545e-05, "loss": 0.029, "step": 36240 }, { "epoch": 9.802595997836669, "grad_norm": 0.14981317520141602, "learning_rate": 3.7061709646528034e-05, "loss": 0.0296, "step": 36250 }, { "epoch": 9.805300162249864, "grad_norm": 0.12390007078647614, "learning_rate": 3.7035092399961604e-05, "loss": 0.0294, "step": 36260 }, { "epoch": 9.808004326663061, "grad_norm": 0.14339470863342285, "learning_rate": 3.700847909179177e-05, "loss": 0.029, "step": 36270 }, { "epoch": 9.810708491076257, "grad_norm": 0.13996219635009766, "learning_rate": 3.698186973010297e-05, "loss": 0.0293, "step": 36280 }, { "epoch": 9.813412655489454, "grad_norm": 0.10782179236412048, "learning_rate": 3.695526432297844e-05, "loss": 0.0286, "step": 36290 }, { "epoch": 9.81611681990265, "grad_norm": 0.14468687772750854, "learning_rate": 3.692866287850017e-05, "loss": 0.0301, "step": 36300 }, { "epoch": 9.818820984315847, "grad_norm": 0.22895914316177368, "learning_rate": 3.6902065404749006e-05, "loss": 0.0282, "step": 36310 }, { "epoch": 9.821525148729043, "grad_norm": 0.16409599781036377, "learning_rate": 3.6875471909804516e-05, "loss": 0.0296, "step": 36320 }, { "epoch": 9.824229313142238, "grad_norm": 0.18890048563480377, "learning_rate": 3.6848882401745135e-05, "loss": 0.03, "step": 36330 }, { "epoch": 9.826933477555436, "grad_norm": 0.13683462142944336, "learning_rate": 3.682229688864806e-05, "loss": 0.0296, "step": 36340 }, { "epoch": 9.829637641968631, "grad_norm": 0.1896040439605713, "learning_rate": 3.6795715378589235e-05, "loss": 0.0291, "step": 36350 }, { "epoch": 9.832341806381828, "grad_norm": 0.14093652367591858, "learning_rate": 3.676913787964345e-05, "loss": 0.0287, "step": 36360 }, { "epoch": 9.835045970795024, "grad_norm": 0.12940463423728943, "learning_rate": 3.674256439988423e-05, "loss": 0.0288, "step": 36370 }, { "epoch": 9.837750135208221, "grad_norm": 0.13094480335712433, "learning_rate": 3.6715994947383904e-05, "loss": 0.0294, "step": 36380 }, { "epoch": 9.840454299621417, "grad_norm": 0.32161304354667664, "learning_rate": 3.668942953021357e-05, "loss": 0.0299, "step": 36390 }, { "epoch": 9.843158464034612, "grad_norm": 0.1379026472568512, "learning_rate": 3.66628681564431e-05, "loss": 0.0305, "step": 36400 }, { "epoch": 9.84586262844781, "grad_norm": 0.20635899901390076, "learning_rate": 3.663631083414114e-05, "loss": 0.0283, "step": 36410 }, { "epoch": 9.848566792861005, "grad_norm": 0.2292259931564331, "learning_rate": 3.660975757137509e-05, "loss": 0.0287, "step": 36420 }, { "epoch": 9.851270957274203, "grad_norm": 0.198871448636055, "learning_rate": 3.658320837621114e-05, "loss": 0.0289, "step": 36430 }, { "epoch": 9.853975121687398, "grad_norm": 0.142948180437088, "learning_rate": 3.655666325671426e-05, "loss": 0.0288, "step": 36440 }, { "epoch": 9.856679286100595, "grad_norm": 0.13592270016670227, "learning_rate": 3.65301222209481e-05, "loss": 0.0291, "step": 36450 }, { "epoch": 9.859383450513791, "grad_norm": 0.2303406149148941, "learning_rate": 3.650358527697519e-05, "loss": 0.0299, "step": 36460 }, { "epoch": 9.862087614926988, "grad_norm": 0.1544090211391449, "learning_rate": 3.64770524328567e-05, "loss": 0.0288, "step": 36470 }, { "epoch": 9.864791779340184, "grad_norm": 0.1835329532623291, "learning_rate": 3.645052369665265e-05, "loss": 0.0297, "step": 36480 }, { "epoch": 9.86749594375338, "grad_norm": 0.15944133698940277, "learning_rate": 3.6423999076421724e-05, "loss": 0.0292, "step": 36490 }, { "epoch": 9.870200108166577, "grad_norm": 0.11987606436014175, "learning_rate": 3.639747858022142e-05, "loss": 0.0286, "step": 36500 }, { "epoch": 9.872904272579772, "grad_norm": 0.12175039201974869, "learning_rate": 3.637096221610799e-05, "loss": 0.0287, "step": 36510 }, { "epoch": 9.87560843699297, "grad_norm": 0.15693698823451996, "learning_rate": 3.634444999213638e-05, "loss": 0.0299, "step": 36520 }, { "epoch": 9.878312601406165, "grad_norm": 0.13318344950675964, "learning_rate": 3.6317941916360296e-05, "loss": 0.0295, "step": 36530 }, { "epoch": 9.881016765819362, "grad_norm": 0.2186213880777359, "learning_rate": 3.629143799683221e-05, "loss": 0.0296, "step": 36540 }, { "epoch": 9.883720930232558, "grad_norm": 0.14056894183158875, "learning_rate": 3.626493824160331e-05, "loss": 0.0289, "step": 36550 }, { "epoch": 9.886425094645755, "grad_norm": 0.16478018462657928, "learning_rate": 3.623844265872352e-05, "loss": 0.031, "step": 36560 }, { "epoch": 9.88912925905895, "grad_norm": 0.1343480348587036, "learning_rate": 3.621195125624149e-05, "loss": 0.0296, "step": 36570 }, { "epoch": 9.891833423472146, "grad_norm": 0.13479742407798767, "learning_rate": 3.618546404220463e-05, "loss": 0.0291, "step": 36580 }, { "epoch": 9.894537587885344, "grad_norm": 0.19858166575431824, "learning_rate": 3.615898102465903e-05, "loss": 0.0291, "step": 36590 }, { "epoch": 9.89724175229854, "grad_norm": 0.16330470144748688, "learning_rate": 3.6132502211649544e-05, "loss": 0.0295, "step": 36600 }, { "epoch": 9.899945916711737, "grad_norm": 0.2175230085849762, "learning_rate": 3.610602761121975e-05, "loss": 0.0298, "step": 36610 }, { "epoch": 9.902650081124932, "grad_norm": 0.27148982882499695, "learning_rate": 3.6079557231411897e-05, "loss": 0.0287, "step": 36620 }, { "epoch": 9.90535424553813, "grad_norm": 0.17846663296222687, "learning_rate": 3.6053091080267035e-05, "loss": 0.0283, "step": 36630 }, { "epoch": 9.908058409951325, "grad_norm": 0.18602699041366577, "learning_rate": 3.602662916582483e-05, "loss": 0.0303, "step": 36640 }, { "epoch": 9.910762574364522, "grad_norm": 0.1938813179731369, "learning_rate": 3.600017149612375e-05, "loss": 0.0279, "step": 36650 }, { "epoch": 9.913466738777718, "grad_norm": 0.14578332006931305, "learning_rate": 3.5973718079200935e-05, "loss": 0.0287, "step": 36660 }, { "epoch": 9.916170903190913, "grad_norm": 0.15460915863513947, "learning_rate": 3.5947268923092216e-05, "loss": 0.0283, "step": 36670 }, { "epoch": 9.91887506760411, "grad_norm": 0.1271524429321289, "learning_rate": 3.592082403583216e-05, "loss": 0.0296, "step": 36680 }, { "epoch": 9.921579232017306, "grad_norm": 0.22517919540405273, "learning_rate": 3.5894383425454004e-05, "loss": 0.0295, "step": 36690 }, { "epoch": 9.924283396430504, "grad_norm": 0.11010700464248657, "learning_rate": 3.586794709998975e-05, "loss": 0.0298, "step": 36700 }, { "epoch": 9.926987560843699, "grad_norm": 0.13139335811138153, "learning_rate": 3.584151506747002e-05, "loss": 0.0289, "step": 36710 }, { "epoch": 9.929691725256896, "grad_norm": 0.16595177352428436, "learning_rate": 3.581508733592418e-05, "loss": 0.0298, "step": 36720 }, { "epoch": 9.932395889670092, "grad_norm": 0.1211567372083664, "learning_rate": 3.5788663913380297e-05, "loss": 0.0292, "step": 36730 }, { "epoch": 9.935100054083287, "grad_norm": 0.1473652422428131, "learning_rate": 3.576224480786506e-05, "loss": 0.0291, "step": 36740 }, { "epoch": 9.937804218496485, "grad_norm": 0.28483039140701294, "learning_rate": 3.573583002740393e-05, "loss": 0.0292, "step": 36750 }, { "epoch": 9.94050838290968, "grad_norm": 0.25754833221435547, "learning_rate": 3.570941958002103e-05, "loss": 0.0301, "step": 36760 }, { "epoch": 9.943212547322878, "grad_norm": 0.1499119997024536, "learning_rate": 3.568301347373912e-05, "loss": 0.0294, "step": 36770 }, { "epoch": 9.945916711736073, "grad_norm": 0.2351083755493164, "learning_rate": 3.5656611716579726e-05, "loss": 0.0287, "step": 36780 }, { "epoch": 9.94862087614927, "grad_norm": 0.1939598172903061, "learning_rate": 3.5630214316562946e-05, "loss": 0.0296, "step": 36790 }, { "epoch": 9.951325040562466, "grad_norm": 0.15480078756809235, "learning_rate": 3.560382128170766e-05, "loss": 0.03, "step": 36800 }, { "epoch": 9.954029204975663, "grad_norm": 0.13159051537513733, "learning_rate": 3.5577432620031374e-05, "loss": 0.0287, "step": 36810 }, { "epoch": 9.956733369388859, "grad_norm": 0.17252035439014435, "learning_rate": 3.5551048339550216e-05, "loss": 0.0301, "step": 36820 }, { "epoch": 9.959437533802054, "grad_norm": 0.1481553614139557, "learning_rate": 3.55246684482791e-05, "loss": 0.029, "step": 36830 }, { "epoch": 9.962141698215252, "grad_norm": 0.1509542614221573, "learning_rate": 3.5498292954231496e-05, "loss": 0.0295, "step": 36840 }, { "epoch": 9.964845862628447, "grad_norm": 0.1614450216293335, "learning_rate": 3.54719218654196e-05, "loss": 0.0285, "step": 36850 }, { "epoch": 9.967550027041645, "grad_norm": 0.12432575225830078, "learning_rate": 3.544555518985425e-05, "loss": 0.0281, "step": 36860 }, { "epoch": 9.97025419145484, "grad_norm": 0.20705436170101166, "learning_rate": 3.541919293554494e-05, "loss": 0.0292, "step": 36870 }, { "epoch": 9.972958355868037, "grad_norm": 0.14758020639419556, "learning_rate": 3.539283511049985e-05, "loss": 0.0276, "step": 36880 }, { "epoch": 9.975662520281233, "grad_norm": 0.1493089348077774, "learning_rate": 3.5366481722725755e-05, "loss": 0.0291, "step": 36890 }, { "epoch": 9.978366684694429, "grad_norm": 0.12846578657627106, "learning_rate": 3.534013278022816e-05, "loss": 0.0292, "step": 36900 }, { "epoch": 9.981070849107626, "grad_norm": 0.18934936821460724, "learning_rate": 3.531378829101113e-05, "loss": 0.0278, "step": 36910 }, { "epoch": 9.983775013520821, "grad_norm": 0.1975332349538803, "learning_rate": 3.528744826307746e-05, "loss": 0.0299, "step": 36920 }, { "epoch": 9.986479177934019, "grad_norm": 0.14882326126098633, "learning_rate": 3.5261112704428554e-05, "loss": 0.0282, "step": 36930 }, { "epoch": 9.989183342347214, "grad_norm": 0.12490471452474594, "learning_rate": 3.523478162306443e-05, "loss": 0.0281, "step": 36940 }, { "epoch": 9.991887506760412, "grad_norm": 0.1611798107624054, "learning_rate": 3.520845502698381e-05, "loss": 0.03, "step": 36950 }, { "epoch": 9.994591671173607, "grad_norm": 0.12321146577596664, "learning_rate": 3.5182132924184005e-05, "loss": 0.0282, "step": 36960 }, { "epoch": 9.997295835586804, "grad_norm": 0.11173451691865921, "learning_rate": 3.5155815322660966e-05, "loss": 0.0275, "step": 36970 }, { "epoch": 10.0, "grad_norm": 0.1361643224954605, "learning_rate": 3.512950223040931e-05, "loss": 0.0286, "step": 36980 }, { "epoch": 10.002704164413196, "grad_norm": 0.14149104058742523, "learning_rate": 3.5103193655422216e-05, "loss": 0.0288, "step": 36990 }, { "epoch": 10.005408328826393, "grad_norm": 0.1744523048400879, "learning_rate": 3.5076889605691596e-05, "loss": 0.0288, "step": 37000 }, { "epoch": 10.008112493239588, "grad_norm": 0.15183719992637634, "learning_rate": 3.505059008920787e-05, "loss": 0.0297, "step": 37010 }, { "epoch": 10.010816657652786, "grad_norm": 0.17183518409729004, "learning_rate": 3.502429511396016e-05, "loss": 0.0284, "step": 37020 }, { "epoch": 10.013520822065981, "grad_norm": 0.15558165311813354, "learning_rate": 3.4998004687936196e-05, "loss": 0.0297, "step": 37030 }, { "epoch": 10.016224986479179, "grad_norm": 0.12205963581800461, "learning_rate": 3.497171881912229e-05, "loss": 0.0294, "step": 37040 }, { "epoch": 10.018929150892374, "grad_norm": 0.1412138193845749, "learning_rate": 3.494543751550342e-05, "loss": 0.0281, "step": 37050 }, { "epoch": 10.021633315305571, "grad_norm": 0.11109301447868347, "learning_rate": 3.491916078506313e-05, "loss": 0.0294, "step": 37060 }, { "epoch": 10.024337479718767, "grad_norm": 0.18778853118419647, "learning_rate": 3.489288863578361e-05, "loss": 0.0282, "step": 37070 }, { "epoch": 10.027041644131963, "grad_norm": 0.24978429079055786, "learning_rate": 3.4866621075645646e-05, "loss": 0.03, "step": 37080 }, { "epoch": 10.02974580854516, "grad_norm": 0.15829023718833923, "learning_rate": 3.4840358112628614e-05, "loss": 0.0285, "step": 37090 }, { "epoch": 10.032449972958355, "grad_norm": 0.12957856059074402, "learning_rate": 3.481409975471053e-05, "loss": 0.0289, "step": 37100 }, { "epoch": 10.035154137371553, "grad_norm": 0.1286400556564331, "learning_rate": 3.4787846009867986e-05, "loss": 0.0277, "step": 37110 }, { "epoch": 10.037858301784748, "grad_norm": 0.16698311269283295, "learning_rate": 3.476159688607615e-05, "loss": 0.0298, "step": 37120 }, { "epoch": 10.040562466197946, "grad_norm": 0.11434360593557358, "learning_rate": 3.4735352391308854e-05, "loss": 0.0275, "step": 37130 }, { "epoch": 10.043266630611141, "grad_norm": 0.12273135036230087, "learning_rate": 3.4709112533538446e-05, "loss": 0.0297, "step": 37140 }, { "epoch": 10.045970795024337, "grad_norm": 0.15264013409614563, "learning_rate": 3.4682877320735934e-05, "loss": 0.0282, "step": 37150 }, { "epoch": 10.048674959437534, "grad_norm": 0.20238281786441803, "learning_rate": 3.465664676087085e-05, "loss": 0.0282, "step": 37160 }, { "epoch": 10.05137912385073, "grad_norm": 0.11153608560562134, "learning_rate": 3.463042086191136e-05, "loss": 0.0294, "step": 37170 }, { "epoch": 10.054083288263927, "grad_norm": 0.2070043832063675, "learning_rate": 3.460419963182423e-05, "loss": 0.0292, "step": 37180 }, { "epoch": 10.056787452677122, "grad_norm": 0.15780499577522278, "learning_rate": 3.457798307857473e-05, "loss": 0.0283, "step": 37190 }, { "epoch": 10.05949161709032, "grad_norm": 0.16070741415023804, "learning_rate": 3.455177121012678e-05, "loss": 0.029, "step": 37200 }, { "epoch": 10.062195781503515, "grad_norm": 0.13152803480625153, "learning_rate": 3.452556403444285e-05, "loss": 0.0296, "step": 37210 }, { "epoch": 10.064899945916713, "grad_norm": 0.22501859068870544, "learning_rate": 3.449936155948397e-05, "loss": 0.0279, "step": 37220 }, { "epoch": 10.067604110329908, "grad_norm": 0.14871147274971008, "learning_rate": 3.44731637932098e-05, "loss": 0.0283, "step": 37230 }, { "epoch": 10.070308274743104, "grad_norm": 0.14408038556575775, "learning_rate": 3.44469707435785e-05, "loss": 0.0282, "step": 37240 }, { "epoch": 10.073012439156301, "grad_norm": 0.14285610616207123, "learning_rate": 3.4420782418546835e-05, "loss": 0.0284, "step": 37250 }, { "epoch": 10.075716603569496, "grad_norm": 0.12057728320360184, "learning_rate": 3.439459882607012e-05, "loss": 0.0304, "step": 37260 }, { "epoch": 10.078420767982694, "grad_norm": 0.12367226183414459, "learning_rate": 3.436841997410225e-05, "loss": 0.029, "step": 37270 }, { "epoch": 10.08112493239589, "grad_norm": 0.14250439405441284, "learning_rate": 3.434224587059567e-05, "loss": 0.0297, "step": 37280 }, { "epoch": 10.083829096809087, "grad_norm": 0.19760096073150635, "learning_rate": 3.431607652350136e-05, "loss": 0.0284, "step": 37290 }, { "epoch": 10.086533261222282, "grad_norm": 0.1983530968427658, "learning_rate": 3.428991194076891e-05, "loss": 0.0305, "step": 37300 }, { "epoch": 10.089237425635478, "grad_norm": 0.23957708477973938, "learning_rate": 3.4263752130346394e-05, "loss": 0.028, "step": 37310 }, { "epoch": 10.091941590048675, "grad_norm": 0.15217310190200806, "learning_rate": 3.4237597100180515e-05, "loss": 0.0275, "step": 37320 }, { "epoch": 10.09464575446187, "grad_norm": 0.14177747070789337, "learning_rate": 3.4211446858216427e-05, "loss": 0.0297, "step": 37330 }, { "epoch": 10.097349918875068, "grad_norm": 0.1716204583644867, "learning_rate": 3.4185301412397915e-05, "loss": 0.0298, "step": 37340 }, { "epoch": 10.100054083288263, "grad_norm": 0.14793671667575836, "learning_rate": 3.415916077066729e-05, "loss": 0.0289, "step": 37350 }, { "epoch": 10.10275824770146, "grad_norm": 0.12957635521888733, "learning_rate": 3.413302494096535e-05, "loss": 0.0291, "step": 37360 }, { "epoch": 10.105462412114656, "grad_norm": 0.1156509667634964, "learning_rate": 3.410689393123151e-05, "loss": 0.0295, "step": 37370 }, { "epoch": 10.108166576527854, "grad_norm": 0.12253998965024948, "learning_rate": 3.408076774940364e-05, "loss": 0.0274, "step": 37380 }, { "epoch": 10.11087074094105, "grad_norm": 0.1229424774646759, "learning_rate": 3.40546464034182e-05, "loss": 0.0291, "step": 37390 }, { "epoch": 10.113574905354245, "grad_norm": 0.10487870126962662, "learning_rate": 3.4028529901210185e-05, "loss": 0.0291, "step": 37400 }, { "epoch": 10.116279069767442, "grad_norm": 0.13452881574630737, "learning_rate": 3.4002418250713086e-05, "loss": 0.0274, "step": 37410 }, { "epoch": 10.118983234180638, "grad_norm": 0.13334931433200836, "learning_rate": 3.3976311459858936e-05, "loss": 0.0293, "step": 37420 }, { "epoch": 10.121687398593835, "grad_norm": 0.1929878294467926, "learning_rate": 3.395020953657826e-05, "loss": 0.0278, "step": 37430 }, { "epoch": 10.12439156300703, "grad_norm": 0.08674418926239014, "learning_rate": 3.3924112488800165e-05, "loss": 0.0277, "step": 37440 }, { "epoch": 10.127095727420228, "grad_norm": 0.12542378902435303, "learning_rate": 3.389802032445225e-05, "loss": 0.0291, "step": 37450 }, { "epoch": 10.129799891833423, "grad_norm": 0.10887093842029572, "learning_rate": 3.38719330514606e-05, "loss": 0.0298, "step": 37460 }, { "epoch": 10.13250405624662, "grad_norm": 0.1462523490190506, "learning_rate": 3.3845850677749866e-05, "loss": 0.0287, "step": 37470 }, { "epoch": 10.135208220659816, "grad_norm": 0.15423545241355896, "learning_rate": 3.3819773211243157e-05, "loss": 0.0269, "step": 37480 }, { "epoch": 10.137912385073012, "grad_norm": 0.1741744577884674, "learning_rate": 3.379370065986213e-05, "loss": 0.0291, "step": 37490 }, { "epoch": 10.140616549486209, "grad_norm": 0.13760483264923096, "learning_rate": 3.3767633031526955e-05, "loss": 0.0291, "step": 37500 }, { "epoch": 10.143320713899405, "grad_norm": 0.2001723349094391, "learning_rate": 3.374157033415626e-05, "loss": 0.0281, "step": 37510 }, { "epoch": 10.146024878312602, "grad_norm": 0.11163949966430664, "learning_rate": 3.371551257566723e-05, "loss": 0.0286, "step": 37520 }, { "epoch": 10.148729042725797, "grad_norm": 0.194017231464386, "learning_rate": 3.36894597639755e-05, "loss": 0.028, "step": 37530 }, { "epoch": 10.151433207138995, "grad_norm": 0.0898120254278183, "learning_rate": 3.366341190699523e-05, "loss": 0.028, "step": 37540 }, { "epoch": 10.15413737155219, "grad_norm": 0.16747719049453735, "learning_rate": 3.36373690126391e-05, "loss": 0.0293, "step": 37550 }, { "epoch": 10.156841535965386, "grad_norm": 0.1683325320482254, "learning_rate": 3.3611331088818234e-05, "loss": 0.0298, "step": 37560 }, { "epoch": 10.159545700378583, "grad_norm": 0.13794349133968353, "learning_rate": 3.3585298143442265e-05, "loss": 0.0271, "step": 37570 }, { "epoch": 10.162249864791779, "grad_norm": 0.1372705101966858, "learning_rate": 3.35592701844193e-05, "loss": 0.0282, "step": 37580 }, { "epoch": 10.164954029204976, "grad_norm": 0.1306617110967636, "learning_rate": 3.353324721965596e-05, "loss": 0.0279, "step": 37590 }, { "epoch": 10.167658193618172, "grad_norm": 0.16489878296852112, "learning_rate": 3.350722925705736e-05, "loss": 0.0277, "step": 37600 }, { "epoch": 10.170362358031369, "grad_norm": 0.10386840254068375, "learning_rate": 3.348121630452703e-05, "loss": 0.0294, "step": 37610 }, { "epoch": 10.173066522444564, "grad_norm": 0.12696392834186554, "learning_rate": 3.3455208369967044e-05, "loss": 0.0284, "step": 37620 }, { "epoch": 10.175770686857762, "grad_norm": 0.1884358823299408, "learning_rate": 3.34292054612779e-05, "loss": 0.0284, "step": 37630 }, { "epoch": 10.178474851270957, "grad_norm": 0.1723966747522354, "learning_rate": 3.340320758635861e-05, "loss": 0.0274, "step": 37640 }, { "epoch": 10.181179015684153, "grad_norm": 0.16198840737342834, "learning_rate": 3.337721475310666e-05, "loss": 0.0288, "step": 37650 }, { "epoch": 10.18388318009735, "grad_norm": 0.11868756264448166, "learning_rate": 3.335122696941795e-05, "loss": 0.0296, "step": 37660 }, { "epoch": 10.186587344510546, "grad_norm": 0.13477137684822083, "learning_rate": 3.332524424318692e-05, "loss": 0.0276, "step": 37670 }, { "epoch": 10.189291508923743, "grad_norm": 0.18302211165428162, "learning_rate": 3.32992665823064e-05, "loss": 0.0282, "step": 37680 }, { "epoch": 10.191995673336939, "grad_norm": 0.15192116796970367, "learning_rate": 3.327329399466774e-05, "loss": 0.0286, "step": 37690 }, { "epoch": 10.194699837750136, "grad_norm": 0.18514761328697205, "learning_rate": 3.324732648816072e-05, "loss": 0.029, "step": 37700 }, { "epoch": 10.197404002163331, "grad_norm": 0.1434185653924942, "learning_rate": 3.322136407067358e-05, "loss": 0.029, "step": 37710 }, { "epoch": 10.200108166576527, "grad_norm": 0.16809603571891785, "learning_rate": 3.3195406750093036e-05, "loss": 0.0286, "step": 37720 }, { "epoch": 10.202812330989724, "grad_norm": 0.30654391646385193, "learning_rate": 3.3169454534304205e-05, "loss": 0.029, "step": 37730 }, { "epoch": 10.20551649540292, "grad_norm": 0.16192607581615448, "learning_rate": 3.3143507431190725e-05, "loss": 0.0286, "step": 37740 }, { "epoch": 10.208220659816117, "grad_norm": 0.2583408057689667, "learning_rate": 3.311756544863459e-05, "loss": 0.03, "step": 37750 }, { "epoch": 10.210924824229313, "grad_norm": 0.22876133024692535, "learning_rate": 3.309162859451633e-05, "loss": 0.0282, "step": 37760 }, { "epoch": 10.21362898864251, "grad_norm": 0.17862579226493835, "learning_rate": 3.306569687671487e-05, "loss": 0.0297, "step": 37770 }, { "epoch": 10.216333153055706, "grad_norm": 0.1997608244419098, "learning_rate": 3.303977030310756e-05, "loss": 0.0274, "step": 37780 }, { "epoch": 10.219037317468903, "grad_norm": 0.15179578959941864, "learning_rate": 3.3013848881570245e-05, "loss": 0.0293, "step": 37790 }, { "epoch": 10.221741481882098, "grad_norm": 0.10437720268964767, "learning_rate": 3.298793261997712e-05, "loss": 0.0272, "step": 37800 }, { "epoch": 10.224445646295294, "grad_norm": 0.1530754715204239, "learning_rate": 3.2962021526200893e-05, "loss": 0.0296, "step": 37810 }, { "epoch": 10.227149810708491, "grad_norm": 0.18329757452011108, "learning_rate": 3.293611560811268e-05, "loss": 0.0282, "step": 37820 }, { "epoch": 10.229853975121687, "grad_norm": 0.17405183613300323, "learning_rate": 3.291021487358199e-05, "loss": 0.0292, "step": 37830 }, { "epoch": 10.232558139534884, "grad_norm": 0.15190371870994568, "learning_rate": 3.28843193304768e-05, "loss": 0.0296, "step": 37840 }, { "epoch": 10.23526230394808, "grad_norm": 0.12600718438625336, "learning_rate": 3.2858428986663456e-05, "loss": 0.0283, "step": 37850 }, { "epoch": 10.237966468361277, "grad_norm": 0.3466797471046448, "learning_rate": 3.283254385000681e-05, "loss": 0.0277, "step": 37860 }, { "epoch": 10.240670632774473, "grad_norm": 0.11446448415517807, "learning_rate": 3.2806663928370076e-05, "loss": 0.0283, "step": 37870 }, { "epoch": 10.24337479718767, "grad_norm": 0.16074608266353607, "learning_rate": 3.278078922961485e-05, "loss": 0.0285, "step": 37880 }, { "epoch": 10.246078961600865, "grad_norm": 0.15392433106899261, "learning_rate": 3.275491976160123e-05, "loss": 0.0289, "step": 37890 }, { "epoch": 10.248783126014061, "grad_norm": 0.13693514466285706, "learning_rate": 3.2729055532187645e-05, "loss": 0.0281, "step": 37900 }, { "epoch": 10.251487290427258, "grad_norm": 0.1316356658935547, "learning_rate": 3.270319654923097e-05, "loss": 0.0294, "step": 37910 }, { "epoch": 10.254191454840454, "grad_norm": 0.10231151431798935, "learning_rate": 3.2677342820586506e-05, "loss": 0.0288, "step": 37920 }, { "epoch": 10.256895619253651, "grad_norm": 0.1228865385055542, "learning_rate": 3.2651494354107905e-05, "loss": 0.0287, "step": 37930 }, { "epoch": 10.259599783666847, "grad_norm": 0.1918238401412964, "learning_rate": 3.2625651157647266e-05, "loss": 0.0287, "step": 37940 }, { "epoch": 10.262303948080044, "grad_norm": 0.12834948301315308, "learning_rate": 3.259981323905505e-05, "loss": 0.0279, "step": 37950 }, { "epoch": 10.26500811249324, "grad_norm": 0.15314233303070068, "learning_rate": 3.257398060618014e-05, "loss": 0.0282, "step": 37960 }, { "epoch": 10.267712276906435, "grad_norm": 0.19029493629932404, "learning_rate": 3.254815326686983e-05, "loss": 0.0287, "step": 37970 }, { "epoch": 10.270416441319632, "grad_norm": 0.12650127708911896, "learning_rate": 3.2522331228969774e-05, "loss": 0.0292, "step": 37980 }, { "epoch": 10.273120605732828, "grad_norm": 0.12694363296031952, "learning_rate": 3.2496514500324006e-05, "loss": 0.0292, "step": 37990 }, { "epoch": 10.275824770146025, "grad_norm": 0.10217157751321793, "learning_rate": 3.247070308877498e-05, "loss": 0.0297, "step": 38000 }, { "epoch": 10.27852893455922, "grad_norm": 0.13127447664737701, "learning_rate": 3.2444897002163515e-05, "loss": 0.0282, "step": 38010 }, { "epoch": 10.281233098972418, "grad_norm": 0.14494194090366364, "learning_rate": 3.241909624832885e-05, "loss": 0.0291, "step": 38020 }, { "epoch": 10.283937263385614, "grad_norm": 0.1700543910264969, "learning_rate": 3.239330083510852e-05, "loss": 0.0291, "step": 38030 }, { "epoch": 10.286641427798811, "grad_norm": 0.14012010395526886, "learning_rate": 3.236751077033855e-05, "loss": 0.0288, "step": 38040 }, { "epoch": 10.289345592212007, "grad_norm": 0.13953736424446106, "learning_rate": 3.234172606185322e-05, "loss": 0.0296, "step": 38050 }, { "epoch": 10.292049756625202, "grad_norm": 0.14523082971572876, "learning_rate": 3.231594671748528e-05, "loss": 0.0287, "step": 38060 }, { "epoch": 10.2947539210384, "grad_norm": 0.16367800533771515, "learning_rate": 3.2290172745065815e-05, "loss": 0.03, "step": 38070 }, { "epoch": 10.297458085451595, "grad_norm": 0.11230361461639404, "learning_rate": 3.226440415242426e-05, "loss": 0.0297, "step": 38080 }, { "epoch": 10.300162249864792, "grad_norm": 0.1596880704164505, "learning_rate": 3.223864094738846e-05, "loss": 0.029, "step": 38090 }, { "epoch": 10.302866414277988, "grad_norm": 0.18956008553504944, "learning_rate": 3.221288313778456e-05, "loss": 0.0284, "step": 38100 }, { "epoch": 10.305570578691185, "grad_norm": 0.1434888392686844, "learning_rate": 3.2187130731437125e-05, "loss": 0.0277, "step": 38110 }, { "epoch": 10.30827474310438, "grad_norm": 0.2035386711359024, "learning_rate": 3.216138373616905e-05, "loss": 0.0294, "step": 38120 }, { "epoch": 10.310978907517576, "grad_norm": 0.2876192331314087, "learning_rate": 3.21356421598016e-05, "loss": 0.0291, "step": 38130 }, { "epoch": 10.313683071930773, "grad_norm": 0.18121761083602905, "learning_rate": 3.210990601015438e-05, "loss": 0.0285, "step": 38140 }, { "epoch": 10.316387236343969, "grad_norm": 0.13303637504577637, "learning_rate": 3.208417529504535e-05, "loss": 0.028, "step": 38150 }, { "epoch": 10.319091400757166, "grad_norm": 0.1386861950159073, "learning_rate": 3.205845002229084e-05, "loss": 0.0273, "step": 38160 }, { "epoch": 10.321795565170362, "grad_norm": 0.10365189611911774, "learning_rate": 3.203273019970547e-05, "loss": 0.0281, "step": 38170 }, { "epoch": 10.32449972958356, "grad_norm": 0.15164634585380554, "learning_rate": 3.200701583510227e-05, "loss": 0.0285, "step": 38180 }, { "epoch": 10.327203893996755, "grad_norm": 0.10040789097547531, "learning_rate": 3.198130693629261e-05, "loss": 0.028, "step": 38190 }, { "epoch": 10.329908058409952, "grad_norm": 0.13510683178901672, "learning_rate": 3.195560351108612e-05, "loss": 0.0295, "step": 38200 }, { "epoch": 10.332612222823148, "grad_norm": 0.13885526359081268, "learning_rate": 3.1929905567290865e-05, "loss": 0.0269, "step": 38210 }, { "epoch": 10.335316387236343, "grad_norm": 0.19929476082324982, "learning_rate": 3.1904213112713164e-05, "loss": 0.0284, "step": 38220 }, { "epoch": 10.33802055164954, "grad_norm": 0.13808397948741913, "learning_rate": 3.187852615515774e-05, "loss": 0.0274, "step": 38230 }, { "epoch": 10.340724716062736, "grad_norm": 0.10712933540344238, "learning_rate": 3.1852844702427606e-05, "loss": 0.0284, "step": 38240 }, { "epoch": 10.343428880475933, "grad_norm": 0.13479910790920258, "learning_rate": 3.18271687623241e-05, "loss": 0.0296, "step": 38250 }, { "epoch": 10.346133044889129, "grad_norm": 0.16544795036315918, "learning_rate": 3.1801498342646896e-05, "loss": 0.0292, "step": 38260 }, { "epoch": 10.348837209302326, "grad_norm": 0.09994316101074219, "learning_rate": 3.177583345119398e-05, "loss": 0.0294, "step": 38270 }, { "epoch": 10.351541373715522, "grad_norm": 0.10954268276691437, "learning_rate": 3.17501740957617e-05, "loss": 0.0284, "step": 38280 }, { "epoch": 10.354245538128719, "grad_norm": 0.12831741571426392, "learning_rate": 3.172452028414467e-05, "loss": 0.0284, "step": 38290 }, { "epoch": 10.356949702541915, "grad_norm": 0.12702961266040802, "learning_rate": 3.169887202413583e-05, "loss": 0.0288, "step": 38300 }, { "epoch": 10.35965386695511, "grad_norm": 0.2056150734424591, "learning_rate": 3.167322932352646e-05, "loss": 0.0283, "step": 38310 }, { "epoch": 10.362358031368307, "grad_norm": 0.1414797157049179, "learning_rate": 3.164759219010613e-05, "loss": 0.0273, "step": 38320 }, { "epoch": 10.365062195781503, "grad_norm": 0.14366517961025238, "learning_rate": 3.1621960631662725e-05, "loss": 0.0279, "step": 38330 }, { "epoch": 10.3677663601947, "grad_norm": 0.14425796270370483, "learning_rate": 3.159633465598245e-05, "loss": 0.0284, "step": 38340 }, { "epoch": 10.370470524607896, "grad_norm": 0.08786685764789581, "learning_rate": 3.1570714270849767e-05, "loss": 0.0279, "step": 38350 }, { "epoch": 10.373174689021093, "grad_norm": 0.2079101800918579, "learning_rate": 3.1545099484047516e-05, "loss": 0.0292, "step": 38360 }, { "epoch": 10.375878853434289, "grad_norm": 0.11463640630245209, "learning_rate": 3.151949030335674e-05, "loss": 0.0291, "step": 38370 }, { "epoch": 10.378583017847484, "grad_norm": 0.09175976365804672, "learning_rate": 3.149388673655687e-05, "loss": 0.0266, "step": 38380 }, { "epoch": 10.381287182260682, "grad_norm": 0.11346009373664856, "learning_rate": 3.146828879142559e-05, "loss": 0.0276, "step": 38390 }, { "epoch": 10.383991346673877, "grad_norm": 0.10972515493631363, "learning_rate": 3.1442696475738866e-05, "loss": 0.0282, "step": 38400 }, { "epoch": 10.386695511087074, "grad_norm": 0.24065278470516205, "learning_rate": 3.141710979727098e-05, "loss": 0.0276, "step": 38410 }, { "epoch": 10.38939967550027, "grad_norm": 0.18018290400505066, "learning_rate": 3.139152876379447e-05, "loss": 0.0276, "step": 38420 }, { "epoch": 10.392103839913467, "grad_norm": 0.17932575941085815, "learning_rate": 3.1365953383080214e-05, "loss": 0.0281, "step": 38430 }, { "epoch": 10.394808004326663, "grad_norm": 0.16361108422279358, "learning_rate": 3.134038366289731e-05, "loss": 0.0277, "step": 38440 }, { "epoch": 10.39751216873986, "grad_norm": 0.18663768470287323, "learning_rate": 3.131481961101317e-05, "loss": 0.0281, "step": 38450 }, { "epoch": 10.400216333153056, "grad_norm": 0.1580498367547989, "learning_rate": 3.128926123519349e-05, "loss": 0.0298, "step": 38460 }, { "epoch": 10.402920497566251, "grad_norm": 0.22633129358291626, "learning_rate": 3.1263708543202194e-05, "loss": 0.0278, "step": 38470 }, { "epoch": 10.405624661979449, "grad_norm": 0.152747243642807, "learning_rate": 3.123816154280155e-05, "loss": 0.0284, "step": 38480 }, { "epoch": 10.408328826392644, "grad_norm": 0.23001529276371002, "learning_rate": 3.121262024175207e-05, "loss": 0.0288, "step": 38490 }, { "epoch": 10.411032990805841, "grad_norm": 0.24828140437602997, "learning_rate": 3.118708464781248e-05, "loss": 0.029, "step": 38500 }, { "epoch": 10.413737155219037, "grad_norm": 0.10266147553920746, "learning_rate": 3.116155476873987e-05, "loss": 0.029, "step": 38510 }, { "epoch": 10.416441319632234, "grad_norm": 0.19012683629989624, "learning_rate": 3.11360306122895e-05, "loss": 0.0298, "step": 38520 }, { "epoch": 10.41914548404543, "grad_norm": 0.18606893718242645, "learning_rate": 3.1110512186214975e-05, "loss": 0.0276, "step": 38530 }, { "epoch": 10.421849648458625, "grad_norm": 0.23695862293243408, "learning_rate": 3.1084999498268095e-05, "loss": 0.0297, "step": 38540 }, { "epoch": 10.424553812871823, "grad_norm": 0.13015888631343842, "learning_rate": 3.1059492556198934e-05, "loss": 0.0286, "step": 38550 }, { "epoch": 10.427257977285018, "grad_norm": 0.18867309391498566, "learning_rate": 3.103399136775586e-05, "loss": 0.0283, "step": 38560 }, { "epoch": 10.429962141698216, "grad_norm": 0.17508326470851898, "learning_rate": 3.100849594068541e-05, "loss": 0.0287, "step": 38570 }, { "epoch": 10.432666306111411, "grad_norm": 0.1681606024503708, "learning_rate": 3.0983006282732484e-05, "loss": 0.0279, "step": 38580 }, { "epoch": 10.435370470524608, "grad_norm": 0.24750825762748718, "learning_rate": 3.0957522401640116e-05, "loss": 0.0277, "step": 38590 }, { "epoch": 10.438074634937804, "grad_norm": 0.12466798722743988, "learning_rate": 3.0932044305149645e-05, "loss": 0.0277, "step": 38600 }, { "epoch": 10.440778799351001, "grad_norm": 0.1472880244255066, "learning_rate": 3.090657200100068e-05, "loss": 0.0293, "step": 38610 }, { "epoch": 10.443482963764197, "grad_norm": 0.15057450532913208, "learning_rate": 3.088110549693099e-05, "loss": 0.0292, "step": 38620 }, { "epoch": 10.446187128177392, "grad_norm": 0.12721794843673706, "learning_rate": 3.085564480067667e-05, "loss": 0.0275, "step": 38630 }, { "epoch": 10.44889129259059, "grad_norm": 0.13130652904510498, "learning_rate": 3.0830189919971955e-05, "loss": 0.0286, "step": 38640 }, { "epoch": 10.451595457003785, "grad_norm": 0.2300572544336319, "learning_rate": 3.080474086254939e-05, "loss": 0.0282, "step": 38650 }, { "epoch": 10.454299621416983, "grad_norm": 0.12123852223157883, "learning_rate": 3.077929763613975e-05, "loss": 0.0277, "step": 38660 }, { "epoch": 10.457003785830178, "grad_norm": 0.31108176708221436, "learning_rate": 3.075386024847198e-05, "loss": 0.0278, "step": 38670 }, { "epoch": 10.459707950243375, "grad_norm": 0.22571246325969696, "learning_rate": 3.072842870727331e-05, "loss": 0.0274, "step": 38680 }, { "epoch": 10.462412114656571, "grad_norm": 0.20620335638523102, "learning_rate": 3.070300302026916e-05, "loss": 0.0282, "step": 38690 }, { "epoch": 10.465116279069768, "grad_norm": 0.20796237885951996, "learning_rate": 3.067758319518318e-05, "loss": 0.0288, "step": 38700 }, { "epoch": 10.467820443482964, "grad_norm": 0.15685611963272095, "learning_rate": 3.065216923973725e-05, "loss": 0.0281, "step": 38710 }, { "epoch": 10.47052460789616, "grad_norm": 0.15850338339805603, "learning_rate": 3.062676116165145e-05, "loss": 0.0278, "step": 38720 }, { "epoch": 10.473228772309357, "grad_norm": 0.18366797268390656, "learning_rate": 3.06013589686441e-05, "loss": 0.0277, "step": 38730 }, { "epoch": 10.475932936722552, "grad_norm": 0.1542639434337616, "learning_rate": 3.05759626684317e-05, "loss": 0.0271, "step": 38740 }, { "epoch": 10.47863710113575, "grad_norm": 0.15588000416755676, "learning_rate": 3.055057226872896e-05, "loss": 0.0282, "step": 38750 }, { "epoch": 10.481341265548945, "grad_norm": 0.10532104969024658, "learning_rate": 3.052518777724887e-05, "loss": 0.0277, "step": 38760 }, { "epoch": 10.484045429962142, "grad_norm": 0.1435399353504181, "learning_rate": 3.04998092017025e-05, "loss": 0.028, "step": 38770 }, { "epoch": 10.486749594375338, "grad_norm": 0.14158004522323608, "learning_rate": 3.0474436549799246e-05, "loss": 0.0304, "step": 38780 }, { "epoch": 10.489453758788535, "grad_norm": 0.15955878794193268, "learning_rate": 3.044906982924661e-05, "loss": 0.0283, "step": 38790 }, { "epoch": 10.49215792320173, "grad_norm": 0.10097185522317886, "learning_rate": 3.0423709047750337e-05, "loss": 0.0274, "step": 38800 }, { "epoch": 10.494862087614926, "grad_norm": 0.11042492091655731, "learning_rate": 3.03983542130144e-05, "loss": 0.0277, "step": 38810 }, { "epoch": 10.497566252028124, "grad_norm": 0.12879903614521027, "learning_rate": 3.0373005332740877e-05, "loss": 0.0286, "step": 38820 }, { "epoch": 10.50027041644132, "grad_norm": 0.24182195961475372, "learning_rate": 3.034766241463013e-05, "loss": 0.0305, "step": 38830 }, { "epoch": 10.502974580854517, "grad_norm": 0.1603652983903885, "learning_rate": 3.032232546638064e-05, "loss": 0.0278, "step": 38840 }, { "epoch": 10.505678745267712, "grad_norm": 0.18054857850074768, "learning_rate": 3.0296994495689114e-05, "loss": 0.0284, "step": 38850 }, { "epoch": 10.50838290968091, "grad_norm": 0.21253684163093567, "learning_rate": 3.0271669510250444e-05, "loss": 0.0282, "step": 38860 }, { "epoch": 10.511087074094105, "grad_norm": 0.1378592699766159, "learning_rate": 3.024635051775766e-05, "loss": 0.0276, "step": 38870 }, { "epoch": 10.5137912385073, "grad_norm": 0.18843042850494385, "learning_rate": 3.022103752590205e-05, "loss": 0.0273, "step": 38880 }, { "epoch": 10.516495402920498, "grad_norm": 0.172918900847435, "learning_rate": 3.0195730542372992e-05, "loss": 0.0271, "step": 38890 }, { "epoch": 10.519199567333693, "grad_norm": 0.30201655626296997, "learning_rate": 3.0170429574858084e-05, "loss": 0.0289, "step": 38900 }, { "epoch": 10.52190373174689, "grad_norm": 0.1856573522090912, "learning_rate": 3.0145134631043127e-05, "loss": 0.0279, "step": 38910 }, { "epoch": 10.524607896160086, "grad_norm": 0.1852627694606781, "learning_rate": 3.0119845718612018e-05, "loss": 0.0285, "step": 38920 }, { "epoch": 10.527312060573284, "grad_norm": 0.1570305973291397, "learning_rate": 3.009456284524688e-05, "loss": 0.0272, "step": 38930 }, { "epoch": 10.530016224986479, "grad_norm": 0.1267482489347458, "learning_rate": 3.0069286018627967e-05, "loss": 0.0278, "step": 38940 }, { "epoch": 10.532720389399675, "grad_norm": 0.227256640791893, "learning_rate": 3.0044015246433743e-05, "loss": 0.0265, "step": 38950 }, { "epoch": 10.535424553812872, "grad_norm": 0.12988196313381195, "learning_rate": 3.0018750536340755e-05, "loss": 0.0267, "step": 38960 }, { "epoch": 10.538128718226067, "grad_norm": 0.21718937158584595, "learning_rate": 2.999349189602378e-05, "loss": 0.0282, "step": 38970 }, { "epoch": 10.540832882639265, "grad_norm": 0.25028449296951294, "learning_rate": 2.9968239333155733e-05, "loss": 0.0292, "step": 38980 }, { "epoch": 10.54353704705246, "grad_norm": 0.17691469192504883, "learning_rate": 2.994299285540767e-05, "loss": 0.0285, "step": 38990 }, { "epoch": 10.546241211465658, "grad_norm": 0.2329293191432953, "learning_rate": 2.9917752470448813e-05, "loss": 0.0289, "step": 39000 }, { "epoch": 10.548945375878853, "grad_norm": 0.2419573813676834, "learning_rate": 2.9892518185946495e-05, "loss": 0.0293, "step": 39010 }, { "epoch": 10.55164954029205, "grad_norm": 0.24552889168262482, "learning_rate": 2.986729000956624e-05, "loss": 0.0279, "step": 39020 }, { "epoch": 10.554353704705246, "grad_norm": 0.2642999291419983, "learning_rate": 2.9842067948971736e-05, "loss": 0.0283, "step": 39030 }, { "epoch": 10.557057869118442, "grad_norm": 0.12813690304756165, "learning_rate": 2.9816852011824727e-05, "loss": 0.0276, "step": 39040 }, { "epoch": 10.559762033531639, "grad_norm": 0.1219324842095375, "learning_rate": 2.979164220578519e-05, "loss": 0.027, "step": 39050 }, { "epoch": 10.562466197944834, "grad_norm": 0.13211244344711304, "learning_rate": 2.9766438538511165e-05, "loss": 0.0278, "step": 39060 }, { "epoch": 10.565170362358032, "grad_norm": 0.11361663043498993, "learning_rate": 2.9741241017658873e-05, "loss": 0.0286, "step": 39070 }, { "epoch": 10.567874526771227, "grad_norm": 0.12258656322956085, "learning_rate": 2.971604965088267e-05, "loss": 0.0286, "step": 39080 }, { "epoch": 10.570578691184425, "grad_norm": 0.18471799790859222, "learning_rate": 2.9690864445835008e-05, "loss": 0.0275, "step": 39090 }, { "epoch": 10.57328285559762, "grad_norm": 0.12963108718395233, "learning_rate": 2.966568541016651e-05, "loss": 0.029, "step": 39100 }, { "epoch": 10.575987020010817, "grad_norm": 0.13062265515327454, "learning_rate": 2.9640512551525867e-05, "loss": 0.0279, "step": 39110 }, { "epoch": 10.578691184424013, "grad_norm": 0.2183953821659088, "learning_rate": 2.961534587755995e-05, "loss": 0.0293, "step": 39120 }, { "epoch": 10.581395348837209, "grad_norm": 0.1515766829252243, "learning_rate": 2.959018539591375e-05, "loss": 0.0276, "step": 39130 }, { "epoch": 10.584099513250406, "grad_norm": 0.17994768917560577, "learning_rate": 2.9565031114230325e-05, "loss": 0.0282, "step": 39140 }, { "epoch": 10.586803677663601, "grad_norm": 0.18190494179725647, "learning_rate": 2.9539883040150895e-05, "loss": 0.0281, "step": 39150 }, { "epoch": 10.589507842076799, "grad_norm": 0.12028668820858002, "learning_rate": 2.9514741181314774e-05, "loss": 0.0282, "step": 39160 }, { "epoch": 10.592212006489994, "grad_norm": 0.12742413580417633, "learning_rate": 2.94896055453594e-05, "loss": 0.0271, "step": 39170 }, { "epoch": 10.594916170903192, "grad_norm": 0.11621153354644775, "learning_rate": 2.9464476139920332e-05, "loss": 0.028, "step": 39180 }, { "epoch": 10.597620335316387, "grad_norm": 0.14182379841804504, "learning_rate": 2.9439352972631186e-05, "loss": 0.0264, "step": 39190 }, { "epoch": 10.600324499729584, "grad_norm": 0.3488808870315552, "learning_rate": 2.9414236051123757e-05, "loss": 0.0277, "step": 39200 }, { "epoch": 10.60302866414278, "grad_norm": 0.21549804508686066, "learning_rate": 2.938912538302785e-05, "loss": 0.027, "step": 39210 }, { "epoch": 10.605732828555976, "grad_norm": 0.11071999371051788, "learning_rate": 2.9364020975971464e-05, "loss": 0.0273, "step": 39220 }, { "epoch": 10.608436992969173, "grad_norm": 0.19072912633419037, "learning_rate": 2.9338922837580657e-05, "loss": 0.0281, "step": 39230 }, { "epoch": 10.611141157382368, "grad_norm": 0.18052798509597778, "learning_rate": 2.931383097547955e-05, "loss": 0.0267, "step": 39240 }, { "epoch": 10.613845321795566, "grad_norm": 0.21889525651931763, "learning_rate": 2.928874539729043e-05, "loss": 0.0277, "step": 39250 }, { "epoch": 10.616549486208761, "grad_norm": 0.20003600418567657, "learning_rate": 2.926366611063358e-05, "loss": 0.0275, "step": 39260 }, { "epoch": 10.619253650621959, "grad_norm": 0.11874713748693466, "learning_rate": 2.9238593123127463e-05, "loss": 0.0284, "step": 39270 }, { "epoch": 10.621957815035154, "grad_norm": 0.11454520374536514, "learning_rate": 2.9213526442388583e-05, "loss": 0.0279, "step": 39280 }, { "epoch": 10.62466197944835, "grad_norm": 0.14704641699790955, "learning_rate": 2.9188466076031545e-05, "loss": 0.0276, "step": 39290 }, { "epoch": 10.627366143861547, "grad_norm": 0.1438804566860199, "learning_rate": 2.9163412031669012e-05, "loss": 0.0297, "step": 39300 }, { "epoch": 10.630070308274743, "grad_norm": 0.12821003794670105, "learning_rate": 2.913836431691175e-05, "loss": 0.0277, "step": 39310 }, { "epoch": 10.63277447268794, "grad_norm": 0.1375238001346588, "learning_rate": 2.9113322939368583e-05, "loss": 0.027, "step": 39320 }, { "epoch": 10.635478637101135, "grad_norm": 0.1229194700717926, "learning_rate": 2.9088287906646427e-05, "loss": 0.0275, "step": 39330 }, { "epoch": 10.638182801514333, "grad_norm": 0.12817856669425964, "learning_rate": 2.906325922635024e-05, "loss": 0.0285, "step": 39340 }, { "epoch": 10.640886965927528, "grad_norm": 0.12304624170064926, "learning_rate": 2.903823690608313e-05, "loss": 0.0276, "step": 39350 }, { "epoch": 10.643591130340724, "grad_norm": 0.13210159540176392, "learning_rate": 2.9013220953446174e-05, "loss": 0.0273, "step": 39360 }, { "epoch": 10.646295294753921, "grad_norm": 0.13853853940963745, "learning_rate": 2.8988211376038564e-05, "loss": 0.0276, "step": 39370 }, { "epoch": 10.648999459167117, "grad_norm": 0.15642720460891724, "learning_rate": 2.8963208181457564e-05, "loss": 0.0282, "step": 39380 }, { "epoch": 10.651703623580314, "grad_norm": 0.17312806844711304, "learning_rate": 2.8938211377298453e-05, "loss": 0.0274, "step": 39390 }, { "epoch": 10.65440778799351, "grad_norm": 0.2782735228538513, "learning_rate": 2.8913220971154652e-05, "loss": 0.0279, "step": 39400 }, { "epoch": 10.657111952406707, "grad_norm": 0.1297311931848526, "learning_rate": 2.888823697061753e-05, "loss": 0.0277, "step": 39410 }, { "epoch": 10.659816116819902, "grad_norm": 0.1383575052022934, "learning_rate": 2.8863259383276618e-05, "loss": 0.028, "step": 39420 }, { "epoch": 10.6625202812331, "grad_norm": 0.14038963615894318, "learning_rate": 2.8838288216719395e-05, "loss": 0.0285, "step": 39430 }, { "epoch": 10.665224445646295, "grad_norm": 0.24387532472610474, "learning_rate": 2.8813323478531484e-05, "loss": 0.0288, "step": 39440 }, { "epoch": 10.66792861005949, "grad_norm": 0.15355972945690155, "learning_rate": 2.8788365176296496e-05, "loss": 0.0277, "step": 39450 }, { "epoch": 10.670632774472688, "grad_norm": 0.25033190846443176, "learning_rate": 2.876341331759611e-05, "loss": 0.026, "step": 39460 }, { "epoch": 10.673336938885884, "grad_norm": 0.18237975239753723, "learning_rate": 2.8738467910010036e-05, "loss": 0.0278, "step": 39470 }, { "epoch": 10.676041103299081, "grad_norm": 0.12749896943569183, "learning_rate": 2.8713528961116032e-05, "loss": 0.0276, "step": 39480 }, { "epoch": 10.678745267712277, "grad_norm": 0.10400677472352982, "learning_rate": 2.8688596478489875e-05, "loss": 0.0268, "step": 39490 }, { "epoch": 10.681449432125474, "grad_norm": 0.1534823626279831, "learning_rate": 2.8663670469705434e-05, "loss": 0.0288, "step": 39500 }, { "epoch": 10.68415359653867, "grad_norm": 0.1278415322303772, "learning_rate": 2.8638750942334546e-05, "loss": 0.0275, "step": 39510 }, { "epoch": 10.686857760951867, "grad_norm": 0.11909198015928268, "learning_rate": 2.8613837903947115e-05, "loss": 0.0269, "step": 39520 }, { "epoch": 10.689561925365062, "grad_norm": 0.2112678587436676, "learning_rate": 2.858893136211106e-05, "loss": 0.0283, "step": 39530 }, { "epoch": 10.692266089778258, "grad_norm": 0.18225304782390594, "learning_rate": 2.8564031324392315e-05, "loss": 0.027, "step": 39540 }, { "epoch": 10.694970254191455, "grad_norm": 0.10510363429784775, "learning_rate": 2.85391377983549e-05, "loss": 0.0275, "step": 39550 }, { "epoch": 10.69767441860465, "grad_norm": 0.14130142331123352, "learning_rate": 2.851425079156075e-05, "loss": 0.0265, "step": 39560 }, { "epoch": 10.700378583017848, "grad_norm": 0.12609738111495972, "learning_rate": 2.848937031156994e-05, "loss": 0.027, "step": 39570 }, { "epoch": 10.703082747431043, "grad_norm": 0.15320216119289398, "learning_rate": 2.846449636594044e-05, "loss": 0.0279, "step": 39580 }, { "epoch": 10.70578691184424, "grad_norm": 0.11938226222991943, "learning_rate": 2.843962896222836e-05, "loss": 0.0273, "step": 39590 }, { "epoch": 10.708491076257436, "grad_norm": 0.13045184314250946, "learning_rate": 2.8414768107987722e-05, "loss": 0.0287, "step": 39600 }, { "epoch": 10.711195240670634, "grad_norm": 0.19627396762371063, "learning_rate": 2.838991381077061e-05, "loss": 0.027, "step": 39610 }, { "epoch": 10.71389940508383, "grad_norm": 0.12470874935388565, "learning_rate": 2.83650660781271e-05, "loss": 0.0266, "step": 39620 }, { "epoch": 10.716603569497025, "grad_norm": 0.22864370048046112, "learning_rate": 2.8340224917605285e-05, "loss": 0.0275, "step": 39630 }, { "epoch": 10.719307733910222, "grad_norm": 0.16690747439861298, "learning_rate": 2.831539033675122e-05, "loss": 0.0273, "step": 39640 }, { "epoch": 10.722011898323418, "grad_norm": 0.1255137175321579, "learning_rate": 2.8290562343109038e-05, "loss": 0.0279, "step": 39650 }, { "epoch": 10.724716062736615, "grad_norm": 0.1464056521654129, "learning_rate": 2.826574094422082e-05, "loss": 0.0272, "step": 39660 }, { "epoch": 10.72742022714981, "grad_norm": 0.12805579602718353, "learning_rate": 2.8240926147626645e-05, "loss": 0.0287, "step": 39670 }, { "epoch": 10.730124391563008, "grad_norm": 0.16297103464603424, "learning_rate": 2.8216117960864586e-05, "loss": 0.0281, "step": 39680 }, { "epoch": 10.732828555976203, "grad_norm": 0.16656988859176636, "learning_rate": 2.8191316391470703e-05, "loss": 0.0296, "step": 39690 }, { "epoch": 10.735532720389399, "grad_norm": 0.14317940175533295, "learning_rate": 2.816652144697911e-05, "loss": 0.028, "step": 39700 }, { "epoch": 10.738236884802596, "grad_norm": 0.16984552145004272, "learning_rate": 2.8141733134921783e-05, "loss": 0.0274, "step": 39710 }, { "epoch": 10.740941049215792, "grad_norm": 0.09538397192955017, "learning_rate": 2.811695146282884e-05, "loss": 0.028, "step": 39720 }, { "epoch": 10.743645213628989, "grad_norm": 0.1526319980621338, "learning_rate": 2.8092176438228212e-05, "loss": 0.0282, "step": 39730 }, { "epoch": 10.746349378042185, "grad_norm": 0.10628372430801392, "learning_rate": 2.806740806864598e-05, "loss": 0.0273, "step": 39740 }, { "epoch": 10.749053542455382, "grad_norm": 0.1385960429906845, "learning_rate": 2.804264636160604e-05, "loss": 0.0282, "step": 39750 }, { "epoch": 10.751757706868577, "grad_norm": 0.14301136136054993, "learning_rate": 2.8017891324630402e-05, "loss": 0.0276, "step": 39760 }, { "epoch": 10.754461871281773, "grad_norm": 0.15079374611377716, "learning_rate": 2.7993142965238976e-05, "loss": 0.028, "step": 39770 }, { "epoch": 10.75716603569497, "grad_norm": 0.12852072715759277, "learning_rate": 2.7968401290949665e-05, "loss": 0.0286, "step": 39780 }, { "epoch": 10.759870200108166, "grad_norm": 0.16626515984535217, "learning_rate": 2.7943666309278328e-05, "loss": 0.0282, "step": 39790 }, { "epoch": 10.762574364521363, "grad_norm": 0.1598462164402008, "learning_rate": 2.7918938027738783e-05, "loss": 0.0283, "step": 39800 }, { "epoch": 10.765278528934559, "grad_norm": 0.19100704789161682, "learning_rate": 2.789421645384287e-05, "loss": 0.0291, "step": 39810 }, { "epoch": 10.767982693347756, "grad_norm": 0.20411132276058197, "learning_rate": 2.786950159510032e-05, "loss": 0.0281, "step": 39820 }, { "epoch": 10.770686857760952, "grad_norm": 0.1338573694229126, "learning_rate": 2.7844793459018876e-05, "loss": 0.0279, "step": 39830 }, { "epoch": 10.773391022174149, "grad_norm": 0.1975540816783905, "learning_rate": 2.7820092053104195e-05, "loss": 0.0272, "step": 39840 }, { "epoch": 10.776095186587344, "grad_norm": 0.16549324989318848, "learning_rate": 2.7795397384859933e-05, "loss": 0.0272, "step": 39850 }, { "epoch": 10.77879935100054, "grad_norm": 0.22886812686920166, "learning_rate": 2.7770709461787638e-05, "loss": 0.0266, "step": 39860 }, { "epoch": 10.781503515413737, "grad_norm": 0.14793945848941803, "learning_rate": 2.7746028291386915e-05, "loss": 0.0296, "step": 39870 }, { "epoch": 10.784207679826933, "grad_norm": 0.12363366037607193, "learning_rate": 2.772135388115519e-05, "loss": 0.0274, "step": 39880 }, { "epoch": 10.78691184424013, "grad_norm": 0.13822638988494873, "learning_rate": 2.7696686238587945e-05, "loss": 0.0278, "step": 39890 }, { "epoch": 10.789616008653326, "grad_norm": 0.18173693120479584, "learning_rate": 2.7672025371178505e-05, "loss": 0.0269, "step": 39900 }, { "epoch": 10.792320173066523, "grad_norm": 0.1834789514541626, "learning_rate": 2.7647371286418238e-05, "loss": 0.0283, "step": 39910 }, { "epoch": 10.795024337479719, "grad_norm": 0.1234872117638588, "learning_rate": 2.762272399179639e-05, "loss": 0.0269, "step": 39920 }, { "epoch": 10.797728501892916, "grad_norm": 0.1374744325876236, "learning_rate": 2.7598083494800154e-05, "loss": 0.0302, "step": 39930 }, { "epoch": 10.800432666306111, "grad_norm": 0.13899730145931244, "learning_rate": 2.7573449802914664e-05, "loss": 0.028, "step": 39940 }, { "epoch": 10.803136830719307, "grad_norm": 0.17580263316631317, "learning_rate": 2.7548822923622964e-05, "loss": 0.0284, "step": 39950 }, { "epoch": 10.805840995132504, "grad_norm": 0.17574520409107208, "learning_rate": 2.752420286440609e-05, "loss": 0.0284, "step": 39960 }, { "epoch": 10.8085451595457, "grad_norm": 0.15351088345050812, "learning_rate": 2.749958963274295e-05, "loss": 0.0286, "step": 39970 }, { "epoch": 10.811249323958897, "grad_norm": 0.1446641981601715, "learning_rate": 2.747498323611039e-05, "loss": 0.0268, "step": 39980 }, { "epoch": 10.813953488372093, "grad_norm": 0.14253273606300354, "learning_rate": 2.7450383681983184e-05, "loss": 0.0276, "step": 39990 }, { "epoch": 10.81665765278529, "grad_norm": 0.12856701016426086, "learning_rate": 2.742579097783403e-05, "loss": 0.0275, "step": 40000 }, { "epoch": 10.819361817198486, "grad_norm": 0.1377895623445511, "learning_rate": 2.7401205131133512e-05, "loss": 0.0284, "step": 40010 }, { "epoch": 10.822065981611683, "grad_norm": 0.15436723828315735, "learning_rate": 2.7376626149350238e-05, "loss": 0.0283, "step": 40020 }, { "epoch": 10.824770146024878, "grad_norm": 0.13550814986228943, "learning_rate": 2.735205403995056e-05, "loss": 0.0283, "step": 40030 }, { "epoch": 10.827474310438074, "grad_norm": 0.2683977484703064, "learning_rate": 2.7327488810398917e-05, "loss": 0.0274, "step": 40040 }, { "epoch": 10.830178474851271, "grad_norm": 0.2001204639673233, "learning_rate": 2.7302930468157507e-05, "loss": 0.0263, "step": 40050 }, { "epoch": 10.832882639264467, "grad_norm": 0.13358382880687714, "learning_rate": 2.727837902068655e-05, "loss": 0.0271, "step": 40060 }, { "epoch": 10.835586803677664, "grad_norm": 0.10480957478284836, "learning_rate": 2.7253834475444123e-05, "loss": 0.0278, "step": 40070 }, { "epoch": 10.83829096809086, "grad_norm": 0.118545301258564, "learning_rate": 2.7229296839886204e-05, "loss": 0.0281, "step": 40080 }, { "epoch": 10.840995132504057, "grad_norm": 0.12557213008403778, "learning_rate": 2.720476612146668e-05, "loss": 0.0267, "step": 40090 }, { "epoch": 10.843699296917253, "grad_norm": 0.1544255018234253, "learning_rate": 2.7180242327637317e-05, "loss": 0.0275, "step": 40100 }, { "epoch": 10.846403461330448, "grad_norm": 0.1449149250984192, "learning_rate": 2.7155725465847826e-05, "loss": 0.0276, "step": 40110 }, { "epoch": 10.849107625743645, "grad_norm": 0.14975564181804657, "learning_rate": 2.713121554354578e-05, "loss": 0.0272, "step": 40120 }, { "epoch": 10.851811790156841, "grad_norm": 0.19960810244083405, "learning_rate": 2.7106712568176628e-05, "loss": 0.028, "step": 40130 }, { "epoch": 10.854515954570038, "grad_norm": 0.09471101313829422, "learning_rate": 2.708221654718374e-05, "loss": 0.0276, "step": 40140 }, { "epoch": 10.857220118983234, "grad_norm": 0.14597678184509277, "learning_rate": 2.7057727488008357e-05, "loss": 0.0271, "step": 40150 }, { "epoch": 10.859924283396431, "grad_norm": 0.25388288497924805, "learning_rate": 2.703324539808961e-05, "loss": 0.0283, "step": 40160 }, { "epoch": 10.862628447809627, "grad_norm": 0.11840435117483139, "learning_rate": 2.7008770284864505e-05, "loss": 0.028, "step": 40170 }, { "epoch": 10.865332612222822, "grad_norm": 0.20890912413597107, "learning_rate": 2.6984302155767916e-05, "loss": 0.027, "step": 40180 }, { "epoch": 10.86803677663602, "grad_norm": 0.16247816383838654, "learning_rate": 2.6959841018232683e-05, "loss": 0.0273, "step": 40190 }, { "epoch": 10.870740941049215, "grad_norm": 0.15251897275447845, "learning_rate": 2.693538687968937e-05, "loss": 0.0263, "step": 40200 }, { "epoch": 10.873445105462412, "grad_norm": 0.23505650460720062, "learning_rate": 2.6910939747566556e-05, "loss": 0.0288, "step": 40210 }, { "epoch": 10.876149269875608, "grad_norm": 0.11812414228916168, "learning_rate": 2.6886499629290607e-05, "loss": 0.0287, "step": 40220 }, { "epoch": 10.878853434288805, "grad_norm": 0.13960827887058258, "learning_rate": 2.6862066532285802e-05, "loss": 0.0281, "step": 40230 }, { "epoch": 10.881557598702, "grad_norm": 0.1258174031972885, "learning_rate": 2.6837640463974262e-05, "loss": 0.0281, "step": 40240 }, { "epoch": 10.884261763115198, "grad_norm": 0.1161477118730545, "learning_rate": 2.681322143177596e-05, "loss": 0.0277, "step": 40250 }, { "epoch": 10.886965927528394, "grad_norm": 0.1311764270067215, "learning_rate": 2.678880944310882e-05, "loss": 0.0269, "step": 40260 }, { "epoch": 10.88967009194159, "grad_norm": 0.1366688758134842, "learning_rate": 2.6764404505388474e-05, "loss": 0.0259, "step": 40270 }, { "epoch": 10.892374256354787, "grad_norm": 0.12197458744049072, "learning_rate": 2.6740006626028558e-05, "loss": 0.027, "step": 40280 }, { "epoch": 10.895078420767982, "grad_norm": 0.15605288743972778, "learning_rate": 2.671561581244048e-05, "loss": 0.0274, "step": 40290 }, { "epoch": 10.89778258518118, "grad_norm": 0.13091027736663818, "learning_rate": 2.6691232072033536e-05, "loss": 0.0279, "step": 40300 }, { "epoch": 10.900486749594375, "grad_norm": 0.17553657293319702, "learning_rate": 2.6666855412214852e-05, "loss": 0.0276, "step": 40310 }, { "epoch": 10.903190914007572, "grad_norm": 0.1317819058895111, "learning_rate": 2.664248584038942e-05, "loss": 0.027, "step": 40320 }, { "epoch": 10.905895078420768, "grad_norm": 0.2048402726650238, "learning_rate": 2.6618123363960047e-05, "loss": 0.0275, "step": 40330 }, { "epoch": 10.908599242833965, "grad_norm": 0.12531441450119019, "learning_rate": 2.659376799032748e-05, "loss": 0.0266, "step": 40340 }, { "epoch": 10.91130340724716, "grad_norm": 0.13694041967391968, "learning_rate": 2.6569419726890145e-05, "loss": 0.0281, "step": 40350 }, { "epoch": 10.914007571660356, "grad_norm": 0.15025731921195984, "learning_rate": 2.654507858104447e-05, "loss": 0.0274, "step": 40360 }, { "epoch": 10.916711736073553, "grad_norm": 0.20528917014598846, "learning_rate": 2.652074456018463e-05, "loss": 0.0271, "step": 40370 }, { "epoch": 10.919415900486749, "grad_norm": 0.18446044623851776, "learning_rate": 2.6496417671702646e-05, "loss": 0.0266, "step": 40380 }, { "epoch": 10.922120064899946, "grad_norm": 0.12468725442886353, "learning_rate": 2.6472097922988427e-05, "loss": 0.0266, "step": 40390 }, { "epoch": 10.924824229313142, "grad_norm": 0.2820151150226593, "learning_rate": 2.6447785321429607e-05, "loss": 0.0271, "step": 40400 }, { "epoch": 10.92752839372634, "grad_norm": 0.12815654277801514, "learning_rate": 2.6423479874411784e-05, "loss": 0.0279, "step": 40410 }, { "epoch": 10.930232558139535, "grad_norm": 0.12127991765737534, "learning_rate": 2.6399181589318234e-05, "loss": 0.0285, "step": 40420 }, { "epoch": 10.932936722552732, "grad_norm": 0.0990036353468895, "learning_rate": 2.6374890473530188e-05, "loss": 0.0295, "step": 40430 }, { "epoch": 10.935640886965928, "grad_norm": 0.13623328506946564, "learning_rate": 2.635060653442664e-05, "loss": 0.0275, "step": 40440 }, { "epoch": 10.938345051379123, "grad_norm": 0.3921952247619629, "learning_rate": 2.6326329779384395e-05, "loss": 0.0282, "step": 40450 }, { "epoch": 10.94104921579232, "grad_norm": 0.16335240006446838, "learning_rate": 2.63020602157781e-05, "loss": 0.0282, "step": 40460 }, { "epoch": 10.943753380205516, "grad_norm": 0.17474856972694397, "learning_rate": 2.62777978509802e-05, "loss": 0.0283, "step": 40470 }, { "epoch": 10.946457544618713, "grad_norm": 0.12730717658996582, "learning_rate": 2.6253542692360954e-05, "loss": 0.0263, "step": 40480 }, { "epoch": 10.949161709031909, "grad_norm": 0.2941538095474243, "learning_rate": 2.6229294747288458e-05, "loss": 0.027, "step": 40490 }, { "epoch": 10.951865873445106, "grad_norm": 0.12083210796117783, "learning_rate": 2.6205054023128596e-05, "loss": 0.0275, "step": 40500 }, { "epoch": 10.954570037858302, "grad_norm": 0.1876438856124878, "learning_rate": 2.6180820527245043e-05, "loss": 0.0273, "step": 40510 }, { "epoch": 10.957274202271497, "grad_norm": 0.10606727749109268, "learning_rate": 2.6156594266999313e-05, "loss": 0.0257, "step": 40520 }, { "epoch": 10.959978366684695, "grad_norm": 0.12801265716552734, "learning_rate": 2.6132375249750672e-05, "loss": 0.0285, "step": 40530 }, { "epoch": 10.96268253109789, "grad_norm": 0.11716891080141068, "learning_rate": 2.6108163482856286e-05, "loss": 0.0269, "step": 40540 }, { "epoch": 10.965386695511087, "grad_norm": 0.22157594561576843, "learning_rate": 2.6083958973670964e-05, "loss": 0.0268, "step": 40550 }, { "epoch": 10.968090859924283, "grad_norm": 0.15970806777477264, "learning_rate": 2.6059761729547483e-05, "loss": 0.0273, "step": 40560 }, { "epoch": 10.97079502433748, "grad_norm": 0.14579656720161438, "learning_rate": 2.603557175783624e-05, "loss": 0.0272, "step": 40570 }, { "epoch": 10.973499188750676, "grad_norm": 0.14563551545143127, "learning_rate": 2.601138906588559e-05, "loss": 0.0275, "step": 40580 }, { "epoch": 10.976203353163873, "grad_norm": 0.1261667013168335, "learning_rate": 2.598721366104152e-05, "loss": 0.0271, "step": 40590 }, { "epoch": 10.978907517577069, "grad_norm": 0.12469833344221115, "learning_rate": 2.5963045550647945e-05, "loss": 0.0262, "step": 40600 }, { "epoch": 10.981611681990264, "grad_norm": 0.1574227660894394, "learning_rate": 2.5938884742046466e-05, "loss": 0.0259, "step": 40610 }, { "epoch": 10.984315846403462, "grad_norm": 0.111948661506176, "learning_rate": 2.5914731242576507e-05, "loss": 0.0285, "step": 40620 }, { "epoch": 10.987020010816657, "grad_norm": 0.16359920799732208, "learning_rate": 2.5890585059575268e-05, "loss": 0.0279, "step": 40630 }, { "epoch": 10.989724175229854, "grad_norm": 0.16578717529773712, "learning_rate": 2.5866446200377688e-05, "loss": 0.0273, "step": 40640 }, { "epoch": 10.99242833964305, "grad_norm": 0.13422678411006927, "learning_rate": 2.5842314672316566e-05, "loss": 0.0266, "step": 40650 }, { "epoch": 10.995132504056247, "grad_norm": 0.16755282878875732, "learning_rate": 2.581819048272239e-05, "loss": 0.0264, "step": 40660 }, { "epoch": 10.997836668469443, "grad_norm": 0.18896116316318512, "learning_rate": 2.5794073638923478e-05, "loss": 0.0274, "step": 40670 }, { "epoch": 11.000540832882638, "grad_norm": 0.24569924175739288, "learning_rate": 2.576996414824586e-05, "loss": 0.0255, "step": 40680 }, { "epoch": 11.003244997295836, "grad_norm": 0.1352795511484146, "learning_rate": 2.574586201801339e-05, "loss": 0.0271, "step": 40690 }, { "epoch": 11.005949161709031, "grad_norm": 0.16647182404994965, "learning_rate": 2.572176725554762e-05, "loss": 0.028, "step": 40700 }, { "epoch": 11.008653326122229, "grad_norm": 0.1976713091135025, "learning_rate": 2.5697679868167966e-05, "loss": 0.0276, "step": 40710 }, { "epoch": 11.011357490535424, "grad_norm": 0.16437487304210663, "learning_rate": 2.5673599863191468e-05, "loss": 0.0276, "step": 40720 }, { "epoch": 11.014061654948621, "grad_norm": 0.11895103007555008, "learning_rate": 2.564952724793306e-05, "loss": 0.0277, "step": 40730 }, { "epoch": 11.016765819361817, "grad_norm": 0.1375018060207367, "learning_rate": 2.5625462029705306e-05, "loss": 0.0275, "step": 40740 }, { "epoch": 11.019469983775014, "grad_norm": 0.16318067908287048, "learning_rate": 2.5601404215818624e-05, "loss": 0.0273, "step": 40750 }, { "epoch": 11.02217414818821, "grad_norm": 0.13242816925048828, "learning_rate": 2.5577353813581144e-05, "loss": 0.0268, "step": 40760 }, { "epoch": 11.024878312601405, "grad_norm": 0.12240537256002426, "learning_rate": 2.5553310830298733e-05, "loss": 0.0274, "step": 40770 }, { "epoch": 11.027582477014603, "grad_norm": 0.21514737606048584, "learning_rate": 2.5529275273275012e-05, "loss": 0.0266, "step": 40780 }, { "epoch": 11.030286641427798, "grad_norm": 0.10832204669713974, "learning_rate": 2.550524714981133e-05, "loss": 0.0261, "step": 40790 }, { "epoch": 11.032990805840996, "grad_norm": 0.09188895672559738, "learning_rate": 2.5481226467206837e-05, "loss": 0.0264, "step": 40800 }, { "epoch": 11.035694970254191, "grad_norm": 0.19824889302253723, "learning_rate": 2.5457213232758365e-05, "loss": 0.0259, "step": 40810 }, { "epoch": 11.038399134667388, "grad_norm": 0.11833982914686203, "learning_rate": 2.5433207453760498e-05, "loss": 0.0269, "step": 40820 }, { "epoch": 11.041103299080584, "grad_norm": 0.1262279450893402, "learning_rate": 2.5409209137505552e-05, "loss": 0.0269, "step": 40830 }, { "epoch": 11.043807463493781, "grad_norm": 0.1728665977716446, "learning_rate": 2.5385218291283597e-05, "loss": 0.0263, "step": 40840 }, { "epoch": 11.046511627906977, "grad_norm": 0.1197500228881836, "learning_rate": 2.5361234922382383e-05, "loss": 0.0275, "step": 40850 }, { "epoch": 11.049215792320172, "grad_norm": 0.09905724227428436, "learning_rate": 2.533725903808749e-05, "loss": 0.0283, "step": 40860 }, { "epoch": 11.05191995673337, "grad_norm": 0.17289221286773682, "learning_rate": 2.5313290645682085e-05, "loss": 0.0262, "step": 40870 }, { "epoch": 11.054624121146565, "grad_norm": 0.13720394670963287, "learning_rate": 2.52893297524472e-05, "loss": 0.0274, "step": 40880 }, { "epoch": 11.057328285559763, "grad_norm": 0.16991063952445984, "learning_rate": 2.526537636566145e-05, "loss": 0.0282, "step": 40890 }, { "epoch": 11.060032449972958, "grad_norm": 0.12356686592102051, "learning_rate": 2.5241430492601305e-05, "loss": 0.0285, "step": 40900 }, { "epoch": 11.062736614386155, "grad_norm": 0.14605651795864105, "learning_rate": 2.5217492140540867e-05, "loss": 0.0265, "step": 40910 }, { "epoch": 11.065440778799351, "grad_norm": 0.31255415081977844, "learning_rate": 2.5193561316751967e-05, "loss": 0.0274, "step": 40920 }, { "epoch": 11.068144943212546, "grad_norm": 0.2262507528066635, "learning_rate": 2.516963802850416e-05, "loss": 0.0285, "step": 40930 }, { "epoch": 11.070849107625744, "grad_norm": 0.13061684370040894, "learning_rate": 2.5145722283064698e-05, "loss": 0.0274, "step": 40940 }, { "epoch": 11.07355327203894, "grad_norm": 0.19267159700393677, "learning_rate": 2.5121814087698602e-05, "loss": 0.0264, "step": 40950 }, { "epoch": 11.076257436452137, "grad_norm": 0.14363855123519897, "learning_rate": 2.509791344966848e-05, "loss": 0.0262, "step": 40960 }, { "epoch": 11.078961600865332, "grad_norm": 0.12801983952522278, "learning_rate": 2.5074020376234768e-05, "loss": 0.0275, "step": 40970 }, { "epoch": 11.08166576527853, "grad_norm": 0.2917773127555847, "learning_rate": 2.5050134874655534e-05, "loss": 0.0264, "step": 40980 }, { "epoch": 11.084369929691725, "grad_norm": 0.1328216791152954, "learning_rate": 2.5026256952186566e-05, "loss": 0.0273, "step": 40990 }, { "epoch": 11.087074094104922, "grad_norm": 0.15779165923595428, "learning_rate": 2.5002386616081335e-05, "loss": 0.0287, "step": 41000 }, { "epoch": 11.089778258518118, "grad_norm": 0.14769785106182098, "learning_rate": 2.497852387359103e-05, "loss": 0.0276, "step": 41010 }, { "epoch": 11.092482422931313, "grad_norm": 0.1243152767419815, "learning_rate": 2.4954668731964496e-05, "loss": 0.0273, "step": 41020 }, { "epoch": 11.09518658734451, "grad_norm": 0.12914854288101196, "learning_rate": 2.4930821198448364e-05, "loss": 0.0285, "step": 41030 }, { "epoch": 11.097890751757706, "grad_norm": 0.11308760195970535, "learning_rate": 2.4906981280286796e-05, "loss": 0.0265, "step": 41040 }, { "epoch": 11.100594916170904, "grad_norm": 0.13582609593868256, "learning_rate": 2.488314898472179e-05, "loss": 0.0272, "step": 41050 }, { "epoch": 11.1032990805841, "grad_norm": 0.10532567650079727, "learning_rate": 2.485932431899295e-05, "loss": 0.027, "step": 41060 }, { "epoch": 11.106003244997297, "grad_norm": 0.10126534104347229, "learning_rate": 2.4835507290337584e-05, "loss": 0.0277, "step": 41070 }, { "epoch": 11.108707409410492, "grad_norm": 0.1425127238035202, "learning_rate": 2.4811697905990672e-05, "loss": 0.0256, "step": 41080 }, { "epoch": 11.111411573823688, "grad_norm": 0.12070182710886002, "learning_rate": 2.4787896173184854e-05, "loss": 0.027, "step": 41090 }, { "epoch": 11.114115738236885, "grad_norm": 0.12953512370586395, "learning_rate": 2.4764102099150534e-05, "loss": 0.0273, "step": 41100 }, { "epoch": 11.11681990265008, "grad_norm": 0.13894155621528625, "learning_rate": 2.4740315691115644e-05, "loss": 0.0276, "step": 41110 }, { "epoch": 11.119524067063278, "grad_norm": 0.17808200418949127, "learning_rate": 2.4716536956305918e-05, "loss": 0.028, "step": 41120 }, { "epoch": 11.122228231476473, "grad_norm": 0.1456274390220642, "learning_rate": 2.4692765901944697e-05, "loss": 0.0276, "step": 41130 }, { "epoch": 11.12493239588967, "grad_norm": 0.1225845143198967, "learning_rate": 2.4669002535253e-05, "loss": 0.0265, "step": 41140 }, { "epoch": 11.127636560302866, "grad_norm": 0.10088427364826202, "learning_rate": 2.46452468634495e-05, "loss": 0.0283, "step": 41150 }, { "epoch": 11.130340724716064, "grad_norm": 0.13865047693252563, "learning_rate": 2.462149889375055e-05, "loss": 0.0276, "step": 41160 }, { "epoch": 11.133044889129259, "grad_norm": 0.1231459528207779, "learning_rate": 2.459775863337014e-05, "loss": 0.028, "step": 41170 }, { "epoch": 11.135749053542455, "grad_norm": 0.14364396035671234, "learning_rate": 2.4574026089519985e-05, "loss": 0.0272, "step": 41180 }, { "epoch": 11.138453217955652, "grad_norm": 0.13521850109100342, "learning_rate": 2.4550301269409333e-05, "loss": 0.028, "step": 41190 }, { "epoch": 11.141157382368847, "grad_norm": 0.11297928541898727, "learning_rate": 2.4526584180245216e-05, "loss": 0.0263, "step": 41200 }, { "epoch": 11.143861546782045, "grad_norm": 0.22476808726787567, "learning_rate": 2.4502874829232236e-05, "loss": 0.0284, "step": 41210 }, { "epoch": 11.14656571119524, "grad_norm": 0.20281897485256195, "learning_rate": 2.447917322357267e-05, "loss": 0.0272, "step": 41220 }, { "epoch": 11.149269875608438, "grad_norm": 0.19034641981124878, "learning_rate": 2.4455479370466443e-05, "loss": 0.0274, "step": 41230 }, { "epoch": 11.151974040021633, "grad_norm": 0.12467612326145172, "learning_rate": 2.4431793277111097e-05, "loss": 0.0257, "step": 41240 }, { "epoch": 11.15467820443483, "grad_norm": 0.17861294746398926, "learning_rate": 2.4408114950701905e-05, "loss": 0.0276, "step": 41250 }, { "epoch": 11.157382368848026, "grad_norm": 0.13833534717559814, "learning_rate": 2.4384444398431634e-05, "loss": 0.0275, "step": 41260 }, { "epoch": 11.160086533261222, "grad_norm": 0.13057047128677368, "learning_rate": 2.4360781627490837e-05, "loss": 0.0275, "step": 41270 }, { "epoch": 11.162790697674419, "grad_norm": 0.19670774042606354, "learning_rate": 2.433712664506762e-05, "loss": 0.0269, "step": 41280 }, { "epoch": 11.165494862087614, "grad_norm": 0.17311540246009827, "learning_rate": 2.431347945834774e-05, "loss": 0.0263, "step": 41290 }, { "epoch": 11.168199026500812, "grad_norm": 0.24249786138534546, "learning_rate": 2.428984007451458e-05, "loss": 0.0269, "step": 41300 }, { "epoch": 11.170903190914007, "grad_norm": 0.21688371896743774, "learning_rate": 2.426620850074917e-05, "loss": 0.0278, "step": 41310 }, { "epoch": 11.173607355327205, "grad_norm": 0.15194526314735413, "learning_rate": 2.424258474423014e-05, "loss": 0.0278, "step": 41320 }, { "epoch": 11.1763115197404, "grad_norm": 0.25357913970947266, "learning_rate": 2.421896881213382e-05, "loss": 0.0273, "step": 41330 }, { "epoch": 11.179015684153596, "grad_norm": 0.13720828294754028, "learning_rate": 2.419536071163402e-05, "loss": 0.0259, "step": 41340 }, { "epoch": 11.181719848566793, "grad_norm": 0.13729692995548248, "learning_rate": 2.417176044990233e-05, "loss": 0.0289, "step": 41350 }, { "epoch": 11.184424012979989, "grad_norm": 0.171853706240654, "learning_rate": 2.4148168034107855e-05, "loss": 0.0266, "step": 41360 }, { "epoch": 11.187128177393186, "grad_norm": 0.1567407250404358, "learning_rate": 2.4124583471417355e-05, "loss": 0.0267, "step": 41370 }, { "epoch": 11.189832341806381, "grad_norm": 0.2814862132072449, "learning_rate": 2.41010067689952e-05, "loss": 0.0282, "step": 41380 }, { "epoch": 11.192536506219579, "grad_norm": 0.12822458148002625, "learning_rate": 2.4077437934003338e-05, "loss": 0.0258, "step": 41390 }, { "epoch": 11.195240670632774, "grad_norm": 0.1462738960981369, "learning_rate": 2.405387697360143e-05, "loss": 0.0269, "step": 41400 }, { "epoch": 11.197944835045972, "grad_norm": 0.1474475860595703, "learning_rate": 2.4030323894946595e-05, "loss": 0.0286, "step": 41410 }, { "epoch": 11.200648999459167, "grad_norm": 0.18849655985832214, "learning_rate": 2.40067787051937e-05, "loss": 0.0275, "step": 41420 }, { "epoch": 11.203353163872363, "grad_norm": 0.32191115617752075, "learning_rate": 2.3983241411495087e-05, "loss": 0.0265, "step": 41430 }, { "epoch": 11.20605732828556, "grad_norm": 0.11078964173793793, "learning_rate": 2.3959712021000823e-05, "loss": 0.0269, "step": 41440 }, { "epoch": 11.208761492698756, "grad_norm": 0.13153217732906342, "learning_rate": 2.3936190540858495e-05, "loss": 0.029, "step": 41450 }, { "epoch": 11.211465657111953, "grad_norm": 0.1851566582918167, "learning_rate": 2.39126769782133e-05, "loss": 0.0264, "step": 41460 }, { "epoch": 11.214169821525148, "grad_norm": 0.15524806082248688, "learning_rate": 2.388917134020805e-05, "loss": 0.0279, "step": 41470 }, { "epoch": 11.216873985938346, "grad_norm": 0.16023433208465576, "learning_rate": 2.3865673633983128e-05, "loss": 0.0275, "step": 41480 }, { "epoch": 11.219578150351541, "grad_norm": 0.14469458162784576, "learning_rate": 2.3842183866676492e-05, "loss": 0.0263, "step": 41490 }, { "epoch": 11.222282314764737, "grad_norm": 0.17794497311115265, "learning_rate": 2.381870204542377e-05, "loss": 0.028, "step": 41500 }, { "epoch": 11.224986479177934, "grad_norm": 0.095559261739254, "learning_rate": 2.379522817735808e-05, "loss": 0.0265, "step": 41510 }, { "epoch": 11.22769064359113, "grad_norm": 0.17754197120666504, "learning_rate": 2.377176226961018e-05, "loss": 0.026, "step": 41520 }, { "epoch": 11.230394808004327, "grad_norm": 0.21884289383888245, "learning_rate": 2.3748304329308384e-05, "loss": 0.0262, "step": 41530 }, { "epoch": 11.233098972417523, "grad_norm": 0.1427551954984665, "learning_rate": 2.372485436357858e-05, "loss": 0.0269, "step": 41540 }, { "epoch": 11.23580313683072, "grad_norm": 0.18019776046276093, "learning_rate": 2.3701412379544296e-05, "loss": 0.0271, "step": 41550 }, { "epoch": 11.238507301243915, "grad_norm": 0.17838959395885468, "learning_rate": 2.367797838432653e-05, "loss": 0.0272, "step": 41560 }, { "epoch": 11.241211465657113, "grad_norm": 0.2303524762392044, "learning_rate": 2.3654552385043967e-05, "loss": 0.0269, "step": 41570 }, { "epoch": 11.243915630070308, "grad_norm": 0.15954187512397766, "learning_rate": 2.3631134388812742e-05, "loss": 0.0267, "step": 41580 }, { "epoch": 11.246619794483504, "grad_norm": 0.23380544781684875, "learning_rate": 2.3607724402746684e-05, "loss": 0.0271, "step": 41590 }, { "epoch": 11.249323958896701, "grad_norm": 0.11192372441291809, "learning_rate": 2.35843224339571e-05, "loss": 0.0266, "step": 41600 }, { "epoch": 11.252028123309897, "grad_norm": 0.13821105659008026, "learning_rate": 2.3560928489552897e-05, "loss": 0.0269, "step": 41610 }, { "epoch": 11.254732287723094, "grad_norm": 0.11415357887744904, "learning_rate": 2.353754257664053e-05, "loss": 0.0275, "step": 41620 }, { "epoch": 11.25743645213629, "grad_norm": 0.19408251345157623, "learning_rate": 2.3514164702324037e-05, "loss": 0.0272, "step": 41630 }, { "epoch": 11.260140616549487, "grad_norm": 0.18681807816028595, "learning_rate": 2.3490794873704963e-05, "loss": 0.0267, "step": 41640 }, { "epoch": 11.262844780962682, "grad_norm": 0.2234499454498291, "learning_rate": 2.3467433097882496e-05, "loss": 0.0266, "step": 41650 }, { "epoch": 11.26554894537588, "grad_norm": 0.183963805437088, "learning_rate": 2.34440793819533e-05, "loss": 0.0283, "step": 41660 }, { "epoch": 11.268253109789075, "grad_norm": 0.1456652730703354, "learning_rate": 2.3420733733011617e-05, "loss": 0.0265, "step": 41670 }, { "epoch": 11.27095727420227, "grad_norm": 0.11042780429124832, "learning_rate": 2.3397396158149243e-05, "loss": 0.027, "step": 41680 }, { "epoch": 11.273661438615468, "grad_norm": 0.17052686214447021, "learning_rate": 2.3374066664455498e-05, "loss": 0.0271, "step": 41690 }, { "epoch": 11.276365603028664, "grad_norm": 0.15941070020198822, "learning_rate": 2.3350745259017315e-05, "loss": 0.0275, "step": 41700 }, { "epoch": 11.279069767441861, "grad_norm": 0.12061937898397446, "learning_rate": 2.332743194891906e-05, "loss": 0.0259, "step": 41710 }, { "epoch": 11.281773931855057, "grad_norm": 0.11285697668790817, "learning_rate": 2.330412674124276e-05, "loss": 0.0261, "step": 41720 }, { "epoch": 11.284478096268254, "grad_norm": 0.13975511491298676, "learning_rate": 2.328082964306786e-05, "loss": 0.027, "step": 41730 }, { "epoch": 11.28718226068145, "grad_norm": 0.14781229197978973, "learning_rate": 2.325754066147145e-05, "loss": 0.0263, "step": 41740 }, { "epoch": 11.289886425094645, "grad_norm": 0.2174004316329956, "learning_rate": 2.32342598035281e-05, "loss": 0.0263, "step": 41750 }, { "epoch": 11.292590589507842, "grad_norm": 0.10014303028583527, "learning_rate": 2.321098707630991e-05, "loss": 0.027, "step": 41760 }, { "epoch": 11.295294753921038, "grad_norm": 0.15877775847911835, "learning_rate": 2.318772248688652e-05, "loss": 0.0268, "step": 41770 }, { "epoch": 11.297998918334235, "grad_norm": 0.12467797845602036, "learning_rate": 2.3164466042325107e-05, "loss": 0.0272, "step": 41780 }, { "epoch": 11.30070308274743, "grad_norm": 0.09618815034627914, "learning_rate": 2.3141217749690353e-05, "loss": 0.0262, "step": 41790 }, { "epoch": 11.303407247160628, "grad_norm": 0.11265698820352554, "learning_rate": 2.3117977616044466e-05, "loss": 0.0272, "step": 41800 }, { "epoch": 11.306111411573823, "grad_norm": 0.13081277906894684, "learning_rate": 2.309474564844722e-05, "loss": 0.0256, "step": 41810 }, { "epoch": 11.30881557598702, "grad_norm": 0.17395828664302826, "learning_rate": 2.307152185395585e-05, "loss": 0.0267, "step": 41820 }, { "epoch": 11.311519740400216, "grad_norm": 0.19471438229084015, "learning_rate": 2.3048306239625144e-05, "loss": 0.027, "step": 41830 }, { "epoch": 11.314223904813412, "grad_norm": 0.14646753668785095, "learning_rate": 2.3025098812507378e-05, "loss": 0.027, "step": 41840 }, { "epoch": 11.31692806922661, "grad_norm": 0.19278091192245483, "learning_rate": 2.3001899579652366e-05, "loss": 0.0265, "step": 41850 }, { "epoch": 11.319632233639805, "grad_norm": 0.16548891365528107, "learning_rate": 2.2978708548107393e-05, "loss": 0.0282, "step": 41860 }, { "epoch": 11.322336398053002, "grad_norm": 0.13508448004722595, "learning_rate": 2.2955525724917348e-05, "loss": 0.0258, "step": 41870 }, { "epoch": 11.325040562466198, "grad_norm": 0.3292543292045593, "learning_rate": 2.2932351117124477e-05, "loss": 0.0265, "step": 41880 }, { "epoch": 11.327744726879395, "grad_norm": 0.13498280942440033, "learning_rate": 2.29091847317687e-05, "loss": 0.0274, "step": 41890 }, { "epoch": 11.33044889129259, "grad_norm": 0.273384690284729, "learning_rate": 2.2886026575887277e-05, "loss": 0.027, "step": 41900 }, { "epoch": 11.333153055705786, "grad_norm": 0.14118444919586182, "learning_rate": 2.2862876656515094e-05, "loss": 0.0261, "step": 41910 }, { "epoch": 11.335857220118983, "grad_norm": 0.14946478605270386, "learning_rate": 2.2839734980684464e-05, "loss": 0.0257, "step": 41920 }, { "epoch": 11.338561384532179, "grad_norm": 0.11813041567802429, "learning_rate": 2.281660155542522e-05, "loss": 0.0259, "step": 41930 }, { "epoch": 11.341265548945376, "grad_norm": 0.20179596543312073, "learning_rate": 2.279347638776469e-05, "loss": 0.0275, "step": 41940 }, { "epoch": 11.343969713358572, "grad_norm": 0.12786133587360382, "learning_rate": 2.2770359484727665e-05, "loss": 0.0269, "step": 41950 }, { "epoch": 11.346673877771769, "grad_norm": 0.19485484063625336, "learning_rate": 2.27472508533365e-05, "loss": 0.0252, "step": 41960 }, { "epoch": 11.349378042184965, "grad_norm": 0.11759892106056213, "learning_rate": 2.2724150500610948e-05, "loss": 0.0269, "step": 41970 }, { "epoch": 11.352082206598162, "grad_norm": 0.20482775568962097, "learning_rate": 2.2701058433568302e-05, "loss": 0.0267, "step": 41980 }, { "epoch": 11.354786371011357, "grad_norm": 0.13376697897911072, "learning_rate": 2.2677974659223318e-05, "loss": 0.0267, "step": 41990 }, { "epoch": 11.357490535424553, "grad_norm": 0.13847757875919342, "learning_rate": 2.2654899184588235e-05, "loss": 0.0267, "step": 42000 }, { "epoch": 11.36019469983775, "grad_norm": 0.12091639637947083, "learning_rate": 2.2631832016672756e-05, "loss": 0.027, "step": 42010 }, { "epoch": 11.362898864250946, "grad_norm": 0.14756537973880768, "learning_rate": 2.2608773162484127e-05, "loss": 0.0269, "step": 42020 }, { "epoch": 11.365603028664143, "grad_norm": 0.12992192804813385, "learning_rate": 2.2585722629026958e-05, "loss": 0.0278, "step": 42030 }, { "epoch": 11.368307193077339, "grad_norm": 0.2620103657245636, "learning_rate": 2.2562680423303457e-05, "loss": 0.0263, "step": 42040 }, { "epoch": 11.371011357490536, "grad_norm": 0.14664120972156525, "learning_rate": 2.2539646552313165e-05, "loss": 0.0259, "step": 42050 }, { "epoch": 11.373715521903732, "grad_norm": 0.11725308001041412, "learning_rate": 2.251662102305322e-05, "loss": 0.0264, "step": 42060 }, { "epoch": 11.376419686316929, "grad_norm": 0.12937824428081512, "learning_rate": 2.2493603842518152e-05, "loss": 0.0267, "step": 42070 }, { "epoch": 11.379123850730124, "grad_norm": 0.11601158231496811, "learning_rate": 2.2470595017699974e-05, "loss": 0.0265, "step": 42080 }, { "epoch": 11.38182801514332, "grad_norm": 0.10889741033315659, "learning_rate": 2.244759455558816e-05, "loss": 0.027, "step": 42090 }, { "epoch": 11.384532179556517, "grad_norm": 0.13536308705806732, "learning_rate": 2.2424602463169614e-05, "loss": 0.0272, "step": 42100 }, { "epoch": 11.387236343969713, "grad_norm": 0.1307002604007721, "learning_rate": 2.2401618747428776e-05, "loss": 0.0273, "step": 42110 }, { "epoch": 11.38994050838291, "grad_norm": 0.10063367336988449, "learning_rate": 2.237864341534747e-05, "loss": 0.0255, "step": 42120 }, { "epoch": 11.392644672796106, "grad_norm": 0.27206480503082275, "learning_rate": 2.2355676473904998e-05, "loss": 0.0271, "step": 42130 }, { "epoch": 11.395348837209303, "grad_norm": 0.22661980986595154, "learning_rate": 2.2332717930078108e-05, "loss": 0.0255, "step": 42140 }, { "epoch": 11.398053001622499, "grad_norm": 0.23394951224327087, "learning_rate": 2.2309767790840992e-05, "loss": 0.026, "step": 42150 }, { "epoch": 11.400757166035694, "grad_norm": 0.20311377942562103, "learning_rate": 2.228682606316529e-05, "loss": 0.0276, "step": 42160 }, { "epoch": 11.403461330448891, "grad_norm": 0.18005958199501038, "learning_rate": 2.2263892754020138e-05, "loss": 0.0268, "step": 42170 }, { "epoch": 11.406165494862087, "grad_norm": 0.12803801894187927, "learning_rate": 2.2240967870372004e-05, "loss": 0.0264, "step": 42180 }, { "epoch": 11.408869659275284, "grad_norm": 0.16740557551383972, "learning_rate": 2.2218051419184933e-05, "loss": 0.0267, "step": 42190 }, { "epoch": 11.41157382368848, "grad_norm": 0.11169042438268661, "learning_rate": 2.219514340742026e-05, "loss": 0.0251, "step": 42200 }, { "epoch": 11.414277988101677, "grad_norm": 0.19255781173706055, "learning_rate": 2.2172243842036898e-05, "loss": 0.0255, "step": 42210 }, { "epoch": 11.416982152514873, "grad_norm": 0.21620263159275055, "learning_rate": 2.2149352729991107e-05, "loss": 0.0274, "step": 42220 }, { "epoch": 11.41968631692807, "grad_norm": 0.20818667113780975, "learning_rate": 2.2126470078236605e-05, "loss": 0.0269, "step": 42230 }, { "epoch": 11.422390481341266, "grad_norm": 0.13056042790412903, "learning_rate": 2.2103595893724533e-05, "loss": 0.0265, "step": 42240 }, { "epoch": 11.425094645754461, "grad_norm": 0.12434615939855576, "learning_rate": 2.208073018340345e-05, "loss": 0.0258, "step": 42250 }, { "epoch": 11.427798810167658, "grad_norm": 0.12931481003761292, "learning_rate": 2.2057872954219405e-05, "loss": 0.026, "step": 42260 }, { "epoch": 11.430502974580854, "grad_norm": 0.18503621220588684, "learning_rate": 2.203502421311575e-05, "loss": 0.0265, "step": 42270 }, { "epoch": 11.433207138994051, "grad_norm": 0.15806567668914795, "learning_rate": 2.2012183967033388e-05, "loss": 0.0242, "step": 42280 }, { "epoch": 11.435911303407247, "grad_norm": 0.17482861876487732, "learning_rate": 2.198935222291056e-05, "loss": 0.0264, "step": 42290 }, { "epoch": 11.438615467820444, "grad_norm": 0.12196243554353714, "learning_rate": 2.1966528987682948e-05, "loss": 0.0263, "step": 42300 }, { "epoch": 11.44131963223364, "grad_norm": 0.10003694891929626, "learning_rate": 2.194371426828365e-05, "loss": 0.0264, "step": 42310 }, { "epoch": 11.444023796646835, "grad_norm": 0.2203061580657959, "learning_rate": 2.192090807164317e-05, "loss": 0.0263, "step": 42320 }, { "epoch": 11.446727961060033, "grad_norm": 0.1319885104894638, "learning_rate": 2.1898110404689422e-05, "loss": 0.0282, "step": 42330 }, { "epoch": 11.449432125473228, "grad_norm": 0.12733514606952667, "learning_rate": 2.1875321274347776e-05, "loss": 0.0259, "step": 42340 }, { "epoch": 11.452136289886425, "grad_norm": 0.1509067267179489, "learning_rate": 2.18525406875409e-05, "loss": 0.026, "step": 42350 }, { "epoch": 11.454840454299621, "grad_norm": 0.28364163637161255, "learning_rate": 2.1829768651188997e-05, "loss": 0.027, "step": 42360 }, { "epoch": 11.457544618712818, "grad_norm": 0.14152930676937103, "learning_rate": 2.180700517220958e-05, "loss": 0.0272, "step": 42370 }, { "epoch": 11.460248783126014, "grad_norm": 0.1966240257024765, "learning_rate": 2.1784250257517603e-05, "loss": 0.026, "step": 42380 }, { "epoch": 11.462952947539211, "grad_norm": 0.21432267129421234, "learning_rate": 2.1761503914025406e-05, "loss": 0.0267, "step": 42390 }, { "epoch": 11.465657111952407, "grad_norm": 0.1414254605770111, "learning_rate": 2.1738766148642705e-05, "loss": 0.0268, "step": 42400 }, { "epoch": 11.468361276365602, "grad_norm": 0.17236405611038208, "learning_rate": 2.1716036968276683e-05, "loss": 0.026, "step": 42410 }, { "epoch": 11.4710654407788, "grad_norm": 0.2616153061389923, "learning_rate": 2.1693316379831808e-05, "loss": 0.027, "step": 42420 }, { "epoch": 11.473769605191995, "grad_norm": 0.1554524302482605, "learning_rate": 2.1670604390210037e-05, "loss": 0.0269, "step": 42430 }, { "epoch": 11.476473769605192, "grad_norm": 0.13916246592998505, "learning_rate": 2.1647901006310656e-05, "loss": 0.0258, "step": 42440 }, { "epoch": 11.479177934018388, "grad_norm": 0.16876699030399323, "learning_rate": 2.1625206235030353e-05, "loss": 0.0264, "step": 42450 }, { "epoch": 11.481882098431585, "grad_norm": 0.12048454582691193, "learning_rate": 2.160252008326321e-05, "loss": 0.0263, "step": 42460 }, { "epoch": 11.48458626284478, "grad_norm": 0.12490510940551758, "learning_rate": 2.157984255790067e-05, "loss": 0.0259, "step": 42470 }, { "epoch": 11.487290427257978, "grad_norm": 0.10140255838632584, "learning_rate": 2.1557173665831553e-05, "loss": 0.0261, "step": 42480 }, { "epoch": 11.489994591671174, "grad_norm": 0.2106204777956009, "learning_rate": 2.153451341394212e-05, "loss": 0.0262, "step": 42490 }, { "epoch": 11.49269875608437, "grad_norm": 0.20162580907344818, "learning_rate": 2.151186180911589e-05, "loss": 0.0264, "step": 42500 }, { "epoch": 11.495402920497567, "grad_norm": 0.11792104691267014, "learning_rate": 2.1489218858233877e-05, "loss": 0.0268, "step": 42510 }, { "epoch": 11.498107084910762, "grad_norm": 0.1202525645494461, "learning_rate": 2.1466584568174392e-05, "loss": 0.0282, "step": 42520 }, { "epoch": 11.50081124932396, "grad_norm": 0.21212251484394073, "learning_rate": 2.1443958945813132e-05, "loss": 0.0262, "step": 42530 }, { "epoch": 11.503515413737155, "grad_norm": 0.3051365911960602, "learning_rate": 2.1421341998023163e-05, "loss": 0.0269, "step": 42540 }, { "epoch": 11.506219578150352, "grad_norm": 0.11937687546014786, "learning_rate": 2.139873373167491e-05, "loss": 0.0259, "step": 42550 }, { "epoch": 11.508923742563548, "grad_norm": 0.15262183547019958, "learning_rate": 2.13761341536362e-05, "loss": 0.0255, "step": 42560 }, { "epoch": 11.511627906976745, "grad_norm": 0.12312335520982742, "learning_rate": 2.1353543270772136e-05, "loss": 0.0269, "step": 42570 }, { "epoch": 11.51433207138994, "grad_norm": 0.13924559950828552, "learning_rate": 2.1330961089945297e-05, "loss": 0.0263, "step": 42580 }, { "epoch": 11.517036235803136, "grad_norm": 0.2652166187763214, "learning_rate": 2.130838761801548e-05, "loss": 0.0261, "step": 42590 }, { "epoch": 11.519740400216333, "grad_norm": 0.13555502891540527, "learning_rate": 2.1285822861839966e-05, "loss": 0.0262, "step": 42600 }, { "epoch": 11.522444564629529, "grad_norm": 0.12320361286401749, "learning_rate": 2.126326682827331e-05, "loss": 0.0256, "step": 42610 }, { "epoch": 11.525148729042726, "grad_norm": 0.14092274010181427, "learning_rate": 2.124071952416744e-05, "loss": 0.0255, "step": 42620 }, { "epoch": 11.527852893455922, "grad_norm": 0.1397915631532669, "learning_rate": 2.1218180956371634e-05, "loss": 0.0266, "step": 42630 }, { "epoch": 11.53055705786912, "grad_norm": 0.1330452561378479, "learning_rate": 2.119565113173252e-05, "loss": 0.0269, "step": 42640 }, { "epoch": 11.533261222282315, "grad_norm": 0.1413412094116211, "learning_rate": 2.1173130057094033e-05, "loss": 0.0268, "step": 42650 }, { "epoch": 11.53596538669551, "grad_norm": 0.15477962791919708, "learning_rate": 2.115061773929753e-05, "loss": 0.0262, "step": 42660 }, { "epoch": 11.538669551108708, "grad_norm": 0.14233557879924774, "learning_rate": 2.1128114185181623e-05, "loss": 0.0271, "step": 42670 }, { "epoch": 11.541373715521903, "grad_norm": 0.1666705161333084, "learning_rate": 2.1105619401582317e-05, "loss": 0.0269, "step": 42680 }, { "epoch": 11.5440778799351, "grad_norm": 0.20698873698711395, "learning_rate": 2.1083133395332928e-05, "loss": 0.0262, "step": 42690 }, { "epoch": 11.546782044348296, "grad_norm": 0.11258043348789215, "learning_rate": 2.1060656173264082e-05, "loss": 0.0256, "step": 42700 }, { "epoch": 11.549486208761493, "grad_norm": 0.10535039007663727, "learning_rate": 2.103818774220383e-05, "loss": 0.0261, "step": 42710 }, { "epoch": 11.552190373174689, "grad_norm": 0.11978019773960114, "learning_rate": 2.1015728108977412e-05, "loss": 0.0254, "step": 42720 }, { "epoch": 11.554894537587884, "grad_norm": 0.10414653271436691, "learning_rate": 2.0993277280407548e-05, "loss": 0.028, "step": 42730 }, { "epoch": 11.557598702001082, "grad_norm": 0.1467093974351883, "learning_rate": 2.0970835263314132e-05, "loss": 0.0256, "step": 42740 }, { "epoch": 11.560302866414277, "grad_norm": 0.18277224898338318, "learning_rate": 2.094840206451451e-05, "loss": 0.0282, "step": 42750 }, { "epoch": 11.563007030827475, "grad_norm": 0.24919840693473816, "learning_rate": 2.0925977690823273e-05, "loss": 0.0259, "step": 42760 }, { "epoch": 11.56571119524067, "grad_norm": 0.13953405618667603, "learning_rate": 2.0903562149052364e-05, "loss": 0.0259, "step": 42770 }, { "epoch": 11.568415359653867, "grad_norm": 0.10928257554769516, "learning_rate": 2.0881155446011025e-05, "loss": 0.0253, "step": 42780 }, { "epoch": 11.571119524067063, "grad_norm": 0.13288156688213348, "learning_rate": 2.0858757588505823e-05, "loss": 0.0273, "step": 42790 }, { "epoch": 11.57382368848026, "grad_norm": 0.1123817190527916, "learning_rate": 2.0836368583340622e-05, "loss": 0.0253, "step": 42800 }, { "epoch": 11.576527852893456, "grad_norm": 0.12000957876443863, "learning_rate": 2.081398843731664e-05, "loss": 0.0271, "step": 42810 }, { "epoch": 11.579232017306651, "grad_norm": 0.13960479199886322, "learning_rate": 2.0791617157232357e-05, "loss": 0.026, "step": 42820 }, { "epoch": 11.581936181719849, "grad_norm": 0.16273024678230286, "learning_rate": 2.0769254749883576e-05, "loss": 0.0262, "step": 42830 }, { "epoch": 11.584640346133044, "grad_norm": 0.11944214254617691, "learning_rate": 2.0746901222063415e-05, "loss": 0.0254, "step": 42840 }, { "epoch": 11.587344510546242, "grad_norm": 0.15318594872951508, "learning_rate": 2.072455658056226e-05, "loss": 0.0254, "step": 42850 }, { "epoch": 11.590048674959437, "grad_norm": 0.17683100700378418, "learning_rate": 2.0702220832167873e-05, "loss": 0.0264, "step": 42860 }, { "epoch": 11.592752839372634, "grad_norm": 0.14607927203178406, "learning_rate": 2.0679893983665205e-05, "loss": 0.0267, "step": 42870 }, { "epoch": 11.59545700378583, "grad_norm": 0.11390654742717743, "learning_rate": 2.0657576041836622e-05, "loss": 0.0259, "step": 42880 }, { "epoch": 11.598161168199027, "grad_norm": 0.1369391232728958, "learning_rate": 2.0635267013461666e-05, "loss": 0.0249, "step": 42890 }, { "epoch": 11.600865332612223, "grad_norm": 0.10085293650627136, "learning_rate": 2.061296690531728e-05, "loss": 0.0267, "step": 42900 }, { "epoch": 11.603569497025418, "grad_norm": 0.1772993803024292, "learning_rate": 2.0590675724177622e-05, "loss": 0.0273, "step": 42910 }, { "epoch": 11.606273661438616, "grad_norm": 0.3007688820362091, "learning_rate": 2.0568393476814167e-05, "loss": 0.0262, "step": 42920 }, { "epoch": 11.608977825851811, "grad_norm": 0.14411163330078125, "learning_rate": 2.0546120169995685e-05, "loss": 0.0252, "step": 42930 }, { "epoch": 11.611681990265009, "grad_norm": 0.1879444271326065, "learning_rate": 2.0523855810488214e-05, "loss": 0.0251, "step": 42940 }, { "epoch": 11.614386154678204, "grad_norm": 0.09900869429111481, "learning_rate": 2.050160040505505e-05, "loss": 0.0258, "step": 42950 }, { "epoch": 11.617090319091401, "grad_norm": 0.2970132827758789, "learning_rate": 2.0479353960456843e-05, "loss": 0.0255, "step": 42960 }, { "epoch": 11.619794483504597, "grad_norm": 0.12066789716482162, "learning_rate": 2.0457116483451456e-05, "loss": 0.0262, "step": 42970 }, { "epoch": 11.622498647917794, "grad_norm": 0.11995125561952591, "learning_rate": 2.0434887980794043e-05, "loss": 0.026, "step": 42980 }, { "epoch": 11.62520281233099, "grad_norm": 0.11151596903800964, "learning_rate": 2.0412668459237043e-05, "loss": 0.0254, "step": 42990 }, { "epoch": 11.627906976744185, "grad_norm": 0.2367328256368637, "learning_rate": 2.039045792553016e-05, "loss": 0.0269, "step": 43000 }, { "epoch": 11.630611141157383, "grad_norm": 0.16087578237056732, "learning_rate": 2.036825638642036e-05, "loss": 0.0257, "step": 43010 }, { "epoch": 11.633315305570578, "grad_norm": 0.2947615385055542, "learning_rate": 2.0346063848651868e-05, "loss": 0.0255, "step": 43020 }, { "epoch": 11.636019469983776, "grad_norm": 0.13263767957687378, "learning_rate": 2.0323880318966254e-05, "loss": 0.0263, "step": 43030 }, { "epoch": 11.638723634396971, "grad_norm": 0.11506019532680511, "learning_rate": 2.030170580410221e-05, "loss": 0.0263, "step": 43040 }, { "epoch": 11.641427798810168, "grad_norm": 0.27099767327308655, "learning_rate": 2.0279540310795837e-05, "loss": 0.0273, "step": 43050 }, { "epoch": 11.644131963223364, "grad_norm": 0.09605835378170013, "learning_rate": 2.0257383845780365e-05, "loss": 0.0265, "step": 43060 }, { "epoch": 11.64683612763656, "grad_norm": 0.15277284383773804, "learning_rate": 2.0235236415786384e-05, "loss": 0.0258, "step": 43070 }, { "epoch": 11.649540292049757, "grad_norm": 0.19465507566928864, "learning_rate": 2.021309802754169e-05, "loss": 0.0255, "step": 43080 }, { "epoch": 11.652244456462952, "grad_norm": 0.13562022149562836, "learning_rate": 2.0190968687771332e-05, "loss": 0.0265, "step": 43090 }, { "epoch": 11.65494862087615, "grad_norm": 0.1377398520708084, "learning_rate": 2.016884840319763e-05, "loss": 0.0256, "step": 43100 }, { "epoch": 11.657652785289345, "grad_norm": 0.175448477268219, "learning_rate": 2.0146737180540122e-05, "loss": 0.0254, "step": 43110 }, { "epoch": 11.660356949702543, "grad_norm": 0.2011691927909851, "learning_rate": 2.012463502651564e-05, "loss": 0.0263, "step": 43120 }, { "epoch": 11.663061114115738, "grad_norm": 0.18862669169902802, "learning_rate": 2.0102541947838228e-05, "loss": 0.026, "step": 43130 }, { "epoch": 11.665765278528934, "grad_norm": 0.09929509460926056, "learning_rate": 2.0080457951219173e-05, "loss": 0.0263, "step": 43140 }, { "epoch": 11.668469442942131, "grad_norm": 0.08382528275251389, "learning_rate": 2.0058383043367017e-05, "loss": 0.0246, "step": 43150 }, { "epoch": 11.671173607355326, "grad_norm": 0.23258629441261292, "learning_rate": 2.0036317230987528e-05, "loss": 0.0254, "step": 43160 }, { "epoch": 11.673877771768524, "grad_norm": 0.2757699489593506, "learning_rate": 2.0014260520783696e-05, "loss": 0.0266, "step": 43170 }, { "epoch": 11.67658193618172, "grad_norm": 0.17738938331604004, "learning_rate": 1.9992212919455834e-05, "loss": 0.0273, "step": 43180 }, { "epoch": 11.679286100594917, "grad_norm": 0.1112489327788353, "learning_rate": 1.9970174433701333e-05, "loss": 0.0263, "step": 43190 }, { "epoch": 11.681990265008112, "grad_norm": 0.10767067223787308, "learning_rate": 1.9948145070214992e-05, "loss": 0.0249, "step": 43200 }, { "epoch": 11.68469442942131, "grad_norm": 0.18463090062141418, "learning_rate": 1.9926124835688663e-05, "loss": 0.0255, "step": 43210 }, { "epoch": 11.687398593834505, "grad_norm": 0.1613895297050476, "learning_rate": 1.9904113736811576e-05, "loss": 0.0264, "step": 43220 }, { "epoch": 11.6901027582477, "grad_norm": 0.12809956073760986, "learning_rate": 1.9882111780270096e-05, "loss": 0.026, "step": 43230 }, { "epoch": 11.692806922660898, "grad_norm": 0.10391022264957428, "learning_rate": 1.986011897274784e-05, "loss": 0.0284, "step": 43240 }, { "epoch": 11.695511087074093, "grad_norm": 0.10036378353834152, "learning_rate": 1.983813532092565e-05, "loss": 0.0252, "step": 43250 }, { "epoch": 11.69821525148729, "grad_norm": 0.12558914721012115, "learning_rate": 1.981616083148155e-05, "loss": 0.0265, "step": 43260 }, { "epoch": 11.700919415900486, "grad_norm": 0.12886157631874084, "learning_rate": 1.9794195511090845e-05, "loss": 0.027, "step": 43270 }, { "epoch": 11.703623580313684, "grad_norm": 0.1540975719690323, "learning_rate": 1.977223936642601e-05, "loss": 0.0266, "step": 43280 }, { "epoch": 11.70632774472688, "grad_norm": 0.15094877779483795, "learning_rate": 1.975029240415674e-05, "loss": 0.0273, "step": 43290 }, { "epoch": 11.709031909140077, "grad_norm": 0.1959892362356186, "learning_rate": 1.9728354630949936e-05, "loss": 0.0266, "step": 43300 }, { "epoch": 11.711736073553272, "grad_norm": 0.20276948809623718, "learning_rate": 1.9706426053469716e-05, "loss": 0.026, "step": 43310 }, { "epoch": 11.714440237966468, "grad_norm": 0.16242939233779907, "learning_rate": 1.9684506678377396e-05, "loss": 0.0257, "step": 43320 }, { "epoch": 11.717144402379665, "grad_norm": 0.2390841245651245, "learning_rate": 1.9662596512331544e-05, "loss": 0.0265, "step": 43330 }, { "epoch": 11.71984856679286, "grad_norm": 0.15297271311283112, "learning_rate": 1.964069556198782e-05, "loss": 0.0249, "step": 43340 }, { "epoch": 11.722552731206058, "grad_norm": 0.24932342767715454, "learning_rate": 1.9618803833999232e-05, "loss": 0.026, "step": 43350 }, { "epoch": 11.725256895619253, "grad_norm": 0.18044117093086243, "learning_rate": 1.9596921335015838e-05, "loss": 0.0257, "step": 43360 }, { "epoch": 11.72796106003245, "grad_norm": 0.15563775599002838, "learning_rate": 1.957504807168501e-05, "loss": 0.0261, "step": 43370 }, { "epoch": 11.730665224445646, "grad_norm": 0.11221610754728317, "learning_rate": 1.9553184050651253e-05, "loss": 0.0256, "step": 43380 }, { "epoch": 11.733369388858844, "grad_norm": 0.17130570113658905, "learning_rate": 1.953132927855628e-05, "loss": 0.0266, "step": 43390 }, { "epoch": 11.736073553272039, "grad_norm": 0.13974614441394806, "learning_rate": 1.9509483762038995e-05, "loss": 0.0257, "step": 43400 }, { "epoch": 11.738777717685235, "grad_norm": 0.10388367623090744, "learning_rate": 1.9487647507735467e-05, "loss": 0.0268, "step": 43410 }, { "epoch": 11.741481882098432, "grad_norm": 0.17439936101436615, "learning_rate": 1.9465820522279032e-05, "loss": 0.0274, "step": 43420 }, { "epoch": 11.744186046511627, "grad_norm": 0.17317655682563782, "learning_rate": 1.9444002812300078e-05, "loss": 0.0256, "step": 43430 }, { "epoch": 11.746890210924825, "grad_norm": 0.1339481770992279, "learning_rate": 1.94221943844263e-05, "loss": 0.0263, "step": 43440 }, { "epoch": 11.74959437533802, "grad_norm": 0.13489241898059845, "learning_rate": 1.9400395245282515e-05, "loss": 0.0253, "step": 43450 }, { "epoch": 11.752298539751218, "grad_norm": 0.1444081813097, "learning_rate": 1.937860540149071e-05, "loss": 0.0266, "step": 43460 }, { "epoch": 11.755002704164413, "grad_norm": 0.13732732832431793, "learning_rate": 1.9356824859670082e-05, "loss": 0.0255, "step": 43470 }, { "epoch": 11.757706868577609, "grad_norm": 0.14829879999160767, "learning_rate": 1.9335053626436967e-05, "loss": 0.0253, "step": 43480 }, { "epoch": 11.760411032990806, "grad_norm": 0.1154860332608223, "learning_rate": 1.9313291708404885e-05, "loss": 0.025, "step": 43490 }, { "epoch": 11.763115197404002, "grad_norm": 0.15263479948043823, "learning_rate": 1.9291539112184587e-05, "loss": 0.0262, "step": 43500 }, { "epoch": 11.765819361817199, "grad_norm": 0.1309325098991394, "learning_rate": 1.9269795844383854e-05, "loss": 0.0264, "step": 43510 }, { "epoch": 11.768523526230394, "grad_norm": 0.13224317133426666, "learning_rate": 1.9248061911607777e-05, "loss": 0.0275, "step": 43520 }, { "epoch": 11.771227690643592, "grad_norm": 0.25385966897010803, "learning_rate": 1.9226337320458538e-05, "loss": 0.0252, "step": 43530 }, { "epoch": 11.773931855056787, "grad_norm": 0.13590417802333832, "learning_rate": 1.9204622077535488e-05, "loss": 0.0256, "step": 43540 }, { "epoch": 11.776636019469983, "grad_norm": 0.12183725833892822, "learning_rate": 1.9182916189435147e-05, "loss": 0.026, "step": 43550 }, { "epoch": 11.77934018388318, "grad_norm": 0.2192174643278122, "learning_rate": 1.916121966275117e-05, "loss": 0.0267, "step": 43560 }, { "epoch": 11.782044348296376, "grad_norm": 0.19096362590789795, "learning_rate": 1.9139532504074443e-05, "loss": 0.0249, "step": 43570 }, { "epoch": 11.784748512709573, "grad_norm": 0.2260545939207077, "learning_rate": 1.9117854719992885e-05, "loss": 0.0259, "step": 43580 }, { "epoch": 11.787452677122769, "grad_norm": 0.11772170662879944, "learning_rate": 1.9096186317091687e-05, "loss": 0.0266, "step": 43590 }, { "epoch": 11.790156841535966, "grad_norm": 0.2143365442752838, "learning_rate": 1.9074527301953116e-05, "loss": 0.0268, "step": 43600 }, { "epoch": 11.792861005949161, "grad_norm": 0.1706446409225464, "learning_rate": 1.9052877681156607e-05, "loss": 0.0258, "step": 43610 }, { "epoch": 11.795565170362359, "grad_norm": 0.13584278523921967, "learning_rate": 1.903123746127875e-05, "loss": 0.0264, "step": 43620 }, { "epoch": 11.798269334775554, "grad_norm": 0.19225984811782837, "learning_rate": 1.900960664889327e-05, "loss": 0.0265, "step": 43630 }, { "epoch": 11.80097349918875, "grad_norm": 0.11911196261644363, "learning_rate": 1.8987985250571015e-05, "loss": 0.0265, "step": 43640 }, { "epoch": 11.803677663601947, "grad_norm": 0.1195574626326561, "learning_rate": 1.8966373272880054e-05, "loss": 0.0259, "step": 43650 }, { "epoch": 11.806381828015143, "grad_norm": 0.1917126476764679, "learning_rate": 1.8944770722385462e-05, "loss": 0.0264, "step": 43660 }, { "epoch": 11.80908599242834, "grad_norm": 0.16300907731056213, "learning_rate": 1.8923177605649576e-05, "loss": 0.0264, "step": 43670 }, { "epoch": 11.811790156841536, "grad_norm": 0.4459216594696045, "learning_rate": 1.8901593929231802e-05, "loss": 0.0265, "step": 43680 }, { "epoch": 11.814494321254733, "grad_norm": 0.21125009655952454, "learning_rate": 1.8880019699688684e-05, "loss": 0.0261, "step": 43690 }, { "epoch": 11.817198485667928, "grad_norm": 0.14331771433353424, "learning_rate": 1.8858454923573904e-05, "loss": 0.0263, "step": 43700 }, { "epoch": 11.819902650081126, "grad_norm": 0.1209273636341095, "learning_rate": 1.8836899607438253e-05, "loss": 0.0263, "step": 43710 }, { "epoch": 11.822606814494321, "grad_norm": 0.3119834065437317, "learning_rate": 1.8815353757829723e-05, "loss": 0.0269, "step": 43720 }, { "epoch": 11.825310978907517, "grad_norm": 0.1554921418428421, "learning_rate": 1.879381738129331e-05, "loss": 0.0279, "step": 43730 }, { "epoch": 11.828015143320714, "grad_norm": 0.09636233747005463, "learning_rate": 1.8772290484371236e-05, "loss": 0.0263, "step": 43740 }, { "epoch": 11.83071930773391, "grad_norm": 0.13324809074401855, "learning_rate": 1.8750773073602795e-05, "loss": 0.0252, "step": 43750 }, { "epoch": 11.833423472147107, "grad_norm": 0.1931629776954651, "learning_rate": 1.8729265155524405e-05, "loss": 0.0272, "step": 43760 }, { "epoch": 11.836127636560303, "grad_norm": 0.19380252063274384, "learning_rate": 1.8707766736669607e-05, "loss": 0.0257, "step": 43770 }, { "epoch": 11.8388318009735, "grad_norm": 0.16914105415344238, "learning_rate": 1.8686277823569055e-05, "loss": 0.0266, "step": 43780 }, { "epoch": 11.841535965386695, "grad_norm": 0.1704787015914917, "learning_rate": 1.8664798422750484e-05, "loss": 0.0258, "step": 43790 }, { "epoch": 11.844240129799893, "grad_norm": 0.17340971529483795, "learning_rate": 1.8643328540738832e-05, "loss": 0.0277, "step": 43800 }, { "epoch": 11.846944294213088, "grad_norm": 0.14594635367393494, "learning_rate": 1.862186818405601e-05, "loss": 0.0254, "step": 43810 }, { "epoch": 11.849648458626284, "grad_norm": 0.14965610206127167, "learning_rate": 1.8600417359221156e-05, "loss": 0.0269, "step": 43820 }, { "epoch": 11.852352623039481, "grad_norm": 0.15943469107151031, "learning_rate": 1.8578976072750454e-05, "loss": 0.0272, "step": 43830 }, { "epoch": 11.855056787452677, "grad_norm": 0.1082923486828804, "learning_rate": 1.8557544331157194e-05, "loss": 0.0246, "step": 43840 }, { "epoch": 11.857760951865874, "grad_norm": 0.1291428506374359, "learning_rate": 1.8536122140951785e-05, "loss": 0.0258, "step": 43850 }, { "epoch": 11.86046511627907, "grad_norm": 0.13450132310390472, "learning_rate": 1.8514709508641688e-05, "loss": 0.0256, "step": 43860 }, { "epoch": 11.863169280692267, "grad_norm": 0.15929554402828217, "learning_rate": 1.8493306440731555e-05, "loss": 0.0253, "step": 43870 }, { "epoch": 11.865873445105462, "grad_norm": 0.11289139091968536, "learning_rate": 1.8471912943723013e-05, "loss": 0.0251, "step": 43880 }, { "epoch": 11.868577609518658, "grad_norm": 0.107899010181427, "learning_rate": 1.8450529024114894e-05, "loss": 0.0253, "step": 43890 }, { "epoch": 11.871281773931855, "grad_norm": 0.17089428007602692, "learning_rate": 1.842915468840301e-05, "loss": 0.0248, "step": 43900 }, { "epoch": 11.87398593834505, "grad_norm": 0.13994698226451874, "learning_rate": 1.840778994308037e-05, "loss": 0.0262, "step": 43910 }, { "epoch": 11.876690102758248, "grad_norm": 0.12978090345859528, "learning_rate": 1.8386434794637004e-05, "loss": 0.0249, "step": 43920 }, { "epoch": 11.879394267171444, "grad_norm": 0.10391809791326523, "learning_rate": 1.8365089249560034e-05, "loss": 0.0265, "step": 43930 }, { "epoch": 11.882098431584641, "grad_norm": 0.1493685245513916, "learning_rate": 1.8343753314333683e-05, "loss": 0.0257, "step": 43940 }, { "epoch": 11.884802595997837, "grad_norm": 0.12329995632171631, "learning_rate": 1.8322426995439236e-05, "loss": 0.0258, "step": 43950 }, { "epoch": 11.887506760411032, "grad_norm": 0.17834533751010895, "learning_rate": 1.8301110299355058e-05, "loss": 0.0249, "step": 43960 }, { "epoch": 11.89021092482423, "grad_norm": 0.1596478670835495, "learning_rate": 1.8279803232556625e-05, "loss": 0.0256, "step": 43970 }, { "epoch": 11.892915089237425, "grad_norm": 0.09956002980470657, "learning_rate": 1.8258505801516444e-05, "loss": 0.0258, "step": 43980 }, { "epoch": 11.895619253650622, "grad_norm": 0.15954618155956268, "learning_rate": 1.8237218012704117e-05, "loss": 0.0262, "step": 43990 }, { "epoch": 11.898323418063818, "grad_norm": 0.1854715347290039, "learning_rate": 1.821593987258631e-05, "loss": 0.0253, "step": 44000 }, { "epoch": 11.901027582477015, "grad_norm": 0.1317727118730545, "learning_rate": 1.8194671387626744e-05, "loss": 0.0252, "step": 44010 }, { "epoch": 11.90373174689021, "grad_norm": 0.16150450706481934, "learning_rate": 1.8173412564286276e-05, "loss": 0.0251, "step": 44020 }, { "epoch": 11.906435911303408, "grad_norm": 0.12199094891548157, "learning_rate": 1.8152163409022697e-05, "loss": 0.0263, "step": 44030 }, { "epoch": 11.909140075716603, "grad_norm": 0.15218986570835114, "learning_rate": 1.8130923928291023e-05, "loss": 0.0265, "step": 44040 }, { "epoch": 11.911844240129799, "grad_norm": 0.18540844321250916, "learning_rate": 1.8109694128543163e-05, "loss": 0.0267, "step": 44050 }, { "epoch": 11.914548404542996, "grad_norm": 0.14199897646903992, "learning_rate": 1.8088474016228237e-05, "loss": 0.0263, "step": 44060 }, { "epoch": 11.917252568956192, "grad_norm": 0.12155754119157791, "learning_rate": 1.8067263597792328e-05, "loss": 0.025, "step": 44070 }, { "epoch": 11.91995673336939, "grad_norm": 0.15032528340816498, "learning_rate": 1.80460628796786e-05, "loss": 0.0247, "step": 44080 }, { "epoch": 11.922660897782585, "grad_norm": 0.15353961288928986, "learning_rate": 1.8024871868327276e-05, "loss": 0.0251, "step": 44090 }, { "epoch": 11.925365062195782, "grad_norm": 0.16627071797847748, "learning_rate": 1.8003690570175608e-05, "loss": 0.0255, "step": 44100 }, { "epoch": 11.928069226608978, "grad_norm": 0.18862801790237427, "learning_rate": 1.7982518991657943e-05, "loss": 0.0257, "step": 44110 }, { "epoch": 11.930773391022175, "grad_norm": 0.4161832630634308, "learning_rate": 1.7961357139205643e-05, "loss": 0.0261, "step": 44120 }, { "epoch": 11.93347755543537, "grad_norm": 0.1764151006937027, "learning_rate": 1.7940205019247108e-05, "loss": 0.0257, "step": 44130 }, { "epoch": 11.936181719848566, "grad_norm": 0.16152212023735046, "learning_rate": 1.79190626382078e-05, "loss": 0.0257, "step": 44140 }, { "epoch": 11.938885884261763, "grad_norm": 0.13300316035747528, "learning_rate": 1.7897930002510215e-05, "loss": 0.0269, "step": 44150 }, { "epoch": 11.941590048674959, "grad_norm": 0.1061762347817421, "learning_rate": 1.787680711857387e-05, "loss": 0.0266, "step": 44160 }, { "epoch": 11.944294213088156, "grad_norm": 0.09759043157100677, "learning_rate": 1.7855693992815398e-05, "loss": 0.0259, "step": 44170 }, { "epoch": 11.946998377501352, "grad_norm": 0.21437811851501465, "learning_rate": 1.7834590631648328e-05, "loss": 0.0263, "step": 44180 }, { "epoch": 11.949702541914549, "grad_norm": 0.22135964035987854, "learning_rate": 1.7813497041483384e-05, "loss": 0.0257, "step": 44190 }, { "epoch": 11.952406706327745, "grad_norm": 0.10514333099126816, "learning_rate": 1.779241322872817e-05, "loss": 0.0267, "step": 44200 }, { "epoch": 11.955110870740942, "grad_norm": 0.12800216674804688, "learning_rate": 1.777133919978744e-05, "loss": 0.0255, "step": 44210 }, { "epoch": 11.957815035154137, "grad_norm": 0.14941544830799103, "learning_rate": 1.7750274961062912e-05, "loss": 0.0252, "step": 44220 }, { "epoch": 11.960519199567333, "grad_norm": 0.15414156019687653, "learning_rate": 1.772922051895335e-05, "loss": 0.0254, "step": 44230 }, { "epoch": 11.96322336398053, "grad_norm": 0.2102106660604477, "learning_rate": 1.770817587985453e-05, "loss": 0.0259, "step": 44240 }, { "epoch": 11.965927528393726, "grad_norm": 0.16570724546909332, "learning_rate": 1.7687141050159246e-05, "loss": 0.0257, "step": 44250 }, { "epoch": 11.968631692806923, "grad_norm": 0.15480700135231018, "learning_rate": 1.7666116036257375e-05, "loss": 0.0259, "step": 44260 }, { "epoch": 11.971335857220119, "grad_norm": 0.20244400203227997, "learning_rate": 1.764510084453569e-05, "loss": 0.0265, "step": 44270 }, { "epoch": 11.974040021633316, "grad_norm": 0.19035643339157104, "learning_rate": 1.76240954813781e-05, "loss": 0.0262, "step": 44280 }, { "epoch": 11.976744186046512, "grad_norm": 0.21033541858196259, "learning_rate": 1.7603099953165476e-05, "loss": 0.0251, "step": 44290 }, { "epoch": 11.979448350459707, "grad_norm": 0.14947423338890076, "learning_rate": 1.7582114266275683e-05, "loss": 0.025, "step": 44300 }, { "epoch": 11.982152514872904, "grad_norm": 0.16350875794887543, "learning_rate": 1.756113842708364e-05, "loss": 0.0262, "step": 44310 }, { "epoch": 11.9848566792861, "grad_norm": 0.17810483276844025, "learning_rate": 1.7540172441961245e-05, "loss": 0.0265, "step": 44320 }, { "epoch": 11.987560843699297, "grad_norm": 0.10970702022314072, "learning_rate": 1.7519216317277387e-05, "loss": 0.0242, "step": 44330 }, { "epoch": 11.990265008112493, "grad_norm": 0.10920870304107666, "learning_rate": 1.7498270059398046e-05, "loss": 0.0261, "step": 44340 }, { "epoch": 11.99296917252569, "grad_norm": 0.14270713925361633, "learning_rate": 1.7477333674686062e-05, "loss": 0.0262, "step": 44350 }, { "epoch": 11.995673336938886, "grad_norm": 0.11829102039337158, "learning_rate": 1.745640716950142e-05, "loss": 0.0266, "step": 44360 }, { "epoch": 11.998377501352083, "grad_norm": 0.1386776566505432, "learning_rate": 1.7435490550201017e-05, "loss": 0.0261, "step": 44370 }, { "epoch": 12.001081665765279, "grad_norm": 0.20471884310245514, "learning_rate": 1.7414583823138762e-05, "loss": 0.0257, "step": 44380 }, { "epoch": 12.003785830178474, "grad_norm": 0.18764536082744598, "learning_rate": 1.739368699466558e-05, "loss": 0.026, "step": 44390 }, { "epoch": 12.006489994591671, "grad_norm": 0.30749186873435974, "learning_rate": 1.737280007112935e-05, "loss": 0.0255, "step": 44400 }, { "epoch": 12.009194159004867, "grad_norm": 0.10895801335573196, "learning_rate": 1.735192305887502e-05, "loss": 0.0252, "step": 44410 }, { "epoch": 12.011898323418064, "grad_norm": 0.1384553760290146, "learning_rate": 1.733105596424441e-05, "loss": 0.0253, "step": 44420 }, { "epoch": 12.01460248783126, "grad_norm": 0.13605520129203796, "learning_rate": 1.7310198793576437e-05, "loss": 0.027, "step": 44430 }, { "epoch": 12.017306652244457, "grad_norm": 0.13945923745632172, "learning_rate": 1.7289351553206952e-05, "loss": 0.0275, "step": 44440 }, { "epoch": 12.020010816657653, "grad_norm": 0.15465860068798065, "learning_rate": 1.7268514249468788e-05, "loss": 0.0261, "step": 44450 }, { "epoch": 12.022714981070848, "grad_norm": 0.20312589406967163, "learning_rate": 1.7247686888691765e-05, "loss": 0.0263, "step": 44460 }, { "epoch": 12.025419145484046, "grad_norm": 0.13972127437591553, "learning_rate": 1.7226869477202694e-05, "loss": 0.026, "step": 44470 }, { "epoch": 12.028123309897241, "grad_norm": 0.11897355318069458, "learning_rate": 1.7206062021325336e-05, "loss": 0.0271, "step": 44480 }, { "epoch": 12.030827474310438, "grad_norm": 0.21641287207603455, "learning_rate": 1.7185264527380502e-05, "loss": 0.0248, "step": 44490 }, { "epoch": 12.033531638723634, "grad_norm": 0.1124999076128006, "learning_rate": 1.716447700168584e-05, "loss": 0.0239, "step": 44500 }, { "epoch": 12.036235803136831, "grad_norm": 0.17467357218265533, "learning_rate": 1.714369945055611e-05, "loss": 0.0263, "step": 44510 }, { "epoch": 12.038939967550027, "grad_norm": 0.15544019639492035, "learning_rate": 1.7122931880302968e-05, "loss": 0.0268, "step": 44520 }, { "epoch": 12.041644131963224, "grad_norm": 0.18112170696258545, "learning_rate": 1.710217429723505e-05, "loss": 0.026, "step": 44530 }, { "epoch": 12.04434829637642, "grad_norm": 0.10450171679258347, "learning_rate": 1.7081426707657972e-05, "loss": 0.0266, "step": 44540 }, { "epoch": 12.047052460789615, "grad_norm": 0.16481435298919678, "learning_rate": 1.7060689117874275e-05, "loss": 0.0267, "step": 44550 }, { "epoch": 12.049756625202813, "grad_norm": 0.17380256950855255, "learning_rate": 1.703996153418354e-05, "loss": 0.0256, "step": 44560 }, { "epoch": 12.052460789616008, "grad_norm": 0.12951385974884033, "learning_rate": 1.7019243962882205e-05, "loss": 0.0256, "step": 44570 }, { "epoch": 12.055164954029205, "grad_norm": 0.15728791058063507, "learning_rate": 1.6998536410263754e-05, "loss": 0.0245, "step": 44580 }, { "epoch": 12.057869118442401, "grad_norm": 0.1275223046541214, "learning_rate": 1.6977838882618596e-05, "loss": 0.0263, "step": 44590 }, { "epoch": 12.060573282855598, "grad_norm": 0.09316928684711456, "learning_rate": 1.6957151386234088e-05, "loss": 0.0252, "step": 44600 }, { "epoch": 12.063277447268794, "grad_norm": 0.12904401123523712, "learning_rate": 1.6936473927394536e-05, "loss": 0.026, "step": 44610 }, { "epoch": 12.065981611681991, "grad_norm": 0.13522212207317352, "learning_rate": 1.6915806512381222e-05, "loss": 0.0255, "step": 44620 }, { "epoch": 12.068685776095187, "grad_norm": 0.15289779007434845, "learning_rate": 1.6895149147472344e-05, "loss": 0.0266, "step": 44630 }, { "epoch": 12.071389940508382, "grad_norm": 0.1587902009487152, "learning_rate": 1.6874501838943073e-05, "loss": 0.0255, "step": 44640 }, { "epoch": 12.07409410492158, "grad_norm": 0.13420294225215912, "learning_rate": 1.6853864593065506e-05, "loss": 0.0249, "step": 44650 }, { "epoch": 12.076798269334775, "grad_norm": 0.17062826454639435, "learning_rate": 1.683323741610871e-05, "loss": 0.0239, "step": 44660 }, { "epoch": 12.079502433747972, "grad_norm": 0.1109967827796936, "learning_rate": 1.6812620314338674e-05, "loss": 0.0244, "step": 44670 }, { "epoch": 12.082206598161168, "grad_norm": 0.1509084850549698, "learning_rate": 1.6792013294018326e-05, "loss": 0.0242, "step": 44680 }, { "epoch": 12.084910762574365, "grad_norm": 0.12829895317554474, "learning_rate": 1.6771416361407526e-05, "loss": 0.0254, "step": 44690 }, { "epoch": 12.08761492698756, "grad_norm": 0.16758091747760773, "learning_rate": 1.675082952276308e-05, "loss": 0.0263, "step": 44700 }, { "epoch": 12.090319091400756, "grad_norm": 0.12072828412055969, "learning_rate": 1.6730252784338757e-05, "loss": 0.0251, "step": 44710 }, { "epoch": 12.093023255813954, "grad_norm": 0.15430477261543274, "learning_rate": 1.6709686152385166e-05, "loss": 0.0263, "step": 44720 }, { "epoch": 12.09572742022715, "grad_norm": 0.19397754967212677, "learning_rate": 1.668912963314998e-05, "loss": 0.0252, "step": 44730 }, { "epoch": 12.098431584640347, "grad_norm": 0.1792965829372406, "learning_rate": 1.6668583232877653e-05, "loss": 0.0242, "step": 44740 }, { "epoch": 12.101135749053542, "grad_norm": 0.13770967721939087, "learning_rate": 1.6648046957809698e-05, "loss": 0.0266, "step": 44750 }, { "epoch": 12.10383991346674, "grad_norm": 0.23661863803863525, "learning_rate": 1.6627520814184462e-05, "loss": 0.0237, "step": 44760 }, { "epoch": 12.106544077879935, "grad_norm": 0.14595036208629608, "learning_rate": 1.660700480823726e-05, "loss": 0.0263, "step": 44770 }, { "epoch": 12.109248242293132, "grad_norm": 0.19315382838249207, "learning_rate": 1.65864989462003e-05, "loss": 0.0245, "step": 44780 }, { "epoch": 12.111952406706328, "grad_norm": 0.15881282091140747, "learning_rate": 1.656600323430273e-05, "loss": 0.0258, "step": 44790 }, { "epoch": 12.114656571119523, "grad_norm": 0.18973232805728912, "learning_rate": 1.654551767877059e-05, "loss": 0.026, "step": 44800 }, { "epoch": 12.11736073553272, "grad_norm": 0.1506289839744568, "learning_rate": 1.6525042285826874e-05, "loss": 0.0254, "step": 44810 }, { "epoch": 12.120064899945916, "grad_norm": 0.11211223900318146, "learning_rate": 1.6504577061691468e-05, "loss": 0.0252, "step": 44820 }, { "epoch": 12.122769064359114, "grad_norm": 0.1477079838514328, "learning_rate": 1.6484122012581143e-05, "loss": 0.026, "step": 44830 }, { "epoch": 12.125473228772309, "grad_norm": 0.1389399766921997, "learning_rate": 1.6463677144709623e-05, "loss": 0.0253, "step": 44840 }, { "epoch": 12.128177393185506, "grad_norm": 0.1441994458436966, "learning_rate": 1.6443242464287493e-05, "loss": 0.0235, "step": 44850 }, { "epoch": 12.130881557598702, "grad_norm": 0.15515591204166412, "learning_rate": 1.642281797752232e-05, "loss": 0.026, "step": 44860 }, { "epoch": 12.133585722011897, "grad_norm": 0.1158885732293129, "learning_rate": 1.6402403690618456e-05, "loss": 0.0244, "step": 44870 }, { "epoch": 12.136289886425095, "grad_norm": 0.19173277914524078, "learning_rate": 1.6381999609777295e-05, "loss": 0.0248, "step": 44880 }, { "epoch": 12.13899405083829, "grad_norm": 0.12129253149032593, "learning_rate": 1.6361605741196983e-05, "loss": 0.0271, "step": 44890 }, { "epoch": 12.141698215251488, "grad_norm": 0.1219465583562851, "learning_rate": 1.63412220910727e-05, "loss": 0.0259, "step": 44900 }, { "epoch": 12.144402379664683, "grad_norm": 0.4437129497528076, "learning_rate": 1.6320848665596433e-05, "loss": 0.0246, "step": 44910 }, { "epoch": 12.14710654407788, "grad_norm": 0.2443065047264099, "learning_rate": 1.6300485470957095e-05, "loss": 0.0255, "step": 44920 }, { "epoch": 12.149810708491076, "grad_norm": 0.3184770941734314, "learning_rate": 1.6280132513340483e-05, "loss": 0.025, "step": 44930 }, { "epoch": 12.152514872904273, "grad_norm": 0.09450826048851013, "learning_rate": 1.62597897989293e-05, "loss": 0.0251, "step": 44940 }, { "epoch": 12.155219037317469, "grad_norm": 0.14262844622135162, "learning_rate": 1.623945733390309e-05, "loss": 0.025, "step": 44950 }, { "epoch": 12.157923201730664, "grad_norm": 0.11435739696025848, "learning_rate": 1.6219135124438374e-05, "loss": 0.0254, "step": 44960 }, { "epoch": 12.160627366143862, "grad_norm": 0.15763962268829346, "learning_rate": 1.6198823176708465e-05, "loss": 0.0249, "step": 44970 }, { "epoch": 12.163331530557057, "grad_norm": 0.12367342412471771, "learning_rate": 1.6178521496883613e-05, "loss": 0.0261, "step": 44980 }, { "epoch": 12.166035694970255, "grad_norm": 0.17919771373271942, "learning_rate": 1.6158230091130926e-05, "loss": 0.0245, "step": 44990 }, { "epoch": 12.16873985938345, "grad_norm": 0.21480010449886322, "learning_rate": 1.613794896561438e-05, "loss": 0.0254, "step": 45000 }, { "epoch": 12.171444023796647, "grad_norm": 0.19413624703884125, "learning_rate": 1.6117678126494894e-05, "loss": 0.0246, "step": 45010 }, { "epoch": 12.174148188209843, "grad_norm": 0.21594834327697754, "learning_rate": 1.6097417579930153e-05, "loss": 0.0255, "step": 45020 }, { "epoch": 12.17685235262304, "grad_norm": 0.14314484596252441, "learning_rate": 1.6077167332074834e-05, "loss": 0.0253, "step": 45030 }, { "epoch": 12.179556517036236, "grad_norm": 0.2769183814525604, "learning_rate": 1.605692738908037e-05, "loss": 0.0247, "step": 45040 }, { "epoch": 12.182260681449431, "grad_norm": 0.22657540440559387, "learning_rate": 1.6036697757095176e-05, "loss": 0.026, "step": 45050 }, { "epoch": 12.184964845862629, "grad_norm": 0.1700878143310547, "learning_rate": 1.6016478442264428e-05, "loss": 0.0275, "step": 45060 }, { "epoch": 12.187669010275824, "grad_norm": 0.2090195268392563, "learning_rate": 1.599626945073026e-05, "loss": 0.0259, "step": 45070 }, { "epoch": 12.190373174689022, "grad_norm": 0.11856559664011002, "learning_rate": 1.597607078863162e-05, "loss": 0.0263, "step": 45080 }, { "epoch": 12.193077339102217, "grad_norm": 0.15381835401058197, "learning_rate": 1.595588246210432e-05, "loss": 0.0244, "step": 45090 }, { "epoch": 12.195781503515414, "grad_norm": 0.11696349829435349, "learning_rate": 1.5935704477281048e-05, "loss": 0.0258, "step": 45100 }, { "epoch": 12.19848566792861, "grad_norm": 0.22275033593177795, "learning_rate": 1.5915536840291323e-05, "loss": 0.0252, "step": 45110 }, { "epoch": 12.201189832341806, "grad_norm": 0.16379030048847198, "learning_rate": 1.5895379557261576e-05, "loss": 0.0272, "step": 45120 }, { "epoch": 12.203893996755003, "grad_norm": 0.3401292860507965, "learning_rate": 1.5875232634315033e-05, "loss": 0.0265, "step": 45130 }, { "epoch": 12.206598161168198, "grad_norm": 0.20272815227508545, "learning_rate": 1.5855096077571812e-05, "loss": 0.0254, "step": 45140 }, { "epoch": 12.209302325581396, "grad_norm": 0.14493057131767273, "learning_rate": 1.5834969893148855e-05, "loss": 0.0252, "step": 45150 }, { "epoch": 12.212006489994591, "grad_norm": 0.15928250551223755, "learning_rate": 1.581485408715997e-05, "loss": 0.0256, "step": 45160 }, { "epoch": 12.214710654407789, "grad_norm": 0.126911923289299, "learning_rate": 1.5794748665715785e-05, "loss": 0.0261, "step": 45170 }, { "epoch": 12.217414818820984, "grad_norm": 0.1428125947713852, "learning_rate": 1.5774653634923857e-05, "loss": 0.0251, "step": 45180 }, { "epoch": 12.220118983234181, "grad_norm": 0.15458662807941437, "learning_rate": 1.575456900088845e-05, "loss": 0.0267, "step": 45190 }, { "epoch": 12.222823147647377, "grad_norm": 0.11876270174980164, "learning_rate": 1.5734494769710816e-05, "loss": 0.0243, "step": 45200 }, { "epoch": 12.225527312060573, "grad_norm": 0.12902578711509705, "learning_rate": 1.5714430947488912e-05, "loss": 0.0264, "step": 45210 }, { "epoch": 12.22823147647377, "grad_norm": 0.18192648887634277, "learning_rate": 1.5694377540317645e-05, "loss": 0.0243, "step": 45220 }, { "epoch": 12.230935640886965, "grad_norm": 0.1498338133096695, "learning_rate": 1.5674334554288694e-05, "loss": 0.0261, "step": 45230 }, { "epoch": 12.233639805300163, "grad_norm": 0.09480101615190506, "learning_rate": 1.5654301995490582e-05, "loss": 0.0263, "step": 45240 }, { "epoch": 12.236343969713358, "grad_norm": 0.262789249420166, "learning_rate": 1.5634279870008685e-05, "loss": 0.0238, "step": 45250 }, { "epoch": 12.239048134126556, "grad_norm": 0.1423952877521515, "learning_rate": 1.5614268183925174e-05, "loss": 0.0243, "step": 45260 }, { "epoch": 12.241752298539751, "grad_norm": 0.23678939044475555, "learning_rate": 1.5594266943319097e-05, "loss": 0.0268, "step": 45270 }, { "epoch": 12.244456462952947, "grad_norm": 0.26677456498146057, "learning_rate": 1.5574276154266294e-05, "loss": 0.0269, "step": 45280 }, { "epoch": 12.247160627366144, "grad_norm": 0.10542052239179611, "learning_rate": 1.5554295822839437e-05, "loss": 0.0243, "step": 45290 }, { "epoch": 12.24986479177934, "grad_norm": 0.1256306767463684, "learning_rate": 1.5534325955108025e-05, "loss": 0.0256, "step": 45300 }, { "epoch": 12.252568956192537, "grad_norm": 0.15218694508075714, "learning_rate": 1.5514366557138373e-05, "loss": 0.0253, "step": 45310 }, { "epoch": 12.255273120605732, "grad_norm": 0.09001204371452332, "learning_rate": 1.5494417634993602e-05, "loss": 0.0251, "step": 45320 }, { "epoch": 12.25797728501893, "grad_norm": 0.09365781396627426, "learning_rate": 1.547447919473372e-05, "loss": 0.0259, "step": 45330 }, { "epoch": 12.260681449432125, "grad_norm": 0.11992832273244858, "learning_rate": 1.5454551242415434e-05, "loss": 0.0254, "step": 45340 }, { "epoch": 12.263385613845323, "grad_norm": 0.20916159451007843, "learning_rate": 1.543463378409239e-05, "loss": 0.0262, "step": 45350 }, { "epoch": 12.266089778258518, "grad_norm": 0.12178001552820206, "learning_rate": 1.541472682581493e-05, "loss": 0.0245, "step": 45360 }, { "epoch": 12.268793942671714, "grad_norm": 0.1734214574098587, "learning_rate": 1.5394830373630298e-05, "loss": 0.0257, "step": 45370 }, { "epoch": 12.271498107084911, "grad_norm": 0.1851203590631485, "learning_rate": 1.5374944433582506e-05, "loss": 0.0248, "step": 45380 }, { "epoch": 12.274202271498106, "grad_norm": 0.22746630012989044, "learning_rate": 1.5355069011712375e-05, "loss": 0.0241, "step": 45390 }, { "epoch": 12.276906435911304, "grad_norm": 0.12653763592243195, "learning_rate": 1.5335204114057526e-05, "loss": 0.0247, "step": 45400 }, { "epoch": 12.2796106003245, "grad_norm": 0.12773184478282928, "learning_rate": 1.5315349746652387e-05, "loss": 0.0237, "step": 45410 }, { "epoch": 12.282314764737697, "grad_norm": 0.13916632533073425, "learning_rate": 1.5295505915528212e-05, "loss": 0.0262, "step": 45420 }, { "epoch": 12.285018929150892, "grad_norm": 0.24091297388076782, "learning_rate": 1.5275672626713024e-05, "loss": 0.0243, "step": 45430 }, { "epoch": 12.28772309356409, "grad_norm": 0.18604744970798492, "learning_rate": 1.5255849886231643e-05, "loss": 0.0254, "step": 45440 }, { "epoch": 12.290427257977285, "grad_norm": 0.16148322820663452, "learning_rate": 1.523603770010571e-05, "loss": 0.0239, "step": 45450 }, { "epoch": 12.29313142239048, "grad_norm": 0.1880197376012802, "learning_rate": 1.521623607435363e-05, "loss": 0.0251, "step": 45460 }, { "epoch": 12.295835586803678, "grad_norm": 0.3030557334423065, "learning_rate": 1.5196445014990612e-05, "loss": 0.0264, "step": 45470 }, { "epoch": 12.298539751216873, "grad_norm": 0.29809463024139404, "learning_rate": 1.5176664528028672e-05, "loss": 0.0275, "step": 45480 }, { "epoch": 12.30124391563007, "grad_norm": 0.18295501172542572, "learning_rate": 1.5156894619476574e-05, "loss": 0.0261, "step": 45490 }, { "epoch": 12.303948080043266, "grad_norm": 0.1935369223356247, "learning_rate": 1.5137135295339938e-05, "loss": 0.0253, "step": 45500 }, { "epoch": 12.306652244456464, "grad_norm": 0.1724218726158142, "learning_rate": 1.5117386561621073e-05, "loss": 0.0255, "step": 45510 }, { "epoch": 12.30935640886966, "grad_norm": 0.14674651622772217, "learning_rate": 1.5097648424319167e-05, "loss": 0.0256, "step": 45520 }, { "epoch": 12.312060573282855, "grad_norm": 0.1848209798336029, "learning_rate": 1.5077920889430119e-05, "loss": 0.0263, "step": 45530 }, { "epoch": 12.314764737696052, "grad_norm": 0.14639046788215637, "learning_rate": 1.5058203962946644e-05, "loss": 0.0267, "step": 45540 }, { "epoch": 12.317468902109248, "grad_norm": 0.15350309014320374, "learning_rate": 1.503849765085822e-05, "loss": 0.0246, "step": 45550 }, { "epoch": 12.320173066522445, "grad_norm": 0.12193349748849869, "learning_rate": 1.501880195915109e-05, "loss": 0.0234, "step": 45560 }, { "epoch": 12.32287723093564, "grad_norm": 0.12110470980405807, "learning_rate": 1.499911689380833e-05, "loss": 0.0252, "step": 45570 }, { "epoch": 12.325581395348838, "grad_norm": 0.14126503467559814, "learning_rate": 1.4979442460809683e-05, "loss": 0.0259, "step": 45580 }, { "epoch": 12.328285559762033, "grad_norm": 0.20594291388988495, "learning_rate": 1.4959778666131763e-05, "loss": 0.0251, "step": 45590 }, { "epoch": 12.33098972417523, "grad_norm": 0.18378067016601562, "learning_rate": 1.4940125515747905e-05, "loss": 0.0247, "step": 45600 }, { "epoch": 12.333693888588426, "grad_norm": 0.15415361523628235, "learning_rate": 1.4920483015628211e-05, "loss": 0.0249, "step": 45610 }, { "epoch": 12.336398053001622, "grad_norm": 0.10597135871648788, "learning_rate": 1.490085117173956e-05, "loss": 0.0245, "step": 45620 }, { "epoch": 12.339102217414819, "grad_norm": 0.1363566666841507, "learning_rate": 1.488122999004558e-05, "loss": 0.0239, "step": 45630 }, { "epoch": 12.341806381828015, "grad_norm": 0.1743663102388382, "learning_rate": 1.486161947650666e-05, "loss": 0.0244, "step": 45640 }, { "epoch": 12.344510546241212, "grad_norm": 0.1304403841495514, "learning_rate": 1.4842019637079995e-05, "loss": 0.0258, "step": 45650 }, { "epoch": 12.347214710654407, "grad_norm": 0.10313478857278824, "learning_rate": 1.482243047771944e-05, "loss": 0.0243, "step": 45660 }, { "epoch": 12.349918875067605, "grad_norm": 0.11365416646003723, "learning_rate": 1.4802852004375712e-05, "loss": 0.0257, "step": 45670 }, { "epoch": 12.3526230394808, "grad_norm": 0.26068004965782166, "learning_rate": 1.4783284222996218e-05, "loss": 0.0258, "step": 45680 }, { "epoch": 12.355327203893996, "grad_norm": 0.13547474145889282, "learning_rate": 1.4763727139525135e-05, "loss": 0.0249, "step": 45690 }, { "epoch": 12.358031368307193, "grad_norm": 0.12542442977428436, "learning_rate": 1.4744180759903392e-05, "loss": 0.0252, "step": 45700 }, { "epoch": 12.360735532720389, "grad_norm": 0.15256370604038239, "learning_rate": 1.4724645090068635e-05, "loss": 0.0261, "step": 45710 }, { "epoch": 12.363439697133586, "grad_norm": 0.12247292697429657, "learning_rate": 1.4705120135955341e-05, "loss": 0.0268, "step": 45720 }, { "epoch": 12.366143861546782, "grad_norm": 0.23445019125938416, "learning_rate": 1.4685605903494614e-05, "loss": 0.0246, "step": 45730 }, { "epoch": 12.368848025959979, "grad_norm": 0.16489705443382263, "learning_rate": 1.46661023986144e-05, "loss": 0.0252, "step": 45740 }, { "epoch": 12.371552190373174, "grad_norm": 0.2225542962551117, "learning_rate": 1.4646609627239344e-05, "loss": 0.0249, "step": 45750 }, { "epoch": 12.374256354786372, "grad_norm": 0.10411888360977173, "learning_rate": 1.4627127595290835e-05, "loss": 0.0263, "step": 45760 }, { "epoch": 12.376960519199567, "grad_norm": 0.268192857503891, "learning_rate": 1.460765630868699e-05, "loss": 0.0251, "step": 45770 }, { "epoch": 12.379664683612763, "grad_norm": 0.21130184829235077, "learning_rate": 1.4588195773342678e-05, "loss": 0.0246, "step": 45780 }, { "epoch": 12.38236884802596, "grad_norm": 0.14272911846637726, "learning_rate": 1.4568745995169485e-05, "loss": 0.0247, "step": 45790 }, { "epoch": 12.385073012439156, "grad_norm": 0.12802597880363464, "learning_rate": 1.4549306980075778e-05, "loss": 0.025, "step": 45800 }, { "epoch": 12.387777176852353, "grad_norm": 0.10084414482116699, "learning_rate": 1.4529878733966557e-05, "loss": 0.025, "step": 45810 }, { "epoch": 12.390481341265549, "grad_norm": 0.15798792243003845, "learning_rate": 1.4510461262743658e-05, "loss": 0.0234, "step": 45820 }, { "epoch": 12.393185505678746, "grad_norm": 0.1335238665342331, "learning_rate": 1.4491054572305585e-05, "loss": 0.0258, "step": 45830 }, { "epoch": 12.395889670091941, "grad_norm": 0.12404002249240875, "learning_rate": 1.4471658668547566e-05, "loss": 0.0248, "step": 45840 }, { "epoch": 12.398593834505139, "grad_norm": 0.31441444158554077, "learning_rate": 1.4452273557361579e-05, "loss": 0.0251, "step": 45850 }, { "epoch": 12.401297998918334, "grad_norm": 0.16227610409259796, "learning_rate": 1.4432899244636282e-05, "loss": 0.0258, "step": 45860 }, { "epoch": 12.40400216333153, "grad_norm": 0.15788231790065765, "learning_rate": 1.4413535736257134e-05, "loss": 0.0267, "step": 45870 }, { "epoch": 12.406706327744727, "grad_norm": 0.16869355738162994, "learning_rate": 1.439418303810619e-05, "loss": 0.0251, "step": 45880 }, { "epoch": 12.409410492157923, "grad_norm": 0.12965090572834015, "learning_rate": 1.4374841156062352e-05, "loss": 0.0245, "step": 45890 }, { "epoch": 12.41211465657112, "grad_norm": 0.13069617748260498, "learning_rate": 1.4355510096001112e-05, "loss": 0.0274, "step": 45900 }, { "epoch": 12.414818820984316, "grad_norm": 0.14998458325862885, "learning_rate": 1.4336189863794786e-05, "loss": 0.0257, "step": 45910 }, { "epoch": 12.417522985397513, "grad_norm": 0.1567627489566803, "learning_rate": 1.4316880465312327e-05, "loss": 0.0257, "step": 45920 }, { "epoch": 12.420227149810708, "grad_norm": 0.17863427102565765, "learning_rate": 1.4297581906419426e-05, "loss": 0.0251, "step": 45930 }, { "epoch": 12.422931314223904, "grad_norm": 0.19640997052192688, "learning_rate": 1.4278294192978475e-05, "loss": 0.0256, "step": 45940 }, { "epoch": 12.425635478637101, "grad_norm": 0.20012062788009644, "learning_rate": 1.4259017330848574e-05, "loss": 0.0249, "step": 45950 }, { "epoch": 12.428339643050297, "grad_norm": 0.17871831357479095, "learning_rate": 1.4239751325885498e-05, "loss": 0.026, "step": 45960 }, { "epoch": 12.431043807463494, "grad_norm": 0.16573995351791382, "learning_rate": 1.4220496183941795e-05, "loss": 0.0245, "step": 45970 }, { "epoch": 12.43374797187669, "grad_norm": 0.12722834944725037, "learning_rate": 1.4201251910866648e-05, "loss": 0.0246, "step": 45980 }, { "epoch": 12.436452136289887, "grad_norm": 0.14216113090515137, "learning_rate": 1.4182018512505957e-05, "loss": 0.0263, "step": 45990 }, { "epoch": 12.439156300703083, "grad_norm": 0.0900987982749939, "learning_rate": 1.4162795994702327e-05, "loss": 0.0243, "step": 46000 }, { "epoch": 12.44186046511628, "grad_norm": 0.13580946624279022, "learning_rate": 1.4143584363295032e-05, "loss": 0.0265, "step": 46010 }, { "epoch": 12.444564629529475, "grad_norm": 0.11680833250284195, "learning_rate": 1.4124383624120101e-05, "loss": 0.0257, "step": 46020 }, { "epoch": 12.447268793942671, "grad_norm": 0.10509275645017624, "learning_rate": 1.4105193783010151e-05, "loss": 0.0247, "step": 46030 }, { "epoch": 12.449972958355868, "grad_norm": 0.19104637205600739, "learning_rate": 1.4086014845794621e-05, "loss": 0.0262, "step": 46040 }, { "epoch": 12.452677122769064, "grad_norm": 0.1105848029255867, "learning_rate": 1.4066846818299489e-05, "loss": 0.0254, "step": 46050 }, { "epoch": 12.455381287182261, "grad_norm": 0.19974465668201447, "learning_rate": 1.4047689706347555e-05, "loss": 0.0246, "step": 46060 }, { "epoch": 12.458085451595457, "grad_norm": 0.1434377133846283, "learning_rate": 1.402854351575822e-05, "loss": 0.0253, "step": 46070 }, { "epoch": 12.460789616008654, "grad_norm": 0.1663067787885666, "learning_rate": 1.4009408252347588e-05, "loss": 0.0254, "step": 46080 }, { "epoch": 12.46349378042185, "grad_norm": 0.13479438424110413, "learning_rate": 1.399028392192846e-05, "loss": 0.0262, "step": 46090 }, { "epoch": 12.466197944835045, "grad_norm": 0.24231602251529694, "learning_rate": 1.397117053031029e-05, "loss": 0.0272, "step": 46100 }, { "epoch": 12.468902109248242, "grad_norm": 0.1602584272623062, "learning_rate": 1.3952068083299213e-05, "loss": 0.0246, "step": 46110 }, { "epoch": 12.471606273661438, "grad_norm": 0.2135687917470932, "learning_rate": 1.3932976586698082e-05, "loss": 0.024, "step": 46120 }, { "epoch": 12.474310438074635, "grad_norm": 0.15560032427310944, "learning_rate": 1.3913896046306363e-05, "loss": 0.0255, "step": 46130 }, { "epoch": 12.47701460248783, "grad_norm": 0.17020249366760254, "learning_rate": 1.389482646792023e-05, "loss": 0.0246, "step": 46140 }, { "epoch": 12.479718766901028, "grad_norm": 0.24739046394824982, "learning_rate": 1.387576785733251e-05, "loss": 0.0262, "step": 46150 }, { "epoch": 12.482422931314224, "grad_norm": 0.2931630611419678, "learning_rate": 1.3856720220332703e-05, "loss": 0.0248, "step": 46160 }, { "epoch": 12.485127095727421, "grad_norm": 0.13994699716567993, "learning_rate": 1.383768356270701e-05, "loss": 0.0255, "step": 46170 }, { "epoch": 12.487831260140617, "grad_norm": 0.14888010919094086, "learning_rate": 1.3818657890238207e-05, "loss": 0.0263, "step": 46180 }, { "epoch": 12.490535424553812, "grad_norm": 0.20547610521316528, "learning_rate": 1.3799643208705859e-05, "loss": 0.0247, "step": 46190 }, { "epoch": 12.49323958896701, "grad_norm": 0.13002830743789673, "learning_rate": 1.3780639523886058e-05, "loss": 0.0254, "step": 46200 }, { "epoch": 12.495943753380205, "grad_norm": 0.1266130656003952, "learning_rate": 1.3761646841551668e-05, "loss": 0.0263, "step": 46210 }, { "epoch": 12.498647917793402, "grad_norm": 0.13477011024951935, "learning_rate": 1.3742665167472146e-05, "loss": 0.0265, "step": 46220 }, { "epoch": 12.501352082206598, "grad_norm": 0.1157657578587532, "learning_rate": 1.372369450741363e-05, "loss": 0.0241, "step": 46230 }, { "epoch": 12.504056246619795, "grad_norm": 0.44170081615448, "learning_rate": 1.3704734867138901e-05, "loss": 0.0253, "step": 46240 }, { "epoch": 12.50676041103299, "grad_norm": 0.22039549052715302, "learning_rate": 1.36857862524074e-05, "loss": 0.024, "step": 46250 }, { "epoch": 12.509464575446188, "grad_norm": 0.2006164938211441, "learning_rate": 1.3666848668975213e-05, "loss": 0.0247, "step": 46260 }, { "epoch": 12.512168739859383, "grad_norm": 0.2159905731678009, "learning_rate": 1.3647922122595063e-05, "loss": 0.0265, "step": 46270 }, { "epoch": 12.514872904272579, "grad_norm": 0.1515662968158722, "learning_rate": 1.3629006619016366e-05, "loss": 0.0257, "step": 46280 }, { "epoch": 12.517577068685776, "grad_norm": 0.17013034224510193, "learning_rate": 1.3610102163985139e-05, "loss": 0.0248, "step": 46290 }, { "epoch": 12.520281233098972, "grad_norm": 0.12341276556253433, "learning_rate": 1.3591208763244057e-05, "loss": 0.0249, "step": 46300 }, { "epoch": 12.52298539751217, "grad_norm": 0.2019413411617279, "learning_rate": 1.3572326422532428e-05, "loss": 0.024, "step": 46310 }, { "epoch": 12.525689561925365, "grad_norm": 0.10994008928537369, "learning_rate": 1.355345514758622e-05, "loss": 0.0244, "step": 46320 }, { "epoch": 12.528393726338562, "grad_norm": 0.13778527081012726, "learning_rate": 1.3534594944138007e-05, "loss": 0.0243, "step": 46330 }, { "epoch": 12.531097890751758, "grad_norm": 0.14438140392303467, "learning_rate": 1.3515745817917069e-05, "loss": 0.0264, "step": 46340 }, { "epoch": 12.533802055164955, "grad_norm": 0.12411519885063171, "learning_rate": 1.3496907774649208e-05, "loss": 0.0249, "step": 46350 }, { "epoch": 12.53650621957815, "grad_norm": 0.19693857431411743, "learning_rate": 1.3478080820056987e-05, "loss": 0.027, "step": 46360 }, { "epoch": 12.539210383991346, "grad_norm": 0.12314782291650772, "learning_rate": 1.3459264959859474e-05, "loss": 0.0257, "step": 46370 }, { "epoch": 12.541914548404543, "grad_norm": 0.13199804723262787, "learning_rate": 1.3440460199772487e-05, "loss": 0.026, "step": 46380 }, { "epoch": 12.544618712817739, "grad_norm": 0.15037980675697327, "learning_rate": 1.3421666545508382e-05, "loss": 0.0245, "step": 46390 }, { "epoch": 12.547322877230936, "grad_norm": 0.1490054577589035, "learning_rate": 1.3402884002776194e-05, "loss": 0.0257, "step": 46400 }, { "epoch": 12.550027041644132, "grad_norm": 0.23432253301143646, "learning_rate": 1.3384112577281555e-05, "loss": 0.0249, "step": 46410 }, { "epoch": 12.552731206057329, "grad_norm": 0.21325665712356567, "learning_rate": 1.3365352274726711e-05, "loss": 0.0252, "step": 46420 }, { "epoch": 12.555435370470525, "grad_norm": 0.10658405721187592, "learning_rate": 1.3346603100810578e-05, "loss": 0.026, "step": 46430 }, { "epoch": 12.55813953488372, "grad_norm": 0.1356862485408783, "learning_rate": 1.3327865061228645e-05, "loss": 0.0239, "step": 46440 }, { "epoch": 12.560843699296917, "grad_norm": 0.1143825501203537, "learning_rate": 1.330913816167304e-05, "loss": 0.0248, "step": 46450 }, { "epoch": 12.563547863710113, "grad_norm": 0.1736029088497162, "learning_rate": 1.3290422407832492e-05, "loss": 0.027, "step": 46460 }, { "epoch": 12.56625202812331, "grad_norm": 0.13805490732192993, "learning_rate": 1.3271717805392354e-05, "loss": 0.0249, "step": 46470 }, { "epoch": 12.568956192536506, "grad_norm": 0.21206723153591156, "learning_rate": 1.3253024360034582e-05, "loss": 0.0249, "step": 46480 }, { "epoch": 12.571660356949703, "grad_norm": 0.15536561608314514, "learning_rate": 1.323434207743779e-05, "loss": 0.0238, "step": 46490 }, { "epoch": 12.574364521362899, "grad_norm": 0.14400127530097961, "learning_rate": 1.3215670963277105e-05, "loss": 0.0254, "step": 46500 }, { "epoch": 12.577068685776094, "grad_norm": 0.1697234809398651, "learning_rate": 1.3197011023224376e-05, "loss": 0.0257, "step": 46510 }, { "epoch": 12.579772850189292, "grad_norm": 0.14165054261684418, "learning_rate": 1.3178362262947941e-05, "loss": 0.0253, "step": 46520 }, { "epoch": 12.582477014602487, "grad_norm": 0.22103451192378998, "learning_rate": 1.3159724688112845e-05, "loss": 0.025, "step": 46530 }, { "epoch": 12.585181179015684, "grad_norm": 0.14421489834785461, "learning_rate": 1.3141098304380683e-05, "loss": 0.0245, "step": 46540 }, { "epoch": 12.58788534342888, "grad_norm": 0.1752629578113556, "learning_rate": 1.3122483117409651e-05, "loss": 0.025, "step": 46550 }, { "epoch": 12.590589507842077, "grad_norm": 0.13930974900722504, "learning_rate": 1.3103879132854552e-05, "loss": 0.0256, "step": 46560 }, { "epoch": 12.593293672255273, "grad_norm": 0.15864309668540955, "learning_rate": 1.3085286356366771e-05, "loss": 0.0248, "step": 46570 }, { "epoch": 12.59599783666847, "grad_norm": 0.14837875962257385, "learning_rate": 1.3066704793594337e-05, "loss": 0.0248, "step": 46580 }, { "epoch": 12.598702001081666, "grad_norm": 0.09976450353860855, "learning_rate": 1.3048134450181816e-05, "loss": 0.0245, "step": 46590 }, { "epoch": 12.601406165494861, "grad_norm": 0.1079854890704155, "learning_rate": 1.3029575331770394e-05, "loss": 0.0238, "step": 46600 }, { "epoch": 12.604110329908059, "grad_norm": 0.131732776761055, "learning_rate": 1.3011027443997837e-05, "loss": 0.0236, "step": 46610 }, { "epoch": 12.606814494321254, "grad_norm": 0.16390633583068848, "learning_rate": 1.2992490792498507e-05, "loss": 0.0246, "step": 46620 }, { "epoch": 12.609518658734451, "grad_norm": 0.20439022779464722, "learning_rate": 1.297396538290333e-05, "loss": 0.0244, "step": 46630 }, { "epoch": 12.612222823147647, "grad_norm": 0.18712541460990906, "learning_rate": 1.2955451220839888e-05, "loss": 0.0254, "step": 46640 }, { "epoch": 12.614926987560844, "grad_norm": 0.18493522703647614, "learning_rate": 1.2936948311932223e-05, "loss": 0.0243, "step": 46650 }, { "epoch": 12.61763115197404, "grad_norm": 0.11484549939632416, "learning_rate": 1.2918456661801104e-05, "loss": 0.0264, "step": 46660 }, { "epoch": 12.620335316387237, "grad_norm": 0.1250106394290924, "learning_rate": 1.2899976276063736e-05, "loss": 0.0249, "step": 46670 }, { "epoch": 12.623039480800433, "grad_norm": 0.12397197633981705, "learning_rate": 1.2881507160334022e-05, "loss": 0.0257, "step": 46680 }, { "epoch": 12.625743645213628, "grad_norm": 0.12620949745178223, "learning_rate": 1.286304932022238e-05, "loss": 0.0234, "step": 46690 }, { "epoch": 12.628447809626826, "grad_norm": 0.1447557806968689, "learning_rate": 1.2844602761335806e-05, "loss": 0.0243, "step": 46700 }, { "epoch": 12.631151974040021, "grad_norm": 0.27888914942741394, "learning_rate": 1.2826167489277885e-05, "loss": 0.0255, "step": 46710 }, { "epoch": 12.633856138453218, "grad_norm": 0.1742411106824875, "learning_rate": 1.2807743509648745e-05, "loss": 0.0261, "step": 46720 }, { "epoch": 12.636560302866414, "grad_norm": 0.16668574512004852, "learning_rate": 1.2789330828045149e-05, "loss": 0.0249, "step": 46730 }, { "epoch": 12.639264467279611, "grad_norm": 0.19213682413101196, "learning_rate": 1.2770929450060332e-05, "loss": 0.025, "step": 46740 }, { "epoch": 12.641968631692807, "grad_norm": 0.1222727969288826, "learning_rate": 1.2752539381284184e-05, "loss": 0.024, "step": 46750 }, { "epoch": 12.644672796106004, "grad_norm": 0.1154925674200058, "learning_rate": 1.273416062730311e-05, "loss": 0.0245, "step": 46760 }, { "epoch": 12.6473769605192, "grad_norm": 0.20424829423427582, "learning_rate": 1.2715793193700088e-05, "loss": 0.0235, "step": 46770 }, { "epoch": 12.650081124932395, "grad_norm": 0.2800598442554474, "learning_rate": 1.2697437086054664e-05, "loss": 0.0259, "step": 46780 }, { "epoch": 12.652785289345593, "grad_norm": 0.14035993814468384, "learning_rate": 1.2679092309942937e-05, "loss": 0.0253, "step": 46790 }, { "epoch": 12.655489453758788, "grad_norm": 0.2067301869392395, "learning_rate": 1.266075887093755e-05, "loss": 0.0249, "step": 46800 }, { "epoch": 12.658193618171985, "grad_norm": 0.2662283778190613, "learning_rate": 1.2642436774607757e-05, "loss": 0.0244, "step": 46810 }, { "epoch": 12.660897782585181, "grad_norm": 0.1678646206855774, "learning_rate": 1.2624126026519278e-05, "loss": 0.0249, "step": 46820 }, { "epoch": 12.663601946998378, "grad_norm": 0.15980318188667297, "learning_rate": 1.2605826632234474e-05, "loss": 0.0245, "step": 46830 }, { "epoch": 12.666306111411574, "grad_norm": 0.1449405699968338, "learning_rate": 1.2587538597312198e-05, "loss": 0.0246, "step": 46840 }, { "epoch": 12.66901027582477, "grad_norm": 0.13860227167606354, "learning_rate": 1.2569261927307884e-05, "loss": 0.0251, "step": 46850 }, { "epoch": 12.671714440237967, "grad_norm": 0.18935643136501312, "learning_rate": 1.2550996627773493e-05, "loss": 0.0246, "step": 46860 }, { "epoch": 12.674418604651162, "grad_norm": 0.12119143456220627, "learning_rate": 1.2532742704257527e-05, "loss": 0.0247, "step": 46870 }, { "epoch": 12.67712276906436, "grad_norm": 0.2106306552886963, "learning_rate": 1.2514500162305087e-05, "loss": 0.0254, "step": 46880 }, { "epoch": 12.679826933477555, "grad_norm": 0.14913009107112885, "learning_rate": 1.2496269007457728e-05, "loss": 0.0242, "step": 46890 }, { "epoch": 12.682531097890752, "grad_norm": 0.4140167534351349, "learning_rate": 1.2478049245253625e-05, "loss": 0.0239, "step": 46900 }, { "epoch": 12.685235262303948, "grad_norm": 0.15531091392040253, "learning_rate": 1.2459840881227459e-05, "loss": 0.0241, "step": 46910 }, { "epoch": 12.687939426717143, "grad_norm": 0.1554582566022873, "learning_rate": 1.2441643920910435e-05, "loss": 0.0247, "step": 46920 }, { "epoch": 12.69064359113034, "grad_norm": 0.23602932691574097, "learning_rate": 1.2423458369830322e-05, "loss": 0.0263, "step": 46930 }, { "epoch": 12.693347755543536, "grad_norm": 0.1746387481689453, "learning_rate": 1.2405284233511406e-05, "loss": 0.0237, "step": 46940 }, { "epoch": 12.696051919956734, "grad_norm": 0.11722804605960846, "learning_rate": 1.2387121517474487e-05, "loss": 0.0249, "step": 46950 }, { "epoch": 12.69875608436993, "grad_norm": 0.16814328730106354, "learning_rate": 1.2368970227236975e-05, "loss": 0.0261, "step": 46960 }, { "epoch": 12.701460248783127, "grad_norm": 0.2897193431854248, "learning_rate": 1.2350830368312688e-05, "loss": 0.0254, "step": 46970 }, { "epoch": 12.704164413196322, "grad_norm": 0.38632771372795105, "learning_rate": 1.2332701946212083e-05, "loss": 0.0245, "step": 46980 }, { "epoch": 12.70686857760952, "grad_norm": 0.16190771758556366, "learning_rate": 1.2314584966442077e-05, "loss": 0.0249, "step": 46990 }, { "epoch": 12.709572742022715, "grad_norm": 0.13780838251113892, "learning_rate": 1.2296479434506136e-05, "loss": 0.0249, "step": 47000 }, { "epoch": 12.71227690643591, "grad_norm": 0.18658514320850372, "learning_rate": 1.2278385355904232e-05, "loss": 0.0243, "step": 47010 }, { "epoch": 12.714981070849108, "grad_norm": 0.10360684245824814, "learning_rate": 1.2260302736132867e-05, "loss": 0.0244, "step": 47020 }, { "epoch": 12.717685235262303, "grad_norm": 0.19608628749847412, "learning_rate": 1.2242231580685098e-05, "loss": 0.0276, "step": 47030 }, { "epoch": 12.7203893996755, "grad_norm": 0.17377151548862457, "learning_rate": 1.2224171895050413e-05, "loss": 0.0259, "step": 47040 }, { "epoch": 12.723093564088696, "grad_norm": 0.19096051156520844, "learning_rate": 1.2206123684714903e-05, "loss": 0.0249, "step": 47050 }, { "epoch": 12.725797728501894, "grad_norm": 0.23650483787059784, "learning_rate": 1.2188086955161132e-05, "loss": 0.0234, "step": 47060 }, { "epoch": 12.728501892915089, "grad_norm": 0.21432948112487793, "learning_rate": 1.2170061711868175e-05, "loss": 0.0238, "step": 47070 }, { "epoch": 12.731206057328286, "grad_norm": 0.26980775594711304, "learning_rate": 1.215204796031163e-05, "loss": 0.0244, "step": 47080 }, { "epoch": 12.733910221741482, "grad_norm": 0.14025934040546417, "learning_rate": 1.2134045705963599e-05, "loss": 0.025, "step": 47090 }, { "epoch": 12.736614386154677, "grad_norm": 0.14506739377975464, "learning_rate": 1.2116054954292689e-05, "loss": 0.0252, "step": 47100 }, { "epoch": 12.739318550567875, "grad_norm": 0.22400985658168793, "learning_rate": 1.2098075710764011e-05, "loss": 0.0249, "step": 47110 }, { "epoch": 12.74202271498107, "grad_norm": 0.14877207577228546, "learning_rate": 1.2080107980839183e-05, "loss": 0.0243, "step": 47120 }, { "epoch": 12.744726879394268, "grad_norm": 0.16754329204559326, "learning_rate": 1.2062151769976343e-05, "loss": 0.0248, "step": 47130 }, { "epoch": 12.747431043807463, "grad_norm": 0.15350909531116486, "learning_rate": 1.204420708363011e-05, "loss": 0.0252, "step": 47140 }, { "epoch": 12.75013520822066, "grad_norm": 0.11928248405456543, "learning_rate": 1.2026273927251597e-05, "loss": 0.0253, "step": 47150 }, { "epoch": 12.752839372633856, "grad_norm": 0.11619163304567337, "learning_rate": 1.2008352306288424e-05, "loss": 0.0266, "step": 47160 }, { "epoch": 12.755543537047053, "grad_norm": 0.1405341625213623, "learning_rate": 1.1990442226184695e-05, "loss": 0.0254, "step": 47170 }, { "epoch": 12.758247701460249, "grad_norm": 0.1190919280052185, "learning_rate": 1.1972543692381066e-05, "loss": 0.0234, "step": 47180 }, { "epoch": 12.760951865873444, "grad_norm": 0.15682627260684967, "learning_rate": 1.1954656710314576e-05, "loss": 0.0254, "step": 47190 }, { "epoch": 12.763656030286642, "grad_norm": 0.13846907019615173, "learning_rate": 1.1936781285418875e-05, "loss": 0.0246, "step": 47200 }, { "epoch": 12.766360194699837, "grad_norm": 0.1517498642206192, "learning_rate": 1.1918917423123993e-05, "loss": 0.0235, "step": 47210 }, { "epoch": 12.769064359113035, "grad_norm": 0.12557490170001984, "learning_rate": 1.1901065128856537e-05, "loss": 0.0247, "step": 47220 }, { "epoch": 12.77176852352623, "grad_norm": 0.12363827973604202, "learning_rate": 1.1883224408039551e-05, "loss": 0.0248, "step": 47230 }, { "epoch": 12.774472687939427, "grad_norm": 0.12706497311592102, "learning_rate": 1.1865395266092578e-05, "loss": 0.0251, "step": 47240 }, { "epoch": 12.777176852352623, "grad_norm": 0.09844266623258591, "learning_rate": 1.1847577708431633e-05, "loss": 0.0258, "step": 47250 }, { "epoch": 12.779881016765819, "grad_norm": 0.12350283563137054, "learning_rate": 1.1829771740469225e-05, "loss": 0.0234, "step": 47260 }, { "epoch": 12.782585181179016, "grad_norm": 0.13727156817913055, "learning_rate": 1.1811977367614324e-05, "loss": 0.0255, "step": 47270 }, { "epoch": 12.785289345592211, "grad_norm": 0.10132914781570435, "learning_rate": 1.1794194595272412e-05, "loss": 0.0242, "step": 47280 }, { "epoch": 12.787993510005409, "grad_norm": 0.16595079004764557, "learning_rate": 1.1776423428845423e-05, "loss": 0.0244, "step": 47290 }, { "epoch": 12.790697674418604, "grad_norm": 0.1285308301448822, "learning_rate": 1.1758663873731756e-05, "loss": 0.0244, "step": 47300 }, { "epoch": 12.793401838831802, "grad_norm": 0.11136139184236526, "learning_rate": 1.1740915935326302e-05, "loss": 0.0251, "step": 47310 }, { "epoch": 12.796106003244997, "grad_norm": 0.2033928781747818, "learning_rate": 1.1723179619020396e-05, "loss": 0.0243, "step": 47320 }, { "epoch": 12.798810167658193, "grad_norm": 0.18452554941177368, "learning_rate": 1.1705454930201914e-05, "loss": 0.0249, "step": 47330 }, { "epoch": 12.80151433207139, "grad_norm": 0.17867325246334076, "learning_rate": 1.1687741874255087e-05, "loss": 0.0253, "step": 47340 }, { "epoch": 12.804218496484586, "grad_norm": 0.13266874849796295, "learning_rate": 1.1670040456560728e-05, "loss": 0.0254, "step": 47350 }, { "epoch": 12.806922660897783, "grad_norm": 0.1684064269065857, "learning_rate": 1.1652350682496005e-05, "loss": 0.0246, "step": 47360 }, { "epoch": 12.809626825310978, "grad_norm": 0.15582436323165894, "learning_rate": 1.163467255743465e-05, "loss": 0.0243, "step": 47370 }, { "epoch": 12.812330989724176, "grad_norm": 0.10846055299043655, "learning_rate": 1.1617006086746796e-05, "loss": 0.025, "step": 47380 }, { "epoch": 12.815035154137371, "grad_norm": 0.1256238967180252, "learning_rate": 1.1599351275799047e-05, "loss": 0.0244, "step": 47390 }, { "epoch": 12.817739318550569, "grad_norm": 0.15457351505756378, "learning_rate": 1.1581708129954466e-05, "loss": 0.0242, "step": 47400 }, { "epoch": 12.820443482963764, "grad_norm": 0.11568447947502136, "learning_rate": 1.1564076654572587e-05, "loss": 0.0255, "step": 47410 }, { "epoch": 12.82314764737696, "grad_norm": 0.13985377550125122, "learning_rate": 1.1546456855009358e-05, "loss": 0.0232, "step": 47420 }, { "epoch": 12.825851811790157, "grad_norm": 0.15697595477104187, "learning_rate": 1.1528848736617248e-05, "loss": 0.0235, "step": 47430 }, { "epoch": 12.828555976203353, "grad_norm": 0.2605579197406769, "learning_rate": 1.1511252304745112e-05, "loss": 0.0248, "step": 47440 }, { "epoch": 12.83126014061655, "grad_norm": 0.1514119654893875, "learning_rate": 1.1493667564738297e-05, "loss": 0.0248, "step": 47450 }, { "epoch": 12.833964305029745, "grad_norm": 0.14582973718643188, "learning_rate": 1.1476094521938574e-05, "loss": 0.0253, "step": 47460 }, { "epoch": 12.836668469442943, "grad_norm": 0.10937143862247467, "learning_rate": 1.1458533181684167e-05, "loss": 0.0254, "step": 47470 }, { "epoch": 12.839372633856138, "grad_norm": 0.20748169720172882, "learning_rate": 1.1440983549309753e-05, "loss": 0.0238, "step": 47480 }, { "epoch": 12.842076798269336, "grad_norm": 0.1374177783727646, "learning_rate": 1.1423445630146434e-05, "loss": 0.0245, "step": 47490 }, { "epoch": 12.844780962682531, "grad_norm": 0.12294783443212509, "learning_rate": 1.1405919429521799e-05, "loss": 0.025, "step": 47500 }, { "epoch": 12.847485127095727, "grad_norm": 0.11193563044071198, "learning_rate": 1.1388404952759802e-05, "loss": 0.0236, "step": 47510 }, { "epoch": 12.850189291508924, "grad_norm": 0.1370474249124527, "learning_rate": 1.1370902205180923e-05, "loss": 0.0254, "step": 47520 }, { "epoch": 12.85289345592212, "grad_norm": 0.13742244243621826, "learning_rate": 1.1353411192101987e-05, "loss": 0.0246, "step": 47530 }, { "epoch": 12.855597620335317, "grad_norm": 0.1647920459508896, "learning_rate": 1.133593191883634e-05, "loss": 0.025, "step": 47540 }, { "epoch": 12.858301784748512, "grad_norm": 0.13553641736507416, "learning_rate": 1.1318464390693711e-05, "loss": 0.0254, "step": 47550 }, { "epoch": 12.86100594916171, "grad_norm": 0.12706153094768524, "learning_rate": 1.1301008612980257e-05, "loss": 0.0248, "step": 47560 }, { "epoch": 12.863710113574905, "grad_norm": 0.1580088585615158, "learning_rate": 1.128356459099863e-05, "loss": 0.0239, "step": 47570 }, { "epoch": 12.866414277988103, "grad_norm": 0.2239857017993927, "learning_rate": 1.1266132330047802e-05, "loss": 0.0248, "step": 47580 }, { "epoch": 12.869118442401298, "grad_norm": 0.1426030844449997, "learning_rate": 1.1248711835423281e-05, "loss": 0.0249, "step": 47590 }, { "epoch": 12.871822606814494, "grad_norm": 0.12312641739845276, "learning_rate": 1.123130311241693e-05, "loss": 0.0236, "step": 47600 }, { "epoch": 12.874526771227691, "grad_norm": 0.13822688162326813, "learning_rate": 1.1213906166317068e-05, "loss": 0.0247, "step": 47610 }, { "epoch": 12.877230935640886, "grad_norm": 0.20168934762477875, "learning_rate": 1.1196521002408427e-05, "loss": 0.0245, "step": 47620 }, { "epoch": 12.879935100054084, "grad_norm": 0.17577563226222992, "learning_rate": 1.1179147625972159e-05, "loss": 0.0253, "step": 47630 }, { "epoch": 12.88263926446728, "grad_norm": 0.16701585054397583, "learning_rate": 1.1161786042285822e-05, "loss": 0.0245, "step": 47640 }, { "epoch": 12.885343428880477, "grad_norm": 0.1510409712791443, "learning_rate": 1.1144436256623447e-05, "loss": 0.0253, "step": 47650 }, { "epoch": 12.888047593293672, "grad_norm": 0.152612566947937, "learning_rate": 1.1127098274255392e-05, "loss": 0.024, "step": 47660 }, { "epoch": 12.890751757706868, "grad_norm": 0.1862846314907074, "learning_rate": 1.1109772100448512e-05, "loss": 0.0244, "step": 47670 }, { "epoch": 12.893455922120065, "grad_norm": 0.1800232082605362, "learning_rate": 1.1092457740466033e-05, "loss": 0.0249, "step": 47680 }, { "epoch": 12.89616008653326, "grad_norm": 0.4027448892593384, "learning_rate": 1.10751551995676e-05, "loss": 0.025, "step": 47690 }, { "epoch": 12.898864250946458, "grad_norm": 0.12445686757564545, "learning_rate": 1.1057864483009262e-05, "loss": 0.0239, "step": 47700 }, { "epoch": 12.901568415359653, "grad_norm": 0.34672704339027405, "learning_rate": 1.1040585596043473e-05, "loss": 0.0239, "step": 47710 }, { "epoch": 12.90427257977285, "grad_norm": 0.12539918720722198, "learning_rate": 1.1023318543919148e-05, "loss": 0.0236, "step": 47720 }, { "epoch": 12.906976744186046, "grad_norm": 0.1195179671049118, "learning_rate": 1.10060633318815e-05, "loss": 0.0244, "step": 47730 }, { "epoch": 12.909680908599244, "grad_norm": 0.15706759691238403, "learning_rate": 1.0988819965172248e-05, "loss": 0.0239, "step": 47740 }, { "epoch": 12.91238507301244, "grad_norm": 0.11741556227207184, "learning_rate": 1.0971588449029462e-05, "loss": 0.0243, "step": 47750 }, { "epoch": 12.915089237425635, "grad_norm": 0.15840214490890503, "learning_rate": 1.095436878868762e-05, "loss": 0.0246, "step": 47760 }, { "epoch": 12.917793401838832, "grad_norm": 0.14350421726703644, "learning_rate": 1.0937160989377598e-05, "loss": 0.0262, "step": 47770 }, { "epoch": 12.920497566252028, "grad_norm": 0.16588011384010315, "learning_rate": 1.0919965056326676e-05, "loss": 0.0247, "step": 47780 }, { "epoch": 12.923201730665225, "grad_norm": 0.18259376287460327, "learning_rate": 1.0902780994758504e-05, "loss": 0.0235, "step": 47790 }, { "epoch": 12.92590589507842, "grad_norm": 0.20249606668949127, "learning_rate": 1.0885608809893193e-05, "loss": 0.0243, "step": 47800 }, { "epoch": 12.928610059491618, "grad_norm": 0.3104179799556732, "learning_rate": 1.0868448506947142e-05, "loss": 0.0259, "step": 47810 }, { "epoch": 12.931314223904813, "grad_norm": 0.29773736000061035, "learning_rate": 1.0851300091133243e-05, "loss": 0.0252, "step": 47820 }, { "epoch": 12.934018388318009, "grad_norm": 0.1169121041893959, "learning_rate": 1.083416356766071e-05, "loss": 0.0252, "step": 47830 }, { "epoch": 12.936722552731206, "grad_norm": 0.24177902936935425, "learning_rate": 1.0817038941735175e-05, "loss": 0.025, "step": 47840 }, { "epoch": 12.939426717144402, "grad_norm": 0.13463647663593292, "learning_rate": 1.0799926218558642e-05, "loss": 0.0247, "step": 47850 }, { "epoch": 12.942130881557599, "grad_norm": 0.15385662019252777, "learning_rate": 1.0782825403329488e-05, "loss": 0.0243, "step": 47860 }, { "epoch": 12.944835045970795, "grad_norm": 0.17885906994342804, "learning_rate": 1.076573650124254e-05, "loss": 0.0246, "step": 47870 }, { "epoch": 12.947539210383992, "grad_norm": 0.20810602605342865, "learning_rate": 1.0748659517488891e-05, "loss": 0.024, "step": 47880 }, { "epoch": 12.950243374797187, "grad_norm": 0.12799623608589172, "learning_rate": 1.0731594457256138e-05, "loss": 0.0241, "step": 47890 }, { "epoch": 12.952947539210385, "grad_norm": 0.1157609149813652, "learning_rate": 1.0714541325728139e-05, "loss": 0.025, "step": 47900 }, { "epoch": 12.95565170362358, "grad_norm": 0.16519896686077118, "learning_rate": 1.0697500128085231e-05, "loss": 0.0237, "step": 47910 }, { "epoch": 12.958355868036776, "grad_norm": 0.2376287281513214, "learning_rate": 1.0680470869504055e-05, "loss": 0.0248, "step": 47920 }, { "epoch": 12.961060032449973, "grad_norm": 0.25858986377716064, "learning_rate": 1.066345355515766e-05, "loss": 0.0256, "step": 47930 }, { "epoch": 12.963764196863169, "grad_norm": 0.1253005862236023, "learning_rate": 1.0646448190215453e-05, "loss": 0.0245, "step": 47940 }, { "epoch": 12.966468361276366, "grad_norm": 0.16216988861560822, "learning_rate": 1.0629454779843217e-05, "loss": 0.0243, "step": 47950 }, { "epoch": 12.969172525689562, "grad_norm": 0.1821797490119934, "learning_rate": 1.0612473329203082e-05, "loss": 0.0234, "step": 47960 }, { "epoch": 12.971876690102759, "grad_norm": 0.18408922851085663, "learning_rate": 1.0595503843453596e-05, "loss": 0.0238, "step": 47970 }, { "epoch": 12.974580854515954, "grad_norm": 0.3622225522994995, "learning_rate": 1.0578546327749634e-05, "loss": 0.0255, "step": 47980 }, { "epoch": 12.977285018929152, "grad_norm": 0.19487027823925018, "learning_rate": 1.0561600787242425e-05, "loss": 0.0237, "step": 47990 }, { "epoch": 12.979989183342347, "grad_norm": 0.1399783194065094, "learning_rate": 1.0544667227079591e-05, "loss": 0.0237, "step": 48000 }, { "epoch": 12.982693347755543, "grad_norm": 0.15263204276561737, "learning_rate": 1.0527745652405085e-05, "loss": 0.0238, "step": 48010 }, { "epoch": 12.98539751216874, "grad_norm": 0.13003015518188477, "learning_rate": 1.051083606835927e-05, "loss": 0.0239, "step": 48020 }, { "epoch": 12.988101676581936, "grad_norm": 0.11157027631998062, "learning_rate": 1.049393848007878e-05, "loss": 0.0241, "step": 48030 }, { "epoch": 12.990805840995133, "grad_norm": 0.17661452293395996, "learning_rate": 1.0477052892696709e-05, "loss": 0.0229, "step": 48040 }, { "epoch": 12.993510005408329, "grad_norm": 0.2264205515384674, "learning_rate": 1.0460179311342394e-05, "loss": 0.024, "step": 48050 }, { "epoch": 12.996214169821526, "grad_norm": 0.09808287769556046, "learning_rate": 1.0443317741141634e-05, "loss": 0.0229, "step": 48060 }, { "epoch": 12.998918334234721, "grad_norm": 0.13848687708377838, "learning_rate": 1.0426468187216514e-05, "loss": 0.0232, "step": 48070 }, { "epoch": 13.001622498647917, "grad_norm": 0.1387193351984024, "learning_rate": 1.0409630654685477e-05, "loss": 0.0235, "step": 48080 }, { "epoch": 13.004326663061114, "grad_norm": 0.1154729574918747, "learning_rate": 1.039280514866332e-05, "loss": 0.0235, "step": 48090 }, { "epoch": 13.00703082747431, "grad_norm": 0.18650370836257935, "learning_rate": 1.0375991674261198e-05, "loss": 0.0242, "step": 48100 }, { "epoch": 13.009734991887507, "grad_norm": 0.19982361793518066, "learning_rate": 1.0359190236586575e-05, "loss": 0.0257, "step": 48110 }, { "epoch": 13.012439156300703, "grad_norm": 0.1372433602809906, "learning_rate": 1.0342400840743322e-05, "loss": 0.0236, "step": 48120 }, { "epoch": 13.0151433207139, "grad_norm": 0.14518746733665466, "learning_rate": 1.0325623491831593e-05, "loss": 0.0242, "step": 48130 }, { "epoch": 13.017847485127096, "grad_norm": 0.11550911515951157, "learning_rate": 1.0308858194947906e-05, "loss": 0.0246, "step": 48140 }, { "epoch": 13.020551649540293, "grad_norm": 0.11310311406850815, "learning_rate": 1.0292104955185111e-05, "loss": 0.0244, "step": 48150 }, { "epoch": 13.023255813953488, "grad_norm": 0.15835872292518616, "learning_rate": 1.0275363777632396e-05, "loss": 0.0241, "step": 48160 }, { "epoch": 13.025959978366684, "grad_norm": 0.1351645141839981, "learning_rate": 1.0258634667375321e-05, "loss": 0.0234, "step": 48170 }, { "epoch": 13.028664142779881, "grad_norm": 0.22782082855701447, "learning_rate": 1.02419176294957e-05, "loss": 0.0229, "step": 48180 }, { "epoch": 13.031368307193077, "grad_norm": 0.1418696790933609, "learning_rate": 1.0225212669071782e-05, "loss": 0.0242, "step": 48190 }, { "epoch": 13.034072471606274, "grad_norm": 0.14907072484493256, "learning_rate": 1.0208519791178029e-05, "loss": 0.0237, "step": 48200 }, { "epoch": 13.03677663601947, "grad_norm": 0.1118822917342186, "learning_rate": 1.019183900088535e-05, "loss": 0.0256, "step": 48210 }, { "epoch": 13.039480800432667, "grad_norm": 0.14394544064998627, "learning_rate": 1.0175170303260906e-05, "loss": 0.0251, "step": 48220 }, { "epoch": 13.042184964845863, "grad_norm": 0.14336775243282318, "learning_rate": 1.0158513703368206e-05, "loss": 0.0245, "step": 48230 }, { "epoch": 13.044889129259058, "grad_norm": 0.18101699650287628, "learning_rate": 1.0141869206267095e-05, "loss": 0.0246, "step": 48240 }, { "epoch": 13.047593293672255, "grad_norm": 0.13231679797172546, "learning_rate": 1.0125236817013723e-05, "loss": 0.023, "step": 48250 }, { "epoch": 13.050297458085451, "grad_norm": 0.27939462661743164, "learning_rate": 1.010861654066056e-05, "loss": 0.0243, "step": 48260 }, { "epoch": 13.053001622498648, "grad_norm": 0.40843501687049866, "learning_rate": 1.0092008382256434e-05, "loss": 0.0245, "step": 48270 }, { "epoch": 13.055705786911844, "grad_norm": 0.14605149626731873, "learning_rate": 1.0075412346846458e-05, "loss": 0.0245, "step": 48280 }, { "epoch": 13.058409951325041, "grad_norm": 0.1537065953016281, "learning_rate": 1.0058828439472056e-05, "loss": 0.0242, "step": 48290 }, { "epoch": 13.061114115738237, "grad_norm": 0.15266001224517822, "learning_rate": 1.0042256665170996e-05, "loss": 0.0241, "step": 48300 }, { "epoch": 13.063818280151434, "grad_norm": 0.10354053229093552, "learning_rate": 1.0025697028977332e-05, "loss": 0.0252, "step": 48310 }, { "epoch": 13.06652244456463, "grad_norm": 0.13192713260650635, "learning_rate": 1.0009149535921454e-05, "loss": 0.0256, "step": 48320 }, { "epoch": 13.069226608977825, "grad_norm": 0.11788143217563629, "learning_rate": 9.992614191030031e-06, "loss": 0.0231, "step": 48330 }, { "epoch": 13.071930773391022, "grad_norm": 0.1672338843345642, "learning_rate": 9.976090999326115e-06, "loss": 0.0239, "step": 48340 }, { "epoch": 13.074634937804218, "grad_norm": 0.2188221961259842, "learning_rate": 9.959579965828952e-06, "loss": 0.024, "step": 48350 }, { "epoch": 13.077339102217415, "grad_norm": 0.1556173712015152, "learning_rate": 9.943081095554218e-06, "loss": 0.0249, "step": 48360 }, { "epoch": 13.08004326663061, "grad_norm": 0.19715727865695953, "learning_rate": 9.926594393513783e-06, "loss": 0.0248, "step": 48370 }, { "epoch": 13.082747431043808, "grad_norm": 0.13627108931541443, "learning_rate": 9.910119864715906e-06, "loss": 0.0238, "step": 48380 }, { "epoch": 13.085451595457004, "grad_norm": 0.16246198117733002, "learning_rate": 9.8936575141651e-06, "loss": 0.0242, "step": 48390 }, { "epoch": 13.088155759870201, "grad_norm": 0.17530447244644165, "learning_rate": 9.877207346862194e-06, "loss": 0.0245, "step": 48400 }, { "epoch": 13.090859924283397, "grad_norm": 0.14931057393550873, "learning_rate": 9.860769367804312e-06, "loss": 0.0252, "step": 48410 }, { "epoch": 13.093564088696592, "grad_norm": 0.18007735908031464, "learning_rate": 9.844343581984877e-06, "loss": 0.0272, "step": 48420 }, { "epoch": 13.09626825310979, "grad_norm": 0.15383663773536682, "learning_rate": 9.82792999439362e-06, "loss": 0.0242, "step": 48430 }, { "epoch": 13.098972417522985, "grad_norm": 0.13849753141403198, "learning_rate": 9.811528610016546e-06, "loss": 0.024, "step": 48440 }, { "epoch": 13.101676581936182, "grad_norm": 0.2258801907300949, "learning_rate": 9.79513943383597e-06, "loss": 0.0254, "step": 48450 }, { "epoch": 13.104380746349378, "grad_norm": 0.20371942222118378, "learning_rate": 9.778762470830489e-06, "loss": 0.0236, "step": 48460 }, { "epoch": 13.107084910762575, "grad_norm": 0.22784294188022614, "learning_rate": 9.762397725974982e-06, "loss": 0.0237, "step": 48470 }, { "epoch": 13.10978907517577, "grad_norm": 0.11378877609968185, "learning_rate": 9.746045204240622e-06, "loss": 0.0252, "step": 48480 }, { "epoch": 13.112493239588966, "grad_norm": 0.17675165832042694, "learning_rate": 9.729704910594917e-06, "loss": 0.0239, "step": 48490 }, { "epoch": 13.115197404002163, "grad_norm": 0.10921760648488998, "learning_rate": 9.713376850001554e-06, "loss": 0.0255, "step": 48500 }, { "epoch": 13.117901568415359, "grad_norm": 0.16017182171344757, "learning_rate": 9.697061027420622e-06, "loss": 0.0241, "step": 48510 }, { "epoch": 13.120605732828556, "grad_norm": 0.1275172233581543, "learning_rate": 9.680757447808385e-06, "loss": 0.0256, "step": 48520 }, { "epoch": 13.123309897241752, "grad_norm": 0.15900425612926483, "learning_rate": 9.664466116117488e-06, "loss": 0.0246, "step": 48530 }, { "epoch": 13.12601406165495, "grad_norm": 0.1743355244398117, "learning_rate": 9.64818703729678e-06, "loss": 0.0238, "step": 48540 }, { "epoch": 13.128718226068145, "grad_norm": 0.11928881704807281, "learning_rate": 9.631920216291423e-06, "loss": 0.0244, "step": 48550 }, { "epoch": 13.131422390481342, "grad_norm": 0.10927005857229233, "learning_rate": 9.615665658042849e-06, "loss": 0.0246, "step": 48560 }, { "epoch": 13.134126554894538, "grad_norm": 0.1441996991634369, "learning_rate": 9.599423367488747e-06, "loss": 0.0229, "step": 48570 }, { "epoch": 13.136830719307733, "grad_norm": 0.13795150816440582, "learning_rate": 9.583193349563124e-06, "loss": 0.0257, "step": 48580 }, { "epoch": 13.13953488372093, "grad_norm": 0.1417296826839447, "learning_rate": 9.566975609196216e-06, "loss": 0.0249, "step": 48590 }, { "epoch": 13.142239048134126, "grad_norm": 0.15828527510166168, "learning_rate": 9.550770151314548e-06, "loss": 0.0244, "step": 48600 }, { "epoch": 13.144943212547323, "grad_norm": 0.15219852328300476, "learning_rate": 9.53457698084091e-06, "loss": 0.0244, "step": 48610 }, { "epoch": 13.147647376960519, "grad_norm": 0.19947275519371033, "learning_rate": 9.518396102694355e-06, "loss": 0.0246, "step": 48620 }, { "epoch": 13.150351541373716, "grad_norm": 0.13152876496315002, "learning_rate": 9.502227521790198e-06, "loss": 0.0247, "step": 48630 }, { "epoch": 13.153055705786912, "grad_norm": 0.14748777449131012, "learning_rate": 9.486071243040063e-06, "loss": 0.0243, "step": 48640 }, { "epoch": 13.155759870200107, "grad_norm": 0.18938444554805756, "learning_rate": 9.469927271351747e-06, "loss": 0.0246, "step": 48650 }, { "epoch": 13.158464034613305, "grad_norm": 0.172781303524971, "learning_rate": 9.453795611629419e-06, "loss": 0.0248, "step": 48660 }, { "epoch": 13.1611681990265, "grad_norm": 0.133523628115654, "learning_rate": 9.437676268773399e-06, "loss": 0.024, "step": 48670 }, { "epoch": 13.163872363439697, "grad_norm": 0.18173649907112122, "learning_rate": 9.421569247680357e-06, "loss": 0.0259, "step": 48680 }, { "epoch": 13.166576527852893, "grad_norm": 0.15555739402770996, "learning_rate": 9.40547455324316e-06, "loss": 0.0243, "step": 48690 }, { "epoch": 13.16928069226609, "grad_norm": 0.15562351047992706, "learning_rate": 9.389392190350965e-06, "loss": 0.0242, "step": 48700 }, { "epoch": 13.171984856679286, "grad_norm": 0.14001043140888214, "learning_rate": 9.373322163889153e-06, "loss": 0.0241, "step": 48710 }, { "epoch": 13.174689021092483, "grad_norm": 0.13498330116271973, "learning_rate": 9.357264478739375e-06, "loss": 0.0241, "step": 48720 }, { "epoch": 13.177393185505679, "grad_norm": 0.1249120831489563, "learning_rate": 9.341219139779567e-06, "loss": 0.0247, "step": 48730 }, { "epoch": 13.180097349918874, "grad_norm": 0.10040724277496338, "learning_rate": 9.325186151883824e-06, "loss": 0.0248, "step": 48740 }, { "epoch": 13.182801514332072, "grad_norm": 0.2706640958786011, "learning_rate": 9.30916551992258e-06, "loss": 0.0243, "step": 48750 }, { "epoch": 13.185505678745267, "grad_norm": 0.11811727285385132, "learning_rate": 9.293157248762479e-06, "loss": 0.0245, "step": 48760 }, { "epoch": 13.188209843158464, "grad_norm": 0.160155788064003, "learning_rate": 9.2771613432664e-06, "loss": 0.0241, "step": 48770 }, { "epoch": 13.19091400757166, "grad_norm": 0.15063892304897308, "learning_rate": 9.261177808293481e-06, "loss": 0.0255, "step": 48780 }, { "epoch": 13.193618171984857, "grad_norm": 0.1779083013534546, "learning_rate": 9.245206648699096e-06, "loss": 0.0238, "step": 48790 }, { "epoch": 13.196322336398053, "grad_norm": 0.2775598168373108, "learning_rate": 9.22924786933485e-06, "loss": 0.0253, "step": 48800 }, { "epoch": 13.19902650081125, "grad_norm": 0.20198161900043488, "learning_rate": 9.213301475048642e-06, "loss": 0.0234, "step": 48810 }, { "epoch": 13.201730665224446, "grad_norm": 0.13115568459033966, "learning_rate": 9.197367470684504e-06, "loss": 0.0237, "step": 48820 }, { "epoch": 13.204434829637641, "grad_norm": 0.15790955722332, "learning_rate": 9.181445861082816e-06, "loss": 0.0245, "step": 48830 }, { "epoch": 13.207138994050839, "grad_norm": 0.2901276648044586, "learning_rate": 9.16553665108012e-06, "loss": 0.0235, "step": 48840 }, { "epoch": 13.209843158464034, "grad_norm": 0.11084747314453125, "learning_rate": 9.149639845509223e-06, "loss": 0.0228, "step": 48850 }, { "epoch": 13.212547322877231, "grad_norm": 0.22855515778064728, "learning_rate": 9.133755449199144e-06, "loss": 0.0251, "step": 48860 }, { "epoch": 13.215251487290427, "grad_norm": 0.13407938182353973, "learning_rate": 9.117883466975135e-06, "loss": 0.0238, "step": 48870 }, { "epoch": 13.217955651703624, "grad_norm": 0.1221814975142479, "learning_rate": 9.10202390365873e-06, "loss": 0.0251, "step": 48880 }, { "epoch": 13.22065981611682, "grad_norm": 0.17218813300132751, "learning_rate": 9.086176764067583e-06, "loss": 0.0233, "step": 48890 }, { "epoch": 13.223363980530015, "grad_norm": 0.1671910583972931, "learning_rate": 9.070342053015684e-06, "loss": 0.0255, "step": 48900 }, { "epoch": 13.226068144943213, "grad_norm": 0.21775704622268677, "learning_rate": 9.054519775313187e-06, "loss": 0.0237, "step": 48910 }, { "epoch": 13.228772309356408, "grad_norm": 0.11306804418563843, "learning_rate": 9.038709935766476e-06, "loss": 0.0239, "step": 48920 }, { "epoch": 13.231476473769606, "grad_norm": 0.14150364696979523, "learning_rate": 9.02291253917817e-06, "loss": 0.022, "step": 48930 }, { "epoch": 13.234180638182801, "grad_norm": 0.11134982109069824, "learning_rate": 9.007127590347091e-06, "loss": 0.0235, "step": 48940 }, { "epoch": 13.236884802595998, "grad_norm": 0.10645411163568497, "learning_rate": 8.991355094068288e-06, "loss": 0.0248, "step": 48950 }, { "epoch": 13.239588967009194, "grad_norm": 0.21760858595371246, "learning_rate": 8.975595055133062e-06, "loss": 0.0234, "step": 48960 }, { "epoch": 13.242293131422391, "grad_norm": 0.12669150531291962, "learning_rate": 8.959847478328848e-06, "loss": 0.0232, "step": 48970 }, { "epoch": 13.244997295835587, "grad_norm": 0.1388082504272461, "learning_rate": 8.944112368439378e-06, "loss": 0.0235, "step": 48980 }, { "epoch": 13.247701460248782, "grad_norm": 0.16156361997127533, "learning_rate": 8.928389730244552e-06, "loss": 0.0248, "step": 48990 }, { "epoch": 13.25040562466198, "grad_norm": 0.23662637174129486, "learning_rate": 8.912679568520494e-06, "loss": 0.0231, "step": 49000 }, { "epoch": 13.253109789075175, "grad_norm": 0.2205563336610794, "learning_rate": 8.896981888039534e-06, "loss": 0.0255, "step": 49010 }, { "epoch": 13.255813953488373, "grad_norm": 0.22316618263721466, "learning_rate": 8.881296693570201e-06, "loss": 0.0241, "step": 49020 }, { "epoch": 13.258518117901568, "grad_norm": 0.147647887468338, "learning_rate": 8.865623989877281e-06, "loss": 0.0249, "step": 49030 }, { "epoch": 13.261222282314765, "grad_norm": 0.11730413883924484, "learning_rate": 8.849963781721681e-06, "loss": 0.0235, "step": 49040 }, { "epoch": 13.263926446727961, "grad_norm": 0.21474061906337738, "learning_rate": 8.834316073860588e-06, "loss": 0.0238, "step": 49050 }, { "epoch": 13.266630611141156, "grad_norm": 0.15159325301647186, "learning_rate": 8.818680871047357e-06, "loss": 0.0241, "step": 49060 }, { "epoch": 13.269334775554354, "grad_norm": 0.13759958744049072, "learning_rate": 8.803058178031549e-06, "loss": 0.0231, "step": 49070 }, { "epoch": 13.27203893996755, "grad_norm": 0.12005046010017395, "learning_rate": 8.787447999558922e-06, "loss": 0.0224, "step": 49080 }, { "epoch": 13.274743104380747, "grad_norm": 0.11858002841472626, "learning_rate": 8.77185034037144e-06, "loss": 0.0248, "step": 49090 }, { "epoch": 13.277447268793942, "grad_norm": 0.14554181694984436, "learning_rate": 8.756265205207259e-06, "loss": 0.0244, "step": 49100 }, { "epoch": 13.28015143320714, "grad_norm": 0.17431333661079407, "learning_rate": 8.740692598800732e-06, "loss": 0.0234, "step": 49110 }, { "epoch": 13.282855597620335, "grad_norm": 0.15126018226146698, "learning_rate": 8.72513252588239e-06, "loss": 0.0239, "step": 49120 }, { "epoch": 13.285559762033532, "grad_norm": 0.1856842190027237, "learning_rate": 8.709584991178998e-06, "loss": 0.0238, "step": 49130 }, { "epoch": 13.288263926446728, "grad_norm": 0.27991607785224915, "learning_rate": 8.694049999413479e-06, "loss": 0.0235, "step": 49140 }, { "epoch": 13.290968090859923, "grad_norm": 0.15998010337352753, "learning_rate": 8.678527555304945e-06, "loss": 0.0241, "step": 49150 }, { "epoch": 13.29367225527312, "grad_norm": 0.14557944238185883, "learning_rate": 8.663017663568712e-06, "loss": 0.024, "step": 49160 }, { "epoch": 13.296376419686316, "grad_norm": 0.11669328808784485, "learning_rate": 8.647520328916259e-06, "loss": 0.0242, "step": 49170 }, { "epoch": 13.299080584099514, "grad_norm": 0.13018463551998138, "learning_rate": 8.632035556055307e-06, "loss": 0.0254, "step": 49180 }, { "epoch": 13.30178474851271, "grad_norm": 0.15151387453079224, "learning_rate": 8.616563349689672e-06, "loss": 0.0226, "step": 49190 }, { "epoch": 13.304488912925907, "grad_norm": 0.16353698074817657, "learning_rate": 8.601103714519448e-06, "loss": 0.0237, "step": 49200 }, { "epoch": 13.307193077339102, "grad_norm": 0.12092366069555283, "learning_rate": 8.58565665524082e-06, "loss": 0.0251, "step": 49210 }, { "epoch": 13.3098972417523, "grad_norm": 0.15080824494361877, "learning_rate": 8.570222176546222e-06, "loss": 0.0246, "step": 49220 }, { "epoch": 13.312601406165495, "grad_norm": 0.2201545238494873, "learning_rate": 8.554800283124242e-06, "loss": 0.0228, "step": 49230 }, { "epoch": 13.31530557057869, "grad_norm": 0.16557468473911285, "learning_rate": 8.539390979659639e-06, "loss": 0.0239, "step": 49240 }, { "epoch": 13.318009734991888, "grad_norm": 0.12511537969112396, "learning_rate": 8.523994270833352e-06, "loss": 0.0244, "step": 49250 }, { "epoch": 13.320713899405083, "grad_norm": 0.12783770263195038, "learning_rate": 8.5086101613225e-06, "loss": 0.0245, "step": 49260 }, { "epoch": 13.32341806381828, "grad_norm": 0.14034268260002136, "learning_rate": 8.493238655800346e-06, "loss": 0.0246, "step": 49270 }, { "epoch": 13.326122228231476, "grad_norm": 0.11454882472753525, "learning_rate": 8.47787975893638e-06, "loss": 0.0234, "step": 49280 }, { "epoch": 13.328826392644674, "grad_norm": 0.1315154880285263, "learning_rate": 8.462533475396211e-06, "loss": 0.0245, "step": 49290 }, { "epoch": 13.331530557057869, "grad_norm": 0.11735116690397263, "learning_rate": 8.447199809841643e-06, "loss": 0.0232, "step": 49300 }, { "epoch": 13.334234721471065, "grad_norm": 0.14882585406303406, "learning_rate": 8.431878766930635e-06, "loss": 0.024, "step": 49310 }, { "epoch": 13.336938885884262, "grad_norm": 0.16910158097743988, "learning_rate": 8.416570351317304e-06, "loss": 0.0232, "step": 49320 }, { "epoch": 13.339643050297457, "grad_norm": 0.1370796412229538, "learning_rate": 8.401274567651973e-06, "loss": 0.0248, "step": 49330 }, { "epoch": 13.342347214710655, "grad_norm": 0.14151740074157715, "learning_rate": 8.385991420581058e-06, "loss": 0.0242, "step": 49340 }, { "epoch": 13.34505137912385, "grad_norm": 0.1680680215358734, "learning_rate": 8.370720914747215e-06, "loss": 0.0251, "step": 49350 }, { "epoch": 13.347755543537048, "grad_norm": 0.10820460319519043, "learning_rate": 8.355463054789181e-06, "loss": 0.0235, "step": 49360 }, { "epoch": 13.350459707950243, "grad_norm": 0.2935543954372406, "learning_rate": 8.340217845341919e-06, "loss": 0.0243, "step": 49370 }, { "epoch": 13.35316387236344, "grad_norm": 0.12673348188400269, "learning_rate": 8.324985291036514e-06, "loss": 0.0246, "step": 49380 }, { "epoch": 13.355868036776636, "grad_norm": 0.10832604765892029, "learning_rate": 8.309765396500213e-06, "loss": 0.0226, "step": 49390 }, { "epoch": 13.358572201189832, "grad_norm": 0.16021892428398132, "learning_rate": 8.294558166356419e-06, "loss": 0.0247, "step": 49400 }, { "epoch": 13.361276365603029, "grad_norm": 0.17638589441776276, "learning_rate": 8.279363605224683e-06, "loss": 0.0234, "step": 49410 }, { "epoch": 13.363980530016224, "grad_norm": 0.15499147772789001, "learning_rate": 8.264181717720704e-06, "loss": 0.0248, "step": 49420 }, { "epoch": 13.366684694429422, "grad_norm": 0.13517342507839203, "learning_rate": 8.249012508456361e-06, "loss": 0.023, "step": 49430 }, { "epoch": 13.369388858842617, "grad_norm": 0.137616366147995, "learning_rate": 8.233855982039646e-06, "loss": 0.0244, "step": 49440 }, { "epoch": 13.372093023255815, "grad_norm": 0.20149566233158112, "learning_rate": 8.218712143074708e-06, "loss": 0.0244, "step": 49450 }, { "epoch": 13.37479718766901, "grad_norm": 0.13487814366817474, "learning_rate": 8.203580996161858e-06, "loss": 0.0238, "step": 49460 }, { "epoch": 13.377501352082206, "grad_norm": 0.10921725630760193, "learning_rate": 8.188462545897512e-06, "loss": 0.024, "step": 49470 }, { "epoch": 13.380205516495403, "grad_norm": 0.18072280287742615, "learning_rate": 8.173356796874304e-06, "loss": 0.0233, "step": 49480 }, { "epoch": 13.382909680908599, "grad_norm": 0.13101592659950256, "learning_rate": 8.158263753680906e-06, "loss": 0.0239, "step": 49490 }, { "epoch": 13.385613845321796, "grad_norm": 0.1327669322490692, "learning_rate": 8.143183420902239e-06, "loss": 0.0233, "step": 49500 }, { "epoch": 13.388318009734991, "grad_norm": 0.11028794199228287, "learning_rate": 8.128115803119258e-06, "loss": 0.0239, "step": 49510 }, { "epoch": 13.391022174148189, "grad_norm": 0.1441507488489151, "learning_rate": 8.11306090490916e-06, "loss": 0.0241, "step": 49520 }, { "epoch": 13.393726338561384, "grad_norm": 0.12187794595956802, "learning_rate": 8.098018730845169e-06, "loss": 0.0223, "step": 49530 }, { "epoch": 13.396430502974582, "grad_norm": 0.15391452610492706, "learning_rate": 8.082989285496745e-06, "loss": 0.0233, "step": 49540 }, { "epoch": 13.399134667387777, "grad_norm": 0.11245706677436829, "learning_rate": 8.067972573429416e-06, "loss": 0.023, "step": 49550 }, { "epoch": 13.401838831800973, "grad_norm": 0.21386508643627167, "learning_rate": 8.052968599204874e-06, "loss": 0.0251, "step": 49560 }, { "epoch": 13.40454299621417, "grad_norm": 0.19402092695236206, "learning_rate": 8.037977367380922e-06, "loss": 0.0238, "step": 49570 }, { "epoch": 13.407247160627366, "grad_norm": 0.14632701873779297, "learning_rate": 8.022998882511495e-06, "loss": 0.0248, "step": 49580 }, { "epoch": 13.409951325040563, "grad_norm": 0.19459091126918793, "learning_rate": 8.008033149146677e-06, "loss": 0.0232, "step": 49590 }, { "epoch": 13.412655489453758, "grad_norm": 0.16755998134613037, "learning_rate": 7.993080171832656e-06, "loss": 0.0244, "step": 49600 }, { "epoch": 13.415359653866956, "grad_norm": 0.10473766177892685, "learning_rate": 7.978139955111752e-06, "loss": 0.0239, "step": 49610 }, { "epoch": 13.418063818280151, "grad_norm": 0.16766728460788727, "learning_rate": 7.9632125035224e-06, "loss": 0.0267, "step": 49620 }, { "epoch": 13.420767982693349, "grad_norm": 0.12512238323688507, "learning_rate": 7.948297821599177e-06, "loss": 0.0243, "step": 49630 }, { "epoch": 13.423472147106544, "grad_norm": 0.14141596853733063, "learning_rate": 7.933395913872755e-06, "loss": 0.0239, "step": 49640 }, { "epoch": 13.42617631151974, "grad_norm": 0.09616418927907944, "learning_rate": 7.918506784869972e-06, "loss": 0.0247, "step": 49650 }, { "epoch": 13.428880475932937, "grad_norm": 0.3818279802799225, "learning_rate": 7.903630439113707e-06, "loss": 0.0248, "step": 49660 }, { "epoch": 13.431584640346133, "grad_norm": 0.14300121366977692, "learning_rate": 7.888766881123044e-06, "loss": 0.0231, "step": 49670 }, { "epoch": 13.43428880475933, "grad_norm": 0.15025267004966736, "learning_rate": 7.873916115413099e-06, "loss": 0.0234, "step": 49680 }, { "epoch": 13.436992969172525, "grad_norm": 0.1614362597465515, "learning_rate": 7.85907814649518e-06, "loss": 0.0245, "step": 49690 }, { "epoch": 13.439697133585723, "grad_norm": 0.11344941705465317, "learning_rate": 7.844252978876649e-06, "loss": 0.0239, "step": 49700 }, { "epoch": 13.442401297998918, "grad_norm": 0.17637081444263458, "learning_rate": 7.829440617061001e-06, "loss": 0.0237, "step": 49710 }, { "epoch": 13.445105462412116, "grad_norm": 0.1330200731754303, "learning_rate": 7.814641065547851e-06, "loss": 0.0246, "step": 49720 }, { "epoch": 13.447809626825311, "grad_norm": 0.22731751203536987, "learning_rate": 7.79985432883289e-06, "loss": 0.0256, "step": 49730 }, { "epoch": 13.450513791238507, "grad_norm": 0.16798368096351624, "learning_rate": 7.78508041140797e-06, "loss": 0.0226, "step": 49740 }, { "epoch": 13.453217955651704, "grad_norm": 0.2001853883266449, "learning_rate": 7.770319317760993e-06, "loss": 0.0247, "step": 49750 }, { "epoch": 13.4559221200649, "grad_norm": 0.12324409931898117, "learning_rate": 7.755571052376004e-06, "loss": 0.0232, "step": 49760 }, { "epoch": 13.458626284478097, "grad_norm": 0.14350204169750214, "learning_rate": 7.740835619733128e-06, "loss": 0.0252, "step": 49770 }, { "epoch": 13.461330448891292, "grad_norm": 0.1901332437992096, "learning_rate": 7.726113024308601e-06, "loss": 0.0245, "step": 49780 }, { "epoch": 13.46403461330449, "grad_norm": 0.18559788167476654, "learning_rate": 7.711403270574746e-06, "loss": 0.025, "step": 49790 }, { "epoch": 13.466738777717685, "grad_norm": 0.18852077424526215, "learning_rate": 7.696706363000039e-06, "loss": 0.0234, "step": 49800 }, { "epoch": 13.46944294213088, "grad_norm": 0.22362534701824188, "learning_rate": 7.682022306048959e-06, "loss": 0.0268, "step": 49810 }, { "epoch": 13.472147106544078, "grad_norm": 0.15663178265094757, "learning_rate": 7.667351104182186e-06, "loss": 0.0239, "step": 49820 }, { "epoch": 13.474851270957274, "grad_norm": 0.165366530418396, "learning_rate": 7.652692761856395e-06, "loss": 0.0236, "step": 49830 }, { "epoch": 13.477555435370471, "grad_norm": 0.1547088325023651, "learning_rate": 7.63804728352444e-06, "loss": 0.024, "step": 49840 }, { "epoch": 13.480259599783667, "grad_norm": 0.18025478720664978, "learning_rate": 7.623414673635215e-06, "loss": 0.0232, "step": 49850 }, { "epoch": 13.482963764196864, "grad_norm": 0.14744672179222107, "learning_rate": 7.608794936633723e-06, "loss": 0.0237, "step": 49860 }, { "epoch": 13.48566792861006, "grad_norm": 0.17751511931419373, "learning_rate": 7.594188076961056e-06, "loss": 0.0236, "step": 49870 }, { "epoch": 13.488372093023255, "grad_norm": 0.13711078464984894, "learning_rate": 7.579594099054382e-06, "loss": 0.024, "step": 49880 }, { "epoch": 13.491076257436452, "grad_norm": 0.18423771858215332, "learning_rate": 7.565013007346983e-06, "loss": 0.0232, "step": 49890 }, { "epoch": 13.493780421849648, "grad_norm": 0.23776140809059143, "learning_rate": 7.5504448062682035e-06, "loss": 0.0245, "step": 49900 }, { "epoch": 13.496484586262845, "grad_norm": 0.20742136240005493, "learning_rate": 7.53588950024347e-06, "loss": 0.0241, "step": 49910 }, { "epoch": 13.49918875067604, "grad_norm": 0.1230342909693718, "learning_rate": 7.5213470936943145e-06, "loss": 0.0247, "step": 49920 }, { "epoch": 13.501892915089238, "grad_norm": 0.1023705005645752, "learning_rate": 7.506817591038323e-06, "loss": 0.0236, "step": 49930 }, { "epoch": 13.504597079502433, "grad_norm": 0.39832213521003723, "learning_rate": 7.492300996689183e-06, "loss": 0.0228, "step": 49940 }, { "epoch": 13.50730124391563, "grad_norm": 0.19723959267139435, "learning_rate": 7.477797315056645e-06, "loss": 0.0237, "step": 49950 }, { "epoch": 13.510005408328826, "grad_norm": 0.19262216985225677, "learning_rate": 7.463306550546539e-06, "loss": 0.0235, "step": 49960 }, { "epoch": 13.512709572742022, "grad_norm": 0.10586011409759521, "learning_rate": 7.448828707560812e-06, "loss": 0.0235, "step": 49970 }, { "epoch": 13.51541373715522, "grad_norm": 0.11743336915969849, "learning_rate": 7.4343637904974e-06, "loss": 0.024, "step": 49980 }, { "epoch": 13.518117901568415, "grad_norm": 0.13157743215560913, "learning_rate": 7.419911803750401e-06, "loss": 0.0243, "step": 49990 }, { "epoch": 13.520822065981612, "grad_norm": 0.1632792055606842, "learning_rate": 7.405472751709935e-06, "loss": 0.0242, "step": 50000 }, { "epoch": 13.523526230394808, "grad_norm": 0.2163734883069992, "learning_rate": 7.3910466387622e-06, "loss": 0.0225, "step": 50010 }, { "epoch": 13.526230394808005, "grad_norm": 0.11782890558242798, "learning_rate": 7.3766334692894735e-06, "loss": 0.0237, "step": 50020 }, { "epoch": 13.5289345592212, "grad_norm": 0.16830000281333923, "learning_rate": 7.3622332476700865e-06, "loss": 0.0242, "step": 50030 }, { "epoch": 13.531638723634398, "grad_norm": 0.11818969994783401, "learning_rate": 7.347845978278472e-06, "loss": 0.0235, "step": 50040 }, { "epoch": 13.534342888047593, "grad_norm": 0.21374806761741638, "learning_rate": 7.333471665485065e-06, "loss": 0.0241, "step": 50050 }, { "epoch": 13.537047052460789, "grad_norm": 0.09562584012746811, "learning_rate": 7.31911031365643e-06, "loss": 0.022, "step": 50060 }, { "epoch": 13.539751216873986, "grad_norm": 0.1545872539281845, "learning_rate": 7.304761927155157e-06, "loss": 0.0235, "step": 50070 }, { "epoch": 13.542455381287182, "grad_norm": 0.2727219760417938, "learning_rate": 7.29042651033991e-06, "loss": 0.0237, "step": 50080 }, { "epoch": 13.545159545700379, "grad_norm": 0.20397154986858368, "learning_rate": 7.276104067565409e-06, "loss": 0.0233, "step": 50090 }, { "epoch": 13.547863710113575, "grad_norm": 0.33417025208473206, "learning_rate": 7.261794603182431e-06, "loss": 0.0238, "step": 50100 }, { "epoch": 13.550567874526772, "grad_norm": 0.11749973893165588, "learning_rate": 7.24749812153781e-06, "loss": 0.0261, "step": 50110 }, { "epoch": 13.553272038939967, "grad_norm": 0.1705557256937027, "learning_rate": 7.2332146269744605e-06, "loss": 0.0235, "step": 50120 }, { "epoch": 13.555976203353165, "grad_norm": 0.1329030692577362, "learning_rate": 7.218944123831295e-06, "loss": 0.0237, "step": 50130 }, { "epoch": 13.55868036776636, "grad_norm": 0.17946547269821167, "learning_rate": 7.204686616443351e-06, "loss": 0.0243, "step": 50140 }, { "epoch": 13.561384532179556, "grad_norm": 0.1739341765642166, "learning_rate": 7.190442109141665e-06, "loss": 0.0236, "step": 50150 }, { "epoch": 13.564088696592753, "grad_norm": 0.13843438029289246, "learning_rate": 7.176210606253347e-06, "loss": 0.0242, "step": 50160 }, { "epoch": 13.566792861005949, "grad_norm": 0.1733599752187729, "learning_rate": 7.161992112101551e-06, "loss": 0.023, "step": 50170 }, { "epoch": 13.569497025419146, "grad_norm": 0.12444479018449783, "learning_rate": 7.147786631005465e-06, "loss": 0.0223, "step": 50180 }, { "epoch": 13.572201189832342, "grad_norm": 0.1592245250940323, "learning_rate": 7.1335941672803775e-06, "loss": 0.0222, "step": 50190 }, { "epoch": 13.574905354245539, "grad_norm": 0.14281640946865082, "learning_rate": 7.1194147252375384e-06, "loss": 0.0238, "step": 50200 }, { "epoch": 13.577609518658734, "grad_norm": 0.13888347148895264, "learning_rate": 7.10524830918432e-06, "loss": 0.0249, "step": 50210 }, { "epoch": 13.58031368307193, "grad_norm": 0.17431668937206268, "learning_rate": 7.091094923424097e-06, "loss": 0.0238, "step": 50220 }, { "epoch": 13.583017847485127, "grad_norm": 0.21530120074748993, "learning_rate": 7.0769545722562894e-06, "loss": 0.0249, "step": 50230 }, { "epoch": 13.585722011898323, "grad_norm": 0.11926808953285217, "learning_rate": 7.0628272599763675e-06, "loss": 0.0248, "step": 50240 }, { "epoch": 13.58842617631152, "grad_norm": 0.22672493755817413, "learning_rate": 7.048712990875828e-06, "loss": 0.0231, "step": 50250 }, { "epoch": 13.591130340724716, "grad_norm": 0.10073922574520111, "learning_rate": 7.034611769242216e-06, "loss": 0.0234, "step": 50260 }, { "epoch": 13.593834505137913, "grad_norm": 0.16113214194774628, "learning_rate": 7.02052359935913e-06, "loss": 0.0241, "step": 50270 }, { "epoch": 13.596538669551109, "grad_norm": 0.1851329356431961, "learning_rate": 7.006448485506145e-06, "loss": 0.0239, "step": 50280 }, { "epoch": 13.599242833964304, "grad_norm": 0.17239892482757568, "learning_rate": 6.992386431958942e-06, "loss": 0.0235, "step": 50290 }, { "epoch": 13.601946998377501, "grad_norm": 0.1669939160346985, "learning_rate": 6.978337442989197e-06, "loss": 0.0245, "step": 50300 }, { "epoch": 13.604651162790697, "grad_norm": 0.12595130503177643, "learning_rate": 6.964301522864608e-06, "loss": 0.0241, "step": 50310 }, { "epoch": 13.607355327203894, "grad_norm": 0.1658799797296524, "learning_rate": 6.950278675848926e-06, "loss": 0.0228, "step": 50320 }, { "epoch": 13.61005949161709, "grad_norm": 0.11768550425767899, "learning_rate": 6.9362689062019145e-06, "loss": 0.0234, "step": 50330 }, { "epoch": 13.612763656030287, "grad_norm": 0.3814915120601654, "learning_rate": 6.922272218179393e-06, "loss": 0.023, "step": 50340 }, { "epoch": 13.615467820443483, "grad_norm": 0.11020603030920029, "learning_rate": 6.908288616033148e-06, "loss": 0.0235, "step": 50350 }, { "epoch": 13.61817198485668, "grad_norm": 0.28144344687461853, "learning_rate": 6.894318104011077e-06, "loss": 0.0241, "step": 50360 }, { "epoch": 13.620876149269876, "grad_norm": 0.14729008078575134, "learning_rate": 6.880360686357007e-06, "loss": 0.0226, "step": 50370 }, { "epoch": 13.623580313683071, "grad_norm": 0.12815071642398834, "learning_rate": 6.8664163673108575e-06, "loss": 0.0226, "step": 50380 }, { "epoch": 13.626284478096268, "grad_norm": 0.22795148193836212, "learning_rate": 6.85248515110854e-06, "loss": 0.0246, "step": 50390 }, { "epoch": 13.628988642509464, "grad_norm": 0.11746430397033691, "learning_rate": 6.838567041981992e-06, "loss": 0.0238, "step": 50400 }, { "epoch": 13.631692806922661, "grad_norm": 0.15554966032505035, "learning_rate": 6.8246620441591634e-06, "loss": 0.023, "step": 50410 }, { "epoch": 13.634396971335857, "grad_norm": 0.14294108748435974, "learning_rate": 6.8107701618640275e-06, "loss": 0.0223, "step": 50420 }, { "epoch": 13.637101135749054, "grad_norm": 0.1654919981956482, "learning_rate": 6.796891399316557e-06, "loss": 0.0223, "step": 50430 }, { "epoch": 13.63980530016225, "grad_norm": 0.13189393281936646, "learning_rate": 6.7830257607327804e-06, "loss": 0.025, "step": 50440 }, { "epoch": 13.642509464575447, "grad_norm": 0.11094819754362106, "learning_rate": 6.7691732503247e-06, "loss": 0.0234, "step": 50450 }, { "epoch": 13.645213628988643, "grad_norm": 0.1101837307214737, "learning_rate": 6.755333872300346e-06, "loss": 0.0241, "step": 50460 }, { "epoch": 13.647917793401838, "grad_norm": 0.16522103548049927, "learning_rate": 6.741507630863747e-06, "loss": 0.0237, "step": 50470 }, { "epoch": 13.650621957815035, "grad_norm": 0.2317817360162735, "learning_rate": 6.727694530214945e-06, "loss": 0.0245, "step": 50480 }, { "epoch": 13.653326122228231, "grad_norm": 0.13490907847881317, "learning_rate": 6.713894574550028e-06, "loss": 0.0238, "step": 50490 }, { "epoch": 13.656030286641428, "grad_norm": 0.1293398141860962, "learning_rate": 6.700107768061015e-06, "loss": 0.0255, "step": 50500 }, { "epoch": 13.658734451054624, "grad_norm": 0.18114131689071655, "learning_rate": 6.686334114936016e-06, "loss": 0.0239, "step": 50510 }, { "epoch": 13.661438615467821, "grad_norm": 0.19915591180324554, "learning_rate": 6.672573619359063e-06, "loss": 0.0229, "step": 50520 }, { "epoch": 13.664142779881017, "grad_norm": 0.25958341360092163, "learning_rate": 6.658826285510256e-06, "loss": 0.0233, "step": 50530 }, { "epoch": 13.666846944294214, "grad_norm": 0.10189609229564667, "learning_rate": 6.645092117565666e-06, "loss": 0.0248, "step": 50540 }, { "epoch": 13.66955110870741, "grad_norm": 0.1437322348356247, "learning_rate": 6.631371119697371e-06, "loss": 0.0242, "step": 50550 }, { "epoch": 13.672255273120605, "grad_norm": 0.11972358077764511, "learning_rate": 6.6176632960734505e-06, "loss": 0.0233, "step": 50560 }, { "epoch": 13.674959437533802, "grad_norm": 0.2020898014307022, "learning_rate": 6.603968650857978e-06, "loss": 0.0229, "step": 50570 }, { "epoch": 13.677663601946998, "grad_norm": 0.14816489815711975, "learning_rate": 6.5902871882110085e-06, "loss": 0.0237, "step": 50580 }, { "epoch": 13.680367766360195, "grad_norm": 0.3212878108024597, "learning_rate": 6.576618912288635e-06, "loss": 0.0238, "step": 50590 }, { "epoch": 13.68307193077339, "grad_norm": 0.18489764630794525, "learning_rate": 6.562963827242913e-06, "loss": 0.0232, "step": 50600 }, { "epoch": 13.685776095186588, "grad_norm": 0.13778942823410034, "learning_rate": 6.549321937221886e-06, "loss": 0.0235, "step": 50610 }, { "epoch": 13.688480259599784, "grad_norm": 0.16999417543411255, "learning_rate": 6.5356932463696064e-06, "loss": 0.0222, "step": 50620 }, { "epoch": 13.69118442401298, "grad_norm": 0.20482471585273743, "learning_rate": 6.522077758826101e-06, "loss": 0.025, "step": 50630 }, { "epoch": 13.693888588426177, "grad_norm": 0.11192761361598969, "learning_rate": 6.5084754787274275e-06, "loss": 0.0232, "step": 50640 }, { "epoch": 13.696592752839372, "grad_norm": 0.268205463886261, "learning_rate": 6.494886410205553e-06, "loss": 0.0243, "step": 50650 }, { "epoch": 13.69929691725257, "grad_norm": 0.22363565862178802, "learning_rate": 6.481310557388521e-06, "loss": 0.0246, "step": 50660 }, { "epoch": 13.702001081665765, "grad_norm": 0.20732836425304413, "learning_rate": 6.46774792440028e-06, "loss": 0.0236, "step": 50670 }, { "epoch": 13.704705246078962, "grad_norm": 0.09389202296733856, "learning_rate": 6.4541985153608206e-06, "loss": 0.0229, "step": 50680 }, { "epoch": 13.707409410492158, "grad_norm": 0.120432548224926, "learning_rate": 6.4406623343861e-06, "loss": 0.023, "step": 50690 }, { "epoch": 13.710113574905353, "grad_norm": 0.11564520746469498, "learning_rate": 6.427139385588038e-06, "loss": 0.0225, "step": 50700 }, { "epoch": 13.71281773931855, "grad_norm": 0.1861169934272766, "learning_rate": 6.413629673074561e-06, "loss": 0.0225, "step": 50710 }, { "epoch": 13.715521903731746, "grad_norm": 0.20901261270046234, "learning_rate": 6.400133200949554e-06, "loss": 0.0229, "step": 50720 }, { "epoch": 13.718226068144943, "grad_norm": 0.10578031092882156, "learning_rate": 6.386649973312897e-06, "loss": 0.0246, "step": 50730 }, { "epoch": 13.720930232558139, "grad_norm": 0.12139717489480972, "learning_rate": 6.37317999426042e-06, "loss": 0.024, "step": 50740 }, { "epoch": 13.723634396971336, "grad_norm": 0.16701430082321167, "learning_rate": 6.359723267883977e-06, "loss": 0.0231, "step": 50750 }, { "epoch": 13.726338561384532, "grad_norm": 0.41798409819602966, "learning_rate": 6.346279798271343e-06, "loss": 0.0232, "step": 50760 }, { "epoch": 13.72904272579773, "grad_norm": 0.1411030888557434, "learning_rate": 6.332849589506301e-06, "loss": 0.0236, "step": 50770 }, { "epoch": 13.731746890210925, "grad_norm": 0.18833494186401367, "learning_rate": 6.319432645668588e-06, "loss": 0.024, "step": 50780 }, { "epoch": 13.73445105462412, "grad_norm": 0.19226334989070892, "learning_rate": 6.306028970833922e-06, "loss": 0.0247, "step": 50790 }, { "epoch": 13.737155219037318, "grad_norm": 0.18923693895339966, "learning_rate": 6.2926385690739665e-06, "loss": 0.0241, "step": 50800 }, { "epoch": 13.739859383450513, "grad_norm": 0.10953157395124435, "learning_rate": 6.279261444456413e-06, "loss": 0.0239, "step": 50810 }, { "epoch": 13.74256354786371, "grad_norm": 0.21027453243732452, "learning_rate": 6.265897601044829e-06, "loss": 0.023, "step": 50820 }, { "epoch": 13.745267712276906, "grad_norm": 0.09705601632595062, "learning_rate": 6.2525470428988434e-06, "loss": 0.0247, "step": 50830 }, { "epoch": 13.747971876690103, "grad_norm": 0.17846563458442688, "learning_rate": 6.239209774073962e-06, "loss": 0.0234, "step": 50840 }, { "epoch": 13.750676041103299, "grad_norm": 0.2416759878396988, "learning_rate": 6.225885798621728e-06, "loss": 0.0241, "step": 50850 }, { "epoch": 13.753380205516496, "grad_norm": 0.18347863852977753, "learning_rate": 6.2125751205895925e-06, "loss": 0.0253, "step": 50860 }, { "epoch": 13.756084369929692, "grad_norm": 0.18583542108535767, "learning_rate": 6.199277744020998e-06, "loss": 0.0239, "step": 50870 }, { "epoch": 13.758788534342887, "grad_norm": 0.11956167221069336, "learning_rate": 6.185993672955337e-06, "loss": 0.0244, "step": 50880 }, { "epoch": 13.761492698756085, "grad_norm": 0.14195102453231812, "learning_rate": 6.172722911427947e-06, "loss": 0.0234, "step": 50890 }, { "epoch": 13.76419686316928, "grad_norm": 0.08903241157531738, "learning_rate": 6.159465463470149e-06, "loss": 0.025, "step": 50900 }, { "epoch": 13.766901027582477, "grad_norm": 0.1386224329471588, "learning_rate": 6.146221333109204e-06, "loss": 0.0231, "step": 50910 }, { "epoch": 13.769605191995673, "grad_norm": 0.19980134069919586, "learning_rate": 6.132990524368326e-06, "loss": 0.0245, "step": 50920 }, { "epoch": 13.77230935640887, "grad_norm": 0.1461448073387146, "learning_rate": 6.119773041266685e-06, "loss": 0.0219, "step": 50930 }, { "epoch": 13.775013520822066, "grad_norm": 0.16895848512649536, "learning_rate": 6.106568887819402e-06, "loss": 0.0246, "step": 50940 }, { "epoch": 13.777717685235263, "grad_norm": 0.18600580096244812, "learning_rate": 6.093378068037548e-06, "loss": 0.023, "step": 50950 }, { "epoch": 13.780421849648459, "grad_norm": 0.21906396746635437, "learning_rate": 6.080200585928164e-06, "loss": 0.0232, "step": 50960 }, { "epoch": 13.783126014061654, "grad_norm": 0.3714301288127899, "learning_rate": 6.06703644549419e-06, "loss": 0.0248, "step": 50970 }, { "epoch": 13.785830178474852, "grad_norm": 0.2733334004878998, "learning_rate": 6.053885650734576e-06, "loss": 0.0237, "step": 50980 }, { "epoch": 13.788534342888047, "grad_norm": 0.21382059156894684, "learning_rate": 6.040748205644153e-06, "loss": 0.025, "step": 50990 }, { "epoch": 13.791238507301244, "grad_norm": 0.13367204368114471, "learning_rate": 6.0276241142137646e-06, "loss": 0.0245, "step": 51000 }, { "epoch": 13.79394267171444, "grad_norm": 0.1324326992034912, "learning_rate": 6.014513380430142e-06, "loss": 0.0241, "step": 51010 }, { "epoch": 13.796646836127637, "grad_norm": 0.14092445373535156, "learning_rate": 6.001416008275984e-06, "loss": 0.0256, "step": 51020 }, { "epoch": 13.799351000540833, "grad_norm": 0.12233652919530869, "learning_rate": 5.988332001729929e-06, "loss": 0.0229, "step": 51030 }, { "epoch": 13.802055164954028, "grad_norm": 0.20828214287757874, "learning_rate": 5.975261364766543e-06, "loss": 0.0245, "step": 51040 }, { "epoch": 13.804759329367226, "grad_norm": 0.2053631991147995, "learning_rate": 5.962204101356356e-06, "loss": 0.0245, "step": 51050 }, { "epoch": 13.807463493780421, "grad_norm": 0.1828380674123764, "learning_rate": 5.94916021546581e-06, "loss": 0.0249, "step": 51060 }, { "epoch": 13.810167658193619, "grad_norm": 0.2649388909339905, "learning_rate": 5.936129711057298e-06, "loss": 0.0232, "step": 51070 }, { "epoch": 13.812871822606814, "grad_norm": 0.14440688490867615, "learning_rate": 5.923112592089142e-06, "loss": 0.0233, "step": 51080 }, { "epoch": 13.815575987020011, "grad_norm": 0.29525938630104065, "learning_rate": 5.9101088625155954e-06, "loss": 0.0227, "step": 51090 }, { "epoch": 13.818280151433207, "grad_norm": 0.28107187151908875, "learning_rate": 5.897118526286843e-06, "loss": 0.0224, "step": 51100 }, { "epoch": 13.820984315846403, "grad_norm": 0.15599799156188965, "learning_rate": 5.884141587349035e-06, "loss": 0.0235, "step": 51110 }, { "epoch": 13.8236884802596, "grad_norm": 0.16804920136928558, "learning_rate": 5.871178049644177e-06, "loss": 0.0226, "step": 51120 }, { "epoch": 13.826392644672795, "grad_norm": 0.2213808298110962, "learning_rate": 5.858227917110293e-06, "loss": 0.0235, "step": 51130 }, { "epoch": 13.829096809085993, "grad_norm": 0.25576335191726685, "learning_rate": 5.845291193681252e-06, "loss": 0.0231, "step": 51140 }, { "epoch": 13.831800973499188, "grad_norm": 0.24765048921108246, "learning_rate": 5.832367883286921e-06, "loss": 0.0235, "step": 51150 }, { "epoch": 13.834505137912386, "grad_norm": 0.1036418005824089, "learning_rate": 5.81945798985305e-06, "loss": 0.0222, "step": 51160 }, { "epoch": 13.837209302325581, "grad_norm": 0.1775406152009964, "learning_rate": 5.806561517301306e-06, "loss": 0.0238, "step": 51170 }, { "epoch": 13.839913466738778, "grad_norm": 0.11684273183345795, "learning_rate": 5.793678469549335e-06, "loss": 0.0232, "step": 51180 }, { "epoch": 13.842617631151974, "grad_norm": 0.13966737687587738, "learning_rate": 5.780808850510627e-06, "loss": 0.0227, "step": 51190 }, { "epoch": 13.84532179556517, "grad_norm": 0.179282546043396, "learning_rate": 5.767952664094673e-06, "loss": 0.0232, "step": 51200 }, { "epoch": 13.848025959978367, "grad_norm": 0.1518908143043518, "learning_rate": 5.755109914206791e-06, "loss": 0.0239, "step": 51210 }, { "epoch": 13.850730124391562, "grad_norm": 0.15858282148838043, "learning_rate": 5.7422806047483125e-06, "loss": 0.0238, "step": 51220 }, { "epoch": 13.85343428880476, "grad_norm": 0.2579077482223511, "learning_rate": 5.72946473961643e-06, "loss": 0.0243, "step": 51230 }, { "epoch": 13.856138453217955, "grad_norm": 0.1426376849412918, "learning_rate": 5.716662322704264e-06, "loss": 0.0237, "step": 51240 }, { "epoch": 13.858842617631153, "grad_norm": 0.20357242226600647, "learning_rate": 5.703873357900852e-06, "loss": 0.0254, "step": 51250 }, { "epoch": 13.861546782044348, "grad_norm": 0.2006567120552063, "learning_rate": 5.691097849091143e-06, "loss": 0.0255, "step": 51260 }, { "epoch": 13.864250946457545, "grad_norm": 0.10150555521249771, "learning_rate": 5.678335800155982e-06, "loss": 0.0238, "step": 51270 }, { "epoch": 13.866955110870741, "grad_norm": 0.20077507197856903, "learning_rate": 5.665587214972174e-06, "loss": 0.0234, "step": 51280 }, { "epoch": 13.869659275283936, "grad_norm": 0.16981017589569092, "learning_rate": 5.652852097412386e-06, "loss": 0.0234, "step": 51290 }, { "epoch": 13.872363439697134, "grad_norm": 0.10144241154193878, "learning_rate": 5.640130451345216e-06, "loss": 0.0248, "step": 51300 }, { "epoch": 13.87506760411033, "grad_norm": 0.12632277607917786, "learning_rate": 5.627422280635159e-06, "loss": 0.0247, "step": 51310 }, { "epoch": 13.877771768523527, "grad_norm": 0.24079278111457825, "learning_rate": 5.61472758914261e-06, "loss": 0.0238, "step": 51320 }, { "epoch": 13.880475932936722, "grad_norm": 0.19738100469112396, "learning_rate": 5.602046380723918e-06, "loss": 0.024, "step": 51330 }, { "epoch": 13.88318009734992, "grad_norm": 0.11175734549760818, "learning_rate": 5.5893786592312535e-06, "loss": 0.023, "step": 51340 }, { "epoch": 13.885884261763115, "grad_norm": 0.26866987347602844, "learning_rate": 5.576724428512775e-06, "loss": 0.0232, "step": 51350 }, { "epoch": 13.888588426176312, "grad_norm": 0.128705233335495, "learning_rate": 5.56408369241247e-06, "loss": 0.0219, "step": 51360 }, { "epoch": 13.891292590589508, "grad_norm": 0.21276229619979858, "learning_rate": 5.5514564547702875e-06, "loss": 0.0244, "step": 51370 }, { "epoch": 13.893996755002703, "grad_norm": 0.18952038884162903, "learning_rate": 5.538842719422038e-06, "loss": 0.0243, "step": 51380 }, { "epoch": 13.8967009194159, "grad_norm": 0.3034428358078003, "learning_rate": 5.52624249019944e-06, "loss": 0.0238, "step": 51390 }, { "epoch": 13.899405083829096, "grad_norm": 0.14945271611213684, "learning_rate": 5.51365577093011e-06, "loss": 0.0236, "step": 51400 }, { "epoch": 13.902109248242294, "grad_norm": 0.12216587364673615, "learning_rate": 5.501082565437565e-06, "loss": 0.0228, "step": 51410 }, { "epoch": 13.90481341265549, "grad_norm": 0.27531522512435913, "learning_rate": 5.488522877541202e-06, "loss": 0.0225, "step": 51420 }, { "epoch": 13.907517577068687, "grad_norm": 0.10867723822593689, "learning_rate": 5.475976711056341e-06, "loss": 0.0236, "step": 51430 }, { "epoch": 13.910221741481882, "grad_norm": 0.2674340605735779, "learning_rate": 5.463444069794166e-06, "loss": 0.0235, "step": 51440 }, { "epoch": 13.912925905895078, "grad_norm": 0.16567061841487885, "learning_rate": 5.4509249575617594e-06, "loss": 0.0221, "step": 51450 }, { "epoch": 13.915630070308275, "grad_norm": 0.1432180106639862, "learning_rate": 5.438419378162107e-06, "loss": 0.0231, "step": 51460 }, { "epoch": 13.91833423472147, "grad_norm": 0.0926576629281044, "learning_rate": 5.425927335394054e-06, "loss": 0.0233, "step": 51470 }, { "epoch": 13.921038399134668, "grad_norm": 0.18101781606674194, "learning_rate": 5.413448833052387e-06, "loss": 0.0245, "step": 51480 }, { "epoch": 13.923742563547863, "grad_norm": 0.20031356811523438, "learning_rate": 5.400983874927701e-06, "loss": 0.0237, "step": 51490 }, { "epoch": 13.92644672796106, "grad_norm": 0.3385591208934784, "learning_rate": 5.388532464806567e-06, "loss": 0.0232, "step": 51500 }, { "epoch": 13.929150892374256, "grad_norm": 0.1854773610830307, "learning_rate": 5.3760946064713546e-06, "loss": 0.0238, "step": 51510 }, { "epoch": 13.931855056787454, "grad_norm": 0.1979851871728897, "learning_rate": 5.363670303700386e-06, "loss": 0.024, "step": 51520 }, { "epoch": 13.934559221200649, "grad_norm": 0.16077610850334167, "learning_rate": 5.351259560267824e-06, "loss": 0.0247, "step": 51530 }, { "epoch": 13.937263385613845, "grad_norm": 0.1323452889919281, "learning_rate": 5.338862379943721e-06, "loss": 0.0242, "step": 51540 }, { "epoch": 13.939967550027042, "grad_norm": 0.11435940861701965, "learning_rate": 5.326478766494025e-06, "loss": 0.0238, "step": 51550 }, { "epoch": 13.942671714440237, "grad_norm": 0.13327620923519135, "learning_rate": 5.3141087236805385e-06, "loss": 0.0238, "step": 51560 }, { "epoch": 13.945375878853435, "grad_norm": 0.12644760310649872, "learning_rate": 5.3017522552609615e-06, "loss": 0.0238, "step": 51570 }, { "epoch": 13.94808004326663, "grad_norm": 0.12034519016742706, "learning_rate": 5.289409364988851e-06, "loss": 0.0234, "step": 51580 }, { "epoch": 13.950784207679828, "grad_norm": 0.1550084948539734, "learning_rate": 5.277080056613671e-06, "loss": 0.0232, "step": 51590 }, { "epoch": 13.953488372093023, "grad_norm": 0.18056635558605194, "learning_rate": 5.264764333880729e-06, "loss": 0.0246, "step": 51600 }, { "epoch": 13.956192536506219, "grad_norm": 0.3171890079975128, "learning_rate": 5.252462200531216e-06, "loss": 0.0238, "step": 51610 }, { "epoch": 13.958896700919416, "grad_norm": 0.15031135082244873, "learning_rate": 5.240173660302194e-06, "loss": 0.0227, "step": 51620 }, { "epoch": 13.961600865332612, "grad_norm": 0.1738835573196411, "learning_rate": 5.2278987169266044e-06, "loss": 0.0242, "step": 51630 }, { "epoch": 13.964305029745809, "grad_norm": 0.22281116247177124, "learning_rate": 5.215637374133231e-06, "loss": 0.0245, "step": 51640 }, { "epoch": 13.967009194159004, "grad_norm": 0.1681445986032486, "learning_rate": 5.203389635646782e-06, "loss": 0.0231, "step": 51650 }, { "epoch": 13.969713358572202, "grad_norm": 0.14561313390731812, "learning_rate": 5.191155505187756e-06, "loss": 0.0225, "step": 51660 }, { "epoch": 13.972417522985397, "grad_norm": 0.3052321672439575, "learning_rate": 5.1789349864726e-06, "loss": 0.025, "step": 51670 }, { "epoch": 13.975121687398595, "grad_norm": 0.2410145252943039, "learning_rate": 5.16672808321354e-06, "loss": 0.0242, "step": 51680 }, { "epoch": 13.97782585181179, "grad_norm": 0.17492419481277466, "learning_rate": 5.154534799118749e-06, "loss": 0.0228, "step": 51690 }, { "epoch": 13.980530016224986, "grad_norm": 0.1646036058664322, "learning_rate": 5.142355137892207e-06, "loss": 0.0227, "step": 51700 }, { "epoch": 13.983234180638183, "grad_norm": 0.1348753273487091, "learning_rate": 5.130189103233779e-06, "loss": 0.025, "step": 51710 }, { "epoch": 13.985938345051379, "grad_norm": 0.1369849294424057, "learning_rate": 5.118036698839179e-06, "loss": 0.0241, "step": 51720 }, { "epoch": 13.988642509464576, "grad_norm": 0.14091497659683228, "learning_rate": 5.105897928399983e-06, "loss": 0.024, "step": 51730 }, { "epoch": 13.991346673877771, "grad_norm": 0.15182159841060638, "learning_rate": 5.0937727956036464e-06, "loss": 0.0232, "step": 51740 }, { "epoch": 13.994050838290969, "grad_norm": 0.26024335622787476, "learning_rate": 5.081661304133456e-06, "loss": 0.0234, "step": 51750 }, { "epoch": 13.996755002704164, "grad_norm": 0.14670737087726593, "learning_rate": 5.069563457668558e-06, "loss": 0.0233, "step": 51760 }, { "epoch": 13.999459167117362, "grad_norm": 0.14281342923641205, "learning_rate": 5.0574792598839624e-06, "loss": 0.0235, "step": 51770 }, { "epoch": 14.002163331530557, "grad_norm": 0.20987223088741302, "learning_rate": 5.0454087144505276e-06, "loss": 0.0237, "step": 51780 }, { "epoch": 14.004867495943753, "grad_norm": 0.11999745666980743, "learning_rate": 5.0333518250349655e-06, "loss": 0.0234, "step": 51790 }, { "epoch": 14.00757166035695, "grad_norm": 0.11627696454524994, "learning_rate": 5.021308595299856e-06, "loss": 0.022, "step": 51800 }, { "epoch": 14.010275824770146, "grad_norm": 0.132200226187706, "learning_rate": 5.009279028903585e-06, "loss": 0.023, "step": 51810 }, { "epoch": 14.012979989183343, "grad_norm": 0.14757119119167328, "learning_rate": 4.997263129500452e-06, "loss": 0.0235, "step": 51820 }, { "epoch": 14.015684153596538, "grad_norm": 0.24207614362239838, "learning_rate": 4.985260900740535e-06, "loss": 0.0236, "step": 51830 }, { "epoch": 14.018388318009736, "grad_norm": 0.16610977053642273, "learning_rate": 4.973272346269814e-06, "loss": 0.0247, "step": 51840 }, { "epoch": 14.021092482422931, "grad_norm": 0.3273349404335022, "learning_rate": 4.961297469730097e-06, "loss": 0.0229, "step": 51850 }, { "epoch": 14.023796646836127, "grad_norm": 0.11379613727331161, "learning_rate": 4.949336274759031e-06, "loss": 0.024, "step": 51860 }, { "epoch": 14.026500811249324, "grad_norm": 0.15775319933891296, "learning_rate": 4.9373887649901144e-06, "loss": 0.0234, "step": 51870 }, { "epoch": 14.02920497566252, "grad_norm": 0.2502598166465759, "learning_rate": 4.925454944052666e-06, "loss": 0.0227, "step": 51880 }, { "epoch": 14.031909140075717, "grad_norm": 0.28256338834762573, "learning_rate": 4.913534815571891e-06, "loss": 0.0229, "step": 51890 }, { "epoch": 14.034613304488913, "grad_norm": 0.1876988261938095, "learning_rate": 4.901628383168805e-06, "loss": 0.0245, "step": 51900 }, { "epoch": 14.03731746890211, "grad_norm": 0.15671999752521515, "learning_rate": 4.8897356504602585e-06, "loss": 0.0237, "step": 51910 }, { "epoch": 14.040021633315305, "grad_norm": 0.17565834522247314, "learning_rate": 4.877856621058957e-06, "loss": 0.0231, "step": 51920 }, { "epoch": 14.042725797728503, "grad_norm": 0.16685663163661957, "learning_rate": 4.86599129857343e-06, "loss": 0.0247, "step": 51930 }, { "epoch": 14.045429962141698, "grad_norm": 0.13150399923324585, "learning_rate": 4.85413968660805e-06, "loss": 0.0224, "step": 51940 }, { "epoch": 14.048134126554894, "grad_norm": 0.16251346468925476, "learning_rate": 4.842301788763031e-06, "loss": 0.0229, "step": 51950 }, { "epoch": 14.050838290968091, "grad_norm": 0.28321677446365356, "learning_rate": 4.830477608634393e-06, "loss": 0.0232, "step": 51960 }, { "epoch": 14.053542455381287, "grad_norm": 0.2659943997859955, "learning_rate": 4.818667149814049e-06, "loss": 0.0242, "step": 51970 }, { "epoch": 14.056246619794484, "grad_norm": 0.1856786161661148, "learning_rate": 4.806870415889664e-06, "loss": 0.0236, "step": 51980 }, { "epoch": 14.05895078420768, "grad_norm": 0.45099663734436035, "learning_rate": 4.795087410444798e-06, "loss": 0.0226, "step": 51990 }, { "epoch": 14.061654948620877, "grad_norm": 0.13463856279850006, "learning_rate": 4.783318137058807e-06, "loss": 0.0229, "step": 52000 }, { "epoch": 14.064359113034072, "grad_norm": 0.19269321858882904, "learning_rate": 4.771562599306895e-06, "loss": 0.0228, "step": 52010 }, { "epoch": 14.067063277447268, "grad_norm": 0.1499483585357666, "learning_rate": 4.759820800760073e-06, "loss": 0.0243, "step": 52020 }, { "epoch": 14.069767441860465, "grad_norm": 0.14399108290672302, "learning_rate": 4.7480927449851834e-06, "loss": 0.0223, "step": 52030 }, { "epoch": 14.07247160627366, "grad_norm": 0.08274618536233902, "learning_rate": 4.73637843554493e-06, "loss": 0.0227, "step": 52040 }, { "epoch": 14.075175770686858, "grad_norm": 0.17638052999973297, "learning_rate": 4.724677875997774e-06, "loss": 0.0228, "step": 52050 }, { "epoch": 14.077879935100054, "grad_norm": 0.11582931131124496, "learning_rate": 4.712991069898065e-06, "loss": 0.0225, "step": 52060 }, { "epoch": 14.080584099513251, "grad_norm": 0.1560581922531128, "learning_rate": 4.7013180207959305e-06, "loss": 0.0237, "step": 52070 }, { "epoch": 14.083288263926447, "grad_norm": 0.2434227019548416, "learning_rate": 4.6896587322373395e-06, "loss": 0.0229, "step": 52080 }, { "epoch": 14.085992428339644, "grad_norm": 0.22854483127593994, "learning_rate": 4.678013207764081e-06, "loss": 0.0238, "step": 52090 }, { "epoch": 14.08869659275284, "grad_norm": 0.16264210641384125, "learning_rate": 4.666381450913748e-06, "loss": 0.0235, "step": 52100 }, { "epoch": 14.091400757166035, "grad_norm": 0.13514196872711182, "learning_rate": 4.654763465219752e-06, "loss": 0.0232, "step": 52110 }, { "epoch": 14.094104921579232, "grad_norm": 0.12346448004245758, "learning_rate": 4.643159254211371e-06, "loss": 0.0233, "step": 52120 }, { "epoch": 14.096809085992428, "grad_norm": 0.13231168687343597, "learning_rate": 4.631568821413606e-06, "loss": 0.0241, "step": 52130 }, { "epoch": 14.099513250405625, "grad_norm": 0.16538308560848236, "learning_rate": 4.619992170347359e-06, "loss": 0.023, "step": 52140 }, { "epoch": 14.10221741481882, "grad_norm": 0.1151522621512413, "learning_rate": 4.608429304529305e-06, "loss": 0.0227, "step": 52150 }, { "epoch": 14.104921579232018, "grad_norm": 0.1420796513557434, "learning_rate": 4.596880227471928e-06, "loss": 0.0235, "step": 52160 }, { "epoch": 14.107625743645213, "grad_norm": 0.1616508811712265, "learning_rate": 4.585344942683539e-06, "loss": 0.0243, "step": 52170 }, { "epoch": 14.11032990805841, "grad_norm": 0.15927380323410034, "learning_rate": 4.573823453668241e-06, "loss": 0.023, "step": 52180 }, { "epoch": 14.113034072471606, "grad_norm": 0.2006228268146515, "learning_rate": 4.562315763925995e-06, "loss": 0.0233, "step": 52190 }, { "epoch": 14.115738236884802, "grad_norm": 0.2267296463251114, "learning_rate": 4.5508218769524825e-06, "loss": 0.023, "step": 52200 }, { "epoch": 14.118442401298, "grad_norm": 0.14950115978717804, "learning_rate": 4.539341796239277e-06, "loss": 0.025, "step": 52210 }, { "epoch": 14.121146565711195, "grad_norm": 0.24924489855766296, "learning_rate": 4.527875525273717e-06, "loss": 0.0234, "step": 52220 }, { "epoch": 14.123850730124392, "grad_norm": 0.15028858184814453, "learning_rate": 4.51642306753895e-06, "loss": 0.0232, "step": 52230 }, { "epoch": 14.126554894537588, "grad_norm": 0.10126171261072159, "learning_rate": 4.5049844265139306e-06, "loss": 0.022, "step": 52240 }, { "epoch": 14.129259058950785, "grad_norm": 0.10729847848415375, "learning_rate": 4.4935596056734144e-06, "loss": 0.0234, "step": 52250 }, { "epoch": 14.13196322336398, "grad_norm": 0.23009642958641052, "learning_rate": 4.482148608487957e-06, "loss": 0.0232, "step": 52260 }, { "epoch": 14.134667387777176, "grad_norm": 0.18210136890411377, "learning_rate": 4.4707514384239365e-06, "loss": 0.0226, "step": 52270 }, { "epoch": 14.137371552190373, "grad_norm": 0.14994964003562927, "learning_rate": 4.459368098943484e-06, "loss": 0.0247, "step": 52280 }, { "epoch": 14.140075716603569, "grad_norm": 0.1270286738872528, "learning_rate": 4.447998593504582e-06, "loss": 0.0235, "step": 52290 }, { "epoch": 14.142779881016766, "grad_norm": 0.13200107216835022, "learning_rate": 4.4366429255609744e-06, "loss": 0.0232, "step": 52300 }, { "epoch": 14.145484045429962, "grad_norm": 0.11994775384664536, "learning_rate": 4.425301098562212e-06, "loss": 0.0221, "step": 52310 }, { "epoch": 14.148188209843159, "grad_norm": 0.1268848329782486, "learning_rate": 4.413973115953651e-06, "loss": 0.0228, "step": 52320 }, { "epoch": 14.150892374256355, "grad_norm": 0.16193382441997528, "learning_rate": 4.402658981176416e-06, "loss": 0.0241, "step": 52330 }, { "epoch": 14.153596538669552, "grad_norm": 0.1354236900806427, "learning_rate": 4.391358697667475e-06, "loss": 0.0228, "step": 52340 }, { "epoch": 14.156300703082747, "grad_norm": 0.13408660888671875, "learning_rate": 4.3800722688595195e-06, "loss": 0.0234, "step": 52350 }, { "epoch": 14.159004867495943, "grad_norm": 0.1661393791437149, "learning_rate": 4.368799698181097e-06, "loss": 0.0244, "step": 52360 }, { "epoch": 14.16170903190914, "grad_norm": 0.20348073542118073, "learning_rate": 4.357540989056486e-06, "loss": 0.0244, "step": 52370 }, { "epoch": 14.164413196322336, "grad_norm": 0.2543005049228668, "learning_rate": 4.346296144905815e-06, "loss": 0.0238, "step": 52380 }, { "epoch": 14.167117360735533, "grad_norm": 0.14514131844043732, "learning_rate": 4.335065169144958e-06, "loss": 0.0219, "step": 52390 }, { "epoch": 14.169821525148729, "grad_norm": 0.3430962562561035, "learning_rate": 4.323848065185593e-06, "loss": 0.0234, "step": 52400 }, { "epoch": 14.172525689561926, "grad_norm": 0.1054174154996872, "learning_rate": 4.31264483643517e-06, "loss": 0.0227, "step": 52410 }, { "epoch": 14.175229853975122, "grad_norm": 0.13996148109436035, "learning_rate": 4.301455486296946e-06, "loss": 0.0234, "step": 52420 }, { "epoch": 14.177934018388317, "grad_norm": 0.335683673620224, "learning_rate": 4.290280018169935e-06, "loss": 0.024, "step": 52430 }, { "epoch": 14.180638182801514, "grad_norm": 0.21446433663368225, "learning_rate": 4.27911843544897e-06, "loss": 0.0253, "step": 52440 }, { "epoch": 14.18334234721471, "grad_norm": 0.15461409091949463, "learning_rate": 4.2679707415246294e-06, "loss": 0.0227, "step": 52450 }, { "epoch": 14.186046511627907, "grad_norm": 0.32828614115715027, "learning_rate": 4.256836939783299e-06, "loss": 0.0239, "step": 52460 }, { "epoch": 14.188750676041103, "grad_norm": 0.15672250092029572, "learning_rate": 4.245717033607127e-06, "loss": 0.0247, "step": 52470 }, { "epoch": 14.1914548404543, "grad_norm": 0.2183806449174881, "learning_rate": 4.234611026374035e-06, "loss": 0.023, "step": 52480 }, { "epoch": 14.194159004867496, "grad_norm": 0.21685484051704407, "learning_rate": 4.2235189214577694e-06, "loss": 0.0237, "step": 52490 }, { "epoch": 14.196863169280693, "grad_norm": 0.14279009401798248, "learning_rate": 4.212440722227779e-06, "loss": 0.0245, "step": 52500 }, { "epoch": 14.199567333693889, "grad_norm": 0.17216747999191284, "learning_rate": 4.201376432049364e-06, "loss": 0.0233, "step": 52510 }, { "epoch": 14.202271498107084, "grad_norm": 0.10314889252185822, "learning_rate": 4.1903260542835275e-06, "loss": 0.0231, "step": 52520 }, { "epoch": 14.204975662520281, "grad_norm": 0.1483079195022583, "learning_rate": 4.1792895922871114e-06, "loss": 0.0229, "step": 52530 }, { "epoch": 14.207679826933477, "grad_norm": 0.10783815383911133, "learning_rate": 4.168267049412694e-06, "loss": 0.0225, "step": 52540 }, { "epoch": 14.210383991346674, "grad_norm": 0.1118859052658081, "learning_rate": 4.157258429008626e-06, "loss": 0.0224, "step": 52550 }, { "epoch": 14.21308815575987, "grad_norm": 0.11098022758960724, "learning_rate": 4.146263734419043e-06, "loss": 0.0239, "step": 52560 }, { "epoch": 14.215792320173067, "grad_norm": 0.12196849286556244, "learning_rate": 4.135282968983839e-06, "loss": 0.0231, "step": 52570 }, { "epoch": 14.218496484586263, "grad_norm": 0.10382150113582611, "learning_rate": 4.124316136038675e-06, "loss": 0.0225, "step": 52580 }, { "epoch": 14.22120064899946, "grad_norm": 0.10315142571926117, "learning_rate": 4.1133632389149965e-06, "loss": 0.0232, "step": 52590 }, { "epoch": 14.223904813412656, "grad_norm": 0.27753639221191406, "learning_rate": 4.102424280939998e-06, "loss": 0.025, "step": 52600 }, { "epoch": 14.226608977825851, "grad_norm": 0.16430100798606873, "learning_rate": 4.091499265436649e-06, "loss": 0.0232, "step": 52610 }, { "epoch": 14.229313142239048, "grad_norm": 0.18703486025333405, "learning_rate": 4.080588195723684e-06, "loss": 0.0226, "step": 52620 }, { "epoch": 14.232017306652244, "grad_norm": 0.10370148718357086, "learning_rate": 4.069691075115578e-06, "loss": 0.0228, "step": 52630 }, { "epoch": 14.234721471065441, "grad_norm": 0.15867973864078522, "learning_rate": 4.0588079069226235e-06, "loss": 0.0233, "step": 52640 }, { "epoch": 14.237425635478637, "grad_norm": 0.1982022225856781, "learning_rate": 4.0479386944508034e-06, "loss": 0.023, "step": 52650 }, { "epoch": 14.240129799891834, "grad_norm": 0.18742674589157104, "learning_rate": 4.037083441001932e-06, "loss": 0.0238, "step": 52660 }, { "epoch": 14.24283396430503, "grad_norm": 0.4189830720424652, "learning_rate": 4.026242149873516e-06, "loss": 0.0246, "step": 52670 }, { "epoch": 14.245538128718225, "grad_norm": 0.14362850785255432, "learning_rate": 4.015414824358871e-06, "loss": 0.0233, "step": 52680 }, { "epoch": 14.248242293131423, "grad_norm": 0.115357406437397, "learning_rate": 4.004601467747054e-06, "loss": 0.0219, "step": 52690 }, { "epoch": 14.250946457544618, "grad_norm": 0.1589307188987732, "learning_rate": 3.993802083322873e-06, "loss": 0.0251, "step": 52700 }, { "epoch": 14.253650621957815, "grad_norm": 0.19227764010429382, "learning_rate": 3.9830166743668906e-06, "loss": 0.024, "step": 52710 }, { "epoch": 14.256354786371011, "grad_norm": 0.16609589755535126, "learning_rate": 3.9722452441554425e-06, "loss": 0.0232, "step": 52720 }, { "epoch": 14.259058950784208, "grad_norm": 0.20698446035385132, "learning_rate": 3.961487795960584e-06, "loss": 0.0236, "step": 52730 }, { "epoch": 14.261763115197404, "grad_norm": 0.17103919386863708, "learning_rate": 3.95074433305016e-06, "loss": 0.0224, "step": 52740 }, { "epoch": 14.264467279610601, "grad_norm": 0.15332616865634918, "learning_rate": 3.940014858687752e-06, "loss": 0.024, "step": 52750 }, { "epoch": 14.267171444023797, "grad_norm": 0.1512071043252945, "learning_rate": 3.929299376132689e-06, "loss": 0.023, "step": 52760 }, { "epoch": 14.269875608436992, "grad_norm": 0.16041682660579681, "learning_rate": 3.918597888640047e-06, "loss": 0.0241, "step": 52770 }, { "epoch": 14.27257977285019, "grad_norm": 0.2556126117706299, "learning_rate": 3.907910399460657e-06, "loss": 0.0223, "step": 52780 }, { "epoch": 14.275283937263385, "grad_norm": 0.1378774791955948, "learning_rate": 3.8972369118410956e-06, "loss": 0.0238, "step": 52790 }, { "epoch": 14.277988101676582, "grad_norm": 0.2015138417482376, "learning_rate": 3.88657742902368e-06, "loss": 0.0239, "step": 52800 }, { "epoch": 14.280692266089778, "grad_norm": 0.13399885594844818, "learning_rate": 3.875931954246504e-06, "loss": 0.0235, "step": 52810 }, { "epoch": 14.283396430502975, "grad_norm": 0.20596551895141602, "learning_rate": 3.865300490743351e-06, "loss": 0.0221, "step": 52820 }, { "epoch": 14.28610059491617, "grad_norm": 0.5701572299003601, "learning_rate": 3.854683041743806e-06, "loss": 0.0237, "step": 52830 }, { "epoch": 14.288804759329366, "grad_norm": 0.18056540191173553, "learning_rate": 3.844079610473139e-06, "loss": 0.0228, "step": 52840 }, { "epoch": 14.291508923742564, "grad_norm": 0.11988253146409988, "learning_rate": 3.833490200152423e-06, "loss": 0.0243, "step": 52850 }, { "epoch": 14.29421308815576, "grad_norm": 0.13128075003623962, "learning_rate": 3.822914813998424e-06, "loss": 0.0229, "step": 52860 }, { "epoch": 14.296917252568957, "grad_norm": 0.2351040542125702, "learning_rate": 3.812353455223666e-06, "loss": 0.024, "step": 52870 }, { "epoch": 14.299621416982152, "grad_norm": 0.24451105296611786, "learning_rate": 3.8018061270364225e-06, "loss": 0.0222, "step": 52880 }, { "epoch": 14.30232558139535, "grad_norm": 0.1896662712097168, "learning_rate": 3.7912728326406688e-06, "loss": 0.0239, "step": 52890 }, { "epoch": 14.305029745808545, "grad_norm": 0.15294329822063446, "learning_rate": 3.7807535752361732e-06, "loss": 0.0224, "step": 52900 }, { "epoch": 14.307733910221742, "grad_norm": 0.19164101779460907, "learning_rate": 3.7702483580183855e-06, "loss": 0.0236, "step": 52910 }, { "epoch": 14.310438074634938, "grad_norm": 0.2729044556617737, "learning_rate": 3.759757184178525e-06, "loss": 0.0222, "step": 52920 }, { "epoch": 14.313142239048133, "grad_norm": 0.19580455124378204, "learning_rate": 3.7492800569035312e-06, "loss": 0.0234, "step": 52930 }, { "epoch": 14.31584640346133, "grad_norm": 0.19753941893577576, "learning_rate": 3.7388169793760754e-06, "loss": 0.0233, "step": 52940 }, { "epoch": 14.318550567874526, "grad_norm": 0.16625408828258514, "learning_rate": 3.728367954774553e-06, "loss": 0.0239, "step": 52950 }, { "epoch": 14.321254732287723, "grad_norm": 0.18988096714019775, "learning_rate": 3.7179329862731317e-06, "loss": 0.0235, "step": 52960 }, { "epoch": 14.323958896700919, "grad_norm": 0.17830790579319, "learning_rate": 3.707512077041647e-06, "loss": 0.0226, "step": 52970 }, { "epoch": 14.326663061114116, "grad_norm": 0.2343459129333496, "learning_rate": 3.6971052302457288e-06, "loss": 0.0229, "step": 52980 }, { "epoch": 14.329367225527312, "grad_norm": 0.1759725660085678, "learning_rate": 3.6867124490466698e-06, "loss": 0.0235, "step": 52990 }, { "epoch": 14.33207138994051, "grad_norm": 0.15926049649715424, "learning_rate": 3.6763337366015393e-06, "loss": 0.0225, "step": 53000 }, { "epoch": 14.334775554353705, "grad_norm": 0.1309482306241989, "learning_rate": 3.665969096063121e-06, "loss": 0.0245, "step": 53010 }, { "epoch": 14.3374797187669, "grad_norm": 0.3403145670890808, "learning_rate": 3.6556185305799074e-06, "loss": 0.0229, "step": 53020 }, { "epoch": 14.340183883180098, "grad_norm": 0.23273806273937225, "learning_rate": 3.645282043296133e-06, "loss": 0.024, "step": 53030 }, { "epoch": 14.342888047593293, "grad_norm": 0.12407933175563812, "learning_rate": 3.6349596373517427e-06, "loss": 0.0244, "step": 53040 }, { "epoch": 14.34559221200649, "grad_norm": 0.18572433292865753, "learning_rate": 3.6246513158824215e-06, "loss": 0.0239, "step": 53050 }, { "epoch": 14.348296376419686, "grad_norm": 0.1802988052368164, "learning_rate": 3.6143570820195593e-06, "loss": 0.0237, "step": 53060 }, { "epoch": 14.351000540832883, "grad_norm": 0.2074689120054245, "learning_rate": 3.6040769388902773e-06, "loss": 0.0231, "step": 53070 }, { "epoch": 14.353704705246079, "grad_norm": 0.12435712665319443, "learning_rate": 3.593810889617405e-06, "loss": 0.023, "step": 53080 }, { "epoch": 14.356408869659274, "grad_norm": 0.1318991482257843, "learning_rate": 3.5835589373194978e-06, "loss": 0.0235, "step": 53090 }, { "epoch": 14.359113034072472, "grad_norm": 0.13781774044036865, "learning_rate": 3.5733210851108257e-06, "loss": 0.0236, "step": 53100 }, { "epoch": 14.361817198485667, "grad_norm": 0.1316283941268921, "learning_rate": 3.5630973361014008e-06, "loss": 0.0229, "step": 53110 }, { "epoch": 14.364521362898865, "grad_norm": 0.1740269809961319, "learning_rate": 3.552887693396889e-06, "loss": 0.0225, "step": 53120 }, { "epoch": 14.36722552731206, "grad_norm": 0.2935715615749359, "learning_rate": 3.542692160098754e-06, "loss": 0.0229, "step": 53130 }, { "epoch": 14.369929691725257, "grad_norm": 0.14075179398059845, "learning_rate": 3.5325107393040846e-06, "loss": 0.023, "step": 53140 }, { "epoch": 14.372633856138453, "grad_norm": 0.18689732253551483, "learning_rate": 3.522343434105757e-06, "loss": 0.0236, "step": 53150 }, { "epoch": 14.37533802055165, "grad_norm": 0.1466887891292572, "learning_rate": 3.512190247592323e-06, "loss": 0.0232, "step": 53160 }, { "epoch": 14.378042184964846, "grad_norm": 0.2400607168674469, "learning_rate": 3.502051182848054e-06, "loss": 0.0232, "step": 53170 }, { "epoch": 14.380746349378041, "grad_norm": 0.18561087548732758, "learning_rate": 3.4919262429529308e-06, "loss": 0.0217, "step": 53180 }, { "epoch": 14.383450513791239, "grad_norm": 0.22158493101596832, "learning_rate": 3.4818154309826325e-06, "loss": 0.0249, "step": 53190 }, { "epoch": 14.386154678204434, "grad_norm": 0.1260041445493698, "learning_rate": 3.4717187500085734e-06, "loss": 0.0218, "step": 53200 }, { "epoch": 14.388858842617632, "grad_norm": 0.116438128054142, "learning_rate": 3.46163620309784e-06, "loss": 0.0234, "step": 53210 }, { "epoch": 14.391563007030827, "grad_norm": 0.13242854177951813, "learning_rate": 3.4515677933132595e-06, "loss": 0.0234, "step": 53220 }, { "epoch": 14.394267171444024, "grad_norm": 0.12895505130290985, "learning_rate": 3.4415135237133466e-06, "loss": 0.0229, "step": 53230 }, { "epoch": 14.39697133585722, "grad_norm": 0.13057009875774384, "learning_rate": 3.4314733973523196e-06, "loss": 0.0239, "step": 53240 }, { "epoch": 14.399675500270416, "grad_norm": 0.2793252468109131, "learning_rate": 3.4214474172800993e-06, "loss": 0.0242, "step": 53250 }, { "epoch": 14.402379664683613, "grad_norm": 0.11564451456069946, "learning_rate": 3.411435586542322e-06, "loss": 0.0249, "step": 53260 }, { "epoch": 14.405083829096808, "grad_norm": 0.15473003685474396, "learning_rate": 3.4014379081802995e-06, "loss": 0.024, "step": 53270 }, { "epoch": 14.407787993510006, "grad_norm": 0.16285361349582672, "learning_rate": 3.391454385231102e-06, "loss": 0.0229, "step": 53280 }, { "epoch": 14.410492157923201, "grad_norm": 0.12137345969676971, "learning_rate": 3.3814850207274095e-06, "loss": 0.022, "step": 53290 }, { "epoch": 14.413196322336399, "grad_norm": 0.18831366300582886, "learning_rate": 3.371529817697694e-06, "loss": 0.022, "step": 53300 }, { "epoch": 14.415900486749594, "grad_norm": 0.10417506098747253, "learning_rate": 3.3615887791660585e-06, "loss": 0.0239, "step": 53310 }, { "epoch": 14.418604651162791, "grad_norm": 0.11384845525026321, "learning_rate": 3.3516619081523426e-06, "loss": 0.023, "step": 53320 }, { "epoch": 14.421308815575987, "grad_norm": 0.1282145380973816, "learning_rate": 3.3417492076720567e-06, "loss": 0.0233, "step": 53330 }, { "epoch": 14.424012979989183, "grad_norm": 0.1718234270811081, "learning_rate": 3.3318506807364147e-06, "loss": 0.0241, "step": 53340 }, { "epoch": 14.42671714440238, "grad_norm": 0.20872662961483002, "learning_rate": 3.3219663303523553e-06, "loss": 0.0239, "step": 53350 }, { "epoch": 14.429421308815575, "grad_norm": 0.26725080609321594, "learning_rate": 3.3120961595224374e-06, "loss": 0.0238, "step": 53360 }, { "epoch": 14.432125473228773, "grad_norm": 0.357759565114975, "learning_rate": 3.302240171245002e-06, "loss": 0.0235, "step": 53370 }, { "epoch": 14.434829637641968, "grad_norm": 0.12476873397827148, "learning_rate": 3.29239836851401e-06, "loss": 0.023, "step": 53380 }, { "epoch": 14.437533802055166, "grad_norm": 0.14171552658081055, "learning_rate": 3.2825707543191588e-06, "loss": 0.0239, "step": 53390 }, { "epoch": 14.440237966468361, "grad_norm": 0.14224694669246674, "learning_rate": 3.272757331645804e-06, "loss": 0.0222, "step": 53400 }, { "epoch": 14.442942130881558, "grad_norm": 0.15752547979354858, "learning_rate": 3.2629581034750166e-06, "loss": 0.0221, "step": 53410 }, { "epoch": 14.445646295294754, "grad_norm": 0.13724438846111298, "learning_rate": 3.2531730727835218e-06, "loss": 0.0223, "step": 53420 }, { "epoch": 14.44835045970795, "grad_norm": 0.1802469789981842, "learning_rate": 3.2434022425437914e-06, "loss": 0.0235, "step": 53430 }, { "epoch": 14.451054624121147, "grad_norm": 0.16843749582767487, "learning_rate": 3.233645615723907e-06, "loss": 0.0234, "step": 53440 }, { "epoch": 14.453758788534342, "grad_norm": 0.28648802638053894, "learning_rate": 3.2239031952876918e-06, "loss": 0.0242, "step": 53450 }, { "epoch": 14.45646295294754, "grad_norm": 0.16731996834278107, "learning_rate": 3.21417498419464e-06, "loss": 0.0241, "step": 53460 }, { "epoch": 14.459167117360735, "grad_norm": 0.18499040603637695, "learning_rate": 3.204460985399921e-06, "loss": 0.0238, "step": 53470 }, { "epoch": 14.461871281773933, "grad_norm": 0.1537451446056366, "learning_rate": 3.1947612018543903e-06, "loss": 0.0229, "step": 53480 }, { "epoch": 14.464575446187128, "grad_norm": 0.13950948417186737, "learning_rate": 3.1850756365045753e-06, "loss": 0.0242, "step": 53490 }, { "epoch": 14.467279610600325, "grad_norm": 0.16796521842479706, "learning_rate": 3.175404292292722e-06, "loss": 0.0225, "step": 53500 }, { "epoch": 14.469983775013521, "grad_norm": 0.10071298480033875, "learning_rate": 3.1657471721566965e-06, "loss": 0.0243, "step": 53510 }, { "epoch": 14.472687939426716, "grad_norm": 0.1622014045715332, "learning_rate": 3.1561042790300977e-06, "loss": 0.0236, "step": 53520 }, { "epoch": 14.475392103839914, "grad_norm": 0.16811445355415344, "learning_rate": 3.1464756158421816e-06, "loss": 0.0232, "step": 53530 }, { "epoch": 14.47809626825311, "grad_norm": 0.16661642491817474, "learning_rate": 3.136861185517875e-06, "loss": 0.0234, "step": 53540 }, { "epoch": 14.480800432666307, "grad_norm": 0.18357118964195251, "learning_rate": 3.127260990977798e-06, "loss": 0.0232, "step": 53550 }, { "epoch": 14.483504597079502, "grad_norm": 0.12925145030021667, "learning_rate": 3.1176750351382235e-06, "loss": 0.0224, "step": 53560 }, { "epoch": 14.4862087614927, "grad_norm": 0.28642725944519043, "learning_rate": 3.1081033209111153e-06, "loss": 0.0231, "step": 53570 }, { "epoch": 14.488912925905895, "grad_norm": 0.15341496467590332, "learning_rate": 3.0985458512041155e-06, "loss": 0.0232, "step": 53580 }, { "epoch": 14.49161709031909, "grad_norm": 0.22938001155853271, "learning_rate": 3.089002628920512e-06, "loss": 0.023, "step": 53590 }, { "epoch": 14.494321254732288, "grad_norm": 0.2264804095029831, "learning_rate": 3.079473656959303e-06, "loss": 0.0231, "step": 53600 }, { "epoch": 14.497025419145483, "grad_norm": 0.26517006754875183, "learning_rate": 3.0699589382151393e-06, "loss": 0.0247, "step": 53610 }, { "epoch": 14.49972958355868, "grad_norm": 0.14080971479415894, "learning_rate": 3.060458475578326e-06, "loss": 0.0228, "step": 53620 }, { "epoch": 14.502433747971876, "grad_norm": 0.16934417188167572, "learning_rate": 3.0509722719348656e-06, "loss": 0.0223, "step": 53630 }, { "epoch": 14.505137912385074, "grad_norm": 0.11752624809741974, "learning_rate": 3.041500330166408e-06, "loss": 0.0226, "step": 53640 }, { "epoch": 14.50784207679827, "grad_norm": 0.1034146323800087, "learning_rate": 3.032042653150291e-06, "loss": 0.0231, "step": 53650 }, { "epoch": 14.510546241211465, "grad_norm": 0.23780101537704468, "learning_rate": 3.0225992437594887e-06, "loss": 0.0243, "step": 53660 }, { "epoch": 14.513250405624662, "grad_norm": 0.2361217588186264, "learning_rate": 3.0131701048626783e-06, "loss": 0.0242, "step": 53670 }, { "epoch": 14.515954570037858, "grad_norm": 0.11536389589309692, "learning_rate": 3.003755239324163e-06, "loss": 0.0246, "step": 53680 }, { "epoch": 14.518658734451055, "grad_norm": 0.16224457323551178, "learning_rate": 2.9943546500039553e-06, "loss": 0.0225, "step": 53690 }, { "epoch": 14.52136289886425, "grad_norm": 0.2939901351928711, "learning_rate": 2.9849683397576877e-06, "loss": 0.0224, "step": 53700 }, { "epoch": 14.524067063277448, "grad_norm": 0.3357878029346466, "learning_rate": 2.975596311436679e-06, "loss": 0.0236, "step": 53710 }, { "epoch": 14.526771227690643, "grad_norm": 0.2134765088558197, "learning_rate": 2.966238567887902e-06, "loss": 0.0219, "step": 53720 }, { "epoch": 14.52947539210384, "grad_norm": 0.19540496170520782, "learning_rate": 2.9568951119539943e-06, "loss": 0.0227, "step": 53730 }, { "epoch": 14.532179556517036, "grad_norm": 0.2204647660255432, "learning_rate": 2.947565946473241e-06, "loss": 0.0228, "step": 53740 }, { "epoch": 14.534883720930232, "grad_norm": 0.1374882161617279, "learning_rate": 2.9382510742796188e-06, "loss": 0.023, "step": 53750 }, { "epoch": 14.537587885343429, "grad_norm": 0.25492793321609497, "learning_rate": 2.9289504982027204e-06, "loss": 0.0224, "step": 53760 }, { "epoch": 14.540292049756625, "grad_norm": 0.23480769991874695, "learning_rate": 2.9196642210678248e-06, "loss": 0.0228, "step": 53770 }, { "epoch": 14.542996214169822, "grad_norm": 0.14061665534973145, "learning_rate": 2.910392245695853e-06, "loss": 0.0229, "step": 53780 }, { "epoch": 14.545700378583017, "grad_norm": 0.17899808287620544, "learning_rate": 2.9011345749033804e-06, "loss": 0.0233, "step": 53790 }, { "epoch": 14.548404542996215, "grad_norm": 0.15590158104896545, "learning_rate": 2.8918912115026677e-06, "loss": 0.0228, "step": 53800 }, { "epoch": 14.55110870740941, "grad_norm": 0.1102585643529892, "learning_rate": 2.8826621583015743e-06, "loss": 0.0236, "step": 53810 }, { "epoch": 14.553812871822608, "grad_norm": 0.1754491627216339, "learning_rate": 2.8734474181036643e-06, "loss": 0.0227, "step": 53820 }, { "epoch": 14.556517036235803, "grad_norm": 0.17274102568626404, "learning_rate": 2.864246993708114e-06, "loss": 0.0227, "step": 53830 }, { "epoch": 14.559221200648999, "grad_norm": 0.18992246687412262, "learning_rate": 2.8550608879097884e-06, "loss": 0.023, "step": 53840 }, { "epoch": 14.561925365062196, "grad_norm": 0.14479462802410126, "learning_rate": 2.845889103499172e-06, "loss": 0.0221, "step": 53850 }, { "epoch": 14.564629529475392, "grad_norm": 0.4047544300556183, "learning_rate": 2.8367316432624138e-06, "loss": 0.025, "step": 53860 }, { "epoch": 14.567333693888589, "grad_norm": 0.14005114138126373, "learning_rate": 2.8275885099813105e-06, "loss": 0.0229, "step": 53870 }, { "epoch": 14.570037858301784, "grad_norm": 0.25150004029273987, "learning_rate": 2.8184597064332963e-06, "loss": 0.0237, "step": 53880 }, { "epoch": 14.572742022714982, "grad_norm": 0.11693461984395981, "learning_rate": 2.809345235391464e-06, "loss": 0.0219, "step": 53890 }, { "epoch": 14.575446187128177, "grad_norm": 0.09291200339794159, "learning_rate": 2.80024509962456e-06, "loss": 0.0232, "step": 53900 }, { "epoch": 14.578150351541375, "grad_norm": 0.15280292928218842, "learning_rate": 2.7911593018969563e-06, "loss": 0.0221, "step": 53910 }, { "epoch": 14.58085451595457, "grad_norm": 0.14116783440113068, "learning_rate": 2.7820878449686838e-06, "loss": 0.0226, "step": 53920 }, { "epoch": 14.583558680367766, "grad_norm": 0.12572595477104187, "learning_rate": 2.7730307315953995e-06, "loss": 0.0231, "step": 53930 }, { "epoch": 14.586262844780963, "grad_norm": 0.26435399055480957, "learning_rate": 2.763987964528425e-06, "loss": 0.0236, "step": 53940 }, { "epoch": 14.588967009194159, "grad_norm": 0.1268124282360077, "learning_rate": 2.754959546514718e-06, "loss": 0.0229, "step": 53950 }, { "epoch": 14.591671173607356, "grad_norm": 0.3586403429508209, "learning_rate": 2.7459454802968576e-06, "loss": 0.0251, "step": 53960 }, { "epoch": 14.594375338020551, "grad_norm": 0.12140881270170212, "learning_rate": 2.7369457686131028e-06, "loss": 0.0223, "step": 53970 }, { "epoch": 14.597079502433749, "grad_norm": 0.11410641670227051, "learning_rate": 2.7279604141973004e-06, "loss": 0.0233, "step": 53980 }, { "epoch": 14.599783666846944, "grad_norm": 0.11963659524917603, "learning_rate": 2.7189894197789946e-06, "loss": 0.0235, "step": 53990 }, { "epoch": 14.60248783126014, "grad_norm": 0.17374785244464874, "learning_rate": 2.7100327880833055e-06, "loss": 0.0241, "step": 54000 }, { "epoch": 14.605191995673337, "grad_norm": 0.1199808195233345, "learning_rate": 2.70109052183104e-06, "loss": 0.0227, "step": 54010 }, { "epoch": 14.607896160086533, "grad_norm": 0.12903043627738953, "learning_rate": 2.69216262373862e-06, "loss": 0.022, "step": 54020 }, { "epoch": 14.61060032449973, "grad_norm": 0.1541370004415512, "learning_rate": 2.6832490965181036e-06, "loss": 0.0243, "step": 54030 }, { "epoch": 14.613304488912926, "grad_norm": 0.14889387786388397, "learning_rate": 2.6743499428771857e-06, "loss": 0.0233, "step": 54040 }, { "epoch": 14.616008653326123, "grad_norm": 0.14333289861679077, "learning_rate": 2.6654651655191875e-06, "loss": 0.0228, "step": 54050 }, { "epoch": 14.618712817739318, "grad_norm": 0.17072537541389465, "learning_rate": 2.6565947671430836e-06, "loss": 0.0223, "step": 54060 }, { "epoch": 14.621416982152514, "grad_norm": 0.18164560198783875, "learning_rate": 2.647738750443457e-06, "loss": 0.0227, "step": 54070 }, { "epoch": 14.624121146565711, "grad_norm": 0.16205419600009918, "learning_rate": 2.6388971181105393e-06, "loss": 0.0226, "step": 54080 }, { "epoch": 14.626825310978907, "grad_norm": 0.18061687052249908, "learning_rate": 2.630069872830171e-06, "loss": 0.0232, "step": 54090 }, { "epoch": 14.629529475392104, "grad_norm": 0.14914366602897644, "learning_rate": 2.6212570172838514e-06, "loss": 0.0224, "step": 54100 }, { "epoch": 14.6322336398053, "grad_norm": 0.17726083099842072, "learning_rate": 2.6124585541486778e-06, "loss": 0.0242, "step": 54110 }, { "epoch": 14.634937804218497, "grad_norm": 0.18666088581085205, "learning_rate": 2.6036744860974127e-06, "loss": 0.0224, "step": 54120 }, { "epoch": 14.637641968631693, "grad_norm": 0.15188953280448914, "learning_rate": 2.594904815798399e-06, "loss": 0.023, "step": 54130 }, { "epoch": 14.64034613304489, "grad_norm": 0.1639205515384674, "learning_rate": 2.586149545915656e-06, "loss": 0.022, "step": 54140 }, { "epoch": 14.643050297458085, "grad_norm": 0.23190991580486298, "learning_rate": 2.577408679108778e-06, "loss": 0.0225, "step": 54150 }, { "epoch": 14.645754461871281, "grad_norm": 0.15691131353378296, "learning_rate": 2.5686822180330306e-06, "loss": 0.0236, "step": 54160 }, { "epoch": 14.648458626284478, "grad_norm": 0.13997714221477509, "learning_rate": 2.5599701653392703e-06, "loss": 0.0225, "step": 54170 }, { "epoch": 14.651162790697674, "grad_norm": 0.24137113988399506, "learning_rate": 2.551272523673992e-06, "loss": 0.023, "step": 54180 }, { "epoch": 14.653866955110871, "grad_norm": 0.2132248878479004, "learning_rate": 2.542589295679315e-06, "loss": 0.0234, "step": 54190 }, { "epoch": 14.656571119524067, "grad_norm": 0.35190239548683167, "learning_rate": 2.5339204839929575e-06, "loss": 0.024, "step": 54200 }, { "epoch": 14.659275283937264, "grad_norm": 0.11067824810743332, "learning_rate": 2.525266091248296e-06, "loss": 0.0232, "step": 54210 }, { "epoch": 14.66197944835046, "grad_norm": 0.3775321841239929, "learning_rate": 2.5166261200743e-06, "loss": 0.0223, "step": 54220 }, { "epoch": 14.664683612763657, "grad_norm": 0.22349664568901062, "learning_rate": 2.5080005730955646e-06, "loss": 0.0237, "step": 54230 }, { "epoch": 14.667387777176852, "grad_norm": 0.231377512216568, "learning_rate": 2.499389452932299e-06, "loss": 0.0237, "step": 54240 }, { "epoch": 14.670091941590048, "grad_norm": 0.1041729599237442, "learning_rate": 2.4907927622003336e-06, "loss": 0.024, "step": 54250 }, { "epoch": 14.672796106003245, "grad_norm": 0.19788841903209686, "learning_rate": 2.4822105035111177e-06, "loss": 0.0236, "step": 54260 }, { "epoch": 14.67550027041644, "grad_norm": 0.1315762847661972, "learning_rate": 2.4736426794717273e-06, "loss": 0.0242, "step": 54270 }, { "epoch": 14.678204434829638, "grad_norm": 0.165832981467247, "learning_rate": 2.4650892926848135e-06, "loss": 0.0229, "step": 54280 }, { "epoch": 14.680908599242834, "grad_norm": 0.11670736223459244, "learning_rate": 2.456550345748704e-06, "loss": 0.0227, "step": 54290 }, { "epoch": 14.683612763656031, "grad_norm": 0.2248397022485733, "learning_rate": 2.4480258412572733e-06, "loss": 0.023, "step": 54300 }, { "epoch": 14.686316928069227, "grad_norm": 0.20579765737056732, "learning_rate": 2.4395157818000612e-06, "loss": 0.0226, "step": 54310 }, { "epoch": 14.689021092482424, "grad_norm": 0.17947718501091003, "learning_rate": 2.431020169962189e-06, "loss": 0.0219, "step": 54320 }, { "epoch": 14.69172525689562, "grad_norm": 0.33534950017929077, "learning_rate": 2.422539008324409e-06, "loss": 0.0233, "step": 54330 }, { "epoch": 14.694429421308815, "grad_norm": 0.1631738245487213, "learning_rate": 2.414072299463066e-06, "loss": 0.0222, "step": 54340 }, { "epoch": 14.697133585722012, "grad_norm": 0.15707366168498993, "learning_rate": 2.4056200459501186e-06, "loss": 0.0232, "step": 54350 }, { "epoch": 14.699837750135208, "grad_norm": 0.2755526304244995, "learning_rate": 2.397182250353147e-06, "loss": 0.0222, "step": 54360 }, { "epoch": 14.702541914548405, "grad_norm": 0.2286204993724823, "learning_rate": 2.388758915235334e-06, "loss": 0.0242, "step": 54370 }, { "epoch": 14.7052460789616, "grad_norm": 0.15409144759178162, "learning_rate": 2.380350043155455e-06, "loss": 0.0221, "step": 54380 }, { "epoch": 14.707950243374798, "grad_norm": 0.11671540141105652, "learning_rate": 2.371955636667911e-06, "loss": 0.0237, "step": 54390 }, { "epoch": 14.710654407787993, "grad_norm": 0.24754518270492554, "learning_rate": 2.3635756983227008e-06, "loss": 0.0221, "step": 54400 }, { "epoch": 14.713358572201189, "grad_norm": 0.13777370750904083, "learning_rate": 2.3552102306654278e-06, "loss": 0.0229, "step": 54410 }, { "epoch": 14.716062736614386, "grad_norm": 0.3035546541213989, "learning_rate": 2.346859236237292e-06, "loss": 0.0231, "step": 54420 }, { "epoch": 14.718766901027582, "grad_norm": 0.23834504187107086, "learning_rate": 2.3385227175751145e-06, "loss": 0.0232, "step": 54430 }, { "epoch": 14.72147106544078, "grad_norm": 0.18067066371440887, "learning_rate": 2.330200677211314e-06, "loss": 0.0231, "step": 54440 }, { "epoch": 14.724175229853975, "grad_norm": 0.20121917128562927, "learning_rate": 2.3218931176738847e-06, "loss": 0.0227, "step": 54450 }, { "epoch": 14.726879394267172, "grad_norm": 0.13136063516139984, "learning_rate": 2.313600041486469e-06, "loss": 0.0227, "step": 54460 }, { "epoch": 14.729583558680368, "grad_norm": 0.16881537437438965, "learning_rate": 2.3053214511682743e-06, "loss": 0.0238, "step": 54470 }, { "epoch": 14.732287723093563, "grad_norm": 0.12286060303449631, "learning_rate": 2.2970573492341163e-06, "loss": 0.0219, "step": 54480 }, { "epoch": 14.73499188750676, "grad_norm": 0.22645947337150574, "learning_rate": 2.288807738194415e-06, "loss": 0.0236, "step": 54490 }, { "epoch": 14.737696051919956, "grad_norm": 0.23915088176727295, "learning_rate": 2.2805726205551768e-06, "loss": 0.0239, "step": 54500 }, { "epoch": 14.740400216333153, "grad_norm": 0.24588169157505035, "learning_rate": 2.272351998818023e-06, "loss": 0.0222, "step": 54510 }, { "epoch": 14.743104380746349, "grad_norm": 0.15357589721679688, "learning_rate": 2.2641458754801505e-06, "loss": 0.0248, "step": 54520 }, { "epoch": 14.745808545159546, "grad_norm": 0.29007428884506226, "learning_rate": 2.2559542530343756e-06, "loss": 0.024, "step": 54530 }, { "epoch": 14.748512709572742, "grad_norm": 0.12696222960948944, "learning_rate": 2.247777133969087e-06, "loss": 0.0238, "step": 54540 }, { "epoch": 14.751216873985939, "grad_norm": 0.18214072287082672, "learning_rate": 2.2396145207682795e-06, "loss": 0.0229, "step": 54550 }, { "epoch": 14.753921038399135, "grad_norm": 0.21342113614082336, "learning_rate": 2.231466415911543e-06, "loss": 0.0229, "step": 54560 }, { "epoch": 14.75662520281233, "grad_norm": 0.12856684625148773, "learning_rate": 2.2233328218740524e-06, "loss": 0.023, "step": 54570 }, { "epoch": 14.759329367225527, "grad_norm": 0.24709907174110413, "learning_rate": 2.215213741126576e-06, "loss": 0.0236, "step": 54580 }, { "epoch": 14.762033531638723, "grad_norm": 0.1535516083240509, "learning_rate": 2.2071091761354912e-06, "loss": 0.0227, "step": 54590 }, { "epoch": 14.76473769605192, "grad_norm": 0.16372652351856232, "learning_rate": 2.1990191293627337e-06, "loss": 0.0231, "step": 54600 }, { "epoch": 14.767441860465116, "grad_norm": 0.21662858128547668, "learning_rate": 2.1909436032658548e-06, "loss": 0.024, "step": 54610 }, { "epoch": 14.770146024878313, "grad_norm": 0.21334919333457947, "learning_rate": 2.1828826002979806e-06, "loss": 0.0216, "step": 54620 }, { "epoch": 14.772850189291509, "grad_norm": 0.1450076848268509, "learning_rate": 2.1748361229078362e-06, "loss": 0.0215, "step": 54630 }, { "epoch": 14.775554353704706, "grad_norm": 0.302092969417572, "learning_rate": 2.1668041735397327e-06, "loss": 0.0225, "step": 54640 }, { "epoch": 14.778258518117902, "grad_norm": 0.2616255581378937, "learning_rate": 2.1587867546335514e-06, "loss": 0.0208, "step": 54650 }, { "epoch": 14.780962682531097, "grad_norm": 0.19486898183822632, "learning_rate": 2.1507838686247894e-06, "loss": 0.0235, "step": 54660 }, { "epoch": 14.783666846944294, "grad_norm": 0.13220266997814178, "learning_rate": 2.1427955179444957e-06, "loss": 0.0226, "step": 54670 }, { "epoch": 14.78637101135749, "grad_norm": 0.2905519902706146, "learning_rate": 2.13482170501933e-06, "loss": 0.0244, "step": 54680 }, { "epoch": 14.789075175770687, "grad_norm": 0.1245630607008934, "learning_rate": 2.1268624322715202e-06, "loss": 0.023, "step": 54690 }, { "epoch": 14.791779340183883, "grad_norm": 0.09701648354530334, "learning_rate": 2.118917702118889e-06, "loss": 0.024, "step": 54700 }, { "epoch": 14.79448350459708, "grad_norm": 0.45370662212371826, "learning_rate": 2.1109875169748327e-06, "loss": 0.0221, "step": 54710 }, { "epoch": 14.797187669010276, "grad_norm": 0.11939793080091476, "learning_rate": 2.103071879248336e-06, "loss": 0.0232, "step": 54720 }, { "epoch": 14.799891833423473, "grad_norm": 0.1434033066034317, "learning_rate": 2.0951707913439478e-06, "loss": 0.0222, "step": 54730 }, { "epoch": 14.802595997836669, "grad_norm": 0.11383049190044403, "learning_rate": 2.0872842556618255e-06, "loss": 0.0224, "step": 54740 }, { "epoch": 14.805300162249864, "grad_norm": 0.1246151477098465, "learning_rate": 2.079412274597686e-06, "loss": 0.0225, "step": 54750 }, { "epoch": 14.808004326663061, "grad_norm": 0.22689104080200195, "learning_rate": 2.0715548505428284e-06, "loss": 0.0241, "step": 54760 }, { "epoch": 14.810708491076257, "grad_norm": 0.2972927987575531, "learning_rate": 2.0637119858841258e-06, "loss": 0.0222, "step": 54770 }, { "epoch": 14.813412655489454, "grad_norm": 0.13808836042881012, "learning_rate": 2.055883683004034e-06, "loss": 0.0217, "step": 54780 }, { "epoch": 14.81611681990265, "grad_norm": 0.1291375458240509, "learning_rate": 2.0480699442806006e-06, "loss": 0.022, "step": 54790 }, { "epoch": 14.818820984315847, "grad_norm": 0.1323809027671814, "learning_rate": 2.0402707720874105e-06, "loss": 0.0224, "step": 54800 }, { "epoch": 14.821525148729043, "grad_norm": 0.3928643465042114, "learning_rate": 2.032486168793668e-06, "loss": 0.023, "step": 54810 }, { "epoch": 14.824229313142238, "grad_norm": 0.1292729377746582, "learning_rate": 2.024716136764104e-06, "loss": 0.0234, "step": 54820 }, { "epoch": 14.826933477555436, "grad_norm": 0.1616014838218689, "learning_rate": 2.0169606783590757e-06, "loss": 0.0239, "step": 54830 }, { "epoch": 14.829637641968631, "grad_norm": 0.13260559737682343, "learning_rate": 2.0092197959344638e-06, "loss": 0.0227, "step": 54840 }, { "epoch": 14.832341806381828, "grad_norm": 0.27441027760505676, "learning_rate": 2.0014934918417606e-06, "loss": 0.0232, "step": 54850 }, { "epoch": 14.835045970795024, "grad_norm": 0.15844540297985077, "learning_rate": 1.99378176842801e-06, "loss": 0.0227, "step": 54860 }, { "epoch": 14.837750135208221, "grad_norm": 0.13219420611858368, "learning_rate": 1.9860846280358224e-06, "loss": 0.0231, "step": 54870 }, { "epoch": 14.840454299621417, "grad_norm": 0.1544620394706726, "learning_rate": 1.978402073003394e-06, "loss": 0.0234, "step": 54880 }, { "epoch": 14.843158464034612, "grad_norm": 0.18502746522426605, "learning_rate": 1.9707341056644736e-06, "loss": 0.0217, "step": 54890 }, { "epoch": 14.84586262844781, "grad_norm": 0.2202385812997818, "learning_rate": 1.963080728348399e-06, "loss": 0.022, "step": 54900 }, { "epoch": 14.848566792861005, "grad_norm": 0.17984354496002197, "learning_rate": 1.955441943380054e-06, "loss": 0.0219, "step": 54910 }, { "epoch": 14.851270957274203, "grad_norm": 0.23279744386672974, "learning_rate": 1.947817753079906e-06, "loss": 0.0229, "step": 54920 }, { "epoch": 14.853975121687398, "grad_norm": 0.13854153454303741, "learning_rate": 1.9402081597639785e-06, "loss": 0.0238, "step": 54930 }, { "epoch": 14.856679286100595, "grad_norm": 0.13947869837284088, "learning_rate": 1.9326131657438683e-06, "loss": 0.0235, "step": 54940 }, { "epoch": 14.859383450513791, "grad_norm": 0.16635406017303467, "learning_rate": 1.925032773326724e-06, "loss": 0.0226, "step": 54950 }, { "epoch": 14.862087614926988, "grad_norm": 0.17712204158306122, "learning_rate": 1.9174669848152916e-06, "loss": 0.0239, "step": 54960 }, { "epoch": 14.864791779340184, "grad_norm": 0.18410685658454895, "learning_rate": 1.9099158025078336e-06, "loss": 0.023, "step": 54970 }, { "epoch": 14.86749594375338, "grad_norm": 0.12735044956207275, "learning_rate": 1.90237922869822e-06, "loss": 0.0224, "step": 54980 }, { "epoch": 14.870200108166577, "grad_norm": 0.13064220547676086, "learning_rate": 1.8948572656758367e-06, "loss": 0.0217, "step": 54990 }, { "epoch": 14.872904272579772, "grad_norm": 0.1608874499797821, "learning_rate": 1.8873499157256834e-06, "loss": 0.0234, "step": 55000 }, { "epoch": 14.87560843699297, "grad_norm": 0.17229808866977692, "learning_rate": 1.879857181128286e-06, "loss": 0.0231, "step": 55010 }, { "epoch": 14.878312601406165, "grad_norm": 0.17960281670093536, "learning_rate": 1.8723790641597349e-06, "loss": 0.0233, "step": 55020 }, { "epoch": 14.881016765819362, "grad_norm": 0.12166385352611542, "learning_rate": 1.8649155670916906e-06, "loss": 0.0223, "step": 55030 }, { "epoch": 14.883720930232558, "grad_norm": 0.10970892757177353, "learning_rate": 1.8574666921913565e-06, "loss": 0.0241, "step": 55040 }, { "epoch": 14.886425094645755, "grad_norm": 0.2635762691497803, "learning_rate": 1.8500324417215166e-06, "loss": 0.0229, "step": 55050 }, { "epoch": 14.88912925905895, "grad_norm": 0.14809827506542206, "learning_rate": 1.8426128179404977e-06, "loss": 0.0228, "step": 55060 }, { "epoch": 14.891833423472146, "grad_norm": 0.2799976170063019, "learning_rate": 1.8352078231021807e-06, "loss": 0.0241, "step": 55070 }, { "epoch": 14.894537587885344, "grad_norm": 0.13862381875514984, "learning_rate": 1.8278174594560049e-06, "loss": 0.0231, "step": 55080 }, { "epoch": 14.89724175229854, "grad_norm": 0.23129256069660187, "learning_rate": 1.82044172924698e-06, "loss": 0.0226, "step": 55090 }, { "epoch": 14.899945916711737, "grad_norm": 0.1814110279083252, "learning_rate": 1.813080634715636e-06, "loss": 0.0226, "step": 55100 }, { "epoch": 14.902650081124932, "grad_norm": 0.24033205211162567, "learning_rate": 1.8057341780981119e-06, "loss": 0.0221, "step": 55110 }, { "epoch": 14.90535424553813, "grad_norm": 0.13753244280815125, "learning_rate": 1.7984023616260338e-06, "loss": 0.0236, "step": 55120 }, { "epoch": 14.908058409951325, "grad_norm": 0.144527867436409, "learning_rate": 1.7910851875266421e-06, "loss": 0.0233, "step": 55130 }, { "epoch": 14.910762574364522, "grad_norm": 0.24498708546161652, "learning_rate": 1.7837826580226757e-06, "loss": 0.0224, "step": 55140 }, { "epoch": 14.913466738777718, "grad_norm": 0.13637632131576538, "learning_rate": 1.7764947753324656e-06, "loss": 0.0227, "step": 55150 }, { "epoch": 14.916170903190913, "grad_norm": 0.19669514894485474, "learning_rate": 1.7692215416698799e-06, "loss": 0.0234, "step": 55160 }, { "epoch": 14.91887506760411, "grad_norm": 0.1603960543870926, "learning_rate": 1.7619629592443233e-06, "loss": 0.0226, "step": 55170 }, { "epoch": 14.921579232017306, "grad_norm": 0.3053848445415497, "learning_rate": 1.7547190302607709e-06, "loss": 0.0228, "step": 55180 }, { "epoch": 14.924283396430504, "grad_norm": 0.24608106911182404, "learning_rate": 1.747489756919729e-06, "loss": 0.023, "step": 55190 }, { "epoch": 14.926987560843699, "grad_norm": 0.20162193477153778, "learning_rate": 1.7402751414172802e-06, "loss": 0.0247, "step": 55200 }, { "epoch": 14.929691725256896, "grad_norm": 0.1328321248292923, "learning_rate": 1.7330751859450044e-06, "loss": 0.0231, "step": 55210 }, { "epoch": 14.932395889670092, "grad_norm": 0.135771244764328, "learning_rate": 1.7258898926900801e-06, "loss": 0.0233, "step": 55220 }, { "epoch": 14.935100054083287, "grad_norm": 0.14044101536273956, "learning_rate": 1.7187192638352002e-06, "loss": 0.0242, "step": 55230 }, { "epoch": 14.937804218496485, "grad_norm": 0.26936379075050354, "learning_rate": 1.7115633015586163e-06, "loss": 0.0234, "step": 55240 }, { "epoch": 14.94050838290968, "grad_norm": 0.12062324583530426, "learning_rate": 1.7044220080341178e-06, "loss": 0.0225, "step": 55250 }, { "epoch": 14.943212547322878, "grad_norm": 0.1236155554652214, "learning_rate": 1.6972953854310414e-06, "loss": 0.0232, "step": 55260 }, { "epoch": 14.945916711736073, "grad_norm": 0.15688969194889069, "learning_rate": 1.690183435914261e-06, "loss": 0.0246, "step": 55270 }, { "epoch": 14.94862087614927, "grad_norm": 0.13663169741630554, "learning_rate": 1.6830861616442206e-06, "loss": 0.0228, "step": 55280 }, { "epoch": 14.951325040562466, "grad_norm": 0.18123134970664978, "learning_rate": 1.6760035647768568e-06, "loss": 0.0233, "step": 55290 }, { "epoch": 14.954029204975663, "grad_norm": 0.1652584820985794, "learning_rate": 1.6689356474636985e-06, "loss": 0.0227, "step": 55300 }, { "epoch": 14.956733369388859, "grad_norm": 0.13976018130779266, "learning_rate": 1.6618824118517784e-06, "loss": 0.0221, "step": 55310 }, { "epoch": 14.959437533802054, "grad_norm": 0.1994788944721222, "learning_rate": 1.654843860083688e-06, "loss": 0.023, "step": 55320 }, { "epoch": 14.962141698215252, "grad_norm": 0.11256498098373413, "learning_rate": 1.647819994297556e-06, "loss": 0.023, "step": 55330 }, { "epoch": 14.964845862628447, "grad_norm": 0.1524849236011505, "learning_rate": 1.6408108166270363e-06, "loss": 0.023, "step": 55340 }, { "epoch": 14.967550027041645, "grad_norm": 0.19956868886947632, "learning_rate": 1.6338163292013486e-06, "loss": 0.0238, "step": 55350 }, { "epoch": 14.97025419145484, "grad_norm": 0.1700139194726944, "learning_rate": 1.6268365341452208e-06, "loss": 0.0225, "step": 55360 }, { "epoch": 14.972958355868037, "grad_norm": 0.24058544635772705, "learning_rate": 1.6198714335789345e-06, "loss": 0.0218, "step": 55370 }, { "epoch": 14.975662520281233, "grad_norm": 0.293413907289505, "learning_rate": 1.612921029618303e-06, "loss": 0.0229, "step": 55380 }, { "epoch": 14.978366684694429, "grad_norm": 0.19240382313728333, "learning_rate": 1.6059853243746815e-06, "loss": 0.0226, "step": 55390 }, { "epoch": 14.981070849107626, "grad_norm": 0.15449172258377075, "learning_rate": 1.5990643199549404e-06, "loss": 0.0237, "step": 55400 }, { "epoch": 14.983775013520821, "grad_norm": 0.21380802989006042, "learning_rate": 1.5921580184615147e-06, "loss": 0.0248, "step": 55410 }, { "epoch": 14.986479177934019, "grad_norm": 0.19350187480449677, "learning_rate": 1.5852664219923374e-06, "loss": 0.0248, "step": 55420 }, { "epoch": 14.989183342347214, "grad_norm": 0.15133656561374664, "learning_rate": 1.5783895326409172e-06, "loss": 0.0225, "step": 55430 }, { "epoch": 14.991887506760412, "grad_norm": 0.2062624990940094, "learning_rate": 1.5715273524962438e-06, "loss": 0.0237, "step": 55440 }, { "epoch": 14.994591671173607, "grad_norm": 0.21035416424274445, "learning_rate": 1.564679883642889e-06, "loss": 0.0237, "step": 55450 }, { "epoch": 14.997295835586804, "grad_norm": 0.18534092605113983, "learning_rate": 1.5578471281609276e-06, "loss": 0.022, "step": 55460 }, { "epoch": 15.0, "grad_norm": 0.12950186431407928, "learning_rate": 1.551029088125966e-06, "loss": 0.0218, "step": 55470 }, { "epoch": 15.002704164413196, "grad_norm": 0.16113780438899994, "learning_rate": 1.544225765609142e-06, "loss": 0.0233, "step": 55480 }, { "epoch": 15.005408328826393, "grad_norm": 0.1252717822790146, "learning_rate": 1.53743716267713e-06, "loss": 0.0225, "step": 55490 }, { "epoch": 15.008112493239588, "grad_norm": 0.16260966658592224, "learning_rate": 1.5306632813921361e-06, "loss": 0.0233, "step": 55500 }, { "epoch": 15.010816657652786, "grad_norm": 0.13975119590759277, "learning_rate": 1.52390412381187e-06, "loss": 0.0231, "step": 55510 }, { "epoch": 15.013520822065981, "grad_norm": 0.17478686571121216, "learning_rate": 1.5171596919895948e-06, "loss": 0.0249, "step": 55520 }, { "epoch": 15.016224986479179, "grad_norm": 0.1713491976261139, "learning_rate": 1.5104299879740936e-06, "loss": 0.0229, "step": 55530 }, { "epoch": 15.018929150892374, "grad_norm": 0.15316873788833618, "learning_rate": 1.5037150138096701e-06, "loss": 0.023, "step": 55540 }, { "epoch": 15.021633315305571, "grad_norm": 0.1984807550907135, "learning_rate": 1.4970147715361538e-06, "loss": 0.0225, "step": 55550 }, { "epoch": 15.024337479718767, "grad_norm": 0.11538282036781311, "learning_rate": 1.4903292631889054e-06, "loss": 0.0229, "step": 55560 }, { "epoch": 15.027041644131963, "grad_norm": 0.14609834551811218, "learning_rate": 1.483658490798795e-06, "loss": 0.0232, "step": 55570 }, { "epoch": 15.02974580854516, "grad_norm": 0.16402685642242432, "learning_rate": 1.4770024563922457e-06, "loss": 0.0221, "step": 55580 }, { "epoch": 15.032449972958355, "grad_norm": 0.16631953418254852, "learning_rate": 1.4703611619911628e-06, "loss": 0.0222, "step": 55590 }, { "epoch": 15.035154137371553, "grad_norm": 0.18041734397411346, "learning_rate": 1.4637346096130155e-06, "loss": 0.0224, "step": 55600 }, { "epoch": 15.037858301784748, "grad_norm": 0.14005862176418304, "learning_rate": 1.4571228012707662e-06, "loss": 0.0227, "step": 55610 }, { "epoch": 15.040562466197946, "grad_norm": 0.17935003340244293, "learning_rate": 1.4505257389729132e-06, "loss": 0.0225, "step": 55620 }, { "epoch": 15.043266630611141, "grad_norm": 0.14097759127616882, "learning_rate": 1.4439434247234596e-06, "loss": 0.0233, "step": 55630 }, { "epoch": 15.045970795024337, "grad_norm": 0.19798965752124786, "learning_rate": 1.4373758605219445e-06, "loss": 0.0227, "step": 55640 }, { "epoch": 15.048674959437534, "grad_norm": 0.19815978407859802, "learning_rate": 1.4308230483634333e-06, "loss": 0.0221, "step": 55650 }, { "epoch": 15.05137912385073, "grad_norm": 0.14750801026821136, "learning_rate": 1.4242849902384724e-06, "loss": 0.024, "step": 55660 }, { "epoch": 15.054083288263927, "grad_norm": 0.15047451853752136, "learning_rate": 1.4177616881331734e-06, "loss": 0.0223, "step": 55670 }, { "epoch": 15.056787452677122, "grad_norm": 0.127081036567688, "learning_rate": 1.4112531440291233e-06, "loss": 0.0226, "step": 55680 }, { "epoch": 15.05949161709032, "grad_norm": 0.16648945212364197, "learning_rate": 1.4047593599034624e-06, "loss": 0.0219, "step": 55690 }, { "epoch": 15.062195781503515, "grad_norm": 0.18028321862220764, "learning_rate": 1.3982803377288246e-06, "loss": 0.0232, "step": 55700 }, { "epoch": 15.064899945916713, "grad_norm": 0.1451893150806427, "learning_rate": 1.3918160794733681e-06, "loss": 0.0248, "step": 55710 }, { "epoch": 15.067604110329908, "grad_norm": 0.17853008210659027, "learning_rate": 1.3853665871007615e-06, "loss": 0.0236, "step": 55720 }, { "epoch": 15.070308274743104, "grad_norm": 0.13594922423362732, "learning_rate": 1.378931862570193e-06, "loss": 0.0226, "step": 55730 }, { "epoch": 15.073012439156301, "grad_norm": 0.14315783977508545, "learning_rate": 1.372511907836349e-06, "loss": 0.0223, "step": 55740 }, { "epoch": 15.075716603569496, "grad_norm": 0.3358885943889618, "learning_rate": 1.3661067248494586e-06, "loss": 0.023, "step": 55750 }, { "epoch": 15.078420767982694, "grad_norm": 0.20525196194648743, "learning_rate": 1.3597163155552429e-06, "loss": 0.0233, "step": 55760 }, { "epoch": 15.08112493239589, "grad_norm": 0.13640238344669342, "learning_rate": 1.3533406818949434e-06, "loss": 0.023, "step": 55770 }, { "epoch": 15.083829096809087, "grad_norm": 0.2521524429321289, "learning_rate": 1.3469798258053002e-06, "loss": 0.0243, "step": 55780 }, { "epoch": 15.086533261222282, "grad_norm": 0.16175857186317444, "learning_rate": 1.3406337492185672e-06, "loss": 0.0222, "step": 55790 }, { "epoch": 15.089237425635478, "grad_norm": 0.4956355690956116, "learning_rate": 1.3343024540625414e-06, "loss": 0.0232, "step": 55800 }, { "epoch": 15.091941590048675, "grad_norm": 0.1979874223470688, "learning_rate": 1.3279859422604735e-06, "loss": 0.022, "step": 55810 }, { "epoch": 15.09464575446187, "grad_norm": 0.10560806095600128, "learning_rate": 1.3216842157311781e-06, "loss": 0.0223, "step": 55820 }, { "epoch": 15.097349918875068, "grad_norm": 0.12168511748313904, "learning_rate": 1.3153972763889355e-06, "loss": 0.0222, "step": 55830 }, { "epoch": 15.100054083288263, "grad_norm": 0.1311080902814865, "learning_rate": 1.3091251261435566e-06, "loss": 0.023, "step": 55840 }, { "epoch": 15.10275824770146, "grad_norm": 0.3213927447795868, "learning_rate": 1.3028677669003564e-06, "loss": 0.0221, "step": 55850 }, { "epoch": 15.105462412114656, "grad_norm": 0.15478567779064178, "learning_rate": 1.2966252005601587e-06, "loss": 0.0238, "step": 55860 }, { "epoch": 15.108166576527854, "grad_norm": 0.13582637906074524, "learning_rate": 1.2903974290192855e-06, "loss": 0.0229, "step": 55870 }, { "epoch": 15.11087074094105, "grad_norm": 0.15811124444007874, "learning_rate": 1.284184454169568e-06, "loss": 0.0229, "step": 55880 }, { "epoch": 15.113574905354245, "grad_norm": 0.1639612913131714, "learning_rate": 1.2779862778983464e-06, "loss": 0.0226, "step": 55890 }, { "epoch": 15.116279069767442, "grad_norm": 0.23388178646564484, "learning_rate": 1.2718029020884647e-06, "loss": 0.0217, "step": 55900 }, { "epoch": 15.118983234180638, "grad_norm": 0.1547595113515854, "learning_rate": 1.2656343286182703e-06, "loss": 0.0226, "step": 55910 }, { "epoch": 15.121687398593835, "grad_norm": 0.18584021925926208, "learning_rate": 1.2594805593616088e-06, "loss": 0.0236, "step": 55920 }, { "epoch": 15.12439156300703, "grad_norm": 0.1785973608493805, "learning_rate": 1.2533415961878404e-06, "loss": 0.0224, "step": 55930 }, { "epoch": 15.127095727420228, "grad_norm": 0.14910514652729034, "learning_rate": 1.2472174409618009e-06, "loss": 0.0221, "step": 55940 }, { "epoch": 15.129799891833423, "grad_norm": 0.1507512927055359, "learning_rate": 1.2411080955438747e-06, "loss": 0.0232, "step": 55950 }, { "epoch": 15.13250405624662, "grad_norm": 0.42763227224349976, "learning_rate": 1.235013561789894e-06, "loss": 0.0227, "step": 55960 }, { "epoch": 15.135208220659816, "grad_norm": 0.16610895097255707, "learning_rate": 1.2289338415512385e-06, "loss": 0.0234, "step": 55970 }, { "epoch": 15.137912385073012, "grad_norm": 0.45625898241996765, "learning_rate": 1.222868936674748e-06, "loss": 0.0239, "step": 55980 }, { "epoch": 15.140616549486209, "grad_norm": 0.1526465117931366, "learning_rate": 1.2168188490027876e-06, "loss": 0.0228, "step": 55990 }, { "epoch": 15.143320713899405, "grad_norm": 0.14405877888202667, "learning_rate": 1.2107835803732204e-06, "loss": 0.0237, "step": 56000 }, { "epoch": 15.146024878312602, "grad_norm": 0.12061547487974167, "learning_rate": 1.2047631326193964e-06, "loss": 0.0223, "step": 56010 }, { "epoch": 15.148729042725797, "grad_norm": 0.11876946687698364, "learning_rate": 1.1987575075701696e-06, "loss": 0.0239, "step": 56020 }, { "epoch": 15.151433207138995, "grad_norm": 0.09506316483020782, "learning_rate": 1.1927667070498916e-06, "loss": 0.0228, "step": 56030 }, { "epoch": 15.15413737155219, "grad_norm": 0.30300232768058777, "learning_rate": 1.1867907328784067e-06, "loss": 0.0234, "step": 56040 }, { "epoch": 15.156841535965386, "grad_norm": 0.2792528569698334, "learning_rate": 1.1808295868710518e-06, "loss": 0.0227, "step": 56050 }, { "epoch": 15.159545700378583, "grad_norm": 0.1518184393644333, "learning_rate": 1.174883270838678e-06, "loss": 0.0231, "step": 56060 }, { "epoch": 15.162249864791779, "grad_norm": 0.13408520817756653, "learning_rate": 1.1689517865876187e-06, "loss": 0.0236, "step": 56070 }, { "epoch": 15.164954029204976, "grad_norm": 0.3003849685192108, "learning_rate": 1.1630351359196933e-06, "loss": 0.0233, "step": 56080 }, { "epoch": 15.167658193618172, "grad_norm": 0.2231989949941635, "learning_rate": 1.1571333206322255e-06, "loss": 0.0223, "step": 56090 }, { "epoch": 15.170362358031369, "grad_norm": 0.1529463678598404, "learning_rate": 1.1512463425180365e-06, "loss": 0.0227, "step": 56100 }, { "epoch": 15.173066522444564, "grad_norm": 0.3396144509315491, "learning_rate": 1.145374203365429e-06, "loss": 0.0221, "step": 56110 }, { "epoch": 15.175770686857762, "grad_norm": 0.14811690151691437, "learning_rate": 1.1395169049582155e-06, "loss": 0.0231, "step": 56120 }, { "epoch": 15.178474851270957, "grad_norm": 0.19342558085918427, "learning_rate": 1.1336744490756722e-06, "loss": 0.0232, "step": 56130 }, { "epoch": 15.181179015684153, "grad_norm": 0.16786833107471466, "learning_rate": 1.1278468374925967e-06, "loss": 0.0223, "step": 56140 }, { "epoch": 15.18388318009735, "grad_norm": 0.16484244167804718, "learning_rate": 1.12203407197925e-06, "loss": 0.0232, "step": 56150 }, { "epoch": 15.186587344510546, "grad_norm": 0.17683503031730652, "learning_rate": 1.116236154301409e-06, "loss": 0.0225, "step": 56160 }, { "epoch": 15.189291508923743, "grad_norm": 0.22169260680675507, "learning_rate": 1.11045308622032e-06, "loss": 0.0229, "step": 56170 }, { "epoch": 15.191995673336939, "grad_norm": 0.17574457824230194, "learning_rate": 1.1046848694927337e-06, "loss": 0.0227, "step": 56180 }, { "epoch": 15.194699837750136, "grad_norm": 0.17956599593162537, "learning_rate": 1.098931505870876e-06, "loss": 0.0227, "step": 56190 }, { "epoch": 15.197404002163331, "grad_norm": 0.1281643807888031, "learning_rate": 1.0931929971024657e-06, "loss": 0.0224, "step": 56200 }, { "epoch": 15.200108166576527, "grad_norm": 0.14037367701530457, "learning_rate": 1.0874693449307193e-06, "loss": 0.0223, "step": 56210 }, { "epoch": 15.202812330989724, "grad_norm": 0.16236133873462677, "learning_rate": 1.081760551094324e-06, "loss": 0.0233, "step": 56220 }, { "epoch": 15.20551649540292, "grad_norm": 0.28670698404312134, "learning_rate": 1.0760666173274592e-06, "loss": 0.0231, "step": 56230 }, { "epoch": 15.208220659816117, "grad_norm": 0.17633061110973358, "learning_rate": 1.0703875453597967e-06, "loss": 0.0226, "step": 56240 }, { "epoch": 15.210924824229313, "grad_norm": 0.17319662868976593, "learning_rate": 1.0647233369164845e-06, "loss": 0.0222, "step": 56250 }, { "epoch": 15.21362898864251, "grad_norm": 0.2147572636604309, "learning_rate": 1.0590739937181625e-06, "loss": 0.0222, "step": 56260 }, { "epoch": 15.216333153055706, "grad_norm": 0.1672920137643814, "learning_rate": 1.053439517480953e-06, "loss": 0.0227, "step": 56270 }, { "epoch": 15.219037317468903, "grad_norm": 0.11826067417860031, "learning_rate": 1.047819909916453e-06, "loss": 0.0242, "step": 56280 }, { "epoch": 15.221741481882098, "grad_norm": 0.13135458528995514, "learning_rate": 1.0422151727317697e-06, "loss": 0.0226, "step": 56290 }, { "epoch": 15.224445646295294, "grad_norm": 0.5480031967163086, "learning_rate": 1.0366253076294462e-06, "loss": 0.0219, "step": 56300 }, { "epoch": 15.227149810708491, "grad_norm": 0.13954630494117737, "learning_rate": 1.031050316307558e-06, "loss": 0.0229, "step": 56310 }, { "epoch": 15.229853975121687, "grad_norm": 0.14102989435195923, "learning_rate": 1.0254902004596333e-06, "loss": 0.0207, "step": 56320 }, { "epoch": 15.232558139534884, "grad_norm": 0.2504805028438568, "learning_rate": 1.0199449617746882e-06, "loss": 0.0224, "step": 56330 }, { "epoch": 15.23526230394808, "grad_norm": 0.11196310818195343, "learning_rate": 1.0144146019372247e-06, "loss": 0.0235, "step": 56340 }, { "epoch": 15.237966468361277, "grad_norm": 0.09095583111047745, "learning_rate": 1.0088991226272048e-06, "loss": 0.0226, "step": 56350 }, { "epoch": 15.240670632774473, "grad_norm": 0.15452684462070465, "learning_rate": 1.003398525520105e-06, "loss": 0.0232, "step": 56360 }, { "epoch": 15.24337479718767, "grad_norm": 0.13297490775585175, "learning_rate": 9.979128122868552e-07, "loss": 0.0233, "step": 56370 }, { "epoch": 15.246078961600865, "grad_norm": 0.26319292187690735, "learning_rate": 9.924419845938614e-07, "loss": 0.0235, "step": 56380 }, { "epoch": 15.248783126014061, "grad_norm": 0.13020645081996918, "learning_rate": 9.869860441030276e-07, "loss": 0.0223, "step": 56390 }, { "epoch": 15.251487290427258, "grad_norm": 0.14878545701503754, "learning_rate": 9.815449924717169e-07, "loss": 0.0218, "step": 56400 }, { "epoch": 15.254191454840454, "grad_norm": 0.17686109244823456, "learning_rate": 9.761188313527791e-07, "loss": 0.0221, "step": 56410 }, { "epoch": 15.256895619253651, "grad_norm": 0.2514258921146393, "learning_rate": 9.70707562394546e-07, "loss": 0.0228, "step": 56420 }, { "epoch": 15.259599783666847, "grad_norm": 0.24587644636631012, "learning_rate": 9.653111872408027e-07, "loss": 0.0236, "step": 56430 }, { "epoch": 15.262303948080044, "grad_norm": 0.10671409219503403, "learning_rate": 9.599297075308434e-07, "loss": 0.0227, "step": 56440 }, { "epoch": 15.26500811249324, "grad_norm": 0.14304488897323608, "learning_rate": 9.545631248994048e-07, "loss": 0.022, "step": 56450 }, { "epoch": 15.267712276906435, "grad_norm": 0.21477557718753815, "learning_rate": 9.492114409767217e-07, "loss": 0.0223, "step": 56460 }, { "epoch": 15.270416441319632, "grad_norm": 0.25716713070869446, "learning_rate": 9.438746573884938e-07, "loss": 0.0226, "step": 56470 }, { "epoch": 15.273120605732828, "grad_norm": 0.10288487374782562, "learning_rate": 9.385527757558909e-07, "loss": 0.0233, "step": 56480 }, { "epoch": 15.275824770146025, "grad_norm": 0.24836954474449158, "learning_rate": 9.332457976955644e-07, "loss": 0.0231, "step": 56490 }, { "epoch": 15.27852893455922, "grad_norm": 0.12310788780450821, "learning_rate": 9.279537248196247e-07, "loss": 0.0229, "step": 56500 }, { "epoch": 15.281233098972418, "grad_norm": 0.14142881333827972, "learning_rate": 9.2267655873568e-07, "loss": 0.0227, "step": 56510 }, { "epoch": 15.283937263385614, "grad_norm": 0.1638258993625641, "learning_rate": 9.174143010467762e-07, "loss": 0.0225, "step": 56520 }, { "epoch": 15.286641427798811, "grad_norm": 0.12253343313932419, "learning_rate": 9.12166953351462e-07, "loss": 0.0226, "step": 56530 }, { "epoch": 15.289345592212007, "grad_norm": 0.1828397512435913, "learning_rate": 9.069345172437404e-07, "loss": 0.0216, "step": 56540 }, { "epoch": 15.292049756625202, "grad_norm": 0.20855475962162018, "learning_rate": 9.017169943130843e-07, "loss": 0.0218, "step": 56550 }, { "epoch": 15.2947539210384, "grad_norm": 0.15905070304870605, "learning_rate": 8.965143861444425e-07, "loss": 0.0235, "step": 56560 }, { "epoch": 15.297458085451595, "grad_norm": 0.12172152101993561, "learning_rate": 8.91326694318223e-07, "loss": 0.0246, "step": 56570 }, { "epoch": 15.300162249864792, "grad_norm": 0.12154529243707657, "learning_rate": 8.861539204103098e-07, "loss": 0.024, "step": 56580 }, { "epoch": 15.302866414277988, "grad_norm": 0.18426387012004852, "learning_rate": 8.809960659920735e-07, "loss": 0.0225, "step": 56590 }, { "epoch": 15.305570578691185, "grad_norm": 0.15784743428230286, "learning_rate": 8.758531326303055e-07, "loss": 0.0233, "step": 56600 }, { "epoch": 15.30827474310438, "grad_norm": 0.12233636528253555, "learning_rate": 8.707251218873169e-07, "loss": 0.0237, "step": 56610 }, { "epoch": 15.310978907517576, "grad_norm": 0.12510260939598083, "learning_rate": 8.656120353208507e-07, "loss": 0.022, "step": 56620 }, { "epoch": 15.313683071930773, "grad_norm": 0.2372073084115982, "learning_rate": 8.605138744841312e-07, "loss": 0.0222, "step": 56630 }, { "epoch": 15.316387236343969, "grad_norm": 0.1190461739897728, "learning_rate": 8.554306409258417e-07, "loss": 0.0222, "step": 56640 }, { "epoch": 15.319091400757166, "grad_norm": 0.12615886330604553, "learning_rate": 8.503623361901358e-07, "loss": 0.0225, "step": 56650 }, { "epoch": 15.321795565170362, "grad_norm": 0.10429760068655014, "learning_rate": 8.453089618166377e-07, "loss": 0.0221, "step": 56660 }, { "epoch": 15.32449972958356, "grad_norm": 0.34019556641578674, "learning_rate": 8.402705193404137e-07, "loss": 0.0229, "step": 56670 }, { "epoch": 15.327203893996755, "grad_norm": 0.141323059797287, "learning_rate": 8.352470102920174e-07, "loss": 0.0224, "step": 56680 }, { "epoch": 15.329908058409952, "grad_norm": 0.1957542449235916, "learning_rate": 8.302384361974669e-07, "loss": 0.0239, "step": 56690 }, { "epoch": 15.332612222823148, "grad_norm": 0.18006962537765503, "learning_rate": 8.252447985782231e-07, "loss": 0.023, "step": 56700 }, { "epoch": 15.335316387236343, "grad_norm": 0.15881037712097168, "learning_rate": 8.202660989512279e-07, "loss": 0.0236, "step": 56710 }, { "epoch": 15.33802055164954, "grad_norm": 0.18576836585998535, "learning_rate": 8.153023388288772e-07, "loss": 0.0242, "step": 56720 }, { "epoch": 15.340724716062736, "grad_norm": 0.18321187794208527, "learning_rate": 8.103535197190204e-07, "loss": 0.0222, "step": 56730 }, { "epoch": 15.343428880475933, "grad_norm": 0.2658281624317169, "learning_rate": 8.054196431249993e-07, "loss": 0.0212, "step": 56740 }, { "epoch": 15.346133044889129, "grad_norm": 0.152512326836586, "learning_rate": 8.005007105455709e-07, "loss": 0.0237, "step": 56750 }, { "epoch": 15.348837209302326, "grad_norm": 0.2615152597427368, "learning_rate": 7.95596723474995e-07, "loss": 0.0239, "step": 56760 }, { "epoch": 15.351541373715522, "grad_norm": 0.21927975118160248, "learning_rate": 7.907076834029692e-07, "loss": 0.0234, "step": 56770 }, { "epoch": 15.354245538128719, "grad_norm": 0.16403889656066895, "learning_rate": 7.858335918146498e-07, "loss": 0.0238, "step": 56780 }, { "epoch": 15.356949702541915, "grad_norm": 0.19213315844535828, "learning_rate": 7.809744501906635e-07, "loss": 0.0214, "step": 56790 }, { "epoch": 15.35965386695511, "grad_norm": 0.1588151752948761, "learning_rate": 7.761302600070797e-07, "loss": 0.0222, "step": 56800 }, { "epoch": 15.362358031368307, "grad_norm": 0.10539226979017258, "learning_rate": 7.713010227354545e-07, "loss": 0.0238, "step": 56810 }, { "epoch": 15.365062195781503, "grad_norm": 0.17463862895965576, "learning_rate": 7.664867398427589e-07, "loss": 0.0236, "step": 56820 }, { "epoch": 15.3677663601947, "grad_norm": 0.14012767374515533, "learning_rate": 7.616874127914619e-07, "loss": 0.0227, "step": 56830 }, { "epoch": 15.370470524607896, "grad_norm": 0.25495296716690063, "learning_rate": 7.569030430394641e-07, "loss": 0.0242, "step": 56840 }, { "epoch": 15.373174689021093, "grad_norm": 0.1386037915945053, "learning_rate": 7.521336320401306e-07, "loss": 0.0227, "step": 56850 }, { "epoch": 15.375878853434289, "grad_norm": 0.22400127351284027, "learning_rate": 7.473791812422915e-07, "loss": 0.0227, "step": 56860 }, { "epoch": 15.378583017847484, "grad_norm": 0.22159315645694733, "learning_rate": 7.426396920902134e-07, "loss": 0.0225, "step": 56870 }, { "epoch": 15.381287182260682, "grad_norm": 0.17228293418884277, "learning_rate": 7.379151660236283e-07, "loss": 0.0213, "step": 56880 }, { "epoch": 15.383991346673877, "grad_norm": 0.2606818377971649, "learning_rate": 7.332056044777324e-07, "loss": 0.024, "step": 56890 }, { "epoch": 15.386695511087074, "grad_norm": 0.21290160715579987, "learning_rate": 7.285110088831537e-07, "loss": 0.0238, "step": 56900 }, { "epoch": 15.38939967550027, "grad_norm": 0.15864159166812897, "learning_rate": 7.238313806659902e-07, "loss": 0.023, "step": 56910 }, { "epoch": 15.392103839913467, "grad_norm": 0.19112692773342133, "learning_rate": 7.191667212477993e-07, "loss": 0.0217, "step": 56920 }, { "epoch": 15.394808004326663, "grad_norm": 0.10199430584907532, "learning_rate": 7.145170320455697e-07, "loss": 0.0218, "step": 56930 }, { "epoch": 15.39751216873986, "grad_norm": 0.23121720552444458, "learning_rate": 7.098823144717604e-07, "loss": 0.022, "step": 56940 }, { "epoch": 15.400216333153056, "grad_norm": 0.17171944677829742, "learning_rate": 7.052625699342674e-07, "loss": 0.0233, "step": 56950 }, { "epoch": 15.402920497566251, "grad_norm": 0.09171085804700851, "learning_rate": 7.006577998364628e-07, "loss": 0.0238, "step": 56960 }, { "epoch": 15.405624661979449, "grad_norm": 0.21529299020767212, "learning_rate": 6.96068005577133e-07, "loss": 0.0225, "step": 56970 }, { "epoch": 15.408328826392644, "grad_norm": 0.3140393793582916, "learning_rate": 6.914931885505627e-07, "loss": 0.0227, "step": 56980 }, { "epoch": 15.411032990805841, "grad_norm": 0.17253626883029938, "learning_rate": 6.869333501464347e-07, "loss": 0.0233, "step": 56990 }, { "epoch": 15.413737155219037, "grad_norm": 0.13971172273159027, "learning_rate": 6.823884917499246e-07, "loss": 0.0236, "step": 57000 }, { "epoch": 15.416441319632234, "grad_norm": 0.09841088205575943, "learning_rate": 6.778586147416278e-07, "loss": 0.0226, "step": 57010 }, { "epoch": 15.41914548404543, "grad_norm": 0.28010109066963196, "learning_rate": 6.733437204976156e-07, "loss": 0.0229, "step": 57020 }, { "epoch": 15.421849648458625, "grad_norm": 0.15777689218521118, "learning_rate": 6.688438103893857e-07, "loss": 0.0242, "step": 57030 }, { "epoch": 15.424553812871823, "grad_norm": 0.13608193397521973, "learning_rate": 6.64358885783889e-07, "loss": 0.0231, "step": 57040 }, { "epoch": 15.427257977285018, "grad_norm": 0.16789744794368744, "learning_rate": 6.598889480435299e-07, "loss": 0.0234, "step": 57050 }, { "epoch": 15.429962141698216, "grad_norm": 0.1940547525882721, "learning_rate": 6.554339985261615e-07, "loss": 0.0233, "step": 57060 }, { "epoch": 15.432666306111411, "grad_norm": 0.13972851634025574, "learning_rate": 6.509940385850733e-07, "loss": 0.0225, "step": 57070 }, { "epoch": 15.435370470524608, "grad_norm": 0.16347874701023102, "learning_rate": 6.465690695690141e-07, "loss": 0.0229, "step": 57080 }, { "epoch": 15.438074634937804, "grad_norm": 0.10438014566898346, "learning_rate": 6.421590928221699e-07, "loss": 0.0225, "step": 57090 }, { "epoch": 15.440778799351001, "grad_norm": 0.1652374118566513, "learning_rate": 6.377641096841691e-07, "loss": 0.0219, "step": 57100 }, { "epoch": 15.443482963764197, "grad_norm": 0.16950425505638123, "learning_rate": 6.333841214901048e-07, "loss": 0.0227, "step": 57110 }, { "epoch": 15.446187128177392, "grad_norm": 0.1799650490283966, "learning_rate": 6.290191295704906e-07, "loss": 0.0233, "step": 57120 }, { "epoch": 15.44889129259059, "grad_norm": 0.11782556027173996, "learning_rate": 6.246691352513046e-07, "loss": 0.0215, "step": 57130 }, { "epoch": 15.451595457003785, "grad_norm": 0.15968185663223267, "learning_rate": 6.203341398539452e-07, "loss": 0.0227, "step": 57140 }, { "epoch": 15.454299621416983, "grad_norm": 0.11381079256534576, "learning_rate": 6.160141446952872e-07, "loss": 0.0221, "step": 57150 }, { "epoch": 15.457003785830178, "grad_norm": 0.1935085654258728, "learning_rate": 6.11709151087625e-07, "loss": 0.0236, "step": 57160 }, { "epoch": 15.459707950243375, "grad_norm": 0.13435043394565582, "learning_rate": 6.074191603386958e-07, "loss": 0.022, "step": 57170 }, { "epoch": 15.462412114656571, "grad_norm": 0.22632642090320587, "learning_rate": 6.031441737516907e-07, "loss": 0.0217, "step": 57180 }, { "epoch": 15.465116279069768, "grad_norm": 0.27924707531929016, "learning_rate": 5.988841926252431e-07, "loss": 0.022, "step": 57190 }, { "epoch": 15.467820443482964, "grad_norm": 0.23329228162765503, "learning_rate": 5.946392182534066e-07, "loss": 0.0228, "step": 57200 }, { "epoch": 15.47052460789616, "grad_norm": 0.4807165861129761, "learning_rate": 5.90409251925711e-07, "loss": 0.0246, "step": 57210 }, { "epoch": 15.473228772309357, "grad_norm": 0.2317076027393341, "learning_rate": 5.861942949270949e-07, "loss": 0.0236, "step": 57220 }, { "epoch": 15.475932936722552, "grad_norm": 0.284769743680954, "learning_rate": 5.819943485379564e-07, "loss": 0.0242, "step": 57230 }, { "epoch": 15.47863710113575, "grad_norm": 0.1292675882577896, "learning_rate": 5.778094140341306e-07, "loss": 0.0233, "step": 57240 }, { "epoch": 15.481341265548945, "grad_norm": 0.1378972828388214, "learning_rate": 5.736394926868893e-07, "loss": 0.0224, "step": 57250 }, { "epoch": 15.484045429962142, "grad_norm": 0.4836583435535431, "learning_rate": 5.694845857629416e-07, "loss": 0.0232, "step": 57260 }, { "epoch": 15.486749594375338, "grad_norm": 0.19158965349197388, "learning_rate": 5.653446945244334e-07, "loss": 0.0237, "step": 57270 }, { "epoch": 15.489453758788535, "grad_norm": 0.16697460412979126, "learning_rate": 5.612198202289698e-07, "loss": 0.023, "step": 57280 }, { "epoch": 15.49215792320173, "grad_norm": 0.21076232194900513, "learning_rate": 5.571099641295596e-07, "loss": 0.0226, "step": 57290 }, { "epoch": 15.494862087614926, "grad_norm": 0.2809326946735382, "learning_rate": 5.530151274746875e-07, "loss": 0.0232, "step": 57300 }, { "epoch": 15.497566252028124, "grad_norm": 0.3512379825115204, "learning_rate": 5.489353115082418e-07, "loss": 0.022, "step": 57310 }, { "epoch": 15.50027041644132, "grad_norm": 0.1663406640291214, "learning_rate": 5.4487051746957e-07, "loss": 0.0241, "step": 57320 }, { "epoch": 15.502974580854517, "grad_norm": 0.3039070963859558, "learning_rate": 5.408207465934511e-07, "loss": 0.0228, "step": 57330 }, { "epoch": 15.505678745267712, "grad_norm": 0.1350805163383484, "learning_rate": 5.3678600011009e-07, "loss": 0.0227, "step": 57340 }, { "epoch": 15.50838290968091, "grad_norm": 0.19066941738128662, "learning_rate": 5.327662792451449e-07, "loss": 0.0243, "step": 57350 }, { "epoch": 15.511087074094105, "grad_norm": 0.1448359489440918, "learning_rate": 5.287615852196947e-07, "loss": 0.0217, "step": 57360 }, { "epoch": 15.5137912385073, "grad_norm": 0.12113694101572037, "learning_rate": 5.247719192502665e-07, "loss": 0.0229, "step": 57370 }, { "epoch": 15.516495402920498, "grad_norm": 0.16405022144317627, "learning_rate": 5.207972825488128e-07, "loss": 0.0235, "step": 57380 }, { "epoch": 15.519199567333693, "grad_norm": 0.13980257511138916, "learning_rate": 5.168376763227178e-07, "loss": 0.0244, "step": 57390 }, { "epoch": 15.52190373174689, "grad_norm": 0.14923885464668274, "learning_rate": 5.12893101774814e-07, "loss": 0.0225, "step": 57400 }, { "epoch": 15.524607896160086, "grad_norm": 0.11062905937433243, "learning_rate": 5.089635601033483e-07, "loss": 0.0222, "step": 57410 }, { "epoch": 15.527312060573284, "grad_norm": 0.19976437091827393, "learning_rate": 5.050490525020213e-07, "loss": 0.0226, "step": 57420 }, { "epoch": 15.530016224986479, "grad_norm": 0.18811042606830597, "learning_rate": 5.011495801599541e-07, "loss": 0.0224, "step": 57430 }, { "epoch": 15.532720389399675, "grad_norm": 0.1333644539117813, "learning_rate": 4.972651442616994e-07, "loss": 0.0235, "step": 57440 }, { "epoch": 15.535424553812872, "grad_norm": 0.09153150022029877, "learning_rate": 4.933957459872574e-07, "loss": 0.0229, "step": 57450 }, { "epoch": 15.538128718226067, "grad_norm": 0.09635064750909805, "learning_rate": 4.895413865120324e-07, "loss": 0.0217, "step": 57460 }, { "epoch": 15.540832882639265, "grad_norm": 0.1316893845796585, "learning_rate": 4.857020670068935e-07, "loss": 0.024, "step": 57470 }, { "epoch": 15.54353704705246, "grad_norm": 0.17549997568130493, "learning_rate": 4.818777886381132e-07, "loss": 0.0231, "step": 57480 }, { "epoch": 15.546241211465658, "grad_norm": 0.16619566082954407, "learning_rate": 4.780685525674122e-07, "loss": 0.0217, "step": 57490 }, { "epoch": 15.548945375878853, "grad_norm": 0.13713671267032623, "learning_rate": 4.7427435995193724e-07, "loss": 0.0227, "step": 57500 }, { "epoch": 15.55164954029205, "grad_norm": 0.17831651866436005, "learning_rate": 4.7049521194425515e-07, "loss": 0.0223, "step": 57510 }, { "epoch": 15.554353704705246, "grad_norm": 0.13178658485412598, "learning_rate": 4.667311096923754e-07, "loss": 0.0225, "step": 57520 }, { "epoch": 15.557057869118442, "grad_norm": 0.12554647028446198, "learning_rate": 4.6298205433973895e-07, "loss": 0.0228, "step": 57530 }, { "epoch": 15.559762033531639, "grad_norm": 0.15827037394046783, "learning_rate": 4.5924804702520696e-07, "loss": 0.0235, "step": 57540 }, { "epoch": 15.562466197944834, "grad_norm": 0.1402738243341446, "learning_rate": 4.5552908888306655e-07, "loss": 0.024, "step": 57550 }, { "epoch": 15.565170362358032, "grad_norm": 0.1308729350566864, "learning_rate": 4.5182518104304185e-07, "loss": 0.0226, "step": 57560 }, { "epoch": 15.567874526771227, "grad_norm": 0.18224473297595978, "learning_rate": 4.4813632463028277e-07, "loss": 0.0224, "step": 57570 }, { "epoch": 15.570578691184425, "grad_norm": 0.15095841884613037, "learning_rate": 4.444625207653763e-07, "loss": 0.022, "step": 57580 }, { "epoch": 15.57328285559762, "grad_norm": 0.15165284276008606, "learning_rate": 4.4080377056430753e-07, "loss": 0.0218, "step": 57590 }, { "epoch": 15.575987020010817, "grad_norm": 0.2138533890247345, "learning_rate": 4.371600751385263e-07, "loss": 0.0235, "step": 57600 }, { "epoch": 15.578691184424013, "grad_norm": 0.10038372129201889, "learning_rate": 4.3353143559487495e-07, "loss": 0.0221, "step": 57610 }, { "epoch": 15.581395348837209, "grad_norm": 0.2698841392993927, "learning_rate": 4.2991785303565513e-07, "loss": 0.0234, "step": 57620 }, { "epoch": 15.584099513250406, "grad_norm": 0.2296425700187683, "learning_rate": 4.2631932855856647e-07, "loss": 0.0233, "step": 57630 }, { "epoch": 15.586803677663601, "grad_norm": 0.13884055614471436, "learning_rate": 4.2273586325674576e-07, "loss": 0.0237, "step": 57640 }, { "epoch": 15.589507842076799, "grad_norm": 0.12480869144201279, "learning_rate": 4.1916745821876103e-07, "loss": 0.0224, "step": 57650 }, { "epoch": 15.592212006489994, "grad_norm": 0.1835273802280426, "learning_rate": 4.156141145285897e-07, "loss": 0.0224, "step": 57660 }, { "epoch": 15.594916170903192, "grad_norm": 0.17726878821849823, "learning_rate": 4.1207583326566267e-07, "loss": 0.0236, "step": 57670 }, { "epoch": 15.597620335316387, "grad_norm": 0.16682322323322296, "learning_rate": 4.085526155047925e-07, "loss": 0.0238, "step": 57680 }, { "epoch": 15.600324499729584, "grad_norm": 0.37710413336753845, "learning_rate": 4.050444623162564e-07, "loss": 0.023, "step": 57690 }, { "epoch": 15.60302866414278, "grad_norm": 0.15050707757472992, "learning_rate": 4.015513747657351e-07, "loss": 0.023, "step": 57700 }, { "epoch": 15.605732828555976, "grad_norm": 0.1795208752155304, "learning_rate": 3.9807335391433554e-07, "loss": 0.0232, "step": 57710 }, { "epoch": 15.608436992969173, "grad_norm": 0.3234021067619324, "learning_rate": 3.946104008185847e-07, "loss": 0.0234, "step": 57720 }, { "epoch": 15.611141157382368, "grad_norm": 0.14391131699085236, "learning_rate": 3.9116251653044113e-07, "loss": 0.0235, "step": 57730 }, { "epoch": 15.613845321795566, "grad_norm": 0.12585914134979248, "learning_rate": 3.877297020972781e-07, "loss": 0.0225, "step": 57740 }, { "epoch": 15.616549486208761, "grad_norm": 0.18412241339683533, "learning_rate": 3.8431195856190036e-07, "loss": 0.022, "step": 57750 }, { "epoch": 15.619253650621959, "grad_norm": 0.16131313145160675, "learning_rate": 3.8090928696251635e-07, "loss": 0.0246, "step": 57760 }, { "epoch": 15.621957815035154, "grad_norm": 0.25890204310417175, "learning_rate": 3.77521688332777e-07, "loss": 0.0225, "step": 57770 }, { "epoch": 15.62466197944835, "grad_norm": 0.16284576058387756, "learning_rate": 3.741491637017425e-07, "loss": 0.0219, "step": 57780 }, { "epoch": 15.627366143861547, "grad_norm": 0.16449257731437683, "learning_rate": 3.707917140939043e-07, "loss": 0.0234, "step": 57790 }, { "epoch": 15.630070308274743, "grad_norm": 0.1395997852087021, "learning_rate": 3.6744934052915235e-07, "loss": 0.0235, "step": 57800 }, { "epoch": 15.63277447268794, "grad_norm": 0.14577585458755493, "learning_rate": 3.641220440228188e-07, "loss": 0.0229, "step": 57810 }, { "epoch": 15.635478637101135, "grad_norm": 0.12250638008117676, "learning_rate": 3.608098255856562e-07, "loss": 0.0224, "step": 57820 }, { "epoch": 15.638182801514333, "grad_norm": 0.12456230074167252, "learning_rate": 3.575126862238154e-07, "loss": 0.0233, "step": 57830 }, { "epoch": 15.640886965927528, "grad_norm": 0.17435374855995178, "learning_rate": 3.5423062693888955e-07, "loss": 0.0234, "step": 57840 }, { "epoch": 15.643591130340724, "grad_norm": 0.23844631016254425, "learning_rate": 3.509636487278756e-07, "loss": 0.0219, "step": 57850 }, { "epoch": 15.646295294753921, "grad_norm": 0.10424051433801651, "learning_rate": 3.4771175258320186e-07, "loss": 0.0226, "step": 57860 }, { "epoch": 15.648999459167117, "grad_norm": 0.22056670486927032, "learning_rate": 3.4447493949270047e-07, "loss": 0.0231, "step": 57870 }, { "epoch": 15.651703623580314, "grad_norm": 0.13717980682849884, "learning_rate": 3.4125321043964045e-07, "loss": 0.0228, "step": 57880 }, { "epoch": 15.65440778799351, "grad_norm": 0.13602468371391296, "learning_rate": 3.380465664026833e-07, "loss": 0.0235, "step": 57890 }, { "epoch": 15.657111952406707, "grad_norm": 0.23198333382606506, "learning_rate": 3.348550083559388e-07, "loss": 0.0249, "step": 57900 }, { "epoch": 15.659816116819902, "grad_norm": 0.12189308553934097, "learning_rate": 3.316785372689091e-07, "loss": 0.0219, "step": 57910 }, { "epoch": 15.6625202812331, "grad_norm": 0.2949407994747162, "learning_rate": 3.285171541065224e-07, "loss": 0.0223, "step": 57920 }, { "epoch": 15.665224445646295, "grad_norm": 0.10029774904251099, "learning_rate": 3.253708598291272e-07, "loss": 0.0228, "step": 57930 }, { "epoch": 15.66792861005949, "grad_norm": 0.1456025093793869, "learning_rate": 3.2223965539248116e-07, "loss": 0.024, "step": 57940 }, { "epoch": 15.670632774472688, "grad_norm": 0.14723677933216095, "learning_rate": 3.1912354174776227e-07, "loss": 0.0237, "step": 57950 }, { "epoch": 15.673336938885884, "grad_norm": 0.15888744592666626, "learning_rate": 3.1602251984155786e-07, "loss": 0.0222, "step": 57960 }, { "epoch": 15.676041103299081, "grad_norm": 0.20254261791706085, "learning_rate": 3.1293659061589207e-07, "loss": 0.0218, "step": 57970 }, { "epoch": 15.678745267712277, "grad_norm": 0.1583990603685379, "learning_rate": 3.098657550081707e-07, "loss": 0.0223, "step": 57980 }, { "epoch": 15.681449432125474, "grad_norm": 0.11783058941364288, "learning_rate": 3.068100139512475e-07, "loss": 0.0232, "step": 57990 }, { "epoch": 15.68415359653867, "grad_norm": 0.24382945895195007, "learning_rate": 3.037693683733689e-07, "loss": 0.0223, "step": 58000 }, { "epoch": 15.686857760951867, "grad_norm": 0.1424027383327484, "learning_rate": 3.007438191982015e-07, "loss": 0.0236, "step": 58010 }, { "epoch": 15.689561925365062, "grad_norm": 0.11920307576656342, "learning_rate": 2.9773336734482684e-07, "loss": 0.023, "step": 58020 }, { "epoch": 15.692266089778258, "grad_norm": 0.2202048897743225, "learning_rate": 2.9473801372774667e-07, "loss": 0.0218, "step": 58030 }, { "epoch": 15.694970254191455, "grad_norm": 0.15763582289218903, "learning_rate": 2.91757759256861e-07, "loss": 0.0237, "step": 58040 }, { "epoch": 15.69767441860465, "grad_norm": 0.1203237920999527, "learning_rate": 2.887926048375067e-07, "loss": 0.0223, "step": 58050 }, { "epoch": 15.700378583017848, "grad_norm": 0.11212580651044846, "learning_rate": 2.858425513704022e-07, "loss": 0.0228, "step": 58060 }, { "epoch": 15.703082747431043, "grad_norm": 0.24221490323543549, "learning_rate": 2.8290759975170834e-07, "loss": 0.0234, "step": 58070 }, { "epoch": 15.70578691184424, "grad_norm": 0.1884218156337738, "learning_rate": 2.799877508729787e-07, "loss": 0.0221, "step": 58080 }, { "epoch": 15.708491076257436, "grad_norm": 0.26529425382614136, "learning_rate": 2.770830056211926e-07, "loss": 0.0242, "step": 58090 }, { "epoch": 15.711195240670634, "grad_norm": 0.13359582424163818, "learning_rate": 2.741933648787331e-07, "loss": 0.0232, "step": 58100 }, { "epoch": 15.71389940508383, "grad_norm": 0.1486554741859436, "learning_rate": 2.7131882952339263e-07, "loss": 0.0229, "step": 58110 }, { "epoch": 15.716603569497025, "grad_norm": 0.14869685471057892, "learning_rate": 2.684594004283836e-07, "loss": 0.0224, "step": 58120 }, { "epoch": 15.719307733910222, "grad_norm": 0.09325465559959412, "learning_rate": 2.6561507846232234e-07, "loss": 0.0212, "step": 58130 }, { "epoch": 15.722011898323418, "grad_norm": 0.12982217967510223, "learning_rate": 2.6278586448924005e-07, "loss": 0.0217, "step": 58140 }, { "epoch": 15.724716062736615, "grad_norm": 0.24064680933952332, "learning_rate": 2.5997175936857685e-07, "loss": 0.0214, "step": 58150 }, { "epoch": 15.72742022714981, "grad_norm": 0.21652230620384216, "learning_rate": 2.57172763955188e-07, "loss": 0.0227, "step": 58160 }, { "epoch": 15.730124391563008, "grad_norm": 0.12422670423984528, "learning_rate": 2.543888790993265e-07, "loss": 0.0224, "step": 58170 }, { "epoch": 15.732828555976203, "grad_norm": 0.12987837195396423, "learning_rate": 2.5162010564666607e-07, "loss": 0.0226, "step": 58180 }, { "epoch": 15.735532720389399, "grad_norm": 0.18167857825756073, "learning_rate": 2.488664444382893e-07, "loss": 0.0243, "step": 58190 }, { "epoch": 15.738236884802596, "grad_norm": 0.2043226957321167, "learning_rate": 2.461278963106828e-07, "loss": 0.0227, "step": 58200 }, { "epoch": 15.740941049215792, "grad_norm": 0.22172580659389496, "learning_rate": 2.434044620957421e-07, "loss": 0.0219, "step": 58210 }, { "epoch": 15.743645213628989, "grad_norm": 0.20917925238609314, "learning_rate": 2.406961426207832e-07, "loss": 0.0234, "step": 58220 }, { "epoch": 15.746349378042185, "grad_norm": 0.21378642320632935, "learning_rate": 2.380029387085203e-07, "loss": 0.023, "step": 58230 }, { "epoch": 15.749053542455382, "grad_norm": 0.37819868326187134, "learning_rate": 2.353248511770767e-07, "loss": 0.0228, "step": 58240 }, { "epoch": 15.751757706868577, "grad_norm": 0.20193995535373688, "learning_rate": 2.3266188083997942e-07, "loss": 0.0233, "step": 58250 }, { "epoch": 15.754461871281773, "grad_norm": 0.14336971938610077, "learning_rate": 2.3001402850617027e-07, "loss": 0.0223, "step": 58260 }, { "epoch": 15.75716603569497, "grad_norm": 0.23144015669822693, "learning_rate": 2.2738129498000581e-07, "loss": 0.023, "step": 58270 }, { "epoch": 15.759870200108166, "grad_norm": 0.190613254904747, "learning_rate": 2.2476368106122414e-07, "loss": 0.0225, "step": 58280 }, { "epoch": 15.762574364521363, "grad_norm": 0.1434033066034317, "learning_rate": 2.2216118754500582e-07, "loss": 0.0236, "step": 58290 }, { "epoch": 15.765278528934559, "grad_norm": 0.1170421838760376, "learning_rate": 2.1957381522190735e-07, "loss": 0.024, "step": 58300 }, { "epoch": 15.767982693347756, "grad_norm": 0.22068236768245697, "learning_rate": 2.1700156487790558e-07, "loss": 0.0218, "step": 58310 }, { "epoch": 15.770686857760952, "grad_norm": 0.15763281285762787, "learning_rate": 2.1444443729439213e-07, "loss": 0.0214, "step": 58320 }, { "epoch": 15.773391022174149, "grad_norm": 0.18358731269836426, "learning_rate": 2.1190243324814007e-07, "loss": 0.0219, "step": 58330 }, { "epoch": 15.776095186587344, "grad_norm": 0.1489359438419342, "learning_rate": 2.0937555351135395e-07, "loss": 0.0226, "step": 58340 }, { "epoch": 15.77879935100054, "grad_norm": 0.476642370223999, "learning_rate": 2.0686379885162532e-07, "loss": 0.0228, "step": 58350 }, { "epoch": 15.781503515413737, "grad_norm": 0.1410246640443802, "learning_rate": 2.0436717003196604e-07, "loss": 0.0227, "step": 58360 }, { "epoch": 15.784207679826933, "grad_norm": 0.30683934688568115, "learning_rate": 2.0188566781078057e-07, "loss": 0.0229, "step": 58370 }, { "epoch": 15.78691184424013, "grad_norm": 0.13803087174892426, "learning_rate": 1.994192929418881e-07, "loss": 0.0229, "step": 58380 }, { "epoch": 15.789616008653326, "grad_norm": 0.16385622322559357, "learning_rate": 1.9696804617451158e-07, "loss": 0.0216, "step": 58390 }, { "epoch": 15.792320173066523, "grad_norm": 0.16444961726665497, "learning_rate": 1.9453192825326093e-07, "loss": 0.0233, "step": 58400 }, { "epoch": 15.795024337479719, "grad_norm": 0.14119891822338104, "learning_rate": 1.9211093991817753e-07, "loss": 0.022, "step": 58410 }, { "epoch": 15.797728501892916, "grad_norm": 0.11615493148565292, "learning_rate": 1.8970508190468973e-07, "loss": 0.0215, "step": 58420 }, { "epoch": 15.800432666306111, "grad_norm": 0.25700631737709045, "learning_rate": 1.8731435494362958e-07, "loss": 0.0228, "step": 58430 }, { "epoch": 15.803136830719307, "grad_norm": 0.2818402349948883, "learning_rate": 1.849387597612495e-07, "loss": 0.0238, "step": 58440 }, { "epoch": 15.805840995132504, "grad_norm": 0.1621459573507309, "learning_rate": 1.8257829707917228e-07, "loss": 0.022, "step": 58450 }, { "epoch": 15.8085451595457, "grad_norm": 0.17062219977378845, "learning_rate": 1.8023296761446317e-07, "loss": 0.0225, "step": 58460 }, { "epoch": 15.811249323958897, "grad_norm": 0.23506812751293182, "learning_rate": 1.7790277207956341e-07, "loss": 0.0221, "step": 58470 }, { "epoch": 15.813953488372093, "grad_norm": 0.23984195291996002, "learning_rate": 1.7558771118232343e-07, "loss": 0.0224, "step": 58480 }, { "epoch": 15.81665765278529, "grad_norm": 0.12338866293430328, "learning_rate": 1.7328778562599734e-07, "loss": 0.0225, "step": 58490 }, { "epoch": 15.819361817198486, "grad_norm": 0.28283777832984924, "learning_rate": 1.7100299610924298e-07, "loss": 0.0224, "step": 58500 }, { "epoch": 15.822065981611683, "grad_norm": 0.20957240462303162, "learning_rate": 1.6873334332612733e-07, "loss": 0.0228, "step": 58510 }, { "epoch": 15.824770146024878, "grad_norm": 0.15600475668907166, "learning_rate": 1.6647882796609894e-07, "loss": 0.024, "step": 58520 }, { "epoch": 15.827474310438074, "grad_norm": 0.16355374455451965, "learning_rate": 1.6423945071402102e-07, "loss": 0.0238, "step": 58530 }, { "epoch": 15.830178474851271, "grad_norm": 0.12983541190624237, "learning_rate": 1.6201521225016614e-07, "loss": 0.0218, "step": 58540 }, { "epoch": 15.832882639264467, "grad_norm": 0.1094096302986145, "learning_rate": 1.598061132501938e-07, "loss": 0.0223, "step": 58550 }, { "epoch": 15.835586803677664, "grad_norm": 0.14139103889465332, "learning_rate": 1.576121543851672e-07, "loss": 0.0228, "step": 58560 }, { "epoch": 15.83829096809086, "grad_norm": 0.19506339728832245, "learning_rate": 1.5543333632155876e-07, "loss": 0.0241, "step": 58570 }, { "epoch": 15.840995132504057, "grad_norm": 0.15485164523124695, "learning_rate": 1.5326965972123352e-07, "loss": 0.0232, "step": 58580 }, { "epoch": 15.843699296917253, "grad_norm": 0.17643818259239197, "learning_rate": 1.5112112524146016e-07, "loss": 0.0227, "step": 58590 }, { "epoch": 15.846403461330448, "grad_norm": 0.11414328217506409, "learning_rate": 1.4898773353489992e-07, "loss": 0.0239, "step": 58600 }, { "epoch": 15.849107625743645, "grad_norm": 0.34301936626434326, "learning_rate": 1.4686948524962884e-07, "loss": 0.0229, "step": 58610 }, { "epoch": 15.851811790156841, "grad_norm": 0.2956951856613159, "learning_rate": 1.4476638102911556e-07, "loss": 0.0222, "step": 58620 }, { "epoch": 15.854515954570038, "grad_norm": 0.18955698609352112, "learning_rate": 1.4267842151222123e-07, "loss": 0.0231, "step": 58630 }, { "epoch": 15.857220118983234, "grad_norm": 0.2578326165676117, "learning_rate": 1.4060560733321626e-07, "loss": 0.0231, "step": 58640 }, { "epoch": 15.859924283396431, "grad_norm": 0.19621771574020386, "learning_rate": 1.385479391217692e-07, "loss": 0.0237, "step": 58650 }, { "epoch": 15.862628447809627, "grad_norm": 0.13819409906864166, "learning_rate": 1.365054175029412e-07, "loss": 0.0248, "step": 58660 }, { "epoch": 15.865332612222822, "grad_norm": 0.12352954596281052, "learning_rate": 1.3447804309719702e-07, "loss": 0.0232, "step": 58670 }, { "epoch": 15.86803677663602, "grad_norm": 0.15423612296581268, "learning_rate": 1.3246581652040512e-07, "loss": 0.023, "step": 58680 }, { "epoch": 15.870740941049215, "grad_norm": 0.16907507181167603, "learning_rate": 1.3046873838381546e-07, "loss": 0.0216, "step": 58690 }, { "epoch": 15.873445105462412, "grad_norm": 0.10116244852542877, "learning_rate": 1.2848680929409828e-07, "loss": 0.0233, "step": 58700 }, { "epoch": 15.876149269875608, "grad_norm": 0.21518829464912415, "learning_rate": 1.2652002985331091e-07, "loss": 0.0234, "step": 58710 }, { "epoch": 15.878853434288805, "grad_norm": 0.10135596990585327, "learning_rate": 1.2456840065889764e-07, "loss": 0.0232, "step": 58720 }, { "epoch": 15.881557598702, "grad_norm": 0.34296590089797974, "learning_rate": 1.226319223037231e-07, "loss": 0.023, "step": 58730 }, { "epoch": 15.884261763115198, "grad_norm": 0.202057346701622, "learning_rate": 1.2071059537603902e-07, "loss": 0.0225, "step": 58740 }, { "epoch": 15.886965927528394, "grad_norm": 0.30757275223731995, "learning_rate": 1.1880442045948403e-07, "loss": 0.0212, "step": 58750 }, { "epoch": 15.88967009194159, "grad_norm": 0.14149728417396545, "learning_rate": 1.1691339813311164e-07, "loss": 0.023, "step": 58760 }, { "epoch": 15.892374256354787, "grad_norm": 0.18683139979839325, "learning_rate": 1.1503752897136233e-07, "loss": 0.0232, "step": 58770 }, { "epoch": 15.895078420767982, "grad_norm": 0.17750167846679688, "learning_rate": 1.1317681354407472e-07, "loss": 0.022, "step": 58780 }, { "epoch": 15.89778258518118, "grad_norm": 0.1421762853860855, "learning_rate": 1.1133125241649111e-07, "loss": 0.0231, "step": 58790 }, { "epoch": 15.900486749594375, "grad_norm": 0.13798965513706207, "learning_rate": 1.0950084614922973e-07, "loss": 0.0226, "step": 58800 }, { "epoch": 15.903190914007572, "grad_norm": 0.16955365240573883, "learning_rate": 1.0768559529834021e-07, "loss": 0.021, "step": 58810 }, { "epoch": 15.905895078420768, "grad_norm": 0.2290780395269394, "learning_rate": 1.0588550041522594e-07, "loss": 0.0235, "step": 58820 }, { "epoch": 15.908599242833965, "grad_norm": 0.23188461363315582, "learning_rate": 1.0410056204672169e-07, "loss": 0.0223, "step": 58830 }, { "epoch": 15.91130340724716, "grad_norm": 0.10898678004741669, "learning_rate": 1.0233078073504376e-07, "loss": 0.0235, "step": 58840 }, { "epoch": 15.914007571660356, "grad_norm": 0.24761393666267395, "learning_rate": 1.0057615701780654e-07, "loss": 0.0218, "step": 58850 }, { "epoch": 15.916711736073553, "grad_norm": 0.19525772333145142, "learning_rate": 9.883669142801144e-08, "loss": 0.0223, "step": 58860 }, { "epoch": 15.919415900486749, "grad_norm": 0.15171615779399872, "learning_rate": 9.711238449406356e-08, "loss": 0.0218, "step": 58870 }, { "epoch": 15.922120064899946, "grad_norm": 0.18046247959136963, "learning_rate": 9.540323673976614e-08, "loss": 0.0227, "step": 58880 }, { "epoch": 15.924824229313142, "grad_norm": 0.1550120860338211, "learning_rate": 9.370924868430942e-08, "loss": 0.0235, "step": 58890 }, { "epoch": 15.92752839372634, "grad_norm": 0.15140138566493988, "learning_rate": 9.203042084228175e-08, "loss": 0.0234, "step": 58900 }, { "epoch": 15.930232558139535, "grad_norm": 0.15603941679000854, "learning_rate": 9.036675372366965e-08, "loss": 0.0232, "step": 58910 }, { "epoch": 15.932936722552732, "grad_norm": 0.18742528557777405, "learning_rate": 8.871824783385218e-08, "loss": 0.0214, "step": 58920 }, { "epoch": 15.935640886965928, "grad_norm": 0.1779521107673645, "learning_rate": 8.7084903673601e-08, "loss": 0.0228, "step": 58930 }, { "epoch": 15.938345051379123, "grad_norm": 0.19370223581790924, "learning_rate": 8.546672173908032e-08, "loss": 0.0224, "step": 58940 }, { "epoch": 15.94104921579232, "grad_norm": 0.2425207495689392, "learning_rate": 8.386370252185249e-08, "loss": 0.0233, "step": 58950 }, { "epoch": 15.943753380205516, "grad_norm": 0.11386101692914963, "learning_rate": 8.227584650887243e-08, "loss": 0.0226, "step": 58960 }, { "epoch": 15.946457544618713, "grad_norm": 0.32789093255996704, "learning_rate": 8.070315418249319e-08, "loss": 0.0233, "step": 58970 }, { "epoch": 15.949161709031909, "grad_norm": 0.28678029775619507, "learning_rate": 7.914562602044929e-08, "loss": 0.0221, "step": 58980 }, { "epoch": 15.951865873445106, "grad_norm": 0.14654524624347687, "learning_rate": 7.76032624958789e-08, "loss": 0.0229, "step": 58990 }, { "epoch": 15.954570037858302, "grad_norm": 0.14277957379817963, "learning_rate": 7.607606407731282e-08, "loss": 0.0243, "step": 59000 }, { "epoch": 15.957274202271497, "grad_norm": 0.17330093681812286, "learning_rate": 7.45640312286744e-08, "loss": 0.0225, "step": 59010 }, { "epoch": 15.959978366684695, "grad_norm": 0.11884395033121109, "learning_rate": 7.306716440927952e-08, "loss": 0.0227, "step": 59020 }, { "epoch": 15.96268253109789, "grad_norm": 0.11503230780363083, "learning_rate": 7.15854640738367e-08, "loss": 0.0234, "step": 59030 }, { "epoch": 15.965386695511087, "grad_norm": 0.12140202522277832, "learning_rate": 7.011893067244701e-08, "loss": 0.0223, "step": 59040 }, { "epoch": 15.968090859924283, "grad_norm": 0.15289296209812164, "learning_rate": 6.866756465060408e-08, "loss": 0.0216, "step": 59050 }, { "epoch": 15.97079502433748, "grad_norm": 0.18780100345611572, "learning_rate": 6.723136644918859e-08, "loss": 0.0221, "step": 59060 }, { "epoch": 15.973499188750676, "grad_norm": 0.150089830160141, "learning_rate": 6.581033650449042e-08, "loss": 0.0224, "step": 59070 }, { "epoch": 15.976203353163873, "grad_norm": 0.23418566584587097, "learning_rate": 6.440447524817539e-08, "loss": 0.0217, "step": 59080 }, { "epoch": 15.978907517577069, "grad_norm": 0.1703077256679535, "learning_rate": 6.301378310730743e-08, "loss": 0.0218, "step": 59090 }, { "epoch": 15.981611681990264, "grad_norm": 0.19814394414424896, "learning_rate": 6.163826050434307e-08, "loss": 0.0233, "step": 59100 }, { "epoch": 15.984315846403462, "grad_norm": 0.25824618339538574, "learning_rate": 6.027790785713139e-08, "loss": 0.0239, "step": 59110 }, { "epoch": 15.987020010816657, "grad_norm": 0.31278955936431885, "learning_rate": 5.89327255789085e-08, "loss": 0.0231, "step": 59120 }, { "epoch": 15.989724175229854, "grad_norm": 0.14430980384349823, "learning_rate": 5.7602714078303085e-08, "loss": 0.0244, "step": 59130 }, { "epoch": 15.99242833964305, "grad_norm": 0.15632566809654236, "learning_rate": 5.628787375934197e-08, "loss": 0.0226, "step": 59140 }, { "epoch": 15.995132504056247, "grad_norm": 0.1284143626689911, "learning_rate": 5.498820502143898e-08, "loss": 0.0222, "step": 59150 }, { "epoch": 15.997836668469443, "grad_norm": 0.3478669822216034, "learning_rate": 5.370370825939497e-08, "loss": 0.0218, "step": 59160 }, { "epoch": 16.00054083288264, "grad_norm": 0.2383924424648285, "learning_rate": 5.243438386340893e-08, "loss": 0.0221, "step": 59170 }, { "epoch": 16.003244997295834, "grad_norm": 0.18408960103988647, "learning_rate": 5.118023221907242e-08, "loss": 0.0237, "step": 59180 }, { "epoch": 16.005949161709033, "grad_norm": 0.24879564344882965, "learning_rate": 4.994125370735292e-08, "loss": 0.0227, "step": 59190 }, { "epoch": 16.00865332612223, "grad_norm": 0.1965583860874176, "learning_rate": 4.871744870462713e-08, "loss": 0.0224, "step": 59200 }, { "epoch": 16.011357490535424, "grad_norm": 0.1395687609910965, "learning_rate": 4.7508817582658794e-08, "loss": 0.0232, "step": 59210 }, { "epoch": 16.01406165494862, "grad_norm": 0.21454857289791107, "learning_rate": 4.631536070858755e-08, "loss": 0.0235, "step": 59220 }, { "epoch": 16.01676581936182, "grad_norm": 0.3047586977481842, "learning_rate": 4.513707844495674e-08, "loss": 0.0232, "step": 59230 }, { "epoch": 16.019469983775014, "grad_norm": 0.26884910464286804, "learning_rate": 4.3973971149702255e-08, "loss": 0.022, "step": 59240 }, { "epoch": 16.02217414818821, "grad_norm": 0.12975548207759857, "learning_rate": 4.2826039176147025e-08, "loss": 0.0225, "step": 59250 }, { "epoch": 16.024878312601405, "grad_norm": 0.18368002772331238, "learning_rate": 4.169328287299545e-08, "loss": 0.0217, "step": 59260 }, { "epoch": 16.0275824770146, "grad_norm": 0.12635332345962524, "learning_rate": 4.057570258435006e-08, "loss": 0.0228, "step": 59270 }, { "epoch": 16.0302866414278, "grad_norm": 0.15834668278694153, "learning_rate": 3.947329864970595e-08, "loss": 0.0223, "step": 59280 }, { "epoch": 16.032990805840996, "grad_norm": 0.14507794380187988, "learning_rate": 3.8386071403939686e-08, "loss": 0.0232, "step": 59290 }, { "epoch": 16.03569497025419, "grad_norm": 0.2868877649307251, "learning_rate": 3.731402117733152e-08, "loss": 0.0232, "step": 59300 }, { "epoch": 16.038399134667387, "grad_norm": 0.1792641282081604, "learning_rate": 3.625714829552651e-08, "loss": 0.0227, "step": 59310 }, { "epoch": 16.041103299080586, "grad_norm": 0.17336688935756683, "learning_rate": 3.5215453079590065e-08, "loss": 0.0221, "step": 59320 }, { "epoch": 16.04380746349378, "grad_norm": 0.11527138203382492, "learning_rate": 3.4188935845952396e-08, "loss": 0.0226, "step": 59330 }, { "epoch": 16.046511627906977, "grad_norm": 0.1548927128314972, "learning_rate": 3.3177596906447396e-08, "loss": 0.021, "step": 59340 }, { "epoch": 16.049215792320172, "grad_norm": 0.11705483496189117, "learning_rate": 3.218143656829043e-08, "loss": 0.0219, "step": 59350 }, { "epoch": 16.051919956733368, "grad_norm": 0.17300210893154144, "learning_rate": 3.120045513408387e-08, "loss": 0.0239, "step": 59360 }, { "epoch": 16.054624121146567, "grad_norm": 0.2001214623451233, "learning_rate": 3.02346529018338e-08, "loss": 0.0239, "step": 59370 }, { "epoch": 16.057328285559763, "grad_norm": 0.11871714144945145, "learning_rate": 2.9284030164922204e-08, "loss": 0.0224, "step": 59380 }, { "epoch": 16.060032449972958, "grad_norm": 0.14935165643692017, "learning_rate": 2.8348587212123634e-08, "loss": 0.0235, "step": 59390 }, { "epoch": 16.062736614386154, "grad_norm": 0.11869774758815765, "learning_rate": 2.7428324327594125e-08, "loss": 0.0212, "step": 59400 }, { "epoch": 16.065440778799353, "grad_norm": 0.20239244401454926, "learning_rate": 2.6523241790893383e-08, "loss": 0.0226, "step": 59410 }, { "epoch": 16.06814494321255, "grad_norm": 0.10820553451776505, "learning_rate": 2.563333987695704e-08, "loss": 0.0237, "step": 59420 }, { "epoch": 16.070849107625744, "grad_norm": 0.14148110151290894, "learning_rate": 2.4758618856118852e-08, "loss": 0.0221, "step": 59430 }, { "epoch": 16.07355327203894, "grad_norm": 0.12736663222312927, "learning_rate": 2.3899078994088497e-08, "loss": 0.0228, "step": 59440 }, { "epoch": 16.076257436452135, "grad_norm": 0.17402788996696472, "learning_rate": 2.3054720551973775e-08, "loss": 0.0219, "step": 59450 }, { "epoch": 16.078961600865334, "grad_norm": 0.10954132676124573, "learning_rate": 2.222554378627506e-08, "loss": 0.0237, "step": 59460 }, { "epoch": 16.08166576527853, "grad_norm": 0.26894351840019226, "learning_rate": 2.1411548948868653e-08, "loss": 0.0223, "step": 59470 }, { "epoch": 16.084369929691725, "grad_norm": 0.15988409519195557, "learning_rate": 2.0612736287023426e-08, "loss": 0.022, "step": 59480 }, { "epoch": 16.08707409410492, "grad_norm": 0.10437940806150436, "learning_rate": 1.9829106043400826e-08, "loss": 0.0238, "step": 59490 }, { "epoch": 16.089778258518116, "grad_norm": 0.27318552136421204, "learning_rate": 1.9060658456043768e-08, "loss": 0.0215, "step": 59500 }, { "epoch": 16.092482422931315, "grad_norm": 0.3164650797843933, "learning_rate": 1.830739375838775e-08, "loss": 0.0217, "step": 59510 }, { "epoch": 16.09518658734451, "grad_norm": 0.3305644690990448, "learning_rate": 1.7569312179260832e-08, "loss": 0.0219, "step": 59520 }, { "epoch": 16.097890751757706, "grad_norm": 0.14419572055339813, "learning_rate": 1.684641394286146e-08, "loss": 0.022, "step": 59530 }, { "epoch": 16.100594916170902, "grad_norm": 0.10506558418273926, "learning_rate": 1.6138699268797296e-08, "loss": 0.0233, "step": 59540 }, { "epoch": 16.1032990805841, "grad_norm": 0.11518969386816025, "learning_rate": 1.5446168372046376e-08, "loss": 0.023, "step": 59550 }, { "epoch": 16.106003244997297, "grad_norm": 0.1446744054555893, "learning_rate": 1.4768821462984861e-08, "loss": 0.0224, "step": 59560 }, { "epoch": 16.108707409410492, "grad_norm": 0.24829745292663574, "learning_rate": 1.4106658747370383e-08, "loss": 0.0237, "step": 59570 }, { "epoch": 16.111411573823688, "grad_norm": 0.11141304671764374, "learning_rate": 1.3459680426353149e-08, "loss": 0.0221, "step": 59580 }, { "epoch": 16.114115738236883, "grad_norm": 0.19025515019893646, "learning_rate": 1.2827886696464841e-08, "loss": 0.0228, "step": 59590 }, { "epoch": 16.116819902650082, "grad_norm": 0.11829162389039993, "learning_rate": 1.2211277749635264e-08, "loss": 0.023, "step": 59600 }, { "epoch": 16.119524067063278, "grad_norm": 0.10172490030527115, "learning_rate": 1.1609853773164592e-08, "loss": 0.0221, "step": 59610 }, { "epoch": 16.122228231476473, "grad_norm": 0.15467453002929688, "learning_rate": 1.1023614949751127e-08, "loss": 0.0226, "step": 59620 }, { "epoch": 16.12493239588967, "grad_norm": 0.1476210504770279, "learning_rate": 1.0452561457485744e-08, "loss": 0.023, "step": 59630 }, { "epoch": 16.127636560302868, "grad_norm": 0.15573345124721527, "learning_rate": 9.896693469829688e-09, "loss": 0.0214, "step": 59640 }, { "epoch": 16.130340724716064, "grad_norm": 0.15083739161491394, "learning_rate": 9.35601115564788e-09, "loss": 0.0234, "step": 59650 }, { "epoch": 16.13304488912926, "grad_norm": 0.17096421122550964, "learning_rate": 8.830514679186719e-09, "loss": 0.0222, "step": 59660 }, { "epoch": 16.135749053542455, "grad_norm": 0.12719640135765076, "learning_rate": 8.320204200074066e-09, "loss": 0.0231, "step": 59670 }, { "epoch": 16.13845321795565, "grad_norm": 0.19420947134494781, "learning_rate": 7.825079873324814e-09, "loss": 0.0228, "step": 59680 }, { "epoch": 16.14115738236885, "grad_norm": 0.10795269906520844, "learning_rate": 7.345141849351977e-09, "loss": 0.0249, "step": 59690 }, { "epoch": 16.143861546782045, "grad_norm": 0.15488366782665253, "learning_rate": 6.880390273944493e-09, "loss": 0.0226, "step": 59700 }, { "epoch": 16.14656571119524, "grad_norm": 0.19031798839569092, "learning_rate": 6.4308252882838704e-09, "loss": 0.0235, "step": 59710 }, { "epoch": 16.149269875608436, "grad_norm": 0.18474604189395905, "learning_rate": 5.9964470289386455e-09, "loss": 0.0225, "step": 59720 }, { "epoch": 16.151974040021635, "grad_norm": 0.11869840323925018, "learning_rate": 5.577255627853273e-09, "loss": 0.0229, "step": 59730 }, { "epoch": 16.15467820443483, "grad_norm": 0.13437296450138092, "learning_rate": 5.173251212370334e-09, "loss": 0.0231, "step": 59740 }, { "epoch": 16.157382368848026, "grad_norm": 0.15580618381500244, "learning_rate": 4.784433905219432e-09, "loss": 0.022, "step": 59750 }, { "epoch": 16.16008653326122, "grad_norm": 0.16346284747123718, "learning_rate": 4.4108038245060934e-09, "loss": 0.023, "step": 59760 }, { "epoch": 16.162790697674417, "grad_norm": 0.17891012132167816, "learning_rate": 4.05236108373952e-09, "loss": 0.0226, "step": 59770 }, { "epoch": 16.165494862087616, "grad_norm": 0.2387332022190094, "learning_rate": 3.7091057917937324e-09, "loss": 0.0233, "step": 59780 }, { "epoch": 16.16819902650081, "grad_norm": 0.12414529919624329, "learning_rate": 3.381038052946428e-09, "loss": 0.0229, "step": 59790 }, { "epoch": 16.170903190914007, "grad_norm": 0.2692320942878723, "learning_rate": 3.0681579668623283e-09, "loss": 0.0218, "step": 59800 }, { "epoch": 16.173607355327203, "grad_norm": 0.14913558959960938, "learning_rate": 2.7704656285709727e-09, "loss": 0.0242, "step": 59810 }, { "epoch": 16.176311519740402, "grad_norm": 0.14448216557502747, "learning_rate": 2.4879611285166803e-09, "loss": 0.0217, "step": 59820 }, { "epoch": 16.179015684153597, "grad_norm": 0.19686102867126465, "learning_rate": 2.2206445525085883e-09, "loss": 0.0222, "step": 59830 }, { "epoch": 16.181719848566793, "grad_norm": 0.15158183872699738, "learning_rate": 1.9685159817595112e-09, "loss": 0.0241, "step": 59840 }, { "epoch": 16.18442401297999, "grad_norm": 0.23094703257083893, "learning_rate": 1.7315754928470817e-09, "loss": 0.0222, "step": 59850 }, { "epoch": 16.187128177393184, "grad_norm": 0.15443970263004303, "learning_rate": 1.5098231577581611e-09, "loss": 0.0234, "step": 59860 }, { "epoch": 16.189832341806383, "grad_norm": 0.13075076043605804, "learning_rate": 1.3032590438499804e-09, "loss": 0.0237, "step": 59870 }, { "epoch": 16.19253650621958, "grad_norm": 0.11872207373380661, "learning_rate": 1.1118832138723444e-09, "loss": 0.0237, "step": 59880 }, { "epoch": 16.195240670632774, "grad_norm": 0.17888247966766357, "learning_rate": 9.356957259620825e-10, "loss": 0.0223, "step": 59890 }, { "epoch": 16.19794483504597, "grad_norm": 0.19348524510860443, "learning_rate": 7.74696633637495e-10, "loss": 0.0212, "step": 59900 }, { "epoch": 16.200648999459165, "grad_norm": 0.13361546397209167, "learning_rate": 6.288859858039064e-10, "loss": 0.0229, "step": 59910 }, { "epoch": 16.203353163872364, "grad_norm": 0.12026207149028778, "learning_rate": 4.982638267647665e-10, "loss": 0.0232, "step": 59920 }, { "epoch": 16.20605732828556, "grad_norm": 0.4272010028362274, "learning_rate": 3.8283019618834406e-10, "loss": 0.0228, "step": 59930 }, { "epoch": 16.208761492698756, "grad_norm": 0.23396684229373932, "learning_rate": 2.825851291410331e-10, "loss": 0.0222, "step": 59940 }, { "epoch": 16.21146565711195, "grad_norm": 0.2572304308414459, "learning_rate": 1.975286560873535e-10, "loss": 0.0222, "step": 59950 }, { "epoch": 16.21416982152515, "grad_norm": 0.13092748820781708, "learning_rate": 1.2766080285109284e-10, "loss": 0.0235, "step": 59960 }, { "epoch": 16.216873985938346, "grad_norm": 0.11077563464641571, "learning_rate": 7.298159065971533e-11, "loss": 0.0233, "step": 59970 }, { "epoch": 16.21957815035154, "grad_norm": 0.31178462505340576, "learning_rate": 3.349103612770854e-11, "loss": 0.0223, "step": 59980 }, { "epoch": 16.222282314764737, "grad_norm": 0.20139028131961823, "learning_rate": 9.189151245481142e-12, "loss": 0.0215, "step": 59990 }, { "epoch": 16.224986479177932, "grad_norm": 0.18205960094928741, "learning_rate": 7.594340156735769e-14, "loss": 0.0223, "step": 60000 }, { "epoch": 16.224986479177932, "step": 60000, "total_flos": 0.0, "train_loss": 0.04595814109469454, "train_runtime": 143154.2094, "train_samples_per_second": 429.188, "train_steps_per_second": 0.419 } ], "logging_steps": 10, "max_steps": 60000, "num_input_tokens_seen": 0, "num_train_epochs": 17, "save_steps": 30000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 128, "trial_name": null, "trial_params": null }