{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9945945945945946,
  "eval_steps": 500,
  "global_step": 115,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008648648648648649,
      "grad_norm": 2.249741315841675,
      "learning_rate": 0.0001,
      "loss": 1.8319,
      "step": 1
    },
    {
      "epoch": 0.017297297297297298,
      "grad_norm": 2.1813502311706543,
      "learning_rate": 0.0002,
      "loss": 1.4027,
      "step": 2
    },
    {
      "epoch": 0.025945945945945945,
      "grad_norm": 0.8601759672164917,
      "learning_rate": 0.00019823008849557524,
      "loss": 1.1102,
      "step": 3
    },
    {
      "epoch": 0.034594594594594595,
      "grad_norm": 1.7297605276107788,
      "learning_rate": 0.00019646017699115044,
      "loss": 1.3774,
      "step": 4
    },
    {
      "epoch": 0.043243243243243246,
      "grad_norm": 1.0936262607574463,
      "learning_rate": 0.00019469026548672567,
      "loss": 0.895,
      "step": 5
    },
    {
      "epoch": 0.05189189189189189,
      "grad_norm": 0.6946480870246887,
      "learning_rate": 0.00019292035398230087,
      "loss": 0.7451,
      "step": 6
    },
    {
      "epoch": 0.06054054054054054,
      "grad_norm": 0.45863592624664307,
      "learning_rate": 0.00019115044247787613,
      "loss": 0.876,
      "step": 7
    },
    {
      "epoch": 0.06918918918918919,
      "grad_norm": 0.5447478890419006,
      "learning_rate": 0.00018938053097345133,
      "loss": 0.7719,
      "step": 8
    },
    {
      "epoch": 0.07783783783783783,
      "grad_norm": 0.45514124631881714,
      "learning_rate": 0.00018761061946902656,
      "loss": 0.5759,
      "step": 9
    },
    {
      "epoch": 0.08648648648648649,
      "grad_norm": 0.4590395987033844,
      "learning_rate": 0.0001858407079646018,
      "loss": 0.5838,
      "step": 10
    },
    {
      "epoch": 0.09513513513513513,
      "grad_norm": 0.5425634384155273,
      "learning_rate": 0.000184070796460177,
      "loss": 0.6641,
      "step": 11
    },
    {
      "epoch": 0.10378378378378378,
      "grad_norm": 1.0379027128219604,
      "learning_rate": 0.00018230088495575222,
      "loss": 0.9623,
      "step": 12
    },
    {
      "epoch": 0.11243243243243244,
      "grad_norm": 0.5286022424697876,
      "learning_rate": 0.00018053097345132742,
      "loss": 0.4761,
      "step": 13
    },
    {
      "epoch": 0.12108108108108108,
      "grad_norm": 0.6451830267906189,
      "learning_rate": 0.00017876106194690265,
      "loss": 0.547,
      "step": 14
    },
    {
      "epoch": 0.12972972972972974,
      "grad_norm": 0.6369953751564026,
      "learning_rate": 0.0001769911504424779,
      "loss": 0.5872,
      "step": 15
    },
    {
      "epoch": 0.13837837837837838,
      "grad_norm": 0.4720052182674408,
      "learning_rate": 0.0001752212389380531,
      "loss": 0.3248,
      "step": 16
    },
    {
      "epoch": 0.14702702702702702,
      "grad_norm": 0.5918360352516174,
      "learning_rate": 0.00017345132743362834,
      "loss": 0.6277,
      "step": 17
    },
    {
      "epoch": 0.15567567567567567,
      "grad_norm": 0.5242601037025452,
      "learning_rate": 0.00017168141592920354,
      "loss": 0.5645,
      "step": 18
    },
    {
      "epoch": 0.1643243243243243,
      "grad_norm": 0.474292129278183,
      "learning_rate": 0.00016991150442477877,
      "loss": 0.2115,
      "step": 19
    },
    {
      "epoch": 0.17297297297297298,
      "grad_norm": 0.6523647904396057,
      "learning_rate": 0.000168141592920354,
      "loss": 0.5803,
      "step": 20
    },
    {
      "epoch": 0.18162162162162163,
      "grad_norm": 0.521297812461853,
      "learning_rate": 0.0001663716814159292,
      "loss": 0.4483,
      "step": 21
    },
    {
      "epoch": 0.19027027027027027,
      "grad_norm": 0.5689568519592285,
      "learning_rate": 0.00016460176991150443,
      "loss": 0.6231,
      "step": 22
    },
    {
      "epoch": 0.1989189189189189,
      "grad_norm": 0.4570567011833191,
      "learning_rate": 0.00016283185840707966,
      "loss": 0.2368,
      "step": 23
    },
    {
      "epoch": 0.20756756756756756,
      "grad_norm": 0.414307564496994,
      "learning_rate": 0.0001610619469026549,
      "loss": 0.4674,
      "step": 24
    },
    {
      "epoch": 0.21621621621621623,
      "grad_norm": 0.5027227997779846,
      "learning_rate": 0.0001592920353982301,
      "loss": 0.3558,
      "step": 25
    },
    {
      "epoch": 0.22486486486486487,
      "grad_norm": 0.4441507160663605,
      "learning_rate": 0.00015752212389380532,
      "loss": 0.437,
      "step": 26
    },
    {
      "epoch": 0.23351351351351352,
      "grad_norm": 0.4098701477050781,
      "learning_rate": 0.00015575221238938055,
      "loss": 0.3553,
      "step": 27
    },
    {
      "epoch": 0.24216216216216216,
      "grad_norm": 0.3602244257926941,
      "learning_rate": 0.00015398230088495575,
      "loss": 0.3689,
      "step": 28
    },
    {
      "epoch": 0.2508108108108108,
      "grad_norm": 0.4340718984603882,
      "learning_rate": 0.00015221238938053098,
      "loss": 0.318,
      "step": 29
    },
    {
      "epoch": 0.2594594594594595,
      "grad_norm": 0.44470590353012085,
      "learning_rate": 0.00015044247787610618,
      "loss": 0.4992,
      "step": 30
    },
    {
      "epoch": 0.2681081081081081,
      "grad_norm": 0.43699413537979126,
      "learning_rate": 0.00014867256637168144,
      "loss": 0.3362,
      "step": 31
    },
    {
      "epoch": 0.27675675675675676,
      "grad_norm": 0.4950752258300781,
      "learning_rate": 0.00014690265486725664,
      "loss": 0.4464,
      "step": 32
    },
    {
      "epoch": 0.28540540540540543,
      "grad_norm": 0.4312315881252289,
      "learning_rate": 0.00014513274336283187,
      "loss": 0.4786,
      "step": 33
    },
    {
      "epoch": 0.29405405405405405,
      "grad_norm": 0.45234543085098267,
      "learning_rate": 0.0001433628318584071,
      "loss": 0.5572,
      "step": 34
    },
    {
      "epoch": 0.3027027027027027,
      "grad_norm": 0.4373219311237335,
      "learning_rate": 0.0001415929203539823,
      "loss": 0.3873,
      "step": 35
    },
    {
      "epoch": 0.31135135135135134,
      "grad_norm": 0.35862988233566284,
      "learning_rate": 0.00013982300884955753,
      "loss": 0.2902,
      "step": 36
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.41014787554740906,
      "learning_rate": 0.00013805309734513276,
      "loss": 0.3806,
      "step": 37
    },
    {
      "epoch": 0.3286486486486486,
      "grad_norm": 0.4181463420391083,
      "learning_rate": 0.00013628318584070796,
      "loss": 0.3036,
      "step": 38
    },
    {
      "epoch": 0.3372972972972973,
      "grad_norm": 0.3663095235824585,
      "learning_rate": 0.00013451327433628321,
      "loss": 0.1979,
      "step": 39
    },
    {
      "epoch": 0.34594594594594597,
      "grad_norm": 0.46295005083084106,
      "learning_rate": 0.00013274336283185842,
      "loss": 0.4204,
      "step": 40
    },
    {
      "epoch": 0.3545945945945946,
      "grad_norm": 0.39596325159072876,
      "learning_rate": 0.00013097345132743365,
      "loss": 0.3512,
      "step": 41
    },
    {
      "epoch": 0.36324324324324325,
      "grad_norm": 0.7628335952758789,
      "learning_rate": 0.00012920353982300885,
      "loss": 0.4965,
      "step": 42
    },
    {
      "epoch": 0.37189189189189187,
      "grad_norm": 0.5216770172119141,
      "learning_rate": 0.00012743362831858408,
      "loss": 0.4658,
      "step": 43
    },
    {
      "epoch": 0.38054054054054054,
      "grad_norm": 0.38578447699546814,
      "learning_rate": 0.0001256637168141593,
      "loss": 0.2661,
      "step": 44
    },
    {
      "epoch": 0.3891891891891892,
      "grad_norm": 0.2811882197856903,
      "learning_rate": 0.0001238938053097345,
      "loss": 0.1545,
      "step": 45
    },
    {
      "epoch": 0.3978378378378378,
      "grad_norm": 0.3812131881713867,
      "learning_rate": 0.00012212389380530974,
      "loss": 0.3295,
      "step": 46
    },
    {
      "epoch": 0.4064864864864865,
      "grad_norm": 0.3791070878505707,
      "learning_rate": 0.00012035398230088497,
      "loss": 0.2472,
      "step": 47
    },
    {
      "epoch": 0.4151351351351351,
      "grad_norm": 0.38515138626098633,
      "learning_rate": 0.0001185840707964602,
      "loss": 0.4042,
      "step": 48
    },
    {
      "epoch": 0.4237837837837838,
      "grad_norm": 0.5093116164207458,
      "learning_rate": 0.00011681415929203541,
      "loss": 0.8376,
      "step": 49
    },
    {
      "epoch": 0.43243243243243246,
      "grad_norm": 0.2971178889274597,
      "learning_rate": 0.00011504424778761063,
      "loss": 0.4082,
      "step": 50
    },
    {
      "epoch": 0.4410810810810811,
      "grad_norm": 0.30018818378448486,
      "learning_rate": 0.00011327433628318584,
      "loss": 0.129,
      "step": 51
    },
    {
      "epoch": 0.44972972972972974,
      "grad_norm": 0.4631483256816864,
      "learning_rate": 0.00011150442477876106,
      "loss": 0.3752,
      "step": 52
    },
    {
      "epoch": 0.45837837837837836,
      "grad_norm": 0.3890452980995178,
      "learning_rate": 0.00010973451327433629,
      "loss": 0.4054,
      "step": 53
    },
    {
      "epoch": 0.46702702702702703,
      "grad_norm": 0.3566686511039734,
      "learning_rate": 0.0001079646017699115,
      "loss": 0.2452,
      "step": 54
    },
    {
      "epoch": 0.4756756756756757,
      "grad_norm": 0.4903372526168823,
      "learning_rate": 0.00010619469026548674,
      "loss": 0.4505,
      "step": 55
    },
    {
      "epoch": 0.4843243243243243,
      "grad_norm": 0.3836239278316498,
      "learning_rate": 0.00010442477876106196,
      "loss": 0.3952,
      "step": 56
    },
    {
      "epoch": 0.492972972972973,
      "grad_norm": 0.42047417163848877,
      "learning_rate": 0.00010265486725663717,
      "loss": 0.5074,
      "step": 57
    },
    {
      "epoch": 0.5016216216216216,
      "grad_norm": 0.24409635365009308,
      "learning_rate": 0.00010088495575221239,
      "loss": 0.1389,
      "step": 58
    },
    {
      "epoch": 0.5102702702702703,
      "grad_norm": 0.3819220960140228,
      "learning_rate": 9.911504424778762e-05,
      "loss": 0.3945,
      "step": 59
    },
    {
      "epoch": 0.518918918918919,
      "grad_norm": 0.31148406863212585,
      "learning_rate": 9.734513274336283e-05,
      "loss": 0.5203,
      "step": 60
    },
    {
      "epoch": 0.5275675675675676,
      "grad_norm": 0.3157011866569519,
      "learning_rate": 9.557522123893806e-05,
      "loss": 0.262,
      "step": 61
    },
    {
      "epoch": 0.5362162162162162,
      "grad_norm": 0.40180379152297974,
      "learning_rate": 9.380530973451328e-05,
      "loss": 0.2404,
      "step": 62
    },
    {
      "epoch": 0.5448648648648649,
      "grad_norm": 0.4064180552959442,
      "learning_rate": 9.20353982300885e-05,
      "loss": 0.6118,
      "step": 63
    },
    {
      "epoch": 0.5535135135135135,
      "grad_norm": 0.3912467956542969,
      "learning_rate": 9.026548672566371e-05,
      "loss": 0.271,
      "step": 64
    },
    {
      "epoch": 0.5621621621621622,
      "grad_norm": 0.31059980392456055,
      "learning_rate": 8.849557522123895e-05,
      "loss": 0.2373,
      "step": 65
    },
    {
      "epoch": 0.5708108108108109,
      "grad_norm": 0.30928152799606323,
      "learning_rate": 8.672566371681417e-05,
      "loss": 0.4169,
      "step": 66
    },
    {
      "epoch": 0.5794594594594594,
      "grad_norm": 0.40631791949272156,
      "learning_rate": 8.495575221238938e-05,
      "loss": 0.4175,
      "step": 67
    },
    {
      "epoch": 0.5881081081081081,
      "grad_norm": 0.40440961718559265,
      "learning_rate": 8.31858407079646e-05,
      "loss": 0.3269,
      "step": 68
    },
    {
      "epoch": 0.5967567567567568,
      "grad_norm": 0.4534294009208679,
      "learning_rate": 8.141592920353983e-05,
      "loss": 0.2242,
      "step": 69
    },
    {
      "epoch": 0.6054054054054054,
      "grad_norm": 0.41317978501319885,
      "learning_rate": 7.964601769911504e-05,
      "loss": 0.2633,
      "step": 70
    },
    {
      "epoch": 0.614054054054054,
      "grad_norm": 0.272535115480423,
      "learning_rate": 7.787610619469027e-05,
      "loss": 0.1455,
      "step": 71
    },
    {
      "epoch": 0.6227027027027027,
      "grad_norm": 0.4280416667461395,
      "learning_rate": 7.610619469026549e-05,
      "loss": 0.5289,
      "step": 72
    },
    {
      "epoch": 0.6313513513513513,
      "grad_norm": 0.4870530664920807,
      "learning_rate": 7.433628318584072e-05,
      "loss": 0.5633,
      "step": 73
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.38074707984924316,
      "learning_rate": 7.256637168141593e-05,
      "loss": 0.4738,
      "step": 74
    },
    {
      "epoch": 0.6486486486486487,
      "grad_norm": 0.32775411009788513,
      "learning_rate": 7.079646017699115e-05,
      "loss": 0.2764,
      "step": 75
    },
    {
      "epoch": 0.6572972972972972,
      "grad_norm": 0.3663316071033478,
      "learning_rate": 6.902654867256638e-05,
      "loss": 0.4794,
      "step": 76
    },
    {
      "epoch": 0.6659459459459459,
      "grad_norm": 0.36854031682014465,
      "learning_rate": 6.725663716814161e-05,
      "loss": 0.1809,
      "step": 77
    },
    {
      "epoch": 0.6745945945945946,
      "grad_norm": 0.37296342849731445,
      "learning_rate": 6.548672566371682e-05,
      "loss": 0.4067,
      "step": 78
    },
    {
      "epoch": 0.6832432432432433,
      "grad_norm": 0.4202044606208801,
      "learning_rate": 6.371681415929204e-05,
      "loss": 0.2752,
      "step": 79
    },
    {
      "epoch": 0.6918918918918919,
      "grad_norm": 0.29250282049179077,
      "learning_rate": 6.194690265486725e-05,
      "loss": 0.1461,
      "step": 80
    },
    {
      "epoch": 0.7005405405405405,
      "grad_norm": 0.37763354182243347,
      "learning_rate": 6.017699115044248e-05,
      "loss": 0.2817,
      "step": 81
    },
    {
      "epoch": 0.7091891891891892,
      "grad_norm": 0.30031171441078186,
      "learning_rate": 5.8407079646017705e-05,
      "loss": 0.1572,
      "step": 82
    },
    {
      "epoch": 0.7178378378378378,
      "grad_norm": 0.4519175887107849,
      "learning_rate": 5.663716814159292e-05,
      "loss": 0.3046,
      "step": 83
    },
    {
      "epoch": 0.7264864864864865,
      "grad_norm": 0.3103352189064026,
      "learning_rate": 5.486725663716814e-05,
      "loss": 0.1347,
      "step": 84
    },
    {
      "epoch": 0.7351351351351352,
      "grad_norm": 0.7960600852966309,
      "learning_rate": 5.309734513274337e-05,
      "loss": 0.3168,
      "step": 85
    },
    {
      "epoch": 0.7437837837837837,
      "grad_norm": 0.3281419277191162,
      "learning_rate": 5.132743362831859e-05,
      "loss": 0.2045,
      "step": 86
    },
    {
      "epoch": 0.7524324324324324,
      "grad_norm": 0.35785752534866333,
      "learning_rate": 4.955752212389381e-05,
      "loss": 0.4077,
      "step": 87
    },
    {
      "epoch": 0.7610810810810811,
      "grad_norm": 0.37461650371551514,
      "learning_rate": 4.778761061946903e-05,
      "loss": 0.3227,
      "step": 88
    },
    {
      "epoch": 0.7697297297297298,
      "grad_norm": 0.3365744352340698,
      "learning_rate": 4.601769911504425e-05,
      "loss": 0.2306,
      "step": 89
    },
    {
      "epoch": 0.7783783783783784,
      "grad_norm": 0.29543980956077576,
      "learning_rate": 4.4247787610619477e-05,
      "loss": 0.3661,
      "step": 90
    },
    {
      "epoch": 0.787027027027027,
      "grad_norm": 0.3135324716567993,
      "learning_rate": 4.247787610619469e-05,
      "loss": 0.2503,
      "step": 91
    },
    {
      "epoch": 0.7956756756756757,
      "grad_norm": 0.23556429147720337,
      "learning_rate": 4.0707964601769914e-05,
      "loss": 0.1044,
      "step": 92
    },
    {
      "epoch": 0.8043243243243243,
      "grad_norm": 0.2718769907951355,
      "learning_rate": 3.893805309734514e-05,
      "loss": 0.1471,
      "step": 93
    },
    {
      "epoch": 0.812972972972973,
      "grad_norm": 0.25528448820114136,
      "learning_rate": 3.716814159292036e-05,
      "loss": 0.1126,
      "step": 94
    },
    {
      "epoch": 0.8216216216216217,
      "grad_norm": 0.514164388179779,
      "learning_rate": 3.5398230088495574e-05,
      "loss": 0.3423,
      "step": 95
    },
    {
      "epoch": 0.8302702702702702,
      "grad_norm": 0.33162716031074524,
      "learning_rate": 3.3628318584070804e-05,
      "loss": 0.3637,
      "step": 96
    },
    {
      "epoch": 0.8389189189189189,
      "grad_norm": 0.25161704421043396,
      "learning_rate": 3.185840707964602e-05,
      "loss": 0.1284,
      "step": 97
    },
    {
      "epoch": 0.8475675675675676,
      "grad_norm": 0.32825589179992676,
      "learning_rate": 3.008849557522124e-05,
      "loss": 0.2171,
      "step": 98
    },
    {
      "epoch": 0.8562162162162162,
      "grad_norm": 0.23435255885124207,
      "learning_rate": 2.831858407079646e-05,
      "loss": 0.16,
      "step": 99
    },
    {
      "epoch": 0.8648648648648649,
      "grad_norm": 0.2661581337451935,
      "learning_rate": 2.6548672566371686e-05,
      "loss": 0.2421,
      "step": 100
    },
    {
      "epoch": 0.8735135135135135,
      "grad_norm": 0.2724602222442627,
      "learning_rate": 2.4778761061946905e-05,
      "loss": 0.1246,
      "step": 101
    },
    {
      "epoch": 0.8821621621621621,
      "grad_norm": 0.47894561290740967,
      "learning_rate": 2.3008849557522124e-05,
      "loss": 0.4472,
      "step": 102
    },
    {
      "epoch": 0.8908108108108108,
      "grad_norm": 0.3064163327217102,
      "learning_rate": 2.1238938053097346e-05,
      "loss": 0.2987,
      "step": 103
    },
    {
      "epoch": 0.8994594594594595,
      "grad_norm": 0.4226900637149811,
      "learning_rate": 1.946902654867257e-05,
      "loss": 0.4185,
      "step": 104
    },
    {
      "epoch": 0.9081081081081082,
      "grad_norm": 0.34745219349861145,
      "learning_rate": 1.7699115044247787e-05,
      "loss": 0.2572,
      "step": 105
    },
    {
      "epoch": 0.9167567567567567,
      "grad_norm": 0.35236531496047974,
      "learning_rate": 1.592920353982301e-05,
      "loss": 0.3427,
      "step": 106
    },
    {
      "epoch": 0.9254054054054054,
      "grad_norm": 0.37095391750335693,
      "learning_rate": 1.415929203539823e-05,
      "loss": 0.4018,
      "step": 107
    },
    {
      "epoch": 0.9340540540540541,
      "grad_norm": 0.3331229090690613,
      "learning_rate": 1.2389380530973452e-05,
      "loss": 0.2038,
      "step": 108
    },
    {
      "epoch": 0.9427027027027027,
      "grad_norm": 0.2652183175086975,
      "learning_rate": 1.0619469026548673e-05,
      "loss": 0.1072,
      "step": 109
    },
    {
      "epoch": 0.9513513513513514,
      "grad_norm": 0.29123690724372864,
      "learning_rate": 8.849557522123894e-06,
      "loss": 0.1406,
      "step": 110
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.3317340612411499,
      "learning_rate": 7.079646017699115e-06,
      "loss": 0.2202,
      "step": 111
    },
    {
      "epoch": 0.9686486486486486,
      "grad_norm": 0.47986647486686707,
      "learning_rate": 5.3097345132743365e-06,
      "loss": 0.3464,
      "step": 112
    },
    {
      "epoch": 0.9772972972972973,
      "grad_norm": 0.2612822949886322,
      "learning_rate": 3.5398230088495575e-06,
      "loss": 0.1271,
      "step": 113
    },
    {
      "epoch": 0.985945945945946,
      "grad_norm": 0.26845863461494446,
      "learning_rate": 1.7699115044247788e-06,
      "loss": 0.1044,
      "step": 114
    },
    {
      "epoch": 0.9945945945945946,
      "grad_norm": 0.2526237368583679,
      "learning_rate": 0.0,
      "loss": 0.1158,
      "step": 115
    },
    {
      "epoch": 0.9945945945945946,
      "step": 115,
      "total_flos": 1.3431114641260646e+17,
      "train_loss": 0.4029887131374815,
      "train_runtime": 1125.7865,
      "train_samples_per_second": 0.822,
      "train_steps_per_second": 0.102
    }
  ],
  "logging_steps": 1,
  "max_steps": 115,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.3431114641260646e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}