{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 0,
  "global_step": 452,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004424778761061947,
      "grad_norm": 0.057373046875,
      "learning_rate": 0.00039911504424778763,
      "loss": 1.3739,
      "step": 1
    },
    {
      "epoch": 0.008849557522123894,
      "grad_norm": 0.1201171875,
      "learning_rate": 0.00039823008849557525,
      "loss": 1.4091,
      "step": 2
    },
    {
      "epoch": 0.01327433628318584,
      "grad_norm": 0.0751953125,
      "learning_rate": 0.00039734513274336286,
      "loss": 1.2628,
      "step": 3
    },
    {
      "epoch": 0.017699115044247787,
      "grad_norm": 0.064453125,
      "learning_rate": 0.0003964601769911505,
      "loss": 1.1101,
      "step": 4
    },
    {
      "epoch": 0.022123893805309734,
      "grad_norm": 0.06396484375,
      "learning_rate": 0.0003955752212389381,
      "loss": 1.344,
      "step": 5
    },
    {
      "epoch": 0.02654867256637168,
      "grad_norm": 0.0634765625,
      "learning_rate": 0.00039469026548672565,
      "loss": 1.1884,
      "step": 6
    },
    {
      "epoch": 0.030973451327433628,
      "grad_norm": 0.0625,
      "learning_rate": 0.0003938053097345133,
      "loss": 1.1329,
      "step": 7
    },
    {
      "epoch": 0.035398230088495575,
      "grad_norm": 0.052490234375,
      "learning_rate": 0.0003929203539823009,
      "loss": 1.138,
      "step": 8
    },
    {
      "epoch": 0.03982300884955752,
      "grad_norm": 0.0625,
      "learning_rate": 0.00039203539823008855,
      "loss": 1.0113,
      "step": 9
    },
    {
      "epoch": 0.04424778761061947,
      "grad_norm": 0.041748046875,
      "learning_rate": 0.0003911504424778761,
      "loss": 1.087,
      "step": 10
    },
    {
      "epoch": 0.048672566371681415,
      "grad_norm": 0.046630859375,
      "learning_rate": 0.0003902654867256637,
      "loss": 1.1459,
      "step": 11
    },
    {
      "epoch": 0.05309734513274336,
      "grad_norm": 0.03662109375,
      "learning_rate": 0.00038938053097345134,
      "loss": 1.1421,
      "step": 12
    },
    {
      "epoch": 0.05752212389380531,
      "grad_norm": 0.035888671875,
      "learning_rate": 0.00038849557522123895,
      "loss": 1.175,
      "step": 13
    },
    {
      "epoch": 0.061946902654867256,
      "grad_norm": 0.037109375,
      "learning_rate": 0.00038761061946902657,
      "loss": 1.2099,
      "step": 14
    },
    {
      "epoch": 0.06637168141592921,
      "grad_norm": 0.038818359375,
      "learning_rate": 0.0003867256637168142,
      "loss": 1.1295,
      "step": 15
    },
    {
      "epoch": 0.07079646017699115,
      "grad_norm": 0.0419921875,
      "learning_rate": 0.00038584070796460174,
      "loss": 1.0737,
      "step": 16
    },
    {
      "epoch": 0.0752212389380531,
      "grad_norm": 0.037109375,
      "learning_rate": 0.0003849557522123894,
      "loss": 1.1563,
      "step": 17
    },
    {
      "epoch": 0.07964601769911504,
      "grad_norm": 0.039306640625,
      "learning_rate": 0.000384070796460177,
      "loss": 1.1061,
      "step": 18
    },
    {
      "epoch": 0.084070796460177,
      "grad_norm": 0.050048828125,
      "learning_rate": 0.00038318584070796464,
      "loss": 1.1052,
      "step": 19
    },
    {
      "epoch": 0.08849557522123894,
      "grad_norm": 0.036865234375,
      "learning_rate": 0.00038230088495575226,
      "loss": 1.0009,
      "step": 20
    },
    {
      "epoch": 0.09292035398230089,
      "grad_norm": 0.041015625,
      "learning_rate": 0.0003814159292035398,
      "loss": 0.9805,
      "step": 21
    },
    {
      "epoch": 0.09734513274336283,
      "grad_norm": 0.03173828125,
      "learning_rate": 0.0003805309734513275,
      "loss": 1.1098,
      "step": 22
    },
    {
      "epoch": 0.10176991150442478,
      "grad_norm": 0.0322265625,
      "learning_rate": 0.00037964601769911505,
      "loss": 1.0691,
      "step": 23
    },
    {
      "epoch": 0.10619469026548672,
      "grad_norm": 0.05029296875,
      "learning_rate": 0.00037876106194690266,
      "loss": 1.2944,
      "step": 24
    },
    {
      "epoch": 0.11061946902654868,
      "grad_norm": 0.0419921875,
      "learning_rate": 0.0003778761061946903,
      "loss": 1.0819,
      "step": 25
    },
    {
      "epoch": 0.11504424778761062,
      "grad_norm": 0.0341796875,
      "learning_rate": 0.0003769911504424779,
      "loss": 1.215,
      "step": 26
    },
    {
      "epoch": 0.11946902654867257,
      "grad_norm": 0.041015625,
      "learning_rate": 0.0003761061946902655,
      "loss": 1.0624,
      "step": 27
    },
    {
      "epoch": 0.12389380530973451,
      "grad_norm": 0.03271484375,
      "learning_rate": 0.0003752212389380531,
      "loss": 1.0258,
      "step": 28
    },
    {
      "epoch": 0.12831858407079647,
      "grad_norm": 0.038330078125,
      "learning_rate": 0.00037433628318584073,
      "loss": 1.0544,
      "step": 29
    },
    {
      "epoch": 0.13274336283185842,
      "grad_norm": 0.035400390625,
      "learning_rate": 0.00037345132743362835,
      "loss": 1.0203,
      "step": 30
    },
    {
      "epoch": 0.13716814159292035,
      "grad_norm": 0.05810546875,
      "learning_rate": 0.0003725663716814159,
      "loss": 1.1584,
      "step": 31
    },
    {
      "epoch": 0.1415929203539823,
      "grad_norm": 0.0341796875,
      "learning_rate": 0.0003716814159292036,
      "loss": 0.9215,
      "step": 32
    },
    {
      "epoch": 0.14601769911504425,
      "grad_norm": 0.03857421875,
      "learning_rate": 0.0003707964601769912,
      "loss": 1.1255,
      "step": 33
    },
    {
      "epoch": 0.1504424778761062,
      "grad_norm": 0.053466796875,
      "learning_rate": 0.00036991150442477875,
      "loss": 1.3504,
      "step": 34
    },
    {
      "epoch": 0.15486725663716813,
      "grad_norm": 0.0419921875,
      "learning_rate": 0.0003690265486725664,
      "loss": 1.0819,
      "step": 35
    },
    {
      "epoch": 0.1592920353982301,
      "grad_norm": 0.041259765625,
      "learning_rate": 0.000368141592920354,
      "loss": 1.2328,
      "step": 36
    },
    {
      "epoch": 0.16371681415929204,
      "grad_norm": 0.04345703125,
      "learning_rate": 0.00036725663716814165,
      "loss": 1.1783,
      "step": 37
    },
    {
      "epoch": 0.168141592920354,
      "grad_norm": 0.044189453125,
      "learning_rate": 0.0003663716814159292,
      "loss": 1.105,
      "step": 38
    },
    {
      "epoch": 0.17256637168141592,
      "grad_norm": 0.05224609375,
      "learning_rate": 0.0003654867256637168,
      "loss": 1.1757,
      "step": 39
    },
    {
      "epoch": 0.17699115044247787,
      "grad_norm": 0.042236328125,
      "learning_rate": 0.00036460176991150444,
      "loss": 1.1601,
      "step": 40
    },
    {
      "epoch": 0.18141592920353983,
      "grad_norm": 0.04296875,
      "learning_rate": 0.00036371681415929205,
      "loss": 0.9869,
      "step": 41
    },
    {
      "epoch": 0.18584070796460178,
      "grad_norm": 0.05419921875,
      "learning_rate": 0.00036283185840707967,
      "loss": 1.0769,
      "step": 42
    },
    {
      "epoch": 0.1902654867256637,
      "grad_norm": 0.0361328125,
      "learning_rate": 0.0003619469026548673,
      "loss": 1.015,
      "step": 43
    },
    {
      "epoch": 0.19469026548672566,
      "grad_norm": 0.03564453125,
      "learning_rate": 0.00036106194690265484,
      "loss": 0.9435,
      "step": 44
    },
    {
      "epoch": 0.19911504424778761,
      "grad_norm": 0.058837890625,
      "learning_rate": 0.0003601769911504425,
      "loss": 1.1832,
      "step": 45
    },
    {
      "epoch": 0.20353982300884957,
      "grad_norm": 0.052490234375,
      "learning_rate": 0.00035929203539823007,
      "loss": 1.1826,
      "step": 46
    },
    {
      "epoch": 0.2079646017699115,
      "grad_norm": 0.0625,
      "learning_rate": 0.00035840707964601774,
      "loss": 1.02,
      "step": 47
    },
    {
      "epoch": 0.21238938053097345,
      "grad_norm": 0.047607421875,
      "learning_rate": 0.0003575221238938053,
      "loss": 1.0803,
      "step": 48
    },
    {
      "epoch": 0.2168141592920354,
      "grad_norm": 0.041015625,
      "learning_rate": 0.0003566371681415929,
      "loss": 1.021,
      "step": 49
    },
    {
      "epoch": 0.22123893805309736,
      "grad_norm": 0.041015625,
      "learning_rate": 0.0003557522123893806,
      "loss": 1.0058,
      "step": 50
    },
    {
      "epoch": 0.22566371681415928,
      "grad_norm": 0.040771484375,
      "learning_rate": 0.00035486725663716814,
      "loss": 1.0489,
      "step": 51
    },
    {
      "epoch": 0.23008849557522124,
      "grad_norm": 0.040771484375,
      "learning_rate": 0.0003539823008849558,
      "loss": 0.986,
      "step": 52
    },
    {
      "epoch": 0.2345132743362832,
      "grad_norm": 0.039794921875,
      "learning_rate": 0.00035309734513274337,
      "loss": 1.0928,
      "step": 53
    },
    {
      "epoch": 0.23893805309734514,
      "grad_norm": 0.0419921875,
      "learning_rate": 0.000352212389380531,
      "loss": 1.0037,
      "step": 54
    },
    {
      "epoch": 0.24336283185840707,
      "grad_norm": 0.035888671875,
      "learning_rate": 0.0003513274336283186,
      "loss": 1.0165,
      "step": 55
    },
    {
      "epoch": 0.24778761061946902,
      "grad_norm": 0.046630859375,
      "learning_rate": 0.0003504424778761062,
      "loss": 0.9856,
      "step": 56
    },
    {
      "epoch": 0.252212389380531,
      "grad_norm": 0.0390625,
      "learning_rate": 0.00034955752212389383,
      "loss": 1.0988,
      "step": 57
    },
    {
      "epoch": 0.25663716814159293,
      "grad_norm": 0.035400390625,
      "learning_rate": 0.00034867256637168145,
      "loss": 0.9983,
      "step": 58
    },
    {
      "epoch": 0.2610619469026549,
      "grad_norm": 0.0390625,
      "learning_rate": 0.000347787610619469,
      "loss": 1.0727,
      "step": 59
    },
    {
      "epoch": 0.26548672566371684,
      "grad_norm": 0.0380859375,
      "learning_rate": 0.0003469026548672567,
      "loss": 0.9617,
      "step": 60
    },
    {
      "epoch": 0.26991150442477874,
      "grad_norm": 0.04638671875,
      "learning_rate": 0.00034601769911504423,
      "loss": 1.1435,
      "step": 61
    },
    {
      "epoch": 0.2743362831858407,
      "grad_norm": 0.0419921875,
      "learning_rate": 0.0003451327433628319,
      "loss": 1.0895,
      "step": 62
    },
    {
      "epoch": 0.27876106194690264,
      "grad_norm": 0.038330078125,
      "learning_rate": 0.00034424778761061946,
      "loss": 1.0823,
      "step": 63
    },
    {
      "epoch": 0.2831858407079646,
      "grad_norm": 0.042236328125,
      "learning_rate": 0.0003433628318584071,
      "loss": 1.1119,
      "step": 64
    },
    {
      "epoch": 0.28761061946902655,
      "grad_norm": 0.0576171875,
      "learning_rate": 0.00034247787610619475,
      "loss": 1.2428,
      "step": 65
    },
    {
      "epoch": 0.2920353982300885,
      "grad_norm": 0.04541015625,
      "learning_rate": 0.0003415929203539823,
      "loss": 0.9943,
      "step": 66
    },
    {
      "epoch": 0.29646017699115046,
      "grad_norm": 0.0439453125,
      "learning_rate": 0.0003407079646017699,
      "loss": 1.3215,
      "step": 67
    },
    {
      "epoch": 0.3008849557522124,
      "grad_norm": 0.03515625,
      "learning_rate": 0.00033982300884955754,
      "loss": 0.9997,
      "step": 68
    },
    {
      "epoch": 0.3053097345132743,
      "grad_norm": 0.039794921875,
      "learning_rate": 0.00033893805309734515,
      "loss": 0.9796,
      "step": 69
    },
    {
      "epoch": 0.30973451327433627,
      "grad_norm": 0.044677734375,
      "learning_rate": 0.00033805309734513277,
      "loss": 1.1079,
      "step": 70
    },
    {
      "epoch": 0.3141592920353982,
      "grad_norm": 0.041259765625,
      "learning_rate": 0.0003371681415929204,
      "loss": 1.0242,
      "step": 71
    },
    {
      "epoch": 0.3185840707964602,
      "grad_norm": 0.04638671875,
      "learning_rate": 0.000336283185840708,
      "loss": 1.0227,
      "step": 72
    },
    {
      "epoch": 0.3230088495575221,
      "grad_norm": 0.042236328125,
      "learning_rate": 0.0003353982300884956,
      "loss": 0.9375,
      "step": 73
    },
    {
      "epoch": 0.3274336283185841,
      "grad_norm": 0.03759765625,
      "learning_rate": 0.00033451327433628317,
      "loss": 1.0104,
      "step": 74
    },
    {
      "epoch": 0.33185840707964603,
      "grad_norm": 0.041748046875,
      "learning_rate": 0.00033362831858407084,
      "loss": 1.1685,
      "step": 75
    },
    {
      "epoch": 0.336283185840708,
      "grad_norm": 0.051513671875,
      "learning_rate": 0.0003327433628318584,
      "loss": 1.2954,
      "step": 76
    },
    {
      "epoch": 0.3407079646017699,
      "grad_norm": 0.0517578125,
      "learning_rate": 0.000331858407079646,
      "loss": 0.9816,
      "step": 77
    },
    {
      "epoch": 0.34513274336283184,
      "grad_norm": 0.04248046875,
      "learning_rate": 0.00033097345132743363,
      "loss": 1.0791,
      "step": 78
    },
    {
      "epoch": 0.3495575221238938,
      "grad_norm": 0.043701171875,
      "learning_rate": 0.00033008849557522124,
      "loss": 1.0989,
      "step": 79
    },
    {
      "epoch": 0.35398230088495575,
      "grad_norm": 0.05029296875,
      "learning_rate": 0.00032920353982300886,
      "loss": 1.1164,
      "step": 80
    },
    {
      "epoch": 0.3584070796460177,
      "grad_norm": 0.0400390625,
      "learning_rate": 0.00032831858407079647,
      "loss": 1.2053,
      "step": 81
    },
    {
      "epoch": 0.36283185840707965,
      "grad_norm": 0.041748046875,
      "learning_rate": 0.0003274336283185841,
      "loss": 1.0322,
      "step": 82
    },
    {
      "epoch": 0.3672566371681416,
      "grad_norm": 0.064453125,
      "learning_rate": 0.0003265486725663717,
      "loss": 0.9184,
      "step": 83
    },
    {
      "epoch": 0.37168141592920356,
      "grad_norm": 0.037353515625,
      "learning_rate": 0.0003256637168141593,
      "loss": 1.0874,
      "step": 84
    },
    {
      "epoch": 0.37610619469026546,
      "grad_norm": 0.04638671875,
      "learning_rate": 0.00032477876106194693,
      "loss": 1.0051,
      "step": 85
    },
    {
      "epoch": 0.3805309734513274,
      "grad_norm": 0.052001953125,
      "learning_rate": 0.00032389380530973454,
      "loss": 1.1232,
      "step": 86
    },
    {
      "epoch": 0.38495575221238937,
      "grad_norm": 0.036865234375,
      "learning_rate": 0.0003230088495575221,
      "loss": 0.9745,
      "step": 87
    },
    {
      "epoch": 0.3893805309734513,
      "grad_norm": 0.037353515625,
      "learning_rate": 0.0003221238938053098,
      "loss": 0.9092,
      "step": 88
    },
    {
      "epoch": 0.3938053097345133,
      "grad_norm": 0.04931640625,
      "learning_rate": 0.00032123893805309733,
      "loss": 1.0712,
      "step": 89
    },
    {
      "epoch": 0.39823008849557523,
      "grad_norm": 0.043701171875,
      "learning_rate": 0.000320353982300885,
      "loss": 1.0908,
      "step": 90
    },
    {
      "epoch": 0.4026548672566372,
      "grad_norm": 0.04150390625,
      "learning_rate": 0.00031946902654867256,
      "loss": 1.0897,
      "step": 91
    },
    {
      "epoch": 0.40707964601769914,
      "grad_norm": 0.03857421875,
      "learning_rate": 0.0003185840707964602,
      "loss": 0.8939,
      "step": 92
    },
    {
      "epoch": 0.41150442477876104,
      "grad_norm": 0.044677734375,
      "learning_rate": 0.0003176991150442478,
      "loss": 1.0992,
      "step": 93
    },
    {
      "epoch": 0.415929203539823,
      "grad_norm": 0.038818359375,
      "learning_rate": 0.0003168141592920354,
      "loss": 0.937,
      "step": 94
    },
    {
      "epoch": 0.42035398230088494,
      "grad_norm": 0.0634765625,
      "learning_rate": 0.000315929203539823,
      "loss": 1.1744,
      "step": 95
    },
    {
      "epoch": 0.4247787610619469,
      "grad_norm": 0.042236328125,
      "learning_rate": 0.00031504424778761064,
      "loss": 1.0227,
      "step": 96
    },
    {
      "epoch": 0.42920353982300885,
      "grad_norm": 0.041259765625,
      "learning_rate": 0.00031415929203539825,
      "loss": 1.112,
      "step": 97
    },
    {
      "epoch": 0.4336283185840708,
      "grad_norm": 0.047119140625,
      "learning_rate": 0.00031327433628318586,
      "loss": 0.9122,
      "step": 98
    },
    {
      "epoch": 0.43805309734513276,
      "grad_norm": 0.04931640625,
      "learning_rate": 0.0003123893805309735,
      "loss": 1.0073,
      "step": 99
    },
    {
      "epoch": 0.4424778761061947,
      "grad_norm": 0.040283203125,
      "learning_rate": 0.0003115044247787611,
      "loss": 1.0326,
      "step": 100
    },
    {
      "epoch": 0.4469026548672566,
      "grad_norm": 0.046142578125,
      "learning_rate": 0.0003106194690265487,
      "loss": 1.0014,
      "step": 101
    },
    {
      "epoch": 0.45132743362831856,
      "grad_norm": 0.041015625,
      "learning_rate": 0.00030973451327433627,
      "loss": 1.1081,
      "step": 102
    },
    {
      "epoch": 0.4557522123893805,
      "grad_norm": 0.041015625,
      "learning_rate": 0.00030884955752212394,
      "loss": 1.1268,
      "step": 103
    },
    {
      "epoch": 0.46017699115044247,
      "grad_norm": 0.05078125,
      "learning_rate": 0.0003079646017699115,
      "loss": 1.0382,
      "step": 104
    },
    {
      "epoch": 0.4646017699115044,
      "grad_norm": 0.0576171875,
      "learning_rate": 0.00030707964601769917,
      "loss": 0.9887,
      "step": 105
    },
    {
      "epoch": 0.4690265486725664,
      "grad_norm": 0.0390625,
      "learning_rate": 0.0003061946902654867,
      "loss": 1.0143,
      "step": 106
    },
    {
      "epoch": 0.47345132743362833,
      "grad_norm": 0.06982421875,
      "learning_rate": 0.00030530973451327434,
      "loss": 1.0332,
      "step": 107
    },
    {
      "epoch": 0.4778761061946903,
      "grad_norm": 0.044921875,
      "learning_rate": 0.00030442477876106196,
      "loss": 0.9422,
      "step": 108
    },
    {
      "epoch": 0.4823008849557522,
      "grad_norm": 0.06298828125,
      "learning_rate": 0.00030353982300884957,
      "loss": 1.0376,
      "step": 109
    },
    {
      "epoch": 0.48672566371681414,
      "grad_norm": 0.04833984375,
      "learning_rate": 0.0003026548672566372,
      "loss": 1.1175,
      "step": 110
    },
    {
      "epoch": 0.4911504424778761,
      "grad_norm": 0.044189453125,
      "learning_rate": 0.0003017699115044248,
      "loss": 0.9571,
      "step": 111
    },
    {
      "epoch": 0.49557522123893805,
      "grad_norm": 0.0478515625,
      "learning_rate": 0.00030088495575221236,
      "loss": 1.0857,
      "step": 112
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.059814453125,
      "learning_rate": 0.00030000000000000003,
      "loss": 0.9346,
      "step": 113
    },
    {
      "epoch": 0.504424778761062,
      "grad_norm": 0.054443359375,
      "learning_rate": 0.00029911504424778764,
      "loss": 1.0317,
      "step": 114
    },
    {
      "epoch": 0.5088495575221239,
      "grad_norm": 0.0625,
      "learning_rate": 0.00029823008849557526,
      "loss": 1.0535,
      "step": 115
    },
    {
      "epoch": 0.5132743362831859,
      "grad_norm": 0.04150390625,
      "learning_rate": 0.00029734513274336287,
      "loss": 1.0437,
      "step": 116
    },
    {
      "epoch": 0.5176991150442478,
      "grad_norm": 0.046142578125,
      "learning_rate": 0.00029646017699115043,
      "loss": 1.0253,
      "step": 117
    },
    {
      "epoch": 0.5221238938053098,
      "grad_norm": 0.07421875,
      "learning_rate": 0.0002955752212389381,
      "loss": 1.022,
      "step": 118
    },
    {
      "epoch": 0.5265486725663717,
      "grad_norm": 0.058837890625,
      "learning_rate": 0.00029469026548672566,
      "loss": 1.2344,
      "step": 119
    },
    {
      "epoch": 0.5309734513274337,
      "grad_norm": 0.0576171875,
      "learning_rate": 0.0002938053097345133,
      "loss": 0.9828,
      "step": 120
    },
    {
      "epoch": 0.5353982300884956,
      "grad_norm": 0.05078125,
      "learning_rate": 0.0002929203539823009,
      "loss": 0.9207,
      "step": 121
    },
    {
      "epoch": 0.5398230088495575,
      "grad_norm": 0.050537109375,
      "learning_rate": 0.0002920353982300885,
      "loss": 0.9794,
      "step": 122
    },
    {
      "epoch": 0.5442477876106194,
      "grad_norm": 0.05908203125,
      "learning_rate": 0.0002911504424778761,
      "loss": 1.0962,
      "step": 123
    },
    {
      "epoch": 0.5486725663716814,
      "grad_norm": 0.041748046875,
      "learning_rate": 0.00029026548672566373,
      "loss": 1.1614,
      "step": 124
    },
    {
      "epoch": 0.5530973451327433,
      "grad_norm": 0.038330078125,
      "learning_rate": 0.00028938053097345135,
      "loss": 0.9082,
      "step": 125
    },
    {
      "epoch": 0.5575221238938053,
      "grad_norm": 0.037353515625,
      "learning_rate": 0.00028849557522123896,
      "loss": 0.9406,
      "step": 126
    },
    {
      "epoch": 0.5619469026548672,
      "grad_norm": 0.039306640625,
      "learning_rate": 0.0002876106194690265,
      "loss": 1.1105,
      "step": 127
    },
    {
      "epoch": 0.5663716814159292,
      "grad_norm": 0.051025390625,
      "learning_rate": 0.0002867256637168142,
      "loss": 0.9679,
      "step": 128
    },
    {
      "epoch": 0.5707964601769911,
      "grad_norm": 0.037109375,
      "learning_rate": 0.00028584070796460175,
      "loss": 0.9529,
      "step": 129
    },
    {
      "epoch": 0.5752212389380531,
      "grad_norm": 0.056396484375,
      "learning_rate": 0.00028495575221238937,
      "loss": 1.0341,
      "step": 130
    },
    {
      "epoch": 0.5796460176991151,
      "grad_norm": 0.039306640625,
      "learning_rate": 0.00028407079646017704,
      "loss": 0.9493,
      "step": 131
    },
    {
      "epoch": 0.584070796460177,
      "grad_norm": 0.06591796875,
      "learning_rate": 0.0002831858407079646,
      "loss": 1.262,
      "step": 132
    },
    {
      "epoch": 0.588495575221239,
      "grad_norm": 0.038330078125,
      "learning_rate": 0.00028230088495575226,
      "loss": 0.9412,
      "step": 133
    },
    {
      "epoch": 0.5929203539823009,
      "grad_norm": 0.046875,
      "learning_rate": 0.0002814159292035398,
      "loss": 1.0563,
      "step": 134
    },
    {
      "epoch": 0.5973451327433629,
      "grad_norm": 0.05712890625,
      "learning_rate": 0.00028053097345132744,
      "loss": 1.0201,
      "step": 135
    },
    {
      "epoch": 0.6017699115044248,
      "grad_norm": 0.04052734375,
      "learning_rate": 0.00027964601769911505,
      "loss": 1.0401,
      "step": 136
    },
    {
      "epoch": 0.6061946902654868,
      "grad_norm": 0.05078125,
      "learning_rate": 0.00027876106194690267,
      "loss": 1.0241,
      "step": 137
    },
    {
      "epoch": 0.6106194690265486,
      "grad_norm": 0.05810546875,
      "learning_rate": 0.0002778761061946903,
      "loss": 1.1263,
      "step": 138
    },
    {
      "epoch": 0.6150442477876106,
      "grad_norm": 0.048095703125,
      "learning_rate": 0.0002769911504424779,
      "loss": 1.0869,
      "step": 139
    },
    {
      "epoch": 0.6194690265486725,
      "grad_norm": 0.447265625,
      "learning_rate": 0.0002761061946902655,
      "loss": 0.9944,
      "step": 140
    },
    {
      "epoch": 0.6238938053097345,
      "grad_norm": 0.038818359375,
      "learning_rate": 0.0002752212389380531,
      "loss": 0.9675,
      "step": 141
    },
    {
      "epoch": 0.6283185840707964,
      "grad_norm": 0.068359375,
      "learning_rate": 0.0002743362831858407,
      "loss": 1.0227,
      "step": 142
    },
    {
      "epoch": 0.6327433628318584,
      "grad_norm": 0.072265625,
      "learning_rate": 0.00027345132743362836,
      "loss": 1.0381,
      "step": 143
    },
    {
      "epoch": 0.6371681415929203,
      "grad_norm": 0.055908203125,
      "learning_rate": 0.0002725663716814159,
      "loss": 0.9385,
      "step": 144
    },
    {
      "epoch": 0.6415929203539823,
      "grad_norm": 0.04248046875,
      "learning_rate": 0.00027168141592920353,
      "loss": 1.001,
      "step": 145
    },
    {
      "epoch": 0.6460176991150443,
      "grad_norm": 0.06005859375,
      "learning_rate": 0.0002707964601769912,
      "loss": 1.04,
      "step": 146
    },
    {
      "epoch": 0.6504424778761062,
      "grad_norm": 0.049072265625,
      "learning_rate": 0.00026991150442477876,
      "loss": 0.9735,
      "step": 147
    },
    {
      "epoch": 0.6548672566371682,
      "grad_norm": 0.045654296875,
      "learning_rate": 0.00026902654867256643,
      "loss": 1.0873,
      "step": 148
    },
    {
      "epoch": 0.6592920353982301,
      "grad_norm": 0.04638671875,
      "learning_rate": 0.000268141592920354,
      "loss": 1.1032,
      "step": 149
    },
    {
      "epoch": 0.6637168141592921,
      "grad_norm": 0.051513671875,
      "learning_rate": 0.0002672566371681416,
      "loss": 1.0414,
      "step": 150
    },
    {
      "epoch": 0.668141592920354,
      "grad_norm": 0.0419921875,
      "learning_rate": 0.0002663716814159292,
      "loss": 0.892,
      "step": 151
    },
    {
      "epoch": 0.672566371681416,
      "grad_norm": 0.040771484375,
      "learning_rate": 0.00026548672566371683,
      "loss": 0.9048,
      "step": 152
    },
    {
      "epoch": 0.6769911504424779,
      "grad_norm": 0.06494140625,
      "learning_rate": 0.00026460176991150445,
      "loss": 1.0745,
      "step": 153
    },
    {
      "epoch": 0.6814159292035398,
      "grad_norm": 0.059814453125,
      "learning_rate": 0.00026371681415929206,
      "loss": 1.2796,
      "step": 154
    },
    {
      "epoch": 0.6858407079646017,
      "grad_norm": 0.050048828125,
      "learning_rate": 0.0002628318584070796,
      "loss": 0.9484,
      "step": 155
    },
    {
      "epoch": 0.6902654867256637,
      "grad_norm": 0.0458984375,
      "learning_rate": 0.0002619469026548673,
      "loss": 1.0571,
      "step": 156
    },
    {
      "epoch": 0.6946902654867256,
      "grad_norm": 0.0439453125,
      "learning_rate": 0.00026106194690265485,
      "loss": 1.1435,
      "step": 157
    },
    {
      "epoch": 0.6991150442477876,
      "grad_norm": 0.0458984375,
      "learning_rate": 0.0002601769911504425,
      "loss": 1.0,
      "step": 158
    },
    {
      "epoch": 0.7035398230088495,
      "grad_norm": 0.039794921875,
      "learning_rate": 0.0002592920353982301,
      "loss": 1.0044,
      "step": 159
    },
    {
      "epoch": 0.7079646017699115,
      "grad_norm": 0.049072265625,
      "learning_rate": 0.0002584070796460177,
      "loss": 1.001,
      "step": 160
    },
    {
      "epoch": 0.7123893805309734,
      "grad_norm": 0.04541015625,
      "learning_rate": 0.0002575221238938053,
      "loss": 1.0643,
      "step": 161
    },
    {
      "epoch": 0.7168141592920354,
      "grad_norm": 0.046630859375,
      "learning_rate": 0.0002566371681415929,
      "loss": 1.2461,
      "step": 162
    },
    {
      "epoch": 0.7212389380530974,
      "grad_norm": 0.0458984375,
      "learning_rate": 0.00025575221238938054,
      "loss": 1.297,
      "step": 163
    },
    {
      "epoch": 0.7256637168141593,
      "grad_norm": 0.349609375,
      "learning_rate": 0.00025486725663716815,
      "loss": 0.9718,
      "step": 164
    },
    {
      "epoch": 0.7300884955752213,
      "grad_norm": 0.039794921875,
      "learning_rate": 0.00025398230088495577,
      "loss": 0.9553,
      "step": 165
    },
    {
      "epoch": 0.7345132743362832,
      "grad_norm": 0.041748046875,
      "learning_rate": 0.0002530973451327434,
      "loss": 1.074,
      "step": 166
    },
    {
      "epoch": 0.7389380530973452,
      "grad_norm": 0.0615234375,
      "learning_rate": 0.000252212389380531,
      "loss": 1.0015,
      "step": 167
    },
    {
      "epoch": 0.7433628318584071,
      "grad_norm": 0.043212890625,
      "learning_rate": 0.0002513274336283186,
      "loss": 1.021,
      "step": 168
    },
    {
      "epoch": 0.7477876106194691,
      "grad_norm": 0.0556640625,
      "learning_rate": 0.0002504424778761062,
      "loss": 1.063,
      "step": 169
    },
    {
      "epoch": 0.7522123893805309,
      "grad_norm": 0.03759765625,
      "learning_rate": 0.0002495575221238938,
      "loss": 0.9415,
      "step": 170
    },
    {
      "epoch": 0.7566371681415929,
      "grad_norm": 0.0673828125,
      "learning_rate": 0.00024867256637168145,
      "loss": 1.0556,
      "step": 171
    },
    {
      "epoch": 0.7610619469026548,
      "grad_norm": 0.06298828125,
      "learning_rate": 0.000247787610619469,
      "loss": 1.1345,
      "step": 172
    },
    {
      "epoch": 0.7654867256637168,
      "grad_norm": 0.044189453125,
      "learning_rate": 0.00024690265486725663,
      "loss": 0.9686,
      "step": 173
    },
    {
      "epoch": 0.7699115044247787,
      "grad_norm": 0.18359375,
      "learning_rate": 0.00024601769911504424,
      "loss": 0.8729,
      "step": 174
    },
    {
      "epoch": 0.7743362831858407,
      "grad_norm": 0.04736328125,
      "learning_rate": 0.00024513274336283186,
      "loss": 1.0424,
      "step": 175
    },
    {
      "epoch": 0.7787610619469026,
      "grad_norm": 0.05322265625,
      "learning_rate": 0.00024424778761061947,
      "loss": 1.0317,
      "step": 176
    },
    {
      "epoch": 0.7831858407079646,
      "grad_norm": 0.043212890625,
      "learning_rate": 0.0002433628318584071,
      "loss": 1.1979,
      "step": 177
    },
    {
      "epoch": 0.7876106194690266,
      "grad_norm": 0.0615234375,
      "learning_rate": 0.00024247787610619473,
      "loss": 1.0134,
      "step": 178
    },
    {
      "epoch": 0.7920353982300885,
      "grad_norm": 0.0615234375,
      "learning_rate": 0.00024159292035398232,
      "loss": 1.1044,
      "step": 179
    },
    {
      "epoch": 0.7964601769911505,
      "grad_norm": 0.04443359375,
      "learning_rate": 0.00024070796460176993,
      "loss": 1.0293,
      "step": 180
    },
    {
      "epoch": 0.8008849557522124,
      "grad_norm": 0.04248046875,
      "learning_rate": 0.00023982300884955752,
      "loss": 0.9629,
      "step": 181
    },
    {
      "epoch": 0.8053097345132744,
      "grad_norm": 0.03857421875,
      "learning_rate": 0.00023893805309734516,
      "loss": 0.9511,
      "step": 182
    },
    {
      "epoch": 0.8097345132743363,
      "grad_norm": 0.046142578125,
      "learning_rate": 0.00023805309734513275,
      "loss": 1.0096,
      "step": 183
    },
    {
      "epoch": 0.8141592920353983,
      "grad_norm": 0.0498046875,
      "learning_rate": 0.0002371681415929204,
      "loss": 0.8986,
      "step": 184
    },
    {
      "epoch": 0.8185840707964602,
      "grad_norm": 0.050048828125,
      "learning_rate": 0.00023628318584070798,
      "loss": 0.9618,
      "step": 185
    },
    {
      "epoch": 0.8230088495575221,
      "grad_norm": 0.07177734375,
      "learning_rate": 0.0002353982300884956,
      "loss": 1.0183,
      "step": 186
    },
    {
      "epoch": 0.827433628318584,
      "grad_norm": 0.06982421875,
      "learning_rate": 0.00023451327433628318,
      "loss": 0.9824,
      "step": 187
    },
    {
      "epoch": 0.831858407079646,
      "grad_norm": 0.0439453125,
      "learning_rate": 0.00023362831858407082,
      "loss": 0.9304,
      "step": 188
    },
    {
      "epoch": 0.8362831858407079,
      "grad_norm": 0.04736328125,
      "learning_rate": 0.0002327433628318584,
      "loss": 0.9942,
      "step": 189
    },
    {
      "epoch": 0.8407079646017699,
      "grad_norm": 0.05029296875,
      "learning_rate": 0.00023185840707964602,
      "loss": 1.1299,
      "step": 190
    },
    {
      "epoch": 0.8451327433628318,
      "grad_norm": 0.046875,
      "learning_rate": 0.0002309734513274336,
      "loss": 1.0395,
      "step": 191
    },
    {
      "epoch": 0.8495575221238938,
      "grad_norm": 0.04296875,
      "learning_rate": 0.00023008849557522125,
      "loss": 0.9442,
      "step": 192
    },
    {
      "epoch": 0.8539823008849557,
      "grad_norm": 0.05078125,
      "learning_rate": 0.00022920353982300884,
      "loss": 1.0056,
      "step": 193
    },
    {
      "epoch": 0.8584070796460177,
      "grad_norm": 0.050537109375,
      "learning_rate": 0.00022831858407079648,
      "loss": 0.9217,
      "step": 194
    },
    {
      "epoch": 0.8628318584070797,
      "grad_norm": 0.040771484375,
      "learning_rate": 0.0002274336283185841,
      "loss": 0.9522,
      "step": 195
    },
    {
      "epoch": 0.8672566371681416,
      "grad_norm": 0.042236328125,
      "learning_rate": 0.00022654867256637168,
      "loss": 0.9525,
      "step": 196
    },
    {
      "epoch": 0.8716814159292036,
      "grad_norm": 0.048095703125,
      "learning_rate": 0.00022566371681415932,
      "loss": 1.0493,
      "step": 197
    },
    {
      "epoch": 0.8761061946902655,
      "grad_norm": 0.047607421875,
      "learning_rate": 0.0002247787610619469,
      "loss": 1.1643,
      "step": 198
    },
    {
      "epoch": 0.8805309734513275,
      "grad_norm": 0.041748046875,
      "learning_rate": 0.00022389380530973453,
      "loss": 0.8968,
      "step": 199
    },
    {
      "epoch": 0.8849557522123894,
      "grad_norm": 0.046875,
      "learning_rate": 0.0002230088495575221,
      "loss": 0.8145,
      "step": 200
    },
    {
      "epoch": 0.8893805309734514,
      "grad_norm": 0.0693359375,
      "learning_rate": 0.00022212389380530975,
      "loss": 1.1892,
      "step": 201
    },
    {
      "epoch": 0.8938053097345132,
      "grad_norm": 0.0673828125,
      "learning_rate": 0.00022123893805309734,
      "loss": 0.9646,
      "step": 202
    },
    {
      "epoch": 0.8982300884955752,
      "grad_norm": 0.046630859375,
      "learning_rate": 0.00022035398230088498,
      "loss": 1.0692,
      "step": 203
    },
    {
      "epoch": 0.9026548672566371,
      "grad_norm": 0.06396484375,
      "learning_rate": 0.00021946902654867257,
      "loss": 0.9034,
      "step": 204
    },
    {
      "epoch": 0.9070796460176991,
      "grad_norm": 0.04150390625,
      "learning_rate": 0.00021858407079646019,
      "loss": 1.1094,
      "step": 205
    },
    {
      "epoch": 0.911504424778761,
      "grad_norm": 0.064453125,
      "learning_rate": 0.00021769911504424777,
      "loss": 1.1966,
      "step": 206
    },
    {
      "epoch": 0.915929203539823,
      "grad_norm": 0.049560546875,
      "learning_rate": 0.00021681415929203541,
      "loss": 1.1902,
      "step": 207
    },
    {
      "epoch": 0.9203539823008849,
      "grad_norm": 0.06884765625,
      "learning_rate": 0.000215929203539823,
      "loss": 1.1077,
      "step": 208
    },
    {
      "epoch": 0.9247787610619469,
      "grad_norm": 0.042236328125,
      "learning_rate": 0.00021504424778761064,
      "loss": 0.9293,
      "step": 209
    },
    {
      "epoch": 0.9292035398230089,
      "grad_norm": 0.040283203125,
      "learning_rate": 0.00021415929203539826,
      "loss": 1.0238,
      "step": 210
    },
    {
      "epoch": 0.9336283185840708,
      "grad_norm": 0.046142578125,
      "learning_rate": 0.00021327433628318585,
      "loss": 0.9889,
      "step": 211
    },
    {
      "epoch": 0.9380530973451328,
      "grad_norm": 0.048583984375,
      "learning_rate": 0.0002123893805309735,
      "loss": 1.0614,
      "step": 212
    },
    {
      "epoch": 0.9424778761061947,
      "grad_norm": 0.048095703125,
      "learning_rate": 0.00021150442477876107,
      "loss": 1.0836,
      "step": 213
    },
    {
      "epoch": 0.9469026548672567,
      "grad_norm": 0.047607421875,
      "learning_rate": 0.0002106194690265487,
      "loss": 1.0815,
      "step": 214
    },
    {
      "epoch": 0.9513274336283186,
      "grad_norm": 0.039794921875,
      "learning_rate": 0.00020973451327433628,
      "loss": 1.0021,
      "step": 215
    },
    {
      "epoch": 0.9557522123893806,
      "grad_norm": 0.049072265625,
      "learning_rate": 0.00020884955752212392,
      "loss": 1.0002,
      "step": 216
    },
    {
      "epoch": 0.9601769911504425,
      "grad_norm": 0.04541015625,
      "learning_rate": 0.0002079646017699115,
      "loss": 1.2081,
      "step": 217
    },
    {
      "epoch": 0.9646017699115044,
      "grad_norm": 0.0439453125,
      "learning_rate": 0.00020707964601769915,
      "loss": 1.0711,
      "step": 218
    },
    {
      "epoch": 0.9690265486725663,
      "grad_norm": 0.049072265625,
      "learning_rate": 0.00020619469026548673,
      "loss": 1.0342,
      "step": 219
    },
    {
      "epoch": 0.9734513274336283,
      "grad_norm": 0.0556640625,
      "learning_rate": 0.00020530973451327435,
      "loss": 1.0103,
      "step": 220
    },
    {
      "epoch": 0.9778761061946902,
      "grad_norm": 0.04931640625,
      "learning_rate": 0.00020442477876106194,
      "loss": 0.9692,
      "step": 221
    },
    {
      "epoch": 0.9823008849557522,
      "grad_norm": 0.04296875,
      "learning_rate": 0.00020353982300884958,
      "loss": 0.9639,
      "step": 222
    },
    {
      "epoch": 0.9867256637168141,
      "grad_norm": 0.040771484375,
      "learning_rate": 0.00020265486725663717,
      "loss": 0.9039,
      "step": 223
    },
    {
      "epoch": 0.9911504424778761,
      "grad_norm": 0.049560546875,
      "learning_rate": 0.00020176991150442478,
      "loss": 0.9265,
      "step": 224
    },
    {
      "epoch": 0.995575221238938,
      "grad_norm": 0.04248046875,
      "learning_rate": 0.00020088495575221237,
      "loss": 0.8961,
      "step": 225
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.0625,
      "learning_rate": 0.0002,
      "loss": 1.0299,
      "step": 226
    },
    {
      "epoch": 1.0044247787610618,
      "grad_norm": 0.052978515625,
      "learning_rate": 0.00019911504424778762,
      "loss": 0.8533,
      "step": 227
    },
    {
      "epoch": 1.008849557522124,
      "grad_norm": 0.042236328125,
      "learning_rate": 0.00019823008849557524,
      "loss": 0.937,
      "step": 228
    },
    {
      "epoch": 1.0132743362831858,
      "grad_norm": 0.05029296875,
      "learning_rate": 0.00019734513274336283,
      "loss": 0.8202,
      "step": 229
    },
    {
      "epoch": 1.0176991150442478,
      "grad_norm": 0.0517578125,
      "learning_rate": 0.00019646017699115044,
      "loss": 0.8976,
      "step": 230
    },
    {
      "epoch": 1.0221238938053097,
      "grad_norm": 0.048828125,
      "learning_rate": 0.00019557522123893806,
      "loss": 0.8791,
      "step": 231
    },
    {
      "epoch": 1.0265486725663717,
      "grad_norm": 0.050537109375,
      "learning_rate": 0.00019469026548672567,
      "loss": 1.0753,
      "step": 232
    },
    {
      "epoch": 1.0309734513274336,
      "grad_norm": 0.05615234375,
      "learning_rate": 0.00019380530973451328,
      "loss": 1.0464,
      "step": 233
    },
    {
      "epoch": 1.0353982300884956,
      "grad_norm": 0.059326171875,
      "learning_rate": 0.00019292035398230087,
      "loss": 0.8115,
      "step": 234
    },
    {
      "epoch": 1.0398230088495575,
      "grad_norm": 0.058349609375,
      "learning_rate": 0.0001920353982300885,
      "loss": 0.9851,
      "step": 235
    },
    {
      "epoch": 1.0442477876106195,
      "grad_norm": 0.068359375,
      "learning_rate": 0.00019115044247787613,
      "loss": 0.8867,
      "step": 236
    },
    {
      "epoch": 1.0486725663716814,
      "grad_norm": 0.059814453125,
      "learning_rate": 0.00019026548672566374,
      "loss": 0.7882,
      "step": 237
    },
    {
      "epoch": 1.0530973451327434,
      "grad_norm": 0.06494140625,
      "learning_rate": 0.00018938053097345133,
      "loss": 1.0028,
      "step": 238
    },
    {
      "epoch": 1.0575221238938053,
      "grad_norm": 0.06103515625,
      "learning_rate": 0.00018849557522123894,
      "loss": 0.9446,
      "step": 239
    },
    {
      "epoch": 1.0619469026548674,
      "grad_norm": 0.059814453125,
      "learning_rate": 0.00018761061946902656,
      "loss": 1.0249,
      "step": 240
    },
    {
      "epoch": 1.0663716814159292,
      "grad_norm": 0.053955078125,
      "learning_rate": 0.00018672566371681417,
      "loss": 0.9277,
      "step": 241
    },
    {
      "epoch": 1.0707964601769913,
      "grad_norm": 0.0751953125,
      "learning_rate": 0.0001858407079646018,
      "loss": 0.8228,
      "step": 242
    },
    {
      "epoch": 1.075221238938053,
      "grad_norm": 0.058837890625,
      "learning_rate": 0.00018495575221238938,
      "loss": 0.8757,
      "step": 243
    },
    {
      "epoch": 1.079646017699115,
      "grad_norm": 0.059326171875,
      "learning_rate": 0.000184070796460177,
      "loss": 0.7868,
      "step": 244
    },
    {
      "epoch": 1.084070796460177,
      "grad_norm": 0.07275390625,
      "learning_rate": 0.0001831858407079646,
      "loss": 0.878,
      "step": 245
    },
    {
      "epoch": 1.0884955752212389,
      "grad_norm": 0.05908203125,
      "learning_rate": 0.00018230088495575222,
      "loss": 0.8944,
      "step": 246
    },
    {
      "epoch": 1.092920353982301,
      "grad_norm": 0.059326171875,
      "learning_rate": 0.00018141592920353983,
      "loss": 0.8831,
      "step": 247
    },
    {
      "epoch": 1.0973451327433628,
      "grad_norm": 0.060302734375,
      "learning_rate": 0.00018053097345132742,
      "loss": 0.9312,
      "step": 248
    },
    {
      "epoch": 1.1017699115044248,
      "grad_norm": 0.053955078125,
      "learning_rate": 0.00017964601769911504,
      "loss": 0.7488,
      "step": 249
    },
    {
      "epoch": 1.1061946902654867,
      "grad_norm": 0.06298828125,
      "learning_rate": 0.00017876106194690265,
      "loss": 0.9677,
      "step": 250
    },
    {
      "epoch": 1.1106194690265487,
      "grad_norm": 0.06298828125,
      "learning_rate": 0.0001778761061946903,
      "loss": 0.8391,
      "step": 251
    },
    {
      "epoch": 1.1150442477876106,
      "grad_norm": 0.061279296875,
      "learning_rate": 0.0001769911504424779,
      "loss": 0.9225,
      "step": 252
    },
    {
      "epoch": 1.1194690265486726,
      "grad_norm": 0.080078125,
      "learning_rate": 0.0001761061946902655,
      "loss": 0.7969,
      "step": 253
    },
    {
      "epoch": 1.1238938053097345,
      "grad_norm": 0.06494140625,
      "learning_rate": 0.0001752212389380531,
      "loss": 0.8957,
      "step": 254
    },
    {
      "epoch": 1.1283185840707965,
      "grad_norm": 0.062255859375,
      "learning_rate": 0.00017433628318584072,
      "loss": 0.9192,
      "step": 255
    },
    {
      "epoch": 1.1327433628318584,
      "grad_norm": 0.1005859375,
      "learning_rate": 0.00017345132743362834,
      "loss": 0.8669,
      "step": 256
    },
    {
      "epoch": 1.1371681415929205,
      "grad_norm": 0.0810546875,
      "learning_rate": 0.00017256637168141595,
      "loss": 0.9332,
      "step": 257
    },
    {
      "epoch": 1.1415929203539823,
      "grad_norm": 0.06689453125,
      "learning_rate": 0.00017168141592920354,
      "loss": 0.8392,
      "step": 258
    },
    {
      "epoch": 1.1460176991150441,
      "grad_norm": 0.06494140625,
      "learning_rate": 0.00017079646017699115,
      "loss": 1.1159,
      "step": 259
    },
    {
      "epoch": 1.1504424778761062,
      "grad_norm": 0.0625,
      "learning_rate": 0.00016991150442477877,
      "loss": 0.9649,
      "step": 260
    },
    {
      "epoch": 1.154867256637168,
      "grad_norm": 0.059326171875,
      "learning_rate": 0.00016902654867256638,
      "loss": 0.9653,
      "step": 261
    },
    {
      "epoch": 1.1592920353982301,
      "grad_norm": 0.05322265625,
      "learning_rate": 0.000168141592920354,
      "loss": 0.8342,
      "step": 262
    },
    {
      "epoch": 1.163716814159292,
      "grad_norm": 0.109375,
      "learning_rate": 0.00016725663716814158,
      "loss": 0.7385,
      "step": 263
    },
    {
      "epoch": 1.168141592920354,
      "grad_norm": 0.076171875,
      "learning_rate": 0.0001663716814159292,
      "loss": 0.7605,
      "step": 264
    },
    {
      "epoch": 1.1725663716814159,
      "grad_norm": 0.057373046875,
      "learning_rate": 0.00016548672566371681,
      "loss": 0.8457,
      "step": 265
    },
    {
      "epoch": 1.176991150442478,
      "grad_norm": 0.08447265625,
      "learning_rate": 0.00016460176991150443,
      "loss": 0.872,
      "step": 266
    },
    {
      "epoch": 1.1814159292035398,
      "grad_norm": 0.07470703125,
      "learning_rate": 0.00016371681415929204,
      "loss": 1.0322,
      "step": 267
    },
    {
      "epoch": 1.1858407079646018,
      "grad_norm": 0.06640625,
      "learning_rate": 0.00016283185840707966,
      "loss": 1.0532,
      "step": 268
    },
    {
      "epoch": 1.1902654867256637,
      "grad_norm": 0.059814453125,
      "learning_rate": 0.00016194690265486727,
      "loss": 0.9205,
      "step": 269
    },
    {
      "epoch": 1.1946902654867257,
      "grad_norm": 0.060546875,
      "learning_rate": 0.0001610619469026549,
      "loss": 0.8789,
      "step": 270
    },
    {
      "epoch": 1.1991150442477876,
      "grad_norm": 0.0654296875,
      "learning_rate": 0.0001601769911504425,
      "loss": 1.0501,
      "step": 271
    },
    {
      "epoch": 1.2035398230088497,
      "grad_norm": 0.0634765625,
      "learning_rate": 0.0001592920353982301,
      "loss": 0.8666,
      "step": 272
    },
    {
      "epoch": 1.2079646017699115,
      "grad_norm": 0.0595703125,
      "learning_rate": 0.0001584070796460177,
      "loss": 0.8761,
      "step": 273
    },
    {
      "epoch": 1.2123893805309733,
      "grad_norm": 0.057373046875,
      "learning_rate": 0.00015752212389380532,
      "loss": 0.8827,
      "step": 274
    },
    {
      "epoch": 1.2168141592920354,
      "grad_norm": 0.07373046875,
      "learning_rate": 0.00015663716814159293,
      "loss": 0.8162,
      "step": 275
    },
    {
      "epoch": 1.2212389380530975,
      "grad_norm": 0.06494140625,
      "learning_rate": 0.00015575221238938055,
      "loss": 0.7613,
      "step": 276
    },
    {
      "epoch": 1.2256637168141593,
      "grad_norm": 0.06494140625,
      "learning_rate": 0.00015486725663716813,
      "loss": 0.825,
      "step": 277
    },
    {
      "epoch": 1.2300884955752212,
      "grad_norm": 0.061767578125,
      "learning_rate": 0.00015398230088495575,
      "loss": 0.9633,
      "step": 278
    },
    {
      "epoch": 1.2345132743362832,
      "grad_norm": 0.0595703125,
      "learning_rate": 0.00015309734513274336,
      "loss": 0.9036,
      "step": 279
    },
    {
      "epoch": 1.238938053097345,
      "grad_norm": 0.076171875,
      "learning_rate": 0.00015221238938053098,
      "loss": 0.9527,
      "step": 280
    },
    {
      "epoch": 1.2433628318584071,
      "grad_norm": 0.06005859375,
      "learning_rate": 0.0001513274336283186,
      "loss": 0.9089,
      "step": 281
    },
    {
      "epoch": 1.247787610619469,
      "grad_norm": 0.056884765625,
      "learning_rate": 0.00015044247787610618,
      "loss": 0.8911,
      "step": 282
    },
    {
      "epoch": 1.252212389380531,
      "grad_norm": 0.0908203125,
      "learning_rate": 0.00014955752212389382,
      "loss": 0.7871,
      "step": 283
    },
    {
      "epoch": 1.2566371681415929,
      "grad_norm": 0.0771484375,
      "learning_rate": 0.00014867256637168144,
      "loss": 0.8415,
      "step": 284
    },
    {
      "epoch": 1.261061946902655,
      "grad_norm": 0.07177734375,
      "learning_rate": 0.00014778761061946905,
      "loss": 1.0105,
      "step": 285
    },
    {
      "epoch": 1.2654867256637168,
      "grad_norm": 0.0986328125,
      "learning_rate": 0.00014690265486725664,
      "loss": 0.9677,
      "step": 286
    },
    {
      "epoch": 1.2699115044247788,
      "grad_norm": 0.0888671875,
      "learning_rate": 0.00014601769911504425,
      "loss": 0.837,
      "step": 287
    },
    {
      "epoch": 1.2743362831858407,
      "grad_norm": 0.126953125,
      "learning_rate": 0.00014513274336283187,
      "loss": 0.8605,
      "step": 288
    },
    {
      "epoch": 1.2787610619469025,
      "grad_norm": 0.06298828125,
      "learning_rate": 0.00014424778761061948,
      "loss": 0.8717,
      "step": 289
    },
    {
      "epoch": 1.2831858407079646,
      "grad_norm": 0.08740234375,
      "learning_rate": 0.0001433628318584071,
      "loss": 1.0469,
      "step": 290
    },
    {
      "epoch": 1.2876106194690267,
      "grad_norm": 0.061767578125,
      "learning_rate": 0.00014247787610619468,
      "loss": 0.9339,
      "step": 291
    },
    {
      "epoch": 1.2920353982300885,
      "grad_norm": 0.072265625,
      "learning_rate": 0.0001415929203539823,
      "loss": 0.7235,
      "step": 292
    },
    {
      "epoch": 1.2964601769911503,
      "grad_norm": 0.087890625,
      "learning_rate": 0.0001407079646017699,
      "loss": 0.8648,
      "step": 293
    },
    {
      "epoch": 1.3008849557522124,
      "grad_norm": 0.062255859375,
      "learning_rate": 0.00013982300884955753,
      "loss": 0.8842,
      "step": 294
    },
    {
      "epoch": 1.3053097345132743,
      "grad_norm": 0.08056640625,
      "learning_rate": 0.00013893805309734514,
      "loss": 0.9593,
      "step": 295
    },
    {
      "epoch": 1.3097345132743363,
      "grad_norm": 0.0771484375,
      "learning_rate": 0.00013805309734513276,
      "loss": 0.9122,
      "step": 296
    },
    {
      "epoch": 1.3141592920353982,
      "grad_norm": 0.06396484375,
      "learning_rate": 0.00013716814159292034,
      "loss": 1.0082,
      "step": 297
    },
    {
      "epoch": 1.3185840707964602,
      "grad_norm": 0.06298828125,
      "learning_rate": 0.00013628318584070796,
      "loss": 0.884,
      "step": 298
    },
    {
      "epoch": 1.323008849557522,
      "grad_norm": 0.08349609375,
      "learning_rate": 0.0001353982300884956,
      "loss": 0.8348,
      "step": 299
    },
    {
      "epoch": 1.3274336283185841,
      "grad_norm": 0.0732421875,
      "learning_rate": 0.00013451327433628321,
      "loss": 0.747,
      "step": 300
    },
    {
      "epoch": 1.331858407079646,
      "grad_norm": 0.06396484375,
      "learning_rate": 0.0001336283185840708,
      "loss": 0.8841,
      "step": 301
    },
    {
      "epoch": 1.336283185840708,
      "grad_norm": 0.06005859375,
      "learning_rate": 0.00013274336283185842,
      "loss": 0.8985,
      "step": 302
    },
    {
      "epoch": 1.3407079646017699,
      "grad_norm": 0.068359375,
      "learning_rate": 0.00013185840707964603,
      "loss": 0.9008,
      "step": 303
    },
    {
      "epoch": 1.3451327433628317,
      "grad_norm": 0.076171875,
      "learning_rate": 0.00013097345132743365,
      "loss": 0.8909,
      "step": 304
    },
    {
      "epoch": 1.3495575221238938,
      "grad_norm": 0.09521484375,
      "learning_rate": 0.00013008849557522126,
      "loss": 0.8108,
      "step": 305
    },
    {
      "epoch": 1.3539823008849559,
      "grad_norm": 0.08154296875,
      "learning_rate": 0.00012920353982300885,
      "loss": 0.8546,
      "step": 306
    },
    {
      "epoch": 1.3584070796460177,
      "grad_norm": 0.0771484375,
      "learning_rate": 0.00012831858407079646,
      "loss": 1.0212,
      "step": 307
    },
    {
      "epoch": 1.3628318584070795,
      "grad_norm": 0.06201171875,
      "learning_rate": 0.00012743362831858408,
      "loss": 0.974,
      "step": 308
    },
    {
      "epoch": 1.3672566371681416,
      "grad_norm": 0.095703125,
      "learning_rate": 0.0001265486725663717,
      "loss": 0.7493,
      "step": 309
    },
    {
      "epoch": 1.3716814159292037,
      "grad_norm": 0.09765625,
      "learning_rate": 0.0001256637168141593,
      "loss": 1.0118,
      "step": 310
    },
    {
      "epoch": 1.3761061946902655,
      "grad_norm": 0.08740234375,
      "learning_rate": 0.0001247787610619469,
      "loss": 0.8243,
      "step": 311
    },
    {
      "epoch": 1.3805309734513274,
      "grad_norm": 0.06884765625,
      "learning_rate": 0.0001238938053097345,
      "loss": 0.9024,
      "step": 312
    },
    {
      "epoch": 1.3849557522123894,
      "grad_norm": 0.08740234375,
      "learning_rate": 0.00012300884955752212,
      "loss": 0.9018,
      "step": 313
    },
    {
      "epoch": 1.3893805309734513,
      "grad_norm": 0.09814453125,
      "learning_rate": 0.00012212389380530974,
      "loss": 1.1168,
      "step": 314
    },
    {
      "epoch": 1.3938053097345133,
      "grad_norm": 0.07861328125,
      "learning_rate": 0.00012123893805309736,
      "loss": 0.9847,
      "step": 315
    },
    {
      "epoch": 1.3982300884955752,
      "grad_norm": 0.07080078125,
      "learning_rate": 0.00012035398230088497,
      "loss": 0.9884,
      "step": 316
    },
    {
      "epoch": 1.4026548672566372,
      "grad_norm": 0.07568359375,
      "learning_rate": 0.00011946902654867258,
      "loss": 0.9483,
      "step": 317
    },
    {
      "epoch": 1.407079646017699,
      "grad_norm": 0.06787109375,
      "learning_rate": 0.0001185840707964602,
      "loss": 0.8768,
      "step": 318
    },
    {
      "epoch": 1.411504424778761,
      "grad_norm": 0.0751953125,
      "learning_rate": 0.0001176991150442478,
      "loss": 0.9072,
      "step": 319
    },
    {
      "epoch": 1.415929203539823,
      "grad_norm": 0.0810546875,
      "learning_rate": 0.00011681415929203541,
      "loss": 0.8627,
      "step": 320
    },
    {
      "epoch": 1.420353982300885,
      "grad_norm": 0.07275390625,
      "learning_rate": 0.00011592920353982301,
      "loss": 0.9518,
      "step": 321
    },
    {
      "epoch": 1.424778761061947,
      "grad_norm": 0.0830078125,
      "learning_rate": 0.00011504424778761063,
      "loss": 0.8705,
      "step": 322
    },
    {
      "epoch": 1.4292035398230087,
      "grad_norm": 0.061767578125,
      "learning_rate": 0.00011415929203539824,
      "loss": 0.8535,
      "step": 323
    },
    {
      "epoch": 1.4336283185840708,
      "grad_norm": 0.06396484375,
      "learning_rate": 0.00011327433628318584,
      "loss": 0.8835,
      "step": 324
    },
    {
      "epoch": 1.4380530973451329,
      "grad_norm": 0.09033203125,
      "learning_rate": 0.00011238938053097346,
      "loss": 1.1187,
      "step": 325
    },
    {
      "epoch": 1.4424778761061947,
      "grad_norm": 0.08935546875,
      "learning_rate": 0.00011150442477876106,
      "loss": 0.6991,
      "step": 326
    },
    {
      "epoch": 1.4469026548672566,
      "grad_norm": 0.10546875,
      "learning_rate": 0.00011061946902654867,
      "loss": 0.8172,
      "step": 327
    },
    {
      "epoch": 1.4513274336283186,
      "grad_norm": 0.1015625,
      "learning_rate": 0.00010973451327433629,
      "loss": 0.8526,
      "step": 328
    },
    {
      "epoch": 1.4557522123893805,
      "grad_norm": 0.06640625,
      "learning_rate": 0.00010884955752212389,
      "loss": 0.8048,
      "step": 329
    },
    {
      "epoch": 1.4601769911504425,
      "grad_norm": 0.0693359375,
      "learning_rate": 0.0001079646017699115,
      "loss": 0.9438,
      "step": 330
    },
    {
      "epoch": 1.4646017699115044,
      "grad_norm": 0.08837890625,
      "learning_rate": 0.00010707964601769913,
      "loss": 0.9667,
      "step": 331
    },
    {
      "epoch": 1.4690265486725664,
      "grad_norm": 0.0810546875,
      "learning_rate": 0.00010619469026548674,
      "loss": 1.0007,
      "step": 332
    },
    {
      "epoch": 1.4734513274336283,
      "grad_norm": 0.07470703125,
      "learning_rate": 0.00010530973451327434,
      "loss": 0.971,
      "step": 333
    },
    {
      "epoch": 1.4778761061946903,
      "grad_norm": 0.09033203125,
      "learning_rate": 0.00010442477876106196,
      "loss": 0.8334,
      "step": 334
    },
    {
      "epoch": 1.4823008849557522,
      "grad_norm": 0.06640625,
      "learning_rate": 0.00010353982300884957,
      "loss": 0.7885,
      "step": 335
    },
    {
      "epoch": 1.4867256637168142,
      "grad_norm": 0.0947265625,
      "learning_rate": 0.00010265486725663717,
      "loss": 0.825,
      "step": 336
    },
    {
      "epoch": 1.491150442477876,
      "grad_norm": 0.08154296875,
      "learning_rate": 0.00010176991150442479,
      "loss": 0.9044,
      "step": 337
    },
    {
      "epoch": 1.495575221238938,
      "grad_norm": 0.07763671875,
      "learning_rate": 0.00010088495575221239,
      "loss": 0.7607,
      "step": 338
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.0693359375,
      "learning_rate": 0.0001,
      "loss": 0.966,
      "step": 339
    },
    {
      "epoch": 1.504424778761062,
      "grad_norm": 0.1005859375,
      "learning_rate": 9.911504424778762e-05,
      "loss": 0.7745,
      "step": 340
    },
    {
      "epoch": 1.508849557522124,
      "grad_norm": 0.058837890625,
      "learning_rate": 9.823008849557522e-05,
      "loss": 0.8849,
      "step": 341
    },
    {
      "epoch": 1.5132743362831858,
      "grad_norm": 0.0703125,
      "learning_rate": 9.734513274336283e-05,
      "loss": 0.9905,
      "step": 342
    },
    {
      "epoch": 1.5176991150442478,
      "grad_norm": 0.1025390625,
      "learning_rate": 9.646017699115044e-05,
      "loss": 0.8459,
      "step": 343
    },
    {
      "epoch": 1.5221238938053099,
      "grad_norm": 0.07275390625,
      "learning_rate": 9.557522123893806e-05,
      "loss": 0.8842,
      "step": 344
    },
    {
      "epoch": 1.5265486725663717,
      "grad_norm": 0.083984375,
      "learning_rate": 9.469026548672566e-05,
      "loss": 1.0654,
      "step": 345
    },
    {
      "epoch": 1.5309734513274336,
      "grad_norm": 0.0615234375,
      "learning_rate": 9.380530973451328e-05,
      "loss": 0.8734,
      "step": 346
    },
    {
      "epoch": 1.5353982300884956,
      "grad_norm": 0.0791015625,
      "learning_rate": 9.29203539823009e-05,
      "loss": 0.9752,
      "step": 347
    },
    {
      "epoch": 1.5398230088495575,
      "grad_norm": 0.0751953125,
      "learning_rate": 9.20353982300885e-05,
      "loss": 0.7664,
      "step": 348
    },
    {
      "epoch": 1.5442477876106194,
      "grad_norm": 0.0888671875,
      "learning_rate": 9.115044247787611e-05,
      "loss": 0.8328,
      "step": 349
    },
    {
      "epoch": 1.5486725663716814,
      "grad_norm": 0.0712890625,
      "learning_rate": 9.026548672566371e-05,
      "loss": 0.8581,
      "step": 350
    },
    {
      "epoch": 1.5530973451327434,
      "grad_norm": 0.0888671875,
      "learning_rate": 8.938053097345133e-05,
| "loss": 0.7521, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.5575221238938053, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 8.849557522123895e-05, | |
| "loss": 1.1778, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.5619469026548671, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 8.761061946902655e-05, | |
| "loss": 0.8007, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.5663716814159292, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 8.672566371681417e-05, | |
| "loss": 1.1795, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.5707964601769913, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 8.584070796460177e-05, | |
| "loss": 0.9632, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.575221238938053, | |
| "grad_norm": 0.11572265625, | |
| "learning_rate": 8.495575221238938e-05, | |
| "loss": 0.7671, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.579646017699115, | |
| "grad_norm": 0.1396484375, | |
| "learning_rate": 8.4070796460177e-05, | |
| "loss": 0.692, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.584070796460177, | |
| "grad_norm": 0.10791015625, | |
| "learning_rate": 8.31858407079646e-05, | |
| "loss": 0.6548, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.588495575221239, | |
| "grad_norm": 0.080078125, | |
| "learning_rate": 8.230088495575221e-05, | |
| "loss": 0.805, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.592920353982301, | |
| "grad_norm": 0.06005859375, | |
| "learning_rate": 8.141592920353983e-05, | |
| "loss": 0.7988, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.5973451327433628, | |
| "grad_norm": 0.07861328125, | |
| "learning_rate": 8.053097345132744e-05, | |
| "loss": 0.9695, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.6017699115044248, | |
| "grad_norm": 0.07421875, | |
| "learning_rate": 7.964601769911504e-05, | |
| "loss": 1.0397, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.606194690265487, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 7.876106194690266e-05, | |
| "loss": 0.9098, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.6106194690265485, | |
| "grad_norm": 0.07861328125, | |
| "learning_rate": 7.787610619469027e-05, | |
| "loss": 0.9249, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.6150442477876106, | |
| "grad_norm": 0.0615234375, | |
| "learning_rate": 7.699115044247787e-05, | |
| "loss": 0.7443, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.6194690265486726, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 7.610619469026549e-05, | |
| "loss": 0.8042, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.6238938053097345, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 7.522123893805309e-05, | |
| "loss": 0.8271, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.6283185840707963, | |
| "grad_norm": 0.06884765625, | |
| "learning_rate": 7.433628318584072e-05, | |
| "loss": 0.9711, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.6327433628318584, | |
| "grad_norm": 0.06689453125, | |
| "learning_rate": 7.345132743362832e-05, | |
| "loss": 0.8821, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.6371681415929205, | |
| "grad_norm": 0.0556640625, | |
| "learning_rate": 7.256637168141593e-05, | |
| "loss": 0.7417, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.6415929203539823, | |
| "grad_norm": 0.06591796875, | |
| "learning_rate": 7.168141592920355e-05, | |
| "loss": 0.9247, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.6460176991150441, | |
| "grad_norm": 0.06396484375, | |
| "learning_rate": 7.079646017699115e-05, | |
| "loss": 0.9101, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.6504424778761062, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 6.991150442477876e-05, | |
| "loss": 1.0123, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.6548672566371683, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 6.902654867256638e-05, | |
| "loss": 0.7791, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.6592920353982301, | |
| "grad_norm": 0.07275390625, | |
| "learning_rate": 6.814159292035398e-05, | |
| "loss": 1.0589, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.663716814159292, | |
| "grad_norm": 0.058349609375, | |
| "learning_rate": 6.725663716814161e-05, | |
| "loss": 0.8401, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.668141592920354, | |
| "grad_norm": 0.059814453125, | |
| "learning_rate": 6.637168141592921e-05, | |
| "loss": 0.8201, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.672566371681416, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 6.548672566371682e-05, | |
| "loss": 0.913, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.676991150442478, | |
| "grad_norm": 0.060302734375, | |
| "learning_rate": 6.460176991150442e-05, | |
| "loss": 0.8276, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.6814159292035398, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 6.371681415929204e-05, | |
| "loss": 0.7729, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.6858407079646018, | |
| "grad_norm": 0.0703125, | |
| "learning_rate": 6.283185840707965e-05, | |
| "loss": 1.0113, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.6902654867256637, | |
| "grad_norm": 0.0634765625, | |
| "learning_rate": 6.194690265486725e-05, | |
| "loss": 0.8446, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.6946902654867255, | |
| "grad_norm": 0.0673828125, | |
| "learning_rate": 6.106194690265487e-05, | |
| "loss": 0.8878, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.6991150442477876, | |
| "grad_norm": 0.1103515625, | |
| "learning_rate": 6.017699115044248e-05, | |
| "loss": 0.6718, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.7035398230088497, | |
| "grad_norm": 0.060302734375, | |
| "learning_rate": 5.92920353982301e-05, | |
| "loss": 0.8153, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.7079646017699115, | |
| "grad_norm": 0.0712890625, | |
| "learning_rate": 5.8407079646017705e-05, | |
| "loss": 0.9931, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.7123893805309733, | |
| "grad_norm": 0.0556640625, | |
| "learning_rate": 5.752212389380531e-05, | |
| "loss": 0.7466, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.7168141592920354, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 5.663716814159292e-05, | |
| "loss": 0.9364, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.7212389380530975, | |
| "grad_norm": 0.068359375, | |
| "learning_rate": 5.575221238938053e-05, | |
| "loss": 0.8851, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.7256637168141593, | |
| "grad_norm": 0.061279296875, | |
| "learning_rate": 5.486725663716814e-05, | |
| "loss": 0.8714, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.7300884955752212, | |
| "grad_norm": 0.06982421875, | |
| "learning_rate": 5.398230088495575e-05, | |
| "loss": 0.8885, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.7345132743362832, | |
| "grad_norm": 0.06298828125, | |
| "learning_rate": 5.309734513274337e-05, | |
| "loss": 0.8724, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.7389380530973453, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 5.221238938053098e-05, | |
| "loss": 1.1328, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.7433628318584071, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 5.132743362831859e-05, | |
| "loss": 0.7735, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.747787610619469, | |
| "grad_norm": 0.06982421875, | |
| "learning_rate": 5.0442477876106195e-05, | |
| "loss": 0.9325, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.752212389380531, | |
| "grad_norm": 0.07080078125, | |
| "learning_rate": 4.955752212389381e-05, | |
| "loss": 0.9273, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.7566371681415929, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 4.867256637168142e-05, | |
| "loss": 0.7756, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.7610619469026547, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 4.778761061946903e-05, | |
| "loss": 1.0591, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.7654867256637168, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 4.690265486725664e-05, | |
| "loss": 0.7867, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.7699115044247788, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 4.601769911504425e-05, | |
| "loss": 0.8369, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.7743362831858407, | |
| "grad_norm": 0.06396484375, | |
| "learning_rate": 4.5132743362831855e-05, | |
| "loss": 0.9999, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.7787610619469025, | |
| "grad_norm": 0.061767578125, | |
| "learning_rate": 4.4247787610619477e-05, | |
| "loss": 0.8612, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.7831858407079646, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 4.3362831858407084e-05, | |
| "loss": 0.8529, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.7876106194690267, | |
| "grad_norm": 0.07763671875, | |
| "learning_rate": 4.247787610619469e-05, | |
| "loss": 0.8809, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.7920353982300885, | |
| "grad_norm": 0.07177734375, | |
| "learning_rate": 4.15929203539823e-05, | |
| "loss": 0.9739, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.7964601769911503, | |
| "grad_norm": 0.07568359375, | |
| "learning_rate": 4.0707964601769914e-05, | |
| "loss": 0.9416, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.8008849557522124, | |
| "grad_norm": 0.061767578125, | |
| "learning_rate": 3.982300884955752e-05, | |
| "loss": 0.8359, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.8053097345132745, | |
| "grad_norm": 0.0712890625, | |
| "learning_rate": 3.893805309734514e-05, | |
| "loss": 0.9323, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.8097345132743363, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 3.8053097345132744e-05, | |
| "loss": 0.8084, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.8141592920353982, | |
| "grad_norm": 0.06298828125, | |
| "learning_rate": 3.716814159292036e-05, | |
| "loss": 0.9237, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.8185840707964602, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 3.628318584070797e-05, | |
| "loss": 1.0047, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.823008849557522, | |
| "grad_norm": 0.0654296875, | |
| "learning_rate": 3.5398230088495574e-05, | |
| "loss": 0.9763, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.827433628318584, | |
| "grad_norm": 0.06201171875, | |
| "learning_rate": 3.451327433628319e-05, | |
| "loss": 0.7498, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.831858407079646, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 3.3628318584070804e-05, | |
| "loss": 0.8973, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.836283185840708, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 3.274336283185841e-05, | |
| "loss": 0.9526, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.8407079646017699, | |
| "grad_norm": 0.061767578125, | |
| "learning_rate": 3.185840707964602e-05, | |
| "loss": 0.9184, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.8451327433628317, | |
| "grad_norm": 0.0673828125, | |
| "learning_rate": 3.097345132743363e-05, | |
| "loss": 0.9124, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.8495575221238938, | |
| "grad_norm": 0.056884765625, | |
| "learning_rate": 3.008849557522124e-05, | |
| "loss": 0.8303, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.8539823008849559, | |
| "grad_norm": 0.0703125, | |
| "learning_rate": 2.9203539823008852e-05, | |
| "loss": 0.9533, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.8584070796460177, | |
| "grad_norm": 0.064453125, | |
| "learning_rate": 2.831858407079646e-05, | |
| "loss": 0.8822, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.8628318584070795, | |
| "grad_norm": 0.072265625, | |
| "learning_rate": 2.743362831858407e-05, | |
| "loss": 0.911, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.8672566371681416, | |
| "grad_norm": 0.060546875, | |
| "learning_rate": 2.6548672566371686e-05, | |
| "loss": 0.8209, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.8716814159292037, | |
| "grad_norm": 0.072265625, | |
| "learning_rate": 2.5663716814159294e-05, | |
| "loss": 0.8294, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.8761061946902655, | |
| "grad_norm": 0.061279296875, | |
| "learning_rate": 2.4778761061946905e-05, | |
| "loss": 0.7602, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.8805309734513274, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 2.3893805309734516e-05, | |
| "loss": 0.8862, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.8849557522123894, | |
| "grad_norm": 0.06494140625, | |
| "learning_rate": 2.3008849557522124e-05, | |
| "loss": 0.8715, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.8893805309734515, | |
| "grad_norm": 0.06982421875, | |
| "learning_rate": 2.2123893805309738e-05, | |
| "loss": 0.9235, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.893805309734513, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 2.1238938053097346e-05, | |
| "loss": 0.8975, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.8982300884955752, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 2.0353982300884957e-05, | |
| "loss": 1.0014, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.9026548672566372, | |
| "grad_norm": 0.05712890625, | |
| "learning_rate": 1.946902654867257e-05, | |
| "loss": 0.8397, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.907079646017699, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 1.858407079646018e-05, | |
| "loss": 1.0832, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.911504424778761, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 1.7699115044247787e-05, | |
| "loss": 0.7726, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.915929203539823, | |
| "grad_norm": 0.06884765625, | |
| "learning_rate": 1.6814159292035402e-05, | |
| "loss": 0.936, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.920353982300885, | |
| "grad_norm": 0.062255859375, | |
| "learning_rate": 1.592920353982301e-05, | |
| "loss": 1.0048, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.924778761061947, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 1.504424778761062e-05, | |
| "loss": 0.864, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.9292035398230087, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 1.415929203539823e-05, | |
| "loss": 0.9952, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.9336283185840708, | |
| "grad_norm": 0.06982421875, | |
| "learning_rate": 1.3274336283185843e-05, | |
| "loss": 0.8628, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.9380530973451329, | |
| "grad_norm": 0.060546875, | |
| "learning_rate": 1.2389380530973452e-05, | |
| "loss": 0.8487, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.9424778761061947, | |
| "grad_norm": 0.0634765625, | |
| "learning_rate": 1.1504424778761062e-05, | |
| "loss": 0.8495, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.9469026548672566, | |
| "grad_norm": 0.06689453125, | |
| "learning_rate": 1.0619469026548673e-05, | |
| "loss": 0.8815, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.9513274336283186, | |
| "grad_norm": 0.0634765625, | |
| "learning_rate": 9.734513274336284e-06, | |
| "loss": 0.8667, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.9557522123893807, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 8.849557522123894e-06, | |
| "loss": 0.7515, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.9601769911504425, | |
| "grad_norm": 0.07275390625, | |
| "learning_rate": 7.964601769911505e-06, | |
| "loss": 0.8048, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.9646017699115044, | |
| "grad_norm": 0.0625, | |
| "learning_rate": 7.079646017699115e-06, | |
| "loss": 0.9373, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.9690265486725664, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 6.194690265486726e-06, | |
| "loss": 0.7985, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.9734513274336283, | |
| "grad_norm": 0.1083984375, | |
| "learning_rate": 5.3097345132743365e-06, | |
| "loss": 0.9149, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.9778761061946901, | |
| "grad_norm": 0.05615234375, | |
| "learning_rate": 4.424778761061947e-06, | |
| "loss": 0.8296, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.9823008849557522, | |
| "grad_norm": 0.061767578125, | |
| "learning_rate": 3.5398230088495575e-06, | |
| "loss": 0.8539, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.9867256637168142, | |
| "grad_norm": 0.0625, | |
| "learning_rate": 2.6548672566371683e-06, | |
| "loss": 0.8847, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.991150442477876, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 1.7699115044247788e-06, | |
| "loss": 0.8814, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.995575221238938, | |
| "grad_norm": 0.0625, | |
| "learning_rate": 8.849557522123894e-07, | |
| "loss": 0.8299, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.12451171875, | |
| "learning_rate": 0.0, | |
| "loss": 0.8232, | |
| "step": 452 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 452, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 0, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.4086515032577802e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |