| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 9.971509971509972, | |
| "eval_steps": 100, | |
| "global_step": 10500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 0.3346759080886841, | |
| "eval_runtime": 218.6538, | |
| "eval_samples_per_second": 103.456, | |
| "eval_steps_per_second": 6.467, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 0.31195777654647827, | |
| "eval_runtime": 218.3816, | |
| "eval_samples_per_second": 103.585, | |
| "eval_steps_per_second": 6.475, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_loss": 0.31596091389656067, | |
| "eval_runtime": 220.5173, | |
| "eval_samples_per_second": 102.582, | |
| "eval_steps_per_second": 6.412, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_loss": 0.3161667287349701, | |
| "eval_runtime": 218.5361, | |
| "eval_samples_per_second": 103.512, | |
| "eval_steps_per_second": 6.47, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.7625830959164296e-05, | |
| "loss": 0.1792, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_loss": 0.3097754120826721, | |
| "eval_runtime": 219.0553, | |
| "eval_samples_per_second": 103.266, | |
| "eval_steps_per_second": 6.455, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_loss": 0.30482736229896545, | |
| "eval_runtime": 218.842, | |
| "eval_samples_per_second": 103.367, | |
| "eval_steps_per_second": 6.461, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_loss": 0.3050496578216553, | |
| "eval_runtime": 219.6074, | |
| "eval_samples_per_second": 103.007, | |
| "eval_steps_per_second": 6.439, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_loss": 0.30001184344291687, | |
| "eval_runtime": 219.2072, | |
| "eval_samples_per_second": 103.195, | |
| "eval_steps_per_second": 6.451, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_loss": 0.3052184283733368, | |
| "eval_runtime": 219.1722, | |
| "eval_samples_per_second": 103.211, | |
| "eval_steps_per_second": 6.452, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.5251661918328584e-05, | |
| "loss": 0.1198, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_loss": 0.3005639314651489, | |
| "eval_runtime": 218.7588, | |
| "eval_samples_per_second": 103.406, | |
| "eval_steps_per_second": 6.464, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "eval_loss": 0.29650744795799255, | |
| "eval_runtime": 218.5819, | |
| "eval_samples_per_second": 103.49, | |
| "eval_steps_per_second": 6.469, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_loss": 0.2948579788208008, | |
| "eval_runtime": 219.004, | |
| "eval_samples_per_second": 103.29, | |
| "eval_steps_per_second": 6.457, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "eval_loss": 0.28149962425231934, | |
| "eval_runtime": 218.3331, | |
| "eval_samples_per_second": 103.608, | |
| "eval_steps_per_second": 6.476, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "eval_loss": 0.28821900486946106, | |
| "eval_runtime": 218.9147, | |
| "eval_samples_per_second": 103.333, | |
| "eval_steps_per_second": 6.459, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 4.287749287749288e-05, | |
| "loss": 0.1092, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "eval_loss": 0.28900569677352905, | |
| "eval_runtime": 218.7775, | |
| "eval_samples_per_second": 103.397, | |
| "eval_steps_per_second": 6.463, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "eval_loss": 0.2834137976169586, | |
| "eval_runtime": 218.2122, | |
| "eval_samples_per_second": 103.665, | |
| "eval_steps_per_second": 6.48, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "eval_loss": 0.2864611744880676, | |
| "eval_runtime": 220.2143, | |
| "eval_samples_per_second": 102.723, | |
| "eval_steps_per_second": 6.421, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "eval_loss": 0.28005194664001465, | |
| "eval_runtime": 218.8438, | |
| "eval_samples_per_second": 103.366, | |
| "eval_steps_per_second": 6.461, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_loss": 0.28052985668182373, | |
| "eval_runtime": 220.1784, | |
| "eval_samples_per_second": 102.739, | |
| "eval_steps_per_second": 6.422, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 4.050332383665717e-05, | |
| "loss": 0.099, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_loss": 0.2817462384700775, | |
| "eval_runtime": 219.1612, | |
| "eval_samples_per_second": 103.216, | |
| "eval_steps_per_second": 6.452, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "eval_loss": 0.2855830788612366, | |
| "eval_runtime": 218.4225, | |
| "eval_samples_per_second": 103.565, | |
| "eval_steps_per_second": 6.474, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "eval_loss": 0.2786203622817993, | |
| "eval_runtime": 220.2549, | |
| "eval_samples_per_second": 102.704, | |
| "eval_steps_per_second": 6.42, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "eval_loss": 0.282156765460968, | |
| "eval_runtime": 218.8776, | |
| "eval_samples_per_second": 103.35, | |
| "eval_steps_per_second": 6.46, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "eval_loss": 0.2802504599094391, | |
| "eval_runtime": 218.6184, | |
| "eval_samples_per_second": 103.473, | |
| "eval_steps_per_second": 6.468, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 3.8129154795821466e-05, | |
| "loss": 0.094, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "eval_loss": 0.28312984108924866, | |
| "eval_runtime": 219.677, | |
| "eval_samples_per_second": 102.974, | |
| "eval_steps_per_second": 6.437, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "eval_loss": 0.2841149866580963, | |
| "eval_runtime": 219.3064, | |
| "eval_samples_per_second": 103.148, | |
| "eval_steps_per_second": 6.448, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "eval_loss": 0.2737499177455902, | |
| "eval_runtime": 218.5172, | |
| "eval_samples_per_second": 103.52, | |
| "eval_steps_per_second": 6.471, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_loss": 0.27663424611091614, | |
| "eval_runtime": 219.4686, | |
| "eval_samples_per_second": 103.072, | |
| "eval_steps_per_second": 6.443, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "eval_loss": 0.27256855368614197, | |
| "eval_runtime": 218.7767, | |
| "eval_samples_per_second": 103.398, | |
| "eval_steps_per_second": 6.463, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 3.575498575498576e-05, | |
| "loss": 0.0891, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "eval_loss": 0.27302590012550354, | |
| "eval_runtime": 218.602, | |
| "eval_samples_per_second": 103.48, | |
| "eval_steps_per_second": 6.468, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "eval_loss": 0.27365198731422424, | |
| "eval_runtime": 219.6229, | |
| "eval_samples_per_second": 102.999, | |
| "eval_steps_per_second": 6.438, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "eval_loss": 0.26861417293548584, | |
| "eval_runtime": 219.0414, | |
| "eval_samples_per_second": 103.273, | |
| "eval_steps_per_second": 6.455, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "eval_loss": 0.2664526700973511, | |
| "eval_runtime": 218.6581, | |
| "eval_samples_per_second": 103.454, | |
| "eval_steps_per_second": 6.467, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "eval_loss": 0.27480828762054443, | |
| "eval_runtime": 220.5641, | |
| "eval_samples_per_second": 102.56, | |
| "eval_steps_per_second": 6.411, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 3.338081671415005e-05, | |
| "loss": 0.0862, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "eval_loss": 0.26794129610061646, | |
| "eval_runtime": 218.5638, | |
| "eval_samples_per_second": 103.498, | |
| "eval_steps_per_second": 6.47, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "eval_loss": 0.2703064978122711, | |
| "eval_runtime": 218.4977, | |
| "eval_samples_per_second": 103.53, | |
| "eval_steps_per_second": 6.471, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "eval_loss": 0.2635132670402527, | |
| "eval_runtime": 219.2033, | |
| "eval_samples_per_second": 103.196, | |
| "eval_steps_per_second": 6.451, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "eval_loss": 0.27066901326179504, | |
| "eval_runtime": 219.2382, | |
| "eval_samples_per_second": 103.18, | |
| "eval_steps_per_second": 6.45, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "eval_loss": 0.26447921991348267, | |
| "eval_runtime": 219.2131, | |
| "eval_samples_per_second": 103.192, | |
| "eval_steps_per_second": 6.45, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 3.100664767331434e-05, | |
| "loss": 0.0838, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "eval_loss": 0.2692434787750244, | |
| "eval_runtime": 219.4395, | |
| "eval_samples_per_second": 103.085, | |
| "eval_steps_per_second": 6.444, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "eval_loss": 0.2642222046852112, | |
| "eval_runtime": 219.413, | |
| "eval_samples_per_second": 103.098, | |
| "eval_steps_per_second": 6.444, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "eval_loss": 0.2643529176712036, | |
| "eval_runtime": 219.2041, | |
| "eval_samples_per_second": 103.196, | |
| "eval_steps_per_second": 6.451, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "eval_loss": 0.25718453526496887, | |
| "eval_runtime": 219.1679, | |
| "eval_samples_per_second": 103.213, | |
| "eval_steps_per_second": 6.452, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "eval_loss": 0.26762890815734863, | |
| "eval_runtime": 218.6308, | |
| "eval_samples_per_second": 103.467, | |
| "eval_steps_per_second": 6.468, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 2.863247863247863e-05, | |
| "loss": 0.0761, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "eval_loss": 0.26568803191185, | |
| "eval_runtime": 220.7096, | |
| "eval_samples_per_second": 102.492, | |
| "eval_steps_per_second": 6.407, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "eval_loss": 0.26290062069892883, | |
| "eval_runtime": 219.3895, | |
| "eval_samples_per_second": 103.109, | |
| "eval_steps_per_second": 6.445, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "eval_loss": 0.26172617077827454, | |
| "eval_runtime": 219.4776, | |
| "eval_samples_per_second": 103.067, | |
| "eval_steps_per_second": 6.443, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "eval_loss": 0.26161935925483704, | |
| "eval_runtime": 219.3677, | |
| "eval_samples_per_second": 103.119, | |
| "eval_steps_per_second": 6.446, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "eval_loss": 0.2620932459831238, | |
| "eval_runtime": 218.6157, | |
| "eval_samples_per_second": 103.474, | |
| "eval_steps_per_second": 6.468, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 2.6258309591642926e-05, | |
| "loss": 0.0741, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "eval_loss": 0.26253631711006165, | |
| "eval_runtime": 218.59, | |
| "eval_samples_per_second": 103.486, | |
| "eval_steps_per_second": 6.469, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "eval_loss": 0.25699007511138916, | |
| "eval_runtime": 219.4178, | |
| "eval_samples_per_second": 103.096, | |
| "eval_steps_per_second": 6.444, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "eval_loss": 0.2583966851234436, | |
| "eval_runtime": 218.5589, | |
| "eval_samples_per_second": 103.501, | |
| "eval_steps_per_second": 6.47, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "eval_loss": 0.25885534286499023, | |
| "eval_runtime": 219.0916, | |
| "eval_samples_per_second": 103.249, | |
| "eval_steps_per_second": 6.454, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "eval_loss": 0.25685915350914, | |
| "eval_runtime": 219.2049, | |
| "eval_samples_per_second": 103.196, | |
| "eval_steps_per_second": 6.451, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "learning_rate": 2.388414055080722e-05, | |
| "loss": 0.0769, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "eval_loss": 0.2619025707244873, | |
| "eval_runtime": 219.3928, | |
| "eval_samples_per_second": 103.107, | |
| "eval_steps_per_second": 6.445, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "eval_loss": 0.25433388352394104, | |
| "eval_runtime": 219.2766, | |
| "eval_samples_per_second": 103.162, | |
| "eval_steps_per_second": 6.448, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "eval_loss": 0.25473591685295105, | |
| "eval_runtime": 219.1683, | |
| "eval_samples_per_second": 103.213, | |
| "eval_steps_per_second": 6.452, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "eval_loss": 0.2583990693092346, | |
| "eval_runtime": 218.8181, | |
| "eval_samples_per_second": 103.378, | |
| "eval_steps_per_second": 6.462, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "eval_loss": 0.2513364255428314, | |
| "eval_runtime": 219.6254, | |
| "eval_samples_per_second": 102.998, | |
| "eval_steps_per_second": 6.438, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 2.150997150997151e-05, | |
| "loss": 0.0701, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "eval_loss": 0.25798743963241577, | |
| "eval_runtime": 218.5917, | |
| "eval_samples_per_second": 103.485, | |
| "eval_steps_per_second": 6.469, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "eval_loss": 0.252897173166275, | |
| "eval_runtime": 218.8327, | |
| "eval_samples_per_second": 103.371, | |
| "eval_steps_per_second": 6.462, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "eval_loss": 0.25619062781333923, | |
| "eval_runtime": 218.7633, | |
| "eval_samples_per_second": 103.404, | |
| "eval_steps_per_second": 6.464, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "eval_loss": 0.25197675824165344, | |
| "eval_runtime": 218.5961, | |
| "eval_samples_per_second": 103.483, | |
| "eval_steps_per_second": 6.469, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "eval_loss": 0.2581734359264374, | |
| "eval_runtime": 219.6175, | |
| "eval_samples_per_second": 103.002, | |
| "eval_steps_per_second": 6.438, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 1.91358024691358e-05, | |
| "loss": 0.0684, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "eval_loss": 0.2550990581512451, | |
| "eval_runtime": 218.6851, | |
| "eval_samples_per_second": 103.441, | |
| "eval_steps_per_second": 6.466, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "eval_loss": 0.2555626332759857, | |
| "eval_runtime": 219.3776, | |
| "eval_samples_per_second": 103.114, | |
| "eval_steps_per_second": 6.446, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "eval_loss": 0.25543132424354553, | |
| "eval_runtime": 218.6367, | |
| "eval_samples_per_second": 103.464, | |
| "eval_steps_per_second": 6.467, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "eval_loss": 0.25568485260009766, | |
| "eval_runtime": 219.9462, | |
| "eval_samples_per_second": 102.848, | |
| "eval_steps_per_second": 6.429, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "eval_loss": 0.25582244992256165, | |
| "eval_runtime": 218.6889, | |
| "eval_samples_per_second": 103.439, | |
| "eval_steps_per_second": 6.466, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "learning_rate": 1.6761633428300098e-05, | |
| "loss": 0.0662, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "eval_loss": 0.25293371081352234, | |
| "eval_runtime": 219.8419, | |
| "eval_samples_per_second": 102.897, | |
| "eval_steps_per_second": 6.432, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "eval_loss": 0.249311164021492, | |
| "eval_runtime": 219.1307, | |
| "eval_samples_per_second": 103.231, | |
| "eval_steps_per_second": 6.453, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "eval_loss": 0.25428083539009094, | |
| "eval_runtime": 218.6179, | |
| "eval_samples_per_second": 103.473, | |
| "eval_steps_per_second": 6.468, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "eval_loss": 0.25905051827430725, | |
| "eval_runtime": 219.7362, | |
| "eval_samples_per_second": 102.946, | |
| "eval_steps_per_second": 6.435, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "eval_loss": 0.25161299109458923, | |
| "eval_runtime": 218.5372, | |
| "eval_samples_per_second": 103.511, | |
| "eval_steps_per_second": 6.47, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 1.4387464387464389e-05, | |
| "loss": 0.0659, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "eval_loss": 0.2567010223865509, | |
| "eval_runtime": 219.8696, | |
| "eval_samples_per_second": 102.884, | |
| "eval_steps_per_second": 6.431, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 7.22, | |
| "eval_loss": 0.2568127512931824, | |
| "eval_runtime": 218.4916, | |
| "eval_samples_per_second": 103.533, | |
| "eval_steps_per_second": 6.472, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "eval_loss": 0.24921847879886627, | |
| "eval_runtime": 219.9605, | |
| "eval_samples_per_second": 102.841, | |
| "eval_steps_per_second": 6.428, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 7.41, | |
| "eval_loss": 0.24751408398151398, | |
| "eval_runtime": 218.5269, | |
| "eval_samples_per_second": 103.516, | |
| "eval_steps_per_second": 6.471, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "eval_loss": 0.24565500020980835, | |
| "eval_runtime": 219.112, | |
| "eval_samples_per_second": 103.239, | |
| "eval_steps_per_second": 6.453, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "learning_rate": 1.2013295346628681e-05, | |
| "loss": 0.0641, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "eval_loss": 0.2555699646472931, | |
| "eval_runtime": 218.4879, | |
| "eval_samples_per_second": 103.534, | |
| "eval_steps_per_second": 6.472, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "eval_loss": 0.25907498598098755, | |
| "eval_runtime": 218.8796, | |
| "eval_samples_per_second": 103.349, | |
| "eval_steps_per_second": 6.46, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "eval_loss": 0.2498636245727539, | |
| "eval_runtime": 218.2333, | |
| "eval_samples_per_second": 103.655, | |
| "eval_steps_per_second": 6.479, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "eval_loss": 0.24620996415615082, | |
| "eval_runtime": 218.4049, | |
| "eval_samples_per_second": 103.574, | |
| "eval_steps_per_second": 6.474, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "eval_loss": 0.2514709234237671, | |
| "eval_runtime": 218.5279, | |
| "eval_samples_per_second": 103.515, | |
| "eval_steps_per_second": 6.471, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 8.07, | |
| "learning_rate": 9.639126305792973e-06, | |
| "loss": 0.0614, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 8.07, | |
| "eval_loss": 0.24893517792224884, | |
| "eval_runtime": 218.4312, | |
| "eval_samples_per_second": 103.561, | |
| "eval_steps_per_second": 6.473, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "eval_loss": 0.24635082483291626, | |
| "eval_runtime": 218.6536, | |
| "eval_samples_per_second": 103.456, | |
| "eval_steps_per_second": 6.467, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 8.26, | |
| "eval_loss": 0.24810662865638733, | |
| "eval_runtime": 218.4312, | |
| "eval_samples_per_second": 103.561, | |
| "eval_steps_per_second": 6.473, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 8.36, | |
| "eval_loss": 0.24930644035339355, | |
| "eval_runtime": 218.7482, | |
| "eval_samples_per_second": 103.411, | |
| "eval_steps_per_second": 6.464, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 8.45, | |
| "eval_loss": 0.24013535678386688, | |
| "eval_runtime": 218.398, | |
| "eval_samples_per_second": 103.577, | |
| "eval_steps_per_second": 6.474, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 8.55, | |
| "learning_rate": 7.264957264957266e-06, | |
| "loss": 0.0609, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 8.55, | |
| "eval_loss": 0.2461770623922348, | |
| "eval_runtime": 218.9012, | |
| "eval_samples_per_second": 103.339, | |
| "eval_steps_per_second": 6.46, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "eval_loss": 0.24963241815567017, | |
| "eval_runtime": 218.2697, | |
| "eval_samples_per_second": 103.638, | |
| "eval_steps_per_second": 6.478, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 8.74, | |
| "eval_loss": 0.24689340591430664, | |
| "eval_runtime": 218.5291, | |
| "eval_samples_per_second": 103.515, | |
| "eval_steps_per_second": 6.471, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 8.83, | |
| "eval_loss": 0.2498210072517395, | |
| "eval_runtime": 218.2515, | |
| "eval_samples_per_second": 103.646, | |
| "eval_steps_per_second": 6.479, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "eval_loss": 0.24612723290920258, | |
| "eval_runtime": 218.3684, | |
| "eval_samples_per_second": 103.591, | |
| "eval_steps_per_second": 6.475, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "learning_rate": 4.890788224121558e-06, | |
| "loss": 0.0588, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "eval_loss": 0.247446671128273, | |
| "eval_runtime": 218.2559, | |
| "eval_samples_per_second": 103.644, | |
| "eval_steps_per_second": 6.479, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "eval_loss": 0.24682562053203583, | |
| "eval_runtime": 218.332, | |
| "eval_samples_per_second": 103.608, | |
| "eval_steps_per_second": 6.476, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 9.21, | |
| "eval_loss": 0.2363210916519165, | |
| "eval_runtime": 218.2595, | |
| "eval_samples_per_second": 103.643, | |
| "eval_steps_per_second": 6.479, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 9.31, | |
| "eval_loss": 0.24492982029914856, | |
| "eval_runtime": 218.2962, | |
| "eval_samples_per_second": 103.625, | |
| "eval_steps_per_second": 6.477, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 9.4, | |
| "eval_loss": 0.247760608792305, | |
| "eval_runtime": 218.2063, | |
| "eval_samples_per_second": 103.668, | |
| "eval_steps_per_second": 6.48, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "learning_rate": 2.51661918328585e-06, | |
| "loss": 0.0604, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "eval_loss": 0.24903397262096405, | |
| "eval_runtime": 218.2843, | |
| "eval_samples_per_second": 103.631, | |
| "eval_steps_per_second": 6.478, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 9.59, | |
| "eval_loss": 0.25066474080085754, | |
| "eval_runtime": 218.2601, | |
| "eval_samples_per_second": 103.642, | |
| "eval_steps_per_second": 6.479, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 9.69, | |
| "eval_loss": 0.24707233905792236, | |
| "eval_runtime": 218.2446, | |
| "eval_samples_per_second": 103.65, | |
| "eval_steps_per_second": 6.479, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 9.78, | |
| "eval_loss": 0.24911357462406158, | |
| "eval_runtime": 218.2948, | |
| "eval_samples_per_second": 103.626, | |
| "eval_steps_per_second": 6.477, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 9.88, | |
| "eval_loss": 0.24460090696811676, | |
| "eval_runtime": 218.238, | |
| "eval_samples_per_second": 103.653, | |
| "eval_steps_per_second": 6.479, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 9.97, | |
| "learning_rate": 1.4245014245014247e-07, | |
| "loss": 0.0573, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 9.97, | |
| "eval_loss": 0.24572330713272095, | |
| "eval_runtime": 218.3788, | |
| "eval_samples_per_second": 103.586, | |
| "eval_steps_per_second": 6.475, | |
| "step": 10500 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 10530, | |
| "num_train_epochs": 10, | |
| "save_steps": 100, | |
| "total_flos": 1.7676748920639283e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |