| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "global_step": 3510, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.14, | |
| "eval_f1": 0.37622621795062866, | |
| "eval_loss": 1.36316978931427, | |
| "eval_runtime": 3.2856, | |
| "eval_samples_per_second": 29.827, | |
| "eval_steps_per_second": 29.827, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_f1": 0.41617398891170265, | |
| "eval_loss": 1.227824330329895, | |
| "eval_runtime": 3.2714, | |
| "eval_samples_per_second": 29.957, | |
| "eval_steps_per_second": 29.957, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_f1": 0.41594055599199414, | |
| "eval_loss": 1.1802130937576294, | |
| "eval_runtime": 3.2851, | |
| "eval_samples_per_second": 29.832, | |
| "eval_steps_per_second": 29.832, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_f1": 0.4879226887989845, | |
| "eval_loss": 1.3237018585205078, | |
| "eval_runtime": 3.2714, | |
| "eval_samples_per_second": 29.957, | |
| "eval_steps_per_second": 29.957, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 2.572649572649573e-05, | |
| "loss": 1.2, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_f1": 0.46450380175056494, | |
| "eval_loss": 1.2971174716949463, | |
| "eval_runtime": 3.2843, | |
| "eval_samples_per_second": 29.839, | |
| "eval_steps_per_second": 29.839, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_f1": 0.5019868520647613, | |
| "eval_loss": 1.2549620866775513, | |
| "eval_runtime": 3.267, | |
| "eval_samples_per_second": 29.997, | |
| "eval_steps_per_second": 29.997, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_f1": 0.48057967334012397, | |
| "eval_loss": 1.1853649616241455, | |
| "eval_runtime": 3.277, | |
| "eval_samples_per_second": 29.905, | |
| "eval_steps_per_second": 29.905, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_f1": 0.5011814210846155, | |
| "eval_loss": 1.1788480281829834, | |
| "eval_runtime": 3.2639, | |
| "eval_samples_per_second": 30.025, | |
| "eval_steps_per_second": 30.025, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "eval_f1": 0.4964300899620197, | |
| "eval_loss": 1.093542218208313, | |
| "eval_runtime": 3.2724, | |
| "eval_samples_per_second": 29.947, | |
| "eval_steps_per_second": 29.947, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.1452991452991456e-05, | |
| "loss": 0.9189, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "eval_f1": 0.4986272191320895, | |
| "eval_loss": 1.2862237691879272, | |
| "eval_runtime": 3.302, | |
| "eval_samples_per_second": 29.679, | |
| "eval_steps_per_second": 29.679, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "eval_f1": 0.49297809308258944, | |
| "eval_loss": 1.2222929000854492, | |
| "eval_runtime": 3.3171, | |
| "eval_samples_per_second": 29.544, | |
| "eval_steps_per_second": 29.544, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "eval_f1": 0.4953797333525823, | |
| "eval_loss": 1.1196690797805786, | |
| "eval_runtime": 3.2943, | |
| "eval_samples_per_second": 29.749, | |
| "eval_steps_per_second": 29.749, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "eval_f1": 0.5153008157478032, | |
| "eval_loss": 1.1256704330444336, | |
| "eval_runtime": 3.2631, | |
| "eval_samples_per_second": 30.033, | |
| "eval_steps_per_second": 30.033, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "eval_f1": 0.5263780363862973, | |
| "eval_loss": 1.1729286909103394, | |
| "eval_runtime": 3.2904, | |
| "eval_samples_per_second": 29.783, | |
| "eval_steps_per_second": 29.783, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.7179487179487178e-05, | |
| "loss": 0.8143, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "eval_f1": 0.5165321012151871, | |
| "eval_loss": 1.272233486175537, | |
| "eval_runtime": 3.3087, | |
| "eval_samples_per_second": 29.619, | |
| "eval_steps_per_second": 29.619, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "eval_f1": 0.539472065505205, | |
| "eval_loss": 1.3217926025390625, | |
| "eval_runtime": 3.2634, | |
| "eval_samples_per_second": 30.03, | |
| "eval_steps_per_second": 30.03, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "eval_f1": 0.5170136038987323, | |
| "eval_loss": 1.338261604309082, | |
| "eval_runtime": 3.2635, | |
| "eval_samples_per_second": 30.029, | |
| "eval_steps_per_second": 30.029, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "eval_f1": 0.5138801729725696, | |
| "eval_loss": 1.250339388847351, | |
| "eval_runtime": 3.2656, | |
| "eval_samples_per_second": 30.009, | |
| "eval_steps_per_second": 30.009, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "eval_f1": 0.523963853035474, | |
| "eval_loss": 1.362999439239502, | |
| "eval_runtime": 3.3211, | |
| "eval_samples_per_second": 29.508, | |
| "eval_steps_per_second": 29.508, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 1.2905982905982905e-05, | |
| "loss": 0.6175, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "eval_f1": 0.5305458058252502, | |
| "eval_loss": 1.402750015258789, | |
| "eval_runtime": 3.2768, | |
| "eval_samples_per_second": 29.907, | |
| "eval_steps_per_second": 29.907, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_f1": 0.5408209021870833, | |
| "eval_loss": 1.4016790390014648, | |
| "eval_runtime": 3.3122, | |
| "eval_samples_per_second": 29.588, | |
| "eval_steps_per_second": 29.588, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "eval_f1": 0.541281162975512, | |
| "eval_loss": 1.5929616689682007, | |
| "eval_runtime": 3.294, | |
| "eval_samples_per_second": 29.751, | |
| "eval_steps_per_second": 29.751, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "eval_f1": 0.5564758214624422, | |
| "eval_loss": 1.5372625589370728, | |
| "eval_runtime": 3.2882, | |
| "eval_samples_per_second": 29.803, | |
| "eval_steps_per_second": 29.803, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "eval_f1": 0.5722151004353093, | |
| "eval_loss": 1.5012538433074951, | |
| "eval_runtime": 3.3067, | |
| "eval_samples_per_second": 29.637, | |
| "eval_steps_per_second": 29.637, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 8.632478632478633e-06, | |
| "loss": 0.4726, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "eval_f1": 0.5226487560978434, | |
| "eval_loss": 1.570418119430542, | |
| "eval_runtime": 3.3114, | |
| "eval_samples_per_second": 29.595, | |
| "eval_steps_per_second": 29.595, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "eval_f1": 0.5483719296880323, | |
| "eval_loss": 1.5890936851501465, | |
| "eval_runtime": 3.2745, | |
| "eval_samples_per_second": 29.928, | |
| "eval_steps_per_second": 29.928, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "eval_f1": 0.5630120856995185, | |
| "eval_loss": 1.5236029624938965, | |
| "eval_runtime": 3.2951, | |
| "eval_samples_per_second": 29.741, | |
| "eval_steps_per_second": 29.741, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "eval_f1": 0.5422100713682105, | |
| "eval_loss": 1.52333664894104, | |
| "eval_runtime": 3.3261, | |
| "eval_samples_per_second": 29.464, | |
| "eval_steps_per_second": 29.464, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "eval_f1": 0.5469719933620487, | |
| "eval_loss": 1.6104604005813599, | |
| "eval_runtime": 3.2888, | |
| "eval_samples_per_second": 29.798, | |
| "eval_steps_per_second": 29.798, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 4.358974358974359e-06, | |
| "loss": 0.3745, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "eval_f1": 0.5525357490677262, | |
| "eval_loss": 1.7136110067367554, | |
| "eval_runtime": 3.3248, | |
| "eval_samples_per_second": 29.476, | |
| "eval_steps_per_second": 29.476, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "eval_f1": 0.5539436259955471, | |
| "eval_loss": 1.6561492681503296, | |
| "eval_runtime": 3.2857, | |
| "eval_samples_per_second": 29.826, | |
| "eval_steps_per_second": 29.826, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "eval_f1": 0.5504413375623162, | |
| "eval_loss": 1.7664132118225098, | |
| "eval_runtime": 3.2517, | |
| "eval_samples_per_second": 30.138, | |
| "eval_steps_per_second": 30.138, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "eval_f1": 0.5494419672200014, | |
| "eval_loss": 1.750455379486084, | |
| "eval_runtime": 3.27, | |
| "eval_samples_per_second": 29.969, | |
| "eval_steps_per_second": 29.969, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "eval_f1": 0.5516497223039627, | |
| "eval_loss": 1.7312653064727783, | |
| "eval_runtime": 3.3127, | |
| "eval_samples_per_second": 29.583, | |
| "eval_steps_per_second": 29.583, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 8.547008547008547e-08, | |
| "loss": 0.307, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "eval_f1": 0.5515045914952008, | |
| "eval_loss": 1.7193822860717773, | |
| "eval_runtime": 3.2769, | |
| "eval_samples_per_second": 29.907, | |
| "eval_steps_per_second": 29.907, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 3510, | |
| "total_flos": 2890172619430200.0, | |
| "train_loss": 0.6706694952103487, | |
| "train_runtime": 824.1732, | |
| "train_samples_per_second": 4.259, | |
| "train_steps_per_second": 4.259 | |
| } | |
| ], | |
| "max_steps": 3510, | |
| "num_train_epochs": 5, | |
| "total_flos": 2890172619430200.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |