| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 15.961630695443645, | |
| "eval_steps": 500, | |
| "global_step": 3328, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_f1": 0.8988, | |
| "eval_gen_len": 29.90909090909091, | |
| "eval_loss": 1.8075228929519653, | |
| "eval_precision": 0.901, | |
| "eval_recall": 0.897, | |
| "eval_rouge1": 0.411, | |
| "eval_rouge2": 0.1689, | |
| "eval_rougeL": 0.3152, | |
| "eval_rougeLsum": 0.3155, | |
| "eval_runtime": 182.3495, | |
| "eval_samples_per_second": 6.032, | |
| "eval_steps_per_second": 0.378, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_f1": 0.904, | |
| "eval_gen_len": 29.907272727272726, | |
| "eval_loss": 1.7311877012252808, | |
| "eval_precision": 0.9059, | |
| "eval_recall": 0.9024, | |
| "eval_rouge1": 0.4379, | |
| "eval_rouge2": 0.1893, | |
| "eval_rougeL": 0.3442, | |
| "eval_rougeLsum": 0.3446, | |
| "eval_runtime": 160.7204, | |
| "eval_samples_per_second": 6.844, | |
| "eval_steps_per_second": 0.429, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.699519230769231e-05, | |
| "loss": 2.0112, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_f1": 0.9055, | |
| "eval_gen_len": 30.017272727272726, | |
| "eval_loss": 1.6986640691757202, | |
| "eval_precision": 0.9075, | |
| "eval_recall": 0.9039, | |
| "eval_rouge1": 0.4475, | |
| "eval_rouge2": 0.1978, | |
| "eval_rougeL": 0.352, | |
| "eval_rougeLsum": 0.3525, | |
| "eval_runtime": 161.6717, | |
| "eval_samples_per_second": 6.804, | |
| "eval_steps_per_second": 0.427, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_f1": 0.9063, | |
| "eval_gen_len": 30.061818181818182, | |
| "eval_loss": 1.676792025566101, | |
| "eval_precision": 0.9082, | |
| "eval_recall": 0.9047, | |
| "eval_rouge1": 0.4514, | |
| "eval_rouge2": 0.1981, | |
| "eval_rougeL": 0.357, | |
| "eval_rougeLsum": 0.3573, | |
| "eval_runtime": 160.8885, | |
| "eval_samples_per_second": 6.837, | |
| "eval_steps_per_second": 0.429, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 1.3990384615384615e-05, | |
| "loss": 1.7647, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_f1": 0.9068, | |
| "eval_gen_len": 30.326363636363638, | |
| "eval_loss": 1.661742091178894, | |
| "eval_precision": 0.9084, | |
| "eval_recall": 0.9055, | |
| "eval_rouge1": 0.4537, | |
| "eval_rouge2": 0.2003, | |
| "eval_rougeL": 0.3592, | |
| "eval_rougeLsum": 0.3595, | |
| "eval_runtime": 162.5609, | |
| "eval_samples_per_second": 6.767, | |
| "eval_steps_per_second": 0.424, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_f1": 0.9072, | |
| "eval_gen_len": 30.082727272727272, | |
| "eval_loss": 1.6502095460891724, | |
| "eval_precision": 0.9089, | |
| "eval_recall": 0.9057, | |
| "eval_rouge1": 0.4554, | |
| "eval_rouge2": 0.2021, | |
| "eval_rougeL": 0.3607, | |
| "eval_rougeLsum": 0.361, | |
| "eval_runtime": 159.1789, | |
| "eval_samples_per_second": 6.91, | |
| "eval_steps_per_second": 0.433, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_f1": 0.908, | |
| "eval_gen_len": 30.021818181818183, | |
| "eval_loss": 1.6416178941726685, | |
| "eval_precision": 0.9099, | |
| "eval_recall": 0.9064, | |
| "eval_rouge1": 0.4592, | |
| "eval_rouge2": 0.2052, | |
| "eval_rougeL": 0.3639, | |
| "eval_rougeLsum": 0.3641, | |
| "eval_runtime": 156.6401, | |
| "eval_samples_per_second": 7.022, | |
| "eval_steps_per_second": 0.441, | |
| "step": 1459 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "learning_rate": 1.0985576923076924e-05, | |
| "loss": 1.6948, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_f1": 0.9081, | |
| "eval_gen_len": 30.78272727272727, | |
| "eval_loss": 1.6360372304916382, | |
| "eval_precision": 0.909, | |
| "eval_recall": 0.9074, | |
| "eval_rouge1": 0.4612, | |
| "eval_rouge2": 0.2054, | |
| "eval_rougeL": 0.3649, | |
| "eval_rougeLsum": 0.365, | |
| "eval_runtime": 160.7238, | |
| "eval_samples_per_second": 6.844, | |
| "eval_steps_per_second": 0.429, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_f1": 0.9083, | |
| "eval_gen_len": 30.62909090909091, | |
| "eval_loss": 1.6301532983779907, | |
| "eval_precision": 0.9095, | |
| "eval_recall": 0.9074, | |
| "eval_rouge1": 0.4621, | |
| "eval_rouge2": 0.2062, | |
| "eval_rougeL": 0.3645, | |
| "eval_rougeLsum": 0.3647, | |
| "eval_runtime": 159.8662, | |
| "eval_samples_per_second": 6.881, | |
| "eval_steps_per_second": 0.432, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 9.59, | |
| "learning_rate": 7.980769230769232e-06, | |
| "loss": 1.6501, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_f1": 0.9083, | |
| "eval_gen_len": 30.48181818181818, | |
| "eval_loss": 1.6264721155166626, | |
| "eval_precision": 0.9095, | |
| "eval_recall": 0.9073, | |
| "eval_rouge1": 0.4606, | |
| "eval_rouge2": 0.2051, | |
| "eval_rougeL": 0.3651, | |
| "eval_rougeLsum": 0.3655, | |
| "eval_runtime": 157.2711, | |
| "eval_samples_per_second": 6.994, | |
| "eval_steps_per_second": 0.439, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_f1": 0.9087, | |
| "eval_gen_len": 30.806363636363635, | |
| "eval_loss": 1.6229554414749146, | |
| "eval_precision": 0.9097, | |
| "eval_recall": 0.908, | |
| "eval_rouge1": 0.4625, | |
| "eval_rouge2": 0.2073, | |
| "eval_rougeL": 0.3658, | |
| "eval_rougeLsum": 0.366, | |
| "eval_runtime": 157.1263, | |
| "eval_samples_per_second": 7.001, | |
| "eval_steps_per_second": 0.439, | |
| "step": 2293 | |
| }, | |
| { | |
| "epoch": 11.99, | |
| "learning_rate": 4.975961538461539e-06, | |
| "loss": 1.6222, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_f1": 0.909, | |
| "eval_gen_len": 30.55272727272727, | |
| "eval_loss": 1.6204967498779297, | |
| "eval_precision": 0.9103, | |
| "eval_recall": 0.9081, | |
| "eval_rouge1": 0.4644, | |
| "eval_rouge2": 0.2082, | |
| "eval_rougeL": 0.3674, | |
| "eval_rougeLsum": 0.3679, | |
| "eval_runtime": 155.7494, | |
| "eval_samples_per_second": 7.063, | |
| "eval_steps_per_second": 0.443, | |
| "step": 2502 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_f1": 0.909, | |
| "eval_gen_len": 30.805454545454545, | |
| "eval_loss": 1.618751883506775, | |
| "eval_precision": 0.9101, | |
| "eval_recall": 0.9083, | |
| "eval_rouge1": 0.4648, | |
| "eval_rouge2": 0.2087, | |
| "eval_rougeL": 0.3681, | |
| "eval_rougeLsum": 0.3683, | |
| "eval_runtime": 158.59, | |
| "eval_samples_per_second": 6.936, | |
| "eval_steps_per_second": 0.435, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_f1": 0.9093, | |
| "eval_gen_len": 30.670909090909092, | |
| "eval_loss": 1.6172130107879639, | |
| "eval_precision": 0.9104, | |
| "eval_recall": 0.9084, | |
| "eval_rouge1": 0.4654, | |
| "eval_rouge2": 0.2097, | |
| "eval_rougeL": 0.3685, | |
| "eval_rougeLsum": 0.3689, | |
| "eval_runtime": 159.3516, | |
| "eval_samples_per_second": 6.903, | |
| "eval_steps_per_second": 0.433, | |
| "step": 2919 | |
| }, | |
| { | |
| "epoch": 14.39, | |
| "learning_rate": 1.971153846153846e-06, | |
| "loss": 1.6048, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_f1": 0.9093, | |
| "eval_gen_len": 30.630909090909093, | |
| "eval_loss": 1.6168792247772217, | |
| "eval_precision": 0.9104, | |
| "eval_recall": 0.9084, | |
| "eval_rouge1": 0.465, | |
| "eval_rouge2": 0.21, | |
| "eval_rougeL": 0.3693, | |
| "eval_rougeLsum": 0.3697, | |
| "eval_runtime": 158.4169, | |
| "eval_samples_per_second": 6.944, | |
| "eval_steps_per_second": 0.436, | |
| "step": 3127 | |
| }, | |
| { | |
| "epoch": 15.96, | |
| "eval_f1": 0.9091, | |
| "eval_gen_len": 30.619090909090907, | |
| "eval_loss": 1.6166560649871826, | |
| "eval_precision": 0.9102, | |
| "eval_recall": 0.9083, | |
| "eval_rouge1": 0.4649, | |
| "eval_rouge2": 0.2096, | |
| "eval_rougeL": 0.3686, | |
| "eval_rougeLsum": 0.3688, | |
| "eval_runtime": 157.7317, | |
| "eval_samples_per_second": 6.974, | |
| "eval_steps_per_second": 0.437, | |
| "step": 3328 | |
| }, | |
| { | |
| "epoch": 15.96, | |
| "step": 3328, | |
| "total_flos": 4.6122414295331635e+17, | |
| "train_loss": 1.711962864949153, | |
| "train_runtime": 11907.5247, | |
| "train_samples_per_second": 26.874, | |
| "train_steps_per_second": 0.279 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 3328, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 16, | |
| "save_steps": 500, | |
| "total_flos": 4.6122414295331635e+17, | |
| "train_batch_size": 24, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |