| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9999921630708223, | |
| "eval_steps": 700, | |
| "global_step": 7975, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.08777360678991544, | |
| "grad_norm": 8.72152328491211, | |
| "learning_rate": 9.179810725552051e-05, | |
| "loss": 8.5437, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.08777360678991544, | |
| "eval_accuracy": 0.7517348354967752, | |
| "eval_average_precision": 0.8842190407928112, | |
| "eval_balance": 0.6869948567229978, | |
| "eval_f1": 0.8282600101654712, | |
| "eval_loss": 0.5054899454116821, | |
| "eval_roc_auc": 0.7897197759298469, | |
| "eval_runtime": 633.398, | |
| "eval_samples_per_second": 19.339, | |
| "eval_steps_per_second": 2.419, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.17554721357983089, | |
| "grad_norm": 9.17638111114502, | |
| "learning_rate": 8.296529968454258e-05, | |
| "loss": 7.8977, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.17554721357983089, | |
| "eval_accuracy": 0.7559800800065312, | |
| "eval_average_precision": 0.8909336486390318, | |
| "eval_balance": 0.6869948567229978, | |
| "eval_f1": 0.8349257193350638, | |
| "eval_loss": 0.49499139189720154, | |
| "eval_roc_auc": 0.7999154762203644, | |
| "eval_runtime": 627.8408, | |
| "eval_samples_per_second": 19.51, | |
| "eval_steps_per_second": 2.44, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.2633208203697463, | |
| "grad_norm": 16.890045166015625, | |
| "learning_rate": 7.413249211356468e-05, | |
| "loss": 7.6678, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.2633208203697463, | |
| "eval_accuracy": 0.7587558168013715, | |
| "eval_average_precision": 0.8970352803723831, | |
| "eval_balance": 0.6869948567229978, | |
| "eval_f1": 0.8362790182281566, | |
| "eval_loss": 0.4894179105758667, | |
| "eval_roc_auc": 0.8074783553104459, | |
| "eval_runtime": 627.6991, | |
| "eval_samples_per_second": 19.514, | |
| "eval_steps_per_second": 2.441, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.35109442715966177, | |
| "grad_norm": 24.892431259155273, | |
| "learning_rate": 6.529968454258676e-05, | |
| "loss": 7.686, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.35109442715966177, | |
| "eval_accuracy": 0.7607151604212589, | |
| "eval_average_precision": 0.8985326326623844, | |
| "eval_balance": 0.6869948567229978, | |
| "eval_f1": 0.8398973070410226, | |
| "eval_loss": 0.5012434720993042, | |
| "eval_roc_auc": 0.810687267904427, | |
| "eval_runtime": 627.9237, | |
| "eval_samples_per_second": 19.507, | |
| "eval_steps_per_second": 2.44, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.4388680339495772, | |
| "grad_norm": 8.646401405334473, | |
| "learning_rate": 5.646687697160884e-05, | |
| "loss": 7.5638, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.4388680339495772, | |
| "eval_accuracy": 0.7646338476610336, | |
| "eval_average_precision": 0.8996867165727693, | |
| "eval_balance": 0.6869948567229978, | |
| "eval_f1": 0.8403742871380322, | |
| "eval_loss": 0.48539647459983826, | |
| "eval_roc_auc": 0.8130039540515468, | |
| "eval_runtime": 627.8409, | |
| "eval_samples_per_second": 19.51, | |
| "eval_steps_per_second": 2.44, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.5266416407394926, | |
| "grad_norm": 7.240037441253662, | |
| "learning_rate": 4.763406940063092e-05, | |
| "loss": 7.4233, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.5266416407394926, | |
| "eval_accuracy": 0.7657767981059679, | |
| "eval_average_precision": 0.90033383484156, | |
| "eval_balance": 0.6869948567229978, | |
| "eval_f1": 0.8401760347612947, | |
| "eval_loss": 0.48347920179367065, | |
| "eval_roc_auc": 0.8146957469382213, | |
| "eval_runtime": 628.1254, | |
| "eval_samples_per_second": 19.501, | |
| "eval_steps_per_second": 2.439, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.6144152475294081, | |
| "grad_norm": 10.247232437133789, | |
| "learning_rate": 3.8801261829652994e-05, | |
| "loss": 7.3907, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.6144152475294081, | |
| "eval_accuracy": 0.7695322067107518, | |
| "eval_average_precision": 0.900695625601904, | |
| "eval_balance": 0.6869948567229978, | |
| "eval_f1": 0.8407513961753258, | |
| "eval_loss": 0.48138949275016785, | |
| "eval_roc_auc": 0.8159420465045062, | |
| "eval_runtime": 627.8685, | |
| "eval_samples_per_second": 19.509, | |
| "eval_steps_per_second": 2.44, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.7021888543193235, | |
| "grad_norm": 18.78386116027832, | |
| "learning_rate": 2.9968454258675084e-05, | |
| "loss": 7.4048, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.7021888543193235, | |
| "eval_accuracy": 0.7654502408359866, | |
| "eval_average_precision": 0.9011042318711991, | |
| "eval_balance": 0.6869948567229978, | |
| "eval_f1": 0.841103921243294, | |
| "eval_loss": 0.4915079176425934, | |
| "eval_roc_auc": 0.8155820068183135, | |
| "eval_runtime": 627.9338, | |
| "eval_samples_per_second": 19.507, | |
| "eval_steps_per_second": 2.44, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.789962461109239, | |
| "grad_norm": 10.982274055480957, | |
| "learning_rate": 2.113564668769716e-05, | |
| "loss": 7.3339, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.789962461109239, | |
| "eval_accuracy": 0.768225977630827, | |
| "eval_average_precision": 0.9026856624447291, | |
| "eval_balance": 0.6869948567229978, | |
| "eval_f1": 0.8419529031898904, | |
| "eval_loss": 0.4847244918346405, | |
| "eval_roc_auc": 0.8177728526481173, | |
| "eval_runtime": 629.0358, | |
| "eval_samples_per_second": 19.473, | |
| "eval_steps_per_second": 2.435, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.8777360678991544, | |
| "grad_norm": 11.756400108337402, | |
| "learning_rate": 1.2302839116719243e-05, | |
| "loss": 7.3013, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.8777360678991544, | |
| "eval_accuracy": 0.7696138460282472, | |
| "eval_average_precision": 0.9033701457563739, | |
| "eval_balance": 0.6869948567229978, | |
| "eval_f1": 0.8417629247504766, | |
| "eval_loss": 0.48107102513313293, | |
| "eval_roc_auc": 0.8189430436185476, | |
| "eval_runtime": 627.9862, | |
| "eval_samples_per_second": 19.505, | |
| "eval_steps_per_second": 2.44, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.9655096746890698, | |
| "grad_norm": 11.076114654541016, | |
| "learning_rate": 3.470031545741325e-06, | |
| "loss": 7.2932, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.9655096746890698, | |
| "eval_accuracy": 0.7703485998857049, | |
| "eval_average_precision": 0.9041708262033789, | |
| "eval_balance": 0.6869948567229978, | |
| "eval_f1": 0.8414586033928874, | |
| "eval_loss": 0.47746843099594116, | |
| "eval_roc_auc": 0.8201586889794568, | |
| "eval_runtime": 628.9473, | |
| "eval_samples_per_second": 19.475, | |
| "eval_steps_per_second": 2.436, | |
| "step": 7700 | |
| } | |
| ], | |
| "logging_steps": 700, | |
| "max_steps": 7975, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 700, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.751593546332242e+19, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |