bert_extract_lora_fine-tuned / trainer_state.json
huyydangg's picture
Initial model upload
eab35eb verified
{
"best_metric": 0.10447731614112854,
"best_model_checkpoint": "./bert_question_answer/checkpoint-537",
"epoch": 9.99163179916318,
"eval_steps": 500,
"global_step": 597,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9874476987447699,
"grad_norm": 437032.875,
"learning_rate": 6.666666666666667e-06,
"loss": 6.2594,
"step": 59
},
{
"epoch": 0.9874476987447699,
"eval_loss": 5.944966793060303,
"eval_runtime": 121.3177,
"eval_samples_per_second": 51.584,
"eval_steps_per_second": 0.717,
"step": 59
},
{
"epoch": 1.99163179916318,
"grad_norm": 599712.1875,
"learning_rate": 1.344632768361582e-05,
"loss": 4.9678,
"step": 119
},
{
"epoch": 1.99163179916318,
"eval_loss": 2.9018592834472656,
"eval_runtime": 120.791,
"eval_samples_per_second": 51.808,
"eval_steps_per_second": 0.72,
"step": 119
},
{
"epoch": 2.99581589958159,
"grad_norm": 99944.7109375,
"learning_rate": 1.9943502824858758e-05,
"loss": 1.274,
"step": 179
},
{
"epoch": 2.99581589958159,
"eval_loss": 0.33090877532958984,
"eval_runtime": 120.4081,
"eval_samples_per_second": 51.973,
"eval_steps_per_second": 0.723,
"step": 179
},
{
"epoch": 4.0,
"grad_norm": 30878.6875,
"learning_rate": 1.824858757062147e-05,
"loss": 0.2577,
"step": 239
},
{
"epoch": 4.0,
"eval_loss": 0.2211635261774063,
"eval_runtime": 120.404,
"eval_samples_per_second": 51.975,
"eval_steps_per_second": 0.723,
"step": 239
},
{
"epoch": 4.98744769874477,
"grad_norm": 48801.58203125,
"learning_rate": 1.6581920903954804e-05,
"loss": 0.1769,
"step": 298
},
{
"epoch": 4.98744769874477,
"eval_loss": 0.1728881150484085,
"eval_runtime": 120.296,
"eval_samples_per_second": 52.022,
"eval_steps_per_second": 0.723,
"step": 298
},
{
"epoch": 5.99163179916318,
"grad_norm": 41960.20703125,
"learning_rate": 1.4887005649717517e-05,
"loss": 0.1441,
"step": 358
},
{
"epoch": 5.99163179916318,
"eval_loss": 0.14164601266384125,
"eval_runtime": 120.9698,
"eval_samples_per_second": 51.732,
"eval_steps_per_second": 0.719,
"step": 358
},
{
"epoch": 6.99581589958159,
"grad_norm": 46197.64453125,
"learning_rate": 1.3192090395480226e-05,
"loss": 0.1248,
"step": 418
},
{
"epoch": 6.99581589958159,
"eval_loss": 0.12222907692193985,
"eval_runtime": 120.6209,
"eval_samples_per_second": 51.882,
"eval_steps_per_second": 0.721,
"step": 418
},
{
"epoch": 8.0,
"grad_norm": 44641.1015625,
"learning_rate": 1.1497175141242938e-05,
"loss": 0.1133,
"step": 478
},
{
"epoch": 8.0,
"eval_loss": 0.11412081867456436,
"eval_runtime": 120.2968,
"eval_samples_per_second": 52.021,
"eval_steps_per_second": 0.723,
"step": 478
},
{
"epoch": 8.98744769874477,
"grad_norm": 45220.9921875,
"learning_rate": 9.830508474576272e-06,
"loss": 0.1054,
"step": 537
},
{
"epoch": 8.98744769874477,
"eval_loss": 0.10447731614112854,
"eval_runtime": 120.507,
"eval_samples_per_second": 51.931,
"eval_steps_per_second": 0.722,
"step": 537
},
{
"epoch": 9.99163179916318,
"grad_norm": 37404.69140625,
"learning_rate": 8.135593220338983e-06,
"loss": 0.0975,
"step": 597
},
{
"epoch": 9.99163179916318,
"eval_loss": 0.109876349568367,
"eval_runtime": 120.8315,
"eval_samples_per_second": 51.791,
"eval_steps_per_second": 0.72,
"step": 597
}
],
"logging_steps": 500,
"max_steps": 885,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 1,
"early_stopping_threshold": 0.001
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.5538865312514048e+17,
"train_batch_size": 72,
"trial_name": null,
"trial_params": null
}