{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.96,
  "eval_steps": 500,
  "global_step": 186,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "grad_norm": 6.012967586517334,
      "learning_rate": 4.9910902453260824e-05,
      "loss": 3.1744,
      "num_input_tokens_seen": 4544,
      "step": 5
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.4645135402679443,
      "learning_rate": 4.964424488287009e-05,
      "loss": 2.264,
      "num_input_tokens_seen": 9104,
      "step": 10
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.6127007007598877,
      "learning_rate": 4.920192797165511e-05,
      "loss": 1.5631,
      "num_input_tokens_seen": 13504,
      "step": 15
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.5983505249023438,
      "learning_rate": 4.858710446774951e-05,
      "loss": 1.0369,
      "num_input_tokens_seen": 17808,
      "step": 20
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.702121615409851,
      "learning_rate": 4.780415671242334e-05,
      "loss": 0.8575,
      "num_input_tokens_seen": 22464,
      "step": 25
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5292010307312012,
      "learning_rate": 4.685866540361456e-05,
      "loss": 0.6605,
      "num_input_tokens_seen": 26848,
      "step": 30
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8130377531051636,
      "learning_rate": 4.5757369817809415e-05,
      "loss": 0.6076,
      "num_input_tokens_seen": 31296,
      "step": 35
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.726243019104004,
      "learning_rate": 4.45081197738023e-05,
      "loss": 0.5067,
      "num_input_tokens_seen": 35648,
      "step": 40
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7013319730758667,
      "learning_rate": 4.3119819680728e-05,
      "loss": 0.5202,
      "num_input_tokens_seen": 40160,
      "step": 45
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4300320148468018,
      "learning_rate": 4.160236506918098e-05,
      "loss": 0.484,
      "num_input_tokens_seen": 44608,
      "step": 50
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.562301754951477,
      "learning_rate": 3.9966572057815373e-05,
      "loss": 0.5275,
      "num_input_tokens_seen": 49488,
      "step": 55
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4365646839141846,
      "learning_rate": 3.822410025817406e-05,
      "loss": 0.4158,
      "num_input_tokens_seen": 53888,
      "step": 60
    },
    {
      "epoch": 1.032,
      "grad_norm": 1.6914381980895996,
      "learning_rate": 3.638736966726585e-05,
      "loss": 0.3571,
      "num_input_tokens_seen": 57952,
      "step": 65
    },
    {
      "epoch": 1.112,
      "grad_norm": 1.6642335653305054,
      "learning_rate": 3.44694721402644e-05,
      "loss": 0.4535,
      "num_input_tokens_seen": 62304,
      "step": 70
    },
    {
      "epoch": 1.192,
      "grad_norm": 1.2602016925811768,
      "learning_rate": 3.2484078074333954e-05,
      "loss": 0.3905,
      "num_input_tokens_seen": 66736,
      "step": 75
    },
    {
      "epoch": 1.272,
      "grad_norm": 1.4347716569900513,
      "learning_rate": 3.0445338968721287e-05,
      "loss": 0.42,
      "num_input_tokens_seen": 71248,
      "step": 80
    },
    {
      "epoch": 1.3519999999999999,
      "grad_norm": 1.617989420890808,
      "learning_rate": 2.836778655564653e-05,
      "loss": 0.3955,
      "num_input_tokens_seen": 75776,
      "step": 85
    },
    {
      "epoch": 1.432,
      "grad_norm": 1.752974033355713,
      "learning_rate": 2.6266229220967818e-05,
      "loss": 0.4066,
      "num_input_tokens_seen": 80032,
      "step": 90
    },
    {
      "epoch": 1.512,
      "grad_norm": 1.70826256275177,
      "learning_rate": 2.4155646452913296e-05,
      "loss": 0.4497,
      "num_input_tokens_seen": 84816,
      "step": 95
    },
    {
      "epoch": 1.592,
      "grad_norm": 1.4035143852233887,
      "learning_rate": 2.2051082071228854e-05,
      "loss": 0.4116,
      "num_input_tokens_seen": 89360,
      "step": 100
    },
    {
      "epoch": 1.6720000000000002,
      "grad_norm": 1.484985113143921,
      "learning_rate": 1.9967536997783494e-05,
      "loss": 0.4653,
      "num_input_tokens_seen": 93920,
      "step": 105
    },
    {
      "epoch": 1.752,
      "grad_norm": 1.2968859672546387,
      "learning_rate": 1.79198623329424e-05,
      "loss": 0.405,
      "num_input_tokens_seen": 98368,
      "step": 110
    },
    {
      "epoch": 1.8319999999999999,
      "grad_norm": 0.9588320255279541,
      "learning_rate": 1.5922653499838137e-05,
      "loss": 0.4061,
      "num_input_tokens_seen": 102992,
      "step": 115
    },
    {
      "epoch": 1.912,
      "grad_norm": 1.1992862224578857,
      "learning_rate": 1.399014621105914e-05,
      "loss": 0.3665,
      "num_input_tokens_seen": 107472,
      "step": 120
    },
    {
      "epoch": 1.992,
      "grad_norm": 1.4413576126098633,
      "learning_rate": 1.2136114999284288e-05,
      "loss": 0.3508,
      "num_input_tokens_seen": 111776,
      "step": 125
    },
    {
      "epoch": 2.064,
      "grad_norm": 1.225965976715088,
      "learning_rate": 1.0373775035117305e-05,
      "loss": 0.3255,
      "num_input_tokens_seen": 115824,
      "step": 130
    },
    {
      "epoch": 2.144,
      "grad_norm": 1.2995370626449585,
      "learning_rate": 8.715687931944449e-06,
      "loss": 0.4355,
      "num_input_tokens_seen": 120544,
      "step": 135
    },
    {
      "epoch": 2.224,
      "grad_norm": 1.6566274166107178,
      "learning_rate": 7.173672209219495e-06,
      "loss": 0.3769,
      "num_input_tokens_seen": 125104,
      "step": 140
    },
    {
      "epoch": 2.304,
      "grad_norm": 1.2420896291732788,
      "learning_rate": 5.758719052376693e-06,
      "loss": 0.3056,
      "num_input_tokens_seen": 129360,
      "step": 145
    },
    {
      "epoch": 2.384,
      "grad_norm": 1.6003204584121704,
      "learning_rate": 4.480913969818098e-06,
      "loss": 0.3677,
      "num_input_tokens_seen": 133856,
      "step": 150
    },
    {
      "epoch": 2.464,
      "grad_norm": 1.551696538925171,
      "learning_rate": 3.3493649053890326e-06,
      "loss": 0.3446,
      "num_input_tokens_seen": 138240,
      "step": 155
    },
    {
      "epoch": 2.544,
      "grad_norm": 1.4812240600585938,
      "learning_rate": 2.372137318741968e-06,
      "loss": 0.3474,
      "num_input_tokens_seen": 142784,
      "step": 160
    },
    {
      "epoch": 2.624,
      "grad_norm": 1.4917963743209839,
      "learning_rate": 1.5561966963229924e-06,
      "loss": 0.4203,
      "num_input_tokens_seen": 147344,
      "step": 165
    },
    {
      "epoch": 2.7039999999999997,
      "grad_norm": 1.5821081399917603,
      "learning_rate": 9.073589027514789e-07,
      "loss": 0.3472,
      "num_input_tokens_seen": 151792,
      "step": 170
    },
    {
      "epoch": 2.784,
      "grad_norm": 1.8335837125778198,
      "learning_rate": 4.302487264785521e-07,
      "loss": 0.3478,
      "num_input_tokens_seen": 156304,
      "step": 175
    },
    {
      "epoch": 2.864,
      "grad_norm": 1.3951280117034912,
      "learning_rate": 1.2826691520262114e-07,
      "loss": 0.3708,
      "num_input_tokens_seen": 160864,
      "step": 180
    },
    {
      "epoch": 2.944,
      "grad_norm": 1.2953612804412842,
      "learning_rate": 3.565936007254855e-09,
      "loss": 0.4196,
      "num_input_tokens_seen": 165376,
      "step": 185
    },
    {
      "epoch": 2.96,
      "num_input_tokens_seen": 166272,
      "step": 186,
      "total_flos": 976464451731456.0,
      "train_loss": 0.6014698924877311,
      "train_runtime": 123.0139,
      "train_samples_per_second": 24.387,
      "train_steps_per_second": 1.512
    }
  ],
  "logging_steps": 5,
  "max_steps": 186,
  "num_input_tokens_seen": 166272,
  "num_train_epochs": 3,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 976464451731456.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}