{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 250.0,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 5.0,
      "grad_norm": 246.9295196533203,
      "learning_rate": 9.000000000000001e-07,
      "loss": 7.5384,
      "step": 10
    },
    {
      "epoch": 10.0,
      "grad_norm": 36.33924865722656,
      "learning_rate": 1.9000000000000002e-06,
      "loss": 4.8755,
      "step": 20
    },
    {
      "epoch": 15.0,
      "grad_norm": 28.330366134643555,
      "learning_rate": 2.9e-06,
      "loss": 3.2837,
      "step": 30
    },
    {
      "epoch": 20.0,
      "grad_norm": 15.325112342834473,
      "learning_rate": 3.900000000000001e-06,
      "loss": 1.8798,
      "step": 40
    },
    {
      "epoch": 25.0,
      "grad_norm": 14.916882514953613,
      "learning_rate": 4.9000000000000005e-06,
      "loss": 1.2343,
      "step": 50
    },
    {
      "epoch": 30.0,
      "grad_norm": 14.089351654052734,
      "learning_rate": 5.9e-06,
      "loss": 1.0968,
      "step": 60
    },
    {
      "epoch": 35.0,
      "grad_norm": 14.555135726928711,
      "learning_rate": 6.9e-06,
      "loss": 1.0377,
      "step": 70
    },
    {
      "epoch": 40.0,
      "grad_norm": 14.033711433410645,
      "learning_rate": 7.9e-06,
      "loss": 0.9481,
      "step": 80
    },
    {
      "epoch": 45.0,
      "grad_norm": 13.90065860748291,
      "learning_rate": 8.900000000000001e-06,
      "loss": 0.8843,
      "step": 90
    },
    {
      "epoch": 50.0,
      "grad_norm": 13.229043006896973,
      "learning_rate": 9.9e-06,
      "loss": 0.7858,
      "step": 100
    },
    {
      "epoch": 55.0,
      "grad_norm": 21.862545013427734,
      "learning_rate": 9.775e-06,
      "loss": 0.5956,
      "step": 110
    },
    {
      "epoch": 60.0,
      "grad_norm": 0.7360788583755493,
      "learning_rate": 9.525000000000001e-06,
      "loss": 0.2605,
      "step": 120
    },
    {
      "epoch": 65.0,
      "grad_norm": 0.10755204409360886,
      "learning_rate": 9.275e-06,
      "loss": 0.0064,
      "step": 130
    },
    {
      "epoch": 70.0,
      "grad_norm": 0.07476246356964111,
      "learning_rate": 9.025e-06,
      "loss": 0.0008,
      "step": 140
    },
    {
      "epoch": 75.0,
      "grad_norm": 0.010864024981856346,
      "learning_rate": 8.775e-06,
      "loss": 0.0003,
      "step": 150
    },
    {
      "epoch": 80.0,
      "grad_norm": 0.004976856987923384,
      "learning_rate": 8.525e-06,
      "loss": 0.0001,
      "step": 160
    },
    {
      "epoch": 85.0,
      "grad_norm": 0.0033130289521068335,
      "learning_rate": 8.275000000000001e-06,
      "loss": 0.0001,
      "step": 170
    },
    {
      "epoch": 90.0,
      "grad_norm": 0.002786654280498624,
      "learning_rate": 8.025e-06,
      "loss": 0.0001,
      "step": 180
    },
    {
      "epoch": 95.0,
      "grad_norm": 0.002315290505066514,
      "learning_rate": 7.775000000000001e-06,
      "loss": 0.0001,
      "step": 190
    },
    {
      "epoch": 100.0,
      "grad_norm": 0.0021433436777442694,
      "learning_rate": 7.525e-06,
      "loss": 0.0001,
      "step": 200
    },
    {
      "epoch": 105.0,
      "grad_norm": 0.001889344654045999,
      "learning_rate": 7.275000000000001e-06,
      "loss": 0.0001,
      "step": 210
    },
    {
      "epoch": 110.0,
      "grad_norm": 0.002015787875279784,
      "learning_rate": 7.0250000000000005e-06,
      "loss": 0.0001,
      "step": 220
    },
    {
      "epoch": 115.0,
      "grad_norm": 0.0015458973357453942,
      "learning_rate": 6.775e-06,
      "loss": 0.0,
      "step": 230
    },
    {
      "epoch": 120.0,
      "grad_norm": 0.0015052262460812926,
      "learning_rate": 6.525e-06,
      "loss": 0.0,
      "step": 240
    },
    {
      "epoch": 125.0,
      "grad_norm": 0.0014590113423764706,
      "learning_rate": 6.275e-06,
      "loss": 0.0,
      "step": 250
    },
    {
      "epoch": 130.0,
      "grad_norm": 0.0014116850215941668,
      "learning_rate": 6.025000000000001e-06,
      "loss": 0.0,
      "step": 260
    },
    {
      "epoch": 135.0,
      "grad_norm": 0.0013347205240279436,
      "learning_rate": 5.775000000000001e-06,
      "loss": 0.0,
      "step": 270
    },
    {
      "epoch": 140.0,
      "grad_norm": 0.001205465174280107,
      "learning_rate": 5.5250000000000005e-06,
      "loss": 0.0,
      "step": 280
    },
    {
      "epoch": 145.0,
      "grad_norm": 0.0012008518679067492,
      "learning_rate": 5.275e-06,
      "loss": 0.0,
      "step": 290
    },
    {
      "epoch": 150.0,
      "grad_norm": 0.0012062429450452328,
      "learning_rate": 5.025e-06,
      "loss": 0.0,
      "step": 300
    },
    {
      "epoch": 155.0,
      "grad_norm": 0.0011254053097218275,
      "learning_rate": 4.775e-06,
      "loss": 0.0,
      "step": 310
    },
    {
      "epoch": 160.0,
      "grad_norm": 0.0011136529501527548,
      "learning_rate": 4.525000000000001e-06,
      "loss": 0.0,
      "step": 320
    },
    {
      "epoch": 165.0,
      "grad_norm": 0.0009874015813693404,
      "learning_rate": 4.2750000000000006e-06,
      "loss": 0.0,
      "step": 330
    },
    {
      "epoch": 170.0,
      "grad_norm": 0.0010767158819362521,
      "learning_rate": 4.0250000000000004e-06,
      "loss": 0.0,
      "step": 340
    },
    {
      "epoch": 175.0,
      "grad_norm": 0.0009195652091875672,
      "learning_rate": 3.7750000000000003e-06,
      "loss": 0.0,
      "step": 350
    },
    {
      "epoch": 180.0,
      "grad_norm": 0.0009165621013380587,
      "learning_rate": 3.525e-06,
      "loss": 0.0,
      "step": 360
    },
    {
      "epoch": 185.0,
      "grad_norm": 0.0009350143373012543,
      "learning_rate": 3.2750000000000004e-06,
      "loss": 0.0,
      "step": 370
    },
    {
      "epoch": 190.0,
      "grad_norm": 0.0008542860159650445,
      "learning_rate": 3.0250000000000003e-06,
      "loss": 0.0,
      "step": 380
    },
    {
      "epoch": 195.0,
      "grad_norm": 0.0008694427087903023,
      "learning_rate": 2.7750000000000005e-06,
      "loss": 0.0,
      "step": 390
    },
    {
      "epoch": 200.0,
      "grad_norm": 0.000944987463299185,
      "learning_rate": 2.5250000000000004e-06,
      "loss": 0.0,
      "step": 400
    },
    {
      "epoch": 205.0,
      "grad_norm": 0.00086694530909881,
      "learning_rate": 2.2750000000000002e-06,
      "loss": 0.0,
      "step": 410
    },
    {
      "epoch": 210.0,
      "grad_norm": 0.0008247533696703613,
      "learning_rate": 2.025e-06,
      "loss": 0.0,
      "step": 420
    },
    {
      "epoch": 215.0,
      "grad_norm": 0.0008390002185478806,
      "learning_rate": 1.7750000000000002e-06,
      "loss": 0.0,
      "step": 430
    },
    {
      "epoch": 220.0,
      "grad_norm": 0.0008247996447607875,
      "learning_rate": 1.525e-06,
      "loss": 0.0,
      "step": 440
    },
    {
      "epoch": 225.0,
      "grad_norm": 0.000851471268106252,
      "learning_rate": 1.275e-06,
      "loss": 0.0,
      "step": 450
    },
    {
      "epoch": 230.0,
      "grad_norm": 0.0008073352510109544,
      "learning_rate": 1.025e-06,
      "loss": 0.0,
      "step": 460
    },
    {
      "epoch": 235.0,
      "grad_norm": 0.0008215947891585529,
      "learning_rate": 7.750000000000001e-07,
      "loss": 0.0,
      "step": 470
    },
    {
      "epoch": 240.0,
      "grad_norm": 0.0008052270859479904,
      "learning_rate": 5.250000000000001e-07,
      "loss": 0.0,
      "step": 480
    },
    {
      "epoch": 245.0,
      "grad_norm": 0.0008336780010722578,
      "learning_rate": 2.75e-07,
      "loss": 0.0,
      "step": 490
    },
    {
      "epoch": 250.0,
      "grad_norm": 0.0008618771098554134,
      "learning_rate": 2.5000000000000002e-08,
      "loss": 0.0,
      "step": 500
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 250,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.23653685248e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}