| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.992, | |
| "eval_steps": 60, | |
| "global_step": 682, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 2.045376777648926, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 1.0526, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 3.106898784637451, | |
| "learning_rate": 5.333333333333333e-05, | |
| "loss": 0.725, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.8986996412277222, | |
| "learning_rate": 8e-05, | |
| "loss": 0.6844, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_loss": 0.6844361424446106, | |
| "eval_runtime": 49.0643, | |
| "eval_samples_per_second": 35.036, | |
| "eval_steps_per_second": 1.101, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 2.5714163780212402, | |
| "learning_rate": 0.00010666666666666667, | |
| "loss": 0.6752, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.736541211605072, | |
| "learning_rate": 0.00013333333333333334, | |
| "loss": 0.6895, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 1.6849150657653809, | |
| "learning_rate": 0.00016, | |
| "loss": 0.6661, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_loss": 0.6780075430870056, | |
| "eval_runtime": 49.0527, | |
| "eval_samples_per_second": 35.044, | |
| "eval_steps_per_second": 1.101, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 1.0006439685821533, | |
| "learning_rate": 0.0001866666666666667, | |
| "loss": 0.6684, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 2.7500181198120117, | |
| "learning_rate": 0.00019574468085106384, | |
| "loss": 0.7304, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 1.4791840314865112, | |
| "learning_rate": 0.0001872340425531915, | |
| "loss": 0.6696, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "eval_loss": 0.6752753853797913, | |
| "eval_runtime": 48.8363, | |
| "eval_samples_per_second": 35.199, | |
| "eval_steps_per_second": 1.106, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 1.1989027261734009, | |
| "learning_rate": 0.00017872340425531915, | |
| "loss": 0.6812, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "grad_norm": 1.0437731742858887, | |
| "learning_rate": 0.00017021276595744682, | |
| "loss": 0.6886, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "grad_norm": 0.7693199515342712, | |
| "learning_rate": 0.00016170212765957446, | |
| "loss": 0.6783, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "eval_loss": 0.6913231611251831, | |
| "eval_runtime": 48.9814, | |
| "eval_samples_per_second": 35.095, | |
| "eval_steps_per_second": 1.102, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "grad_norm": 0.9999447464942932, | |
| "learning_rate": 0.00015319148936170213, | |
| "loss": 0.6733, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "grad_norm": 3.2817800045013428, | |
| "learning_rate": 0.0001446808510638298, | |
| "loss": 0.6894, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 1.485412836074829, | |
| "learning_rate": 0.00013617021276595746, | |
| "loss": 0.6714, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "eval_loss": 0.6768029928207397, | |
| "eval_runtime": 48.9645, | |
| "eval_samples_per_second": 35.107, | |
| "eval_steps_per_second": 1.103, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "grad_norm": 0.6202168464660645, | |
| "learning_rate": 0.00012765957446808513, | |
| "loss": 0.6726, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "grad_norm": 0.9941303133964539, | |
| "learning_rate": 0.00011914893617021277, | |
| "loss": 0.6547, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "grad_norm": 1.1737464666366577, | |
| "learning_rate": 0.00011063829787234043, | |
| "loss": 0.683, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "eval_loss": 0.6857156753540039, | |
| "eval_runtime": 49.0746, | |
| "eval_samples_per_second": 35.028, | |
| "eval_steps_per_second": 1.1, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "grad_norm": 2.2875583171844482, | |
| "learning_rate": 0.00010212765957446809, | |
| "loss": 0.6763, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "grad_norm": 0.5993250608444214, | |
| "learning_rate": 9.361702127659576e-05, | |
| "loss": 0.659, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "grad_norm": 1.2196799516677856, | |
| "learning_rate": 8.510638297872341e-05, | |
| "loss": 0.6541, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "eval_loss": 0.6817581653594971, | |
| "eval_runtime": 49.0974, | |
| "eval_samples_per_second": 35.012, | |
| "eval_steps_per_second": 1.1, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "grad_norm": 1.3733019828796387, | |
| "learning_rate": 7.659574468085106e-05, | |
| "loss": 0.6489, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "grad_norm": 2.523149013519287, | |
| "learning_rate": 6.808510638297873e-05, | |
| "loss": 0.6602, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "grad_norm": 1.8784421682357788, | |
| "learning_rate": 5.9574468085106384e-05, | |
| "loss": 0.6429, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "eval_loss": 0.668049693107605, | |
| "eval_runtime": 49.0805, | |
| "eval_samples_per_second": 35.024, | |
| "eval_steps_per_second": 1.1, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 1.0589911937713623, | |
| "learning_rate": 5.1063829787234044e-05, | |
| "loss": 0.6452, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 8.384, | |
| "grad_norm": 1.13771653175354, | |
| "learning_rate": 4.2553191489361704e-05, | |
| "loss": 0.6496, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 8.704, | |
| "grad_norm": 1.4135953187942505, | |
| "learning_rate": 3.4042553191489365e-05, | |
| "loss": 0.6299, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 8.704, | |
| "eval_loss": 0.6739547848701477, | |
| "eval_runtime": 50.9783, | |
| "eval_samples_per_second": 33.72, | |
| "eval_steps_per_second": 1.059, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 9.024, | |
| "grad_norm": 0.9489519000053406, | |
| "learning_rate": 2.5531914893617022e-05, | |
| "loss": 0.6414, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 9.344, | |
| "grad_norm": 1.4730383157730103, | |
| "learning_rate": 1.7021276595744682e-05, | |
| "loss": 0.6083, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 9.664, | |
| "grad_norm": 1.221296787261963, | |
| "learning_rate": 8.510638297872341e-06, | |
| "loss": 0.6431, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 9.664, | |
| "eval_loss": 0.6727736592292786, | |
| "eval_runtime": 50.9608, | |
| "eval_samples_per_second": 33.732, | |
| "eval_steps_per_second": 1.06, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 9.984, | |
| "grad_norm": 1.7475664615631104, | |
| "learning_rate": 0.0, | |
| "loss": 0.6438, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 10.32, | |
| "grad_norm": 2.3684651851654053, | |
| "learning_rate": 0.00011009174311926606, | |
| "loss": 0.6419, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 10.64, | |
| "grad_norm": 1.4250073432922363, | |
| "learning_rate": 0.00010642201834862387, | |
| "loss": 0.6464, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 10.64, | |
| "eval_loss": 0.6687487363815308, | |
| "eval_runtime": 50.793, | |
| "eval_samples_per_second": 33.843, | |
| "eval_steps_per_second": 1.063, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 10.96, | |
| "grad_norm": 0.8611794114112854, | |
| "learning_rate": 0.00010275229357798166, | |
| "loss": 0.6436, | |
| "step": 680 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 1240, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.298579454918656e+16, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |