{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 289, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03460207612456748, "grad_norm": 20.492570877075195, "learning_rate": 9.653979238754326e-06, "loss": 0.5193, "step": 10 }, { "epoch": 0.06920415224913495, "grad_norm": 13.140957832336426, "learning_rate": 9.307958477508652e-06, "loss": 0.3286, "step": 20 }, { "epoch": 0.10380622837370242, "grad_norm": 3.8919756412506104, "learning_rate": 8.961937716262975e-06, "loss": 0.1891, "step": 30 }, { "epoch": 0.1384083044982699, "grad_norm": 8.864481925964355, "learning_rate": 8.615916955017302e-06, "loss": 0.2073, "step": 40 }, { "epoch": 0.17301038062283736, "grad_norm": 8.466791152954102, "learning_rate": 8.269896193771627e-06, "loss": 0.0768, "step": 50 }, { "epoch": 0.20761245674740483, "grad_norm": 14.27080249786377, "learning_rate": 7.923875432525952e-06, "loss": 0.1692, "step": 60 }, { "epoch": 0.2422145328719723, "grad_norm": 3.208296537399292, "learning_rate": 7.577854671280277e-06, "loss": 0.1812, "step": 70 }, { "epoch": 0.2768166089965398, "grad_norm": 4.197875499725342, "learning_rate": 7.2318339100346025e-06, "loss": 0.1341, "step": 80 }, { "epoch": 0.31141868512110726, "grad_norm": 6.3515825271606445, "learning_rate": 6.885813148788928e-06, "loss": 0.1277, "step": 90 }, { "epoch": 0.3460207612456747, "grad_norm": 4.87689733505249, "learning_rate": 6.539792387543253e-06, "loss": 0.0761, "step": 100 }, { "epoch": 0.3806228373702422, "grad_norm": 2.5042741298675537, "learning_rate": 6.193771626297579e-06, "loss": 0.059, "step": 110 }, { "epoch": 0.41522491349480967, "grad_norm": 5.011392593383789, "learning_rate": 5.847750865051903e-06, "loss": 0.0985, "step": 120 }, { "epoch": 0.44982698961937717, "grad_norm": 4.462304592132568, "learning_rate": 5.501730103806229e-06, "loss": 0.0964, "step": 130 }, { "epoch": 0.4844290657439446, "grad_norm": 7.347049236297607, "learning_rate": 5.155709342560554e-06, "loss": 0.1312, "step": 140 }, { "epoch": 0.5190311418685121, "grad_norm": 6.566490650177002, "learning_rate": 4.809688581314879e-06, "loss": 0.1423, "step": 150 }, { "epoch": 0.5536332179930796, "grad_norm": 8.601099014282227, "learning_rate": 4.463667820069205e-06, "loss": 0.071, "step": 160 }, { "epoch": 0.5882352941176471, "grad_norm": 1.2925947904586792, "learning_rate": 4.11764705882353e-06, "loss": 0.1023, "step": 170 }, { "epoch": 0.6228373702422145, "grad_norm": 9.43086051940918, "learning_rate": 3.7716262975778552e-06, "loss": 0.1289, "step": 180 }, { "epoch": 0.657439446366782, "grad_norm": 5.09530782699585, "learning_rate": 3.42560553633218e-06, "loss": 0.0939, "step": 190 }, { "epoch": 0.6920415224913494, "grad_norm": 11.45658016204834, "learning_rate": 3.0795847750865054e-06, "loss": 0.0839, "step": 200 }, { "epoch": 0.726643598615917, "grad_norm": 3.7783803939819336, "learning_rate": 2.7335640138408307e-06, "loss": 0.0503, "step": 210 }, { "epoch": 0.7612456747404844, "grad_norm": 0.32719558477401733, "learning_rate": 2.387543252595156e-06, "loss": 0.0956, "step": 220 }, { "epoch": 0.7958477508650519, "grad_norm": 3.984323024749756, "learning_rate": 2.041522491349481e-06, "loss": 0.0761, "step": 230 }, { "epoch": 0.8304498269896193, "grad_norm": 3.654313087463379, "learning_rate": 1.6955017301038063e-06, "loss": 0.0784, "step": 240 }, { "epoch": 0.8650519031141869, "grad_norm": 0.20838171243667603, "learning_rate": 1.3494809688581318e-06, "loss": 0.0666, "step": 250 }, { "epoch": 0.8996539792387543, "grad_norm": 13.685569763183594, "learning_rate": 1.0034602076124569e-06, "loss": 0.089, "step": 260 }, { "epoch": 0.9342560553633218, "grad_norm": 0.6432996392250061, "learning_rate": 6.57439446366782e-07, "loss": 0.0624, "step": 270 }, { "epoch": 0.9688581314878892, "grad_norm": 9.100397109985352, "learning_rate": 3.114186851211073e-07, "loss": 0.0828, "step": 280 } ], "logging_steps": 10, "max_steps": 289, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }