{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.993607305936073,
  "eval_steps": 500,
  "global_step": 272,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1461187214611872,
      "grad_norm": 2.1859907903291487,
      "learning_rate": 1e-05,
      "loss": 1.1204,
      "step": 10
    },
    {
      "epoch": 0.2922374429223744,
      "grad_norm": 1.1409575473262819,
      "learning_rate": 9.619771863117872e-06,
      "loss": 0.9067,
      "step": 20
    },
    {
      "epoch": 0.4383561643835616,
      "grad_norm": 1.0954325906176867,
      "learning_rate": 9.239543726235742e-06,
      "loss": 0.8594,
      "step": 30
    },
    {
      "epoch": 0.5844748858447488,
      "grad_norm": 1.044115826945501,
      "learning_rate": 8.859315589353613e-06,
      "loss": 0.8195,
      "step": 40
    },
    {
      "epoch": 0.730593607305936,
      "grad_norm": 0.9711157529023948,
      "learning_rate": 8.479087452471484e-06,
      "loss": 0.8234,
      "step": 50
    },
    {
      "epoch": 0.8767123287671232,
      "grad_norm": 0.9306727693571185,
      "learning_rate": 8.098859315589354e-06,
      "loss": 0.797,
      "step": 60
    },
    {
      "epoch": 1.0292237442922374,
      "grad_norm": 0.9787347032260009,
      "learning_rate": 7.718631178707225e-06,
      "loss": 0.8408,
      "step": 70
    },
    {
      "epoch": 1.1753424657534246,
      "grad_norm": 0.8884461385333994,
      "learning_rate": 7.338403041825095e-06,
      "loss": 0.7284,
      "step": 80
    },
    {
      "epoch": 1.3214611872146118,
      "grad_norm": 0.8436692395158627,
      "learning_rate": 6.9581749049429655e-06,
      "loss": 0.7073,
      "step": 90
    },
    {
      "epoch": 1.467579908675799,
      "grad_norm": 0.8618862236545547,
      "learning_rate": 6.577946768060837e-06,
      "loss": 0.7123,
      "step": 100
    },
    {
      "epoch": 1.6136986301369864,
      "grad_norm": 0.8663851098852621,
      "learning_rate": 6.197718631178707e-06,
      "loss": 0.7154,
      "step": 110
    },
    {
      "epoch": 1.7598173515981737,
      "grad_norm": 0.8275684957862481,
      "learning_rate": 5.817490494296578e-06,
      "loss": 0.6963,
      "step": 120
    },
    {
      "epoch": 1.9059360730593609,
      "grad_norm": 0.906736540956783,
      "learning_rate": 5.437262357414449e-06,
      "loss": 0.7094,
      "step": 130
    },
    {
      "epoch": 2.058447488584475,
      "grad_norm": 0.9421948118312913,
      "learning_rate": 5.05703422053232e-06,
      "loss": 0.7617,
      "step": 140
    },
    {
      "epoch": 2.204566210045662,
      "grad_norm": 0.9061317437755783,
      "learning_rate": 4.67680608365019e-06,
      "loss": 0.6572,
      "step": 150
    },
    {
      "epoch": 2.350684931506849,
      "grad_norm": 0.9188196767115556,
      "learning_rate": 4.2965779467680614e-06,
      "loss": 0.6345,
      "step": 160
    },
    {
      "epoch": 2.4968036529680364,
      "grad_norm": 0.9049399725414525,
      "learning_rate": 3.916349809885932e-06,
      "loss": 0.6518,
      "step": 170
    },
    {
      "epoch": 2.6429223744292236,
      "grad_norm": 0.8983251599300998,
      "learning_rate": 3.536121673003803e-06,
      "loss": 0.6403,
      "step": 180
    },
    {
      "epoch": 2.789041095890411,
      "grad_norm": 0.8154355993502768,
      "learning_rate": 3.155893536121673e-06,
      "loss": 0.6471,
      "step": 190
    },
    {
      "epoch": 2.935159817351598,
      "grad_norm": 0.8754095287816128,
      "learning_rate": 2.7756653992395438e-06,
      "loss": 0.6408,
      "step": 200
    },
    {
      "epoch": 3.0876712328767124,
      "grad_norm": 0.9077061427857563,
      "learning_rate": 2.3954372623574147e-06,
      "loss": 0.6591,
      "step": 210
    },
    {
      "epoch": 3.2337899543378996,
      "grad_norm": 0.901749586378799,
      "learning_rate": 2.015209125475285e-06,
      "loss": 0.6193,
      "step": 220
    },
    {
      "epoch": 3.379908675799087,
      "grad_norm": 0.8630345017914713,
      "learning_rate": 1.634980988593156e-06,
      "loss": 0.5929,
      "step": 230
    },
    {
      "epoch": 3.526027397260274,
      "grad_norm": 0.8211659550904478,
      "learning_rate": 1.2547528517110266e-06,
      "loss": 0.6103,
      "step": 240
    },
    {
      "epoch": 3.6721461187214612,
      "grad_norm": 0.872245747524504,
      "learning_rate": 8.745247148288974e-07,
      "loss": 0.5939,
      "step": 250
    },
    {
      "epoch": 3.8182648401826484,
      "grad_norm": 0.8858014720813091,
      "learning_rate": 4.942965779467681e-07,
      "loss": 0.5966,
      "step": 260
    },
    {
      "epoch": 3.9643835616438357,
      "grad_norm": 0.8667147147445566,
      "learning_rate": 1.140684410646388e-07,
      "loss": 0.6042,
      "step": 270
    }
  ],
  "logging_steps": 10,
  "max_steps": 272,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 125241405669376.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}