{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 4, "global_step": 78, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05128205128205128, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 2.399, "step": 4 }, { "epoch": 0.10256410256410256, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 2.3892, "step": 8 }, { "epoch": 0.15384615384615385, "grad_norm": 0.26211628317832947, "learning_rate": 0.002, "loss": 2.3982, "step": 12 }, { "epoch": 0.20512820512820512, "grad_norm": 0.2586708962917328, "learning_rate": 0.001414213562373095, "loss": 2.3584, "step": 16 }, { "epoch": 0.2564102564102564, "grad_norm": 6.628456115722656, "learning_rate": 0.001, "loss": 3.0655, "step": 20 }, { "epoch": 0.3076923076923077, "grad_norm": 2.3061695098876953, "learning_rate": 0.0007071067811865475, "loss": 3.3394, "step": 24 }, { "epoch": 0.358974358974359, "grad_norm": 1.368303894996643, "learning_rate": 0.0006666666666666666, "loss": 5.6774, "step": 28 }, { "epoch": 0.41025641025641024, "grad_norm": 1.368303894996643, "learning_rate": 0.0006666666666666666, "loss": 6.81, "step": 32 }, { "epoch": 0.46153846153846156, "grad_norm": 3.298046350479126, "learning_rate": 0.0006030226891555273, "loss": 5.6635, "step": 36 }, { "epoch": 0.5128205128205128, "grad_norm": 0.5323753356933594, "learning_rate": 0.0005163977794943222, "loss": 2.1228, "step": 40 }, { "epoch": 0.5641025641025641, "grad_norm": 0.6583826541900635, "learning_rate": 0.0004588314677411235, "loss": 2.0043, "step": 44 }, { "epoch": 0.6153846153846154, "grad_norm": 0.37811949849128723, "learning_rate": 0.0004170288281141495, "loss": 1.8983, "step": 48 }, { "epoch": 0.6666666666666666, "grad_norm": 0.4072262644767761, "learning_rate": 0.00038490017945975053, "loss": 1.8793, "step": 52 }, { "epoch": 0.717948717948718, "grad_norm": 0.4174947738647461, "learning_rate": 0.00035921060405354985, "loss": 1.8188, "step": 56 }, { "epoch": 0.7692307692307693, "grad_norm": 0.45649221539497375, "learning_rate": 0.0003380617018914066, "loss": 1.738, "step": 60 }, { "epoch": 0.8205128205128205, "grad_norm": 0.420207142829895, "learning_rate": 0.00032025630761017425, "loss": 1.7158, "step": 64 }, { "epoch": 0.8717948717948718, "grad_norm": 0.4762813448905945, "learning_rate": 0.00030499714066520935, "loss": 1.677, "step": 68 }, { "epoch": 0.9230769230769231, "grad_norm": 0.4540621042251587, "learning_rate": 0.0002917299829957891, "loss": 1.6112, "step": 72 }, { "epoch": 0.9743589743589743, "grad_norm": 0.44487470388412476, "learning_rate": 0.00028005601680560193, "loss": 1.5661, "step": 76 }, { "epoch": 1.0, "step": 78, "total_flos": 7.922041751265608e+17, "train_loss": 2.710762830880972, "train_runtime": 773.1929, "train_samples_per_second": 12.913, "train_steps_per_second": 0.101 } ], "logging_steps": 4, "max_steps": 78, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.922041751265608e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }