{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 100.0,
  "global_step": 400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 2.5,
      "learning_rate": 1e-05,
      "loss": 2.2814,
      "step": 10
    },
    {
      "epoch": 5.0,
      "learning_rate": 2e-05,
      "loss": 2.2162,
      "step": 20
    },
    {
      "epoch": 7.5,
      "learning_rate": 3e-05,
      "loss": 2.06,
      "step": 30
    },
    {
      "epoch": 10.0,
      "learning_rate": 4e-05,
      "loss": 1.7632,
      "step": 40
    },
    {
      "epoch": 12.5,
      "learning_rate": 5e-05,
      "loss": 1.3906,
      "step": 50
    },
    {
      "epoch": 15.0,
      "learning_rate": 6e-05,
      "loss": 1.0958,
      "step": 60
    },
    {
      "epoch": 17.5,
      "learning_rate": 7e-05,
      "loss": 0.8168,
      "step": 70
    },
    {
      "epoch": 20.0,
      "learning_rate": 8e-05,
      "loss": 0.6212,
      "step": 80
    },
    {
      "epoch": 22.5,
      "learning_rate": 9e-05,
      "loss": 0.5377,
      "step": 90
    },
    {
      "epoch": 25.0,
      "learning_rate": 0.0001,
      "loss": 0.4409,
      "step": 100
    },
    {
      "epoch": 27.5,
      "learning_rate": 9.972609476841367e-05,
      "loss": 0.4009,
      "step": 110
    },
    {
      "epoch": 30.0,
      "learning_rate": 9.890738003669029e-05,
      "loss": 0.3598,
      "step": 120
    },
    {
      "epoch": 32.5,
      "learning_rate": 9.755282581475769e-05,
      "loss": 0.2784,
      "step": 130
    },
    {
      "epoch": 35.0,
      "learning_rate": 9.567727288213005e-05,
      "loss": 0.3299,
      "step": 140
    },
    {
      "epoch": 37.5,
      "learning_rate": 9.330127018922194e-05,
      "loss": 0.3236,
      "step": 150
    },
    {
      "epoch": 40.0,
      "learning_rate": 9.045084971874738e-05,
      "loss": 0.2714,
      "step": 160
    },
    {
      "epoch": 42.5,
      "learning_rate": 8.715724127386972e-05,
      "loss": 0.2654,
      "step": 170
    },
    {
      "epoch": 45.0,
      "learning_rate": 8.345653031794292e-05,
      "loss": 0.254,
      "step": 180
    },
    {
      "epoch": 47.5,
      "learning_rate": 7.938926261462366e-05,
      "loss": 0.2223,
      "step": 190
    },
    {
      "epoch": 50.0,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.2671,
      "step": 200
    },
    {
      "epoch": 52.5,
      "learning_rate": 7.033683215379002e-05,
      "loss": 0.2293,
      "step": 210
    },
    {
      "epoch": 55.0,
      "learning_rate": 6.545084971874738e-05,
      "loss": 0.2235,
      "step": 220
    },
    {
      "epoch": 57.5,
      "learning_rate": 6.0395584540887963e-05,
      "loss": 0.2031,
      "step": 230
    },
    {
      "epoch": 60.0,
      "learning_rate": 5.522642316338268e-05,
      "loss": 0.2034,
      "step": 240
    },
    {
      "epoch": 62.5,
      "learning_rate": 5e-05,
      "loss": 0.189,
      "step": 250
    },
    {
      "epoch": 65.0,
      "learning_rate": 4.477357683661734e-05,
      "loss": 0.1788,
      "step": 260
    },
    {
      "epoch": 67.5,
      "learning_rate": 3.960441545911204e-05,
      "loss": 0.1698,
      "step": 270
    },
    {
      "epoch": 70.0,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 0.2309,
      "step": 280
    },
    {
      "epoch": 72.5,
      "learning_rate": 2.9663167846209998e-05,
      "loss": 0.1637,
      "step": 290
    },
    {
      "epoch": 75.0,
      "learning_rate": 2.500000000000001e-05,
      "loss": 0.1669,
      "step": 300
    },
    {
      "epoch": 77.5,
      "learning_rate": 2.061073738537635e-05,
      "loss": 0.1786,
      "step": 310
    },
    {
      "epoch": 80.0,
      "learning_rate": 1.6543469682057106e-05,
      "loss": 0.1792,
      "step": 320
    },
    {
      "epoch": 82.5,
      "learning_rate": 1.2842758726130283e-05,
      "loss": 0.1874,
      "step": 330
    },
    {
      "epoch": 85.0,
      "learning_rate": 9.549150281252633e-06,
      "loss": 0.1805,
      "step": 340
    },
    {
      "epoch": 87.5,
      "learning_rate": 6.698729810778065e-06,
      "loss": 0.196,
      "step": 350
    },
    {
      "epoch": 90.0,
      "learning_rate": 4.322727117869951e-06,
      "loss": 0.1615,
      "step": 360
    },
    {
      "epoch": 92.5,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 0.1481,
      "step": 370
    },
    {
      "epoch": 95.0,
      "learning_rate": 1.0926199633097157e-06,
      "loss": 0.1633,
      "step": 380
    },
    {
      "epoch": 97.5,
      "learning_rate": 2.7390523158633554e-07,
      "loss": 0.1673,
      "step": 390
    },
    {
      "epoch": 100.0,
      "learning_rate": 0.0,
      "loss": 0.1748,
      "step": 400
    },
    {
      "epoch": 100.0,
      "step": 400,
      "total_flos": 1.3814551337974377e+23,
      "train_loss": 0.4972930908203125,
      "train_runtime": 820.6819,
      "train_samples_per_second": 4242.326,
      "train_steps_per_second": 0.487
    }
  ],
  "max_steps": 400,
  "num_train_epochs": 100,
  "start_time": 1656513160.5917685,
  "total_flos": 1.3814551337974377e+23,
  "trial_name": null,
  "trial_params": null
}
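
A minimal sketch of how this log can be inspected, assuming the file above is saved under the Trainer's default name "trainer_state.json" and that matplotlib is available (both are assumptions, not part of the file itself). It loads the JSON, keeps the per-step entries from log_history (the final summary entry carries "train_loss" instead of "loss" and is skipped by the filter), and plots training loss and learning rate against the global step.

import json
import matplotlib.pyplot as plt

# Assumed file name; adjust if the state file is stored elsewhere.
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step logging entries only; the trailing summary record has no "loss" key.
logs = [e for e in state["log_history"] if "loss" in e and "learning_rate" in e]

steps = [e["step"] for e in logs]
losses = [e["loss"] for e in logs]
lrs = [e["learning_rate"] for e in logs]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True)
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("global step")
fig.tight_layout()
plt.show()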