{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 50.0,
  "global_step": 5750,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.87,
      "learning_rate": 3.3949999999999997e-06,
      "loss": 13.6823,
      "step": 100
    },
    {
      "epoch": 1.74,
      "learning_rate": 6.895e-06,
      "loss": 7.5854,
      "step": 200
    },
    {
      "epoch": 2.61,
      "learning_rate": 1.0394999999999998e-05,
      "loss": 4.3711,
      "step": 300
    },
    {
      "epoch": 2.61,
      "eval_loss": 4.312221050262451,
      "eval_runtime": 80.91,
      "eval_samples_per_second": 21.011,
      "eval_steps_per_second": 1.322,
      "eval_wer": 1.0,
      "step": 300
    },
    {
      "epoch": 3.48,
      "learning_rate": 1.3895e-05,
      "loss": 3.8129,
      "step": 400
    },
    {
      "epoch": 4.35,
      "learning_rate": 1.7395e-05,
      "loss": 3.4258,
      "step": 500
    },
    {
      "epoch": 5.22,
      "learning_rate": 2.0894999999999996e-05,
      "loss": 3.1653,
      "step": 600
    },
    {
      "epoch": 5.22,
      "eval_loss": 3.115588426589966,
      "eval_runtime": 81.1622,
      "eval_samples_per_second": 20.946,
      "eval_steps_per_second": 1.318,
      "eval_wer": 1.0,
      "step": 600
    },
    {
      "epoch": 6.09,
      "learning_rate": 2.4394999999999996e-05,
      "loss": 3.0356,
      "step": 700
    },
    {
      "epoch": 6.96,
      "learning_rate": 2.7895e-05,
      "loss": 2.9791,
      "step": 800
    },
    {
      "epoch": 7.83,
      "learning_rate": 3.1395e-05,
      "loss": 2.8904,
      "step": 900
    },
    {
      "epoch": 7.83,
      "eval_loss": 2.842055320739746,
      "eval_runtime": 84.1755,
      "eval_samples_per_second": 20.196,
      "eval_steps_per_second": 1.271,
      "eval_wer": 0.9918110836031232,
      "step": 900
    },
    {
      "epoch": 8.7,
      "learning_rate": 3.4895e-05,
      "loss": 2.1422,
      "step": 1000
    },
    {
      "epoch": 9.57,
      "learning_rate": 3.8394999999999994e-05,
      "loss": 1.2257,
      "step": 1100
    },
    {
      "epoch": 10.43,
      "learning_rate": 4.1895e-05,
      "loss": 0.9207,
      "step": 1200
    },
    {
      "epoch": 10.43,
      "eval_loss": 0.9894591569900513,
      "eval_runtime": 82.3044,
      "eval_samples_per_second": 20.655,
      "eval_steps_per_second": 1.3,
      "eval_wer": 0.8688503777058338,
      "step": 1200
    },
    {
      "epoch": 11.3,
      "learning_rate": 4.5394999999999995e-05,
      "loss": 0.7881,
      "step": 1300
    },
    {
      "epoch": 12.17,
      "learning_rate": 4.8895e-05,
      "loss": 0.7047,
      "step": 1400
    },
    {
      "epoch": 13.04,
      "learning_rate": 5.2395e-05,
      "loss": 0.6384,
      "step": 1500
    },
    {
      "epoch": 13.04,
      "eval_loss": 0.6993927359580994,
      "eval_runtime": 82.4631,
      "eval_samples_per_second": 20.615,
      "eval_steps_per_second": 1.298,
      "eval_wer": 0.7700120611946931,
      "step": 1500
    },
    {
      "epoch": 13.91,
      "learning_rate": 5.589499999999999e-05,
      "loss": 0.5989,
      "step": 1600
    },
    {
      "epoch": 14.78,
      "learning_rate": 5.9394999999999996e-05,
      "loss": 0.5601,
      "step": 1700
    },
    {
      "epoch": 15.65,
      "learning_rate": 6.289499999999999e-05,
      "loss": 0.5215,
      "step": 1800
    },
    {
      "epoch": 15.65,
      "eval_loss": 0.5627515912055969,
      "eval_runtime": 81.3944,
      "eval_samples_per_second": 20.886,
      "eval_steps_per_second": 1.315,
      "eval_wer": 0.6443217164984447,
      "step": 1800
    },
    {
      "epoch": 16.52,
      "learning_rate": 6.639499999999999e-05,
      "loss": 0.4949,
      "step": 1900
    },
    {
      "epoch": 17.39,
      "learning_rate": 6.9895e-05,
      "loss": 0.4869,
      "step": 2000
    },
    {
      "epoch": 18.26,
      "learning_rate": 6.818933333333333e-05,
      "loss": 0.4573,
      "step": 2100
    },
    {
      "epoch": 18.26,
      "eval_loss": 0.5316212177276611,
      "eval_runtime": 82.2602,
      "eval_samples_per_second": 20.666,
      "eval_steps_per_second": 1.301,
      "eval_wer": 0.6174062083412684,
      "step": 2100
    },
    {
      "epoch": 19.13,
      "learning_rate": 6.632266666666666e-05,
      "loss": 0.438,
      "step": 2200
    },
    {
      "epoch": 20.0,
      "learning_rate": 6.4456e-05,
      "loss": 0.4153,
      "step": 2300
    },
    {
      "epoch": 20.87,
      "learning_rate": 6.258933333333333e-05,
      "loss": 0.3875,
      "step": 2400
    },
    {
      "epoch": 20.87,
      "eval_loss": 0.4931696653366089,
      "eval_runtime": 80.8801,
      "eval_samples_per_second": 21.019,
      "eval_steps_per_second": 1.323,
      "eval_wer": 0.5778581857423982,
      "step": 2400
    },
    {
      "epoch": 21.74,
      "learning_rate": 6.072266666666667e-05,
      "loss": 0.3807,
      "step": 2500
    },
    {
      "epoch": 22.61,
      "learning_rate": 5.8855999999999993e-05,
      "loss": 0.3715,
      "step": 2600
    },
    {
      "epoch": 23.48,
      "learning_rate": 5.6989333333333333e-05,
      "loss": 0.3562,
      "step": 2700
    },
    {
      "epoch": 23.48,
      "eval_loss": 0.4971640110015869,
      "eval_runtime": 82.278,
      "eval_samples_per_second": 20.662,
      "eval_steps_per_second": 1.3,
      "eval_wer": 0.547514759093506,
      "step": 2700
    },
    {
      "epoch": 24.35,
      "learning_rate": 5.512266666666666e-05,
      "loss": 0.3457,
      "step": 2800
    },
    {
      "epoch": 25.22,
      "learning_rate": 5.3256e-05,
      "loss": 0.3356,
      "step": 2900
    },
    {
      "epoch": 26.09,
      "learning_rate": 5.1389333333333326e-05,
      "loss": 0.3218,
      "step": 3000
    },
    {
      "epoch": 26.09,
      "eval_loss": 0.4894775450229645,
      "eval_runtime": 81.4053,
      "eval_samples_per_second": 20.883,
      "eval_steps_per_second": 1.314,
      "eval_wer": 0.5219323303497746,
      "step": 3000
    },
    {
      "epoch": 26.96,
      "learning_rate": 4.9522666666666666e-05,
      "loss": 0.3072,
      "step": 3100
    },
    {
      "epoch": 27.83,
      "learning_rate": 4.765599999999999e-05,
      "loss": 0.3006,
      "step": 3200
    },
    {
      "epoch": 28.7,
      "learning_rate": 4.578933333333333e-05,
      "loss": 0.2954,
      "step": 3300
    },
    {
      "epoch": 28.7,
      "eval_loss": 0.5226009488105774,
      "eval_runtime": 82.2645,
      "eval_samples_per_second": 20.665,
      "eval_steps_per_second": 1.301,
      "eval_wer": 0.5192026915508157,
      "step": 3300
    },
    {
      "epoch": 29.57,
      "learning_rate": 4.392266666666666e-05,
      "loss": 0.2965,
      "step": 3400
    },
    {
      "epoch": 30.43,
      "learning_rate": 4.2056e-05,
      "loss": 0.286,
      "step": 3500
    },
    {
      "epoch": 31.3,
      "learning_rate": 4.018933333333333e-05,
      "loss": 0.287,
      "step": 3600
    },
    {
      "epoch": 31.3,
      "eval_loss": 0.495715469121933,
      "eval_runtime": 79.9357,
      "eval_samples_per_second": 21.267,
      "eval_steps_per_second": 1.339,
      "eval_wer": 0.5145686535897924,
      "step": 3600
    },
    {
      "epoch": 32.17,
      "learning_rate": 3.8322666666666665e-05,
      "loss": 0.2768,
      "step": 3700
    },
    {
      "epoch": 33.04,
      "learning_rate": 3.6456e-05,
      "loss": 0.2731,
      "step": 3800
    },
    {
      "epoch": 33.91,
      "learning_rate": 3.458933333333333e-05,
      "loss": 0.2587,
      "step": 3900
    },
    {
      "epoch": 33.91,
      "eval_loss": 0.49437007308006287,
      "eval_runtime": 82.64,
      "eval_samples_per_second": 20.571,
      "eval_steps_per_second": 1.295,
      "eval_wer": 0.48930362470640515,
      "step": 3900
    },
    {
      "epoch": 34.78,
      "learning_rate": 3.2722666666666664e-05,
      "loss": 0.2549,
      "step": 4000
    },
    {
      "epoch": 35.65,
      "learning_rate": 3.0856e-05,
      "loss": 0.2538,
      "step": 4100
    },
    {
      "epoch": 36.52,
      "learning_rate": 2.8989333333333334e-05,
      "loss": 0.2496,
      "step": 4200
    },
    {
      "epoch": 36.52,
      "eval_loss": 0.4975605010986328,
      "eval_runtime": 82.1209,
      "eval_samples_per_second": 20.701,
      "eval_steps_per_second": 1.303,
      "eval_wer": 0.4894940646226116,
      "step": 4200
    },
    {
      "epoch": 37.39,
      "learning_rate": 2.7122666666666667e-05,
      "loss": 0.2461,
      "step": 4300
    },
    {
      "epoch": 38.26,
      "learning_rate": 2.5256e-05,
      "loss": 0.2359,
      "step": 4400
    },
    {
      "epoch": 39.13,
      "learning_rate": 2.3389333333333333e-05,
      "loss": 0.2365,
      "step": 4500
    },
    {
      "epoch": 39.13,
      "eval_loss": 0.5185123085975647,
      "eval_runtime": 79.6732,
      "eval_samples_per_second": 21.337,
      "eval_steps_per_second": 1.343,
      "eval_wer": 0.4818764679743541,
      "step": 4500
    },
    {
      "epoch": 40.0,
      "learning_rate": 2.1522666666666666e-05,
      "loss": 0.2357,
      "step": 4600
    },
    {
      "epoch": 40.87,
      "learning_rate": 1.9656e-05,
      "loss": 0.2289,
      "step": 4700
    },
    {
      "epoch": 41.74,
      "learning_rate": 1.7789333333333333e-05,
      "loss": 0.2264,
      "step": 4800
    },
    {
      "epoch": 41.74,
      "eval_loss": 0.5152125954627991,
      "eval_runtime": 79.201,
      "eval_samples_per_second": 21.464,
      "eval_steps_per_second": 1.351,
      "eval_wer": 0.47755982987367485,
      "step": 4800
    },
    {
      "epoch": 42.61,
      "learning_rate": 1.5922666666666666e-05,
      "loss": 0.2211,
      "step": 4900
    },
    {
      "epoch": 43.48,
      "learning_rate": 1.4055999999999999e-05,
      "loss": 0.2186,
      "step": 5000
    },
    {
      "epoch": 44.35,
      "learning_rate": 1.2189333333333332e-05,
      "loss": 0.2224,
      "step": 5100
    },
    {
      "epoch": 44.35,
      "eval_loss": 0.5030579566955566,
      "eval_runtime": 80.9089,
      "eval_samples_per_second": 21.011,
      "eval_steps_per_second": 1.322,
      "eval_wer": 0.4745762711864407,
      "step": 5100
    },
    {
      "epoch": 45.22,
      "learning_rate": 1.0322666666666665e-05,
      "loss": 0.2162,
      "step": 5200
    },
    {
      "epoch": 46.09,
      "learning_rate": 8.456e-06,
      "loss": 0.2159,
      "step": 5300
    },
    {
      "epoch": 46.96,
      "learning_rate": 6.589333333333332e-06,
      "loss": 0.2096,
      "step": 5400
    },
    {
      "epoch": 46.96,
      "eval_loss": 0.5061585307121277,
      "eval_runtime": 81.3005,
      "eval_samples_per_second": 20.91,
      "eval_steps_per_second": 1.316,
      "eval_wer": 0.47076747286231196,
      "step": 5400
    },
    {
      "epoch": 47.83,
      "learning_rate": 4.7226666666666654e-06,
      "loss": 0.205,
      "step": 5500
    },
    {
      "epoch": 48.7,
      "learning_rate": 2.856e-06,
      "loss": 0.2038,
      "step": 5600
    },
    {
      "epoch": 49.57,
      "learning_rate": 9.893333333333332e-07,
      "loss": 0.2038,
      "step": 5700
    },
    {
      "epoch": 49.57,
      "eval_loss": 0.5217297077178955,
      "eval_runtime": 83.7172,
      "eval_samples_per_second": 20.306,
      "eval_steps_per_second": 1.278,
      "eval_wer": 0.46981527328127975,
      "step": 5700
    },
    {
      "epoch": 50.0,
      "step": 5750,
      "total_flos": 2.9609940258263142e+19,
      "train_loss": 1.123646092788033,
      "train_runtime": 13730.8326,
      "train_samples_per_second": 13.386,
      "train_steps_per_second": 0.419
    }
  ],
  "max_steps": 5750,
  "num_train_epochs": 50,
  "total_flos": 2.9609940258263142e+19,
  "trial_name": null,
  "trial_params": null
}