{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 29.998955067920583,
  "global_step": 14340,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.84,
      "learning_rate": 0.000148875,
      "loss": 6.9674,
      "step": 400
    },
    {
      "epoch": 0.84,
      "eval_loss": 3.427687168121338,
      "eval_runtime": 218.9995,
      "eval_samples_per_second": 15.53,
      "eval_steps_per_second": 0.973,
      "eval_wer": 1.0,
      "step": 400
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.000298875,
      "loss": 2.566,
      "step": 800
    },
    {
      "epoch": 1.67,
      "eval_loss": 1.0569312572479248,
      "eval_runtime": 218.6402,
      "eval_samples_per_second": 15.555,
      "eval_steps_per_second": 0.974,
      "eval_wer": 0.7833242711949475,
      "step": 800
    },
    {
      "epoch": 2.51,
      "learning_rate": 0.0002912038404726735,
      "loss": 1.0118,
      "step": 1200
    },
    {
      "epoch": 2.51,
      "eval_loss": 0.6927362680435181,
      "eval_runtime": 220.9263,
      "eval_samples_per_second": 15.394,
      "eval_steps_per_second": 0.964,
      "eval_wer": 0.5602283658873227,
      "step": 1200
    },
    {
      "epoch": 3.35,
      "learning_rate": 0.000282341211225997,
      "loss": 0.7536,
      "step": 1600
    },
    {
      "epoch": 3.35,
      "eval_loss": 0.5688415765762329,
      "eval_runtime": 210.7589,
      "eval_samples_per_second": 16.137,
      "eval_steps_per_second": 1.011,
      "eval_wer": 0.5082395750554185,
      "step": 1600
    },
    {
      "epoch": 4.18,
      "learning_rate": 0.00027347858197932054,
      "loss": 0.6251,
      "step": 2000
    },
    {
      "epoch": 4.18,
      "eval_loss": 0.5367330312728882,
      "eval_runtime": 211.3794,
      "eval_samples_per_second": 16.09,
      "eval_steps_per_second": 1.008,
      "eval_wer": 0.46095612530846125,
      "step": 2000
    },
    {
      "epoch": 5.02,
      "learning_rate": 0.00026461595273264397,
      "loss": 0.5453,
      "step": 2400
    },
    {
      "epoch": 5.02,
      "eval_loss": 0.47355735301971436,
      "eval_runtime": 211.7978,
      "eval_samples_per_second": 16.058,
      "eval_steps_per_second": 1.006,
      "eval_wer": 0.4430758291856623,
      "step": 2400
    },
    {
      "epoch": 5.86,
      "learning_rate": 0.0002557533234859675,
      "loss": 0.4779,
      "step": 2800
    },
    {
      "epoch": 5.86,
      "eval_loss": 0.4465107023715973,
      "eval_runtime": 211.1636,
      "eval_samples_per_second": 16.106,
      "eval_steps_per_second": 1.009,
      "eval_wer": 0.4200719394370321,
      "step": 2800
    },
    {
      "epoch": 6.69,
      "learning_rate": 0.00024689069423929094,
      "loss": 0.4458,
      "step": 3200
    },
    {
      "epoch": 6.69,
      "eval_loss": 0.4270594120025635,
      "eval_runtime": 210.6558,
      "eval_samples_per_second": 16.145,
      "eval_steps_per_second": 1.011,
      "eval_wer": 0.4025262453469405,
      "step": 3200
    },
    {
      "epoch": 7.53,
      "learning_rate": 0.00023802806499261444,
      "loss": 0.4036,
      "step": 3600
    },
    {
      "epoch": 7.53,
      "eval_loss": 0.4414581060409546,
      "eval_runtime": 212.0272,
      "eval_samples_per_second": 16.04,
      "eval_steps_per_second": 1.005,
      "eval_wer": 0.3957505541846167,
      "step": 3600
    },
    {
      "epoch": 8.37,
      "learning_rate": 0.00022916543574593795,
      "loss": 0.377,
      "step": 4000
    },
    {
      "epoch": 8.37,
      "eval_loss": 0.4214448034763336,
      "eval_runtime": 210.8625,
      "eval_samples_per_second": 16.129,
      "eval_steps_per_second": 1.01,
      "eval_wer": 0.39071061106696225,
      "step": 4000
    },
    {
      "epoch": 9.2,
      "learning_rate": 0.0002203028064992614,
      "loss": 0.347,
      "step": 4400
    },
    {
      "epoch": 9.2,
      "eval_loss": 0.42906612157821655,
      "eval_runtime": 212.1547,
      "eval_samples_per_second": 16.031,
      "eval_steps_per_second": 1.004,
      "eval_wer": 0.38757371701033083,
      "step": 4400
    },
    {
      "epoch": 10.04,
      "learning_rate": 0.00021144017725258492,
      "loss": 0.3322,
      "step": 4800
    },
    {
      "epoch": 10.04,
      "eval_loss": 0.4414941370487213,
      "eval_runtime": 210.3342,
      "eval_samples_per_second": 16.17,
      "eval_steps_per_second": 1.013,
      "eval_wer": 0.3664728763227237,
      "step": 4800
    },
    {
      "epoch": 10.88,
      "learning_rate": 0.00020257754800590837,
      "loss": 0.311,
      "step": 5200
    },
    {
      "epoch": 10.88,
      "eval_loss": 0.40909305214881897,
      "eval_runtime": 211.7535,
      "eval_samples_per_second": 16.061,
      "eval_steps_per_second": 1.006,
      "eval_wer": 0.37214019825170436,
      "step": 5200
    },
    {
      "epoch": 11.71,
      "learning_rate": 0.00019371491875923188,
      "loss": 0.2956,
      "step": 5600
    },
    {
      "epoch": 11.71,
      "eval_loss": 0.46582677960395813,
      "eval_runtime": 210.74,
      "eval_samples_per_second": 16.138,
      "eval_steps_per_second": 1.011,
      "eval_wer": 0.3568530678823874,
      "step": 5600
    },
    {
      "epoch": 12.55,
      "learning_rate": 0.0001848522895125554,
      "loss": 0.2811,
      "step": 6000
    },
    {
      "epoch": 12.55,
      "eval_loss": 0.44131794571876526,
      "eval_runtime": 210.7883,
      "eval_samples_per_second": 16.135,
      "eval_steps_per_second": 1.01,
      "eval_wer": 0.3576268350830231,
      "step": 6000
    },
    {
      "epoch": 13.39,
      "learning_rate": 0.00017598966026587885,
      "loss": 0.2732,
      "step": 6400
    },
    {
      "epoch": 13.39,
      "eval_loss": 0.48606938123703003,
      "eval_runtime": 210.0412,
      "eval_samples_per_second": 16.192,
      "eval_steps_per_second": 1.014,
      "eval_wer": 0.3552846208540717,
      "step": 6400
    },
    {
      "epoch": 14.23,
      "learning_rate": 0.00016712703101920236,
      "loss": 0.2672,
      "step": 6800
    },
    {
      "epoch": 14.23,
      "eval_loss": 0.4633455276489258,
      "eval_runtime": 212.5303,
      "eval_samples_per_second": 16.002,
      "eval_steps_per_second": 1.002,
      "eval_wer": 0.3534861349282697,
      "step": 6800
    },
    {
      "epoch": 15.06,
      "learning_rate": 0.00015826440177252584,
      "loss": 0.2497,
      "step": 7200
    },
    {
      "epoch": 15.06,
      "eval_loss": 0.468420147895813,
      "eval_runtime": 210.9196,
      "eval_samples_per_second": 16.125,
      "eval_steps_per_second": 1.01,
      "eval_wer": 0.3575850098289347,
      "step": 7200
    },
    {
      "epoch": 15.9,
      "learning_rate": 0.00014940177252584932,
      "loss": 0.2334,
      "step": 7600
    },
    {
      "epoch": 15.9,
      "eval_loss": 0.4702986776828766,
      "eval_runtime": 211.8705,
      "eval_samples_per_second": 16.052,
      "eval_steps_per_second": 1.005,
      "eval_wer": 0.34524655987285124,
      "step": 7600
    },
    {
      "epoch": 16.74,
      "learning_rate": 0.0001405391432791728,
      "loss": 0.2324,
      "step": 8000
    },
    {
      "epoch": 16.74,
      "eval_loss": 0.4267388880252838,
      "eval_runtime": 211.5039,
      "eval_samples_per_second": 16.08,
      "eval_steps_per_second": 1.007,
      "eval_wer": 0.35070475553138986,
      "step": 8000
    },
    {
      "epoch": 17.57,
      "learning_rate": 0.0001316765140324963,
      "loss": 0.2166,
      "step": 8400
    },
    {
      "epoch": 17.57,
      "eval_loss": 0.4422346353530884,
      "eval_runtime": 213.9215,
      "eval_samples_per_second": 15.898,
      "eval_steps_per_second": 0.996,
      "eval_wer": 0.3400393157388431,
      "step": 8400
    },
    {
      "epoch": 18.41,
      "learning_rate": 0.00012281388478581977,
      "loss": 0.2116,
      "step": 8800
    },
    {
      "epoch": 18.41,
      "eval_loss": 0.4669197201728821,
      "eval_runtime": 213.3022,
      "eval_samples_per_second": 15.945,
      "eval_steps_per_second": 0.999,
      "eval_wer": 0.33359822660922667,
      "step": 8800
    },
    {
      "epoch": 19.25,
      "learning_rate": 0.00011395125553914327,
      "loss": 0.2055,
      "step": 9200
    },
    {
      "epoch": 19.25,
      "eval_loss": 0.46659788489341736,
      "eval_runtime": 209.6264,
      "eval_samples_per_second": 16.224,
      "eval_steps_per_second": 1.016,
      "eval_wer": 0.3343510811828182,
      "step": 9200
    },
    {
      "epoch": 20.08,
      "learning_rate": 0.00010508862629246675,
      "loss": 0.2,
      "step": 9600
    },
    {
      "epoch": 20.08,
      "eval_loss": 0.4791451096534729,
      "eval_runtime": 212.2536,
      "eval_samples_per_second": 16.023,
      "eval_steps_per_second": 1.004,
      "eval_wer": 0.3353967125350287,
      "step": 9600
    },
    {
      "epoch": 20.92,
      "learning_rate": 9.622599704579024e-05,
      "loss": 0.1851,
      "step": 10000
    },
    {
      "epoch": 20.92,
      "eval_loss": 0.4670654237270355,
      "eval_runtime": 209.9735,
      "eval_samples_per_second": 16.197,
      "eval_steps_per_second": 1.014,
      "eval_wer": 0.33182065331046884,
      "step": 10000
    },
    {
      "epoch": 21.76,
      "learning_rate": 8.736336779911373e-05,
      "loss": 0.1768,
      "step": 10400
    },
    {
      "epoch": 21.76,
      "eval_loss": 0.48615434765815735,
      "eval_runtime": 211.2759,
      "eval_samples_per_second": 16.097,
      "eval_steps_per_second": 1.008,
      "eval_wer": 0.33194612907273413,
      "step": 10400
    },
    {
      "epoch": 22.59,
      "learning_rate": 7.850073855243721e-05,
      "loss": 0.1759,
      "step": 10800
    },
    {
      "epoch": 22.59,
      "eval_loss": 0.4796726107597351,
      "eval_runtime": 210.8467,
      "eval_samples_per_second": 16.13,
      "eval_steps_per_second": 1.01,
      "eval_wer": 0.32908109916767747,
      "step": 10800
    },
    {
      "epoch": 23.43,
      "learning_rate": 6.96381093057607e-05,
      "loss": 0.1697,
      "step": 11200
    },
    {
      "epoch": 23.43,
      "eval_loss": 0.5016443133354187,
      "eval_runtime": 210.1285,
      "eval_samples_per_second": 16.185,
      "eval_steps_per_second": 1.014,
      "eval_wer": 0.32728261324187546,
      "step": 11200
    },
    {
      "epoch": 24.27,
      "learning_rate": 6.077548005908419e-05,
      "loss": 0.162,
      "step": 11600
    },
    {
      "epoch": 24.27,
      "eval_loss": 0.4838166832923889,
      "eval_runtime": 210.5105,
      "eval_samples_per_second": 16.156,
      "eval_steps_per_second": 1.012,
      "eval_wer": 0.32224267012422103,
      "step": 11600
    },
    {
      "epoch": 25.1,
      "learning_rate": 5.191285081240768e-05,
      "loss": 0.1552,
      "step": 12000
    },
    {
      "epoch": 25.1,
      "eval_loss": 0.4953179657459259,
      "eval_runtime": 214.2232,
      "eval_samples_per_second": 15.876,
      "eval_steps_per_second": 0.994,
      "eval_wer": 0.3248985737588356,
      "step": 12000
    },
    {
      "epoch": 25.94,
      "learning_rate": 4.3050221565731165e-05,
      "loss": 0.1505,
      "step": 12400
    },
    {
      "epoch": 25.94,
      "eval_loss": 0.5147430300712585,
      "eval_runtime": 213.2541,
      "eval_samples_per_second": 15.948,
      "eval_steps_per_second": 0.999,
      "eval_wer": 0.320444184198419,
      "step": 12400
    },
    {
      "epoch": 26.78,
      "learning_rate": 3.418759231905465e-05,
      "loss": 0.1505,
      "step": 12800
    },
    {
      "epoch": 26.78,
      "eval_loss": 0.5216009616851807,
      "eval_runtime": 214.5736,
      "eval_samples_per_second": 15.85,
      "eval_steps_per_second": 0.993,
      "eval_wer": 0.32184533021038103,
      "step": 12800
    },
    {
      "epoch": 27.61,
      "learning_rate": 2.534711964549483e-05,
      "loss": 0.1441,
      "step": 13200
    },
    {
      "epoch": 27.61,
      "eval_loss": 0.5204435586929321,
      "eval_runtime": 210.4984,
      "eval_samples_per_second": 16.157,
      "eval_steps_per_second": 1.012,
      "eval_wer": 0.32084152411225897,
      "step": 13200
    },
    {
      "epoch": 28.45,
      "learning_rate": 1.6506646971935004e-05,
      "loss": 0.1432,
      "step": 13600
    },
    {
      "epoch": 28.45,
      "eval_loss": 0.5269867777824402,
      "eval_runtime": 210.8697,
      "eval_samples_per_second": 16.128,
      "eval_steps_per_second": 1.01,
      "eval_wer": 0.31854113513739596,
      "step": 13600
    },
    {
      "epoch": 29.29,
      "learning_rate": 7.644017725258493e-06,
      "loss": 0.1379,
      "step": 14000
    },
    {
      "epoch": 29.29,
      "eval_loss": 0.5423755049705505,
      "eval_runtime": 212.2394,
      "eval_samples_per_second": 16.024,
      "eval_steps_per_second": 1.004,
      "eval_wer": 0.3176209795474508,
      "step": 14000
    },
    {
      "epoch": 30.0,
      "step": 14340,
      "total_flos": 2.519939618068477e+20,
      "train_loss": 0.5493880579162342,
      "train_runtime": 54395.2991,
      "train_samples_per_second": 16.879,
      "train_steps_per_second": 0.264
    }
  ],
  "max_steps": 14340,
  "num_train_epochs": 30,
  "total_flos": 2.519939618068477e+20,
  "trial_name": null,
  "trial_params": null
}