{ "best_metric": 0.49605429292929293, "best_model_checkpoint": "/scratch/elec/puhe/p/palp3/sami_ASR/base_model_output/wav2vec2-base-sami-22k/allps_cap20h/checkpoint-2457", "epoch": 60.0, "eval_steps": 500, "global_step": 49140, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 991.008056640625, "learning_rate": 3.264143264143264e-05, "loss": 4052.2546, "step": 819 }, { "epoch": 1.0, "eval_cer": 0.7619917996344415, "eval_loss": 868.5341186523438, "eval_runtime": 24.3338, "eval_samples_per_second": 36.575, "eval_steps_per_second": 4.603, "eval_wer": 1.0, "step": 819 }, { "epoch": 2.0, "grad_norm": 551.8701171875, "learning_rate": 6.597476597476598e-05, "loss": 1110.0161, "step": 1638 }, { "epoch": 2.0, "eval_cer": 0.16704539840932667, "eval_loss": 284.3064880371094, "eval_runtime": 24.1337, "eval_samples_per_second": 36.878, "eval_steps_per_second": 4.641, "eval_wer": 0.5340909090909091, "step": 1638 }, { "epoch": 3.0, "grad_norm": 952.0059204101562, "learning_rate": 9.93080993080993e-05, "loss": 792.8359, "step": 2457 }, { "epoch": 3.0, "eval_cer": 0.1561280442622141, "eval_loss": 281.24151611328125, "eval_runtime": 23.8418, "eval_samples_per_second": 37.329, "eval_steps_per_second": 4.698, "eval_wer": 0.49605429292929293, "step": 2457 }, { "epoch": 4.0, "grad_norm": 2196.81884765625, "learning_rate": 0.00013264143264143263, "loss": 752.4135, "step": 3276 }, { "epoch": 4.0, "eval_cer": 0.17707355629106358, "eval_loss": 294.49365234375, "eval_runtime": 23.5949, "eval_samples_per_second": 37.72, "eval_steps_per_second": 4.747, "eval_wer": 0.5666035353535354, "step": 3276 }, { "epoch": 5.0, "grad_norm": 1654.1439208984375, "learning_rate": 0.000165974765974766, "loss": 705.1918, "step": 4095 }, { "epoch": 5.0, "eval_cer": 0.17862964975547102, "eval_loss": 332.3864440917969, "eval_runtime": 23.9822, "eval_samples_per_second": 37.111, "eval_steps_per_second": 4.67, "eval_wer": 0.5135732323232324, "step": 4095 }, { "epoch": 6.0, "grad_norm": 786.24609375, "learning_rate": 0.00019930809930809932, "loss": 726.7231, "step": 4914 }, { "epoch": 6.0, "eval_cer": 0.2265474485007163, "eval_loss": 395.57098388671875, "eval_runtime": 24.1262, "eval_samples_per_second": 36.889, "eval_steps_per_second": 4.642, "eval_wer": 0.6213699494949495, "step": 4914 }, { "epoch": 7.0, "grad_norm": 412.29510498046875, "learning_rate": 0.00023264143264143265, "loss": 742.4047, "step": 5733 }, { "epoch": 7.0, "eval_cer": 0.21772958553574076, "eval_loss": 339.9690246582031, "eval_runtime": 23.5379, "eval_samples_per_second": 37.811, "eval_steps_per_second": 4.758, "eval_wer": 0.602114898989899, "step": 5733 }, { "epoch": 8.0, "grad_norm": 403.2104797363281, "learning_rate": 0.000265974765974766, "loss": 749.9364, "step": 6552 }, { "epoch": 8.0, "eval_cer": 0.2160252926937707, "eval_loss": 407.9001770019531, "eval_runtime": 24.6629, "eval_samples_per_second": 36.087, "eval_steps_per_second": 4.541, "eval_wer": 0.6063762626262627, "step": 6552 }, { "epoch": 9.0, "grad_norm": 777.958251953125, "learning_rate": 0.0002993080993080993, "loss": 766.2681, "step": 7371 }, { "epoch": 9.0, "eval_cer": 0.25771871758138615, "eval_loss": 397.77618408203125, "eval_runtime": 24.7874, "eval_samples_per_second": 35.905, "eval_steps_per_second": 4.518, "eval_wer": 0.6685606060606061, "step": 7371 }, { "epoch": 10.0, "grad_norm": 726.7489013671875, "learning_rate": 0.00033264143264143266, "loss": 798.0908, "step": 8190 }, { "epoch": 10.0, "eval_cer": 0.3210492515931433, "eval_loss": 429.9941711425781, "eval_runtime": 23.8326, "eval_samples_per_second": 37.344, "eval_steps_per_second": 4.699, "eval_wer": 0.7534722222222222, "step": 8190 }, { "epoch": 11.0, "grad_norm": 423.5915832519531, "learning_rate": 0.00036597476597476596, "loss": 824.5388, "step": 9009 }, { "epoch": 11.0, "eval_cer": 0.3149483772168157, "eval_loss": 471.0012512207031, "eval_runtime": 24.2588, "eval_samples_per_second": 36.688, "eval_steps_per_second": 4.617, "eval_wer": 0.7575757575757576, "step": 9009 }, { "epoch": 12.0, "grad_norm": 665.407958984375, "learning_rate": 0.0003993080993080993, "loss": 858.5173, "step": 9828 }, { "epoch": 12.0, "eval_cer": 0.27846663044015213, "eval_loss": 433.6759033203125, "eval_runtime": 23.8147, "eval_samples_per_second": 37.372, "eval_steps_per_second": 4.703, "eval_wer": 0.7313762626262627, "step": 9828 }, { "epoch": 13.0, "grad_norm": 1094.12109375, "learning_rate": 0.0004326007326007326, "loss": 886.2443, "step": 10647 }, { "epoch": 13.0, "eval_cer": 0.31334288395988735, "eval_loss": 451.7483825683594, "eval_runtime": 23.8973, "eval_samples_per_second": 37.243, "eval_steps_per_second": 4.687, "eval_wer": 0.7547348484848485, "step": 10647 }, { "epoch": 14.0, "grad_norm": 748.5809326171875, "learning_rate": 0.000465934065934066, "loss": 914.3147, "step": 11466 }, { "epoch": 14.0, "eval_cer": 0.3175418663241614, "eval_loss": 480.8207092285156, "eval_runtime": 23.8907, "eval_samples_per_second": 37.253, "eval_steps_per_second": 4.688, "eval_wer": 0.8120265151515151, "step": 11466 }, { "epoch": 15.0, "grad_norm": 304.9002380371094, "learning_rate": 0.0004992673992673993, "loss": 962.0846, "step": 12285 }, { "epoch": 15.0, "eval_cer": 0.32016005532776765, "eval_loss": 529.720703125, "eval_runtime": 23.8442, "eval_samples_per_second": 37.326, "eval_steps_per_second": 4.697, "eval_wer": 0.7960858585858586, "step": 12285 }, { "epoch": 16.0, "grad_norm": 587.8988037109375, "learning_rate": 0.0004891330891330891, "loss": 973.3332, "step": 13104 }, { "epoch": 16.0, "eval_cer": 0.3679543545917107, "eval_loss": 518.808837890625, "eval_runtime": 24.7703, "eval_samples_per_second": 35.93, "eval_steps_per_second": 4.522, "eval_wer": 0.8304924242424242, "step": 13104 }, { "epoch": 17.0, "grad_norm": 536.6315307617188, "learning_rate": 0.00047803554470221137, "loss": 954.7767, "step": 13923 }, { "epoch": 17.0, "eval_cer": 0.30805710616015414, "eval_loss": 507.0257873535156, "eval_runtime": 24.2842, "eval_samples_per_second": 36.649, "eval_steps_per_second": 4.612, "eval_wer": 0.7891414141414141, "step": 13923 }, { "epoch": 18.0, "grad_norm": 754.9872436523438, "learning_rate": 0.00046692443359110027, "loss": 935.9705, "step": 14742 }, { "epoch": 18.0, "eval_cer": 0.32087635231932027, "eval_loss": 477.90582275390625, "eval_runtime": 24.8782, "eval_samples_per_second": 35.774, "eval_steps_per_second": 4.502, "eval_wer": 0.7940340909090909, "step": 14742 }, { "epoch": 19.0, "grad_norm": 514.4397583007812, "learning_rate": 0.0004558133224799891, "loss": 901.4906, "step": 15561 }, { "epoch": 19.0, "eval_cer": 0.3500963295954157, "eval_loss": 513.8383178710938, "eval_runtime": 26.7781, "eval_samples_per_second": 33.236, "eval_steps_per_second": 4.183, "eval_wer": 0.8093434343434344, "step": 15561 }, { "epoch": 20.0, "grad_norm": 1911.3572998046875, "learning_rate": 0.0004447022113688781, "loss": 886.2501, "step": 16380 }, { "epoch": 20.0, "eval_cer": 0.3455515486834955, "eval_loss": 471.8292236328125, "eval_runtime": 24.3743, "eval_samples_per_second": 36.514, "eval_steps_per_second": 4.595, "eval_wer": 0.7859848484848485, "step": 16380 }, { "epoch": 21.0, "grad_norm": 1233.0408935546875, "learning_rate": 0.0004335911002577669, "loss": 853.8701, "step": 17199 }, { "epoch": 21.0, "eval_cer": 0.3084523045003211, "eval_loss": 505.87554931640625, "eval_runtime": 24.8737, "eval_samples_per_second": 35.781, "eval_steps_per_second": 4.503, "eval_wer": 0.8118686868686869, "step": 17199 }, { "epoch": 22.0, "grad_norm": 911.884033203125, "learning_rate": 0.00042249355582688915, "loss": 835.6011, "step": 18018 }, { "epoch": 22.0, "eval_cer": 0.31450377908412785, "eval_loss": 488.744873046875, "eval_runtime": 24.8755, "eval_samples_per_second": 35.778, "eval_steps_per_second": 4.502, "eval_wer": 0.7542613636363636, "step": 18018 }, { "epoch": 23.0, "grad_norm": 426.70599365234375, "learning_rate": 0.00041140957807624475, "loss": 834.0464, "step": 18837 }, { "epoch": 23.0, "eval_cer": 0.2940769648767475, "eval_loss": 465.870849609375, "eval_runtime": 25.0917, "eval_samples_per_second": 35.47, "eval_steps_per_second": 4.464, "eval_wer": 0.7578914141414141, "step": 18837 }, { "epoch": 24.0, "grad_norm": 613.6903686523438, "learning_rate": 0.00040029846696513366, "loss": 783.4934, "step": 19656 }, { "epoch": 24.0, "eval_cer": 0.3116138912216569, "eval_loss": 448.39312744140625, "eval_runtime": 24.9209, "eval_samples_per_second": 35.713, "eval_steps_per_second": 4.494, "eval_wer": 0.7634154040404041, "step": 19656 }, { "epoch": 25.0, "grad_norm": 262.5715637207031, "learning_rate": 0.00038918735585402256, "loss": 766.9646, "step": 20475 }, { "epoch": 25.0, "eval_cer": 0.294669762386998, "eval_loss": 465.7940979003906, "eval_runtime": 25.194, "eval_samples_per_second": 35.326, "eval_steps_per_second": 4.446, "eval_wer": 0.7376893939393939, "step": 20475 }, { "epoch": 26.0, "grad_norm": 865.4219970703125, "learning_rate": 0.0003780762447429114, "loss": 740.5047, "step": 21294 }, { "epoch": 26.0, "eval_cer": 0.28654349651731464, "eval_loss": 481.7481689453125, "eval_runtime": 24.4084, "eval_samples_per_second": 36.463, "eval_steps_per_second": 4.589, "eval_wer": 0.7323232323232324, "step": 21294 }, { "epoch": 27.0, "grad_norm": 644.6862182617188, "learning_rate": 0.0003669651336318003, "loss": 724.7147, "step": 22113 }, { "epoch": 27.0, "eval_cer": 0.2947191621795188, "eval_loss": 447.7026062011719, "eval_runtime": 24.2348, "eval_samples_per_second": 36.724, "eval_steps_per_second": 4.621, "eval_wer": 0.7174873737373737, "step": 22113 }, { "epoch": 28.0, "grad_norm": 462.7465515136719, "learning_rate": 0.0003558540225206892, "loss": 694.9597, "step": 22932 }, { "epoch": 28.0, "eval_cer": 0.27029096477794795, "eval_loss": 439.9412841796875, "eval_runtime": 25.1492, "eval_samples_per_second": 35.389, "eval_steps_per_second": 4.453, "eval_wer": 0.7050189393939394, "step": 22932 }, { "epoch": 29.0, "grad_norm": 696.3271484375, "learning_rate": 0.0003447429114095781, "loss": 678.131, "step": 23751 }, { "epoch": 29.0, "eval_cer": 0.2723657560638245, "eval_loss": 439.17047119140625, "eval_runtime": 24.9686, "eval_samples_per_second": 35.645, "eval_steps_per_second": 4.486, "eval_wer": 0.694760101010101, "step": 23751 }, { "epoch": 30.0, "grad_norm": 348.0857849121094, "learning_rate": 0.0003336453669787003, "loss": 669.4257, "step": 24570 }, { "epoch": 30.0, "eval_cer": 0.2678456750481648, "eval_loss": 463.5548400878906, "eval_runtime": 24.2758, "eval_samples_per_second": 36.662, "eval_steps_per_second": 4.614, "eval_wer": 0.6933396464646465, "step": 24570 }, { "epoch": 31.0, "grad_norm": 679.9598999023438, "learning_rate": 0.00032253425586758924, "loss": 640.7192, "step": 25389 }, { "epoch": 31.0, "eval_cer": 0.26994516623030185, "eval_loss": 449.90155029296875, "eval_runtime": 24.2781, "eval_samples_per_second": 36.659, "eval_steps_per_second": 4.613, "eval_wer": 0.6968118686868687, "step": 25389 }, { "epoch": 32.0, "grad_norm": 276.22564697265625, "learning_rate": 0.0003114231447564781, "loss": 616.687, "step": 26208 }, { "epoch": 32.0, "eval_cer": 0.27528034382255595, "eval_loss": 445.01812744140625, "eval_runtime": 24.4977, "eval_samples_per_second": 36.33, "eval_steps_per_second": 4.572, "eval_wer": 0.6966540404040404, "step": 26208 }, { "epoch": 33.0, "grad_norm": 249.17897033691406, "learning_rate": 0.000300312033645367, "loss": 606.6529, "step": 27027 }, { "epoch": 33.0, "eval_cer": 0.2614484019167119, "eval_loss": 424.00262451171875, "eval_runtime": 24.2299, "eval_samples_per_second": 36.732, "eval_steps_per_second": 4.622, "eval_wer": 0.6897095959595959, "step": 27027 }, { "epoch": 34.0, "grad_norm": 728.5042724609375, "learning_rate": 0.0002892009225342559, "loss": 588.8339, "step": 27846 }, { "epoch": 34.0, "eval_cer": 0.2601640073111693, "eval_loss": 425.9888916015625, "eval_runtime": 23.7857, "eval_samples_per_second": 37.417, "eval_steps_per_second": 4.709, "eval_wer": 0.7024936868686869, "step": 27846 }, { "epoch": 35.0, "grad_norm": 235.7451934814453, "learning_rate": 0.0002781033781033781, "loss": 575.3545, "step": 28665 }, { "epoch": 35.0, "eval_cer": 0.28026972286716395, "eval_loss": 458.68701171875, "eval_runtime": 25.4135, "eval_samples_per_second": 35.021, "eval_steps_per_second": 4.407, "eval_wer": 0.6829229797979798, "step": 28665 }, { "epoch": 36.0, "grad_norm": 363.6673278808594, "learning_rate": 0.00026700583367250035, "loss": 558.9386, "step": 29484 }, { "epoch": 36.0, "eval_cer": 0.27476164600108677, "eval_loss": 456.4928283691406, "eval_runtime": 24.0819, "eval_samples_per_second": 36.957, "eval_steps_per_second": 4.651, "eval_wer": 0.6876578282828283, "step": 29484 }, { "epoch": 37.0, "grad_norm": 305.44427490234375, "learning_rate": 0.00025589472256138925, "loss": 530.0468, "step": 30303 }, { "epoch": 37.0, "eval_cer": 0.2480610581435558, "eval_loss": 434.17950439453125, "eval_runtime": 23.6026, "eval_samples_per_second": 37.708, "eval_steps_per_second": 4.745, "eval_wer": 0.6513573232323232, "step": 30303 }, { "epoch": 38.0, "grad_norm": 457.92230224609375, "learning_rate": 0.0002447836114502781, "loss": 520.7852, "step": 31122 }, { "epoch": 38.0, "eval_cer": 0.25776811737390704, "eval_loss": 458.8715515136719, "eval_runtime": 24.0205, "eval_samples_per_second": 37.052, "eval_steps_per_second": 4.663, "eval_wer": 0.6710858585858586, "step": 31122 }, { "epoch": 39.0, "grad_norm": 327.72235107421875, "learning_rate": 0.000233672500339167, "loss": 503.4493, "step": 31941 }, { "epoch": 39.0, "eval_cer": 0.2560144247394161, "eval_loss": 450.276123046875, "eval_runtime": 23.7188, "eval_samples_per_second": 37.523, "eval_steps_per_second": 4.722, "eval_wer": 0.6609848484848485, "step": 31941 }, { "epoch": 40.0, "grad_norm": 708.286376953125, "learning_rate": 0.00022257495590828925, "loss": 484.1686, "step": 32760 }, { "epoch": 40.0, "eval_cer": 0.25863261374302227, "eval_loss": 438.8575134277344, "eval_runtime": 24.0264, "eval_samples_per_second": 37.043, "eval_steps_per_second": 4.662, "eval_wer": 0.6541982323232324, "step": 32760 }, { "epoch": 41.0, "grad_norm": 273.99664306640625, "learning_rate": 0.00021146384479717815, "loss": 467.5615, "step": 33579 }, { "epoch": 41.0, "eval_cer": 0.2578175171664279, "eval_loss": 469.5018615722656, "eval_runtime": 25.2591, "eval_samples_per_second": 35.235, "eval_steps_per_second": 4.434, "eval_wer": 0.6687184343434344, "step": 33579 }, { "epoch": 42.0, "grad_norm": 649.657958984375, "learning_rate": 0.00020035273368606703, "loss": 454.8417, "step": 34398 }, { "epoch": 42.0, "eval_cer": 0.25369263449093515, "eval_loss": 439.1047058105469, "eval_runtime": 24.7496, "eval_samples_per_second": 35.96, "eval_steps_per_second": 4.525, "eval_wer": 0.6535669191919192, "step": 34398 }, { "epoch": 43.0, "grad_norm": 460.6722717285156, "learning_rate": 0.00018924162257495593, "loss": 441.654, "step": 35217 }, { "epoch": 43.0, "eval_cer": 0.25929951094205406, "eval_loss": 486.397216796875, "eval_runtime": 23.9495, "eval_samples_per_second": 37.162, "eval_steps_per_second": 4.677, "eval_wer": 0.6527777777777778, "step": 35217 }, { "epoch": 44.0, "grad_norm": 714.3244018554688, "learning_rate": 0.00017813051146384478, "loss": 437.2555, "step": 36036 }, { "epoch": 44.0, "eval_cer": 0.2559897248431557, "eval_loss": 454.9423828125, "eval_runtime": 24.3558, "eval_samples_per_second": 36.542, "eval_steps_per_second": 4.598, "eval_wer": 0.6589330808080808, "step": 36036 }, { "epoch": 45.0, "grad_norm": 315.14154052734375, "learning_rate": 0.00016701940035273368, "loss": 413.9407, "step": 36855 }, { "epoch": 45.0, "eval_cer": 0.24739416094452404, "eval_loss": 478.7567443847656, "eval_runtime": 24.1415, "eval_samples_per_second": 36.866, "eval_steps_per_second": 4.639, "eval_wer": 0.6437815656565656, "step": 36855 }, { "epoch": 46.0, "grad_norm": 559.75048828125, "learning_rate": 0.00015592185592185593, "loss": 398.7603, "step": 37674 }, { "epoch": 46.0, "eval_cer": 0.2493701526453589, "eval_loss": 461.5147399902344, "eval_runtime": 24.4344, "eval_samples_per_second": 36.424, "eval_steps_per_second": 4.584, "eval_wer": 0.6463068181818182, "step": 37674 }, { "epoch": 47.0, "grad_norm": 181.94717407226562, "learning_rate": 0.00014481074481074484, "loss": 386.0169, "step": 38493 }, { "epoch": 47.0, "eval_cer": 0.24331867806155213, "eval_loss": 462.0274658203125, "eval_runtime": 24.4708, "eval_samples_per_second": 36.37, "eval_steps_per_second": 4.577, "eval_wer": 0.6365214646464646, "step": 38493 }, { "epoch": 48.0, "grad_norm": 429.9681701660156, "learning_rate": 0.0001336996336996337, "loss": 374.3441, "step": 39312 }, { "epoch": 48.0, "eval_cer": 0.24242948179617646, "eval_loss": 470.66925048828125, "eval_runtime": 24.2843, "eval_samples_per_second": 36.649, "eval_steps_per_second": 4.612, "eval_wer": 0.6289457070707071, "step": 39312 }, { "epoch": 49.0, "grad_norm": 8201.0947265625, "learning_rate": 0.00012258852258852259, "loss": 363.7627, "step": 40131 }, { "epoch": 49.0, "eval_cer": 0.25260583905547596, "eval_loss": 465.7492980957031, "eval_runtime": 24.917, "eval_samples_per_second": 35.719, "eval_steps_per_second": 4.495, "eval_wer": 0.6412563131313131, "step": 40131 }, { "epoch": 50.0, "grad_norm": 343.2039489746094, "learning_rate": 0.00011147741147741149, "loss": 349.4398, "step": 40950 }, { "epoch": 50.0, "eval_cer": 0.25156844341253765, "eval_loss": 478.899169921875, "eval_runtime": 25.7381, "eval_samples_per_second": 34.579, "eval_steps_per_second": 4.352, "eval_wer": 0.6346275252525253, "step": 40950 }, { "epoch": 51.0, "grad_norm": 1325.235107421875, "learning_rate": 0.00010039343372676705, "loss": 337.0751, "step": 41769 }, { "epoch": 51.0, "eval_cer": 0.23669910586375537, "eval_loss": 467.36248779296875, "eval_runtime": 23.3669, "eval_samples_per_second": 38.088, "eval_steps_per_second": 4.793, "eval_wer": 0.6174242424242424, "step": 41769 }, { "epoch": 52.0, "grad_norm": 234.5354766845703, "learning_rate": 8.928232261565595e-05, "loss": 330.7304, "step": 42588 }, { "epoch": 52.0, "eval_cer": 0.2420342834560095, "eval_loss": 497.57745361328125, "eval_runtime": 23.9343, "eval_samples_per_second": 37.185, "eval_steps_per_second": 4.679, "eval_wer": 0.6212121212121212, "step": 42588 }, { "epoch": 53.0, "grad_norm": 472.48504638671875, "learning_rate": 7.817121150454484e-05, "loss": 312.5302, "step": 43407 }, { "epoch": 53.0, "eval_cer": 0.24297287951390603, "eval_loss": 496.43292236328125, "eval_runtime": 26.1733, "eval_samples_per_second": 34.004, "eval_steps_per_second": 4.279, "eval_wer": 0.6242108585858586, "step": 43407 }, { "epoch": 54.0, "grad_norm": 168.4958953857422, "learning_rate": 6.706010039343373e-05, "loss": 309.1301, "step": 44226 }, { "epoch": 54.0, "eval_cer": 0.24124388677567554, "eval_loss": 521.0546875, "eval_runtime": 23.8596, "eval_samples_per_second": 37.302, "eval_steps_per_second": 4.694, "eval_wer": 0.6137941919191919, "step": 44226 }, { "epoch": 55.0, "grad_norm": 513.9391479492188, "learning_rate": 5.594898928232261e-05, "loss": 292.5729, "step": 45045 }, { "epoch": 55.0, "eval_cer": 0.23716840389270366, "eval_loss": 502.69549560546875, "eval_runtime": 23.3761, "eval_samples_per_second": 38.073, "eval_steps_per_second": 4.791, "eval_wer": 0.6122159090909091, "step": 45045 }, { "epoch": 56.0, "grad_norm": 518.4885864257812, "learning_rate": 4.485144485144485e-05, "loss": 286.7145, "step": 45864 }, { "epoch": 56.0, "eval_cer": 0.2365015066936719, "eval_loss": 504.39935302734375, "eval_runtime": 24.3587, "eval_samples_per_second": 36.537, "eval_steps_per_second": 4.598, "eval_wer": 0.6158459595959596, "step": 45864 }, { "epoch": 57.0, "grad_norm": 215.2507781982422, "learning_rate": 3.374033374033374e-05, "loss": 268.4251, "step": 46683 }, { "epoch": 57.0, "eval_cer": 0.23455021488909747, "eval_loss": 523.27880859375, "eval_runtime": 24.3318, "eval_samples_per_second": 36.578, "eval_steps_per_second": 4.603, "eval_wer": 0.6174242424242424, "step": 46683 }, { "epoch": 58.0, "grad_norm": 496.5767822265625, "learning_rate": 2.262922262922263e-05, "loss": 272.3521, "step": 47502 }, { "epoch": 58.0, "eval_cer": 0.23504421281430618, "eval_loss": 519.0136108398438, "eval_runtime": 25.3712, "eval_samples_per_second": 35.079, "eval_steps_per_second": 4.414, "eval_wer": 0.6142676767676768, "step": 47502 }, { "epoch": 59.0, "grad_norm": 224.57681274414062, "learning_rate": 1.1518111518111518e-05, "loss": 263.5849, "step": 48321 }, { "epoch": 59.0, "eval_cer": 0.2333399199723361, "eval_loss": 521.5694580078125, "eval_runtime": 25.3221, "eval_samples_per_second": 35.147, "eval_steps_per_second": 4.423, "eval_wer": 0.6092171717171717, "step": 48321 }, { "epoch": 60.0, "grad_norm": 511.5002136230469, "learning_rate": 4.205670872337539e-07, "loss": 259.7302, "step": 49140 }, { "epoch": 60.0, "eval_cer": 0.23321642049103394, "eval_loss": 526.7686767578125, "eval_runtime": 24.9818, "eval_samples_per_second": 35.626, "eval_steps_per_second": 4.483, "eval_wer": 0.6117424242424242, "step": 49140 }, { "epoch": 60.0, "step": 49140, "total_flos": 4.128908222553674e+19, "train_loss": 675.357091982092, "train_runtime": 28715.4368, "train_samples_per_second": 27.372, "train_steps_per_second": 1.711 } ], "logging_steps": 500, "max_steps": 49140, "num_input_tokens_seen": 0, "num_train_epochs": 60, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.128908222553674e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }