{
  "best_metric": 0.1550179123878479,
  "best_model_checkpoint": "miner_id_24/checkpoint-400",
  "epoch": 0.12281276146822365,
  "eval_steps": 100,
  "global_step": 422,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0002910255011095347,
      "grad_norm": 0.5728808045387268,
      "learning_rate": 2e-05,
      "loss": 0.6246,
      "step": 1
    },
    {
      "epoch": 0.0002910255011095347,
      "eval_loss": 0.6954387426376343,
      "eval_runtime": 1569.9362,
      "eval_samples_per_second": 7.373,
      "eval_steps_per_second": 1.843,
      "step": 1
    },
    {
      "epoch": 0.0005820510022190694,
      "grad_norm": 0.6031620502471924,
      "learning_rate": 4e-05,
      "loss": 0.6844,
      "step": 2
    },
    {
      "epoch": 0.0008730765033286041,
      "grad_norm": 0.592772364616394,
      "learning_rate": 6e-05,
      "loss": 0.6462,
      "step": 3
    },
    {
      "epoch": 0.0011641020044381389,
      "grad_norm": 0.6578194499015808,
      "learning_rate": 8e-05,
      "loss": 0.6733,
      "step": 4
    },
    {
      "epoch": 0.0014551275055476737,
      "grad_norm": 0.7925109267234802,
      "learning_rate": 0.0001,
      "loss": 0.6427,
      "step": 5
    },
    {
      "epoch": 0.0017461530066572083,
      "grad_norm": 0.877795934677124,
      "learning_rate": 0.00012,
      "loss": 0.5856,
      "step": 6
    },
    {
      "epoch": 0.002037178507766743,
      "grad_norm": 0.6358482837677002,
      "learning_rate": 0.00014,
      "loss": 0.4832,
      "step": 7
    },
    {
      "epoch": 0.0023282040088762777,
      "grad_norm": 0.562201201915741,
      "learning_rate": 0.00016,
      "loss": 0.412,
      "step": 8
    },
    {
      "epoch": 0.0026192295099858125,
      "grad_norm": 0.47625571489334106,
      "learning_rate": 0.00018,
      "loss": 0.3772,
      "step": 9
    },
    {
      "epoch": 0.0029102550110953473,
      "grad_norm": 0.2741421163082123,
      "learning_rate": 0.0002,
      "loss": 0.309,
      "step": 10
    },
    {
      "epoch": 0.003201280512204882,
      "grad_norm": 0.22830943763256073,
      "learning_rate": 0.00019999709281135722,
      "loss": 0.2466,
      "step": 11
    },
    {
      "epoch": 0.0034923060133144166,
      "grad_norm": 0.28191593289375305,
      "learning_rate": 0.00019998837141446378,
      "loss": 0.257,
      "step": 12
    },
    {
      "epoch": 0.0037833315144239514,
      "grad_norm": 0.18168823421001434,
      "learning_rate": 0.00019997383631641463,
      "loss": 0.2153,
      "step": 13
    },
    {
      "epoch": 0.004074357015533486,
      "grad_norm": 0.15139496326446533,
      "learning_rate": 0.00019995348836233516,
      "loss": 0.1961,
      "step": 14
    },
    {
      "epoch": 0.004365382516643021,
      "grad_norm": 0.14195305109024048,
      "learning_rate": 0.00019992732873533222,
      "loss": 0.2014,
      "step": 15
    },
    {
      "epoch": 0.004656408017752555,
      "grad_norm": 0.125483900308609,
      "learning_rate": 0.00019989535895642525,
      "loss": 0.1955,
      "step": 16
    },
    {
      "epoch": 0.004947433518862091,
      "grad_norm": 0.13404949009418488,
      "learning_rate": 0.00019985758088445773,
      "loss": 0.2072,
      "step": 17
    },
    {
      "epoch": 0.005238459019971625,
      "grad_norm": 0.1380385011434555,
      "learning_rate": 0.00019981399671598939,
      "loss": 0.1803,
      "step": 18
    },
    {
      "epoch": 0.005529484521081159,
      "grad_norm": 0.11921346187591553,
      "learning_rate": 0.00019976460898516818,
      "loss": 0.1949,
      "step": 19
    },
    {
      "epoch": 0.005820510022190695,
      "grad_norm": 0.12015374004840851,
      "learning_rate": 0.00019970942056358307,
      "loss": 0.1801,
      "step": 20
    },
    {
      "epoch": 0.006111535523300229,
      "grad_norm": 0.11771855503320694,
      "learning_rate": 0.00019964843466009714,
      "loss": 0.187,
      "step": 21
    },
    {
      "epoch": 0.006402561024409764,
      "grad_norm": 0.09964954853057861,
      "learning_rate": 0.00019958165482066094,
      "loss": 0.1856,
      "step": 22
    },
    {
      "epoch": 0.006693586525519299,
      "grad_norm": 0.09882669895887375,
      "learning_rate": 0.00019950908492810622,
      "loss": 0.1749,
      "step": 23
    },
    {
      "epoch": 0.006984612026628833,
      "grad_norm": 0.10145384073257446,
      "learning_rate": 0.0001994307292019204,
      "loss": 0.1966,
      "step": 24
    },
    {
      "epoch": 0.007275637527738368,
      "grad_norm": 0.097194142639637,
      "learning_rate": 0.000199346592198001,
      "loss": 0.1866,
      "step": 25
    },
    {
      "epoch": 0.007566663028847903,
      "grad_norm": 0.0885874480009079,
      "learning_rate": 0.0001992566788083908,
      "loss": 0.167,
      "step": 26
    },
    {
      "epoch": 0.007857688529957437,
      "grad_norm": 0.09368938952684402,
      "learning_rate": 0.0001991609942609936,
      "loss": 0.1761,
      "step": 27
    },
    {
      "epoch": 0.008148714031066972,
      "grad_norm": 0.09040655195713043,
      "learning_rate": 0.00019905954411926992,
      "loss": 0.1842,
      "step": 28
    },
    {
      "epoch": 0.008439739532176508,
      "grad_norm": 0.08293917775154114,
      "learning_rate": 0.00019895233428191377,
      "loss": 0.1615,
      "step": 29
    },
    {
      "epoch": 0.008730765033286042,
      "grad_norm": 0.09155099838972092,
      "learning_rate": 0.00019883937098250963,
      "loss": 0.1766,
      "step": 30
    },
    {
      "epoch": 0.009021790534395576,
      "grad_norm": 0.08348311483860016,
      "learning_rate": 0.00019872066078916988,
      "loss": 0.1489,
      "step": 31
    },
    {
      "epoch": 0.00931281603550511,
      "grad_norm": 0.08968468010425568,
      "learning_rate": 0.000198596210604153,
      "loss": 0.1978,
      "step": 32
    },
    {
      "epoch": 0.009603841536614645,
      "grad_norm": 0.08129172772169113,
      "learning_rate": 0.00019846602766346235,
      "loss": 0.1637,
      "step": 33
    },
    {
      "epoch": 0.009894867037724181,
      "grad_norm": 0.07663632184267044,
      "learning_rate": 0.00019833011953642525,
      "loss": 0.1746,
      "step": 34
    },
    {
      "epoch": 0.010185892538833716,
      "grad_norm": 0.07788729667663574,
      "learning_rate": 0.00019818849412525294,
      "loss": 0.1841,
      "step": 35
    },
    {
      "epoch": 0.01047691803994325,
      "grad_norm": 0.0812305361032486,
      "learning_rate": 0.00019804115966458115,
      "loss": 0.1938,
      "step": 36
    },
    {
      "epoch": 0.010767943541052784,
      "grad_norm": 0.08344654738903046,
      "learning_rate": 0.00019788812472099136,
      "loss": 0.175,
      "step": 37
    },
    {
      "epoch": 0.011058969042162319,
      "grad_norm": 0.08840890228748322,
      "learning_rate": 0.0001977293981925125,
      "loss": 0.2129,
      "step": 38
    },
    {
      "epoch": 0.011349994543271855,
      "grad_norm": 0.0735771432518959,
      "learning_rate": 0.00019756498930810382,
      "loss": 0.1713,
      "step": 39
    },
    {
      "epoch": 0.01164102004438139,
      "grad_norm": 0.07437434047460556,
      "learning_rate": 0.00019739490762711812,
      "loss": 0.1559,
      "step": 40
    },
    {
      "epoch": 0.011932045545490924,
      "grad_norm": 0.07056687772274017,
      "learning_rate": 0.00019721916303874605,
      "loss": 0.175,
      "step": 41
    },
    {
      "epoch": 0.012223071046600458,
      "grad_norm": 0.07450737059116364,
      "learning_rate": 0.00019703776576144105,
      "loss": 0.1911,
      "step": 42
    },
    {
      "epoch": 0.012514096547709993,
      "grad_norm": 0.07784494012594223,
      "learning_rate": 0.00019685072634232522,
      "loss": 0.1826,
      "step": 43
    },
    {
      "epoch": 0.012805122048819529,
      "grad_norm": 0.06952424347400665,
      "learning_rate": 0.00019665805565657603,
      "loss": 0.1661,
      "step": 44
    },
    {
      "epoch": 0.013096147549929063,
      "grad_norm": 0.08172561228275299,
      "learning_rate": 0.00019645976490679403,
      "loss": 0.1906,
      "step": 45
    },
    {
      "epoch": 0.013387173051038597,
      "grad_norm": 0.0775056853890419,
      "learning_rate": 0.0001962558656223516,
      "loss": 0.1821,
      "step": 46
    },
    {
      "epoch": 0.013678198552148132,
      "grad_norm": 0.07613001763820648,
      "learning_rate": 0.0001960463696587224,
      "loss": 0.1815,
      "step": 47
    },
    {
      "epoch": 0.013969224053257666,
      "grad_norm": 0.07065246999263763,
      "learning_rate": 0.00019583128919679215,
      "loss": 0.1691,
      "step": 48
    },
    {
      "epoch": 0.0142602495543672,
      "grad_norm": 0.06258969753980637,
      "learning_rate": 0.00019561063674215036,
      "loss": 0.156,
      "step": 49
    },
    {
      "epoch": 0.014551275055476737,
      "grad_norm": 0.06769802421331406,
      "learning_rate": 0.00019538442512436328,
      "loss": 0.1547,
      "step": 50
    },
    {
      "epoch": 0.014842300556586271,
      "grad_norm": 0.07802195101976395,
      "learning_rate": 0.00019515266749622778,
      "loss": 0.1827,
      "step": 51
    },
    {
      "epoch": 0.015133326057695805,
      "grad_norm": 0.070973701775074,
      "learning_rate": 0.00019491537733300676,
      "loss": 0.1553,
      "step": 52
    },
    {
      "epoch": 0.01542435155880534,
      "grad_norm": 0.07104197889566422,
      "learning_rate": 0.0001946725684316456,
      "loss": 0.1791,
      "step": 53
    },
    {
      "epoch": 0.015715377059914874,
      "grad_norm": 0.0786806121468544,
      "learning_rate": 0.00019442425490996988,
      "loss": 0.1875,
      "step": 54
    },
    {
      "epoch": 0.01600640256102441,
      "grad_norm": 0.06334872543811798,
      "learning_rate": 0.0001941704512058646,
      "loss": 0.1738,
      "step": 55
    },
    {
      "epoch": 0.016297428062133943,
      "grad_norm": 0.07314591854810715,
      "learning_rate": 0.0001939111720764347,
      "loss": 0.1454,
      "step": 56
    },
    {
      "epoch": 0.01658845356324348,
      "grad_norm": 0.07218274474143982,
      "learning_rate": 0.00019364643259714694,
      "loss": 0.1813,
      "step": 57
    },
    {
      "epoch": 0.016879479064353015,
      "grad_norm": 0.07224708050489426,
      "learning_rate": 0.00019337624816095358,
      "loss": 0.1849,
      "step": 58
    },
    {
      "epoch": 0.017170504565462548,
      "grad_norm": 0.069266676902771,
      "learning_rate": 0.00019310063447739698,
      "loss": 0.1659,
      "step": 59
    },
    {
      "epoch": 0.017461530066572084,
      "grad_norm": 0.06808722019195557,
      "learning_rate": 0.0001928196075716966,
      "loss": 0.1735,
      "step": 60
    },
    {
      "epoch": 0.017752555567681617,
      "grad_norm": 0.06713879108428955,
      "learning_rate": 0.00019253318378381704,
      "loss": 0.1592,
      "step": 61
    },
    {
      "epoch": 0.018043581068791153,
      "grad_norm": 0.07436138391494751,
      "learning_rate": 0.00019224137976751795,
      "loss": 0.1817,
      "step": 62
    },
    {
      "epoch": 0.01833460656990069,
      "grad_norm": 0.08002530038356781,
      "learning_rate": 0.00019194421248938575,
      "loss": 0.1746,
      "step": 63
    },
    {
      "epoch": 0.01862563207101022,
      "grad_norm": 0.076021708548069,
      "learning_rate": 0.00019164169922784716,
      "loss": 0.169,
      "step": 64
    },
    {
      "epoch": 0.018916657572119758,
      "grad_norm": 0.06513865292072296,
      "learning_rate": 0.00019133385757216459,
      "loss": 0.1562,
      "step": 65
    },
    {
      "epoch": 0.01920768307322929,
      "grad_norm": 0.07233449071645737,
      "learning_rate": 0.00019102070542141328,
      "loss": 0.1782,
      "step": 66
    },
    {
      "epoch": 0.019498708574338827,
      "grad_norm": 0.068657785654068,
      "learning_rate": 0.00019070226098344078,
      "loss": 0.162,
      "step": 67
    },
    {
      "epoch": 0.019789734075448363,
      "grad_norm": 0.07379890233278275,
      "learning_rate": 0.0001903785427738082,
      "loss": 0.1697,
      "step": 68
    },
    {
      "epoch": 0.020080759576557895,
      "grad_norm": 0.06616660207509995,
      "learning_rate": 0.00019004956961471355,
      "loss": 0.1379,
      "step": 69
    },
    {
      "epoch": 0.02037178507766743,
      "grad_norm": 0.0688454881310463,
      "learning_rate": 0.00018971536063389744,
      "loss": 0.1668,
      "step": 70
    },
    {
      "epoch": 0.020662810578776964,
      "grad_norm": 0.06237876042723656,
      "learning_rate": 0.00018937593526353096,
      "loss": 0.1414,
      "step": 71
    },
    {
      "epoch": 0.0209538360798865,
      "grad_norm": 0.06888283044099808,
      "learning_rate": 0.00018903131323908578,
      "loss": 0.1648,
      "step": 72
    },
    {
      "epoch": 0.021244861580996036,
      "grad_norm": 0.06820754706859589,
      "learning_rate": 0.00018868151459818658,
      "loss": 0.1705,
      "step": 73
    },
    {
      "epoch": 0.02153588708210557,
      "grad_norm": 0.07488477230072021,
      "learning_rate": 0.00018832655967944607,
      "loss": 0.1818,
      "step": 74
    },
    {
      "epoch": 0.021826912583215105,
      "grad_norm": 0.06322386115789413,
      "learning_rate": 0.00018796646912128245,
      "loss": 0.1606,
      "step": 75
    },
    {
      "epoch": 0.022117938084324638,
      "grad_norm": 0.06205340102314949,
      "learning_rate": 0.00018760126386071935,
      "loss": 0.1538,
      "step": 76
    },
    {
      "epoch": 0.022408963585434174,
      "grad_norm": 0.07324095070362091,
      "learning_rate": 0.00018723096513216842,
      "loss": 0.1647,
      "step": 77
    },
    {
      "epoch": 0.02269998908654371,
      "grad_norm": 0.08337172865867615,
      "learning_rate": 0.0001868555944661949,
      "loss": 0.1974,
      "step": 78
    },
    {
      "epoch": 0.022991014587653243,
      "grad_norm": 0.08376014232635498,
      "learning_rate": 0.00018647517368826545,
      "loss": 0.1718,
      "step": 79
    },
    {
      "epoch": 0.02328204008876278,
      "grad_norm": 0.07914192974567413,
      "learning_rate": 0.00018608972491747944,
      "loss": 0.1914,
      "step": 80
    },
    {
      "epoch": 0.02357306558987231,
      "grad_norm": 0.0706939846277237,
      "learning_rate": 0.00018569927056528263,
      "loss": 0.1597,
      "step": 81
    },
    {
      "epoch": 0.023864091090981848,
      "grad_norm": 0.0692586675286293,
      "learning_rate": 0.00018530383333416418,
      "loss": 0.1773,
      "step": 82
    },
    {
      "epoch": 0.024155116592091384,
      "grad_norm": 0.07192862033843994,
      "learning_rate": 0.00018490343621633659,
      "loss": 0.1756,
      "step": 83
    },
    {
      "epoch": 0.024446142093200916,
      "grad_norm": 0.07629089802503586,
      "learning_rate": 0.00018449810249239902,
      "loss": 0.1749,
      "step": 84
    },
    {
      "epoch": 0.024737167594310452,
      "grad_norm": 0.0677715539932251,
      "learning_rate": 0.00018408785572998336,
      "loss": 0.1579,
      "step": 85
    },
    {
      "epoch": 0.025028193095419985,
      "grad_norm": 0.07510597258806229,
      "learning_rate": 0.0001836727197823842,
      "loss": 0.1664,
      "step": 86
    },
    {
      "epoch": 0.02531921859652952,
      "grad_norm": 0.07853872328996658,
      "learning_rate": 0.00018325271878717186,
      "loss": 0.1611,
      "step": 87
    },
    {
      "epoch": 0.025610244097639057,
      "grad_norm": 0.06929906457662582,
      "learning_rate": 0.00018282787716478868,
      "loss": 0.1678,
      "step": 88
    },
    {
      "epoch": 0.02590126959874859,
      "grad_norm": 0.08053125441074371,
      "learning_rate": 0.00018239821961712953,
      "loss": 0.1891,
      "step": 89
    },
    {
      "epoch": 0.026192295099858126,
      "grad_norm": 0.07177354395389557,
      "learning_rate": 0.00018196377112610526,
      "loss": 0.1712,
      "step": 90
    },
    {
      "epoch": 0.02648332060096766,
      "grad_norm": 0.06295143812894821,
      "learning_rate": 0.00018152455695219025,
      "loss": 0.1483,
      "step": 91
    },
    {
      "epoch": 0.026774346102077195,
      "grad_norm": 0.0677083432674408,
      "learning_rate": 0.00018108060263295362,
      "loss": 0.162,
      "step": 92
    },
    {
      "epoch": 0.027065371603186728,
      "grad_norm": 0.075025275349617,
      "learning_rate": 0.0001806319339815745,
      "loss": 0.1755,
      "step": 93
    },
    {
      "epoch": 0.027356397104296264,
      "grad_norm": 0.07081899046897888,
      "learning_rate": 0.00018017857708534107,
      "loss": 0.1608,
      "step": 94
    },
    {
      "epoch": 0.0276474226054058,
      "grad_norm": 0.0698823481798172,
      "learning_rate": 0.0001797205583041337,
      "loss": 0.168,
      "step": 95
    },
    {
      "epoch": 0.027938448106515332,
      "grad_norm": 0.07225258648395538,
      "learning_rate": 0.00017925790426889235,
      "loss": 0.1584,
      "step": 96
    },
    {
      "epoch": 0.02822947360762487,
      "grad_norm": 0.06617313623428345,
      "learning_rate": 0.00017879064188006818,
      "loss": 0.1562,
      "step": 97
    },
    {
      "epoch": 0.0285204991087344,
      "grad_norm": 0.07458757609128952,
      "learning_rate": 0.00017831879830605937,
      "loss": 0.1643,
      "step": 98
    },
    {
      "epoch": 0.028811524609843937,
      "grad_norm": 0.07620100677013397,
      "learning_rate": 0.00017784240098163152,
      "loss": 0.1705,
      "step": 99
    },
    {
      "epoch": 0.029102550110953473,
      "grad_norm": 0.06909362226724625,
      "learning_rate": 0.00017736147760632248,
      "loss": 0.1441,
      "step": 100
    },
    {
      "epoch": 0.029102550110953473,
      "eval_loss": 0.16861361265182495,
      "eval_runtime": 1578.1522,
      "eval_samples_per_second": 7.335,
      "eval_steps_per_second": 1.834,
      "step": 100
    },
    {
      "epoch": 0.029393575612063006,
      "grad_norm": 0.06833357363939285,
      "learning_rate": 0.00017687605614283167,
      "loss": 0.1575,
      "step": 101
    },
    {
      "epoch": 0.029684601113172542,
      "grad_norm": 0.0781283900141716,
      "learning_rate": 0.0001763861648153945,
      "loss": 0.1809,
      "step": 102
    },
    {
      "epoch": 0.029975626614282075,
      "grad_norm": 0.06533481925725937,
      "learning_rate": 0.00017589183210814095,
      "loss": 0.1639,
      "step": 103
    },
    {
      "epoch": 0.03026665211539161,
      "grad_norm": 0.06966619938611984,
      "learning_rate": 0.00017539308676343973,
      "loss": 0.1554,
      "step": 104
    },
    {
      "epoch": 0.030557677616501147,
      "grad_norm": 0.065178282558918,
      "learning_rate": 0.00017488995778022686,
      "loss": 0.1477,
      "step": 105
    },
    {
      "epoch": 0.03084870311761068,
      "grad_norm": 0.06928521394729614,
      "learning_rate": 0.0001743824744123196,
      "loss": 0.1558,
      "step": 106
    },
    {
      "epoch": 0.031139728618720216,
      "grad_norm": 0.06974615901708603,
      "learning_rate": 0.00017387066616671572,
      "loss": 0.1536,
      "step": 107
    },
    {
      "epoch": 0.03143075411982975,
      "grad_norm": 0.07279954850673676,
      "learning_rate": 0.00017335456280187752,
      "loss": 0.1604,
      "step": 108
    },
    {
      "epoch": 0.031721779620939285,
      "grad_norm": 0.07937084138393402,
      "learning_rate": 0.00017283419432600184,
      "loss": 0.1835,
      "step": 109
    },
    {
      "epoch": 0.03201280512204882,
      "grad_norm": 0.07247325778007507,
      "learning_rate": 0.00017230959099527512,
      "loss": 0.1814,
      "step": 110
    },
    {
      "epoch": 0.03230383062315836,
      "grad_norm": 0.0691957026720047,
      "learning_rate": 0.00017178078331211432,
      "loss": 0.1497,
      "step": 111
    },
    {
      "epoch": 0.032594856124267886,
      "grad_norm": 0.06361576914787292,
      "learning_rate": 0.0001712478020233932,
      "loss": 0.1497,
      "step": 112
    },
    {
      "epoch": 0.03288588162537742,
      "grad_norm": 0.07963518798351288,
      "learning_rate": 0.00017071067811865476,
      "loss": 0.1695,
      "step": 113
    },
    {
      "epoch": 0.03317690712648696,
      "grad_norm": 0.07183556258678436,
      "learning_rate": 0.00017016944282830933,
      "loss": 0.1655,
      "step": 114
    },
    {
      "epoch": 0.033467932627596494,
      "grad_norm": 0.06393659859895706,
      "learning_rate": 0.00016962412762181869,
      "loss": 0.1578,
      "step": 115
    },
    {
      "epoch": 0.03375895812870603,
      "grad_norm": 0.0766880065202713,
      "learning_rate": 0.00016907476420586633,
      "loss": 0.1702,
      "step": 116
    },
    {
      "epoch": 0.03404998362981556,
      "grad_norm": 0.08281052857637405,
      "learning_rate": 0.00016852138452251388,
      "loss": 0.1889,
      "step": 117
    },
    {
      "epoch": 0.034341009130925096,
      "grad_norm": 0.07395404577255249,
      "learning_rate": 0.00016796402074734402,
      "loss": 0.166,
      "step": 118
    },
    {
      "epoch": 0.03463203463203463,
      "grad_norm": 0.06773527711629868,
      "learning_rate": 0.0001674027052875895,
      "loss": 0.1559,
      "step": 119
    },
    {
      "epoch": 0.03492306013314417,
      "grad_norm": 0.06645803898572922,
      "learning_rate": 0.00016683747078024888,
      "loss": 0.1568,
      "step": 120
    },
    {
      "epoch": 0.035214085634253704,
      "grad_norm": 0.07534414529800415,
      "learning_rate": 0.00016626835009018892,
      "loss": 0.1494,
      "step": 121
    },
    {
      "epoch": 0.03550511113536323,
      "grad_norm": 0.07698463648557663,
      "learning_rate": 0.00016569537630823383,
      "loss": 0.1731,
      "step": 122
    },
    {
      "epoch": 0.03579613663647277,
      "grad_norm": 0.07219712436199188,
      "learning_rate": 0.000165118582749241,
      "loss": 0.1609,
      "step": 123
    },
    {
      "epoch": 0.036087162137582306,
      "grad_norm": 0.06992946565151215,
      "learning_rate": 0.0001645380029501641,
      "loss": 0.1525,
      "step": 124
    },
    {
      "epoch": 0.03637818763869184,
      "grad_norm": 0.08205880224704742,
      "learning_rate": 0.00016395367066810313,
      "loss": 0.1477,
      "step": 125
    },
    {
      "epoch": 0.03666921313980138,
      "grad_norm": 0.07560513913631439,
      "learning_rate": 0.00016336561987834153,
      "loss": 0.1787,
      "step": 126
    },
    {
      "epoch": 0.03696023864091091,
      "grad_norm": 0.07634903490543365,
      "learning_rate": 0.00016277388477237086,
      "loss": 0.1622,
      "step": 127
    },
    {
      "epoch": 0.03725126414202044,
      "grad_norm": 0.07008732855319977,
      "learning_rate": 0.00016217849975590272,
      "loss": 0.1583,
      "step": 128
    },
    {
      "epoch": 0.03754228964312998,
      "grad_norm": 0.07009593397378922,
      "learning_rate": 0.00016157949944686827,
      "loss": 0.151,
      "step": 129
    },
    {
      "epoch": 0.037833315144239515,
      "grad_norm": 0.07215984910726547,
      "learning_rate": 0.00016097691867340545,
      "loss": 0.1551,
      "step": 130
    },
    {
      "epoch": 0.03812434064534905,
      "grad_norm": 0.07626433670520782,
      "learning_rate": 0.0001603707924718338,
      "loss": 0.1568,
      "step": 131
    },
    {
      "epoch": 0.03841536614645858,
      "grad_norm": 0.07062167674303055,
      "learning_rate": 0.00015976115608461758,
      "loss": 0.1689,
      "step": 132
    },
    {
      "epoch": 0.03870639164756812,
      "grad_norm": 0.06966700404882431,
      "learning_rate": 0.00015914804495831635,
      "loss": 0.1754,
      "step": 133
    },
    {
      "epoch": 0.03899741714867765,
      "grad_norm": 0.06686096638441086,
      "learning_rate": 0.00015853149474152423,
      "loss": 0.1698,
      "step": 134
    },
    {
      "epoch": 0.03928844264978719,
      "grad_norm": 0.07211221009492874,
      "learning_rate": 0.00015791154128279696,
      "loss": 0.1637,
      "step": 135
    },
    {
      "epoch": 0.039579468150896725,
      "grad_norm": 0.07641851156949997,
      "learning_rate": 0.00015728822062856758,
      "loss": 0.1805,
      "step": 136
    },
    {
      "epoch": 0.039870493652006254,
      "grad_norm": 0.07227915525436401,
      "learning_rate": 0.0001566615690210507,
      "loss": 0.1689,
      "step": 137
    },
    {
      "epoch": 0.04016151915311579,
      "grad_norm": 0.0694207176566124,
      "learning_rate": 0.00015603162289613503,
      "loss": 0.1491,
      "step": 138
    },
    {
      "epoch": 0.04045254465422533,
      "grad_norm": 0.06391049921512604,
      "learning_rate": 0.00015539841888126488,
      "loss": 0.1491,
      "step": 139
    },
    {
      "epoch": 0.04074357015533486,
      "grad_norm": 0.07112076133489609,
      "learning_rate": 0.0001547619937933108,
      "loss": 0.163,
      "step": 140
    },
    {
      "epoch": 0.0410345956564444,
      "grad_norm": 0.07369551062583923,
      "learning_rate": 0.00015412238463642845,
      "loss": 0.1587,
      "step": 141
    },
    {
      "epoch": 0.04132562115755393,
      "grad_norm": 0.07277291268110275,
      "learning_rate": 0.00015347962859990744,
      "loss": 0.1711,
      "step": 142
    },
    {
      "epoch": 0.041616646658663464,
      "grad_norm": 0.0772562026977539,
      "learning_rate": 0.00015283376305600866,
      "loss": 0.1757,
      "step": 143
    },
    {
      "epoch": 0.041907672159773,
      "grad_norm": 0.0680210217833519,
      "learning_rate": 0.00015218482555779165,
      "loss": 0.1739,
      "step": 144
    },
    {
      "epoch": 0.042198697660882536,
      "grad_norm": 0.06750776618719101,
      "learning_rate": 0.0001515328538369309,
      "loss": 0.1507,
      "step": 145
    },
    {
      "epoch": 0.04248972316199207,
      "grad_norm": 0.0778404027223587,
      "learning_rate": 0.00015087788580152206,
      "loss": 0.1668,
      "step": 146
    },
    {
      "epoch": 0.0427807486631016,
      "grad_norm": 0.07347714900970459,
      "learning_rate": 0.0001502199595338778,
      "loss": 0.1709,
      "step": 147
    },
    {
      "epoch": 0.04307177416421114,
      "grad_norm": 0.07480094581842422,
      "learning_rate": 0.00014955911328831355,
      "loss": 0.1639,
      "step": 148
    },
    {
      "epoch": 0.043362799665320674,
      "grad_norm": 0.07880245894193649,
      "learning_rate": 0.00014889538548892338,
      "loss": 0.1838,
      "step": 149
    },
    {
      "epoch": 0.04365382516643021,
      "grad_norm": 0.07331986725330353,
      "learning_rate": 0.00014822881472734562,
      "loss": 0.1686,
      "step": 150
    },
    {
      "epoch": 0.043944850667539746,
      "grad_norm": 0.062073007225990295,
      "learning_rate": 0.00014755943976051927,
      "loss": 0.1414,
      "step": 151
    },
    {
      "epoch": 0.044235876168649275,
      "grad_norm": 0.0803973376750946,
      "learning_rate": 0.00014688729950843035,
      "loss": 0.1627,
      "step": 152
    },
    {
      "epoch": 0.04452690166975881,
      "grad_norm": 0.07154098898172379,
      "learning_rate": 0.00014621243305184897,
      "loss": 0.159,
      "step": 153
    },
    {
      "epoch": 0.04481792717086835,
      "grad_norm": 0.07353903353214264,
      "learning_rate": 0.0001455348796300571,
      "loss": 0.1503,
      "step": 154
    },
    {
      "epoch": 0.045108952671977884,
      "grad_norm": 0.07070112973451614,
      "learning_rate": 0.00014485467863856703,
      "loss": 0.1387,
      "step": 155
    },
    {
      "epoch": 0.04539997817308742,
      "grad_norm": 0.0715903788805008,
      "learning_rate": 0.0001441718696268307,
      "loss": 0.1554,
      "step": 156
    },
    {
      "epoch": 0.04569100367419695,
      "grad_norm": 0.06608898937702179,
      "learning_rate": 0.00014348649229594017,
      "loss": 0.1416,
      "step": 157
    },
    {
      "epoch": 0.045982029175306485,
      "grad_norm": 0.07506071776151657,
      "learning_rate": 0.0001427985864963193,
      "loss": 0.1668,
      "step": 158
    },
    {
      "epoch": 0.04627305467641602,
      "grad_norm": 0.08147752285003662,
      "learning_rate": 0.00014210819222540663,
      "loss": 0.1625,
      "step": 159
    },
    {
      "epoch": 0.04656408017752556,
      "grad_norm": 0.0735524520277977,
      "learning_rate": 0.00014141534962532984,
      "loss": 0.1643,
      "step": 160
    },
    {
      "epoch": 0.046855105678635094,
      "grad_norm": 0.07707686722278595,
      "learning_rate": 0.00014072009898057173,
      "loss": 0.1609,
      "step": 161
    },
    {
      "epoch": 0.04714613117974462,
      "grad_norm": 0.07367052882909775,
      "learning_rate": 0.0001400224807156278,
      "loss": 0.1548,
      "step": 162
    },
    {
      "epoch": 0.04743715668085416,
      "grad_norm": 0.07454690337181091,
      "learning_rate": 0.00013932253539265604,
      "loss": 0.1646,
      "step": 163
    },
    {
      "epoch": 0.047728182181963695,
      "grad_norm": 0.07515886425971985,
      "learning_rate": 0.0001386203037091183,
      "loss": 0.1449,
      "step": 164
    },
    {
      "epoch": 0.04801920768307323,
      "grad_norm": 0.07659222930669785,
      "learning_rate": 0.00013791582649541403,
      "loss": 0.1555,
      "step": 165
    },
    {
      "epoch": 0.04831023318418277,
      "grad_norm": 0.06687572598457336,
      "learning_rate": 0.00013720914471250644,
      "loss": 0.142,
      "step": 166
    },
    {
      "epoch": 0.048601258685292296,
      "grad_norm": 0.06918177753686905,
      "learning_rate": 0.00013650029944954048,
      "loss": 0.15,
      "step": 167
    },
    {
      "epoch": 0.04889228418640183,
      "grad_norm": 0.07094506174325943,
      "learning_rate": 0.0001357893319214542,
      "loss": 0.1356,
      "step": 168
    },
    {
      "epoch": 0.04918330968751137,
      "grad_norm": 0.07885518670082092,
      "learning_rate": 0.000135076283466582,
      "loss": 0.1698,
      "step": 169
    },
    {
      "epoch": 0.049474335188620905,
      "grad_norm": 0.0780964344739914,
      "learning_rate": 0.00013436119554425133,
      "loss": 0.1672,
      "step": 170
    },
    {
      "epoch": 0.04976536068973044,
      "grad_norm": 0.06983709335327148,
      "learning_rate": 0.00013364410973237185,
      "loss": 0.1662,
      "step": 171
    },
    {
      "epoch": 0.05005638619083997,
      "grad_norm": 0.07986849546432495,
      "learning_rate": 0.00013292506772501819,
      "loss": 0.1456,
      "step": 172
    },
    {
      "epoch": 0.050347411691949506,
      "grad_norm": 0.07250676304101944,
      "learning_rate": 0.00013220411133000543,
      "loss": 0.1575,
      "step": 173
    },
    {
      "epoch": 0.05063843719305904,
      "grad_norm": 0.07436628639698029,
      "learning_rate": 0.0001314812824664585,
      "loss": 0.1374,
      "step": 174
    },
    {
      "epoch": 0.05092946269416858,
      "grad_norm": 0.07161043584346771,
      "learning_rate": 0.00013075662316237464,
      "loss": 0.1432,
      "step": 175
    },
    {
      "epoch": 0.051220488195278115,
      "grad_norm": 0.08502248674631119,
      "learning_rate": 0.0001300301755521798,
      "loss": 0.1709,
      "step": 176
    },
    {
      "epoch": 0.051511513696387644,
      "grad_norm": 0.07785065472126007,
      "learning_rate": 0.00012930198187427886,
      "loss": 0.1765,
      "step": 177
    },
    {
      "epoch": 0.05180253919749718,
      "grad_norm": 0.07903854548931122,
      "learning_rate": 0.0001285720844685996,
      "loss": 0.1662,
      "step": 178
    },
    {
      "epoch": 0.052093564698606716,
      "grad_norm": 0.08938921988010406,
      "learning_rate": 0.00012784052577413096,
      "loss": 0.1826,
      "step": 179
    },
    {
      "epoch": 0.05238459019971625,
      "grad_norm": 0.08065656572580338,
      "learning_rate": 0.00012710734832645557,
      "loss": 0.169,
      "step": 180
    },
    {
      "epoch": 0.05267561570082579,
      "grad_norm": 0.07795777171850204,
      "learning_rate": 0.00012637259475527634,
      "loss": 0.1762,
      "step": 181
    },
    {
      "epoch": 0.05296664120193532,
      "grad_norm": 0.0752585232257843,
      "learning_rate": 0.00012563630778193805,
      "loss": 0.1627,
      "step": 182
    },
    {
      "epoch": 0.053257666703044854,
      "grad_norm": 0.06943824142217636,
      "learning_rate": 0.0001248985302169432,
      "loss": 0.1509,
      "step": 183
    },
    {
      "epoch": 0.05354869220415439,
      "grad_norm": 0.07209432125091553,
      "learning_rate": 0.00012415930495746302,
      "loss": 0.1655,
      "step": 184
    },
    {
      "epoch": 0.053839717705263926,
      "grad_norm": 0.07933972030878067,
      "learning_rate": 0.00012341867498484303,
      "loss": 0.1677,
      "step": 185
    },
    {
      "epoch": 0.054130743206373455,
      "grad_norm": 0.07778981328010559,
      "learning_rate": 0.00012267668336210413,
      "loss": 0.1581,
      "step": 186
    },
    {
      "epoch": 0.05442176870748299,
      "grad_norm": 0.07811924070119858,
      "learning_rate": 0.00012193337323143867,
      "loss": 0.1495,
      "step": 187
    },
    {
      "epoch": 0.05471279420859253,
      "grad_norm": 0.07439761608839035,
      "learning_rate": 0.00012118878781170214,
      "loss": 0.154,
      "step": 188
    },
    {
      "epoch": 0.05500381970970206,
      "grad_norm": 0.07360168546438217,
      "learning_rate": 0.00012044297039589998,
      "loss": 0.1385,
      "step": 189
    },
    {
      "epoch": 0.0552948452108116,
      "grad_norm": 0.0793876051902771,
      "learning_rate": 0.00011969596434867063,
      "loss": 0.1493,
      "step": 190
    },
    {
      "epoch": 0.05558587071192113,
      "grad_norm": 0.07218817621469498,
      "learning_rate": 0.00011894781310376398,
      "loss": 0.1384,
      "step": 191
    },
    {
      "epoch": 0.055876896213030665,
      "grad_norm": 0.07586020231246948,
      "learning_rate": 0.00011819856016151615,
      "loss": 0.16,
      "step": 192
    },
    {
      "epoch": 0.0561679217141402,
      "grad_norm": 0.07959903031587601,
      "learning_rate": 0.00011744824908631997,
      "loss": 0.1615,
      "step": 193
    },
    {
      "epoch": 0.05645894721524974,
      "grad_norm": 0.0836108848452568,
      "learning_rate": 0.00011669692350409223,
      "loss": 0.161,
      "step": 194
    },
    {
      "epoch": 0.05674997271635927,
      "grad_norm": 0.06765826046466827,
      "learning_rate": 0.00011594462709973683,
      "loss": 0.1336,
      "step": 195
    },
    {
      "epoch": 0.0570409982174688,
      "grad_norm": 0.07990916073322296,
      "learning_rate": 0.00011519140361460509,
      "loss": 0.1503,
      "step": 196
    },
    {
      "epoch": 0.05733202371857834,
      "grad_norm": 0.07911231368780136,
      "learning_rate": 0.00011443729684395224,
      "loss": 0.1532,
      "step": 197
    },
    {
      "epoch": 0.057623049219687875,
      "grad_norm": 0.07620514929294586,
      "learning_rate": 0.00011368235063439103,
      "loss": 0.1462,
      "step": 198
    },
    {
      "epoch": 0.05791407472079741,
      "grad_norm": 0.0822930559515953,
      "learning_rate": 0.00011292660888134241,
      "loss": 0.1628,
      "step": 199
    },
    {
      "epoch": 0.05820510022190695,
      "grad_norm": 0.08735381811857224,
      "learning_rate": 0.00011217011552648316,
      "loss": 0.1841,
      "step": 200
    },
    {
      "epoch": 0.05820510022190695,
      "eval_loss": 0.16103102266788483,
      "eval_runtime": 1580.5853,
      "eval_samples_per_second": 7.323,
      "eval_steps_per_second": 1.831,
      "step": 200
    },
    {
      "epoch": 0.058496125723016476,
      "grad_norm": 0.07938691228628159,
      "learning_rate": 0.00011141291455519116,
      "loss": 0.1433,
      "step": 201
    },
    {
      "epoch": 0.05878715122412601,
      "grad_norm": 0.07802147418260574,
      "learning_rate": 0.00011065504999398762,
      "loss": 0.1553,
      "step": 202
    },
    {
      "epoch": 0.05907817672523555,
      "grad_norm": 0.09244146943092346,
      "learning_rate": 0.00010989656590797748,
      "loss": 0.1501,
      "step": 203
    },
    {
      "epoch": 0.059369202226345084,
      "grad_norm": 0.07255587726831436,
      "learning_rate": 0.00010913750639828711,
      "loss": 0.1428,
      "step": 204
    },
    {
      "epoch": 0.05966022772745462,
      "grad_norm": 0.06894674152135849,
      "learning_rate": 0.00010837791559950028,
      "loss": 0.1465,
      "step": 205
    },
    {
      "epoch": 0.05995125322856415,
      "grad_norm": 0.06965523213148117,
      "learning_rate": 0.00010761783767709182,
      "loss": 0.1407,
      "step": 206
    },
    {
      "epoch": 0.060242278729673686,
      "grad_norm": 0.07710455358028412,
      "learning_rate": 0.0001068573168248598,
      "loss": 0.1565,
      "step": 207
    },
    {
      "epoch": 0.06053330423078322,
      "grad_norm": 0.0719844400882721,
      "learning_rate": 0.00010609639726235591,
      "loss": 0.1483,
      "step": 208
    },
    {
      "epoch": 0.06082432973189276,
      "grad_norm": 0.08301186561584473,
      "learning_rate": 0.00010533512323231437,
      "loss": 0.167,
      "step": 209
    },
    {
      "epoch": 0.061115355233002294,
      "grad_norm": 0.07628195732831955,
      "learning_rate": 0.00010457353899807946,
      "loss": 0.1398,
      "step": 210
    },
    {
      "epoch": 0.06140638073411182,
      "grad_norm": 0.07327238470315933,
      "learning_rate": 0.00010381168884103188,
      "loss": 0.1499,
      "step": 211
    },
    {
      "epoch": 0.06169740623522136,
      "grad_norm": 0.07578903436660767,
      "learning_rate": 0.00010304961705801415,
      "loss": 0.1521,
      "step": 212
    },
    {
      "epoch": 0.061988431736330896,
      "grad_norm": 0.07652377337217331,
      "learning_rate": 0.00010228736795875489,
      "loss": 0.1555,
      "step": 213
    },
    {
      "epoch": 0.06227945723744043,
      "grad_norm": 0.08231547474861145,
      "learning_rate": 0.0001015249858632926,
      "loss": 0.1625,
      "step": 214
    },
    {
      "epoch": 0.06257048273854997,
      "grad_norm": 0.06857180595397949,
      "learning_rate": 0.00010076251509939866,
      "loss": 0.1421,
      "step": 215
    },
    {
      "epoch": 0.0628615082396595,
      "grad_norm": 0.0719117596745491,
      "learning_rate": 0.0001,
      "loss": 0.147,
      "step": 216
    },
    {
      "epoch": 0.06315253374076904,
      "grad_norm": 0.06776969879865646,
      "learning_rate": 9.923748490060135e-05,
      "loss": 0.1404,
      "step": 217
    },
    {
      "epoch": 0.06344355924187857,
      "grad_norm": 0.07235383242368698,
      "learning_rate": 9.847501413670742e-05,
      "loss": 0.1486,
      "step": 218
    },
    {
      "epoch": 0.0637345847429881,
      "grad_norm": 0.08163522928953171,
      "learning_rate": 9.771263204124514e-05,
      "loss": 0.1588,
      "step": 219
    },
    {
      "epoch": 0.06402561024409764,
      "grad_norm": 0.08176423609256744,
      "learning_rate": 9.695038294198589e-05,
      "loss": 0.1693,
      "step": 220
    },
    {
      "epoch": 0.06431663574520717,
      "grad_norm": 0.08553650230169296,
      "learning_rate": 9.618831115896815e-05,
      "loss": 0.1765,
      "step": 221
    },
    {
      "epoch": 0.06460766124631671,
      "grad_norm": 0.08697977662086487,
      "learning_rate": 9.542646100192056e-05,
      "loss": 0.1547,
      "step": 222
    },
    {
      "epoch": 0.06489868674742624,
      "grad_norm": 0.07338088750839233,
      "learning_rate": 9.466487676768563e-05,
      "loss": 0.1485,
      "step": 223
    },
    {
      "epoch": 0.06518971224853577,
      "grad_norm": 0.08859650790691376,
      "learning_rate": 9.390360273764411e-05,
      "loss": 0.1608,
      "step": 224
    },
    {
      "epoch": 0.06548073774964532,
      "grad_norm": 0.0738888531923294,
      "learning_rate": 9.314268317514024e-05,
      "loss": 0.145,
      "step": 225
    },
    {
      "epoch": 0.06577176325075484,
      "grad_norm": 0.07572459429502487,
      "learning_rate": 9.238216232290822e-05,
      "loss": 0.1477,
      "step": 226
    },
    {
      "epoch": 0.06606278875186439,
      "grad_norm": 0.07947902381420135,
      "learning_rate": 9.162208440049976e-05,
      "loss": 0.1571,
      "step": 227
    },
    {
      "epoch": 0.06635381425297392,
      "grad_norm": 0.08949624747037888,
      "learning_rate": 9.08624936017129e-05,
      "loss": 0.1767,
      "step": 228
    },
    {
      "epoch": 0.06664483975408345,
      "grad_norm": 0.07495246082544327,
      "learning_rate": 9.010343409202256e-05,
      "loss": 0.163,
      "step": 229
    },
    {
      "epoch": 0.06693586525519299,
      "grad_norm": 0.08242852240800858,
      "learning_rate": 8.93449500060124e-05,
      "loss": 0.1764,
      "step": 230
    },
    {
      "epoch": 0.06722689075630252,
      "grad_norm": 0.08443745970726013,
      "learning_rate": 8.858708544480887e-05,
      "loss": 0.1646,
      "step": 231
    },
    {
      "epoch": 0.06751791625741206,
      "grad_norm": 0.08463139832019806,
      "learning_rate": 8.782988447351685e-05,
      "loss": 0.158,
      "step": 232
    },
    {
      "epoch": 0.06780894175852159,
      "grad_norm": 0.0822853296995163,
      "learning_rate": 8.707339111865763e-05,
      "loss": 0.1477,
      "step": 233
    },
    {
      "epoch": 0.06809996725963112,
      "grad_norm": 0.07560363411903381,
      "learning_rate": 8.6317649365609e-05,
      "loss": 0.161,
      "step": 234
    },
    {
      "epoch": 0.06839099276074066,
      "grad_norm": 0.08058074116706848,
      "learning_rate": 8.556270315604778e-05,
      "loss": 0.16,
      "step": 235
    },
    {
      "epoch": 0.06868201826185019,
      "grad_norm": 0.08817370980978012,
      "learning_rate": 8.480859638539492e-05,
      "loss": 0.1781,
      "step": 236
    },
    {
      "epoch": 0.06897304376295973,
      "grad_norm": 0.07165003567934036,
      "learning_rate": 8.405537290026318e-05,
      "loss": 0.1274,
      "step": 237
    },
    {
      "epoch": 0.06926406926406926,
      "grad_norm": 0.07525242120027542,
      "learning_rate": 8.33030764959078e-05,
      "loss": 0.1491,
      "step": 238
    },
    {
      "epoch": 0.0695550947651788,
      "grad_norm": 0.07487063109874725,
      "learning_rate": 8.255175091368004e-05,
      "loss": 0.1566,
      "step": 239
    },
    {
      "epoch": 0.06984612026628834,
      "grad_norm": 0.08262135833501816,
      "learning_rate": 8.180143983848387e-05,
      "loss": 0.1615,
      "step": 240
    },
    {
      "epoch": 0.07013714576739787,
      "grad_norm": 0.07212772965431213,
      "learning_rate": 8.105218689623603e-05,
      "loss": 0.124,
      "step": 241
    },
    {
      "epoch": 0.07042817126850741,
      "grad_norm": 0.07601718604564667,
      "learning_rate": 8.030403565132942e-05,
      "loss": 0.1442,
      "step": 242
    },
    {
      "epoch": 0.07071919676961694,
      "grad_norm": 0.06825247406959534,
      "learning_rate": 7.955702960410007e-05,
      "loss": 0.1332,
      "step": 243
    },
    {
      "epoch": 0.07101022227072647,
      "grad_norm": 0.08394462615251541,
      "learning_rate": 7.881121218829787e-05,
      "loss": 0.1574,
      "step": 244
    },
    {
      "epoch": 0.07130124777183601,
      "grad_norm": 0.073182113468647,
      "learning_rate": 7.806662676856133e-05,
      "loss": 0.1487,
      "step": 245
    },
    {
      "epoch": 0.07159227327294554,
      "grad_norm": 0.07771697640419006,
      "learning_rate": 7.732331663789592e-05,
      "loss": 0.1441,
      "step": 246
    },
    {
      "epoch": 0.07188329877405508,
      "grad_norm": 0.08099354058504105,
      "learning_rate": 7.658132501515702e-05,
      "loss": 0.1517,
      "step": 247
    },
    {
      "epoch": 0.07217432427516461,
      "grad_norm": 0.08727878332138062,
      "learning_rate": 7.584069504253703e-05,
      "loss": 0.17,
      "step": 248
    },
    {
      "epoch": 0.07246534977627414,
      "grad_norm": 0.07737427949905396,
      "learning_rate": 7.510146978305683e-05,
      "loss": 0.1481,
      "step": 249
    },
    {
      "epoch": 0.07275637527738368,
      "grad_norm": 0.08307147771120071,
      "learning_rate": 7.436369221806201e-05,
      "loss": 0.1623,
      "step": 250
    },
    {
      "epoch": 0.07304740077849321,
      "grad_norm": 0.08056792616844177,
      "learning_rate": 7.362740524472372e-05,
      "loss": 0.1443,
      "step": 251
    },
    {
      "epoch": 0.07333842627960276,
      "grad_norm": 0.09201047569513321,
      "learning_rate": 7.289265167354449e-05,
      "loss": 0.168,
      "step": 252
    },
    {
      "epoch": 0.07362945178071229,
      "grad_norm": 0.08724138140678406,
      "learning_rate": 7.215947422586906e-05,
      "loss": 0.1566,
      "step": 253
    },
    {
      "epoch": 0.07392047728182181,
      "grad_norm": 0.07930216193199158,
      "learning_rate": 7.142791553140045e-05,
      "loss": 0.1461,
      "step": 254
    },
    {
      "epoch": 0.07421150278293136,
      "grad_norm": 0.07905029505491257,
      "learning_rate": 7.069801812572117e-05,
      "loss": 0.1364,
      "step": 255
    },
    {
      "epoch": 0.07450252828404089,
      "grad_norm": 0.08945164084434509,
      "learning_rate": 6.996982444782021e-05,
      "loss": 0.1698,
      "step": 256
    },
    {
      "epoch": 0.07479355378515043,
      "grad_norm": 0.08269284665584564,
      "learning_rate": 6.92433768376254e-05,
      "loss": 0.1662,
      "step": 257
    },
    {
      "epoch": 0.07508457928625996,
      "grad_norm": 0.08096525073051453,
      "learning_rate": 6.851871753354153e-05,
      "loss": 0.1526,
      "step": 258
    },
    {
      "epoch": 0.07537560478736949,
      "grad_norm": 0.08036118000745773,
      "learning_rate": 6.77958886699946e-05,
      "loss": 0.1507,
      "step": 259
    },
    {
      "epoch": 0.07566663028847903,
      "grad_norm": 0.07475791871547699,
      "learning_rate": 6.707493227498188e-05,
      "loss": 0.1565,
      "step": 260
    },
    {
      "epoch": 0.07595765578958856,
      "grad_norm": 0.07425666600465775,
      "learning_rate": 6.635589026762818e-05,
      "loss": 0.1474,
      "step": 261
    },
    {
      "epoch": 0.0762486812906981,
      "grad_norm": 0.08315514773130417,
      "learning_rate": 6.563880445574873e-05,
      "loss": 0.1601,
      "step": 262
    },
    {
      "epoch": 0.07653970679180763,
      "grad_norm": 0.08405305445194244,
      "learning_rate": 6.492371653341804e-05,
      "loss": 0.1584,
      "step": 263
    },
    {
      "epoch": 0.07683073229291716,
      "grad_norm": 0.08118040859699249,
      "learning_rate": 6.421066807854584e-05,
      "loss": 0.1517,
      "step": 264
    },
    {
      "epoch": 0.0771217577940267,
      "grad_norm": 0.07679029554128647,
      "learning_rate": 6.349970055045956e-05,
      "loss": 0.1503,
      "step": 265
    },
    {
      "epoch": 0.07741278329513623,
      "grad_norm": 0.07175418734550476,
      "learning_rate": 6.279085528749359e-05,
      "loss": 0.1449,
      "step": 266
    },
    {
      "epoch": 0.07770380879624578,
      "grad_norm": 0.07590077817440033,
      "learning_rate": 6.208417350458598e-05,
      "loss": 0.1384,
      "step": 267
    },
    {
      "epoch": 0.0779948342973553,
      "grad_norm": 0.09335010498762131,
      "learning_rate": 6.137969629088173e-05,
      "loss": 0.1557,
      "step": 268
    },
    {
      "epoch": 0.07828585979846484,
      "grad_norm": 0.0851510614156723,
      "learning_rate": 6.067746460734398e-05,
      "loss": 0.1643,
      "step": 269
    },
    {
      "epoch": 0.07857688529957438,
      "grad_norm": 0.0860925167798996,
      "learning_rate": 5.9977519284372194e-05,
      "loss": 0.1663,
      "step": 270
    },
    {
      "epoch": 0.07886791080068391,
      "grad_norm": 0.0843532383441925,
      "learning_rate": 5.927990101942828e-05,
      "loss": 0.1587,
      "step": 271
    },
    {
      "epoch": 0.07915893630179345,
      "grad_norm": 0.08070237189531326,
      "learning_rate": 5.8584650374670135e-05,
      "loss": 0.141,
      "step": 272
    },
    {
      "epoch": 0.07944996180290298,
      "grad_norm": 0.07470858842134476,
      "learning_rate": 5.789180777459337e-05,
      "loss": 0.1256,
      "step": 273
    },
    {
      "epoch": 0.07974098730401251,
      "grad_norm": 0.08068849891424179,
      "learning_rate": 5.720141350368072e-05,
      "loss": 0.1539,
      "step": 274
    },
    {
      "epoch": 0.08003201280512205,
      "grad_norm": 0.08972500264644623,
      "learning_rate": 5.651350770405983e-05,
      "loss": 0.1493,
      "step": 275
    },
    {
      "epoch": 0.08032303830623158,
      "grad_norm": 0.09065806120634079,
      "learning_rate": 5.582813037316927e-05,
      "loss": 0.1556,
      "step": 276
    },
    {
      "epoch": 0.08061406380734112,
      "grad_norm": 0.0844668298959732,
      "learning_rate": 5.5145321361432956e-05,
      "loss": 0.1526,
      "step": 277
    },
    {
      "epoch": 0.08090508930845065,
      "grad_norm": 0.08198230713605881,
      "learning_rate": 5.446512036994287e-05,
      "loss": 0.1468,
      "step": 278
    },
    {
      "epoch": 0.08119611480956018,
      "grad_norm": 0.09235311299562454,
      "learning_rate": 5.3787566948151056e-05,
      "loss": 0.1693,
      "step": 279
    },
    {
      "epoch": 0.08148714031066973,
      "grad_norm": 0.08648033440113068,
      "learning_rate": 5.3112700491569666e-05,
      "loss": 0.1598,
      "step": 280
    },
    {
      "epoch": 0.08177816581177925,
      "grad_norm": 0.09177742153406143,
      "learning_rate": 5.244056023948075e-05,
      "loss": 0.1662,
      "step": 281
    },
    {
      "epoch": 0.0820691913128888,
      "grad_norm": 0.08776862174272537,
      "learning_rate": 5.177118527265438e-05,
      "loss": 0.1519,
      "step": 282
    },
    {
      "epoch": 0.08236021681399833,
      "grad_norm": 0.09411810338497162,
      "learning_rate": 5.1104614511076645e-05,
      "loss": 0.1618,
      "step": 283
    },
    {
      "epoch": 0.08265124231510786,
      "grad_norm": 0.1135680079460144,
      "learning_rate": 5.044088671168644e-05,
      "loss": 0.1503,
      "step": 284
    },
    {
      "epoch": 0.0829422678162174,
      "grad_norm": 0.10310947149991989,
      "learning_rate": 4.9780040466122235e-05,
      "loss": 0.1486,
      "step": 285
    },
    {
      "epoch": 0.08323329331732693,
      "grad_norm": 0.0861528143286705,
      "learning_rate": 4.912211419847794e-05,
      "loss": 0.1419,
      "step": 286
    },
    {
      "epoch": 0.08352431881843647,
      "grad_norm": 0.09601995348930359,
      "learning_rate": 4.846714616306908e-05,
      "loss": 0.1606,
      "step": 287
    },
    {
      "epoch": 0.083815344319546,
      "grad_norm": 0.08836833387613297,
      "learning_rate": 4.7815174442208354e-05,
      "loss": 0.1555,
      "step": 288
    },
    {
      "epoch": 0.08410636982065553,
      "grad_norm": 0.0788690447807312,
      "learning_rate": 4.716623694399134e-05,
      "loss": 0.1482,
      "step": 289
    },
    {
      "epoch": 0.08439739532176507,
      "grad_norm": 0.08364184200763702,
      "learning_rate": 4.652037140009259e-05,
      "loss": 0.1487,
      "step": 290
    },
    {
      "epoch": 0.0846884208228746,
      "grad_norm": 0.083168163895607,
      "learning_rate": 4.587761536357152e-05,
      "loss": 0.1483,
      "step": 291
    },
    {
      "epoch": 0.08497944632398415,
      "grad_norm": 0.0829937607049942,
      "learning_rate": 4.523800620668921e-05,
      "loss": 0.1481,
      "step": 292
    },
    {
      "epoch": 0.08527047182509367,
      "grad_norm": 0.08684185147285461,
      "learning_rate": 4.4601581118735105e-05,
      "loss": 0.1575,
      "step": 293
    },
    {
      "epoch": 0.0855614973262032,
      "grad_norm": 0.08446146547794342,
      "learning_rate": 4.3968377103865024e-05,
      "loss": 0.1464,
      "step": 294
    },
    {
      "epoch": 0.08585252282731275,
      "grad_norm": 0.08333813399076462,
      "learning_rate": 4.333843097894932e-05,
      "loss": 0.1365,
      "step": 295
    },
    {
      "epoch": 0.08614354832842228,
      "grad_norm": 0.09695811569690704,
      "learning_rate": 4.271177937143245e-05,
      "loss": 0.1671,
      "step": 296
    },
    {
      "epoch": 0.08643457382953182,
      "grad_norm": 0.09023378789424896,
      "learning_rate": 4.2088458717203085e-05,
      "loss": 0.1518,
      "step": 297
    },
    {
      "epoch": 0.08672559933064135,
      "grad_norm": 0.08972320705652237,
      "learning_rate": 4.146850525847579e-05,
      "loss": 0.157,
      "step": 298
    },
    {
      "epoch": 0.08701662483175088,
      "grad_norm": 0.10669073462486267,
      "learning_rate": 4.0851955041683675e-05,
      "loss": 0.1722,
      "step": 299
    },
    {
      "epoch": 0.08730765033286042,
      "grad_norm": 0.08736032247543335,
      "learning_rate": 4.023884391538244e-05,
      "loss": 0.1416,
      "step": 300
    },
    {
      "epoch": 0.08730765033286042,
      "eval_loss": 0.15649166703224182,
      "eval_runtime": 1579.1525,
      "eval_samples_per_second": 7.33,
      "eval_steps_per_second": 1.833,
      "step": 300
    },
    {
      "epoch": 0.08759867583396995,
      "grad_norm": 0.08116644620895386,
      "learning_rate": 3.9629207528166224e-05,
      "loss": 0.1422,
      "step": 301
    },
    {
      "epoch": 0.08788970133507949,
      "grad_norm": 0.07979750633239746,
      "learning_rate": 3.902308132659457e-05,
      "loss": 0.1464,
      "step": 302
    },
    {
      "epoch": 0.08818072683618902,
      "grad_norm": 0.07950086891651154,
      "learning_rate": 3.842050055313174e-05,
      "loss": 0.1352,
      "step": 303
    },
    {
      "epoch": 0.08847175233729855,
      "grad_norm": 0.0806957557797432,
      "learning_rate": 3.7821500244097274e-05,
      "loss": 0.1462,
      "step": 304
    },
    {
      "epoch": 0.0887627778384081,
      "grad_norm": 0.09410198032855988,
      "learning_rate": 3.722611522762917e-05,
      "loss": 0.1696,
      "step": 305
    },
    {
      "epoch": 0.08905380333951762,
      "grad_norm": 0.08685276657342911,
      "learning_rate": 3.663438012165848e-05,
      "loss": 0.1552,
      "step": 306
    },
    {
      "epoch": 0.08934482884062717,
      "grad_norm": 0.08622679114341736,
      "learning_rate": 3.604632933189691e-05,
      "loss": 0.1558,
      "step": 307
    },
    {
      "epoch": 0.0896358543417367,
      "grad_norm": 0.09552394598722458,
      "learning_rate": 3.5461997049835914e-05,
      "loss": 0.1573,
      "step": 308
    },
    {
      "epoch": 0.08992687984284622,
      "grad_norm": 0.07481776177883148,
      "learning_rate": 3.488141725075901e-05,
      "loss": 0.141,
      "step": 309
    },
    {
      "epoch": 0.09021790534395577,
      "grad_norm": 0.08296847343444824,
      "learning_rate": 3.430462369176619e-05,
      "loss": 0.1506,
      "step": 310
    },
    {
      "epoch": 0.0905089308450653,
      "grad_norm": 0.08685876429080963,
      "learning_rate": 3.373164990981108e-05,
      "loss": 0.1571,
      "step": 311
    },
    {
      "epoch": 0.09079995634617484,
      "grad_norm": 0.10209972411394119,
      "learning_rate": 3.316252921975116e-05,
      "loss": 0.1594,
      "step": 312
    },
    {
      "epoch": 0.09109098184728437,
      "grad_norm": 0.08648335188627243,
      "learning_rate": 3.259729471241051e-05,
      "loss": 0.1376,
      "step": 313
    },
    {
      "epoch": 0.0913820073483939,
      "grad_norm": 0.09473145008087158,
      "learning_rate": 3.203597925265598e-05,
      "loss": 0.1729,
      "step": 314
    },
    {
      "epoch": 0.09167303284950344,
      "grad_norm": 0.08934331685304642,
      "learning_rate": 3.1478615477486114e-05,
      "loss": 0.1494,
      "step": 315
    },
    {
      "epoch": 0.09196405835061297,
      "grad_norm": 0.08760195225477219,
      "learning_rate": 3.092523579413372e-05,
      "loss": 0.1459,
      "step": 316
    },
    {
      "epoch": 0.09225508385172251,
      "grad_norm": 0.08313330262899399,
      "learning_rate": 3.0375872378181337e-05,
      "loss": 0.1433,
      "step": 317
    },
    {
      "epoch": 0.09254610935283204,
      "grad_norm": 0.09257054328918457,
      "learning_rate": 2.98305571716907e-05,
      "loss": 0.1601,
      "step": 318
    },
    {
      "epoch": 0.09283713485394157,
      "grad_norm": 0.09015868604183197,
      "learning_rate": 2.9289321881345254e-05,
      "loss": 0.1628,
      "step": 319
    },
    {
      "epoch": 0.09312816035505112,
      "grad_norm": 0.08400508761405945,
      "learning_rate": 2.875219797660681e-05,
      "loss": 0.1429,
      "step": 320
    },
    {
      "epoch": 0.09341918585616064,
      "grad_norm": 0.08535855263471603,
      "learning_rate": 2.821921668788571e-05,
      "loss": 0.148,
      "step": 321
    },
    {
      "epoch": 0.09371021135727019,
      "grad_norm": 0.07879780232906342,
      "learning_rate": 2.769040900472488e-05,
      "loss": 0.1419,
      "step": 322
    },
    {
      "epoch": 0.09400123685837972,
      "grad_norm": 0.08892536163330078,
      "learning_rate": 2.71658056739982e-05,
      "loss": 0.1575,
      "step": 323
    },
    {
      "epoch": 0.09429226235948925,
      "grad_norm": 0.09279028326272964,
      "learning_rate": 2.6645437198122502e-05,
      "loss": 0.1407,
      "step": 324
    },
    {
      "epoch": 0.09458328786059879,
      "grad_norm": 0.07712056487798691,
      "learning_rate": 2.612933383328432e-05,
      "loss": 0.1439,
      "step": 325
    },
    {
      "epoch": 0.09487431336170832,
      "grad_norm": 0.09146617352962494,
      "learning_rate": 2.5617525587680402e-05,
      "loss": 0.1688,
      "step": 326
    },
    {
      "epoch": 0.09516533886281786,
      "grad_norm": 0.09416982531547546,
      "learning_rate": 2.5110042219773178e-05,
      "loss": 0.1466,
      "step": 327
    },
    {
      "epoch": 0.09545636436392739,
      "grad_norm": 0.10706603527069092,
      "learning_rate": 2.4606913236560282e-05,
      "loss": 0.1436,
      "step": 328
    },
    {
      "epoch": 0.09574738986503692,
      "grad_norm": 0.09682357311248779,
      "learning_rate": 2.410816789185907e-05,
      "loss": 0.1628,
      "step": 329
    },
    {
      "epoch": 0.09603841536614646,
      "grad_norm": 0.09372398257255554,
      "learning_rate": 2.3613835184605525e-05,
      "loss": 0.1587,
      "step": 330
    },
    {
      "epoch": 0.09632944086725599,
      "grad_norm": 0.08698038756847382,
      "learning_rate": 2.3123943857168318e-05,
      "loss": 0.1519,
      "step": 331
    },
    {
      "epoch": 0.09662046636836553,
      "grad_norm": 0.0826997235417366,
      "learning_rate": 2.2638522393677563e-05,
      "loss": 0.1492,
      "step": 332
    },
    {
      "epoch": 0.09691149186947506,
      "grad_norm": 0.08685287833213806,
      "learning_rate": 2.2157599018368492e-05,
      "loss": 0.1433,
      "step": 333
    },
    {
      "epoch": 0.09720251737058459,
      "grad_norm": 0.08258447051048279,
      "learning_rate": 2.1681201693940668e-05,
      "loss": 0.1425,
      "step": 334
    },
    {
      "epoch": 0.09749354287169414,
      "grad_norm": 0.09492287784814835,
      "learning_rate": 2.1209358119931845e-05,
      "loss": 0.1617,
      "step": 335
    },
    {
      "epoch": 0.09778456837280367,
      "grad_norm": 0.08668515086174011,
      "learning_rate": 2.074209573110769e-05,
      "loss": 0.1477,
      "step": 336
    },
    {
      "epoch": 0.09807559387391321,
      "grad_norm": 0.09318174421787262,
      "learning_rate": 2.027944169586633e-05,
      "loss": 0.156,
      "step": 337
    },
    {
      "epoch": 0.09836661937502274,
      "grad_norm": 0.08594143390655518,
      "learning_rate": 1.982142291465896e-05,
      "loss": 0.1671,
      "step": 338
    },
    {
      "epoch": 0.09865764487613227,
      "grad_norm": 0.08649858832359314,
      "learning_rate": 1.9368066018425503e-05,
      "loss": 0.1318,
      "step": 339
    },
    {
      "epoch": 0.09894867037724181,
      "grad_norm": 0.08002059906721115,
      "learning_rate": 1.891939736704641e-05,
      "loss": 0.1444,
      "step": 340
    },
    {
      "epoch": 0.09923969587835134,
      "grad_norm": 0.08685939759016037,
      "learning_rate": 1.8475443047809782e-05,
      "loss": 0.1385,
      "step": 341
    },
    {
      "epoch": 0.09953072137946088,
      "grad_norm": 0.08656252920627594,
      "learning_rate": 1.8036228873894746e-05,
      "loss": 0.1565,
      "step": 342
    },
    {
      "epoch": 0.09982174688057041,
      "grad_norm": 0.09303918480873108,
      "learning_rate": 1.760178038287048e-05,
      "loss": 0.1529,
      "step": 343
    },
    {
      "epoch": 0.10011277238167994,
      "grad_norm": 0.08067046105861664,
      "learning_rate": 1.7172122835211337e-05,
      "loss": 0.1541,
      "step": 344
    },
    {
      "epoch": 0.10040379788278948,
      "grad_norm": 0.09442534297704697,
      "learning_rate": 1.674728121282819e-05,
      "loss": 0.1632,
      "step": 345
    },
    {
      "epoch": 0.10069482338389901,
      "grad_norm": 0.08498077839612961,
      "learning_rate": 1.6327280217615792e-05,
      "loss": 0.1589,
      "step": 346
    },
    {
      "epoch": 0.10098584888500856,
      "grad_norm": 0.0830565020442009,
      "learning_rate": 1.591214427001667e-05,
      "loss": 0.1389,
      "step": 347
    },
    {
      "epoch": 0.10127687438611808,
      "grad_norm": 0.08560646325349808,
      "learning_rate": 1.5501897507601014e-05,
      "loss": 0.1328,
      "step": 348
    },
    {
      "epoch": 0.10156789988722761,
      "grad_norm": 0.07962379604578018,
      "learning_rate": 1.5096563783663432e-05,
      "loss": 0.1426,
      "step": 349
    },
    {
      "epoch": 0.10185892538833716,
      "grad_norm": 0.08420272916555405,
      "learning_rate": 1.4696166665835853e-05,
      "loss": 0.1587,
      "step": 350
    },
    {
      "epoch": 0.10214995088944669,
      "grad_norm": 0.09415718913078308,
      "learning_rate": 1.4300729434717396e-05,
      "loss": 0.1711,
      "step": 351
    },
    {
      "epoch": 0.10244097639055623,
      "grad_norm": 0.08557581901550293,
      "learning_rate": 1.3910275082520573e-05,
      "loss": 0.1483,
      "step": 352
    },
    {
      "epoch": 0.10273200189166576,
      "grad_norm": 0.10299764573574066,
      "learning_rate": 1.3524826311734551e-05,
      "loss": 0.1402,
      "step": 353
    },
    {
      "epoch": 0.10302302739277529,
      "grad_norm": 0.08215977996587753,
      "learning_rate": 1.3144405533805138e-05,
      "loss": 0.1445,
      "step": 354
    },
    {
      "epoch": 0.10331405289388483,
      "grad_norm": 0.08453574031591415,
      "learning_rate": 1.2769034867831586e-05,
      "loss": 0.147,
      "step": 355
    },
    {
      "epoch": 0.10360507839499436,
      "grad_norm": 0.09578870236873627,
      "learning_rate": 1.2398736139280686e-05,
      "loss": 0.1537,
      "step": 356
    },
    {
      "epoch": 0.1038961038961039,
      "grad_norm": 0.08585759252309799,
      "learning_rate": 1.2033530878717548e-05,
      "loss": 0.1386,
      "step": 357
    },
    {
      "epoch": 0.10418712939721343,
      "grad_norm": 0.08463095873594284,
      "learning_rate": 1.167344032055394e-05,
      "loss": 0.1399,
      "step": 358
    },
    {
      "epoch": 0.10447815489832296,
      "grad_norm": 0.09077376872301102,
      "learning_rate": 1.1318485401813439e-05,
      "loss": 0.178,
      "step": 359
    },
    {
      "epoch": 0.1047691803994325,
      "grad_norm": 0.07958751171827316,
      "learning_rate": 1.096868676091425e-05,
      "loss": 0.1379,
      "step": 360
    },
    {
      "epoch": 0.10506020590054203,
      "grad_norm": 0.09366265684366226,
      "learning_rate": 1.0624064736469053e-05,
      "loss": 0.1503,
      "step": 361
    },
    {
      "epoch": 0.10535123140165158,
      "grad_norm": 0.07501017302274704,
      "learning_rate": 1.02846393661026e-05,
      "loss": 0.1343,
      "step": 362
    },
    {
      "epoch": 0.1056422569027611,
      "grad_norm": 0.0942973643541336,
      "learning_rate": 9.950430385286491e-06,
      "loss": 0.1677,
      "step": 363
    },
    {
      "epoch": 0.10593328240387063,
      "grad_norm": 0.08338665217161179,
      "learning_rate": 9.62145722619182e-06,
      "loss": 0.1475,
      "step": 364
    },
    {
      "epoch": 0.10622430790498018,
      "grad_norm": 0.09208852797746658,
      "learning_rate": 9.297739016559226e-06,
      "loss": 0.1569,
      "step": 365
    },
    {
      "epoch": 0.10651533340608971,
      "grad_norm": 0.08696365356445312,
      "learning_rate": 8.979294578586738e-06,
      "loss": 0.1644,
      "step": 366
    },
    {
      "epoch": 0.10680635890719924,
      "grad_norm": 0.09445630013942719,
      "learning_rate": 8.666142427835444e-06,
      "loss": 0.1605,
      "step": 367
    },
    {
      "epoch": 0.10709738440830878,
      "grad_norm": 0.08878560364246368,
      "learning_rate": 8.358300772152849e-06,
      "loss": 0.1659,
      "step": 368
    },
    {
      "epoch": 0.10738840990941831,
      "grad_norm": 0.09253693372011185,
      "learning_rate": 8.055787510614288e-06,
      "loss": 0.1549,
      "step": 369
    },
    {
      "epoch": 0.10767943541052785,
      "grad_norm": 0.09546509385108948,
      "learning_rate": 7.758620232482084e-06,
      "loss": 0.1604,
      "step": 370
    },
    {
      "epoch": 0.10797046091163738,
      "grad_norm": 0.08155137300491333,
      "learning_rate": 7.46681621618297e-06,
      "loss": 0.1396,
      "step": 371
    },
    {
      "epoch": 0.10826148641274691,
      "grad_norm": 0.0899728313088417,
      "learning_rate": 7.180392428303395e-06,
      "loss": 0.158,
      "step": 372
    },
    {
      "epoch": 0.10855251191385645,
      "grad_norm": 0.08545383810997009,
      "learning_rate": 6.8993655226030405e-06,
      "loss": 0.1545,
      "step": 373
    },
    {
      "epoch": 0.10884353741496598,
      "grad_norm": 0.0924701988697052,
      "learning_rate": 6.623751839046455e-06,
      "loss": 0.1569,
      "step": 374
    },
    {
      "epoch": 0.10913456291607553,
      "grad_norm": 0.0837499350309372,
      "learning_rate": 6.353567402853056e-06,
      "loss": 0.1595,
      "step": 375
    },
    {
      "epoch": 0.10942558841718505,
      "grad_norm": 0.08741695433855057,
      "learning_rate": 6.0888279235653214e-06,
      "loss": 0.1582,
      "step": 376
    },
    {
      "epoch": 0.10971661391829458,
      "grad_norm": 0.08700203150510788,
      "learning_rate": 5.82954879413542e-06,
      "loss": 0.1552,
      "step": 377
    },
    {
      "epoch": 0.11000763941940413,
      "grad_norm": 0.07841800898313522,
      "learning_rate": 5.575745090030138e-06,
      "loss": 0.1462,
      "step": 378
    },
    {
      "epoch": 0.11029866492051366,
      "grad_norm": 0.07555332779884338,
      "learning_rate": 5.327431568354402e-06,
      "loss": 0.1358,
      "step": 379
    },
    {
      "epoch": 0.1105896904216232,
      "grad_norm": 0.07444937527179718,
      "learning_rate": 5.084622666993244e-06,
      "loss": 0.1383,
      "step": 380
    },
    {
      "epoch": 0.11088071592273273,
      "grad_norm": 0.08368539065122604,
      "learning_rate": 4.847332503772228e-06,
      "loss": 0.1368,
      "step": 381
    },
    {
      "epoch": 0.11117174142384226,
      "grad_norm": 0.08841046690940857,
      "learning_rate": 4.61557487563673e-06,
      "loss": 0.1504,
      "step": 382
    },
    {
      "epoch": 0.1114627669249518,
      "grad_norm": 0.08533196896314621,
      "learning_rate": 4.389363257849632e-06,
      "loss": 0.1449,
      "step": 383
    },
    {
      "epoch": 0.11175379242606133,
      "grad_norm": 0.09036096930503845,
      "learning_rate": 4.168710803207865e-06,
      "loss": 0.1555,
      "step": 384
    },
    {
      "epoch": 0.11204481792717087,
      "grad_norm": 0.08787363022565842,
      "learning_rate": 3.953630341277604e-06,
      "loss": 0.1483,
      "step": 385
    },
    {
      "epoch": 0.1123358434282804,
      "grad_norm": 0.09143967181444168,
      "learning_rate": 3.7441343776484117e-06,
      "loss": 0.1519,
      "step": 386
    },
    {
      "epoch": 0.11262686892938993,
      "grad_norm": 0.08185972273349762,
      "learning_rate": 3.540235093205979e-06,
      "loss": 0.1382,
      "step": 387
    },
    {
      "epoch": 0.11291789443049947,
      "grad_norm": 0.08177818357944489,
      "learning_rate": 3.3419443434240083e-06,
      "loss": 0.1439,
      "step": 388
    },
    {
      "epoch": 0.113208919931609,
      "grad_norm": 0.09348271787166595,
      "learning_rate": 3.1492736576747893e-06,
      "loss": 0.1582,
      "step": 389
    },
    {
      "epoch": 0.11349994543271855,
      "grad_norm": 0.08014369755983353,
      "learning_rate": 2.9622342385589254e-06,
      "loss": 0.1426,
      "step": 390
    },
    {
      "epoch": 0.11379097093382808,
      "grad_norm": 0.1089087501168251,
      "learning_rate": 2.7808369612539407e-06,
      "loss": 0.1504,
      "step": 391
    },
    {
      "epoch": 0.1140819964349376,
      "grad_norm": 0.0827077105641365,
      "learning_rate": 2.6050923728818787e-06,
      "loss": 0.14,
      "step": 392
    },
    {
      "epoch": 0.11437302193604715,
      "grad_norm": 0.08474422991275787,
      "learning_rate": 2.4350106918962e-06,
      "loss": 0.1501,
      "step": 393
    },
    {
      "epoch": 0.11466404743715668,
      "grad_norm": 0.09697262942790985,
      "learning_rate": 2.2706018074875045e-06,
      "loss": 0.1648,
      "step": 394
    },
    {
      "epoch": 0.11495507293826622,
      "grad_norm": 0.08163170516490936,
      "learning_rate": 2.111875279008657e-06,
      "loss": 0.1571,
      "step": 395
    },
    {
      "epoch": 0.11524609843937575,
      "grad_norm": 0.09289243817329407,
      "learning_rate": 1.9588403354188325e-06,
      "loss": 0.1667,
      "step": 396
    },
    {
      "epoch": 0.11553712394048528,
      "grad_norm": 0.09270413219928741,
      "learning_rate": 1.811505874747066e-06,
      "loss": 0.1641,
      "step": 397
    },
    {
      "epoch": 0.11582814944159482,
      "grad_norm": 0.08141297101974487,
      "learning_rate": 1.6698804635747579e-06,
      "loss": 0.1534,
      "step": 398
    },
    {
      "epoch": 0.11611917494270435,
      "grad_norm": 0.09712623059749603,
      "learning_rate": 1.5339723365376479e-06,
      "loss": 0.1538,
      "step": 399
    },
    {
      "epoch": 0.1164102004438139,
      "grad_norm": 0.08977963030338287,
      "learning_rate": 1.4037893958469995e-06,
      "loss": 0.153,
      "step": 400
    },
    {
      "epoch": 0.1164102004438139,
      "eval_loss": 0.1550179123878479,
      "eval_runtime": 1577.2866,
      "eval_samples_per_second": 7.339,
      "eval_steps_per_second": 1.835,
      "step": 400
    },
    {
      "epoch": 0.11670122594492342,
      "grad_norm": 0.08147388696670532,
      "learning_rate": 1.2793392108301439e-06,
      "loss": 0.1476,
      "step": 401
    },
    {
      "epoch": 0.11699225144603295,
      "grad_norm": 0.09103590250015259,
      "learning_rate": 1.160629017490389e-06,
      "loss": 0.162,
      "step": 402
    },
    {
      "epoch": 0.1172832769471425,
      "grad_norm": 0.08841648697853088,
      "learning_rate": 1.0476657180862325e-06,
      "loss": 0.1679,
      "step": 403
    },
    {
      "epoch": 0.11757430244825202,
      "grad_norm": 0.08707400411367416,
      "learning_rate": 9.404558807301067e-07,
      "loss": 0.1487,
      "step": 404
    },
    {
      "epoch": 0.11786532794936157,
      "grad_norm": 0.09094386547803879,
      "learning_rate": 8.390057390064265e-07,
      "loss": 0.1487,
      "step": 405
    },
    {
      "epoch": 0.1181563534504711,
      "grad_norm": 0.0876602977514267,
      "learning_rate": 7.433211916092142e-07,
      "loss": 0.1473,
      "step": 406
    },
    {
      "epoch": 0.11844737895158063,
      "grad_norm": 0.08664330095052719,
      "learning_rate": 6.534078019990398e-07,
      "loss": 0.1517,
      "step": 407
    },
    {
      "epoch": 0.11873840445269017,
      "grad_norm": 0.07475589960813522,
      "learning_rate": 5.69270798079613e-07,
      "loss": 0.1372,
      "step": 408
    },
    {
      "epoch": 0.1190294299537997,
      "grad_norm": 0.07260264456272125,
      "learning_rate": 4.909150718937717e-07,
      "loss": 0.1234,
      "step": 409
    },
    {
      "epoch": 0.11932045545490924,
      "grad_norm": 0.0814119204878807,
      "learning_rate": 4.1834517933907467e-07,
      "loss": 0.149,
      "step": 410
    },
    {
      "epoch": 0.11961148095601877,
      "grad_norm": 0.10258995741605759,
      "learning_rate": 3.5156533990285956e-07,
      "loss": 0.16,
      "step": 411
    },
    {
      "epoch": 0.1199025064571283,
      "grad_norm": 0.07404506951570511,
      "learning_rate": 2.9057943641693785e-07,
      "loss": 0.1342,
      "step": 412
    },
    {
      "epoch": 0.12019353195823784,
      "grad_norm": 0.08446568995714188,
      "learning_rate": 2.3539101483184278e-07,
      "loss": 0.1473,
      "step": 413
    },
    {
      "epoch": 0.12048455745934737,
      "grad_norm": 0.08428740501403809,
      "learning_rate": 1.8600328401061629e-07,
      "loss": 0.1522,
      "step": 414
    },
    {
      "epoch": 0.12077558296045691,
      "grad_norm": 0.0767434611916542,
      "learning_rate": 1.4241911554225828e-07,
      "loss": 0.1447,
      "step": 415
    },
    {
      "epoch": 0.12106660846156644,
      "grad_norm": 0.07769133895635605,
      "learning_rate": 1.0464104357477133e-07,
      "loss": 0.1407,
      "step": 416
    },
    {
      "epoch": 0.12135763396267597,
      "grad_norm": 0.08757521212100983,
      "learning_rate": 7.267126466777852e-08,
      "loss": 0.1437,
      "step": 417
    },
    {
      "epoch": 0.12164865946378552,
      "grad_norm": 0.10252358764410019,
      "learning_rate": 4.651163766484779e-08,
      "loss": 0.1719,
      "step": 418
    },
    {
      "epoch": 0.12193968496489505,
      "grad_norm": 0.08464578539133072,
      "learning_rate": 2.6163683585389565e-08,
      "loss": 0.1565,
      "step": 419
    },
    {
      "epoch": 0.12223071046600459,
      "grad_norm": 0.08833827823400497,
      "learning_rate": 1.1628585536216374e-08,
      "loss": 0.1483,
      "step": 420
    },
    {
      "epoch": 0.12252173596711412,
      "grad_norm": 0.08319747447967529,
      "learning_rate": 2.907188642786718e-09,
      "loss": 0.1472,
      "step": 421
    },
    {
      "epoch": 0.12281276146822365,
      "grad_norm": 0.0925559550523758,
      "learning_rate": 0.0,
      "loss": 0.1534,
      "step": 422
    }
  ],
  "logging_steps": 1,
  "max_steps": 422,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 2,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.3277580175561196e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}