{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9983597594313833,
  "eval_steps": 500,
  "global_step": 1371,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002186987424822307,
      "grad_norm": 0.11989043653011322,
      "learning_rate": 2.1739130434782607e-06,
      "loss": 0.7588,
      "step": 1
    },
    {
      "epoch": 0.004373974849644614,
      "grad_norm": 0.08302941918373108,
      "learning_rate": 4.347826086956521e-06,
      "loss": 0.8145,
      "step": 2
    },
    {
      "epoch": 0.0065609622744669215,
      "grad_norm": 0.15307161211967468,
      "learning_rate": 6.521739130434782e-06,
      "loss": 0.8127,
      "step": 3
    },
    {
      "epoch": 0.008747949699289229,
      "grad_norm": 0.13161885738372803,
      "learning_rate": 8.695652173913043e-06,
      "loss": 0.6707,
      "step": 4
    },
    {
      "epoch": 0.010934937124111536,
      "grad_norm": 0.09451252222061157,
      "learning_rate": 1.0869565217391303e-05,
      "loss": 0.7497,
      "step": 5
    },
    {
      "epoch": 0.013121924548933843,
      "grad_norm": 0.0813838317990303,
      "learning_rate": 1.3043478260869564e-05,
      "loss": 1.0007,
      "step": 6
    },
    {
      "epoch": 0.01530891197375615,
      "grad_norm": 0.12192627787590027,
      "learning_rate": 1.5217391304347826e-05,
      "loss": 0.6703,
      "step": 7
    },
    {
      "epoch": 0.017495899398578457,
      "grad_norm": 0.14730937778949738,
      "learning_rate": 1.7391304347826085e-05,
      "loss": 0.9552,
      "step": 8
    },
    {
      "epoch": 0.019682886823400764,
      "grad_norm": 0.13510680198669434,
      "learning_rate": 1.9565217391304346e-05,
      "loss": 0.9591,
      "step": 9
    },
    {
      "epoch": 0.02186987424822307,
      "grad_norm": 0.11157332360744476,
      "learning_rate": 2.1739130434782607e-05,
      "loss": 0.9358,
      "step": 10
    },
    {
      "epoch": 0.02405686167304538,
      "grad_norm": 0.11157120019197464,
      "learning_rate": 2.3913043478260864e-05,
      "loss": 0.8377,
      "step": 11
    },
    {
      "epoch": 0.026243849097867686,
      "grad_norm": 0.13191162049770355,
      "learning_rate": 2.6086956521739128e-05,
      "loss": 0.8974,
      "step": 12
    },
    {
      "epoch": 0.028430836522689993,
      "grad_norm": 0.14399488270282745,
      "learning_rate": 2.826086956521739e-05,
      "loss": 0.778,
      "step": 13
    },
    {
      "epoch": 0.0306178239475123,
      "grad_norm": 0.11593582481145859,
      "learning_rate": 3.0434782608695653e-05,
      "loss": 0.9507,
      "step": 14
    },
    {
      "epoch": 0.03280481137233461,
      "grad_norm": 0.16411006450653076,
      "learning_rate": 3.260869565217391e-05,
      "loss": 0.6949,
      "step": 15
    },
    {
      "epoch": 0.034991798797156914,
      "grad_norm": 0.13450156152248383,
      "learning_rate": 3.478260869565217e-05,
      "loss": 0.8162,
      "step": 16
    },
    {
      "epoch": 0.037178786221979225,
      "grad_norm": 0.12586522102355957,
      "learning_rate": 3.695652173913043e-05,
      "loss": 0.8776,
      "step": 17
    },
    {
      "epoch": 0.03936577364680153,
      "grad_norm": 0.10510208457708359,
      "learning_rate": 3.913043478260869e-05,
      "loss": 0.7852,
      "step": 18
    },
    {
      "epoch": 0.04155276107162384,
      "grad_norm": 0.12737107276916504,
      "learning_rate": 4.130434782608695e-05,
      "loss": 0.9647,
      "step": 19
    },
    {
      "epoch": 0.04373974849644614,
      "grad_norm": 0.1500634402036667,
      "learning_rate": 4.3478260869565214e-05,
      "loss": 0.7532,
      "step": 20
    },
    {
      "epoch": 0.045926735921268454,
      "grad_norm": 0.16161426901817322,
      "learning_rate": 4.5652173913043474e-05,
      "loss": 0.811,
      "step": 21
    },
    {
      "epoch": 0.04811372334609076,
      "grad_norm": 0.1249527782201767,
      "learning_rate": 4.782608695652173e-05,
      "loss": 0.795,
      "step": 22
    },
    {
      "epoch": 0.05030071077091307,
      "grad_norm": 0.1505545973777771,
      "learning_rate": 4.9999999999999996e-05,
      "loss": 0.9194,
      "step": 23
    },
    {
      "epoch": 0.05248769819573537,
      "grad_norm": 0.13624198734760284,
      "learning_rate": 5.2173913043478256e-05,
      "loss": 0.97,
      "step": 24
    },
    {
      "epoch": 0.05467468562055768,
      "grad_norm": 0.15684515237808228,
      "learning_rate": 5.434782608695652e-05,
      "loss": 0.6862,
      "step": 25
    },
    {
      "epoch": 0.056861673045379986,
      "grad_norm": 0.14302442967891693,
      "learning_rate": 5.652173913043478e-05,
      "loss": 0.8062,
      "step": 26
    },
    {
      "epoch": 0.0590486604702023,
      "grad_norm": 0.23029306530952454,
      "learning_rate": 5.869565217391304e-05,
      "loss": 0.9101,
      "step": 27
    },
    {
      "epoch": 0.0612356478950246,
      "grad_norm": 0.24247854948043823,
      "learning_rate": 6.0869565217391306e-05,
      "loss": 0.8779,
      "step": 28
    },
    {
      "epoch": 0.0634226353198469,
      "grad_norm": 0.1507425308227539,
      "learning_rate": 6.304347826086956e-05,
      "loss": 0.7181,
      "step": 29
    },
    {
      "epoch": 0.06560962274466922,
      "grad_norm": 0.18965087831020355,
      "learning_rate": 6.521739130434782e-05,
      "loss": 0.8163,
      "step": 30
    },
    {
      "epoch": 0.06779661016949153,
      "grad_norm": 0.2104681432247162,
      "learning_rate": 6.739130434782608e-05,
      "loss": 0.9495,
      "step": 31
    },
    {
      "epoch": 0.06998359759431383,
      "grad_norm": 0.21606619656085968,
      "learning_rate": 6.956521739130434e-05,
      "loss": 0.9565,
      "step": 32
    },
    {
      "epoch": 0.07217058501913615,
      "grad_norm": 0.2107428014278412,
      "learning_rate": 7.17391304347826e-05,
      "loss": 0.7743,
      "step": 33
    },
    {
      "epoch": 0.07435757244395845,
      "grad_norm": 0.3160182535648346,
      "learning_rate": 7.391304347826086e-05,
      "loss": 1.0056,
      "step": 34
    },
    {
      "epoch": 0.07654455986878075,
      "grad_norm": 0.2970617115497589,
      "learning_rate": 7.608695652173912e-05,
      "loss": 0.8122,
      "step": 35
    },
    {
      "epoch": 0.07873154729360306,
      "grad_norm": 0.17866499722003937,
      "learning_rate": 7.826086956521738e-05,
      "loss": 0.7953,
      "step": 36
    },
    {
      "epoch": 0.08091853471842538,
      "grad_norm": 0.32111942768096924,
      "learning_rate": 8.043478260869566e-05,
      "loss": 0.9121,
      "step": 37
    },
    {
      "epoch": 0.08310552214324768,
      "grad_norm": 0.20938844978809357,
      "learning_rate": 8.26086956521739e-05,
      "loss": 0.887,
      "step": 38
    },
    {
      "epoch": 0.08529250956806998,
      "grad_norm": 0.27339646220207214,
      "learning_rate": 8.478260869565217e-05,
      "loss": 0.7808,
      "step": 39
    },
    {
      "epoch": 0.08747949699289229,
      "grad_norm": 0.19005413353443146,
      "learning_rate": 8.695652173913043e-05,
      "loss": 0.6723,
      "step": 40
    },
    {
      "epoch": 0.0896664844177146,
      "grad_norm": 0.19314634799957275,
      "learning_rate": 8.913043478260869e-05,
      "loss": 0.8384,
      "step": 41
    },
    {
      "epoch": 0.09185347184253691,
      "grad_norm": 0.21565446257591248,
      "learning_rate": 9.130434782608695e-05,
      "loss": 0.7402,
      "step": 42
    },
    {
      "epoch": 0.09404045926735921,
      "grad_norm": 0.3733920753002167,
      "learning_rate": 9.347826086956521e-05,
      "loss": 0.9476,
      "step": 43
    },
    {
      "epoch": 0.09622744669218151,
      "grad_norm": 0.3119434714317322,
      "learning_rate": 9.565217391304346e-05,
      "loss": 0.7324,
      "step": 44
    },
    {
      "epoch": 0.09841443411700383,
      "grad_norm": 0.20734310150146484,
      "learning_rate": 9.782608695652173e-05,
      "loss": 0.6521,
      "step": 45
    },
    {
      "epoch": 0.10060142154182614,
      "grad_norm": 0.2809116840362549,
      "learning_rate": 9.999999999999999e-05,
      "loss": 0.7374,
      "step": 46
    },
    {
      "epoch": 0.10278840896664844,
      "grad_norm": 0.2248832732439041,
      "learning_rate": 0.00010217391304347825,
      "loss": 0.7822,
      "step": 47
    },
    {
      "epoch": 0.10497539639147074,
      "grad_norm": 0.26310572028160095,
      "learning_rate": 0.00010434782608695651,
      "loss": 0.844,
      "step": 48
    },
    {
      "epoch": 0.10716238381629306,
      "grad_norm": 0.20629820227622986,
      "learning_rate": 0.00010652173913043477,
      "loss": 0.9024,
      "step": 49
    },
    {
      "epoch": 0.10934937124111536,
      "grad_norm": 0.40926942229270935,
      "learning_rate": 0.00010869565217391303,
      "loss": 0.8497,
      "step": 50
    },
    {
      "epoch": 0.11153635866593767,
      "grad_norm": 0.34393706917762756,
      "learning_rate": 0.00011086956521739128,
      "loss": 0.8326,
      "step": 51
    },
    {
      "epoch": 0.11372334609075997,
      "grad_norm": 0.25371822714805603,
      "learning_rate": 0.00011304347826086956,
      "loss": 1.0089,
      "step": 52
    },
    {
      "epoch": 0.11591033351558229,
      "grad_norm": 0.3484710454940796,
      "learning_rate": 0.00011521739130434782,
      "loss": 0.7667,
      "step": 53
    },
    {
      "epoch": 0.1180973209404046,
      "grad_norm": 0.5894125699996948,
      "learning_rate": 0.00011739130434782608,
      "loss": 0.7977,
      "step": 54
    },
    {
      "epoch": 0.1202843083652269,
      "grad_norm": 0.29829731583595276,
      "learning_rate": 0.00011956521739130434,
      "loss": 0.7545,
      "step": 55
    },
    {
      "epoch": 0.1224712957900492,
      "grad_norm": 0.4180648922920227,
      "learning_rate": 0.00012173913043478261,
      "loss": 0.9833,
      "step": 56
    },
    {
      "epoch": 0.12465828321487152,
      "grad_norm": 0.24174439907073975,
      "learning_rate": 0.00012391304347826086,
      "loss": 0.5948,
      "step": 57
    },
    {
      "epoch": 0.1268452706396938,
      "grad_norm": 0.253364235162735,
      "learning_rate": 0.00012608695652173912,
      "loss": 0.7528,
      "step": 58
    },
    {
      "epoch": 0.12903225806451613,
      "grad_norm": 0.31262415647506714,
      "learning_rate": 0.00012826086956521738,
      "loss": 0.7635,
      "step": 59
    },
    {
      "epoch": 0.13121924548933844,
      "grad_norm": 0.2893831729888916,
      "learning_rate": 0.00013043478260869564,
      "loss": 0.7426,
      "step": 60
    },
    {
      "epoch": 0.13340623291416073,
      "grad_norm": 0.26717469096183777,
      "learning_rate": 0.0001326086956521739,
      "loss": 0.7747,
      "step": 61
    },
    {
      "epoch": 0.13559322033898305,
      "grad_norm": 0.3445766270160675,
      "learning_rate": 0.00013478260869565216,
      "loss": 0.802,
      "step": 62
    },
    {
      "epoch": 0.13778020776380537,
      "grad_norm": 0.3893512487411499,
      "learning_rate": 0.00013695652173913042,
      "loss": 1.0112,
      "step": 63
    },
    {
      "epoch": 0.13996719518862766,
      "grad_norm": 0.2807013988494873,
      "learning_rate": 0.00013913043478260868,
      "loss": 0.832,
      "step": 64
    },
    {
      "epoch": 0.14215418261344998,
      "grad_norm": 0.3300040662288666,
      "learning_rate": 0.00014130434782608694,
      "loss": 0.8425,
      "step": 65
    },
    {
      "epoch": 0.1443411700382723,
      "grad_norm": 0.3051323890686035,
      "learning_rate": 0.0001434782608695652,
      "loss": 0.7218,
      "step": 66
    },
    {
      "epoch": 0.14652815746309458,
      "grad_norm": 0.25623396039009094,
      "learning_rate": 0.00014565217391304347,
      "loss": 0.7398,
      "step": 67
    },
    {
      "epoch": 0.1487151448879169,
      "grad_norm": 0.3793390989303589,
      "learning_rate": 0.00014782608695652173,
      "loss": 0.7293,
      "step": 68
    },
    {
      "epoch": 0.1509021323127392,
      "grad_norm": 0.3046607971191406,
      "learning_rate": 0.00015,
      "loss": 0.7507,
      "step": 69
    },
    {
      "epoch": 0.1530891197375615,
      "grad_norm": 0.23061273992061615,
      "learning_rate": 0.00015217391304347825,
      "loss": 0.6682,
      "step": 70
    },
    {
      "epoch": 0.15527610716238383,
      "grad_norm": 0.3328089714050293,
      "learning_rate": 0.00015434782608695648,
      "loss": 0.6736,
      "step": 71
    },
    {
      "epoch": 0.15746309458720611,
      "grad_norm": 0.4419778287410736,
      "learning_rate": 0.00015652173913043477,
      "loss": 0.8789,
      "step": 72
    },
    {
      "epoch": 0.15965008201202843,
      "grad_norm": 0.3310529291629791,
      "learning_rate": 0.00015869565217391303,
      "loss": 0.8108,
      "step": 73
    },
    {
      "epoch": 0.16183706943685075,
      "grad_norm": 0.4529496729373932,
      "learning_rate": 0.00016086956521739132,
      "loss": 1.0239,
      "step": 74
    },
    {
      "epoch": 0.16402405686167304,
      "grad_norm": 0.3741857707500458,
      "learning_rate": 0.00016304347826086955,
      "loss": 0.7601,
      "step": 75
    },
    {
      "epoch": 0.16621104428649536,
      "grad_norm": 0.2660742700099945,
      "learning_rate": 0.0001652173913043478,
      "loss": 0.7989,
      "step": 76
    },
    {
      "epoch": 0.16839803171131765,
      "grad_norm": 0.28130316734313965,
      "learning_rate": 0.00016739130434782607,
      "loss": 0.8459,
      "step": 77
    },
    {
      "epoch": 0.17058501913613996,
      "grad_norm": 0.3322678804397583,
      "learning_rate": 0.00016956521739130433,
      "loss": 0.7567,
      "step": 78
    },
    {
      "epoch": 0.17277200656096228,
      "grad_norm": 0.30039381980895996,
      "learning_rate": 0.0001717391304347826,
      "loss": 0.7353,
      "step": 79
    },
    {
      "epoch": 0.17495899398578457,
      "grad_norm": 0.30451035499572754,
      "learning_rate": 0.00017391304347826085,
      "loss": 0.7913,
      "step": 80
    },
    {
      "epoch": 0.1771459814106069,
      "grad_norm": 0.30815356969833374,
      "learning_rate": 0.00017608695652173914,
      "loss": 0.7766,
      "step": 81
    },
    {
      "epoch": 0.1793329688354292,
      "grad_norm": 0.5257038474082947,
      "learning_rate": 0.00017826086956521738,
      "loss": 0.7486,
      "step": 82
    },
    {
      "epoch": 0.1815199562602515,
      "grad_norm": 0.22373591363430023,
      "learning_rate": 0.00018043478260869564,
      "loss": 0.79,
      "step": 83
    },
    {
      "epoch": 0.18370694368507381,
      "grad_norm": 0.21466179192066193,
      "learning_rate": 0.0001826086956521739,
      "loss": 0.6091,
      "step": 84
    },
    {
      "epoch": 0.1858939311098961,
      "grad_norm": 0.3204774558544159,
      "learning_rate": 0.00018478260869565216,
      "loss": 1.015,
      "step": 85
    },
    {
      "epoch": 0.18808091853471842,
      "grad_norm": 0.272977739572525,
      "learning_rate": 0.00018695652173913042,
      "loss": 0.7317,
      "step": 86
    },
    {
      "epoch": 0.19026790595954074,
      "grad_norm": 0.32803332805633545,
      "learning_rate": 0.00018913043478260868,
      "loss": 0.7552,
      "step": 87
    },
    {
      "epoch": 0.19245489338436303,
      "grad_norm": 0.308023065328598,
      "learning_rate": 0.0001913043478260869,
      "loss": 0.7058,
      "step": 88
    },
    {
      "epoch": 0.19464188080918535,
      "grad_norm": 0.2604801654815674,
      "learning_rate": 0.0001934782608695652,
      "loss": 0.6967,
      "step": 89
    },
    {
      "epoch": 0.19682886823400766,
      "grad_norm": 0.3489021062850952,
      "learning_rate": 0.00019565217391304346,
      "loss": 0.7518,
      "step": 90
    },
    {
      "epoch": 0.19901585565882995,
      "grad_norm": 0.6137279272079468,
      "learning_rate": 0.00019782608695652172,
      "loss": 0.635,
      "step": 91
    },
    {
      "epoch": 0.20120284308365227,
      "grad_norm": 0.41480115056037903,
      "learning_rate": 0.00019999999999999998,
      "loss": 0.8928,
      "step": 92
    },
    {
      "epoch": 0.2033898305084746,
      "grad_norm": 0.22284042835235596,
      "learning_rate": 0.00020217391304347824,
      "loss": 0.5862,
      "step": 93
    },
    {
      "epoch": 0.20557681793329688,
      "grad_norm": 0.233658567070961,
      "learning_rate": 0.0002043478260869565,
      "loss": 0.8148,
      "step": 94
    },
    {
      "epoch": 0.2077638053581192,
      "grad_norm": 0.21716511249542236,
      "learning_rate": 0.00020652173913043474,
      "loss": 0.6474,
      "step": 95
    },
    {
      "epoch": 0.2099507927829415,
      "grad_norm": 0.506393551826477,
      "learning_rate": 0.00020869565217391303,
      "loss": 0.7149,
      "step": 96
    },
    {
      "epoch": 0.2121377802077638,
      "grad_norm": 0.3504016697406769,
      "learning_rate": 0.00021086956521739129,
      "loss": 0.647,
      "step": 97
    },
    {
      "epoch": 0.21432476763258612,
      "grad_norm": 0.28688108921051025,
      "learning_rate": 0.00021304347826086955,
      "loss": 0.6584,
      "step": 98
    },
    {
      "epoch": 0.2165117550574084,
      "grad_norm": 0.35572630167007446,
      "learning_rate": 0.0002152173913043478,
      "loss": 0.8177,
      "step": 99
    },
    {
      "epoch": 0.21869874248223073,
      "grad_norm": 0.30645623803138733,
      "learning_rate": 0.00021739130434782607,
      "loss": 0.7421,
      "step": 100
    },
    {
      "epoch": 0.22088572990705305,
      "grad_norm": 0.480013370513916,
      "learning_rate": 0.00021956521739130433,
      "loss": 0.7542,
      "step": 101
    },
    {
      "epoch": 0.22307271733187534,
      "grad_norm": 0.23101027309894562,
      "learning_rate": 0.00022173913043478256,
      "loss": 0.81,
      "step": 102
    },
    {
      "epoch": 0.22525970475669765,
      "grad_norm": 0.37322309613227844,
      "learning_rate": 0.00022391304347826085,
      "loss": 0.8879,
      "step": 103
    },
    {
      "epoch": 0.22744669218151994,
      "grad_norm": 1.5672107934951782,
      "learning_rate": 0.0002260869565217391,
      "loss": 0.7838,
      "step": 104
    },
    {
      "epoch": 0.22963367960634226,
      "grad_norm": 0.5281320810317993,
      "learning_rate": 0.0002282608695652174,
      "loss": 0.7246,
      "step": 105
    },
    {
      "epoch": 0.23182066703116458,
      "grad_norm": 0.597309947013855,
      "learning_rate": 0.00023043478260869563,
      "loss": 0.6229,
      "step": 106
    },
    {
      "epoch": 0.23400765445598687,
      "grad_norm": 0.29928773641586304,
      "learning_rate": 0.0002326086956521739,
      "loss": 0.779,
      "step": 107
    },
    {
      "epoch": 0.2361946418808092,
      "grad_norm": 0.3042626678943634,
      "learning_rate": 0.00023478260869565215,
      "loss": 0.6647,
      "step": 108
    },
    {
      "epoch": 0.2383816293056315,
      "grad_norm": 0.3099993169307709,
      "learning_rate": 0.00023695652173913041,
      "loss": 0.8173,
      "step": 109
    },
    {
      "epoch": 0.2405686167304538,
      "grad_norm": 0.21835339069366455,
      "learning_rate": 0.00023913043478260867,
      "loss": 0.7145,
      "step": 110
    },
    {
      "epoch": 0.2427556041552761,
      "grad_norm": 0.2737351357936859,
      "learning_rate": 0.00024130434782608694,
      "loss": 0.754,
      "step": 111
    },
    {
      "epoch": 0.2449425915800984,
      "grad_norm": 0.2737314999103546,
      "learning_rate": 0.00024347826086956522,
      "loss": 0.6692,
      "step": 112
    },
    {
      "epoch": 0.24712957900492072,
      "grad_norm": 0.369526743888855,
      "learning_rate": 0.00024565217391304343,
      "loss": 0.7039,
      "step": 113
    },
    {
      "epoch": 0.24931656642974304,
      "grad_norm": 0.2262083888053894,
      "learning_rate": 0.0002478260869565217,
      "loss": 0.6004,
      "step": 114
    },
    {
      "epoch": 0.25150355385456535,
      "grad_norm": 0.42596694827079773,
      "learning_rate": 0.00025,
      "loss": 0.8972,
      "step": 115
    },
    {
      "epoch": 0.2536905412793876,
      "grad_norm": 0.4870564043521881,
      "learning_rate": 0.00025217391304347824,
      "loss": 0.7305,
      "step": 116
    },
    {
      "epoch": 0.25587752870420993,
      "grad_norm": 0.3326433598995209,
      "learning_rate": 0.00025434782608695647,
      "loss": 0.7079,
      "step": 117
    },
    {
      "epoch": 0.25806451612903225,
      "grad_norm": 0.3588925004005432,
      "learning_rate": 0.00025652173913043476,
      "loss": 0.7682,
      "step": 118
    },
    {
      "epoch": 0.26025150355385457,
      "grad_norm": 0.2966621518135071,
      "learning_rate": 0.00025869565217391305,
      "loss": 0.8244,
      "step": 119
    },
    {
      "epoch": 0.2624384909786769,
      "grad_norm": 0.2213324010372162,
      "learning_rate": 0.0002608695652173913,
      "loss": 0.841,
      "step": 120
    },
    {
      "epoch": 0.2646254784034992,
      "grad_norm": 0.28340932726860046,
      "learning_rate": 0.00026304347826086957,
      "loss": 0.7646,
      "step": 121
    },
    {
      "epoch": 0.26681246582832147,
      "grad_norm": 0.3026011884212494,
      "learning_rate": 0.0002652173913043478,
      "loss": 0.8269,
      "step": 122
    },
    {
      "epoch": 0.2689994532531438,
      "grad_norm": 0.3213091194629669,
      "learning_rate": 0.00026739130434782604,
      "loss": 0.7456,
      "step": 123
    },
    {
      "epoch": 0.2711864406779661,
      "grad_norm": 0.24254000186920166,
      "learning_rate": 0.0002695652173913043,
      "loss": 0.786,
      "step": 124
    },
    {
      "epoch": 0.2733734281027884,
      "grad_norm": 0.22490260004997253,
      "learning_rate": 0.0002717391304347826,
      "loss": 0.8288,
      "step": 125
    },
    {
      "epoch": 0.27556041552761074,
      "grad_norm": 0.2039777934551239,
      "learning_rate": 0.00027391304347826085,
      "loss": 0.7204,
      "step": 126
    },
    {
      "epoch": 0.277747402952433,
      "grad_norm": 0.2281191200017929,
      "learning_rate": 0.0002760869565217391,
      "loss": 0.5744,
      "step": 127
    },
    {
      "epoch": 0.2799343903772553,
      "grad_norm": 0.33240583539009094,
      "learning_rate": 0.00027826086956521737,
      "loss": 0.6398,
      "step": 128
    },
    {
      "epoch": 0.28212137780207763,
      "grad_norm": 0.38755086064338684,
      "learning_rate": 0.00028043478260869565,
      "loss": 0.6739,
      "step": 129
    },
    {
      "epoch": 0.28430836522689995,
      "grad_norm": 0.5284032821655273,
      "learning_rate": 0.0002826086956521739,
      "loss": 1.0215,
      "step": 130
    },
    {
      "epoch": 0.28649535265172227,
      "grad_norm": 0.8248558044433594,
      "learning_rate": 0.0002847826086956521,
      "loss": 0.6937,
      "step": 131
    },
    {
      "epoch": 0.2886823400765446,
      "grad_norm": 0.264347106218338,
      "learning_rate": 0.0002869565217391304,
      "loss": 0.6745,
      "step": 132
    },
    {
      "epoch": 0.29086932750136685,
      "grad_norm": 0.24335810542106628,
      "learning_rate": 0.00028913043478260864,
      "loss": 0.8085,
      "step": 133
    },
    {
      "epoch": 0.29305631492618917,
      "grad_norm": 0.2641212046146393,
      "learning_rate": 0.00029130434782608693,
      "loss": 0.6991,
      "step": 134
    },
    {
      "epoch": 0.2952433023510115,
      "grad_norm": 0.2698618769645691,
      "learning_rate": 0.0002934782608695652,
      "loss": 0.7643,
      "step": 135
    },
    {
      "epoch": 0.2974302897758338,
      "grad_norm": 0.24988499283790588,
      "learning_rate": 0.00029565217391304345,
      "loss": 0.8905,
      "step": 136
    },
    {
      "epoch": 0.2996172772006561,
      "grad_norm": 0.2180056869983673,
      "learning_rate": 0.0002978260869565217,
      "loss": 0.7743,
      "step": 137
    },
    {
      "epoch": 0.3018042646254784,
      "grad_norm": 0.23834429681301117,
      "learning_rate": 0.0003,
      "loss": 0.6164,
      "step": 138
    },
    {
      "epoch": 0.3039912520503007,
      "grad_norm": 0.33471837639808655,
      "learning_rate": 0.00029975669099756687,
      "loss": 0.9367,
      "step": 139
    },
    {
      "epoch": 0.306178239475123,
      "grad_norm": 0.22311441600322723,
      "learning_rate": 0.0002995133819951338,
      "loss": 0.8235,
      "step": 140
    },
    {
      "epoch": 0.30836522689994533,
      "grad_norm": 0.16766682267189026,
      "learning_rate": 0.0002992700729927007,
      "loss": 0.6212,
      "step": 141
    },
    {
      "epoch": 0.31055221432476765,
      "grad_norm": 0.21076077222824097,
      "learning_rate": 0.0002990267639902676,
      "loss": 0.7472,
      "step": 142
    },
    {
      "epoch": 0.3127392017495899,
      "grad_norm": 0.33612027764320374,
      "learning_rate": 0.0002987834549878345,
      "loss": 0.7475,
      "step": 143
    },
    {
      "epoch": 0.31492618917441223,
      "grad_norm": 0.2724473476409912,
      "learning_rate": 0.0002985401459854014,
      "loss": 0.7422,
      "step": 144
    },
    {
      "epoch": 0.31711317659923455,
      "grad_norm": 0.23170293867588043,
      "learning_rate": 0.0002982968369829683,
      "loss": 0.7233,
      "step": 145
    },
    {
      "epoch": 0.31930016402405687,
      "grad_norm": 0.2461654394865036,
      "learning_rate": 0.00029805352798053527,
      "loss": 0.6717,
      "step": 146
    },
    {
      "epoch": 0.3214871514488792,
      "grad_norm": 0.2988247573375702,
      "learning_rate": 0.00029781021897810217,
      "loss": 0.8926,
      "step": 147
    },
    {
      "epoch": 0.3236741388737015,
      "grad_norm": 0.18185736238956451,
      "learning_rate": 0.00029756690997566907,
      "loss": 0.6663,
      "step": 148
    },
    {
      "epoch": 0.32586112629852376,
      "grad_norm": 0.276687890291214,
      "learning_rate": 0.000297323600973236,
      "loss": 0.6903,
      "step": 149
    },
    {
      "epoch": 0.3280481137233461,
      "grad_norm": 0.3481093645095825,
      "learning_rate": 0.0002970802919708029,
      "loss": 0.7468,
      "step": 150
    },
    {
      "epoch": 0.3302351011481684,
      "grad_norm": 0.21930567920207977,
      "learning_rate": 0.0002968369829683698,
      "loss": 0.6268,
      "step": 151
    },
    {
      "epoch": 0.3324220885729907,
      "grad_norm": 0.18267425894737244,
      "learning_rate": 0.0002965936739659367,
      "loss": 0.7194,
      "step": 152
    },
    {
      "epoch": 0.33460907599781303,
      "grad_norm": 0.7277535200119019,
      "learning_rate": 0.0002963503649635036,
      "loss": 0.7393,
      "step": 153
    },
    {
      "epoch": 0.3367960634226353,
      "grad_norm": 0.3378921151161194,
      "learning_rate": 0.0002961070559610705,
      "loss": 0.7413,
      "step": 154
    },
    {
      "epoch": 0.3389830508474576,
      "grad_norm": 0.20400595664978027,
      "learning_rate": 0.00029586374695863746,
      "loss": 0.7604,
      "step": 155
    },
    {
      "epoch": 0.34117003827227993,
      "grad_norm": 0.3428679406642914,
      "learning_rate": 0.00029562043795620436,
      "loss": 0.6905,
      "step": 156
    },
    {
      "epoch": 0.34335702569710225,
      "grad_norm": 0.25741925835609436,
      "learning_rate": 0.00029537712895377126,
      "loss": 0.8333,
      "step": 157
    },
    {
      "epoch": 0.34554401312192456,
      "grad_norm": 0.2198708951473236,
      "learning_rate": 0.00029513381995133816,
      "loss": 0.7183,
      "step": 158
    },
    {
      "epoch": 0.3477310005467469,
      "grad_norm": 0.2663215696811676,
      "learning_rate": 0.0002948905109489051,
      "loss": 0.6736,
      "step": 159
    },
    {
      "epoch": 0.34991798797156914,
      "grad_norm": 0.26539289951324463,
      "learning_rate": 0.000294647201946472,
      "loss": 0.7691,
      "step": 160
    },
    {
      "epoch": 0.35210497539639146,
      "grad_norm": 0.21398472785949707,
      "learning_rate": 0.0002944038929440389,
      "loss": 0.7259,
      "step": 161
    },
    {
      "epoch": 0.3542919628212138,
      "grad_norm": 0.27584224939346313,
      "learning_rate": 0.0002941605839416058,
      "loss": 0.7451,
      "step": 162
    },
    {
      "epoch": 0.3564789502460361,
      "grad_norm": 0.27322661876678467,
      "learning_rate": 0.0002939172749391727,
      "loss": 0.7429,
      "step": 163
    },
    {
      "epoch": 0.3586659376708584,
      "grad_norm": 0.3097633421421051,
      "learning_rate": 0.0002936739659367396,
      "loss": 0.7925,
      "step": 164
    },
    {
      "epoch": 0.3608529250956807,
      "grad_norm": 0.235543355345726,
      "learning_rate": 0.00029343065693430656,
      "loss": 0.6892,
      "step": 165
    },
    {
      "epoch": 0.363039912520503,
      "grad_norm": 0.34558114409446716,
      "learning_rate": 0.00029318734793187345,
      "loss": 0.8239,
      "step": 166
    },
    {
      "epoch": 0.3652268999453253,
      "grad_norm": 0.5169651508331299,
      "learning_rate": 0.00029294403892944035,
      "loss": 0.5348,
      "step": 167
    },
    {
      "epoch": 0.36741388737014763,
      "grad_norm": 0.4853683412075043,
      "learning_rate": 0.0002927007299270073,
      "loss": 0.7482,
      "step": 168
    },
    {
      "epoch": 0.36960087479496995,
      "grad_norm": 0.3244207203388214,
      "learning_rate": 0.0002924574209245742,
      "loss": 0.6755,
      "step": 169
    },
    {
      "epoch": 0.3717878622197922,
      "grad_norm": 0.3096265494823456,
      "learning_rate": 0.0002922141119221411,
      "loss": 0.8395,
      "step": 170
    },
    {
      "epoch": 0.3739748496446145,
      "grad_norm": 0.21022038161754608,
      "learning_rate": 0.000291970802919708,
      "loss": 0.7376,
      "step": 171
    },
    {
      "epoch": 0.37616183706943684,
      "grad_norm": 0.23877666890621185,
      "learning_rate": 0.0002917274939172749,
      "loss": 0.7051,
      "step": 172
    },
    {
      "epoch": 0.37834882449425916,
      "grad_norm": 0.4041813015937805,
      "learning_rate": 0.0002914841849148418,
      "loss": 0.6341,
      "step": 173
    },
    {
      "epoch": 0.3805358119190815,
      "grad_norm": 0.45476263761520386,
      "learning_rate": 0.00029124087591240875,
      "loss": 0.6939,
      "step": 174
    },
    {
      "epoch": 0.3827227993439038,
      "grad_norm": 0.3100184202194214,
      "learning_rate": 0.00029099756690997565,
      "loss": 0.6321,
      "step": 175
    },
    {
      "epoch": 0.38490978676872606,
      "grad_norm": 0.31327834725379944,
      "learning_rate": 0.00029075425790754255,
      "loss": 0.623,
      "step": 176
    },
    {
      "epoch": 0.3870967741935484,
      "grad_norm": 0.23366397619247437,
      "learning_rate": 0.0002905109489051095,
      "loss": 0.6799,
      "step": 177
    },
    {
      "epoch": 0.3892837616183707,
      "grad_norm": 0.312284380197525,
      "learning_rate": 0.0002902676399026764,
      "loss": 0.6979,
      "step": 178
    },
    {
      "epoch": 0.391470749043193,
      "grad_norm": 0.39591529965400696,
      "learning_rate": 0.0002900243309002433,
      "loss": 0.8571,
      "step": 179
    },
    {
      "epoch": 0.39365773646801533,
      "grad_norm": 0.22407367825508118,
      "learning_rate": 0.0002897810218978102,
      "loss": 0.7724,
      "step": 180
    },
    {
      "epoch": 0.3958447238928376,
      "grad_norm": 0.41758400201797485,
      "learning_rate": 0.0002895377128953771,
      "loss": 0.5597,
      "step": 181
    },
    {
      "epoch": 0.3980317113176599,
      "grad_norm": 0.22731241583824158,
      "learning_rate": 0.000289294403892944,
      "loss": 0.7618,
      "step": 182
    },
    {
      "epoch": 0.4002186987424822,
      "grad_norm": 0.24491345882415771,
      "learning_rate": 0.00028905109489051094,
      "loss": 0.6777,
      "step": 183
    },
    {
      "epoch": 0.40240568616730454,
      "grad_norm": 0.2861243188381195,
      "learning_rate": 0.00028880778588807784,
      "loss": 0.8928,
      "step": 184
    },
    {
      "epoch": 0.40459267359212686,
      "grad_norm": 0.30325135588645935,
      "learning_rate": 0.00028856447688564474,
      "loss": 0.6794,
      "step": 185
    },
    {
      "epoch": 0.4067796610169492,
      "grad_norm": 0.22165870666503906,
      "learning_rate": 0.0002883211678832117,
      "loss": 0.7288,
      "step": 186
    },
    {
      "epoch": 0.40896664844177144,
      "grad_norm": 0.265067994594574,
      "learning_rate": 0.0002880778588807786,
      "loss": 0.6641,
      "step": 187
    },
    {
      "epoch": 0.41115363586659376,
      "grad_norm": 0.3085087835788727,
      "learning_rate": 0.0002878345498783455,
      "loss": 0.7916,
      "step": 188
    },
    {
      "epoch": 0.4133406232914161,
      "grad_norm": 0.30947744846343994,
      "learning_rate": 0.0002875912408759124,
      "loss": 0.834,
      "step": 189
    },
    {
      "epoch": 0.4155276107162384,
      "grad_norm": 0.2581535875797272,
      "learning_rate": 0.0002873479318734793,
      "loss": 0.6255,
      "step": 190
    },
    {
      "epoch": 0.4177145981410607,
      "grad_norm": 0.24718667566776276,
      "learning_rate": 0.0002871046228710462,
      "loss": 0.7883,
      "step": 191
    },
    {
      "epoch": 0.419901585565883,
      "grad_norm": 0.2618321180343628,
      "learning_rate": 0.00028686131386861314,
      "loss": 0.6922,
      "step": 192
    },
    {
      "epoch": 0.4220885729907053,
      "grad_norm": 0.24760881066322327,
      "learning_rate": 0.00028661800486618004,
      "loss": 0.7304,
      "step": 193
    },
    {
      "epoch": 0.4242755604155276,
      "grad_norm": 0.27126792073249817,
      "learning_rate": 0.00028637469586374693,
      "loss": 0.5676,
      "step": 194
    },
    {
      "epoch": 0.4264625478403499,
      "grad_norm": 0.1799423098564148,
      "learning_rate": 0.00028613138686131383,
      "loss": 0.7223,
      "step": 195
    },
    {
      "epoch": 0.42864953526517224,
      "grad_norm": 0.2653333246707916,
      "learning_rate": 0.0002858880778588808,
      "loss": 0.7486,
      "step": 196
    },
    {
      "epoch": 0.4308365226899945,
      "grad_norm": 0.17445164918899536,
      "learning_rate": 0.0002856447688564477,
      "loss": 0.6661,
      "step": 197
    },
    {
      "epoch": 0.4330235101148168,
      "grad_norm": 0.20842154324054718,
      "learning_rate": 0.0002854014598540146,
      "loss": 0.5784,
      "step": 198
    },
    {
      "epoch": 0.43521049753963914,
      "grad_norm": 0.2216557264328003,
      "learning_rate": 0.0002851581508515815,
      "loss": 0.8205,
      "step": 199
    },
    {
      "epoch": 0.43739748496446146,
      "grad_norm": 0.3524712920188904,
      "learning_rate": 0.0002849148418491484,
      "loss": 0.8784,
      "step": 200
    },
    {
      "epoch": 0.4395844723892838,
      "grad_norm": 0.22435776889324188,
      "learning_rate": 0.0002846715328467153,
      "loss": 0.7975,
      "step": 201
    },
    {
      "epoch": 0.4417714598141061,
      "grad_norm": 0.33707621693611145,
      "learning_rate": 0.00028442822384428223,
      "loss": 0.8767,
      "step": 202
    },
    {
      "epoch": 0.44395844723892836,
      "grad_norm": 0.20236724615097046,
      "learning_rate": 0.00028418491484184913,
      "loss": 0.6695,
      "step": 203
    },
    {
      "epoch": 0.4461454346637507,
      "grad_norm": 0.26543137431144714,
      "learning_rate": 0.000283941605839416,
      "loss": 0.7137,
      "step": 204
    },
    {
      "epoch": 0.448332422088573,
      "grad_norm": 0.21210715174674988,
      "learning_rate": 0.000283698296836983,
      "loss": 0.8809,
      "step": 205
    },
    {
      "epoch": 0.4505194095133953,
      "grad_norm": 0.21614502370357513,
      "learning_rate": 0.0002834549878345499,
      "loss": 0.6771,
      "step": 206
    },
    {
      "epoch": 0.4527063969382176,
      "grad_norm": 0.30795833468437195,
      "learning_rate": 0.0002832116788321168,
      "loss": 0.6966,
      "step": 207
    },
    {
      "epoch": 0.4548933843630399,
      "grad_norm": 0.4060954749584198,
      "learning_rate": 0.0002829683698296837,
      "loss": 0.7059,
      "step": 208
    },
    {
      "epoch": 0.4570803717878622,
      "grad_norm": 0.24772609770298004,
      "learning_rate": 0.00028272506082725057,
      "loss": 0.6992,
      "step": 209
    },
    {
      "epoch": 0.4592673592126845,
      "grad_norm": 0.2909943461418152,
      "learning_rate": 0.00028248175182481747,
      "loss": 0.8624,
      "step": 210
    },
    {
      "epoch": 0.46145434663750684,
      "grad_norm": 0.2036535143852234,
      "learning_rate": 0.0002822384428223844,
      "loss": 0.7753,
      "step": 211
    },
    {
      "epoch": 0.46364133406232916,
      "grad_norm": 0.1994384229183197,
      "learning_rate": 0.0002819951338199513,
      "loss": 0.7294,
      "step": 212
    },
    {
      "epoch": 0.4658283214871515,
      "grad_norm": 0.2482912242412567,
      "learning_rate": 0.0002817518248175182,
      "loss": 0.6213,
      "step": 213
    },
    {
      "epoch": 0.46801530891197374,
      "grad_norm": 0.42890939116477966,
      "learning_rate": 0.0002815085158150851,
      "loss": 0.8935,
      "step": 214
    },
    {
      "epoch": 0.47020229633679606,
      "grad_norm": 0.24268397688865662,
      "learning_rate": 0.000281265206812652,
      "loss": 0.6253,
      "step": 215
    },
    {
      "epoch": 0.4723892837616184,
      "grad_norm": 0.3331579267978668,
      "learning_rate": 0.00028102189781021897,
      "loss": 0.7022,
      "step": 216
    },
    {
      "epoch": 0.4745762711864407,
      "grad_norm": 0.34377002716064453,
      "learning_rate": 0.00028077858880778587,
      "loss": 0.8386,
      "step": 217
    },
    {
      "epoch": 0.476763258611263,
      "grad_norm": 0.2543483078479767,
      "learning_rate": 0.00028053527980535277,
      "loss": 0.6084,
      "step": 218
    },
    {
      "epoch": 0.47895024603608527,
      "grad_norm": 0.30651986598968506,
      "learning_rate": 0.00028029197080291966,
      "loss": 0.7624,
      "step": 219
    },
    {
      "epoch": 0.4811372334609076,
      "grad_norm": 0.3476787209510803,
      "learning_rate": 0.0002800486618004866,
      "loss": 0.822,
      "step": 220
    },
    {
      "epoch": 0.4833242208857299,
      "grad_norm": 0.3727283477783203,
      "learning_rate": 0.0002798053527980535,
      "loss": 0.7416,
      "step": 221
    },
    {
      "epoch": 0.4855112083105522,
      "grad_norm": 0.3289774954319,
      "learning_rate": 0.0002795620437956204,
      "loss": 0.8264,
      "step": 222
    },
    {
      "epoch": 0.48769819573537454,
      "grad_norm": 0.26083284616470337,
      "learning_rate": 0.0002793187347931873,
      "loss": 0.6279,
      "step": 223
    },
    {
      "epoch": 0.4898851831601968,
      "grad_norm": 0.2844780683517456,
      "learning_rate": 0.0002790754257907542,
      "loss": 0.6315,
      "step": 224
    },
    {
      "epoch": 0.4920721705850191,
      "grad_norm": 0.3443123996257782,
      "learning_rate": 0.0002788321167883211,
      "loss": 0.6538,
      "step": 225
    },
    {
      "epoch": 0.49425915800984144,
      "grad_norm": 0.23209474980831146,
      "learning_rate": 0.00027858880778588806,
      "loss": 0.7205,
      "step": 226
    },
    {
      "epoch": 0.49644614543466375,
      "grad_norm": 0.26261788606643677,
      "learning_rate": 0.00027834549878345496,
      "loss": 0.7253,
      "step": 227
    },
    {
      "epoch": 0.4986331328594861,
      "grad_norm": 0.28650718927383423,
      "learning_rate": 0.00027810218978102186,
      "loss": 0.889,
      "step": 228
    },
    {
      "epoch": 0.5008201202843083,
      "grad_norm": 0.2478565275669098,
      "learning_rate": 0.0002778588807785888,
      "loss": 0.7619,
      "step": 229
    },
    {
      "epoch": 0.5030071077091307,
      "grad_norm": 0.17673347890377045,
      "learning_rate": 0.0002776155717761557,
      "loss": 0.8684,
      "step": 230
    },
    {
      "epoch": 0.505194095133953,
      "grad_norm": 0.28806573152542114,
      "learning_rate": 0.0002773722627737226,
      "loss": 0.7499,
      "step": 231
    },
    {
      "epoch": 0.5073810825587752,
      "grad_norm": 0.2507832646369934,
      "learning_rate": 0.0002771289537712895,
      "loss": 0.9297,
      "step": 232
    },
    {
      "epoch": 0.5095680699835976,
      "grad_norm": 0.29228198528289795,
      "learning_rate": 0.0002768856447688564,
      "loss": 0.8578,
      "step": 233
    },
    {
      "epoch": 0.5117550574084199,
      "grad_norm": 0.5378915667533875,
      "learning_rate": 0.0002766423357664233,
      "loss": 0.8647,
      "step": 234
    },
    {
      "epoch": 0.5139420448332422,
      "grad_norm": 0.6002528071403503,
      "learning_rate": 0.0002763990267639902,
      "loss": 0.8368,
      "step": 235
    },
    {
      "epoch": 0.5161290322580645,
      "grad_norm": 0.19659245014190674,
      "learning_rate": 0.00027615571776155715,
      "loss": 0.6983,
      "step": 236
    },
    {
      "epoch": 0.5183160196828869,
      "grad_norm": 0.2815648913383484,
      "learning_rate": 0.00027591240875912405,
      "loss": 0.7741,
      "step": 237
    },
    {
      "epoch": 0.5205030071077091,
      "grad_norm": 0.2534239888191223,
      "learning_rate": 0.00027566909975669095,
      "loss": 0.9392,
      "step": 238
    },
    {
      "epoch": 0.5226899945325314,
      "grad_norm": 0.30477020144462585,
      "learning_rate": 0.0002754257907542579,
      "loss": 0.7839,
      "step": 239
    },
    {
      "epoch": 0.5248769819573538,
      "grad_norm": 0.321443647146225,
      "learning_rate": 0.0002751824817518248,
      "loss": 0.8445,
      "step": 240
    },
    {
      "epoch": 0.527063969382176,
      "grad_norm": 0.3917739689350128,
      "learning_rate": 0.0002749391727493917,
      "loss": 0.6641,
      "step": 241
    },
    {
      "epoch": 0.5292509568069984,
      "grad_norm": 0.2380986511707306,
      "learning_rate": 0.0002746958637469586,
      "loss": 0.8242,
      "step": 242
    },
    {
      "epoch": 0.5314379442318207,
      "grad_norm": 0.1695939153432846,
      "learning_rate": 0.0002744525547445255,
      "loss": 0.7013,
      "step": 243
    },
    {
      "epoch": 0.5336249316566429,
      "grad_norm": 0.24696393311023712,
      "learning_rate": 0.0002742092457420924,
      "loss": 0.8488,
      "step": 244
    },
    {
      "epoch": 0.5358119190814653,
      "grad_norm": 0.2278507500886917,
      "learning_rate": 0.00027396593673965935,
      "loss": 0.7894,
      "step": 245
    },
    {
      "epoch": 0.5379989065062876,
      "grad_norm": 0.41331958770751953,
      "learning_rate": 0.00027372262773722625,
      "loss": 0.8343,
      "step": 246
    },
    {
      "epoch": 0.5401858939311099,
      "grad_norm": 0.29076704382896423,
      "learning_rate": 0.00027347931873479315,
      "loss": 0.995,
      "step": 247
    },
    {
      "epoch": 0.5423728813559322,
      "grad_norm": 0.23243111371994019,
      "learning_rate": 0.0002732360097323601,
      "loss": 0.7456,
      "step": 248
    },
    {
      "epoch": 0.5445598687807545,
      "grad_norm": 0.21154357492923737,
      "learning_rate": 0.000272992700729927,
      "loss": 0.6853,
      "step": 249
    },
    {
      "epoch": 0.5467468562055768,
      "grad_norm": 0.24274934828281403,
      "learning_rate": 0.0002727493917274939,
      "loss": 0.6452,
      "step": 250
    },
    {
      "epoch": 0.5489338436303991,
      "grad_norm": 0.37139129638671875,
      "learning_rate": 0.0002725060827250608,
      "loss": 0.7449,
      "step": 251
    },
    {
      "epoch": 0.5511208310552215,
      "grad_norm": 0.17621925473213196,
      "learning_rate": 0.0002722627737226277,
      "loss": 0.6824,
      "step": 252
    },
    {
      "epoch": 0.5533078184800437,
      "grad_norm": 0.19210177659988403,
      "learning_rate": 0.0002720194647201946,
      "loss": 0.6186,
      "step": 253
    },
    {
      "epoch": 0.555494805904866,
      "grad_norm": 0.21780337393283844,
      "learning_rate": 0.00027177615571776154,
      "loss": 0.663,
      "step": 254
    },
    {
      "epoch": 0.5576817933296884,
      "grad_norm": 0.21192163228988647,
      "learning_rate": 0.00027153284671532844,
      "loss": 0.8801,
      "step": 255
    },
    {
      "epoch": 0.5598687807545106,
      "grad_norm": 0.27523308992385864,
      "learning_rate": 0.00027128953771289534,
      "loss": 0.6769,
      "step": 256
    },
    {
      "epoch": 0.562055768179333,
      "grad_norm": 0.24207553267478943,
      "learning_rate": 0.0002710462287104623,
      "loss": 0.4965,
      "step": 257
    },
    {
      "epoch": 0.5642427556041553,
      "grad_norm": 0.33707237243652344,
      "learning_rate": 0.0002708029197080292,
      "loss": 0.7787,
      "step": 258
    },
    {
      "epoch": 0.5664297430289775,
      "grad_norm": 0.2669321596622467,
      "learning_rate": 0.0002705596107055961,
      "loss": 1.0172,
      "step": 259
    },
    {
      "epoch": 0.5686167304537999,
      "grad_norm": 0.26386845111846924,
      "learning_rate": 0.000270316301703163,
      "loss": 0.6477,
      "step": 260
    },
    {
      "epoch": 0.5708037178786222,
      "grad_norm": 0.304721474647522,
      "learning_rate": 0.0002700729927007299,
      "loss": 0.8301,
      "step": 261
    },
    {
      "epoch": 0.5729907053034445,
      "grad_norm": 0.20255905389785767,
      "learning_rate": 0.0002698296836982968,
      "loss": 0.5643,
      "step": 262
    },
    {
      "epoch": 0.5751776927282668,
      "grad_norm": 0.2723388671875,
      "learning_rate": 0.00026958637469586374,
      "loss": 0.6883,
      "step": 263
    },
    {
      "epoch": 0.5773646801530892,
      "grad_norm": 0.27381351590156555,
      "learning_rate": 0.00026934306569343063,
      "loss": 0.808,
      "step": 264
    },
    {
      "epoch": 0.5795516675779114,
      "grad_norm": 0.25915855169296265,
      "learning_rate": 0.00026909975669099753,
      "loss": 0.722,
      "step": 265
    },
    {
      "epoch": 0.5817386550027337,
      "grad_norm": 0.22392873466014862,
      "learning_rate": 0.0002688564476885645,
      "loss": 0.6744,
      "step": 266
    },
    {
      "epoch": 0.5839256424275561,
      "grad_norm": 0.2078748643398285,
      "learning_rate": 0.0002686131386861314,
      "loss": 0.8127,
      "step": 267
    },
    {
      "epoch": 0.5861126298523783,
      "grad_norm": 0.18671007454395294,
      "learning_rate": 0.0002683698296836983,
      "loss": 0.6276,
      "step": 268
    },
    {
      "epoch": 0.5882996172772007,
      "grad_norm": 0.3014012575149536,
      "learning_rate": 0.0002681265206812652,
      "loss": 0.7543,
      "step": 269
    },
    {
      "epoch": 0.590486604702023,
      "grad_norm": 0.23588421940803528,
      "learning_rate": 0.0002678832116788321,
      "loss": 0.8301,
      "step": 270
    },
    {
      "epoch": 0.5926735921268452,
      "grad_norm": 0.37635311484336853,
      "learning_rate": 0.000267639902676399,
      "loss": 0.8239,
      "step": 271
    },
    {
      "epoch": 0.5948605795516676,
      "grad_norm": 0.23310554027557373,
      "learning_rate": 0.0002673965936739659,
      "loss": 0.8723,
      "step": 272
    },
    {
      "epoch": 0.5970475669764899,
      "grad_norm": 0.47537633776664734,
      "learning_rate": 0.00026715328467153283,
      "loss": 0.7915,
      "step": 273
    },
    {
      "epoch": 0.5992345544013122,
      "grad_norm": 0.2815110981464386,
      "learning_rate": 0.0002669099756690997,
      "loss": 0.8004,
      "step": 274
    },
    {
      "epoch": 0.6014215418261345,
      "grad_norm": 0.19834642112255096,
      "learning_rate": 0.0002666666666666666,
      "loss": 0.7457,
      "step": 275
    },
    {
      "epoch": 0.6036085292509568,
      "grad_norm": 0.5626861453056335,
      "learning_rate": 0.0002664233576642336,
      "loss": 0.6196,
      "step": 276
    },
    {
      "epoch": 0.6057955166757791,
      "grad_norm": 0.2784450054168701,
      "learning_rate": 0.0002661800486618005,
      "loss": 0.6365,
      "step": 277
    },
    {
      "epoch": 0.6079825041006014,
      "grad_norm": 0.23809124529361725,
      "learning_rate": 0.0002659367396593674,
      "loss": 0.7889,
      "step": 278
    },
    {
      "epoch": 0.6101694915254238,
      "grad_norm": 0.25168001651763916,
      "learning_rate": 0.0002656934306569343,
      "loss": 0.6327,
      "step": 279
    },
    {
      "epoch": 0.612356478950246,
      "grad_norm": 0.2970046401023865,
      "learning_rate": 0.00026545012165450117,
      "loss": 0.6913,
      "step": 280
    },
    {
      "epoch": 0.6145434663750683,
      "grad_norm": 0.3090710937976837,
      "learning_rate": 0.00026520681265206807,
      "loss": 0.7131,
      "step": 281
    },
    {
      "epoch": 0.6167304537998907,
      "grad_norm": 0.2775273621082306,
      "learning_rate": 0.000264963503649635,
      "loss": 0.8556,
      "step": 282
    },
    {
      "epoch": 0.6189174412247129,
      "grad_norm": 0.3191220164299011,
      "learning_rate": 0.0002647201946472019,
      "loss": 0.8762,
      "step": 283
    },
    {
      "epoch": 0.6211044286495353,
      "grad_norm": 0.2520481050014496,
      "learning_rate": 0.0002644768856447688,
      "loss": 0.6358,
      "step": 284
    },
    {
      "epoch": 0.6232914160743576,
      "grad_norm": 0.31783685088157654,
      "learning_rate": 0.00026423357664233577,
      "loss": 0.773,
      "step": 285
    },
    {
      "epoch": 0.6254784034991798,
      "grad_norm": 0.33624374866485596,
      "learning_rate": 0.00026399026763990267,
      "loss": 0.963,
      "step": 286
    },
    {
      "epoch": 0.6276653909240022,
      "grad_norm": 0.3576049208641052,
      "learning_rate": 0.00026374695863746957,
      "loss": 0.6658,
      "step": 287
    },
    {
      "epoch": 0.6298523783488245,
      "grad_norm": 0.2659110426902771,
      "learning_rate": 0.00026350364963503647,
      "loss": 0.6662,
      "step": 288
    },
    {
      "epoch": 0.6320393657736468,
      "grad_norm": 0.3657420575618744,
      "learning_rate": 0.00026326034063260337,
      "loss": 0.9873,
      "step": 289
    },
    {
      "epoch": 0.6342263531984691,
      "grad_norm": 0.24509188532829285,
      "learning_rate": 0.00026301703163017026,
      "loss": 0.7795,
      "step": 290
    },
    {
      "epoch": 0.6364133406232915,
      "grad_norm": 0.24286092817783356,
      "learning_rate": 0.0002627737226277372,
      "loss": 0.7611,
      "step": 291
    },
    {
      "epoch": 0.6386003280481137,
      "grad_norm": 0.2804836332798004,
      "learning_rate": 0.0002625304136253041,
      "loss": 0.759,
      "step": 292
    },
    {
      "epoch": 0.640787315472936,
      "grad_norm": 0.3322978615760803,
      "learning_rate": 0.000262287104622871,
      "loss": 0.6943,
      "step": 293
    },
    {
      "epoch": 0.6429743028977584,
      "grad_norm": 0.2114831805229187,
      "learning_rate": 0.00026204379562043797,
      "loss": 0.6729,
      "step": 294
    },
    {
      "epoch": 0.6451612903225806,
      "grad_norm": 0.2177094966173172,
      "learning_rate": 0.00026180048661800486,
      "loss": 0.7916,
      "step": 295
    },
    {
      "epoch": 0.647348277747403,
      "grad_norm": 0.2582005560398102,
      "learning_rate": 0.00026155717761557176,
      "loss": 0.7655,
      "step": 296
    },
    {
      "epoch": 0.6495352651722253,
      "grad_norm": 0.2613639831542969,
      "learning_rate": 0.00026131386861313866,
      "loss": 0.6482,
      "step": 297
    },
    {
      "epoch": 0.6517222525970475,
      "grad_norm": 0.2764948606491089,
      "learning_rate": 0.00026107055961070556,
      "loss": 0.7022,
      "step": 298
    },
    {
      "epoch": 0.6539092400218699,
      "grad_norm": 0.20186789333820343,
      "learning_rate": 0.00026082725060827246,
      "loss": 0.7853,
      "step": 299
    },
    {
      "epoch": 0.6560962274466922,
      "grad_norm": 0.3178173303604126,
      "learning_rate": 0.0002605839416058394,
      "loss": 0.8393,
      "step": 300
    },
    {
      "epoch": 0.6582832148715145,
      "grad_norm": 0.35939186811447144,
      "learning_rate": 0.0002603406326034063,
      "loss": 0.7078,
      "step": 301
    },
    {
      "epoch": 0.6604702022963368,
      "grad_norm": 0.3983876407146454,
      "learning_rate": 0.0002600973236009732,
      "loss": 0.8271,
      "step": 302
    },
    {
      "epoch": 0.6626571897211591,
      "grad_norm": 0.19504043459892273,
      "learning_rate": 0.00025985401459854016,
      "loss": 0.7748,
      "step": 303
    },
    {
      "epoch": 0.6648441771459814,
      "grad_norm": 0.21278342604637146,
      "learning_rate": 0.00025961070559610706,
      "loss": 0.8016,
      "step": 304
    },
    {
      "epoch": 0.6670311645708037,
      "grad_norm": 0.29927191138267517,
      "learning_rate": 0.00025936739659367396,
      "loss": 0.844,
      "step": 305
    },
    {
      "epoch": 0.6692181519956261,
      "grad_norm": 0.22748655080795288,
      "learning_rate": 0.00025912408759124085,
      "loss": 0.6786,
      "step": 306
    },
    {
      "epoch": 0.6714051394204483,
      "grad_norm": 0.21796458959579468,
      "learning_rate": 0.00025888077858880775,
      "loss": 0.8343,
      "step": 307
    },
    {
      "epoch": 0.6735921268452706,
      "grad_norm": 0.26962918043136597,
      "learning_rate": 0.00025863746958637465,
      "loss": 0.8058,
      "step": 308
    },
    {
      "epoch": 0.675779114270093,
      "grad_norm": 0.2169698178768158,
      "learning_rate": 0.00025839416058394155,
      "loss": 0.8341,
      "step": 309
    },
    {
      "epoch": 0.6779661016949152,
      "grad_norm": 0.5226082801818848,
      "learning_rate": 0.0002581508515815085,
      "loss": 0.8038,
      "step": 310
    },
    {
      "epoch": 0.6801530891197376,
      "grad_norm": 0.2540872395038605,
      "learning_rate": 0.0002579075425790754,
      "loss": 0.6485,
      "step": 311
    },
    {
      "epoch": 0.6823400765445599,
      "grad_norm": 0.2758027911186218,
      "learning_rate": 0.0002576642335766423,
      "loss": 0.7258,
      "step": 312
    },
    {
      "epoch": 0.6845270639693821,
      "grad_norm": 0.3712478280067444,
      "learning_rate": 0.00025742092457420925,
      "loss": 1.0087,
      "step": 313
    },
    {
      "epoch": 0.6867140513942045,
      "grad_norm": 0.29959022998809814,
      "learning_rate": 0.00025717761557177615,
      "loss": 0.7344,
      "step": 314
    },
    {
      "epoch": 0.6889010388190268,
      "grad_norm": 0.29603782296180725,
      "learning_rate": 0.00025693430656934305,
      "loss": 0.7633,
      "step": 315
    },
    {
      "epoch": 0.6910880262438491,
      "grad_norm": 0.26212218403816223,
      "learning_rate": 0.00025669099756690995,
      "loss": 0.7762,
      "step": 316
    },
    {
      "epoch": 0.6932750136686714,
      "grad_norm": 0.2501971423625946,
      "learning_rate": 0.00025644768856447685,
      "loss": 0.6449,
      "step": 317
    },
    {
      "epoch": 0.6954620010934938,
      "grad_norm": 0.20236985385417938,
      "learning_rate": 0.00025620437956204374,
      "loss": 0.6661,
      "step": 318
    },
    {
      "epoch": 0.697648988518316,
      "grad_norm": 0.28867748379707336,
      "learning_rate": 0.0002559610705596107,
      "loss": 0.7168,
      "step": 319
    },
    {
      "epoch": 0.6998359759431383,
      "grad_norm": 0.25392022728919983,
      "learning_rate": 0.0002557177615571776,
      "loss": 0.8255,
      "step": 320
    },
    {
      "epoch": 0.7020229633679607,
      "grad_norm": 0.2739144563674927,
      "learning_rate": 0.0002554744525547445,
      "loss": 0.8782,
      "step": 321
    },
    {
      "epoch": 0.7042099507927829,
      "grad_norm": 0.3195747137069702,
      "learning_rate": 0.00025523114355231145,
      "loss": 0.7681,
      "step": 322
    },
    {
      "epoch": 0.7063969382176053,
      "grad_norm": 0.6262739300727844,
      "learning_rate": 0.00025498783454987834,
      "loss": 0.6497,
      "step": 323
    },
    {
      "epoch": 0.7085839256424276,
      "grad_norm": 0.18836063146591187,
      "learning_rate": 0.00025474452554744524,
      "loss": 0.6773,
      "step": 324
    },
    {
      "epoch": 0.7107709130672498,
      "grad_norm": 0.428913950920105,
      "learning_rate": 0.00025450121654501214,
      "loss": 0.6359,
      "step": 325
    },
    {
      "epoch": 0.7129579004920722,
      "grad_norm": 0.2561635375022888,
      "learning_rate": 0.00025425790754257904,
      "loss": 0.6768,
      "step": 326
    },
    {
      "epoch": 0.7151448879168945,
      "grad_norm": 0.2519037425518036,
      "learning_rate": 0.00025401459854014594,
      "loss": 0.941,
      "step": 327
    },
    {
      "epoch": 0.7173318753417168,
      "grad_norm": 0.22086481750011444,
      "learning_rate": 0.0002537712895377129,
      "loss": 0.6448,
      "step": 328
    },
    {
      "epoch": 0.7195188627665391,
      "grad_norm": 0.3844771385192871,
      "learning_rate": 0.0002535279805352798,
      "loss": 0.6043,
      "step": 329
    },
    {
      "epoch": 0.7217058501913614,
      "grad_norm": 0.2547963857650757,
      "learning_rate": 0.0002532846715328467,
      "loss": 0.9912,
      "step": 330
    },
    {
      "epoch": 0.7238928376161837,
      "grad_norm": 0.40474840998649597,
      "learning_rate": 0.00025304136253041364,
      "loss": 0.5905,
      "step": 331
    },
    {
      "epoch": 0.726079825041006,
      "grad_norm": 0.20748649537563324,
      "learning_rate": 0.00025279805352798054,
      "loss": 0.6245,
      "step": 332
    },
    {
      "epoch": 0.7282668124658284,
      "grad_norm": 0.29902809858322144,
      "learning_rate": 0.00025255474452554744,
      "loss": 0.7478,
      "step": 333
    },
    {
      "epoch": 0.7304537998906506,
      "grad_norm": 0.21671514213085175,
      "learning_rate": 0.00025231143552311433,
      "loss": 0.5296,
      "step": 334
    },
    {
      "epoch": 0.7326407873154729,
      "grad_norm": 0.1979508250951767,
      "learning_rate": 0.00025206812652068123,
      "loss": 0.5523,
      "step": 335
    },
    {
      "epoch": 0.7348277747402953,
      "grad_norm": 0.25213825702667236,
      "learning_rate": 0.00025182481751824813,
      "loss": 0.9787,
      "step": 336
    },
    {
      "epoch": 0.7370147621651175,
      "grad_norm": 0.32967931032180786,
      "learning_rate": 0.0002515815085158151,
      "loss": 0.7161,
      "step": 337
    },
    {
      "epoch": 0.7392017495899399,
      "grad_norm": 0.30640098452568054,
      "learning_rate": 0.000251338199513382,
      "loss": 0.9517,
      "step": 338
    },
    {
      "epoch": 0.7413887370147622,
      "grad_norm": 0.1820855438709259,
| "learning_rate": 0.0002510948905109489, | |
| "loss": 0.6219, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.7435757244395844, | |
| "grad_norm": 0.29584068059921265, | |
| "learning_rate": 0.00025085158150851583, | |
| "loss": 0.7692, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7457627118644068, | |
| "grad_norm": 0.3015952408313751, | |
| "learning_rate": 0.00025060827250608273, | |
| "loss": 0.812, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.747949699289229, | |
| "grad_norm": 0.364886611700058, | |
| "learning_rate": 0.00025036496350364963, | |
| "loss": 0.7881, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7501366867140514, | |
| "grad_norm": 0.2170587182044983, | |
| "learning_rate": 0.00025012165450121653, | |
| "loss": 0.6989, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.7523236741388737, | |
| "grad_norm": 0.23260867595672607, | |
| "learning_rate": 0.00024987834549878343, | |
| "loss": 0.6581, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7545106615636961, | |
| "grad_norm": 0.36740902066230774, | |
| "learning_rate": 0.0002496350364963503, | |
| "loss": 0.9984, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.7566976489885183, | |
| "grad_norm": 0.6248576641082764, | |
| "learning_rate": 0.0002493917274939172, | |
| "loss": 0.9879, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7588846364133406, | |
| "grad_norm": 0.44404783844947815, | |
| "learning_rate": 0.0002491484184914842, | |
| "loss": 0.616, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.761071623838163, | |
| "grad_norm": 0.2840265929698944, | |
| "learning_rate": 0.0002489051094890511, | |
| "loss": 0.9053, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7632586112629852, | |
| "grad_norm": 0.34335142374038696, | |
| "learning_rate": 0.000248661800486618, | |
| "loss": 0.7877, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.7654455986878076, | |
| "grad_norm": 0.28032955527305603, | |
| "learning_rate": 0.0002484184914841849, | |
| "loss": 0.5934, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7676325861126299, | |
| "grad_norm": 0.35794079303741455, | |
| "learning_rate": 0.0002481751824817518, | |
| "loss": 0.736, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.7698195735374521, | |
| "grad_norm": 0.1937468945980072, | |
| "learning_rate": 0.0002479318734793187, | |
| "loss": 0.7268, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7720065609622745, | |
| "grad_norm": 0.2442459911108017, | |
| "learning_rate": 0.0002476885644768856, | |
| "loss": 0.9092, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.7741935483870968, | |
| "grad_norm": 0.2178357094526291, | |
| "learning_rate": 0.0002474452554744525, | |
| "loss": 0.832, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7763805358119191, | |
| "grad_norm": 0.2904297113418579, | |
| "learning_rate": 0.0002472019464720194, | |
| "loss": 0.6973, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.7785675232367414, | |
| "grad_norm": 0.2849595248699188, | |
| "learning_rate": 0.00024695863746958637, | |
| "loss": 0.8439, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7807545106615636, | |
| "grad_norm": 0.30786654353141785, | |
| "learning_rate": 0.00024671532846715327, | |
| "loss": 0.8282, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.782941498086386, | |
| "grad_norm": 0.2731088697910309, | |
| "learning_rate": 0.00024647201946472017, | |
| "loss": 0.7614, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.7851284855112083, | |
| "grad_norm": 0.2967981696128845, | |
| "learning_rate": 0.0002462287104622871, | |
| "loss": 0.7059, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.7873154729360307, | |
| "grad_norm": 0.2427809238433838, | |
| "learning_rate": 0.000245985401459854, | |
| "loss": 0.5235, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7895024603608529, | |
| "grad_norm": 0.3543761074542999, | |
| "learning_rate": 0.0002457420924574209, | |
| "loss": 0.6882, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.7916894477856752, | |
| "grad_norm": 0.2084377259016037, | |
| "learning_rate": 0.0002454987834549878, | |
| "loss": 0.6333, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.7938764352104976, | |
| "grad_norm": 0.3653489649295807, | |
| "learning_rate": 0.0002452554744525547, | |
| "loss": 0.8776, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.7960634226353198, | |
| "grad_norm": 0.2806954085826874, | |
| "learning_rate": 0.0002450121654501216, | |
| "loss": 0.7464, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.7982504100601422, | |
| "grad_norm": 0.3652292788028717, | |
| "learning_rate": 0.00024476885644768856, | |
| "loss": 0.93, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.8004373974849645, | |
| "grad_norm": 0.24262574315071106, | |
| "learning_rate": 0.00024452554744525546, | |
| "loss": 0.8502, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.8026243849097867, | |
| "grad_norm": 0.273867666721344, | |
| "learning_rate": 0.00024428223844282236, | |
| "loss": 0.9274, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.8048113723346091, | |
| "grad_norm": 0.21722102165222168, | |
| "learning_rate": 0.0002440389294403893, | |
| "loss": 0.8045, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.8069983597594313, | |
| "grad_norm": 0.19634899497032166, | |
| "learning_rate": 0.00024379562043795619, | |
| "loss": 0.7424, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.8091853471842537, | |
| "grad_norm": 0.27201011776924133, | |
| "learning_rate": 0.00024355231143552308, | |
| "loss": 0.797, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.811372334609076, | |
| "grad_norm": 0.254142165184021, | |
| "learning_rate": 0.00024330900243309, | |
| "loss": 0.6142, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.8135593220338984, | |
| "grad_norm": 0.7009087204933167, | |
| "learning_rate": 0.0002430656934306569, | |
| "loss": 0.6703, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.8157463094587206, | |
| "grad_norm": 0.2147742360830307, | |
| "learning_rate": 0.0002428223844282238, | |
| "loss": 0.8446, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.8179332968835429, | |
| "grad_norm": 0.18214701116085052, | |
| "learning_rate": 0.00024257907542579076, | |
| "loss": 0.6536, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.8201202843083653, | |
| "grad_norm": 0.22022093832492828, | |
| "learning_rate": 0.00024233576642335766, | |
| "loss": 0.7452, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.8223072717331875, | |
| "grad_norm": 0.19220127165317535, | |
| "learning_rate": 0.00024209245742092456, | |
| "loss": 0.699, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.8244942591580099, | |
| "grad_norm": 0.26980119943618774, | |
| "learning_rate": 0.00024184914841849148, | |
| "loss": 0.8433, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.8266812465828322, | |
| "grad_norm": 0.1975000947713852, | |
| "learning_rate": 0.00024160583941605838, | |
| "loss": 0.5667, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8288682340076544, | |
| "grad_norm": 0.28691354393959045, | |
| "learning_rate": 0.00024136253041362528, | |
| "loss": 0.764, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.8310552214324768, | |
| "grad_norm": 0.23176266252994537, | |
| "learning_rate": 0.0002411192214111922, | |
| "loss": 0.5348, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.833242208857299, | |
| "grad_norm": 0.2583778202533722, | |
| "learning_rate": 0.0002408759124087591, | |
| "loss": 0.8583, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.8354291962821214, | |
| "grad_norm": 0.1877242922782898, | |
| "learning_rate": 0.000240632603406326, | |
| "loss": 0.6818, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.8376161837069437, | |
| "grad_norm": 0.3764333724975586, | |
| "learning_rate": 0.0002403892944038929, | |
| "loss": 0.8631, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.839803171131766, | |
| "grad_norm": 0.30223846435546875, | |
| "learning_rate": 0.00024014598540145985, | |
| "loss": 0.7702, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8419901585565883, | |
| "grad_norm": 0.43627509474754333, | |
| "learning_rate": 0.00023990267639902675, | |
| "loss": 0.8994, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.8441771459814106, | |
| "grad_norm": 0.2544715404510498, | |
| "learning_rate": 0.00023965936739659365, | |
| "loss": 0.6475, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.846364133406233, | |
| "grad_norm": 0.23747164011001587, | |
| "learning_rate": 0.00023941605839416057, | |
| "loss": 0.7199, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.8485511208310552, | |
| "grad_norm": 0.3392624855041504, | |
| "learning_rate": 0.00023917274939172747, | |
| "loss": 0.763, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8507381082558775, | |
| "grad_norm": 0.25245627760887146, | |
| "learning_rate": 0.00023892944038929437, | |
| "loss": 0.7532, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.8529250956806999, | |
| "grad_norm": 0.2674003839492798, | |
| "learning_rate": 0.0002386861313868613, | |
| "loss": 0.599, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8551120831055221, | |
| "grad_norm": 0.27161166071891785, | |
| "learning_rate": 0.0002384428223844282, | |
| "loss": 0.9355, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.8572990705303445, | |
| "grad_norm": 0.18150918185710907, | |
| "learning_rate": 0.0002381995133819951, | |
| "loss": 0.6056, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8594860579551667, | |
| "grad_norm": 0.22968190908432007, | |
| "learning_rate": 0.00023795620437956204, | |
| "loss": 0.767, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.861673045379989, | |
| "grad_norm": 0.21685199439525604, | |
| "learning_rate": 0.00023771289537712894, | |
| "loss": 0.7246, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8638600328048114, | |
| "grad_norm": 0.26542550325393677, | |
| "learning_rate": 0.00023746958637469584, | |
| "loss": 0.7106, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.8660470202296336, | |
| "grad_norm": 0.23525013029575348, | |
| "learning_rate": 0.00023722627737226277, | |
| "loss": 0.6958, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.868234007654456, | |
| "grad_norm": 0.20633290708065033, | |
| "learning_rate": 0.00023698296836982967, | |
| "loss": 0.643, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.8704209950792783, | |
| "grad_norm": 0.21550309658050537, | |
| "learning_rate": 0.00023673965936739656, | |
| "loss": 0.7449, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.8726079825041007, | |
| "grad_norm": 0.2124805748462677, | |
| "learning_rate": 0.0002364963503649635, | |
| "loss": 0.7398, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.8747949699289229, | |
| "grad_norm": 0.21294209361076355, | |
| "learning_rate": 0.0002362530413625304, | |
| "loss": 0.7934, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8769819573537452, | |
| "grad_norm": 0.36196568608283997, | |
| "learning_rate": 0.00023600973236009729, | |
| "loss": 0.7848, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.8791689447785676, | |
| "grad_norm": 0.27596211433410645, | |
| "learning_rate": 0.0002357664233576642, | |
| "loss": 0.7286, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8813559322033898, | |
| "grad_norm": 0.27594348788261414, | |
| "learning_rate": 0.00023552311435523114, | |
| "loss": 0.8247, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.8835429196282122, | |
| "grad_norm": 0.2970782518386841, | |
| "learning_rate": 0.00023527980535279804, | |
| "loss": 0.7548, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.8857299070530344, | |
| "grad_norm": 0.39152461290359497, | |
| "learning_rate": 0.00023503649635036496, | |
| "loss": 0.8263, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.8879168944778567, | |
| "grad_norm": 0.42587387561798096, | |
| "learning_rate": 0.00023479318734793186, | |
| "loss": 0.9905, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.8901038819026791, | |
| "grad_norm": 0.314147412776947, | |
| "learning_rate": 0.00023454987834549876, | |
| "loss": 0.6665, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.8922908693275013, | |
| "grad_norm": 0.34058940410614014, | |
| "learning_rate": 0.00023430656934306568, | |
| "loss": 0.7359, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.8944778567523237, | |
| "grad_norm": 0.2528778612613678, | |
| "learning_rate": 0.00023406326034063258, | |
| "loss": 0.693, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.896664844177146, | |
| "grad_norm": 0.17990703880786896, | |
| "learning_rate": 0.00023381995133819948, | |
| "loss": 0.7565, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8988518316019682, | |
| "grad_norm": 0.17062903940677643, | |
| "learning_rate": 0.0002335766423357664, | |
| "loss": 0.7891, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.9010388190267906, | |
| "grad_norm": 0.3442295789718628, | |
| "learning_rate": 0.0002333333333333333, | |
| "loss": 0.6173, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.9032258064516129, | |
| "grad_norm": 0.45662209391593933, | |
| "learning_rate": 0.0002330900243309002, | |
| "loss": 0.796, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.9054127938764353, | |
| "grad_norm": 0.17335475981235504, | |
| "learning_rate": 0.00023284671532846715, | |
| "loss": 0.6825, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.9075997813012575, | |
| "grad_norm": 0.22652967274188995, | |
| "learning_rate": 0.00023260340632603405, | |
| "loss": 0.7512, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.9097867687260798, | |
| "grad_norm": 0.349649041891098, | |
| "learning_rate": 0.00023236009732360095, | |
| "loss": 0.8205, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.9119737561509021, | |
| "grad_norm": 0.18699604272842407, | |
| "learning_rate": 0.00023211678832116788, | |
| "loss": 0.6451, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.9141607435757244, | |
| "grad_norm": 0.2398325353860855, | |
| "learning_rate": 0.00023187347931873478, | |
| "loss": 0.6891, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.9163477310005468, | |
| "grad_norm": 0.22116120159626007, | |
| "learning_rate": 0.00023163017031630167, | |
| "loss": 0.6765, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.918534718425369, | |
| "grad_norm": 0.24642986059188843, | |
| "learning_rate": 0.00023138686131386857, | |
| "loss": 0.6119, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9207217058501913, | |
| "grad_norm": 0.2329958975315094, | |
| "learning_rate": 0.0002311435523114355, | |
| "loss": 0.7286, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.9229086932750137, | |
| "grad_norm": 0.5355735421180725, | |
| "learning_rate": 0.0002309002433090024, | |
| "loss": 0.79, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.9250956806998359, | |
| "grad_norm": 0.4554167091846466, | |
| "learning_rate": 0.0002306569343065693, | |
| "loss": 0.6942, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.9272826681246583, | |
| "grad_norm": 0.2831968367099762, | |
| "learning_rate": 0.00023041362530413625, | |
| "loss": 0.7531, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.9294696555494806, | |
| "grad_norm": 0.2321235090494156, | |
| "learning_rate": 0.00023017031630170315, | |
| "loss": 0.6902, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.931656642974303, | |
| "grad_norm": 0.4006916880607605, | |
| "learning_rate": 0.00022992700729927004, | |
| "loss": 0.6725, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9338436303991252, | |
| "grad_norm": 0.3189490735530853, | |
| "learning_rate": 0.00022968369829683697, | |
| "loss": 0.769, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.9360306178239475, | |
| "grad_norm": 0.4294585585594177, | |
| "learning_rate": 0.00022944038929440387, | |
| "loss": 0.8656, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9382176052487698, | |
| "grad_norm": 0.34347137808799744, | |
| "learning_rate": 0.00022919708029197077, | |
| "loss": 0.5948, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.9404045926735921, | |
| "grad_norm": 0.21789056062698364, | |
| "learning_rate": 0.0002289537712895377, | |
| "loss": 0.8035, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9425915800984145, | |
| "grad_norm": 0.1835460364818573, | |
| "learning_rate": 0.0002287104622871046, | |
| "loss": 0.6128, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.9447785675232367, | |
| "grad_norm": 0.3390374183654785, | |
| "learning_rate": 0.0002284671532846715, | |
| "loss": 0.7788, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.946965554948059, | |
| "grad_norm": 0.23330353200435638, | |
| "learning_rate": 0.00022822384428223844, | |
| "loss": 0.7653, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.9491525423728814, | |
| "grad_norm": 0.2357734590768814, | |
| "learning_rate": 0.00022798053527980534, | |
| "loss": 0.765, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.9513395297977036, | |
| "grad_norm": 0.2517554759979248, | |
| "learning_rate": 0.00022773722627737224, | |
| "loss": 0.7815, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.953526517222526, | |
| "grad_norm": 0.23417727649211884, | |
| "learning_rate": 0.00022749391727493916, | |
| "loss": 0.9801, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9557135046473483, | |
| "grad_norm": 0.256149023771286, | |
| "learning_rate": 0.00022725060827250606, | |
| "loss": 0.734, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.9579004920721705, | |
| "grad_norm": 0.31608134508132935, | |
| "learning_rate": 0.00022700729927007296, | |
| "loss": 0.707, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.9600874794969929, | |
| "grad_norm": 0.23100577294826508, | |
| "learning_rate": 0.00022676399026763989, | |
| "loss": 0.6734, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.9622744669218152, | |
| "grad_norm": 0.27026960253715515, | |
| "learning_rate": 0.00022652068126520678, | |
| "loss": 0.7884, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9644614543466375, | |
| "grad_norm": 0.24245603382587433, | |
| "learning_rate": 0.00022627737226277368, | |
| "loss": 0.5405, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.9666484417714598, | |
| "grad_norm": 0.25354650616645813, | |
| "learning_rate": 0.00022603406326034064, | |
| "loss": 0.629, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9688354291962821, | |
| "grad_norm": 0.35559025406837463, | |
| "learning_rate": 0.00022579075425790753, | |
| "loss": 0.5673, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.9710224166211044, | |
| "grad_norm": 0.18353384733200073, | |
| "learning_rate": 0.00022554744525547443, | |
| "loss": 0.7391, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9732094040459267, | |
| "grad_norm": 0.20255619287490845, | |
| "learning_rate": 0.00022530413625304136, | |
| "loss": 0.605, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.9753963914707491, | |
| "grad_norm": 0.24910545349121094, | |
| "learning_rate": 0.00022506082725060826, | |
| "loss": 0.7387, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.9775833788955713, | |
| "grad_norm": 0.30054211616516113, | |
| "learning_rate": 0.00022481751824817515, | |
| "loss": 0.7649, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.9797703663203936, | |
| "grad_norm": 0.2318667322397232, | |
| "learning_rate": 0.00022457420924574208, | |
| "loss": 0.6788, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.981957353745216, | |
| "grad_norm": 0.27025488018989563, | |
| "learning_rate": 0.00022433090024330898, | |
| "loss": 0.8761, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.9841443411700382, | |
| "grad_norm": 0.324431836605072, | |
| "learning_rate": 0.00022408759124087588, | |
| "loss": 0.5286, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9863313285948606, | |
| "grad_norm": 0.22321289777755737, | |
| "learning_rate": 0.00022384428223844283, | |
| "loss": 0.9685, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.9885183160196829, | |
| "grad_norm": 0.348459929227829, | |
| "learning_rate": 0.00022360097323600973, | |
| "loss": 0.9153, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.9907053034445052, | |
| "grad_norm": 0.24513466656208038, | |
| "learning_rate": 0.00022335766423357663, | |
| "loss": 0.7944, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.9928922908693275, | |
| "grad_norm": 0.296447217464447, | |
| "learning_rate": 0.00022311435523114355, | |
| "loss": 0.7568, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.9950792782941498, | |
| "grad_norm": 0.27960076928138733, | |
| "learning_rate": 0.00022287104622871045, | |
| "loss": 0.6744, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.9972662657189721, | |
| "grad_norm": 0.2234726995229721, | |
| "learning_rate": 0.00022262773722627735, | |
| "loss": 0.8226, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.9994532531437944, | |
| "grad_norm": 0.20796756446361542, | |
| "learning_rate": 0.00022238442822384425, | |
| "loss": 0.6815, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.0016402405686167, | |
| "grad_norm": 0.4041379392147064, | |
| "learning_rate": 0.00022214111922141117, | |
| "loss": 0.814, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.003827227993439, | |
| "grad_norm": 0.2340199053287506, | |
| "learning_rate": 0.00022189781021897807, | |
| "loss": 0.9068, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.0060142154182614, | |
| "grad_norm": 0.24355943500995636, | |
| "learning_rate": 0.00022165450121654497, | |
| "loss": 0.8377, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.0082012028430836, | |
| "grad_norm": 0.27959203720092773, | |
| "learning_rate": 0.00022141119221411192, | |
| "loss": 0.6917, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.010388190267906, | |
| "grad_norm": 0.28080224990844727, | |
| "learning_rate": 0.00022116788321167882, | |
| "loss": 0.6356, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.0125751776927283, | |
| "grad_norm": 0.48801225423812866, | |
| "learning_rate": 0.00022092457420924572, | |
| "loss": 0.5904, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.0147621651175505, | |
| "grad_norm": 0.22513045370578766, | |
| "learning_rate": 0.00022068126520681264, | |
| "loss": 1.0814, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.0169491525423728, | |
| "grad_norm": 0.24892054498195648, | |
| "learning_rate": 0.00022043795620437954, | |
| "loss": 0.682, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.0191361399671952, | |
| "grad_norm": 0.27827882766723633, | |
| "learning_rate": 0.00022019464720194644, | |
| "loss": 0.5133, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.0213231273920176, | |
| "grad_norm": 0.22580872476100922, | |
| "learning_rate": 0.00021995133819951337, | |
| "loss": 0.6408, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.0235101148168397, | |
| "grad_norm": 0.27323248982429504, | |
| "learning_rate": 0.00021970802919708026, | |
| "loss": 0.6774, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.025697102241662, | |
| "grad_norm": 0.2104388028383255, | |
| "learning_rate": 0.00021946472019464716, | |
| "loss": 0.7655, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.0278840896664845, | |
| "grad_norm": 0.26010340452194214, | |
| "learning_rate": 0.00021922141119221412, | |
| "loss": 0.6855, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.0300710770913066, | |
| "grad_norm": 0.22332607209682465, | |
| "learning_rate": 0.00021897810218978101, | |
| "loss": 0.8742, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.032258064516129, | |
| "grad_norm": 0.22284770011901855, | |
| "learning_rate": 0.0002187347931873479, | |
| "loss": 0.7075, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0344450519409514, | |
| "grad_norm": 0.32503169775009155, | |
| "learning_rate": 0.00021849148418491484, | |
| "loss": 0.8198, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.0366320393657737, | |
| "grad_norm": 0.2516832947731018, | |
| "learning_rate": 0.00021824817518248174, | |
| "loss": 0.6606, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.038819026790596, | |
| "grad_norm": 0.20064838230609894, | |
| "learning_rate": 0.00021800486618004863, | |
| "loss": 0.6696, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.0410060142154183, | |
| "grad_norm": 0.24873629212379456, | |
| "learning_rate": 0.00021776155717761556, | |
| "loss": 0.8343, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0431930016402406, | |
| "grad_norm": 0.23766379058361053, | |
| "learning_rate": 0.00021751824817518246, | |
| "loss": 0.6831, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.0453799890650628, | |
| "grad_norm": 0.24385926127433777, | |
| "learning_rate": 0.00021727493917274936, | |
| "loss": 0.6712, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.0475669764898852, | |
| "grad_norm": 0.3146672546863556, | |
| "learning_rate": 0.00021703163017031628, | |
| "loss": 0.6183, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.0497539639147075, | |
| "grad_norm": 0.25711727142333984, | |
| "learning_rate": 0.0002167883211678832, | |
| "loss": 0.6252, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0519409513395297, | |
| "grad_norm": 0.2440115511417389, | |
| "learning_rate": 0.0002165450121654501, | |
| "loss": 0.7278, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.054127938764352, | |
| "grad_norm": 0.2689894735813141, | |
| "learning_rate": 0.00021630170316301703, | |
| "loss": 0.8418, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.0563149261891744, | |
| "grad_norm": 0.2136611044406891, | |
| "learning_rate": 0.00021605839416058393, | |
| "loss": 0.6313, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.0585019136139968, | |
| "grad_norm": 0.2452273964881897, | |
| "learning_rate": 0.00021581508515815083, | |
| "loss": 0.8624, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.060688901038819, | |
| "grad_norm": 0.24893832206726074, | |
| "learning_rate": 0.00021557177615571775, | |
| "loss": 0.7416, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.0628758884636413, | |
| "grad_norm": 0.25064295530319214, | |
| "learning_rate": 0.00021532846715328465, | |
| "loss": 0.7699, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.0650628758884637, | |
| "grad_norm": 0.20812906324863434, | |
| "learning_rate": 0.00021508515815085155, | |
| "loss": 0.6415, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.0672498633132859, | |
| "grad_norm": 0.1655895859003067, | |
| "learning_rate": 0.00021484184914841848, | |
| "loss": 0.5422, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.0694368507381082, | |
| "grad_norm": 0.32013434171676636, | |
| "learning_rate": 0.00021459854014598537, | |
| "loss": 0.7758, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.0716238381629306, | |
| "grad_norm": 0.3376011252403259, | |
| "learning_rate": 0.00021435523114355227, | |
| "loss": 0.829, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0738108255877528, | |
| "grad_norm": 0.3153345584869385, | |
| "learning_rate": 0.00021411192214111923, | |
| "loss": 0.7714, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.0759978130125751, | |
| "grad_norm": 0.3034818470478058, | |
| "learning_rate": 0.00021386861313868612, | |
| "loss": 0.6347, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.0781848004373975, | |
| "grad_norm": 0.2922978699207306, | |
| "learning_rate": 0.00021362530413625302, | |
| "loss": 0.7736, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.0803717878622199, | |
| "grad_norm": 0.2873200476169586, | |
| "learning_rate": 0.00021338199513381992, | |
| "loss": 0.7169, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.082558775287042, | |
| "grad_norm": 0.19887448847293854, | |
| "learning_rate": 0.00021313868613138685, | |
| "loss": 0.591, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.0847457627118644, | |
| "grad_norm": 0.2438717931509018, | |
| "learning_rate": 0.00021289537712895374, | |
| "loss": 0.7372, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.0869327501366868, | |
| "grad_norm": 0.2844999432563782, | |
| "learning_rate": 0.00021265206812652064, | |
| "loss": 0.9492, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.089119737561509, | |
| "grad_norm": 0.23038767278194427, | |
| "learning_rate": 0.00021240875912408757, | |
| "loss": 0.6491, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.0913067249863313, | |
| "grad_norm": 0.25681063532829285, | |
| "learning_rate": 0.00021216545012165447, | |
| "loss": 0.7385, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.0934937124111537, | |
| "grad_norm": 0.26198524236679077, | |
| "learning_rate": 0.00021192214111922137, | |
| "loss": 0.6631, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.095680699835976, | |
| "grad_norm": 0.2462042272090912, | |
| "learning_rate": 0.00021167883211678832, | |
| "loss": 0.6845, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.0978676872607982, | |
| "grad_norm": 0.4053664803504944, | |
| "learning_rate": 0.00021143552311435522, | |
| "loss": 0.8192, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.1000546746856206, | |
| "grad_norm": 0.1960192620754242, | |
| "learning_rate": 0.00021119221411192211, | |
| "loss": 0.654, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.102241662110443, | |
| "grad_norm": 0.288463294506073, | |
| "learning_rate": 0.00021094890510948904, | |
| "loss": 0.845, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.104428649535265, | |
| "grad_norm": 0.2577453553676605, | |
| "learning_rate": 0.00021070559610705594, | |
| "loss": 0.7532, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.1066156369600875, | |
| "grad_norm": 0.2428467571735382, | |
| "learning_rate": 0.00021046228710462284, | |
| "loss": 0.633, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.1088026243849098, | |
| "grad_norm": 0.2504101097583771, | |
| "learning_rate": 0.00021021897810218976, | |
| "loss": 0.7633, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.110989611809732, | |
| "grad_norm": 0.30137497186660767, | |
| "learning_rate": 0.00020997566909975666, | |
| "loss": 0.7516, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.1131765992345544, | |
| "grad_norm": 0.26197975873947144, | |
| "learning_rate": 0.00020973236009732356, | |
| "loss": 0.772, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.1153635866593767, | |
| "grad_norm": 0.21030549705028534, | |
| "learning_rate": 0.0002094890510948905, | |
| "loss": 0.656, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.117550574084199, | |
| "grad_norm": 0.32491016387939453, | |
| "learning_rate": 0.0002092457420924574, | |
| "loss": 0.6437, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.1197375615090213, | |
| "grad_norm": 0.35852229595184326, | |
| "learning_rate": 0.0002090024330900243, | |
| "loss": 0.6878, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.1219245489338436, | |
| "grad_norm": 0.2437012642621994, | |
| "learning_rate": 0.00020875912408759123, | |
| "loss": 0.7602, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.124111536358666, | |
| "grad_norm": 0.30889564752578735, | |
| "learning_rate": 0.00020851581508515813, | |
| "loss": 0.8807, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.1262985237834882, | |
| "grad_norm": 0.24090994894504547, | |
| "learning_rate": 0.00020827250608272503, | |
| "loss": 0.6094, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.1284855112083105, | |
| "grad_norm": 0.22549685835838318, | |
| "learning_rate": 0.00020802919708029196, | |
| "loss": 0.6548, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.130672498633133, | |
| "grad_norm": 0.21927274763584137, | |
| "learning_rate": 0.00020778588807785885, | |
| "loss": 0.5024, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.132859486057955, | |
| "grad_norm": 0.2773030996322632, | |
| "learning_rate": 0.00020754257907542575, | |
| "loss": 0.7162, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.1350464734827774, | |
| "grad_norm": 0.23646964132785797, | |
| "learning_rate": 0.0002072992700729927, | |
| "loss": 0.495, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.1372334609075998, | |
| "grad_norm": 0.18650543689727783, | |
| "learning_rate": 0.0002070559610705596, | |
| "loss": 0.6832, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.1394204483324222, | |
| "grad_norm": 0.2712174952030182, | |
| "learning_rate": 0.0002068126520681265, | |
| "loss": 0.6178, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.1416074357572443, | |
| "grad_norm": 0.5166855454444885, | |
| "learning_rate": 0.00020656934306569343, | |
| "loss": 0.7423, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1437944231820667, | |
| "grad_norm": 0.23658710718154907, | |
| "learning_rate": 0.00020632603406326033, | |
| "loss": 0.823, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.145981410606889, | |
| "grad_norm": 0.2502736747264862, | |
| "learning_rate": 0.00020608272506082722, | |
| "loss": 0.7652, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.1481683980317112, | |
| "grad_norm": 0.3579782545566559, | |
| "learning_rate": 0.00020583941605839415, | |
| "loss": 0.6607, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.1503553854565336, | |
| "grad_norm": 0.23584862053394318, | |
| "learning_rate": 0.00020559610705596105, | |
| "loss": 0.5478, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.152542372881356, | |
| "grad_norm": 0.20075763761997223, | |
| "learning_rate": 0.00020535279805352795, | |
| "loss": 0.4904, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.1547293603061783, | |
| "grad_norm": 0.28536489605903625, | |
| "learning_rate": 0.0002051094890510949, | |
| "loss": 0.725, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.1569163477310005, | |
| "grad_norm": 0.2919155955314636, | |
| "learning_rate": 0.0002048661800486618, | |
| "loss": 0.7854, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.1591033351558229, | |
| "grad_norm": 0.2859315574169159, | |
| "learning_rate": 0.0002046228710462287, | |
| "loss": 0.7588, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.1612903225806452, | |
| "grad_norm": 0.2310762107372284, | |
| "learning_rate": 0.0002043795620437956, | |
| "loss": 0.7313, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.1634773100054674, | |
| "grad_norm": 0.37531688809394836, | |
| "learning_rate": 0.00020413625304136252, | |
| "loss": 0.7386, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.1656642974302898, | |
| "grad_norm": 0.2388879358768463, | |
| "learning_rate": 0.00020389294403892942, | |
| "loss": 0.6976, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.1678512848551121, | |
| "grad_norm": 0.35468119382858276, | |
| "learning_rate": 0.00020364963503649632, | |
| "loss": 0.7769, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.1700382722799345, | |
| "grad_norm": 0.35036739706993103, | |
| "learning_rate": 0.00020340632603406324, | |
| "loss": 0.7023, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.1722252597047567, | |
| "grad_norm": 0.22455590963363647, | |
| "learning_rate": 0.00020316301703163014, | |
| "loss": 0.6198, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.174412247129579, | |
| "grad_norm": 0.2568056881427765, | |
| "learning_rate": 0.00020291970802919704, | |
| "loss": 0.8131, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.1765992345544014, | |
| "grad_norm": 0.2159530222415924, | |
| "learning_rate": 0.000202676399026764, | |
| "loss": 0.608, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.1787862219792236, | |
| "grad_norm": 0.3671428859233856, | |
| "learning_rate": 0.0002024330900243309, | |
| "loss": 0.7317, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.180973209404046, | |
| "grad_norm": 0.40387099981307983, | |
| "learning_rate": 0.0002021897810218978, | |
| "loss": 0.7829, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.1831601968288683, | |
| "grad_norm": 0.23750804364681244, | |
| "learning_rate": 0.00020194647201946471, | |
| "loss": 0.7261, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.1853471842536905, | |
| "grad_norm": 0.29545098543167114, | |
| "learning_rate": 0.0002017031630170316, | |
| "loss": 0.641, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.1875341716785128, | |
| "grad_norm": 0.28032809495925903, | |
| "learning_rate": 0.0002014598540145985, | |
| "loss": 0.5683, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.1897211591033352, | |
| "grad_norm": 0.42475053668022156, | |
| "learning_rate": 0.00020121654501216544, | |
| "loss": 0.7681, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.1919081465281574, | |
| "grad_norm": 0.3492116928100586, | |
| "learning_rate": 0.00020097323600973233, | |
| "loss": 0.6798, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.1940951339529797, | |
| "grad_norm": 0.358916699886322, | |
| "learning_rate": 0.00020072992700729923, | |
| "loss": 0.7502, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.196282121377802, | |
| "grad_norm": 0.27878785133361816, | |
| "learning_rate": 0.00020048661800486619, | |
| "loss": 0.7625, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.1984691088026245, | |
| "grad_norm": 0.29086047410964966, | |
| "learning_rate": 0.00020024330900243308, | |
| "loss": 0.6944, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.2006560962274466, | |
| "grad_norm": 0.2969072759151459, | |
| "learning_rate": 0.00019999999999999998, | |
| "loss": 0.7105, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.202843083652269, | |
| "grad_norm": 0.38667795062065125, | |
| "learning_rate": 0.0001997566909975669, | |
| "loss": 0.7046, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.2050300710770914, | |
| "grad_norm": 0.26905378699302673, | |
| "learning_rate": 0.0001995133819951338, | |
| "loss": 0.8177, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.2072170585019135, | |
| "grad_norm": 0.25222644209861755, | |
| "learning_rate": 0.0001992700729927007, | |
| "loss": 0.7232, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.209404045926736, | |
| "grad_norm": 0.23291464149951935, | |
| "learning_rate": 0.00019902676399026763, | |
| "loss": 0.6135, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.2115910333515583, | |
| "grad_norm": 0.24224941432476044, | |
| "learning_rate": 0.00019878345498783453, | |
| "loss": 0.6832, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.2137780207763806, | |
| "grad_norm": 0.2552938759326935, | |
| "learning_rate": 0.00019854014598540143, | |
| "loss": 0.7707, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.2159650082012028, | |
| "grad_norm": 0.3016825318336487, | |
| "learning_rate": 0.00019829683698296835, | |
| "loss": 0.6199, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.2181519956260252, | |
| "grad_norm": 0.2980547547340393, | |
| "learning_rate": 0.00019805352798053528, | |
| "loss": 0.7232, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.2203389830508475, | |
| "grad_norm": 0.3470471203327179, | |
| "learning_rate": 0.00019781021897810218, | |
| "loss": 0.6665, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.2225259704756697, | |
| "grad_norm": 0.2844526171684265, | |
| "learning_rate": 0.0001975669099756691, | |
| "loss": 0.5931, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.224712957900492, | |
| "grad_norm": 0.2751246988773346, | |
| "learning_rate": 0.000197323600973236, | |
| "loss": 0.6265, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.2268999453253144, | |
| "grad_norm": 0.2560863792896271, | |
| "learning_rate": 0.0001970802919708029, | |
| "loss": 0.6442, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.2290869327501368, | |
| "grad_norm": 0.28800928592681885, | |
| "learning_rate": 0.00019683698296836982, | |
| "loss": 0.7135, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.231273920174959, | |
| "grad_norm": 0.44916409254074097, | |
| "learning_rate": 0.00019659367396593672, | |
| "loss": 0.654, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.2334609075997813, | |
| "grad_norm": 0.28822582960128784, | |
| "learning_rate": 0.00019635036496350362, | |
| "loss": 0.7907, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.2356478950246037, | |
| "grad_norm": 0.3168655037879944, | |
| "learning_rate": 0.00019610705596107055, | |
| "loss": 0.6821, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.2378348824494259, | |
| "grad_norm": 0.24087372422218323, | |
| "learning_rate": 0.00019586374695863744, | |
| "loss": 0.5753, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.2400218698742482, | |
| "grad_norm": 0.28054556250572205, | |
| "learning_rate": 0.00019562043795620434, | |
| "loss": 0.7782, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.2422088572990706, | |
| "grad_norm": 0.2647920250892639, | |
| "learning_rate": 0.00019537712895377127, | |
| "loss": 0.672, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.2443958447238928, | |
| "grad_norm": 0.2773146331310272, | |
| "learning_rate": 0.0001951338199513382, | |
| "loss": 0.6951, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.2465828321487151, | |
| "grad_norm": 0.22990505397319794, | |
| "learning_rate": 0.0001948905109489051, | |
| "loss": 0.8364, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2487698195735375, | |
| "grad_norm": 0.27569764852523804, | |
| "learning_rate": 0.000194647201946472, | |
| "loss": 0.7833, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.2509568069983596, | |
| "grad_norm": 0.2720679044723511, | |
| "learning_rate": 0.00019440389294403892, | |
| "loss": 0.6844, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.253143794423182, | |
| "grad_norm": 0.31944793462753296, | |
| "learning_rate": 0.00019416058394160581, | |
| "loss": 0.7761, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.2553307818480044, | |
| "grad_norm": 0.3249347507953644, | |
| "learning_rate": 0.0001939172749391727, | |
| "loss": 0.6429, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.2575177692728268, | |
| "grad_norm": 0.3601590692996979, | |
| "learning_rate": 0.00019367396593673964, | |
| "loss": 0.7387, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.259704756697649, | |
| "grad_norm": 0.30120986700057983, | |
| "learning_rate": 0.00019343065693430654, | |
| "loss": 0.7797, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.2618917441224713, | |
| "grad_norm": 0.2647385895252228, | |
| "learning_rate": 0.00019318734793187344, | |
| "loss": 0.6112, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.2640787315472937, | |
| "grad_norm": 0.2170192301273346, | |
| "learning_rate": 0.0001929440389294404, | |
| "loss": 0.6963, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.2662657189721158, | |
| "grad_norm": 0.23418468236923218, | |
| "learning_rate": 0.0001927007299270073, | |
| "loss": 0.7496, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.2684527063969382, | |
| "grad_norm": 0.29596206545829773, | |
| "learning_rate": 0.00019245742092457418, | |
| "loss": 0.8172, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2706396938217606, | |
| "grad_norm": 0.2754702568054199, | |
| "learning_rate": 0.0001922141119221411, | |
| "loss": 0.6895, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.272826681246583, | |
| "grad_norm": 0.2041543573141098, | |
| "learning_rate": 0.000191970802919708, | |
| "loss": 0.7623, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.275013668671405, | |
| "grad_norm": 0.3801957964897156, | |
| "learning_rate": 0.0001917274939172749, | |
| "loss": 0.634, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.2772006560962275, | |
| "grad_norm": 0.39465653896331787, | |
| "learning_rate": 0.00019148418491484183, | |
| "loss": 0.6114, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.2793876435210498, | |
| "grad_norm": 0.36799028515815735, | |
| "learning_rate": 0.00019124087591240873, | |
| "loss": 0.757, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.281574630945872, | |
| "grad_norm": 0.2876284718513489, | |
| "learning_rate": 0.00019099756690997563, | |
| "loss": 0.6992, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.2837616183706944, | |
| "grad_norm": 0.4593120813369751, | |
| "learning_rate": 0.00019075425790754258, | |
| "loss": 0.6095, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.2859486057955167, | |
| "grad_norm": 0.24458545446395874, | |
| "learning_rate": 0.00019051094890510948, | |
| "loss": 0.5724, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.288135593220339, | |
| "grad_norm": 0.22930872440338135, | |
| "learning_rate": 0.00019026763990267638, | |
| "loss": 0.5479, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.2903225806451613, | |
| "grad_norm": 0.32167893648147583, | |
| "learning_rate": 0.0001900243309002433, | |
| "loss": 0.7158, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.2925095680699836, | |
| "grad_norm": 0.2847557067871094, | |
| "learning_rate": 0.0001897810218978102, | |
| "loss": 0.6545, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.2946965554948058, | |
| "grad_norm": 0.24358853697776794, | |
| "learning_rate": 0.0001895377128953771, | |
| "loss": 0.7497, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.2968835429196282, | |
| "grad_norm": 0.26657119393348694, | |
| "learning_rate": 0.00018929440389294403, | |
| "loss": 0.6816, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.2990705303444505, | |
| "grad_norm": 0.3368627727031708, | |
| "learning_rate": 0.00018905109489051093, | |
| "loss": 0.613, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.301257517769273, | |
| "grad_norm": 0.28971466422080994, | |
| "learning_rate": 0.00018880778588807782, | |
| "loss": 0.814, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.3034445051940953, | |
| "grad_norm": 0.3216496706008911, | |
| "learning_rate": 0.00018856447688564478, | |
| "loss": 0.7116, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.3056314926189174, | |
| "grad_norm": 0.25016555190086365, | |
| "learning_rate": 0.00018832116788321167, | |
| "loss": 0.7034, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.3078184800437398, | |
| "grad_norm": 0.2602551579475403, | |
| "learning_rate": 0.00018807785888077857, | |
| "loss": 0.6624, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.310005467468562, | |
| "grad_norm": 0.1847269982099533, | |
| "learning_rate": 0.0001878345498783455, | |
| "loss": 0.6645, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.3121924548933843, | |
| "grad_norm": 0.20593389868736267, | |
| "learning_rate": 0.0001875912408759124, | |
| "loss": 0.6471, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.3143794423182067, | |
| "grad_norm": 0.2651140093803406, | |
| "learning_rate": 0.0001873479318734793, | |
| "loss": 0.6743, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.316566429743029, | |
| "grad_norm": 0.3243972659111023, | |
| "learning_rate": 0.00018710462287104622, | |
| "loss": 0.662, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.3187534171678512, | |
| "grad_norm": 0.24702341854572296, | |
| "learning_rate": 0.00018686131386861312, | |
| "loss": 0.746, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.3209404045926736, | |
| "grad_norm": 0.25382477045059204, | |
| "learning_rate": 0.00018661800486618002, | |
| "loss": 0.7115, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.323127392017496, | |
| "grad_norm": 0.26453620195388794, | |
| "learning_rate": 0.00018637469586374697, | |
| "loss": 0.5843, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.3253143794423181, | |
| "grad_norm": 0.25161460041999817, | |
| "learning_rate": 0.00018613138686131387, | |
| "loss": 0.7831, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.3275013668671405, | |
| "grad_norm": 0.2947143316268921, | |
| "learning_rate": 0.00018588807785888077, | |
| "loss": 0.6277, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.3296883542919629, | |
| "grad_norm": 0.25893881916999817, | |
| "learning_rate": 0.00018564476885644767, | |
| "loss": 0.6816, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.3318753417167852, | |
| "grad_norm": 0.3958803713321686, | |
| "learning_rate": 0.0001854014598540146, | |
| "loss": 0.8033, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.3340623291416074, | |
| "grad_norm": 0.28083765506744385, | |
| "learning_rate": 0.0001851581508515815, | |
| "loss": 0.6587, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.3362493165664298, | |
| "grad_norm": 0.26417723298072815, | |
| "learning_rate": 0.0001849148418491484, | |
| "loss": 0.6867, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.3384363039912521, | |
| "grad_norm": 0.2628178000450134, | |
| "learning_rate": 0.0001846715328467153, | |
| "loss": 0.6275, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.3406232914160743, | |
| "grad_norm": 0.20500022172927856, | |
| "learning_rate": 0.0001844282238442822, | |
| "loss": 0.6152, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.3428102788408967, | |
| "grad_norm": 0.22486689686775208, | |
| "learning_rate": 0.0001841849148418491, | |
| "loss": 0.5407, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.344997266265719, | |
| "grad_norm": 0.3170478641986847, | |
| "learning_rate": 0.00018394160583941606, | |
| "loss": 0.7176, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.3471842536905414, | |
| "grad_norm": 0.34868374466896057, | |
| "learning_rate": 0.00018369829683698296, | |
| "loss": 0.5815, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.3493712411153636, | |
| "grad_norm": 0.2484477013349533, | |
| "learning_rate": 0.00018345498783454986, | |
| "loss": 0.6613, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.351558228540186, | |
| "grad_norm": 0.2799300253391266, | |
| "learning_rate": 0.00018321167883211678, | |
| "loss": 0.6685, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.353745215965008, | |
| "grad_norm": 0.28434398770332336, | |
| "learning_rate": 0.00018296836982968368, | |
| "loss": 0.7881, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.3559322033898304, | |
| "grad_norm": 0.25863373279571533, | |
| "learning_rate": 0.00018272506082725058, | |
| "loss": 0.7325, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.3581191908146528, | |
| "grad_norm": 0.3039908707141876, | |
| "learning_rate": 0.0001824817518248175, | |
| "loss": 0.8676, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.3603061782394752, | |
| "grad_norm": 0.29525163769721985, | |
| "learning_rate": 0.0001822384428223844, | |
| "loss": 0.8909, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.3624931656642976, | |
| "grad_norm": 0.475063294172287, | |
| "learning_rate": 0.0001819951338199513, | |
| "loss": 0.6882, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.3646801530891197, | |
| "grad_norm": 0.22500012814998627, | |
| "learning_rate": 0.00018175182481751826, | |
| "loss": 0.6354, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.366867140513942, | |
| "grad_norm": 0.24890188872814178, | |
| "learning_rate": 0.00018150851581508515, | |
| "loss": 0.5322, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.3690541279387642, | |
| "grad_norm": 0.24399027228355408, | |
| "learning_rate": 0.00018126520681265205, | |
| "loss": 0.7255, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.3712411153635866, | |
| "grad_norm": 0.32299381494522095, | |
| "learning_rate": 0.00018102189781021898, | |
| "loss": 0.5199, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.373428102788409, | |
| "grad_norm": 0.4946720600128174, | |
| "learning_rate": 0.00018077858880778588, | |
| "loss": 0.7099, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.3756150902132314, | |
| "grad_norm": 0.47641122341156006, | |
| "learning_rate": 0.00018053527980535278, | |
| "loss": 0.752, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.3778020776380535, | |
| "grad_norm": 0.3367193937301636, | |
| "learning_rate": 0.0001802919708029197, | |
| "loss": 0.7196, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.3799890650628759, | |
| "grad_norm": 0.27993133664131165, | |
| "learning_rate": 0.0001800486618004866, | |
| "loss": 0.7357, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.3821760524876983, | |
| "grad_norm": 0.27575206756591797, | |
| "learning_rate": 0.0001798053527980535, | |
| "loss": 0.6148, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.3843630399125204, | |
| "grad_norm": 0.33214282989501953, | |
| "learning_rate": 0.00017956204379562042, | |
| "loss": 0.771, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.3865500273373428, | |
| "grad_norm": 0.2970830798149109, | |
| "learning_rate": 0.00017931873479318735, | |
| "loss": 0.6882, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.3887370147621652, | |
| "grad_norm": 0.3435869812965393, | |
| "learning_rate": 0.00017907542579075425, | |
| "loss": 0.6992, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.3909240021869875, | |
| "grad_norm": 0.3328729569911957, | |
| "learning_rate": 0.00017883211678832117, | |
| "loss": 0.6594, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.3931109896118097, | |
| "grad_norm": 0.3031856119632721, | |
| "learning_rate": 0.00017858880778588807, | |
| "loss": 0.642, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.395297977036632, | |
| "grad_norm": 0.2761346399784088, | |
| "learning_rate": 0.00017834549878345497, | |
| "loss": 1.0442, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.3974849644614544, | |
| "grad_norm": 0.34098902344703674, | |
| "learning_rate": 0.0001781021897810219, | |
| "loss": 0.9509, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.3996719518862766, | |
| "grad_norm": 0.4181225299835205, | |
| "learning_rate": 0.0001778588807785888, | |
| "loss": 0.6521, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.401858939311099, | |
| "grad_norm": 0.2533126473426819, | |
| "learning_rate": 0.0001776155717761557, | |
| "loss": 0.6221, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.4040459267359213, | |
| "grad_norm": 0.25691646337509155, | |
| "learning_rate": 0.00017737226277372262, | |
| "loss": 0.5691, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.4062329141607437, | |
| "grad_norm": 0.2649155557155609, | |
| "learning_rate": 0.00017712895377128952, | |
| "loss": 0.614, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.4084199015855658, | |
| "grad_norm": 0.32973209023475647, | |
| "learning_rate": 0.00017688564476885641, | |
| "loss": 0.878, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.4106068890103882, | |
| "grad_norm": 0.3559141755104065, | |
| "learning_rate": 0.00017664233576642334, | |
| "loss": 0.7954, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.4127938764352104, | |
| "grad_norm": 0.2913306653499603, | |
| "learning_rate": 0.00017639902676399026, | |
| "loss": 0.735, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.4149808638600327, | |
| "grad_norm": 0.24183817207813263, | |
| "learning_rate": 0.00017615571776155716, | |
| "loss": 0.5965, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.4171678512848551, | |
| "grad_norm": 0.2638205885887146, | |
| "learning_rate": 0.00017591240875912406, | |
| "loss": 0.6843, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.4193548387096775, | |
| "grad_norm": 0.23057186603546143, | |
| "learning_rate": 0.000175669099756691, | |
| "loss": 0.7453, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.4215418261344999, | |
| "grad_norm": 0.22737360000610352, | |
| "learning_rate": 0.00017542579075425789, | |
| "loss": 0.5423, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.423728813559322, | |
| "grad_norm": 0.25872430205345154, | |
| "learning_rate": 0.00017518248175182478, | |
| "loss": 0.7591, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.4259158009841444, | |
| "grad_norm": 0.2998059391975403, | |
| "learning_rate": 0.0001749391727493917, | |
| "loss": 0.6222, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.4281027884089665, | |
| "grad_norm": 0.21351587772369385, | |
| "learning_rate": 0.0001746958637469586, | |
| "loss": 0.7082, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.430289775833789, | |
| "grad_norm": 0.34969425201416016, | |
| "learning_rate": 0.0001744525547445255, | |
| "loss": 0.6319, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.4324767632586113, | |
| "grad_norm": 0.2845169007778168, | |
| "learning_rate": 0.00017420924574209246, | |
| "loss": 0.6965, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.4346637506834337, | |
| "grad_norm": 0.2735065221786499, | |
| "learning_rate": 0.00017396593673965936, | |
| "loss": 0.6866, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.4368507381082558, | |
| "grad_norm": 0.2701031267642975, | |
| "learning_rate": 0.00017372262773722626, | |
| "loss": 0.8098, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.4390377255330782, | |
| "grad_norm": 0.319159597158432, | |
| "learning_rate": 0.00017347931873479318, | |
| "loss": 0.6627, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.4412247129579006, | |
| "grad_norm": 0.24762673676013947, | |
| "learning_rate": 0.00017323600973236008, | |
| "loss": 0.8179, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.4434117003827227, | |
| "grad_norm": 0.26977255940437317, | |
| "learning_rate": 0.00017299270072992698, | |
| "loss": 0.5487, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.445598687807545, | |
| "grad_norm": 0.25042101740837097, | |
| "learning_rate": 0.0001727493917274939, | |
| "loss": 0.9502, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.4477856752323675, | |
| "grad_norm": 0.28913062810897827, | |
| "learning_rate": 0.0001725060827250608, | |
| "loss": 0.7216, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.4499726626571898, | |
| "grad_norm": 0.3237348198890686, | |
| "learning_rate": 0.0001722627737226277, | |
| "loss": 0.7644, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.452159650082012, | |
| "grad_norm": 0.34338346123695374, | |
| "learning_rate": 0.00017201946472019465, | |
| "loss": 0.9851, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.4543466375068343, | |
| "grad_norm": 0.1985798180103302, | |
| "learning_rate": 0.00017177615571776155, | |
| "loss": 0.649, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.4565336249316567, | |
| "grad_norm": 0.2959745526313782, | |
| "learning_rate": 0.00017153284671532845, | |
| "loss": 0.8134, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.4587206123564789, | |
| "grad_norm": 0.28383585810661316, | |
| "learning_rate": 0.00017128953771289537, | |
| "loss": 0.6864, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.4609075997813012, | |
| "grad_norm": 0.35177820920944214, | |
| "learning_rate": 0.00017104622871046227, | |
| "loss": 0.779, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.4630945872061236, | |
| "grad_norm": 0.27833032608032227, | |
| "learning_rate": 0.00017080291970802917, | |
| "loss": 0.7377, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.465281574630946, | |
| "grad_norm": 0.26814982295036316, | |
| "learning_rate": 0.0001705596107055961, | |
| "loss": 0.6367, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.4674685620557681, | |
| "grad_norm": 0.29226943850517273, | |
| "learning_rate": 0.000170316301703163, | |
| "loss": 0.6674, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.4696555494805905, | |
| "grad_norm": 0.23404401540756226, | |
| "learning_rate": 0.0001700729927007299, | |
| "loss": 0.6187, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.4718425369054127, | |
| "grad_norm": 0.1943274289369583, | |
| "learning_rate": 0.00016982968369829685, | |
| "loss": 0.7886, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.474029524330235, | |
| "grad_norm": 0.2543155550956726, | |
| "learning_rate": 0.00016958637469586374, | |
| "loss": 0.8211, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.4762165117550574, | |
| "grad_norm": 0.34419891238212585, | |
| "learning_rate": 0.00016934306569343064, | |
| "loss": 0.7097, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.4784034991798798, | |
| "grad_norm": 0.3277907371520996, | |
| "learning_rate": 0.00016909975669099757, | |
| "loss": 0.6725, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.4805904866047022, | |
| "grad_norm": 0.21943743526935577, | |
| "learning_rate": 0.00016885644768856447, | |
| "loss": 0.6246, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.4827774740295243, | |
| "grad_norm": 0.6248902678489685, | |
| "learning_rate": 0.00016861313868613137, | |
| "loss": 0.8422, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.4849644614543467, | |
| "grad_norm": 0.3430839478969574, | |
| "learning_rate": 0.0001683698296836983, | |
| "loss": 0.7539, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.4871514488791688, | |
| "grad_norm": 0.25437131524086, | |
| "learning_rate": 0.0001681265206812652, | |
| "loss": 0.8793, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.4893384363039912, | |
| "grad_norm": 0.44833317399024963, | |
| "learning_rate": 0.0001678832116788321, | |
| "loss": 0.7591, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.4915254237288136, | |
| "grad_norm": 0.359467089176178, | |
| "learning_rate": 0.00016763990267639899, | |
| "loss": 0.6912, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.493712411153636, | |
| "grad_norm": 0.3209226429462433, | |
| "learning_rate": 0.00016739659367396594, | |
| "loss": 0.6292, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.495899398578458, | |
| "grad_norm": 0.30807530879974365, | |
| "learning_rate": 0.00016715328467153284, | |
| "loss": 0.7619, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.4980863860032805, | |
| "grad_norm": 0.38420820236206055, | |
| "learning_rate": 0.00016690997566909974, | |
| "loss": 0.7212, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.5002733734281026, | |
| "grad_norm": 0.27499136328697205, | |
| "learning_rate": 0.00016666666666666666, | |
| "loss": 0.7246, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.502460360852925, | |
| "grad_norm": 0.3359529376029968, | |
| "learning_rate": 0.00016642335766423356, | |
| "loss": 0.7988, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.5046473482777474, | |
| "grad_norm": 0.2965240180492401, | |
| "learning_rate": 0.00016618004866180046, | |
| "loss": 0.5721, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.5068343357025697, | |
| "grad_norm": 0.35766786336898804, | |
| "learning_rate": 0.00016593673965936738, | |
| "loss": 0.8168, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.5090213231273921, | |
| "grad_norm": 0.2500085234642029, | |
| "learning_rate": 0.00016569343065693428, | |
| "loss": 0.7125, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.5112083105522143, | |
| "grad_norm": 0.4028027355670929, | |
| "learning_rate": 0.00016545012165450118, | |
| "loss": 0.8912, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.5133952979770366, | |
| "grad_norm": 0.365488737821579, | |
| "learning_rate": 0.00016520681265206813, | |
| "loss": 0.8114, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.5155822854018588, | |
| "grad_norm": 0.2998720109462738, | |
| "learning_rate": 0.00016496350364963503, | |
| "loss": 0.7185, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.5177692728266812, | |
| "grad_norm": 0.31432968378067017, | |
| "learning_rate": 0.00016472019464720193, | |
| "loss": 0.6455, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.5199562602515035, | |
| "grad_norm": 0.23023012280464172, | |
| "learning_rate": 0.00016447688564476886, | |
| "loss": 0.5255, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.522143247676326, | |
| "grad_norm": 0.3279372453689575, | |
| "learning_rate": 0.00016423357664233575, | |
| "loss": 0.696, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.5243302351011483, | |
| "grad_norm": 0.3116084635257721, | |
| "learning_rate": 0.00016399026763990265, | |
| "loss": 0.6297, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.5265172225259704, | |
| "grad_norm": 0.2646781802177429, | |
| "learning_rate": 0.00016374695863746958, | |
| "loss": 0.7854, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.5287042099507928, | |
| "grad_norm": 0.29048752784729004, | |
| "learning_rate": 0.00016350364963503648, | |
| "loss": 0.6409, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.530891197375615, | |
| "grad_norm": 0.2570263743400574, | |
| "learning_rate": 0.00016326034063260337, | |
| "loss": 0.6613, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.5330781848004373, | |
| "grad_norm": 0.3784395456314087, | |
| "learning_rate": 0.00016301703163017033, | |
| "loss": 0.5857, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.5352651722252597, | |
| "grad_norm": 0.3324502110481262, | |
| "learning_rate": 0.00016277372262773723, | |
| "loss": 0.7317, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.537452159650082, | |
| "grad_norm": 0.2623542249202728, | |
| "learning_rate": 0.00016253041362530412, | |
| "loss": 0.648, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.5396391470749045, | |
| "grad_norm": 0.31035107374191284, | |
| "learning_rate": 0.00016228710462287105, | |
| "loss": 0.8125, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.5418261344997266, | |
| "grad_norm": 0.35497644543647766, | |
| "learning_rate": 0.00016204379562043795, | |
| "loss": 0.7798, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.544013121924549, | |
| "grad_norm": 0.4693346321582794, | |
| "learning_rate": 0.00016180048661800485, | |
| "loss": 0.7838, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.5462001093493711, | |
| "grad_norm": 0.2803730368614197, | |
| "learning_rate": 0.00016155717761557177, | |
| "loss": 0.9113, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.5483870967741935, | |
| "grad_norm": 0.3578079342842102, | |
| "learning_rate": 0.00016131386861313867, | |
| "loss": 0.6923, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.5505740841990159, | |
| "grad_norm": 0.29390111565589905, | |
| "learning_rate": 0.00016107055961070557, | |
| "loss": 0.8407, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.5527610716238383, | |
| "grad_norm": 0.32291004061698914, | |
| "learning_rate": 0.0001608272506082725, | |
| "loss": 0.8082, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.5549480590486606, | |
| "grad_norm": 0.2640690803527832, | |
| "learning_rate": 0.00016058394160583942, | |
| "loss": 0.6813, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.5571350464734828, | |
| "grad_norm": 0.32076698541641235, | |
| "learning_rate": 0.00016034063260340632, | |
| "loss": 0.8319, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.559322033898305, | |
| "grad_norm": 0.29734277725219727, | |
| "learning_rate": 0.00016009732360097324, | |
| "loss": 0.9649, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.5615090213231273, | |
| "grad_norm": 0.3353315591812134, | |
| "learning_rate": 0.00015985401459854014, | |
| "loss": 0.6102, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.5636960087479497, | |
| "grad_norm": 0.24924345314502716, | |
| "learning_rate": 0.00015961070559610704, | |
| "loss": 0.6868, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.565882996172772, | |
| "grad_norm": 0.21561355888843536, | |
| "learning_rate": 0.00015936739659367397, | |
| "loss": 0.6087, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.5680699835975944, | |
| "grad_norm": 0.28856387734413147, | |
| "learning_rate": 0.00015912408759124086, | |
| "loss": 0.7849, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.5702569710224166, | |
| "grad_norm": 0.2342023402452469, | |
| "learning_rate": 0.00015888077858880776, | |
| "loss": 0.8097, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.572443958447239, | |
| "grad_norm": 0.27620434761047363, | |
| "learning_rate": 0.00015863746958637466, | |
| "loss": 0.6495, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.574630945872061, | |
| "grad_norm": 0.3575909733772278, | |
| "learning_rate": 0.00015839416058394159, | |
| "loss": 0.5667, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.5768179332968835, | |
| "grad_norm": 0.29075026512145996, | |
| "learning_rate": 0.00015815085158150848, | |
| "loss": 0.734, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.5790049207217058, | |
| "grad_norm": 0.317648321390152, | |
| "learning_rate": 0.0001579075425790754, | |
| "loss": 0.6881, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.5811919081465282, | |
| "grad_norm": 0.2477569282054901, | |
| "learning_rate": 0.00015766423357664234, | |
| "loss": 0.7097, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.5833788955713506, | |
| "grad_norm": 0.2733086347579956, | |
| "learning_rate": 0.00015742092457420923, | |
| "loss": 0.4836, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.5855658829961727, | |
| "grad_norm": 0.32278919219970703, | |
| "learning_rate": 0.00015717761557177613, | |
| "loss": 0.6931, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.587752870420995, | |
| "grad_norm": 0.2804641127586365, | |
| "learning_rate": 0.00015693430656934306, | |
| "loss": 0.6908, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.5899398578458173, | |
| "grad_norm": 0.28953608870506287, | |
| "learning_rate": 0.00015669099756690996, | |
| "loss": 0.7086, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.5921268452706396, | |
| "grad_norm": 0.21297629177570343, | |
| "learning_rate": 0.00015644768856447685, | |
| "loss": 0.6663, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.594313832695462, | |
| "grad_norm": 0.23495450615882874, | |
| "learning_rate": 0.00015620437956204378, | |
| "loss": 0.7177, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.5965008201202844, | |
| "grad_norm": 0.4271846413612366, | |
| "learning_rate": 0.00015596107055961068, | |
| "loss": 0.9376, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.5986878075451068, | |
| "grad_norm": 0.3190995156764984, | |
| "learning_rate": 0.00015571776155717758, | |
| "loss": 0.5957, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.600874794969929, | |
| "grad_norm": 0.3533025085926056, | |
| "learning_rate": 0.00015547445255474453, | |
| "loss": 0.8295, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.6030617823947513, | |
| "grad_norm": 0.48731425404548645, | |
| "learning_rate": 0.00015523114355231143, | |
| "loss": 0.7024, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.6052487698195734, | |
| "grad_norm": 0.2876966595649719, | |
| "learning_rate": 0.00015498783454987833, | |
| "loss": 0.6858, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.6074357572443958, | |
| "grad_norm": 0.2668203115463257, | |
| "learning_rate": 0.00015474452554744525, | |
| "loss": 0.7548, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.6096227446692182, | |
| "grad_norm": 0.3176876902580261, | |
| "learning_rate": 0.00015450121654501215, | |
| "loss": 0.7124, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.6118097320940405, | |
| "grad_norm": 0.3083260655403137, | |
| "learning_rate": 0.00015425790754257905, | |
| "loss": 0.682, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.613996719518863, | |
| "grad_norm": 0.38110706210136414, | |
| "learning_rate": 0.00015401459854014597, | |
| "loss": 0.9364, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.616183706943685, | |
| "grad_norm": 0.2112010270357132, | |
| "learning_rate": 0.00015377128953771287, | |
| "loss": 0.6111, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.6183706943685072, | |
| "grad_norm": 0.320754736661911, | |
| "learning_rate": 0.00015352798053527977, | |
| "loss": 0.8463, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.6205576817933296, | |
| "grad_norm": 0.2661709785461426, | |
| "learning_rate": 0.00015328467153284672, | |
| "loss": 0.6922, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.622744669218152, | |
| "grad_norm": 0.28991788625717163, | |
| "learning_rate": 0.00015304136253041362, | |
| "loss": 0.683, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.6249316566429743, | |
| "grad_norm": 0.23085246980190277, | |
| "learning_rate": 0.00015279805352798052, | |
| "loss": 0.6098, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.6271186440677967, | |
| "grad_norm": 0.3355705440044403, | |
| "learning_rate": 0.00015255474452554745, | |
| "loss": 0.7358, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.6293056314926189, | |
| "grad_norm": 0.2608512341976166, | |
| "learning_rate": 0.00015231143552311434, | |
| "loss": 0.6872, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.6314926189174412, | |
| "grad_norm": 0.28092092275619507, | |
| "learning_rate": 0.00015206812652068124, | |
| "loss": 0.7605, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.6336796063422634, | |
| "grad_norm": 0.3571244776248932, | |
| "learning_rate": 0.00015182481751824817, | |
| "loss": 0.5481, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.6358665937670858, | |
| "grad_norm": 0.30611398816108704, | |
| "learning_rate": 0.00015158150851581507, | |
| "loss": 0.6696, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.6380535811919081, | |
| "grad_norm": 0.32783061265945435, | |
| "learning_rate": 0.00015133819951338196, | |
| "loss": 0.8286, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.6402405686167305, | |
| "grad_norm": 0.2778065502643585, | |
| "learning_rate": 0.00015109489051094892, | |
| "loss": 0.6223, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.6424275560415529, | |
| "grad_norm": 0.2809867262840271, | |
| "learning_rate": 0.00015085158150851582, | |
| "loss": 0.4979, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.644614543466375, | |
| "grad_norm": 0.3469402492046356, | |
| "learning_rate": 0.00015060827250608271, | |
| "loss": 0.7277, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.6468015308911974, | |
| "grad_norm": 0.33360373973846436, | |
| "learning_rate": 0.00015036496350364964, | |
| "loss": 0.7133, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.6489885183160196, | |
| "grad_norm": 0.24966338276863098, | |
| "learning_rate": 0.00015012165450121654, | |
| "loss": 0.8344, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.651175505740842, | |
| "grad_norm": 0.35595226287841797, | |
| "learning_rate": 0.00014987834549878344, | |
| "loss": 0.5492, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.6533624931656643, | |
| "grad_norm": 0.36205926537513733, | |
| "learning_rate": 0.00014963503649635036, | |
| "loss": 0.6962, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.6555494805904867, | |
| "grad_norm": 0.3373574912548065, | |
| "learning_rate": 0.00014939172749391726, | |
| "loss": 0.9455, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.657736468015309, | |
| "grad_norm": 0.2560804486274719, | |
| "learning_rate": 0.00014914841849148416, | |
| "loss": 0.6532, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.6599234554401312, | |
| "grad_norm": 0.3424091339111328, | |
| "learning_rate": 0.00014890510948905108, | |
| "loss": 0.7255, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.6621104428649536, | |
| "grad_norm": 0.3578891456127167, | |
| "learning_rate": 0.000148661800486618, | |
| "loss": 0.689, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6642974302897757, | |
| "grad_norm": 0.2998923659324646, | |
| "learning_rate": 0.0001484184914841849, | |
| "loss": 0.8305, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.666484417714598, | |
| "grad_norm": 0.29691943526268005, | |
| "learning_rate": 0.0001481751824817518, | |
| "loss": 0.5745, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.6686714051394205, | |
| "grad_norm": 0.26453182101249695, | |
| "learning_rate": 0.00014793187347931873, | |
| "loss": 0.6202, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.6708583925642428, | |
| "grad_norm": 0.24131835997104645, | |
| "learning_rate": 0.00014768856447688563, | |
| "loss": 0.8149, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.6730453799890652, | |
| "grad_norm": 0.5507832169532776, | |
| "learning_rate": 0.00014744525547445256, | |
| "loss": 0.7544, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.6752323674138874, | |
| "grad_norm": 0.3100571930408478, | |
| "learning_rate": 0.00014720194647201945, | |
| "loss": 0.6096, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.6774193548387095, | |
| "grad_norm": 0.40742942690849304, | |
| "learning_rate": 0.00014695863746958635, | |
| "loss": 0.8001, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.679606342263532, | |
| "grad_norm": 0.26272064447402954, | |
| "learning_rate": 0.00014671532846715328, | |
| "loss": 0.6614, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.6817933296883543, | |
| "grad_norm": 0.3485982418060303, | |
| "learning_rate": 0.00014647201946472018, | |
| "loss": 0.7596, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.6839803171131766, | |
| "grad_norm": 0.3311547636985779, | |
| "learning_rate": 0.0001462287104622871, | |
| "loss": 0.808, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.686167304537999, | |
| "grad_norm": 0.28489449620246887, | |
| "learning_rate": 0.000145985401459854, | |
| "loss": 0.683, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.6883542919628212, | |
| "grad_norm": 0.23958906531333923, | |
| "learning_rate": 0.0001457420924574209, | |
| "loss": 0.619, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.6905412793876435, | |
| "grad_norm": 0.2665773034095764, | |
| "learning_rate": 0.00014549878345498782, | |
| "loss": 0.7169, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.6927282668124657, | |
| "grad_norm": 0.33576110005378723, | |
| "learning_rate": 0.00014525547445255475, | |
| "loss": 0.7457, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.694915254237288, | |
| "grad_norm": 0.3103754222393036, | |
| "learning_rate": 0.00014501216545012165, | |
| "loss": 0.7083, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.6971022416621104, | |
| "grad_norm": 0.27746620774269104, | |
| "learning_rate": 0.00014476885644768855, | |
| "loss": 0.7648, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.6992892290869328, | |
| "grad_norm": 0.3597886264324188, | |
| "learning_rate": 0.00014452554744525547, | |
| "loss": 0.8173, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.7014762165117552, | |
| "grad_norm": 0.2408217489719391, | |
| "learning_rate": 0.00014428223844282237, | |
| "loss": 0.5872, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.7036632039365773, | |
| "grad_norm": 0.24239328503608704, | |
| "learning_rate": 0.0001440389294403893, | |
| "loss": 0.6311, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.7058501913613997, | |
| "grad_norm": 0.4606420695781708, | |
| "learning_rate": 0.0001437956204379562, | |
| "loss": 0.6742, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.7080371787862219, | |
| "grad_norm": 0.2773914933204651, | |
| "learning_rate": 0.0001435523114355231, | |
| "loss": 0.4933, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.7102241662110442, | |
| "grad_norm": 0.33102571964263916, | |
| "learning_rate": 0.00014330900243309002, | |
| "loss": 0.7694, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.7124111536358666, | |
| "grad_norm": 0.3455331027507782, | |
| "learning_rate": 0.00014306569343065692, | |
| "loss": 0.5662, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.714598141060689, | |
| "grad_norm": 0.28522560000419617, | |
| "learning_rate": 0.00014282238442822384, | |
| "loss": 0.799, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.7167851284855113, | |
| "grad_norm": 0.3302403688430786, | |
| "learning_rate": 0.00014257907542579074, | |
| "loss": 0.8366, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.7189721159103335, | |
| "grad_norm": 0.2695009410381317, | |
| "learning_rate": 0.00014233576642335764, | |
| "loss": 0.5889, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.7211591033351559, | |
| "grad_norm": 0.2292398363351822, | |
| "learning_rate": 0.00014209245742092456, | |
| "loss": 0.519, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.723346090759978, | |
| "grad_norm": 0.2863897383213043, | |
| "learning_rate": 0.0001418491484184915, | |
| "loss": 0.6394, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.7255330781848004, | |
| "grad_norm": 1.8092900514602661, | |
| "learning_rate": 0.0001416058394160584, | |
| "loss": 0.6393, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.7277200656096228, | |
| "grad_norm": 0.3296603262424469, | |
| "learning_rate": 0.00014136253041362529, | |
| "loss": 0.7414, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.7299070530344451, | |
| "grad_norm": 0.36179548501968384, | |
| "learning_rate": 0.0001411192214111922, | |
| "loss": 0.7689, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 1.7320940404592675, | |
| "grad_norm": 0.3196108937263489, | |
| "learning_rate": 0.0001408759124087591, | |
| "loss": 0.681, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.7342810278840897, | |
| "grad_norm": 0.3329809010028839, | |
| "learning_rate": 0.000140632603406326, | |
| "loss": 0.7421, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 1.7364680153089118, | |
| "grad_norm": 0.22216172516345978, | |
| "learning_rate": 0.00014038929440389293, | |
| "loss": 0.6421, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.7386550027337342, | |
| "grad_norm": 0.33266568183898926, | |
| "learning_rate": 0.00014014598540145983, | |
| "loss": 0.5699, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.7408419901585566, | |
| "grad_norm": 0.3858932852745056, | |
| "learning_rate": 0.00013990267639902676, | |
| "loss": 0.7368, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.743028977583379, | |
| "grad_norm": 0.3091468811035156, | |
| "learning_rate": 0.00013965936739659366, | |
| "loss": 0.6334, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 1.7452159650082013, | |
| "grad_norm": 0.3596084415912628, | |
| "learning_rate": 0.00013941605839416055, | |
| "loss": 0.6, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.7474029524330235, | |
| "grad_norm": 0.2971950173377991, | |
| "learning_rate": 0.00013917274939172748, | |
| "loss": 0.6638, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 1.7495899398578458, | |
| "grad_norm": 0.36204877495765686, | |
| "learning_rate": 0.0001389294403892944, | |
| "loss": 0.6704, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.751776927282668, | |
| "grad_norm": 0.25178369879722595, | |
| "learning_rate": 0.0001386861313868613, | |
| "loss": 0.6057, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 1.7539639147074904, | |
| "grad_norm": 0.2541144788265228, | |
| "learning_rate": 0.0001384428223844282, | |
| "loss": 0.6294, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.7561509021323127, | |
| "grad_norm": 0.31337326765060425, | |
| "learning_rate": 0.0001381995133819951, | |
| "loss": 0.7991, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.758337889557135, | |
| "grad_norm": 0.8276956081390381, | |
| "learning_rate": 0.00013795620437956203, | |
| "loss": 0.9111, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.7605248769819575, | |
| "grad_norm": 0.2656904458999634, | |
| "learning_rate": 0.00013771289537712895, | |
| "loss": 0.7048, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.7627118644067796, | |
| "grad_norm": 0.3123759627342224, | |
| "learning_rate": 0.00013746958637469585, | |
| "loss": 0.816, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.764898851831602, | |
| "grad_norm": 0.28710535168647766, | |
| "learning_rate": 0.00013722627737226275, | |
| "loss": 0.7998, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 1.7670858392564242, | |
| "grad_norm": 0.28171730041503906, | |
| "learning_rate": 0.00013698296836982967, | |
| "loss": 0.6835, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.7692728266812465, | |
| "grad_norm": 0.42397668957710266, | |
| "learning_rate": 0.00013673965936739657, | |
| "loss": 0.6875, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 1.771459814106069, | |
| "grad_norm": 0.309830904006958, | |
| "learning_rate": 0.0001364963503649635, | |
| "loss": 0.7446, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.7736468015308913, | |
| "grad_norm": 0.3108932375907898, | |
| "learning_rate": 0.0001362530413625304, | |
| "loss": 0.6415, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 1.7758337889557136, | |
| "grad_norm": 0.34336167573928833, | |
| "learning_rate": 0.0001360097323600973, | |
| "loss": 0.688, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.7780207763805358, | |
| "grad_norm": 0.2871513366699219, | |
| "learning_rate": 0.00013576642335766422, | |
| "loss": 0.8814, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 1.7802077638053582, | |
| "grad_norm": 0.24412307143211365, | |
| "learning_rate": 0.00013552311435523115, | |
| "loss": 0.6767, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.7823947512301803, | |
| "grad_norm": 0.3574623167514801, | |
| "learning_rate": 0.00013527980535279804, | |
| "loss": 0.7016, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.7845817386550027, | |
| "grad_norm": 0.4434225261211395, | |
| "learning_rate": 0.00013503649635036494, | |
| "loss": 0.6373, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.786768726079825, | |
| "grad_norm": 0.5134851932525635, | |
| "learning_rate": 0.00013479318734793187, | |
| "loss": 0.6622, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 1.7889557135046474, | |
| "grad_norm": 0.4768081307411194, | |
| "learning_rate": 0.00013454987834549877, | |
| "loss": 0.7665, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.7911427009294698, | |
| "grad_norm": 0.2798459231853485, | |
| "learning_rate": 0.0001343065693430657, | |
| "loss": 0.6625, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 1.793329688354292, | |
| "grad_norm": 0.27218303084373474, | |
| "learning_rate": 0.0001340632603406326, | |
| "loss": 0.6266, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.7955166757791141, | |
| "grad_norm": 0.287860244512558, | |
| "learning_rate": 0.0001338199513381995, | |
| "loss": 0.9758, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 1.7977036632039365, | |
| "grad_norm": 0.26204392313957214, | |
| "learning_rate": 0.00013357664233576641, | |
| "loss": 0.532, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.7998906506287589, | |
| "grad_norm": 0.29923009872436523, | |
| "learning_rate": 0.0001333333333333333, | |
| "loss": 0.6961, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 1.8020776380535812, | |
| "grad_norm": 0.34140443801879883, | |
| "learning_rate": 0.00013309002433090024, | |
| "loss": 0.8296, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.8042646254784036, | |
| "grad_norm": 0.2605873644351959, | |
| "learning_rate": 0.00013284671532846714, | |
| "loss": 0.8329, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.8064516129032258, | |
| "grad_norm": 0.36522653698921204, | |
| "learning_rate": 0.00013260340632603403, | |
| "loss": 0.8552, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.8086386003280481, | |
| "grad_norm": 0.29043689370155334, | |
| "learning_rate": 0.00013236009732360096, | |
| "loss": 0.7261, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 1.8108255877528703, | |
| "grad_norm": 0.2861742675304413, | |
| "learning_rate": 0.00013211678832116789, | |
| "loss": 0.596, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.8130125751776927, | |
| "grad_norm": 0.34066513180732727, | |
| "learning_rate": 0.00013187347931873478, | |
| "loss": 0.8127, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 1.815199562602515, | |
| "grad_norm": 0.3166887164115906, | |
| "learning_rate": 0.00013163017031630168, | |
| "loss": 0.7491, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.8173865500273374, | |
| "grad_norm": 0.36282384395599365, | |
| "learning_rate": 0.0001313868613138686, | |
| "loss": 0.7511, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 1.8195735374521598, | |
| "grad_norm": 0.36424878239631653, | |
| "learning_rate": 0.0001311435523114355, | |
| "loss": 0.938, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.821760524876982, | |
| "grad_norm": 0.3587567210197449, | |
| "learning_rate": 0.00013090024330900243, | |
| "loss": 0.8294, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 1.8239475123018043, | |
| "grad_norm": 0.3000282049179077, | |
| "learning_rate": 0.00013065693430656933, | |
| "loss": 0.7178, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.8261344997266264, | |
| "grad_norm": 0.2934707999229431, | |
| "learning_rate": 0.00013041362530413623, | |
| "loss": 0.7185, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.8283214871514488, | |
| "grad_norm": 0.26312437653541565, | |
| "learning_rate": 0.00013017031630170315, | |
| "loss": 0.6128, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.8305084745762712, | |
| "grad_norm": 0.27557966113090515, | |
| "learning_rate": 0.00012992700729927008, | |
| "loss": 0.6751, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 1.8326954620010936, | |
| "grad_norm": 0.296512633562088, | |
| "learning_rate": 0.00012968369829683698, | |
| "loss": 0.8259, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.834882449425916, | |
| "grad_norm": 0.4524163007736206, | |
| "learning_rate": 0.00012944038929440388, | |
| "loss": 0.6811, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 1.837069436850738, | |
| "grad_norm": 0.32787275314331055, | |
| "learning_rate": 0.00012919708029197077, | |
| "loss": 0.6882, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.8392564242755605, | |
| "grad_norm": 0.26250511407852173, | |
| "learning_rate": 0.0001289537712895377, | |
| "loss": 0.6858, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 1.8414434117003826, | |
| "grad_norm": 0.32813650369644165, | |
| "learning_rate": 0.00012871046228710463, | |
| "loss": 0.5929, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.843630399125205, | |
| "grad_norm": 0.3023451864719391, | |
| "learning_rate": 0.00012846715328467152, | |
| "loss": 0.7795, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 1.8458173865500274, | |
| "grad_norm": 0.3112645745277405, | |
| "learning_rate": 0.00012822384428223842, | |
| "loss": 0.517, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.8480043739748497, | |
| "grad_norm": 0.6681469678878784, | |
| "learning_rate": 0.00012798053527980535, | |
| "loss": 0.7089, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.850191361399672, | |
| "grad_norm": 0.2592954933643341, | |
| "learning_rate": 0.00012773722627737225, | |
| "loss": 0.7007, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.8523783488244943, | |
| "grad_norm": 0.31619131565093994, | |
| "learning_rate": 0.00012749391727493917, | |
| "loss": 0.4884, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 1.8545653362493164, | |
| "grad_norm": 0.3551687002182007, | |
| "learning_rate": 0.00012725060827250607, | |
| "loss": 0.5677, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.8567523236741388, | |
| "grad_norm": 0.32219335436820984, | |
| "learning_rate": 0.00012700729927007297, | |
| "loss": 0.6744, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 1.8589393110989612, | |
| "grad_norm": 0.28793492913246155, | |
| "learning_rate": 0.0001267639902676399, | |
| "loss": 0.6258, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.8611262985237835, | |
| "grad_norm": 0.382720410823822, | |
| "learning_rate": 0.00012652068126520682, | |
| "loss": 0.7977, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 1.863313285948606, | |
| "grad_norm": 0.33804479241371155, | |
| "learning_rate": 0.00012627737226277372, | |
| "loss": 0.7254, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.865500273373428, | |
| "grad_norm": 0.3259097635746002, | |
| "learning_rate": 0.00012603406326034062, | |
| "loss": 0.8729, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 1.8676872607982504, | |
| "grad_norm": 0.3584567606449127, | |
| "learning_rate": 0.00012579075425790754, | |
| "loss": 0.7337, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.8698742482230726, | |
| "grad_norm": 0.336674302816391, | |
| "learning_rate": 0.00012554744525547444, | |
| "loss": 0.6829, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.872061235647895, | |
| "grad_norm": 0.49990177154541016, | |
| "learning_rate": 0.00012530413625304137, | |
| "loss": 0.7793, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.8742482230727173, | |
| "grad_norm": 0.31498992443084717, | |
| "learning_rate": 0.00012506082725060826, | |
| "loss": 0.7355, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 1.8764352104975397, | |
| "grad_norm": 0.3050641119480133, | |
| "learning_rate": 0.00012481751824817516, | |
| "loss": 0.6473, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.878622197922362, | |
| "grad_norm": 0.27067434787750244, | |
| "learning_rate": 0.0001245742092457421, | |
| "loss": 0.6639, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 1.8808091853471842, | |
| "grad_norm": 0.29407691955566406, | |
| "learning_rate": 0.000124330900243309, | |
| "loss": 0.8002, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.8829961727720066, | |
| "grad_norm": 0.3786459267139435, | |
| "learning_rate": 0.0001240875912408759, | |
| "loss": 0.8694, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 1.8851831601968287, | |
| "grad_norm": 0.3678539991378784, | |
| "learning_rate": 0.0001238442822384428, | |
| "loss": 0.7188, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.8873701476216511, | |
| "grad_norm": 0.3660300076007843, | |
| "learning_rate": 0.0001236009732360097, | |
| "loss": 0.7348, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 1.8895571350464735, | |
| "grad_norm": 0.34265831112861633, | |
| "learning_rate": 0.00012335766423357663, | |
| "loss": 0.7046, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.8917441224712959, | |
| "grad_norm": 0.3664507567882538, | |
| "learning_rate": 0.00012311435523114356, | |
| "loss": 0.777, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.8939311098961182, | |
| "grad_norm": 0.36169371008872986, | |
| "learning_rate": 0.00012287104622871046, | |
| "loss": 0.6797, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.8961180973209404, | |
| "grad_norm": 0.2904834449291229, | |
| "learning_rate": 0.00012262773722627736, | |
| "loss": 0.6406, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.8983050847457628, | |
| "grad_norm": 0.3194887340068817, | |
| "learning_rate": 0.00012238442822384428, | |
| "loss": 0.7477, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.900492072170585, | |
| "grad_norm": 0.24546030163764954, | |
| "learning_rate": 0.00012214111922141118, | |
| "loss": 0.6013, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.9026790595954073, | |
| "grad_norm": 0.2817955017089844, | |
| "learning_rate": 0.00012189781021897809, | |
| "loss": 0.7813, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.9048660470202297, | |
| "grad_norm": 0.28798621892929077, | |
| "learning_rate": 0.000121654501216545, | |
| "loss": 0.6312, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.907053034445052, | |
| "grad_norm": 0.22041471302509308, | |
| "learning_rate": 0.0001214111922141119, | |
| "loss": 0.6671, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.9092400218698744, | |
| "grad_norm": 0.45332956314086914, | |
| "learning_rate": 0.00012116788321167883, | |
| "loss": 0.7519, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.9114270092946966, | |
| "grad_norm": 0.2907330393791199, | |
| "learning_rate": 0.00012092457420924574, | |
| "loss": 0.7048, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.9136139967195187, | |
| "grad_norm": 0.3308665156364441, | |
| "learning_rate": 0.00012068126520681264, | |
| "loss": 0.6583, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.915800984144341, | |
| "grad_norm": 0.314803808927536, | |
| "learning_rate": 0.00012043795620437955, | |
| "loss": 0.7902, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.9179879715691635, | |
| "grad_norm": 0.47894173860549927, | |
| "learning_rate": 0.00012019464720194645, | |
| "loss": 0.7153, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.9201749589939858, | |
| "grad_norm": 0.2984611392021179, | |
| "learning_rate": 0.00011995133819951337, | |
| "loss": 0.6093, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.9223619464188082, | |
| "grad_norm": 0.5481080412864685, | |
| "learning_rate": 0.00011970802919708029, | |
| "loss": 0.7026, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.9245489338436303, | |
| "grad_norm": 0.4306366443634033, | |
| "learning_rate": 0.00011946472019464718, | |
| "loss": 0.8093, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.9267359212684527, | |
| "grad_norm": 0.4765607416629791, | |
| "learning_rate": 0.0001192214111922141, | |
| "loss": 0.8378, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.9289229086932749, | |
| "grad_norm": 0.29230380058288574, | |
| "learning_rate": 0.00011897810218978102, | |
| "loss": 0.812, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.9311098961180972, | |
| "grad_norm": 0.27519696950912476, | |
| "learning_rate": 0.00011873479318734792, | |
| "loss": 0.7204, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.9332968835429196, | |
| "grad_norm": 0.43257808685302734, | |
| "learning_rate": 0.00011849148418491483, | |
| "loss": 0.7484, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.935483870967742, | |
| "grad_norm": 0.34764620661735535, | |
| "learning_rate": 0.00011824817518248174, | |
| "loss": 0.7835, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.9376708583925644, | |
| "grad_norm": 0.2872960567474365, | |
| "learning_rate": 0.00011800486618004864, | |
| "loss": 0.6871, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.9398578458173865, | |
| "grad_norm": 0.3657885491847992, | |
| "learning_rate": 0.00011776155717761557, | |
| "loss": 0.7439, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.942044833242209, | |
| "grad_norm": 0.3176083564758301, | |
| "learning_rate": 0.00011751824817518248, | |
| "loss": 0.6768, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.944231820667031, | |
| "grad_norm": 0.2851628363132477, | |
| "learning_rate": 0.00011727493917274938, | |
| "loss": 0.6673, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.9464188080918534, | |
| "grad_norm": 0.2601426839828491, | |
| "learning_rate": 0.00011703163017031629, | |
| "loss": 0.6025, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.9486057955166758, | |
| "grad_norm": 0.282064288854599, | |
| "learning_rate": 0.0001167883211678832, | |
| "loss": 0.7084, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 1.9507927829414982, | |
| "grad_norm": 0.2761860191822052, | |
| "learning_rate": 0.0001165450121654501, | |
| "loss": 0.7596, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.9529797703663205, | |
| "grad_norm": 0.28319042921066284, | |
| "learning_rate": 0.00011630170316301703, | |
| "loss": 0.6179, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 1.9551667577911427, | |
| "grad_norm": 0.3847699761390686, | |
| "learning_rate": 0.00011605839416058394, | |
| "loss": 0.7964, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.957353745215965, | |
| "grad_norm": 0.5719382762908936, | |
| "learning_rate": 0.00011581508515815084, | |
| "loss": 0.7848, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.9595407326407872, | |
| "grad_norm": 0.24546296894550323, | |
| "learning_rate": 0.00011557177615571775, | |
| "loss": 0.7404, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.9617277200656096, | |
| "grad_norm": 0.2359631359577179, | |
| "learning_rate": 0.00011532846715328465, | |
| "loss": 0.6091, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 1.963914707490432, | |
| "grad_norm": 0.23529179394245148, | |
| "learning_rate": 0.00011508515815085157, | |
| "loss": 0.7032, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.9661016949152543, | |
| "grad_norm": 0.32363957166671753, | |
| "learning_rate": 0.00011484184914841848, | |
| "loss": 0.7238, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 1.9682886823400767, | |
| "grad_norm": 0.24427059292793274, | |
| "learning_rate": 0.00011459854014598538, | |
| "loss": 0.6704, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.9704756697648989, | |
| "grad_norm": 0.39608168601989746, | |
| "learning_rate": 0.0001143552311435523, | |
| "loss": 0.7251, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 1.972662657189721, | |
| "grad_norm": 0.2778458297252655, | |
| "learning_rate": 0.00011411192214111922, | |
| "loss": 0.6907, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.9748496446145434, | |
| "grad_norm": 0.38359907269477844, | |
| "learning_rate": 0.00011386861313868612, | |
| "loss": 0.792, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 1.9770366320393657, | |
| "grad_norm": 0.2692561149597168, | |
| "learning_rate": 0.00011362530413625303, | |
| "loss": 0.505, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.9792236194641881, | |
| "grad_norm": 0.35147660970687866, | |
| "learning_rate": 0.00011338199513381994, | |
| "loss": 0.6847, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.9814106068890105, | |
| "grad_norm": 0.3441888689994812, | |
| "learning_rate": 0.00011313868613138684, | |
| "loss": 0.7633, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.9835975943138326, | |
| "grad_norm": 0.22528661787509918, | |
| "learning_rate": 0.00011289537712895377, | |
| "loss": 0.6367, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 1.985784581738655, | |
| "grad_norm": 0.34356188774108887, | |
| "learning_rate": 0.00011265206812652068, | |
| "loss": 0.8377, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.9879715691634772, | |
| "grad_norm": 0.3173167109489441, | |
| "learning_rate": 0.00011240875912408758, | |
| "loss": 0.6651, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 1.9901585565882995, | |
| "grad_norm": 0.2497638314962387, | |
| "learning_rate": 0.00011216545012165449, | |
| "loss": 0.7402, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.992345544013122, | |
| "grad_norm": 0.28941065073013306, | |
| "learning_rate": 0.00011192214111922141, | |
| "loss": 0.7328, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 1.9945325314379443, | |
| "grad_norm": 0.3209066092967987, | |
| "learning_rate": 0.00011167883211678831, | |
| "loss": 0.6639, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.9967195188627667, | |
| "grad_norm": 0.2646278142929077, | |
| "learning_rate": 0.00011143552311435522, | |
| "loss": 0.6795, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 1.9989065062875888, | |
| "grad_norm": 0.25543129444122314, | |
| "learning_rate": 0.00011119221411192212, | |
| "loss": 0.711, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.001093493712411, | |
| "grad_norm": 0.37120577692985535, | |
| "learning_rate": 0.00011094890510948904, | |
| "loss": 0.909, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 2.0032804811372333, | |
| "grad_norm": 0.20501375198364258, | |
| "learning_rate": 0.00011070559610705596, | |
| "loss": 0.5982, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.0054674685620557, | |
| "grad_norm": 0.2816307544708252, | |
| "learning_rate": 0.00011046228710462286, | |
| "loss": 0.6477, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 2.007654455986878, | |
| "grad_norm": 0.23481379449367523, | |
| "learning_rate": 0.00011021897810218977, | |
| "loss": 0.701, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.0098414434117005, | |
| "grad_norm": 0.22269988059997559, | |
| "learning_rate": 0.00010997566909975668, | |
| "loss": 0.4909, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 2.012028430836523, | |
| "grad_norm": 0.22761498391628265, | |
| "learning_rate": 0.00010973236009732358, | |
| "loss": 0.5446, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.014215418261345, | |
| "grad_norm": 0.38109347224235535, | |
| "learning_rate": 0.00010948905109489051, | |
| "loss": 0.7502, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 2.016402405686167, | |
| "grad_norm": 0.26273003220558167, | |
| "learning_rate": 0.00010924574209245742, | |
| "loss": 0.8272, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.0185893931109895, | |
| "grad_norm": 0.2501181960105896, | |
| "learning_rate": 0.00010900243309002432, | |
| "loss": 0.6668, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 2.020776380535812, | |
| "grad_norm": 0.2221994698047638, | |
| "learning_rate": 0.00010875912408759123, | |
| "loss": 0.5899, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.0229633679606343, | |
| "grad_norm": 0.26471519470214844, | |
| "learning_rate": 0.00010851581508515814, | |
| "loss": 0.491, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 2.0251503553854566, | |
| "grad_norm": 0.29527121782302856, | |
| "learning_rate": 0.00010827250608272505, | |
| "loss": 0.6478, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.027337342810279, | |
| "grad_norm": 0.2646641135215759, | |
| "learning_rate": 0.00010802919708029196, | |
| "loss": 0.6052, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 2.029524330235101, | |
| "grad_norm": 0.2731557786464691, | |
| "learning_rate": 0.00010778588807785888, | |
| "loss": 0.7211, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.0317113176599233, | |
| "grad_norm": 0.32770606875419617, | |
| "learning_rate": 0.00010754257907542578, | |
| "loss": 0.777, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 2.0338983050847457, | |
| "grad_norm": 0.2406987100839615, | |
| "learning_rate": 0.00010729927007299269, | |
| "loss": 0.6697, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.036085292509568, | |
| "grad_norm": 0.2938626706600189, | |
| "learning_rate": 0.00010705596107055961, | |
| "loss": 0.7645, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 2.0382722799343904, | |
| "grad_norm": 0.25775012373924255, | |
| "learning_rate": 0.00010681265206812651, | |
| "loss": 0.721, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.040459267359213, | |
| "grad_norm": 0.3010717034339905, | |
| "learning_rate": 0.00010656934306569342, | |
| "loss": 0.565, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 2.042646254784035, | |
| "grad_norm": 0.27577218413352966, | |
| "learning_rate": 0.00010632603406326032, | |
| "loss": 0.5764, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.044833242208857, | |
| "grad_norm": 0.3049190938472748, | |
| "learning_rate": 0.00010608272506082723, | |
| "loss": 0.8492, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 2.0470202296336795, | |
| "grad_norm": 0.3621160686016083, | |
| "learning_rate": 0.00010583941605839416, | |
| "loss": 0.668, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.049207217058502, | |
| "grad_norm": 0.28885042667388916, | |
| "learning_rate": 0.00010559610705596106, | |
| "loss": 0.6898, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 2.051394204483324, | |
| "grad_norm": 0.38116586208343506, | |
| "learning_rate": 0.00010535279805352797, | |
| "loss": 0.8778, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.0535811919081466, | |
| "grad_norm": 0.3027772903442383, | |
| "learning_rate": 0.00010510948905109488, | |
| "loss": 0.6428, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 2.055768179332969, | |
| "grad_norm": 0.20893897116184235, | |
| "learning_rate": 0.00010486618004866178, | |
| "loss": 0.6471, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.0579551667577913, | |
| "grad_norm": 0.281434565782547, | |
| "learning_rate": 0.0001046228710462287, | |
| "loss": 0.6593, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 2.0601421541826133, | |
| "grad_norm": 0.3276302218437195, | |
| "learning_rate": 0.00010437956204379562, | |
| "loss": 0.6077, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.0623291416074356, | |
| "grad_norm": 0.35327035188674927, | |
| "learning_rate": 0.00010413625304136252, | |
| "loss": 0.5687, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 2.064516129032258, | |
| "grad_norm": 0.3210618197917938, | |
| "learning_rate": 0.00010389294403892943, | |
| "loss": 0.6685, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.0667031164570804, | |
| "grad_norm": 0.25362011790275574, | |
| "learning_rate": 0.00010364963503649635, | |
| "loss": 0.5067, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 2.0688901038819028, | |
| "grad_norm": 0.2774200439453125, | |
| "learning_rate": 0.00010340632603406325, | |
| "loss": 0.7696, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.071077091306725, | |
| "grad_norm": 0.39397120475769043, | |
| "learning_rate": 0.00010316301703163016, | |
| "loss": 0.7109, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 2.0732640787315475, | |
| "grad_norm": 0.2712627947330475, | |
| "learning_rate": 0.00010291970802919708, | |
| "loss": 0.5855, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.0754510661563694, | |
| "grad_norm": 0.20961184799671173, | |
| "learning_rate": 0.00010267639902676397, | |
| "loss": 0.6223, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 2.077638053581192, | |
| "grad_norm": 0.35785865783691406, | |
| "learning_rate": 0.0001024330900243309, | |
| "loss": 0.6426, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.079825041006014, | |
| "grad_norm": 0.30317097902297974, | |
| "learning_rate": 0.0001021897810218978, | |
| "loss": 0.5881, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 2.0820120284308365, | |
| "grad_norm": 0.2647455632686615, | |
| "learning_rate": 0.00010194647201946471, | |
| "loss": 0.4753, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.084199015855659, | |
| "grad_norm": 0.2377641350030899, | |
| "learning_rate": 0.00010170316301703162, | |
| "loss": 0.7245, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 2.0863860032804813, | |
| "grad_norm": 0.4126327633857727, | |
| "learning_rate": 0.00010145985401459852, | |
| "loss": 0.7418, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.0885729907053037, | |
| "grad_norm": 0.372079998254776, | |
| "learning_rate": 0.00010121654501216545, | |
| "loss": 0.5861, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 2.0907599781301256, | |
| "grad_norm": 0.35693153738975525, | |
| "learning_rate": 0.00010097323600973236, | |
| "loss": 0.63, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.092946965554948, | |
| "grad_norm": 0.3220914304256439, | |
| "learning_rate": 0.00010072992700729926, | |
| "loss": 0.6541, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 2.0951339529797703, | |
| "grad_norm": 0.28749874234199524, | |
| "learning_rate": 0.00010048661800486617, | |
| "loss": 0.5944, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.0973209404045927, | |
| "grad_norm": 0.27125856280326843, | |
| "learning_rate": 0.00010024330900243309, | |
| "loss": 0.546, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 2.099507927829415, | |
| "grad_norm": 0.32414090633392334, | |
| "learning_rate": 9.999999999999999e-05, | |
| "loss": 0.5295, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.1016949152542375, | |
| "grad_norm": 0.37579938769340515, | |
| "learning_rate": 9.97566909975669e-05, | |
| "loss": 0.6202, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 2.1038819026790594, | |
| "grad_norm": 0.3326401710510254, | |
| "learning_rate": 9.951338199513382e-05, | |
| "loss": 0.5674, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.1060688901038818, | |
| "grad_norm": 0.2777692377567291, | |
| "learning_rate": 9.927007299270071e-05, | |
| "loss": 0.5297, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 2.108255877528704, | |
| "grad_norm": 0.3658103942871094, | |
| "learning_rate": 9.902676399026764e-05, | |
| "loss": 0.6001, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.1104428649535265, | |
| "grad_norm": 0.30180448293685913, | |
| "learning_rate": 9.878345498783455e-05, | |
| "loss": 0.627, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 2.112629852378349, | |
| "grad_norm": 0.3160865604877472, | |
| "learning_rate": 9.854014598540145e-05, | |
| "loss": 0.6583, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.1148168398031713, | |
| "grad_norm": 0.38876181840896606, | |
| "learning_rate": 9.829683698296836e-05, | |
| "loss": 0.7201, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 2.1170038272279936, | |
| "grad_norm": 0.32533615827560425, | |
| "learning_rate": 9.805352798053527e-05, | |
| "loss": 0.5814, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.1191908146528156, | |
| "grad_norm": 0.2723495662212372, | |
| "learning_rate": 9.781021897810217e-05, | |
| "loss": 0.7299, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 2.121377802077638, | |
| "grad_norm": 0.3380286693572998, | |
| "learning_rate": 9.75669099756691e-05, | |
| "loss": 0.8313, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.1235647895024603, | |
| "grad_norm": 0.3675851821899414, | |
| "learning_rate": 9.7323600973236e-05, | |
| "loss": 0.5859, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 2.1257517769272827, | |
| "grad_norm": 0.32205119729042053, | |
| "learning_rate": 9.708029197080291e-05, | |
| "loss": 0.78, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.127938764352105, | |
| "grad_norm": 0.3244129419326782, | |
| "learning_rate": 9.683698296836982e-05, | |
| "loss": 0.6777, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 2.1301257517769274, | |
| "grad_norm": 0.3449605405330658, | |
| "learning_rate": 9.659367396593672e-05, | |
| "loss": 0.654, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.13231273920175, | |
| "grad_norm": 0.3051266670227051, | |
| "learning_rate": 9.635036496350364e-05, | |
| "loss": 0.6204, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 2.1344997266265717, | |
| "grad_norm": 0.29881876707077026, | |
| "learning_rate": 9.610705596107056e-05, | |
| "loss": 0.4543, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.136686714051394, | |
| "grad_norm": 0.2953018546104431, | |
| "learning_rate": 9.586374695863745e-05, | |
| "loss": 0.7972, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 2.1388737014762165, | |
| "grad_norm": 0.3214372992515564, | |
| "learning_rate": 9.562043795620437e-05, | |
| "loss": 0.6216, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.141060688901039, | |
| "grad_norm": 0.31700441241264343, | |
| "learning_rate": 9.537712895377129e-05, | |
| "loss": 0.5708, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 2.143247676325861, | |
| "grad_norm": 0.3516302704811096, | |
| "learning_rate": 9.513381995133819e-05, | |
| "loss": 0.7428, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.1454346637506836, | |
| "grad_norm": 0.278621643781662, | |
| "learning_rate": 9.48905109489051e-05, | |
| "loss": 0.5118, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 2.1476216511755055, | |
| "grad_norm": 0.39558589458465576, | |
| "learning_rate": 9.464720194647201e-05, | |
| "loss": 0.6228, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.149808638600328, | |
| "grad_norm": 0.2623763382434845, | |
| "learning_rate": 9.440389294403891e-05, | |
| "loss": 0.5621, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 2.1519956260251503, | |
| "grad_norm": 0.3559738099575043, | |
| "learning_rate": 9.416058394160584e-05, | |
| "loss": 0.6367, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.1541826134499726, | |
| "grad_norm": 0.34260550141334534, | |
| "learning_rate": 9.391727493917275e-05, | |
| "loss": 0.6587, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 2.156369600874795, | |
| "grad_norm": 0.3602772057056427, | |
| "learning_rate": 9.367396593673965e-05, | |
| "loss": 0.6749, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.1585565882996174, | |
| "grad_norm": 0.4492672383785248, | |
| "learning_rate": 9.343065693430656e-05, | |
| "loss": 0.6159, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 2.1607435757244398, | |
| "grad_norm": 0.30676203966140747, | |
| "learning_rate": 9.318734793187348e-05, | |
| "loss": 0.7105, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.1629305631492617, | |
| "grad_norm": 0.2810410261154175, | |
| "learning_rate": 9.294403892944038e-05, | |
| "loss": 0.7091, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 2.165117550574084, | |
| "grad_norm": 0.3161092698574066, | |
| "learning_rate": 9.27007299270073e-05, | |
| "loss": 0.6866, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.1673045379989064, | |
| "grad_norm": 0.30391326546669006, | |
| "learning_rate": 9.24574209245742e-05, | |
| "loss": 0.6473, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 2.169491525423729, | |
| "grad_norm": 0.33336496353149414, | |
| "learning_rate": 9.22141119221411e-05, | |
| "loss": 0.7565, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.171678512848551, | |
| "grad_norm": 0.27083349227905273, | |
| "learning_rate": 9.197080291970803e-05, | |
| "loss": 0.602, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 2.1738655002733736, | |
| "grad_norm": 0.3847806751728058, | |
| "learning_rate": 9.172749391727493e-05, | |
| "loss": 0.6034, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.176052487698196, | |
| "grad_norm": 0.334309846162796, | |
| "learning_rate": 9.148418491484184e-05, | |
| "loss": 0.7368, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 2.178239475123018, | |
| "grad_norm": 0.4568588435649872, | |
| "learning_rate": 9.124087591240875e-05, | |
| "loss": 0.6723, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.1804264625478402, | |
| "grad_norm": 0.23190492391586304, | |
| "learning_rate": 9.099756690997565e-05, | |
| "loss": 0.5024, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 2.1826134499726626, | |
| "grad_norm": 0.4212368130683899, | |
| "learning_rate": 9.075425790754258e-05, | |
| "loss": 0.5137, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.184800437397485, | |
| "grad_norm": 0.3017450273036957, | |
| "learning_rate": 9.051094890510949e-05, | |
| "loss": 0.659, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 2.1869874248223073, | |
| "grad_norm": 0.32203611731529236, | |
| "learning_rate": 9.026763990267639e-05, | |
| "loss": 0.6198, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.1891744122471297, | |
| "grad_norm": 0.308056503534317, | |
| "learning_rate": 9.00243309002433e-05, | |
| "loss": 0.5798, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 2.191361399671952, | |
| "grad_norm": 0.32163482904434204, | |
| "learning_rate": 8.978102189781021e-05, | |
| "loss": 0.4909, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.193548387096774, | |
| "grad_norm": 0.28082406520843506, | |
| "learning_rate": 8.953771289537712e-05, | |
| "loss": 0.5911, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 2.1957353745215964, | |
| "grad_norm": 0.3853447139263153, | |
| "learning_rate": 8.929440389294404e-05, | |
| "loss": 0.601, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.1979223619464188, | |
| "grad_norm": 0.27736788988113403, | |
| "learning_rate": 8.905109489051095e-05, | |
| "loss": 0.5391, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 2.200109349371241, | |
| "grad_norm": 0.3074529767036438, | |
| "learning_rate": 8.880778588807785e-05, | |
| "loss": 0.5264, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.2022963367960635, | |
| "grad_norm": 0.34355053305625916, | |
| "learning_rate": 8.856447688564476e-05, | |
| "loss": 0.5479, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 2.204483324220886, | |
| "grad_norm": 0.25875043869018555, | |
| "learning_rate": 8.832116788321167e-05, | |
| "loss": 0.5133, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.2066703116457083, | |
| "grad_norm": 0.4600970447063446, | |
| "learning_rate": 8.807785888077858e-05, | |
| "loss": 0.7145, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 2.20885729907053, | |
| "grad_norm": 0.4292985796928406, | |
| "learning_rate": 8.78345498783455e-05, | |
| "loss": 0.8484, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.2110442864953526, | |
| "grad_norm": 0.38896313309669495, | |
| "learning_rate": 8.759124087591239e-05, | |
| "loss": 0.8592, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 2.213231273920175, | |
| "grad_norm": 0.32829031348228455, | |
| "learning_rate": 8.73479318734793e-05, | |
| "loss": 0.711, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.2154182613449973, | |
| "grad_norm": 0.32850679755210876, | |
| "learning_rate": 8.710462287104623e-05, | |
| "loss": 0.6644, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 2.2176052487698197, | |
| "grad_norm": 0.3872655928134918, | |
| "learning_rate": 8.686131386861313e-05, | |
| "loss": 0.7039, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.219792236194642, | |
| "grad_norm": 0.39074549078941345, | |
| "learning_rate": 8.661800486618004e-05, | |
| "loss": 0.6316, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 2.221979223619464, | |
| "grad_norm": 0.33514949679374695, | |
| "learning_rate": 8.637469586374695e-05, | |
| "loss": 0.7362, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.2241662110442864, | |
| "grad_norm": 0.37822842597961426, | |
| "learning_rate": 8.613138686131385e-05, | |
| "loss": 0.8549, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 2.2263531984691087, | |
| "grad_norm": 0.2988075911998749, | |
| "learning_rate": 8.588807785888078e-05, | |
| "loss": 0.6768, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.228540185893931, | |
| "grad_norm": 0.3298238515853882, | |
| "learning_rate": 8.564476885644769e-05, | |
| "loss": 0.661, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 2.2307271733187535, | |
| "grad_norm": 0.3168882429599762, | |
| "learning_rate": 8.540145985401459e-05, | |
| "loss": 0.5899, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.232914160743576, | |
| "grad_norm": 0.32149139046669006, | |
| "learning_rate": 8.51581508515815e-05, | |
| "loss": 0.6377, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 2.235101148168398, | |
| "grad_norm": 0.3840494453907013, | |
| "learning_rate": 8.491484184914842e-05, | |
| "loss": 0.5914, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.23728813559322, | |
| "grad_norm": 0.36953312158584595, | |
| "learning_rate": 8.467153284671532e-05, | |
| "loss": 0.6954, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 2.2394751230180425, | |
| "grad_norm": 0.3132734000682831, | |
| "learning_rate": 8.442822384428223e-05, | |
| "loss": 0.6778, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.241662110442865, | |
| "grad_norm": 0.3022383153438568, | |
| "learning_rate": 8.418491484184915e-05, | |
| "loss": 0.5681, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 2.2438490978676873, | |
| "grad_norm": 0.33297014236450195, | |
| "learning_rate": 8.394160583941604e-05, | |
| "loss": 1.0015, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.2460360852925096, | |
| "grad_norm": 0.2536577582359314, | |
| "learning_rate": 8.369829683698297e-05, | |
| "loss": 0.6535, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 2.248223072717332, | |
| "grad_norm": 0.3168553113937378, | |
| "learning_rate": 8.345498783454987e-05, | |
| "loss": 0.4617, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.250410060142154, | |
| "grad_norm": 0.41692110896110535, | |
| "learning_rate": 8.321167883211678e-05, | |
| "loss": 0.6289, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 2.2525970475669763, | |
| "grad_norm": 0.31276077032089233, | |
| "learning_rate": 8.296836982968369e-05, | |
| "loss": 0.6558, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.2547840349917987, | |
| "grad_norm": 0.382587730884552, | |
| "learning_rate": 8.272506082725059e-05, | |
| "loss": 0.7024, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 2.256971022416621, | |
| "grad_norm": 0.37239089608192444, | |
| "learning_rate": 8.248175182481752e-05, | |
| "loss": 0.6428, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.2591580098414434, | |
| "grad_norm": 0.3444945216178894, | |
| "learning_rate": 8.223844282238443e-05, | |
| "loss": 0.8301, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 2.261344997266266, | |
| "grad_norm": 0.32943612337112427, | |
| "learning_rate": 8.199513381995133e-05, | |
| "loss": 0.8259, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.263531984691088, | |
| "grad_norm": 0.3256615996360779, | |
| "learning_rate": 8.175182481751824e-05, | |
| "loss": 0.5633, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 2.26571897211591, | |
| "grad_norm": 0.38470467925071716, | |
| "learning_rate": 8.150851581508516e-05, | |
| "loss": 0.8342, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.2679059595407325, | |
| "grad_norm": 0.3568199872970581, | |
| "learning_rate": 8.126520681265206e-05, | |
| "loss": 0.6949, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 2.270092946965555, | |
| "grad_norm": 0.4587413966655731, | |
| "learning_rate": 8.102189781021897e-05, | |
| "loss": 0.855, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.2722799343903772, | |
| "grad_norm": 0.3806265890598297, | |
| "learning_rate": 8.077858880778589e-05, | |
| "loss": 0.7383, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 2.2744669218151996, | |
| "grad_norm": 0.34413963556289673, | |
| "learning_rate": 8.053527980535278e-05, | |
| "loss": 0.7618, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.276653909240022, | |
| "grad_norm": 0.41507622599601746, | |
| "learning_rate": 8.029197080291971e-05, | |
| "loss": 0.6976, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 2.2788408966648444, | |
| "grad_norm": 0.3527161777019501, | |
| "learning_rate": 8.004866180048662e-05, | |
| "loss": 0.6337, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.2810278840896663, | |
| "grad_norm": 0.405584454536438, | |
| "learning_rate": 7.980535279805352e-05, | |
| "loss": 0.8183, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 2.2832148715144887, | |
| "grad_norm": 0.41590583324432373, | |
| "learning_rate": 7.956204379562043e-05, | |
| "loss": 0.8062, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.285401858939311, | |
| "grad_norm": 0.41613471508026123, | |
| "learning_rate": 7.931873479318733e-05, | |
| "loss": 0.6246, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 2.2875888463641334, | |
| "grad_norm": 0.44034960865974426, | |
| "learning_rate": 7.907542579075424e-05, | |
| "loss": 0.8375, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.2897758337889558, | |
| "grad_norm": 0.3828635811805725, | |
| "learning_rate": 7.883211678832117e-05, | |
| "loss": 0.8442, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 2.291962821213778, | |
| "grad_norm": 0.3389468491077423, | |
| "learning_rate": 7.858880778588807e-05, | |
| "loss": 0.7997, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.2941498086386005, | |
| "grad_norm": 0.33413904905319214, | |
| "learning_rate": 7.834549878345498e-05, | |
| "loss": 0.6141, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 2.2963367960634224, | |
| "grad_norm": 0.32505419850349426, | |
| "learning_rate": 7.810218978102189e-05, | |
| "loss": 0.5001, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.298523783488245, | |
| "grad_norm": 0.3244943618774414, | |
| "learning_rate": 7.785888077858879e-05, | |
| "loss": 0.6723, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 2.300710770913067, | |
| "grad_norm": 0.3737221658229828, | |
| "learning_rate": 7.761557177615571e-05, | |
| "loss": 0.7168, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.3028977583378896, | |
| "grad_norm": 0.4390661120414734, | |
| "learning_rate": 7.737226277372263e-05, | |
| "loss": 0.5277, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 2.305084745762712, | |
| "grad_norm": 0.42460954189300537, | |
| "learning_rate": 7.712895377128952e-05, | |
| "loss": 0.7353, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.3072717331875343, | |
| "grad_norm": 0.3381803035736084, | |
| "learning_rate": 7.688564476885644e-05, | |
| "loss": 0.6313, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 2.3094587206123567, | |
| "grad_norm": 0.33968648314476013, | |
| "learning_rate": 7.664233576642336e-05, | |
| "loss": 0.5752, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.3116457080371786, | |
| "grad_norm": 0.34770649671554565, | |
| "learning_rate": 7.639902676399026e-05, | |
| "loss": 0.7087, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 2.313832695462001, | |
| "grad_norm": 0.27934038639068604, | |
| "learning_rate": 7.615571776155717e-05, | |
| "loss": 0.5717, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.3160196828868234, | |
| "grad_norm": 0.35276851058006287, | |
| "learning_rate": 7.591240875912408e-05, | |
| "loss": 0.5339, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 2.3182066703116457, | |
| "grad_norm": 0.31707894802093506, | |
| "learning_rate": 7.566909975669098e-05, | |
| "loss": 0.5097, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.320393657736468, | |
| "grad_norm": 0.47757935523986816, | |
| "learning_rate": 7.542579075425791e-05, | |
| "loss": 0.7004, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 2.3225806451612905, | |
| "grad_norm": 0.3273807764053345, | |
| "learning_rate": 7.518248175182482e-05, | |
| "loss": 0.6859, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.324767632586113, | |
| "grad_norm": 0.30111655592918396, | |
| "learning_rate": 7.493917274939172e-05, | |
| "loss": 0.4916, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 2.326954620010935, | |
| "grad_norm": 0.33053281903266907, | |
| "learning_rate": 7.469586374695863e-05, | |
| "loss": 0.6866, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.329141607435757, | |
| "grad_norm": 0.34993547201156616, | |
| "learning_rate": 7.445255474452554e-05, | |
| "loss": 0.6471, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 2.3313285948605795, | |
| "grad_norm": 0.2865176200866699, | |
| "learning_rate": 7.420924574209245e-05, | |
| "loss": 0.4927, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.333515582285402, | |
| "grad_norm": 0.43209540843963623, | |
| "learning_rate": 7.396593673965937e-05, | |
| "loss": 0.6368, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 2.3357025697102243, | |
| "grad_norm": 0.3290870189666748, | |
| "learning_rate": 7.372262773722628e-05, | |
| "loss": 0.739, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.3378895571350466, | |
| "grad_norm": 0.3443828225135803, | |
| "learning_rate": 7.347931873479318e-05, | |
| "loss": 0.8401, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 2.340076544559869, | |
| "grad_norm": 0.32021573185920715, | |
| "learning_rate": 7.323600973236009e-05, | |
| "loss": 0.7726, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.342263531984691, | |
| "grad_norm": 0.46182501316070557, | |
| "learning_rate": 7.2992700729927e-05, | |
| "loss": 0.9029, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 2.3444505194095133, | |
| "grad_norm": 0.35512760281562805, | |
| "learning_rate": 7.274939172749391e-05, | |
| "loss": 0.6847, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.3466375068343357, | |
| "grad_norm": 0.380140483379364, | |
| "learning_rate": 7.250608272506082e-05, | |
| "loss": 0.7038, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 2.348824494259158, | |
| "grad_norm": 0.32431280612945557, | |
| "learning_rate": 7.226277372262774e-05, | |
| "loss": 0.5294, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.3510114816839804, | |
| "grad_norm": 0.2768891453742981, | |
| "learning_rate": 7.201946472019465e-05, | |
| "loss": 0.5286, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 2.353198469108803, | |
| "grad_norm": 0.3334331214427948, | |
| "learning_rate": 7.177615571776155e-05, | |
| "loss": 0.6415, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.3553854565336247, | |
| "grad_norm": 0.41533592343330383, | |
| "learning_rate": 7.153284671532846e-05, | |
| "loss": 0.6295, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 2.357572443958447, | |
| "grad_norm": 0.42005178332328796, | |
| "learning_rate": 7.128953771289537e-05, | |
| "loss": 0.8451, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.3597594313832695, | |
| "grad_norm": 0.39049747586250305, | |
| "learning_rate": 7.104622871046228e-05, | |
| "loss": 0.8351, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 2.361946418808092, | |
| "grad_norm": 0.33119314908981323, | |
| "learning_rate": 7.08029197080292e-05, | |
| "loss": 0.5981, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.3641334062329142, | |
| "grad_norm": 0.4426044225692749, | |
| "learning_rate": 7.05596107055961e-05, | |
| "loss": 0.671, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 2.3663203936577366, | |
| "grad_norm": 0.3445340096950531, | |
| "learning_rate": 7.0316301703163e-05, | |
| "loss": 0.6182, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.3685073810825585, | |
| "grad_norm": 0.35596704483032227, | |
| "learning_rate": 7.007299270072992e-05, | |
| "loss": 0.7591, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 2.370694368507381, | |
| "grad_norm": 0.39532068371772766, | |
| "learning_rate": 6.982968369829683e-05, | |
| "loss": 0.5479, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.3728813559322033, | |
| "grad_norm": 0.3580004572868347, | |
| "learning_rate": 6.958637469586374e-05, | |
| "loss": 0.796, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 2.3750683433570257, | |
| "grad_norm": 0.5314396023750305, | |
| "learning_rate": 6.934306569343065e-05, | |
| "loss": 0.5986, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.377255330781848, | |
| "grad_norm": 0.5284639596939087, | |
| "learning_rate": 6.909975669099755e-05, | |
| "loss": 0.7934, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 2.3794423182066704, | |
| "grad_norm": 0.38761386275291443, | |
| "learning_rate": 6.885644768856448e-05, | |
| "loss": 0.6072, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.3816293056314928, | |
| "grad_norm": 0.3381224572658539, | |
| "learning_rate": 6.861313868613137e-05, | |
| "loss": 0.6392, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 2.3838162930563147, | |
| "grad_norm": 0.3654699921607971, | |
| "learning_rate": 6.836982968369829e-05, | |
| "loss": 0.6068, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.386003280481137, | |
| "grad_norm": 0.343288779258728, | |
| "learning_rate": 6.81265206812652e-05, | |
| "loss": 0.868, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 2.3881902679059595, | |
| "grad_norm": 0.3624615967273712, | |
| "learning_rate": 6.788321167883211e-05, | |
| "loss": 0.6408, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.390377255330782, | |
| "grad_norm": 0.3863930404186249, | |
| "learning_rate": 6.763990267639902e-05, | |
| "loss": 0.5778, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 2.392564242755604, | |
| "grad_norm": 0.34366974234580994, | |
| "learning_rate": 6.739659367396593e-05, | |
| "loss": 0.6983, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.3947512301804266, | |
| "grad_norm": 0.34117886424064636, | |
| "learning_rate": 6.715328467153285e-05, | |
| "loss": 0.6472, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 2.396938217605249, | |
| "grad_norm": 0.3547564148902893, | |
| "learning_rate": 6.690997566909974e-05, | |
| "loss": 0.5363, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.399125205030071, | |
| "grad_norm": 0.31432420015335083, | |
| "learning_rate": 6.666666666666666e-05, | |
| "loss": 0.5539, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 2.4013121924548932, | |
| "grad_norm": 0.45095062255859375, | |
| "learning_rate": 6.642335766423357e-05, | |
| "loss": 0.6494, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.4034991798797156, | |
| "grad_norm": 1.0102994441986084, | |
| "learning_rate": 6.618004866180048e-05, | |
| "loss": 0.988, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 2.405686167304538, | |
| "grad_norm": 0.5170231461524963, | |
| "learning_rate": 6.593673965936739e-05, | |
| "loss": 0.8045, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.4078731547293604, | |
| "grad_norm": 0.2993682622909546, | |
| "learning_rate": 6.56934306569343e-05, | |
| "loss": 0.5887, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 2.4100601421541827, | |
| "grad_norm": 0.29023849964141846, | |
| "learning_rate": 6.545012165450122e-05, | |
| "loss": 0.6123, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.412247129579005, | |
| "grad_norm": 0.4196130335330963, | |
| "learning_rate": 6.520681265206811e-05, | |
| "loss": 0.6444, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 2.414434117003827, | |
| "grad_norm": 0.43228599429130554, | |
| "learning_rate": 6.496350364963504e-05, | |
| "loss": 0.7432, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.4166211044286494, | |
| "grad_norm": 0.3056860566139221, | |
| "learning_rate": 6.472019464720194e-05, | |
| "loss": 0.6673, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 2.418808091853472, | |
| "grad_norm": 0.4213399887084961, | |
| "learning_rate": 6.447688564476885e-05, | |
| "loss": 0.798, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.420995079278294, | |
| "grad_norm": 0.4033665060997009, | |
| "learning_rate": 6.423357664233576e-05, | |
| "loss": 0.7835, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 2.4231820667031165, | |
| "grad_norm": 0.35071858763694763, | |
| "learning_rate": 6.399026763990267e-05, | |
| "loss": 0.7173, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.425369054127939, | |
| "grad_norm": 0.36336860060691833, | |
| "learning_rate": 6.374695863746959e-05, | |
| "loss": 0.6904, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 2.4275560415527613, | |
| "grad_norm": 0.4012874662876129, | |
| "learning_rate": 6.350364963503648e-05, | |
| "loss": 0.6062, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.429743028977583, | |
| "grad_norm": 0.3614816665649414, | |
| "learning_rate": 6.326034063260341e-05, | |
| "loss": 0.7757, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 2.4319300164024056, | |
| "grad_norm": 0.34320759773254395, | |
| "learning_rate": 6.301703163017031e-05, | |
| "loss": 0.6789, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.434117003827228, | |
| "grad_norm": 0.3566221594810486, | |
| "learning_rate": 6.277372262773722e-05, | |
| "loss": 0.7995, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 2.4363039912520503, | |
| "grad_norm": 0.35487961769104004, | |
| "learning_rate": 6.253041362530413e-05, | |
| "loss": 0.6536, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.4384909786768727, | |
| "grad_norm": 0.3311222195625305, | |
| "learning_rate": 6.228710462287104e-05, | |
| "loss": 0.589, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 2.440677966101695, | |
| "grad_norm": 0.36649906635284424, | |
| "learning_rate": 6.204379562043796e-05, | |
| "loss": 0.7062, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.4428649535265174, | |
| "grad_norm": 0.36625346541404724, | |
| "learning_rate": 6.180048661800485e-05, | |
| "loss": 0.6585, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 2.4450519409513394, | |
| "grad_norm": 0.47065046429634094, | |
| "learning_rate": 6.155717761557178e-05, | |
| "loss": 0.8547, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.4472389283761617, | |
| "grad_norm": 0.3721199333667755, | |
| "learning_rate": 6.131386861313868e-05, | |
| "loss": 0.7003, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 2.449425915800984, | |
| "grad_norm": 0.3814185559749603, | |
| "learning_rate": 6.107055961070559e-05, | |
| "loss": 0.6616, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.4516129032258065, | |
| "grad_norm": 0.34303221106529236, | |
| "learning_rate": 6.08272506082725e-05, | |
| "loss": 0.7311, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 2.453799890650629, | |
| "grad_norm": 0.31710198521614075, | |
| "learning_rate": 6.0583941605839414e-05, | |
| "loss": 0.6767, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.4559868780754512, | |
| "grad_norm": 0.378255158662796, | |
| "learning_rate": 6.034063260340632e-05, | |
| "loss": 0.5758, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 2.4581738655002736, | |
| "grad_norm": 0.3049505949020386, | |
| "learning_rate": 6.0097323600973225e-05, | |
| "loss": 0.7468, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.4603608529250955, | |
| "grad_norm": 0.31383493542671204, | |
| "learning_rate": 5.985401459854014e-05, | |
| "loss": 0.5064, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 2.462547840349918, | |
| "grad_norm": 0.4120381474494934, | |
| "learning_rate": 5.961070559610705e-05, | |
| "loss": 0.5933, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.4647348277747403, | |
| "grad_norm": 0.41584497690200806, | |
| "learning_rate": 5.936739659367396e-05, | |
| "loss": 0.6191, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 2.4669218151995627, | |
| "grad_norm": 0.4834405481815338, | |
| "learning_rate": 5.912408759124087e-05, | |
| "loss": 0.6092, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.469108802624385, | |
| "grad_norm": 0.30698856711387634, | |
| "learning_rate": 5.8880778588807784e-05, | |
| "loss": 0.6318, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 2.4712957900492074, | |
| "grad_norm": 0.42027831077575684, | |
| "learning_rate": 5.863746958637469e-05, | |
| "loss": 0.5981, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.4734827774740293, | |
| "grad_norm": 0.46082839369773865, | |
| "learning_rate": 5.83941605839416e-05, | |
| "loss": 0.7592, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 2.4756697648988517, | |
| "grad_norm": 0.3530132472515106, | |
| "learning_rate": 5.815085158150851e-05, | |
| "loss": 0.6589, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.477856752323674, | |
| "grad_norm": 0.40325507521629333, | |
| "learning_rate": 5.790754257907542e-05, | |
| "loss": 0.6136, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 2.4800437397484965, | |
| "grad_norm": 0.5407168865203857, | |
| "learning_rate": 5.7664233576642324e-05, | |
| "loss": 0.818, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.482230727173319, | |
| "grad_norm": 0.3995073139667511, | |
| "learning_rate": 5.742092457420924e-05, | |
| "loss": 0.7405, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 2.484417714598141, | |
| "grad_norm": 0.327036052942276, | |
| "learning_rate": 5.717761557177615e-05, | |
| "loss": 0.5611, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.486604702022963, | |
| "grad_norm": 0.4143662750720978, | |
| "learning_rate": 5.693430656934306e-05, | |
| "loss": 0.7194, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 2.4887916894477855, | |
| "grad_norm": 0.37465140223503113, | |
| "learning_rate": 5.669099756690997e-05, | |
| "loss": 0.8684, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.490978676872608, | |
| "grad_norm": 0.3546184301376343, | |
| "learning_rate": 5.644768856447688e-05, | |
| "loss": 0.5464, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 2.4931656642974303, | |
| "grad_norm": 0.5521944165229797, | |
| "learning_rate": 5.620437956204379e-05, | |
| "loss": 0.6143, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.4953526517222526, | |
| "grad_norm": 0.3398590385913849, | |
| "learning_rate": 5.596107055961071e-05, | |
| "loss": 0.7098, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 2.497539639147075, | |
| "grad_norm": 0.28899359703063965, | |
| "learning_rate": 5.571776155717761e-05, | |
| "loss": 0.6263, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.4997266265718974, | |
| "grad_norm": 0.3622675836086273, | |
| "learning_rate": 5.547445255474452e-05, | |
| "loss": 0.5183, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 2.5019136139967193, | |
| "grad_norm": 0.3359682261943817, | |
| "learning_rate": 5.523114355231143e-05, | |
| "loss": 0.7125, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.5041006014215417, | |
| "grad_norm": 0.42786240577697754, | |
| "learning_rate": 5.498783454987834e-05, | |
| "loss": 0.6445, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 2.506287588846364, | |
| "grad_norm": 0.340658575296402, | |
| "learning_rate": 5.4744525547445253e-05, | |
| "loss": 0.5709, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.5084745762711864, | |
| "grad_norm": 0.3030422031879425, | |
| "learning_rate": 5.450121654501216e-05, | |
| "loss": 0.5894, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 2.510661563696009, | |
| "grad_norm": 0.4911826550960541, | |
| "learning_rate": 5.425790754257907e-05, | |
| "loss": 0.6198, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.512848551120831, | |
| "grad_norm": 0.3828030824661255, | |
| "learning_rate": 5.401459854014598e-05, | |
| "loss": 0.7856, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 2.5150355385456535, | |
| "grad_norm": 0.354000449180603, | |
| "learning_rate": 5.377128953771289e-05, | |
| "loss": 0.5489, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.5172225259704755, | |
| "grad_norm": 0.2972152829170227, | |
| "learning_rate": 5.3527980535279806e-05, | |
| "loss": 0.773, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 2.519409513395298, | |
| "grad_norm": 0.3820708394050598, | |
| "learning_rate": 5.328467153284671e-05, | |
| "loss": 0.6889, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.52159650082012, | |
| "grad_norm": 0.3476285934448242, | |
| "learning_rate": 5.304136253041362e-05, | |
| "loss": 0.5365, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 2.5237834882449426, | |
| "grad_norm": 0.36393001675605774, | |
| "learning_rate": 5.279805352798053e-05, | |
| "loss": 0.6012, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.525970475669765, | |
| "grad_norm": 0.3589417338371277, | |
| "learning_rate": 5.255474452554744e-05, | |
| "loss": 0.6502, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 2.5281574630945873, | |
| "grad_norm": 0.34018373489379883, | |
| "learning_rate": 5.231143552311435e-05, | |
| "loss": 0.6489, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.5303444505194097, | |
| "grad_norm": 0.40649306774139404, | |
| "learning_rate": 5.206812652068126e-05, | |
| "loss": 0.6107, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 2.5325314379442316, | |
| "grad_norm": 0.3748558759689331, | |
| "learning_rate": 5.1824817518248176e-05, | |
| "loss": 0.5517, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.534718425369054, | |
| "grad_norm": 0.4162946939468384, | |
| "learning_rate": 5.158150851581508e-05, | |
| "loss": 0.5658, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 2.5369054127938764, | |
| "grad_norm": 0.40900272130966187, | |
| "learning_rate": 5.133819951338199e-05, | |
| "loss": 0.6965, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.5390924002186988, | |
| "grad_norm": 0.4511730372905731, | |
| "learning_rate": 5.10948905109489e-05, | |
| "loss": 0.7305, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 2.541279387643521, | |
| "grad_norm": 0.4122026860713959, | |
| "learning_rate": 5.085158150851581e-05, | |
| "loss": 0.6032, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.5434663750683435, | |
| "grad_norm": 0.33657750487327576, | |
| "learning_rate": 5.060827250608272e-05, | |
| "loss": 0.6772, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 2.545653362493166, | |
| "grad_norm": 0.3611637353897095, | |
| "learning_rate": 5.036496350364963e-05, | |
| "loss": 0.7829, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.547840349917988, | |
| "grad_norm": 0.3221738040447235, | |
| "learning_rate": 5.0121654501216546e-05, | |
| "loss": 0.656, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 2.55002733734281, | |
| "grad_norm": 0.30915001034736633, | |
| "learning_rate": 4.987834549878345e-05, | |
| "loss": 0.55, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.5522143247676325, | |
| "grad_norm": 0.3413131535053253, | |
| "learning_rate": 4.963503649635036e-05, | |
| "loss": 0.7515, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 2.554401312192455, | |
| "grad_norm": 0.4244505763053894, | |
| "learning_rate": 4.9391727493917275e-05, | |
| "loss": 0.7202, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.5565882996172773, | |
| "grad_norm": 0.2993778586387634, | |
| "learning_rate": 4.914841849148418e-05, | |
| "loss": 0.4497, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 2.5587752870420997, | |
| "grad_norm": 0.43434271216392517, | |
| "learning_rate": 4.8905109489051086e-05, | |
| "loss": 0.591, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.560962274466922, | |
| "grad_norm": 0.35246193408966064, | |
| "learning_rate": 4.8661800486618e-05, | |
| "loss": 0.537, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 2.563149261891744, | |
| "grad_norm": 0.37283191084861755, | |
| "learning_rate": 4.841849148418491e-05, | |
| "loss": 0.5856, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.5653362493165663, | |
| "grad_norm": 0.39839670062065125, | |
| "learning_rate": 4.817518248175182e-05, | |
| "loss": 0.4996, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 2.5675232367413887, | |
| "grad_norm": 0.4315820634365082, | |
| "learning_rate": 4.793187347931873e-05, | |
| "loss": 0.7119, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.569710224166211, | |
| "grad_norm": 0.4408882260322571, | |
| "learning_rate": 4.7688564476885646e-05, | |
| "loss": 0.7059, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 2.5718972115910335, | |
| "grad_norm": 0.4746418595314026, | |
| "learning_rate": 4.744525547445255e-05, | |
| "loss": 0.6944, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.5740841990158554, | |
| "grad_norm": 0.31449419260025024, | |
| "learning_rate": 4.7201946472019456e-05, | |
| "loss": 0.7469, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 2.576271186440678, | |
| "grad_norm": 0.4608743190765381, | |
| "learning_rate": 4.6958637469586375e-05, | |
| "loss": 0.4727, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.5784581738655, | |
| "grad_norm": 0.3578025996685028, | |
| "learning_rate": 4.671532846715328e-05, | |
| "loss": 0.8796, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 2.5806451612903225, | |
| "grad_norm": 0.3281157612800598, | |
| "learning_rate": 4.647201946472019e-05, | |
| "loss": 0.5228, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.582832148715145, | |
| "grad_norm": 0.34412261843681335, | |
| "learning_rate": 4.62287104622871e-05, | |
| "loss": 0.6171, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 2.5850191361399673, | |
| "grad_norm": 0.32819414138793945, | |
| "learning_rate": 4.5985401459854016e-05, | |
| "loss": 0.6381, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.5872061235647896, | |
| "grad_norm": 0.42394185066223145, | |
| "learning_rate": 4.574209245742092e-05, | |
| "loss": 0.6248, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 2.5893931109896116, | |
| "grad_norm": 0.3938983082771301, | |
| "learning_rate": 4.5498783454987826e-05, | |
| "loss": 0.688, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.5915800984144344, | |
| "grad_norm": 0.35975101590156555, | |
| "learning_rate": 4.5255474452554745e-05, | |
| "loss": 0.6196, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 2.5937670858392563, | |
| "grad_norm": 0.5351125597953796, | |
| "learning_rate": 4.501216545012165e-05, | |
| "loss": 0.6542, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.5959540732640787, | |
| "grad_norm": 0.31686198711395264, | |
| "learning_rate": 4.476885644768856e-05, | |
| "loss": 0.7063, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 2.598141060688901, | |
| "grad_norm": 0.2979380786418915, | |
| "learning_rate": 4.4525547445255474e-05, | |
| "loss": 0.5374, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.6003280481137234, | |
| "grad_norm": 0.3495193123817444, | |
| "learning_rate": 4.428223844282238e-05, | |
| "loss": 0.6217, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 2.602515035538546, | |
| "grad_norm": 0.3886531591415405, | |
| "learning_rate": 4.403892944038929e-05, | |
| "loss": 0.5628, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.6047020229633677, | |
| "grad_norm": 0.3585399091243744, | |
| "learning_rate": 4.3795620437956196e-05, | |
| "loss": 0.6921, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 2.6068890103881905, | |
| "grad_norm": 0.3813333809375763, | |
| "learning_rate": 4.3552311435523115e-05, | |
| "loss": 0.6603, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.6090759978130125, | |
| "grad_norm": 0.4587854743003845, | |
| "learning_rate": 4.330900243309002e-05, | |
| "loss": 0.7274, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 2.611262985237835, | |
| "grad_norm": 0.4350600242614746, | |
| "learning_rate": 4.3065693430656925e-05, | |
| "loss": 0.6628, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.613449972662657, | |
| "grad_norm": 0.3220929205417633, | |
| "learning_rate": 4.2822384428223844e-05, | |
| "loss": 0.6057, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 2.6156369600874796, | |
| "grad_norm": 0.54576575756073, | |
| "learning_rate": 4.257907542579075e-05, | |
| "loss": 0.693, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.617823947512302, | |
| "grad_norm": 0.393766850233078, | |
| "learning_rate": 4.233576642335766e-05, | |
| "loss": 0.6226, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 2.620010934937124, | |
| "grad_norm": 0.3243195116519928, | |
| "learning_rate": 4.209245742092457e-05, | |
| "loss": 0.7465, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.6221979223619463, | |
| "grad_norm": 0.3847908079624176, | |
| "learning_rate": 4.1849148418491485e-05, | |
| "loss": 0.4963, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 2.6243849097867686, | |
| "grad_norm": 0.40093564987182617, | |
| "learning_rate": 4.160583941605839e-05, | |
| "loss": 0.7138, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.626571897211591, | |
| "grad_norm": 0.4176326096057892, | |
| "learning_rate": 4.1362530413625295e-05, | |
| "loss": 0.4808, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 2.6287588846364134, | |
| "grad_norm": 0.3477429151535034, | |
| "learning_rate": 4.1119221411192214e-05, | |
| "loss": 0.6285, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.6309458720612358, | |
| "grad_norm": 0.4201376736164093, | |
| "learning_rate": 4.087591240875912e-05, | |
| "loss": 1.0551, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 2.633132859486058, | |
| "grad_norm": 0.4241773188114166, | |
| "learning_rate": 4.063260340632603e-05, | |
| "loss": 0.6991, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.63531984691088, | |
| "grad_norm": 0.5858724117279053, | |
| "learning_rate": 4.038929440389294e-05, | |
| "loss": 0.6912, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 2.6375068343357024, | |
| "grad_norm": 0.3396605849266052, | |
| "learning_rate": 4.0145985401459855e-05, | |
| "loss": 0.5062, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.639693821760525, | |
| "grad_norm": 0.3286657929420471, | |
| "learning_rate": 3.990267639902676e-05, | |
| "loss": 0.678, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 2.641880809185347, | |
| "grad_norm": 0.3253632187843323, | |
| "learning_rate": 3.9659367396593665e-05, | |
| "loss": 0.5769, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.6440677966101696, | |
| "grad_norm": 0.39935943484306335, | |
| "learning_rate": 3.9416058394160584e-05, | |
| "loss": 0.6078, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 2.646254784034992, | |
| "grad_norm": 0.38090863823890686, | |
| "learning_rate": 3.917274939172749e-05, | |
| "loss": 0.6195, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.6484417714598143, | |
| "grad_norm": 0.3816772401332855, | |
| "learning_rate": 3.8929440389294394e-05, | |
| "loss": 0.6636, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 2.6506287588846362, | |
| "grad_norm": 0.354041188955307, | |
| "learning_rate": 3.868613138686131e-05, | |
| "loss": 0.6017, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.6528157463094586, | |
| "grad_norm": 0.38338416814804077, | |
| "learning_rate": 3.844282238442822e-05, | |
| "loss": 0.5642, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 2.655002733734281, | |
| "grad_norm": 0.4089908003807068, | |
| "learning_rate": 3.819951338199513e-05, | |
| "loss": 0.7222, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.6571897211591033, | |
| "grad_norm": 0.44963401556015015, | |
| "learning_rate": 3.795620437956204e-05, | |
| "loss": 0.613, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 2.6593767085839257, | |
| "grad_norm": 0.2840285003185272, | |
| "learning_rate": 3.7712895377128954e-05, | |
| "loss": 0.6435, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.661563696008748, | |
| "grad_norm": 0.39185985922813416, | |
| "learning_rate": 3.746958637469586e-05, | |
| "loss": 0.7633, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 2.6637506834335705, | |
| "grad_norm": 0.3823552131652832, | |
| "learning_rate": 3.722627737226277e-05, | |
| "loss": 0.6632, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.6659376708583924, | |
| "grad_norm": 0.4937818646430969, | |
| "learning_rate": 3.698296836982968e-05, | |
| "loss": 0.8944, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 2.6681246582832148, | |
| "grad_norm": 0.38062620162963867, | |
| "learning_rate": 3.673965936739659e-05, | |
| "loss": 0.7507, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.670311645708037, | |
| "grad_norm": 0.34089863300323486, | |
| "learning_rate": 3.64963503649635e-05, | |
| "loss": 0.6276, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 2.6724986331328595, | |
| "grad_norm": 0.45665138959884644, | |
| "learning_rate": 3.625304136253041e-05, | |
| "loss": 0.6801, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.674685620557682, | |
| "grad_norm": 0.5102551579475403, | |
| "learning_rate": 3.6009732360097324e-05, | |
| "loss": 0.5385, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 2.6768726079825043, | |
| "grad_norm": 0.4079155921936035, | |
| "learning_rate": 3.576642335766423e-05, | |
| "loss": 0.7165, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.6790595954073266, | |
| "grad_norm": 0.3809445798397064, | |
| "learning_rate": 3.552311435523114e-05, | |
| "loss": 0.6695, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 2.6812465828321486, | |
| "grad_norm": 0.44514816999435425, | |
| "learning_rate": 3.527980535279805e-05, | |
| "loss": 0.732, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.683433570256971, | |
| "grad_norm": 0.40891462564468384, | |
| "learning_rate": 3.503649635036496e-05, | |
| "loss": 0.9004, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 2.6856205576817933, | |
| "grad_norm": 0.44487065076828003, | |
| "learning_rate": 3.479318734793187e-05, | |
| "loss": 0.4452, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.6878075451066157, | |
| "grad_norm": 0.27980828285217285, | |
| "learning_rate": 3.4549878345498775e-05, | |
| "loss": 0.6259, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 2.689994532531438, | |
| "grad_norm": 0.37272408604621887, | |
| "learning_rate": 3.430656934306569e-05, | |
| "loss": 0.7493, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.69218151995626, | |
| "grad_norm": 0.4146464169025421, | |
| "learning_rate": 3.40632603406326e-05, | |
| "loss": 0.5103, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 2.694368507381083, | |
| "grad_norm": 0.350233793258667, | |
| "learning_rate": 3.381995133819951e-05, | |
| "loss": 0.6766, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.6965554948059047, | |
| "grad_norm": 0.49093326926231384, | |
| "learning_rate": 3.357664233576642e-05, | |
| "loss": 0.6934, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 2.698742482230727, | |
| "grad_norm": 0.4598555266857147, | |
| "learning_rate": 3.333333333333333e-05, | |
| "loss": 0.6618, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.7009294696555495, | |
| "grad_norm": 0.4397393465042114, | |
| "learning_rate": 3.309002433090024e-05, | |
| "loss": 0.5864, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 2.703116457080372, | |
| "grad_norm": 0.43458834290504456, | |
| "learning_rate": 3.284671532846715e-05, | |
| "loss": 0.6955, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.705303444505194, | |
| "grad_norm": 0.3657298684120178, | |
| "learning_rate": 3.260340632603406e-05, | |
| "loss": 0.651, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 2.707490431930016, | |
| "grad_norm": 0.4210680425167084, | |
| "learning_rate": 3.236009732360097e-05, | |
| "loss": 0.5718, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.709677419354839, | |
| "grad_norm": 0.3858646750450134, | |
| "learning_rate": 3.211678832116788e-05, | |
| "loss": 0.6649, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 2.711864406779661, | |
| "grad_norm": 0.4130675494670868, | |
| "learning_rate": 3.187347931873479e-05, | |
| "loss": 0.6539, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.7140513942044833, | |
| "grad_norm": 0.246662899851799, | |
| "learning_rate": 3.1630170316301705e-05, | |
| "loss": 0.5551, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 2.7162383816293056, | |
| "grad_norm": 0.3459307551383972, | |
| "learning_rate": 3.138686131386861e-05, | |
| "loss": 0.4788, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.718425369054128, | |
| "grad_norm": 0.4324615001678467, | |
| "learning_rate": 3.114355231143552e-05, | |
| "loss": 0.7828, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 2.7206123564789504, | |
| "grad_norm": 0.5233476758003235, | |
| "learning_rate": 3.090024330900243e-05, | |
| "loss": 0.4262, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.7227993439037723, | |
| "grad_norm": 0.35397472977638245, | |
| "learning_rate": 3.065693430656934e-05, | |
| "loss": 0.688, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 2.724986331328595, | |
| "grad_norm": 0.37005069851875305, | |
| "learning_rate": 3.041362530413625e-05, | |
| "loss": 0.6592, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.727173318753417, | |
| "grad_norm": 0.4533984661102295, | |
| "learning_rate": 3.017031630170316e-05, | |
| "loss": 0.6367, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 2.7293603061782394, | |
| "grad_norm": 0.32724103331565857, | |
| "learning_rate": 2.992700729927007e-05, | |
| "loss": 0.5874, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.731547293603062, | |
| "grad_norm": 0.3568969666957855, | |
| "learning_rate": 2.968369829683698e-05, | |
| "loss": 0.8173, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 2.733734281027884, | |
| "grad_norm": 0.3268612325191498, | |
| "learning_rate": 2.9440389294403892e-05, | |
| "loss": 0.4827, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.7359212684527066, | |
| "grad_norm": 0.30471158027648926, | |
| "learning_rate": 2.91970802919708e-05, | |
| "loss": 0.7108, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 2.7381082558775285, | |
| "grad_norm": 0.3290720582008362, | |
| "learning_rate": 2.895377128953771e-05, | |
| "loss": 0.639, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.740295243302351, | |
| "grad_norm": 0.35110557079315186, | |
| "learning_rate": 2.871046228710462e-05, | |
| "loss": 0.5367, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 2.7424822307271732, | |
| "grad_norm": 0.26838091015815735, | |
| "learning_rate": 2.846715328467153e-05, | |
| "loss": 0.801, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.7446692181519956, | |
| "grad_norm": 0.3596297800540924, | |
| "learning_rate": 2.822384428223844e-05, | |
| "loss": 0.6018, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 2.746856205576818, | |
| "grad_norm": 0.4146590530872345, | |
| "learning_rate": 2.7980535279805354e-05, | |
| "loss": 0.7548, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.7490431930016404, | |
| "grad_norm": 0.5210931897163391, | |
| "learning_rate": 2.773722627737226e-05, | |
| "loss": 0.6514, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 2.7512301804264627, | |
| "grad_norm": 0.37990838289260864, | |
| "learning_rate": 2.749391727493917e-05, | |
| "loss": 0.6275, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.7534171678512847, | |
| "grad_norm": 0.41597574949264526, | |
| "learning_rate": 2.725060827250608e-05, | |
| "loss": 0.7675, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 2.755604155276107, | |
| "grad_norm": 0.4515291452407837, | |
| "learning_rate": 2.700729927007299e-05, | |
| "loss": 0.6756, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.7577911427009294, | |
| "grad_norm": 0.418295294046402, | |
| "learning_rate": 2.6763990267639903e-05, | |
| "loss": 0.6417, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 2.7599781301257518, | |
| "grad_norm": 0.34704264998435974, | |
| "learning_rate": 2.652068126520681e-05, | |
| "loss": 0.8996, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.762165117550574, | |
| "grad_norm": 0.3458947241306305, | |
| "learning_rate": 2.627737226277372e-05, | |
| "loss": 0.8436, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 2.7643521049753965, | |
| "grad_norm": 0.39911675453186035, | |
| "learning_rate": 2.603406326034063e-05, | |
| "loss": 0.5799, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.766539092400219, | |
| "grad_norm": 0.2880173623561859, | |
| "learning_rate": 2.579075425790754e-05, | |
| "loss": 0.5253, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 2.768726079825041, | |
| "grad_norm": 0.35598114132881165, | |
| "learning_rate": 2.554744525547445e-05, | |
| "loss": 0.6593, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.770913067249863, | |
| "grad_norm": 0.34010377526283264, | |
| "learning_rate": 2.530413625304136e-05, | |
| "loss": 0.6076, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 2.7731000546746856, | |
| "grad_norm": 0.37857237458229065, | |
| "learning_rate": 2.5060827250608273e-05, | |
| "loss": 0.7757, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.775287042099508, | |
| "grad_norm": 0.6945297718048096, | |
| "learning_rate": 2.481751824817518e-05, | |
| "loss": 0.7243, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 2.7774740295243303, | |
| "grad_norm": 0.3066571354866028, | |
| "learning_rate": 2.457420924574209e-05, | |
| "loss": 0.6558, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.7796610169491527, | |
| "grad_norm": 0.42167848348617554, | |
| "learning_rate": 2.4330900243309e-05, | |
| "loss": 0.6929, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 2.781848004373975, | |
| "grad_norm": 0.4334861934185028, | |
| "learning_rate": 2.408759124087591e-05, | |
| "loss": 0.6516, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.784034991798797, | |
| "grad_norm": 0.39597228169441223, | |
| "learning_rate": 2.3844282238442823e-05, | |
| "loss": 0.688, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 2.7862219792236194, | |
| "grad_norm": 0.36653244495391846, | |
| "learning_rate": 2.3600973236009728e-05, | |
| "loss": 0.7899, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.7884089666484417, | |
| "grad_norm": 0.4496842622756958, | |
| "learning_rate": 2.335766423357664e-05, | |
| "loss": 0.7682, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 2.790595954073264, | |
| "grad_norm": 0.5105994343757629, | |
| "learning_rate": 2.311435523114355e-05, | |
| "loss": 0.6332, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.7927829414980865, | |
| "grad_norm": 0.30159294605255127, | |
| "learning_rate": 2.287104622871046e-05, | |
| "loss": 0.6215, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 2.794969928922909, | |
| "grad_norm": 0.44565349817276, | |
| "learning_rate": 2.2627737226277372e-05, | |
| "loss": 0.8171, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.7971569163477312, | |
| "grad_norm": 0.48561230301856995, | |
| "learning_rate": 2.238442822384428e-05, | |
| "loss": 0.7251, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 2.799343903772553, | |
| "grad_norm": 0.4640182554721832, | |
| "learning_rate": 2.214111922141119e-05, | |
| "loss": 0.8137, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.8015308911973755, | |
| "grad_norm": 0.34384575486183167, | |
| "learning_rate": 2.1897810218978098e-05, | |
| "loss": 0.7161, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 2.803717878622198, | |
| "grad_norm": 0.3967885971069336, | |
| "learning_rate": 2.165450121654501e-05, | |
| "loss": 0.6331, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.8059048660470203, | |
| "grad_norm": 0.4139404892921448, | |
| "learning_rate": 2.1411192214111922e-05, | |
| "loss": 0.7716, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 2.8080918534718426, | |
| "grad_norm": 0.5906177163124084, | |
| "learning_rate": 2.116788321167883e-05, | |
| "loss": 0.8308, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.8102788408966646, | |
| "grad_norm": 0.3923112452030182, | |
| "learning_rate": 2.0924574209245742e-05, | |
| "loss": 0.5808, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 2.8124658283214874, | |
| "grad_norm": 0.376613050699234, | |
| "learning_rate": 2.0681265206812648e-05, | |
| "loss": 0.4945, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.8146528157463093, | |
| "grad_norm": 0.39711064100265503, | |
| "learning_rate": 2.043795620437956e-05, | |
| "loss": 0.9447, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 2.8168398031711317, | |
| "grad_norm": 0.49172040820121765, | |
| "learning_rate": 2.019464720194647e-05, | |
| "loss": 0.5981, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.819026790595954, | |
| "grad_norm": 0.3777097165584564, | |
| "learning_rate": 1.995133819951338e-05, | |
| "loss": 0.5527, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 2.8212137780207764, | |
| "grad_norm": 0.3420855700969696, | |
| "learning_rate": 1.9708029197080292e-05, | |
| "loss": 0.591, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.823400765445599, | |
| "grad_norm": 0.3033166825771332, | |
| "learning_rate": 1.9464720194647197e-05, | |
| "loss": 0.4902, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 2.8255877528704207, | |
| "grad_norm": 0.3743399679660797, | |
| "learning_rate": 1.922141119221411e-05, | |
| "loss": 0.72, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.8277747402952436, | |
| "grad_norm": 0.43312016129493713, | |
| "learning_rate": 1.897810218978102e-05, | |
| "loss": 0.5847, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 2.8299617277200655, | |
| "grad_norm": 0.4334290623664856, | |
| "learning_rate": 1.873479318734793e-05, | |
| "loss": 0.737, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.832148715144888, | |
| "grad_norm": 0.3262549340724945, | |
| "learning_rate": 1.849148418491484e-05, | |
| "loss": 0.6188, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 2.8343357025697102, | |
| "grad_norm": 0.3808232247829437, | |
| "learning_rate": 1.824817518248175e-05, | |
| "loss": 0.8153, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.8365226899945326, | |
| "grad_norm": 0.35475462675094604, | |
| "learning_rate": 1.8004866180048662e-05, | |
| "loss": 0.5671, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 2.838709677419355, | |
| "grad_norm": 0.38812217116355896, | |
| "learning_rate": 1.776155717761557e-05, | |
| "loss": 0.6323, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.840896664844177, | |
| "grad_norm": 0.3561973571777344, | |
| "learning_rate": 1.751824817518248e-05, | |
| "loss": 0.6919, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 2.8430836522689997, | |
| "grad_norm": 0.31703197956085205, | |
| "learning_rate": 1.7274939172749388e-05, | |
| "loss": 0.6856, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.8452706396938217, | |
| "grad_norm": 0.41529974341392517, | |
| "learning_rate": 1.70316301703163e-05, | |
| "loss": 0.7612, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 2.847457627118644, | |
| "grad_norm": 0.42857563495635986, | |
| "learning_rate": 1.678832116788321e-05, | |
| "loss": 0.8243, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.8496446145434664, | |
| "grad_norm": 0.4402436912059784, | |
| "learning_rate": 1.654501216545012e-05, | |
| "loss": 0.6149, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 2.8518316019682888, | |
| "grad_norm": 0.5396206378936768, | |
| "learning_rate": 1.630170316301703e-05, | |
| "loss": 0.623, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.854018589393111, | |
| "grad_norm": 0.3337330222129822, | |
| "learning_rate": 1.605839416058394e-05, | |
| "loss": 0.6207, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 2.856205576817933, | |
| "grad_norm": 0.47766539454460144, | |
| "learning_rate": 1.5815085158150852e-05, | |
| "loss": 0.7012, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.8583925642427555, | |
| "grad_norm": 0.3661979138851166, | |
| "learning_rate": 1.557177615571776e-05, | |
| "loss": 0.6951, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 2.860579551667578, | |
| "grad_norm": 0.32364702224731445, | |
| "learning_rate": 1.532846715328467e-05, | |
| "loss": 0.5451, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.8627665390924, | |
| "grad_norm": 0.4927031695842743, | |
| "learning_rate": 1.508515815085158e-05, | |
| "loss": 0.6483, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 2.8649535265172226, | |
| "grad_norm": 0.3563484847545624, | |
| "learning_rate": 1.484184914841849e-05, | |
| "loss": 0.6751, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.867140513942045, | |
| "grad_norm": 0.3271696865558624, | |
| "learning_rate": 1.45985401459854e-05, | |
| "loss": 0.5288, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 2.8693275013668673, | |
| "grad_norm": 0.3783499300479889, | |
| "learning_rate": 1.435523114355231e-05, | |
| "loss": 0.7292, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.8715144887916892, | |
| "grad_norm": 0.39892178773880005, | |
| "learning_rate": 1.411192214111922e-05, | |
| "loss": 0.7258, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 2.8737014762165116, | |
| "grad_norm": 0.27586114406585693, | |
| "learning_rate": 1.386861313868613e-05, | |
| "loss": 0.4122, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.875888463641334, | |
| "grad_norm": 0.4590570330619812, | |
| "learning_rate": 1.362530413625304e-05, | |
| "loss": 0.7205, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 2.8780754510661564, | |
| "grad_norm": 0.34512102603912354, | |
| "learning_rate": 1.3381995133819952e-05, | |
| "loss": 0.7402, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.8802624384909787, | |
| "grad_norm": 0.4092288613319397, | |
| "learning_rate": 1.313868613138686e-05, | |
| "loss": 0.7668, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 2.882449425915801, | |
| "grad_norm": 0.4686785638332367, | |
| "learning_rate": 1.289537712895377e-05, | |
| "loss": 0.5874, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.8846364133406235, | |
| "grad_norm": 0.341987669467926, | |
| "learning_rate": 1.265206812652068e-05, | |
| "loss": 0.7645, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 2.8868234007654454, | |
| "grad_norm": 0.6410381197929382, | |
| "learning_rate": 1.240875912408759e-05, | |
| "loss": 0.7446, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.889010388190268, | |
| "grad_norm": 0.4242047965526581, | |
| "learning_rate": 1.21654501216545e-05, | |
| "loss": 0.5989, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 2.89119737561509, | |
| "grad_norm": 0.3659310042858124, | |
| "learning_rate": 1.1922141119221411e-05, | |
| "loss": 0.6532, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.8933843630399125, | |
| "grad_norm": 0.40684065222740173, | |
| "learning_rate": 1.167883211678832e-05, | |
| "loss": 0.657, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 2.895571350464735, | |
| "grad_norm": 0.47506752610206604, | |
| "learning_rate": 1.143552311435523e-05, | |
| "loss": 0.4426, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.8977583378895573, | |
| "grad_norm": 0.3505801260471344, | |
| "learning_rate": 1.119221411192214e-05, | |
| "loss": 0.724, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 2.8999453253143797, | |
| "grad_norm": 0.4182322025299072, | |
| "learning_rate": 1.0948905109489049e-05, | |
| "loss": 0.6425, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.9021323127392016, | |
| "grad_norm": 0.5423049330711365, | |
| "learning_rate": 1.0705596107055961e-05, | |
| "loss": 0.6135, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 2.904319300164024, | |
| "grad_norm": 0.47435280680656433, | |
| "learning_rate": 1.0462287104622871e-05, | |
| "loss": 0.6161, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.9065062875888463, | |
| "grad_norm": 0.30286717414855957, | |
| "learning_rate": 1.021897810218978e-05, | |
| "loss": 0.5494, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 2.9086932750136687, | |
| "grad_norm": 0.34891781210899353, | |
| "learning_rate": 9.97566909975669e-06, | |
| "loss": 0.8073, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.910880262438491, | |
| "grad_norm": 0.3608086109161377, | |
| "learning_rate": 9.732360097323599e-06, | |
| "loss": 0.6207, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 2.9130672498633134, | |
| "grad_norm": 0.2914386987686157, | |
| "learning_rate": 9.48905109489051e-06, | |
| "loss": 0.6153, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.915254237288136, | |
| "grad_norm": 0.4532075822353363, | |
| "learning_rate": 9.24574209245742e-06, | |
| "loss": 0.8057, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 2.9174412247129577, | |
| "grad_norm": 0.47955191135406494, | |
| "learning_rate": 9.002433090024331e-06, | |
| "loss": 0.7378, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.91962821213778, | |
| "grad_norm": 0.3728046715259552, | |
| "learning_rate": 8.75912408759124e-06, | |
| "loss": 0.5957, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 2.9218151995626025, | |
| "grad_norm": 0.39728742837905884, | |
| "learning_rate": 8.51581508515815e-06, | |
| "loss": 0.7254, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.924002186987425, | |
| "grad_norm": 0.375864714384079, | |
| "learning_rate": 8.27250608272506e-06, | |
| "loss": 0.7013, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 2.9261891744122472, | |
| "grad_norm": 0.3625723719596863, | |
| "learning_rate": 8.02919708029197e-06, | |
| "loss": 0.866, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.928376161837069, | |
| "grad_norm": 0.46779105067253113, | |
| "learning_rate": 7.78588807785888e-06, | |
| "loss": 0.7114, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 2.930563149261892, | |
| "grad_norm": 0.3270869851112366, | |
| "learning_rate": 7.54257907542579e-06, | |
| "loss": 0.6085, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.932750136686714, | |
| "grad_norm": 0.3992483913898468, | |
| "learning_rate": 7.2992700729927e-06, | |
| "loss": 0.6498, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 2.9349371241115363, | |
| "grad_norm": 0.41171202063560486, | |
| "learning_rate": 7.05596107055961e-06, | |
| "loss": 0.7382, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.9371241115363587, | |
| "grad_norm": 0.7751166224479675, | |
| "learning_rate": 6.81265206812652e-06, | |
| "loss": 0.8629, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 2.939311098961181, | |
| "grad_norm": 0.558593213558197, | |
| "learning_rate": 6.56934306569343e-06, | |
| "loss": 0.9791, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.9414980863860034, | |
| "grad_norm": 0.40517720580101013, | |
| "learning_rate": 6.32603406326034e-06, | |
| "loss": 0.6608, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 2.9436850738108253, | |
| "grad_norm": 0.44248199462890625, | |
| "learning_rate": 6.08272506082725e-06, | |
| "loss": 0.5619, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.945872061235648, | |
| "grad_norm": 0.3731604814529419, | |
| "learning_rate": 5.83941605839416e-06, | |
| "loss": 0.6585, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 2.94805904866047, | |
| "grad_norm": 0.524138867855072, | |
| "learning_rate": 5.59610705596107e-06, | |
| "loss": 0.5278, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.9502460360852925, | |
| "grad_norm": 0.31725287437438965, | |
| "learning_rate": 5.3527980535279805e-06, | |
| "loss": 0.7118, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 2.952433023510115, | |
| "grad_norm": 0.3865452706813812, | |
| "learning_rate": 5.10948905109489e-06, | |
| "loss": 0.6209, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.954620010934937, | |
| "grad_norm": 0.36308881640434265, | |
| "learning_rate": 4.866180048661799e-06, | |
| "loss": 0.5582, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 2.9568069983597596, | |
| "grad_norm": 0.4439944922924042, | |
| "learning_rate": 4.62287104622871e-06, | |
| "loss": 0.587, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.9589939857845815, | |
| "grad_norm": 0.44962093234062195, | |
| "learning_rate": 4.37956204379562e-06, | |
| "loss": 0.7883, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 2.9611809732094043, | |
| "grad_norm": 0.6172670722007751, | |
| "learning_rate": 4.13625304136253e-06, | |
| "loss": 0.7554, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.9633679606342263, | |
| "grad_norm": 0.4022207260131836, | |
| "learning_rate": 3.89294403892944e-06, | |
| "loss": 0.7109, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 2.9655549480590486, | |
| "grad_norm": 0.4858662486076355, | |
| "learning_rate": 3.64963503649635e-06, | |
| "loss": 0.7308, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.967741935483871, | |
| "grad_norm": 0.4918728768825531, | |
| "learning_rate": 3.40632603406326e-06, | |
| "loss": 0.7418, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 2.9699289229086934, | |
| "grad_norm": 0.5118703842163086, | |
| "learning_rate": 3.16301703163017e-06, | |
| "loss": 0.6361, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 2.9721159103335157, | |
| "grad_norm": 0.4407196044921875, | |
| "learning_rate": 2.91970802919708e-06, | |
| "loss": 0.6971, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 2.9743028977583377, | |
| "grad_norm": 0.33856332302093506, | |
| "learning_rate": 2.6763990267639902e-06, | |
| "loss": 0.5766, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.97648988518316, | |
| "grad_norm": 0.45704513788223267, | |
| "learning_rate": 2.4330900243308996e-06, | |
| "loss": 0.6431, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 2.9786768726079824, | |
| "grad_norm": 0.3669881224632263, | |
| "learning_rate": 2.18978102189781e-06, | |
| "loss": 0.5637, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 2.980863860032805, | |
| "grad_norm": 0.33307334780693054, | |
| "learning_rate": 1.94647201946472e-06, | |
| "loss": 0.6372, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 2.983050847457627, | |
| "grad_norm": 0.3178769052028656, | |
| "learning_rate": 1.70316301703163e-06, | |
| "loss": 0.8674, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 2.9852378348824495, | |
| "grad_norm": 0.4288700222969055, | |
| "learning_rate": 1.45985401459854e-06, | |
| "loss": 0.7514, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 2.987424822307272, | |
| "grad_norm": 0.3283116817474365, | |
| "learning_rate": 1.2165450121654498e-06, | |
| "loss": 0.5816, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 2.989611809732094, | |
| "grad_norm": 0.3714343011379242, | |
| "learning_rate": 9.7323600973236e-07, | |
| "loss": 0.7904, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 2.991798797156916, | |
| "grad_norm": 0.7103442549705505, | |
| "learning_rate": 7.2992700729927e-07, | |
| "loss": 0.7292, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.9939857845817386, | |
| "grad_norm": 0.34076127409935, | |
| "learning_rate": 4.8661800486618e-07, | |
| "loss": 0.6302, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 2.996172772006561, | |
| "grad_norm": 0.424398809671402, | |
| "learning_rate": 2.4330900243309e-07, | |
| "loss": 0.781, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.9983597594313833, | |
| "grad_norm": 0.39384347200393677, | |
| "learning_rate": 0.0, | |
| "loss": 0.5505, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 2.9983597594313833, | |
| "step": 1371, | |
| "total_flos": 4.3228174920083046e+17, | |
| "train_loss": 0.7109334499926396, | |
| "train_runtime": 1998.4313, | |
| "train_samples_per_second": 10.983, | |
| "train_steps_per_second": 0.686 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1371, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.3228174920083046e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |