{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.3422720624085813,
  "eval_steps": 500,
  "global_step": 400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005850804485616773,
      "grad_norm": 4.189145565032959,
      "learning_rate": 1.9607843137254904e-07,
      "loss": 0.6022,
      "step": 1
    },
    {
      "epoch": 0.011701608971233545,
      "grad_norm": 4.088385105133057,
      "learning_rate": 3.921568627450981e-07,
      "loss": 0.6105,
      "step": 2
    },
    {
      "epoch": 0.017552413456850317,
      "grad_norm": 4.105137348175049,
      "learning_rate": 5.882352941176471e-07,
      "loss": 0.6234,
      "step": 3
    },
    {
      "epoch": 0.02340321794246709,
      "grad_norm": 4.010756015777588,
      "learning_rate": 7.843137254901962e-07,
      "loss": 0.5629,
      "step": 4
    },
    {
      "epoch": 0.02925402242808386,
      "grad_norm": 4.201730728149414,
      "learning_rate": 9.80392156862745e-07,
      "loss": 0.6236,
      "step": 5
    },
    {
      "epoch": 0.035104826913700635,
      "grad_norm": 4.13097620010376,
      "learning_rate": 1.1764705882352942e-06,
      "loss": 0.6058,
      "step": 6
    },
    {
      "epoch": 0.040955631399317405,
      "grad_norm": 3.753781318664551,
      "learning_rate": 1.3725490196078434e-06,
      "loss": 0.5798,
      "step": 7
    },
    {
      "epoch": 0.04680643588493418,
      "grad_norm": 3.1203114986419678,
      "learning_rate": 1.5686274509803923e-06,
      "loss": 0.5575,
      "step": 8
    },
    {
      "epoch": 0.05265724037055095,
      "grad_norm": 3.1326870918273926,
      "learning_rate": 1.7647058823529414e-06,
      "loss": 0.5794,
      "step": 9
    },
    {
      "epoch": 0.05850804485616772,
      "grad_norm": 3.01350736618042,
      "learning_rate": 1.96078431372549e-06,
      "loss": 0.5721,
      "step": 10
    },
    {
      "epoch": 0.0643588493417845,
      "grad_norm": 2.0586817264556885,
      "learning_rate": 2.1568627450980393e-06,
      "loss": 0.5389,
      "step": 11
    },
    {
      "epoch": 0.07020965382740127,
      "grad_norm": 2.056138753890991,
      "learning_rate": 2.3529411764705885e-06,
      "loss": 0.5578,
      "step": 12
    },
    {
      "epoch": 0.07606045831301804,
      "grad_norm": 1.8458319902420044,
      "learning_rate": 2.549019607843137e-06,
      "loss": 0.5432,
      "step": 13
    },
    {
      "epoch": 0.08191126279863481,
      "grad_norm": 1.3385547399520874,
      "learning_rate": 2.7450980392156867e-06,
      "loss": 0.5375,
      "step": 14
    },
    {
      "epoch": 0.08776206728425158,
      "grad_norm": 2.10184383392334,
      "learning_rate": 2.9411764705882355e-06,
      "loss": 0.4834,
      "step": 15
    },
    {
      "epoch": 0.09361287176986836,
      "grad_norm": 2.354717254638672,
      "learning_rate": 3.1372549019607846e-06,
      "loss": 0.5087,
      "step": 16
    },
    {
      "epoch": 0.09946367625548513,
      "grad_norm": 2.4186935424804688,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.5408,
      "step": 17
    },
    {
      "epoch": 0.1053144807411019,
      "grad_norm": 2.02093243598938,
      "learning_rate": 3.529411764705883e-06,
      "loss": 0.4967,
      "step": 18
    },
    {
      "epoch": 0.11116528522671867,
      "grad_norm": 1.9769740104675293,
      "learning_rate": 3.7254901960784316e-06,
      "loss": 0.5429,
      "step": 19
    },
    {
      "epoch": 0.11701608971233544,
      "grad_norm": 1.4087600708007812,
      "learning_rate": 3.92156862745098e-06,
      "loss": 0.4855,
      "step": 20
    },
    {
      "epoch": 0.12286689419795221,
      "grad_norm": 1.4071195125579834,
      "learning_rate": 4.11764705882353e-06,
      "loss": 0.4956,
      "step": 21
    },
    {
      "epoch": 0.128717698683569,
      "grad_norm": 1.4400174617767334,
      "learning_rate": 4.313725490196079e-06,
      "loss": 0.4966,
      "step": 22
    },
    {
      "epoch": 0.13456850316918575,
      "grad_norm": 1.2176562547683716,
      "learning_rate": 4.509803921568628e-06,
      "loss": 0.4892,
      "step": 23
    },
    {
      "epoch": 0.14041930765480254,
      "grad_norm": 1.0557763576507568,
      "learning_rate": 4.705882352941177e-06,
      "loss": 0.4664,
      "step": 24
    },
    {
      "epoch": 0.1462701121404193,
      "grad_norm": 1.0654219388961792,
      "learning_rate": 4.901960784313726e-06,
      "loss": 0.4427,
      "step": 25
    },
    {
      "epoch": 0.15212091662603608,
      "grad_norm": 0.8639155626296997,
      "learning_rate": 5.098039215686274e-06,
      "loss": 0.4676,
      "step": 26
    },
    {
      "epoch": 0.15797172111165286,
      "grad_norm": 0.8091264963150024,
      "learning_rate": 5.294117647058824e-06,
      "loss": 0.4339,
      "step": 27
    },
    {
      "epoch": 0.16382252559726962,
      "grad_norm": 0.7697594165802002,
      "learning_rate": 5.4901960784313735e-06,
      "loss": 0.4164,
      "step": 28
    },
    {
      "epoch": 0.1696733300828864,
      "grad_norm": 0.8522382378578186,
      "learning_rate": 5.686274509803922e-06,
      "loss": 0.4512,
      "step": 29
    },
    {
      "epoch": 0.17552413456850316,
      "grad_norm": 0.7640376687049866,
      "learning_rate": 5.882352941176471e-06,
      "loss": 0.432,
      "step": 30
    },
    {
      "epoch": 0.18137493905411994,
      "grad_norm": 0.6247867941856384,
      "learning_rate": 6.07843137254902e-06,
      "loss": 0.408,
      "step": 31
    },
    {
      "epoch": 0.18722574353973673,
      "grad_norm": 0.6288900971412659,
      "learning_rate": 6.274509803921569e-06,
      "loss": 0.4611,
      "step": 32
    },
    {
      "epoch": 0.19307654802535348,
      "grad_norm": 0.6182562708854675,
      "learning_rate": 6.470588235294119e-06,
      "loss": 0.4257,
      "step": 33
    },
    {
      "epoch": 0.19892735251097027,
      "grad_norm": 0.6193389892578125,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.4063,
      "step": 34
    },
    {
      "epoch": 0.20477815699658702,
      "grad_norm": 0.6892727017402649,
      "learning_rate": 6.862745098039216e-06,
      "loss": 0.3967,
      "step": 35
    },
    {
      "epoch": 0.2106289614822038,
      "grad_norm": 0.6725057363510132,
      "learning_rate": 7.058823529411766e-06,
      "loss": 0.4428,
      "step": 36
    },
    {
      "epoch": 0.21647976596782056,
      "grad_norm": 0.5203535556793213,
      "learning_rate": 7.2549019607843145e-06,
      "loss": 0.4151,
      "step": 37
    },
    {
      "epoch": 0.22233057045343735,
      "grad_norm": 0.45232418179512024,
      "learning_rate": 7.450980392156863e-06,
      "loss": 0.3666,
      "step": 38
    },
    {
      "epoch": 0.22818137493905413,
      "grad_norm": 0.5872768759727478,
      "learning_rate": 7.647058823529411e-06,
      "loss": 0.4144,
      "step": 39
    },
    {
      "epoch": 0.2340321794246709,
      "grad_norm": 0.526172399520874,
      "learning_rate": 7.84313725490196e-06,
      "loss": 0.4346,
      "step": 40
    },
    {
      "epoch": 0.23988298391028767,
      "grad_norm": 0.5474228858947754,
      "learning_rate": 8.03921568627451e-06,
      "loss": 0.3965,
      "step": 41
    },
    {
      "epoch": 0.24573378839590443,
      "grad_norm": 0.46727877855300903,
      "learning_rate": 8.23529411764706e-06,
      "loss": 0.4417,
      "step": 42
    },
    {
      "epoch": 0.2515845928815212,
      "grad_norm": 0.40532198548316956,
      "learning_rate": 8.43137254901961e-06,
      "loss": 0.3851,
      "step": 43
    },
    {
      "epoch": 0.257435397367138,
      "grad_norm": 0.4897397458553314,
      "learning_rate": 8.627450980392157e-06,
      "loss": 0.4013,
      "step": 44
    },
    {
      "epoch": 0.26328620185275475,
      "grad_norm": 0.4565890431404114,
      "learning_rate": 8.823529411764707e-06,
      "loss": 0.3745,
      "step": 45
    },
    {
      "epoch": 0.2691370063383715,
      "grad_norm": 0.38417261838912964,
      "learning_rate": 9.019607843137256e-06,
      "loss": 0.3783,
      "step": 46
    },
    {
      "epoch": 0.2749878108239883,
      "grad_norm": 0.40912356972694397,
      "learning_rate": 9.215686274509804e-06,
      "loss": 0.3879,
      "step": 47
    },
    {
      "epoch": 0.2808386153096051,
      "grad_norm": 0.42792415618896484,
      "learning_rate": 9.411764705882354e-06,
      "loss": 0.3837,
      "step": 48
    },
    {
      "epoch": 0.28668941979522183,
      "grad_norm": 0.4394405484199524,
      "learning_rate": 9.607843137254903e-06,
      "loss": 0.4004,
      "step": 49
    },
    {
      "epoch": 0.2925402242808386,
      "grad_norm": 0.4622238576412201,
      "learning_rate": 9.803921568627451e-06,
      "loss": 0.409,
      "step": 50
    },
    {
      "epoch": 0.2983910287664554,
      "grad_norm": 0.3894466757774353,
      "learning_rate": 1e-05,
      "loss": 0.3766,
      "step": 51
    },
    {
      "epoch": 0.30424183325207216,
      "grad_norm": 0.39314836263656616,
      "learning_rate": 9.999882884955554e-06,
      "loss": 0.3418,
      "step": 52
    },
    {
      "epoch": 0.3100926377376889,
      "grad_norm": 0.44764766097068787,
      "learning_rate": 9.999531545308584e-06,
      "loss": 0.3909,
      "step": 53
    },
    {
      "epoch": 0.3159434422233057,
      "grad_norm": 0.403144896030426,
      "learning_rate": 9.998945997517957e-06,
      "loss": 0.3716,
      "step": 54
    },
    {
      "epoch": 0.3217942467089225,
      "grad_norm": 0.4303280711174011,
      "learning_rate": 9.998126269014255e-06,
      "loss": 0.4026,
      "step": 55
    },
    {
      "epoch": 0.32764505119453924,
      "grad_norm": 0.4083136022090912,
      "learning_rate": 9.997072398198492e-06,
      "loss": 0.3842,
      "step": 56
    },
    {
      "epoch": 0.333495855680156,
      "grad_norm": 0.3750261664390564,
      "learning_rate": 9.99578443444032e-06,
      "loss": 0.3605,
      "step": 57
    },
    {
      "epoch": 0.3393466601657728,
      "grad_norm": 0.43343302607536316,
      "learning_rate": 9.994262438075713e-06,
      "loss": 0.4119,
      "step": 58
    },
    {
      "epoch": 0.34519746465138956,
      "grad_norm": 0.3778004050254822,
      "learning_rate": 9.992506480404137e-06,
      "loss": 0.3616,
      "step": 59
    },
    {
      "epoch": 0.3510482691370063,
      "grad_norm": 0.36973798274993896,
      "learning_rate": 9.990516643685222e-06,
      "loss": 0.3793,
      "step": 60
    },
    {
      "epoch": 0.35689907362262313,
      "grad_norm": 0.3836229145526886,
      "learning_rate": 9.988293021134888e-06,
      "loss": 0.3492,
      "step": 61
    },
    {
      "epoch": 0.3627498781082399,
      "grad_norm": 0.3700697720050812,
      "learning_rate": 9.985835716921e-06,
      "loss": 0.3583,
      "step": 62
    },
    {
      "epoch": 0.36860068259385664,
      "grad_norm": 0.4023352861404419,
      "learning_rate": 9.983144846158472e-06,
      "loss": 0.3697,
      "step": 63
    },
    {
      "epoch": 0.37445148707947346,
      "grad_norm": 0.38035494089126587,
      "learning_rate": 9.980220534903889e-06,
      "loss": 0.3772,
      "step": 64
    },
    {
      "epoch": 0.3803022915650902,
      "grad_norm": 0.3641819953918457,
      "learning_rate": 9.977062920149583e-06,
      "loss": 0.3562,
      "step": 65
    },
    {
      "epoch": 0.38615309605070697,
      "grad_norm": 0.39018484950065613,
      "learning_rate": 9.973672149817232e-06,
      "loss": 0.3377,
      "step": 66
    },
    {
      "epoch": 0.3920039005363237,
      "grad_norm": 0.351622998714447,
      "learning_rate": 9.970048382750925e-06,
      "loss": 0.351,
      "step": 67
    },
    {
      "epoch": 0.39785470502194054,
      "grad_norm": 0.40039461851119995,
      "learning_rate": 9.966191788709716e-06,
      "loss": 0.3775,
      "step": 68
    },
    {
      "epoch": 0.4037055095075573,
      "grad_norm": 0.3892274796962738,
      "learning_rate": 9.96210254835968e-06,
      "loss": 0.4034,
      "step": 69
    },
    {
      "epoch": 0.40955631399317405,
      "grad_norm": 0.4052744507789612,
      "learning_rate": 9.957780853265441e-06,
      "loss": 0.4079,
      "step": 70
    },
    {
      "epoch": 0.41540711847879086,
      "grad_norm": 0.3877456486225128,
      "learning_rate": 9.953226905881208e-06,
      "loss": 0.3342,
      "step": 71
    },
    {
      "epoch": 0.4212579229644076,
      "grad_norm": 0.4107078015804291,
      "learning_rate": 9.948440919541277e-06,
      "loss": 0.358,
      "step": 72
    },
    {
      "epoch": 0.4271087274500244,
      "grad_norm": 0.37597158551216125,
      "learning_rate": 9.943423118450051e-06,
      "loss": 0.3948,
      "step": 73
    },
    {
      "epoch": 0.43295953193564113,
      "grad_norm": 0.4590906798839569,
      "learning_rate": 9.938173737671531e-06,
      "loss": 0.3847,
      "step": 74
    },
    {
      "epoch": 0.43881033642125794,
      "grad_norm": 0.48799118399620056,
      "learning_rate": 9.932693023118299e-06,
      "loss": 0.3845,
      "step": 75
    },
    {
      "epoch": 0.4446611409068747,
      "grad_norm": 0.39222586154937744,
      "learning_rate": 9.926981231540007e-06,
      "loss": 0.3872,
      "step": 76
    },
    {
      "epoch": 0.45051194539249145,
      "grad_norm": 0.4158020615577698,
      "learning_rate": 9.921038630511345e-06,
      "loss": 0.388,
      "step": 77
    },
    {
      "epoch": 0.45636274987810826,
      "grad_norm": 0.40331101417541504,
      "learning_rate": 9.91486549841951e-06,
      "loss": 0.3705,
      "step": 78
    },
    {
      "epoch": 0.462213554363725,
      "grad_norm": 0.4275971055030823,
      "learning_rate": 9.908462124451152e-06,
      "loss": 0.3849,
      "step": 79
    },
    {
      "epoch": 0.4680643588493418,
      "grad_norm": 0.3466413915157318,
      "learning_rate": 9.901828808578846e-06,
      "loss": 0.347,
      "step": 80
    },
    {
      "epoch": 0.47391516333495853,
      "grad_norm": 0.44375771284103394,
      "learning_rate": 9.894965861547023e-06,
      "loss": 0.373,
      "step": 81
    },
    {
      "epoch": 0.47976596782057535,
      "grad_norm": 0.38661712408065796,
      "learning_rate": 9.887873604857424e-06,
      "loss": 0.3702,
      "step": 82
    },
    {
      "epoch": 0.4856167723061921,
      "grad_norm": 0.41488274931907654,
      "learning_rate": 9.88055237075403e-06,
      "loss": 0.3574,
      "step": 83
    },
    {
      "epoch": 0.49146757679180886,
      "grad_norm": 0.41137149930000305,
      "learning_rate": 9.873002502207502e-06,
      "loss": 0.3901,
      "step": 84
    },
    {
      "epoch": 0.49731838127742567,
      "grad_norm": 0.39136987924575806,
      "learning_rate": 9.86522435289912e-06,
      "loss": 0.38,
      "step": 85
    },
    {
      "epoch": 0.5031691857630424,
      "grad_norm": 0.37086671590805054,
      "learning_rate": 9.857218287204204e-06,
      "loss": 0.3541,
      "step": 86
    },
    {
      "epoch": 0.5090199902486592,
      "grad_norm": 0.43105342984199524,
      "learning_rate": 9.848984680175049e-06,
      "loss": 0.4087,
      "step": 87
    },
    {
      "epoch": 0.514870794734276,
      "grad_norm": 0.36811238527297974,
      "learning_rate": 9.840523917523354e-06,
      "loss": 0.3639,
      "step": 88
    },
    {
      "epoch": 0.5207215992198927,
      "grad_norm": 0.378967821598053,
      "learning_rate": 9.831836395602164e-06,
      "loss": 0.3251,
      "step": 89
    },
    {
      "epoch": 0.5265724037055095,
      "grad_norm": 0.36341214179992676,
      "learning_rate": 9.822922521387277e-06,
      "loss": 0.3705,
      "step": 90
    },
    {
      "epoch": 0.5324232081911263,
      "grad_norm": 0.37682002782821655,
      "learning_rate": 9.813782712458206e-06,
      "loss": 0.3513,
      "step": 91
    },
    {
      "epoch": 0.538274012676743,
      "grad_norm": 0.4142582416534424,
      "learning_rate": 9.804417396978605e-06,
      "loss": 0.3716,
      "step": 92
    },
    {
      "epoch": 0.5441248171623598,
      "grad_norm": 0.4432157278060913,
      "learning_rate": 9.794827013676206e-06,
      "loss": 0.4126,
      "step": 93
    },
    {
      "epoch": 0.5499756216479766,
      "grad_norm": 0.47457224130630493,
      "learning_rate": 9.78501201182228e-06,
      "loss": 0.3941,
      "step": 94
    },
    {
      "epoch": 0.5558264261335933,
      "grad_norm": 0.35374128818511963,
      "learning_rate": 9.774972851210572e-06,
      "loss": 0.3893,
      "step": 95
    },
    {
      "epoch": 0.5616772306192102,
      "grad_norm": 0.37110310792922974,
      "learning_rate": 9.764710002135784e-06,
      "loss": 0.3453,
      "step": 96
    },
    {
      "epoch": 0.567528035104827,
      "grad_norm": 0.4286816716194153,
      "learning_rate": 9.754223945371524e-06,
      "loss": 0.3674,
      "step": 97
    },
    {
      "epoch": 0.5733788395904437,
      "grad_norm": 0.3735758662223816,
      "learning_rate": 9.743515172147793e-06,
      "loss": 0.3572,
      "step": 98
    },
    {
      "epoch": 0.5792296440760605,
      "grad_norm": 0.3784080445766449,
      "learning_rate": 9.732584184127973e-06,
      "loss": 0.3864,
      "step": 99
    },
    {
      "epoch": 0.5850804485616772,
      "grad_norm": 0.40882179141044617,
      "learning_rate": 9.721431493385322e-06,
      "loss": 0.3458,
      "step": 100
    },
    {
      "epoch": 0.590931253047294,
      "grad_norm": 0.3924429416656494,
      "learning_rate": 9.710057622378992e-06,
      "loss": 0.3497,
      "step": 101
    },
    {
      "epoch": 0.5967820575329108,
      "grad_norm": 0.41799789667129517,
      "learning_rate": 9.698463103929542e-06,
      "loss": 0.3915,
      "step": 102
    },
    {
      "epoch": 0.6026328620185275,
      "grad_norm": 0.4201458990573883,
      "learning_rate": 9.686648481193994e-06,
      "loss": 0.3797,
      "step": 103
    },
    {
      "epoch": 0.6084836665041443,
      "grad_norm": 0.3876160979270935,
      "learning_rate": 9.674614307640368e-06,
      "loss": 0.3667,
      "step": 104
    },
    {
      "epoch": 0.6143344709897611,
      "grad_norm": 0.39733994007110596,
      "learning_rate": 9.66236114702178e-06,
      "loss": 0.3746,
      "step": 105
    },
    {
      "epoch": 0.6201852754753778,
      "grad_norm": 0.4422380030155182,
      "learning_rate": 9.649889573350006e-06,
      "loss": 0.3657,
      "step": 106
    },
    {
      "epoch": 0.6260360799609946,
      "grad_norm": 0.34534451365470886,
      "learning_rate": 9.637200170868607e-06,
      "loss": 0.3173,
      "step": 107
    },
    {
      "epoch": 0.6318868844466115,
      "grad_norm": 0.49448907375335693,
      "learning_rate": 9.62429353402556e-06,
      "loss": 0.3528,
      "step": 108
    },
    {
      "epoch": 0.6377376889322282,
      "grad_norm": 0.4157074987888336,
      "learning_rate": 9.611170267445401e-06,
      "loss": 0.3647,
      "step": 109
    },
    {
      "epoch": 0.643588493417845,
      "grad_norm": 0.3649308383464813,
      "learning_rate": 9.597830985900913e-06,
      "loss": 0.3592,
      "step": 110
    },
    {
      "epoch": 0.6494392979034618,
      "grad_norm": 0.38802069425582886,
      "learning_rate": 9.584276314284316e-06,
      "loss": 0.3749,
      "step": 111
    },
    {
      "epoch": 0.6552901023890785,
      "grad_norm": 0.41905415058135986,
      "learning_rate": 9.570506887577994e-06,
      "loss": 0.3761,
      "step": 112
    },
    {
      "epoch": 0.6611409068746953,
      "grad_norm": 0.34973040223121643,
      "learning_rate": 9.556523350824759e-06,
      "loss": 0.3377,
      "step": 113
    },
    {
      "epoch": 0.666991711360312,
      "grad_norm": 0.42152735590934753,
      "learning_rate": 9.542326359097619e-06,
      "loss": 0.3758,
      "step": 114
    },
    {
      "epoch": 0.6728425158459288,
      "grad_norm": 0.34654316306114197,
      "learning_rate": 9.527916577469104e-06,
      "loss": 0.3612,
      "step": 115
    },
    {
      "epoch": 0.6786933203315456,
      "grad_norm": 0.3440297842025757,
      "learning_rate": 9.5132946809801e-06,
      "loss": 0.37,
      "step": 116
    },
    {
      "epoch": 0.6845441248171623,
      "grad_norm": 0.36565279960632324,
      "learning_rate": 9.498461354608228e-06,
      "loss": 0.352,
      "step": 117
    },
    {
      "epoch": 0.6903949293027791,
      "grad_norm": 0.3970431983470917,
      "learning_rate": 9.483417293235759e-06,
      "loss": 0.3694,
      "step": 118
    },
    {
      "epoch": 0.6962457337883959,
      "grad_norm": 0.3433384895324707,
      "learning_rate": 9.468163201617063e-06,
      "loss": 0.3657,
      "step": 119
    },
    {
      "epoch": 0.7020965382740126,
      "grad_norm": 0.39245930314064026,
      "learning_rate": 9.452699794345583e-06,
      "loss": 0.362,
      "step": 120
    },
    {
      "epoch": 0.7079473427596294,
      "grad_norm": 0.38453614711761475,
      "learning_rate": 9.437027795820373e-06,
      "loss": 0.3675,
      "step": 121
    },
    {
      "epoch": 0.7137981472452463,
      "grad_norm": 0.369517058134079,
      "learning_rate": 9.421147940212152e-06,
      "loss": 0.3634,
      "step": 122
    },
    {
      "epoch": 0.719648951730863,
      "grad_norm": 0.38849949836730957,
      "learning_rate": 9.405060971428924e-06,
      "loss": 0.3387,
      "step": 123
    },
    {
      "epoch": 0.7254997562164798,
      "grad_norm": 0.4063083231449127,
      "learning_rate": 9.388767643081109e-06,
      "loss": 0.3719,
      "step": 124
    },
    {
      "epoch": 0.7313505607020966,
      "grad_norm": 0.40234676003456116,
      "learning_rate": 9.372268718446259e-06,
      "loss": 0.3939,
      "step": 125
    },
    {
      "epoch": 0.7372013651877133,
      "grad_norm": 0.3845783770084381,
      "learning_rate": 9.355564970433288e-06,
      "loss": 0.3699,
      "step": 126
    },
    {
      "epoch": 0.7430521696733301,
      "grad_norm": 0.3887750506401062,
      "learning_rate": 9.338657181546277e-06,
      "loss": 0.3686,
      "step": 127
    },
    {
      "epoch": 0.7489029741589469,
      "grad_norm": 0.3700850307941437,
      "learning_rate": 9.321546143847802e-06,
      "loss": 0.3431,
      "step": 128
    },
    {
      "epoch": 0.7547537786445636,
      "grad_norm": 0.44235607981681824,
      "learning_rate": 9.30423265892184e-06,
      "loss": 0.3836,
      "step": 129
    },
    {
      "epoch": 0.7606045831301804,
      "grad_norm": 0.39945074915885925,
      "learning_rate": 9.286717537836211e-06,
      "loss": 0.3706,
      "step": 130
    },
    {
      "epoch": 0.7664553876157971,
      "grad_norm": 0.42615601420402527,
      "learning_rate": 9.269001601104593e-06,
      "loss": 0.369,
      "step": 131
    },
    {
      "epoch": 0.7723061921014139,
      "grad_norm": 0.4713898003101349,
      "learning_rate": 9.251085678648072e-06,
      "loss": 0.3818,
      "step": 132
    },
    {
      "epoch": 0.7781569965870307,
      "grad_norm": 0.3744489550590515,
      "learning_rate": 9.232970609756267e-06,
      "loss": 0.3542,
      "step": 133
    },
    {
      "epoch": 0.7840078010726474,
      "grad_norm": 0.3802720308303833,
      "learning_rate": 9.214657243048021e-06,
      "loss": 0.3346,
      "step": 134
    },
    {
      "epoch": 0.7898586055582643,
      "grad_norm": 0.45320552587509155,
      "learning_rate": 9.196146436431635e-06,
      "loss": 0.3766,
      "step": 135
    },
    {
      "epoch": 0.7957094100438811,
      "grad_norm": 0.3729214370250702,
      "learning_rate": 9.177439057064684e-06,
      "loss": 0.3694,
      "step": 136
    },
    {
      "epoch": 0.8015602145294978,
      "grad_norm": 0.3678078055381775,
      "learning_rate": 9.158535981313395e-06,
      "loss": 0.3515,
      "step": 137
    },
    {
      "epoch": 0.8074110190151146,
      "grad_norm": 0.4144746959209442,
      "learning_rate": 9.13943809471159e-06,
      "loss": 0.3756,
      "step": 138
    },
    {
      "epoch": 0.8132618235007314,
      "grad_norm": 0.3548150658607483,
      "learning_rate": 9.120146291919206e-06,
      "loss": 0.3494,
      "step": 139
    },
    {
      "epoch": 0.8191126279863481,
      "grad_norm": 0.3966399133205414,
      "learning_rate": 9.100661476680379e-06,
      "loss": 0.3427,
      "step": 140
    },
    {
      "epoch": 0.8249634324719649,
      "grad_norm": 0.4523519277572632,
      "learning_rate": 9.08098456178111e-06,
      "loss": 0.3641,
      "step": 141
    },
    {
      "epoch": 0.8308142369575817,
      "grad_norm": 0.45737963914871216,
      "learning_rate": 9.061116469006504e-06,
      "loss": 0.3643,
      "step": 142
    },
    {
      "epoch": 0.8366650414431984,
      "grad_norm": 0.34355804324150085,
      "learning_rate": 9.041058129097586e-06,
      "loss": 0.3227,
      "step": 143
    },
    {
      "epoch": 0.8425158459288152,
      "grad_norm": 0.4239197373390198,
      "learning_rate": 9.020810481707709e-06,
      "loss": 0.3604,
      "step": 144
    },
    {
      "epoch": 0.8483666504144319,
      "grad_norm": 0.4363431930541992,
      "learning_rate": 9.00037447535852e-06,
      "loss": 0.3785,
      "step": 145
    },
    {
      "epoch": 0.8542174549000487,
      "grad_norm": 0.383635550737381,
      "learning_rate": 8.979751067395534e-06,
      "loss": 0.355,
      "step": 146
    },
    {
      "epoch": 0.8600682593856656,
      "grad_norm": 0.3972126543521881,
      "learning_rate": 8.958941223943292e-06,
      "loss": 0.394,
      "step": 147
    },
    {
      "epoch": 0.8659190638712823,
      "grad_norm": 0.3762996196746826,
      "learning_rate": 8.937945919860086e-06,
      "loss": 0.3779,
      "step": 148
    },
    {
      "epoch": 0.8717698683568991,
      "grad_norm": 0.40220147371292114,
      "learning_rate": 8.916766138692303e-06,
      "loss": 0.3725,
      "step": 149
    },
    {
      "epoch": 0.8776206728425159,
      "grad_norm": 0.35849395394325256,
      "learning_rate": 8.895402872628352e-06,
      "loss": 0.3533,
      "step": 150
    },
    {
      "epoch": 0.8834714773281326,
      "grad_norm": 0.3301231861114502,
      "learning_rate": 8.873857122452174e-06,
      "loss": 0.3156,
      "step": 151
    },
    {
      "epoch": 0.8893222818137494,
      "grad_norm": 0.39462047815322876,
      "learning_rate": 8.852129897496367e-06,
      "loss": 0.3538,
      "step": 152
    },
    {
      "epoch": 0.8951730862993662,
      "grad_norm": 0.3844425082206726,
      "learning_rate": 8.83022221559489e-06,
      "loss": 0.3913,
      "step": 153
    },
    {
      "epoch": 0.9010238907849829,
      "grad_norm": 0.37792298197746277,
      "learning_rate": 8.808135103035407e-06,
      "loss": 0.3495,
      "step": 154
    },
    {
      "epoch": 0.9068746952705997,
      "grad_norm": 0.39290040731430054,
      "learning_rate": 8.785869594511182e-06,
      "loss": 0.3784,
      "step": 155
    },
    {
      "epoch": 0.9127254997562165,
      "grad_norm": 0.3619037866592407,
      "learning_rate": 8.763426733072624e-06,
      "loss": 0.3614,
      "step": 156
    },
    {
      "epoch": 0.9185763042418332,
      "grad_norm": 0.3633933663368225,
      "learning_rate": 8.740807570078419e-06,
      "loss": 0.3902,
      "step": 157
    },
    {
      "epoch": 0.92442710872745,
      "grad_norm": 0.3714929223060608,
      "learning_rate": 8.718013165146275e-06,
      "loss": 0.3274,
      "step": 158
    },
    {
      "epoch": 0.9302779132130667,
      "grad_norm": 0.38371893763542175,
      "learning_rate": 8.695044586103297e-06,
      "loss": 0.3507,
      "step": 159
    },
    {
      "epoch": 0.9361287176986836,
      "grad_norm": 0.34635236859321594,
      "learning_rate": 8.671902908935942e-06,
      "loss": 0.3275,
      "step": 160
    },
    {
      "epoch": 0.9419795221843004,
      "grad_norm": 0.34420835971832275,
      "learning_rate": 8.648589217739635e-06,
      "loss": 0.3461,
      "step": 161
    },
    {
      "epoch": 0.9478303266699171,
      "grad_norm": 0.3969476819038391,
      "learning_rate": 8.625104604667965e-06,
      "loss": 0.3579,
      "step": 162
    },
    {
      "epoch": 0.9536811311555339,
      "grad_norm": 0.3697619140148163,
      "learning_rate": 8.601450169881533e-06,
      "loss": 0.3476,
      "step": 163
    },
    {
      "epoch": 0.9595319356411507,
      "grad_norm": 0.3809903860092163,
      "learning_rate": 8.577627021496413e-06,
      "loss": 0.36,
      "step": 164
    },
    {
      "epoch": 0.9653827401267674,
      "grad_norm": 0.3934761881828308,
      "learning_rate": 8.553636275532236e-06,
      "loss": 0.3704,
      "step": 165
    },
    {
      "epoch": 0.9712335446123842,
      "grad_norm": 0.3420058786869049,
      "learning_rate": 8.529479055859918e-06,
      "loss": 0.3335,
      "step": 166
    },
    {
      "epoch": 0.977084349098001,
      "grad_norm": 0.3801231384277344,
      "learning_rate": 8.505156494148997e-06,
      "loss": 0.3723,
      "step": 167
    },
    {
      "epoch": 0.9829351535836177,
      "grad_norm": 0.38984423875808716,
      "learning_rate": 8.480669729814635e-06,
      "loss": 0.3563,
      "step": 168
    },
    {
      "epoch": 0.9887859580692345,
      "grad_norm": 0.369872123003006,
      "learning_rate": 8.456019909964224e-06,
      "loss": 0.3494,
      "step": 169
    },
    {
      "epoch": 0.9946367625548513,
      "grad_norm": 0.3835128843784332,
      "learning_rate": 8.43120818934367e-06,
      "loss": 0.3672,
      "step": 170
    },
    {
      "epoch": 1.0014627011214041,
      "grad_norm": 0.4482472538948059,
      "learning_rate": 8.40623573028327e-06,
      "loss": 0.4454,
      "step": 171
    },
    {
      "epoch": 1.007313505607021,
      "grad_norm": 0.45144927501678467,
      "learning_rate": 8.381103702643295e-06,
      "loss": 0.3454,
      "step": 172
    },
    {
      "epoch": 1.0131643100926377,
      "grad_norm": 0.3322243094444275,
      "learning_rate": 8.35581328375915e-06,
      "loss": 0.2828,
      "step": 173
    },
    {
      "epoch": 1.0190151145782544,
      "grad_norm": 0.397659033536911,
      "learning_rate": 8.330365658386252e-06,
      "loss": 0.3287,
      "step": 174
    },
    {
      "epoch": 1.0248659190638714,
      "grad_norm": 0.3485862910747528,
      "learning_rate": 8.30476201864451e-06,
      "loss": 0.2744,
      "step": 175
    },
    {
      "epoch": 1.030716723549488,
      "grad_norm": 0.3832169473171234,
      "learning_rate": 8.27900356396249e-06,
      "loss": 0.2868,
      "step": 176
    },
    {
      "epoch": 1.0365675280351048,
      "grad_norm": 0.4184396266937256,
      "learning_rate": 8.25309150102121e-06,
      "loss": 0.3291,
      "step": 177
    },
    {
      "epoch": 1.0424183325207217,
      "grad_norm": 0.45518970489501953,
      "learning_rate": 8.227027043697642e-06,
      "loss": 0.3489,
      "step": 178
    },
    {
      "epoch": 1.0482691370063384,
      "grad_norm": 0.3730817437171936,
      "learning_rate": 8.200811413007808e-06,
      "loss": 0.3055,
      "step": 179
    },
    {
      "epoch": 1.054119941491955,
      "grad_norm": 0.398185133934021,
      "learning_rate": 8.174445837049614e-06,
      "loss": 0.326,
      "step": 180
    },
    {
      "epoch": 1.059970745977572,
      "grad_norm": 0.4147329032421112,
      "learning_rate": 8.147931550945301e-06,
      "loss": 0.2961,
      "step": 181
    },
    {
      "epoch": 1.0658215504631887,
      "grad_norm": 0.4088496267795563,
      "learning_rate": 8.121269796783585e-06,
      "loss": 0.3239,
      "step": 182
    },
    {
      "epoch": 1.0716723549488054,
      "grad_norm": 0.35450735688209534,
      "learning_rate": 8.094461823561473e-06,
      "loss": 0.2851,
      "step": 183
    },
    {
      "epoch": 1.0775231594344223,
      "grad_norm": 0.4081903100013733,
      "learning_rate": 8.06750888712576e-06,
      "loss": 0.3188,
      "step": 184
    },
    {
      "epoch": 1.083373963920039,
      "grad_norm": 0.3934895396232605,
      "learning_rate": 8.040412250114184e-06,
      "loss": 0.2891,
      "step": 185
    },
    {
      "epoch": 1.0892247684056557,
      "grad_norm": 0.35631951689720154,
      "learning_rate": 8.013173181896283e-06,
      "loss": 0.2667,
      "step": 186
    },
    {
      "epoch": 1.0950755728912727,
      "grad_norm": 0.42703738808631897,
      "learning_rate": 7.985792958513932e-06,
      "loss": 0.312,
      "step": 187
    },
    {
      "epoch": 1.1009263773768894,
      "grad_norm": 0.4023725986480713,
      "learning_rate": 7.958272862621562e-06,
      "loss": 0.3343,
      "step": 188
    },
    {
      "epoch": 1.106777181862506,
      "grad_norm": 0.3514081537723541,
      "learning_rate": 7.930614183426074e-06,
      "loss": 0.2959,
      "step": 189
    },
    {
      "epoch": 1.1126279863481228,
      "grad_norm": 0.40648946166038513,
      "learning_rate": 7.902818216626446e-06,
      "loss": 0.3529,
      "step": 190
    },
    {
      "epoch": 1.1184787908337397,
      "grad_norm": 0.38296204805374146,
      "learning_rate": 7.874886264353035e-06,
      "loss": 0.2988,
      "step": 191
    },
    {
      "epoch": 1.1243295953193564,
      "grad_norm": 0.4062958061695099,
      "learning_rate": 7.846819635106569e-06,
      "loss": 0.3344,
      "step": 192
    },
    {
      "epoch": 1.130180399804973,
      "grad_norm": 0.3408312499523163,
      "learning_rate": 7.818619643696863e-06,
      "loss": 0.2857,
      "step": 193
    },
    {
      "epoch": 1.13603120429059,
      "grad_norm": 0.3789331316947937,
      "learning_rate": 7.790287611181217e-06,
      "loss": 0.3077,
      "step": 194
    },
    {
      "epoch": 1.1418820087762067,
      "grad_norm": 0.38520050048828125,
      "learning_rate": 7.76182486480253e-06,
      "loss": 0.3025,
      "step": 195
    },
    {
      "epoch": 1.1477328132618234,
      "grad_norm": 0.3634053170681,
      "learning_rate": 7.733232737927123e-06,
      "loss": 0.3037,
      "step": 196
    },
    {
      "epoch": 1.1535836177474403,
      "grad_norm": 0.42052581906318665,
      "learning_rate": 7.70451256998228e-06,
      "loss": 0.304,
      "step": 197
    },
    {
      "epoch": 1.159434422233057,
      "grad_norm": 0.3758928179740906,
      "learning_rate": 7.675665706393502e-06,
      "loss": 0.2755,
      "step": 198
    },
    {
      "epoch": 1.1652852267186737,
      "grad_norm": 0.35784485936164856,
      "learning_rate": 7.646693498521472e-06,
      "loss": 0.2876,
      "step": 199
    },
    {
      "epoch": 1.1711360312042907,
      "grad_norm": 0.38650694489479065,
      "learning_rate": 7.617597303598754e-06,
      "loss": 0.288,
      "step": 200
    },
    {
      "epoch": 1.1769868356899074,
      "grad_norm": 0.3944965898990631,
      "learning_rate": 7.588378484666214e-06,
      "loss": 0.3211,
      "step": 201
    },
    {
      "epoch": 1.182837640175524,
      "grad_norm": 0.3851556181907654,
      "learning_rate": 7.559038410509161e-06,
      "loss": 0.3389,
      "step": 202
    },
    {
      "epoch": 1.188688444661141,
      "grad_norm": 0.3507968783378601,
      "learning_rate": 7.529578455593232e-06,
      "loss": 0.2943,
      "step": 203
    },
    {
      "epoch": 1.1945392491467577,
      "grad_norm": 0.3462185561656952,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.3112,
      "step": 204
    },
    {
      "epoch": 1.2003900536323744,
      "grad_norm": 0.3465600609779358,
      "learning_rate": 7.47030442936232e-06,
      "loss": 0.3165,
      "step": 205
    },
    {
      "epoch": 1.2062408581179913,
      "grad_norm": 0.3432478904724121,
      "learning_rate": 7.440493134799425e-06,
      "loss": 0.2977,
      "step": 206
    },
    {
      "epoch": 1.212091662603608,
      "grad_norm": 0.3325629234313965,
      "learning_rate": 7.4105675128517456e-06,
      "loss": 0.2809,
      "step": 207
    },
    {
      "epoch": 1.2179424670892247,
      "grad_norm": 0.37305665016174316,
      "learning_rate": 7.380528965415501e-06,
      "loss": 0.3494,
      "step": 208
    },
    {
      "epoch": 1.2237932715748416,
      "grad_norm": 0.3855370283126831,
      "learning_rate": 7.35037889967702e-06,
      "loss": 0.331,
      "step": 209
    },
    {
      "epoch": 1.2296440760604583,
      "grad_norm": 0.38624921441078186,
      "learning_rate": 7.320118728046818e-06,
      "loss": 0.3249,
      "step": 210
    },
    {
      "epoch": 1.235494880546075,
      "grad_norm": 0.339275985956192,
      "learning_rate": 7.289749868093432e-06,
      "loss": 0.2979,
      "step": 211
    },
    {
      "epoch": 1.2413456850316917,
      "grad_norm": 0.362403929233551,
      "learning_rate": 7.259273742477017e-06,
      "loss": 0.3071,
      "step": 212
    },
    {
      "epoch": 1.2471964895173087,
      "grad_norm": 0.331527978181839,
      "learning_rate": 7.2286917788826926e-06,
      "loss": 0.2959,
      "step": 213
    },
    {
      "epoch": 1.2530472940029254,
      "grad_norm": 0.34029752016067505,
      "learning_rate": 7.19800540995367e-06,
      "loss": 0.2873,
      "step": 214
    },
    {
      "epoch": 1.258898098488542,
      "grad_norm": 0.38359367847442627,
      "learning_rate": 7.167216073224136e-06,
      "loss": 0.3215,
      "step": 215
    },
    {
      "epoch": 1.264748902974159,
      "grad_norm": 0.3701342046260834,
      "learning_rate": 7.136325211051905e-06,
      "loss": 0.2931,
      "step": 216
    },
    {
      "epoch": 1.2705997074597757,
      "grad_norm": 0.3997856080532074,
      "learning_rate": 7.1053342705508564e-06,
      "loss": 0.319,
      "step": 217
    },
    {
      "epoch": 1.2764505119453924,
      "grad_norm": 0.3141786456108093,
      "learning_rate": 7.074244703523137e-06,
      "loss": 0.2628,
      "step": 218
    },
    {
      "epoch": 1.2823013164310093,
      "grad_norm": 0.363447368144989,
      "learning_rate": 7.043057966391158e-06,
      "loss": 0.3079,
      "step": 219
    },
    {
      "epoch": 1.288152120916626,
      "grad_norm": 0.3675538897514343,
      "learning_rate": 7.011775520129363e-06,
      "loss": 0.2912,
      "step": 220
    },
    {
      "epoch": 1.2940029254022427,
      "grad_norm": 0.3745831251144409,
      "learning_rate": 6.980398830195785e-06,
      "loss": 0.287,
      "step": 221
    },
    {
      "epoch": 1.2998537298878596,
      "grad_norm": 0.34273862838745117,
      "learning_rate": 6.948929366463397e-06,
      "loss": 0.2739,
      "step": 222
    },
    {
      "epoch": 1.3057045343734763,
      "grad_norm": 0.38599085807800293,
      "learning_rate": 6.9173686031512595e-06,
      "loss": 0.3386,
      "step": 223
    },
    {
      "epoch": 1.311555338859093,
      "grad_norm": 0.35338225960731506,
      "learning_rate": 6.885718018755448e-06,
      "loss": 0.3034,
      "step": 224
    },
    {
      "epoch": 1.31740614334471,
      "grad_norm": 0.35684457421302795,
      "learning_rate": 6.8539790959798045e-06,
      "loss": 0.3159,
      "step": 225
    },
    {
      "epoch": 1.3232569478303267,
      "grad_norm": 0.342815101146698,
      "learning_rate": 6.822153321666469e-06,
      "loss": 0.3237,
      "step": 226
    },
    {
      "epoch": 1.3291077523159434,
      "grad_norm": 0.36875948309898376,
      "learning_rate": 6.790242186726231e-06,
      "loss": 0.3084,
      "step": 227
    },
    {
      "epoch": 1.3349585568015603,
      "grad_norm": 0.37179967761039734,
      "learning_rate": 6.758247186068684e-06,
      "loss": 0.3171,
      "step": 228
    },
    {
      "epoch": 1.340809361287177,
      "grad_norm": 0.35630038380622864,
      "learning_rate": 6.7261698185322e-06,
      "loss": 0.3041,
      "step": 229
    },
    {
      "epoch": 1.3466601657727937,
      "grad_norm": 0.39249274134635925,
      "learning_rate": 6.6940115868137065e-06,
      "loss": 0.2953,
      "step": 230
    },
    {
      "epoch": 1.3525109702584106,
      "grad_norm": 0.3363463878631592,
      "learning_rate": 6.6617739973982985e-06,
      "loss": 0.3005,
      "step": 231
    },
    {
      "epoch": 1.3583617747440273,
      "grad_norm": 0.36309415102005005,
      "learning_rate": 6.629458560488664e-06,
      "loss": 0.3415,
      "step": 232
    },
    {
      "epoch": 1.364212579229644,
      "grad_norm": 0.3635103106498718,
      "learning_rate": 6.597066789934336e-06,
      "loss": 0.3117,
      "step": 233
    },
    {
      "epoch": 1.370063383715261,
      "grad_norm": 0.3717254102230072,
      "learning_rate": 6.5646002031607726e-06,
      "loss": 0.3336,
      "step": 234
    },
    {
      "epoch": 1.3759141882008776,
      "grad_norm": 0.3539208173751831,
      "learning_rate": 6.5320603210982745e-06,
      "loss": 0.3335,
      "step": 235
    },
    {
      "epoch": 1.3817649926864943,
      "grad_norm": 0.3605196475982666,
      "learning_rate": 6.499448668110735e-06,
      "loss": 0.319,
      "step": 236
    },
    {
      "epoch": 1.3876157971721113,
      "grad_norm": 0.39067190885543823,
      "learning_rate": 6.466766771924231e-06,
      "loss": 0.3104,
      "step": 237
    },
    {
      "epoch": 1.393466601657728,
      "grad_norm": 0.3777407705783844,
      "learning_rate": 6.434016163555452e-06,
      "loss": 0.3069,
      "step": 238
    },
    {
      "epoch": 1.3993174061433447,
      "grad_norm": 0.34741804003715515,
      "learning_rate": 6.401198377239979e-06,
      "loss": 0.2852,
      "step": 239
    },
    {
      "epoch": 1.4051682106289616,
      "grad_norm": 0.3834282457828522,
      "learning_rate": 6.368314950360416e-06,
      "loss": 0.3474,
      "step": 240
    },
    {
      "epoch": 1.4110190151145783,
      "grad_norm": 0.3760935664176941,
      "learning_rate": 6.3353674233743585e-06,
      "loss": 0.3136,
      "step": 241
    },
    {
      "epoch": 1.416869819600195,
      "grad_norm": 0.3629906475543976,
      "learning_rate": 6.302357339742245e-06,
      "loss": 0.3403,
      "step": 242
    },
    {
      "epoch": 1.422720624085812,
      "grad_norm": 0.342675119638443,
      "learning_rate": 6.269286245855039e-06,
      "loss": 0.2915,
      "step": 243
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 0.3933790326118469,
      "learning_rate": 6.236155690961795e-06,
      "loss": 0.3048,
      "step": 244
    },
    {
      "epoch": 1.4344222330570453,
      "grad_norm": 0.35148119926452637,
      "learning_rate": 6.202967227097073e-06,
      "loss": 0.3072,
      "step": 245
    },
    {
      "epoch": 1.4402730375426622,
      "grad_norm": 0.3553239405155182,
      "learning_rate": 6.169722409008244e-06,
      "loss": 0.2988,
      "step": 246
    },
    {
      "epoch": 1.446123842028279,
      "grad_norm": 0.39217159152030945,
      "learning_rate": 6.136422794082645e-06,
      "loss": 0.2945,
      "step": 247
    },
    {
      "epoch": 1.4519746465138956,
      "grad_norm": 0.39117711782455444,
      "learning_rate": 6.10306994227463e-06,
      "loss": 0.3038,
      "step": 248
    },
    {
      "epoch": 1.4578254509995126,
      "grad_norm": 0.3591575026512146,
      "learning_rate": 6.0696654160324875e-06,
      "loss": 0.3136,
      "step": 249
    },
    {
      "epoch": 1.4636762554851293,
      "grad_norm": 0.4656267464160919,
      "learning_rate": 6.0362107802252486e-06,
      "loss": 0.3496,
      "step": 250
    },
    {
      "epoch": 1.469527059970746,
      "grad_norm": 0.3674546778202057,
      "learning_rate": 6.002707602069377e-06,
      "loss": 0.3121,
      "step": 251
    },
    {
      "epoch": 1.4753778644563629,
      "grad_norm": 0.4174729585647583,
      "learning_rate": 5.9691574510553505e-06,
      "loss": 0.3121,
      "step": 252
    },
    {
      "epoch": 1.4812286689419796,
      "grad_norm": 0.3748752176761627,
      "learning_rate": 5.935561898874142e-06,
      "loss": 0.3125,
      "step": 253
    },
    {
      "epoch": 1.4870794734275963,
      "grad_norm": 0.3187505006790161,
      "learning_rate": 5.901922519343586e-06,
      "loss": 0.3013,
      "step": 254
    },
    {
      "epoch": 1.4929302779132132,
      "grad_norm": 0.34686118364334106,
      "learning_rate": 5.8682408883346535e-06,
      "loss": 0.3099,
      "step": 255
    },
    {
      "epoch": 1.49878108239883,
      "grad_norm": 0.38693419098854065,
      "learning_rate": 5.834518583697628e-06,
      "loss": 0.343,
      "step": 256
    },
    {
      "epoch": 1.5046318868844466,
      "grad_norm": 0.38468196988105774,
      "learning_rate": 5.800757185188195e-06,
      "loss": 0.3152,
      "step": 257
    },
    {
      "epoch": 1.5104826913700635,
      "grad_norm": 0.3720076084136963,
      "learning_rate": 5.766958274393428e-06,
      "loss": 0.3289,
      "step": 258
    },
    {
      "epoch": 1.51633349585568,
      "grad_norm": 0.3495715260505676,
      "learning_rate": 5.733123434657704e-06,
      "loss": 0.3268,
      "step": 259
    },
    {
      "epoch": 1.522184300341297,
      "grad_norm": 0.33257222175598145,
      "learning_rate": 5.699254251008524e-06,
      "loss": 0.306,
      "step": 260
    },
    {
      "epoch": 1.5280351048269138,
      "grad_norm": 0.35938987135887146,
      "learning_rate": 5.66535231008227e-06,
      "loss": 0.3221,
      "step": 261
    },
    {
      "epoch": 1.5338859093125303,
      "grad_norm": 0.3358217477798462,
      "learning_rate": 5.631419200049867e-06,
      "loss": 0.3109,
      "step": 262
    },
    {
      "epoch": 1.5397367137981472,
      "grad_norm": 0.3260052502155304,
      "learning_rate": 5.597456510542395e-06,
      "loss": 0.2735,
      "step": 263
    },
    {
      "epoch": 1.5455875182837642,
      "grad_norm": 0.3558763861656189,
      "learning_rate": 5.5634658325766066e-06,
      "loss": 0.3133,
      "step": 264
    },
    {
      "epoch": 1.5514383227693807,
      "grad_norm": 0.34226661920547485,
      "learning_rate": 5.529448758480408e-06,
      "loss": 0.301,
      "step": 265
    },
    {
      "epoch": 1.5572891272549976,
      "grad_norm": 0.40270325541496277,
      "learning_rate": 5.495406881818256e-06,
      "loss": 0.3427,
      "step": 266
    },
    {
      "epoch": 1.5631399317406145,
      "grad_norm": 0.3240657150745392,
      "learning_rate": 5.46134179731651e-06,
      "loss": 0.2948,
      "step": 267
    },
    {
      "epoch": 1.568990736226231,
      "grad_norm": 0.36010023951530457,
      "learning_rate": 5.427255100788726e-06,
      "loss": 0.2869,
      "step": 268
    },
    {
      "epoch": 1.574841540711848,
      "grad_norm": 0.3521655797958374,
      "learning_rate": 5.393148389060893e-06,
      "loss": 0.2908,
      "step": 269
    },
    {
      "epoch": 1.5806923451974646,
      "grad_norm": 0.3522508442401886,
      "learning_rate": 5.359023259896638e-06,
      "loss": 0.3222,
      "step": 270
    },
    {
      "epoch": 1.5865431496830813,
      "grad_norm": 0.358254075050354,
      "learning_rate": 5.3248813119223665e-06,
      "loss": 0.3191,
      "step": 271
    },
    {
      "epoch": 1.5923939541686982,
      "grad_norm": 0.36198315024375916,
      "learning_rate": 5.290724144552379e-06,
      "loss": 0.315,
      "step": 272
    },
    {
      "epoch": 1.598244758654315,
      "grad_norm": 0.353097528219223,
      "learning_rate": 5.2565533579139484e-06,
      "loss": 0.3015,
      "step": 273
    },
    {
      "epoch": 1.6040955631399316,
      "grad_norm": 0.35641244053840637,
      "learning_rate": 5.222370552772353e-06,
      "loss": 0.3108,
      "step": 274
    },
    {
      "epoch": 1.6099463676255485,
      "grad_norm": 0.35300660133361816,
      "learning_rate": 5.188177330455886e-06,
      "loss": 0.3443,
      "step": 275
    },
    {
      "epoch": 1.6157971721111652,
      "grad_norm": 0.33080846071243286,
      "learning_rate": 5.153975292780852e-06,
      "loss": 0.2871,
      "step": 276
    },
    {
      "epoch": 1.621647976596782,
      "grad_norm": 0.33396315574645996,
      "learning_rate": 5.119766041976516e-06,
      "loss": 0.3089,
      "step": 277
    },
    {
      "epoch": 1.6274987810823989,
      "grad_norm": 0.34597212076187134,
      "learning_rate": 5.085551180610046e-06,
      "loss": 0.2817,
      "step": 278
    },
    {
      "epoch": 1.6333495855680156,
      "grad_norm": 0.3279144763946533,
      "learning_rate": 5.05133231151145e-06,
      "loss": 0.2944,
      "step": 279
    },
    {
      "epoch": 1.6392003900536323,
      "grad_norm": 0.3529197871685028,
      "learning_rate": 5.017111037698477e-06,
      "loss": 0.3195,
      "step": 280
    },
    {
      "epoch": 1.6450511945392492,
      "grad_norm": 0.36540284752845764,
      "learning_rate": 4.9828889623015265e-06,
      "loss": 0.3282,
      "step": 281
    },
    {
      "epoch": 1.650901999024866,
      "grad_norm": 0.33339953422546387,
      "learning_rate": 4.948667688488552e-06,
      "loss": 0.2907,
      "step": 282
    },
    {
      "epoch": 1.6567528035104826,
      "grad_norm": 0.32981109619140625,
      "learning_rate": 4.9144488193899546e-06,
      "loss": 0.2982,
      "step": 283
    },
    {
      "epoch": 1.6626036079960995,
      "grad_norm": 0.33798947930336,
      "learning_rate": 4.880233958023486e-06,
      "loss": 0.2964,
      "step": 284
    },
    {
      "epoch": 1.6684544124817162,
      "grad_norm": 0.3474103808403015,
      "learning_rate": 4.846024707219149e-06,
      "loss": 0.3301,
      "step": 285
    },
    {
      "epoch": 1.674305216967333,
      "grad_norm": 0.3323943316936493,
      "learning_rate": 4.811822669544115e-06,
      "loss": 0.3014,
      "step": 286
    },
    {
      "epoch": 1.6801560214529498,
      "grad_norm": 0.38225099444389343,
      "learning_rate": 4.777629447227649e-06,
      "loss": 0.3389,
      "step": 287
    },
    {
      "epoch": 1.6860068259385665,
      "grad_norm": 0.3148108720779419,
      "learning_rate": 4.7434466420860515e-06,
      "loss": 0.298,
      "step": 288
    },
    {
      "epoch": 1.6918576304241832,
      "grad_norm": 0.3262878656387329,
      "learning_rate": 4.7092758554476215e-06,
      "loss": 0.29,
      "step": 289
    },
    {
      "epoch": 1.6977084349098002,
      "grad_norm": 0.3702300190925598,
      "learning_rate": 4.675118688077634e-06,
      "loss": 0.327,
      "step": 290
    },
    {
      "epoch": 1.7035592393954169,
      "grad_norm": 0.3070249855518341,
      "learning_rate": 4.640976740103363e-06,
      "loss": 0.2918,
      "step": 291
    },
    {
      "epoch": 1.7094100438810336,
      "grad_norm": 0.3508608937263489,
      "learning_rate": 4.606851610939108e-06,
      "loss": 0.3251,
      "step": 292
    },
    {
      "epoch": 1.7152608483666505,
      "grad_norm": 0.3425685465335846,
      "learning_rate": 4.572744899211275e-06,
      "loss": 0.3039,
      "step": 293
    },
    {
      "epoch": 1.7211116528522672,
      "grad_norm": 0.33032500743865967,
      "learning_rate": 4.53865820268349e-06,
      "loss": 0.2874,
      "step": 294
    },
    {
      "epoch": 1.726962457337884,
      "grad_norm": 0.34354081749916077,
      "learning_rate": 4.504593118181745e-06,
      "loss": 0.293,
      "step": 295
    },
    {
      "epoch": 1.7328132618235008,
      "grad_norm": 0.35744139552116394,
      "learning_rate": 4.470551241519594e-06,
      "loss": 0.3136,
      "step": 296
    },
    {
      "epoch": 1.7386640663091175,
      "grad_norm": 0.34493860602378845,
      "learning_rate": 4.436534167423395e-06,
      "loss": 0.2967,
      "step": 297
    },
    {
      "epoch": 1.7445148707947342,
      "grad_norm": 0.35344043374061584,
      "learning_rate": 4.402543489457607e-06,
      "loss": 0.3073,
      "step": 298
    },
    {
      "epoch": 1.7503656752803511,
      "grad_norm": 0.3236096203327179,
      "learning_rate": 4.368580799950133e-06,
      "loss": 0.3045,
      "step": 299
    },
    {
      "epoch": 1.7562164797659678,
      "grad_norm": 0.32016465067863464,
      "learning_rate": 4.334647689917734e-06,
      "loss": 0.2846,
      "step": 300
    },
    {
      "epoch": 1.7620672842515845,
      "grad_norm": 0.3745932877063751,
      "learning_rate": 4.300745748991478e-06,
      "loss": 0.3333,
      "step": 301
    },
    {
      "epoch": 1.7679180887372015,
      "grad_norm": 0.387076735496521,
      "learning_rate": 4.266876565342298e-06,
      "loss": 0.3218,
      "step": 302
    },
    {
      "epoch": 1.7737688932228182,
      "grad_norm": 0.3995639979839325,
      "learning_rate": 4.233041725606573e-06,
      "loss": 0.3007,
      "step": 303
    },
    {
      "epoch": 1.7796196977084349,
      "grad_norm": 0.3345247507095337,
      "learning_rate": 4.199242814811807e-06,
      "loss": 0.3214,
      "step": 304
    },
    {
      "epoch": 1.7854705021940518,
      "grad_norm": 0.3709820806980133,
      "learning_rate": 4.1654814163023735e-06,
      "loss": 0.3168,
      "step": 305
    },
    {
      "epoch": 1.7913213066796685,
      "grad_norm": 0.34402501583099365,
      "learning_rate": 4.131759111665349e-06,
      "loss": 0.2967,
      "step": 306
    },
    {
      "epoch": 1.7971721111652852,
      "grad_norm": 0.3674980103969574,
      "learning_rate": 4.098077480656415e-06,
      "loss": 0.3069,
      "step": 307
    },
    {
      "epoch": 1.8030229156509021,
      "grad_norm": 0.35379621386528015,
      "learning_rate": 4.064438101125859e-06,
      "loss": 0.3105,
      "step": 308
    },
    {
      "epoch": 1.8088737201365188,
      "grad_norm": 0.41910186409950256,
      "learning_rate": 4.03084254894465e-06,
      "loss": 0.3471,
      "step": 309
    },
    {
      "epoch": 1.8147245246221355,
      "grad_norm": 0.3440791964530945,
      "learning_rate": 3.997292397930624e-06,
      "loss": 0.2799,
      "step": 310
    },
    {
      "epoch": 1.8205753291077524,
      "grad_norm": 0.3493747413158417,
      "learning_rate": 3.963789219774753e-06,
      "loss": 0.3011,
      "step": 311
    },
    {
      "epoch": 1.8264261335933691,
      "grad_norm": 0.3454689681529999,
      "learning_rate": 3.930334583967514e-06,
      "loss": 0.2977,
      "step": 312
    },
    {
      "epoch": 1.8322769380789858,
      "grad_norm": 0.3456018567085266,
      "learning_rate": 3.896930057725372e-06,
      "loss": 0.3083,
      "step": 313
    },
    {
      "epoch": 1.8381277425646028,
      "grad_norm": 0.3650881052017212,
      "learning_rate": 3.863577205917356e-06,
      "loss": 0.292,
      "step": 314
    },
    {
      "epoch": 1.8439785470502192,
      "grad_norm": 0.37091773748397827,
      "learning_rate": 3.8302775909917585e-06,
      "loss": 0.3371,
      "step": 315
    },
    {
      "epoch": 1.8498293515358362,
      "grad_norm": 0.34685999155044556,
      "learning_rate": 3.7970327729029288e-06,
      "loss": 0.303,
      "step": 316
    },
    {
      "epoch": 1.855680156021453,
      "grad_norm": 0.3407152593135834,
      "learning_rate": 3.7638443090382067e-06,
      "loss": 0.3268,
      "step": 317
    },
    {
      "epoch": 1.8615309605070696,
      "grad_norm": 0.3154624104499817,
      "learning_rate": 3.730713754144961e-06,
      "loss": 0.2752,
      "step": 318
    },
    {
      "epoch": 1.8673817649926865,
      "grad_norm": 0.3909953534603119,
      "learning_rate": 3.6976426602577565e-06,
      "loss": 0.3347,
      "step": 319
    },
    {
      "epoch": 1.8732325694783034,
      "grad_norm": 0.3309001922607422,
      "learning_rate": 3.6646325766256423e-06,
      "loss": 0.2865,
      "step": 320
    },
    {
      "epoch": 1.87908337396392,
      "grad_norm": 0.32067787647247314,
      "learning_rate": 3.6316850496395863e-06,
      "loss": 0.3059,
      "step": 321
    },
    {
      "epoch": 1.8849341784495368,
      "grad_norm": 0.35044965147972107,
      "learning_rate": 3.598801622760021e-06,
      "loss": 0.311,
      "step": 322
    },
    {
      "epoch": 1.8907849829351537,
      "grad_norm": 0.3437960743904114,
      "learning_rate": 3.5659838364445505e-06,
      "loss": 0.2987,
      "step": 323
    },
    {
      "epoch": 1.8966357874207702,
      "grad_norm": 0.372930645942688,
      "learning_rate": 3.5332332280757706e-06,
      "loss": 0.3277,
      "step": 324
    },
    {
      "epoch": 1.9024865919063871,
      "grad_norm": 0.33684802055358887,
      "learning_rate": 3.5005513318892666e-06,
      "loss": 0.304,
      "step": 325
    },
    {
      "epoch": 1.908337396392004,
      "grad_norm": 0.34415948390960693,
      "learning_rate": 3.4679396789017263e-06,
      "loss": 0.3257,
      "step": 326
    },
    {
      "epoch": 1.9141882008776205,
      "grad_norm": 0.3257143795490265,
      "learning_rate": 3.4353997968392295e-06,
      "loss": 0.2821,
      "step": 327
    },
    {
      "epoch": 1.9200390053632375,
      "grad_norm": 0.33275625109672546,
      "learning_rate": 3.402933210065665e-06,
      "loss": 0.3002,
      "step": 328
    },
    {
      "epoch": 1.9258898098488544,
      "grad_norm": 0.31569424271583557,
      "learning_rate": 3.3705414395113354e-06,
      "loss": 0.2999,
      "step": 329
    },
    {
      "epoch": 1.9317406143344709,
      "grad_norm": 0.3612264394760132,
      "learning_rate": 3.3382260026017027e-06,
      "loss": 0.3035,
      "step": 330
    },
    {
      "epoch": 1.9375914188200878,
      "grad_norm": 0.3221355378627777,
      "learning_rate": 3.305988413186295e-06,
      "loss": 0.2915,
      "step": 331
    },
    {
      "epoch": 1.9434422233057045,
      "grad_norm": 0.34144338965415955,
      "learning_rate": 3.2738301814678015e-06,
      "loss": 0.288,
      "step": 332
    },
    {
      "epoch": 1.9492930277913212,
      "grad_norm": 0.35333582758903503,
      "learning_rate": 3.241752813931316e-06,
      "loss": 0.3185,
      "step": 333
    },
    {
      "epoch": 1.955143832276938,
      "grad_norm": 0.3483865559101105,
      "learning_rate": 3.2097578132737716e-06,
      "loss": 0.294,
      "step": 334
    },
    {
      "epoch": 1.9609946367625548,
      "grad_norm": 0.4015137851238251,
      "learning_rate": 3.1778466783335328e-06,
      "loss": 0.3608,
      "step": 335
    },
    {
      "epoch": 1.9668454412481715,
      "grad_norm": 0.35391393303871155,
      "learning_rate": 3.1460209040201967e-06,
      "loss": 0.2948,
      "step": 336
    },
    {
      "epoch": 1.9726962457337884,
      "grad_norm": 0.3409406244754791,
      "learning_rate": 3.114281981244553e-06,
      "loss": 0.2983,
      "step": 337
    },
    {
      "epoch": 1.9785470502194051,
      "grad_norm": 0.36691051721572876,
      "learning_rate": 3.082631396848743e-06,
      "loss": 0.3138,
      "step": 338
    },
    {
      "epoch": 1.9843978547050218,
      "grad_norm": 0.33103111386299133,
      "learning_rate": 3.0510706335366034e-06,
      "loss": 0.2874,
      "step": 339
    },
    {
      "epoch": 1.9902486591906388,
      "grad_norm": 0.3499497175216675,
      "learning_rate": 3.019601169804216e-06,
      "loss": 0.3114,
      "step": 340
    },
    {
      "epoch": 1.9960994636762555,
| "grad_norm": 0.36926743388175964, | |
| "learning_rate": 2.9882244798706372e-06, | |
| "loss": 0.3, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 2.0029254022428082, | |
| "grad_norm": 0.37020495533943176, | |
| "learning_rate": 2.956942033608843e-06, | |
| "loss": 0.2719, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 2.008776206728425, | |
| "grad_norm": 0.39733192324638367, | |
| "learning_rate": 2.9257552964768644e-06, | |
| "loss": 0.2797, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 2.014627011214042, | |
| "grad_norm": 0.4049554169178009, | |
| "learning_rate": 2.8946657294491452e-06, | |
| "loss": 0.2898, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 2.0204778156996586, | |
| "grad_norm": 0.33150407671928406, | |
| "learning_rate": 2.863674788948097e-06, | |
| "loss": 0.2544, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 2.0263286201852755, | |
| "grad_norm": 0.34981128573417664, | |
| "learning_rate": 2.832783926775865e-06, | |
| "loss": 0.3092, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 2.0321794246708924, | |
| "grad_norm": 0.3597969114780426, | |
| "learning_rate": 2.8019945900463307e-06, | |
| "loss": 0.2516, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 2.038030229156509, | |
| "grad_norm": 0.3807888925075531, | |
| "learning_rate": 2.771308221117309e-06, | |
| "loss": 0.2399, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 2.043881033642126, | |
| "grad_norm": 0.4420805275440216, | |
| "learning_rate": 2.740726257522987e-06, | |
| "loss": 0.2623, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 2.0497318381277427, | |
| "grad_norm": 0.3532399535179138, | |
| "learning_rate": 2.7102501319065706e-06, | |
| "loss": 0.2603, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.055582642613359, | |
| "grad_norm": 0.35583576560020447, | |
| "learning_rate": 2.6798812719531843e-06, | |
| "loss": 0.2784, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 2.061433447098976, | |
| "grad_norm": 0.3376619517803192, | |
| "learning_rate": 2.6496211003229795e-06, | |
| "loss": 0.2687, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 2.067284251584593, | |
| "grad_norm": 0.35551944375038147, | |
| "learning_rate": 2.6194710345845e-06, | |
| "loss": 0.2666, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 2.0731350560702095, | |
| "grad_norm": 0.3540509343147278, | |
| "learning_rate": 2.5894324871482557e-06, | |
| "loss": 0.2553, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 2.0789858605558265, | |
| "grad_norm": 0.354968786239624, | |
| "learning_rate": 2.559506865200576e-06, | |
| "loss": 0.2533, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 2.0848366650414434, | |
| "grad_norm": 0.36224791407585144, | |
| "learning_rate": 2.529695570637679e-06, | |
| "loss": 0.2621, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 2.09068746952706, | |
| "grad_norm": 0.3591248095035553, | |
| "learning_rate": 2.5000000000000015e-06, | |
| "loss": 0.2911, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 2.096538274012677, | |
| "grad_norm": 0.3431352376937866, | |
| "learning_rate": 2.4704215444067684e-06, | |
| "loss": 0.2552, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 2.1023890784982937, | |
| "grad_norm": 0.3626757562160492, | |
| "learning_rate": 2.4409615894908407e-06, | |
| "loss": 0.2895, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 2.10823988298391, | |
| "grad_norm": 0.3287983238697052, | |
| "learning_rate": 2.411621515333788e-06, | |
| "loss": 0.259, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.114090687469527, | |
| "grad_norm": 0.35708674788475037, | |
| "learning_rate": 2.3824026964012487e-06, | |
| "loss": 0.2709, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 2.119941491955144, | |
| "grad_norm": 0.3249606192111969, | |
| "learning_rate": 2.35330650147853e-06, | |
| "loss": 0.2618, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 2.1257922964407605, | |
| "grad_norm": 0.309447705745697, | |
| "learning_rate": 2.324334293606499e-06, | |
| "loss": 0.2439, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 2.1316431009263774, | |
| "grad_norm": 0.328646719455719, | |
| "learning_rate": 2.2954874300177197e-06, | |
| "loss": 0.2303, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 2.1374939054119944, | |
| "grad_norm": 0.3447718024253845, | |
| "learning_rate": 2.266767262072878e-06, | |
| "loss": 0.2685, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 2.143344709897611, | |
| "grad_norm": 0.3506672978401184, | |
| "learning_rate": 2.238175135197471e-06, | |
| "loss": 0.2728, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 2.1491955143832278, | |
| "grad_norm": 0.34329918026924133, | |
| "learning_rate": 2.2097123888187825e-06, | |
| "loss": 0.2646, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 2.1550463188688447, | |
| "grad_norm": 0.3055090606212616, | |
| "learning_rate": 2.181380356303139e-06, | |
| "loss": 0.239, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 2.160897123354461, | |
| "grad_norm": 0.30475035309791565, | |
| "learning_rate": 2.1531803648934333e-06, | |
| "loss": 0.2683, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 2.166747927840078, | |
| "grad_norm": 0.32849615812301636, | |
| "learning_rate": 2.1251137356469677e-06, | |
| "loss": 0.2491, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.172598732325695, | |
| "grad_norm": 0.3533441126346588, | |
| "learning_rate": 2.0971817833735548e-06, | |
| "loss": 0.2781, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 2.1784495368113115, | |
| "grad_norm": 0.30950412154197693, | |
| "learning_rate": 2.069385816573928e-06, | |
| "loss": 0.2258, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 2.1843003412969284, | |
| "grad_norm": 0.34565675258636475, | |
| "learning_rate": 2.0417271373784403e-06, | |
| "loss": 0.3049, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 2.1901511457825453, | |
| "grad_norm": 0.32770001888275146, | |
| "learning_rate": 2.0142070414860704e-06, | |
| "loss": 0.254, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 2.196001950268162, | |
| "grad_norm": 0.34241920709609985, | |
| "learning_rate": 1.9868268181037186e-06, | |
| "loss": 0.2612, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.2018527547537787, | |
| "grad_norm": 0.33506783843040466, | |
| "learning_rate": 1.9595877498858175e-06, | |
| "loss": 0.2748, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 2.2077035592393957, | |
| "grad_norm": 0.3262109160423279, | |
| "learning_rate": 1.9324911128742406e-06, | |
| "loss": 0.2665, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 2.213554363725012, | |
| "grad_norm": 0.3280249536037445, | |
| "learning_rate": 1.9055381764385272e-06, | |
| "loss": 0.2591, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 2.219405168210629, | |
| "grad_norm": 0.33232155442237854, | |
| "learning_rate": 1.8787302032164168e-06, | |
| "loss": 0.2833, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 2.2252559726962455, | |
| "grad_norm": 0.3800523579120636, | |
| "learning_rate": 1.8520684490547014e-06, | |
| "loss": 0.2895, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.2311067771818625, | |
| "grad_norm": 0.3366720974445343, | |
| "learning_rate": 1.8255541629503865e-06, | |
| "loss": 0.2682, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 2.2369575816674794, | |
| "grad_norm": 0.31780189275741577, | |
| "learning_rate": 1.7991885869921928e-06, | |
| "loss": 0.2567, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.242808386153096, | |
| "grad_norm": 0.3226467967033386, | |
| "learning_rate": 1.7729729563023613e-06, | |
| "loss": 0.2575, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 2.248659190638713, | |
| "grad_norm": 0.3137516677379608, | |
| "learning_rate": 1.746908498978791e-06, | |
| "loss": 0.2464, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.2545099951243297, | |
| "grad_norm": 0.3348909914493561, | |
| "learning_rate": 1.7209964360375137e-06, | |
| "loss": 0.2779, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.260360799609946, | |
| "grad_norm": 0.3146172761917114, | |
| "learning_rate": 1.6952379813554914e-06, | |
| "loss": 0.2533, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.266211604095563, | |
| "grad_norm": 0.32093656063079834, | |
| "learning_rate": 1.6696343416137495e-06, | |
| "loss": 0.2626, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 2.27206240858118, | |
| "grad_norm": 0.3114534914493561, | |
| "learning_rate": 1.6441867162408514e-06, | |
| "loss": 0.2435, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.2779132130667965, | |
| "grad_norm": 0.326259583234787, | |
| "learning_rate": 1.6188962973567068e-06, | |
| "loss": 0.2572, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 2.2837640175524134, | |
| "grad_norm": 0.3404834568500519, | |
| "learning_rate": 1.5937642697167288e-06, | |
| "loss": 0.2941, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.2896148220380304, | |
| "grad_norm": 0.28849631547927856, | |
| "learning_rate": 1.5687918106563326e-06, | |
| "loss": 0.2377, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 2.295465626523647, | |
| "grad_norm": 0.2880316972732544, | |
| "learning_rate": 1.5439800900357765e-06, | |
| "loss": 0.2376, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.3013164310092638, | |
| "grad_norm": 0.3132326900959015, | |
| "learning_rate": 1.5193302701853674e-06, | |
| "loss": 0.2483, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 2.3071672354948807, | |
| "grad_norm": 0.34062352776527405, | |
| "learning_rate": 1.4948435058510036e-06, | |
| "loss": 0.2695, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.313018039980497, | |
| "grad_norm": 0.36842963099479675, | |
| "learning_rate": 1.4705209441400841e-06, | |
| "loss": 0.2897, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.318868844466114, | |
| "grad_norm": 0.3079017996788025, | |
| "learning_rate": 1.4463637244677648e-06, | |
| "loss": 0.2234, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.324719648951731, | |
| "grad_norm": 0.3228384852409363, | |
| "learning_rate": 1.422372978503589e-06, | |
| "loss": 0.2654, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.3305704534373475, | |
| "grad_norm": 0.32149839401245117, | |
| "learning_rate": 1.3985498301184685e-06, | |
| "loss": 0.2642, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.3364212579229644, | |
| "grad_norm": 0.3348996937274933, | |
| "learning_rate": 1.374895395332037e-06, | |
| "loss": 0.2639, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.3422720624085813, | |
| "grad_norm": 0.3477461338043213, | |
| "learning_rate": 1.351410782260366e-06, | |
| "loss": 0.278, | |
| "step": 400 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 510, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.413041512335802e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
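The object above is the trainer_state.json that the Hugging Face Trainer writes into each checkpoint directory; the learning_rate values it logs are consistent with a linear warmup followed by a cosine-style decay running toward max_steps = 510. Below is a minimal sketch (not part of the state file itself) for loading this file and plotting its loss and learning-rate curves; the path "trainer_state.json" and the output filename are illustrative, and matplotlib is assumed to be installed.

import json

import matplotlib.pyplot as plt

# Load the trainer state (illustrative path; adjust to your checkpoint dir).
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only entries that carry training metrics; eval entries, if present,
# would lack a "loss" key.
history = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in history]
losses = [e["loss"] for e in history]
lrs = [e["learning_rate"] for e in history]

# Two stacked panels sharing the step axis: loss on top, learning rate below.
fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True)
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("train loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("global step")
fig.tight_layout()
fig.savefig("training_curves.png")  # illustrative output name

Over the span logged here (steps 301-400, epochs ~1.76-2.34), such a plot would show the loss drifting from roughly 0.33 down into the 0.22-0.30 range while the learning rate decays smoothly from about 4.3e-06 to 1.35e-06.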