| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 27.51196172248804, | |
| "eval_steps": 1000, | |
| "global_step": 92000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 3.539609432220459, | |
| "learning_rate": 4.99925228054434e-05, | |
| "loss": 2.134, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 3.197829246520996, | |
| "learning_rate": 4.997756841633019e-05, | |
| "loss": 0.6178, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 3.3991429805755615, | |
| "learning_rate": 4.996261402721699e-05, | |
| "loss": 0.5496, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 3.072633743286133, | |
| "learning_rate": 4.9947659638103784e-05, | |
| "loss": 0.5228, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 2.4815468788146973, | |
| "learning_rate": 4.993270524899058e-05, | |
| "loss": 0.5102, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 2.794753313064575, | |
| "learning_rate": 4.991775085987738e-05, | |
| "loss": 0.4746, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 2.1388251781463623, | |
| "learning_rate": 4.9902796470764176e-05, | |
| "loss": 0.4769, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 2.518214225769043, | |
| "learning_rate": 4.988784208165096e-05, | |
| "loss": 0.4476, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 4.257823467254639, | |
| "learning_rate": 4.987288769253776e-05, | |
| "loss": 0.439, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 2.0235888957977295, | |
| "learning_rate": 4.985793330342456e-05, | |
| "loss": 0.4465, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_loss": 0.34466782212257385, | |
| "eval_precision": 0.7649398815576958, | |
| "eval_recall": 0.7874318790603159, | |
| "eval_runtime": 321.2695, | |
| "eval_samples_per_second": 41.629, | |
| "eval_steps_per_second": 1.301, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 2.372622489929199, | |
| "learning_rate": 4.984297891431135e-05, | |
| "loss": 0.438, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 2.184081792831421, | |
| "learning_rate": 4.982802452519815e-05, | |
| "loss": 0.4319, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 1.180004358291626, | |
| "learning_rate": 4.981307013608494e-05, | |
| "loss": 0.4153, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 1.8515098094940186, | |
| "learning_rate": 4.979811574697174e-05, | |
| "loss": 0.4107, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 2.0762712955474854, | |
| "learning_rate": 4.978316135785853e-05, | |
| "loss": 0.4087, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.6716846227645874, | |
| "learning_rate": 4.9768206968745326e-05, | |
| "loss": 0.4082, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 2.9515812397003174, | |
| "learning_rate": 4.9753252579632126e-05, | |
| "loss": 0.398, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 1.9658855199813843, | |
| "learning_rate": 4.973829819051892e-05, | |
| "loss": 0.393, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 1.9613778591156006, | |
| "learning_rate": 4.972334380140571e-05, | |
| "loss": 0.3904, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 2.7774882316589355, | |
| "learning_rate": 4.970838941229251e-05, | |
| "loss": 0.3794, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_loss": 0.310618132352829, | |
| "eval_precision": 0.7516943243620137, | |
| "eval_recall": 0.8298285045721852, | |
| "eval_runtime": 320.9754, | |
| "eval_samples_per_second": 41.667, | |
| "eval_steps_per_second": 1.302, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 1.4382622241973877, | |
| "learning_rate": 4.969343502317931e-05, | |
| "loss": 0.369, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 1.813565731048584, | |
| "learning_rate": 4.96784806340661e-05, | |
| "loss": 0.3751, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 2.279954195022583, | |
| "learning_rate": 4.9663526244952897e-05, | |
| "loss": 0.3804, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.9376351833343506, | |
| "learning_rate": 4.9648571855839696e-05, | |
| "loss": 0.3611, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 2.2867352962493896, | |
| "learning_rate": 4.963361746672648e-05, | |
| "loss": 0.3739, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 2.132394313812256, | |
| "learning_rate": 4.961866307761328e-05, | |
| "loss": 0.3669, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 2.0541863441467285, | |
| "learning_rate": 4.9603708688500075e-05, | |
| "loss": 0.366, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 2.1414847373962402, | |
| "learning_rate": 4.9588754299386874e-05, | |
| "loss": 0.3535, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.3949612379074097, | |
| "learning_rate": 4.957379991027367e-05, | |
| "loss": 0.3684, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 1.8921570777893066, | |
| "learning_rate": 4.955884552116046e-05, | |
| "loss": 0.3556, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_loss": 0.290554404258728, | |
| "eval_precision": 0.79493216033703, | |
| "eval_recall": 0.7901105329597586, | |
| "eval_runtime": 307.7262, | |
| "eval_samples_per_second": 43.461, | |
| "eval_steps_per_second": 1.358, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.6217349767684937, | |
| "learning_rate": 4.954389113204726e-05, | |
| "loss": 0.3566, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.524946928024292, | |
| "learning_rate": 4.952893674293405e-05, | |
| "loss": 0.3477, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 1.6807836294174194, | |
| "learning_rate": 4.9513982353820846e-05, | |
| "loss": 0.3409, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 1.5750257968902588, | |
| "learning_rate": 4.9499027964707645e-05, | |
| "loss": 0.3178, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 1.43153715133667, | |
| "learning_rate": 4.9484073575594445e-05, | |
| "loss": 0.2888, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 1.4886215925216675, | |
| "learning_rate": 4.946911918648123e-05, | |
| "loss": 0.3153, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 2.2148983478546143, | |
| "learning_rate": 4.945416479736803e-05, | |
| "loss": 0.3114, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 1.3632937669754028, | |
| "learning_rate": 4.9439210408254824e-05, | |
| "loss": 0.3031, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 1.8350048065185547, | |
| "learning_rate": 4.9424256019141617e-05, | |
| "loss": 0.292, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 1.1402252912521362, | |
| "learning_rate": 4.9409301630028416e-05, | |
| "loss": 0.2983, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_loss": 0.2781643867492676, | |
| "eval_precision": 0.7788883753177721, | |
| "eval_recall": 0.8301363958249947, | |
| "eval_runtime": 307.2732, | |
| "eval_samples_per_second": 43.525, | |
| "eval_steps_per_second": 1.36, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 1.2367932796478271, | |
| "learning_rate": 4.939434724091521e-05, | |
| "loss": 0.2894, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 1.4055671691894531, | |
| "learning_rate": 4.937939285180201e-05, | |
| "loss": 0.2847, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 1.910565972328186, | |
| "learning_rate": 4.93644384626888e-05, | |
| "loss": 0.2917, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 1.9085345268249512, | |
| "learning_rate": 4.9349484073575595e-05, | |
| "loss": 0.2934, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 1.5550158023834229, | |
| "learning_rate": 4.9334529684462394e-05, | |
| "loss": 0.2726, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 2.1685421466827393, | |
| "learning_rate": 4.931957529534919e-05, | |
| "loss": 0.3077, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 1.7528005838394165, | |
| "learning_rate": 4.930462090623598e-05, | |
| "loss": 0.2919, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 1.804412841796875, | |
| "learning_rate": 4.928966651712278e-05, | |
| "loss": 0.278, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 2.430739164352417, | |
| "learning_rate": 4.927471212800957e-05, | |
| "loss": 0.2901, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 1.5466407537460327, | |
| "learning_rate": 4.9259757738896365e-05, | |
| "loss": 0.2886, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_loss": 0.27095386385917664, | |
| "eval_precision": 0.7892478844902066, | |
| "eval_recall": 0.8212999168693618, | |
| "eval_runtime": 308.5531, | |
| "eval_samples_per_second": 43.344, | |
| "eval_steps_per_second": 1.355, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 1.1303741931915283, | |
| "learning_rate": 4.9244803349783165e-05, | |
| "loss": 0.291, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 1.3640042543411255, | |
| "learning_rate": 4.922984896066996e-05, | |
| "loss": 0.2897, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 1.9915575981140137, | |
| "learning_rate": 4.921489457155675e-05, | |
| "loss": 0.2798, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 1.574576735496521, | |
| "learning_rate": 4.919994018244355e-05, | |
| "loss": 0.2856, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 1.9231148958206177, | |
| "learning_rate": 4.918498579333034e-05, | |
| "loss": 0.2819, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 2.171637773513794, | |
| "learning_rate": 4.917003140421714e-05, | |
| "loss": 0.2892, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 1.7447925806045532, | |
| "learning_rate": 4.9155077015103936e-05, | |
| "loss": 0.2837, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 2.282715320587158, | |
| "learning_rate": 4.914012262599073e-05, | |
| "loss": 0.2888, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 2.041062831878662, | |
| "learning_rate": 4.912516823687753e-05, | |
| "loss": 0.2733, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "grad_norm": 1.3900405168533325, | |
| "learning_rate": 4.911021384776432e-05, | |
| "loss": 0.2982, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "eval_loss": 0.24861453473567963, | |
| "eval_precision": 0.7945360585297875, | |
| "eval_recall": 0.8426059915637797, | |
| "eval_runtime": 306.7263, | |
| "eval_samples_per_second": 43.602, | |
| "eval_steps_per_second": 1.363, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "grad_norm": 2.156783103942871, | |
| "learning_rate": 4.9095259458651114e-05, | |
| "loss": 0.2883, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 1.6421504020690918, | |
| "learning_rate": 4.9080305069537914e-05, | |
| "loss": 0.2716, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 1.6905546188354492, | |
| "learning_rate": 4.906535068042471e-05, | |
| "loss": 0.2775, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 1.1936814785003662, | |
| "learning_rate": 4.90503962913115e-05, | |
| "loss": 0.2571, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 1.7146382331848145, | |
| "learning_rate": 4.90354419021983e-05, | |
| "loss": 0.2681, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 1.5280200242996216, | |
| "learning_rate": 4.902048751308509e-05, | |
| "loss": 0.2655, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.4756951332092285, | |
| "learning_rate": 4.9005533123971885e-05, | |
| "loss": 0.2554, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 1.5664458274841309, | |
| "learning_rate": 4.8990578734858685e-05, | |
| "loss": 0.2125, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 1.447304368019104, | |
| "learning_rate": 4.897562434574548e-05, | |
| "loss": 0.2161, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "grad_norm": 1.8067011833190918, | |
| "learning_rate": 4.896066995663227e-05, | |
| "loss": 0.213, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "eval_loss": 0.24976512789726257, | |
| "eval_precision": 0.8138389031705227, | |
| "eval_recall": 0.8187752085963238, | |
| "eval_runtime": 305.8458, | |
| "eval_samples_per_second": 43.728, | |
| "eval_steps_per_second": 1.367, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 2.7706127166748047, | |
| "learning_rate": 4.894571556751907e-05, | |
| "loss": 0.2186, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "grad_norm": 2.394275426864624, | |
| "learning_rate": 4.893076117840586e-05, | |
| "loss": 0.2094, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "grad_norm": 1.9464359283447266, | |
| "learning_rate": 4.891580678929266e-05, | |
| "loss": 0.2278, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 2.1283416748046875, | |
| "learning_rate": 4.8900852400179456e-05, | |
| "loss": 0.2174, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 1.7853657007217407, | |
| "learning_rate": 4.888589801106625e-05, | |
| "loss": 0.2184, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 1.1081209182739258, | |
| "learning_rate": 4.887094362195305e-05, | |
| "loss": 0.2201, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 1.3894284963607788, | |
| "learning_rate": 4.885598923283984e-05, | |
| "loss": 0.2213, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 2.0615389347076416, | |
| "learning_rate": 4.8841034843726634e-05, | |
| "loss": 0.2217, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 1.6415098905563354, | |
| "learning_rate": 4.8826080454613434e-05, | |
| "loss": 0.2266, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "grad_norm": 3.293736219406128, | |
| "learning_rate": 4.8811126065500226e-05, | |
| "loss": 0.2117, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "eval_loss": 0.24216407537460327, | |
| "eval_precision": 0.8107814105275881, | |
| "eval_recall": 0.826133809538471, | |
| "eval_runtime": 307.023, | |
| "eval_samples_per_second": 43.56, | |
| "eval_steps_per_second": 1.361, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 1.1580455303192139, | |
| "learning_rate": 4.879617167638702e-05, | |
| "loss": 0.2171, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 1.0756213665008545, | |
| "learning_rate": 4.878121728727382e-05, | |
| "loss": 0.2174, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 1.871605396270752, | |
| "learning_rate": 4.876626289816061e-05, | |
| "loss": 0.215, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "grad_norm": 1.8400825262069702, | |
| "learning_rate": 4.8751308509047405e-05, | |
| "loss": 0.2215, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 2.0464110374450684, | |
| "learning_rate": 4.8736354119934204e-05, | |
| "loss": 0.2195, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "grad_norm": 1.2704099416732788, | |
| "learning_rate": 4.8721399730821e-05, | |
| "loss": 0.2266, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 0.9448720216751099, | |
| "learning_rate": 4.87064453417078e-05, | |
| "loss": 0.2159, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 1.2881120443344116, | |
| "learning_rate": 4.869149095259459e-05, | |
| "loss": 0.2084, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "grad_norm": 2.0659286975860596, | |
| "learning_rate": 4.867653656348138e-05, | |
| "loss": 0.2134, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 1.109397530555725, | |
| "learning_rate": 4.866158217436818e-05, | |
| "loss": 0.2129, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "eval_loss": 0.22735044360160828, | |
| "eval_precision": 0.8203027060082556, | |
| "eval_recall": 0.8260106530373472, | |
| "eval_runtime": 305.794, | |
| "eval_samples_per_second": 43.735, | |
| "eval_steps_per_second": 1.367, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "grad_norm": 1.164435625076294, | |
| "learning_rate": 4.8646627785254975e-05, | |
| "loss": 0.2155, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 1.5477757453918457, | |
| "learning_rate": 4.863167339614177e-05, | |
| "loss": 0.2137, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "grad_norm": 1.4342052936553955, | |
| "learning_rate": 4.861671900702857e-05, | |
| "loss": 0.206, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 1.3847391605377197, | |
| "learning_rate": 4.860176461791536e-05, | |
| "loss": 0.2077, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 2.9082765579223633, | |
| "learning_rate": 4.8586810228802154e-05, | |
| "loss": 0.2126, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 1.4943510293960571, | |
| "learning_rate": 4.857185583968895e-05, | |
| "loss": 0.2092, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 1.2332855463027954, | |
| "learning_rate": 4.8556901450575746e-05, | |
| "loss": 0.2222, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "grad_norm": 2.227031946182251, | |
| "learning_rate": 4.854194706146254e-05, | |
| "loss": 0.1969, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 1.2515846490859985, | |
| "learning_rate": 4.852699267234934e-05, | |
| "loss": 0.2017, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 1.2267186641693115, | |
| "learning_rate": 4.851203828323613e-05, | |
| "loss": 0.2126, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_loss": 0.20952437818050385, | |
| "eval_precision": 0.8416687769055458, | |
| "eval_recall": 0.818682841220481, | |
| "eval_runtime": 302.8923, | |
| "eval_samples_per_second": 44.154, | |
| "eval_steps_per_second": 1.38, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "grad_norm": 1.151638150215149, | |
| "learning_rate": 4.849708389412293e-05, | |
| "loss": 0.171, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "grad_norm": 3.8168528079986572, | |
| "learning_rate": 4.8482129505009724e-05, | |
| "loss": 0.165, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "grad_norm": 2.3039355278015137, | |
| "learning_rate": 4.846717511589652e-05, | |
| "loss": 0.1675, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "grad_norm": 1.252301812171936, | |
| "learning_rate": 4.845222072678332e-05, | |
| "loss": 0.1554, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "grad_norm": 1.2682992219924927, | |
| "learning_rate": 4.843726633767011e-05, | |
| "loss": 0.1756, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "grad_norm": 1.3934777975082397, | |
| "learning_rate": 4.84223119485569e-05, | |
| "loss": 0.1576, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 1.3386119604110718, | |
| "learning_rate": 4.84073575594437e-05, | |
| "loss": 0.1602, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "grad_norm": 1.6670503616333008, | |
| "learning_rate": 4.8392403170330495e-05, | |
| "loss": 0.1638, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "grad_norm": 2.5150694847106934, | |
| "learning_rate": 4.837744878121729e-05, | |
| "loss": 0.1653, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "grad_norm": 2.840406656265259, | |
| "learning_rate": 4.836249439210409e-05, | |
| "loss": 0.1607, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "eval_loss": 0.22238589823246002, | |
| "eval_precision": 0.8404415146405029, | |
| "eval_recall": 0.8439607130761415, | |
| "eval_runtime": 304.8188, | |
| "eval_samples_per_second": 43.875, | |
| "eval_steps_per_second": 1.371, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "grad_norm": 1.5171958208084106, | |
| "learning_rate": 4.834754000299088e-05, | |
| "loss": 0.1606, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "grad_norm": 1.6955703496932983, | |
| "learning_rate": 4.833258561387767e-05, | |
| "loss": 0.1554, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "grad_norm": 1.893128514289856, | |
| "learning_rate": 4.831763122476447e-05, | |
| "loss": 0.1488, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "grad_norm": 1.7299461364746094, | |
| "learning_rate": 4.8302676835651266e-05, | |
| "loss": 0.1596, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "grad_norm": 2.150355339050293, | |
| "learning_rate": 4.8287722446538065e-05, | |
| "loss": 0.1623, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "grad_norm": 3.2869186401367188, | |
| "learning_rate": 4.827276805742486e-05, | |
| "loss": 0.1622, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "grad_norm": 1.7936344146728516, | |
| "learning_rate": 4.825781366831165e-05, | |
| "loss": 0.1651, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "grad_norm": 1.579736590385437, | |
| "learning_rate": 4.824285927919845e-05, | |
| "loss": 0.169, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "grad_norm": 2.1929283142089844, | |
| "learning_rate": 4.822790489008524e-05, | |
| "loss": 0.1629, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "grad_norm": 1.7842892408370972, | |
| "learning_rate": 4.821295050097204e-05, | |
| "loss": 0.1621, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "eval_loss": 0.21504360437393188, | |
| "eval_precision": 0.8350246187102197, | |
| "eval_recall": 0.8563379414390837, | |
| "eval_runtime": 306.2124, | |
| "eval_samples_per_second": 43.676, | |
| "eval_steps_per_second": 1.365, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "grad_norm": 2.2203197479248047, | |
| "learning_rate": 4.8197996111858836e-05, | |
| "loss": 0.1595, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "grad_norm": 1.8541319370269775, | |
| "learning_rate": 4.818304172274562e-05, | |
| "loss": 0.1702, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "grad_norm": 1.3299143314361572, | |
| "learning_rate": 4.816808733363242e-05, | |
| "loss": 0.1651, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "grad_norm": 1.7831319570541382, | |
| "learning_rate": 4.815313294451922e-05, | |
| "loss": 0.1601, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "grad_norm": 1.0528268814086914, | |
| "learning_rate": 4.8138178555406015e-05, | |
| "loss": 0.1644, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "grad_norm": 1.306907057762146, | |
| "learning_rate": 4.812322416629281e-05, | |
| "loss": 0.1556, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "grad_norm": 1.8565049171447754, | |
| "learning_rate": 4.810826977717961e-05, | |
| "loss": 0.1654, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "grad_norm": 1.4770090579986572, | |
| "learning_rate": 4.80933153880664e-05, | |
| "loss": 0.1628, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "grad_norm": 1.9089502096176147, | |
| "learning_rate": 4.807836099895319e-05, | |
| "loss": 0.1632, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "grad_norm": 1.3788821697235107, | |
| "learning_rate": 4.806340660983999e-05, | |
| "loss": 0.1597, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "eval_loss": 0.2062728852033615, | |
| "eval_precision": 0.8378547953391097, | |
| "eval_recall": 0.8634194402537024, | |
| "eval_runtime": 304.7295, | |
| "eval_samples_per_second": 43.888, | |
| "eval_steps_per_second": 1.372, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 15.79686164855957, | |
| "learning_rate": 4.8048452220726785e-05, | |
| "loss": 0.1637, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "grad_norm": 1.9472129344940186, | |
| "learning_rate": 4.8033497831613585e-05, | |
| "loss": 0.1666, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "grad_norm": 2.1338746547698975, | |
| "learning_rate": 4.801854344250037e-05, | |
| "loss": 0.1614, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "grad_norm": 1.1886940002441406, | |
| "learning_rate": 4.800358905338717e-05, | |
| "loss": 0.1474, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "grad_norm": 2.4190924167633057, | |
| "learning_rate": 4.798863466427397e-05, | |
| "loss": 0.121, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "grad_norm": 0.902584433555603, | |
| "learning_rate": 4.797368027516076e-05, | |
| "loss": 0.1192, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "grad_norm": 2.3466804027557373, | |
| "learning_rate": 4.7958725886047556e-05, | |
| "loss": 0.129, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "grad_norm": 4.135778427124023, | |
| "learning_rate": 4.7943771496934356e-05, | |
| "loss": 0.1206, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "grad_norm": 1.6940075159072876, | |
| "learning_rate": 4.792881710782115e-05, | |
| "loss": 0.1313, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "grad_norm": 1.7989047765731812, | |
| "learning_rate": 4.791386271870794e-05, | |
| "loss": 0.1139, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "eval_loss": 0.20718763768672943, | |
| "eval_precision": 0.8631126181281592, | |
| "eval_recall": 0.8464238430986176, | |
| "eval_runtime": 304.0256, | |
| "eval_samples_per_second": 43.99, | |
| "eval_steps_per_second": 1.375, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "grad_norm": 1.9864155054092407, | |
| "learning_rate": 4.789890832959474e-05, | |
| "loss": 0.1222, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "grad_norm": 2.944260835647583, | |
| "learning_rate": 4.7883953940481534e-05, | |
| "loss": 0.1238, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "grad_norm": 0.5448206663131714, | |
| "learning_rate": 4.786899955136833e-05, | |
| "loss": 0.1191, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "grad_norm": 1.2996718883514404, | |
| "learning_rate": 4.785404516225512e-05, | |
| "loss": 0.1208, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "grad_norm": 2.5177977085113525, | |
| "learning_rate": 4.783909077314192e-05, | |
| "loss": 0.1258, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "grad_norm": 1.1356126070022583, | |
| "learning_rate": 4.782413638402872e-05, | |
| "loss": 0.1223, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 1.2576464414596558, | |
| "learning_rate": 4.7809181994915506e-05, | |
| "loss": 0.124, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "grad_norm": 0.8868162631988525, | |
| "learning_rate": 4.7794227605802305e-05, | |
| "loss": 0.1246, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "grad_norm": 2.3075501918792725, | |
| "learning_rate": 4.7779273216689105e-05, | |
| "loss": 0.1216, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "grad_norm": 1.5548241138458252, | |
| "learning_rate": 4.776431882757589e-05, | |
| "loss": 0.1221, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "eval_loss": 0.19333235919475555, | |
| "eval_precision": 0.8727586319112239, | |
| "eval_recall": 0.8257335509098187, | |
| "eval_runtime": 301.0242, | |
| "eval_samples_per_second": 44.428, | |
| "eval_steps_per_second": 1.389, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "grad_norm": 1.0018868446350098, | |
| "learning_rate": 4.774936443846269e-05, | |
| "loss": 0.1237, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "grad_norm": 1.264910101890564, | |
| "learning_rate": 4.773441004934949e-05, | |
| "loss": 0.1156, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "grad_norm": 5.281520366668701, | |
| "learning_rate": 4.771945566023628e-05, | |
| "loss": 0.1286, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "grad_norm": 1.9591494798660278, | |
| "learning_rate": 4.7704501271123076e-05, | |
| "loss": 0.1249, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "grad_norm": 2.021794080734253, | |
| "learning_rate": 4.768954688200987e-05, | |
| "loss": 0.1233, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "grad_norm": 2.007873773574829, | |
| "learning_rate": 4.767459249289667e-05, | |
| "loss": 0.1281, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "grad_norm": 2.0108394622802734, | |
| "learning_rate": 4.765963810378346e-05, | |
| "loss": 0.1302, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "grad_norm": 1.7474627494812012, | |
| "learning_rate": 4.7644683714670254e-05, | |
| "loss": 0.1164, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "grad_norm": 0.758482813835144, | |
| "learning_rate": 4.7629729325557054e-05, | |
| "loss": 0.1211, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "grad_norm": 0.9910192489624023, | |
| "learning_rate": 4.7614774936443854e-05, | |
| "loss": 0.1222, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "eval_loss": 0.1955721527338028, | |
| "eval_precision": 0.8685029567382508, | |
| "eval_recall": 0.8591705409649312, | |
| "eval_runtime": 303.5505, | |
| "eval_samples_per_second": 44.059, | |
| "eval_steps_per_second": 1.377, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "grad_norm": 2.4667110443115234, | |
| "learning_rate": 4.759982054733064e-05, | |
| "loss": 0.1214, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "grad_norm": 2.103156566619873, | |
| "learning_rate": 4.758486615821744e-05, | |
| "loss": 0.1211, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "grad_norm": 1.3806654214859009, | |
| "learning_rate": 4.756991176910424e-05, | |
| "loss": 0.1152, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "grad_norm": 2.1174566745758057, | |
| "learning_rate": 4.7554957379991025e-05, | |
| "loss": 0.1246, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "grad_norm": 2.0334010124206543, | |
| "learning_rate": 4.7540002990877825e-05, | |
| "loss": 0.1189, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "grad_norm": 2.668717861175537, | |
| "learning_rate": 4.7525048601764625e-05, | |
| "loss": 0.1237, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "grad_norm": 2.0749363899230957, | |
| "learning_rate": 4.751009421265142e-05, | |
| "loss": 0.1141, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "grad_norm": 1.893052577972412, | |
| "learning_rate": 4.749513982353821e-05, | |
| "loss": 0.095, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "grad_norm": 0.6495729684829712, | |
| "learning_rate": 4.7480185434425e-05, | |
| "loss": 0.085, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "grad_norm": 1.8883150815963745, | |
| "learning_rate": 4.74652310453118e-05, | |
| "loss": 0.0886, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "eval_loss": 0.2067934274673462, | |
| "eval_precision": 0.880300808187974, | |
| "eval_recall": 0.8685920133009021, | |
| "eval_runtime": 303.377, | |
| "eval_samples_per_second": 44.084, | |
| "eval_steps_per_second": 1.378, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "grad_norm": 1.110809326171875, | |
| "learning_rate": 4.7450276656198596e-05, | |
| "loss": 0.0895, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "grad_norm": 1.9441896677017212, | |
| "learning_rate": 4.743532226708539e-05, | |
| "loss": 0.0935, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "grad_norm": 1.9851264953613281, | |
| "learning_rate": 4.742036787797219e-05, | |
| "loss": 0.0927, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "grad_norm": 1.2447096109390259, | |
| "learning_rate": 4.740541348885899e-05, | |
| "loss": 0.0911, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "grad_norm": 1.0151656866073608, | |
| "learning_rate": 4.7390459099745774e-05, | |
| "loss": 0.0932, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "grad_norm": 0.8265299201011658, | |
| "learning_rate": 4.7375504710632574e-05, | |
| "loss": 0.1006, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "grad_norm": 2.7819435596466064, | |
| "learning_rate": 4.736055032151937e-05, | |
| "loss": 0.0892, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "grad_norm": 1.3706836700439453, | |
| "learning_rate": 4.734559593240616e-05, | |
| "loss": 0.0976, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "grad_norm": 3.606653928756714, | |
| "learning_rate": 4.733064154329296e-05, | |
| "loss": 0.0932, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "grad_norm": 1.3535112142562866, | |
| "learning_rate": 4.731568715417975e-05, | |
| "loss": 0.0917, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "eval_loss": 0.1965586394071579, | |
| "eval_precision": 0.8806825297432687, | |
| "eval_recall": 0.8660673050278641, | |
| "eval_runtime": 303.4486, | |
| "eval_samples_per_second": 44.073, | |
| "eval_steps_per_second": 1.377, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "grad_norm": 1.7558257579803467, | |
| "learning_rate": 4.7300732765066545e-05, | |
| "loss": 0.088, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "grad_norm": 2.291628837585449, | |
| "learning_rate": 4.7285778375953345e-05, | |
| "loss": 0.0963, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "grad_norm": 1.4217274188995361, | |
| "learning_rate": 4.727082398684014e-05, | |
| "loss": 0.0969, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "grad_norm": 1.8852524757385254, | |
| "learning_rate": 4.725586959772694e-05, | |
| "loss": 0.0952, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "grad_norm": 2.106452465057373, | |
| "learning_rate": 4.724091520861373e-05, | |
| "loss": 0.0966, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "grad_norm": 1.9277011156082153, | |
| "learning_rate": 4.722596081950052e-05, | |
| "loss": 0.089, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "grad_norm": 1.2175403833389282, | |
| "learning_rate": 4.721100643038732e-05, | |
| "loss": 0.0931, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "grad_norm": 2.060368299484253, | |
| "learning_rate": 4.7196052041274115e-05, | |
| "loss": 0.0968, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "grad_norm": 1.4981082677841187, | |
| "learning_rate": 4.718109765216091e-05, | |
| "loss": 0.0929, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "grad_norm": 1.6335569620132446, | |
| "learning_rate": 4.716614326304771e-05, | |
| "loss": 0.0938, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "eval_loss": 0.19031907618045807, | |
| "eval_precision": 0.8913960623881361, | |
| "eval_recall": 0.858708704085717, | |
| "eval_runtime": 301.9634, | |
| "eval_samples_per_second": 44.29, | |
| "eval_steps_per_second": 1.384, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "grad_norm": 0.46949952840805054, | |
| "learning_rate": 4.71511888739345e-05, | |
| "loss": 0.09, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "grad_norm": 2.6525633335113525, | |
| "learning_rate": 4.7136234484821294e-05, | |
| "loss": 0.0954, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "grad_norm": 1.2892892360687256, | |
| "learning_rate": 4.7121280095708093e-05, | |
| "loss": 0.0949, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "grad_norm": 1.5637331008911133, | |
| "learning_rate": 4.7106325706594886e-05, | |
| "loss": 0.0962, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "grad_norm": 2.5609443187713623, | |
| "learning_rate": 4.709137131748168e-05, | |
| "loss": 0.0921, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "grad_norm": 1.4690775871276855, | |
| "learning_rate": 4.707641692836848e-05, | |
| "loss": 0.0955, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "grad_norm": 1.081965684890747, | |
| "learning_rate": 4.706146253925527e-05, | |
| "loss": 0.0928, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "grad_norm": 1.6817141771316528, | |
| "learning_rate": 4.704650815014207e-05, | |
| "loss": 0.0963, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "grad_norm": 2.984762191772461, | |
| "learning_rate": 4.7031553761028864e-05, | |
| "loss": 0.095, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "grad_norm": 2.1594882011413574, | |
| "learning_rate": 4.701659937191566e-05, | |
| "loss": 0.0985, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "eval_loss": 0.18151727318763733, | |
| "eval_precision": 0.9042639298086573, | |
| "eval_recall": 0.859940269096955, | |
| "eval_runtime": 302.8985, | |
| "eval_samples_per_second": 44.153, | |
| "eval_steps_per_second": 1.38, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "grad_norm": 2.0218722820281982, | |
| "learning_rate": 4.700164498280246e-05, | |
| "loss": 0.0886, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "grad_norm": 1.3569700717926025, | |
| "learning_rate": 4.698669059368925e-05, | |
| "loss": 0.0711, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "grad_norm": 1.5697298049926758, | |
| "learning_rate": 4.697173620457604e-05, | |
| "loss": 0.0724, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "grad_norm": 1.7853014469146729, | |
| "learning_rate": 4.695678181546284e-05, | |
| "loss": 0.0747, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "grad_norm": 0.7531015872955322, | |
| "learning_rate": 4.6941827426349635e-05, | |
| "loss": 0.074, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "grad_norm": 1.3895870447158813, | |
| "learning_rate": 4.692687303723643e-05, | |
| "loss": 0.0683, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "grad_norm": 2.084857225418091, | |
| "learning_rate": 4.691191864812323e-05, | |
| "loss": 0.0741, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "grad_norm": 0.9525838494300842, | |
| "learning_rate": 4.689696425901002e-05, | |
| "loss": 0.0647, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "grad_norm": 2.0475118160247803, | |
| "learning_rate": 4.6882009869896813e-05, | |
| "loss": 0.0746, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "grad_norm": 1.0650370121002197, | |
| "learning_rate": 4.686705548078361e-05, | |
| "loss": 0.0696, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "eval_loss": 0.19116894900798798, | |
| "eval_precision": 0.9016753284483037, | |
| "eval_recall": 0.8600326364727978, | |
| "eval_runtime": 303.289, | |
| "eval_samples_per_second": 44.097, | |
| "eval_steps_per_second": 1.378, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "grad_norm": 1.5736846923828125, | |
| "learning_rate": 4.6852101091670406e-05, | |
| "loss": 0.0685, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "grad_norm": 0.7526031136512756, | |
| "learning_rate": 4.6837146702557206e-05, | |
| "loss": 0.0816, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "grad_norm": 1.284680724143982, | |
| "learning_rate": 4.6822192313444e-05, | |
| "loss": 0.0676, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "grad_norm": 4.207923889160156, | |
| "learning_rate": 4.680723792433079e-05, | |
| "loss": 0.0679, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 6.43, | |
| "grad_norm": 1.3670810461044312, | |
| "learning_rate": 4.679228353521759e-05, | |
| "loss": 0.0721, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "grad_norm": 1.8094091415405273, | |
| "learning_rate": 4.6777329146104384e-05, | |
| "loss": 0.0673, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "grad_norm": 2.057133436203003, | |
| "learning_rate": 4.676237475699118e-05, | |
| "loss": 0.0711, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "grad_norm": 1.9356772899627686, | |
| "learning_rate": 4.6747420367877976e-05, | |
| "loss": 0.0713, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "grad_norm": 0.4188990592956543, | |
| "learning_rate": 4.673246597876477e-05, | |
| "loss": 0.0772, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "grad_norm": 0.9256879091262817, | |
| "learning_rate": 4.671751158965156e-05, | |
| "loss": 0.0715, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "eval_loss": 0.19474047422409058, | |
| "eval_precision": 0.9012208304190246, | |
| "eval_recall": 0.8727793343391115, | |
| "eval_runtime": 305.0313, | |
| "eval_samples_per_second": 43.845, | |
| "eval_steps_per_second": 1.37, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "grad_norm": 0.890701949596405, | |
| "learning_rate": 4.670255720053836e-05, | |
| "loss": 0.0712, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "grad_norm": 1.6164826154708862, | |
| "learning_rate": 4.6687602811425155e-05, | |
| "loss": 0.0772, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "grad_norm": 1.2075903415679932, | |
| "learning_rate": 4.667264842231195e-05, | |
| "loss": 0.0734, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "grad_norm": 0.9141576886177063, | |
| "learning_rate": 4.665769403319875e-05, | |
| "loss": 0.0803, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "grad_norm": 3.0547311305999756, | |
| "learning_rate": 4.664273964408554e-05, | |
| "loss": 0.0688, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "grad_norm": 1.1152849197387695, | |
| "learning_rate": 4.662778525497234e-05, | |
| "loss": 0.0703, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "grad_norm": 2.150590181350708, | |
| "learning_rate": 4.661283086585913e-05, | |
| "loss": 0.0745, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "grad_norm": 1.4829721450805664, | |
| "learning_rate": 4.6597876476745926e-05, | |
| "loss": 0.0738, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "grad_norm": 0.6545503735542297, | |
| "learning_rate": 4.6582922087632725e-05, | |
| "loss": 0.0764, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "grad_norm": 1.2322636842727661, | |
| "learning_rate": 4.656796769851952e-05, | |
| "loss": 0.0765, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "eval_loss": 0.18639414012432098, | |
| "eval_precision": 0.9072111489223789, | |
| "eval_recall": 0.861849194864374, | |
| "eval_runtime": 301.5834, | |
| "eval_samples_per_second": 44.346, | |
| "eval_steps_per_second": 1.386, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "grad_norm": 1.8931362628936768, | |
| "learning_rate": 4.655301330940631e-05, | |
| "loss": 0.0783, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "grad_norm": 0.7884649038314819, | |
| "learning_rate": 4.653805892029311e-05, | |
| "loss": 0.0718, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "grad_norm": 0.6341440081596375, | |
| "learning_rate": 4.6523104531179904e-05, | |
| "loss": 0.0698, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.9098210334777832, | |
| "learning_rate": 4.6508150142066697e-05, | |
| "loss": 0.071, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "grad_norm": 3.0700671672821045, | |
| "learning_rate": 4.6493195752953496e-05, | |
| "loss": 0.0552, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "grad_norm": 1.5736912488937378, | |
| "learning_rate": 4.647824136384029e-05, | |
| "loss": 0.055, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "grad_norm": 0.9347396492958069, | |
| "learning_rate": 4.646328697472708e-05, | |
| "loss": 0.0592, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "grad_norm": 1.7453091144561768, | |
| "learning_rate": 4.644833258561388e-05, | |
| "loss": 0.0623, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "grad_norm": 1.1539710760116577, | |
| "learning_rate": 4.6433378196500674e-05, | |
| "loss": 0.0558, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "grad_norm": 0.7530619502067566, | |
| "learning_rate": 4.641842380738747e-05, | |
| "loss": 0.0546, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "eval_loss": 0.2078467607498169, | |
| "eval_precision": 0.908101688386724, | |
| "eval_recall": 0.8710551433233782, | |
| "eval_runtime": 302.902, | |
| "eval_samples_per_second": 44.153, | |
| "eval_steps_per_second": 1.38, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "grad_norm": 1.6339865922927856, | |
| "learning_rate": 4.640346941827427e-05, | |
| "loss": 0.0579, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 7.24, | |
| "grad_norm": 2.397862434387207, | |
| "learning_rate": 4.638851502916106e-05, | |
| "loss": 0.054, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "grad_norm": 2.5979652404785156, | |
| "learning_rate": 4.637356064004786e-05, | |
| "loss": 0.0582, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "grad_norm": 1.4249415397644043, | |
| "learning_rate": 4.635860625093465e-05, | |
| "loss": 0.0611, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 7.33, | |
| "grad_norm": 1.1104274988174438, | |
| "learning_rate": 4.6343651861821445e-05, | |
| "loss": 0.0603, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "grad_norm": 1.039832353591919, | |
| "learning_rate": 4.6328697472708245e-05, | |
| "loss": 0.06, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "grad_norm": 1.1284308433532715, | |
| "learning_rate": 4.631374308359504e-05, | |
| "loss": 0.0528, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "grad_norm": 3.3189823627471924, | |
| "learning_rate": 4.629878869448183e-05, | |
| "loss": 0.0634, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "grad_norm": 2.0465550422668457, | |
| "learning_rate": 4.628383430536863e-05, | |
| "loss": 0.0599, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "grad_norm": 1.93597412109375, | |
| "learning_rate": 4.626887991625542e-05, | |
| "loss": 0.0588, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "eval_loss": 0.20041726529598236, | |
| "eval_precision": 0.9101642057026477, | |
| "eval_recall": 0.8805997721604729, | |
| "eval_runtime": 302.521, | |
| "eval_samples_per_second": 44.209, | |
| "eval_steps_per_second": 1.382, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 7.51, | |
| "grad_norm": 2.2025020122528076, | |
| "learning_rate": 4.6253925527142216e-05, | |
| "loss": 0.0557, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "grad_norm": 2.4900927543640137, | |
| "learning_rate": 4.6238971138029016e-05, | |
| "loss": 0.0613, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "grad_norm": 1.2546288967132568, | |
| "learning_rate": 4.622401674891581e-05, | |
| "loss": 0.0609, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "grad_norm": 1.3969674110412598, | |
| "learning_rate": 4.62090623598026e-05, | |
| "loss": 0.0617, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "grad_norm": 0.2969658374786377, | |
| "learning_rate": 4.61941079706894e-05, | |
| "loss": 0.0602, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "grad_norm": 0.7388882040977478, | |
| "learning_rate": 4.6179153581576194e-05, | |
| "loss": 0.0593, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "grad_norm": 0.609923779964447, | |
| "learning_rate": 4.6164199192462994e-05, | |
| "loss": 0.0596, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "grad_norm": 2.3986215591430664, | |
| "learning_rate": 4.614924480334979e-05, | |
| "loss": 0.0651, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 7.75, | |
| "grad_norm": 1.1203041076660156, | |
| "learning_rate": 4.613429041423658e-05, | |
| "loss": 0.0649, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "grad_norm": 0.7929214835166931, | |
| "learning_rate": 4.611933602512338e-05, | |
| "loss": 0.0648, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "eval_loss": 0.19321496784687042, | |
| "eval_precision": 0.9163062916598927, | |
| "eval_recall": 0.8676683395424736, | |
| "eval_runtime": 301.2643, | |
| "eval_samples_per_second": 44.393, | |
| "eval_steps_per_second": 1.387, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "grad_norm": 0.5828276872634888, | |
| "learning_rate": 4.610438163601017e-05, | |
| "loss": 0.058, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "grad_norm": 0.44025149941444397, | |
| "learning_rate": 4.6089427246896965e-05, | |
| "loss": 0.0598, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "grad_norm": 0.7976229786872864, | |
| "learning_rate": 4.6074472857783765e-05, | |
| "loss": 0.0655, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 7.89, | |
| "grad_norm": 2.6843769550323486, | |
| "learning_rate": 4.605951846867056e-05, | |
| "loss": 0.0588, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "grad_norm": 1.1365008354187012, | |
| "learning_rate": 4.604456407955735e-05, | |
| "loss": 0.0563, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 7.95, | |
| "grad_norm": 2.463488817214966, | |
| "learning_rate": 4.602960969044415e-05, | |
| "loss": 0.0581, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "grad_norm": 0.47716620564460754, | |
| "learning_rate": 4.601465530133094e-05, | |
| "loss": 0.0595, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 8.01, | |
| "grad_norm": 1.3218754529953003, | |
| "learning_rate": 4.5999700912217736e-05, | |
| "loss": 0.0554, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "grad_norm": 1.0640392303466797, | |
| "learning_rate": 4.5984746523104536e-05, | |
| "loss": 0.0409, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 8.07, | |
| "grad_norm": 0.7323993444442749, | |
| "learning_rate": 4.596979213399133e-05, | |
| "loss": 0.0463, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 8.07, | |
| "eval_loss": 0.21357020735740662, | |
| "eval_precision": 0.9223724947042529, | |
| "eval_recall": 0.8714246128267495, | |
| "eval_runtime": 301.9271, | |
| "eval_samples_per_second": 44.295, | |
| "eval_steps_per_second": 1.384, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 8.1, | |
| "grad_norm": 2.1960983276367188, | |
| "learning_rate": 4.595483774487813e-05, | |
| "loss": 0.0424, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 8.13, | |
| "grad_norm": 2.5061357021331787, | |
| "learning_rate": 4.593988335576492e-05, | |
| "loss": 0.0436, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "grad_norm": 0.5249370336532593, | |
| "learning_rate": 4.5924928966651714e-05, | |
| "loss": 0.0537, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 8.19, | |
| "grad_norm": 1.0211517810821533, | |
| "learning_rate": 4.5909974577538514e-05, | |
| "loss": 0.0448, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "grad_norm": 2.860835552215576, | |
| "learning_rate": 4.58950201884253e-05, | |
| "loss": 0.0474, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 8.25, | |
| "grad_norm": 2.019699811935425, | |
| "learning_rate": 4.58800657993121e-05, | |
| "loss": 0.0482, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 8.28, | |
| "grad_norm": 0.9144898653030396, | |
| "learning_rate": 4.58651114101989e-05, | |
| "loss": 0.045, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "grad_norm": 1.656792402267456, | |
| "learning_rate": 4.585015702108569e-05, | |
| "loss": 0.0475, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 8.34, | |
| "grad_norm": 1.1702663898468018, | |
| "learning_rate": 4.5835202631972485e-05, | |
| "loss": 0.0445, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "grad_norm": 2.0331854820251465, | |
| "learning_rate": 4.5820248242859284e-05, | |
| "loss": 0.0429, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "eval_loss": 0.22609786689281464, | |
| "eval_precision": 0.9198246970868781, | |
| "eval_recall": 0.8788447920194588, | |
| "eval_runtime": 302.1631, | |
| "eval_samples_per_second": 44.261, | |
| "eval_steps_per_second": 1.383, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "grad_norm": 5.98319673538208, | |
| "learning_rate": 4.580529385374608e-05, | |
| "loss": 0.0429, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 8.43, | |
| "grad_norm": 1.0793452262878418, | |
| "learning_rate": 4.579033946463287e-05, | |
| "loss": 0.0525, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "grad_norm": 1.4804214239120483, | |
| "learning_rate": 4.577538507551967e-05, | |
| "loss": 0.0459, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 8.49, | |
| "grad_norm": 0.9862244129180908, | |
| "learning_rate": 4.576043068640646e-05, | |
| "loss": 0.0534, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 8.52, | |
| "grad_norm": 1.26304030418396, | |
| "learning_rate": 4.574547629729326e-05, | |
| "loss": 0.048, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 8.55, | |
| "grad_norm": 0.4214903712272644, | |
| "learning_rate": 4.573052190818005e-05, | |
| "loss": 0.0547, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 8.58, | |
| "grad_norm": 0.9271091222763062, | |
| "learning_rate": 4.571556751906685e-05, | |
| "loss": 0.0537, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 8.61, | |
| "grad_norm": 0.8437818884849548, | |
| "learning_rate": 4.570061312995365e-05, | |
| "loss": 0.0537, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "grad_norm": 0.8551807999610901, | |
| "learning_rate": 4.5685658740840434e-05, | |
| "loss": 0.0461, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 8.67, | |
| "grad_norm": 1.8268975019454956, | |
| "learning_rate": 4.5670704351727234e-05, | |
| "loss": 0.046, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 8.67, | |
| "eval_loss": 0.20938238501548767, | |
| "eval_precision": 0.9151901573163308, | |
| "eval_recall": 0.8794605745250778, | |
| "eval_runtime": 302.034, | |
| "eval_samples_per_second": 44.28, | |
| "eval_steps_per_second": 1.384, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "grad_norm": 0.08975500613451004, | |
| "learning_rate": 4.565574996261403e-05, | |
| "loss": 0.0493, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "grad_norm": 2.3698606491088867, | |
| "learning_rate": 4.564079557350082e-05, | |
| "loss": 0.0506, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 8.76, | |
| "grad_norm": 1.1118419170379639, | |
| "learning_rate": 4.562584118438762e-05, | |
| "loss": 0.0445, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 8.79, | |
| "grad_norm": 1.8186097145080566, | |
| "learning_rate": 4.561088679527442e-05, | |
| "loss": 0.0471, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 8.82, | |
| "grad_norm": 1.4056422710418701, | |
| "learning_rate": 4.559593240616121e-05, | |
| "loss": 0.0513, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "grad_norm": 1.5597076416015625, | |
| "learning_rate": 4.5580978017048004e-05, | |
| "loss": 0.0452, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "grad_norm": 0.8287553191184998, | |
| "learning_rate": 4.5566023627934804e-05, | |
| "loss": 0.0523, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 8.91, | |
| "grad_norm": 0.6897550821304321, | |
| "learning_rate": 4.55510692388216e-05, | |
| "loss": 0.0466, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 8.94, | |
| "grad_norm": 0.7071977853775024, | |
| "learning_rate": 4.553611484970839e-05, | |
| "loss": 0.0434, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 8.97, | |
| "grad_norm": 0.6574975252151489, | |
| "learning_rate": 4.552116046059518e-05, | |
| "loss": 0.0495, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 8.97, | |
| "eval_loss": 0.20542754232883453, | |
| "eval_precision": 0.9183409556852231, | |
| "eval_recall": 0.8964561716801626, | |
| "eval_runtime": 302.3305, | |
| "eval_samples_per_second": 44.236, | |
| "eval_steps_per_second": 1.383, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 1.3489534854888916, | |
| "learning_rate": 4.550620607148198e-05, | |
| "loss": 0.0499, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 9.03, | |
| "grad_norm": 1.0300263166427612, | |
| "learning_rate": 4.549125168236878e-05, | |
| "loss": 0.0353, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 9.06, | |
| "grad_norm": 0.4393318295478821, | |
| "learning_rate": 4.547629729325557e-05, | |
| "loss": 0.0352, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 9.09, | |
| "grad_norm": 0.4519498944282532, | |
| "learning_rate": 4.546134290414237e-05, | |
| "loss": 0.0342, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "grad_norm": 0.9631327986717224, | |
| "learning_rate": 4.544638851502917e-05, | |
| "loss": 0.0364, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 9.15, | |
| "grad_norm": 2.7282943725585938, | |
| "learning_rate": 4.5431434125915954e-05, | |
| "loss": 0.0354, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 9.18, | |
| "grad_norm": 0.5908452272415161, | |
| "learning_rate": 4.541647973680275e-05, | |
| "loss": 0.0356, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 9.21, | |
| "grad_norm": 2.3660802841186523, | |
| "learning_rate": 4.540152534768955e-05, | |
| "loss": 0.0413, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 9.24, | |
| "grad_norm": 1.7346217632293701, | |
| "learning_rate": 4.5386570958576346e-05, | |
| "loss": 0.036, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 9.27, | |
| "grad_norm": 1.0829362869262695, | |
| "learning_rate": 4.537161656946314e-05, | |
| "loss": 0.0376, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 9.27, | |
| "eval_loss": 0.226752370595932, | |
| "eval_precision": 0.925325841962565, | |
| "eval_recall": 0.8721635518334924, | |
| "eval_runtime": 302.3165, | |
| "eval_samples_per_second": 44.238, | |
| "eval_steps_per_second": 1.383, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 9.3, | |
| "grad_norm": 1.2249701023101807, | |
| "learning_rate": 4.535666218034993e-05, | |
| "loss": 0.039, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 9.33, | |
| "grad_norm": 2.201986789703369, | |
| "learning_rate": 4.534170779123673e-05, | |
| "loss": 0.0384, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "grad_norm": 0.31157541275024414, | |
| "learning_rate": 4.5326753402123524e-05, | |
| "loss": 0.0318, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "grad_norm": 0.7502834796905518, | |
| "learning_rate": 4.531179901301032e-05, | |
| "loss": 0.0397, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 9.42, | |
| "grad_norm": 0.3627040684223175, | |
| "learning_rate": 4.529684462389712e-05, | |
| "loss": 0.0389, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 9.45, | |
| "grad_norm": 2.008009672164917, | |
| "learning_rate": 4.5281890234783916e-05, | |
| "loss": 0.042, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 9.48, | |
| "grad_norm": 2.5352540016174316, | |
| "learning_rate": 4.52669358456707e-05, | |
| "loss": 0.0407, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 9.51, | |
| "grad_norm": 0.543992280960083, | |
| "learning_rate": 4.52519814565575e-05, | |
| "loss": 0.0309, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 9.54, | |
| "grad_norm": 1.3150848150253296, | |
| "learning_rate": 4.52370270674443e-05, | |
| "loss": 0.0369, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 9.57, | |
| "grad_norm": 1.6026105880737305, | |
| "learning_rate": 4.522207267833109e-05, | |
| "loss": 0.0418, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 9.57, | |
| "eval_loss": 0.21585828065872192, | |
| "eval_precision": 0.9208557844690967, | |
| "eval_recall": 0.8945164567874627, | |
| "eval_runtime": 303.0508, | |
| "eval_samples_per_second": 44.131, | |
| "eval_steps_per_second": 1.379, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "grad_norm": 1.8489359617233276, | |
| "learning_rate": 4.520711828921789e-05, | |
| "loss": 0.0427, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 9.63, | |
| "grad_norm": 2.4979922771453857, | |
| "learning_rate": 4.519216390010468e-05, | |
| "loss": 0.0337, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 9.66, | |
| "grad_norm": 0.3452712595462799, | |
| "learning_rate": 4.517720951099148e-05, | |
| "loss": 0.0347, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 9.69, | |
| "grad_norm": 1.081455945968628, | |
| "learning_rate": 4.516225512187827e-05, | |
| "loss": 0.047, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 9.72, | |
| "grad_norm": 2.3087069988250732, | |
| "learning_rate": 4.5147300732765066e-05, | |
| "loss": 0.0404, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 9.75, | |
| "grad_norm": 1.901135802268982, | |
| "learning_rate": 4.5132346343651865e-05, | |
| "loss": 0.0394, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 9.78, | |
| "grad_norm": 1.2389637231826782, | |
| "learning_rate": 4.511739195453866e-05, | |
| "loss": 0.0376, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 9.81, | |
| "grad_norm": 0.619143545627594, | |
| "learning_rate": 4.510243756542545e-05, | |
| "loss": 0.0414, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 9.84, | |
| "grad_norm": 1.3270721435546875, | |
| "learning_rate": 4.508748317631225e-05, | |
| "loss": 0.0405, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "grad_norm": 2.503606081008911, | |
| "learning_rate": 4.507252878719905e-05, | |
| "loss": 0.0493, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "eval_loss": 0.20709815621376038, | |
| "eval_precision": 0.9246134231259603, | |
| "eval_recall": 0.8708088303211305, | |
| "eval_runtime": 301.957, | |
| "eval_samples_per_second": 44.291, | |
| "eval_steps_per_second": 1.384, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 9.9, | |
| "grad_norm": 0.6343371868133545, | |
| "learning_rate": 4.505757439808584e-05, | |
| "loss": 0.0365, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 9.93, | |
| "grad_norm": 0.3116106688976288, | |
| "learning_rate": 4.5042620008972636e-05, | |
| "loss": 0.0358, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "grad_norm": 0.7307326197624207, | |
| "learning_rate": 4.5027665619859436e-05, | |
| "loss": 0.0411, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 9.99, | |
| "grad_norm": 2.104717493057251, | |
| "learning_rate": 4.501271123074622e-05, | |
| "loss": 0.0401, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 10.02, | |
| "grad_norm": 3.8659448623657227, | |
| "learning_rate": 4.499775684163302e-05, | |
| "loss": 0.0348, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 10.05, | |
| "grad_norm": 1.0324366092681885, | |
| "learning_rate": 4.4982802452519815e-05, | |
| "loss": 0.0344, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 10.08, | |
| "grad_norm": 1.0838052034378052, | |
| "learning_rate": 4.4967848063406614e-05, | |
| "loss": 0.0327, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 10.11, | |
| "grad_norm": 1.8709659576416016, | |
| "learning_rate": 4.495289367429341e-05, | |
| "loss": 0.0267, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 10.14, | |
| "grad_norm": 0.4261041283607483, | |
| "learning_rate": 4.49379392851802e-05, | |
| "loss": 0.0305, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 10.17, | |
| "grad_norm": 0.16497644782066345, | |
| "learning_rate": 4.4922984896067e-05, | |
| "loss": 0.0276, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 10.17, | |
| "eval_loss": 0.2343963235616684, | |
| "eval_precision": 0.9252133285746731, | |
| "eval_recall": 0.8779826965115921, | |
| "eval_runtime": 301.9423, | |
| "eval_samples_per_second": 44.293, | |
| "eval_steps_per_second": 1.384, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 10.2, | |
| "grad_norm": 2.9655115604400635, | |
| "learning_rate": 4.490803050695379e-05, | |
| "loss": 0.0268, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 10.23, | |
| "grad_norm": 1.536979079246521, | |
| "learning_rate": 4.4893076117840586e-05, | |
| "loss": 0.0299, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 10.26, | |
| "grad_norm": 2.8167715072631836, | |
| "learning_rate": 4.4878121728727385e-05, | |
| "loss": 0.0325, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 10.29, | |
| "grad_norm": 2.1207668781280518, | |
| "learning_rate": 4.4863167339614185e-05, | |
| "loss": 0.029, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 10.32, | |
| "grad_norm": 2.277759552001953, | |
| "learning_rate": 4.484821295050097e-05, | |
| "loss": 0.0308, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 10.35, | |
| "grad_norm": 1.226417899131775, | |
| "learning_rate": 4.483325856138777e-05, | |
| "loss": 0.0299, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 10.38, | |
| "grad_norm": 0.63482266664505, | |
| "learning_rate": 4.4818304172274563e-05, | |
| "loss": 0.0337, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 10.41, | |
| "grad_norm": 1.8453493118286133, | |
| "learning_rate": 4.4803349783161356e-05, | |
| "loss": 0.0346, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 10.44, | |
| "grad_norm": 0.40149375796318054, | |
| "learning_rate": 4.4788395394048156e-05, | |
| "loss": 0.03, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 10.47, | |
| "grad_norm": 0.3980793058872223, | |
| "learning_rate": 4.477344100493495e-05, | |
| "loss": 0.035, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 10.47, | |
| "eval_loss": 0.22229593992233276, | |
| "eval_precision": 0.9262946269334285, | |
| "eval_recall": 0.8795221527756396, | |
| "eval_runtime": 302.9773, | |
| "eval_samples_per_second": 44.142, | |
| "eval_steps_per_second": 1.38, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 10.5, | |
| "grad_norm": 0.629266083240509, | |
| "learning_rate": 4.475848661582174e-05, | |
| "loss": 0.0363, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 10.53, | |
| "grad_norm": 1.134805679321289, | |
| "learning_rate": 4.474353222670854e-05, | |
| "loss": 0.0343, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 10.56, | |
| "grad_norm": 1.9168953895568848, | |
| "learning_rate": 4.4728577837595334e-05, | |
| "loss": 0.0333, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 10.59, | |
| "grad_norm": 0.7437408566474915, | |
| "learning_rate": 4.4713623448482134e-05, | |
| "loss": 0.0377, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 10.62, | |
| "grad_norm": 0.8649216890335083, | |
| "learning_rate": 4.469866905936893e-05, | |
| "loss": 0.0387, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 10.65, | |
| "grad_norm": 1.9679126739501953, | |
| "learning_rate": 4.468371467025572e-05, | |
| "loss": 0.0324, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 10.68, | |
| "grad_norm": 1.0343681573867798, | |
| "learning_rate": 4.466876028114252e-05, | |
| "loss": 0.0371, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 10.71, | |
| "grad_norm": 0.3291555941104889, | |
| "learning_rate": 4.465380589202931e-05, | |
| "loss": 0.0339, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 10.74, | |
| "grad_norm": 1.2407808303833008, | |
| "learning_rate": 4.4638851502916105e-05, | |
| "loss": 0.0376, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 10.77, | |
| "grad_norm": 1.2906955480575562, | |
| "learning_rate": 4.4623897113802905e-05, | |
| "loss": 0.0348, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 10.77, | |
| "eval_loss": 0.22172214090824127, | |
| "eval_precision": 0.9251365945617791, | |
| "eval_recall": 0.8914683333846486, | |
| "eval_runtime": 302.63, | |
| "eval_samples_per_second": 44.193, | |
| "eval_steps_per_second": 1.381, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 10.8, | |
| "grad_norm": 0.9678496718406677, | |
| "learning_rate": 4.46089427246897e-05, | |
| "loss": 0.0354, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 10.83, | |
| "grad_norm": 1.92240571975708, | |
| "learning_rate": 4.459398833557649e-05, | |
| "loss": 0.0324, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 10.86, | |
| "grad_norm": 2.5916824340820312, | |
| "learning_rate": 4.457903394646329e-05, | |
| "loss": 0.034, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 10.89, | |
| "grad_norm": 1.4677050113677979, | |
| "learning_rate": 4.456407955735008e-05, | |
| "loss": 0.0304, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 10.92, | |
| "grad_norm": 1.1423336267471313, | |
| "learning_rate": 4.4549125168236876e-05, | |
| "loss": 0.0315, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 10.94, | |
| "grad_norm": 1.0664762258529663, | |
| "learning_rate": 4.4534170779123676e-05, | |
| "loss": 0.0371, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 10.97, | |
| "grad_norm": 1.344557762145996, | |
| "learning_rate": 4.451921639001047e-05, | |
| "loss": 0.0334, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 2.944450616836548, | |
| "learning_rate": 4.450426200089727e-05, | |
| "loss": 0.0312, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 11.03, | |
| "grad_norm": 1.02321195602417, | |
| "learning_rate": 4.448930761178406e-05, | |
| "loss": 0.0243, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 11.06, | |
| "grad_norm": 1.4520535469055176, | |
| "learning_rate": 4.4474353222670854e-05, | |
| "loss": 0.0263, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 11.06, | |
| "eval_loss": 0.23973342776298523, | |
| "eval_precision": 0.928783958602846, | |
| "eval_recall": 0.8842020998183442, | |
| "eval_runtime": 302.1259, | |
| "eval_samples_per_second": 44.266, | |
| "eval_steps_per_second": 1.384, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 11.09, | |
| "grad_norm": 0.9927899837493896, | |
| "learning_rate": 4.4459398833557654e-05, | |
| "loss": 0.0251, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 11.12, | |
| "grad_norm": 0.7255445122718811, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 0.023, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 11.15, | |
| "grad_norm": 1.2551404237747192, | |
| "learning_rate": 4.442949005533124e-05, | |
| "loss": 0.0282, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 11.18, | |
| "grad_norm": 1.8652236461639404, | |
| "learning_rate": 4.441453566621804e-05, | |
| "loss": 0.0265, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 11.21, | |
| "grad_norm": 0.29598140716552734, | |
| "learning_rate": 4.439958127710483e-05, | |
| "loss": 0.0231, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 11.24, | |
| "grad_norm": 0.517977774143219, | |
| "learning_rate": 4.4384626887991625e-05, | |
| "loss": 0.0266, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 11.27, | |
| "grad_norm": 1.3159215450286865, | |
| "learning_rate": 4.4369672498878425e-05, | |
| "loss": 0.0246, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 11.3, | |
| "grad_norm": 1.8311362266540527, | |
| "learning_rate": 4.435471810976522e-05, | |
| "loss": 0.0325, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 11.33, | |
| "grad_norm": 2.8861258029937744, | |
| "learning_rate": 4.433976372065201e-05, | |
| "loss": 0.0303, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 11.36, | |
| "grad_norm": 0.6612695455551147, | |
| "learning_rate": 4.432480933153881e-05, | |
| "loss": 0.0284, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 11.36, | |
| "eval_loss": 0.23250487446784973, | |
| "eval_precision": 0.9248716302952503, | |
| "eval_recall": 0.8873118014717202, | |
| "eval_runtime": 302.5481, | |
| "eval_samples_per_second": 44.205, | |
| "eval_steps_per_second": 1.382, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 11.39, | |
| "grad_norm": 0.8181266784667969, | |
| "learning_rate": 4.43098549424256e-05, | |
| "loss": 0.0251, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 11.42, | |
| "grad_norm": 0.48834991455078125, | |
| "learning_rate": 4.42949005533124e-05, | |
| "loss": 0.0313, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 11.45, | |
| "grad_norm": 0.4897523820400238, | |
| "learning_rate": 4.4279946164199195e-05, | |
| "loss": 0.0328, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 11.48, | |
| "grad_norm": 0.7222294807434082, | |
| "learning_rate": 4.426499177508599e-05, | |
| "loss": 0.0298, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 11.51, | |
| "grad_norm": 0.07086914777755737, | |
| "learning_rate": 4.425003738597279e-05, | |
| "loss": 0.032, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 11.54, | |
| "grad_norm": 1.4812002182006836, | |
| "learning_rate": 4.423508299685958e-05, | |
| "loss": 0.0282, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 11.57, | |
| "grad_norm": 1.302590012550354, | |
| "learning_rate": 4.4220128607746374e-05, | |
| "loss": 0.027, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 11.6, | |
| "grad_norm": 1.9532426595687866, | |
| "learning_rate": 4.420517421863317e-05, | |
| "loss": 0.0304, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 11.63, | |
| "grad_norm": 2.029754638671875, | |
| "learning_rate": 4.4190219829519966e-05, | |
| "loss": 0.0301, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 11.66, | |
| "grad_norm": 1.320448398590088, | |
| "learning_rate": 4.417526544040676e-05, | |
| "loss": 0.0277, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 11.66, | |
| "eval_loss": 0.241913303732872, | |
| "eval_precision": 0.9234215627085253, | |
| "eval_recall": 0.8947627697897103, | |
| "eval_runtime": 303.3685, | |
| "eval_samples_per_second": 44.085, | |
| "eval_steps_per_second": 1.378, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 11.69, | |
| "grad_norm": 1.8267722129821777, | |
| "learning_rate": 4.416031105129356e-05, | |
| "loss": 0.0249, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 11.72, | |
| "grad_norm": 0.7122277021408081, | |
| "learning_rate": 4.414535666218035e-05, | |
| "loss": 0.0323, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 11.75, | |
| "grad_norm": 0.5691227316856384, | |
| "learning_rate": 4.4130402273067145e-05, | |
| "loss": 0.0325, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 11.78, | |
| "grad_norm": 0.40894216299057007, | |
| "learning_rate": 4.4115447883953944e-05, | |
| "loss": 0.0301, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 11.81, | |
| "grad_norm": 2.4805972576141357, | |
| "learning_rate": 4.410049349484074e-05, | |
| "loss": 0.0277, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 11.84, | |
| "grad_norm": 1.2774219512939453, | |
| "learning_rate": 4.408553910572754e-05, | |
| "loss": 0.0278, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 11.87, | |
| "grad_norm": 1.267562985420227, | |
| "learning_rate": 4.407058471661433e-05, | |
| "loss": 0.0286, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 11.9, | |
| "grad_norm": 0.6910821795463562, | |
| "learning_rate": 4.405563032750112e-05, | |
| "loss": 0.0344, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 11.93, | |
| "grad_norm": 0.3539283275604248, | |
| "learning_rate": 4.404067593838792e-05, | |
| "loss": 0.0298, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 11.96, | |
| "grad_norm": 1.7098407745361328, | |
| "learning_rate": 4.4025721549274715e-05, | |
| "loss": 0.0318, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 11.96, | |
| "eval_loss": 0.23493793606758118, | |
| "eval_precision": 0.9253437490076529, | |
| "eval_recall": 0.8971951106869054, | |
| "eval_runtime": 302.3541, | |
| "eval_samples_per_second": 44.233, | |
| "eval_steps_per_second": 1.382, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 11.99, | |
| "grad_norm": 2.1748311519622803, | |
| "learning_rate": 4.401076716016151e-05, | |
| "loss": 0.0312, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 12.02, | |
| "grad_norm": 0.8426460027694702, | |
| "learning_rate": 4.399581277104831e-05, | |
| "loss": 0.0262, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 12.05, | |
| "grad_norm": 0.3200826048851013, | |
| "learning_rate": 4.39808583819351e-05, | |
| "loss": 0.0237, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 12.08, | |
| "grad_norm": 0.2708234488964081, | |
| "learning_rate": 4.3965903992821893e-05, | |
| "loss": 0.0229, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 12.11, | |
| "grad_norm": 1.4237157106399536, | |
| "learning_rate": 4.395094960370869e-05, | |
| "loss": 0.0198, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 12.14, | |
| "grad_norm": 0.06805676221847534, | |
| "learning_rate": 4.3935995214595486e-05, | |
| "loss": 0.026, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 12.17, | |
| "grad_norm": 1.2842926979064941, | |
| "learning_rate": 4.392104082548228e-05, | |
| "loss": 0.0241, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 12.2, | |
| "grad_norm": 1.5190855264663696, | |
| "learning_rate": 4.390608643636908e-05, | |
| "loss": 0.0232, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 12.23, | |
| "grad_norm": 1.8280004262924194, | |
| "learning_rate": 4.389113204725587e-05, | |
| "loss": 0.0241, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 12.26, | |
| "grad_norm": 0.19059352576732635, | |
| "learning_rate": 4.3876177658142664e-05, | |
| "loss": 0.0238, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 12.26, | |
| "eval_loss": 0.24695585668087006, | |
| "eval_precision": 0.9256610729722858, | |
| "eval_recall": 0.8967332738076911, | |
| "eval_runtime": 302.2734, | |
| "eval_samples_per_second": 44.245, | |
| "eval_steps_per_second": 1.383, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 12.29, | |
| "grad_norm": 0.40746474266052246, | |
| "learning_rate": 4.3861223269029464e-05, | |
| "loss": 0.0232, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 12.32, | |
| "grad_norm": 1.2412996292114258, | |
| "learning_rate": 4.384626887991626e-05, | |
| "loss": 0.0215, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 12.35, | |
| "grad_norm": 0.2166558802127838, | |
| "learning_rate": 4.3831314490803056e-05, | |
| "loss": 0.0237, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 12.38, | |
| "grad_norm": 0.719872236251831, | |
| "learning_rate": 4.381636010168985e-05, | |
| "loss": 0.0253, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 12.41, | |
| "grad_norm": 1.5946626663208008, | |
| "learning_rate": 4.380140571257664e-05, | |
| "loss": 0.0235, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 12.44, | |
| "grad_norm": 1.0119950771331787, | |
| "learning_rate": 4.378645132346344e-05, | |
| "loss": 0.0257, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 12.47, | |
| "grad_norm": 0.9327923059463501, | |
| "learning_rate": 4.377149693435023e-05, | |
| "loss": 0.0243, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "grad_norm": 0.41256028413772583, | |
| "learning_rate": 4.375654254523703e-05, | |
| "loss": 0.0272, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 12.53, | |
| "grad_norm": 0.1845785677433014, | |
| "learning_rate": 4.374158815612383e-05, | |
| "loss": 0.029, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 12.56, | |
| "grad_norm": 1.754239559173584, | |
| "learning_rate": 4.372663376701062e-05, | |
| "loss": 0.0252, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 12.56, | |
| "eval_loss": 0.2473253309726715, | |
| "eval_precision": 0.9269791733010636, | |
| "eval_recall": 0.8962406478031959, | |
| "eval_runtime": 304.5592, | |
| "eval_samples_per_second": 43.913, | |
| "eval_steps_per_second": 1.372, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 12.59, | |
| "grad_norm": 0.5748271346092224, | |
| "learning_rate": 4.371167937789741e-05, | |
| "loss": 0.0281, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 12.62, | |
| "grad_norm": 0.36274582147598267, | |
| "learning_rate": 4.369672498878421e-05, | |
| "loss": 0.0248, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 12.65, | |
| "grad_norm": 0.6130300164222717, | |
| "learning_rate": 4.3681770599671006e-05, | |
| "loss": 0.0269, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 12.68, | |
| "grad_norm": 1.2477418184280396, | |
| "learning_rate": 4.36668162105578e-05, | |
| "loss": 0.0259, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 12.71, | |
| "grad_norm": 0.8152483701705933, | |
| "learning_rate": 4.36518618214446e-05, | |
| "loss": 0.0263, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 12.74, | |
| "grad_norm": 0.04731460288167, | |
| "learning_rate": 4.363690743233139e-05, | |
| "loss": 0.024, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 12.77, | |
| "grad_norm": 0.7886996865272522, | |
| "learning_rate": 4.362195304321819e-05, | |
| "loss": 0.0245, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 12.8, | |
| "grad_norm": 2.1900315284729004, | |
| "learning_rate": 4.360699865410498e-05, | |
| "loss": 0.0292, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 12.83, | |
| "grad_norm": 0.45924192667007446, | |
| "learning_rate": 4.3592044264991777e-05, | |
| "loss": 0.0261, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 12.86, | |
| "grad_norm": 0.07307754456996918, | |
| "learning_rate": 4.3577089875878576e-05, | |
| "loss": 0.0248, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 12.86, | |
| "eval_loss": 0.24504822492599487, | |
| "eval_precision": 0.9273960876319711, | |
| "eval_recall": 0.9006127035930909, | |
| "eval_runtime": 303.9567, | |
| "eval_samples_per_second": 44.0, | |
| "eval_steps_per_second": 1.375, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 12.89, | |
| "grad_norm": 0.4676400423049927, | |
| "learning_rate": 4.356213548676536e-05, | |
| "loss": 0.0232, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 12.92, | |
| "grad_norm": 0.2993585765361786, | |
| "learning_rate": 4.354718109765216e-05, | |
| "loss": 0.0237, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 12.95, | |
| "grad_norm": 1.226276159286499, | |
| "learning_rate": 4.353222670853896e-05, | |
| "loss": 0.0256, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 12.98, | |
| "grad_norm": 1.5110477209091187, | |
| "learning_rate": 4.3517272319425754e-05, | |
| "loss": 0.0285, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 13.01, | |
| "grad_norm": 1.6162513494491577, | |
| "learning_rate": 4.350231793031255e-05, | |
| "loss": 0.0219, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 13.04, | |
| "grad_norm": 0.1792839914560318, | |
| "learning_rate": 4.348736354119935e-05, | |
| "loss": 0.0191, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 13.07, | |
| "grad_norm": 1.9044649600982666, | |
| "learning_rate": 4.347240915208614e-05, | |
| "loss": 0.017, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 13.1, | |
| "grad_norm": 0.5899202823638916, | |
| "learning_rate": 4.345745476297293e-05, | |
| "loss": 0.0241, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 13.13, | |
| "grad_norm": 0.6521077752113342, | |
| "learning_rate": 4.344250037385973e-05, | |
| "loss": 0.0216, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 13.16, | |
| "grad_norm": 0.7596339583396912, | |
| "learning_rate": 4.3427545984746525e-05, | |
| "loss": 0.0181, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 13.16, | |
| "eval_loss": 0.2613174319267273, | |
| "eval_precision": 0.9276514907592247, | |
| "eval_recall": 0.8870654884694725, | |
| "eval_runtime": 304.3764, | |
| "eval_samples_per_second": 43.939, | |
| "eval_steps_per_second": 1.373, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 13.19, | |
| "grad_norm": 1.0404387712478638, | |
| "learning_rate": 4.3412591595633325e-05, | |
| "loss": 0.0247, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 13.22, | |
| "grad_norm": 1.7849115133285522, | |
| "learning_rate": 4.339763720652011e-05, | |
| "loss": 0.0188, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 13.25, | |
| "grad_norm": 1.0972092151641846, | |
| "learning_rate": 4.338268281740691e-05, | |
| "loss": 0.0255, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 13.28, | |
| "grad_norm": 0.7391771078109741, | |
| "learning_rate": 4.336772842829371e-05, | |
| "loss": 0.0225, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 13.31, | |
| "grad_norm": 1.5010148286819458, | |
| "learning_rate": 4.3352774039180497e-05, | |
| "loss": 0.0217, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 13.34, | |
| "grad_norm": 0.7189137935638428, | |
| "learning_rate": 4.3337819650067296e-05, | |
| "loss": 0.0211, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 13.37, | |
| "grad_norm": 1.003636121749878, | |
| "learning_rate": 4.3322865260954096e-05, | |
| "loss": 0.0236, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 13.4, | |
| "grad_norm": 0.914703369140625, | |
| "learning_rate": 4.330791087184089e-05, | |
| "loss": 0.0224, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 13.43, | |
| "grad_norm": 0.1861487776041031, | |
| "learning_rate": 4.329295648272768e-05, | |
| "loss": 0.0251, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 13.46, | |
| "grad_norm": 0.7734150886535645, | |
| "learning_rate": 4.327800209361448e-05, | |
| "loss": 0.0254, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 13.46, | |
| "eval_loss": 0.2583397924900055, | |
| "eval_precision": 0.9213451745124829, | |
| "eval_recall": 0.9135441362110902, | |
| "eval_runtime": 305.1941, | |
| "eval_samples_per_second": 43.821, | |
| "eval_steps_per_second": 1.37, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 13.49, | |
| "grad_norm": 0.7596560716629028, | |
| "learning_rate": 4.3263047704501274e-05, | |
| "loss": 0.0246, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 13.52, | |
| "grad_norm": 1.4200429916381836, | |
| "learning_rate": 4.324809331538807e-05, | |
| "loss": 0.0174, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 13.55, | |
| "grad_norm": 2.7082788944244385, | |
| "learning_rate": 4.323313892627486e-05, | |
| "loss": 0.026, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 13.58, | |
| "grad_norm": 1.2132717370986938, | |
| "learning_rate": 4.321818453716166e-05, | |
| "loss": 0.0228, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 13.61, | |
| "grad_norm": 3.768927812576294, | |
| "learning_rate": 4.320323014804846e-05, | |
| "loss": 0.0236, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 13.64, | |
| "grad_norm": 1.5163260698318481, | |
| "learning_rate": 4.3188275758935245e-05, | |
| "loss": 0.0189, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 13.67, | |
| "grad_norm": 0.7969369888305664, | |
| "learning_rate": 4.3173321369822045e-05, | |
| "loss": 0.0245, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 13.7, | |
| "grad_norm": 1.445375680923462, | |
| "learning_rate": 4.3158366980708845e-05, | |
| "loss": 0.0232, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 13.73, | |
| "grad_norm": 0.04813400283455849, | |
| "learning_rate": 4.314341259159563e-05, | |
| "loss": 0.0215, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 13.76, | |
| "grad_norm": 2.0303447246551514, | |
| "learning_rate": 4.312845820248243e-05, | |
| "loss": 0.0206, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 13.76, | |
| "eval_loss": 0.2769757807254791, | |
| "eval_precision": 0.9277020832674738, | |
| "eval_recall": 0.9035376704947813, | |
| "eval_runtime": 304.0355, | |
| "eval_samples_per_second": 43.988, | |
| "eval_steps_per_second": 1.375, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 13.79, | |
| "grad_norm": 0.9254265427589417, | |
| "learning_rate": 4.311350381336923e-05, | |
| "loss": 0.0203, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 13.82, | |
| "grad_norm": 2.1310763359069824, | |
| "learning_rate": 4.309854942425602e-05, | |
| "loss": 0.0206, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 13.85, | |
| "grad_norm": 0.5353107452392578, | |
| "learning_rate": 4.3083595035142816e-05, | |
| "loss": 0.0206, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 13.88, | |
| "grad_norm": 0.9395775198936462, | |
| "learning_rate": 4.306864064602961e-05, | |
| "loss": 0.0304, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 13.91, | |
| "grad_norm": 0.056145694106817245, | |
| "learning_rate": 4.305368625691641e-05, | |
| "loss": 0.0237, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 13.94, | |
| "grad_norm": 0.03264997899532318, | |
| "learning_rate": 4.30387318678032e-05, | |
| "loss": 0.0244, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 13.97, | |
| "grad_norm": 1.6055926084518433, | |
| "learning_rate": 4.3023777478689994e-05, | |
| "loss": 0.0224, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 1.4891152381896973, | |
| "learning_rate": 4.3008823089576794e-05, | |
| "loss": 0.021, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 14.03, | |
| "grad_norm": 0.3057061731815338, | |
| "learning_rate": 4.299386870046359e-05, | |
| "loss": 0.0173, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 14.06, | |
| "grad_norm": 1.0254565477371216, | |
| "learning_rate": 4.297891431135038e-05, | |
| "loss": 0.017, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 14.06, | |
| "eval_loss": 0.2714207172393799, | |
| "eval_precision": 0.9283886660138359, | |
| "eval_recall": 0.9048923920071431, | |
| "eval_runtime": 302.2817, | |
| "eval_samples_per_second": 44.244, | |
| "eval_steps_per_second": 1.383, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 14.08, | |
| "grad_norm": 0.6178631782531738, | |
| "learning_rate": 4.296395992223718e-05, | |
| "loss": 0.021, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 14.11, | |
| "grad_norm": 3.516096353530884, | |
| "learning_rate": 4.294900553312398e-05, | |
| "loss": 0.0181, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 14.14, | |
| "grad_norm": 0.20362690091133118, | |
| "learning_rate": 4.2934051144010765e-05, | |
| "loss": 0.0193, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 14.17, | |
| "grad_norm": 2.5930867195129395, | |
| "learning_rate": 4.2919096754897565e-05, | |
| "loss": 0.0176, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 14.2, | |
| "grad_norm": 1.4823873043060303, | |
| "learning_rate": 4.2904142365784364e-05, | |
| "loss": 0.0173, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 14.23, | |
| "grad_norm": 0.5278753042221069, | |
| "learning_rate": 4.288918797667115e-05, | |
| "loss": 0.0212, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 14.26, | |
| "grad_norm": 1.855218529701233, | |
| "learning_rate": 4.287423358755795e-05, | |
| "loss": 0.0199, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 14.29, | |
| "grad_norm": 0.31464433670043945, | |
| "learning_rate": 4.285927919844474e-05, | |
| "loss": 0.0241, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 14.32, | |
| "grad_norm": 0.2182936817407608, | |
| "learning_rate": 4.284432480933154e-05, | |
| "loss": 0.0172, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 14.35, | |
| "grad_norm": 1.2800421714782715, | |
| "learning_rate": 4.2829370420218336e-05, | |
| "loss": 0.0188, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 14.35, | |
| "eval_loss": 0.26452192664146423, | |
| "eval_precision": 0.9272217673363986, | |
| "eval_recall": 0.9065242156470334, | |
| "eval_runtime": 302.9199, | |
| "eval_samples_per_second": 44.15, | |
| "eval_steps_per_second": 1.38, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 14.38, | |
| "grad_norm": 3.320737361907959, | |
| "learning_rate": 4.281441603110513e-05, | |
| "loss": 0.0198, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 14.41, | |
| "grad_norm": 0.8519121408462524, | |
| "learning_rate": 4.279946164199193e-05, | |
| "loss": 0.0182, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 14.44, | |
| "grad_norm": 0.4318147599697113, | |
| "learning_rate": 4.278450725287872e-05, | |
| "loss": 0.0178, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 14.47, | |
| "grad_norm": 0.047759074717760086, | |
| "learning_rate": 4.2769552863765514e-05, | |
| "loss": 0.021, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 14.5, | |
| "grad_norm": 1.6022422313690186, | |
| "learning_rate": 4.2754598474652314e-05, | |
| "loss": 0.0144, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 14.53, | |
| "grad_norm": 0.7104184031486511, | |
| "learning_rate": 4.273964408553911e-05, | |
| "loss": 0.0207, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 14.56, | |
| "grad_norm": 1.5093780755996704, | |
| "learning_rate": 4.27246896964259e-05, | |
| "loss": 0.0205, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 14.59, | |
| "grad_norm": 0.7566470503807068, | |
| "learning_rate": 4.27097353073127e-05, | |
| "loss": 0.0187, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 14.62, | |
| "grad_norm": 1.222693920135498, | |
| "learning_rate": 4.269478091819949e-05, | |
| "loss": 0.0199, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 14.65, | |
| "grad_norm": 1.5546650886535645, | |
| "learning_rate": 4.2679826529086285e-05, | |
| "loss": 0.0188, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 14.65, | |
| "eval_loss": 0.2760772109031677, | |
| "eval_precision": 0.9305101058710299, | |
| "eval_recall": 0.8930077896486961, | |
| "eval_runtime": 301.8588, | |
| "eval_samples_per_second": 44.305, | |
| "eval_steps_per_second": 1.385, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 14.68, | |
| "grad_norm": 0.6152912378311157, | |
| "learning_rate": 4.2664872139973084e-05, | |
| "loss": 0.0199, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 14.71, | |
| "grad_norm": 0.8479551672935486, | |
| "learning_rate": 4.264991775085988e-05, | |
| "loss": 0.0236, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 14.74, | |
| "grad_norm": 2.0793190002441406, | |
| "learning_rate": 4.263496336174668e-05, | |
| "loss": 0.0257, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 14.77, | |
| "grad_norm": 0.9795339107513428, | |
| "learning_rate": 4.262000897263347e-05, | |
| "loss": 0.019, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 14.8, | |
| "grad_norm": 0.49018004536628723, | |
| "learning_rate": 4.260505458352026e-05, | |
| "loss": 0.0207, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 14.83, | |
| "grad_norm": 0.22400274872779846, | |
| "learning_rate": 4.259010019440706e-05, | |
| "loss": 0.0212, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 14.86, | |
| "grad_norm": 0.8345464468002319, | |
| "learning_rate": 4.2575145805293855e-05, | |
| "loss": 0.0182, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 14.89, | |
| "grad_norm": 0.2443341612815857, | |
| "learning_rate": 4.256019141618065e-05, | |
| "loss": 0.0177, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 14.92, | |
| "grad_norm": 0.697216272354126, | |
| "learning_rate": 4.254523702706745e-05, | |
| "loss": 0.0216, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 14.95, | |
| "grad_norm": 0.5050187706947327, | |
| "learning_rate": 4.253028263795424e-05, | |
| "loss": 0.0166, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 14.95, | |
| "eval_loss": 0.282767653465271, | |
| "eval_precision": 0.9254008757836374, | |
| "eval_recall": 0.9044305551279288, | |
| "eval_runtime": 303.3682, | |
| "eval_samples_per_second": 44.085, | |
| "eval_steps_per_second": 1.378, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 14.98, | |
| "grad_norm": 0.4018344283103943, | |
| "learning_rate": 4.2515328248841034e-05, | |
| "loss": 0.02, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 15.01, | |
| "grad_norm": 2.2681732177734375, | |
| "learning_rate": 4.250037385972783e-05, | |
| "loss": 0.0169, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 15.04, | |
| "grad_norm": 0.18065716326236725, | |
| "learning_rate": 4.2485419470614626e-05, | |
| "loss": 0.0163, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 15.07, | |
| "grad_norm": 1.0265353918075562, | |
| "learning_rate": 4.247046508150142e-05, | |
| "loss": 0.0201, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 15.1, | |
| "grad_norm": 1.7455101013183594, | |
| "learning_rate": 4.245551069238822e-05, | |
| "loss": 0.0174, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 15.13, | |
| "grad_norm": 0.03697839379310608, | |
| "learning_rate": 4.244055630327501e-05, | |
| "loss": 0.021, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 15.16, | |
| "grad_norm": 0.10842275619506836, | |
| "learning_rate": 4.242560191416181e-05, | |
| "loss": 0.0196, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 15.19, | |
| "grad_norm": 0.6541497111320496, | |
| "learning_rate": 4.2410647525048604e-05, | |
| "loss": 0.019, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 15.22, | |
| "grad_norm": 1.3006408214569092, | |
| "learning_rate": 4.23956931359354e-05, | |
| "loss": 0.0178, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 15.25, | |
| "grad_norm": 0.6021150350570679, | |
| "learning_rate": 4.23807387468222e-05, | |
| "loss": 0.0199, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 15.25, | |
| "eval_loss": 0.2640076279640198, | |
| "eval_precision": 0.9301819557882123, | |
| "eval_recall": 0.9081868284122048, | |
| "eval_runtime": 302.9987, | |
| "eval_samples_per_second": 44.139, | |
| "eval_steps_per_second": 1.38, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 15.28, | |
| "grad_norm": 0.8783787488937378, | |
| "learning_rate": 4.236578435770899e-05, | |
| "loss": 0.0175, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 15.31, | |
| "grad_norm": 0.18405625224113464, | |
| "learning_rate": 4.235082996859578e-05, | |
| "loss": 0.0152, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 15.34, | |
| "grad_norm": 0.03877532109618187, | |
| "learning_rate": 4.233587557948258e-05, | |
| "loss": 0.0174, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 15.37, | |
| "grad_norm": 0.3079793155193329, | |
| "learning_rate": 4.2320921190369375e-05, | |
| "loss": 0.015, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 15.4, | |
| "grad_norm": 0.9296764731407166, | |
| "learning_rate": 4.230596680125617e-05, | |
| "loss": 0.0177, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 15.43, | |
| "grad_norm": 0.7762422561645508, | |
| "learning_rate": 4.229101241214297e-05, | |
| "loss": 0.0195, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 15.46, | |
| "grad_norm": 2.472615957260132, | |
| "learning_rate": 4.227605802302976e-05, | |
| "loss": 0.0195, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 15.49, | |
| "grad_norm": 2.8045852184295654, | |
| "learning_rate": 4.226110363391655e-05, | |
| "loss": 0.0201, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 15.52, | |
| "grad_norm": 0.053874421864748, | |
| "learning_rate": 4.224614924480335e-05, | |
| "loss": 0.018, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 15.55, | |
| "grad_norm": 0.3398553729057312, | |
| "learning_rate": 4.2231194855690146e-05, | |
| "loss": 0.0167, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 15.55, | |
| "eval_loss": 0.2754287123680115, | |
| "eval_precision": 0.927292017724521, | |
| "eval_recall": 0.914929646848733, | |
| "eval_runtime": 302.6973, | |
| "eval_samples_per_second": 44.183, | |
| "eval_steps_per_second": 1.381, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 15.58, | |
| "grad_norm": 1.1841187477111816, | |
| "learning_rate": 4.2216240466576945e-05, | |
| "loss": 0.0157, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 15.61, | |
| "grad_norm": 1.0184565782546997, | |
| "learning_rate": 4.220128607746374e-05, | |
| "loss": 0.0145, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 15.64, | |
| "grad_norm": 0.6707783937454224, | |
| "learning_rate": 4.218633168835053e-05, | |
| "loss": 0.0215, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 15.67, | |
| "grad_norm": 0.8084210157394409, | |
| "learning_rate": 4.217137729923733e-05, | |
| "loss": 0.0185, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 15.7, | |
| "grad_norm": 0.24998579919338226, | |
| "learning_rate": 4.2156422910124124e-05, | |
| "loss": 0.0192, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 15.73, | |
| "grad_norm": 0.11048603802919388, | |
| "learning_rate": 4.214146852101092e-05, | |
| "loss": 0.0177, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 15.76, | |
| "grad_norm": 0.8540931940078735, | |
| "learning_rate": 4.2126514131897716e-05, | |
| "loss": 0.018, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 15.79, | |
| "grad_norm": 0.3726775646209717, | |
| "learning_rate": 4.211155974278451e-05, | |
| "loss": 0.0181, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 15.82, | |
| "grad_norm": 0.13543102145195007, | |
| "learning_rate": 4.20966053536713e-05, | |
| "loss": 0.0201, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 15.85, | |
| "grad_norm": 0.3862367570400238, | |
| "learning_rate": 4.20816509645581e-05, | |
| "loss": 0.0184, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 15.85, | |
| "eval_loss": 0.2746909558773041, | |
| "eval_precision": 0.9290507850298093, | |
| "eval_recall": 0.9164075248622187, | |
| "eval_runtime": 304.9585, | |
| "eval_samples_per_second": 43.855, | |
| "eval_steps_per_second": 1.371, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 15.88, | |
| "grad_norm": 0.5059983730316162, | |
| "learning_rate": 4.2066696575444895e-05, | |
| "loss": 0.0182, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 15.91, | |
| "grad_norm": 0.45346036553382874, | |
| "learning_rate": 4.205174218633169e-05, | |
| "loss": 0.0208, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 15.94, | |
| "grad_norm": 1.0658683776855469, | |
| "learning_rate": 4.203678779721849e-05, | |
| "loss": 0.0149, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 15.97, | |
| "grad_norm": 0.2168959081172943, | |
| "learning_rate": 4.202183340810528e-05, | |
| "loss": 0.0191, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 1.7620713710784912, | |
| "learning_rate": 4.200687901899207e-05, | |
| "loss": 0.0219, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 16.03, | |
| "grad_norm": 0.33198004961013794, | |
| "learning_rate": 4.199192462987887e-05, | |
| "loss": 0.014, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 16.06, | |
| "grad_norm": 3.614070415496826, | |
| "learning_rate": 4.1976970240765665e-05, | |
| "loss": 0.0132, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 16.09, | |
| "grad_norm": 0.7846044898033142, | |
| "learning_rate": 4.1962015851652465e-05, | |
| "loss": 0.014, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 16.12, | |
| "grad_norm": 1.2382973432540894, | |
| "learning_rate": 4.194706146253926e-05, | |
| "loss": 0.0198, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 16.15, | |
| "grad_norm": 1.7487576007843018, | |
| "learning_rate": 4.193210707342605e-05, | |
| "loss": 0.0156, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 16.15, | |
| "eval_loss": 0.27493321895599365, | |
| "eval_precision": 0.926791958041958, | |
| "eval_recall": 0.9140367622155855, | |
| "eval_runtime": 304.8434, | |
| "eval_samples_per_second": 43.872, | |
| "eval_steps_per_second": 1.371, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 16.18, | |
| "grad_norm": 2.473257541656494, | |
| "learning_rate": 4.191715268431285e-05, | |
| "loss": 0.0144, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 16.21, | |
| "grad_norm": 1.7735458612442017, | |
| "learning_rate": 4.1902198295199643e-05, | |
| "loss": 0.0128, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 16.24, | |
| "grad_norm": 0.09201900660991669, | |
| "learning_rate": 4.1887243906086436e-05, | |
| "loss": 0.0121, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 16.27, | |
| "grad_norm": 4.265335559844971, | |
| "learning_rate": 4.1872289516973236e-05, | |
| "loss": 0.0193, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 16.3, | |
| "grad_norm": 0.05550719425082207, | |
| "learning_rate": 4.185733512786003e-05, | |
| "loss": 0.0191, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 16.33, | |
| "grad_norm": 1.2244312763214111, | |
| "learning_rate": 4.184238073874682e-05, | |
| "loss": 0.0144, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 16.36, | |
| "grad_norm": 0.11609119921922684, | |
| "learning_rate": 4.182742634963362e-05, | |
| "loss": 0.0195, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 16.39, | |
| "grad_norm": 0.7442992329597473, | |
| "learning_rate": 4.1812471960520414e-05, | |
| "loss": 0.0161, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 16.42, | |
| "grad_norm": 1.913397192955017, | |
| "learning_rate": 4.179751757140721e-05, | |
| "loss": 0.017, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 16.45, | |
| "grad_norm": 1.5975757837295532, | |
| "learning_rate": 4.178256318229401e-05, | |
| "loss": 0.0131, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 16.45, | |
| "eval_loss": 0.28440138697624207, | |
| "eval_precision": 0.9323552610821896, | |
| "eval_recall": 0.9098494411773762, | |
| "eval_runtime": 302.3846, | |
| "eval_samples_per_second": 44.228, | |
| "eval_steps_per_second": 1.382, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 16.48, | |
| "grad_norm": 0.02616269886493683, | |
| "learning_rate": 4.17676087931808e-05, | |
| "loss": 0.0166, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 16.51, | |
| "grad_norm": 0.270749032497406, | |
| "learning_rate": 4.17526544040676e-05, | |
| "loss": 0.0167, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 16.54, | |
| "grad_norm": 0.8699542880058289, | |
| "learning_rate": 4.173770001495439e-05, | |
| "loss": 0.0178, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 16.57, | |
| "grad_norm": 0.15558452904224396, | |
| "learning_rate": 4.1722745625841185e-05, | |
| "loss": 0.0155, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 16.6, | |
| "grad_norm": 1.3881036043167114, | |
| "learning_rate": 4.1707791236727985e-05, | |
| "loss": 0.0162, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 16.63, | |
| "grad_norm": 1.0590258836746216, | |
| "learning_rate": 4.169283684761478e-05, | |
| "loss": 0.019, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 16.66, | |
| "grad_norm": 0.6527047157287598, | |
| "learning_rate": 4.167788245850157e-05, | |
| "loss": 0.0162, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 16.69, | |
| "grad_norm": 0.7468928694725037, | |
| "learning_rate": 4.166292806938837e-05, | |
| "loss": 0.0187, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 16.72, | |
| "grad_norm": 1.1580772399902344, | |
| "learning_rate": 4.164797368027516e-05, | |
| "loss": 0.0152, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 16.75, | |
| "grad_norm": 0.27484288811683655, | |
| "learning_rate": 4.1633019291161956e-05, | |
| "loss": 0.018, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 16.75, | |
| "eval_loss": 0.2911526560783386, | |
| "eval_precision": 0.9246059786783004, | |
| "eval_recall": 0.9265987253302134, | |
| "eval_runtime": 304.1503, | |
| "eval_samples_per_second": 43.972, | |
| "eval_steps_per_second": 1.374, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 16.78, | |
| "grad_norm": 0.12976956367492676, | |
| "learning_rate": 4.1618064902048756e-05, | |
| "loss": 0.0185, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 16.81, | |
| "grad_norm": 0.37897953391075134, | |
| "learning_rate": 4.160311051293555e-05, | |
| "loss": 0.0152, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 16.84, | |
| "grad_norm": 0.07681228220462799, | |
| "learning_rate": 4.158815612382234e-05, | |
| "loss": 0.0163, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 16.87, | |
| "grad_norm": 0.5966798663139343, | |
| "learning_rate": 4.157320173470914e-05, | |
| "loss": 0.014, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 16.9, | |
| "grad_norm": 0.29120373725891113, | |
| "learning_rate": 4.1558247345595934e-05, | |
| "loss": 0.018, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 16.93, | |
| "grad_norm": 0.4325448274612427, | |
| "learning_rate": 4.1543292956482734e-05, | |
| "loss": 0.0145, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 16.96, | |
| "grad_norm": 1.473797082901001, | |
| "learning_rate": 4.1528338567369527e-05, | |
| "loss": 0.0164, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 16.99, | |
| "grad_norm": 0.963238537311554, | |
| "learning_rate": 4.151338417825632e-05, | |
| "loss": 0.0168, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 17.02, | |
| "grad_norm": 1.2749171257019043, | |
| "learning_rate": 4.149842978914312e-05, | |
| "loss": 0.0172, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 17.05, | |
| "grad_norm": 0.1201496422290802, | |
| "learning_rate": 4.148347540002991e-05, | |
| "loss": 0.0132, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 17.05, | |
| "eval_loss": 0.2895963788032532, | |
| "eval_precision": 0.9242246747641655, | |
| "eval_recall": 0.9230579759229041, | |
| "eval_runtime": 304.3955, | |
| "eval_samples_per_second": 43.936, | |
| "eval_steps_per_second": 1.373, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 17.08, | |
| "grad_norm": 0.0923817902803421, | |
| "learning_rate": 4.1468521010916705e-05, | |
| "loss": 0.0155, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 17.11, | |
| "grad_norm": 0.17687027156352997, | |
| "learning_rate": 4.1453566621803505e-05, | |
| "loss": 0.0142, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 17.14, | |
| "grad_norm": 0.5095121264457703, | |
| "learning_rate": 4.14386122326903e-05, | |
| "loss": 0.0122, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 17.17, | |
| "grad_norm": 0.14807282388210297, | |
| "learning_rate": 4.142365784357709e-05, | |
| "loss": 0.0122, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 17.19, | |
| "grad_norm": 0.22806455194950104, | |
| "learning_rate": 4.140870345446389e-05, | |
| "loss": 0.0126, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 17.22, | |
| "grad_norm": 0.1654992550611496, | |
| "learning_rate": 4.139374906535068e-05, | |
| "loss": 0.012, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 17.25, | |
| "grad_norm": 1.1821808815002441, | |
| "learning_rate": 4.1378794676237476e-05, | |
| "loss": 0.0154, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 17.28, | |
| "grad_norm": 0.33708083629608154, | |
| "learning_rate": 4.1363840287124275e-05, | |
| "loss": 0.0118, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 17.31, | |
| "grad_norm": 0.2778627276420593, | |
| "learning_rate": 4.134888589801107e-05, | |
| "loss": 0.0153, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 17.34, | |
| "grad_norm": 0.4350825250148773, | |
| "learning_rate": 4.133393150889787e-05, | |
| "loss": 0.0131, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 17.34, | |
| "eval_loss": 0.2985839247703552, | |
| "eval_precision": 0.9294326572576876, | |
| "eval_recall": 0.9185011853813233, | |
| "eval_runtime": 303.6403, | |
| "eval_samples_per_second": 44.046, | |
| "eval_steps_per_second": 1.377, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 17.37, | |
| "grad_norm": 1.0241811275482178, | |
| "learning_rate": 4.131897711978466e-05, | |
| "loss": 0.0152, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 17.4, | |
| "grad_norm": 0.705042839050293, | |
| "learning_rate": 4.1304022730671454e-05, | |
| "loss": 0.0165, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 17.43, | |
| "grad_norm": 0.9130484461784363, | |
| "learning_rate": 4.128906834155825e-05, | |
| "loss": 0.0143, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 17.46, | |
| "grad_norm": 0.0633108988404274, | |
| "learning_rate": 4.127411395244504e-05, | |
| "loss": 0.0147, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 17.49, | |
| "grad_norm": 1.2173391580581665, | |
| "learning_rate": 4.125915956333184e-05, | |
| "loss": 0.0134, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 17.52, | |
| "grad_norm": 2.9922380447387695, | |
| "learning_rate": 4.124420517421864e-05, | |
| "loss": 0.0145, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 17.55, | |
| "grad_norm": 0.015288499183952808, | |
| "learning_rate": 4.1229250785105425e-05, | |
| "loss": 0.0169, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 17.58, | |
| "grad_norm": 1.87058424949646, | |
| "learning_rate": 4.1214296395992225e-05, | |
| "loss": 0.0158, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 17.61, | |
| "grad_norm": 0.31113335490226746, | |
| "learning_rate": 4.1199342006879024e-05, | |
| "loss": 0.0151, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 17.64, | |
| "grad_norm": 0.8044542670249939, | |
| "learning_rate": 4.118438761776582e-05, | |
| "loss": 0.0143, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 17.64, | |
| "eval_loss": 0.2973649501800537, | |
| "eval_precision": 0.9298240060774879, | |
| "eval_recall": 0.9044305551279288, | |
| "eval_runtime": 302.1441, | |
| "eval_samples_per_second": 44.264, | |
| "eval_steps_per_second": 1.383, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 17.67, | |
| "grad_norm": 0.08827254921197891, | |
| "learning_rate": 4.116943322865261e-05, | |
| "loss": 0.0157, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 17.7, | |
| "grad_norm": 1.8845312595367432, | |
| "learning_rate": 4.115447883953941e-05, | |
| "loss": 0.0155, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 17.73, | |
| "grad_norm": 0.49602124094963074, | |
| "learning_rate": 4.11395244504262e-05, | |
| "loss": 0.0162, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 17.76, | |
| "grad_norm": 0.3592805564403534, | |
| "learning_rate": 4.1124570061312995e-05, | |
| "loss": 0.0149, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 17.79, | |
| "grad_norm": 1.320101261138916, | |
| "learning_rate": 4.110961567219979e-05, | |
| "loss": 0.0156, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 17.82, | |
| "grad_norm": 0.4389740526676178, | |
| "learning_rate": 4.109466128308659e-05, | |
| "loss": 0.0151, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 17.85, | |
| "grad_norm": 1.6578569412231445, | |
| "learning_rate": 4.107970689397339e-05, | |
| "loss": 0.0166, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 17.88, | |
| "grad_norm": 1.7992475032806396, | |
| "learning_rate": 4.1064752504860174e-05, | |
| "loss": 0.0148, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 17.91, | |
| "grad_norm": 0.026478100568056107, | |
| "learning_rate": 4.1049798115746973e-05, | |
| "loss": 0.0158, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 17.94, | |
| "grad_norm": 2.8473379611968994, | |
| "learning_rate": 4.103484372663377e-05, | |
| "loss": 0.0159, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 17.94, | |
| "eval_loss": 0.2935677468776703, | |
| "eval_precision": 0.9302795129030222, | |
| "eval_recall": 0.9079097262846763, | |
| "eval_runtime": 302.5843, | |
| "eval_samples_per_second": 44.199, | |
| "eval_steps_per_second": 1.381, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 17.97, | |
| "grad_norm": 2.1734695434570312, | |
| "learning_rate": 4.101988933752056e-05, | |
| "loss": 0.0183, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 0.14518772065639496, | |
| "learning_rate": 4.100493494840736e-05, | |
| "loss": 0.0172, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 18.03, | |
| "grad_norm": 0.3986850380897522, | |
| "learning_rate": 4.098998055929416e-05, | |
| "loss": 0.0101, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 18.06, | |
| "grad_norm": 1.78749680519104, | |
| "learning_rate": 4.097502617018095e-05, | |
| "loss": 0.0123, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 18.09, | |
| "grad_norm": 0.43207836151123047, | |
| "learning_rate": 4.0960071781067744e-05, | |
| "loss": 0.0132, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 18.12, | |
| "grad_norm": 0.11268942803144455, | |
| "learning_rate": 4.0945117391954544e-05, | |
| "loss": 0.0131, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 18.15, | |
| "grad_norm": 0.5929433107376099, | |
| "learning_rate": 4.093016300284134e-05, | |
| "loss": 0.0118, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 18.18, | |
| "grad_norm": 0.012462102808058262, | |
| "learning_rate": 4.091520861372813e-05, | |
| "loss": 0.0114, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 18.21, | |
| "grad_norm": 0.03992025554180145, | |
| "learning_rate": 4.090025422461492e-05, | |
| "loss": 0.0123, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 18.24, | |
| "grad_norm": 0.2556318938732147, | |
| "learning_rate": 4.088529983550172e-05, | |
| "loss": 0.0163, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 18.24, | |
| "eval_loss": 0.3005661070346832, | |
| "eval_precision": 0.930046845034112, | |
| "eval_recall": 0.9108039040610856, | |
| "eval_runtime": 303.0262, | |
| "eval_samples_per_second": 44.135, | |
| "eval_steps_per_second": 1.379, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 18.27, | |
| "grad_norm": 0.0933234691619873, | |
| "learning_rate": 4.087034544638852e-05, | |
| "loss": 0.0139, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 18.3, | |
| "grad_norm": 4.561667442321777, | |
| "learning_rate": 4.085539105727531e-05, | |
| "loss": 0.015, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 18.33, | |
| "grad_norm": 1.8393715620040894, | |
| "learning_rate": 4.084043666816211e-05, | |
| "loss": 0.0113, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 18.36, | |
| "grad_norm": 0.5815320611000061, | |
| "learning_rate": 4.082548227904891e-05, | |
| "loss": 0.0158, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 18.39, | |
| "grad_norm": 0.9265565872192383, | |
| "learning_rate": 4.0810527889935693e-05, | |
| "loss": 0.0165, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 18.42, | |
| "grad_norm": 0.029577825218439102, | |
| "learning_rate": 4.079557350082249e-05, | |
| "loss": 0.0151, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 18.45, | |
| "grad_norm": 0.13609355688095093, | |
| "learning_rate": 4.078061911170929e-05, | |
| "loss": 0.0147, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 18.48, | |
| "grad_norm": 0.2505282461643219, | |
| "learning_rate": 4.0765664722596086e-05, | |
| "loss": 0.0117, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 18.51, | |
| "grad_norm": 0.49616509675979614, | |
| "learning_rate": 4.075071033348288e-05, | |
| "loss": 0.0136, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 18.54, | |
| "grad_norm": 1.4143670797348022, | |
| "learning_rate": 4.073575594436967e-05, | |
| "loss": 0.0199, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 18.54, | |
| "eval_loss": 0.28239989280700684, | |
| "eval_precision": 0.9322552865754473, | |
| "eval_recall": 0.89993534283691, | |
| "eval_runtime": 303.1737, | |
| "eval_samples_per_second": 44.113, | |
| "eval_steps_per_second": 1.379, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 18.57, | |
| "grad_norm": 2.5461013317108154, | |
| "learning_rate": 4.072080155525647e-05, | |
| "loss": 0.0122, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 18.6, | |
| "grad_norm": 0.3786807358264923, | |
| "learning_rate": 4.0705847166143264e-05, | |
| "loss": 0.0122, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 18.63, | |
| "grad_norm": 1.546884536743164, | |
| "learning_rate": 4.069089277703006e-05, | |
| "loss": 0.0133, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 18.66, | |
| "grad_norm": 0.04791215434670448, | |
| "learning_rate": 4.0675938387916856e-05, | |
| "loss": 0.0118, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 18.69, | |
| "grad_norm": 0.12534143030643463, | |
| "learning_rate": 4.0660983998803656e-05, | |
| "loss": 0.0145, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 18.72, | |
| "grad_norm": 1.358917474746704, | |
| "learning_rate": 4.064602960969044e-05, | |
| "loss": 0.0152, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 18.75, | |
| "grad_norm": 0.10757000744342804, | |
| "learning_rate": 4.063107522057724e-05, | |
| "loss": 0.0155, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 18.78, | |
| "grad_norm": 2.365614652633667, | |
| "learning_rate": 4.061612083146404e-05, | |
| "loss": 0.0156, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 18.81, | |
| "grad_norm": 0.4936872124671936, | |
| "learning_rate": 4.060116644235083e-05, | |
| "loss": 0.0132, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 18.84, | |
| "grad_norm": 0.022019200026988983, | |
| "learning_rate": 4.058621205323763e-05, | |
| "loss": 0.0124, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 18.84, | |
| "eval_loss": 0.30277740955352783, | |
| "eval_precision": 0.930499515185637, | |
| "eval_recall": 0.9159456879830044, | |
| "eval_runtime": 304.0566, | |
| "eval_samples_per_second": 43.985, | |
| "eval_steps_per_second": 1.375, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 18.87, | |
| "grad_norm": 0.3624964654445648, | |
| "learning_rate": 4.057125766412442e-05, | |
| "loss": 0.0155, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 18.9, | |
| "grad_norm": 1.7629303932189941, | |
| "learning_rate": 4.055630327501122e-05, | |
| "loss": 0.0139, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 18.93, | |
| "grad_norm": 0.18042436242103577, | |
| "learning_rate": 4.054134888589801e-05, | |
| "loss": 0.0179, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 18.96, | |
| "grad_norm": 0.20951129496097565, | |
| "learning_rate": 4.0526394496784806e-05, | |
| "loss": 0.0172, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 18.99, | |
| "grad_norm": 0.8891457915306091, | |
| "learning_rate": 4.0511440107671605e-05, | |
| "loss": 0.0126, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 19.02, | |
| "grad_norm": 0.22427305579185486, | |
| "learning_rate": 4.04964857185584e-05, | |
| "loss": 0.0112, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 19.05, | |
| "grad_norm": 0.25893327593803406, | |
| "learning_rate": 4.048153132944519e-05, | |
| "loss": 0.0123, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 19.08, | |
| "grad_norm": 1.579196810722351, | |
| "learning_rate": 4.046657694033199e-05, | |
| "loss": 0.0117, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 19.11, | |
| "grad_norm": 1.801465630531311, | |
| "learning_rate": 4.045162255121879e-05, | |
| "loss": 0.0113, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 19.14, | |
| "grad_norm": 3.969907522201538, | |
| "learning_rate": 4.0436668162105577e-05, | |
| "loss": 0.0132, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 19.14, | |
| "eval_loss": 0.3150152266025543, | |
| "eval_precision": 0.9289555972482801, | |
| "eval_recall": 0.9146833338464854, | |
| "eval_runtime": 304.0309, | |
| "eval_samples_per_second": 43.989, | |
| "eval_steps_per_second": 1.375, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 19.17, | |
| "grad_norm": 1.5782831907272339, | |
| "learning_rate": 4.0421713772992376e-05, | |
| "loss": 0.0106, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 19.2, | |
| "grad_norm": 1.0305448770523071, | |
| "learning_rate": 4.0406759383879176e-05, | |
| "loss": 0.0115, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 19.23, | |
| "grad_norm": 0.8879725337028503, | |
| "learning_rate": 4.039180499476596e-05, | |
| "loss": 0.0108, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 19.26, | |
| "grad_norm": 1.0525989532470703, | |
| "learning_rate": 4.037685060565276e-05, | |
| "loss": 0.0113, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 19.29, | |
| "grad_norm": 0.19859521090984344, | |
| "learning_rate": 4.0361896216539554e-05, | |
| "loss": 0.011, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 19.32, | |
| "grad_norm": 1.628808856010437, | |
| "learning_rate": 4.034694182742635e-05, | |
| "loss": 0.0126, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 19.35, | |
| "grad_norm": 0.45845118165016174, | |
| "learning_rate": 4.033198743831315e-05, | |
| "loss": 0.0117, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 19.38, | |
| "grad_norm": 0.02105000615119934, | |
| "learning_rate": 4.031703304919994e-05, | |
| "loss": 0.0103, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 19.41, | |
| "grad_norm": 1.2173235416412354, | |
| "learning_rate": 4.030207866008674e-05, | |
| "loss": 0.013, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 19.44, | |
| "grad_norm": 1.0716986656188965, | |
| "learning_rate": 4.028712427097353e-05, | |
| "loss": 0.0136, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 19.44, | |
| "eval_loss": 0.30169057846069336, | |
| "eval_precision": 0.9307780320366132, | |
| "eval_recall": 0.9016903229779242, | |
| "eval_runtime": 303.9363, | |
| "eval_samples_per_second": 44.003, | |
| "eval_steps_per_second": 1.375, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 19.47, | |
| "grad_norm": 0.060731422156095505, | |
| "learning_rate": 4.0272169881860325e-05, | |
| "loss": 0.0103, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 19.5, | |
| "grad_norm": 1.8369615077972412, | |
| "learning_rate": 4.0257215492747125e-05, | |
| "loss": 0.0149, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 19.53, | |
| "grad_norm": 0.5922613143920898, | |
| "learning_rate": 4.024226110363392e-05, | |
| "loss": 0.0137, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 19.56, | |
| "grad_norm": 1.1230493783950806, | |
| "learning_rate": 4.022730671452071e-05, | |
| "loss": 0.016, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 19.59, | |
| "grad_norm": 0.9484757781028748, | |
| "learning_rate": 4.021235232540751e-05, | |
| "loss": 0.0126, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 19.62, | |
| "grad_norm": 0.40328437089920044, | |
| "learning_rate": 4.01973979362943e-05, | |
| "loss": 0.014, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 19.65, | |
| "grad_norm": 1.251897931098938, | |
| "learning_rate": 4.0182443547181096e-05, | |
| "loss": 0.0152, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 19.68, | |
| "grad_norm": 0.06640147417783737, | |
| "learning_rate": 4.0167489158067896e-05, | |
| "loss": 0.0119, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 19.71, | |
| "grad_norm": 0.08419325947761536, | |
| "learning_rate": 4.015253476895469e-05, | |
| "loss": 0.0104, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 19.74, | |
| "grad_norm": 0.8898499011993408, | |
| "learning_rate": 4.013758037984148e-05, | |
| "loss": 0.013, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 19.74, | |
| "eval_loss": 0.30586904287338257, | |
| "eval_precision": 0.9286385564814235, | |
| "eval_recall": 0.9127128298285045, | |
| "eval_runtime": 303.8354, | |
| "eval_samples_per_second": 44.017, | |
| "eval_steps_per_second": 1.376, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 19.77, | |
| "grad_norm": 0.8399672508239746, | |
| "learning_rate": 4.012262599072828e-05, | |
| "loss": 0.0156, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 19.8, | |
| "grad_norm": 1.188772201538086, | |
| "learning_rate": 4.0107671601615074e-05, | |
| "loss": 0.0133, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 19.83, | |
| "grad_norm": 0.3390734791755676, | |
| "learning_rate": 4.0092717212501874e-05, | |
| "loss": 0.011, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 19.86, | |
| "grad_norm": 2.0773940086364746, | |
| "learning_rate": 4.007776282338867e-05, | |
| "loss": 0.0109, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 19.89, | |
| "grad_norm": 1.667506456375122, | |
| "learning_rate": 4.006280843427546e-05, | |
| "loss": 0.0121, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 19.92, | |
| "grad_norm": 0.036488935351371765, | |
| "learning_rate": 4.004785404516226e-05, | |
| "loss": 0.0121, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 19.95, | |
| "grad_norm": 0.9762794375419617, | |
| "learning_rate": 4.003289965604905e-05, | |
| "loss": 0.0138, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 19.98, | |
| "grad_norm": 1.04608952999115, | |
| "learning_rate": 4.0017945266935845e-05, | |
| "loss": 0.0117, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 20.01, | |
| "grad_norm": 5.332238674163818, | |
| "learning_rate": 4.0002990877822645e-05, | |
| "loss": 0.0137, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 20.04, | |
| "grad_norm": 0.01725686341524124, | |
| "learning_rate": 3.998803648870944e-05, | |
| "loss": 0.0131, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 20.04, | |
| "eval_loss": 0.2912316620349884, | |
| "eval_precision": 0.9311961240797836, | |
| "eval_recall": 0.9113273191908617, | |
| "eval_runtime": 303.1004, | |
| "eval_samples_per_second": 44.124, | |
| "eval_steps_per_second": 1.379, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 20.07, | |
| "grad_norm": 0.0427197702229023, | |
| "learning_rate": 3.997308209959623e-05, | |
| "loss": 0.0077, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 20.1, | |
| "grad_norm": 0.017879147082567215, | |
| "learning_rate": 3.995812771048303e-05, | |
| "loss": 0.0104, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 20.13, | |
| "grad_norm": 0.07891906797885895, | |
| "learning_rate": 3.994317332136982e-05, | |
| "loss": 0.0141, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 20.16, | |
| "grad_norm": 0.16812817752361298, | |
| "learning_rate": 3.9928218932256616e-05, | |
| "loss": 0.0097, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 20.19, | |
| "grad_norm": 3.0790505409240723, | |
| "learning_rate": 3.9913264543143416e-05, | |
| "loss": 0.0106, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 20.22, | |
| "grad_norm": 0.41399437189102173, | |
| "learning_rate": 3.989831015403021e-05, | |
| "loss": 0.0089, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 20.25, | |
| "grad_norm": 0.4379628300666809, | |
| "learning_rate": 3.988335576491701e-05, | |
| "loss": 0.0086, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 20.28, | |
| "grad_norm": 0.011956513859331608, | |
| "learning_rate": 3.98684013758038e-05, | |
| "loss": 0.0133, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 20.31, | |
| "grad_norm": 2.477144718170166, | |
| "learning_rate": 3.9853446986690594e-05, | |
| "loss": 0.0091, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 20.33, | |
| "grad_norm": 2.790292739868164, | |
| "learning_rate": 3.9838492597577394e-05, | |
| "loss": 0.0128, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 20.33, | |
| "eval_loss": 0.3076106309890747, | |
| "eval_precision": 0.9304780813715294, | |
| "eval_recall": 0.9090489239200714, | |
| "eval_runtime": 303.9942, | |
| "eval_samples_per_second": 43.994, | |
| "eval_steps_per_second": 1.375, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 20.36, | |
| "grad_norm": 1.441587209701538, | |
| "learning_rate": 3.9823538208464186e-05, | |
| "loss": 0.0159, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 20.39, | |
| "grad_norm": 1.7005335092544556, | |
| "learning_rate": 3.980858381935098e-05, | |
| "loss": 0.01, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 20.42, | |
| "grad_norm": 0.30774638056755066, | |
| "learning_rate": 3.979362943023778e-05, | |
| "loss": 0.0124, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 20.45, | |
| "grad_norm": 0.04803008586168289, | |
| "learning_rate": 3.977867504112457e-05, | |
| "loss": 0.0112, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 20.48, | |
| "grad_norm": 3.551407814025879, | |
| "learning_rate": 3.9763720652011365e-05, | |
| "loss": 0.012, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 20.51, | |
| "grad_norm": 0.037427909672260284, | |
| "learning_rate": 3.9748766262898164e-05, | |
| "loss": 0.0138, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 20.54, | |
| "grad_norm": 0.0066105336882174015, | |
| "learning_rate": 3.973381187378496e-05, | |
| "loss": 0.0114, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 20.57, | |
| "grad_norm": 0.05352969095110893, | |
| "learning_rate": 3.971885748467175e-05, | |
| "loss": 0.0106, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 20.6, | |
| "grad_norm": 1.097419023513794, | |
| "learning_rate": 3.970390309555855e-05, | |
| "loss": 0.0113, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 20.63, | |
| "grad_norm": 2.4684622287750244, | |
| "learning_rate": 3.968894870644534e-05, | |
| "loss": 0.0104, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 20.63, | |
| "eval_loss": 0.3140137493610382, | |
| "eval_precision": 0.9268018018018018, | |
| "eval_recall": 0.9122202038240094, | |
| "eval_runtime": 304.685, | |
| "eval_samples_per_second": 43.895, | |
| "eval_steps_per_second": 1.372, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 20.66, | |
| "grad_norm": 0.03651382029056549, | |
| "learning_rate": 3.967399431733214e-05, | |
| "loss": 0.0086, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 20.69, | |
| "grad_norm": 0.35381224751472473, | |
| "learning_rate": 3.9659039928218935e-05, | |
| "loss": 0.013, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 20.72, | |
| "grad_norm": 0.06933160871267319, | |
| "learning_rate": 3.964408553910573e-05, | |
| "loss": 0.0106, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 20.75, | |
| "grad_norm": 0.4022979140281677, | |
| "learning_rate": 3.962913114999253e-05, | |
| "loss": 0.013, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 20.78, | |
| "grad_norm": 0.03529789671301842, | |
| "learning_rate": 3.961417676087932e-05, | |
| "loss": 0.0156, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 20.81, | |
| "grad_norm": 0.7010594606399536, | |
| "learning_rate": 3.9599222371766114e-05, | |
| "loss": 0.0144, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 20.84, | |
| "grad_norm": 0.37523359060287476, | |
| "learning_rate": 3.958426798265291e-05, | |
| "loss": 0.0127, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 20.87, | |
| "grad_norm": 0.1500304788351059, | |
| "learning_rate": 3.9569313593539706e-05, | |
| "loss": 0.0151, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 20.9, | |
| "grad_norm": 1.1849136352539062, | |
| "learning_rate": 3.95543592044265e-05, | |
| "loss": 0.0092, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 20.93, | |
| "grad_norm": 0.37061455845832825, | |
| "learning_rate": 3.95394048153133e-05, | |
| "loss": 0.0125, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 20.93, | |
| "eval_loss": 0.2996491491794586, | |
| "eval_precision": 0.9277798530693563, | |
| "eval_recall": 0.9176390898734567, | |
| "eval_runtime": 305.225, | |
| "eval_samples_per_second": 43.817, | |
| "eval_steps_per_second": 1.369, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 20.96, | |
| "grad_norm": 1.1082910299301147, | |
| "learning_rate": 3.952445042620009e-05, | |
| "loss": 0.0135, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 20.99, | |
| "grad_norm": 0.21670883893966675, | |
| "learning_rate": 3.9509496037086884e-05, | |
| "loss": 0.0147, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 21.02, | |
| "grad_norm": 1.7163949012756348, | |
| "learning_rate": 3.9494541647973684e-05, | |
| "loss": 0.0074, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 21.05, | |
| "grad_norm": 0.49197930097579956, | |
| "learning_rate": 3.947958725886048e-05, | |
| "loss": 0.009, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 21.08, | |
| "grad_norm": 0.20454080402851105, | |
| "learning_rate": 3.946463286974727e-05, | |
| "loss": 0.0106, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 21.11, | |
| "grad_norm": 1.1480427980422974, | |
| "learning_rate": 3.944967848063407e-05, | |
| "loss": 0.0082, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 21.14, | |
| "grad_norm": 0.012445613741874695, | |
| "learning_rate": 3.943472409152086e-05, | |
| "loss": 0.0124, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 21.17, | |
| "grad_norm": 1.2859218120574951, | |
| "learning_rate": 3.941976970240766e-05, | |
| "loss": 0.0114, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 21.2, | |
| "grad_norm": 1.9639800786972046, | |
| "learning_rate": 3.9404815313294455e-05, | |
| "loss": 0.0094, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 21.23, | |
| "grad_norm": 0.5322540402412415, | |
| "learning_rate": 3.938986092418125e-05, | |
| "loss": 0.0127, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 21.23, | |
| "eval_loss": 0.31439679861068726, | |
| "eval_precision": 0.9300875853255618, | |
| "eval_recall": 0.918747498383571, | |
| "eval_runtime": 305.1026, | |
| "eval_samples_per_second": 43.834, | |
| "eval_steps_per_second": 1.37, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 21.26, | |
| "grad_norm": 0.7698822021484375, | |
| "learning_rate": 3.937490653506805e-05, | |
| "loss": 0.0091, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 21.29, | |
| "grad_norm": 0.058869846165180206, | |
| "learning_rate": 3.935995214595484e-05, | |
| "loss": 0.0116, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 21.32, | |
| "grad_norm": 0.040317438542842865, | |
| "learning_rate": 3.934499775684163e-05, | |
| "loss": 0.0082, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 21.35, | |
| "grad_norm": 0.3180629014968872, | |
| "learning_rate": 3.933004336772843e-05, | |
| "loss": 0.0086, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 21.38, | |
| "grad_norm": 0.14002850651741028, | |
| "learning_rate": 3.9315088978615226e-05, | |
| "loss": 0.0083, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 21.41, | |
| "grad_norm": 0.535882830619812, | |
| "learning_rate": 3.930013458950202e-05, | |
| "loss": 0.0083, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 21.44, | |
| "grad_norm": 0.8898109793663025, | |
| "learning_rate": 3.928518020038882e-05, | |
| "loss": 0.0111, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 21.47, | |
| "grad_norm": 7.178394317626953, | |
| "learning_rate": 3.927022581127561e-05, | |
| "loss": 0.0111, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 21.5, | |
| "grad_norm": 0.03290112316608429, | |
| "learning_rate": 3.9255271422162404e-05, | |
| "loss": 0.0102, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 21.53, | |
| "grad_norm": 0.013704554177820683, | |
| "learning_rate": 3.9240317033049204e-05, | |
| "loss": 0.0131, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 21.53, | |
| "eval_loss": 0.30643701553344727, | |
| "eval_precision": 0.9271496444430644, | |
| "eval_recall": 0.9192709135133471, | |
| "eval_runtime": 304.1697, | |
| "eval_samples_per_second": 43.969, | |
| "eval_steps_per_second": 1.374, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 21.56, | |
| "grad_norm": 0.8118484020233154, | |
| "learning_rate": 3.9225362643936e-05, | |
| "loss": 0.0109, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 21.59, | |
| "grad_norm": 0.8789449334144592, | |
| "learning_rate": 3.9210408254822796e-05, | |
| "loss": 0.0111, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 21.62, | |
| "grad_norm": 1.8666021823883057, | |
| "learning_rate": 3.919545386570959e-05, | |
| "loss": 0.0112, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 21.65, | |
| "grad_norm": 0.33622369170188904, | |
| "learning_rate": 3.918049947659638e-05, | |
| "loss": 0.0121, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 21.68, | |
| "grad_norm": 1.5097126960754395, | |
| "learning_rate": 3.916554508748318e-05, | |
| "loss": 0.0104, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 21.71, | |
| "grad_norm": 1.3149192333221436, | |
| "learning_rate": 3.915059069836997e-05, | |
| "loss": 0.01, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 21.74, | |
| "grad_norm": 1.1172950267791748, | |
| "learning_rate": 3.913563630925677e-05, | |
| "loss": 0.0159, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 21.77, | |
| "grad_norm": 0.7861026525497437, | |
| "learning_rate": 3.912068192014357e-05, | |
| "loss": 0.0102, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 21.8, | |
| "grad_norm": 0.9385488033294678, | |
| "learning_rate": 3.910572753103036e-05, | |
| "loss": 0.0103, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 21.83, | |
| "grad_norm": 0.2858407199382782, | |
| "learning_rate": 3.909077314191715e-05, | |
| "loss": 0.0095, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 21.83, | |
| "eval_loss": 0.3220088481903076, | |
| "eval_precision": 0.9313063063063063, | |
| "eval_recall": 0.89119123125712, | |
| "eval_runtime": 301.1978, | |
| "eval_samples_per_second": 44.403, | |
| "eval_steps_per_second": 1.388, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 21.86, | |
| "grad_norm": 2.1585566997528076, | |
| "learning_rate": 3.907581875280395e-05, | |
| "loss": 0.0107, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 21.89, | |
| "grad_norm": 0.21467708051204681, | |
| "learning_rate": 3.9060864363690745e-05, | |
| "loss": 0.0092, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 21.92, | |
| "grad_norm": 0.0250945333391428, | |
| "learning_rate": 3.904590997457754e-05, | |
| "loss": 0.0095, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 21.95, | |
| "grad_norm": 0.08200676739215851, | |
| "learning_rate": 3.903095558546434e-05, | |
| "loss": 0.0127, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 21.98, | |
| "grad_norm": 7.951723098754883, | |
| "learning_rate": 3.901600119635113e-05, | |
| "loss": 0.0118, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 22.01, | |
| "grad_norm": 0.042703770101070404, | |
| "learning_rate": 3.900104680723793e-05, | |
| "loss": 0.0086, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 22.04, | |
| "grad_norm": 0.13317295908927917, | |
| "learning_rate": 3.898609241812472e-05, | |
| "loss": 0.0117, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 22.07, | |
| "grad_norm": 0.09529834240674973, | |
| "learning_rate": 3.8971138029011516e-05, | |
| "loss": 0.0077, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 22.1, | |
| "grad_norm": 1.2312837839126587, | |
| "learning_rate": 3.8956183639898316e-05, | |
| "loss": 0.01, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 22.13, | |
| "grad_norm": 0.20264630019664764, | |
| "learning_rate": 3.89412292507851e-05, | |
| "loss": 0.0079, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 22.13, | |
| "eval_loss": 0.3207722306251526, | |
| "eval_precision": 0.9257851445663011, | |
| "eval_recall": 0.9148680685981712, | |
| "eval_runtime": 304.4363, | |
| "eval_samples_per_second": 43.93, | |
| "eval_steps_per_second": 1.373, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 22.16, | |
| "grad_norm": 0.007298531476408243, | |
| "learning_rate": 3.89262748616719e-05, | |
| "loss": 0.0083, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 22.19, | |
| "grad_norm": 0.030803361907601357, | |
| "learning_rate": 3.89113204725587e-05, | |
| "loss": 0.0128, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 22.22, | |
| "grad_norm": 0.04404568299651146, | |
| "learning_rate": 3.8896366083445494e-05, | |
| "loss": 0.0094, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 22.25, | |
| "grad_norm": 0.14884673058986664, | |
| "learning_rate": 3.888141169433229e-05, | |
| "loss": 0.0081, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 22.28, | |
| "grad_norm": 0.07467024773359299, | |
| "learning_rate": 3.886645730521909e-05, | |
| "loss": 0.0144, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 22.31, | |
| "grad_norm": 0.6713554859161377, | |
| "learning_rate": 3.885150291610588e-05, | |
| "loss": 0.0136, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 22.34, | |
| "grad_norm": 0.16354040801525116, | |
| "learning_rate": 3.883654852699267e-05, | |
| "loss": 0.0109, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 22.37, | |
| "grad_norm": 1.4964691400527954, | |
| "learning_rate": 3.882159413787947e-05, | |
| "loss": 0.0116, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 22.4, | |
| "grad_norm": 1.4973292350769043, | |
| "learning_rate": 3.8806639748766265e-05, | |
| "loss": 0.008, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 22.43, | |
| "grad_norm": 0.17059992253780365, | |
| "learning_rate": 3.8791685359653065e-05, | |
| "loss": 0.0111, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 22.43, | |
| "eval_loss": 0.30246666073799133, | |
| "eval_precision": 0.9313384217417686, | |
| "eval_recall": 0.8979032605683672, | |
| "eval_runtime": 301.8023, | |
| "eval_samples_per_second": 44.314, | |
| "eval_steps_per_second": 1.385, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 22.46, | |
| "grad_norm": 0.05614122748374939, | |
| "learning_rate": 3.877673097053985e-05, | |
| "loss": 0.0101, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 22.49, | |
| "grad_norm": 0.23737676441669464, | |
| "learning_rate": 3.876177658142665e-05, | |
| "loss": 0.0111, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 22.52, | |
| "grad_norm": 0.11609382182359695, | |
| "learning_rate": 3.874682219231345e-05, | |
| "loss": 0.0129, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 22.55, | |
| "grad_norm": 0.006964783184230328, | |
| "learning_rate": 3.8731867803200236e-05, | |
| "loss": 0.014, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 22.58, | |
| "grad_norm": 0.6018117070198059, | |
| "learning_rate": 3.8716913414087036e-05, | |
| "loss": 0.0092, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 22.61, | |
| "grad_norm": 1.5463790893554688, | |
| "learning_rate": 3.8701959024973836e-05, | |
| "loss": 0.0129, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 22.64, | |
| "grad_norm": 0.3491170108318329, | |
| "learning_rate": 3.868700463586062e-05, | |
| "loss": 0.0124, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 22.67, | |
| "grad_norm": 0.3379780650138855, | |
| "learning_rate": 3.867205024674742e-05, | |
| "loss": 0.0105, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 22.7, | |
| "grad_norm": 0.6625536680221558, | |
| "learning_rate": 3.865709585763422e-05, | |
| "loss": 0.0101, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 22.73, | |
| "grad_norm": 0.5047014951705933, | |
| "learning_rate": 3.8642141468521014e-05, | |
| "loss": 0.0116, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 22.73, | |
| "eval_loss": 0.309579074382782, | |
| "eval_precision": 0.9289195145420119, | |
| "eval_recall": 0.9214261522830136, | |
| "eval_runtime": 306.5207, | |
| "eval_samples_per_second": 43.632, | |
| "eval_steps_per_second": 1.364, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 22.76, | |
| "grad_norm": 2.8879668712615967, | |
| "learning_rate": 3.862718707940781e-05, | |
| "loss": 0.0084, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 22.79, | |
| "grad_norm": 1.4628148078918457, | |
| "learning_rate": 3.86122326902946e-05, | |
| "loss": 0.0091, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 22.82, | |
| "grad_norm": 0.01455759722739458, | |
| "learning_rate": 3.85972783011814e-05, | |
| "loss": 0.0087, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 22.85, | |
| "grad_norm": 0.005665886681526899, | |
| "learning_rate": 3.858232391206819e-05, | |
| "loss": 0.0117, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 22.88, | |
| "grad_norm": 0.5273276567459106, | |
| "learning_rate": 3.8567369522954985e-05, | |
| "loss": 0.009, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 22.91, | |
| "grad_norm": 0.06718481332063675, | |
| "learning_rate": 3.8552415133841785e-05, | |
| "loss": 0.0118, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 22.94, | |
| "grad_norm": 0.30258700251579285, | |
| "learning_rate": 3.8537460744728585e-05, | |
| "loss": 0.0109, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 22.97, | |
| "grad_norm": 2.678166627883911, | |
| "learning_rate": 3.852250635561537e-05, | |
| "loss": 0.015, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "grad_norm": 0.15017007291316986, | |
| "learning_rate": 3.850755196650217e-05, | |
| "loss": 0.0104, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 23.03, | |
| "grad_norm": 0.3501853048801422, | |
| "learning_rate": 3.849259757738897e-05, | |
| "loss": 0.0096, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 23.03, | |
| "eval_loss": 0.2935163080692291, | |
| "eval_precision": 0.9276991482965932, | |
| "eval_recall": 0.9121894146987284, | |
| "eval_runtime": 303.8246, | |
| "eval_samples_per_second": 44.019, | |
| "eval_steps_per_second": 1.376, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 23.06, | |
| "grad_norm": 0.729576587677002, | |
| "learning_rate": 3.8477643188275756e-05, | |
| "loss": 0.0076, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 23.09, | |
| "grad_norm": 0.03431198373436928, | |
| "learning_rate": 3.8462688799162556e-05, | |
| "loss": 0.0068, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 23.12, | |
| "grad_norm": 0.022281186655163765, | |
| "learning_rate": 3.844773441004935e-05, | |
| "loss": 0.0099, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 23.15, | |
| "grad_norm": 0.06289653480052948, | |
| "learning_rate": 3.843278002093615e-05, | |
| "loss": 0.0088, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 23.18, | |
| "grad_norm": 1.1686757802963257, | |
| "learning_rate": 3.841782563182294e-05, | |
| "loss": 0.0113, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 23.21, | |
| "grad_norm": 0.6460024118423462, | |
| "learning_rate": 3.8402871242709734e-05, | |
| "loss": 0.0098, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 23.24, | |
| "grad_norm": 0.04333605244755745, | |
| "learning_rate": 3.8387916853596534e-05, | |
| "loss": 0.0078, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 23.27, | |
| "grad_norm": 1.6560355424880981, | |
| "learning_rate": 3.8372962464483327e-05, | |
| "loss": 0.0069, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 23.3, | |
| "grad_norm": 1.7110439538955688, | |
| "learning_rate": 3.835800807537012e-05, | |
| "loss": 0.0079, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 23.33, | |
| "grad_norm": 0.34755662083625793, | |
| "learning_rate": 3.834305368625692e-05, | |
| "loss": 0.0117, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 23.33, | |
| "eval_loss": 0.31362003087997437, | |
| "eval_precision": 0.9317794739166089, | |
| "eval_recall": 0.9096031281751286, | |
| "eval_runtime": 302.9137, | |
| "eval_samples_per_second": 44.151, | |
| "eval_steps_per_second": 1.38, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 23.36, | |
| "grad_norm": 0.07322967052459717, | |
| "learning_rate": 3.832809929714372e-05, | |
| "loss": 0.0086, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 23.39, | |
| "grad_norm": 0.1620834916830063, | |
| "learning_rate": 3.8313144908030505e-05, | |
| "loss": 0.0105, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 23.42, | |
| "grad_norm": 1.0541850328445435, | |
| "learning_rate": 3.8298190518917305e-05, | |
| "loss": 0.011, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 23.44, | |
| "grad_norm": 0.008509721606969833, | |
| "learning_rate": 3.8283236129804104e-05, | |
| "loss": 0.009, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 23.47, | |
| "grad_norm": 0.2723921537399292, | |
| "learning_rate": 3.826828174069089e-05, | |
| "loss": 0.0089, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 23.5, | |
| "grad_norm": 0.7700883150100708, | |
| "learning_rate": 3.825332735157769e-05, | |
| "loss": 0.0084, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 23.53, | |
| "grad_norm": 0.7245194911956787, | |
| "learning_rate": 3.823837296246448e-05, | |
| "loss": 0.0068, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 23.56, | |
| "grad_norm": 1.283056378364563, | |
| "learning_rate": 3.822341857335128e-05, | |
| "loss": 0.0108, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 23.59, | |
| "grad_norm": 0.016398323699831963, | |
| "learning_rate": 3.8208464184238075e-05, | |
| "loss": 0.0104, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 23.62, | |
| "grad_norm": 0.32268649339675903, | |
| "learning_rate": 3.819350979512487e-05, | |
| "loss": 0.0085, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 23.62, | |
| "eval_loss": 0.30707934498786926, | |
| "eval_precision": 0.9256538985992314, | |
| "eval_recall": 0.9196403830167185, | |
| "eval_runtime": 304.8987, | |
| "eval_samples_per_second": 43.864, | |
| "eval_steps_per_second": 1.371, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 23.65, | |
| "grad_norm": 0.1340191662311554, | |
| "learning_rate": 3.817855540601167e-05, | |
| "loss": 0.0132, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 23.68, | |
| "grad_norm": 1.2741714715957642, | |
| "learning_rate": 3.816360101689846e-05, | |
| "loss": 0.0086, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 23.71, | |
| "grad_norm": 3.2270684242248535, | |
| "learning_rate": 3.8148646627785254e-05, | |
| "loss": 0.012, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 23.74, | |
| "grad_norm": 0.0873398706316948, | |
| "learning_rate": 3.813369223867205e-05, | |
| "loss": 0.0071, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 23.77, | |
| "grad_norm": 0.36740046739578247, | |
| "learning_rate": 3.811873784955885e-05, | |
| "loss": 0.0082, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 23.8, | |
| "grad_norm": 0.7461920976638794, | |
| "learning_rate": 3.810378346044564e-05, | |
| "loss": 0.0133, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 23.83, | |
| "grad_norm": 1.0577598810195923, | |
| "learning_rate": 3.808882907133244e-05, | |
| "loss": 0.0118, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 23.86, | |
| "grad_norm": 1.9472182989120483, | |
| "learning_rate": 3.807387468221923e-05, | |
| "loss": 0.0116, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 23.89, | |
| "grad_norm": 1.6104402542114258, | |
| "learning_rate": 3.8058920293106025e-05, | |
| "loss": 0.0114, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 23.92, | |
| "grad_norm": 0.03251710161566734, | |
| "learning_rate": 3.8043965903992824e-05, | |
| "loss": 0.0091, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 23.92, | |
| "eval_loss": 0.3046566843986511, | |
| "eval_precision": 0.9268397735663303, | |
| "eval_recall": 0.9275531882139229, | |
| "eval_runtime": 305.7377, | |
| "eval_samples_per_second": 43.743, | |
| "eval_steps_per_second": 1.367, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 23.95, | |
| "grad_norm": 0.8245527744293213, | |
| "learning_rate": 3.802901151487962e-05, | |
| "loss": 0.0067, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 23.98, | |
| "grad_norm": 2.3082966804504395, | |
| "learning_rate": 3.801405712576642e-05, | |
| "loss": 0.0103, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 24.01, | |
| "grad_norm": 0.05168503150343895, | |
| "learning_rate": 3.799910273665321e-05, | |
| "loss": 0.0086, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 24.04, | |
| "grad_norm": 0.3247091770172119, | |
| "learning_rate": 3.798414834754e-05, | |
| "loss": 0.0082, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 24.07, | |
| "grad_norm": 0.30284127593040466, | |
| "learning_rate": 3.79691939584268e-05, | |
| "loss": 0.0065, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 24.1, | |
| "grad_norm": 0.041343070566654205, | |
| "learning_rate": 3.7954239569313595e-05, | |
| "loss": 0.0072, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 24.13, | |
| "grad_norm": 0.5980477929115295, | |
| "learning_rate": 3.793928518020039e-05, | |
| "loss": 0.0088, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 24.16, | |
| "grad_norm": 0.0064304666593670845, | |
| "learning_rate": 3.792433079108719e-05, | |
| "loss": 0.0094, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 24.19, | |
| "grad_norm": 0.6040250062942505, | |
| "learning_rate": 3.790937640197398e-05, | |
| "loss": 0.0079, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 24.22, | |
| "grad_norm": 0.3337300419807434, | |
| "learning_rate": 3.7894422012860773e-05, | |
| "loss": 0.0086, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 24.22, | |
| "eval_loss": 0.3350207209587097, | |
| "eval_precision": 0.9268361054008597, | |
| "eval_recall": 0.916192000985252, | |
| "eval_runtime": 304.7162, | |
| "eval_samples_per_second": 43.89, | |
| "eval_steps_per_second": 1.372, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 24.25, | |
| "grad_norm": 0.710114061832428, | |
| "learning_rate": 3.787946762374757e-05, | |
| "loss": 0.008, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 24.28, | |
| "grad_norm": 0.03623099625110626, | |
| "learning_rate": 3.7864513234634366e-05, | |
| "loss": 0.0131, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 24.31, | |
| "grad_norm": 0.09887418150901794, | |
| "learning_rate": 3.784955884552116e-05, | |
| "loss": 0.0086, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 24.34, | |
| "grad_norm": 0.6916789412498474, | |
| "learning_rate": 3.783460445640796e-05, | |
| "loss": 0.0101, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 24.37, | |
| "grad_norm": 1.4278247356414795, | |
| "learning_rate": 3.781965006729475e-05, | |
| "loss": 0.0107, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 24.4, | |
| "grad_norm": 0.16397880017757416, | |
| "learning_rate": 3.7804695678181544e-05, | |
| "loss": 0.008, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 24.43, | |
| "grad_norm": 0.08632964640855789, | |
| "learning_rate": 3.7789741289068344e-05, | |
| "loss": 0.0078, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 24.46, | |
| "grad_norm": 2.2472782135009766, | |
| "learning_rate": 3.777478689995514e-05, | |
| "loss": 0.011, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 24.49, | |
| "grad_norm": 0.14701958000659943, | |
| "learning_rate": 3.7759832510841936e-05, | |
| "loss": 0.0096, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 24.52, | |
| "grad_norm": 0.051196735352277756, | |
| "learning_rate": 3.774487812172873e-05, | |
| "loss": 0.0111, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 24.52, | |
| "eval_loss": 0.30252349376678467, | |
| "eval_precision": 0.928390712570056, | |
| "eval_recall": 0.8925459527694818, | |
| "eval_runtime": 302.8814, | |
| "eval_samples_per_second": 44.156, | |
| "eval_steps_per_second": 1.38, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 24.55, | |
| "grad_norm": 0.013324776664376259, | |
| "learning_rate": 3.772992373261552e-05, | |
| "loss": 0.0075, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 24.58, | |
| "grad_norm": 0.10291430354118347, | |
| "learning_rate": 3.771496934350232e-05, | |
| "loss": 0.0099, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 24.61, | |
| "grad_norm": 0.07137342542409897, | |
| "learning_rate": 3.7700014954389115e-05, | |
| "loss": 0.012, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 24.64, | |
| "grad_norm": 0.3020240068435669, | |
| "learning_rate": 3.768506056527591e-05, | |
| "loss": 0.0087, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 24.67, | |
| "grad_norm": 1.067194938659668, | |
| "learning_rate": 3.767010617616271e-05, | |
| "loss": 0.0096, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 24.7, | |
| "grad_norm": 0.014255263842642307, | |
| "learning_rate": 3.76551517870495e-05, | |
| "loss": 0.007, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 24.73, | |
| "grad_norm": 0.02688017673790455, | |
| "learning_rate": 3.764019739793629e-05, | |
| "loss": 0.0089, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 24.76, | |
| "grad_norm": 0.3376453220844269, | |
| "learning_rate": 3.762524300882309e-05, | |
| "loss": 0.0066, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 24.79, | |
| "grad_norm": 0.10389913618564606, | |
| "learning_rate": 3.7610288619709886e-05, | |
| "loss": 0.0066, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 24.82, | |
| "grad_norm": 0.7046878337860107, | |
| "learning_rate": 3.759533423059668e-05, | |
| "loss": 0.01, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 24.82, | |
| "eval_loss": 0.3185621201992035, | |
| "eval_precision": 0.9291735873891379, | |
| "eval_recall": 0.9128667754549094, | |
| "eval_runtime": 303.4192, | |
| "eval_samples_per_second": 44.078, | |
| "eval_steps_per_second": 1.378, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 24.85, | |
| "grad_norm": 0.4447859227657318, | |
| "learning_rate": 3.758037984148348e-05, | |
| "loss": 0.0085, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 24.88, | |
| "grad_norm": 2.2701525688171387, | |
| "learning_rate": 3.756542545237027e-05, | |
| "loss": 0.0114, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 24.91, | |
| "grad_norm": 0.05526027828454971, | |
| "learning_rate": 3.755047106325707e-05, | |
| "loss": 0.012, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 24.94, | |
| "grad_norm": 0.8909191489219666, | |
| "learning_rate": 3.7535516674143864e-05, | |
| "loss": 0.0097, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 24.97, | |
| "grad_norm": 0.004659523721784353, | |
| "learning_rate": 3.7520562285030656e-05, | |
| "loss": 0.0085, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 0.05222604423761368, | |
| "learning_rate": 3.7505607895917456e-05, | |
| "loss": 0.0088, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 25.03, | |
| "grad_norm": 0.014093970879912376, | |
| "learning_rate": 3.749065350680425e-05, | |
| "loss": 0.0085, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 25.06, | |
| "grad_norm": 0.0026446671690791845, | |
| "learning_rate": 3.747569911769104e-05, | |
| "loss": 0.005, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 25.09, | |
| "grad_norm": 0.1448344588279724, | |
| "learning_rate": 3.746074472857784e-05, | |
| "loss": 0.0064, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 25.12, | |
| "grad_norm": 0.295718789100647, | |
| "learning_rate": 3.7445790339464634e-05, | |
| "loss": 0.0067, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 25.12, | |
| "eval_loss": 0.32626327872276306, | |
| "eval_precision": 0.9313109964567663, | |
| "eval_recall": 0.9225653499184088, | |
| "eval_runtime": 304.7239, | |
| "eval_samples_per_second": 43.889, | |
| "eval_steps_per_second": 1.372, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 25.15, | |
| "grad_norm": 0.028157589957118034, | |
| "learning_rate": 3.743083595035143e-05, | |
| "loss": 0.0094, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 25.18, | |
| "grad_norm": 0.002226242097094655, | |
| "learning_rate": 3.741588156123823e-05, | |
| "loss": 0.0072, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 25.21, | |
| "grad_norm": 0.7868858575820923, | |
| "learning_rate": 3.740092717212502e-05, | |
| "loss": 0.0103, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 25.24, | |
| "grad_norm": 0.031047280877828598, | |
| "learning_rate": 3.738597278301181e-05, | |
| "loss": 0.01, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 25.27, | |
| "grad_norm": 0.30554434657096863, | |
| "learning_rate": 3.737101839389861e-05, | |
| "loss": 0.0076, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 25.3, | |
| "grad_norm": 1.2695821523666382, | |
| "learning_rate": 3.7356064004785405e-05, | |
| "loss": 0.0092, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 25.33, | |
| "grad_norm": 0.039061836898326874, | |
| "learning_rate": 3.7341109615672205e-05, | |
| "loss": 0.0129, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 25.36, | |
| "grad_norm": 1.0094258785247803, | |
| "learning_rate": 3.7326155226559e-05, | |
| "loss": 0.012, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 25.39, | |
| "grad_norm": 0.16602523624897003, | |
| "learning_rate": 3.731120083744579e-05, | |
| "loss": 0.0072, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 25.42, | |
| "grad_norm": 0.6232153177261353, | |
| "learning_rate": 3.729624644833259e-05, | |
| "loss": 0.0094, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 25.42, | |
| "eval_loss": 0.32043251395225525, | |
| "eval_precision": 0.9310592123725484, | |
| "eval_recall": 0.91936328088919, | |
| "eval_runtime": 304.0822, | |
| "eval_samples_per_second": 43.982, | |
| "eval_steps_per_second": 1.375, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 25.45, | |
| "grad_norm": 1.6009403467178345, | |
| "learning_rate": 3.728129205921938e-05, | |
| "loss": 0.0103, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 25.48, | |
| "grad_norm": 0.6107264757156372, | |
| "learning_rate": 3.7266337670106176e-05, | |
| "loss": 0.0079, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 25.51, | |
| "grad_norm": 0.44173404574394226, | |
| "learning_rate": 3.7251383280992976e-05, | |
| "loss": 0.0065, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 25.54, | |
| "grad_norm": 0.9073717594146729, | |
| "learning_rate": 3.723642889187977e-05, | |
| "loss": 0.0071, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 25.57, | |
| "grad_norm": 0.3392820656299591, | |
| "learning_rate": 3.722147450276656e-05, | |
| "loss": 0.0101, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 25.6, | |
| "grad_norm": 0.07929588109254837, | |
| "learning_rate": 3.720652011365336e-05, | |
| "loss": 0.0083, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 25.63, | |
| "grad_norm": 0.35071372985839844, | |
| "learning_rate": 3.7191565724540154e-05, | |
| "loss": 0.0121, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 25.66, | |
| "grad_norm": 0.20559339225292206, | |
| "learning_rate": 3.717661133542695e-05, | |
| "loss": 0.0073, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 25.69, | |
| "grad_norm": 0.045159224420785904, | |
| "learning_rate": 3.716165694631375e-05, | |
| "loss": 0.0087, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 25.72, | |
| "grad_norm": 0.10148915648460388, | |
| "learning_rate": 3.714670255720054e-05, | |
| "loss": 0.0119, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 25.72, | |
| "eval_loss": 0.31306663155555725, | |
| "eval_precision": 0.9333648989898989, | |
| "eval_recall": 0.9104036454324332, | |
| "eval_runtime": 304.164, | |
| "eval_samples_per_second": 43.97, | |
| "eval_steps_per_second": 1.374, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 25.75, | |
| "grad_norm": 0.18669423460960388, | |
| "learning_rate": 3.713174816808734e-05, | |
| "loss": 0.0063, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 25.78, | |
| "grad_norm": 0.10197019577026367, | |
| "learning_rate": 3.711679377897413e-05, | |
| "loss": 0.0083, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 25.81, | |
| "grad_norm": 0.0219405684620142, | |
| "learning_rate": 3.7101839389860925e-05, | |
| "loss": 0.0088, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 25.84, | |
| "grad_norm": 0.941899836063385, | |
| "learning_rate": 3.7086885000747725e-05, | |
| "loss": 0.006, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 25.87, | |
| "grad_norm": 0.042357202619314194, | |
| "learning_rate": 3.707193061163452e-05, | |
| "loss": 0.0107, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 25.9, | |
| "grad_norm": 0.04090040549635887, | |
| "learning_rate": 3.705697622252131e-05, | |
| "loss": 0.0076, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 25.93, | |
| "grad_norm": 1.0006482601165771, | |
| "learning_rate": 3.704202183340811e-05, | |
| "loss": 0.0081, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 25.96, | |
| "grad_norm": 0.01344706118106842, | |
| "learning_rate": 3.70270674442949e-05, | |
| "loss": 0.0061, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 25.99, | |
| "grad_norm": 0.039950937032699585, | |
| "learning_rate": 3.7012113055181696e-05, | |
| "loss": 0.0095, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 26.02, | |
| "grad_norm": 0.007412883453071117, | |
| "learning_rate": 3.6997158666068496e-05, | |
| "loss": 0.0061, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 26.02, | |
| "eval_loss": 0.3440411686897278, | |
| "eval_precision": 0.9280669958127618, | |
| "eval_recall": 0.9144370208442378, | |
| "eval_runtime": 304.1449, | |
| "eval_samples_per_second": 43.972, | |
| "eval_steps_per_second": 1.374, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 26.05, | |
| "grad_norm": 0.045031215995550156, | |
| "learning_rate": 3.698220427695529e-05, | |
| "loss": 0.0083, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 26.08, | |
| "grad_norm": 0.5366631150245667, | |
| "learning_rate": 3.696724988784208e-05, | |
| "loss": 0.0069, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 26.11, | |
| "grad_norm": 0.24467185139656067, | |
| "learning_rate": 3.695229549872888e-05, | |
| "loss": 0.0065, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 26.14, | |
| "grad_norm": 0.7528616786003113, | |
| "learning_rate": 3.6937341109615674e-05, | |
| "loss": 0.0087, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 26.17, | |
| "grad_norm": 0.15506117045879364, | |
| "learning_rate": 3.692238672050247e-05, | |
| "loss": 0.0072, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 26.2, | |
| "grad_norm": 0.2464226335287094, | |
| "learning_rate": 3.6907432331389266e-05, | |
| "loss": 0.0053, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 26.23, | |
| "grad_norm": 0.15138311684131622, | |
| "learning_rate": 3.689247794227606e-05, | |
| "loss": 0.0063, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 26.26, | |
| "grad_norm": 0.07477385550737381, | |
| "learning_rate": 3.687752355316286e-05, | |
| "loss": 0.0076, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 26.29, | |
| "grad_norm": 0.661697268486023, | |
| "learning_rate": 3.686256916404965e-05, | |
| "loss": 0.0078, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 26.32, | |
| "grad_norm": 0.16399236023426056, | |
| "learning_rate": 3.6847614774936445e-05, | |
| "loss": 0.0085, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 26.32, | |
| "eval_loss": 0.326471209526062, | |
| "eval_precision": 0.9298322483725588, | |
| "eval_recall": 0.9147449120970473, | |
| "eval_runtime": 305.1957, | |
| "eval_samples_per_second": 43.821, | |
| "eval_steps_per_second": 1.37, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 26.35, | |
| "grad_norm": 0.5788341164588928, | |
| "learning_rate": 3.6832660385823244e-05, | |
| "loss": 0.0097, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 26.38, | |
| "grad_norm": 0.38478532433509827, | |
| "learning_rate": 3.681770599671003e-05, | |
| "loss": 0.0083, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 26.41, | |
| "grad_norm": 1.8616811037063599, | |
| "learning_rate": 3.680275160759683e-05, | |
| "loss": 0.0082, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 26.44, | |
| "grad_norm": 0.005648652091622353, | |
| "learning_rate": 3.678779721848363e-05, | |
| "loss": 0.0074, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 26.47, | |
| "grad_norm": 0.013662021607160568, | |
| "learning_rate": 3.677284282937042e-05, | |
| "loss": 0.0054, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 26.5, | |
| "grad_norm": 0.21754692494869232, | |
| "learning_rate": 3.6757888440257216e-05, | |
| "loss": 0.0115, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 26.53, | |
| "grad_norm": 0.0358903631567955, | |
| "learning_rate": 3.6742934051144015e-05, | |
| "loss": 0.0097, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 26.56, | |
| "grad_norm": 0.9966431856155396, | |
| "learning_rate": 3.672797966203081e-05, | |
| "loss": 0.0074, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 26.58, | |
| "grad_norm": 0.7227293848991394, | |
| "learning_rate": 3.67130252729176e-05, | |
| "loss": 0.0088, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 26.61, | |
| "grad_norm": 1.3261148929595947, | |
| "learning_rate": 3.66980708838044e-05, | |
| "loss": 0.0072, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 26.61, | |
| "eval_loss": 0.3263101279735565, | |
| "eval_precision": 0.9263782601905357, | |
| "eval_recall": 0.9131438775824379, | |
| "eval_runtime": 306.4472, | |
| "eval_samples_per_second": 43.642, | |
| "eval_steps_per_second": 1.364, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 26.64, | |
| "grad_norm": 0.11170350760221481, | |
| "learning_rate": 3.6683116494691194e-05, | |
| "loss": 0.0092, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 26.67, | |
| "grad_norm": 1.529340147972107, | |
| "learning_rate": 3.666816210557799e-05, | |
| "loss": 0.0089, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 26.7, | |
| "grad_norm": 0.01682981289923191, | |
| "learning_rate": 3.665320771646478e-05, | |
| "loss": 0.0093, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 26.73, | |
| "grad_norm": 0.3299085199832916, | |
| "learning_rate": 3.663825332735158e-05, | |
| "loss": 0.0063, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 26.76, | |
| "grad_norm": 1.9823254346847534, | |
| "learning_rate": 3.662329893823838e-05, | |
| "loss": 0.0091, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 26.79, | |
| "grad_norm": 0.07487453520298004, | |
| "learning_rate": 3.6608344549125165e-05, | |
| "loss": 0.009, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 26.82, | |
| "grad_norm": 0.015319288708269596, | |
| "learning_rate": 3.6593390160011964e-05, | |
| "loss": 0.0078, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 26.85, | |
| "grad_norm": 0.004087815526872873, | |
| "learning_rate": 3.6578435770898764e-05, | |
| "loss": 0.0069, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 26.88, | |
| "grad_norm": 0.00753753213211894, | |
| "learning_rate": 3.656348138178556e-05, | |
| "loss": 0.0057, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 26.91, | |
| "grad_norm": 0.012257667258381844, | |
| "learning_rate": 3.654852699267235e-05, | |
| "loss": 0.0095, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 26.91, | |
| "eval_loss": 0.3233014643192291, | |
| "eval_precision": 0.9329517062525696, | |
| "eval_recall": 0.9082484066627667, | |
| "eval_runtime": 304.4964, | |
| "eval_samples_per_second": 43.922, | |
| "eval_steps_per_second": 1.373, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 26.94, | |
| "grad_norm": 0.030741436406970024, | |
| "learning_rate": 3.653357260355915e-05, | |
| "loss": 0.0067, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 26.97, | |
| "grad_norm": 0.429049551486969, | |
| "learning_rate": 3.651861821444594e-05, | |
| "loss": 0.012, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "grad_norm": 0.002479678951203823, | |
| "learning_rate": 3.6503663825332735e-05, | |
| "loss": 0.005, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 27.03, | |
| "grad_norm": 0.12390375137329102, | |
| "learning_rate": 3.648870943621953e-05, | |
| "loss": 0.0083, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 27.06, | |
| "grad_norm": 0.044969938695430756, | |
| "learning_rate": 3.647375504710633e-05, | |
| "loss": 0.0073, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 27.09, | |
| "grad_norm": 0.06378799676895142, | |
| "learning_rate": 3.645880065799313e-05, | |
| "loss": 0.0073, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 27.12, | |
| "grad_norm": 0.323734849691391, | |
| "learning_rate": 3.6443846268879914e-05, | |
| "loss": 0.0078, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 27.15, | |
| "grad_norm": 1.6457269191741943, | |
| "learning_rate": 3.642889187976671e-05, | |
| "loss": 0.0055, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 27.18, | |
| "grad_norm": 0.007004741113632917, | |
| "learning_rate": 3.641393749065351e-05, | |
| "loss": 0.0065, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 27.21, | |
| "grad_norm": 0.06395163387060165, | |
| "learning_rate": 3.63989831015403e-05, | |
| "loss": 0.0062, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 27.21, | |
| "eval_loss": 0.32764899730682373, | |
| "eval_precision": 0.9317584480600751, | |
| "eval_recall": 0.916869361741433, | |
| "eval_runtime": 309.1631, | |
| "eval_samples_per_second": 43.259, | |
| "eval_steps_per_second": 1.352, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 27.24, | |
| "grad_norm": 0.005486265290528536, | |
| "learning_rate": 3.63840287124271e-05, | |
| "loss": 0.0082, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 27.27, | |
| "grad_norm": 2.3132262229919434, | |
| "learning_rate": 3.63690743233139e-05, | |
| "loss": 0.0067, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 27.3, | |
| "grad_norm": 0.07687461376190186, | |
| "learning_rate": 3.635411993420069e-05, | |
| "loss": 0.0051, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 27.33, | |
| "grad_norm": 0.05096305161714554, | |
| "learning_rate": 3.6339165545087484e-05, | |
| "loss": 0.0061, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 27.36, | |
| "grad_norm": 0.21200311183929443, | |
| "learning_rate": 3.6324211155974284e-05, | |
| "loss": 0.0072, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 27.39, | |
| "grad_norm": 0.07336900383234024, | |
| "learning_rate": 3.630925676686108e-05, | |
| "loss": 0.008, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 27.42, | |
| "grad_norm": 0.026788916438817978, | |
| "learning_rate": 3.629430237774787e-05, | |
| "loss": 0.0068, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 27.45, | |
| "grad_norm": 0.03046250529587269, | |
| "learning_rate": 3.627934798863466e-05, | |
| "loss": 0.0081, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 27.48, | |
| "grad_norm": 0.32240158319473267, | |
| "learning_rate": 3.626439359952146e-05, | |
| "loss": 0.0091, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 27.51, | |
| "grad_norm": 0.1428656429052353, | |
| "learning_rate": 3.624943921040826e-05, | |
| "loss": 0.007, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 27.51, | |
| "eval_loss": 0.3499869704246521, | |
| "eval_precision": 0.9278612426685068, | |
| "eval_recall": 0.9108346931863666, | |
| "eval_runtime": 310.2456, | |
| "eval_samples_per_second": 43.108, | |
| "eval_steps_per_second": 1.347, | |
| "step": 92000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 334400, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 100, | |
| "save_steps": 500, | |
| "total_flos": 4.8090441780412416e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |