| { | |
| "best_metric": 0.9472436482214578, | |
| "best_model_checkpoint": "./arabert_author_model_full/checkpoint-35000", | |
| "epoch": 3.9643749321168675, | |
| "eval_steps": 500, | |
| "global_step": 36500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.054306505919409144, | |
| "grad_norm": 14.85861587524414, | |
| "learning_rate": 1.0795547108335597e-05, | |
| "loss": 2.7396, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.054306505919409144, | |
| "eval_accuracy": 0.5551075268817204, | |
| "eval_f1_macro": 0.44528091004577197, | |
| "eval_f1_micro": 0.5551075268817204, | |
| "eval_loss": 2.086405038833618, | |
| "eval_precision_macro": 0.5488362024601817, | |
| "eval_precision_micro": 0.5551075268817204, | |
| "eval_recall_macro": 0.5011249738249491, | |
| "eval_recall_micro": 0.5551075268817204, | |
| "eval_runtime": 1.5572, | |
| "eval_samples_per_second": 477.769, | |
| "eval_steps_per_second": 59.721, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.10861301183881829, | |
| "grad_norm": 16.272708892822266, | |
| "learning_rate": 2.1656258484930763e-05, | |
| "loss": 1.584, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.10861301183881829, | |
| "eval_accuracy": 0.7190860215053764, | |
| "eval_f1_macro": 0.6681343461862342, | |
| "eval_f1_micro": 0.7190860215053764, | |
| "eval_loss": 1.1402474641799927, | |
| "eval_precision_macro": 0.7284168208592146, | |
| "eval_precision_micro": 0.7190860215053764, | |
| "eval_recall_macro": 0.6741017912193727, | |
| "eval_recall_micro": 0.7190860215053764, | |
| "eval_runtime": 1.5592, | |
| "eval_samples_per_second": 477.171, | |
| "eval_steps_per_second": 59.646, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.16291951775822744, | |
| "grad_norm": 43.108097076416016, | |
| "learning_rate": 3.2516969861525934e-05, | |
| "loss": 1.0278, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.16291951775822744, | |
| "eval_accuracy": 0.6935483870967742, | |
| "eval_f1_macro": 0.6694441122919238, | |
| "eval_f1_micro": 0.6935483870967742, | |
| "eval_loss": 0.9176589250564575, | |
| "eval_precision_macro": 0.719370166471438, | |
| "eval_precision_micro": 0.6935483870967742, | |
| "eval_recall_macro": 0.700540634546637, | |
| "eval_recall_micro": 0.6935483870967742, | |
| "eval_runtime": 1.5614, | |
| "eval_samples_per_second": 476.485, | |
| "eval_steps_per_second": 59.561, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.21722602367763658, | |
| "grad_norm": 21.786911010742188, | |
| "learning_rate": 4.33776812381211e-05, | |
| "loss": 0.7819, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.21722602367763658, | |
| "eval_accuracy": 0.7365591397849462, | |
| "eval_f1_macro": 0.7225937916281533, | |
| "eval_f1_micro": 0.7365591397849462, | |
| "eval_loss": 0.9186232089996338, | |
| "eval_precision_macro": 0.8090241740547917, | |
| "eval_precision_micro": 0.7365591397849462, | |
| "eval_recall_macro": 0.730498755334377, | |
| "eval_recall_micro": 0.7365591397849462, | |
| "eval_runtime": 1.7089, | |
| "eval_samples_per_second": 435.367, | |
| "eval_steps_per_second": 54.421, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.2715325295970457, | |
| "grad_norm": 28.50157356262207, | |
| "learning_rate": 5.4216671191963076e-05, | |
| "loss": 0.7433, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.2715325295970457, | |
| "eval_accuracy": 0.8091397849462365, | |
| "eval_f1_macro": 0.7931780524032125, | |
| "eval_f1_micro": 0.8091397849462365, | |
| "eval_loss": 0.6774040460586548, | |
| "eval_precision_macro": 0.8236755927756734, | |
| "eval_precision_micro": 0.8091397849462365, | |
| "eval_recall_macro": 0.8138164959150519, | |
| "eval_recall_micro": 0.8091397849462365, | |
| "eval_runtime": 1.5765, | |
| "eval_samples_per_second": 471.94, | |
| "eval_steps_per_second": 58.992, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.3258390355164549, | |
| "grad_norm": 23.32242774963379, | |
| "learning_rate": 6.507738256855825e-05, | |
| "loss": 0.7224, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.3258390355164549, | |
| "eval_accuracy": 0.7701612903225806, | |
| "eval_f1_macro": 0.7568919015291462, | |
| "eval_f1_micro": 0.7701612903225806, | |
| "eval_loss": 0.726546585559845, | |
| "eval_precision_macro": 0.8108640165896299, | |
| "eval_precision_micro": 0.7701612903225806, | |
| "eval_recall_macro": 0.7863245772318792, | |
| "eval_recall_micro": 0.7701612903225806, | |
| "eval_runtime": 1.5947, | |
| "eval_samples_per_second": 466.552, | |
| "eval_steps_per_second": 58.319, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.38014554143586404, | |
| "grad_norm": 38.11383056640625, | |
| "learning_rate": 7.591637252240023e-05, | |
| "loss": 0.7578, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.38014554143586404, | |
| "eval_accuracy": 0.831989247311828, | |
| "eval_f1_macro": 0.8113946213107872, | |
| "eval_f1_micro": 0.831989247311828, | |
| "eval_loss": 0.5666089653968811, | |
| "eval_precision_macro": 0.8300071137831838, | |
| "eval_precision_micro": 0.831989247311828, | |
| "eval_recall_macro": 0.8277091616765468, | |
| "eval_recall_micro": 0.831989247311828, | |
| "eval_runtime": 1.5775, | |
| "eval_samples_per_second": 471.647, | |
| "eval_steps_per_second": 58.956, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.43445204735527315, | |
| "grad_norm": 27.02998161315918, | |
| "learning_rate": 7.998262267095997e-05, | |
| "loss": 0.7705, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.43445204735527315, | |
| "eval_accuracy": 0.793010752688172, | |
| "eval_f1_macro": 0.7783295125154664, | |
| "eval_f1_micro": 0.793010752688172, | |
| "eval_loss": 0.7473301887512207, | |
| "eval_precision_macro": 0.8365596188425319, | |
| "eval_precision_micro": 0.793010752688172, | |
| "eval_recall_macro": 0.7944564993024996, | |
| "eval_recall_micro": 0.793010752688172, | |
| "eval_runtime": 1.592, | |
| "eval_samples_per_second": 467.348, | |
| "eval_steps_per_second": 58.418, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.4887585532746823, | |
| "grad_norm": 4.219649314880371, | |
| "learning_rate": 7.988188051558617e-05, | |
| "loss": 0.714, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.4887585532746823, | |
| "eval_accuracy": 0.8615591397849462, | |
| "eval_f1_macro": 0.8525941744256668, | |
| "eval_f1_micro": 0.8615591397849462, | |
| "eval_loss": 0.5411700010299683, | |
| "eval_precision_macro": 0.8738317002846235, | |
| "eval_precision_micro": 0.8615591397849462, | |
| "eval_recall_macro": 0.857343934692196, | |
| "eval_recall_micro": 0.8615591397849462, | |
| "eval_runtime": 1.5935, | |
| "eval_samples_per_second": 466.9, | |
| "eval_steps_per_second": 58.363, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.5430650591940914, | |
| "grad_norm": 2.9277937412261963, | |
| "learning_rate": 7.969158166695609e-05, | |
| "loss": 0.6781, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.5430650591940914, | |
| "eval_accuracy": 0.8440860215053764, | |
| "eval_f1_macro": 0.8140189163428593, | |
| "eval_f1_micro": 0.8440860215053764, | |
| "eval_loss": 0.584912896156311, | |
| "eval_precision_macro": 0.8304958001958954, | |
| "eval_precision_micro": 0.8440860215053764, | |
| "eval_recall_macro": 0.8277544733650097, | |
| "eval_recall_micro": 0.8440860215053764, | |
| "eval_runtime": 1.5664, | |
| "eval_samples_per_second": 474.976, | |
| "eval_steps_per_second": 59.372, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.5973715651135006, | |
| "grad_norm": 2.422699213027954, | |
| "learning_rate": 7.941215345034608e-05, | |
| "loss": 0.6365, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.5973715651135006, | |
| "eval_accuracy": 0.8198924731182796, | |
| "eval_f1_macro": 0.8098480937728665, | |
| "eval_f1_micro": 0.8198924731182796, | |
| "eval_loss": 0.6339462995529175, | |
| "eval_precision_macro": 0.8511279803543597, | |
| "eval_precision_micro": 0.8198924731182796, | |
| "eval_recall_macro": 0.8376000571890804, | |
| "eval_recall_micro": 0.8198924731182796, | |
| "eval_runtime": 1.5714, | |
| "eval_samples_per_second": 473.456, | |
| "eval_steps_per_second": 59.182, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.6516780710329098, | |
| "grad_norm": 24.36614227294922, | |
| "learning_rate": 7.90442233353428e-05, | |
| "loss": 0.5609, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.6516780710329098, | |
| "eval_accuracy": 0.8803763440860215, | |
| "eval_f1_macro": 0.8776901808589186, | |
| "eval_f1_micro": 0.8803763440860215, | |
| "eval_loss": 0.4191497564315796, | |
| "eval_precision_macro": 0.9013592602931226, | |
| "eval_precision_micro": 0.8803763440860215, | |
| "eval_recall_macro": 0.8870337188867564, | |
| "eval_recall_micro": 0.8803763440860215, | |
| "eval_runtime": 1.5644, | |
| "eval_samples_per_second": 475.586, | |
| "eval_steps_per_second": 59.448, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.7059845769523189, | |
| "grad_norm": 7.382803440093994, | |
| "learning_rate": 7.85896155913429e-05, | |
| "loss": 0.557, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.7059845769523189, | |
| "eval_accuracy": 0.8870967741935484, | |
| "eval_f1_macro": 0.873257230355725, | |
| "eval_f1_micro": 0.8870967741935484, | |
| "eval_loss": 0.43305835127830505, | |
| "eval_precision_macro": 0.9066583526088271, | |
| "eval_precision_micro": 0.8870967741935484, | |
| "eval_recall_macro": 0.8813447066604991, | |
| "eval_recall_micro": 0.8870967741935484, | |
| "eval_runtime": 1.572, | |
| "eval_samples_per_second": 473.271, | |
| "eval_steps_per_second": 59.159, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.7602910828717281, | |
| "grad_norm": 40.95680618286133, | |
| "learning_rate": 7.804752932863711e-05, | |
| "loss": 0.5581, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.7602910828717281, | |
| "eval_accuracy": 0.8400537634408602, | |
| "eval_f1_macro": 0.832080514333091, | |
| "eval_f1_micro": 0.8400537634408602, | |
| "eval_loss": 0.6264011263847351, | |
| "eval_precision_macro": 0.8486272179160291, | |
| "eval_precision_micro": 0.8400537634408602, | |
| "eval_recall_macro": 0.8385309495501778, | |
| "eval_recall_micro": 0.8400537634408602, | |
| "eval_runtime": 1.5709, | |
| "eval_samples_per_second": 473.61, | |
| "eval_steps_per_second": 59.201, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.8145975887911372, | |
| "grad_norm": 1.4635225534439087, | |
| "learning_rate": 7.74200054970606e-05, | |
| "loss": 0.6216, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.8145975887911372, | |
| "eval_accuracy": 0.8790322580645161, | |
| "eval_f1_macro": 0.8733027369549773, | |
| "eval_f1_micro": 0.8790322580645161, | |
| "eval_loss": 0.409856379032135, | |
| "eval_precision_macro": 0.8884912266604943, | |
| "eval_precision_micro": 0.8790322580645161, | |
| "eval_recall_macro": 0.8755203032263899, | |
| "eval_recall_micro": 0.8790322580645161, | |
| "eval_runtime": 1.5605, | |
| "eval_samples_per_second": 476.783, | |
| "eval_steps_per_second": 59.598, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.8689040947105463, | |
| "grad_norm": 45.099891662597656, | |
| "learning_rate": 7.670845323175364e-05, | |
| "loss": 0.5329, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.8689040947105463, | |
| "eval_accuracy": 0.8494623655913979, | |
| "eval_f1_macro": 0.8493612714696808, | |
| "eval_f1_micro": 0.8494623655913979, | |
| "eval_loss": 0.4563172161579132, | |
| "eval_precision_macro": 0.893910479148947, | |
| "eval_precision_micro": 0.8494623655913979, | |
| "eval_recall_macro": 0.8604223349122991, | |
| "eval_recall_micro": 0.8494623655913979, | |
| "eval_runtime": 1.5627, | |
| "eval_samples_per_second": 476.104, | |
| "eval_steps_per_second": 59.513, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.9232106006299554, | |
| "grad_norm": 144.00657653808594, | |
| "learning_rate": 7.591447035777205e-05, | |
| "loss": 0.5361, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.9232106006299554, | |
| "eval_accuracy": 0.8655913978494624, | |
| "eval_f1_macro": 0.8441067821828483, | |
| "eval_f1_micro": 0.8655913978494624, | |
| "eval_loss": 0.6785014867782593, | |
| "eval_precision_macro": 0.8860824516800835, | |
| "eval_precision_micro": 0.8655913978494624, | |
| "eval_recall_macro": 0.8499754697649283, | |
| "eval_recall_micro": 0.8655913978494624, | |
| "eval_runtime": 1.7462, | |
| "eval_samples_per_second": 426.057, | |
| "eval_steps_per_second": 53.257, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.9775171065493646, | |
| "grad_norm": 51.59966278076172, | |
| "learning_rate": 7.50398398020936e-05, | |
| "loss": 0.5004, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.9775171065493646, | |
| "eval_accuracy": 0.8830645161290323, | |
| "eval_f1_macro": 0.8700101159667029, | |
| "eval_f1_micro": 0.8830645161290323, | |
| "eval_loss": 0.5261440277099609, | |
| "eval_precision_macro": 0.9053295147781684, | |
| "eval_precision_micro": 0.8830645161290323, | |
| "eval_recall_macro": 0.8611036436092915, | |
| "eval_recall_micro": 0.8830645161290323, | |
| "eval_runtime": 1.5897, | |
| "eval_samples_per_second": 468.018, | |
| "eval_steps_per_second": 58.502, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.0318236124687739, | |
| "grad_norm": 1.6247466802597046, | |
| "learning_rate": 7.408850935077056e-05, | |
| "loss": 0.4548, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.0318236124687739, | |
| "eval_accuracy": 0.885752688172043, | |
| "eval_f1_macro": 0.8771175055624273, | |
| "eval_f1_micro": 0.885752688172043, | |
| "eval_loss": 0.47863149642944336, | |
| "eval_precision_macro": 0.9058069553824752, | |
| "eval_precision_micro": 0.885752688172043, | |
| "eval_recall_macro": 0.8766787609951181, | |
| "eval_recall_micro": 0.885752688172043, | |
| "eval_runtime": 1.5934, | |
| "eval_samples_per_second": 466.926, | |
| "eval_steps_per_second": 58.366, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.0861301183881829, | |
| "grad_norm": 0.7039883732795715, | |
| "learning_rate": 7.305880302978226e-05, | |
| "loss": 0.3634, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.0861301183881829, | |
| "eval_accuracy": 0.8454301075268817, | |
| "eval_f1_macro": 0.8355409368806581, | |
| "eval_f1_micro": 0.8454301075268817, | |
| "eval_loss": 0.584094226360321, | |
| "eval_precision_macro": 0.8637046558195853, | |
| "eval_precision_micro": 0.8454301075268817, | |
| "eval_recall_macro": 0.8620713017041678, | |
| "eval_recall_micro": 0.8454301075268817, | |
| "eval_runtime": 1.5924, | |
| "eval_samples_per_second": 467.211, | |
| "eval_steps_per_second": 58.401, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.140436624307592, | |
| "grad_norm": 18.514562606811523, | |
| "learning_rate": 7.195486156631612e-05, | |
| "loss": 0.38, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.140436624307592, | |
| "eval_accuracy": 0.8736559139784946, | |
| "eval_f1_macro": 0.8650283345776192, | |
| "eval_f1_micro": 0.8736559139784946, | |
| "eval_loss": 0.609620213508606, | |
| "eval_precision_macro": 0.8846991890947015, | |
| "eval_precision_micro": 0.8736559139784946, | |
| "eval_recall_macro": 0.8788120484876811, | |
| "eval_recall_micro": 0.8736559139784946, | |
| "eval_runtime": 1.596, | |
| "eval_samples_per_second": 466.176, | |
| "eval_steps_per_second": 58.272, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.194743130227001, | |
| "grad_norm": 33.06956481933594, | |
| "learning_rate": 7.077916391436673e-05, | |
| "loss": 0.4346, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.194743130227001, | |
| "eval_accuracy": 0.8588709677419355, | |
| "eval_f1_macro": 0.8369609828838035, | |
| "eval_f1_micro": 0.8588709677419355, | |
| "eval_loss": 0.6567370891571045, | |
| "eval_precision_macro": 0.8596584963088156, | |
| "eval_precision_micro": 0.8588709677419355, | |
| "eval_recall_macro": 0.8553630932096153, | |
| "eval_recall_micro": 0.8588709677419355, | |
| "eval_runtime": 1.5912, | |
| "eval_samples_per_second": 467.56, | |
| "eval_steps_per_second": 58.445, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.2490496361464103, | |
| "grad_norm": 5.124754428863525, | |
| "learning_rate": 6.953690693258913e-05, | |
| "loss": 0.4048, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.2490496361464103, | |
| "eval_accuracy": 0.8185483870967742, | |
| "eval_f1_macro": 0.8138708613483465, | |
| "eval_f1_micro": 0.8185483870967742, | |
| "eval_loss": 0.8613069653511047, | |
| "eval_precision_macro": 0.8751078398440778, | |
| "eval_precision_micro": 0.8185483870967742, | |
| "eval_recall_macro": 0.8210231918764864, | |
| "eval_recall_micro": 0.8185483870967742, | |
| "eval_runtime": 1.5897, | |
| "eval_samples_per_second": 468.009, | |
| "eval_steps_per_second": 58.501, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.3033561420658195, | |
| "grad_norm": 5.379746437072754, | |
| "learning_rate": 6.822590211705641e-05, | |
| "loss": 0.4131, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.3033561420658195, | |
| "eval_accuracy": 0.907258064516129, | |
| "eval_f1_macro": 0.9064546460371958, | |
| "eval_f1_micro": 0.907258064516129, | |
| "eval_loss": 0.383698046207428, | |
| "eval_precision_macro": 0.9285092418542091, | |
| "eval_precision_micro": 0.907258064516129, | |
| "eval_recall_macro": 0.9084149302207563, | |
| "eval_recall_micro": 0.907258064516129, | |
| "eval_runtime": 1.6383, | |
| "eval_samples_per_second": 454.138, | |
| "eval_steps_per_second": 56.767, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.3576626479852285, | |
| "grad_norm": 21.04884910583496, | |
| "learning_rate": 6.685432467442772e-05, | |
| "loss": 0.3744, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.3576626479852285, | |
| "eval_accuracy": 0.8884408602150538, | |
| "eval_f1_macro": 0.903985402366871, | |
| "eval_f1_micro": 0.8884408602150538, | |
| "eval_loss": 0.33839282393455505, | |
| "eval_precision_macro": 0.9202041810282631, | |
| "eval_precision_micro": 0.8884408602150538, | |
| "eval_recall_macro": 0.9167129841156823, | |
| "eval_recall_micro": 0.8884408602150538, | |
| "eval_runtime": 1.5684, | |
| "eval_samples_per_second": 474.375, | |
| "eval_steps_per_second": 59.297, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.4119691539046377, | |
| "grad_norm": 2.0264995098114014, | |
| "learning_rate": 6.541975827485887e-05, | |
| "loss": 0.3789, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.4119691539046377, | |
| "eval_accuracy": 0.8548387096774194, | |
| "eval_f1_macro": 0.8568056198138125, | |
| "eval_f1_micro": 0.8548387096774194, | |
| "eval_loss": 0.5327471494674683, | |
| "eval_precision_macro": 0.8878810831304436, | |
| "eval_precision_micro": 0.8548387096774194, | |
| "eval_recall_macro": 0.8739629853819976, | |
| "eval_recall_micro": 0.8548387096774194, | |
| "eval_runtime": 1.5511, | |
| "eval_samples_per_second": 479.662, | |
| "eval_steps_per_second": 59.958, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.466275659824047, | |
| "grad_norm": 3.5367252826690674, | |
| "learning_rate": 6.392811057759191e-05, | |
| "loss": 0.3494, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.466275659824047, | |
| "eval_accuracy": 0.9005376344086021, | |
| "eval_f1_macro": 0.8873317559394204, | |
| "eval_f1_micro": 0.9005376344086021, | |
| "eval_loss": 0.38247689604759216, | |
| "eval_precision_macro": 0.8991156587188684, | |
| "eval_precision_micro": 0.9005376344086021, | |
| "eval_recall_macro": 0.8980828267359259, | |
| "eval_recall_micro": 0.9005376344086021, | |
| "eval_runtime": 1.545, | |
| "eval_samples_per_second": 481.563, | |
| "eval_steps_per_second": 60.195, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.520582165743456, | |
| "grad_norm": 0.7285934090614319, | |
| "learning_rate": 6.238273114973711e-05, | |
| "loss": 0.3942, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.520582165743456, | |
| "eval_accuracy": 0.9045698924731183, | |
| "eval_f1_macro": 0.8961888882643896, | |
| "eval_f1_micro": 0.9045698924731183, | |
| "eval_loss": 0.45909252762794495, | |
| "eval_precision_macro": 0.9132890481201191, | |
| "eval_precision_micro": 0.9045698924731183, | |
| "eval_recall_macro": 0.896670735624709, | |
| "eval_recall_micro": 0.9045698924731183, | |
| "eval_runtime": 1.5463, | |
| "eval_samples_per_second": 481.142, | |
| "eval_steps_per_second": 60.143, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.5748886716628652, | |
| "grad_norm": 2.59505295753479, | |
| "learning_rate": 6.078709021560674e-05, | |
| "loss": 0.3023, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.5748886716628652, | |
| "eval_accuracy": 0.8978494623655914, | |
| "eval_f1_macro": 0.8915991372968476, | |
| "eval_f1_micro": 0.8978494623655914, | |
| "eval_loss": 0.5516388416290283, | |
| "eval_precision_macro": 0.9068635931933978, | |
| "eval_precision_micro": 0.8978494623655914, | |
| "eval_recall_macro": 0.8942451339148669, | |
| "eval_recall_micro": 0.8978494623655914, | |
| "eval_runtime": 1.5446, | |
| "eval_samples_per_second": 481.685, | |
| "eval_steps_per_second": 60.211, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.6291951775822744, | |
| "grad_norm": 2.187466859817505, | |
| "learning_rate": 5.9144770864158464e-05, | |
| "loss": 0.3757, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.6291951775822744, | |
| "eval_accuracy": 0.8951612903225806, | |
| "eval_f1_macro": 0.8833952116973107, | |
| "eval_f1_micro": 0.8951612903225806, | |
| "eval_loss": 0.4487917423248291, | |
| "eval_precision_macro": 0.9141733968196161, | |
| "eval_precision_micro": 0.8951612903225806, | |
| "eval_recall_macro": 0.8813523748993086, | |
| "eval_recall_micro": 0.8951612903225806, | |
| "eval_runtime": 1.5598, | |
| "eval_samples_per_second": 476.976, | |
| "eval_steps_per_second": 59.622, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.6835016835016834, | |
| "grad_norm": 0.7013949751853943, | |
| "learning_rate": 5.745946100299554e-05, | |
| "loss": 0.3241, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.6835016835016834, | |
| "eval_accuracy": 0.8817204301075269, | |
| "eval_f1_macro": 0.8941666527754931, | |
| "eval_f1_micro": 0.8817204301075269, | |
| "eval_loss": 0.39030957221984863, | |
| "eval_precision_macro": 0.9229567222513297, | |
| "eval_precision_micro": 0.8817204301075269, | |
| "eval_recall_macro": 0.9032318624546449, | |
| "eval_recall_micro": 0.8817204301075269, | |
| "eval_runtime": 1.5563, | |
| "eval_samples_per_second": 478.06, | |
| "eval_steps_per_second": 59.757, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.7378081894210926, | |
| "grad_norm": 28.38803482055664, | |
| "learning_rate": 5.573494507699185e-05, | |
| "loss": 0.349, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.7378081894210926, | |
| "eval_accuracy": 0.9126344086021505, | |
| "eval_f1_macro": 0.9078743168203657, | |
| "eval_f1_micro": 0.9126344086021505, | |
| "eval_loss": 0.26589375734329224, | |
| "eval_precision_macro": 0.9294672653448444, | |
| "eval_precision_micro": 0.9126344086021505, | |
| "eval_recall_macro": 0.9187759272246643, | |
| "eval_recall_micro": 0.9126344086021505, | |
| "eval_runtime": 1.5586, | |
| "eval_samples_per_second": 477.346, | |
| "eval_steps_per_second": 59.668, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.7921146953405018, | |
| "grad_norm": 119.5204086303711, | |
| "learning_rate": 5.3975095570137725e-05, | |
| "loss": 0.3292, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.7921146953405018, | |
| "eval_accuracy": 0.8776881720430108, | |
| "eval_f1_macro": 0.8896563190824521, | |
| "eval_f1_micro": 0.8776881720430108, | |
| "eval_loss": 0.3458763062953949, | |
| "eval_precision_macro": 0.9206080781097684, | |
| "eval_precision_micro": 0.8776881720430108, | |
| "eval_recall_macro": 0.8957695476343184, | |
| "eval_recall_micro": 0.8776881720430108, | |
| "eval_runtime": 1.6873, | |
| "eval_samples_per_second": 440.949, | |
| "eval_steps_per_second": 55.119, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.8464212012599108, | |
| "grad_norm": 0.7282613515853882, | |
| "learning_rate": 5.21838643096897e-05, | |
| "loss": 0.3063, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.8464212012599108, | |
| "eval_accuracy": 0.8991935483870968, | |
| "eval_f1_macro": 0.8879891940359544, | |
| "eval_f1_micro": 0.8991935483870968, | |
| "eval_loss": 0.45412322878837585, | |
| "eval_precision_macro": 0.9073381957613869, | |
| "eval_precision_micro": 0.8991935483870968, | |
| "eval_recall_macro": 0.901361453647893, | |
| "eval_recall_micro": 0.8991935483870968, | |
| "eval_runtime": 1.5907, | |
| "eval_samples_per_second": 467.726, | |
| "eval_steps_per_second": 58.466, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.90072770717932, | |
| "grad_norm": 22.810928344726562, | |
| "learning_rate": 5.036527359215105e-05, | |
| "loss": 0.2974, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.90072770717932, | |
| "eval_accuracy": 0.9139784946236559, | |
| "eval_f1_macro": 0.9147040216841583, | |
| "eval_f1_micro": 0.9139784946236559, | |
| "eval_loss": 0.2588426470756531, | |
| "eval_precision_macro": 0.9280953295150929, | |
| "eval_precision_micro": 0.9139784946236559, | |
| "eval_recall_macro": 0.9254419270814122, | |
| "eval_recall_micro": 0.9139784946236559, | |
| "eval_runtime": 1.5934, | |
| "eval_samples_per_second": 466.917, | |
| "eval_steps_per_second": 58.365, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.9550342130987293, | |
| "grad_norm": 0.9817183613777161, | |
| "learning_rate": 4.8523407151010365e-05, | |
| "loss": 0.2938, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.9550342130987293, | |
| "eval_accuracy": 0.875, | |
| "eval_f1_macro": 0.8887166776365175, | |
| "eval_f1_micro": 0.875, | |
| "eval_loss": 0.45612600445747375, | |
| "eval_precision_macro": 0.915931173623922, | |
| "eval_precision_micro": 0.875, | |
| "eval_recall_macro": 0.8997546027307288, | |
| "eval_recall_micro": 0.875, | |
| "eval_runtime": 1.6003, | |
| "eval_samples_per_second": 464.907, | |
| "eval_steps_per_second": 58.113, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.0093407190181383, | |
| "grad_norm": 85.74153137207031, | |
| "learning_rate": 4.66624009865203e-05, | |
| "loss": 0.3067, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 2.0093407190181383, | |
| "eval_accuracy": 0.8844086021505376, | |
| "eval_f1_macro": 0.8921145619678118, | |
| "eval_f1_micro": 0.8844086021505376, | |
| "eval_loss": 0.3737676739692688, | |
| "eval_precision_macro": 0.9173188296546068, | |
| "eval_precision_micro": 0.8844086021505376, | |
| "eval_recall_macro": 0.9047881017377212, | |
| "eval_recall_micro": 0.8844086021505376, | |
| "eval_runtime": 1.733, | |
| "eval_samples_per_second": 429.309, | |
| "eval_steps_per_second": 53.664, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 2.0636472249375477, | |
| "grad_norm": 4.293463706970215, | |
| "learning_rate": 4.479019814743586e-05, | |
| "loss": 0.1653, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.0636472249375477, | |
| "eval_accuracy": 0.9005376344086021, | |
| "eval_f1_macro": 0.8930738638150588, | |
| "eval_f1_micro": 0.9005376344086021, | |
| "eval_loss": 0.5207958221435547, | |
| "eval_precision_macro": 0.9287838810859212, | |
| "eval_precision_micro": 0.9005376344086021, | |
| "eval_recall_macro": 0.8936382765105635, | |
| "eval_recall_micro": 0.9005376344086021, | |
| "eval_runtime": 1.5912, | |
| "eval_samples_per_second": 467.585, | |
| "eval_steps_per_second": 58.448, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.1179537308569567, | |
| "grad_norm": 1.4437882900238037, | |
| "learning_rate": 4.2907281661980334e-05, | |
| "loss": 0.1901, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 2.1179537308569567, | |
| "eval_accuracy": 0.9206989247311828, | |
| "eval_f1_macro": 0.9210532754724804, | |
| "eval_f1_micro": 0.9206989247311828, | |
| "eval_loss": 0.2993380129337311, | |
| "eval_precision_macro": 0.9381153626922412, | |
| "eval_precision_micro": 0.9206989247311828, | |
| "eval_recall_macro": 0.9300212877722726, | |
| "eval_recall_micro": 0.9206989247311828, | |
| "eval_runtime": 1.7299, | |
| "eval_samples_per_second": 430.073, | |
| "eval_steps_per_second": 53.759, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 2.1722602367763657, | |
| "grad_norm": 764.3075561523438, | |
| "learning_rate": 4.101407270852778e-05, | |
| "loss": 0.2271, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.1722602367763657, | |
| "eval_accuracy": 0.9032258064516129, | |
| "eval_f1_macro": 0.9205221777214502, | |
| "eval_f1_micro": 0.9032258064516129, | |
| "eval_loss": 0.27076366543769836, | |
| "eval_precision_macro": 0.9497791884188832, | |
| "eval_precision_micro": 0.9032258064516129, | |
| "eval_recall_macro": 0.9297648445536885, | |
| "eval_recall_micro": 0.9032258064516129, | |
| "eval_runtime": 1.5754, | |
| "eval_samples_per_second": 472.249, | |
| "eval_steps_per_second": 59.031, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.226566742695775, | |
| "grad_norm": 8.108640670776367, | |
| "learning_rate": 3.911858660572008e-05, | |
| "loss": 0.2108, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.226566742695775, | |
| "eval_accuracy": 0.9220430107526881, | |
| "eval_f1_macro": 0.9187239197738657, | |
| "eval_f1_micro": 0.9220430107526881, | |
| "eval_loss": 0.27602580189704895, | |
| "eval_precision_macro": 0.9334752930289422, | |
| "eval_precision_micro": 0.9220430107526881, | |
| "eval_recall_macro": 0.9228409582632311, | |
| "eval_recall_micro": 0.9220430107526881, | |
| "eval_runtime": 1.5758, | |
| "eval_samples_per_second": 472.143, | |
| "eval_steps_per_second": 59.018, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.280873248615184, | |
| "grad_norm": 2.4227545261383057, | |
| "learning_rate": 3.7225079759355747e-05, | |
| "loss": 0.2068, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.280873248615184, | |
| "eval_accuracy": 0.9247311827956989, | |
| "eval_f1_macro": 0.9260496184754449, | |
| "eval_f1_micro": 0.9247311827956989, | |
| "eval_loss": 0.25598055124282837, | |
| "eval_precision_macro": 0.9394381557552136, | |
| "eval_precision_micro": 0.9247311827956989, | |
| "eval_recall_macro": 0.9349235944200235, | |
| "eval_recall_micro": 0.9247311827956989, | |
| "eval_runtime": 1.5558, | |
| "eval_samples_per_second": 478.204, | |
| "eval_steps_per_second": 59.776, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.335179754534593, | |
| "grad_norm": 1.4017307758331299, | |
| "learning_rate": 3.5337804130717096e-05, | |
| "loss": 0.2171, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.335179754534593, | |
| "eval_accuracy": 0.9166666666666666, | |
| "eval_f1_macro": 0.9162826856619433, | |
| "eval_f1_micro": 0.9166666666666666, | |
| "eval_loss": 0.23610982298851013, | |
| "eval_precision_macro": 0.9248109520591958, | |
| "eval_precision_micro": 0.9166666666666666, | |
| "eval_recall_macro": 0.9257832999982825, | |
| "eval_recall_micro": 0.9166666666666666, | |
| "eval_runtime": 1.6011, | |
| "eval_samples_per_second": 464.685, | |
| "eval_steps_per_second": 58.086, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.389486260454002, | |
| "grad_norm": 3.65702486038208, | |
| "learning_rate": 3.34609976885854e-05, | |
| "loss": 0.1544, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.389486260454002, | |
| "eval_accuracy": 0.8938172043010753, | |
| "eval_f1_macro": 0.9112579615652996, | |
| "eval_f1_micro": 0.8938172043010753, | |
| "eval_loss": 0.31801822781562805, | |
| "eval_precision_macro": 0.9257465643895157, | |
| "eval_precision_micro": 0.8938172043010753, | |
| "eval_recall_macro": 0.9157454548646736, | |
| "eval_recall_micro": 0.8938172043010753, | |
| "eval_runtime": 1.5901, | |
| "eval_samples_per_second": 467.885, | |
| "eval_steps_per_second": 58.486, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.4437927663734116, | |
| "grad_norm": 17.16629409790039, | |
| "learning_rate": 3.15988748926769e-05, | |
| "loss": 0.2244, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.4437927663734116, | |
| "eval_accuracy": 0.9274193548387096, | |
| "eval_f1_macro": 0.929163688176975, | |
| "eval_f1_micro": 0.9274193548387096, | |
| "eval_loss": 0.2233731597661972, | |
| "eval_precision_macro": 0.9488357724575086, | |
| "eval_precision_micro": 0.9274193548387096, | |
| "eval_recall_macro": 0.9373529300126245, | |
| "eval_recall_micro": 0.9274193548387096, | |
| "eval_runtime": 1.5811, | |
| "eval_samples_per_second": 470.56, | |
| "eval_steps_per_second": 58.82, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.4980992722928206, | |
| "grad_norm": 0.002982610370963812, | |
| "learning_rate": 2.9755617229869363e-05, | |
| "loss": 0.1862, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.4980992722928206, | |
| "eval_accuracy": 0.9233870967741935, | |
| "eval_f1_macro": 0.928924685412107, | |
| "eval_f1_micro": 0.9233870967741935, | |
| "eval_loss": 0.2193865180015564, | |
| "eval_precision_macro": 0.9530930478155653, | |
| "eval_precision_micro": 0.9233870967741935, | |
| "eval_recall_macro": 0.9284052780385081, | |
| "eval_recall_micro": 0.9233870967741935, | |
| "eval_runtime": 1.5911, | |
| "eval_samples_per_second": 467.594, | |
| "eval_steps_per_second": 58.449, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.55240577821223, | |
| "grad_norm": 0.005432271398603916, | |
| "learning_rate": 2.7942593577289413e-05, | |
| "loss": 0.1675, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.55240577821223, | |
| "eval_accuracy": 0.9032258064516129, | |
| "eval_f1_macro": 0.9171716939776399, | |
| "eval_f1_micro": 0.9032258064516129, | |
| "eval_loss": 0.28054481744766235, | |
| "eval_precision_macro": 0.9380595809537082, | |
| "eval_precision_micro": 0.9032258064516129, | |
| "eval_recall_macro": 0.9295445270488267, | |
| "eval_recall_micro": 0.9032258064516129, | |
| "eval_runtime": 1.5905, | |
| "eval_samples_per_second": 467.768, | |
| "eval_steps_per_second": 58.471, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.606712284131639, | |
| "grad_norm": 1.1801046133041382, | |
| "learning_rate": 2.6149315464903132e-05, | |
| "loss": 0.2154, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.606712284131639, | |
| "eval_accuracy": 0.9099462365591398, | |
| "eval_f1_macro": 0.9317358502662257, | |
| "eval_f1_micro": 0.9099462365591398, | |
| "eval_loss": 0.20521479845046997, | |
| "eval_precision_macro": 0.9532059136761886, | |
| "eval_precision_micro": 0.9099462365591398, | |
| "eval_recall_macro": 0.9443101546357744, | |
| "eval_recall_micro": 0.9099462365591398, | |
| "eval_runtime": 1.6188, | |
| "eval_samples_per_second": 459.613, | |
| "eval_steps_per_second": 57.452, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.661018790051048, | |
| "grad_norm": 2.6474785804748535, | |
| "learning_rate": 2.4387139735109228e-05, | |
| "loss": 0.1994, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.661018790051048, | |
| "eval_accuracy": 0.9206989247311828, | |
| "eval_f1_macro": 0.9285830674772237, | |
| "eval_f1_micro": 0.9206989247311828, | |
| "eval_loss": 0.3016538619995117, | |
| "eval_precision_macro": 0.9344600444511141, | |
| "eval_precision_micro": 0.9206989247311828, | |
| "eval_recall_macro": 0.9264637971148902, | |
| "eval_recall_micro": 0.9206989247311828, | |
| "eval_runtime": 1.5905, | |
| "eval_samples_per_second": 467.768, | |
| "eval_steps_per_second": 58.471, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.715325295970457, | |
| "grad_norm": 0.6192300319671631, | |
| "learning_rate": 2.266002343881043e-05, | |
| "loss": 0.1693, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.715325295970457, | |
| "eval_accuracy": 0.9247311827956989, | |
| "eval_f1_macro": 0.9264992560565771, | |
| "eval_f1_micro": 0.9247311827956989, | |
| "eval_loss": 0.2687015235424042, | |
| "eval_precision_macro": 0.9451846988558618, | |
| "eval_precision_micro": 0.9247311827956989, | |
| "eval_recall_macro": 0.931429635216938, | |
| "eval_recall_micro": 0.9247311827956989, | |
| "eval_runtime": 1.5613, | |
| "eval_samples_per_second": 476.521, | |
| "eval_steps_per_second": 59.565, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.7696318018898665, | |
| "grad_norm": 1.0121103525161743, | |
| "learning_rate": 2.0971844899254465e-05, | |
| "loss": 0.1455, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.7696318018898665, | |
| "eval_accuracy": 0.9301075268817204, | |
| "eval_f1_macro": 0.9355979987630854, | |
| "eval_f1_micro": 0.9301075268817204, | |
| "eval_loss": 0.1772308498620987, | |
| "eval_precision_macro": 0.9578394473984728, | |
| "eval_precision_micro": 0.9301075268817204, | |
| "eval_recall_macro": 0.9376020750294228, | |
| "eval_recall_micro": 0.9301075268817204, | |
| "eval_runtime": 1.5579, | |
| "eval_samples_per_second": 477.553, | |
| "eval_steps_per_second": 59.694, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.8239383078092755, | |
| "grad_norm": 7.186190605163574, | |
| "learning_rate": 1.9326395003071377e-05, | |
| "loss": 0.1483, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.8239383078092755, | |
| "eval_accuracy": 0.9099462365591398, | |
| "eval_f1_macro": 0.9274305548763755, | |
| "eval_f1_micro": 0.9099462365591398, | |
| "eval_loss": 0.2263285517692566, | |
| "eval_precision_macro": 0.9497007612847732, | |
| "eval_precision_micro": 0.9099462365591398, | |
| "eval_recall_macro": 0.9370680110954468, | |
| "eval_recall_micro": 0.9099462365591398, | |
| "eval_runtime": 1.5569, | |
| "eval_samples_per_second": 477.873, | |
| "eval_steps_per_second": 59.734, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.878244813728685, | |
| "grad_norm": 0.7355983257293701, | |
| "learning_rate": 1.77273686876542e-05, | |
| "loss": 0.1228, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.878244813728685, | |
| "eval_accuracy": 0.9139784946236559, | |
| "eval_f1_macro": 0.9345990129995828, | |
| "eval_f1_micro": 0.9139784946236559, | |
| "eval_loss": 0.15667569637298584, | |
| "eval_precision_macro": 0.9546581752954356, | |
| "eval_precision_micro": 0.9139784946236559, | |
| "eval_recall_macro": 0.9451982881542952, | |
| "eval_recall_micro": 0.9139784946236559, | |
| "eval_runtime": 1.5563, | |
| "eval_samples_per_second": 478.061, | |
| "eval_steps_per_second": 59.758, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.932551319648094, | |
| "grad_norm": 1.0231633186340332, | |
| "learning_rate": 1.617835664399812e-05, | |
| "loss": 0.1627, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.932551319648094, | |
| "eval_accuracy": 0.9112903225806451, | |
| "eval_f1_macro": 0.9300960898223695, | |
| "eval_f1_micro": 0.9112903225806451, | |
| "eval_loss": 0.19375352561473846, | |
| "eval_precision_macro": 0.9520703470166678, | |
| "eval_precision_micro": 0.9112903225806451, | |
| "eval_recall_macro": 0.9397685186488954, | |
| "eval_recall_micro": 0.9112903225806451, | |
| "eval_runtime": 1.5507, | |
| "eval_samples_per_second": 479.787, | |
| "eval_steps_per_second": 59.973, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.986857825567503, | |
| "grad_norm": 1.2636340856552124, | |
| "learning_rate": 1.4682837253630058e-05, | |
| "loss": 0.1106, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.986857825567503, | |
| "eval_accuracy": 0.9314516129032258, | |
| "eval_f1_macro": 0.9360611349608771, | |
| "eval_f1_micro": 0.9314516129032258, | |
| "eval_loss": 0.18830984830856323, | |
| "eval_precision_macro": 0.9530936916231376, | |
| "eval_precision_micro": 0.9314516129032258, | |
| "eval_recall_macro": 0.9396110759039699, | |
| "eval_recall_micro": 0.9314516129032258, | |
| "eval_runtime": 1.5429, | |
| "eval_samples_per_second": 482.224, | |
| "eval_steps_per_second": 60.278, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 3.0411643314869123, | |
| "grad_norm": 1.0935313701629639, | |
| "learning_rate": 1.3244168777734551e-05, | |
| "loss": 0.1105, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 3.0411643314869123, | |
| "eval_accuracy": 0.9341397849462365, | |
| "eval_f1_macro": 0.9393547932119972, | |
| "eval_f1_micro": 0.9341397849462365, | |
| "eval_loss": 0.1898634135723114, | |
| "eval_precision_macro": 0.9593318319238684, | |
| "eval_precision_micro": 0.9341397849462365, | |
| "eval_recall_macro": 0.945290330511796, | |
| "eval_recall_micro": 0.9341397849462365, | |
| "eval_runtime": 1.5415, | |
| "eval_samples_per_second": 482.636, | |
| "eval_steps_per_second": 60.329, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 3.0954708374063213, | |
| "grad_norm": 1.7280462980270386, | |
| "learning_rate": 1.1865581816015724e-05, | |
| "loss": 0.0978, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 3.0954708374063213, | |
| "eval_accuracy": 0.9126344086021505, | |
| "eval_f1_macro": 0.932842505921675, | |
| "eval_f1_micro": 0.9126344086021505, | |
| "eval_loss": 0.1739678680896759, | |
| "eval_precision_macro": 0.9588560830865595, | |
| "eval_precision_micro": 0.9126344086021505, | |
| "eval_recall_macro": 0.9441396878334974, | |
| "eval_recall_micro": 0.9126344086021505, | |
| "eval_runtime": 1.5969, | |
| "eval_samples_per_second": 465.902, | |
| "eval_steps_per_second": 58.238, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 3.1497773433257303, | |
| "grad_norm": 4.298096656799316, | |
| "learning_rate": 1.055017205222924e-05, | |
| "loss": 0.0847, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 3.1497773433257303, | |
| "eval_accuracy": 0.907258064516129, | |
| "eval_f1_macro": 0.9287242305872121, | |
| "eval_f1_micro": 0.907258064516129, | |
| "eval_loss": 0.18464291095733643, | |
| "eval_precision_macro": 0.9485674275338388, | |
| "eval_precision_micro": 0.907258064516129, | |
| "eval_recall_macro": 0.9397129171252141, | |
| "eval_recall_micro": 0.907258064516129, | |
| "eval_runtime": 1.5931, | |
| "eval_samples_per_second": 467.028, | |
| "eval_steps_per_second": 58.378, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 3.20408384924514, | |
| "grad_norm": 2.599429130554199, | |
| "learning_rate": 9.30332397145231e-06, | |
| "loss": 0.0929, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 3.20408384924514, | |
| "eval_accuracy": 0.928763440860215, | |
| "eval_f1_macro": 0.9319260758161285, | |
| "eval_f1_micro": 0.928763440860215, | |
| "eval_loss": 0.2623588442802429, | |
| "eval_precision_macro": 0.9522632308119076, | |
| "eval_precision_micro": 0.928763440860215, | |
| "eval_recall_macro": 0.9354531905275971, | |
| "eval_recall_micro": 0.928763440860215, | |
| "eval_runtime": 1.5913, | |
| "eval_samples_per_second": 467.542, | |
| "eval_steps_per_second": 58.443, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 3.258390355164549, | |
| "grad_norm": 1.0820720195770264, | |
| "learning_rate": 8.122840981545725e-06, | |
| "loss": 0.0861, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 3.258390355164549, | |
| "eval_accuracy": 0.9274193548387096, | |
| "eval_f1_macro": 0.9276130625845173, | |
| "eval_f1_micro": 0.9274193548387096, | |
| "eval_loss": 0.26659145951271057, | |
| "eval_precision_macro": 0.9482101341581514, | |
| "eval_precision_micro": 0.9274193548387096, | |
| "eval_recall_macro": 0.9313633323621086, | |
| "eval_recall_micro": 0.9274193548387096, | |
| "eval_runtime": 1.5883, | |
| "eval_samples_per_second": 468.419, | |
| "eval_steps_per_second": 58.552, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 3.3126968610839578, | |
| "grad_norm": 0.9298611283302307, | |
| "learning_rate": 7.016084376904304e-06, | |
| "loss": 0.1159, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 3.3126968610839578, | |
| "eval_accuracy": 0.9086021505376344, | |
| "eval_f1_macro": 0.9269507193451398, | |
| "eval_f1_micro": 0.9086021505376344, | |
| "eval_loss": 0.2247731238603592, | |
| "eval_precision_macro": 0.9537479338140438, | |
| "eval_precision_micro": 0.9086021505376344, | |
| "eval_recall_macro": 0.9375578499701469, | |
| "eval_recall_micro": 0.9086021505376344, | |
| "eval_runtime": 1.5934, | |
| "eval_samples_per_second": 466.925, | |
| "eval_steps_per_second": 58.366, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 3.3670033670033668, | |
| "grad_norm": 1.2038335800170898, | |
| "learning_rate": 5.981104366775459e-06, | |
| "loss": 0.0983, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 3.3670033670033668, | |
| "eval_accuracy": 0.9112903225806451, | |
| "eval_f1_macro": 0.929673894660028, | |
| "eval_f1_micro": 0.9112903225806451, | |
| "eval_loss": 0.21170715987682343, | |
| "eval_precision_macro": 0.9562636570844839, | |
| "eval_precision_micro": 0.9112903225806451, | |
| "eval_recall_macro": 0.9408174871583557, | |
| "eval_recall_micro": 0.9112903225806451, | |
| "eval_runtime": 1.592, | |
| "eval_samples_per_second": 467.328, | |
| "eval_steps_per_second": 58.416, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 3.421309872922776, | |
| "grad_norm": 14.01207447052002, | |
| "learning_rate": 5.022515433540584e-06, | |
| "loss": 0.1093, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 3.421309872922776, | |
| "eval_accuracy": 0.9341397849462365, | |
| "eval_f1_macro": 0.9384885883048681, | |
| "eval_f1_micro": 0.9341397849462365, | |
| "eval_loss": 0.21036885678768158, | |
| "eval_precision_macro": 0.9544129085177095, | |
| "eval_precision_micro": 0.9341397849462365, | |
| "eval_recall_macro": 0.9426855279069932, | |
| "eval_recall_micro": 0.9341397849462365, | |
| "eval_runtime": 1.5897, | |
| "eval_samples_per_second": 468.025, | |
| "eval_steps_per_second": 58.503, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 3.475616378842185, | |
| "grad_norm": 0.0029710547532886267, | |
| "learning_rate": 4.142470135051615e-06, | |
| "loss": 0.0892, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 3.475616378842185, | |
| "eval_accuracy": 0.9126344086021505, | |
| "eval_f1_macro": 0.933263998404932, | |
| "eval_f1_micro": 0.9126344086021505, | |
| "eval_loss": 0.21525545418262482, | |
| "eval_precision_macro": 0.9471592585609407, | |
| "eval_precision_micro": 0.9126344086021505, | |
| "eval_recall_macro": 0.9420731137871163, | |
| "eval_recall_micro": 0.9126344086021505, | |
| "eval_runtime": 1.5909, | |
| "eval_samples_per_second": 467.651, | |
| "eval_steps_per_second": 58.456, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 3.5299228847615947, | |
| "grad_norm": 0.0014916452346369624, | |
| "learning_rate": 3.3429446556265453e-06, | |
| "loss": 0.0921, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 3.5299228847615947, | |
| "eval_accuracy": 0.9368279569892473, | |
| "eval_f1_macro": 0.9403174227850596, | |
| "eval_f1_micro": 0.9368279569892473, | |
| "eval_loss": 0.20182642340660095, | |
| "eval_precision_macro": 0.9610729151777162, | |
| "eval_precision_micro": 0.9368279569892473, | |
| "eval_recall_macro": 0.9450761984609293, | |
| "eval_recall_micro": 0.9368279569892473, | |
| "eval_runtime": 1.5911, | |
| "eval_samples_per_second": 467.608, | |
| "eval_steps_per_second": 58.451, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 3.5842293906810037, | |
| "grad_norm": 0.028313804417848587, | |
| "learning_rate": 2.625734368431996e-06, | |
| "loss": 0.1112, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 3.5842293906810037, | |
| "eval_accuracy": 0.9368279569892473, | |
| "eval_f1_macro": 0.9403174227850596, | |
| "eval_f1_micro": 0.9368279569892473, | |
| "eval_loss": 0.19140255451202393, | |
| "eval_precision_macro": 0.9610729151777162, | |
| "eval_precision_micro": 0.9368279569892473, | |
| "eval_recall_macro": 0.9450761984609293, | |
| "eval_recall_micro": 0.9368279569892473, | |
| "eval_runtime": 1.5433, | |
| "eval_samples_per_second": 482.098, | |
| "eval_steps_per_second": 60.262, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 3.6385358966004127, | |
| "grad_norm": 0.7935597896575928, | |
| "learning_rate": 1.992449803885794e-06, | |
| "loss": 0.0781, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 3.6385358966004127, | |
| "eval_accuracy": 0.9368279569892473, | |
| "eval_f1_macro": 0.9431465250491902, | |
| "eval_f1_micro": 0.9368279569892473, | |
| "eval_loss": 0.18505831062793732, | |
| "eval_precision_macro": 0.9605281751371779, | |
| "eval_precision_micro": 0.9368279569892473, | |
| "eval_recall_macro": 0.9482532934747587, | |
| "eval_recall_micro": 0.9368279569892473, | |
| "eval_runtime": 1.5588, | |
| "eval_samples_per_second": 477.296, | |
| "eval_steps_per_second": 59.662, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 3.6928424025198217, | |
| "grad_norm": 3.4242069721221924, | |
| "learning_rate": 1.444513033132977e-06, | |
| "loss": 0.104, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 3.6928424025198217, | |
| "eval_accuracy": 0.9368279569892473, | |
| "eval_f1_macro": 0.9436897074728228, | |
| "eval_f1_micro": 0.9368279569892473, | |
| "eval_loss": 0.1714259684085846, | |
| "eval_precision_macro": 0.9618566197941735, | |
| "eval_precision_micro": 0.9368279569892473, | |
| "eval_recall_macro": 0.9478564680779333, | |
| "eval_recall_micro": 0.9368279569892473, | |
| "eval_runtime": 1.5565, | |
| "eval_samples_per_second": 478.008, | |
| "eval_steps_per_second": 59.751, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 3.747148908439231, | |
| "grad_norm": 0.5764946341514587, | |
| "learning_rate": 9.839900655621303e-07, | |
| "loss": 0.0771, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 3.747148908439231, | |
| "eval_accuracy": 0.9368279569892473, | |
| "eval_f1_macro": 0.9453734182306841, | |
| "eval_f1_micro": 0.9368279569892473, | |
| "eval_loss": 0.17406800389289856, | |
| "eval_precision_macro": 0.9558439949105471, | |
| "eval_precision_micro": 0.9368279569892473, | |
| "eval_recall_macro": 0.9474835754951718, | |
| "eval_recall_micro": 0.9368279569892473, | |
| "eval_runtime": 1.5564, | |
| "eval_samples_per_second": 478.012, | |
| "eval_steps_per_second": 59.752, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 3.80145541435864, | |
| "grad_norm": 1.5720099210739136, | |
| "learning_rate": 6.100695905203458e-07, | |
| "loss": 0.0916, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 3.80145541435864, | |
| "eval_accuracy": 0.9341397849462365, | |
| "eval_f1_macro": 0.9472436482214578, | |
| "eval_f1_micro": 0.9341397849462365, | |
| "eval_loss": 0.17374970018863678, | |
| "eval_precision_macro": 0.9499695730366108, | |
| "eval_precision_micro": 0.9341397849462365, | |
| "eval_recall_macro": 0.9488576574662688, | |
| "eval_recall_micro": 0.9341397849462365, | |
| "eval_runtime": 1.5561, | |
| "eval_samples_per_second": 478.114, | |
| "eval_steps_per_second": 59.764, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 3.8557619202780495, | |
| "grad_norm": 1.1937015056610107, | |
| "learning_rate": 3.2460111095780954e-07, | |
| "loss": 0.0871, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 3.8557619202780495, | |
| "eval_accuracy": 0.9153225806451613, | |
| "eval_f1_macro": 0.9355623635453241, | |
| "eval_f1_micro": 0.9153225806451613, | |
| "eval_loss": 0.17309460043907166, | |
| "eval_precision_macro": 0.9566935705253509, | |
| "eval_precision_micro": 0.9153225806451613, | |
| "eval_recall_macro": 0.9470660204134378, | |
| "eval_recall_micro": 0.9153225806451613, | |
| "eval_runtime": 1.5563, | |
| "eval_samples_per_second": 478.07, | |
| "eval_steps_per_second": 59.759, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 3.9100684261974585, | |
| "grad_norm": 0.0013589151203632355, | |
| "learning_rate": 1.2822566016402971e-07, | |
| "loss": 0.1047, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 3.9100684261974585, | |
| "eval_accuracy": 0.9153225806451613, | |
| "eval_f1_macro": 0.9355623635453241, | |
| "eval_f1_micro": 0.9153225806451613, | |
| "eval_loss": 0.17508365213871002, | |
| "eval_precision_macro": 0.9566935705253509, | |
| "eval_precision_micro": 0.9153225806451613, | |
| "eval_recall_macro": 0.9470660204134378, | |
| "eval_recall_micro": 0.9153225806451613, | |
| "eval_runtime": 1.5895, | |
| "eval_samples_per_second": 468.068, | |
| "eval_steps_per_second": 58.509, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 3.9643749321168675, | |
| "grad_norm": 0.9162406325340271, | |
| "learning_rate": 2.1384208719683607e-08, | |
| "loss": 0.0845, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 3.9643749321168675, | |
| "eval_accuracy": 0.9153225806451613, | |
| "eval_f1_macro": 0.9355623635453241, | |
| "eval_f1_micro": 0.9153225806451613, | |
| "eval_loss": 0.17528513073921204, | |
| "eval_precision_macro": 0.9566935705253509, | |
| "eval_precision_micro": 0.9153225806451613, | |
| "eval_recall_macro": 0.9470660204134378, | |
| "eval_recall_micro": 0.9153225806451613, | |
| "eval_runtime": 1.5895, | |
| "eval_samples_per_second": 468.083, | |
| "eval_steps_per_second": 58.51, | |
| "step": 36500 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 36828, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.683916617932083e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |