{
  "best_metric": 0.9245835621453414,
  "best_model_checkpoint": "./fine-tune/roberta-base/qnli/checkpoint-19641",
  "epoch": 6.0,
  "global_step": 39282,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "learning_rate": 2.545176889793841e-06,
      "loss": 0.6928,
      "step": 500
    },
    {
      "epoch": 0.15,
      "learning_rate": 5.090353779587682e-06,
      "loss": 0.4818,
      "step": 1000
    },
    {
      "epoch": 0.23,
      "learning_rate": 7.635530669381522e-06,
      "loss": 0.3934,
      "step": 1500
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.0180707559175364e-05,
      "loss": 0.379,
      "step": 2000
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.2725884448969203e-05,
      "loss": 0.3509,
      "step": 2500
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.5271061338763045e-05,
      "loss": 0.3652,
      "step": 3000
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.7816238228556887e-05,
      "loss": 0.3552,
      "step": 3500
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.997692595180449e-05,
      "loss": 0.3218,
      "step": 4000
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.9814432654652997e-05,
      "loss": 0.3598,
      "step": 4500
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.9651939357501505e-05,
      "loss": 0.3159,
      "step": 5000
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.9489446060350013e-05,
      "loss": 0.3182,
      "step": 5500
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.932695276319852e-05,
      "loss": 0.3031,
      "step": 6000
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.916445946604703e-05,
      "loss": 0.2986,
      "step": 6500
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9170785282811642,
      "eval_loss": 0.22146184742450714,
      "eval_runtime": 9.2911,
      "eval_samples_per_second": 587.983,
      "eval_steps_per_second": 73.511,
      "step": 6547
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.9001966168895533e-05,
      "loss": 0.2681,
      "step": 7000
    },
    {
      "epoch": 1.15,
      "learning_rate": 1.883947287174404e-05,
      "loss": 0.2681,
      "step": 7500
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.867697957459255e-05,
      "loss": 0.2643,
      "step": 8000
    },
    {
      "epoch": 1.3,
      "learning_rate": 1.8514486277441056e-05,
      "loss": 0.253,
      "step": 8500
    },
    {
      "epoch": 1.37,
      "learning_rate": 1.8351992980289564e-05,
      "loss": 0.2503,
      "step": 9000
    },
    {
      "epoch": 1.45,
      "learning_rate": 1.8189499683138072e-05,
      "loss": 0.2597,
      "step": 9500
    },
    {
      "epoch": 1.53,
      "learning_rate": 1.802700638598658e-05,
      "loss": 0.2601,
      "step": 10000
    },
    {
      "epoch": 1.6,
      "learning_rate": 1.7864513088835088e-05,
      "loss": 0.2483,
      "step": 10500
    },
    {
      "epoch": 1.68,
      "learning_rate": 1.7702019791683592e-05,
      "loss": 0.2532,
      "step": 11000
    },
    {
      "epoch": 1.76,
      "learning_rate": 1.75395264945321e-05,
      "loss": 0.2455,
      "step": 11500
    },
    {
      "epoch": 1.83,
      "learning_rate": 1.737703319738061e-05,
      "loss": 0.2637,
      "step": 12000
    },
    {
      "epoch": 1.91,
      "learning_rate": 1.7214539900229116e-05,
      "loss": 0.2391,
      "step": 12500
    },
    {
      "epoch": 1.99,
      "learning_rate": 1.7052046603077624e-05,
      "loss": 0.243,
      "step": 13000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9172615778876075,
      "eval_loss": 0.23211686313152313,
      "eval_runtime": 9.2969,
      "eval_samples_per_second": 587.613,
      "eval_steps_per_second": 73.465,
      "step": 13094
    },
    {
      "epoch": 2.06,
      "learning_rate": 1.6889553305926132e-05,
      "loss": 0.2067,
      "step": 13500
    },
    {
      "epoch": 2.14,
      "learning_rate": 1.672706000877464e-05,
      "loss": 0.206,
      "step": 14000
    },
    {
      "epoch": 2.21,
      "learning_rate": 1.6564566711623148e-05,
      "loss": 0.1964,
      "step": 14500
    },
    {
      "epoch": 2.29,
      "learning_rate": 1.6402073414471655e-05,
      "loss": 0.1993,
      "step": 15000
    },
    {
      "epoch": 2.37,
      "learning_rate": 1.6239580117320163e-05,
      "loss": 0.2153,
      "step": 15500
    },
    {
      "epoch": 2.44,
      "learning_rate": 1.607708682016867e-05,
      "loss": 0.2103,
      "step": 16000
    },
    {
      "epoch": 2.52,
      "learning_rate": 1.5914593523017176e-05,
      "loss": 0.2023,
      "step": 16500
    },
    {
      "epoch": 2.6,
      "learning_rate": 1.5752100225865684e-05,
      "loss": 0.2063,
      "step": 17000
    },
    {
      "epoch": 2.67,
      "learning_rate": 1.558960692871419e-05,
      "loss": 0.2047,
      "step": 17500
    },
    {
      "epoch": 2.75,
      "learning_rate": 1.54271136315627e-05,
      "loss": 0.2076,
      "step": 18000
    },
    {
      "epoch": 2.83,
      "learning_rate": 1.5264620334411207e-05,
      "loss": 0.2086,
      "step": 18500
    },
    {
      "epoch": 2.9,
      "learning_rate": 1.5102127037259715e-05,
      "loss": 0.2001,
      "step": 19000
    },
    {
      "epoch": 2.98,
      "learning_rate": 1.4939633740108221e-05,
      "loss": 0.2048,
      "step": 19500
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9245835621453414,
      "eval_loss": 0.2992143929004669,
      "eval_runtime": 9.1061,
      "eval_samples_per_second": 599.927,
      "eval_steps_per_second": 75.005,
      "step": 19641
    },
    {
      "epoch": 3.05,
      "learning_rate": 1.477714044295673e-05,
      "loss": 0.1717,
      "step": 20000
    },
    {
      "epoch": 3.13,
      "learning_rate": 1.4614647145805237e-05,
      "loss": 0.1483,
      "step": 20500
    },
    {
      "epoch": 3.21,
      "learning_rate": 1.4452153848653745e-05,
      "loss": 0.1743,
      "step": 21000
    },
    {
      "epoch": 3.28,
      "learning_rate": 1.4289660551502251e-05,
      "loss": 0.1442,
      "step": 21500
    },
    {
      "epoch": 3.36,
      "learning_rate": 1.412716725435076e-05,
      "loss": 0.1744,
      "step": 22000
    },
    {
      "epoch": 3.44,
      "learning_rate": 1.3964673957199267e-05,
      "loss": 0.1694,
      "step": 22500
    },
    {
      "epoch": 3.51,
      "learning_rate": 1.3802180660047775e-05,
      "loss": 0.1669,
      "step": 23000
    },
    {
      "epoch": 3.59,
      "learning_rate": 1.3639687362896281e-05,
      "loss": 0.1539,
      "step": 23500
    },
    {
      "epoch": 3.67,
      "learning_rate": 1.347719406574479e-05,
      "loss": 0.1601,
      "step": 24000
    },
    {
      "epoch": 3.74,
      "learning_rate": 1.3314700768593297e-05,
      "loss": 0.1689,
      "step": 24500
    },
    {
      "epoch": 3.82,
      "learning_rate": 1.3152207471441804e-05,
      "loss": 0.1608,
      "step": 25000
    },
    {
      "epoch": 3.89,
      "learning_rate": 1.298971417429031e-05,
      "loss": 0.1639,
      "step": 25500
    },
    {
      "epoch": 3.97,
      "learning_rate": 1.282722087713882e-05,
      "loss": 0.1629,
      "step": 26000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9220208676551346,
      "eval_loss": 0.3538360595703125,
      "eval_runtime": 9.1087,
      "eval_samples_per_second": 599.755,
      "eval_steps_per_second": 74.983,
      "step": 26188
    },
    {
      "epoch": 4.05,
      "learning_rate": 1.2664727579987326e-05,
      "loss": 0.1307,
      "step": 26500
    },
    {
      "epoch": 4.12,
      "learning_rate": 1.2502234282835834e-05,
      "loss": 0.1112,
      "step": 27000
    },
    {
      "epoch": 4.2,
      "learning_rate": 1.233974098568434e-05,
      "loss": 0.1243,
      "step": 27500
    },
    {
      "epoch": 4.28,
      "learning_rate": 1.217724768853285e-05,
      "loss": 0.1111,
      "step": 28000
    },
    {
      "epoch": 4.35,
      "learning_rate": 1.2014754391381356e-05,
      "loss": 0.1065,
      "step": 28500
    },
    {
      "epoch": 4.43,
      "learning_rate": 1.1852261094229864e-05,
      "loss": 0.1319,
      "step": 29000
    },
    {
      "epoch": 4.51,
      "learning_rate": 1.168976779707837e-05,
      "loss": 0.1172,
      "step": 29500
    },
    {
      "epoch": 4.58,
      "learning_rate": 1.152727449992688e-05,
      "loss": 0.1356,
      "step": 30000
    },
    {
      "epoch": 4.66,
      "learning_rate": 1.1364781202775386e-05,
      "loss": 0.1262,
      "step": 30500
    },
    {
      "epoch": 4.73,
      "learning_rate": 1.1202287905623894e-05,
      "loss": 0.1236,
      "step": 31000
    },
    {
      "epoch": 4.81,
      "learning_rate": 1.10397946084724e-05,
      "loss": 0.1276,
      "step": 31500
    },
    {
      "epoch": 4.89,
      "learning_rate": 1.087730131132091e-05,
      "loss": 0.126,
      "step": 32000
    },
    {
      "epoch": 4.96,
      "learning_rate": 1.0714808014169416e-05,
      "loss": 0.1308,
      "step": 32500
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9209225700164745,
      "eval_loss": 0.35333874821662903,
      "eval_runtime": 9.1134,
      "eval_samples_per_second": 599.446,
      "eval_steps_per_second": 74.944,
      "step": 32735
    },
    {
      "epoch": 5.04,
      "learning_rate": 1.0552314717017924e-05,
      "loss": 0.1111,
      "step": 33000
    },
    {
      "epoch": 5.12,
      "learning_rate": 1.038982141986643e-05,
      "loss": 0.0724,
      "step": 33500
    },
    {
      "epoch": 5.19,
      "learning_rate": 1.022732812271494e-05,
      "loss": 0.0885,
      "step": 34000
    },
    {
      "epoch": 5.27,
      "learning_rate": 1.0064834825563446e-05,
      "loss": 0.0855,
      "step": 34500
    },
    {
      "epoch": 5.35,
      "learning_rate": 9.902341528411954e-06,
      "loss": 0.0851,
      "step": 35000
    },
    {
      "epoch": 5.42,
      "learning_rate": 9.739848231260461e-06,
      "loss": 0.0852,
      "step": 35500
    },
    {
      "epoch": 5.5,
      "learning_rate": 9.57735493410897e-06,
      "loss": 0.0888,
      "step": 36000
    },
    {
      "epoch": 5.58,
      "learning_rate": 9.414861636957477e-06,
      "loss": 0.0893,
      "step": 36500
    },
    {
      "epoch": 5.65,
      "learning_rate": 9.252368339805983e-06,
      "loss": 0.0865,
      "step": 37000
    },
    {
      "epoch": 5.73,
      "learning_rate": 9.089875042654491e-06,
      "loss": 0.0814,
      "step": 37500
    },
    {
      "epoch": 5.8,
      "learning_rate": 8.927381745502999e-06,
      "loss": 0.0967,
      "step": 38000
    },
    {
      "epoch": 5.88,
      "learning_rate": 8.764888448351507e-06,
      "loss": 0.0882,
      "step": 38500
    },
    {
      "epoch": 5.96,
      "learning_rate": 8.602395151200013e-06,
      "loss": 0.0846,
      "step": 39000
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9229361156873512,
      "eval_loss": 0.427664577960968,
      "eval_runtime": 9.0686,
      "eval_samples_per_second": 602.409,
      "eval_steps_per_second": 75.315,
      "step": 39282
    },
    {
      "epoch": 6.0,
      "step": 39282,
      "total_flos": 4.133856190735872e+16,
      "train_loss": 0.20157804863496975,
      "train_runtime": 4031.9268,
      "train_samples_per_second": 259.784,
      "train_steps_per_second": 16.238
    }
  ],
  "max_steps": 65470,
  "num_train_epochs": 10,
  "total_flos": 4.133856190735872e+16,
  "trial_name": null,
  "trial_params": null
}