{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.5347222222222223,
  "global_step": 730,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 9.88425925925926e-05,
      "loss": 2.0821,
      "step": 10
    },
    {
      "epoch": 0.03,
      "eval_accuracy": 0.1527777761220932,
      "eval_loss": 2.099255084991455,
      "eval_runtime": 90.7163,
      "eval_samples_per_second": 6.349,
      "eval_steps_per_second": 1.587,
      "step": 10
    },
    {
      "epoch": 0.07,
      "learning_rate": 9.768518518518519e-05,
      "loss": 1.9991,
      "step": 20
    },
    {
      "epoch": 0.07,
      "eval_accuracy": 0.2326388955116272,
      "eval_loss": 2.011439085006714,
      "eval_runtime": 90.9685,
      "eval_samples_per_second": 6.332,
      "eval_steps_per_second": 1.583,
      "step": 20
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.664351851851853e-05,
      "loss": 2.0133,
      "step": 30
    },
    {
      "epoch": 0.1,
      "eval_accuracy": 0.1822916716337204,
      "eval_loss": 2.0611398220062256,
      "eval_runtime": 91.1422,
      "eval_samples_per_second": 6.32,
      "eval_steps_per_second": 1.58,
      "step": 30
    },
    {
      "epoch": 0.14,
      "learning_rate": 9.548611111111112e-05,
      "loss": 1.9912,
      "step": 40
    },
    {
      "epoch": 0.14,
      "eval_accuracy": 0.1944444477558136,
      "eval_loss": 1.9874335527420044,
      "eval_runtime": 91.2497,
      "eval_samples_per_second": 6.312,
      "eval_steps_per_second": 1.578,
      "step": 40
    },
    {
      "epoch": 0.17,
      "learning_rate": 9.432870370370372e-05,
      "loss": 1.9825,
      "step": 50
    },
    {
      "epoch": 0.17,
      "eval_accuracy": 0.1875,
      "eval_loss": 1.9108080863952637,
      "eval_runtime": 91.363,
      "eval_samples_per_second": 6.305,
      "eval_steps_per_second": 1.576,
      "step": 50
    },
    {
      "epoch": 0.21,
      "learning_rate": 9.31712962962963e-05,
      "loss": 1.8281,
      "step": 60
    },
    {
      "epoch": 0.21,
      "eval_accuracy": 0.2760416567325592,
      "eval_loss": 1.8094313144683838,
      "eval_runtime": 90.7999,
      "eval_samples_per_second": 6.344,
      "eval_steps_per_second": 1.586,
      "step": 60
    },
    {
      "epoch": 0.24,
      "learning_rate": 9.201388888888889e-05,
      "loss": 1.7768,
      "step": 70
    },
    {
      "epoch": 0.24,
      "eval_accuracy": 0.28125,
      "eval_loss": 1.8212822675704956,
      "eval_runtime": 90.3717,
      "eval_samples_per_second": 6.374,
      "eval_steps_per_second": 1.593,
      "step": 70
    },
    {
      "epoch": 0.28,
      "learning_rate": 9.085648148148149e-05,
      "loss": 1.8747,
      "step": 80
    },
    {
      "epoch": 0.28,
      "eval_accuracy": 0.2916666567325592,
      "eval_loss": 1.819143533706665,
      "eval_runtime": 89.6205,
      "eval_samples_per_second": 6.427,
      "eval_steps_per_second": 1.607,
      "step": 80
    },
    {
      "epoch": 0.31,
      "learning_rate": 8.969907407407407e-05,
      "loss": 1.8258,
      "step": 90
    },
    {
      "epoch": 0.31,
      "eval_accuracy": 0.2673611044883728,
      "eval_loss": 1.8833109140396118,
      "eval_runtime": 90.3547,
      "eval_samples_per_second": 6.375,
      "eval_steps_per_second": 1.594,
      "step": 90
    },
    {
      "epoch": 0.35,
      "learning_rate": 8.854166666666667e-05,
      "loss": 1.8466,
      "step": 100
    },
    {
      "epoch": 0.35,
      "eval_accuracy": 0.3020833432674408,
      "eval_loss": 1.794202208518982,
      "eval_runtime": 89.9211,
      "eval_samples_per_second": 6.406,
      "eval_steps_per_second": 1.601,
      "step": 100
    },
    {
      "epoch": 0.38,
      "learning_rate": 8.738425925925926e-05,
      "loss": 1.7949,
      "step": 110
    },
    {
      "epoch": 0.38,
      "eval_accuracy": 0.234375,
      "eval_loss": 1.737123727798462,
      "eval_runtime": 90.2733,
      "eval_samples_per_second": 6.381,
      "eval_steps_per_second": 1.595,
      "step": 110
    },
    {
      "epoch": 0.42,
      "learning_rate": 8.622685185185186e-05,
      "loss": 1.6993,
      "step": 120
    },
    {
      "epoch": 0.42,
      "eval_accuracy": 0.3333333432674408,
      "eval_loss": 1.714468240737915,
      "eval_runtime": 90.6521,
      "eval_samples_per_second": 6.354,
      "eval_steps_per_second": 1.588,
      "step": 120
    },
    {
      "epoch": 0.45,
      "learning_rate": 8.506944444444444e-05,
      "loss": 1.9949,
      "step": 130
    },
    {
      "epoch": 0.45,
      "eval_accuracy": 0.3628472089767456,
      "eval_loss": 1.7169595956802368,
      "eval_runtime": 89.657,
      "eval_samples_per_second": 6.424,
      "eval_steps_per_second": 1.606,
      "step": 130
    },
    {
      "epoch": 0.49,
      "learning_rate": 8.391203703703704e-05,
      "loss": 1.6402,
      "step": 140
    },
    {
      "epoch": 0.49,
      "eval_accuracy": 0.3506944477558136,
      "eval_loss": 1.7289636135101318,
      "eval_runtime": 90.3787,
      "eval_samples_per_second": 6.373,
      "eval_steps_per_second": 1.593,
      "step": 140
    },
    {
      "epoch": 0.52,
      "learning_rate": 8.275462962962963e-05,
      "loss": 1.7599,
      "step": 150
    },
    {
      "epoch": 0.52,
      "eval_accuracy": 0.3090277910232544,
      "eval_loss": 1.7477116584777832,
      "eval_runtime": 90.5736,
      "eval_samples_per_second": 6.359,
      "eval_steps_per_second": 1.59,
      "step": 150
    },
    {
      "epoch": 0.56,
      "learning_rate": 8.159722222222223e-05,
      "loss": 1.5776,
      "step": 160
    },
    {
      "epoch": 0.56,
      "eval_accuracy": 0.3715277910232544,
      "eval_loss": 1.6158109903335571,
      "eval_runtime": 89.7843,
      "eval_samples_per_second": 6.415,
      "eval_steps_per_second": 1.604,
      "step": 160
    },
    {
      "epoch": 0.59,
      "learning_rate": 8.043981481481482e-05,
      "loss": 1.7169,
      "step": 170
    },
    {
      "epoch": 0.59,
      "eval_accuracy": 0.3663194477558136,
      "eval_loss": 1.6075005531311035,
      "eval_runtime": 90.228,
      "eval_samples_per_second": 6.384,
      "eval_steps_per_second": 1.596,
      "step": 170
    },
    {
      "epoch": 0.62,
      "learning_rate": 7.928240740740742e-05,
      "loss": 1.653,
      "step": 180
    },
    {
      "epoch": 0.62,
      "eval_accuracy": 0.4201388955116272,
      "eval_loss": 1.524334192276001,
      "eval_runtime": 91.575,
      "eval_samples_per_second": 6.29,
      "eval_steps_per_second": 1.572,
      "step": 180
    },
    {
      "epoch": 0.66,
      "learning_rate": 7.8125e-05,
      "loss": 1.5733,
      "step": 190
    },
    {
      "epoch": 0.66,
      "eval_accuracy": 0.359375,
      "eval_loss": 1.7072927951812744,
      "eval_runtime": 90.2896,
      "eval_samples_per_second": 6.379,
      "eval_steps_per_second": 1.595,
      "step": 190
    },
    {
      "epoch": 0.69,
      "learning_rate": 7.69675925925926e-05,
      "loss": 1.6704,
      "step": 200
    },
    {
      "epoch": 0.69,
      "eval_accuracy": 0.4722222089767456,
      "eval_loss": 1.4267817735671997,
      "eval_runtime": 90.57,
      "eval_samples_per_second": 6.36,
      "eval_steps_per_second": 1.59,
      "step": 200
    },
    {
      "epoch": 0.73,
      "learning_rate": 7.581018518518519e-05,
      "loss": 1.4389,
      "step": 210
    },
    {
      "epoch": 0.73,
      "eval_accuracy": 0.3940972089767456,
      "eval_loss": 1.7218824625015259,
      "eval_runtime": 89.8269,
      "eval_samples_per_second": 6.412,
      "eval_steps_per_second": 1.603,
      "step": 210
    },
    {
      "epoch": 0.76,
      "learning_rate": 7.465277777777779e-05,
      "loss": 1.5342,
      "step": 220
    },
    {
      "epoch": 0.76,
      "eval_accuracy": 0.3940972089767456,
      "eval_loss": 1.5133135318756104,
      "eval_runtime": 89.6337,
      "eval_samples_per_second": 6.426,
      "eval_steps_per_second": 1.607,
      "step": 220
    },
    {
      "epoch": 0.8,
      "learning_rate": 7.349537037037037e-05,
      "loss": 1.5165,
      "step": 230
    },
    {
      "epoch": 0.8,
      "eval_accuracy": 0.4322916567325592,
      "eval_loss": 1.4692301750183105,
      "eval_runtime": 90.2666,
      "eval_samples_per_second": 6.381,
      "eval_steps_per_second": 1.595,
      "step": 230
    },
    {
      "epoch": 0.83,
      "learning_rate": 7.233796296296297e-05,
      "loss": 1.4743,
      "step": 240
    },
    {
      "epoch": 0.83,
      "eval_accuracy": 0.3975694477558136,
      "eval_loss": 1.5509642362594604,
      "eval_runtime": 90.8282,
      "eval_samples_per_second": 6.342,
      "eval_steps_per_second": 1.585,
      "step": 240
    },
    {
      "epoch": 0.87,
      "learning_rate": 7.118055555555556e-05,
      "loss": 1.4903,
      "step": 250
    },
    {
      "epoch": 0.87,
      "eval_accuracy": 0.4739583432674408,
      "eval_loss": 1.3426711559295654,
      "eval_runtime": 91.1121,
      "eval_samples_per_second": 6.322,
      "eval_steps_per_second": 1.58,
      "step": 250
    },
    {
      "epoch": 0.9,
      "learning_rate": 7.002314814814816e-05,
      "loss": 1.2193,
      "step": 260
    },
    {
      "epoch": 0.9,
      "eval_accuracy": 0.5329861044883728,
      "eval_loss": 1.3175561428070068,
      "eval_runtime": 89.5523,
      "eval_samples_per_second": 6.432,
      "eval_steps_per_second": 1.608,
      "step": 260
    },
    {
      "epoch": 0.94,
      "learning_rate": 6.886574074074074e-05,
      "loss": 1.56,
      "step": 270
    },
    {
      "epoch": 0.94,
      "eval_accuracy": 0.4635416567325592,
      "eval_loss": 1.4102365970611572,
      "eval_runtime": 89.6275,
      "eval_samples_per_second": 6.427,
      "eval_steps_per_second": 1.607,
      "step": 270
    },
    {
      "epoch": 0.97,
      "learning_rate": 6.770833333333334e-05,
      "loss": 1.4563,
      "step": 280
    },
    {
      "epoch": 0.97,
      "eval_accuracy": 0.5347222089767456,
      "eval_loss": 1.291764736175537,
      "eval_runtime": 89.5795,
      "eval_samples_per_second": 6.43,
      "eval_steps_per_second": 1.608,
      "step": 280
    },
    {
      "epoch": 1.01,
      "learning_rate": 6.655092592592593e-05,
      "loss": 1.3766,
      "step": 290
    },
    {
      "epoch": 1.01,
      "eval_accuracy": 0.4305555522441864,
      "eval_loss": 1.5331988334655762,
      "eval_runtime": 89.8777,
      "eval_samples_per_second": 6.409,
      "eval_steps_per_second": 1.602,
      "step": 290
    },
    {
      "epoch": 1.04,
      "learning_rate": 6.539351851851853e-05,
      "loss": 1.218,
      "step": 300
    },
    {
      "epoch": 1.04,
      "eval_accuracy": 0.4861111044883728,
      "eval_loss": 1.317336916923523,
      "eval_runtime": 91.03,
      "eval_samples_per_second": 6.328,
      "eval_steps_per_second": 1.582,
      "step": 300
    },
    {
      "epoch": 1.08,
      "learning_rate": 6.423611111111112e-05,
      "loss": 1.3211,
      "step": 310
    },
    {
      "epoch": 1.08,
      "eval_accuracy": 0.5034722089767456,
      "eval_loss": 1.263780951499939,
      "eval_runtime": 90.5509,
      "eval_samples_per_second": 6.361,
      "eval_steps_per_second": 1.59,
      "step": 310
    },
    {
      "epoch": 1.11,
      "learning_rate": 6.307870370370372e-05,
      "loss": 1.1933,
      "step": 320
    },
    {
      "epoch": 1.11,
      "eval_accuracy": 0.4982638955116272,
      "eval_loss": 1.3839294910430908,
      "eval_runtime": 90.7629,
      "eval_samples_per_second": 6.346,
      "eval_steps_per_second": 1.587,
      "step": 320
    },
    {
      "epoch": 1.15,
      "learning_rate": 6.192129629629629e-05,
      "loss": 1.2803,
      "step": 330
    },
    {
      "epoch": 1.15,
      "eval_accuracy": 0.5121527910232544,
      "eval_loss": 1.2145192623138428,
      "eval_runtime": 90.0175,
      "eval_samples_per_second": 6.399,
      "eval_steps_per_second": 1.6,
      "step": 330
    },
    {
      "epoch": 1.18,
      "learning_rate": 6.076388888888889e-05,
      "loss": 1.1112,
      "step": 340
    },
    {
      "epoch": 1.18,
      "eval_accuracy": 0.5711805820465088,
      "eval_loss": 1.1930283308029175,
      "eval_runtime": 89.5796,
      "eval_samples_per_second": 6.43,
      "eval_steps_per_second": 1.608,
      "step": 340
    },
    {
      "epoch": 1.22,
      "learning_rate": 5.960648148148148e-05,
      "loss": 1.0907,
      "step": 350
    },
    {
      "epoch": 1.22,
      "eval_accuracy": 0.5815972089767456,
      "eval_loss": 1.145843505859375,
      "eval_runtime": 90.3053,
      "eval_samples_per_second": 6.378,
      "eval_steps_per_second": 1.595,
      "step": 350
    },
    {
      "epoch": 1.25,
      "learning_rate": 5.844907407407407e-05,
      "loss": 1.013,
      "step": 360
    },
    {
      "epoch": 1.25,
      "eval_accuracy": 0.6180555820465088,
      "eval_loss": 1.0559839010238647,
      "eval_runtime": 89.8917,
      "eval_samples_per_second": 6.408,
      "eval_steps_per_second": 1.602,
      "step": 360
    },
    {
      "epoch": 1.28,
      "learning_rate": 5.7291666666666666e-05,
      "loss": 0.9308,
      "step": 370
    },
    {
      "epoch": 1.28,
      "eval_accuracy": 0.6163194179534912,
      "eval_loss": 1.0486806631088257,
      "eval_runtime": 90.7378,
      "eval_samples_per_second": 6.348,
      "eval_steps_per_second": 1.587,
      "step": 370
    },
    {
      "epoch": 1.32,
      "learning_rate": 5.613425925925926e-05,
      "loss": 1.3225,
      "step": 380
    },
    {
      "epoch": 1.32,
      "eval_accuracy": 0.6145833134651184,
      "eval_loss": 1.050321340560913,
      "eval_runtime": 89.4251,
      "eval_samples_per_second": 6.441,
      "eval_steps_per_second": 1.61,
      "step": 380
    },
    {
      "epoch": 1.35,
      "learning_rate": 5.497685185185185e-05,
      "loss": 1.0774,
      "step": 390
    },
    {
      "epoch": 1.35,
      "eval_accuracy": 0.5434027910232544,
      "eval_loss": 1.25161612033844,
      "eval_runtime": 89.4532,
      "eval_samples_per_second": 6.439,
      "eval_steps_per_second": 1.61,
      "step": 390
    },
    {
      "epoch": 1.39,
      "learning_rate": 5.3819444444444444e-05,
      "loss": 1.2251,
      "step": 400
    },
    {
      "epoch": 1.39,
      "eval_accuracy": 0.609375,
      "eval_loss": 1.076072335243225,
      "eval_runtime": 90.2082,
      "eval_samples_per_second": 6.385,
      "eval_steps_per_second": 1.596,
      "step": 400
    },
    {
      "epoch": 1.42,
      "learning_rate": 5.266203703703704e-05,
      "loss": 0.9848,
      "step": 410
    },
    {
      "epoch": 1.42,
      "eval_accuracy": 0.6440972089767456,
      "eval_loss": 1.0271832942962646,
      "eval_runtime": 89.9601,
      "eval_samples_per_second": 6.403,
      "eval_steps_per_second": 1.601,
      "step": 410
    },
    {
      "epoch": 1.46,
      "learning_rate": 5.150462962962963e-05,
      "loss": 0.9913,
      "step": 420
    },
    {
      "epoch": 1.46,
      "eval_accuracy": 0.6041666865348816,
      "eval_loss": 1.0503506660461426,
      "eval_runtime": 89.8786,
      "eval_samples_per_second": 6.409,
      "eval_steps_per_second": 1.602,
      "step": 420
    },
    {
      "epoch": 1.49,
      "learning_rate": 5.034722222222222e-05,
      "loss": 0.9081,
      "step": 430
    },
    {
      "epoch": 1.49,
      "eval_accuracy": 0.6666666865348816,
      "eval_loss": 0.9094821214675903,
      "eval_runtime": 91.6907,
      "eval_samples_per_second": 6.282,
      "eval_steps_per_second": 1.57,
      "step": 430
    },
    {
      "epoch": 1.53,
      "learning_rate": 4.9189814814814815e-05,
      "loss": 0.8339,
      "step": 440
    },
    {
      "epoch": 1.53,
      "eval_accuracy": 0.6631944179534912,
      "eval_loss": 0.9030921459197998,
      "eval_runtime": 90.547,
      "eval_samples_per_second": 6.361,
      "eval_steps_per_second": 1.59,
      "step": 440
    },
    {
      "epoch": 1.56,
      "learning_rate": 4.803240740740741e-05,
      "loss": 0.8893,
      "step": 450
    },
    {
      "epoch": 1.56,
      "eval_accuracy": 0.6423611044883728,
      "eval_loss": 0.9375382661819458,
      "eval_runtime": 90.2281,
      "eval_samples_per_second": 6.384,
      "eval_steps_per_second": 1.596,
      "step": 450
    },
    {
      "epoch": 1.6,
      "learning_rate": 4.6875e-05,
      "loss": 0.9362,
      "step": 460
    },
    {
      "epoch": 1.6,
      "eval_accuracy": 0.6197916865348816,
      "eval_loss": 0.9755175113677979,
      "eval_runtime": 91.0163,
      "eval_samples_per_second": 6.329,
      "eval_steps_per_second": 1.582,
      "step": 460
    },
    {
      "epoch": 1.63,
      "learning_rate": 4.5717592592592594e-05,
      "loss": 0.835,
      "step": 470
    },
    {
      "epoch": 1.63,
      "eval_accuracy": 0.6545138955116272,
      "eval_loss": 0.9399816989898682,
      "eval_runtime": 89.8555,
      "eval_samples_per_second": 6.41,
      "eval_steps_per_second": 1.603,
      "step": 470
    },
    {
      "epoch": 1.67,
      "learning_rate": 4.456018518518519e-05,
      "loss": 0.6733,
      "step": 480
    },
    {
      "epoch": 1.67,
      "eval_accuracy": 0.6927083134651184,
      "eval_loss": 0.8480438590049744,
      "eval_runtime": 89.8841,
      "eval_samples_per_second": 6.408,
      "eval_steps_per_second": 1.602,
      "step": 480
    },
    {
      "epoch": 1.7,
      "learning_rate": 4.340277777777778e-05,
      "loss": 1.0115,
      "step": 490
    },
    {
      "epoch": 1.7,
      "eval_accuracy": 0.6840277910232544,
      "eval_loss": 0.8332173824310303,
      "eval_runtime": 91.0011,
      "eval_samples_per_second": 6.33,
      "eval_steps_per_second": 1.582,
      "step": 490
    },
    {
      "epoch": 1.74,
      "learning_rate": 4.224537037037037e-05,
      "loss": 0.7473,
      "step": 500
    },
    {
      "epoch": 1.74,
      "eval_accuracy": 0.6475694179534912,
      "eval_loss": 0.9618370532989502,
      "eval_runtime": 90.2515,
      "eval_samples_per_second": 6.382,
      "eval_steps_per_second": 1.596,
      "step": 500
    },
    {
      "epoch": 1.77,
      "learning_rate": 4.1087962962962965e-05,
      "loss": 0.8355,
      "step": 510
    },
    {
      "epoch": 1.77,
      "eval_accuracy": 0.6840277910232544,
      "eval_loss": 0.8845413327217102,
      "eval_runtime": 89.7252,
      "eval_samples_per_second": 6.42,
      "eval_steps_per_second": 1.605,
      "step": 510
    },
    {
      "epoch": 1.81,
      "learning_rate": 3.993055555555556e-05,
      "loss": 0.8487,
      "step": 520
    },
    {
      "epoch": 1.81,
      "eval_accuracy": 0.6875,
      "eval_loss": 0.8297374844551086,
      "eval_runtime": 91.9685,
      "eval_samples_per_second": 6.263,
      "eval_steps_per_second": 1.566,
      "step": 520
    },
    {
      "epoch": 1.84,
      "learning_rate": 3.877314814814815e-05,
      "loss": 0.6038,
      "step": 530
    },
    {
      "epoch": 1.84,
      "eval_accuracy": 0.6493055820465088,
      "eval_loss": 0.9539130330085754,
      "eval_runtime": 90.0856,
      "eval_samples_per_second": 6.394,
      "eval_steps_per_second": 1.598,
      "step": 530
    },
    {
      "epoch": 1.88,
      "learning_rate": 3.7615740740740744e-05,
      "loss": 0.75,
      "step": 540
    },
    {
      "epoch": 1.88,
      "eval_accuracy": 0.6857638955116272,
      "eval_loss": 0.8455307483673096,
      "eval_runtime": 89.5522,
      "eval_samples_per_second": 6.432,
      "eval_steps_per_second": 1.608,
      "step": 540
    },
    {
      "epoch": 1.91,
      "learning_rate": 3.6458333333333336e-05,
      "loss": 0.8561,
      "step": 550
    },
    {
      "epoch": 1.91,
      "eval_accuracy": 0.7013888955116272,
      "eval_loss": 0.7813519239425659,
      "eval_runtime": 90.1129,
      "eval_samples_per_second": 6.392,
      "eval_steps_per_second": 1.598,
      "step": 550
    },
    {
      "epoch": 1.94,
      "learning_rate": 3.530092592592593e-05,
      "loss": 0.7552,
      "step": 560
    },
    {
      "epoch": 1.94,
      "eval_accuracy": 0.6822916865348816,
      "eval_loss": 0.8651251196861267,
      "eval_runtime": 89.9146,
      "eval_samples_per_second": 6.406,
      "eval_steps_per_second": 1.602,
      "step": 560
    },
    {
      "epoch": 1.98,
      "learning_rate": 3.414351851851852e-05,
      "loss": 0.6972,
      "step": 570
    },
    {
      "epoch": 1.98,
      "eval_accuracy": 0.71875,
      "eval_loss": 0.7325252890586853,
      "eval_runtime": 90.7375,
      "eval_samples_per_second": 6.348,
      "eval_steps_per_second": 1.587,
      "step": 570
    },
    {
      "epoch": 2.01,
      "learning_rate": 3.2986111111111115e-05,
      "loss": 0.7483,
      "step": 580
    },
    {
      "epoch": 2.01,
      "eval_accuracy": 0.7690972089767456,
      "eval_loss": 0.6722133159637451,
      "eval_runtime": 90.3148,
      "eval_samples_per_second": 6.378,
      "eval_steps_per_second": 1.594,
      "step": 580
    },
    {
      "epoch": 2.05,
      "learning_rate": 3.182870370370371e-05,
      "loss": 0.5419,
      "step": 590
    },
    {
      "epoch": 2.05,
      "eval_accuracy": 0.7326388955116272,
      "eval_loss": 0.7046216130256653,
      "eval_runtime": 92.1898,
      "eval_samples_per_second": 6.248,
      "eval_steps_per_second": 1.562,
      "step": 590
    },
    {
      "epoch": 2.08,
      "learning_rate": 3.06712962962963e-05,
      "loss": 0.5203,
      "step": 600
    },
    {
      "epoch": 2.08,
      "eval_accuracy": 0.7326388955116272,
      "eval_loss": 0.7062063813209534,
      "eval_runtime": 90.2661,
      "eval_samples_per_second": 6.381,
      "eval_steps_per_second": 1.595,
      "step": 600
    },
    {
      "epoch": 2.12,
      "learning_rate": 2.951388888888889e-05,
      "loss": 0.5235,
      "step": 610
    },
    {
      "epoch": 2.12,
      "eval_accuracy": 0.7534722089767456,
      "eval_loss": 0.6795992255210876,
      "eval_runtime": 91.0148,
      "eval_samples_per_second": 6.329,
      "eval_steps_per_second": 1.582,
      "step": 610
    },
    {
      "epoch": 2.15,
      "learning_rate": 2.8356481481481483e-05,
      "loss": 0.514,
      "step": 620
    },
    {
      "epoch": 2.15,
      "eval_accuracy": 0.7204861044883728,
      "eval_loss": 0.746653139591217,
      "eval_runtime": 89.8044,
      "eval_samples_per_second": 6.414,
      "eval_steps_per_second": 1.603,
      "step": 620
    },
    {
      "epoch": 2.19,
      "learning_rate": 2.7199074074074076e-05,
      "loss": 0.5402,
      "step": 630
    },
    {
      "epoch": 2.19,
      "eval_accuracy": 0.7447916865348816,
      "eval_loss": 0.716274619102478,
      "eval_runtime": 90.8833,
      "eval_samples_per_second": 6.338,
      "eval_steps_per_second": 1.584,
      "step": 630
    },
    {
      "epoch": 2.22,
      "learning_rate": 2.604166666666667e-05,
      "loss": 0.7235,
      "step": 640
    },
    {
      "epoch": 2.22,
      "eval_accuracy": 0.7222222089767456,
      "eval_loss": 0.7545790076255798,
      "eval_runtime": 89.6633,
      "eval_samples_per_second": 6.424,
      "eval_steps_per_second": 1.606,
      "step": 640
    },
    {
      "epoch": 2.26,
      "learning_rate": 2.488425925925926e-05,
      "loss": 0.551,
      "step": 650
    },
    {
      "epoch": 2.26,
      "eval_accuracy": 0.7534722089767456,
      "eval_loss": 0.6994116902351379,
      "eval_runtime": 89.8085,
      "eval_samples_per_second": 6.414,
      "eval_steps_per_second": 1.603,
      "step": 650
    },
    {
      "epoch": 2.29,
      "learning_rate": 2.3726851851851854e-05,
      "loss": 0.5769,
      "step": 660
    },
    {
      "epoch": 2.29,
      "eval_accuracy": 0.7534722089767456,
      "eval_loss": 0.7151244282722473,
      "eval_runtime": 90.9508,
      "eval_samples_per_second": 6.333,
      "eval_steps_per_second": 1.583,
      "step": 660
    },
    {
      "epoch": 2.33,
      "learning_rate": 2.2569444444444447e-05,
      "loss": 0.5501,
      "step": 670
    },
    {
      "epoch": 2.33,
      "eval_accuracy": 0.7604166865348816,
      "eval_loss": 0.695513129234314,
      "eval_runtime": 90.6092,
      "eval_samples_per_second": 6.357,
      "eval_steps_per_second": 1.589,
      "step": 670
    },
    {
      "epoch": 2.36,
      "learning_rate": 2.141203703703704e-05,
      "loss": 0.5416,
      "step": 680
    },
    {
      "epoch": 2.36,
      "eval_accuracy": 0.7725694179534912,
      "eval_loss": 0.6533116102218628,
      "eval_runtime": 89.82,
      "eval_samples_per_second": 6.413,
      "eval_steps_per_second": 1.603,
      "step": 680
    },
    {
      "epoch": 2.4,
      "learning_rate": 2.0254629629629632e-05,
      "loss": 0.5452,
      "step": 690
    },
    {
      "epoch": 2.4,
      "eval_accuracy": 0.7777777910232544,
      "eval_loss": 0.6232606172561646,
      "eval_runtime": 90.4116,
      "eval_samples_per_second": 6.371,
      "eval_steps_per_second": 1.593,
      "step": 690
    },
    {
      "epoch": 2.43,
      "learning_rate": 1.9097222222222222e-05,
      "loss": 0.8518,
      "step": 700
    },
    {
      "epoch": 2.43,
      "eval_accuracy": 0.7777777910232544,
      "eval_loss": 0.6136298179626465,
      "eval_runtime": 90.8648,
      "eval_samples_per_second": 6.339,
      "eval_steps_per_second": 1.585,
      "step": 700
    },
    {
      "epoch": 2.47,
      "learning_rate": 1.7939814814814815e-05,
      "loss": 0.3372,
      "step": 710
    },
    {
      "epoch": 2.47,
      "eval_accuracy": 0.7986111044883728,
      "eval_loss": 0.5700623393058777,
      "eval_runtime": 90.1438,
      "eval_samples_per_second": 6.39,
      "eval_steps_per_second": 1.597,
      "step": 710
    },
    {
      "epoch": 2.5,
      "learning_rate": 1.6782407407407408e-05,
      "loss": 0.4488,
      "step": 720
    },
    {
      "epoch": 2.5,
      "eval_accuracy": 0.7847222089767456,
      "eval_loss": 0.5789040327072144,
      "eval_runtime": 90.7633,
      "eval_samples_per_second": 6.346,
      "eval_steps_per_second": 1.587,
      "step": 720
    },
    {
      "epoch": 2.53,
      "learning_rate": 1.5625e-05,
      "loss": 0.3977,
      "step": 730
    },
    {
      "epoch": 2.53,
      "eval_accuracy": 0.7829861044883728,
      "eval_loss": 0.5748720169067383,
      "eval_runtime": 89.6735,
      "eval_samples_per_second": 6.423,
      "eval_steps_per_second": 1.606,
      "step": 730
    }
  ],
  "max_steps": 864,
  "num_train_epochs": 3,
  "total_flos": 2.1573660231214095e+18,
  "trial_name": null,
  "trial_params": null
}