| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.999129299085764, | |
| "eval_steps": 100, | |
| "global_step": 1148, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 8.541543291462938, | |
| "learning_rate": 4.347826086956522e-09, | |
| "logits/chosen": -2.3435652256011963, | |
| "logits/rejected": -2.551011562347412, | |
| "logps/chosen": -314.6017150878906, | |
| "logps/rejected": -206.73678588867188, | |
| "loss": 0.7439, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.2740848958492279, | |
| "rewards/margins": -0.1701626181602478, | |
| "rewards/rejected": -0.1039222925901413, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 8.82551006058537, | |
| "learning_rate": 4.347826086956521e-08, | |
| "logits/chosen": -2.4006946086883545, | |
| "logits/rejected": -2.4231932163238525, | |
| "logps/chosen": -289.26739501953125, | |
| "logps/rejected": -272.8511962890625, | |
| "loss": 0.7793, | |
| "rewards/accuracies": 0.3888888955116272, | |
| "rewards/chosen": -0.30362409353256226, | |
| "rewards/margins": -0.10056456923484802, | |
| "rewards/rejected": -0.20305952429771423, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 9.957017780765279, | |
| "learning_rate": 8.695652173913042e-08, | |
| "logits/chosen": -2.366244077682495, | |
| "logits/rejected": -2.436676263809204, | |
| "logps/chosen": -304.1680908203125, | |
| "logps/rejected": -283.8580017089844, | |
| "loss": 0.7817, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.31308335065841675, | |
| "rewards/margins": -0.0013480648631229997, | |
| "rewards/rejected": -0.3117353320121765, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 9.441280424617158, | |
| "learning_rate": 1.3043478260869563e-07, | |
| "logits/chosen": -2.4001221656799316, | |
| "logits/rejected": -2.398263454437256, | |
| "logps/chosen": -299.45147705078125, | |
| "logps/rejected": -312.4380187988281, | |
| "loss": 0.808, | |
| "rewards/accuracies": 0.4312500059604645, | |
| "rewards/chosen": -0.40518712997436523, | |
| "rewards/margins": -0.170148104429245, | |
| "rewards/rejected": -0.23503902554512024, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 9.0042393863242, | |
| "learning_rate": 1.7391304347826085e-07, | |
| "logits/chosen": -2.309999704360962, | |
| "logits/rejected": -2.3476955890655518, | |
| "logps/chosen": -297.22235107421875, | |
| "logps/rejected": -302.00482177734375, | |
| "loss": 0.7707, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.2780631482601166, | |
| "rewards/margins": 0.08002600818872452, | |
| "rewards/rejected": -0.3580891489982605, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 9.03964644769571, | |
| "learning_rate": 2.1739130434782607e-07, | |
| "logits/chosen": -2.3683385848999023, | |
| "logits/rejected": -2.3768067359924316, | |
| "logps/chosen": -288.1864013671875, | |
| "logps/rejected": -275.24493408203125, | |
| "loss": 0.7913, | |
| "rewards/accuracies": 0.4312500059604645, | |
| "rewards/chosen": -0.24404355883598328, | |
| "rewards/margins": -0.0613979808986187, | |
| "rewards/rejected": -0.18264558911323547, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 9.171931303155361, | |
| "learning_rate": 2.6086956521739126e-07, | |
| "logits/chosen": -2.3965792655944824, | |
| "logits/rejected": -2.4396941661834717, | |
| "logps/chosen": -313.60186767578125, | |
| "logps/rejected": -277.5401611328125, | |
| "loss": 0.7896, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -0.3664000332355499, | |
| "rewards/margins": -0.11313207447528839, | |
| "rewards/rejected": -0.25326794385910034, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 9.303112541467058, | |
| "learning_rate": 3.043478260869565e-07, | |
| "logits/chosen": -2.3145995140075684, | |
| "logits/rejected": -2.3265597820281982, | |
| "logps/chosen": -288.7591247558594, | |
| "logps/rejected": -282.3059997558594, | |
| "loss": 0.7541, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.3928884267807007, | |
| "rewards/margins": -0.10098680108785629, | |
| "rewards/rejected": -0.2919016480445862, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 8.148953826123538, | |
| "learning_rate": 3.478260869565217e-07, | |
| "logits/chosen": -2.4307453632354736, | |
| "logits/rejected": -2.408132553100586, | |
| "logps/chosen": -264.6097106933594, | |
| "logps/rejected": -279.7698669433594, | |
| "loss": 0.7471, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.32603269815444946, | |
| "rewards/margins": 0.050616730004549026, | |
| "rewards/rejected": -0.3766494393348694, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 8.978142275251097, | |
| "learning_rate": 3.9130434782608694e-07, | |
| "logits/chosen": -2.3797852993011475, | |
| "logits/rejected": -2.370492935180664, | |
| "logps/chosen": -272.8327331542969, | |
| "logps/rejected": -269.51495361328125, | |
| "loss": 0.7172, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.3148571252822876, | |
| "rewards/margins": 0.12950611114501953, | |
| "rewards/rejected": -0.44436320662498474, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 8.101534684245069, | |
| "learning_rate": 4.3478260869565214e-07, | |
| "logits/chosen": -2.3395884037017822, | |
| "logits/rejected": -2.3768749237060547, | |
| "logps/chosen": -302.3770751953125, | |
| "logps/rejected": -289.23370361328125, | |
| "loss": 0.717, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.4626343846321106, | |
| "rewards/margins": 0.1476312279701233, | |
| "rewards/rejected": -0.6102656126022339, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_logits/chosen": -2.4191040992736816, | |
| "eval_logits/rejected": -2.5313072204589844, | |
| "eval_logps/chosen": -299.11651611328125, | |
| "eval_logps/rejected": -272.5022277832031, | |
| "eval_loss": 0.6005984544754028, | |
| "eval_rewards/accuracies": 0.6329113841056824, | |
| "eval_rewards/chosen": -0.09238887578248978, | |
| "eval_rewards/margins": 0.19752810895442963, | |
| "eval_rewards/rejected": -0.2899169921875, | |
| "eval_runtime": 116.6781, | |
| "eval_samples_per_second": 21.426, | |
| "eval_steps_per_second": 0.677, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 9.267934110515439, | |
| "learning_rate": 4.782608695652174e-07, | |
| "logits/chosen": -2.402156114578247, | |
| "logits/rejected": -2.372450351715088, | |
| "logps/chosen": -310.5009460449219, | |
| "logps/rejected": -306.57513427734375, | |
| "loss": 0.7206, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.4795568585395813, | |
| "rewards/margins": 0.2181818038225174, | |
| "rewards/rejected": -0.697738528251648, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 7.237175384641583, | |
| "learning_rate": 4.99971097144235e-07, | |
| "logits/chosen": -2.3216421604156494, | |
| "logits/rejected": -2.3850231170654297, | |
| "logps/chosen": -299.10943603515625, | |
| "logps/rejected": -275.91632080078125, | |
| "loss": 0.6797, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.4861680567264557, | |
| "rewards/margins": 0.2638343870639801, | |
| "rewards/rejected": -0.7500024437904358, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 8.125054397906975, | |
| "learning_rate": 4.997399143945734e-07, | |
| "logits/chosen": -2.319718599319458, | |
| "logits/rejected": -2.338841438293457, | |
| "logps/chosen": -293.934326171875, | |
| "logps/rejected": -285.8225402832031, | |
| "loss": 0.6779, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.5947624444961548, | |
| "rewards/margins": 0.22826068103313446, | |
| "rewards/rejected": -0.8230231404304504, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 8.82315581796221, | |
| "learning_rate": 4.992777627018238e-07, | |
| "logits/chosen": -2.2996668815612793, | |
| "logits/rejected": -2.334745168685913, | |
| "logps/chosen": -287.4745788574219, | |
| "logps/rejected": -276.83599853515625, | |
| "loss": 0.6759, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.6079668402671814, | |
| "rewards/margins": 0.3260408937931061, | |
| "rewards/rejected": -0.9340078234672546, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 7.961709787250932, | |
| "learning_rate": 4.985850694813964e-07, | |
| "logits/chosen": -2.302722454071045, | |
| "logits/rejected": -2.3677978515625, | |
| "logps/chosen": -314.6284484863281, | |
| "logps/rejected": -298.97955322265625, | |
| "loss": 0.658, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.638712465763092, | |
| "rewards/margins": 0.39987772703170776, | |
| "rewards/rejected": -1.0385901927947998, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 8.327864420359786, | |
| "learning_rate": 4.976624753622489e-07, | |
| "logits/chosen": -2.3713505268096924, | |
| "logits/rejected": -2.3584446907043457, | |
| "logps/chosen": -329.738037109375, | |
| "logps/rejected": -329.0250244140625, | |
| "loss": 0.6513, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.5834181308746338, | |
| "rewards/margins": 0.6477876901626587, | |
| "rewards/rejected": -1.231205940246582, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 8.149557814002582, | |
| "learning_rate": 4.965108335944079e-07, | |
| "logits/chosen": -2.3453166484832764, | |
| "logits/rejected": -2.3316988945007324, | |
| "logps/chosen": -295.4629821777344, | |
| "logps/rejected": -303.3232116699219, | |
| "loss": 0.6467, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.6690499782562256, | |
| "rewards/margins": 0.5605732798576355, | |
| "rewards/rejected": -1.2296231985092163, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 10.113037082589418, | |
| "learning_rate": 4.951312092598509e-07, | |
| "logits/chosen": -2.3545076847076416, | |
| "logits/rejected": -2.3892111778259277, | |
| "logps/chosen": -293.5394592285156, | |
| "logps/rejected": -310.970947265625, | |
| "loss": 0.6429, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.7175970077514648, | |
| "rewards/margins": 0.3378085494041443, | |
| "rewards/rejected": -1.0554054975509644, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 7.2425787929751975, | |
| "learning_rate": 4.935248782874788e-07, | |
| "logits/chosen": -2.3394787311553955, | |
| "logits/rejected": -2.4027209281921387, | |
| "logps/chosen": -322.01141357421875, | |
| "logps/rejected": -312.63641357421875, | |
| "loss": 0.6074, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.685249388217926, | |
| "rewards/margins": 0.5340052843093872, | |
| "rewards/rejected": -1.219254732131958, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 8.490660520819826, | |
| "learning_rate": 4.916933262730875e-07, | |
| "logits/chosen": -2.3357110023498535, | |
| "logits/rejected": -2.3726134300231934, | |
| "logps/chosen": -283.4224548339844, | |
| "logps/rejected": -292.6156311035156, | |
| "loss": 0.6273, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.7353588342666626, | |
| "rewards/margins": 0.36572274565696716, | |
| "rewards/rejected": -1.1010816097259521, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_logits/chosen": -2.427767753601074, | |
| "eval_logits/rejected": -2.5280518531799316, | |
| "eval_logps/chosen": -305.2568054199219, | |
| "eval_logps/rejected": -285.6261291503906, | |
| "eval_loss": 0.5159560441970825, | |
| "eval_rewards/accuracies": 0.6930379867553711, | |
| "eval_rewards/chosen": -0.39940303564071655, | |
| "eval_rewards/margins": 0.5467095971107483, | |
| "eval_rewards/rejected": -0.9461126327514648, | |
| "eval_runtime": 117.3962, | |
| "eval_samples_per_second": 21.295, | |
| "eval_steps_per_second": 0.673, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 8.576550556267508, | |
| "learning_rate": 4.896382471054332e-07, | |
| "logits/chosen": -2.3324050903320312, | |
| "logits/rejected": -2.3912084102630615, | |
| "logps/chosen": -288.8896789550781, | |
| "logps/rejected": -268.7303161621094, | |
| "loss": 0.6128, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.6878677606582642, | |
| "rewards/margins": 0.49019718170166016, | |
| "rewards/rejected": -1.1780649423599243, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 7.314590858955313, | |
| "learning_rate": 4.873615413996599e-07, | |
| "logits/chosen": -2.3278536796569824, | |
| "logits/rejected": -2.3542141914367676, | |
| "logps/chosen": -320.20867919921875, | |
| "logps/rejected": -344.2962646484375, | |
| "loss": 0.6034, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.8140638470649719, | |
| "rewards/margins": 0.6731799244880676, | |
| "rewards/rejected": -1.487243890762329, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 7.89990023398186, | |
| "learning_rate": 4.848653147395379e-07, | |
| "logits/chosen": -2.3761696815490723, | |
| "logits/rejected": -2.3647568225860596, | |
| "logps/chosen": -278.48651123046875, | |
| "logps/rejected": -313.1923522949219, | |
| "loss": 0.5758, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.8184002637863159, | |
| "rewards/margins": 0.5446540117263794, | |
| "rewards/rejected": -1.3630542755126953, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 6.865979693460653, | |
| "learning_rate": 4.821518757301406e-07, | |
| "logits/chosen": -2.3292605876922607, | |
| "logits/rejected": -2.3688724040985107, | |
| "logps/chosen": -266.1036071777344, | |
| "logps/rejected": -297.202392578125, | |
| "loss": 0.5331, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -0.7772084474563599, | |
| "rewards/margins": 0.8323043584823608, | |
| "rewards/rejected": -1.6095129251480103, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 7.1462197132774925, | |
| "learning_rate": 4.792237338627589e-07, | |
| "logits/chosen": -2.360563278198242, | |
| "logits/rejected": -2.3513259887695312, | |
| "logps/chosen": -300.3686828613281, | |
| "logps/rejected": -312.9150390625, | |
| "loss": 0.5817, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.8266876339912415, | |
| "rewards/margins": 0.8314968347549438, | |
| "rewards/rejected": -1.6581846475601196, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 7.9450903494413865, | |
| "learning_rate": 4.76083597194028e-07, | |
| "logits/chosen": -2.3549387454986572, | |
| "logits/rejected": -2.3544278144836426, | |
| "logps/chosen": -274.65362548828125, | |
| "logps/rejected": -294.8714294433594, | |
| "loss": 0.5582, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.8873316645622253, | |
| "rewards/margins": 0.63465416431427, | |
| "rewards/rejected": -1.5219857692718506, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 8.86333832366373, | |
| "learning_rate": 4.72734369841415e-07, | |
| "logits/chosen": -2.268969774246216, | |
| "logits/rejected": -2.290459632873535, | |
| "logps/chosen": -326.5730285644531, | |
| "logps/rejected": -340.42547607421875, | |
| "loss": 0.5535, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.9808245897293091, | |
| "rewards/margins": 0.7018911838531494, | |
| "rewards/rejected": -1.6827157735824585, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 9.058341702180591, | |
| "learning_rate": 4.691791492973796e-07, | |
| "logits/chosen": -2.2908778190612793, | |
| "logits/rejected": -2.3170814514160156, | |
| "logps/chosen": -285.4623107910156, | |
| "logps/rejected": -287.90460205078125, | |
| "loss": 0.568, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.9195302128791809, | |
| "rewards/margins": 0.8237813115119934, | |
| "rewards/rejected": -1.7433115243911743, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 8.549379026648278, | |
| "learning_rate": 4.6542122356469673e-07, | |
| "logits/chosen": -2.3191022872924805, | |
| "logits/rejected": -2.2784862518310547, | |
| "logps/chosen": -290.696533203125, | |
| "logps/rejected": -345.26922607421875, | |
| "loss": 0.566, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.867855429649353, | |
| "rewards/margins": 1.0541679859161377, | |
| "rewards/rejected": -1.9220234155654907, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 8.117986354757171, | |
| "learning_rate": 4.6146406811558627e-07, | |
| "logits/chosen": -2.2675626277923584, | |
| "logits/rejected": -2.2998242378234863, | |
| "logps/chosen": -300.87139892578125, | |
| "logps/rejected": -308.83258056640625, | |
| "loss": 0.5538, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.039670467376709, | |
| "rewards/margins": 0.7833054065704346, | |
| "rewards/rejected": -1.8229758739471436, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_logits/chosen": -2.410975217819214, | |
| "eval_logits/rejected": -2.4996373653411865, | |
| "eval_logps/chosen": -310.4469909667969, | |
| "eval_logps/rejected": -298.4869689941406, | |
| "eval_loss": 0.47806617617607117, | |
| "eval_rewards/accuracies": 0.7246835231781006, | |
| "eval_rewards/chosen": -0.6589112877845764, | |
| "eval_rewards/margins": 0.9302425980567932, | |
| "eval_rewards/rejected": -1.58915376663208, | |
| "eval_runtime": 116.9313, | |
| "eval_samples_per_second": 21.38, | |
| "eval_steps_per_second": 0.676, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 7.789987130313136, | |
| "learning_rate": 4.5731134267746464e-07, | |
| "logits/chosen": -2.3223114013671875, | |
| "logits/rejected": -2.351526975631714, | |
| "logps/chosen": -294.28973388671875, | |
| "logps/rejected": -294.4921569824219, | |
| "loss": 0.5404, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.9947312474250793, | |
| "rewards/margins": 0.8857283592224121, | |
| "rewards/rejected": -1.8804595470428467, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 6.695877210721463, | |
| "learning_rate": 4.529668878482905e-07, | |
| "logits/chosen": -2.278529644012451, | |
| "logits/rejected": -2.322715997695923, | |
| "logps/chosen": -302.17156982421875, | |
| "logps/rejected": -311.0806884765625, | |
| "loss": 0.5469, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.9337432980537415, | |
| "rewards/margins": 0.9280776977539062, | |
| "rewards/rejected": -1.861820936203003, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 7.390355171835368, | |
| "learning_rate": 4.484347215446336e-07, | |
| "logits/chosen": -2.2755682468414307, | |
| "logits/rejected": -2.3354978561401367, | |
| "logps/chosen": -309.1221008300781, | |
| "logps/rejected": -321.5992431640625, | |
| "loss": 0.5314, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -0.9960958361625671, | |
| "rewards/margins": 0.8858348727226257, | |
| "rewards/rejected": -1.8819307088851929, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 7.988169852743586, | |
| "learning_rate": 4.4371903528575345e-07, | |
| "logits/chosen": -2.279055118560791, | |
| "logits/rejected": -2.293339490890503, | |
| "logps/chosen": -313.377685546875, | |
| "logps/rejected": -320.6536560058594, | |
| "loss": 0.5217, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.1063129901885986, | |
| "rewards/margins": 0.950600802898407, | |
| "rewards/rejected": -2.0569138526916504, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 8.25715266845146, | |
| "learning_rate": 4.3882419031712327e-07, | |
| "logits/chosen": -2.3251774311065674, | |
| "logits/rejected": -2.3021457195281982, | |
| "logps/chosen": -284.44891357421875, | |
| "logps/rejected": -304.513671875, | |
| "loss": 0.5226, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.952631950378418, | |
| "rewards/margins": 1.1419737339019775, | |
| "rewards/rejected": -2.0946059226989746, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 7.634851788477121, | |
| "learning_rate": 4.33754713576985e-07, | |
| "logits/chosen": -2.2681329250335693, | |
| "logits/rejected": -2.3093745708465576, | |
| "logps/chosen": -264.74871826171875, | |
| "logps/rejected": -277.2441711425781, | |
| "loss": 0.554, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.1214876174926758, | |
| "rewards/margins": 0.9260104298591614, | |
| "rewards/rejected": -2.0474982261657715, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 7.279488194032858, | |
| "learning_rate": 4.2851529350966486e-07, | |
| "logits/chosen": -2.2603583335876465, | |
| "logits/rejected": -2.266740560531616, | |
| "logps/chosen": -283.461669921875, | |
| "logps/rejected": -318.08355712890625, | |
| "loss": 0.5216, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -1.002974271774292, | |
| "rewards/margins": 1.0799973011016846, | |
| "rewards/rejected": -2.0829715728759766, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 7.8421017282099585, | |
| "learning_rate": 4.231107757295223e-07, | |
| "logits/chosen": -2.2358787059783936, | |
| "logits/rejected": -2.279066801071167, | |
| "logps/chosen": -297.420654296875, | |
| "logps/rejected": -306.3590393066406, | |
| "loss": 0.5241, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.1632144451141357, | |
| "rewards/margins": 1.0475361347198486, | |
| "rewards/rejected": -2.2107505798339844, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 7.394520753535137, | |
| "learning_rate": 4.175461585395423e-07, | |
| "logits/chosen": -2.217916965484619, | |
| "logits/rejected": -2.3138163089752197, | |
| "logps/chosen": -317.17303466796875, | |
| "logps/rejected": -288.50323486328125, | |
| "loss": 0.5381, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -0.9419029951095581, | |
| "rewards/margins": 1.1881153583526611, | |
| "rewards/rejected": -2.130018472671509, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 8.212800923068182, | |
| "learning_rate": 4.118265883087153e-07, | |
| "logits/chosen": -2.2654032707214355, | |
| "logits/rejected": -2.2785887718200684, | |
| "logps/chosen": -285.03448486328125, | |
| "logps/rejected": -328.69390869140625, | |
| "loss": 0.5056, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.0962527990341187, | |
| "rewards/margins": 1.4823755025863647, | |
| "rewards/rejected": -2.5786283016204834, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_logits/chosen": -2.364351511001587, | |
| "eval_logits/rejected": -2.447211265563965, | |
| "eval_logps/chosen": -313.8343505859375, | |
| "eval_logps/rejected": -309.3686828613281, | |
| "eval_loss": 0.45939481258392334, | |
| "eval_rewards/accuracies": 0.7436708807945251, | |
| "eval_rewards/chosen": -0.8282797932624817, | |
| "eval_rewards/margins": 1.3049596548080444, | |
| "eval_rewards/rejected": -2.133239507675171, | |
| "eval_runtime": 116.733, | |
| "eval_samples_per_second": 21.416, | |
| "eval_steps_per_second": 0.677, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 8.827849836435306, | |
| "learning_rate": 4.059573547124793e-07, | |
| "logits/chosen": -2.2229080200195312, | |
| "logits/rejected": -2.2402656078338623, | |
| "logps/chosen": -290.5343933105469, | |
| "logps/rejected": -333.4751892089844, | |
| "loss": 0.5073, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -1.0698788166046143, | |
| "rewards/margins": 1.2725006341934204, | |
| "rewards/rejected": -2.342379331588745, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 17.168544254244743, | |
| "learning_rate": 3.9994388584062835e-07, | |
| "logits/chosen": -2.2369210720062256, | |
| "logits/rejected": -2.2701995372772217, | |
| "logps/chosen": -310.66650390625, | |
| "logps/rejected": -343.23736572265625, | |
| "loss": 0.4973, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.10798180103302, | |
| "rewards/margins": 1.2380707263946533, | |
| "rewards/rejected": -2.346052646636963, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 7.900686275091842, | |
| "learning_rate": 3.9379174317720867e-07, | |
| "logits/chosen": -2.2615458965301514, | |
| "logits/rejected": -2.2584452629089355, | |
| "logps/chosen": -253.0293731689453, | |
| "logps/rejected": -310.45562744140625, | |
| "loss": 0.5226, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -1.0362132787704468, | |
| "rewards/margins": 1.1289708614349365, | |
| "rewards/rejected": -2.1651840209960938, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 7.866159803847311, | |
| "learning_rate": 3.875066164570476e-07, | |
| "logits/chosen": -2.2734320163726807, | |
| "logits/rejected": -2.2685108184814453, | |
| "logps/chosen": -310.897705078125, | |
| "logps/rejected": -311.92559814453125, | |
| "loss": 0.4877, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.9617986679077148, | |
| "rewards/margins": 1.4187655448913574, | |
| "rewards/rejected": -2.3805642127990723, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 8.068518521527759, | |
| "learning_rate": 3.8109431840367094e-07, | |
| "logits/chosen": -2.2477684020996094, | |
| "logits/rejected": -2.2789039611816406, | |
| "logps/chosen": -315.4168395996094, | |
| "logps/rejected": -322.646728515625, | |
| "loss": 0.5117, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -1.0208594799041748, | |
| "rewards/margins": 1.3095576763153076, | |
| "rewards/rejected": -2.3304171562194824, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 8.68774893484682, | |
| "learning_rate": 3.7456077935347586e-07, | |
| "logits/chosen": -2.279040813446045, | |
| "logits/rejected": -2.3296942710876465, | |
| "logps/chosen": -268.678955078125, | |
| "logps/rejected": -305.7779235839844, | |
| "loss": 0.5259, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -0.930595874786377, | |
| "rewards/margins": 1.5479636192321777, | |
| "rewards/rejected": -2.4785592555999756, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 7.377326318454655, | |
| "learning_rate": 3.6791204177113076e-07, | |
| "logits/chosen": -2.2461414337158203, | |
| "logits/rejected": -2.261500358581543, | |
| "logps/chosen": -288.28131103515625, | |
| "logps/rejected": -323.84259033203125, | |
| "loss": 0.5055, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.9557926058769226, | |
| "rewards/margins": 1.3024319410324097, | |
| "rewards/rejected": -2.2582249641418457, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 9.031552997233952, | |
| "learning_rate": 3.6115425466127523e-07, | |
| "logits/chosen": -2.211353302001953, | |
| "logits/rejected": -2.285994529724121, | |
| "logps/chosen": -307.39508056640625, | |
| "logps/rejected": -309.01617431640625, | |
| "loss": 0.5169, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.0073463916778564, | |
| "rewards/margins": 1.2757568359375, | |
| "rewards/rejected": -2.2831032276153564, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 8.363906043420153, | |
| "learning_rate": 3.5429366788168686e-07, | |
| "logits/chosen": -2.2397665977478027, | |
| "logits/rejected": -2.2399213314056396, | |
| "logps/chosen": -320.8814697265625, | |
| "logps/rejected": -316.41204833984375, | |
| "loss": 0.5137, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.077854871749878, | |
| "rewards/margins": 1.0860309600830078, | |
| "rewards/rejected": -2.1638855934143066, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 8.777454709865056, | |
| "learning_rate": 3.4733662636317615e-07, | |
| "logits/chosen": -2.26035213470459, | |
| "logits/rejected": -2.282977342605591, | |
| "logps/chosen": -323.86785888671875, | |
| "logps/rejected": -323.78558349609375, | |
| "loss": 0.4983, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.0498888492584229, | |
| "rewards/margins": 1.2481728792190552, | |
| "rewards/rejected": -2.2980618476867676, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_logits/chosen": -2.3404455184936523, | |
| "eval_logits/rejected": -2.422346830368042, | |
| "eval_logps/chosen": -312.7843322753906, | |
| "eval_logps/rejected": -312.31671142578125, | |
| "eval_loss": 0.4511754512786865, | |
| "eval_rewards/accuracies": 0.746835470199585, | |
| "eval_rewards/chosen": -0.7757795453071594, | |
| "eval_rewards/margins": 1.5048617124557495, | |
| "eval_rewards/rejected": -2.2806413173675537, | |
| "eval_runtime": 116.6502, | |
| "eval_samples_per_second": 21.432, | |
| "eval_steps_per_second": 0.677, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 7.734353981694666, | |
| "learning_rate": 3.4028956424155383e-07, | |
| "logits/chosen": -2.2213361263275146, | |
| "logits/rejected": -2.2351489067077637, | |
| "logps/chosen": -306.02081298828125, | |
| "logps/rejected": -336.88385009765625, | |
| "loss": 0.489, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.0291322469711304, | |
| "rewards/margins": 1.3191864490509033, | |
| "rewards/rejected": -2.3483192920684814, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 8.664121177886866, | |
| "learning_rate": 3.3315899890709766e-07, | |
| "logits/chosen": -2.2391321659088135, | |
| "logits/rejected": -2.2733283042907715, | |
| "logps/chosen": -320.98895263671875, | |
| "logps/rejected": -328.81097412109375, | |
| "loss": 0.489, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -1.1005600690841675, | |
| "rewards/margins": 1.4874314069747925, | |
| "rewards/rejected": -2.58799147605896, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 8.147807181233981, | |
| "learning_rate": 3.259515249770236e-07, | |
| "logits/chosen": -2.1741220951080322, | |
| "logits/rejected": -2.2048075199127197, | |
| "logps/chosen": -299.4497375488281, | |
| "logps/rejected": -341.54168701171875, | |
| "loss": 0.5198, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.1551388502120972, | |
| "rewards/margins": 1.6270313262939453, | |
| "rewards/rejected": -2.782170057296753, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 7.750929958584628, | |
| "learning_rate": 3.186738081965329e-07, | |
| "logits/chosen": -2.202561616897583, | |
| "logits/rejected": -2.2528486251831055, | |
| "logps/chosen": -294.01123046875, | |
| "logps/rejected": -289.1777648925781, | |
| "loss": 0.5261, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -1.140953779220581, | |
| "rewards/margins": 1.2280247211456299, | |
| "rewards/rejected": -2.368978500366211, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 11.076352034341378, | |
| "learning_rate": 3.1133257927407875e-07, | |
| "logits/chosen": -2.1735188961029053, | |
| "logits/rejected": -2.2224655151367188, | |
| "logps/chosen": -305.57177734375, | |
| "logps/rejected": -329.1368713378906, | |
| "loss": 0.479, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.2371329069137573, | |
| "rewards/margins": 1.3205959796905518, | |
| "rewards/rejected": -2.5577290058135986, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 8.548263020458736, | |
| "learning_rate": 3.0393462765655133e-07, | |
| "logits/chosen": -2.256953001022339, | |
| "logits/rejected": -2.2490811347961426, | |
| "logps/chosen": -324.26397705078125, | |
| "logps/rejected": -332.2065124511719, | |
| "loss": 0.5242, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.207909345626831, | |
| "rewards/margins": 1.1256216764450073, | |
| "rewards/rejected": -2.333531141281128, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 10.129228469440344, | |
| "learning_rate": 2.9648679525014047e-07, | |
| "logits/chosen": -2.249263286590576, | |
| "logits/rejected": -2.224191188812256, | |
| "logps/chosen": -301.6817626953125, | |
| "logps/rejected": -317.6774597167969, | |
| "loss": 0.5166, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.9407655000686646, | |
| "rewards/margins": 1.2918128967285156, | |
| "rewards/rejected": -2.2325782775878906, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 7.601401935918346, | |
| "learning_rate": 2.88995970092681e-07, | |
| "logits/chosen": -2.228137254714966, | |
| "logits/rejected": -2.2642438411712646, | |
| "logps/chosen": -346.5960388183594, | |
| "logps/rejected": -334.5989074707031, | |
| "loss": 0.4668, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -1.0293288230895996, | |
| "rewards/margins": 1.4567983150482178, | |
| "rewards/rejected": -2.4861273765563965, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 7.059735053355466, | |
| "learning_rate": 2.8146907998333414e-07, | |
| "logits/chosen": -2.2480270862579346, | |
| "logits/rejected": -2.2125768661499023, | |
| "logps/chosen": -292.4286804199219, | |
| "logps/rejected": -345.6949157714844, | |
| "loss": 0.4438, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.9727999567985535, | |
| "rewards/margins": 1.7654082775115967, | |
| "rewards/rejected": -2.738208293914795, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 7.162224042755479, | |
| "learning_rate": 2.7391308607549617e-07, | |
| "logits/chosen": -2.2136716842651367, | |
| "logits/rejected": -2.257483720779419, | |
| "logps/chosen": -302.130126953125, | |
| "logps/rejected": -333.96771240234375, | |
| "loss": 0.4662, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.0814803838729858, | |
| "rewards/margins": 1.416599988937378, | |
| "rewards/rejected": -2.498080253601074, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "eval_logits/chosen": -2.3215160369873047, | |
| "eval_logits/rejected": -2.4048969745635986, | |
| "eval_logps/chosen": -312.94647216796875, | |
| "eval_logps/rejected": -314.7354736328125, | |
| "eval_loss": 0.44313427805900574, | |
| "eval_rewards/accuracies": 0.7658227682113647, | |
| "eval_rewards/chosen": -0.78388512134552, | |
| "eval_rewards/margins": 1.6176937818527222, | |
| "eval_rewards/rejected": -2.401578903198242, | |
| "eval_runtime": 116.9664, | |
| "eval_samples_per_second": 21.374, | |
| "eval_steps_per_second": 0.675, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 5.7685048393945015, | |
| "learning_rate": 2.663349764388602e-07, | |
| "logits/chosen": -2.224734306335449, | |
| "logits/rejected": -2.222229480743408, | |
| "logps/chosen": -305.4608154296875, | |
| "logps/rejected": -343.47027587890625, | |
| "loss": 0.4284, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.9569365382194519, | |
| "rewards/margins": 1.9764817953109741, | |
| "rewards/rejected": -2.9334182739257812, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 7.994083679128179, | |
| "learning_rate": 2.587417595965833e-07, | |
| "logits/chosen": -2.2422053813934326, | |
| "logits/rejected": -2.2728614807128906, | |
| "logps/chosen": -274.6269226074219, | |
| "logps/rejected": -284.4000549316406, | |
| "loss": 0.4676, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.9759398698806763, | |
| "rewards/margins": 1.389269232749939, | |
| "rewards/rejected": -2.3652091026306152, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 6.234612520244718, | |
| "learning_rate": 2.511404580435399e-07, | |
| "logits/chosen": -2.2112672328948975, | |
| "logits/rejected": -2.222592353820801, | |
| "logps/chosen": -300.41253662109375, | |
| "logps/rejected": -317.8959655761719, | |
| "loss": 0.4188, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -1.077165961265564, | |
| "rewards/margins": 1.776564598083496, | |
| "rewards/rejected": -2.8537306785583496, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 7.4969961974893184, | |
| "learning_rate": 2.435381017516511e-07, | |
| "logits/chosen": -2.182955026626587, | |
| "logits/rejected": -2.194115161895752, | |
| "logps/chosen": -302.7352600097656, | |
| "logps/rejected": -356.3200378417969, | |
| "loss": 0.4409, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.1854764223098755, | |
| "rewards/margins": 1.6020820140838623, | |
| "rewards/rejected": -2.7875583171844482, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 7.325319067879207, | |
| "learning_rate": 2.3594172166830066e-07, | |
| "logits/chosen": -2.1576247215270996, | |
| "logits/rejected": -2.212817907333374, | |
| "logps/chosen": -327.29693603515625, | |
| "logps/rejected": -323.61724853515625, | |
| "loss": 0.4553, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.041488766670227, | |
| "rewards/margins": 1.4408646821975708, | |
| "rewards/rejected": -2.4823532104492188, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 6.790105275708977, | |
| "learning_rate": 2.2835834321384799e-07, | |
| "logits/chosen": -2.214137554168701, | |
| "logits/rejected": -2.2264151573181152, | |
| "logps/chosen": -321.0436706542969, | |
| "logps/rejected": -328.35552978515625, | |
| "loss": 0.4503, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -1.0654566287994385, | |
| "rewards/margins": 1.718808889389038, | |
| "rewards/rejected": -2.7842652797698975, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 7.062324933750996, | |
| "learning_rate": 2.20794979784253e-07, | |
| "logits/chosen": -2.1624903678894043, | |
| "logits/rejected": -2.247901678085327, | |
| "logps/chosen": -319.1899719238281, | |
| "logps/rejected": -320.76129150390625, | |
| "loss": 0.4674, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.059831976890564, | |
| "rewards/margins": 1.5487343072891235, | |
| "rewards/rejected": -2.6085660457611084, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 6.8992559586043845, | |
| "learning_rate": 2.132586262648217e-07, | |
| "logits/chosen": -2.1993870735168457, | |
| "logits/rejected": -2.2455170154571533, | |
| "logps/chosen": -342.77874755859375, | |
| "logps/rejected": -336.9671936035156, | |
| "loss": 0.4273, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.1384742259979248, | |
| "rewards/margins": 1.6727256774902344, | |
| "rewards/rejected": -2.811199903488159, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 7.798612367752025, | |
| "learning_rate": 2.0575625256107107e-07, | |
| "logits/chosen": -2.1738362312316895, | |
| "logits/rejected": -2.1713316440582275, | |
| "logps/chosen": -282.6869201660156, | |
| "logps/rejected": -328.921142578125, | |
| "loss": 0.4407, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -0.9972604513168335, | |
| "rewards/margins": 1.8666757345199585, | |
| "rewards/rejected": -2.863936424255371, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 7.992242941279979, | |
| "learning_rate": 1.9829479715269584e-07, | |
| "logits/chosen": -2.2045226097106934, | |
| "logits/rejected": -2.209587812423706, | |
| "logps/chosen": -309.096923828125, | |
| "logps/rejected": -335.237548828125, | |
| "loss": 0.4411, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.1609227657318115, | |
| "rewards/margins": 1.7247358560562134, | |
| "rewards/rejected": -2.8856582641601562, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "eval_logits/chosen": -2.3015921115875244, | |
| "eval_logits/rejected": -2.3840179443359375, | |
| "eval_logps/chosen": -317.4481201171875, | |
| "eval_logps/rejected": -321.8678894042969, | |
| "eval_loss": 0.44147032499313354, | |
| "eval_rewards/accuracies": 0.7689873576164246, | |
| "eval_rewards/chosen": -1.0089699029922485, | |
| "eval_rewards/margins": 1.7492305040359497, | |
| "eval_rewards/rejected": -2.7582004070281982, | |
| "eval_runtime": 116.9747, | |
| "eval_samples_per_second": 21.372, | |
| "eval_steps_per_second": 0.675, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 8.274421110713899, | |
| "learning_rate": 1.908811606765996e-07, | |
| "logits/chosen": -2.184628963470459, | |
| "logits/rejected": -2.218219518661499, | |
| "logps/chosen": -313.20037841796875, | |
| "logps/rejected": -332.700927734375, | |
| "loss": 0.4436, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.1558970212936401, | |
| "rewards/margins": 1.7350355386734009, | |
| "rewards/rejected": -2.89093279838562, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 8.088194505594497, | |
| "learning_rate": 1.8352219954492414e-07, | |
| "logits/chosen": -2.1805386543273926, | |
| "logits/rejected": -2.221952438354492, | |
| "logps/chosen": -306.16717529296875, | |
| "logps/rejected": -312.39544677734375, | |
| "loss": 0.4605, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -1.1549646854400635, | |
| "rewards/margins": 1.5584920644760132, | |
| "rewards/rejected": -2.713456630706787, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 5.472862021460433, | |
| "learning_rate": 1.7622471960397922e-07, | |
| "logits/chosen": -2.1999268531799316, | |
| "logits/rejected": -2.2310051918029785, | |
| "logps/chosen": -297.07659912109375, | |
| "logps/rejected": -292.7288513183594, | |
| "loss": 0.4282, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -1.087530493736267, | |
| "rewards/margins": 1.8494288921356201, | |
| "rewards/rejected": -2.9369590282440186, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 7.472696962004615, | |
| "learning_rate": 1.6899546983993814e-07, | |
| "logits/chosen": -2.207268476486206, | |
| "logits/rejected": -2.2193145751953125, | |
| "logps/chosen": -333.41241455078125, | |
| "logps/rejected": -339.80670166015625, | |
| "loss": 0.4344, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -1.152045488357544, | |
| "rewards/margins": 1.8111851215362549, | |
| "rewards/rejected": -2.963230609893799, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 7.854994688865555, | |
| "learning_rate": 1.618411361371193e-07, | |
| "logits/chosen": -2.241340398788452, | |
| "logits/rejected": -2.2305684089660645, | |
| "logps/chosen": -322.3190612792969, | |
| "logps/rejected": -343.23797607421875, | |
| "loss": 0.4631, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -1.183625340461731, | |
| "rewards/margins": 1.5439379215240479, | |
| "rewards/rejected": -2.7275633811950684, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 8.512012836575265, | |
| "learning_rate": 1.547683350946268e-07, | |
| "logits/chosen": -2.2200608253479004, | |
| "logits/rejected": -2.225961923599243, | |
| "logps/chosen": -326.2708740234375, | |
| "logps/rejected": -365.87689208984375, | |
| "loss": 0.4516, | |
| "rewards/accuracies": 0.887499988079071, | |
| "rewards/chosen": -1.0509939193725586, | |
| "rewards/margins": 2.1092488765716553, | |
| "rewards/rejected": -3.160243034362793, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 7.243888365117908, | |
| "learning_rate": 1.477836079070687e-07, | |
| "logits/chosen": -2.1900453567504883, | |
| "logits/rejected": -2.194736957550049, | |
| "logps/chosen": -297.6527404785156, | |
| "logps/rejected": -324.55035400390625, | |
| "loss": 0.4447, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -1.1286150217056274, | |
| "rewards/margins": 1.6178051233291626, | |
| "rewards/rejected": -2.74642014503479, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "grad_norm": 8.27216995738527, | |
| "learning_rate": 1.4089341431501228e-07, | |
| "logits/chosen": -2.1735267639160156, | |
| "logits/rejected": -2.2048020362854004, | |
| "logps/chosen": -341.09429931640625, | |
| "logps/rejected": -362.09521484375, | |
| "loss": 0.4533, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -1.1955785751342773, | |
| "rewards/margins": 2.0903077125549316, | |
| "rewards/rejected": -3.285886287689209, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 6.73898031572128, | |
| "learning_rate": 1.3410412663077075e-07, | |
| "logits/chosen": -2.203127384185791, | |
| "logits/rejected": -2.1927337646484375, | |
| "logps/chosen": -297.426513671875, | |
| "logps/rejected": -331.25115966796875, | |
| "loss": 0.4387, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.2261813879013062, | |
| "rewards/margins": 1.633111596107483, | |
| "rewards/rejected": -2.859293222427368, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 8.710779913480414, | |
| "learning_rate": 1.2742202384504757e-07, | |
| "logits/chosen": -2.2027111053466797, | |
| "logits/rejected": -2.2479450702667236, | |
| "logps/chosen": -331.02740478515625, | |
| "logps/rejected": -364.1605224609375, | |
| "loss": 0.471, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -1.1860400438308716, | |
| "rewards/margins": 1.6458642482757568, | |
| "rewards/rejected": -2.831904172897339, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "eval_logits/chosen": -2.299058437347412, | |
| "eval_logits/rejected": -2.3809187412261963, | |
| "eval_logps/chosen": -316.5019226074219, | |
| "eval_logps/rejected": -321.5930480957031, | |
| "eval_loss": 0.4368092715740204, | |
| "eval_rewards/accuracies": 0.7689873576164246, | |
| "eval_rewards/chosen": -0.9616590142250061, | |
| "eval_rewards/margins": 1.7827986478805542, | |
| "eval_rewards/rejected": -2.744457483291626, | |
| "eval_runtime": 117.6971, | |
| "eval_samples_per_second": 21.241, | |
| "eval_steps_per_second": 0.671, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 7.470099061565703, | |
| "learning_rate": 1.208532858198875e-07, | |
| "logits/chosen": -2.19744610786438, | |
| "logits/rejected": -2.2142927646636963, | |
| "logps/chosen": -326.9881591796875, | |
| "logps/rejected": -365.5570983886719, | |
| "loss": 0.4298, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -1.2280399799346924, | |
| "rewards/margins": 1.8768724203109741, | |
| "rewards/rejected": -3.104912519454956, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 6.878834958249367, | |
| "learning_rate": 1.1440398757330589e-07, | |
| "logits/chosen": -2.16925048828125, | |
| "logits/rejected": -2.2376468181610107, | |
| "logps/chosen": -333.90338134765625, | |
| "logps/rejected": -332.63299560546875, | |
| "loss": 0.4255, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -1.1526517868041992, | |
| "rewards/margins": 1.8892349004745483, | |
| "rewards/rejected": -3.041886806488037, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 6.741909833219955, | |
| "learning_rate": 1.0808009366088155e-07, | |
| "logits/chosen": -2.205474853515625, | |
| "logits/rejected": -2.2162137031555176, | |
| "logps/chosen": -325.2530212402344, | |
| "logps/rejected": -318.7095031738281, | |
| "loss": 0.4789, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -1.2332874536514282, | |
| "rewards/margins": 1.781602144241333, | |
| "rewards/rejected": -3.0148894786834717, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 7.3191732297036225, | |
| "learning_rate": 1.0188745265950985e-07, | |
| "logits/chosen": -2.229236602783203, | |
| "logits/rejected": -2.2648098468780518, | |
| "logps/chosen": -308.75396728515625, | |
| "logps/rejected": -331.4061279296875, | |
| "loss": 0.4429, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -1.206955075263977, | |
| "rewards/margins": 1.5560095310211182, | |
| "rewards/rejected": -2.7629647254943848, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 8.26769354330985, | |
| "learning_rate": 9.583179175841666e-08, | |
| "logits/chosen": -2.199425220489502, | |
| "logits/rejected": -2.1700549125671387, | |
| "logps/chosen": -314.6337890625, | |
| "logps/rejected": -338.1066589355469, | |
| "loss": 0.4538, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": -1.110811710357666, | |
| "rewards/margins": 1.5624140501022339, | |
| "rewards/rejected": -2.6732258796691895, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 7.000649639637843, | |
| "learning_rate": 8.991871146243696e-08, | |
| "logits/chosen": -2.217109203338623, | |
| "logits/rejected": -2.2137506008148193, | |
| "logps/chosen": -289.3517761230469, | |
| "logps/rejected": -343.0474853515625, | |
| "loss": 0.4358, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -1.2585060596466064, | |
| "rewards/margins": 1.6992295980453491, | |
| "rewards/rejected": -2.957735538482666, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 6.509621971875314, | |
| "learning_rate": 8.415368041245513e-08, | |
| "logits/chosen": -2.205955743789673, | |
| "logits/rejected": -2.2050979137420654, | |
| "logps/chosen": -284.9857482910156, | |
| "logps/rejected": -335.174560546875, | |
| "loss": 0.439, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -1.2468931674957275, | |
| "rewards/margins": 1.6571691036224365, | |
| "rewards/rejected": -2.904062032699585, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 12.101058956711388, | |
| "learning_rate": 7.854203032779894e-08, | |
| "logits/chosen": -2.2182745933532715, | |
| "logits/rejected": -2.268409490585327, | |
| "logps/chosen": -342.60528564453125, | |
| "logps/rejected": -349.5751953125, | |
| "loss": 0.4508, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.2632758617401123, | |
| "rewards/margins": 1.6573642492294312, | |
| "rewards/rejected": -2.920640468597412, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "grad_norm": 7.466182101903001, | |
| "learning_rate": 7.308895107526317e-08, | |
| "logits/chosen": -2.1878528594970703, | |
| "logits/rejected": -2.2148966789245605, | |
| "logps/chosen": -329.15374755859375, | |
| "logps/rejected": -361.855224609375, | |
| "loss": 0.4403, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.0873197317123413, | |
| "rewards/margins": 1.8896510601043701, | |
| "rewards/rejected": -2.976970672607422, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "grad_norm": 7.096542397688041, | |
| "learning_rate": 6.779948586932389e-08, | |
| "logits/chosen": -2.1874141693115234, | |
| "logits/rejected": -2.2552855014801025, | |
| "logps/chosen": -339.11761474609375, | |
| "logps/rejected": -330.0443115234375, | |
| "loss": 0.4485, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.2248756885528564, | |
| "rewards/margins": 1.6217113733291626, | |
| "rewards/rejected": -2.8465871810913086, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "eval_logits/chosen": -2.3004236221313477, | |
| "eval_logits/rejected": -2.3815455436706543, | |
| "eval_logps/chosen": -316.2496643066406, | |
| "eval_logps/rejected": -321.8916015625, | |
| "eval_loss": 0.435116708278656, | |
| "eval_rewards/accuracies": 0.7721518874168396, | |
| "eval_rewards/chosen": -0.9490465521812439, | |
| "eval_rewards/margins": 1.8103375434875488, | |
| "eval_rewards/rejected": -2.7593843936920166, | |
| "eval_runtime": 116.8837, | |
| "eval_samples_per_second": 21.389, | |
| "eval_steps_per_second": 0.676, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 8.40277649092231, | |
| "learning_rate": 6.267852660798256e-08, | |
| "logits/chosen": -2.243600845336914, | |
| "logits/rejected": -2.1874167919158936, | |
| "logps/chosen": -264.4030456542969, | |
| "logps/rejected": -318.32659912109375, | |
| "loss": 0.4608, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -1.2417147159576416, | |
| "rewards/margins": 1.8263641595840454, | |
| "rewards/rejected": -3.0680789947509766, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 7.94424609527543, | |
| "learning_rate": 5.7730809348553315e-08, | |
| "logits/chosen": -2.226975679397583, | |
| "logits/rejected": -2.2741405963897705, | |
| "logps/chosen": -309.5135803222656, | |
| "logps/rejected": -325.623779296875, | |
| "loss": 0.4366, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": -1.201520562171936, | |
| "rewards/margins": 1.7903496026992798, | |
| "rewards/rejected": -2.991870164871216, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 7.128082903747095, | |
| "learning_rate": 5.296090992757746e-08, | |
| "logits/chosen": -2.206270217895508, | |
| "logits/rejected": -2.201709747314453, | |
| "logps/chosen": -315.83282470703125, | |
| "logps/rejected": -364.1822814941406, | |
| "loss": 0.443, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -1.2698633670806885, | |
| "rewards/margins": 1.9889068603515625, | |
| "rewards/rejected": -3.258769989013672, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 7.514968298076625, | |
| "learning_rate": 4.8373239728916326e-08, | |
| "logits/chosen": -2.1958649158477783, | |
| "logits/rejected": -2.1885480880737305, | |
| "logps/chosen": -288.6278381347656, | |
| "logps/rejected": -353.77984619140625, | |
| "loss": 0.4283, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -1.0743087530136108, | |
| "rewards/margins": 2.0363688468933105, | |
| "rewards/rejected": -3.1106772422790527, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 11.932999852584523, | |
| "learning_rate": 4.397204160393628e-08, | |
| "logits/chosen": -2.192418336868286, | |
| "logits/rejected": -2.225080966949463, | |
| "logps/chosen": -281.90093994140625, | |
| "logps/rejected": -319.2328186035156, | |
| "loss": 0.4456, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -1.106330156326294, | |
| "rewards/margins": 1.9500024318695068, | |
| "rewards/rejected": -3.056332588195801, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 7.8327307820265375, | |
| "learning_rate": 3.9761385947558845e-08, | |
| "logits/chosen": -2.2083628177642822, | |
| "logits/rejected": -2.212167978286743, | |
| "logps/chosen": -305.710693359375, | |
| "logps/rejected": -331.8744201660156, | |
| "loss": 0.4156, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -1.0697580575942993, | |
| "rewards/margins": 2.013720989227295, | |
| "rewards/rejected": -3.083479404449463, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 8.141593893840476, | |
| "learning_rate": 3.574516693380511e-08, | |
| "logits/chosen": -2.177604913711548, | |
| "logits/rejected": -2.187506914138794, | |
| "logps/chosen": -293.26287841796875, | |
| "logps/rejected": -328.9276123046875, | |
| "loss": 0.4442, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.204113245010376, | |
| "rewards/margins": 1.9541772603988647, | |
| "rewards/rejected": -3.158290386199951, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 8.19223484059336, | |
| "learning_rate": 3.192709891431586e-08, | |
| "logits/chosen": -2.1618940830230713, | |
| "logits/rejected": -2.1518969535827637, | |
| "logps/chosen": -324.14654541015625, | |
| "logps/rejected": -348.9007263183594, | |
| "loss": 0.4289, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -1.1970574855804443, | |
| "rewards/margins": 1.7036092281341553, | |
| "rewards/rejected": -2.9006664752960205, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 7.729663351327424, | |
| "learning_rate": 2.8310712983178524e-08, | |
| "logits/chosen": -2.204336404800415, | |
| "logits/rejected": -2.2102417945861816, | |
| "logps/chosen": -307.18817138671875, | |
| "logps/rejected": -341.29840087890625, | |
| "loss": 0.4431, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -1.2059943675994873, | |
| "rewards/margins": 1.5643813610076904, | |
| "rewards/rejected": -2.7703757286071777, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "grad_norm": 8.086893591573848, | |
| "learning_rate": 2.4899353711237247e-08, | |
| "logits/chosen": -2.1703457832336426, | |
| "logits/rejected": -2.2025086879730225, | |
| "logps/chosen": -313.03167724609375, | |
| "logps/rejected": -340.02264404296875, | |
| "loss": 0.4411, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -1.1995328664779663, | |
| "rewards/margins": 1.7753593921661377, | |
| "rewards/rejected": -2.9748923778533936, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "eval_logits/chosen": -2.3010759353637695, | |
| "eval_logits/rejected": -2.3822624683380127, | |
| "eval_logps/chosen": -315.85467529296875, | |
| "eval_logps/rejected": -321.64093017578125, | |
| "eval_loss": 0.4348324239253998, | |
| "eval_rewards/accuracies": 0.7658227682113647, | |
| "eval_rewards/chosen": -0.9292957782745361, | |
| "eval_rewards/margins": 1.8175575733184814, | |
| "eval_rewards/rejected": -2.7468531131744385, | |
| "eval_runtime": 116.7271, | |
| "eval_samples_per_second": 21.417, | |
| "eval_steps_per_second": 0.677, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 6.876861850417737, | |
| "learning_rate": 2.1696176052907105e-08, | |
| "logits/chosen": -2.2324881553649902, | |
| "logits/rejected": -2.219320774078369, | |
| "logps/chosen": -313.92529296875, | |
| "logps/rejected": -350.4786682128906, | |
| "loss": 0.4293, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.2920801639556885, | |
| "rewards/margins": 1.7782132625579834, | |
| "rewards/rejected": -3.070293426513672, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 7.677036235689073, | |
| "learning_rate": 1.8704142428352528e-08, | |
| "logits/chosen": -2.190504789352417, | |
| "logits/rejected": -2.2708182334899902, | |
| "logps/chosen": -332.3207702636719, | |
| "logps/rejected": -330.7673034667969, | |
| "loss": 0.4507, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -1.1538569927215576, | |
| "rewards/margins": 1.831903100013733, | |
| "rewards/rejected": -2.98576021194458, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "grad_norm": 6.9135152123839365, | |
| "learning_rate": 1.592601998372886e-08, | |
| "logits/chosen": -2.195899248123169, | |
| "logits/rejected": -2.1886417865753174, | |
| "logps/chosen": -296.1393127441406, | |
| "logps/rejected": -334.1782531738281, | |
| "loss": 0.4433, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.1621323823928833, | |
| "rewards/margins": 1.852728247642517, | |
| "rewards/rejected": -3.0148606300354004, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 7.569188340166907, | |
| "learning_rate": 1.336437803202059e-08, | |
| "logits/chosen": -2.228149175643921, | |
| "logits/rejected": -2.2481937408447266, | |
| "logps/chosen": -310.6243896484375, | |
| "logps/rejected": -328.8221130371094, | |
| "loss": 0.4301, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": -1.1201293468475342, | |
| "rewards/margins": 1.740098237991333, | |
| "rewards/rejected": -2.860227584838867, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 8.234591785864946, | |
| "learning_rate": 1.102158567684336e-08, | |
| "logits/chosen": -2.192157745361328, | |
| "logits/rejected": -2.194495677947998, | |
| "logps/chosen": -302.17559814453125, | |
| "logps/rejected": -339.31085205078125, | |
| "loss": 0.4208, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.169334053993225, | |
| "rewards/margins": 1.9188495874404907, | |
| "rewards/rejected": -3.088183879852295, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 7.558156377001319, | |
| "learning_rate": 8.899809621407045e-09, | |
| "logits/chosen": -2.1941123008728027, | |
| "logits/rejected": -2.2328617572784424, | |
| "logps/chosen": -275.9726867675781, | |
| "logps/rejected": -308.49212646484375, | |
| "loss": 0.4256, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.2298341989517212, | |
| "rewards/margins": 1.8165092468261719, | |
| "rewards/rejected": -3.0463433265686035, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 7.958869766486015, | |
| "learning_rate": 7.001012164666392e-09, | |
| "logits/chosen": -2.181845188140869, | |
| "logits/rejected": -2.1818060874938965, | |
| "logps/chosen": -332.1898498535156, | |
| "logps/rejected": -357.20062255859375, | |
| "loss": 0.4367, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.14080810546875, | |
| "rewards/margins": 1.8624111413955688, | |
| "rewards/rejected": -3.0032193660736084, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 7.273641922997009, | |
| "learning_rate": 5.326949386512764e-09, | |
| "logits/chosen": -2.1961543560028076, | |
| "logits/rejected": -2.1588053703308105, | |
| "logps/chosen": -314.35601806640625, | |
| "logps/rejected": -367.77716064453125, | |
| "loss": 0.4241, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.0949821472167969, | |
| "rewards/margins": 2.1694438457489014, | |
| "rewards/rejected": -3.2644259929656982, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 7.683716889020659, | |
| "learning_rate": 3.879169523684639e-09, | |
| "logits/chosen": -2.182697296142578, | |
| "logits/rejected": -2.188960313796997, | |
| "logps/chosen": -294.359375, | |
| "logps/rejected": -334.6479187011719, | |
| "loss": 0.4398, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -1.1956040859222412, | |
| "rewards/margins": 1.873143196105957, | |
| "rewards/rejected": -3.0687472820281982, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 8.782560420891002, | |
| "learning_rate": 2.65901153789963e-09, | |
| "logits/chosen": -2.2032766342163086, | |
| "logits/rejected": -2.225320339202881, | |
| "logps/chosen": -302.3791198730469, | |
| "logps/rejected": -329.71710205078125, | |
| "loss": 0.4499, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -1.1408864259719849, | |
| "rewards/margins": 1.8591581583023071, | |
| "rewards/rejected": -3.000044345855713, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_logits/chosen": -2.301210641860962, | |
| "eval_logits/rejected": -2.382761240005493, | |
| "eval_logps/chosen": -316.2319641113281, | |
| "eval_logps/rejected": -322.2369079589844, | |
| "eval_loss": 0.4347890019416809, | |
| "eval_rewards/accuracies": 0.7658227682113647, | |
| "eval_rewards/chosen": -0.9481591582298279, | |
| "eval_rewards/margins": 1.8284918069839478, | |
| "eval_rewards/rejected": -2.776650905609131, | |
| "eval_runtime": 116.8141, | |
| "eval_samples_per_second": 21.402, | |
| "eval_steps_per_second": 0.676, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 7.9306460884331775, | |
| "learning_rate": 1.6676038775320089e-09, | |
| "logits/chosen": -2.1896042823791504, | |
| "logits/rejected": -2.1769232749938965, | |
| "logps/chosen": -319.85491943359375, | |
| "logps/rejected": -358.85296630859375, | |
| "loss": 0.4625, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -1.1989891529083252, | |
| "rewards/margins": 1.7593486309051514, | |
| "rewards/rejected": -2.9583375453948975, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 6.9870920606828735, | |
| "learning_rate": 9.058634339806914e-10, | |
| "logits/chosen": -2.190636396408081, | |
| "logits/rejected": -2.2027156352996826, | |
| "logps/chosen": -313.0627746582031, | |
| "logps/rejected": -347.0715026855469, | |
| "loss": 0.4495, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -1.2059122323989868, | |
| "rewards/margins": 1.832135558128357, | |
| "rewards/rejected": -3.0380477905273438, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 8.257014654020075, | |
| "learning_rate": 3.74494693693439e-10, | |
| "logits/chosen": -2.2022292613983154, | |
| "logits/rejected": -2.2187328338623047, | |
| "logps/chosen": -282.579345703125, | |
| "logps/rejected": -332.579345703125, | |
| "loss": 0.4331, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -1.009050965309143, | |
| "rewards/margins": 1.674533486366272, | |
| "rewards/rejected": -2.683584690093994, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "grad_norm": 8.586752011175628, | |
| "learning_rate": 7.39890866310322e-11, | |
| "logits/chosen": -2.1978275775909424, | |
| "logits/rejected": -2.208787441253662, | |
| "logps/chosen": -313.6376953125, | |
| "logps/rejected": -326.4861145019531, | |
| "loss": 0.4289, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.2633991241455078, | |
| "rewards/margins": 1.3990856409072876, | |
| "rewards/rejected": -2.662484645843506, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 1148, | |
| "total_flos": 0.0, | |
| "train_loss": 0.5184940074794384, | |
| "train_runtime": 19743.6623, | |
| "train_samples_per_second": 7.445, | |
| "train_steps_per_second": 0.058 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1148, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 100, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |