| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 100, | |
| "global_step": 2907, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.7182130584192438e-09, | |
| "logits/chosen": -2.7645790576934814, | |
| "logits/rejected": -2.8125059604644775, | |
| "logps/chosen": -113.67314910888672, | |
| "logps/rejected": -132.0498504638672, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.718213058419244e-08, | |
| "logits/chosen": -2.9990971088409424, | |
| "logits/rejected": -3.0227837562561035, | |
| "logps/chosen": -281.044921875, | |
| "logps/rejected": -247.3936309814453, | |
| "loss": 0.6906, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.004584211856126785, | |
| "rewards/margins": 0.00850469246506691, | |
| "rewards/rejected": -0.0039204806089401245, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.436426116838488e-08, | |
| "logits/chosen": -2.88598895072937, | |
| "logits/rejected": -2.917177200317383, | |
| "logps/chosen": -359.26177978515625, | |
| "logps/rejected": -298.42877197265625, | |
| "loss": 0.6845, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": 0.05283154919743538, | |
| "rewards/margins": 0.018162177875638008, | |
| "rewards/rejected": 0.03466937318444252, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.154639175257731e-08, | |
| "logits/chosen": -2.9570868015289307, | |
| "logits/rejected": -2.9609949588775635, | |
| "logps/chosen": -326.2544860839844, | |
| "logps/rejected": -289.9393615722656, | |
| "loss": 0.6564, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.1515582799911499, | |
| "rewards/margins": 0.05830109864473343, | |
| "rewards/rejected": 0.09325718879699707, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 6.872852233676976e-08, | |
| "logits/chosen": -2.9531846046447754, | |
| "logits/rejected": -2.955566883087158, | |
| "logps/chosen": -376.5739440917969, | |
| "logps/rejected": -331.3490295410156, | |
| "loss": 0.6444, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.3529122769832611, | |
| "rewards/margins": 0.1296483278274536, | |
| "rewards/rejected": 0.2232639044523239, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 8.59106529209622e-08, | |
| "logits/chosen": -2.8878796100616455, | |
| "logits/rejected": -2.9229512214660645, | |
| "logps/chosen": -427.5284118652344, | |
| "logps/rejected": -266.94415283203125, | |
| "loss": 0.6204, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.5041142702102661, | |
| "rewards/margins": 0.29676300287246704, | |
| "rewards/rejected": 0.20735123753547668, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.0309278350515462e-07, | |
| "logits/chosen": -2.92777943611145, | |
| "logits/rejected": -2.9371728897094727, | |
| "logps/chosen": -318.0414123535156, | |
| "logps/rejected": -265.1334228515625, | |
| "loss": 0.6105, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.38802462816238403, | |
| "rewards/margins": 0.31535086035728455, | |
| "rewards/rejected": 0.0726737454533577, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.202749140893471e-07, | |
| "logits/chosen": -2.9851737022399902, | |
| "logits/rejected": -3.0005269050598145, | |
| "logps/chosen": -404.5143127441406, | |
| "logps/rejected": -300.8736572265625, | |
| "loss": 0.6069, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.5573440790176392, | |
| "rewards/margins": 0.41277560591697693, | |
| "rewards/rejected": 0.14456847310066223, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.3745704467353952e-07, | |
| "logits/chosen": -2.96441912651062, | |
| "logits/rejected": -2.968987464904785, | |
| "logps/chosen": -314.7437438964844, | |
| "logps/rejected": -254.7586669921875, | |
| "loss": 0.5569, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": 0.45461219549179077, | |
| "rewards/margins": 0.5670984983444214, | |
| "rewards/rejected": -0.1124863252043724, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.5463917525773197e-07, | |
| "logits/chosen": -3.021219491958618, | |
| "logits/rejected": -3.0178027153015137, | |
| "logps/chosen": -308.31585693359375, | |
| "logps/rejected": -257.63250732421875, | |
| "loss": 0.5296, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": 0.5257282257080078, | |
| "rewards/margins": 0.7057730555534363, | |
| "rewards/rejected": -0.18004484474658966, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.718213058419244e-07, | |
| "logits/chosen": -2.8890886306762695, | |
| "logits/rejected": -2.896449327468872, | |
| "logps/chosen": -375.84564208984375, | |
| "logps/rejected": -241.34219360351562, | |
| "loss": 0.5504, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": 0.6213200092315674, | |
| "rewards/margins": 0.8095133900642395, | |
| "rewards/rejected": -0.18819323182106018, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_logits/chosen": -2.936641216278076, | |
| "eval_logits/rejected": -2.935973882675171, | |
| "eval_logps/chosen": -361.9043884277344, | |
| "eval_logps/rejected": -293.7761535644531, | |
| "eval_loss": 0.5406630635261536, | |
| "eval_rewards/accuracies": 0.7579365372657776, | |
| "eval_rewards/chosen": 0.5287383794784546, | |
| "eval_rewards/margins": 0.7097563743591309, | |
| "eval_rewards/rejected": -0.18101799488067627, | |
| "eval_runtime": 163.7175, | |
| "eval_samples_per_second": 12.216, | |
| "eval_steps_per_second": 0.385, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.8900343642611682e-07, | |
| "logits/chosen": -2.917739152908325, | |
| "logits/rejected": -2.8890061378479004, | |
| "logps/chosen": -334.1250305175781, | |
| "logps/rejected": -331.29571533203125, | |
| "loss": 0.5741, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.4206802248954773, | |
| "rewards/margins": 0.6734089851379395, | |
| "rewards/rejected": -0.25272876024246216, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.0618556701030925e-07, | |
| "logits/chosen": -2.9665865898132324, | |
| "logits/rejected": -2.970818519592285, | |
| "logps/chosen": -386.2568664550781, | |
| "logps/rejected": -280.7279357910156, | |
| "loss": 0.5533, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": 0.3308308720588684, | |
| "rewards/margins": 0.6611676216125488, | |
| "rewards/rejected": -0.33033671975135803, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.2336769759450173e-07, | |
| "logits/chosen": -3.012545347213745, | |
| "logits/rejected": -2.9925591945648193, | |
| "logps/chosen": -353.75469970703125, | |
| "logps/rejected": -290.1478576660156, | |
| "loss": 0.5447, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.47041910886764526, | |
| "rewards/margins": 0.7254467010498047, | |
| "rewards/rejected": -0.2550275921821594, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.405498281786942e-07, | |
| "logits/chosen": -2.9241251945495605, | |
| "logits/rejected": -2.9576869010925293, | |
| "logps/chosen": -329.7611389160156, | |
| "logps/rejected": -265.63006591796875, | |
| "loss": 0.5113, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.4418914318084717, | |
| "rewards/margins": 0.7908871173858643, | |
| "rewards/rejected": -0.3489956259727478, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.5773195876288655e-07, | |
| "logits/chosen": -2.9687321186065674, | |
| "logits/rejected": -2.9832406044006348, | |
| "logps/chosen": -331.42669677734375, | |
| "logps/rejected": -269.3779296875, | |
| "loss": 0.5387, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.38882407546043396, | |
| "rewards/margins": 0.8327142000198364, | |
| "rewards/rejected": -0.4438902735710144, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.7491408934707903e-07, | |
| "logits/chosen": -2.9920172691345215, | |
| "logits/rejected": -3.013425827026367, | |
| "logps/chosen": -383.51934814453125, | |
| "logps/rejected": -297.9476318359375, | |
| "loss": 0.5083, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": 0.6878620982170105, | |
| "rewards/margins": 1.0685365200042725, | |
| "rewards/rejected": -0.3806745111942291, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.9209621993127146e-07, | |
| "logits/chosen": -2.9233837127685547, | |
| "logits/rejected": -2.9321510791778564, | |
| "logps/chosen": -339.95745849609375, | |
| "logps/rejected": -280.793701171875, | |
| "loss": 0.5131, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.5272036194801331, | |
| "rewards/margins": 0.8315987586975098, | |
| "rewards/rejected": -0.3043951690196991, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.0927835051546394e-07, | |
| "logits/chosen": -2.9920477867126465, | |
| "logits/rejected": -2.9811954498291016, | |
| "logps/chosen": -265.2094421386719, | |
| "logps/rejected": -254.6926727294922, | |
| "loss": 0.504, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": 0.177840456366539, | |
| "rewards/margins": 0.8774341344833374, | |
| "rewards/rejected": -0.699593722820282, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.2646048109965636e-07, | |
| "logits/chosen": -3.0027570724487305, | |
| "logits/rejected": -2.987896203994751, | |
| "logps/chosen": -330.7102966308594, | |
| "logps/rejected": -239.6572723388672, | |
| "loss": 0.5611, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": 0.5821124315261841, | |
| "rewards/margins": 1.141722559928894, | |
| "rewards/rejected": -0.5596100687980652, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.436426116838488e-07, | |
| "logits/chosen": -3.048879384994507, | |
| "logits/rejected": -2.9993340969085693, | |
| "logps/chosen": -266.72430419921875, | |
| "logps/rejected": -187.27467346191406, | |
| "loss": 0.541, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": 0.2961394786834717, | |
| "rewards/margins": 0.8773609399795532, | |
| "rewards/rejected": -0.5812214612960815, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_logits/chosen": -2.980220317840576, | |
| "eval_logits/rejected": -2.9785656929016113, | |
| "eval_logps/chosen": -360.50030517578125, | |
| "eval_logps/rejected": -297.53515625, | |
| "eval_loss": 0.5220658779144287, | |
| "eval_rewards/accuracies": 0.7698412537574768, | |
| "eval_rewards/chosen": 0.6691505908966064, | |
| "eval_rewards/margins": 1.2260682582855225, | |
| "eval_rewards/rejected": -0.5569177269935608, | |
| "eval_runtime": 163.6147, | |
| "eval_samples_per_second": 12.224, | |
| "eval_steps_per_second": 0.385, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.608247422680412e-07, | |
| "logits/chosen": -2.9797844886779785, | |
| "logits/rejected": -2.9449918270111084, | |
| "logps/chosen": -364.26287841796875, | |
| "logps/rejected": -251.58901977539062, | |
| "loss": 0.4772, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 0.5583639740943909, | |
| "rewards/margins": 1.3936102390289307, | |
| "rewards/rejected": -0.835246205329895, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.7800687285223364e-07, | |
| "logits/chosen": -2.9557044506073, | |
| "logits/rejected": -2.9637341499328613, | |
| "logps/chosen": -261.53216552734375, | |
| "logps/rejected": -271.5208740234375, | |
| "loss": 0.5707, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.367598295211792, | |
| "rewards/margins": 1.1545054912567139, | |
| "rewards/rejected": -0.7869071960449219, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.9518900343642607e-07, | |
| "logits/chosen": -3.0034899711608887, | |
| "logits/rejected": -2.991698980331421, | |
| "logps/chosen": -308.8106689453125, | |
| "logps/rejected": -278.55950927734375, | |
| "loss": 0.5827, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": 0.4888441562652588, | |
| "rewards/margins": 1.2474777698516846, | |
| "rewards/rejected": -0.7586336731910706, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.123711340206185e-07, | |
| "logits/chosen": -3.0844597816467285, | |
| "logits/rejected": -3.0464837551116943, | |
| "logps/chosen": -385.8021545410156, | |
| "logps/rejected": -253.19869995117188, | |
| "loss": 0.4898, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.5026682019233704, | |
| "rewards/margins": 1.1020526885986328, | |
| "rewards/rejected": -0.5993844270706177, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.2955326460481097e-07, | |
| "logits/chosen": -3.0472395420074463, | |
| "logits/rejected": -3.0599236488342285, | |
| "logps/chosen": -341.8814697265625, | |
| "logps/rejected": -295.29437255859375, | |
| "loss": 0.5395, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.24311120808124542, | |
| "rewards/margins": 0.9385444521903992, | |
| "rewards/rejected": -0.6954333186149597, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.4673539518900345e-07, | |
| "logits/chosen": -3.0285518169403076, | |
| "logits/rejected": -3.0690500736236572, | |
| "logps/chosen": -353.20074462890625, | |
| "logps/rejected": -244.77041625976562, | |
| "loss": 0.6312, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.22555696964263916, | |
| "rewards/margins": 0.8821722269058228, | |
| "rewards/rejected": -1.107729196548462, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.639175257731959e-07, | |
| "logits/chosen": -3.0879526138305664, | |
| "logits/rejected": -3.0506978034973145, | |
| "logps/chosen": -354.5426025390625, | |
| "logps/rejected": -279.86773681640625, | |
| "loss": 0.571, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.26413029432296753, | |
| "rewards/margins": 0.9725528955459595, | |
| "rewards/rejected": -0.7084226012229919, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.810996563573884e-07, | |
| "logits/chosen": -3.0588438510894775, | |
| "logits/rejected": -3.0481762886047363, | |
| "logps/chosen": -339.85675048828125, | |
| "logps/rejected": -285.8063049316406, | |
| "loss": 0.6383, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": 0.30532822012901306, | |
| "rewards/margins": 1.10079026222229, | |
| "rewards/rejected": -0.7954620122909546, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.982817869415807e-07, | |
| "logits/chosen": -3.0889270305633545, | |
| "logits/rejected": -3.0666940212249756, | |
| "logps/chosen": -335.5870666503906, | |
| "logps/rejected": -258.51641845703125, | |
| "loss": 0.5611, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.7204562425613403, | |
| "rewards/margins": 1.0493910312652588, | |
| "rewards/rejected": -0.32893460988998413, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.982798165137615e-07, | |
| "logits/chosen": -3.0340983867645264, | |
| "logits/rejected": -3.0090713500976562, | |
| "logps/chosen": -281.38751220703125, | |
| "logps/rejected": -289.4985656738281, | |
| "loss": 0.6034, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.268043577671051, | |
| "rewards/margins": 0.5827728509902954, | |
| "rewards/rejected": -0.3147292733192444, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_logits/chosen": -3.035973072052002, | |
| "eval_logits/rejected": -3.0234200954437256, | |
| "eval_logps/chosen": -359.8170166015625, | |
| "eval_logps/rejected": -296.5441589355469, | |
| "eval_loss": 0.5459412932395935, | |
| "eval_rewards/accuracies": 0.761904776096344, | |
| "eval_rewards/chosen": 0.737476110458374, | |
| "eval_rewards/margins": 1.1952924728393555, | |
| "eval_rewards/rejected": -0.45781639218330383, | |
| "eval_runtime": 164.3219, | |
| "eval_samples_per_second": 12.171, | |
| "eval_steps_per_second": 0.383, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.963685015290519e-07, | |
| "logits/chosen": -3.1333563327789307, | |
| "logits/rejected": -3.0529465675354004, | |
| "logps/chosen": -394.2475280761719, | |
| "logps/rejected": -328.84796142578125, | |
| "loss": 0.5995, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.5251134634017944, | |
| "rewards/margins": 0.8031543493270874, | |
| "rewards/rejected": -0.27804094552993774, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.944571865443424e-07, | |
| "logits/chosen": -3.115387201309204, | |
| "logits/rejected": -3.104794502258301, | |
| "logps/chosen": -299.5379943847656, | |
| "logps/rejected": -227.14413452148438, | |
| "loss": 0.5504, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 0.5498681664466858, | |
| "rewards/margins": 1.550806999206543, | |
| "rewards/rejected": -1.0009387731552124, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.92545871559633e-07, | |
| "logits/chosen": -3.1059436798095703, | |
| "logits/rejected": -3.110661029815674, | |
| "logps/chosen": -405.8400573730469, | |
| "logps/rejected": -290.01934814453125, | |
| "loss": 0.5355, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.5642995834350586, | |
| "rewards/margins": 1.1246757507324219, | |
| "rewards/rejected": -0.5603762269020081, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.906345565749235e-07, | |
| "logits/chosen": -3.0694103240966797, | |
| "logits/rejected": -3.075610876083374, | |
| "logps/chosen": -301.7900695800781, | |
| "logps/rejected": -273.09100341796875, | |
| "loss": 0.6496, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.26571425795555115, | |
| "rewards/margins": 0.967176079750061, | |
| "rewards/rejected": -0.701461672782898, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.88723241590214e-07, | |
| "logits/chosen": -3.078815460205078, | |
| "logits/rejected": -3.097691059112549, | |
| "logps/chosen": -373.6755065917969, | |
| "logps/rejected": -278.1918640136719, | |
| "loss": 0.5251, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.5501624941825867, | |
| "rewards/margins": 1.1470292806625366, | |
| "rewards/rejected": -0.5968667268753052, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.868119266055046e-07, | |
| "logits/chosen": -3.101353168487549, | |
| "logits/rejected": -3.1290316581726074, | |
| "logps/chosen": -370.21112060546875, | |
| "logps/rejected": -328.2227783203125, | |
| "loss": 0.5218, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": 0.4693407118320465, | |
| "rewards/margins": 1.2798802852630615, | |
| "rewards/rejected": -0.8105396032333374, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.849006116207951e-07, | |
| "logits/chosen": -3.108405113220215, | |
| "logits/rejected": -3.108668804168701, | |
| "logps/chosen": -357.5787048339844, | |
| "logps/rejected": -308.5846252441406, | |
| "loss": 0.5781, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": 0.5667105317115784, | |
| "rewards/margins": 1.561586618423462, | |
| "rewards/rejected": -0.9948760271072388, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.829892966360856e-07, | |
| "logits/chosen": -3.1076834201812744, | |
| "logits/rejected": -3.139901638031006, | |
| "logps/chosen": -372.7229919433594, | |
| "logps/rejected": -321.50347900390625, | |
| "loss": 0.5748, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": 0.595112681388855, | |
| "rewards/margins": 1.7188622951507568, | |
| "rewards/rejected": -1.1237497329711914, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.810779816513762e-07, | |
| "logits/chosen": -3.0231597423553467, | |
| "logits/rejected": -3.055475950241089, | |
| "logps/chosen": -308.81109619140625, | |
| "logps/rejected": -280.67572021484375, | |
| "loss": 0.594, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.022154245525598526, | |
| "rewards/margins": 1.1715147495269775, | |
| "rewards/rejected": -1.1493604183197021, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.791666666666667e-07, | |
| "logits/chosen": -2.9821434020996094, | |
| "logits/rejected": -2.990657329559326, | |
| "logps/chosen": -350.4073791503906, | |
| "logps/rejected": -234.08291625976562, | |
| "loss": 0.5944, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": 0.6456303000450134, | |
| "rewards/margins": 1.8281257152557373, | |
| "rewards/rejected": -1.182495355606079, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_logits/chosen": -2.96209979057312, | |
| "eval_logits/rejected": -2.963911294937134, | |
| "eval_logps/chosen": -362.2125549316406, | |
| "eval_logps/rejected": -300.90362548828125, | |
| "eval_loss": 0.5573462247848511, | |
| "eval_rewards/accuracies": 0.7698412537574768, | |
| "eval_rewards/chosen": 0.49792128801345825, | |
| "eval_rewards/margins": 1.391687273979187, | |
| "eval_rewards/rejected": -0.8937660455703735, | |
| "eval_runtime": 163.7646, | |
| "eval_samples_per_second": 12.213, | |
| "eval_steps_per_second": 0.385, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.772553516819572e-07, | |
| "logits/chosen": -2.9685988426208496, | |
| "logits/rejected": -2.9469170570373535, | |
| "logps/chosen": -359.9443054199219, | |
| "logps/rejected": -339.13482666015625, | |
| "loss": 0.7753, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.5800348520278931, | |
| "rewards/margins": 1.4965015649795532, | |
| "rewards/rejected": -0.9164667129516602, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.753440366972477e-07, | |
| "logits/chosen": -3.039097785949707, | |
| "logits/rejected": -3.0352489948272705, | |
| "logps/chosen": -279.19451904296875, | |
| "logps/rejected": -275.61077880859375, | |
| "loss": 0.5719, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": 0.08171078562736511, | |
| "rewards/margins": 0.8535135388374329, | |
| "rewards/rejected": -0.7718027234077454, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.7343272171253825e-07, | |
| "logits/chosen": -3.0542099475860596, | |
| "logits/rejected": -3.048107624053955, | |
| "logps/chosen": -304.2041015625, | |
| "logps/rejected": -275.24664306640625, | |
| "loss": 0.5521, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": 0.6358417272567749, | |
| "rewards/margins": 1.4337527751922607, | |
| "rewards/rejected": -0.7979112863540649, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.715214067278288e-07, | |
| "logits/chosen": -2.9832911491394043, | |
| "logits/rejected": -2.9696083068847656, | |
| "logps/chosen": -351.0896911621094, | |
| "logps/rejected": -278.2879333496094, | |
| "loss": 0.5257, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": 0.4420256018638611, | |
| "rewards/margins": 1.773047685623169, | |
| "rewards/rejected": -1.331022024154663, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.696100917431192e-07, | |
| "logits/chosen": -3.115874767303467, | |
| "logits/rejected": -3.0773837566375732, | |
| "logps/chosen": -392.2452392578125, | |
| "logps/rejected": -324.62640380859375, | |
| "loss": 0.5536, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.2861310839653015, | |
| "rewards/margins": 0.8551927804946899, | |
| "rewards/rejected": -0.5690616369247437, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.6769877675840974e-07, | |
| "logits/chosen": -3.0585522651672363, | |
| "logits/rejected": -3.089534282684326, | |
| "logps/chosen": -310.84967041015625, | |
| "logps/rejected": -287.9058532714844, | |
| "loss": 0.5614, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": 0.2514137625694275, | |
| "rewards/margins": 1.2147700786590576, | |
| "rewards/rejected": -0.9633563160896301, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.6578746177370027e-07, | |
| "logits/chosen": -3.0050368309020996, | |
| "logits/rejected": -3.0113613605499268, | |
| "logps/chosen": -243.838623046875, | |
| "logps/rejected": -224.61404418945312, | |
| "loss": 0.5769, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 0.10031839460134506, | |
| "rewards/margins": 1.2319433689117432, | |
| "rewards/rejected": -1.1316249370574951, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.638761467889908e-07, | |
| "logits/chosen": -3.01200795173645, | |
| "logits/rejected": -2.9829325675964355, | |
| "logps/chosen": -353.6679992675781, | |
| "logps/rejected": -299.7701416015625, | |
| "loss": 0.5141, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": 0.6005850434303284, | |
| "rewards/margins": 2.408433437347412, | |
| "rewards/rejected": -1.807848334312439, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.6196483180428133e-07, | |
| "logits/chosen": -3.038440227508545, | |
| "logits/rejected": -3.0429458618164062, | |
| "logps/chosen": -330.0135192871094, | |
| "logps/rejected": -262.1318359375, | |
| "loss": 0.5292, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": 0.4290197491645813, | |
| "rewards/margins": 1.5280876159667969, | |
| "rewards/rejected": -1.0990678071975708, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.600535168195718e-07, | |
| "logits/chosen": -3.0223565101623535, | |
| "logits/rejected": -3.0170624256134033, | |
| "logps/chosen": -259.1560363769531, | |
| "logps/rejected": -268.68365478515625, | |
| "loss": 0.5512, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.08852599561214447, | |
| "rewards/margins": 1.076027750968933, | |
| "rewards/rejected": -0.9875017404556274, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_logits/chosen": -3.0406343936920166, | |
| "eval_logits/rejected": -3.0485074520111084, | |
| "eval_logps/chosen": -362.83642578125, | |
| "eval_logps/rejected": -302.1329650878906, | |
| "eval_loss": 0.5256651043891907, | |
| "eval_rewards/accuracies": 0.7579365372657776, | |
| "eval_rewards/chosen": 0.4355368912220001, | |
| "eval_rewards/margins": 1.452234148979187, | |
| "eval_rewards/rejected": -1.0166972875595093, | |
| "eval_runtime": 164.1914, | |
| "eval_samples_per_second": 12.181, | |
| "eval_steps_per_second": 0.384, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.5814220183486234e-07, | |
| "logits/chosen": -2.971991777420044, | |
| "logits/rejected": -2.9626731872558594, | |
| "logps/chosen": -387.75872802734375, | |
| "logps/rejected": -341.24224853515625, | |
| "loss": 0.5611, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.0966944545507431, | |
| "rewards/margins": 0.9770743250846863, | |
| "rewards/rejected": -1.0737688541412354, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.562308868501529e-07, | |
| "logits/chosen": -2.97809100151062, | |
| "logits/rejected": -3.0156943798065186, | |
| "logps/chosen": -325.83837890625, | |
| "logps/rejected": -321.0384826660156, | |
| "loss": 0.5693, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": 0.27487924695014954, | |
| "rewards/margins": 1.646512746810913, | |
| "rewards/rejected": -1.371633529663086, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.543195718654434e-07, | |
| "logits/chosen": -3.0082881450653076, | |
| "logits/rejected": -3.003408193588257, | |
| "logps/chosen": -274.6020812988281, | |
| "logps/rejected": -240.13998413085938, | |
| "loss": 0.5953, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.0502743124961853, | |
| "rewards/margins": 0.5772665739059448, | |
| "rewards/rejected": -0.6275408864021301, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.5240825688073394e-07, | |
| "logits/chosen": -3.0475857257843018, | |
| "logits/rejected": -3.0587058067321777, | |
| "logps/chosen": -345.28802490234375, | |
| "logps/rejected": -276.25018310546875, | |
| "loss": 0.559, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.6526178121566772, | |
| "rewards/margins": 1.6864182949066162, | |
| "rewards/rejected": -1.033800482749939, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.504969418960244e-07, | |
| "logits/chosen": -2.9780993461608887, | |
| "logits/rejected": -3.0339550971984863, | |
| "logps/chosen": -318.60699462890625, | |
| "logps/rejected": -363.83966064453125, | |
| "loss": 0.5182, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": 0.3249278664588928, | |
| "rewards/margins": 1.6138547658920288, | |
| "rewards/rejected": -1.2889269590377808, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.4858562691131495e-07, | |
| "logits/chosen": -3.0293617248535156, | |
| "logits/rejected": -3.0541815757751465, | |
| "logps/chosen": -355.3965759277344, | |
| "logps/rejected": -341.19097900390625, | |
| "loss": 0.5655, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.15244658291339874, | |
| "rewards/margins": 1.1955846548080444, | |
| "rewards/rejected": -1.043138027191162, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.466743119266055e-07, | |
| "logits/chosen": -2.955909252166748, | |
| "logits/rejected": -2.966557502746582, | |
| "logps/chosen": -339.918701171875, | |
| "logps/rejected": -312.85992431640625, | |
| "loss": 0.5342, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.24385514855384827, | |
| "rewards/margins": 1.2561490535736084, | |
| "rewards/rejected": -1.5000044107437134, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.44762996941896e-07, | |
| "logits/chosen": -2.9457859992980957, | |
| "logits/rejected": -2.921659231185913, | |
| "logps/chosen": -361.46905517578125, | |
| "logps/rejected": -314.6666259765625, | |
| "loss": 0.5347, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": 0.31199535727500916, | |
| "rewards/margins": 1.5148388147354126, | |
| "rewards/rejected": -1.202843427658081, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.4285168195718655e-07, | |
| "logits/chosen": -2.9674103260040283, | |
| "logits/rejected": -2.9832658767700195, | |
| "logps/chosen": -279.5147705078125, | |
| "logps/rejected": -283.4952697753906, | |
| "loss": 0.5475, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.18109655380249023, | |
| "rewards/margins": 1.081386685371399, | |
| "rewards/rejected": -1.2624832391738892, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.40940366972477e-07, | |
| "logits/chosen": -3.038327932357788, | |
| "logits/rejected": -3.081512928009033, | |
| "logps/chosen": -282.9052429199219, | |
| "logps/rejected": -260.5687255859375, | |
| "loss": 0.5879, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.0032925487030297518, | |
| "rewards/margins": 0.903986930847168, | |
| "rewards/rejected": -0.9006943702697754, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_logits/chosen": -2.9869041442871094, | |
| "eval_logits/rejected": -2.991122007369995, | |
| "eval_logps/chosen": -362.4848327636719, | |
| "eval_logps/rejected": -301.2572021484375, | |
| "eval_loss": 0.5287741422653198, | |
| "eval_rewards/accuracies": 0.7579365372657776, | |
| "eval_rewards/chosen": 0.47069627046585083, | |
| "eval_rewards/margins": 1.3998188972473145, | |
| "eval_rewards/rejected": -0.9291225075721741, | |
| "eval_runtime": 164.0279, | |
| "eval_samples_per_second": 12.193, | |
| "eval_steps_per_second": 0.384, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.3902905198776756e-07, | |
| "logits/chosen": -3.0266683101654053, | |
| "logits/rejected": -3.0426414012908936, | |
| "logps/chosen": -345.2246398925781, | |
| "logps/rejected": -280.60711669921875, | |
| "loss": 0.5172, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": 0.3867380917072296, | |
| "rewards/margins": 1.5311682224273682, | |
| "rewards/rejected": -1.14443039894104, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.371177370030581e-07, | |
| "logits/chosen": -3.0024008750915527, | |
| "logits/rejected": -3.0336501598358154, | |
| "logps/chosen": -340.01483154296875, | |
| "logps/rejected": -288.4037170410156, | |
| "loss": 0.5674, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": 0.09759467095136642, | |
| "rewards/margins": 1.4280248880386353, | |
| "rewards/rejected": -1.330430030822754, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.352064220183486e-07, | |
| "logits/chosen": -3.073171377182007, | |
| "logits/rejected": -3.0693984031677246, | |
| "logps/chosen": -268.47442626953125, | |
| "logps/rejected": -253.87173461914062, | |
| "loss": 0.6129, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.08323542028665543, | |
| "rewards/margins": 1.263319969177246, | |
| "rewards/rejected": -1.180084466934204, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.3329510703363915e-07, | |
| "logits/chosen": -3.1394124031066895, | |
| "logits/rejected": -3.147449493408203, | |
| "logps/chosen": -316.50323486328125, | |
| "logps/rejected": -256.6443786621094, | |
| "loss": 0.5405, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": 0.0077073900029063225, | |
| "rewards/margins": 1.4150127172470093, | |
| "rewards/rejected": -1.407305359840393, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.313837920489297e-07, | |
| "logits/chosen": -3.015110731124878, | |
| "logits/rejected": -3.0439746379852295, | |
| "logps/chosen": -309.4215087890625, | |
| "logps/rejected": -278.88934326171875, | |
| "loss": 0.5239, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.3628634512424469, | |
| "rewards/margins": 1.4821045398712158, | |
| "rewards/rejected": -1.1192409992218018, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.2947247706422016e-07, | |
| "logits/chosen": -3.0345845222473145, | |
| "logits/rejected": -2.997607469558716, | |
| "logps/chosen": -311.18719482421875, | |
| "logps/rejected": -289.7060852050781, | |
| "loss": 0.5288, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.4103531241416931, | |
| "rewards/margins": 1.0822376012802124, | |
| "rewards/rejected": -1.4925907850265503, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.275611620795107e-07, | |
| "logits/chosen": -3.0080935955047607, | |
| "logits/rejected": -3.015535593032837, | |
| "logps/chosen": -377.9685974121094, | |
| "logps/rejected": -297.92169189453125, | |
| "loss": 0.5683, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.14489376544952393, | |
| "rewards/margins": 1.1130046844482422, | |
| "rewards/rejected": -0.9681110382080078, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.2564984709480123e-07, | |
| "logits/chosen": -3.030597448348999, | |
| "logits/rejected": -3.059508800506592, | |
| "logps/chosen": -368.32635498046875, | |
| "logps/rejected": -274.53619384765625, | |
| "loss": 0.6557, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.3344075083732605, | |
| "rewards/margins": 1.3792588710784912, | |
| "rewards/rejected": -1.0448510646820068, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.2373853211009176e-07, | |
| "logits/chosen": -3.0355846881866455, | |
| "logits/rejected": -3.0617101192474365, | |
| "logps/chosen": -310.24530029296875, | |
| "logps/rejected": -280.7437438964844, | |
| "loss": 0.5629, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.31564414501190186, | |
| "rewards/margins": 1.532622218132019, | |
| "rewards/rejected": -1.2169779539108276, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.2182721712538224e-07, | |
| "logits/chosen": -3.029533863067627, | |
| "logits/rejected": -3.05369234085083, | |
| "logps/chosen": -370.49945068359375, | |
| "logps/rejected": -285.1793212890625, | |
| "loss": 0.6773, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.3230968117713928, | |
| "rewards/margins": 1.0617311000823975, | |
| "rewards/rejected": -0.7386342287063599, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_logits/chosen": -3.041776180267334, | |
| "eval_logits/rejected": -3.0563852787017822, | |
| "eval_logps/chosen": -366.7193603515625, | |
| "eval_logps/rejected": -301.1505432128906, | |
| "eval_loss": 0.585310697555542, | |
| "eval_rewards/accuracies": 0.7460317611694336, | |
| "eval_rewards/chosen": 0.047242674976587296, | |
| "eval_rewards/margins": 0.9657005667686462, | |
| "eval_rewards/rejected": -0.9184578657150269, | |
| "eval_runtime": 164.258, | |
| "eval_samples_per_second": 12.176, | |
| "eval_steps_per_second": 0.384, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.199159021406727e-07, | |
| "logits/chosen": -2.963630199432373, | |
| "logits/rejected": -3.031212329864502, | |
| "logps/chosen": -297.58990478515625, | |
| "logps/rejected": -283.17572021484375, | |
| "loss": 0.6067, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": 0.13306304812431335, | |
| "rewards/margins": 1.0036863088607788, | |
| "rewards/rejected": -0.8706234097480774, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.1800458715596325e-07, | |
| "logits/chosen": -3.0382869243621826, | |
| "logits/rejected": -3.0224924087524414, | |
| "logps/chosen": -373.01947021484375, | |
| "logps/rejected": -315.932861328125, | |
| "loss": 0.6166, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.028559958562254906, | |
| "rewards/margins": 0.9609702825546265, | |
| "rewards/rejected": -0.9324103593826294, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.160932721712538e-07, | |
| "logits/chosen": -3.0072388648986816, | |
| "logits/rejected": -3.0005228519439697, | |
| "logps/chosen": -340.4766540527344, | |
| "logps/rejected": -306.3741149902344, | |
| "loss": 0.6079, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.09662823379039764, | |
| "rewards/margins": 1.1530828475952148, | |
| "rewards/rejected": -1.0564546585083008, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.141819571865443e-07, | |
| "logits/chosen": -2.9518847465515137, | |
| "logits/rejected": -2.9550204277038574, | |
| "logps/chosen": -325.9070739746094, | |
| "logps/rejected": -244.12588500976562, | |
| "loss": 0.564, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": 0.1386883705854416, | |
| "rewards/margins": 1.7188549041748047, | |
| "rewards/rejected": -1.5801665782928467, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.1227064220183485e-07, | |
| "logits/chosen": -2.9738943576812744, | |
| "logits/rejected": -3.009288787841797, | |
| "logps/chosen": -306.73614501953125, | |
| "logps/rejected": -284.35089111328125, | |
| "loss": 0.5213, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 0.16552898287773132, | |
| "rewards/margins": 1.8094953298568726, | |
| "rewards/rejected": -1.6439664363861084, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.103593272171253e-07, | |
| "logits/chosen": -2.9576098918914795, | |
| "logits/rejected": -2.9751369953155518, | |
| "logps/chosen": -336.5853576660156, | |
| "logps/rejected": -326.5455017089844, | |
| "loss": 0.5703, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.07825515419244766, | |
| "rewards/margins": 1.1398742198944092, | |
| "rewards/rejected": -1.0616191625595093, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.0844801223241586e-07, | |
| "logits/chosen": -3.0122196674346924, | |
| "logits/rejected": -2.9879307746887207, | |
| "logps/chosen": -350.8817138671875, | |
| "logps/rejected": -298.84307861328125, | |
| "loss": 0.5197, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.03767753392457962, | |
| "rewards/margins": 1.09770929813385, | |
| "rewards/rejected": -1.0600318908691406, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.065366972477064e-07, | |
| "logits/chosen": -2.9043805599212646, | |
| "logits/rejected": -2.9711837768554688, | |
| "logps/chosen": -379.1385803222656, | |
| "logps/rejected": -296.9505920410156, | |
| "loss": 0.5669, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": 0.3532111942768097, | |
| "rewards/margins": 1.7610466480255127, | |
| "rewards/rejected": -1.407835602760315, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.046253822629969e-07, | |
| "logits/chosen": -2.9510416984558105, | |
| "logits/rejected": -2.961275100708008, | |
| "logps/chosen": -330.33673095703125, | |
| "logps/rejected": -288.71173095703125, | |
| "loss": 0.5177, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.301142156124115, | |
| "rewards/margins": 1.2936238050460815, | |
| "rewards/rejected": -0.9924817085266113, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.0271406727828745e-07, | |
| "logits/chosen": -2.991361141204834, | |
| "logits/rejected": -2.974353790283203, | |
| "logps/chosen": -322.8855895996094, | |
| "logps/rejected": -271.7654113769531, | |
| "loss": 0.5263, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.06158037111163139, | |
| "rewards/margins": 1.1184431314468384, | |
| "rewards/rejected": -1.056862711906433, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_logits/chosen": -2.963681936264038, | |
| "eval_logits/rejected": -2.966184139251709, | |
| "eval_logps/chosen": -364.9457702636719, | |
| "eval_logps/rejected": -303.87957763671875, | |
| "eval_loss": 0.5150811076164246, | |
| "eval_rewards/accuracies": 0.761904776096344, | |
| "eval_rewards/chosen": 0.22460374236106873, | |
| "eval_rewards/margins": 1.4159626960754395, | |
| "eval_rewards/rejected": -1.191359043121338, | |
| "eval_runtime": 163.931, | |
| "eval_samples_per_second": 12.2, | |
| "eval_steps_per_second": 0.384, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.00802752293578e-07, | |
| "logits/chosen": -2.974116802215576, | |
| "logits/rejected": -2.9998645782470703, | |
| "logps/chosen": -330.64910888671875, | |
| "logps/rejected": -294.6690368652344, | |
| "loss": 0.5031, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": 0.18794824182987213, | |
| "rewards/margins": 1.2928552627563477, | |
| "rewards/rejected": -1.1049071550369263, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.9889143730886847e-07, | |
| "logits/chosen": -2.9930388927459717, | |
| "logits/rejected": -2.983773946762085, | |
| "logps/chosen": -376.36212158203125, | |
| "logps/rejected": -305.14111328125, | |
| "loss": 0.5357, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.27564454078674316, | |
| "rewards/margins": 1.5530188083648682, | |
| "rewards/rejected": -1.277374267578125, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.96980122324159e-07, | |
| "logits/chosen": -3.0382747650146484, | |
| "logits/rejected": -3.0700857639312744, | |
| "logps/chosen": -313.2106018066406, | |
| "logps/rejected": -256.5130310058594, | |
| "loss": 0.556, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.03291673585772514, | |
| "rewards/margins": 1.2974836826324463, | |
| "rewards/rejected": -1.3304002285003662, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.9506880733944953e-07, | |
| "logits/chosen": -3.0589098930358887, | |
| "logits/rejected": -3.058842897415161, | |
| "logps/chosen": -304.68658447265625, | |
| "logps/rejected": -276.25177001953125, | |
| "loss": 0.5578, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.22373457252979279, | |
| "rewards/margins": 1.489611268043518, | |
| "rewards/rejected": -1.7133458852767944, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.9315749235474006e-07, | |
| "logits/chosen": -3.037079334259033, | |
| "logits/rejected": -3.0386836528778076, | |
| "logps/chosen": -347.38897705078125, | |
| "logps/rejected": -334.3331298828125, | |
| "loss": 0.5433, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.15273378789424896, | |
| "rewards/margins": 1.5688612461090088, | |
| "rewards/rejected": -1.7215951681137085, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.912461773700306e-07, | |
| "logits/chosen": -2.9914333820343018, | |
| "logits/rejected": -3.013286828994751, | |
| "logps/chosen": -361.6410217285156, | |
| "logps/rejected": -342.3985900878906, | |
| "loss": 0.5464, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.03575097769498825, | |
| "rewards/margins": 1.2531265020370483, | |
| "rewards/rejected": -1.2888776063919067, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.8933486238532107e-07, | |
| "logits/chosen": -2.9541945457458496, | |
| "logits/rejected": -2.979830265045166, | |
| "logps/chosen": -424.258544921875, | |
| "logps/rejected": -299.7648620605469, | |
| "loss": 0.581, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.013498688116669655, | |
| "rewards/margins": 1.3451616764068604, | |
| "rewards/rejected": -1.331662893295288, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.874235474006116e-07, | |
| "logits/chosen": -2.9790916442871094, | |
| "logits/rejected": -2.987037181854248, | |
| "logps/chosen": -364.68048095703125, | |
| "logps/rejected": -290.4891052246094, | |
| "loss": 0.58, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.007329714484512806, | |
| "rewards/margins": 1.4278209209442139, | |
| "rewards/rejected": -1.4351506233215332, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.8551223241590214e-07, | |
| "logits/chosen": -2.986210823059082, | |
| "logits/rejected": -2.9739222526550293, | |
| "logps/chosen": -300.7494812011719, | |
| "logps/rejected": -278.2732849121094, | |
| "loss": 0.5741, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.04843021556735039, | |
| "rewards/margins": 1.3019744157791138, | |
| "rewards/rejected": -1.3504045009613037, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.8360091743119267e-07, | |
| "logits/chosen": -2.9838929176330566, | |
| "logits/rejected": -2.9902117252349854, | |
| "logps/chosen": -306.20025634765625, | |
| "logps/rejected": -289.5735168457031, | |
| "loss": 0.5366, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": 0.058762937784194946, | |
| "rewards/margins": 1.2602014541625977, | |
| "rewards/rejected": -1.2014386653900146, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_logits/chosen": -2.9907381534576416, | |
| "eval_logits/rejected": -2.982360363006592, | |
| "eval_logps/chosen": -364.6807556152344, | |
| "eval_logps/rejected": -302.8385314941406, | |
| "eval_loss": 0.5133689641952515, | |
| "eval_rewards/accuracies": 0.75, | |
| "eval_rewards/chosen": 0.25110283493995667, | |
| "eval_rewards/margins": 1.338356614112854, | |
| "eval_rewards/rejected": -1.0872538089752197, | |
| "eval_runtime": 164.3114, | |
| "eval_samples_per_second": 12.172, | |
| "eval_steps_per_second": 0.383, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.816896024464832e-07, | |
| "logits/chosen": -3.0253748893737793, | |
| "logits/rejected": -2.9562289714813232, | |
| "logps/chosen": -281.73016357421875, | |
| "logps/rejected": -248.2506866455078, | |
| "loss": 0.5377, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": 0.28038138151168823, | |
| "rewards/margins": 1.607208251953125, | |
| "rewards/rejected": -1.326826810836792, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.797782874617737e-07, | |
| "logits/chosen": -2.9797048568725586, | |
| "logits/rejected": -2.932326555252075, | |
| "logps/chosen": -333.2131042480469, | |
| "logps/rejected": -267.63128662109375, | |
| "loss": 0.4959, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.26507893204689026, | |
| "rewards/margins": 1.226858139038086, | |
| "rewards/rejected": -0.9617794156074524, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.778669724770642e-07, | |
| "logits/chosen": -2.9677836894989014, | |
| "logits/rejected": -2.9711012840270996, | |
| "logps/chosen": -301.1932067871094, | |
| "logps/rejected": -239.915771484375, | |
| "loss": 0.5646, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": 0.06536471843719482, | |
| "rewards/margins": 1.3896596431732178, | |
| "rewards/rejected": -1.3242948055267334, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.7595565749235474e-07, | |
| "logits/chosen": -3.003399133682251, | |
| "logits/rejected": -2.9879281520843506, | |
| "logps/chosen": -351.9979553222656, | |
| "logps/rejected": -264.519775390625, | |
| "loss": 0.585, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": 0.3593365252017975, | |
| "rewards/margins": 1.1577335596084595, | |
| "rewards/rejected": -0.7983969449996948, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.740443425076452e-07, | |
| "logits/chosen": -2.9658942222595215, | |
| "logits/rejected": -2.982341766357422, | |
| "logps/chosen": -336.6238708496094, | |
| "logps/rejected": -299.588134765625, | |
| "loss": 0.5176, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": 0.2032889872789383, | |
| "rewards/margins": 1.3534172773361206, | |
| "rewards/rejected": -1.1501282453536987, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.7213302752293575e-07, | |
| "logits/chosen": -2.951019763946533, | |
| "logits/rejected": -2.985151767730713, | |
| "logps/chosen": -338.8179931640625, | |
| "logps/rejected": -283.80328369140625, | |
| "loss": 0.538, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.049173761159181595, | |
| "rewards/margins": 0.8662222623825073, | |
| "rewards/rejected": -0.8170484304428101, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.702217125382263e-07, | |
| "logits/chosen": -2.9252991676330566, | |
| "logits/rejected": -2.937505006790161, | |
| "logps/chosen": -354.7286682128906, | |
| "logps/rejected": -312.35333251953125, | |
| "loss": 0.4865, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": 0.2835424542427063, | |
| "rewards/margins": 1.7692314386367798, | |
| "rewards/rejected": -1.4856891632080078, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.6831039755351677e-07, | |
| "logits/chosen": -2.975984573364258, | |
| "logits/rejected": -2.9734318256378174, | |
| "logps/chosen": -319.2844543457031, | |
| "logps/rejected": -303.3651428222656, | |
| "loss": 0.1133, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 1.7135050296783447, | |
| "rewards/margins": 4.7504682540893555, | |
| "rewards/rejected": -3.0369625091552734, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.663990825688073e-07, | |
| "logits/chosen": -2.899402141571045, | |
| "logits/rejected": -2.89802885055542, | |
| "logps/chosen": -288.45123291015625, | |
| "logps/rejected": -316.5885314941406, | |
| "loss": 0.1405, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 1.7678325176239014, | |
| "rewards/margins": 5.483719348907471, | |
| "rewards/rejected": -3.715886354446411, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.6448776758409783e-07, | |
| "logits/chosen": -2.8675971031188965, | |
| "logits/rejected": -2.89615797996521, | |
| "logps/chosen": -325.03863525390625, | |
| "logps/rejected": -333.436767578125, | |
| "loss": 0.1034, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.79555344581604, | |
| "rewards/margins": 4.759924411773682, | |
| "rewards/rejected": -2.9643709659576416, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "eval_logits/chosen": -2.92020583152771, | |
| "eval_logits/rejected": -2.9095799922943115, | |
| "eval_logps/chosen": -364.1185302734375, | |
| "eval_logps/rejected": -306.2866516113281, | |
| "eval_loss": 0.5107486248016357, | |
| "eval_rewards/accuracies": 0.761904776096344, | |
| "eval_rewards/chosen": 0.307327002286911, | |
| "eval_rewards/margins": 1.739391803741455, | |
| "eval_rewards/rejected": -1.4320647716522217, | |
| "eval_runtime": 164.3142, | |
| "eval_samples_per_second": 12.172, | |
| "eval_steps_per_second": 0.383, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.6257645259938836e-07, | |
| "logits/chosen": -2.8848228454589844, | |
| "logits/rejected": -2.9434664249420166, | |
| "logps/chosen": -304.1281433105469, | |
| "logps/rejected": -323.9388732910156, | |
| "loss": 0.0912, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 1.5237512588500977, | |
| "rewards/margins": 5.300021171569824, | |
| "rewards/rejected": -3.7762699127197266, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.606651376146789e-07, | |
| "logits/chosen": -2.818145275115967, | |
| "logits/rejected": -2.773864269256592, | |
| "logps/chosen": -315.73687744140625, | |
| "logps/rejected": -252.3991241455078, | |
| "loss": 0.1072, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.2391575574874878, | |
| "rewards/margins": 4.052863121032715, | |
| "rewards/rejected": -2.8137052059173584, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.5875382262996937e-07, | |
| "logits/chosen": -2.8956587314605713, | |
| "logits/rejected": -2.88509202003479, | |
| "logps/chosen": -332.889404296875, | |
| "logps/rejected": -375.0550231933594, | |
| "loss": 0.0854, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 1.7886276245117188, | |
| "rewards/margins": 4.777144432067871, | |
| "rewards/rejected": -2.9885172843933105, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.568425076452599e-07, | |
| "logits/chosen": -2.9382426738739014, | |
| "logits/rejected": -2.9390716552734375, | |
| "logps/chosen": -339.12451171875, | |
| "logps/rejected": -315.15625, | |
| "loss": 0.099, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": 1.3939841985702515, | |
| "rewards/margins": 4.764640808105469, | |
| "rewards/rejected": -3.370656967163086, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.5493119266055044e-07, | |
| "logits/chosen": -2.8407671451568604, | |
| "logits/rejected": -2.821763753890991, | |
| "logps/chosen": -336.37298583984375, | |
| "logps/rejected": -257.6861267089844, | |
| "loss": 0.1132, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.3384716510772705, | |
| "rewards/margins": 4.965681076049805, | |
| "rewards/rejected": -3.627209424972534, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.5301987767584097e-07, | |
| "logits/chosen": -2.8167824745178223, | |
| "logits/rejected": -2.810854434967041, | |
| "logps/chosen": -323.439208984375, | |
| "logps/rejected": -342.47991943359375, | |
| "loss": 0.2041, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 0.7954254150390625, | |
| "rewards/margins": 4.722014427185059, | |
| "rewards/rejected": -3.9265894889831543, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.511085626911315e-07, | |
| "logits/chosen": -2.940957546234131, | |
| "logits/rejected": -3.0021321773529053, | |
| "logps/chosen": -366.2899475097656, | |
| "logps/rejected": -343.2218933105469, | |
| "loss": 0.3299, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.4886529445648193, | |
| "rewards/margins": 5.359461307525635, | |
| "rewards/rejected": -3.870807647705078, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.49197247706422e-07, | |
| "logits/chosen": -2.933786392211914, | |
| "logits/rejected": -2.906247615814209, | |
| "logps/chosen": -261.5579833984375, | |
| "logps/rejected": -276.83026123046875, | |
| "loss": 0.149, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 0.48736995458602905, | |
| "rewards/margins": 4.608451843261719, | |
| "rewards/rejected": -4.121081829071045, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.472859327217125e-07, | |
| "logits/chosen": -2.8349316120147705, | |
| "logits/rejected": -2.9043667316436768, | |
| "logps/chosen": -364.6941833496094, | |
| "logps/rejected": -376.5315856933594, | |
| "loss": 0.1092, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 1.2117881774902344, | |
| "rewards/margins": 5.32895565032959, | |
| "rewards/rejected": -4.117166996002197, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.4537461773700304e-07, | |
| "logits/chosen": -2.985729694366455, | |
| "logits/rejected": -2.8761606216430664, | |
| "logps/chosen": -258.05841064453125, | |
| "logps/rejected": -241.39053344726562, | |
| "loss": 0.1114, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.6233962774276733, | |
| "rewards/margins": 4.293813228607178, | |
| "rewards/rejected": -3.6704165935516357, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_logits/chosen": -2.9666378498077393, | |
| "eval_logits/rejected": -2.95609450340271, | |
| "eval_logps/chosen": -365.8598327636719, | |
| "eval_logps/rejected": -310.414794921875, | |
| "eval_loss": 0.534447431564331, | |
| "eval_rewards/accuracies": 0.7460317611694336, | |
| "eval_rewards/chosen": 0.1331927627325058, | |
| "eval_rewards/margins": 1.9780747890472412, | |
| "eval_rewards/rejected": -1.8448821306228638, | |
| "eval_runtime": 164.1399, | |
| "eval_samples_per_second": 12.185, | |
| "eval_steps_per_second": 0.384, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.434633027522936e-07, | |
| "logits/chosen": -2.9507124423980713, | |
| "logits/rejected": -2.9483211040496826, | |
| "logps/chosen": -338.0868835449219, | |
| "logps/rejected": -325.01483154296875, | |
| "loss": 0.1007, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 1.6764055490493774, | |
| "rewards/margins": 5.684920310974121, | |
| "rewards/rejected": -4.008514404296875, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.415519877675841e-07, | |
| "logits/chosen": -2.976590156555176, | |
| "logits/rejected": -3.025784730911255, | |
| "logps/chosen": -277.34710693359375, | |
| "logps/rejected": -323.576171875, | |
| "loss": 0.1131, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 0.841607928276062, | |
| "rewards/margins": 4.658609867095947, | |
| "rewards/rejected": -3.8170018196105957, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.3964067278287464e-07, | |
| "logits/chosen": -3.0445570945739746, | |
| "logits/rejected": -3.0413312911987305, | |
| "logps/chosen": -337.9605407714844, | |
| "logps/rejected": -288.26666259765625, | |
| "loss": 0.1463, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 1.5894230604171753, | |
| "rewards/margins": 5.3310723304748535, | |
| "rewards/rejected": -3.7416489124298096, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.377293577981651e-07, | |
| "logits/chosen": -2.9471421241760254, | |
| "logits/rejected": -2.9865708351135254, | |
| "logps/chosen": -288.2189025878906, | |
| "logps/rejected": -309.2388610839844, | |
| "loss": 0.0934, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 1.4304211139678955, | |
| "rewards/margins": 5.832246780395508, | |
| "rewards/rejected": -4.401825428009033, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.3581804281345565e-07, | |
| "logits/chosen": -2.9803059101104736, | |
| "logits/rejected": -2.9711978435516357, | |
| "logps/chosen": -337.70697021484375, | |
| "logps/rejected": -298.4077453613281, | |
| "loss": 0.0967, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 1.4117494821548462, | |
| "rewards/margins": 5.474527359008789, | |
| "rewards/rejected": -4.062777519226074, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.339067278287462e-07, | |
| "logits/chosen": -2.8603241443634033, | |
| "logits/rejected": -2.8709046840667725, | |
| "logps/chosen": -312.73504638671875, | |
| "logps/rejected": -306.9026794433594, | |
| "loss": 0.0785, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 1.3839662075042725, | |
| "rewards/margins": 5.900813102722168, | |
| "rewards/rejected": -4.516847133636475, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.319954128440367e-07, | |
| "logits/chosen": -2.975525379180908, | |
| "logits/rejected": -2.9611260890960693, | |
| "logps/chosen": -325.86163330078125, | |
| "logps/rejected": -285.2755432128906, | |
| "loss": 0.0992, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 1.0602104663848877, | |
| "rewards/margins": 4.849926948547363, | |
| "rewards/rejected": -3.7897167205810547, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.3008409785932725e-07, | |
| "logits/chosen": -2.877586841583252, | |
| "logits/rejected": -2.821748971939087, | |
| "logps/chosen": -324.6281433105469, | |
| "logps/rejected": -323.02301025390625, | |
| "loss": 0.0892, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 1.1957648992538452, | |
| "rewards/margins": 5.952631950378418, | |
| "rewards/rejected": -4.756867408752441, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.2817278287461773e-07, | |
| "logits/chosen": -2.9451098442077637, | |
| "logits/rejected": -2.9684863090515137, | |
| "logps/chosen": -279.90216064453125, | |
| "logps/rejected": -338.3842468261719, | |
| "loss": 0.1045, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 1.2665460109710693, | |
| "rewards/margins": 5.084068298339844, | |
| "rewards/rejected": -3.8175220489501953, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.262614678899082e-07, | |
| "logits/chosen": -2.889819383621216, | |
| "logits/rejected": -2.9235751628875732, | |
| "logps/chosen": -303.02838134765625, | |
| "logps/rejected": -356.177734375, | |
| "loss": 0.1338, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.8252049684524536, | |
| "rewards/margins": 5.507418155670166, | |
| "rewards/rejected": -4.6822123527526855, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_logits/chosen": -2.9508416652679443, | |
| "eval_logits/rejected": -2.9459922313690186, | |
| "eval_logps/chosen": -368.0057678222656, | |
| "eval_logps/rejected": -313.3835144042969, | |
| "eval_loss": 0.534950315952301, | |
| "eval_rewards/accuracies": 0.773809552192688, | |
| "eval_rewards/chosen": -0.0813969075679779, | |
| "eval_rewards/margins": 2.0603599548339844, | |
| "eval_rewards/rejected": -2.141756772994995, | |
| "eval_runtime": 164.0736, | |
| "eval_samples_per_second": 12.19, | |
| "eval_steps_per_second": 0.384, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.2435015290519874e-07, | |
| "logits/chosen": -2.916611671447754, | |
| "logits/rejected": -2.927777051925659, | |
| "logps/chosen": -283.2217712402344, | |
| "logps/rejected": -297.02850341796875, | |
| "loss": 0.0893, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 1.0804212093353271, | |
| "rewards/margins": 5.549715518951416, | |
| "rewards/rejected": -4.469293594360352, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.2243883792048927e-07, | |
| "logits/chosen": -2.977875232696533, | |
| "logits/rejected": -2.986704111099243, | |
| "logps/chosen": -335.274658203125, | |
| "logps/rejected": -380.4412536621094, | |
| "loss": 0.1303, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 1.4366910457611084, | |
| "rewards/margins": 5.666425704956055, | |
| "rewards/rejected": -4.229735374450684, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 3.205275229357798e-07, | |
| "logits/chosen": -2.868638753890991, | |
| "logits/rejected": -2.8948395252227783, | |
| "logps/chosen": -387.9947204589844, | |
| "logps/rejected": -389.3511657714844, | |
| "loss": 0.1117, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 1.3278411626815796, | |
| "rewards/margins": 5.698910236358643, | |
| "rewards/rejected": -4.371068954467773, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 3.186162079510703e-07, | |
| "logits/chosen": -2.9128642082214355, | |
| "logits/rejected": -2.91692852973938, | |
| "logps/chosen": -351.5616149902344, | |
| "logps/rejected": -373.9852600097656, | |
| "loss": 0.1466, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.3745101690292358, | |
| "rewards/margins": 5.025930404663086, | |
| "rewards/rejected": -3.6514201164245605, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 3.167048929663608e-07, | |
| "logits/chosen": -2.955967426300049, | |
| "logits/rejected": -2.923954486846924, | |
| "logps/chosen": -278.7707824707031, | |
| "logps/rejected": -281.9942321777344, | |
| "loss": 0.1003, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": 1.0149275064468384, | |
| "rewards/margins": 5.51505184173584, | |
| "rewards/rejected": -4.500124931335449, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 3.1479357798165134e-07, | |
| "logits/chosen": -2.9661002159118652, | |
| "logits/rejected": -2.948564052581787, | |
| "logps/chosen": -339.5476989746094, | |
| "logps/rejected": -321.3616638183594, | |
| "loss": 0.0984, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.038356065750122, | |
| "rewards/margins": 5.16934061050415, | |
| "rewards/rejected": -4.130984306335449, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.128822629969419e-07, | |
| "logits/chosen": -2.862750291824341, | |
| "logits/rejected": -2.8853306770324707, | |
| "logps/chosen": -350.9757995605469, | |
| "logps/rejected": -333.6067199707031, | |
| "loss": 0.1195, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 1.147101640701294, | |
| "rewards/margins": 5.550149917602539, | |
| "rewards/rejected": -4.403048038482666, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 3.109709480122324e-07, | |
| "logits/chosen": -2.9103734493255615, | |
| "logits/rejected": -2.9115426540374756, | |
| "logps/chosen": -286.4703063964844, | |
| "logps/rejected": -298.028076171875, | |
| "loss": 0.1039, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 1.1303044557571411, | |
| "rewards/margins": 5.467093467712402, | |
| "rewards/rejected": -4.336789131164551, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 3.0905963302752294e-07, | |
| "logits/chosen": -2.7934536933898926, | |
| "logits/rejected": -2.880432605743408, | |
| "logps/chosen": -312.3811340332031, | |
| "logps/rejected": -316.52215576171875, | |
| "loss": 0.1168, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 0.9917739629745483, | |
| "rewards/margins": 5.529503345489502, | |
| "rewards/rejected": -4.537729263305664, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.071483180428134e-07, | |
| "logits/chosen": -2.9836788177490234, | |
| "logits/rejected": -2.9340128898620605, | |
| "logps/chosen": -382.5011291503906, | |
| "logps/rejected": -295.3705749511719, | |
| "loss": 0.0979, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 1.5801069736480713, | |
| "rewards/margins": 5.381975173950195, | |
| "rewards/rejected": -3.801867723464966, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "eval_logits/chosen": -2.9200918674468994, | |
| "eval_logits/rejected": -2.9171833992004395, | |
| "eval_logps/chosen": -368.1370849609375, | |
| "eval_logps/rejected": -314.4656982421875, | |
| "eval_loss": 0.5474238991737366, | |
| "eval_rewards/accuracies": 0.7658730149269104, | |
| "eval_rewards/chosen": -0.09453116357326508, | |
| "eval_rewards/margins": 2.1554412841796875, | |
| "eval_rewards/rejected": -2.249972343444824, | |
| "eval_runtime": 164.7724, | |
| "eval_samples_per_second": 12.138, | |
| "eval_steps_per_second": 0.382, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 3.0523700305810395e-07, | |
| "logits/chosen": -2.9245269298553467, | |
| "logits/rejected": -2.9436841011047363, | |
| "logps/chosen": -330.574951171875, | |
| "logps/rejected": -342.8641662597656, | |
| "loss": 0.1043, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 1.3053131103515625, | |
| "rewards/margins": 5.542339324951172, | |
| "rewards/rejected": -4.237026214599609, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 3.033256880733945e-07, | |
| "logits/chosen": -2.9248242378234863, | |
| "logits/rejected": -2.935176372528076, | |
| "logps/chosen": -286.57171630859375, | |
| "logps/rejected": -278.96746826171875, | |
| "loss": 0.104, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.7352786660194397, | |
| "rewards/margins": 5.138430118560791, | |
| "rewards/rejected": -4.403151035308838, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 3.01414373088685e-07, | |
| "logits/chosen": -2.8516454696655273, | |
| "logits/rejected": -2.7985987663269043, | |
| "logps/chosen": -344.3554382324219, | |
| "logps/rejected": -341.85986328125, | |
| "loss": 0.1138, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 1.1177431344985962, | |
| "rewards/margins": 5.806307792663574, | |
| "rewards/rejected": -4.688565254211426, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.9950305810397555e-07, | |
| "logits/chosen": -2.906580686569214, | |
| "logits/rejected": -2.97481369972229, | |
| "logps/chosen": -335.2439880371094, | |
| "logps/rejected": -320.96929931640625, | |
| "loss": 0.1256, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 0.8055500984191895, | |
| "rewards/margins": 6.211544990539551, | |
| "rewards/rejected": -5.405994892120361, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.9759174311926603e-07, | |
| "logits/chosen": -2.9623026847839355, | |
| "logits/rejected": -2.9445879459381104, | |
| "logps/chosen": -323.3135070800781, | |
| "logps/rejected": -329.90496826171875, | |
| "loss": 0.1101, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.9566561579704285, | |
| "rewards/margins": 7.040016174316406, | |
| "rewards/rejected": -6.083359718322754, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.9568042813455656e-07, | |
| "logits/chosen": -2.8847999572753906, | |
| "logits/rejected": -2.8803889751434326, | |
| "logps/chosen": -342.5070495605469, | |
| "logps/rejected": -269.67431640625, | |
| "loss": 0.1087, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 0.4027617573738098, | |
| "rewards/margins": 4.224934101104736, | |
| "rewards/rejected": -3.8221726417541504, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.937691131498471e-07, | |
| "logits/chosen": -2.949441909790039, | |
| "logits/rejected": -2.9045028686523438, | |
| "logps/chosen": -338.4786376953125, | |
| "logps/rejected": -334.69189453125, | |
| "loss": 0.1096, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 0.9296592473983765, | |
| "rewards/margins": 5.884530067443848, | |
| "rewards/rejected": -4.954870700836182, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.918577981651376e-07, | |
| "logits/chosen": -2.932290554046631, | |
| "logits/rejected": -2.9427378177642822, | |
| "logps/chosen": -280.7291564941406, | |
| "logps/rejected": -314.51953125, | |
| "loss": 0.1177, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 0.7406275868415833, | |
| "rewards/margins": 5.4294962882995605, | |
| "rewards/rejected": -4.688868522644043, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.8994648318042816e-07, | |
| "logits/chosen": -2.9206976890563965, | |
| "logits/rejected": -2.9712460041046143, | |
| "logps/chosen": -358.7654113769531, | |
| "logps/rejected": -313.7131652832031, | |
| "loss": 0.112, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 0.8611236810684204, | |
| "rewards/margins": 5.0603203773498535, | |
| "rewards/rejected": -4.199196815490723, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.8803516819571863e-07, | |
| "logits/chosen": -2.979775905609131, | |
| "logits/rejected": -2.9886953830718994, | |
| "logps/chosen": -379.1478576660156, | |
| "logps/rejected": -358.28179931640625, | |
| "loss": 0.1366, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 1.4315125942230225, | |
| "rewards/margins": 6.2576189041137695, | |
| "rewards/rejected": -4.826106071472168, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "eval_logits/chosen": -2.9143617153167725, | |
| "eval_logits/rejected": -2.9134304523468018, | |
| "eval_logps/chosen": -371.9402770996094, | |
| "eval_logps/rejected": -315.933837890625, | |
| "eval_loss": 0.5439518094062805, | |
| "eval_rewards/accuracies": 0.7579365372657776, | |
| "eval_rewards/chosen": -0.47485068440437317, | |
| "eval_rewards/margins": 1.921934962272644, | |
| "eval_rewards/rejected": -2.3967857360839844, | |
| "eval_runtime": 165.1605, | |
| "eval_samples_per_second": 12.109, | |
| "eval_steps_per_second": 0.381, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.8612385321100917e-07, | |
| "logits/chosen": -2.8306632041931152, | |
| "logits/rejected": -2.9071240425109863, | |
| "logps/chosen": -294.634033203125, | |
| "logps/rejected": -327.87896728515625, | |
| "loss": 0.1281, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 0.9478427171707153, | |
| "rewards/margins": 5.661940097808838, | |
| "rewards/rejected": -4.714097499847412, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.842125382262997e-07, | |
| "logits/chosen": -2.9503073692321777, | |
| "logits/rejected": -2.9379420280456543, | |
| "logps/chosen": -308.3216247558594, | |
| "logps/rejected": -308.57574462890625, | |
| "loss": 0.1361, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 1.2659790515899658, | |
| "rewards/margins": 5.583965301513672, | |
| "rewards/rejected": -4.317985534667969, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.8230122324159023e-07, | |
| "logits/chosen": -2.8611526489257812, | |
| "logits/rejected": -2.9008944034576416, | |
| "logps/chosen": -375.9707946777344, | |
| "logps/rejected": -374.29913330078125, | |
| "loss": 0.1194, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 1.3691354990005493, | |
| "rewards/margins": 6.1049299240112305, | |
| "rewards/rejected": -4.735795021057129, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.8038990825688076e-07, | |
| "logits/chosen": -2.994868516921997, | |
| "logits/rejected": -2.9603443145751953, | |
| "logps/chosen": -263.36474609375, | |
| "logps/rejected": -250.1201934814453, | |
| "loss": 0.1098, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": 0.8430646657943726, | |
| "rewards/margins": 4.541081428527832, | |
| "rewards/rejected": -3.698017120361328, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.784785932721712e-07, | |
| "logits/chosen": -2.9225330352783203, | |
| "logits/rejected": -2.925787925720215, | |
| "logps/chosen": -319.07574462890625, | |
| "logps/rejected": -327.4895324707031, | |
| "loss": 0.1336, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 1.1656725406646729, | |
| "rewards/margins": 5.621832847595215, | |
| "rewards/rejected": -4.456160068511963, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.765672782874617e-07, | |
| "logits/chosen": -2.896669864654541, | |
| "logits/rejected": -2.895914316177368, | |
| "logps/chosen": -331.0616760253906, | |
| "logps/rejected": -286.6056213378906, | |
| "loss": 0.1367, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 1.3613170385360718, | |
| "rewards/margins": 5.62969970703125, | |
| "rewards/rejected": -4.268383026123047, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.7465596330275225e-07, | |
| "logits/chosen": -2.955178737640381, | |
| "logits/rejected": -2.9608724117279053, | |
| "logps/chosen": -350.20703125, | |
| "logps/rejected": -255.1401824951172, | |
| "loss": 0.104, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 1.233802080154419, | |
| "rewards/margins": 5.684638023376465, | |
| "rewards/rejected": -4.450836658477783, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.727446483180428e-07, | |
| "logits/chosen": -2.9283223152160645, | |
| "logits/rejected": -2.952641010284424, | |
| "logps/chosen": -313.20306396484375, | |
| "logps/rejected": -316.35333251953125, | |
| "loss": 0.1125, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.9838566780090332, | |
| "rewards/margins": 5.377806186676025, | |
| "rewards/rejected": -4.393948554992676, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.708333333333333e-07, | |
| "logits/chosen": -2.850559949874878, | |
| "logits/rejected": -2.833322048187256, | |
| "logps/chosen": -341.3831481933594, | |
| "logps/rejected": -314.9398498535156, | |
| "loss": 0.0943, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 0.7250410914421082, | |
| "rewards/margins": 5.533560276031494, | |
| "rewards/rejected": -4.808518409729004, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.6892201834862385e-07, | |
| "logits/chosen": -2.937903642654419, | |
| "logits/rejected": -2.9050183296203613, | |
| "logps/chosen": -326.52691650390625, | |
| "logps/rejected": -302.30694580078125, | |
| "loss": 0.1042, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 0.6643081903457642, | |
| "rewards/margins": 5.094948768615723, | |
| "rewards/rejected": -4.43064022064209, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "eval_logits/chosen": -2.930583953857422, | |
| "eval_logits/rejected": -2.936053991317749, | |
| "eval_logps/chosen": -372.2054138183594, | |
| "eval_logps/rejected": -318.7686462402344, | |
| "eval_loss": 0.552377462387085, | |
| "eval_rewards/accuracies": 0.7698412537574768, | |
| "eval_rewards/chosen": -0.5013648867607117, | |
| "eval_rewards/margins": 2.178898334503174, | |
| "eval_rewards/rejected": -2.6802632808685303, | |
| "eval_runtime": 167.7329, | |
| "eval_samples_per_second": 11.924, | |
| "eval_steps_per_second": 0.376, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.6701070336391433e-07, | |
| "logits/chosen": -2.90950345993042, | |
| "logits/rejected": -2.87695050239563, | |
| "logps/chosen": -378.1885070800781, | |
| "logps/rejected": -322.77337646484375, | |
| "loss": 0.0924, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 1.4327830076217651, | |
| "rewards/margins": 5.619394779205322, | |
| "rewards/rejected": -4.186612129211426, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.6509938837920486e-07, | |
| "logits/chosen": -2.849907398223877, | |
| "logits/rejected": -2.8833765983581543, | |
| "logps/chosen": -289.51605224609375, | |
| "logps/rejected": -320.0068054199219, | |
| "loss": 0.0975, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 0.8192381858825684, | |
| "rewards/margins": 4.99267053604126, | |
| "rewards/rejected": -4.173432350158691, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.631880733944954e-07, | |
| "logits/chosen": -2.964118719100952, | |
| "logits/rejected": -2.984459400177002, | |
| "logps/chosen": -303.44866943359375, | |
| "logps/rejected": -299.94482421875, | |
| "loss": 0.105, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.37635737657546997, | |
| "rewards/margins": 4.797235488891602, | |
| "rewards/rejected": -4.420877933502197, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.612767584097859e-07, | |
| "logits/chosen": -2.9242002964019775, | |
| "logits/rejected": -2.9575366973876953, | |
| "logps/chosen": -308.75616455078125, | |
| "logps/rejected": -282.21380615234375, | |
| "loss": 0.1016, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 1.4492263793945312, | |
| "rewards/margins": 5.21218204498291, | |
| "rewards/rejected": -3.7629554271698, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.5936544342507646e-07, | |
| "logits/chosen": -2.902669668197632, | |
| "logits/rejected": -2.932953357696533, | |
| "logps/chosen": -306.1797790527344, | |
| "logps/rejected": -315.36700439453125, | |
| "loss": 0.1412, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 1.0359132289886475, | |
| "rewards/margins": 5.480694770812988, | |
| "rewards/rejected": -4.444781303405762, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.5745412844036693e-07, | |
| "logits/chosen": -2.862687110900879, | |
| "logits/rejected": -2.9322876930236816, | |
| "logps/chosen": -361.41583251953125, | |
| "logps/rejected": -309.0520935058594, | |
| "loss": 0.1228, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 1.3139979839324951, | |
| "rewards/margins": 5.389029026031494, | |
| "rewards/rejected": -4.07503080368042, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.5554281345565747e-07, | |
| "logits/chosen": -2.937886953353882, | |
| "logits/rejected": -2.9431166648864746, | |
| "logps/chosen": -312.0148620605469, | |
| "logps/rejected": -329.90863037109375, | |
| "loss": 0.1005, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 0.7804427146911621, | |
| "rewards/margins": 5.692571640014648, | |
| "rewards/rejected": -4.912128925323486, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.53631498470948e-07, | |
| "logits/chosen": -2.972658634185791, | |
| "logits/rejected": -2.9699690341949463, | |
| "logps/chosen": -347.6422424316406, | |
| "logps/rejected": -312.10858154296875, | |
| "loss": 0.0857, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 1.3729878664016724, | |
| "rewards/margins": 5.7400736808776855, | |
| "rewards/rejected": -4.367085933685303, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.5172018348623853e-07, | |
| "logits/chosen": -2.8885016441345215, | |
| "logits/rejected": -2.9170756340026855, | |
| "logps/chosen": -355.39813232421875, | |
| "logps/rejected": -336.2825927734375, | |
| "loss": 0.0852, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.1573688983917236, | |
| "rewards/margins": 5.676226615905762, | |
| "rewards/rejected": -4.518857479095459, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.4980886850152906e-07, | |
| "logits/chosen": -2.895519256591797, | |
| "logits/rejected": -2.851107120513916, | |
| "logps/chosen": -341.70904541015625, | |
| "logps/rejected": -305.86480712890625, | |
| "loss": 0.1313, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 1.0455483198165894, | |
| "rewards/margins": 5.084899425506592, | |
| "rewards/rejected": -4.039351463317871, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "eval_logits/chosen": -2.8998661041259766, | |
| "eval_logits/rejected": -2.9059910774230957, | |
| "eval_logps/chosen": -369.42547607421875, | |
| "eval_logps/rejected": -313.8333435058594, | |
| "eval_loss": 0.5333446264266968, | |
| "eval_rewards/accuracies": 0.75, | |
| "eval_rewards/chosen": -0.22337232530117035, | |
| "eval_rewards/margins": 1.9633642435073853, | |
| "eval_rewards/rejected": -2.1867363452911377, | |
| "eval_runtime": 167.9925, | |
| "eval_samples_per_second": 11.905, | |
| "eval_steps_per_second": 0.375, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.478975535168196e-07, | |
| "logits/chosen": -2.892448663711548, | |
| "logits/rejected": -2.8929343223571777, | |
| "logps/chosen": -341.67431640625, | |
| "logps/rejected": -287.61383056640625, | |
| "loss": 0.1044, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.8163628578186035, | |
| "rewards/margins": 4.475451469421387, | |
| "rewards/rejected": -3.659088611602783, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.459862385321101e-07, | |
| "logits/chosen": -2.9265084266662598, | |
| "logits/rejected": -2.9548892974853516, | |
| "logps/chosen": -382.5340576171875, | |
| "logps/rejected": -347.4888916015625, | |
| "loss": 0.1018, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 1.6915686130523682, | |
| "rewards/margins": 6.067580223083496, | |
| "rewards/rejected": -4.376010894775391, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.440749235474006e-07, | |
| "logits/chosen": -2.8851680755615234, | |
| "logits/rejected": -2.903552532196045, | |
| "logps/chosen": -342.8496398925781, | |
| "logps/rejected": -317.72845458984375, | |
| "loss": 0.1005, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 1.0220366716384888, | |
| "rewards/margins": 5.634666442871094, | |
| "rewards/rejected": -4.6126298904418945, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.421636085626911e-07, | |
| "logits/chosen": -2.8142755031585693, | |
| "logits/rejected": -2.8399770259857178, | |
| "logps/chosen": -344.5146484375, | |
| "logps/rejected": -301.95928955078125, | |
| "loss": 0.0981, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 1.6624820232391357, | |
| "rewards/margins": 6.2719035148620605, | |
| "rewards/rejected": -4.6094207763671875, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.402522935779816e-07, | |
| "logits/chosen": -2.8734793663024902, | |
| "logits/rejected": -2.876209259033203, | |
| "logps/chosen": -339.1289367675781, | |
| "logps/rejected": -351.3002014160156, | |
| "loss": 0.1207, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 1.1800963878631592, | |
| "rewards/margins": 5.746790409088135, | |
| "rewards/rejected": -4.566694736480713, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.3834097859327215e-07, | |
| "logits/chosen": -2.922632932662964, | |
| "logits/rejected": -2.973679780960083, | |
| "logps/chosen": -285.7434997558594, | |
| "logps/rejected": -304.81536865234375, | |
| "loss": 0.1184, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 0.9965537786483765, | |
| "rewards/margins": 5.077877998352051, | |
| "rewards/rejected": -4.081325054168701, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.3642966360856268e-07, | |
| "logits/chosen": -2.9541144371032715, | |
| "logits/rejected": -2.929344654083252, | |
| "logps/chosen": -326.902587890625, | |
| "logps/rejected": -306.6372985839844, | |
| "loss": 0.0972, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 1.7346267700195312, | |
| "rewards/margins": 5.666862964630127, | |
| "rewards/rejected": -3.9322357177734375, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.345183486238532e-07, | |
| "logits/chosen": -2.857109785079956, | |
| "logits/rejected": -2.8801960945129395, | |
| "logps/chosen": -308.47369384765625, | |
| "logps/rejected": -375.78692626953125, | |
| "loss": 0.0899, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.088503122329712, | |
| "rewards/margins": 5.432967185974121, | |
| "rewards/rejected": -4.344464302062988, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.3260703363914372e-07, | |
| "logits/chosen": -2.9087703227996826, | |
| "logits/rejected": -2.9551265239715576, | |
| "logps/chosen": -381.01959228515625, | |
| "logps/rejected": -338.1856994628906, | |
| "loss": 0.105, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.9901115894317627, | |
| "rewards/margins": 5.308182716369629, | |
| "rewards/rejected": -3.318070888519287, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.3069571865443425e-07, | |
| "logits/chosen": -2.9682905673980713, | |
| "logits/rejected": -2.9819796085357666, | |
| "logps/chosen": -277.3031921386719, | |
| "logps/rejected": -325.71649169921875, | |
| "loss": 0.1629, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 0.5376863479614258, | |
| "rewards/margins": 5.757152557373047, | |
| "rewards/rejected": -5.219466209411621, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "eval_logits/chosen": -2.909576654434204, | |
| "eval_logits/rejected": -2.9181904792785645, | |
| "eval_logps/chosen": -371.09588623046875, | |
| "eval_logps/rejected": -319.5571594238281, | |
| "eval_loss": 0.5655122399330139, | |
| "eval_rewards/accuracies": 0.75, | |
| "eval_rewards/chosen": -0.3904118835926056, | |
| "eval_rewards/margins": 2.3687071800231934, | |
| "eval_rewards/rejected": -2.7591187953948975, | |
| "eval_runtime": 164.0305, | |
| "eval_samples_per_second": 12.193, | |
| "eval_steps_per_second": 0.384, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.2878440366972476e-07, | |
| "logits/chosen": -2.9808902740478516, | |
| "logits/rejected": -2.9869067668914795, | |
| "logps/chosen": -326.5906677246094, | |
| "logps/rejected": -384.11944580078125, | |
| "loss": 0.0867, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 0.5069032311439514, | |
| "rewards/margins": 5.799986362457275, | |
| "rewards/rejected": -5.2930827140808105, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.268730886850153e-07, | |
| "logits/chosen": -2.8385868072509766, | |
| "logits/rejected": -2.9057114124298096, | |
| "logps/chosen": -325.4120178222656, | |
| "logps/rejected": -309.59136962890625, | |
| "loss": 0.0989, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": 1.1605224609375, | |
| "rewards/margins": 5.591654300689697, | |
| "rewards/rejected": -4.431131362915039, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.249617737003058e-07, | |
| "logits/chosen": -2.8839237689971924, | |
| "logits/rejected": -2.8796088695526123, | |
| "logps/chosen": -352.14886474609375, | |
| "logps/rejected": -371.3978576660156, | |
| "loss": 0.1089, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.0480362176895142, | |
| "rewards/margins": 5.741724967956543, | |
| "rewards/rejected": -4.693687915802002, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.2305045871559633e-07, | |
| "logits/chosen": -2.883430242538452, | |
| "logits/rejected": -2.8605690002441406, | |
| "logps/chosen": -345.23272705078125, | |
| "logps/rejected": -331.52325439453125, | |
| "loss": 0.1311, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 1.180057168006897, | |
| "rewards/margins": 5.705449104309082, | |
| "rewards/rejected": -4.525391578674316, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.2113914373088686e-07, | |
| "logits/chosen": -2.962857484817505, | |
| "logits/rejected": -2.9696333408355713, | |
| "logps/chosen": -377.6351623535156, | |
| "logps/rejected": -362.8825378417969, | |
| "loss": 0.1067, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.8095831871032715, | |
| "rewards/margins": 5.408170700073242, | |
| "rewards/rejected": -4.598587512969971, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 2.1922782874617736e-07, | |
| "logits/chosen": -2.9029316902160645, | |
| "logits/rejected": -2.9539952278137207, | |
| "logps/chosen": -289.267822265625, | |
| "logps/rejected": -366.2077941894531, | |
| "loss": 0.0929, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 0.47237950563430786, | |
| "rewards/margins": 6.199611186981201, | |
| "rewards/rejected": -5.727231502532959, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 2.1731651376146787e-07, | |
| "logits/chosen": -2.8582608699798584, | |
| "logits/rejected": -2.8988289833068848, | |
| "logps/chosen": -277.98406982421875, | |
| "logps/rejected": -328.0066833496094, | |
| "loss": 0.1048, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 0.7530109286308289, | |
| "rewards/margins": 5.7497334480285645, | |
| "rewards/rejected": -4.99672269821167, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 2.154051987767584e-07, | |
| "logits/chosen": -2.9023542404174805, | |
| "logits/rejected": -2.9242827892303467, | |
| "logps/chosen": -314.0538330078125, | |
| "logps/rejected": -299.74420166015625, | |
| "loss": 0.12, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 0.7526110410690308, | |
| "rewards/margins": 5.213059425354004, | |
| "rewards/rejected": -4.460447311401367, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 2.134938837920489e-07, | |
| "logits/chosen": -2.8436591625213623, | |
| "logits/rejected": -2.8463809490203857, | |
| "logps/chosen": -255.3565673828125, | |
| "logps/rejected": -273.94464111328125, | |
| "loss": 0.0818, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.06976697593927383, | |
| "rewards/margins": 4.849638938903809, | |
| "rewards/rejected": -4.919405937194824, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 2.1158256880733944e-07, | |
| "logits/chosen": -2.9030632972717285, | |
| "logits/rejected": -2.8941729068756104, | |
| "logps/chosen": -367.6248779296875, | |
| "logps/rejected": -302.12249755859375, | |
| "loss": 0.0993, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": 0.4400938153266907, | |
| "rewards/margins": 5.627293586730957, | |
| "rewards/rejected": -5.187199592590332, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "eval_logits/chosen": -2.8477160930633545, | |
| "eval_logits/rejected": -2.8601999282836914, | |
| "eval_logps/chosen": -374.3083801269531, | |
| "eval_logps/rejected": -321.6667785644531, | |
| "eval_loss": 0.5605445504188538, | |
| "eval_rewards/accuracies": 0.7460317611694336, | |
| "eval_rewards/chosen": -0.711660623550415, | |
| "eval_rewards/margins": 2.2584221363067627, | |
| "eval_rewards/rejected": -2.9700827598571777, | |
| "eval_runtime": 164.7388, | |
| "eval_samples_per_second": 12.14, | |
| "eval_steps_per_second": 0.382, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 2.0967125382262994e-07, | |
| "logits/chosen": -2.8688273429870605, | |
| "logits/rejected": -2.868739366531372, | |
| "logps/chosen": -337.7546081542969, | |
| "logps/rejected": -312.27569580078125, | |
| "loss": 0.1163, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.7751233577728271, | |
| "rewards/margins": 5.873146057128906, | |
| "rewards/rejected": -5.098022937774658, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 2.0775993883792048e-07, | |
| "logits/chosen": -2.8374381065368652, | |
| "logits/rejected": -2.8085215091705322, | |
| "logps/chosen": -352.53192138671875, | |
| "logps/rejected": -316.3230895996094, | |
| "loss": 0.0933, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": 0.9547752141952515, | |
| "rewards/margins": 5.203994274139404, | |
| "rewards/rejected": -4.249218940734863, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 2.05848623853211e-07, | |
| "logits/chosen": -2.874891757965088, | |
| "logits/rejected": -2.839573621749878, | |
| "logps/chosen": -366.4833679199219, | |
| "logps/rejected": -319.9959411621094, | |
| "loss": 0.0966, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 1.212294101715088, | |
| "rewards/margins": 5.914790630340576, | |
| "rewards/rejected": -4.702496528625488, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 2.0393730886850151e-07, | |
| "logits/chosen": -2.8277204036712646, | |
| "logits/rejected": -2.878105640411377, | |
| "logps/chosen": -378.3955383300781, | |
| "logps/rejected": -314.2088623046875, | |
| "loss": 0.0863, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 0.7747803926467896, | |
| "rewards/margins": 5.967954158782959, | |
| "rewards/rejected": -5.193174362182617, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 2.0202599388379205e-07, | |
| "logits/chosen": -2.8658251762390137, | |
| "logits/rejected": -2.8985071182250977, | |
| "logps/chosen": -339.0852355957031, | |
| "logps/rejected": -384.46112060546875, | |
| "loss": 0.0786, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 0.9408707618713379, | |
| "rewards/margins": 5.965841770172119, | |
| "rewards/rejected": -5.024971008300781, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 2.0011467889908258e-07, | |
| "logits/chosen": -2.829246997833252, | |
| "logits/rejected": -2.8732194900512695, | |
| "logps/chosen": -381.65655517578125, | |
| "logps/rejected": -284.0471496582031, | |
| "loss": 0.0945, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.5882245898246765, | |
| "rewards/margins": 5.461816787719727, | |
| "rewards/rejected": -4.873592376708984, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.9820336391437308e-07, | |
| "logits/chosen": -2.8398656845092773, | |
| "logits/rejected": -2.8620615005493164, | |
| "logps/chosen": -309.2004089355469, | |
| "logps/rejected": -296.1297302246094, | |
| "loss": 0.093, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 0.8088359832763672, | |
| "rewards/margins": 5.4316887855529785, | |
| "rewards/rejected": -4.6228532791137695, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.9629204892966362e-07, | |
| "logits/chosen": -2.941847324371338, | |
| "logits/rejected": -2.950911283493042, | |
| "logps/chosen": -329.76617431640625, | |
| "logps/rejected": -295.0538635253906, | |
| "loss": 0.1113, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 1.2022342681884766, | |
| "rewards/margins": 5.600251197814941, | |
| "rewards/rejected": -4.398016929626465, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.943807339449541e-07, | |
| "logits/chosen": -2.8697052001953125, | |
| "logits/rejected": -2.901094913482666, | |
| "logps/chosen": -311.559326171875, | |
| "logps/rejected": -333.4175720214844, | |
| "loss": 0.0948, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 0.7903985977172852, | |
| "rewards/margins": 5.597433090209961, | |
| "rewards/rejected": -4.807034015655518, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.9246941896024463e-07, | |
| "logits/chosen": -2.939120054244995, | |
| "logits/rejected": -2.9861233234405518, | |
| "logps/chosen": -320.5481262207031, | |
| "logps/rejected": -347.7875061035156, | |
| "loss": 0.1116, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.6647524833679199, | |
| "rewards/margins": 6.036587238311768, | |
| "rewards/rejected": -5.3718342781066895, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "eval_logits/chosen": -2.914954900741577, | |
| "eval_logits/rejected": -2.927724599838257, | |
| "eval_logps/chosen": -373.57073974609375, | |
| "eval_logps/rejected": -319.2250061035156, | |
| "eval_loss": 0.5649252533912659, | |
| "eval_rewards/accuracies": 0.7539682388305664, | |
| "eval_rewards/chosen": -0.6378985047340393, | |
| "eval_rewards/margins": 2.088006019592285, | |
| "eval_rewards/rejected": -2.7259042263031006, | |
| "eval_runtime": 164.2377, | |
| "eval_samples_per_second": 12.177, | |
| "eval_steps_per_second": 0.384, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.9055810397553516e-07, | |
| "logits/chosen": -2.9238085746765137, | |
| "logits/rejected": -2.9308090209960938, | |
| "logps/chosen": -313.63665771484375, | |
| "logps/rejected": -304.2153625488281, | |
| "loss": 0.1214, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.6549821496009827, | |
| "rewards/margins": 4.487866401672363, | |
| "rewards/rejected": -3.8328843116760254, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.8864678899082566e-07, | |
| "logits/chosen": -2.8484818935394287, | |
| "logits/rejected": -2.866534948348999, | |
| "logps/chosen": -347.75689697265625, | |
| "logps/rejected": -279.4710693359375, | |
| "loss": 0.1082, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": 0.3496394753456116, | |
| "rewards/margins": 4.312170505523682, | |
| "rewards/rejected": -3.9625308513641357, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.867354740061162e-07, | |
| "logits/chosen": -2.9492716789245605, | |
| "logits/rejected": -2.956796169281006, | |
| "logps/chosen": -307.85845947265625, | |
| "logps/rejected": -332.1622619628906, | |
| "loss": 0.1061, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.4475575387477875, | |
| "rewards/margins": 5.942025184631348, | |
| "rewards/rejected": -5.494467735290527, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.8482415902140673e-07, | |
| "logits/chosen": -2.923053503036499, | |
| "logits/rejected": -2.920959949493408, | |
| "logps/chosen": -331.311767578125, | |
| "logps/rejected": -320.19586181640625, | |
| "loss": 0.0801, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 1.0848143100738525, | |
| "rewards/margins": 5.605216979980469, | |
| "rewards/rejected": -4.520401954650879, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.8291284403669723e-07, | |
| "logits/chosen": -2.881058692932129, | |
| "logits/rejected": -2.93363618850708, | |
| "logps/chosen": -309.11212158203125, | |
| "logps/rejected": -322.50665283203125, | |
| "loss": 0.0254, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 1.7099040746688843, | |
| "rewards/margins": 6.735787868499756, | |
| "rewards/rejected": -5.025883674621582, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.8100152905198777e-07, | |
| "logits/chosen": -2.7668607234954834, | |
| "logits/rejected": -2.7822773456573486, | |
| "logps/chosen": -351.9031677246094, | |
| "logps/rejected": -415.9180603027344, | |
| "loss": 0.0185, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.090543508529663, | |
| "rewards/margins": 7.8234100341796875, | |
| "rewards/rejected": -6.7328667640686035, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.7909021406727827e-07, | |
| "logits/chosen": -2.9600331783294678, | |
| "logits/rejected": -2.8843834400177, | |
| "logps/chosen": -309.39642333984375, | |
| "logps/rejected": -306.4966735839844, | |
| "loss": 0.0282, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.9620206952095032, | |
| "rewards/margins": 6.899697303771973, | |
| "rewards/rejected": -5.937676429748535, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.771788990825688e-07, | |
| "logits/chosen": -2.923687696456909, | |
| "logits/rejected": -2.9661598205566406, | |
| "logps/chosen": -330.7653503417969, | |
| "logps/rejected": -352.5653076171875, | |
| "loss": 0.0243, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.7405881881713867, | |
| "rewards/margins": 8.00406265258789, | |
| "rewards/rejected": -6.263474941253662, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.7526758409785934e-07, | |
| "logits/chosen": -2.9299581050872803, | |
| "logits/rejected": -2.8949360847473145, | |
| "logps/chosen": -362.6274719238281, | |
| "logps/rejected": -363.09149169921875, | |
| "loss": 0.0148, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0967247486114502, | |
| "rewards/margins": 7.466977119445801, | |
| "rewards/rejected": -6.37025260925293, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.7335626911314984e-07, | |
| "logits/chosen": -2.845986843109131, | |
| "logits/rejected": -2.8671188354492188, | |
| "logps/chosen": -274.60870361328125, | |
| "logps/rejected": -295.59478759765625, | |
| "loss": 0.0193, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -0.014614641666412354, | |
| "rewards/margins": 6.743406772613525, | |
| "rewards/rejected": -6.758021354675293, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "eval_logits/chosen": -2.882474422454834, | |
| "eval_logits/rejected": -2.8919453620910645, | |
| "eval_logps/chosen": -376.60406494140625, | |
| "eval_logps/rejected": -329.82745361328125, | |
| "eval_loss": 0.6121558547019958, | |
| "eval_rewards/accuracies": 0.761904776096344, | |
| "eval_rewards/chosen": -0.941230058670044, | |
| "eval_rewards/margins": 2.8449153900146484, | |
| "eval_rewards/rejected": -3.7861454486846924, | |
| "eval_runtime": 164.9655, | |
| "eval_samples_per_second": 12.124, | |
| "eval_steps_per_second": 0.382, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.7144495412844037e-07, | |
| "logits/chosen": -2.9446756839752197, | |
| "logits/rejected": -2.953831911087036, | |
| "logps/chosen": -353.67376708984375, | |
| "logps/rejected": -347.7017822265625, | |
| "loss": 0.0214, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 0.4430414140224457, | |
| "rewards/margins": 7.570870399475098, | |
| "rewards/rejected": -7.127829074859619, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.6953363914373088e-07, | |
| "logits/chosen": -2.940734386444092, | |
| "logits/rejected": -2.9746463298797607, | |
| "logps/chosen": -348.05328369140625, | |
| "logps/rejected": -333.2148742675781, | |
| "loss": 0.012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.2261013984680176, | |
| "rewards/margins": 8.234363555908203, | |
| "rewards/rejected": -7.008261680603027, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.6762232415902138e-07, | |
| "logits/chosen": -2.875319719314575, | |
| "logits/rejected": -2.855180263519287, | |
| "logps/chosen": -306.70050048828125, | |
| "logps/rejected": -349.5177917480469, | |
| "loss": 0.0203, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.3287349343299866, | |
| "rewards/margins": 7.007230281829834, | |
| "rewards/rejected": -6.678494930267334, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.6571100917431192e-07, | |
| "logits/chosen": -2.9315755367279053, | |
| "logits/rejected": -2.930187702178955, | |
| "logps/chosen": -306.041259765625, | |
| "logps/rejected": -305.6824951171875, | |
| "loss": 0.0151, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.5696347951889038, | |
| "rewards/margins": 7.535808563232422, | |
| "rewards/rejected": -6.9661736488342285, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.6379969418960242e-07, | |
| "logits/chosen": -2.8848538398742676, | |
| "logits/rejected": -2.905867338180542, | |
| "logps/chosen": -389.7286682128906, | |
| "logps/rejected": -386.9409484863281, | |
| "loss": 0.0148, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.3652772903442383, | |
| "rewards/margins": 8.069811820983887, | |
| "rewards/rejected": -6.704535484313965, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.6188837920489295e-07, | |
| "logits/chosen": -2.8828773498535156, | |
| "logits/rejected": -2.8783280849456787, | |
| "logps/chosen": -359.57666015625, | |
| "logps/rejected": -339.35345458984375, | |
| "loss": 0.0154, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.8870366811752319, | |
| "rewards/margins": 8.021839141845703, | |
| "rewards/rejected": -7.13480281829834, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.5997706422018349e-07, | |
| "logits/chosen": -2.88275408744812, | |
| "logits/rejected": -2.929903984069824, | |
| "logps/chosen": -322.4759216308594, | |
| "logps/rejected": -423.30682373046875, | |
| "loss": 0.0171, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 1.5736116170883179, | |
| "rewards/margins": 8.667892456054688, | |
| "rewards/rejected": -7.0942816734313965, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.58065749235474e-07, | |
| "logits/chosen": -2.90217661857605, | |
| "logits/rejected": -2.8910233974456787, | |
| "logps/chosen": -413.54522705078125, | |
| "logps/rejected": -385.43341064453125, | |
| "loss": 0.0127, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.9447764158248901, | |
| "rewards/margins": 7.688788414001465, | |
| "rewards/rejected": -6.744012355804443, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.5615443425076452e-07, | |
| "logits/chosen": -2.7480947971343994, | |
| "logits/rejected": -2.747185230255127, | |
| "logps/chosen": -354.87493896484375, | |
| "logps/rejected": -351.3457946777344, | |
| "loss": 0.0178, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.513919472694397, | |
| "rewards/margins": 8.175249099731445, | |
| "rewards/rejected": -7.661329746246338, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.5424311926605506e-07, | |
| "logits/chosen": -2.8367042541503906, | |
| "logits/rejected": -2.8408215045928955, | |
| "logps/chosen": -298.4134826660156, | |
| "logps/rejected": -300.4717712402344, | |
| "loss": 0.0175, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.2804573178291321, | |
| "rewards/margins": 7.449028968811035, | |
| "rewards/rejected": -7.729485511779785, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "eval_logits/chosen": -2.839301586151123, | |
| "eval_logits/rejected": -2.8474462032318115, | |
| "eval_logps/chosen": -383.2186279296875, | |
| "eval_logps/rejected": -338.7977294921875, | |
| "eval_loss": 0.6523212790489197, | |
| "eval_rewards/accuracies": 0.7658730149269104, | |
| "eval_rewards/chosen": -1.6026798486709595, | |
| "eval_rewards/margins": 3.080495834350586, | |
| "eval_rewards/rejected": -4.683175563812256, | |
| "eval_runtime": 165.5125, | |
| "eval_samples_per_second": 12.084, | |
| "eval_steps_per_second": 0.381, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.5233180428134556e-07, | |
| "logits/chosen": -2.9054439067840576, | |
| "logits/rejected": -2.913278102874756, | |
| "logps/chosen": -323.6388244628906, | |
| "logps/rejected": -323.73419189453125, | |
| "loss": 0.0138, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.8671594858169556, | |
| "rewards/margins": 7.387481689453125, | |
| "rewards/rejected": -6.520320892333984, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.504204892966361e-07, | |
| "logits/chosen": -2.800830364227295, | |
| "logits/rejected": -2.8197312355041504, | |
| "logps/chosen": -359.3259582519531, | |
| "logps/rejected": -394.8112487792969, | |
| "loss": 0.0173, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4949137568473816, | |
| "rewards/margins": 9.693056106567383, | |
| "rewards/rejected": -9.198141098022461, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.485091743119266e-07, | |
| "logits/chosen": -2.890476942062378, | |
| "logits/rejected": -2.925356388092041, | |
| "logps/chosen": -315.17742919921875, | |
| "logps/rejected": -378.8518371582031, | |
| "loss": 0.0188, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.27211472392082214, | |
| "rewards/margins": 8.301239967346191, | |
| "rewards/rejected": -8.02912425994873, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.465978593272171e-07, | |
| "logits/chosen": -2.820862054824829, | |
| "logits/rejected": -2.8192131519317627, | |
| "logps/chosen": -233.12344360351562, | |
| "logps/rejected": -238.68014526367188, | |
| "loss": 0.0187, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.23832440376281738, | |
| "rewards/margins": 7.234049320220947, | |
| "rewards/rejected": -7.472373008728027, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.4468654434250764e-07, | |
| "logits/chosen": -2.7812180519104004, | |
| "logits/rejected": -2.839566946029663, | |
| "logps/chosen": -400.56396484375, | |
| "logps/rejected": -418.9078063964844, | |
| "loss": 0.0183, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 1.306671380996704, | |
| "rewards/margins": 9.309846878051758, | |
| "rewards/rejected": -8.003175735473633, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.4277522935779814e-07, | |
| "logits/chosen": -2.857119083404541, | |
| "logits/rejected": -2.8069121837615967, | |
| "logps/chosen": -346.87091064453125, | |
| "logps/rejected": -364.4837341308594, | |
| "loss": 0.0185, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.42278409004211426, | |
| "rewards/margins": 7.95212459564209, | |
| "rewards/rejected": -7.5293402671813965, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.4086391437308867e-07, | |
| "logits/chosen": -2.828322172164917, | |
| "logits/rejected": -2.872556209564209, | |
| "logps/chosen": -371.8916015625, | |
| "logps/rejected": -404.73162841796875, | |
| "loss": 0.0183, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 0.5918793082237244, | |
| "rewards/margins": 8.069284439086914, | |
| "rewards/rejected": -7.477405548095703, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.389525993883792e-07, | |
| "logits/chosen": -2.8225388526916504, | |
| "logits/rejected": -2.8491692543029785, | |
| "logps/chosen": -293.30047607421875, | |
| "logps/rejected": -313.1904296875, | |
| "loss": 0.018, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3963487148284912, | |
| "rewards/margins": 7.192727565765381, | |
| "rewards/rejected": -7.589076042175293, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.370412844036697e-07, | |
| "logits/chosen": -2.8727283477783203, | |
| "logits/rejected": -2.850238561630249, | |
| "logps/chosen": -306.93695068359375, | |
| "logps/rejected": -345.2283020019531, | |
| "loss": 0.0144, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.09160284698009491, | |
| "rewards/margins": 7.823256492614746, | |
| "rewards/rejected": -7.731653690338135, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.3512996941896024e-07, | |
| "logits/chosen": -2.8500583171844482, | |
| "logits/rejected": -2.8594961166381836, | |
| "logps/chosen": -283.15771484375, | |
| "logps/rejected": -311.3097839355469, | |
| "loss": 0.0131, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.03272407129406929, | |
| "rewards/margins": 8.4783353805542, | |
| "rewards/rejected": -8.445611953735352, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "eval_logits/chosen": -2.806851387023926, | |
| "eval_logits/rejected": -2.812812328338623, | |
| "eval_logps/chosen": -386.09039306640625, | |
| "eval_logps/rejected": -342.2704162597656, | |
| "eval_loss": 0.6702452898025513, | |
| "eval_rewards/accuracies": 0.7420634627342224, | |
| "eval_rewards/chosen": -1.8898613452911377, | |
| "eval_rewards/margins": 3.1405844688415527, | |
| "eval_rewards/rejected": -5.0304460525512695, | |
| "eval_runtime": 165.1336, | |
| "eval_samples_per_second": 12.111, | |
| "eval_steps_per_second": 0.382, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.3321865443425075e-07, | |
| "logits/chosen": -2.7931952476501465, | |
| "logits/rejected": -2.8073198795318604, | |
| "logps/chosen": -338.2393493652344, | |
| "logps/rejected": -352.142333984375, | |
| "loss": 0.015, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.13923540711402893, | |
| "rewards/margins": 8.090972900390625, | |
| "rewards/rejected": -8.230208396911621, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.3130733944954128e-07, | |
| "logits/chosen": -2.74983811378479, | |
| "logits/rejected": -2.748617649078369, | |
| "logps/chosen": -358.42401123046875, | |
| "logps/rejected": -402.30328369140625, | |
| "loss": 0.0129, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 1.6026802062988281, | |
| "rewards/margins": 9.088810920715332, | |
| "rewards/rejected": -7.4861297607421875, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.293960244648318e-07, | |
| "logits/chosen": -2.8457603454589844, | |
| "logits/rejected": -2.8344614505767822, | |
| "logps/chosen": -365.7544860839844, | |
| "logps/rejected": -347.2682189941406, | |
| "loss": 0.0163, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.001628613448701799, | |
| "rewards/margins": 8.202213287353516, | |
| "rewards/rejected": -8.203841209411621, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.2748470948012232e-07, | |
| "logits/chosen": -2.8093724250793457, | |
| "logits/rejected": -2.81803822517395, | |
| "logps/chosen": -340.55352783203125, | |
| "logps/rejected": -407.7304992675781, | |
| "loss": 0.0119, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.47526517510414124, | |
| "rewards/margins": 8.480849266052246, | |
| "rewards/rejected": -8.005583763122559, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.2557339449541285e-07, | |
| "logits/chosen": -2.8672242164611816, | |
| "logits/rejected": -2.855675220489502, | |
| "logps/chosen": -343.7786865234375, | |
| "logps/rejected": -365.4543151855469, | |
| "loss": 0.0212, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 0.35550713539123535, | |
| "rewards/margins": 8.239429473876953, | |
| "rewards/rejected": -7.8839240074157715, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.2366207951070336e-07, | |
| "logits/chosen": -2.8647074699401855, | |
| "logits/rejected": -2.8598999977111816, | |
| "logps/chosen": -335.4911193847656, | |
| "logps/rejected": -369.7025146484375, | |
| "loss": 0.0148, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1035120040178299, | |
| "rewards/margins": 8.297709465026855, | |
| "rewards/rejected": -8.401222229003906, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.217507645259939e-07, | |
| "logits/chosen": -2.8038744926452637, | |
| "logits/rejected": -2.8534445762634277, | |
| "logps/chosen": -327.49005126953125, | |
| "logps/rejected": -348.63116455078125, | |
| "loss": 0.0103, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.261627733707428, | |
| "rewards/margins": 8.056116104125977, | |
| "rewards/rejected": -7.794488430023193, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.198394495412844e-07, | |
| "logits/chosen": -2.879183053970337, | |
| "logits/rejected": -2.9233028888702393, | |
| "logps/chosen": -337.91790771484375, | |
| "logps/rejected": -346.1882019042969, | |
| "loss": 0.0176, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.12253670394420624, | |
| "rewards/margins": 7.606545925140381, | |
| "rewards/rejected": -7.4840087890625, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.1792813455657493e-07, | |
| "logits/chosen": -2.7606253623962402, | |
| "logits/rejected": -2.8114898204803467, | |
| "logps/chosen": -337.6861877441406, | |
| "logps/rejected": -345.7854309082031, | |
| "loss": 0.0258, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.3526005446910858, | |
| "rewards/margins": 8.258612632751465, | |
| "rewards/rejected": -7.906012058258057, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.1601681957186543e-07, | |
| "logits/chosen": -2.8984854221343994, | |
| "logits/rejected": -2.912468910217285, | |
| "logps/chosen": -334.9092102050781, | |
| "logps/rejected": -334.67669677734375, | |
| "loss": 0.0243, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.7173303961753845, | |
| "rewards/margins": 7.802558898925781, | |
| "rewards/rejected": -7.085227966308594, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "eval_logits/chosen": -2.8489737510681152, | |
| "eval_logits/rejected": -2.854724407196045, | |
| "eval_logps/chosen": -383.90655517578125, | |
| "eval_logps/rejected": -339.3347473144531, | |
| "eval_loss": 0.6559089422225952, | |
| "eval_rewards/accuracies": 0.7698412537574768, | |
| "eval_rewards/chosen": -1.6714773178100586, | |
| "eval_rewards/margins": 3.0653984546661377, | |
| "eval_rewards/rejected": -4.736875534057617, | |
| "eval_runtime": 164.8339, | |
| "eval_samples_per_second": 12.133, | |
| "eval_steps_per_second": 0.382, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.1410550458715595e-07, | |
| "logits/chosen": -2.8347411155700684, | |
| "logits/rejected": -2.851090908050537, | |
| "logps/chosen": -329.1361999511719, | |
| "logps/rejected": -359.9030456542969, | |
| "loss": 0.0278, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 0.2210700958967209, | |
| "rewards/margins": 7.436942100524902, | |
| "rewards/rejected": -7.215872287750244, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.1219418960244648e-07, | |
| "logits/chosen": -2.8228423595428467, | |
| "logits/rejected": -2.841404438018799, | |
| "logps/chosen": -282.3636169433594, | |
| "logps/rejected": -409.47979736328125, | |
| "loss": 0.0135, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2855607867240906, | |
| "rewards/margins": 8.16025161743164, | |
| "rewards/rejected": -8.445813179016113, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.10282874617737e-07, | |
| "logits/chosen": -2.8471851348876953, | |
| "logits/rejected": -2.8798093795776367, | |
| "logps/chosen": -295.41900634765625, | |
| "logps/rejected": -340.5544738769531, | |
| "loss": 0.0184, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3259337544441223, | |
| "rewards/margins": 8.084188461303711, | |
| "rewards/rejected": -8.410122871398926, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 1.0837155963302752e-07, | |
| "logits/chosen": -2.636784076690674, | |
| "logits/rejected": -2.740302562713623, | |
| "logps/chosen": -287.13702392578125, | |
| "logps/rejected": -391.1552429199219, | |
| "loss": 0.0185, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.528149425983429, | |
| "rewards/margins": 9.060527801513672, | |
| "rewards/rejected": -8.532378196716309, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 1.0646024464831804e-07, | |
| "logits/chosen": -2.753213882446289, | |
| "logits/rejected": -2.822252035140991, | |
| "logps/chosen": -369.473388671875, | |
| "logps/rejected": -360.38983154296875, | |
| "loss": 0.0221, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.3261668086051941, | |
| "rewards/margins": 8.502967834472656, | |
| "rewards/rejected": -8.829133033752441, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.0454892966360856e-07, | |
| "logits/chosen": -2.782691717147827, | |
| "logits/rejected": -2.868027448654175, | |
| "logps/chosen": -333.0803527832031, | |
| "logps/rejected": -355.0961608886719, | |
| "loss": 0.0144, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5834169983863831, | |
| "rewards/margins": 8.312009811401367, | |
| "rewards/rejected": -8.895425796508789, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 1.0263761467889908e-07, | |
| "logits/chosen": -2.770711898803711, | |
| "logits/rejected": -2.796137809753418, | |
| "logps/chosen": -336.739990234375, | |
| "logps/rejected": -372.0965576171875, | |
| "loss": 0.0151, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.35953274369239807, | |
| "rewards/margins": 8.809865951538086, | |
| "rewards/rejected": -8.450332641601562, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 1.007262996941896e-07, | |
| "logits/chosen": -2.7547390460968018, | |
| "logits/rejected": -2.7793593406677246, | |
| "logps/chosen": -335.936279296875, | |
| "logps/rejected": -330.6647033691406, | |
| "loss": 0.0167, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.02794322930276394, | |
| "rewards/margins": 8.719170570373535, | |
| "rewards/rejected": -8.747113227844238, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 9.881498470948011e-08, | |
| "logits/chosen": -2.846524477005005, | |
| "logits/rejected": -2.799567222595215, | |
| "logps/chosen": -343.198486328125, | |
| "logps/rejected": -335.6533508300781, | |
| "loss": 0.0136, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.013063406571745872, | |
| "rewards/margins": 8.952108383178711, | |
| "rewards/rejected": -8.965171813964844, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 9.690366972477065e-08, | |
| "logits/chosen": -2.85577392578125, | |
| "logits/rejected": -2.8093135356903076, | |
| "logps/chosen": -333.2208251953125, | |
| "logps/rejected": -358.0810241699219, | |
| "loss": 0.0142, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.41089487075805664, | |
| "rewards/margins": 8.724878311157227, | |
| "rewards/rejected": -9.135773658752441, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "eval_logits/chosen": -2.835172653198242, | |
| "eval_logits/rejected": -2.839359760284424, | |
| "eval_logps/chosen": -386.6546936035156, | |
| "eval_logps/rejected": -343.19000244140625, | |
| "eval_loss": 0.6733575463294983, | |
| "eval_rewards/accuracies": 0.7579365372657776, | |
| "eval_rewards/chosen": -1.946290373802185, | |
| "eval_rewards/margins": 3.1761116981506348, | |
| "eval_rewards/rejected": -5.122402191162109, | |
| "eval_runtime": 165.3843, | |
| "eval_samples_per_second": 12.093, | |
| "eval_steps_per_second": 0.381, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 9.499235474006116e-08, | |
| "logits/chosen": -2.846043109893799, | |
| "logits/rejected": -2.8555102348327637, | |
| "logps/chosen": -376.3670349121094, | |
| "logps/rejected": -341.2032470703125, | |
| "loss": 0.0246, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.03427610173821449, | |
| "rewards/margins": 8.654411315917969, | |
| "rewards/rejected": -8.688688278198242, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 9.308103975535168e-08, | |
| "logits/chosen": -2.8411316871643066, | |
| "logits/rejected": -2.8570432662963867, | |
| "logps/chosen": -373.59844970703125, | |
| "logps/rejected": -401.067138671875, | |
| "loss": 0.0176, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.0549451112747192, | |
| "rewards/margins": 8.842530250549316, | |
| "rewards/rejected": -7.787585258483887, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 9.116972477064219e-08, | |
| "logits/chosen": -2.895292282104492, | |
| "logits/rejected": -2.854443073272705, | |
| "logps/chosen": -345.359375, | |
| "logps/rejected": -408.4029846191406, | |
| "loss": 0.0169, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.30679136514663696, | |
| "rewards/margins": 7.920645713806152, | |
| "rewards/rejected": -8.227437019348145, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.925840978593272e-08, | |
| "logits/chosen": -2.835501194000244, | |
| "logits/rejected": -2.896915912628174, | |
| "logps/chosen": -264.5487365722656, | |
| "logps/rejected": -387.1824951171875, | |
| "loss": 0.019, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.3389735221862793, | |
| "rewards/margins": 9.347002029418945, | |
| "rewards/rejected": -9.008028030395508, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 8.734709480122324e-08, | |
| "logits/chosen": -2.806790828704834, | |
| "logits/rejected": -2.8148555755615234, | |
| "logps/chosen": -308.4158630371094, | |
| "logps/rejected": -376.0751953125, | |
| "loss": 0.0166, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -0.38033682107925415, | |
| "rewards/margins": 8.450287818908691, | |
| "rewards/rejected": -8.8306245803833, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 8.543577981651376e-08, | |
| "logits/chosen": -2.7967381477355957, | |
| "logits/rejected": -2.792023181915283, | |
| "logps/chosen": -455.0721740722656, | |
| "logps/rejected": -405.89501953125, | |
| "loss": 0.0156, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.7061554789543152, | |
| "rewards/margins": 9.382705688476562, | |
| "rewards/rejected": -8.676549911499023, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 8.352446483180428e-08, | |
| "logits/chosen": -2.8607754707336426, | |
| "logits/rejected": -2.8268520832061768, | |
| "logps/chosen": -331.96820068359375, | |
| "logps/rejected": -321.39422607421875, | |
| "loss": 0.0236, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.4299864172935486, | |
| "rewards/margins": 8.559895515441895, | |
| "rewards/rejected": -8.129908561706543, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 8.161314984709481e-08, | |
| "logits/chosen": -2.8822827339172363, | |
| "logits/rejected": -2.893578052520752, | |
| "logps/chosen": -339.42449951171875, | |
| "logps/rejected": -356.1263427734375, | |
| "loss": 0.0083, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.2752775549888611, | |
| "rewards/margins": 8.704290390014648, | |
| "rewards/rejected": -8.4290132522583, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.970183486238531e-08, | |
| "logits/chosen": -2.828721284866333, | |
| "logits/rejected": -2.833087205886841, | |
| "logps/chosen": -328.60418701171875, | |
| "logps/rejected": -360.6470642089844, | |
| "loss": 0.0176, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.30725544691085815, | |
| "rewards/margins": 8.530462265014648, | |
| "rewards/rejected": -8.223207473754883, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 7.779051987767583e-08, | |
| "logits/chosen": -2.8711142539978027, | |
| "logits/rejected": -2.892519950866699, | |
| "logps/chosen": -370.29339599609375, | |
| "logps/rejected": -355.296875, | |
| "loss": 0.0211, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.0712406188249588, | |
| "rewards/margins": 8.155640602111816, | |
| "rewards/rejected": -8.084399223327637, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "eval_logits/chosen": -2.8333258628845215, | |
| "eval_logits/rejected": -2.8368897438049316, | |
| "eval_logps/chosen": -388.3058776855469, | |
| "eval_logps/rejected": -347.57440185546875, | |
| "eval_loss": 0.6890397667884827, | |
| "eval_rewards/accuracies": 0.7698412537574768, | |
| "eval_rewards/chosen": -2.1114044189453125, | |
| "eval_rewards/margins": 3.4494407176971436, | |
| "eval_rewards/rejected": -5.560845375061035, | |
| "eval_runtime": 164.7492, | |
| "eval_samples_per_second": 12.14, | |
| "eval_steps_per_second": 0.382, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 7.587920489296635e-08, | |
| "logits/chosen": -2.855881690979004, | |
| "logits/rejected": -2.8854427337646484, | |
| "logps/chosen": -351.69769287109375, | |
| "logps/rejected": -358.4553527832031, | |
| "loss": 0.0164, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.2860868573188782, | |
| "rewards/margins": 8.237478256225586, | |
| "rewards/rejected": -8.523565292358398, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 7.396788990825688e-08, | |
| "logits/chosen": -2.8813681602478027, | |
| "logits/rejected": -2.9079108238220215, | |
| "logps/chosen": -322.7754821777344, | |
| "logps/rejected": -327.5832824707031, | |
| "loss": 0.0137, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.06775089353322983, | |
| "rewards/margins": 8.004460334777832, | |
| "rewards/rejected": -8.072211265563965, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 7.20565749235474e-08, | |
| "logits/chosen": -2.810084819793701, | |
| "logits/rejected": -2.815389394760132, | |
| "logps/chosen": -325.9468688964844, | |
| "logps/rejected": -330.6631164550781, | |
| "loss": 0.0133, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.12425418943166733, | |
| "rewards/margins": 8.769124984741211, | |
| "rewards/rejected": -8.893379211425781, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 7.014525993883792e-08, | |
| "logits/chosen": -2.7919540405273438, | |
| "logits/rejected": -2.7934675216674805, | |
| "logps/chosen": -353.1927185058594, | |
| "logps/rejected": -365.3847351074219, | |
| "loss": 0.0154, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2072305679321289, | |
| "rewards/margins": 8.390886306762695, | |
| "rewards/rejected": -8.18365478515625, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.823394495412843e-08, | |
| "logits/chosen": -2.7678780555725098, | |
| "logits/rejected": -2.765697479248047, | |
| "logps/chosen": -358.8880615234375, | |
| "logps/rejected": -376.55706787109375, | |
| "loss": 0.02, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.11311036348342896, | |
| "rewards/margins": 9.944357872009277, | |
| "rewards/rejected": -9.83124828338623, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 6.632262996941895e-08, | |
| "logits/chosen": -2.834345817565918, | |
| "logits/rejected": -2.7858288288116455, | |
| "logps/chosen": -336.33648681640625, | |
| "logps/rejected": -366.84991455078125, | |
| "loss": 0.0164, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.12319626659154892, | |
| "rewards/margins": 9.35567569732666, | |
| "rewards/rejected": -9.478872299194336, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 6.441131498470948e-08, | |
| "logits/chosen": -2.847996950149536, | |
| "logits/rejected": -2.863615036010742, | |
| "logps/chosen": -304.58502197265625, | |
| "logps/rejected": -352.5277404785156, | |
| "loss": 0.0135, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.37131237983703613, | |
| "rewards/margins": 8.316202163696289, | |
| "rewards/rejected": -8.687514305114746, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 6.25e-08, | |
| "logits/chosen": -2.8685457706451416, | |
| "logits/rejected": -2.876739501953125, | |
| "logps/chosen": -394.1883850097656, | |
| "logps/rejected": -382.19287109375, | |
| "loss": 0.0135, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.5528956055641174, | |
| "rewards/margins": 9.068865776062012, | |
| "rewards/rejected": -8.515970230102539, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 6.058868501529052e-08, | |
| "logits/chosen": -2.9075653553009033, | |
| "logits/rejected": -2.8715763092041016, | |
| "logps/chosen": -366.0291442871094, | |
| "logps/rejected": -358.59381103515625, | |
| "loss": 0.0202, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.14853370189666748, | |
| "rewards/margins": 8.612794876098633, | |
| "rewards/rejected": -8.464262008666992, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.8677370030581035e-08, | |
| "logits/chosen": -2.797910213470459, | |
| "logits/rejected": -2.840148687362671, | |
| "logps/chosen": -331.3750305175781, | |
| "logps/rejected": -344.34332275390625, | |
| "loss": 0.011, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.22872698307037354, | |
| "rewards/margins": 8.751152038574219, | |
| "rewards/rejected": -8.979879379272461, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "eval_logits/chosen": -2.8258047103881836, | |
| "eval_logits/rejected": -2.8298983573913574, | |
| "eval_logps/chosen": -390.2113952636719, | |
| "eval_logps/rejected": -350.03887939453125, | |
| "eval_loss": 0.6998910307884216, | |
| "eval_rewards/accuracies": 0.7658730149269104, | |
| "eval_rewards/chosen": -2.301961660385132, | |
| "eval_rewards/margins": 3.5053274631500244, | |
| "eval_rewards/rejected": -5.807290077209473, | |
| "eval_runtime": 164.7101, | |
| "eval_samples_per_second": 12.143, | |
| "eval_steps_per_second": 0.382, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.6766055045871554e-08, | |
| "logits/chosen": -2.837218761444092, | |
| "logits/rejected": -2.8603646755218506, | |
| "logps/chosen": -325.1515197753906, | |
| "logps/rejected": -377.93707275390625, | |
| "loss": 0.0122, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.7425957918167114, | |
| "rewards/margins": 9.372137069702148, | |
| "rewards/rejected": -8.629541397094727, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 5.485474006116208e-08, | |
| "logits/chosen": -2.859614372253418, | |
| "logits/rejected": -2.907731294631958, | |
| "logps/chosen": -323.7126159667969, | |
| "logps/rejected": -337.7955627441406, | |
| "loss": 0.0124, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9508829116821289, | |
| "rewards/margins": 9.375367164611816, | |
| "rewards/rejected": -8.424482345581055, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 5.294342507645259e-08, | |
| "logits/chosen": -2.8355846405029297, | |
| "logits/rejected": -2.8445563316345215, | |
| "logps/chosen": -387.78021240234375, | |
| "logps/rejected": -341.34332275390625, | |
| "loss": 0.0122, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.06057599186897278, | |
| "rewards/margins": 8.586808204650879, | |
| "rewards/rejected": -8.647383689880371, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 5.1032110091743117e-08, | |
| "logits/chosen": -2.8416004180908203, | |
| "logits/rejected": -2.8135132789611816, | |
| "logps/chosen": -294.2474670410156, | |
| "logps/rejected": -342.1490173339844, | |
| "loss": 0.0112, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0905003547668457, | |
| "rewards/margins": 7.913638114929199, | |
| "rewards/rejected": -9.004137992858887, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.9120795107033635e-08, | |
| "logits/chosen": -2.808621406555176, | |
| "logits/rejected": -2.8129184246063232, | |
| "logps/chosen": -361.94146728515625, | |
| "logps/rejected": -372.2251892089844, | |
| "loss": 0.0108, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.09845595061779022, | |
| "rewards/margins": 9.0397367477417, | |
| "rewards/rejected": -9.138191223144531, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.7209480122324154e-08, | |
| "logits/chosen": -2.857626438140869, | |
| "logits/rejected": -2.854701042175293, | |
| "logps/chosen": -334.3382568359375, | |
| "logps/rejected": -397.60504150390625, | |
| "loss": 0.0135, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.4237436354160309, | |
| "rewards/margins": 8.734308242797852, | |
| "rewards/rejected": -9.158050537109375, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 4.529816513761467e-08, | |
| "logits/chosen": -2.833742618560791, | |
| "logits/rejected": -2.848910093307495, | |
| "logps/chosen": -376.8042297363281, | |
| "logps/rejected": -431.098388671875, | |
| "loss": 0.0185, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.09423612058162689, | |
| "rewards/margins": 8.842924118041992, | |
| "rewards/rejected": -8.937159538269043, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 4.33868501529052e-08, | |
| "logits/chosen": -2.8585665225982666, | |
| "logits/rejected": -2.888023614883423, | |
| "logps/chosen": -299.4255065917969, | |
| "logps/rejected": -347.65032958984375, | |
| "loss": 0.0227, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -0.39737778902053833, | |
| "rewards/margins": 8.897387504577637, | |
| "rewards/rejected": -9.294764518737793, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 4.147553516819572e-08, | |
| "logits/chosen": -2.7752485275268555, | |
| "logits/rejected": -2.7679455280303955, | |
| "logps/chosen": -288.93524169921875, | |
| "logps/rejected": -362.3262634277344, | |
| "loss": 0.0138, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.21344709396362305, | |
| "rewards/margins": 8.969237327575684, | |
| "rewards/rejected": -9.182684898376465, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.9564220183486236e-08, | |
| "logits/chosen": -2.771638870239258, | |
| "logits/rejected": -2.7894372940063477, | |
| "logps/chosen": -358.27276611328125, | |
| "logps/rejected": -337.4700012207031, | |
| "loss": 0.0114, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.08803452551364899, | |
| "rewards/margins": 8.622003555297852, | |
| "rewards/rejected": -8.710036277770996, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "eval_logits/chosen": -2.817159414291382, | |
| "eval_logits/rejected": -2.820690393447876, | |
| "eval_logps/chosen": -389.5738525390625, | |
| "eval_logps/rejected": -348.8511962890625, | |
| "eval_loss": 0.6951248645782471, | |
| "eval_rewards/accuracies": 0.7698412537574768, | |
| "eval_rewards/chosen": -2.238208055496216, | |
| "eval_rewards/margins": 3.4503118991851807, | |
| "eval_rewards/rejected": -5.6885199546813965, | |
| "eval_runtime": 164.1407, | |
| "eval_samples_per_second": 12.185, | |
| "eval_steps_per_second": 0.384, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.7652905198776755e-08, | |
| "logits/chosen": -2.8291115760803223, | |
| "logits/rejected": -2.812997817993164, | |
| "logps/chosen": -361.16973876953125, | |
| "logps/rejected": -371.3473205566406, | |
| "loss": 0.0109, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.2673804759979248, | |
| "rewards/margins": 8.214799880981445, | |
| "rewards/rejected": -8.482179641723633, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.574159021406728e-08, | |
| "logits/chosen": -2.8169431686401367, | |
| "logits/rejected": -2.780579090118408, | |
| "logps/chosen": -340.25567626953125, | |
| "logps/rejected": -452.1532287597656, | |
| "loss": 0.0125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.4266482889652252, | |
| "rewards/margins": 8.901152610778809, | |
| "rewards/rejected": -9.327801704406738, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.383027522935779e-08, | |
| "logits/chosen": -2.852733850479126, | |
| "logits/rejected": -2.8627407550811768, | |
| "logps/chosen": -345.10504150390625, | |
| "logps/rejected": -381.2701416015625, | |
| "loss": 0.0144, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.6709401607513428, | |
| "rewards/margins": 9.368196487426758, | |
| "rewards/rejected": -8.697256088256836, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 3.191896024464832e-08, | |
| "logits/chosen": -2.847033977508545, | |
| "logits/rejected": -2.880303382873535, | |
| "logps/chosen": -344.7592468261719, | |
| "logps/rejected": -366.505615234375, | |
| "loss": 0.0093, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2561042606830597, | |
| "rewards/margins": 9.626019477844238, | |
| "rewards/rejected": -9.369915008544922, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 3.0007645259938836e-08, | |
| "logits/chosen": -2.829150676727295, | |
| "logits/rejected": -2.8306522369384766, | |
| "logps/chosen": -299.6269836425781, | |
| "logps/rejected": -362.5341796875, | |
| "loss": 0.0191, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.312942773103714, | |
| "rewards/margins": 10.283103942871094, | |
| "rewards/rejected": -9.970161437988281, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.809633027522936e-08, | |
| "logits/chosen": -2.815882444381714, | |
| "logits/rejected": -2.7824299335479736, | |
| "logps/chosen": -315.24993896484375, | |
| "logps/rejected": -347.3058776855469, | |
| "loss": 0.0207, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.3008100688457489, | |
| "rewards/margins": 8.677629470825195, | |
| "rewards/rejected": -8.978440284729004, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.6185015290519877e-08, | |
| "logits/chosen": -2.8008246421813965, | |
| "logits/rejected": -2.7953882217407227, | |
| "logps/chosen": -333.69329833984375, | |
| "logps/rejected": -373.4231872558594, | |
| "loss": 0.0131, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.08695399761199951, | |
| "rewards/margins": 7.9680304527282715, | |
| "rewards/rejected": -8.054986000061035, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.4273700305810396e-08, | |
| "logits/chosen": -2.790097951889038, | |
| "logits/rejected": -2.827036142349243, | |
| "logps/chosen": -378.98236083984375, | |
| "logps/rejected": -420.4288635253906, | |
| "loss": 0.0135, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.11630038917064667, | |
| "rewards/margins": 10.843367576599121, | |
| "rewards/rejected": -10.727069854736328, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.2362385321100918e-08, | |
| "logits/chosen": -2.7879481315612793, | |
| "logits/rejected": -2.7845988273620605, | |
| "logps/chosen": -350.80572509765625, | |
| "logps/rejected": -345.9007873535156, | |
| "loss": 0.018, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -0.001116895698942244, | |
| "rewards/margins": 8.821355819702148, | |
| "rewards/rejected": -8.822473526000977, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 2.0451070336391437e-08, | |
| "logits/chosen": -2.7627055644989014, | |
| "logits/rejected": -2.7335832118988037, | |
| "logps/chosen": -341.35662841796875, | |
| "logps/rejected": -318.8877258300781, | |
| "loss": 0.0437, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 0.1377846747636795, | |
| "rewards/margins": 8.672611236572266, | |
| "rewards/rejected": -8.534826278686523, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "eval_logits/chosen": -2.8117332458496094, | |
| "eval_logits/rejected": -2.8151025772094727, | |
| "eval_logps/chosen": -389.4859924316406, | |
| "eval_logps/rejected": -348.1217346191406, | |
| "eval_loss": 0.6910788416862488, | |
| "eval_rewards/accuracies": 0.7658730149269104, | |
| "eval_rewards/chosen": -2.229426622390747, | |
| "eval_rewards/margins": 3.3861491680145264, | |
| "eval_rewards/rejected": -5.615575313568115, | |
| "eval_runtime": 165.138, | |
| "eval_samples_per_second": 12.111, | |
| "eval_steps_per_second": 0.381, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.8539755351681956e-08, | |
| "logits/chosen": -2.7365012168884277, | |
| "logits/rejected": -2.788407325744629, | |
| "logps/chosen": -330.33197021484375, | |
| "logps/rejected": -380.91168212890625, | |
| "loss": 0.0163, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -0.0645025223493576, | |
| "rewards/margins": 8.317387580871582, | |
| "rewards/rejected": -8.381890296936035, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.6628440366972478e-08, | |
| "logits/chosen": -2.8033618927001953, | |
| "logits/rejected": -2.8255763053894043, | |
| "logps/chosen": -373.3817443847656, | |
| "logps/rejected": -359.75421142578125, | |
| "loss": 0.0142, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.008309101685881615, | |
| "rewards/margins": 8.045055389404297, | |
| "rewards/rejected": -8.03674602508545, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.4717125382262997e-08, | |
| "logits/chosen": -2.854548692703247, | |
| "logits/rejected": -2.8665812015533447, | |
| "logps/chosen": -339.0101318359375, | |
| "logps/rejected": -377.62847900390625, | |
| "loss": 0.0123, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.6132253408432007, | |
| "rewards/margins": 8.178349494934082, | |
| "rewards/rejected": -8.791574478149414, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.2805810397553517e-08, | |
| "logits/chosen": -2.8801310062408447, | |
| "logits/rejected": -2.826385021209717, | |
| "logps/chosen": -346.5010681152344, | |
| "logps/rejected": -360.3970031738281, | |
| "loss": 0.026, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -0.022508572787046432, | |
| "rewards/margins": 8.316872596740723, | |
| "rewards/rejected": -8.33938217163086, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.0894495412844038e-08, | |
| "logits/chosen": -2.758545160293579, | |
| "logits/rejected": -2.7856967449188232, | |
| "logps/chosen": -326.37896728515625, | |
| "logps/rejected": -359.3761291503906, | |
| "loss": 0.015, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21567471325397491, | |
| "rewards/margins": 8.950045585632324, | |
| "rewards/rejected": -8.734369277954102, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.983180428134555e-09, | |
| "logits/chosen": -2.8466389179229736, | |
| "logits/rejected": -2.8278822898864746, | |
| "logps/chosen": -327.270751953125, | |
| "logps/rejected": -307.3416748046875, | |
| "loss": 0.0158, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -0.5247852802276611, | |
| "rewards/margins": 8.214715957641602, | |
| "rewards/rejected": -8.739501953125, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 7.071865443425076e-09, | |
| "logits/chosen": -2.7788777351379395, | |
| "logits/rejected": -2.7975292205810547, | |
| "logps/chosen": -361.37384033203125, | |
| "logps/rejected": -391.8774719238281, | |
| "loss": 0.0134, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.35571426153182983, | |
| "rewards/margins": 8.981501579284668, | |
| "rewards/rejected": -9.337217330932617, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 5.1605504587155965e-09, | |
| "logits/chosen": -2.8489837646484375, | |
| "logits/rejected": -2.7892398834228516, | |
| "logps/chosen": -342.7001953125, | |
| "logps/rejected": -358.37469482421875, | |
| "loss": 0.012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.04198342561721802, | |
| "rewards/margins": 8.752424240112305, | |
| "rewards/rejected": -8.710439682006836, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 3.249235474006116e-09, | |
| "logits/chosen": -2.8391404151916504, | |
| "logits/rejected": -2.862032175064087, | |
| "logps/chosen": -332.72100830078125, | |
| "logps/rejected": -362.6523132324219, | |
| "loss": 0.0282, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.023767167702317238, | |
| "rewards/margins": 8.220497131347656, | |
| "rewards/rejected": -8.19672966003418, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.3379204892966359e-09, | |
| "logits/chosen": -2.854654550552368, | |
| "logits/rejected": -2.8116354942321777, | |
| "logps/chosen": -330.1148986816406, | |
| "logps/rejected": -358.40155029296875, | |
| "loss": 0.0109, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3439770042896271, | |
| "rewards/margins": 8.63255500793457, | |
| "rewards/rejected": -8.976531028747559, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_logits/chosen": -2.814802408218384, | |
| "eval_logits/rejected": -2.818735122680664, | |
| "eval_logps/chosen": -389.9677429199219, | |
| "eval_logps/rejected": -348.89801025390625, | |
| "eval_loss": 0.6909257769584656, | |
| "eval_rewards/accuracies": 0.7658730149269104, | |
| "eval_rewards/chosen": -2.2775967121124268, | |
| "eval_rewards/margins": 3.415607452392578, | |
| "eval_rewards/rejected": -5.693204402923584, | |
| "eval_runtime": 164.8452, | |
| "eval_samples_per_second": 12.133, | |
| "eval_steps_per_second": 0.382, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 2907, | |
| "total_flos": 0.0, | |
| "train_loss": 0.23139607249449978, | |
| "train_runtime": 34004.0578, | |
| "train_samples_per_second": 5.467, | |
| "train_steps_per_second": 0.085 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2907, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 0.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
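The block above closes the trainer state: `log_history` holds one record per logging step (fields such as `loss`, `rewards/margins`, `step`) plus periodic evaluation records (`eval_loss`, `eval_rewards/accuracies`), followed by the run-level summary and configuration (`logging_steps`, `max_steps`, `num_train_epochs`). As a minimal sketch, not part of the original file, the snippet below shows one way this log could be read back for inspection; it assumes the JSON has been saved unmodified under the hypothetical path `trainer_state.json` and uses only field names that appear in the entries above.

```python
# Minimal sketch (assumption: the JSON above is saved as "trainer_state.json").
# Separates per-step training records from periodic eval records using the
# field names present in log_history: loss, rewards/margins, eval_loss, step.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

train_steps, train_loss, train_margins = [], [], []
eval_steps, eval_loss = [], []

for entry in state["log_history"]:
    if "loss" in entry:            # training record logged every `logging_steps`
        train_steps.append(entry["step"])
        train_loss.append(entry["loss"])
        train_margins.append(entry["rewards/margins"])
    elif "eval_loss" in entry:     # evaluation record logged every `eval_steps`
        eval_steps.append(entry["step"])
        eval_loss.append(entry["eval_loss"])
    # the final run summary (train_loss, train_runtime, ...) matches neither branch

print(f"{len(train_steps)} training log points, {len(eval_steps)} eval points")
print(f"last train loss: {train_loss[-1]:.4f}, last eval loss: {eval_loss[-1]:.4f}")
print(f"last reward margin (train batch): {train_margins[-1]:.3f}")
```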