{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 3356, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.488095238095238e-09, "logits/chosen": -2.6795692443847656, "logits/rejected": -2.624149799346924, "logps/chosen": -54.570396423339844, "logps/rejected": -74.21392822265625, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.4880952380952379e-08, "logits/chosen": -2.7060725688934326, "logits/rejected": -2.6765432357788086, "logps/chosen": -95.24983978271484, "logps/rejected": -91.18234252929688, "loss": 0.6933, "rewards/accuracies": 0.4166666567325592, "rewards/chosen": 0.0005662046023644507, "rewards/margins": -0.006994906347244978, "rewards/rejected": 0.007561111822724342, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.9761904761904758e-08, "logits/chosen": -2.5795836448669434, "logits/rejected": -2.592409133911133, "logps/chosen": -124.33586120605469, "logps/rejected": -103.54573822021484, "loss": 0.6947, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0065773227252066135, "rewards/margins": -0.0029559016693383455, "rewards/rejected": 0.009533221833407879, "step": 20 }, { "epoch": 0.01, "learning_rate": 4.4642857142857145e-08, "logits/chosen": -2.579939126968384, "logits/rejected": -2.5497870445251465, "logps/chosen": -68.13322448730469, "logps/rejected": -66.37541961669922, "loss": 0.6921, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.01673651486635208, "rewards/margins": 0.00222357758320868, "rewards/rejected": 0.01451293658465147, "step": 30 }, { "epoch": 0.01, "learning_rate": 5.9523809523809515e-08, "logits/chosen": -2.6564245223999023, "logits/rejected": -2.608503818511963, "logps/chosen": -83.7612533569336, "logps/rejected": -79.3699951171875, "loss": 0.6886, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.050556618720293045, "rewards/margins": 0.005645673722028732, "rewards/rejected": 0.04491094499826431, "step": 40 }, { "epoch": 0.01, "learning_rate": 7.44047619047619e-08, "logits/chosen": -2.752234935760498, "logits/rejected": -2.6355555057525635, "logps/chosen": -127.2625503540039, "logps/rejected": -114.26876068115234, "loss": 0.6892, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 0.09844812005758286, "rewards/margins": 0.0011480912799015641, "rewards/rejected": 0.09730003774166107, "step": 50 }, { "epoch": 0.02, "learning_rate": 8.928571428571429e-08, "logits/chosen": -2.669374704360962, "logits/rejected": -2.652597188949585, "logps/chosen": -103.32049560546875, "logps/rejected": -105.29325103759766, "loss": 0.6817, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.19618754088878632, "rewards/margins": 0.012378268875181675, "rewards/rejected": 0.18380926549434662, "step": 60 }, { "epoch": 0.02, "learning_rate": 1.0416666666666667e-07, "logits/chosen": -2.7530548572540283, "logits/rejected": -2.680541753768921, "logps/chosen": -84.53085327148438, "logps/rejected": -84.82635498046875, "loss": 0.6819, "rewards/accuracies": 0.5, "rewards/chosen": 0.33352726697921753, "rewards/margins": 0.020224859938025475, "rewards/rejected": 0.3133023679256439, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.1904761904761903e-07, "logits/chosen": -2.572601079940796, "logits/rejected": -2.5415000915527344, "logps/chosen": -96.4114761352539, "logps/rejected": -84.30821228027344, "loss": 0.6708, "rewards/accuracies": 0.625, "rewards/chosen": 0.4342936873435974, "rewards/margins": 0.0613841637969017, "rewards/rejected": 0.3729095458984375, "step": 80 }, { "epoch": 0.03, "learning_rate": 1.3392857142857142e-07, "logits/chosen": -2.7009196281433105, "logits/rejected": -2.698122262954712, "logps/chosen": -78.68132781982422, "logps/rejected": -81.79669189453125, "loss": 0.6546, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.5974748730659485, "rewards/margins": 0.08051940053701401, "rewards/rejected": 0.5169554948806763, "step": 90 }, { "epoch": 0.03, "learning_rate": 1.488095238095238e-07, "logits/chosen": -2.5833797454833984, "logits/rejected": -2.624276876449585, "logps/chosen": -77.67559814453125, "logps/rejected": -90.95040130615234, "loss": 0.6601, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.5890167355537415, "rewards/margins": 0.06459061056375504, "rewards/rejected": 0.5244261026382446, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.6369047619047617e-07, "logits/chosen": -2.5440800189971924, "logits/rejected": -2.536761522293091, "logps/chosen": -79.65280151367188, "logps/rejected": -77.1148681640625, "loss": 0.6643, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.48702484369277954, "rewards/margins": 0.02558879181742668, "rewards/rejected": 0.46143603324890137, "step": 110 }, { "epoch": 0.04, "learning_rate": 1.7857142857142858e-07, "logits/chosen": -2.59000301361084, "logits/rejected": -2.6294052600860596, "logps/chosen": -98.95535278320312, "logps/rejected": -93.15876770019531, "loss": 0.6528, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.44851523637771606, "rewards/margins": 0.04791822284460068, "rewards/rejected": 0.4005970060825348, "step": 120 }, { "epoch": 0.04, "learning_rate": 1.9345238095238096e-07, "logits/chosen": -2.5660836696624756, "logits/rejected": -2.532435894012451, "logps/chosen": -81.32213592529297, "logps/rejected": -86.37200927734375, "loss": 0.6286, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.6584704518318176, "rewards/margins": 0.1672821044921875, "rewards/rejected": 0.4911883771419525, "step": 130 }, { "epoch": 0.04, "learning_rate": 2.0833333333333333e-07, "logits/chosen": -2.657209873199463, "logits/rejected": -2.620845079421997, "logps/chosen": -98.81898498535156, "logps/rejected": -91.02754974365234, "loss": 0.6596, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.8377985954284668, "rewards/margins": 0.049154218286275864, "rewards/rejected": 0.7886443138122559, "step": 140 }, { "epoch": 0.04, "learning_rate": 2.232142857142857e-07, "logits/chosen": -2.594756603240967, "logits/rejected": -2.5098514556884766, "logps/chosen": -108.9326171875, "logps/rejected": -124.50955963134766, "loss": 0.6063, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.6377179622650146, "rewards/margins": 0.7904380559921265, "rewards/rejected": -0.15272006392478943, "step": 150 }, { "epoch": 0.05, "learning_rate": 2.3809523809523806e-07, "logits/chosen": -2.5515310764312744, "logits/rejected": -2.4522361755371094, "logps/chosen": -90.93934631347656, "logps/rejected": -106.53071594238281, "loss": 0.6199, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.8264306783676147, "rewards/margins": 0.5799387097358704, "rewards/rejected": 0.246491938829422, "step": 160 }, { "epoch": 0.05, "learning_rate": 2.5297619047619046e-07, "logits/chosen": -2.511021137237549, "logits/rejected": -2.5456349849700928, "logps/chosen": -91.14982604980469, "logps/rejected": -99.70429992675781, "loss": 0.6079, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.7046107649803162, "rewards/margins": 0.36221450567245483, "rewards/rejected": 0.34239625930786133, "step": 170 }, { "epoch": 0.05, "learning_rate": 2.6785714285714284e-07, "logits/chosen": -2.520282030105591, "logits/rejected": -2.503950595855713, "logps/chosen": -79.16224670410156, "logps/rejected": -89.08283233642578, "loss": 0.6324, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.8210590481758118, "rewards/margins": 0.16991613805294037, "rewards/rejected": 0.651142954826355, "step": 180 }, { "epoch": 0.06, "learning_rate": 2.827380952380952e-07, "logits/chosen": -2.6823697090148926, "logits/rejected": -2.633678674697876, "logps/chosen": -104.0126724243164, "logps/rejected": -103.51971435546875, "loss": 0.5904, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 1.1408202648162842, "rewards/margins": 0.2863886058330536, "rewards/rejected": 0.8544318079948425, "step": 190 }, { "epoch": 0.06, "learning_rate": 2.976190476190476e-07, "logits/chosen": -2.530428409576416, "logits/rejected": -2.50227689743042, "logps/chosen": -100.63572692871094, "logps/rejected": -94.46806335449219, "loss": 0.6018, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.7896903157234192, "rewards/margins": 0.45959681272506714, "rewards/rejected": 0.33009350299835205, "step": 200 }, { "epoch": 0.06, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -2.4940271377563477, "logits/rejected": -2.5085806846618652, "logps/chosen": -92.1917724609375, "logps/rejected": -107.3184585571289, "loss": 0.5868, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.7482628226280212, "rewards/margins": 0.49973025918006897, "rewards/rejected": 0.24853253364562988, "step": 210 }, { "epoch": 0.07, "learning_rate": 3.2738095238095235e-07, "logits/chosen": -2.5470972061157227, "logits/rejected": -2.5241191387176514, "logps/chosen": -113.54488372802734, "logps/rejected": -129.91867065429688, "loss": 0.5871, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.4822530746459961, "rewards/margins": 0.5863619446754456, "rewards/rejected": -0.10410883277654648, "step": 220 }, { "epoch": 0.07, "learning_rate": 3.4226190476190473e-07, "logits/chosen": -2.5854454040527344, "logits/rejected": -2.427126169204712, "logps/chosen": -95.35980987548828, "logps/rejected": -81.82037353515625, "loss": 0.6183, "rewards/accuracies": 0.75, "rewards/chosen": 0.8959482908248901, "rewards/margins": 0.8998041152954102, "rewards/rejected": -0.0038558482192456722, "step": 230 }, { "epoch": 0.07, "learning_rate": 3.5714285714285716e-07, "logits/chosen": -2.5749735832214355, "logits/rejected": -2.58799409866333, "logps/chosen": -76.01658630371094, "logps/rejected": -77.50577545166016, "loss": 0.6595, "rewards/accuracies": 0.625, "rewards/chosen": 0.28254395723342896, "rewards/margins": 0.4179397523403168, "rewards/rejected": -0.13539579510688782, "step": 240 }, { "epoch": 0.07, "learning_rate": 3.7202380952380953e-07, "logits/chosen": -2.655733823776245, "logits/rejected": -2.6001226902008057, "logps/chosen": -112.2961654663086, "logps/rejected": -124.30081939697266, "loss": 0.5967, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 1.081606388092041, "rewards/margins": 0.3873857855796814, "rewards/rejected": 0.6942206025123596, "step": 250 }, { "epoch": 0.08, "learning_rate": 3.869047619047619e-07, "logits/chosen": -2.3797781467437744, "logits/rejected": -2.3257176876068115, "logps/chosen": -100.49422454833984, "logps/rejected": -116.31571197509766, "loss": 0.5687, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.26938995718955994, "rewards/margins": 0.5422745943069458, "rewards/rejected": -0.27288463711738586, "step": 260 }, { "epoch": 0.08, "learning_rate": 4.017857142857143e-07, "logits/chosen": -2.506838321685791, "logits/rejected": -2.5618858337402344, "logps/chosen": -103.68598937988281, "logps/rejected": -116.80242919921875, "loss": 0.6466, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.9570896029472351, "rewards/margins": 0.3445149064064026, "rewards/rejected": 0.6125746965408325, "step": 270 }, { "epoch": 0.08, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -2.5634925365448, "logits/rejected": -2.520244836807251, "logps/chosen": -102.6960678100586, "logps/rejected": -90.80632019042969, "loss": 0.5996, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 1.0112148523330688, "rewards/margins": 0.3889988362789154, "rewards/rejected": 0.6222161054611206, "step": 280 }, { "epoch": 0.09, "learning_rate": 4.3154761904761904e-07, "logits/chosen": -2.569206714630127, "logits/rejected": -2.5652623176574707, "logps/chosen": -85.24828338623047, "logps/rejected": -93.45872497558594, "loss": 0.5347, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.3659771978855133, "rewards/margins": 0.7742798924446106, "rewards/rejected": -0.4083026945590973, "step": 290 }, { "epoch": 0.09, "learning_rate": 4.464285714285714e-07, "logits/chosen": -2.363185167312622, "logits/rejected": -2.371516227722168, "logps/chosen": -99.2336654663086, "logps/rejected": -92.32693481445312, "loss": 0.5878, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.7069708704948425, "rewards/margins": 0.8502944111824036, "rewards/rejected": -0.1433234065771103, "step": 300 }, { "epoch": 0.09, "learning_rate": 4.613095238095238e-07, "logits/chosen": -2.412259578704834, "logits/rejected": -2.4086456298828125, "logps/chosen": -96.43733978271484, "logps/rejected": -120.0870590209961, "loss": 0.5642, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.19554999470710754, "rewards/margins": 1.321014404296875, "rewards/rejected": -1.1254642009735107, "step": 310 }, { "epoch": 0.1, "learning_rate": 4.761904761904761e-07, "logits/chosen": -2.6165080070495605, "logits/rejected": -2.6191306114196777, "logps/chosen": -117.46064758300781, "logps/rejected": -122.75732421875, "loss": 0.5508, "rewards/accuracies": 0.625, "rewards/chosen": -0.7428444623947144, "rewards/margins": 0.3669503331184387, "rewards/rejected": -1.1097948551177979, "step": 320 }, { "epoch": 0.1, "learning_rate": 4.910714285714285e-07, "logits/chosen": -2.493110179901123, "logits/rejected": -2.4452643394470215, "logps/chosen": -91.34004211425781, "logps/rejected": -103.17684173583984, "loss": 0.5986, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6157582402229309, "rewards/margins": 0.7595478892326355, "rewards/rejected": -1.375306248664856, "step": 330 }, { "epoch": 0.1, "learning_rate": 4.993377483443708e-07, "logits/chosen": -2.547645092010498, "logits/rejected": -2.4399895668029785, "logps/chosen": -106.4365005493164, "logps/rejected": -109.07222747802734, "loss": 0.5639, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.36611366271972656, "rewards/margins": 0.6897183060646057, "rewards/rejected": -1.0558319091796875, "step": 340 }, { "epoch": 0.1, "learning_rate": 4.97682119205298e-07, "logits/chosen": -2.5453834533691406, "logits/rejected": -2.5119881629943848, "logps/chosen": -108.45722961425781, "logps/rejected": -105.61241149902344, "loss": 0.5994, "rewards/accuracies": 0.625, "rewards/chosen": 0.21297264099121094, "rewards/margins": 0.35165560245513916, "rewards/rejected": -0.13868291676044464, "step": 350 }, { "epoch": 0.11, "learning_rate": 4.960264900662251e-07, "logits/chosen": -2.568861484527588, "logits/rejected": -2.552140712738037, "logps/chosen": -99.7040786743164, "logps/rejected": -109.383544921875, "loss": 0.5401, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2864856421947479, "rewards/margins": 0.8699267506599426, "rewards/rejected": -1.1564123630523682, "step": 360 }, { "epoch": 0.11, "learning_rate": 4.943708609271523e-07, "logits/chosen": -2.584989070892334, "logits/rejected": -2.524940013885498, "logps/chosen": -116.22591400146484, "logps/rejected": -132.27352905273438, "loss": 0.5816, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3508208990097046, "rewards/margins": 0.757738471031189, "rewards/rejected": -1.1085593700408936, "step": 370 }, { "epoch": 0.11, "learning_rate": 4.927152317880794e-07, "logits/chosen": -2.5064499378204346, "logits/rejected": -2.520719528198242, "logps/chosen": -105.9725570678711, "logps/rejected": -106.05126953125, "loss": 0.6476, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.491422414779663, "rewards/margins": 0.45032089948654175, "rewards/rejected": -1.9417431354522705, "step": 380 }, { "epoch": 0.12, "learning_rate": 4.910596026490066e-07, "logits/chosen": -2.4913430213928223, "logits/rejected": -2.5125203132629395, "logps/chosen": -124.0137710571289, "logps/rejected": -119.0078353881836, "loss": 0.6202, "rewards/accuracies": 0.75, "rewards/chosen": -1.6193113327026367, "rewards/margins": 0.656644880771637, "rewards/rejected": -2.275956392288208, "step": 390 }, { "epoch": 0.12, "learning_rate": 4.894039735099338e-07, "logits/chosen": -2.5196266174316406, "logits/rejected": -2.492640256881714, "logps/chosen": -108.40077209472656, "logps/rejected": -106.96036529541016, "loss": 0.5793, "rewards/accuracies": 0.625, "rewards/chosen": -0.8650729060173035, "rewards/margins": 0.4100722372531891, "rewards/rejected": -1.275145173072815, "step": 400 }, { "epoch": 0.12, "learning_rate": 4.877483443708609e-07, "logits/chosen": -2.3113367557525635, "logits/rejected": -2.363025426864624, "logps/chosen": -108.32320404052734, "logps/rejected": -96.14768981933594, "loss": 1.0008, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.168811559677124, "rewards/margins": -1.5380103588104248, "rewards/rejected": -0.6308012008666992, "step": 410 }, { "epoch": 0.13, "learning_rate": 4.860927152317881e-07, "logits/chosen": -2.2521350383758545, "logits/rejected": -2.2686538696289062, "logps/chosen": -78.05595397949219, "logps/rejected": -93.2776107788086, "loss": 0.5595, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5212607979774475, "rewards/margins": 0.686114490032196, "rewards/rejected": -1.207375407218933, "step": 420 }, { "epoch": 0.13, "learning_rate": 4.844370860927152e-07, "logits/chosen": -2.2812628746032715, "logits/rejected": -2.29258394241333, "logps/chosen": -128.2143096923828, "logps/rejected": -135.92117309570312, "loss": 0.5525, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.4639101028442383, "rewards/margins": 1.0309460163116455, "rewards/rejected": -3.494856595993042, "step": 430 }, { "epoch": 0.13, "learning_rate": 4.827814569536423e-07, "logits/chosen": -2.3497612476348877, "logits/rejected": -2.259904384613037, "logps/chosen": -126.2747802734375, "logps/rejected": -132.0948944091797, "loss": 0.5087, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.5596282482147217, "rewards/margins": 1.1223429441452026, "rewards/rejected": -2.681971549987793, "step": 440 }, { "epoch": 0.13, "learning_rate": 4.811258278145695e-07, "logits/chosen": -2.3696093559265137, "logits/rejected": -2.355694055557251, "logps/chosen": -113.27628326416016, "logps/rejected": -120.5525131225586, "loss": 0.5239, "rewards/accuracies": 0.625, "rewards/chosen": -0.5899262428283691, "rewards/margins": 0.7907289266586304, "rewards/rejected": -1.380655288696289, "step": 450 }, { "epoch": 0.14, "learning_rate": 4.794701986754966e-07, "logits/chosen": -2.4090988636016846, "logits/rejected": -2.4314303398132324, "logps/chosen": -119.7711410522461, "logps/rejected": -138.52122497558594, "loss": 0.6907, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.2273411750793457, "rewards/margins": 1.0350992679595947, "rewards/rejected": -2.2624402046203613, "step": 460 }, { "epoch": 0.14, "learning_rate": 4.778145695364238e-07, "logits/chosen": -2.414658546447754, "logits/rejected": -2.4013447761535645, "logps/chosen": -101.0434799194336, "logps/rejected": -102.90351867675781, "loss": 0.5651, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9459658861160278, "rewards/margins": 0.6103629469871521, "rewards/rejected": -1.5563287734985352, "step": 470 }, { "epoch": 0.14, "learning_rate": 4.76158940397351e-07, "logits/chosen": -2.42374849319458, "logits/rejected": -2.4381699562072754, "logps/chosen": -113.9575424194336, "logps/rejected": -121.03520202636719, "loss": 0.5268, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7829158902168274, "rewards/margins": 1.2523859739303589, "rewards/rejected": -2.035301923751831, "step": 480 }, { "epoch": 0.15, "learning_rate": 4.7450331125827815e-07, "logits/chosen": -2.4486849308013916, "logits/rejected": -2.4538803100585938, "logps/chosen": -97.44860076904297, "logps/rejected": -100.29484558105469, "loss": 0.5659, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2252352237701416, "rewards/margins": 0.4965124726295471, "rewards/rejected": -0.7217476963996887, "step": 490 }, { "epoch": 0.15, "learning_rate": 4.728476821192053e-07, "logits/chosen": -2.4106860160827637, "logits/rejected": -2.477334499359131, "logps/chosen": -87.63328552246094, "logps/rejected": -96.80977630615234, "loss": 0.5598, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.022742483764886856, "rewards/margins": 0.6748077273368835, "rewards/rejected": -0.6520652174949646, "step": 500 }, { "epoch": 0.15, "eval_logits/chosen": -2.339754343032837, "eval_logits/rejected": -2.2989299297332764, "eval_logps/chosen": -104.51243591308594, "eval_logps/rejected": -112.7801513671875, "eval_loss": 0.6348409652709961, "eval_rewards/accuracies": 0.7120535969734192, "eval_rewards/chosen": -0.46458080410957336, "eval_rewards/margins": 1.1086541414260864, "eval_rewards/rejected": -1.5732349157333374, "eval_runtime": 528.3305, "eval_samples_per_second": 3.38, "eval_steps_per_second": 0.106, "step": 500 }, { "epoch": 0.15, "learning_rate": 4.7119205298013243e-07, "logits/chosen": -2.3279285430908203, "logits/rejected": -2.2698190212249756, "logps/chosen": -91.65203094482422, "logps/rejected": -111.75373840332031, "loss": 0.5584, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5981258153915405, "rewards/margins": 1.0291321277618408, "rewards/rejected": -1.6272579431533813, "step": 510 }, { "epoch": 0.15, "learning_rate": 4.6953642384105957e-07, "logits/chosen": -2.432509183883667, "logits/rejected": -2.4649786949157715, "logps/chosen": -113.98470306396484, "logps/rejected": -131.01609802246094, "loss": 0.5481, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3087894320487976, "rewards/margins": 1.156360387802124, "rewards/rejected": -1.4651498794555664, "step": 520 }, { "epoch": 0.16, "learning_rate": 4.678807947019867e-07, "logits/chosen": -2.4623587131500244, "logits/rejected": -2.406970500946045, "logps/chosen": -111.83284759521484, "logps/rejected": -117.27791595458984, "loss": 0.5231, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3450089991092682, "rewards/margins": 1.3268024921417236, "rewards/rejected": -1.6718114614486694, "step": 530 }, { "epoch": 0.16, "learning_rate": 4.662251655629139e-07, "logits/chosen": -2.434732675552368, "logits/rejected": -2.482849597930908, "logps/chosen": -82.09310150146484, "logps/rejected": -113.52314758300781, "loss": 0.5046, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10573047399520874, "rewards/margins": 1.0775012969970703, "rewards/rejected": -1.1832319498062134, "step": 540 }, { "epoch": 0.16, "learning_rate": 4.6456953642384104e-07, "logits/chosen": -2.495922565460205, "logits/rejected": -2.432220935821533, "logps/chosen": -123.1400375366211, "logps/rejected": -111.23506164550781, "loss": 1.2805, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4564870297908783, "rewards/margins": 0.5551273226737976, "rewards/rejected": -1.0116143226623535, "step": 550 }, { "epoch": 0.17, "learning_rate": 4.629139072847682e-07, "logits/chosen": -2.3615145683288574, "logits/rejected": -2.3742241859436035, "logps/chosen": -128.84971618652344, "logps/rejected": -140.29312133789062, "loss": 1.2493, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.166237235069275, "rewards/margins": 1.4496667385101318, "rewards/rejected": -2.615903854370117, "step": 560 }, { "epoch": 0.17, "learning_rate": 4.612582781456953e-07, "logits/chosen": -2.471628189086914, "logits/rejected": -2.407003164291382, "logps/chosen": -106.4498291015625, "logps/rejected": -119.580078125, "loss": 0.4833, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.251348614692688, "rewards/margins": 1.0506912469863892, "rewards/rejected": -2.302039623260498, "step": 570 }, { "epoch": 0.17, "learning_rate": 4.596026490066225e-07, "logits/chosen": -2.3577880859375, "logits/rejected": -2.3710594177246094, "logps/chosen": -109.6436996459961, "logps/rejected": -111.36781311035156, "loss": 0.6501, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.1875368356704712, "rewards/margins": 0.8005573153495789, "rewards/rejected": -1.9880939722061157, "step": 580 }, { "epoch": 0.18, "learning_rate": 4.5794701986754965e-07, "logits/chosen": -2.3025927543640137, "logits/rejected": -2.412416934967041, "logps/chosen": -92.57754516601562, "logps/rejected": -125.9276123046875, "loss": 0.6228, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.1066559553146362, "rewards/margins": 0.5373567938804626, "rewards/rejected": -1.6440128087997437, "step": 590 }, { "epoch": 0.18, "learning_rate": 4.562913907284768e-07, "logits/chosen": -2.315936326980591, "logits/rejected": -2.264455556869507, "logps/chosen": -111.17767333984375, "logps/rejected": -124.8282699584961, "loss": 0.5129, "rewards/accuracies": 0.75, "rewards/chosen": -0.9577595591545105, "rewards/margins": 1.1823832988739014, "rewards/rejected": -2.1401429176330566, "step": 600 }, { "epoch": 0.18, "learning_rate": 4.54635761589404e-07, "logits/chosen": -2.4507012367248535, "logits/rejected": -2.402617931365967, "logps/chosen": -112.44432067871094, "logps/rejected": -117.4054946899414, "loss": 0.7353, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0404579639434814, "rewards/margins": 0.9835718870162964, "rewards/rejected": -2.0240299701690674, "step": 610 }, { "epoch": 0.18, "learning_rate": 4.5298013245033113e-07, "logits/chosen": -2.410632610321045, "logits/rejected": -2.4103057384490967, "logps/chosen": -102.71327209472656, "logps/rejected": -118.40677642822266, "loss": 0.496, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.740358829498291, "rewards/margins": 1.1352561712265015, "rewards/rejected": -1.875615119934082, "step": 620 }, { "epoch": 0.19, "learning_rate": 4.5132450331125827e-07, "logits/chosen": -2.2251460552215576, "logits/rejected": -2.2362751960754395, "logps/chosen": -106.41926574707031, "logps/rejected": -108.68377685546875, "loss": 0.9923, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0975613594055176, "rewards/margins": 1.4764889478683472, "rewards/rejected": -2.5740504264831543, "step": 630 }, { "epoch": 0.19, "learning_rate": 4.496688741721854e-07, "logits/chosen": -2.2884135246276855, "logits/rejected": -2.3148610591888428, "logps/chosen": -95.29539489746094, "logps/rejected": -102.16908264160156, "loss": 0.6218, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0151549577713013, "rewards/margins": 0.7811011075973511, "rewards/rejected": -1.7962560653686523, "step": 640 }, { "epoch": 0.19, "learning_rate": 4.4801324503311255e-07, "logits/chosen": -2.266324520111084, "logits/rejected": -2.1928133964538574, "logps/chosen": -94.09014892578125, "logps/rejected": -102.40970611572266, "loss": 0.525, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.572268009185791, "rewards/margins": 0.8905662298202515, "rewards/rejected": -1.462834358215332, "step": 650 }, { "epoch": 0.2, "learning_rate": 4.463576158940397e-07, "logits/chosen": -2.2293038368225098, "logits/rejected": -2.1520204544067383, "logps/chosen": -119.3239974975586, "logps/rejected": -125.38603210449219, "loss": 0.7517, "rewards/accuracies": 0.75, "rewards/chosen": -0.5861669182777405, "rewards/margins": 1.747180700302124, "rewards/rejected": -2.333347797393799, "step": 660 }, { "epoch": 0.2, "learning_rate": 4.4470198675496683e-07, "logits/chosen": -2.3797130584716797, "logits/rejected": -2.3224523067474365, "logps/chosen": -103.2835922241211, "logps/rejected": -110.06852722167969, "loss": 0.6595, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6205762028694153, "rewards/margins": 0.7353760600090027, "rewards/rejected": -1.355952262878418, "step": 670 }, { "epoch": 0.2, "learning_rate": 4.43046357615894e-07, "logits/chosen": -2.286005973815918, "logits/rejected": -2.243605375289917, "logps/chosen": -122.601806640625, "logps/rejected": -152.8876190185547, "loss": 0.4932, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.38620439171791077, "rewards/margins": 1.5684607028961182, "rewards/rejected": -1.9546654224395752, "step": 680 }, { "epoch": 0.21, "learning_rate": 4.4139072847682116e-07, "logits/chosen": -2.2788243293762207, "logits/rejected": -2.3132455348968506, "logps/chosen": -109.7626724243164, "logps/rejected": -121.210693359375, "loss": 0.5107, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1420578509569168, "rewards/margins": 1.0154675245285034, "rewards/rejected": -1.1575253009796143, "step": 690 }, { "epoch": 0.21, "learning_rate": 4.397350993377483e-07, "logits/chosen": -2.1815805435180664, "logits/rejected": -2.2088842391967773, "logps/chosen": -97.82100677490234, "logps/rejected": -110.3985595703125, "loss": 0.5536, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.8147605657577515, "rewards/margins": 1.0990091562271118, "rewards/rejected": -1.9137697219848633, "step": 700 }, { "epoch": 0.21, "learning_rate": 4.380794701986755e-07, "logits/chosen": -2.1567564010620117, "logits/rejected": -2.213163375854492, "logps/chosen": -88.54952239990234, "logps/rejected": -115.46138000488281, "loss": 0.5308, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.8950099945068359, "rewards/margins": 0.986484169960022, "rewards/rejected": -1.881494164466858, "step": 710 }, { "epoch": 0.21, "learning_rate": 4.3642384105960263e-07, "logits/chosen": -2.195145845413208, "logits/rejected": -2.1583914756774902, "logps/chosen": -89.95973205566406, "logps/rejected": -90.32757568359375, "loss": 0.5371, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1613867282867432, "rewards/margins": 1.2194865942001343, "rewards/rejected": -2.380873203277588, "step": 720 }, { "epoch": 0.22, "learning_rate": 4.347682119205298e-07, "logits/chosen": -2.157541513442993, "logits/rejected": -2.0622384548187256, "logps/chosen": -122.03218078613281, "logps/rejected": -133.05084228515625, "loss": 0.4753, "rewards/accuracies": 0.75, "rewards/chosen": -1.2253652811050415, "rewards/margins": 1.0726807117462158, "rewards/rejected": -2.2980456352233887, "step": 730 }, { "epoch": 0.22, "learning_rate": 4.3311258278145697e-07, "logits/chosen": -2.2362678050994873, "logits/rejected": -2.2174267768859863, "logps/chosen": -104.6390151977539, "logps/rejected": -108.64559173583984, "loss": 0.5309, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9815710186958313, "rewards/margins": 0.686564564704895, "rewards/rejected": -1.6681352853775024, "step": 740 }, { "epoch": 0.22, "learning_rate": 4.314569536423841e-07, "logits/chosen": -2.3051602840423584, "logits/rejected": -2.205004930496216, "logps/chosen": -112.1572494506836, "logps/rejected": -115.80439758300781, "loss": 3.3956, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1993415355682373, "rewards/margins": 0.9256394505500793, "rewards/rejected": -2.124980926513672, "step": 750 }, { "epoch": 0.23, "learning_rate": 4.2980132450331125e-07, "logits/chosen": -2.1093502044677734, "logits/rejected": -2.1304099559783936, "logps/chosen": -101.59135437011719, "logps/rejected": -121.8282241821289, "loss": 0.6244, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.4122694730758667, "rewards/margins": 1.0248304605484009, "rewards/rejected": -2.4371001720428467, "step": 760 }, { "epoch": 0.23, "learning_rate": 4.281456953642384e-07, "logits/chosen": -2.1925549507141113, "logits/rejected": -2.2341551780700684, "logps/chosen": -125.73774719238281, "logps/rejected": -137.68995666503906, "loss": 0.5342, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3582961559295654, "rewards/margins": 1.1268060207366943, "rewards/rejected": -2.4851021766662598, "step": 770 }, { "epoch": 0.23, "learning_rate": 4.2649006622516553e-07, "logits/chosen": -2.1826648712158203, "logits/rejected": -2.0866520404815674, "logps/chosen": -112.77205657958984, "logps/rejected": -135.73634338378906, "loss": 0.6883, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9496241807937622, "rewards/margins": 2.8248558044433594, "rewards/rejected": -3.774479627609253, "step": 780 }, { "epoch": 0.24, "learning_rate": 4.2483443708609267e-07, "logits/chosen": -2.2106716632843018, "logits/rejected": -2.2418696880340576, "logps/chosen": -100.58189392089844, "logps/rejected": -122.01805114746094, "loss": 0.4801, "rewards/accuracies": 0.75, "rewards/chosen": -1.5530904531478882, "rewards/margins": 1.249182939529419, "rewards/rejected": -2.802273750305176, "step": 790 }, { "epoch": 0.24, "learning_rate": 4.231788079470198e-07, "logits/chosen": -2.1691789627075195, "logits/rejected": -2.082367181777954, "logps/chosen": -100.97856903076172, "logps/rejected": -102.23161315917969, "loss": 0.5207, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1863991022109985, "rewards/margins": 1.5210940837860107, "rewards/rejected": -2.707493305206299, "step": 800 }, { "epoch": 0.24, "learning_rate": 4.21523178807947e-07, "logits/chosen": -2.321969985961914, "logits/rejected": -2.2945773601531982, "logps/chosen": -95.80015563964844, "logps/rejected": -103.98514556884766, "loss": 0.5769, "rewards/accuracies": 0.75, "rewards/chosen": -0.6277263760566711, "rewards/margins": 1.0614575147628784, "rewards/rejected": -1.6891838312149048, "step": 810 }, { "epoch": 0.24, "learning_rate": 4.1986754966887414e-07, "logits/chosen": -2.16201114654541, "logits/rejected": -2.100698471069336, "logps/chosen": -107.64762878417969, "logps/rejected": -114.20783996582031, "loss": 0.5842, "rewards/accuracies": 0.75, "rewards/chosen": -1.0765411853790283, "rewards/margins": 0.9841095209121704, "rewards/rejected": -2.060650587081909, "step": 820 }, { "epoch": 0.25, "learning_rate": 4.1821192052980133e-07, "logits/chosen": -2.15731143951416, "logits/rejected": -2.1200685501098633, "logps/chosen": -94.93736267089844, "logps/rejected": -108.20992279052734, "loss": 0.502, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5728567838668823, "rewards/margins": 1.2192682027816772, "rewards/rejected": -1.7921253442764282, "step": 830 }, { "epoch": 0.25, "learning_rate": 4.165562913907285e-07, "logits/chosen": -2.228494644165039, "logits/rejected": -2.199162006378174, "logps/chosen": -119.44285583496094, "logps/rejected": -124.89945220947266, "loss": 0.5335, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5043415427207947, "rewards/margins": 1.4671887159347534, "rewards/rejected": -1.9715303182601929, "step": 840 }, { "epoch": 0.25, "learning_rate": 4.149006622516556e-07, "logits/chosen": -2.242833137512207, "logits/rejected": -2.193368673324585, "logps/chosen": -106.42388916015625, "logps/rejected": -115.7519302368164, "loss": 0.5458, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2504803538322449, "rewards/margins": 1.4816687107086182, "rewards/rejected": -1.7321488857269287, "step": 850 }, { "epoch": 0.26, "learning_rate": 4.1324503311258276e-07, "logits/chosen": -2.296274185180664, "logits/rejected": -2.233081340789795, "logps/chosen": -97.89036560058594, "logps/rejected": -118.38981628417969, "loss": 0.6251, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.856580376625061, "rewards/margins": 1.5226026773452759, "rewards/rejected": -2.379183053970337, "step": 860 }, { "epoch": 0.26, "learning_rate": 4.1158940397350995e-07, "logits/chosen": -2.2974660396575928, "logits/rejected": -2.1640889644622803, "logps/chosen": -111.53731536865234, "logps/rejected": -109.1888656616211, "loss": 0.4891, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9736050367355347, "rewards/margins": 1.2244486808776855, "rewards/rejected": -2.1980538368225098, "step": 870 }, { "epoch": 0.26, "learning_rate": 4.099337748344371e-07, "logits/chosen": -2.130094289779663, "logits/rejected": -2.0237298011779785, "logps/chosen": -116.61064147949219, "logps/rejected": -123.98744201660156, "loss": 0.9585, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.352774143218994, "rewards/margins": 1.3028422594070435, "rewards/rejected": -3.6556167602539062, "step": 880 }, { "epoch": 0.27, "learning_rate": 4.0827814569536423e-07, "logits/chosen": -2.1122946739196777, "logits/rejected": -2.1758933067321777, "logps/chosen": -92.36100769042969, "logps/rejected": -117.257080078125, "loss": 0.5243, "rewards/accuracies": 0.625, "rewards/chosen": -1.1455414295196533, "rewards/margins": 1.1456917524337769, "rewards/rejected": -2.291233539581299, "step": 890 }, { "epoch": 0.27, "learning_rate": 4.0662251655629137e-07, "logits/chosen": -2.1967172622680664, "logits/rejected": -2.163334369659424, "logps/chosen": -94.69267272949219, "logps/rejected": -106.30582427978516, "loss": 0.5839, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9426735043525696, "rewards/margins": 0.8261833190917969, "rewards/rejected": -1.7688567638397217, "step": 900 }, { "epoch": 0.27, "learning_rate": 4.049668874172185e-07, "logits/chosen": -2.2559750080108643, "logits/rejected": -2.2480287551879883, "logps/chosen": -114.98677825927734, "logps/rejected": -118.00787353515625, "loss": 0.6499, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.6318261623382568, "rewards/margins": 0.813581645488739, "rewards/rejected": -2.4454076290130615, "step": 910 }, { "epoch": 0.27, "learning_rate": 4.0331125827814565e-07, "logits/chosen": -2.277569055557251, "logits/rejected": -2.2428252696990967, "logps/chosen": -106.87760162353516, "logps/rejected": -107.15045166015625, "loss": 0.7337, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.2901384830474854, "rewards/margins": 0.8376191854476929, "rewards/rejected": -2.1277577877044678, "step": 920 }, { "epoch": 0.28, "learning_rate": 4.016556291390728e-07, "logits/chosen": -2.2305266857147217, "logits/rejected": -2.2446939945220947, "logps/chosen": -115.1706314086914, "logps/rejected": -132.69129943847656, "loss": 0.5205, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4605052471160889, "rewards/margins": 1.3340156078338623, "rewards/rejected": -2.794520854949951, "step": 930 }, { "epoch": 0.28, "learning_rate": 4e-07, "logits/chosen": -2.3378500938415527, "logits/rejected": -2.1980865001678467, "logps/chosen": -124.11688232421875, "logps/rejected": -121.2197494506836, "loss": 0.5762, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1835664510726929, "rewards/margins": 1.0963947772979736, "rewards/rejected": -2.279961109161377, "step": 940 }, { "epoch": 0.28, "learning_rate": 3.983443708609271e-07, "logits/chosen": -2.2236156463623047, "logits/rejected": -2.2054903507232666, "logps/chosen": -122.0257568359375, "logps/rejected": -125.471923828125, "loss": 0.4677, "rewards/accuracies": 0.75, "rewards/chosen": -1.3710222244262695, "rewards/margins": 1.238471269607544, "rewards/rejected": -2.6094937324523926, "step": 950 }, { "epoch": 0.29, "learning_rate": 3.966887417218543e-07, "logits/chosen": -2.2760846614837646, "logits/rejected": -2.2383294105529785, "logps/chosen": -104.09146881103516, "logps/rejected": -120.87336730957031, "loss": 0.5848, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.4301923513412476, "rewards/margins": 1.626868486404419, "rewards/rejected": -3.0570602416992188, "step": 960 }, { "epoch": 0.29, "learning_rate": 3.9503311258278146e-07, "logits/chosen": -2.304551839828491, "logits/rejected": -2.3333609104156494, "logps/chosen": -119.12831115722656, "logps/rejected": -128.80160522460938, "loss": 0.555, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.5498030185699463, "rewards/margins": 0.8728634715080261, "rewards/rejected": -3.422666072845459, "step": 970 }, { "epoch": 0.29, "learning_rate": 3.933774834437086e-07, "logits/chosen": -2.2905325889587402, "logits/rejected": -2.175750255584717, "logps/chosen": -111.89952087402344, "logps/rejected": -112.72969055175781, "loss": 0.5745, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.818068742752075, "rewards/margins": 0.5174419283866882, "rewards/rejected": -3.335510730743408, "step": 980 }, { "epoch": 0.29, "learning_rate": 3.9172185430463574e-07, "logits/chosen": -2.3529715538024902, "logits/rejected": -2.2983202934265137, "logps/chosen": -136.7278594970703, "logps/rejected": -129.16085815429688, "loss": 0.5891, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.4263627529144287, "rewards/margins": 0.8253445625305176, "rewards/rejected": -3.2517075538635254, "step": 990 }, { "epoch": 0.3, "learning_rate": 3.9006622516556293e-07, "logits/chosen": -2.2374019622802734, "logits/rejected": -2.2284903526306152, "logps/chosen": -114.3366470336914, "logps/rejected": -110.65074157714844, "loss": 0.6708, "rewards/accuracies": 0.625, "rewards/chosen": -2.3343544006347656, "rewards/margins": 1.0057871341705322, "rewards/rejected": -3.340141773223877, "step": 1000 }, { "epoch": 0.3, "eval_logits/chosen": -2.2622616291046143, "eval_logits/rejected": -2.215507745742798, "eval_logps/chosen": -119.37470245361328, "eval_logps/rejected": -125.0894546508789, "eval_loss": 0.5807133316993713, "eval_rewards/accuracies": 0.6830357313156128, "eval_rewards/chosen": -1.950809121131897, "eval_rewards/margins": 0.8533560633659363, "eval_rewards/rejected": -2.8041651248931885, "eval_runtime": 520.9457, "eval_samples_per_second": 3.428, "eval_steps_per_second": 0.107, "step": 1000 }, { "epoch": 0.3, "learning_rate": 3.8841059602649007e-07, "logits/chosen": -2.4846906661987305, "logits/rejected": -2.38966703414917, "logps/chosen": -122.82658386230469, "logps/rejected": -122.37986755371094, "loss": 0.5429, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.5582962036132812, "rewards/margins": 0.7036358118057251, "rewards/rejected": -2.261931896209717, "step": 1010 }, { "epoch": 0.3, "learning_rate": 3.867549668874172e-07, "logits/chosen": -2.413020133972168, "logits/rejected": -2.348389148712158, "logps/chosen": -146.7459716796875, "logps/rejected": -152.81591796875, "loss": 0.5503, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9320647716522217, "rewards/margins": 1.321993112564087, "rewards/rejected": -3.2540581226348877, "step": 1020 }, { "epoch": 0.31, "learning_rate": 3.8509933774834435e-07, "logits/chosen": -2.335376262664795, "logits/rejected": -2.3727335929870605, "logps/chosen": -96.5339584350586, "logps/rejected": -102.97718811035156, "loss": 0.4738, "rewards/accuracies": 0.625, "rewards/chosen": -1.347673773765564, "rewards/margins": 0.7935667037963867, "rewards/rejected": -2.141240358352661, "step": 1030 }, { "epoch": 0.31, "learning_rate": 3.834437086092715e-07, "logits/chosen": -2.474375009536743, "logits/rejected": -2.457411527633667, "logps/chosen": -100.12342071533203, "logps/rejected": -98.28324890136719, "loss": 0.5072, "rewards/accuracies": 0.75, "rewards/chosen": -0.6612989902496338, "rewards/margins": 1.0141090154647827, "rewards/rejected": -1.675408124923706, "step": 1040 }, { "epoch": 0.31, "learning_rate": 3.8178807947019863e-07, "logits/chosen": -2.4102184772491455, "logits/rejected": -2.366565704345703, "logps/chosen": -94.41053771972656, "logps/rejected": -106.40338134765625, "loss": 0.4768, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1945335865020752, "rewards/margins": 1.7779722213745117, "rewards/rejected": -2.972505807876587, "step": 1050 }, { "epoch": 0.32, "learning_rate": 3.8013245033112577e-07, "logits/chosen": -2.324781656265259, "logits/rejected": -2.265265703201294, "logps/chosen": -113.0925064086914, "logps/rejected": -116.36458587646484, "loss": 0.5291, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6215614080429077, "rewards/margins": 0.970362663269043, "rewards/rejected": -1.5919239521026611, "step": 1060 }, { "epoch": 0.32, "learning_rate": 3.7847682119205296e-07, "logits/chosen": -2.4248404502868652, "logits/rejected": -2.3727540969848633, "logps/chosen": -112.99056243896484, "logps/rejected": -124.29933166503906, "loss": 1.2347, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.332558035850525, "rewards/margins": 1.7028182744979858, "rewards/rejected": -3.0353763103485107, "step": 1070 }, { "epoch": 0.32, "learning_rate": 3.7682119205298016e-07, "logits/chosen": -2.510585308074951, "logits/rejected": -2.4303643703460693, "logps/chosen": -122.59515380859375, "logps/rejected": -119.64692687988281, "loss": 0.5815, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.8100630044937134, "rewards/margins": 0.8500891923904419, "rewards/rejected": -2.660151958465576, "step": 1080 }, { "epoch": 0.32, "learning_rate": 3.751655629139073e-07, "logits/chosen": -2.48645281791687, "logits/rejected": -2.433279037475586, "logps/chosen": -131.58583068847656, "logps/rejected": -139.4903106689453, "loss": 0.4595, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.738173484802246, "rewards/margins": 0.9940687417984009, "rewards/rejected": -2.7322418689727783, "step": 1090 }, { "epoch": 0.33, "learning_rate": 3.7350993377483444e-07, "logits/chosen": -2.2750325202941895, "logits/rejected": -2.214141845703125, "logps/chosen": -92.43232727050781, "logps/rejected": -118.48176574707031, "loss": 0.4501, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.566083312034607, "rewards/margins": 1.913888931274414, "rewards/rejected": -3.4799721240997314, "step": 1100 }, { "epoch": 0.33, "learning_rate": 3.718543046357616e-07, "logits/chosen": -2.3589186668395996, "logits/rejected": -2.289020538330078, "logps/chosen": -116.14213562011719, "logps/rejected": -115.25, "loss": 0.5489, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.8134400844573975, "rewards/margins": 1.0903173685073853, "rewards/rejected": -2.9037575721740723, "step": 1110 }, { "epoch": 0.33, "learning_rate": 3.701986754966887e-07, "logits/chosen": -2.4153926372528076, "logits/rejected": -2.38564133644104, "logps/chosen": -198.99185180664062, "logps/rejected": -211.7269744873047, "loss": 0.4915, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -8.85645580291748, "rewards/margins": 1.3634490966796875, "rewards/rejected": -10.219904899597168, "step": 1120 }, { "epoch": 0.34, "learning_rate": 3.6854304635761586e-07, "logits/chosen": -2.3718574047088623, "logits/rejected": -2.323935031890869, "logps/chosen": -114.41487121582031, "logps/rejected": -115.03157806396484, "loss": 0.5742, "rewards/accuracies": 0.625, "rewards/chosen": -1.747230887413025, "rewards/margins": 0.9782400131225586, "rewards/rejected": -2.725471019744873, "step": 1130 }, { "epoch": 0.34, "learning_rate": 3.6688741721854305e-07, "logits/chosen": -2.266796350479126, "logits/rejected": -2.279444456100464, "logps/chosen": -125.43962097167969, "logps/rejected": -138.60568237304688, "loss": 0.5559, "rewards/accuracies": 0.5, "rewards/chosen": -1.7848097085952759, "rewards/margins": 0.5874557495117188, "rewards/rejected": -2.372265577316284, "step": 1140 }, { "epoch": 0.34, "learning_rate": 3.652317880794702e-07, "logits/chosen": -2.3460640907287598, "logits/rejected": -2.2017135620117188, "logps/chosen": -173.7471923828125, "logps/rejected": -175.39913940429688, "loss": 0.6409, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -8.320539474487305, "rewards/margins": 0.5577089190483093, "rewards/rejected": -8.87824821472168, "step": 1150 }, { "epoch": 0.35, "learning_rate": 3.6357615894039733e-07, "logits/chosen": -2.3931944370269775, "logits/rejected": -2.295135498046875, "logps/chosen": -117.7610855102539, "logps/rejected": -131.86878967285156, "loss": 0.5557, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.364652395248413, "rewards/margins": 0.9879738092422485, "rewards/rejected": -2.352626323699951, "step": 1160 }, { "epoch": 0.35, "learning_rate": 3.6192052980132447e-07, "logits/chosen": -2.3470005989074707, "logits/rejected": -2.314392328262329, "logps/chosen": -116.54869079589844, "logps/rejected": -121.33402252197266, "loss": 0.474, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6547797918319702, "rewards/margins": 1.0188862085342407, "rewards/rejected": -2.6736655235290527, "step": 1170 }, { "epoch": 0.35, "learning_rate": 3.602649006622516e-07, "logits/chosen": -2.3665614128112793, "logits/rejected": -2.2760112285614014, "logps/chosen": -116.99346923828125, "logps/rejected": -194.17459106445312, "loss": 0.4616, "rewards/accuracies": 0.875, "rewards/chosen": -1.1822216510772705, "rewards/margins": 7.195115089416504, "rewards/rejected": -8.377335548400879, "step": 1180 }, { "epoch": 0.35, "learning_rate": 3.5860927152317875e-07, "logits/chosen": -2.378209114074707, "logits/rejected": -2.3278615474700928, "logps/chosen": -119.82206726074219, "logps/rejected": -126.67927551269531, "loss": 0.5238, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.758419394493103, "rewards/margins": 1.255906343460083, "rewards/rejected": -3.0143258571624756, "step": 1190 }, { "epoch": 0.36, "learning_rate": 3.5695364238410594e-07, "logits/chosen": -2.376044750213623, "logits/rejected": -2.308166265487671, "logps/chosen": -113.3560562133789, "logps/rejected": -114.37657165527344, "loss": 0.5637, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.4774434566497803, "rewards/margins": 0.5793313384056091, "rewards/rejected": -2.056774616241455, "step": 1200 }, { "epoch": 0.36, "learning_rate": 3.5529801324503314e-07, "logits/chosen": -2.374824285507202, "logits/rejected": -2.3935980796813965, "logps/chosen": -108.83685302734375, "logps/rejected": -115.5961685180664, "loss": 0.5626, "rewards/accuracies": 0.625, "rewards/chosen": -1.5476689338684082, "rewards/margins": 0.7745328545570374, "rewards/rejected": -2.322201728820801, "step": 1210 }, { "epoch": 0.36, "learning_rate": 3.536423841059603e-07, "logits/chosen": -2.339582681655884, "logits/rejected": -2.361855983734131, "logps/chosen": -121.9773941040039, "logps/rejected": -133.71356201171875, "loss": 0.5682, "rewards/accuracies": 0.625, "rewards/chosen": -2.1847705841064453, "rewards/margins": 0.9190909266471863, "rewards/rejected": -3.1038613319396973, "step": 1220 }, { "epoch": 0.37, "learning_rate": 3.519867549668874e-07, "logits/chosen": -2.2750911712646484, "logits/rejected": -2.235349416732788, "logps/chosen": -83.35279846191406, "logps/rejected": -102.2212905883789, "loss": 0.5579, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9985214471817017, "rewards/margins": 1.0258718729019165, "rewards/rejected": -3.024393081665039, "step": 1230 }, { "epoch": 0.37, "learning_rate": 3.5033112582781456e-07, "logits/chosen": -2.3950631618499756, "logits/rejected": -2.286043643951416, "logps/chosen": -112.0318603515625, "logps/rejected": -116.00035095214844, "loss": 0.9739, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.5054577589035034, "rewards/margins": 0.963812530040741, "rewards/rejected": -2.4692704677581787, "step": 1240 }, { "epoch": 0.37, "learning_rate": 3.486754966887417e-07, "logits/chosen": -2.2539525032043457, "logits/rejected": -2.280163526535034, "logps/chosen": -90.12135314941406, "logps/rejected": -103.78663635253906, "loss": 0.5341, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0227272510528564, "rewards/margins": 0.8646720051765442, "rewards/rejected": -1.8873993158340454, "step": 1250 }, { "epoch": 0.38, "learning_rate": 3.4701986754966884e-07, "logits/chosen": -2.3163156509399414, "logits/rejected": -2.324432849884033, "logps/chosen": -91.22362518310547, "logps/rejected": -103.0667953491211, "loss": 0.5978, "rewards/accuracies": 0.75, "rewards/chosen": -0.4652012288570404, "rewards/margins": 1.1732677221298218, "rewards/rejected": -1.6384689807891846, "step": 1260 }, { "epoch": 0.38, "learning_rate": 3.4536423841059603e-07, "logits/chosen": -2.3076674938201904, "logits/rejected": -2.3175816535949707, "logps/chosen": -100.41036224365234, "logps/rejected": -117.35456848144531, "loss": 0.648, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2293812036514282, "rewards/margins": 0.8102920651435852, "rewards/rejected": -2.039673089981079, "step": 1270 }, { "epoch": 0.38, "learning_rate": 3.4370860927152317e-07, "logits/chosen": -2.3699378967285156, "logits/rejected": -2.3562657833099365, "logps/chosen": -119.7326889038086, "logps/rejected": -131.7585906982422, "loss": 0.4977, "rewards/accuracies": 0.75, "rewards/chosen": -0.6823489665985107, "rewards/margins": 1.3262075185775757, "rewards/rejected": -2.008556604385376, "step": 1280 }, { "epoch": 0.38, "learning_rate": 3.420529801324503e-07, "logits/chosen": -2.1104941368103027, "logits/rejected": -2.0905330181121826, "logps/chosen": -171.0330810546875, "logps/rejected": -175.89886474609375, "loss": 0.9012, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -7.170141696929932, "rewards/margins": -0.5844208002090454, "rewards/rejected": -6.585721015930176, "step": 1290 }, { "epoch": 0.39, "learning_rate": 3.4039735099337745e-07, "logits/chosen": -2.3445873260498047, "logits/rejected": -2.2650883197784424, "logps/chosen": -130.222900390625, "logps/rejected": -132.6783447265625, "loss": 0.6484, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4724981784820557, "rewards/margins": 0.9613865613937378, "rewards/rejected": -2.433884859085083, "step": 1300 }, { "epoch": 0.39, "learning_rate": 3.387417218543046e-07, "logits/chosen": -2.457414150238037, "logits/rejected": -2.5287423133850098, "logps/chosen": -135.46902465820312, "logps/rejected": -159.89739990234375, "loss": 0.4958, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.7360942363739014, "rewards/margins": 1.1658858060836792, "rewards/rejected": -2.901979923248291, "step": 1310 }, { "epoch": 0.39, "learning_rate": 3.3708609271523173e-07, "logits/chosen": -2.221667766571045, "logits/rejected": -2.208982467651367, "logps/chosen": -106.48121643066406, "logps/rejected": -104.82066345214844, "loss": 0.5892, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.4603312015533447, "rewards/margins": 0.8812816739082336, "rewards/rejected": -2.3416128158569336, "step": 1320 }, { "epoch": 0.4, "learning_rate": 3.35430463576159e-07, "logits/chosen": -2.2299439907073975, "logits/rejected": -2.225663423538208, "logps/chosen": -94.22245025634766, "logps/rejected": -100.85789489746094, "loss": 0.5178, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.32827529311180115, "rewards/margins": 1.4746736288070679, "rewards/rejected": -1.8029489517211914, "step": 1330 }, { "epoch": 0.4, "learning_rate": 3.337748344370861e-07, "logits/chosen": -2.2380738258361816, "logits/rejected": -2.2997546195983887, "logps/chosen": -98.22574615478516, "logps/rejected": -112.7544937133789, "loss": 0.5318, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4563646912574768, "rewards/margins": 0.6618258953094482, "rewards/rejected": -1.1181905269622803, "step": 1340 }, { "epoch": 0.4, "learning_rate": 3.3211920529801326e-07, "logits/chosen": -2.405059814453125, "logits/rejected": -2.429863452911377, "logps/chosen": -107.7689437866211, "logps/rejected": -119.28629302978516, "loss": 0.5111, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2023572474718094, "rewards/margins": 0.8103917241096497, "rewards/rejected": -1.0127489566802979, "step": 1350 }, { "epoch": 0.41, "learning_rate": 3.304635761589404e-07, "logits/chosen": -2.3326334953308105, "logits/rejected": -2.256371259689331, "logps/chosen": -111.0186767578125, "logps/rejected": -101.33964538574219, "loss": 0.5624, "rewards/accuracies": 0.75, "rewards/chosen": -0.1809541881084442, "rewards/margins": 0.9960860013961792, "rewards/rejected": -1.177040457725525, "step": 1360 }, { "epoch": 0.41, "learning_rate": 3.2880794701986754e-07, "logits/chosen": -2.228715181350708, "logits/rejected": -2.2780203819274902, "logps/chosen": -82.96192932128906, "logps/rejected": -106.3135757446289, "loss": 0.5477, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4178234040737152, "rewards/margins": 1.3403428792953491, "rewards/rejected": -1.7581663131713867, "step": 1370 }, { "epoch": 0.41, "learning_rate": 3.271523178807947e-07, "logits/chosen": -2.3673007488250732, "logits/rejected": -2.361161947250366, "logps/chosen": -110.33650970458984, "logps/rejected": -118.94313049316406, "loss": 0.6233, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7891864776611328, "rewards/margins": 0.4793139100074768, "rewards/rejected": -1.2685004472732544, "step": 1380 }, { "epoch": 0.41, "learning_rate": 3.254966887417218e-07, "logits/chosen": -2.270993947982788, "logits/rejected": -2.3422646522521973, "logps/chosen": -110.1440200805664, "logps/rejected": -123.39158630371094, "loss": 0.5202, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0290793180465698, "rewards/margins": 0.8790245056152344, "rewards/rejected": -1.9081039428710938, "step": 1390 }, { "epoch": 0.42, "learning_rate": 3.23841059602649e-07, "logits/chosen": -2.3637521266937256, "logits/rejected": -2.3246121406555176, "logps/chosen": -123.53662109375, "logps/rejected": -130.99119567871094, "loss": 0.4855, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9286755323410034, "rewards/margins": 1.2442817687988281, "rewards/rejected": -2.172957181930542, "step": 1400 }, { "epoch": 0.42, "learning_rate": 3.2218543046357615e-07, "logits/chosen": -2.1791653633117676, "logits/rejected": -2.18937349319458, "logps/chosen": -166.4168243408203, "logps/rejected": -188.746826171875, "loss": 0.633, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -5.239639759063721, "rewards/margins": 1.5334604978561401, "rewards/rejected": -6.77310037612915, "step": 1410 }, { "epoch": 0.42, "learning_rate": 3.205298013245033e-07, "logits/chosen": -2.234860897064209, "logits/rejected": -2.235252857208252, "logps/chosen": -131.88731384277344, "logps/rejected": -147.35777282714844, "loss": 0.6159, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.8779850006103516, "rewards/margins": 1.1691521406173706, "rewards/rejected": -4.0471367835998535, "step": 1420 }, { "epoch": 0.43, "learning_rate": 3.1887417218543043e-07, "logits/chosen": -2.3836143016815186, "logits/rejected": -2.35886287689209, "logps/chosen": -122.30987548828125, "logps/rejected": -125.3285903930664, "loss": 0.536, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.5127007961273193, "rewards/margins": 1.0224969387054443, "rewards/rejected": -3.5351977348327637, "step": 1430 }, { "epoch": 0.43, "learning_rate": 3.1721854304635757e-07, "logits/chosen": -2.280726194381714, "logits/rejected": -2.2161917686462402, "logps/chosen": -118.85569763183594, "logps/rejected": -122.44981384277344, "loss": 0.5343, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.2102978229522705, "rewards/margins": 1.2170623540878296, "rewards/rejected": -3.4273605346679688, "step": 1440 }, { "epoch": 0.43, "learning_rate": 3.155629139072847e-07, "logits/chosen": -2.445349931716919, "logits/rejected": -2.4110920429229736, "logps/chosen": -110.351806640625, "logps/rejected": -118.65000915527344, "loss": 0.5877, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.997372031211853, "rewards/margins": 0.7109335660934448, "rewards/rejected": -2.708305835723877, "step": 1450 }, { "epoch": 0.44, "learning_rate": 3.1390728476821196e-07, "logits/chosen": -2.3366105556488037, "logits/rejected": -2.3466391563415527, "logps/chosen": -127.3154067993164, "logps/rejected": -124.52156829833984, "loss": 0.5383, "rewards/accuracies": 0.625, "rewards/chosen": -2.18947172164917, "rewards/margins": 0.6396933794021606, "rewards/rejected": -2.829165458679199, "step": 1460 }, { "epoch": 0.44, "learning_rate": 3.122516556291391e-07, "logits/chosen": -2.260577917098999, "logits/rejected": -2.2590389251708984, "logps/chosen": -113.4861831665039, "logps/rejected": -108.08863830566406, "loss": 0.6422, "rewards/accuracies": 0.625, "rewards/chosen": -2.5557265281677246, "rewards/margins": 0.4942797124385834, "rewards/rejected": -3.050006151199341, "step": 1470 }, { "epoch": 0.44, "learning_rate": 3.1059602649006624e-07, "logits/chosen": -2.3765158653259277, "logits/rejected": -2.3215491771698, "logps/chosen": -123.18157958984375, "logps/rejected": -119.86373138427734, "loss": 0.4739, "rewards/accuracies": 0.75, "rewards/chosen": -1.5957121849060059, "rewards/margins": 0.9963384866714478, "rewards/rejected": -2.592050552368164, "step": 1480 }, { "epoch": 0.44, "learning_rate": 3.089403973509934e-07, "logits/chosen": -2.3044986724853516, "logits/rejected": -2.3184516429901123, "logps/chosen": -120.02888488769531, "logps/rejected": -125.82350158691406, "loss": 0.5382, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5062299966812134, "rewards/margins": 1.3234798908233643, "rewards/rejected": -2.829709529876709, "step": 1490 }, { "epoch": 0.45, "learning_rate": 3.072847682119205e-07, "logits/chosen": -2.315985918045044, "logits/rejected": -2.262968063354492, "logps/chosen": -114.96397399902344, "logps/rejected": -129.48318481445312, "loss": 0.5984, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5051807165145874, "rewards/margins": 1.5209523439407349, "rewards/rejected": -3.0261335372924805, "step": 1500 }, { "epoch": 0.45, "eval_logits/chosen": -2.1823971271514893, "eval_logits/rejected": -2.1383469104766846, "eval_logps/chosen": -114.31800079345703, "eval_logps/rejected": -123.8126220703125, "eval_loss": 0.5244069695472717, "eval_rewards/accuracies": 0.71875, "eval_rewards/chosen": -1.4451391696929932, "eval_rewards/margins": 1.2313430309295654, "eval_rewards/rejected": -2.6764819622039795, "eval_runtime": 522.8803, "eval_samples_per_second": 3.416, "eval_steps_per_second": 0.107, "step": 1500 }, { "epoch": 0.45, "learning_rate": 3.0562913907284766e-07, "logits/chosen": -2.313927173614502, "logits/rejected": -2.33535099029541, "logps/chosen": -118.8395767211914, "logps/rejected": -128.5199737548828, "loss": 0.5877, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.8590469360351562, "rewards/margins": 0.7332299947738647, "rewards/rejected": -2.5922768115997314, "step": 1510 }, { "epoch": 0.45, "learning_rate": 3.039735099337748e-07, "logits/chosen": -2.3866069316864014, "logits/rejected": -2.3465638160705566, "logps/chosen": -120.46064758300781, "logps/rejected": -116.50871276855469, "loss": 0.8307, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.8921232223510742, "rewards/margins": 0.763845682144165, "rewards/rejected": -2.6559691429138184, "step": 1520 }, { "epoch": 0.46, "learning_rate": 3.02317880794702e-07, "logits/chosen": -2.421853542327881, "logits/rejected": -2.318270206451416, "logps/chosen": -137.11500549316406, "logps/rejected": -134.6293487548828, "loss": 0.5303, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.621045708656311, "rewards/margins": 0.8223884701728821, "rewards/rejected": -2.443434238433838, "step": 1530 }, { "epoch": 0.46, "learning_rate": 3.0066225165562913e-07, "logits/chosen": -2.35496187210083, "logits/rejected": -2.2371764183044434, "logps/chosen": -111.7890853881836, "logps/rejected": -106.77983093261719, "loss": 0.5696, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0801293849945068, "rewards/margins": 1.046350121498108, "rewards/rejected": -2.1264796257019043, "step": 1540 }, { "epoch": 0.46, "learning_rate": 2.9900662251655627e-07, "logits/chosen": -2.329745054244995, "logits/rejected": -2.2365243434906006, "logps/chosen": -112.671875, "logps/rejected": -102.41822814941406, "loss": 0.6113, "rewards/accuracies": 0.75, "rewards/chosen": -1.2923063039779663, "rewards/margins": 1.320711612701416, "rewards/rejected": -2.613018035888672, "step": 1550 }, { "epoch": 0.46, "learning_rate": 2.973509933774834e-07, "logits/chosen": -2.2645044326782227, "logits/rejected": -2.1956381797790527, "logps/chosen": -104.55106353759766, "logps/rejected": -106.2977294921875, "loss": 0.7244, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3651814460754395, "rewards/margins": 0.206166073679924, "rewards/rejected": -1.5713475942611694, "step": 1560 }, { "epoch": 0.47, "learning_rate": 2.9569536423841055e-07, "logits/chosen": -2.1943066120147705, "logits/rejected": -2.24649977684021, "logps/chosen": -89.5943374633789, "logps/rejected": -106.08259582519531, "loss": 0.5679, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.288975477218628, "rewards/margins": 0.5116127133369446, "rewards/rejected": -1.8005882501602173, "step": 1570 }, { "epoch": 0.47, "learning_rate": 2.940397350993377e-07, "logits/chosen": -2.607445240020752, "logits/rejected": -2.4970269203186035, "logps/chosen": -146.52468872070312, "logps/rejected": -140.1497344970703, "loss": 0.5597, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5630671977996826, "rewards/margins": 0.6673210263252258, "rewards/rejected": -2.2303881645202637, "step": 1580 }, { "epoch": 0.47, "learning_rate": 2.9238410596026494e-07, "logits/chosen": -2.238361358642578, "logits/rejected": -2.1506645679473877, "logps/chosen": -99.36707305908203, "logps/rejected": -105.86885070800781, "loss": 0.5912, "rewards/accuracies": 0.75, "rewards/chosen": -1.7212435007095337, "rewards/margins": 1.0883468389511108, "rewards/rejected": -2.8095905780792236, "step": 1590 }, { "epoch": 0.48, "learning_rate": 2.907284768211921e-07, "logits/chosen": -2.3251490592956543, "logits/rejected": -2.307288408279419, "logps/chosen": -115.70127868652344, "logps/rejected": -124.7564926147461, "loss": 0.6455, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.711801290512085, "rewards/margins": 1.0508122444152832, "rewards/rejected": -2.7626137733459473, "step": 1600 }, { "epoch": 0.48, "learning_rate": 2.890728476821192e-07, "logits/chosen": -2.2478513717651367, "logits/rejected": -2.2882168292999268, "logps/chosen": -104.28340911865234, "logps/rejected": -121.9701156616211, "loss": 0.5165, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.887414574623108, "rewards/margins": 0.925916850566864, "rewards/rejected": -2.8133316040039062, "step": 1610 }, { "epoch": 0.48, "learning_rate": 2.8741721854304636e-07, "logits/chosen": -2.296663999557495, "logits/rejected": -2.3100745677948, "logps/chosen": -158.80392456054688, "logps/rejected": -124.94380950927734, "loss": 1.3257, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -4.360524654388428, "rewards/margins": -2.369443655014038, "rewards/rejected": -1.9910815954208374, "step": 1620 }, { "epoch": 0.49, "learning_rate": 2.857615894039735e-07, "logits/chosen": -2.2816338539123535, "logits/rejected": -2.2055506706237793, "logps/chosen": -116.35295104980469, "logps/rejected": -125.78230285644531, "loss": 0.4623, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.8872613906860352, "rewards/margins": 1.402567982673645, "rewards/rejected": -3.2898292541503906, "step": 1630 }, { "epoch": 0.49, "learning_rate": 2.8410596026490064e-07, "logits/chosen": -2.3136837482452393, "logits/rejected": -2.327634334564209, "logps/chosen": -118.3811264038086, "logps/rejected": -131.34634399414062, "loss": 0.4969, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.959405243396759, "rewards/margins": 1.3146027326583862, "rewards/rejected": -2.27400803565979, "step": 1640 }, { "epoch": 0.49, "learning_rate": 2.824503311258278e-07, "logits/chosen": -2.2207038402557373, "logits/rejected": -2.2655978202819824, "logps/chosen": -122.19026184082031, "logps/rejected": -130.85289001464844, "loss": 0.5344, "rewards/accuracies": 0.75, "rewards/chosen": -1.631255865097046, "rewards/margins": 1.0431182384490967, "rewards/rejected": -2.6743741035461426, "step": 1650 }, { "epoch": 0.49, "learning_rate": 2.8079470198675497e-07, "logits/chosen": -2.3095381259918213, "logits/rejected": -2.2248189449310303, "logps/chosen": -103.5934066772461, "logps/rejected": -116.8990249633789, "loss": 0.5347, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.2814357280731201, "rewards/margins": 1.4714066982269287, "rewards/rejected": -2.752842426300049, "step": 1660 }, { "epoch": 0.5, "learning_rate": 2.791390728476821e-07, "logits/chosen": -2.2992262840270996, "logits/rejected": -2.3474018573760986, "logps/chosen": -140.76292419433594, "logps/rejected": -168.6060333251953, "loss": 0.5155, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.161952018737793, "rewards/margins": 1.1705152988433838, "rewards/rejected": -3.3324673175811768, "step": 1670 }, { "epoch": 0.5, "learning_rate": 2.7748344370860925e-07, "logits/chosen": -2.1538851261138916, "logits/rejected": -2.1492209434509277, "logps/chosen": -85.61529541015625, "logps/rejected": -105.35960388183594, "loss": 0.4547, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.2468538284301758, "rewards/margins": 1.524840235710144, "rewards/rejected": -2.7716941833496094, "step": 1680 }, { "epoch": 0.5, "learning_rate": 2.758278145695364e-07, "logits/chosen": -2.220313549041748, "logits/rejected": -2.270676612854004, "logps/chosen": -116.72190856933594, "logps/rejected": -132.87937927246094, "loss": 0.4907, "rewards/accuracies": 0.875, "rewards/chosen": -1.662656545639038, "rewards/margins": 1.4583295583724976, "rewards/rejected": -3.1209864616394043, "step": 1690 }, { "epoch": 0.51, "learning_rate": 2.7417218543046353e-07, "logits/chosen": -2.3201870918273926, "logits/rejected": -2.287921667098999, "logps/chosen": -119.7146987915039, "logps/rejected": -135.27894592285156, "loss": 0.4492, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.569067358970642, "rewards/margins": 1.5324440002441406, "rewards/rejected": -3.1015114784240723, "step": 1700 }, { "epoch": 0.51, "learning_rate": 2.725165562913907e-07, "logits/chosen": -2.2404065132141113, "logits/rejected": -2.2358851432800293, "logps/chosen": -134.36831665039062, "logps/rejected": -137.21890258789062, "loss": 0.5334, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.740677833557129, "rewards/margins": 1.5694725513458252, "rewards/rejected": -3.310150623321533, "step": 1710 }, { "epoch": 0.51, "learning_rate": 2.7086092715231786e-07, "logits/chosen": -2.3186452388763428, "logits/rejected": -2.2739059925079346, "logps/chosen": -118.93257141113281, "logps/rejected": -117.28021240234375, "loss": 0.5242, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.951216459274292, "rewards/margins": 1.0923653841018677, "rewards/rejected": -3.04358172416687, "step": 1720 }, { "epoch": 0.52, "learning_rate": 2.6920529801324506e-07, "logits/chosen": -2.230313777923584, "logits/rejected": -2.269009828567505, "logps/chosen": -152.952880859375, "logps/rejected": -145.59732055664062, "loss": 0.8317, "rewards/accuracies": 0.75, "rewards/chosen": -4.0952677726745605, "rewards/margins": -0.3405976891517639, "rewards/rejected": -3.7546706199645996, "step": 1730 }, { "epoch": 0.52, "learning_rate": 2.675496688741722e-07, "logits/chosen": -2.276404857635498, "logits/rejected": -2.223013162612915, "logps/chosen": -126.49522399902344, "logps/rejected": -123.188720703125, "loss": 0.503, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.3149405717849731, "rewards/margins": 0.8885973691940308, "rewards/rejected": -2.203538179397583, "step": 1740 }, { "epoch": 0.52, "learning_rate": 2.6589403973509934e-07, "logits/chosen": -2.1905181407928467, "logits/rejected": -2.179508924484253, "logps/chosen": -105.01606750488281, "logps/rejected": -133.09783935546875, "loss": 0.5413, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6002556085586548, "rewards/margins": 2.0444486141204834, "rewards/rejected": -2.6447041034698486, "step": 1750 }, { "epoch": 0.52, "learning_rate": 2.642384105960265e-07, "logits/chosen": -2.2174530029296875, "logits/rejected": -2.250398635864258, "logps/chosen": -101.74955749511719, "logps/rejected": -133.28713989257812, "loss": 0.5109, "rewards/accuracies": 0.625, "rewards/chosen": -0.5314977765083313, "rewards/margins": 1.0308630466461182, "rewards/rejected": -1.5623606443405151, "step": 1760 }, { "epoch": 0.53, "learning_rate": 2.625827814569536e-07, "logits/chosen": -2.2077157497406006, "logits/rejected": -2.1879453659057617, "logps/chosen": -106.83088684082031, "logps/rejected": -117.6878662109375, "loss": 0.5311, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.922218918800354, "rewards/margins": 1.4254719018936157, "rewards/rejected": -3.3476905822753906, "step": 1770 }, { "epoch": 0.53, "learning_rate": 2.6092715231788076e-07, "logits/chosen": -2.236419677734375, "logits/rejected": -2.1945042610168457, "logps/chosen": -112.48036193847656, "logps/rejected": -125.71522521972656, "loss": 0.8311, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.3717401027679443, "rewards/margins": 1.444071888923645, "rewards/rejected": -2.8158118724823, "step": 1780 }, { "epoch": 0.53, "learning_rate": 2.5927152317880795e-07, "logits/chosen": -2.3207204341888428, "logits/rejected": -2.2542405128479004, "logps/chosen": -112.21119689941406, "logps/rejected": -117.85597229003906, "loss": 0.5143, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3228784799575806, "rewards/margins": 0.836434543132782, "rewards/rejected": -2.159313201904297, "step": 1790 }, { "epoch": 0.54, "learning_rate": 2.576158940397351e-07, "logits/chosen": -2.1415367126464844, "logits/rejected": -2.173337936401367, "logps/chosen": -111.6198501586914, "logps/rejected": -131.82977294921875, "loss": 0.5845, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.362797498703003, "rewards/margins": 0.8809803128242493, "rewards/rejected": -2.2437777519226074, "step": 1800 }, { "epoch": 0.54, "learning_rate": 2.5596026490066223e-07, "logits/chosen": -2.169029951095581, "logits/rejected": -2.145346164703369, "logps/chosen": -102.84260559082031, "logps/rejected": -115.5724105834961, "loss": 0.778, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.428421974182129, "rewards/margins": 0.6596145629882812, "rewards/rejected": -2.08803653717041, "step": 1810 }, { "epoch": 0.54, "learning_rate": 2.5430463576158937e-07, "logits/chosen": -2.3364787101745605, "logits/rejected": -2.2011687755584717, "logps/chosen": -109.7977066040039, "logps/rejected": -112.79130554199219, "loss": 0.4779, "rewards/accuracies": 0.75, "rewards/chosen": -1.1568695306777954, "rewards/margins": 1.5236353874206543, "rewards/rejected": -2.6805050373077393, "step": 1820 }, { "epoch": 0.55, "learning_rate": 2.526490066225165e-07, "logits/chosen": -2.270590305328369, "logits/rejected": -2.317115306854248, "logps/chosen": -131.15716552734375, "logps/rejected": -127.9232406616211, "loss": 0.5718, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.5472099781036377, "rewards/margins": 0.4068627953529358, "rewards/rejected": -1.9540729522705078, "step": 1830 }, { "epoch": 0.55, "learning_rate": 2.509933774834437e-07, "logits/chosen": -2.258516550064087, "logits/rejected": -2.215508222579956, "logps/chosen": -116.99686431884766, "logps/rejected": -119.55728912353516, "loss": 0.5443, "rewards/accuracies": 0.75, "rewards/chosen": -1.1407909393310547, "rewards/margins": 1.8011624813079834, "rewards/rejected": -2.941953659057617, "step": 1840 }, { "epoch": 0.55, "learning_rate": 2.4933774834437084e-07, "logits/chosen": -2.301156997680664, "logits/rejected": -2.282895565032959, "logps/chosen": -134.67526245117188, "logps/rejected": -115.00638580322266, "loss": 0.9478, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.1763339042663574, "rewards/margins": -0.813465416431427, "rewards/rejected": -2.362868309020996, "step": 1850 }, { "epoch": 0.55, "learning_rate": 2.47682119205298e-07, "logits/chosen": -2.1597695350646973, "logits/rejected": -2.2584662437438965, "logps/chosen": -90.55691528320312, "logps/rejected": -123.76590728759766, "loss": 0.408, "rewards/accuracies": 0.875, "rewards/chosen": -1.2247810363769531, "rewards/margins": 1.5576727390289307, "rewards/rejected": -2.782454013824463, "step": 1860 }, { "epoch": 0.56, "learning_rate": 2.460264900662252e-07, "logits/chosen": -2.3151791095733643, "logits/rejected": -2.3073203563690186, "logps/chosen": -110.79292297363281, "logps/rejected": -124.72319030761719, "loss": 0.4559, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4885177612304688, "rewards/margins": 0.9933377504348755, "rewards/rejected": -2.481855630874634, "step": 1870 }, { "epoch": 0.56, "learning_rate": 2.443708609271523e-07, "logits/chosen": -2.3785767555236816, "logits/rejected": -2.32625150680542, "logps/chosen": -135.62266540527344, "logps/rejected": -147.58059692382812, "loss": 0.5509, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.6126123666763306, "rewards/margins": 1.325919508934021, "rewards/rejected": -2.9385318756103516, "step": 1880 }, { "epoch": 0.56, "learning_rate": 2.4271523178807946e-07, "logits/chosen": -2.067755937576294, "logits/rejected": -2.157957077026367, "logps/chosen": -100.20467376708984, "logps/rejected": -139.4010009765625, "loss": 0.4928, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.7971267700195312, "rewards/margins": 2.0710580348968506, "rewards/rejected": -3.8681845664978027, "step": 1890 }, { "epoch": 0.57, "learning_rate": 2.410596026490066e-07, "logits/chosen": -2.26438307762146, "logits/rejected": -2.1824748516082764, "logps/chosen": -107.44325256347656, "logps/rejected": -120.7808609008789, "loss": 0.5088, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0230767726898193, "rewards/margins": 1.2027556896209717, "rewards/rejected": -2.22583270072937, "step": 1900 }, { "epoch": 0.57, "learning_rate": 2.394039735099338e-07, "logits/chosen": -2.202819585800171, "logits/rejected": -2.1675939559936523, "logps/chosen": -118.07059478759766, "logps/rejected": -127.72599792480469, "loss": 0.6344, "rewards/accuracies": 0.75, "rewards/chosen": -1.6664397716522217, "rewards/margins": 0.9765909314155579, "rewards/rejected": -2.6430306434631348, "step": 1910 }, { "epoch": 0.57, "learning_rate": 2.377483443708609e-07, "logits/chosen": -2.2761118412017822, "logits/rejected": -2.2829902172088623, "logps/chosen": -125.0960464477539, "logps/rejected": -151.57437133789062, "loss": 0.4561, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.6365854740142822, "rewards/margins": 1.4987179040908813, "rewards/rejected": -3.135303497314453, "step": 1920 }, { "epoch": 0.58, "learning_rate": 2.3609271523178807e-07, "logits/chosen": -2.317108154296875, "logits/rejected": -2.367867946624756, "logps/chosen": -110.93936920166016, "logps/rejected": -124.30006408691406, "loss": 0.4998, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5488382577896118, "rewards/margins": 1.2520344257354736, "rewards/rejected": -2.800872564315796, "step": 1930 }, { "epoch": 0.58, "learning_rate": 2.3443708609271524e-07, "logits/chosen": -2.283686399459839, "logits/rejected": -2.2000420093536377, "logps/chosen": -99.95622253417969, "logps/rejected": -107.75992584228516, "loss": 0.5787, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1963920593261719, "rewards/margins": 1.1246505975723267, "rewards/rejected": -2.321042537689209, "step": 1940 }, { "epoch": 0.58, "learning_rate": 2.3278145695364238e-07, "logits/chosen": -2.3099443912506104, "logits/rejected": -2.293797016143799, "logps/chosen": -143.53994750976562, "logps/rejected": -156.98973083496094, "loss": 0.5597, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4224615097045898, "rewards/margins": 2.5084261894226074, "rewards/rejected": -3.9308879375457764, "step": 1950 }, { "epoch": 0.58, "learning_rate": 2.3112582781456952e-07, "logits/chosen": -2.2922520637512207, "logits/rejected": -2.3070366382598877, "logps/chosen": -105.562744140625, "logps/rejected": -115.12934875488281, "loss": 0.4832, "rewards/accuracies": 0.75, "rewards/chosen": -1.130995512008667, "rewards/margins": 1.4186890125274658, "rewards/rejected": -2.549685001373291, "step": 1960 }, { "epoch": 0.59, "learning_rate": 2.2947019867549669e-07, "logits/chosen": -2.32692289352417, "logits/rejected": -2.284797191619873, "logps/chosen": -110.09574127197266, "logps/rejected": -141.3235321044922, "loss": 0.5419, "rewards/accuracies": 0.625, "rewards/chosen": -1.62274169921875, "rewards/margins": 2.5739684104919434, "rewards/rejected": -4.196709632873535, "step": 1970 }, { "epoch": 0.59, "learning_rate": 2.2781456953642383e-07, "logits/chosen": -2.2572226524353027, "logits/rejected": -2.3021931648254395, "logps/chosen": -103.7448501586914, "logps/rejected": -124.720947265625, "loss": 0.5929, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.1002720594406128, "rewards/margins": 1.6038618087768555, "rewards/rejected": -2.704134225845337, "step": 1980 }, { "epoch": 0.59, "learning_rate": 2.2615894039735097e-07, "logits/chosen": -2.1988630294799805, "logits/rejected": -2.1840052604675293, "logps/chosen": -97.76619720458984, "logps/rejected": -114.77903747558594, "loss": 0.536, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6038196086883545, "rewards/margins": 1.3313450813293457, "rewards/rejected": -2.9351646900177, "step": 1990 }, { "epoch": 0.6, "learning_rate": 2.2450331125827813e-07, "logits/chosen": -2.2937569618225098, "logits/rejected": -2.1716551780700684, "logps/chosen": -122.71870422363281, "logps/rejected": -126.25750732421875, "loss": 0.5508, "rewards/accuracies": 0.75, "rewards/chosen": -1.7580636739730835, "rewards/margins": 1.4850653409957886, "rewards/rejected": -3.243128538131714, "step": 2000 }, { "epoch": 0.6, "eval_logits/chosen": -2.1208481788635254, "eval_logits/rejected": -2.0760180950164795, "eval_logps/chosen": -117.771728515625, "eval_logps/rejected": -125.91642761230469, "eval_loss": 0.5643959641456604, "eval_rewards/accuracies": 0.6785714030265808, "eval_rewards/chosen": -1.7905113697052002, "eval_rewards/margins": 1.0963507890701294, "eval_rewards/rejected": -2.886862277984619, "eval_runtime": 519.2917, "eval_samples_per_second": 3.439, "eval_steps_per_second": 0.108, "step": 2000 }, { "epoch": 0.6, "learning_rate": 2.228476821192053e-07, "logits/chosen": -2.2932658195495605, "logits/rejected": -2.2196624279022217, "logps/chosen": -121.21055603027344, "logps/rejected": -114.63111877441406, "loss": 0.7063, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.775307059288025, "rewards/margins": 0.6449312567710876, "rewards/rejected": -2.420238494873047, "step": 2010 }, { "epoch": 0.6, "learning_rate": 2.2119205298013244e-07, "logits/chosen": -2.137760877609253, "logits/rejected": -2.1844496726989746, "logps/chosen": -138.58255004882812, "logps/rejected": -127.23612213134766, "loss": 0.5234, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9617927074432373, "rewards/margins": 0.6043619513511658, "rewards/rejected": -2.5661544799804688, "step": 2020 }, { "epoch": 0.6, "learning_rate": 2.1953642384105958e-07, "logits/chosen": -2.2938625812530518, "logits/rejected": -2.268752336502075, "logps/chosen": -113.85087585449219, "logps/rejected": -149.66539001464844, "loss": 0.5987, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.5664665699005127, "rewards/margins": 0.8736650347709656, "rewards/rejected": -2.440131664276123, "step": 2030 }, { "epoch": 0.61, "learning_rate": 2.1788079470198675e-07, "logits/chosen": -2.3219776153564453, "logits/rejected": -2.350645065307617, "logps/chosen": -94.68901824951172, "logps/rejected": -106.27494812011719, "loss": 0.6865, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4543777704238892, "rewards/margins": 0.8551589846611023, "rewards/rejected": -2.309536933898926, "step": 2040 }, { "epoch": 0.61, "learning_rate": 2.1622516556291389e-07, "logits/chosen": -2.2941012382507324, "logits/rejected": -2.2624030113220215, "logps/chosen": -125.8183822631836, "logps/rejected": -133.0880584716797, "loss": 0.5175, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.4116047620773315, "rewards/margins": 1.2253597974777222, "rewards/rejected": -2.6369645595550537, "step": 2050 }, { "epoch": 0.61, "learning_rate": 2.1456953642384105e-07, "logits/chosen": -2.176222562789917, "logits/rejected": -2.0717849731445312, "logps/chosen": -104.74139404296875, "logps/rejected": -124.4080581665039, "loss": 0.4433, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.621763825416565, "rewards/margins": 1.3861135244369507, "rewards/rejected": -3.0078773498535156, "step": 2060 }, { "epoch": 0.62, "learning_rate": 2.1291390728476822e-07, "logits/chosen": -2.244816541671753, "logits/rejected": -2.217611074447632, "logps/chosen": -126.18927001953125, "logps/rejected": -129.03915405273438, "loss": 0.559, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.9465411901474, "rewards/margins": 0.7713083028793335, "rewards/rejected": -2.7178492546081543, "step": 2070 }, { "epoch": 0.62, "learning_rate": 2.1125827814569536e-07, "logits/chosen": -2.331676721572876, "logits/rejected": -2.249488353729248, "logps/chosen": -115.93875885009766, "logps/rejected": -122.89765930175781, "loss": 0.5416, "rewards/accuracies": 0.625, "rewards/chosen": -1.3803373575210571, "rewards/margins": 0.6847006678581238, "rewards/rejected": -2.065037965774536, "step": 2080 }, { "epoch": 0.62, "learning_rate": 2.096026490066225e-07, "logits/chosen": -2.2945587635040283, "logits/rejected": -2.2709438800811768, "logps/chosen": -102.5355453491211, "logps/rejected": -105.96309661865234, "loss": 0.6918, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.1539552211761475, "rewards/margins": 1.233269453048706, "rewards/rejected": -2.3872246742248535, "step": 2090 }, { "epoch": 0.63, "learning_rate": 2.0794701986754967e-07, "logits/chosen": -2.260633945465088, "logits/rejected": -2.257582187652588, "logps/chosen": -123.7904052734375, "logps/rejected": -139.2223663330078, "loss": 0.489, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.6961309909820557, "rewards/margins": 1.7095321416854858, "rewards/rejected": -3.405663251876831, "step": 2100 }, { "epoch": 0.63, "learning_rate": 2.062913907284768e-07, "logits/chosen": -2.4550869464874268, "logits/rejected": -2.369741439819336, "logps/chosen": -110.0873031616211, "logps/rejected": -110.52005767822266, "loss": 0.5515, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -1.5832288265228271, "rewards/margins": 0.5315386652946472, "rewards/rejected": -2.114767551422119, "step": 2110 }, { "epoch": 0.63, "learning_rate": 2.0463576158940397e-07, "logits/chosen": -2.1035804748535156, "logits/rejected": -2.0870535373687744, "logps/chosen": -108.03116607666016, "logps/rejected": -193.72422790527344, "loss": 0.4535, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8296096324920654, "rewards/margins": 8.629142761230469, "rewards/rejected": -10.45875072479248, "step": 2120 }, { "epoch": 0.63, "learning_rate": 2.029801324503311e-07, "logits/chosen": -2.3085687160491943, "logits/rejected": -2.3340985774993896, "logps/chosen": -117.62290954589844, "logps/rejected": -126.44264221191406, "loss": 2.8703, "rewards/accuracies": 0.625, "rewards/chosen": -1.7038648128509521, "rewards/margins": 1.1059377193450928, "rewards/rejected": -2.809802770614624, "step": 2130 }, { "epoch": 0.64, "learning_rate": 2.0132450331125828e-07, "logits/chosen": -2.4316937923431396, "logits/rejected": -2.3887412548065186, "logps/chosen": -138.1640625, "logps/rejected": -139.81886291503906, "loss": 0.5099, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.448891520500183, "rewards/margins": 0.9665302038192749, "rewards/rejected": -2.415421962738037, "step": 2140 }, { "epoch": 0.64, "learning_rate": 1.9966887417218542e-07, "logits/chosen": -2.5827786922454834, "logits/rejected": -2.53794002532959, "logps/chosen": -126.8006362915039, "logps/rejected": -126.6136474609375, "loss": 0.515, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.2876603603363037, "rewards/margins": 0.7639477849006653, "rewards/rejected": -2.051608085632324, "step": 2150 }, { "epoch": 0.64, "learning_rate": 1.9801324503311256e-07, "logits/chosen": -2.3453097343444824, "logits/rejected": -2.4177701473236084, "logps/chosen": -111.456787109375, "logps/rejected": -121.5312728881836, "loss": 0.5125, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4392328262329102, "rewards/margins": 1.1764917373657227, "rewards/rejected": -2.615724563598633, "step": 2160 }, { "epoch": 0.65, "learning_rate": 1.9635761589403973e-07, "logits/chosen": -2.2641375064849854, "logits/rejected": -2.3049521446228027, "logps/chosen": -95.91242980957031, "logps/rejected": -111.0653305053711, "loss": 0.5322, "rewards/accuracies": 0.75, "rewards/chosen": -1.325178861618042, "rewards/margins": 1.0794451236724854, "rewards/rejected": -2.4046239852905273, "step": 2170 }, { "epoch": 0.65, "learning_rate": 1.947019867549669e-07, "logits/chosen": -2.3387389183044434, "logits/rejected": -2.2360782623291016, "logps/chosen": -100.87395477294922, "logps/rejected": -111.1401138305664, "loss": 0.49, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.7546203136444092, "rewards/margins": 1.1646721363067627, "rewards/rejected": -2.919292449951172, "step": 2180 }, { "epoch": 0.65, "learning_rate": 1.9304635761589403e-07, "logits/chosen": -2.3461403846740723, "logits/rejected": -2.3300156593322754, "logps/chosen": -103.99101257324219, "logps/rejected": -122.7502212524414, "loss": 0.5215, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1400959491729736, "rewards/margins": 1.375199556350708, "rewards/rejected": -2.5152957439422607, "step": 2190 }, { "epoch": 0.66, "learning_rate": 1.913907284768212e-07, "logits/chosen": -2.353959560394287, "logits/rejected": -2.2813894748687744, "logps/chosen": -110.76973724365234, "logps/rejected": -133.5187225341797, "loss": 0.5406, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2461332082748413, "rewards/margins": 2.200319766998291, "rewards/rejected": -3.446453094482422, "step": 2200 }, { "epoch": 0.66, "learning_rate": 1.8973509933774834e-07, "logits/chosen": -2.483916759490967, "logits/rejected": -2.3655548095703125, "logps/chosen": -108.8669662475586, "logps/rejected": -116.64085388183594, "loss": 0.5907, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.9422400593757629, "rewards/margins": 0.9399551153182983, "rewards/rejected": -1.8821951150894165, "step": 2210 }, { "epoch": 0.66, "learning_rate": 1.8807947019867548e-07, "logits/chosen": -2.3532841205596924, "logits/rejected": -2.3442747592926025, "logps/chosen": -99.94935607910156, "logps/rejected": -115.6842269897461, "loss": 0.6063, "rewards/accuracies": 0.75, "rewards/chosen": -1.1935023069381714, "rewards/margins": 1.022131323814392, "rewards/rejected": -2.2156338691711426, "step": 2220 }, { "epoch": 0.66, "learning_rate": 1.8642384105960262e-07, "logits/chosen": -2.473654270172119, "logits/rejected": -2.456444263458252, "logps/chosen": -128.30955505371094, "logps/rejected": -135.67520141601562, "loss": 0.5602, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0379096269607544, "rewards/margins": 0.6246587038040161, "rewards/rejected": -1.6625683307647705, "step": 2230 }, { "epoch": 0.67, "learning_rate": 1.8476821192052979e-07, "logits/chosen": -2.4869556427001953, "logits/rejected": -2.449312686920166, "logps/chosen": -116.06538391113281, "logps/rejected": -120.8796615600586, "loss": 0.5219, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9571272730827332, "rewards/margins": 1.021393060684204, "rewards/rejected": -1.978520154953003, "step": 2240 }, { "epoch": 0.67, "learning_rate": 1.8311258278145695e-07, "logits/chosen": -2.4670963287353516, "logits/rejected": -2.382390260696411, "logps/chosen": -123.17083740234375, "logps/rejected": -109.2813949584961, "loss": 0.5479, "rewards/accuracies": 0.625, "rewards/chosen": -1.2968004941940308, "rewards/margins": 0.7737834453582764, "rewards/rejected": -2.0705838203430176, "step": 2250 }, { "epoch": 0.67, "learning_rate": 1.814569536423841e-07, "logits/chosen": -2.345423460006714, "logits/rejected": -2.3300931453704834, "logps/chosen": -92.75725555419922, "logps/rejected": -107.68856048583984, "loss": 0.4778, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.8894661068916321, "rewards/margins": 1.360033392906189, "rewards/rejected": -2.249499559402466, "step": 2260 }, { "epoch": 0.68, "learning_rate": 1.7980132450331126e-07, "logits/chosen": -2.2602181434631348, "logits/rejected": -2.1599280834198, "logps/chosen": -105.10438537597656, "logps/rejected": -131.7586669921875, "loss": 0.5319, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.113909125328064, "rewards/margins": 1.5821037292480469, "rewards/rejected": -2.6960129737854004, "step": 2270 }, { "epoch": 0.68, "learning_rate": 1.781456953642384e-07, "logits/chosen": -2.3455591201782227, "logits/rejected": -2.364595413208008, "logps/chosen": -130.79019165039062, "logps/rejected": -147.9118194580078, "loss": 0.4775, "rewards/accuracies": 0.75, "rewards/chosen": -1.7606639862060547, "rewards/margins": 1.2344610691070557, "rewards/rejected": -2.9951250553131104, "step": 2280 }, { "epoch": 0.68, "learning_rate": 1.7649006622516554e-07, "logits/chosen": -2.167285442352295, "logits/rejected": -2.247238874435425, "logps/chosen": -139.6284942626953, "logps/rejected": -158.96109008789062, "loss": 0.7002, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.767961263656616, "rewards/margins": 1.04625403881073, "rewards/rejected": -3.8142154216766357, "step": 2290 }, { "epoch": 0.69, "learning_rate": 1.748344370860927e-07, "logits/chosen": -2.3983192443847656, "logits/rejected": -2.4019296169281006, "logps/chosen": -115.08357238769531, "logps/rejected": -129.3126220703125, "loss": 0.4762, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.6475099325180054, "rewards/margins": 1.2640306949615479, "rewards/rejected": -2.9115407466888428, "step": 2300 }, { "epoch": 0.69, "learning_rate": 1.7317880794701987e-07, "logits/chosen": -2.3797879219055176, "logits/rejected": -2.324965715408325, "logps/chosen": -116.2055435180664, "logps/rejected": -131.01705932617188, "loss": 0.5819, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.186680555343628, "rewards/margins": 1.4000349044799805, "rewards/rejected": -2.5867154598236084, "step": 2310 }, { "epoch": 0.69, "learning_rate": 1.71523178807947e-07, "logits/chosen": -2.227961778640747, "logits/rejected": -2.197960615158081, "logps/chosen": -106.0052490234375, "logps/rejected": -126.49810791015625, "loss": 0.5598, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.7543351650238037, "rewards/margins": 1.1439127922058105, "rewards/rejected": -2.8982481956481934, "step": 2320 }, { "epoch": 0.69, "learning_rate": 1.6986754966887418e-07, "logits/chosen": -2.34653902053833, "logits/rejected": -2.361428737640381, "logps/chosen": -93.7066879272461, "logps/rejected": -115.24361419677734, "loss": 0.5737, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.030475378036499, "rewards/margins": 1.2473831176757812, "rewards/rejected": -2.2778584957122803, "step": 2330 }, { "epoch": 0.7, "learning_rate": 1.6821192052980132e-07, "logits/chosen": -2.3806099891662598, "logits/rejected": -2.404531955718994, "logps/chosen": -108.77107238769531, "logps/rejected": -124.2442398071289, "loss": 0.5444, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4443012475967407, "rewards/margins": 0.9878839254379272, "rewards/rejected": -2.432185411453247, "step": 2340 }, { "epoch": 0.7, "learning_rate": 1.6655629139072846e-07, "logits/chosen": -2.447935104370117, "logits/rejected": -2.4319026470184326, "logps/chosen": -116.2603988647461, "logps/rejected": -123.78592681884766, "loss": 0.4883, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.3556338548660278, "rewards/margins": 1.9813722372055054, "rewards/rejected": -3.337006092071533, "step": 2350 }, { "epoch": 0.7, "learning_rate": 1.649006622516556e-07, "logits/chosen": -2.3291103839874268, "logits/rejected": -2.3166141510009766, "logps/chosen": -111.0199203491211, "logps/rejected": -117.00514221191406, "loss": 0.4997, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.5363482236862183, "rewards/margins": 0.870397686958313, "rewards/rejected": -2.4067459106445312, "step": 2360 }, { "epoch": 0.71, "learning_rate": 1.632450331125828e-07, "logits/chosen": -2.2856314182281494, "logits/rejected": -2.264632225036621, "logps/chosen": -102.96492004394531, "logps/rejected": -126.04007720947266, "loss": 0.456, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1887879371643066, "rewards/margins": 1.522206425666809, "rewards/rejected": -2.7109944820404053, "step": 2370 }, { "epoch": 0.71, "learning_rate": 1.6158940397350993e-07, "logits/chosen": -2.2702925205230713, "logits/rejected": -2.237971782684326, "logps/chosen": -99.81072998046875, "logps/rejected": -124.4561996459961, "loss": 0.5386, "rewards/accuracies": 0.625, "rewards/chosen": -1.322842001914978, "rewards/margins": 1.6832103729248047, "rewards/rejected": -3.0060524940490723, "step": 2380 }, { "epoch": 0.71, "learning_rate": 1.5993377483443707e-07, "logits/chosen": -2.265434741973877, "logits/rejected": -2.2928626537323, "logps/chosen": -83.43587493896484, "logps/rejected": -114.23832702636719, "loss": 0.5913, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.8695703744888306, "rewards/margins": 2.3257930278778076, "rewards/rejected": -3.1953632831573486, "step": 2390 }, { "epoch": 0.72, "learning_rate": 1.5827814569536424e-07, "logits/chosen": -2.575456142425537, "logits/rejected": -2.395871162414551, "logps/chosen": -105.5760498046875, "logps/rejected": -97.21208190917969, "loss": 0.5091, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.1773121356964111, "rewards/margins": 0.5559796690940857, "rewards/rejected": -1.7332916259765625, "step": 2400 }, { "epoch": 0.72, "learning_rate": 1.5662251655629138e-07, "logits/chosen": -2.4753453731536865, "logits/rejected": -2.414577007293701, "logps/chosen": -143.94302368164062, "logps/rejected": -137.99838256835938, "loss": 0.5287, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1706478595733643, "rewards/margins": 1.126072883605957, "rewards/rejected": -2.2967207431793213, "step": 2410 }, { "epoch": 0.72, "learning_rate": 1.5496688741721852e-07, "logits/chosen": -2.412086009979248, "logits/rejected": -2.3731260299682617, "logps/chosen": -106.2443618774414, "logps/rejected": -112.93099212646484, "loss": 0.4915, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.2554187774658203, "rewards/margins": 1.3224613666534424, "rewards/rejected": -2.5778801441192627, "step": 2420 }, { "epoch": 0.72, "learning_rate": 1.533112582781457e-07, "logits/chosen": -2.2778310775756836, "logits/rejected": -2.256308078765869, "logps/chosen": -120.01104736328125, "logps/rejected": -123.55589294433594, "loss": 0.465, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.085076093673706, "rewards/margins": 1.26901376247406, "rewards/rejected": -2.3540899753570557, "step": 2430 }, { "epoch": 0.73, "learning_rate": 1.5165562913907285e-07, "logits/chosen": -2.393749475479126, "logits/rejected": -2.3263931274414062, "logps/chosen": -117.22212219238281, "logps/rejected": -138.7658233642578, "loss": 0.5859, "rewards/accuracies": 0.75, "rewards/chosen": -1.8770434856414795, "rewards/margins": 1.216582179069519, "rewards/rejected": -3.0936264991760254, "step": 2440 }, { "epoch": 0.73, "learning_rate": 1.5e-07, "logits/chosen": -2.4931600093841553, "logits/rejected": -2.4225075244903564, "logps/chosen": -125.9542236328125, "logps/rejected": -141.84219360351562, "loss": 0.5621, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.3538328409194946, "rewards/margins": 1.1864392757415771, "rewards/rejected": -2.5402722358703613, "step": 2450 }, { "epoch": 0.73, "learning_rate": 1.4834437086092716e-07, "logits/chosen": -2.3211989402770996, "logits/rejected": -2.3927392959594727, "logps/chosen": -94.21218872070312, "logps/rejected": -111.05567932128906, "loss": 0.5479, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.555248737335205, "rewards/margins": 0.7776703834533691, "rewards/rejected": -2.332918882369995, "step": 2460 }, { "epoch": 0.74, "learning_rate": 1.466887417218543e-07, "logits/chosen": -2.4537739753723145, "logits/rejected": -2.3887171745300293, "logps/chosen": -104.2787857055664, "logps/rejected": -113.91764831542969, "loss": 0.6143, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.352063775062561, "rewards/margins": 0.9023284912109375, "rewards/rejected": -2.254392147064209, "step": 2470 }, { "epoch": 0.74, "learning_rate": 1.4503311258278144e-07, "logits/chosen": -2.4015471935272217, "logits/rejected": -2.42402720451355, "logps/chosen": -114.39097595214844, "logps/rejected": -132.64340209960938, "loss": 0.5667, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2820857763290405, "rewards/margins": 1.0415351390838623, "rewards/rejected": -2.323620557785034, "step": 2480 }, { "epoch": 0.74, "learning_rate": 1.4337748344370858e-07, "logits/chosen": -2.355255126953125, "logits/rejected": -2.277355909347534, "logps/chosen": -102.35648345947266, "logps/rejected": -109.78977966308594, "loss": 0.5125, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.6282755136489868, "rewards/margins": 1.255506992340088, "rewards/rejected": -2.8837826251983643, "step": 2490 }, { "epoch": 0.74, "learning_rate": 1.4172185430463577e-07, "logits/chosen": -2.5044684410095215, "logits/rejected": -2.3574650287628174, "logps/chosen": -130.39955139160156, "logps/rejected": -128.08071899414062, "loss": 0.5218, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2382522821426392, "rewards/margins": 1.3744902610778809, "rewards/rejected": -2.6127424240112305, "step": 2500 }, { "epoch": 0.74, "eval_logits/chosen": -2.261568784713745, "eval_logits/rejected": -2.2171857357025146, "eval_logps/chosen": -113.09461975097656, "eval_logps/rejected": -122.51795959472656, "eval_loss": 0.5183302164077759, "eval_rewards/accuracies": 0.703125, "eval_rewards/chosen": -1.3228007555007935, "eval_rewards/margins": 1.2242145538330078, "eval_rewards/rejected": -2.54701566696167, "eval_runtime": 523.6034, "eval_samples_per_second": 3.411, "eval_steps_per_second": 0.107, "step": 2500 }, { "epoch": 0.75, "learning_rate": 1.4006622516556291e-07, "logits/chosen": -2.4229695796966553, "logits/rejected": -2.3659071922302246, "logps/chosen": -97.41563415527344, "logps/rejected": -107.28167724609375, "loss": 0.5285, "rewards/accuracies": 0.75, "rewards/chosen": -1.0089452266693115, "rewards/margins": 1.1100685596466064, "rewards/rejected": -2.119013786315918, "step": 2510 }, { "epoch": 0.75, "learning_rate": 1.3841059602649005e-07, "logits/chosen": -2.368020534515381, "logits/rejected": -2.266580820083618, "logps/chosen": -107.78886413574219, "logps/rejected": -124.20140075683594, "loss": 0.4815, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1261156797409058, "rewards/margins": 1.54592764377594, "rewards/rejected": -2.6720430850982666, "step": 2520 }, { "epoch": 0.75, "learning_rate": 1.3675496688741722e-07, "logits/chosen": -2.3915557861328125, "logits/rejected": -2.3538260459899902, "logps/chosen": -96.66950988769531, "logps/rejected": -107.39601135253906, "loss": 0.5181, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.2402719259262085, "rewards/margins": 1.5004689693450928, "rewards/rejected": -2.7407407760620117, "step": 2530 }, { "epoch": 0.76, "learning_rate": 1.3509933774834436e-07, "logits/chosen": -2.3380367755889893, "logits/rejected": -2.2895896434783936, "logps/chosen": -122.12117767333984, "logps/rejected": -122.6964111328125, "loss": 0.5068, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.7105709314346313, "rewards/margins": 1.117949366569519, "rewards/rejected": -2.8285202980041504, "step": 2540 }, { "epoch": 0.76, "learning_rate": 1.334437086092715e-07, "logits/chosen": -2.650242567062378, "logits/rejected": -2.575338840484619, "logps/chosen": -116.67132568359375, "logps/rejected": -121.65495300292969, "loss": 0.4781, "rewards/accuracies": 0.625, "rewards/chosen": -1.0724276304244995, "rewards/margins": 0.7939808964729309, "rewards/rejected": -1.8664085865020752, "step": 2550 }, { "epoch": 0.76, "learning_rate": 1.317880794701987e-07, "logits/chosen": -2.4393889904022217, "logits/rejected": -2.356849431991577, "logps/chosen": -108.2217788696289, "logps/rejected": -105.802734375, "loss": 0.4569, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9382781982421875, "rewards/margins": 0.9701493978500366, "rewards/rejected": -1.9084275960922241, "step": 2560 }, { "epoch": 0.77, "learning_rate": 1.3013245033112583e-07, "logits/chosen": -2.27262544631958, "logits/rejected": -2.2650160789489746, "logps/chosen": -82.49347686767578, "logps/rejected": -105.01361083984375, "loss": 0.4757, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5115716457366943, "rewards/margins": 1.9998562335968018, "rewards/rejected": -2.511428117752075, "step": 2570 }, { "epoch": 0.77, "learning_rate": 1.2847682119205297e-07, "logits/chosen": -2.3641304969787598, "logits/rejected": -2.400428533554077, "logps/chosen": -95.62802124023438, "logps/rejected": -105.62736511230469, "loss": 0.5091, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2022227048873901, "rewards/margins": 1.1635633707046509, "rewards/rejected": -2.365786075592041, "step": 2580 }, { "epoch": 0.77, "learning_rate": 1.2682119205298011e-07, "logits/chosen": -2.2362232208251953, "logits/rejected": -2.294517993927002, "logps/chosen": -111.7828140258789, "logps/rejected": -107.35648345947266, "loss": 0.5689, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3542122840881348, "rewards/margins": 0.9707919359207153, "rewards/rejected": -2.3250043392181396, "step": 2590 }, { "epoch": 0.77, "learning_rate": 1.2516556291390728e-07, "logits/chosen": -2.4351532459259033, "logits/rejected": -2.3938307762145996, "logps/chosen": -116.37557220458984, "logps/rejected": -142.02877807617188, "loss": 0.4966, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.059444546699524, "rewards/margins": 1.6750872135162354, "rewards/rejected": -2.734531879425049, "step": 2600 }, { "epoch": 0.78, "learning_rate": 1.2350993377483442e-07, "logits/chosen": -2.130566358566284, "logits/rejected": -2.1427571773529053, "logps/chosen": -98.26994323730469, "logps/rejected": -125.13362121582031, "loss": 0.5217, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1679749488830566, "rewards/margins": 2.0151760578155518, "rewards/rejected": -4.183150768280029, "step": 2610 }, { "epoch": 0.78, "learning_rate": 1.218543046357616e-07, "logits/chosen": -2.3847343921661377, "logits/rejected": -2.3289005756378174, "logps/chosen": -103.18563079833984, "logps/rejected": -106.60140228271484, "loss": 0.526, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.5415022373199463, "rewards/margins": 1.246010184288025, "rewards/rejected": -2.7875125408172607, "step": 2620 }, { "epoch": 0.78, "learning_rate": 1.2019867549668873e-07, "logits/chosen": -2.344989776611328, "logits/rejected": -2.2486376762390137, "logps/chosen": -111.1012954711914, "logps/rejected": -114.76014709472656, "loss": 0.4662, "rewards/accuracies": 0.75, "rewards/chosen": -1.4294898509979248, "rewards/margins": 1.2511804103851318, "rewards/rejected": -2.6806702613830566, "step": 2630 }, { "epoch": 0.79, "learning_rate": 1.185430463576159e-07, "logits/chosen": -2.342101573944092, "logits/rejected": -2.3254072666168213, "logps/chosen": -114.9495620727539, "logps/rejected": -122.08809661865234, "loss": 0.4812, "rewards/accuracies": 0.75, "rewards/chosen": -1.4265601634979248, "rewards/margins": 1.6178033351898193, "rewards/rejected": -3.0443637371063232, "step": 2640 }, { "epoch": 0.79, "learning_rate": 1.1688741721854305e-07, "logits/chosen": -2.329153537750244, "logits/rejected": -2.2368149757385254, "logps/chosen": -123.8796157836914, "logps/rejected": -119.62074279785156, "loss": 0.4744, "rewards/accuracies": 0.625, "rewards/chosen": -1.5358905792236328, "rewards/margins": 1.2361419200897217, "rewards/rejected": -2.7720324993133545, "step": 2650 }, { "epoch": 0.79, "learning_rate": 1.1523178807947019e-07, "logits/chosen": -2.4591078758239746, "logits/rejected": -2.454157829284668, "logps/chosen": -116.4410629272461, "logps/rejected": -129.07809448242188, "loss": 0.5417, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.5216033458709717, "rewards/margins": 1.3263527154922485, "rewards/rejected": -2.8479561805725098, "step": 2660 }, { "epoch": 0.8, "learning_rate": 1.1357615894039735e-07, "logits/chosen": -2.287152051925659, "logits/rejected": -2.2752058506011963, "logps/chosen": -128.70211791992188, "logps/rejected": -141.4760284423828, "loss": 0.5571, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.9110784530639648, "rewards/margins": 1.6628735065460205, "rewards/rejected": -3.5739517211914062, "step": 2670 }, { "epoch": 0.8, "learning_rate": 1.119205298013245e-07, "logits/chosen": -2.3498637676239014, "logits/rejected": -2.3876309394836426, "logps/chosen": -121.30989074707031, "logps/rejected": -129.5779571533203, "loss": 0.4927, "rewards/accuracies": 0.875, "rewards/chosen": -1.256763219833374, "rewards/margins": 1.8756353855133057, "rewards/rejected": -3.1323981285095215, "step": 2680 }, { "epoch": 0.8, "learning_rate": 1.1026490066225165e-07, "logits/chosen": -2.2559609413146973, "logits/rejected": -2.2900869846343994, "logps/chosen": -89.24148559570312, "logps/rejected": -104.9818115234375, "loss": 0.5639, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5417016744613647, "rewards/margins": 1.3289363384246826, "rewards/rejected": -2.870638132095337, "step": 2690 }, { "epoch": 0.8, "learning_rate": 1.0860927152317881e-07, "logits/chosen": -2.3323373794555664, "logits/rejected": -2.4132628440856934, "logps/chosen": -112.27374267578125, "logps/rejected": -132.1646728515625, "loss": 0.5692, "rewards/accuracies": 0.75, "rewards/chosen": -1.039150595664978, "rewards/margins": 1.5584369897842407, "rewards/rejected": -2.597587823867798, "step": 2700 }, { "epoch": 0.81, "learning_rate": 1.0695364238410595e-07, "logits/chosen": -2.2826695442199707, "logits/rejected": -2.232888698577881, "logps/chosen": -107.91890716552734, "logps/rejected": -114.87126159667969, "loss": 0.5245, "rewards/accuracies": 0.75, "rewards/chosen": -1.3879740238189697, "rewards/margins": 1.4284284114837646, "rewards/rejected": -2.8164026737213135, "step": 2710 }, { "epoch": 0.81, "learning_rate": 1.0529801324503311e-07, "logits/chosen": -2.433330535888672, "logits/rejected": -2.3720269203186035, "logps/chosen": -122.9089584350586, "logps/rejected": -130.165771484375, "loss": 0.5503, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.7988684177398682, "rewards/margins": 1.0453321933746338, "rewards/rejected": -2.844200611114502, "step": 2720 }, { "epoch": 0.81, "learning_rate": 1.0364238410596025e-07, "logits/chosen": -2.432610511779785, "logits/rejected": -2.3609492778778076, "logps/chosen": -126.18135070800781, "logps/rejected": -137.95114135742188, "loss": 0.5377, "rewards/accuracies": 0.75, "rewards/chosen": -1.1943086385726929, "rewards/margins": 1.0217430591583252, "rewards/rejected": -2.2160518169403076, "step": 2730 }, { "epoch": 0.82, "learning_rate": 1.0198675496688741e-07, "logits/chosen": -2.40020489692688, "logits/rejected": -2.333512783050537, "logps/chosen": -120.67720794677734, "logps/rejected": -123.46641540527344, "loss": 0.4558, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.2497332096099854, "rewards/margins": 1.2155460119247437, "rewards/rejected": -2.4652791023254395, "step": 2740 }, { "epoch": 0.82, "learning_rate": 1.0033112582781457e-07, "logits/chosen": -2.4432952404022217, "logits/rejected": -2.3959970474243164, "logps/chosen": -131.6014862060547, "logps/rejected": -145.7483673095703, "loss": 0.4373, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9097809791564941, "rewards/margins": 1.4488131999969482, "rewards/rejected": -2.3585941791534424, "step": 2750 }, { "epoch": 0.82, "learning_rate": 9.867549668874171e-08, "logits/chosen": -2.2430427074432373, "logits/rejected": -2.2248117923736572, "logps/chosen": -99.05213928222656, "logps/rejected": -118.5693130493164, "loss": 0.5283, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.898768424987793, "rewards/margins": 1.2684627771377563, "rewards/rejected": -2.1672310829162598, "step": 2760 }, { "epoch": 0.83, "learning_rate": 9.701986754966887e-08, "logits/chosen": -2.4450364112854004, "logits/rejected": -2.3705830574035645, "logps/chosen": -107.76090240478516, "logps/rejected": -112.4260482788086, "loss": 0.4824, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4488307237625122, "rewards/margins": 0.7578933835029602, "rewards/rejected": -2.206723928451538, "step": 2770 }, { "epoch": 0.83, "learning_rate": 9.536423841059603e-08, "logits/chosen": -2.4003443717956543, "logits/rejected": -2.348435878753662, "logps/chosen": -98.28638458251953, "logps/rejected": -100.79689025878906, "loss": 0.5439, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.41732919216156, "rewards/margins": 1.0273702144622803, "rewards/rejected": -2.444699764251709, "step": 2780 }, { "epoch": 0.83, "learning_rate": 9.370860927152317e-08, "logits/chosen": -2.3565890789031982, "logits/rejected": -2.3134591579437256, "logps/chosen": -122.64701080322266, "logps/rejected": -140.7588348388672, "loss": 0.54, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.8713737726211548, "rewards/margins": 0.9109483957290649, "rewards/rejected": -2.7823221683502197, "step": 2790 }, { "epoch": 0.83, "learning_rate": 9.205298013245033e-08, "logits/chosen": -2.4065792560577393, "logits/rejected": -2.343113422393799, "logps/chosen": -113.3506088256836, "logps/rejected": -118.6836166381836, "loss": 0.6089, "rewards/accuracies": 0.75, "rewards/chosen": -1.640981912612915, "rewards/margins": 1.4465951919555664, "rewards/rejected": -3.0875768661499023, "step": 2800 }, { "epoch": 0.84, "learning_rate": 9.039735099337747e-08, "logits/chosen": -2.280989170074463, "logits/rejected": -2.2906501293182373, "logps/chosen": -108.36322021484375, "logps/rejected": -118.99311828613281, "loss": 0.4821, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.3466839790344238, "rewards/margins": 1.234621524810791, "rewards/rejected": -2.581305503845215, "step": 2810 }, { "epoch": 0.84, "learning_rate": 8.874172185430463e-08, "logits/chosen": -2.3098435401916504, "logits/rejected": -2.365722179412842, "logps/chosen": -142.2515411376953, "logps/rejected": -136.40847778320312, "loss": 0.6105, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9295507669448853, "rewards/margins": 0.7886122465133667, "rewards/rejected": -2.718163013458252, "step": 2820 }, { "epoch": 0.84, "learning_rate": 8.70860927152318e-08, "logits/chosen": -2.4758474826812744, "logits/rejected": -2.4529106616973877, "logps/chosen": -102.67512512207031, "logps/rejected": -108.22530364990234, "loss": 0.4814, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4004428386688232, "rewards/margins": 0.7858734130859375, "rewards/rejected": -2.1863162517547607, "step": 2830 }, { "epoch": 0.85, "learning_rate": 8.543046357615893e-08, "logits/chosen": -2.4003779888153076, "logits/rejected": -2.3763396739959717, "logps/chosen": -104.71977233886719, "logps/rejected": -117.16717529296875, "loss": 0.4928, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.1805320978164673, "rewards/margins": 1.6279674768447876, "rewards/rejected": -2.808499336242676, "step": 2840 }, { "epoch": 0.85, "learning_rate": 8.377483443708609e-08, "logits/chosen": -2.4015908241271973, "logits/rejected": -2.3405182361602783, "logps/chosen": -117.36273193359375, "logps/rejected": -124.22319030761719, "loss": 0.5652, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.3869013786315918, "rewards/margins": 0.9574357867240906, "rewards/rejected": -2.344337224960327, "step": 2850 }, { "epoch": 0.85, "learning_rate": 8.211920529801324e-08, "logits/chosen": -2.4349982738494873, "logits/rejected": -2.4097352027893066, "logps/chosen": -125.55684661865234, "logps/rejected": -132.021484375, "loss": 0.5082, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4107874631881714, "rewards/margins": 0.8303612470626831, "rewards/rejected": -2.2411487102508545, "step": 2860 }, { "epoch": 0.86, "learning_rate": 8.04635761589404e-08, "logits/chosen": -2.265141248703003, "logits/rejected": -2.169220447540283, "logps/chosen": -102.09349060058594, "logps/rejected": -119.7810287475586, "loss": 0.553, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4435564279556274, "rewards/margins": 1.294883131980896, "rewards/rejected": -2.7384393215179443, "step": 2870 }, { "epoch": 0.86, "learning_rate": 7.880794701986755e-08, "logits/chosen": -2.4385974407196045, "logits/rejected": -2.3579273223876953, "logps/chosen": -93.9774169921875, "logps/rejected": -96.58930969238281, "loss": 0.5111, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5919442772865295, "rewards/margins": 0.9432849884033203, "rewards/rejected": -1.5352293252944946, "step": 2880 }, { "epoch": 0.86, "learning_rate": 7.71523178807947e-08, "logits/chosen": -2.4252941608428955, "logits/rejected": -2.308663845062256, "logps/chosen": -139.50485229492188, "logps/rejected": -134.99417114257812, "loss": 0.4841, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1931045055389404, "rewards/margins": 1.3365012407302856, "rewards/rejected": -2.5296058654785156, "step": 2890 }, { "epoch": 0.86, "learning_rate": 7.549668874172185e-08, "logits/chosen": -2.3252806663513184, "logits/rejected": -2.2149767875671387, "logps/chosen": -119.28135681152344, "logps/rejected": -126.89034271240234, "loss": 0.4699, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.3043967485427856, "rewards/margins": 1.297178030014038, "rewards/rejected": -2.601574420928955, "step": 2900 }, { "epoch": 0.87, "learning_rate": 7.3841059602649e-08, "logits/chosen": -2.4337799549102783, "logits/rejected": -2.408616065979004, "logps/chosen": -105.0708236694336, "logps/rejected": -112.90872955322266, "loss": 0.5492, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.739833652973175, "rewards/margins": 1.0932036638259888, "rewards/rejected": -1.8330373764038086, "step": 2910 }, { "epoch": 0.87, "learning_rate": 7.218543046357616e-08, "logits/chosen": -2.474499225616455, "logits/rejected": -2.3793933391571045, "logps/chosen": -115.8188247680664, "logps/rejected": -119.8792953491211, "loss": 0.5534, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7591004371643066, "rewards/margins": 1.390928864479065, "rewards/rejected": -2.150029182434082, "step": 2920 }, { "epoch": 0.87, "learning_rate": 7.052980132450331e-08, "logits/chosen": -2.342878580093384, "logits/rejected": -2.2635059356689453, "logps/chosen": -112.3121566772461, "logps/rejected": -118.00971984863281, "loss": 0.4827, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1263339519500732, "rewards/margins": 0.7218903303146362, "rewards/rejected": -1.848224401473999, "step": 2930 }, { "epoch": 0.88, "learning_rate": 6.887417218543045e-08, "logits/chosen": -2.4378771781921387, "logits/rejected": -2.493478775024414, "logps/chosen": -101.32011413574219, "logps/rejected": -126.55435943603516, "loss": 0.4912, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0950850248336792, "rewards/margins": 1.0815422534942627, "rewards/rejected": -2.1766273975372314, "step": 2940 }, { "epoch": 0.88, "learning_rate": 6.721854304635762e-08, "logits/chosen": -2.395272731781006, "logits/rejected": -2.352908134460449, "logps/chosen": -115.22686767578125, "logps/rejected": -114.85673522949219, "loss": 0.5139, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.910413384437561, "rewards/margins": 0.9333620071411133, "rewards/rejected": -1.8437751531600952, "step": 2950 }, { "epoch": 0.88, "learning_rate": 6.556291390728476e-08, "logits/chosen": -2.4603307247161865, "logits/rejected": -2.4367270469665527, "logps/chosen": -111.51399993896484, "logps/rejected": -120.80682373046875, "loss": 0.5692, "rewards/accuracies": 0.625, "rewards/chosen": -1.1758167743682861, "rewards/margins": 0.8748563528060913, "rewards/rejected": -2.050673007965088, "step": 2960 }, { "epoch": 0.88, "learning_rate": 6.390728476821191e-08, "logits/chosen": -2.3244917392730713, "logits/rejected": -2.253732919692993, "logps/chosen": -108.8800277709961, "logps/rejected": -125.33662414550781, "loss": 0.4513, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.7894026637077332, "rewards/margins": 1.8209375143051147, "rewards/rejected": -2.610340118408203, "step": 2970 }, { "epoch": 0.89, "learning_rate": 6.225165562913907e-08, "logits/chosen": -2.387305974960327, "logits/rejected": -2.387345552444458, "logps/chosen": -107.43021392822266, "logps/rejected": -118.97044372558594, "loss": 0.6606, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9636829495429993, "rewards/margins": 0.8673983812332153, "rewards/rejected": -1.8310810327529907, "step": 2980 }, { "epoch": 0.89, "learning_rate": 6.059602649006622e-08, "logits/chosen": -2.3770089149475098, "logits/rejected": -2.371371269226074, "logps/chosen": -123.25062561035156, "logps/rejected": -140.9857635498047, "loss": 0.5031, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.802879810333252, "rewards/margins": 1.1967840194702148, "rewards/rejected": -1.9996639490127563, "step": 2990 }, { "epoch": 0.89, "learning_rate": 5.8940397350993375e-08, "logits/chosen": -2.3844501972198486, "logits/rejected": -2.415923595428467, "logps/chosen": -96.17528533935547, "logps/rejected": -111.2402114868164, "loss": 0.4914, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7955904006958008, "rewards/margins": 1.3297996520996094, "rewards/rejected": -2.125389814376831, "step": 3000 }, { "epoch": 0.89, "eval_logits/chosen": -2.2567203044891357, "eval_logits/rejected": -2.214937925338745, "eval_logps/chosen": -110.69182586669922, "eval_logps/rejected": -120.59849548339844, "eval_loss": 0.5078982710838318, "eval_rewards/accuracies": 0.7120535969734192, "eval_rewards/chosen": -1.0825201272964478, "eval_rewards/margins": 1.2725489139556885, "eval_rewards/rejected": -2.3550689220428467, "eval_runtime": 502.7018, "eval_samples_per_second": 3.553, "eval_steps_per_second": 0.111, "step": 3000 }, { "epoch": 0.9, "learning_rate": 5.728476821192053e-08, "logits/chosen": -2.398317575454712, "logits/rejected": -2.4122400283813477, "logps/chosen": -93.20875549316406, "logps/rejected": -113.88653564453125, "loss": 0.549, "rewards/accuracies": 0.625, "rewards/chosen": -0.8295547366142273, "rewards/margins": 1.022578239440918, "rewards/rejected": -1.852132797241211, "step": 3010 }, { "epoch": 0.9, "learning_rate": 5.5629139072847675e-08, "logits/chosen": -2.414301633834839, "logits/rejected": -2.3872337341308594, "logps/chosen": -129.2257080078125, "logps/rejected": -136.29031372070312, "loss": 0.4718, "rewards/accuracies": 0.875, "rewards/chosen": -1.3618860244750977, "rewards/margins": 1.9172807931900024, "rewards/rejected": -3.2791664600372314, "step": 3020 }, { "epoch": 0.9, "learning_rate": 5.397350993377483e-08, "logits/chosen": -2.446453809738159, "logits/rejected": -2.384152889251709, "logps/chosen": -120.69456481933594, "logps/rejected": -128.5080108642578, "loss": 0.4889, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.820598304271698, "rewards/margins": 1.557586908340454, "rewards/rejected": -2.3781850337982178, "step": 3030 }, { "epoch": 0.91, "learning_rate": 5.231788079470199e-08, "logits/chosen": -2.416982889175415, "logits/rejected": -2.296403646469116, "logps/chosen": -110.80255126953125, "logps/rejected": -113.04368591308594, "loss": 0.4946, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1277140378952026, "rewards/margins": 1.060675859451294, "rewards/rejected": -2.188389778137207, "step": 3040 }, { "epoch": 0.91, "learning_rate": 5.0662251655629135e-08, "logits/chosen": -2.355494976043701, "logits/rejected": -2.2958462238311768, "logps/chosen": -113.16410064697266, "logps/rejected": -119.9725112915039, "loss": 0.4515, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6746724247932434, "rewards/margins": 1.8009824752807617, "rewards/rejected": -2.4756548404693604, "step": 3050 }, { "epoch": 0.91, "learning_rate": 4.900662251655629e-08, "logits/chosen": -2.4485743045806885, "logits/rejected": -2.426466703414917, "logps/chosen": -110.64210510253906, "logps/rejected": -122.92867279052734, "loss": 0.4162, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9479681849479675, "rewards/margins": 1.6344906091690063, "rewards/rejected": -2.582458972930908, "step": 3060 }, { "epoch": 0.91, "learning_rate": 4.735099337748344e-08, "logits/chosen": -2.279062509536743, "logits/rejected": -2.2378296852111816, "logps/chosen": -117.4856185913086, "logps/rejected": -126.33473205566406, "loss": 0.5187, "rewards/accuracies": 0.75, "rewards/chosen": -0.9676671028137207, "rewards/margins": 1.4139858484268188, "rewards/rejected": -2.381652593612671, "step": 3070 }, { "epoch": 0.92, "learning_rate": 4.5695364238410595e-08, "logits/chosen": -2.27183198928833, "logits/rejected": -2.2195851802825928, "logps/chosen": -99.91886138916016, "logps/rejected": -139.50657653808594, "loss": 0.5204, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.060943603515625, "rewards/margins": 2.9951958656311035, "rewards/rejected": -4.056139945983887, "step": 3080 }, { "epoch": 0.92, "learning_rate": 4.403973509933775e-08, "logits/chosen": -2.413677215576172, "logits/rejected": -2.440647602081299, "logps/chosen": -118.7281723022461, "logps/rejected": -134.04771423339844, "loss": 0.5028, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.119231939315796, "rewards/margins": 1.4490314722061157, "rewards/rejected": -2.568263530731201, "step": 3090 }, { "epoch": 0.92, "learning_rate": 4.23841059602649e-08, "logits/chosen": -2.3565783500671387, "logits/rejected": -2.4461493492126465, "logps/chosen": -108.08616638183594, "logps/rejected": -132.34011840820312, "loss": 0.485, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.1898248195648193, "rewards/margins": 1.3966195583343506, "rewards/rejected": -2.58644437789917, "step": 3100 }, { "epoch": 0.93, "learning_rate": 4.072847682119205e-08, "logits/chosen": -2.396179437637329, "logits/rejected": -2.4256176948547363, "logps/chosen": -96.67437744140625, "logps/rejected": -101.86246490478516, "loss": 0.4582, "rewards/accuracies": 0.75, "rewards/chosen": -0.4870302081108093, "rewards/margins": 1.127990484237671, "rewards/rejected": -1.615020751953125, "step": 3110 }, { "epoch": 0.93, "learning_rate": 3.90728476821192e-08, "logits/chosen": -2.3725028038024902, "logits/rejected": -2.322782039642334, "logps/chosen": -128.52896118164062, "logps/rejected": -129.73118591308594, "loss": 0.5572, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.3587214946746826, "rewards/margins": 1.5958476066589355, "rewards/rejected": -2.954568862915039, "step": 3120 }, { "epoch": 0.93, "learning_rate": 3.7417218543046355e-08, "logits/chosen": -2.378821611404419, "logits/rejected": -2.277832269668579, "logps/chosen": -87.0296630859375, "logps/rejected": -106.12138366699219, "loss": 0.5238, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8327251672744751, "rewards/margins": 1.2228658199310303, "rewards/rejected": -2.055591106414795, "step": 3130 }, { "epoch": 0.94, "learning_rate": 3.576158940397351e-08, "logits/chosen": -2.549872398376465, "logits/rejected": -2.4757115840911865, "logps/chosen": -114.14369201660156, "logps/rejected": -116.66259765625, "loss": 0.5169, "rewards/accuracies": 0.625, "rewards/chosen": -0.9791749119758606, "rewards/margins": 0.8427003026008606, "rewards/rejected": -1.821874976158142, "step": 3140 }, { "epoch": 0.94, "learning_rate": 3.410596026490066e-08, "logits/chosen": -2.433527708053589, "logits/rejected": -2.371525764465332, "logps/chosen": -103.0054931640625, "logps/rejected": -103.31925964355469, "loss": 0.5538, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8271923065185547, "rewards/margins": 1.119652509689331, "rewards/rejected": -1.9468450546264648, "step": 3150 }, { "epoch": 0.94, "learning_rate": 3.245033112582781e-08, "logits/chosen": -2.337153434753418, "logits/rejected": -2.2308475971221924, "logps/chosen": -129.55728149414062, "logps/rejected": -122.7024917602539, "loss": 0.4763, "rewards/accuracies": 0.625, "rewards/chosen": -0.9213937520980835, "rewards/margins": 1.0446635484695435, "rewards/rejected": -1.9660571813583374, "step": 3160 }, { "epoch": 0.94, "learning_rate": 3.079470198675496e-08, "logits/chosen": -2.2858211994171143, "logits/rejected": -2.313380002975464, "logps/chosen": -107.20402526855469, "logps/rejected": -136.98562622070312, "loss": 0.5288, "rewards/accuracies": 0.625, "rewards/chosen": -0.9993183016777039, "rewards/margins": 1.509690284729004, "rewards/rejected": -2.5090086460113525, "step": 3170 }, { "epoch": 0.95, "learning_rate": 2.913907284768212e-08, "logits/chosen": -2.3693079948425293, "logits/rejected": -2.284874677658081, "logps/chosen": -106.6112289428711, "logps/rejected": -126.05074310302734, "loss": 0.4491, "rewards/accuracies": 0.75, "rewards/chosen": -0.7449665665626526, "rewards/margins": 1.826768159866333, "rewards/rejected": -2.571734666824341, "step": 3180 }, { "epoch": 0.95, "learning_rate": 2.748344370860927e-08, "logits/chosen": -2.2911553382873535, "logits/rejected": -2.380384922027588, "logps/chosen": -102.5718765258789, "logps/rejected": -124.40003967285156, "loss": 0.4937, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7534275650978088, "rewards/margins": 1.1661580801010132, "rewards/rejected": -1.9195858240127563, "step": 3190 }, { "epoch": 0.95, "learning_rate": 2.5827814569536422e-08, "logits/chosen": -2.4230473041534424, "logits/rejected": -2.4315543174743652, "logps/chosen": -117.46553802490234, "logps/rejected": -130.05776977539062, "loss": 0.4991, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.8374800682067871, "rewards/margins": 1.2378642559051514, "rewards/rejected": -2.0753445625305176, "step": 3200 }, { "epoch": 0.96, "learning_rate": 2.4172185430463576e-08, "logits/chosen": -2.417757034301758, "logits/rejected": -2.2985901832580566, "logps/chosen": -132.27774047851562, "logps/rejected": -133.81459045410156, "loss": 0.5058, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.378666877746582, "rewards/margins": 0.97132807970047, "rewards/rejected": -2.3499951362609863, "step": 3210 }, { "epoch": 0.96, "learning_rate": 2.2516556291390726e-08, "logits/chosen": -2.327725887298584, "logits/rejected": -2.290168046951294, "logps/chosen": -118.74835205078125, "logps/rejected": -132.76882934570312, "loss": 0.6159, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.3064758777618408, "rewards/margins": 1.0742686986923218, "rewards/rejected": -2.380744457244873, "step": 3220 }, { "epoch": 0.96, "learning_rate": 2.0860927152317882e-08, "logits/chosen": -2.3731508255004883, "logits/rejected": -2.367323398590088, "logps/chosen": -126.88232421875, "logps/rejected": -135.72384643554688, "loss": 0.5072, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.890730082988739, "rewards/margins": 1.7571513652801514, "rewards/rejected": -2.6478817462921143, "step": 3230 }, { "epoch": 0.97, "learning_rate": 1.9205298013245032e-08, "logits/chosen": -2.4219555854797363, "logits/rejected": -2.4555513858795166, "logps/chosen": -96.6889419555664, "logps/rejected": -114.50843811035156, "loss": 0.514, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.1261937618255615, "rewards/margins": 1.0691124200820923, "rewards/rejected": -2.1953060626983643, "step": 3240 }, { "epoch": 0.97, "learning_rate": 1.7549668874172186e-08, "logits/chosen": -2.3101606369018555, "logits/rejected": -2.3013217449188232, "logps/chosen": -95.89967346191406, "logps/rejected": -99.94120025634766, "loss": 0.4685, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.2933688163757324, "rewards/margins": 0.8693240880966187, "rewards/rejected": -2.1626930236816406, "step": 3250 }, { "epoch": 0.97, "learning_rate": 1.5894039735099336e-08, "logits/chosen": -2.22920823097229, "logits/rejected": -2.2497153282165527, "logps/chosen": -83.50569152832031, "logps/rejected": -98.3634033203125, "loss": 0.514, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0309844017028809, "rewards/margins": 1.4945679903030396, "rewards/rejected": -2.525552272796631, "step": 3260 }, { "epoch": 0.97, "learning_rate": 1.4238410596026489e-08, "logits/chosen": -2.220327854156494, "logits/rejected": -2.2442502975463867, "logps/chosen": -105.8703842163086, "logps/rejected": -126.78196716308594, "loss": 0.4796, "rewards/accuracies": 0.75, "rewards/chosen": -1.513962984085083, "rewards/margins": 1.4240639209747314, "rewards/rejected": -2.9380269050598145, "step": 3270 }, { "epoch": 0.98, "learning_rate": 1.2582781456953642e-08, "logits/chosen": -2.417300224304199, "logits/rejected": -2.3726484775543213, "logps/chosen": -126.7840576171875, "logps/rejected": -133.47689819335938, "loss": 0.4275, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0579255819320679, "rewards/margins": 1.5306205749511719, "rewards/rejected": -2.58854603767395, "step": 3280 }, { "epoch": 0.98, "learning_rate": 1.0927152317880794e-08, "logits/chosen": -2.4346401691436768, "logits/rejected": -2.4542853832244873, "logps/chosen": -119.21122741699219, "logps/rejected": -128.86886596679688, "loss": 0.4999, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.2019822597503662, "rewards/margins": 1.384701132774353, "rewards/rejected": -2.5866830348968506, "step": 3290 }, { "epoch": 0.98, "learning_rate": 9.271523178807947e-09, "logits/chosen": -2.4030935764312744, "logits/rejected": -2.3885276317596436, "logps/chosen": -111.55142974853516, "logps/rejected": -113.03800964355469, "loss": 0.6577, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.222048282623291, "rewards/margins": 0.9595780372619629, "rewards/rejected": -2.181626558303833, "step": 3300 }, { "epoch": 0.99, "learning_rate": 7.6158940397351e-09, "logits/chosen": -2.2258238792419434, "logits/rejected": -2.1862361431121826, "logps/chosen": -92.22099304199219, "logps/rejected": -98.86279296875, "loss": 0.5937, "rewards/accuracies": 0.625, "rewards/chosen": -1.713568925857544, "rewards/margins": 0.8357810974121094, "rewards/rejected": -2.5493500232696533, "step": 3310 }, { "epoch": 0.99, "learning_rate": 5.960264900662252e-09, "logits/chosen": -2.317258358001709, "logits/rejected": -2.3031933307647705, "logps/chosen": -109.45621490478516, "logps/rejected": -111.22418212890625, "loss": 0.8281, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2969977855682373, "rewards/margins": 1.3577762842178345, "rewards/rejected": -2.6547741889953613, "step": 3320 }, { "epoch": 0.99, "learning_rate": 4.3046357615894034e-09, "logits/chosen": -2.2622385025024414, "logits/rejected": -2.2199172973632812, "logps/chosen": -98.4054946899414, "logps/rejected": -112.76808166503906, "loss": 0.4438, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9782658815383911, "rewards/margins": 1.7501733303070068, "rewards/rejected": -2.7284390926361084, "step": 3330 }, { "epoch": 1.0, "learning_rate": 2.6490066225165564e-09, "logits/chosen": -2.3729500770568848, "logits/rejected": -2.432080030441284, "logps/chosen": -101.60713195800781, "logps/rejected": -131.0595703125, "loss": 0.5899, "rewards/accuracies": 0.625, "rewards/chosen": -0.8371769189834595, "rewards/margins": 0.8700039982795715, "rewards/rejected": -1.7071807384490967, "step": 3340 }, { "epoch": 1.0, "learning_rate": 9.933774834437085e-10, "logits/chosen": -2.2028284072875977, "logits/rejected": -2.2098453044891357, "logps/chosen": -109.49913024902344, "logps/rejected": -121.43013763427734, "loss": 0.4436, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7948501110076904, "rewards/margins": 2.1088409423828125, "rewards/rejected": -2.903691291809082, "step": 3350 }, { "epoch": 1.0, "step": 3356, "total_flos": 0.0, "train_loss": 0.58384587518933, "train_runtime": 30698.0699, "train_samples_per_second": 1.749, "train_steps_per_second": 0.109 } ], "logging_steps": 10, "max_steps": 3356, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }