| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 3356, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.488095238095238e-09, | |
| "logits/chosen": -2.6795692443847656, | |
| "logits/rejected": -2.624149799346924, | |
| "logps/chosen": -54.570396423339844, | |
| "logps/rejected": -74.21392822265625, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.4880952380952379e-08, | |
| "logits/chosen": -2.7060725688934326, | |
| "logits/rejected": -2.6765432357788086, | |
| "logps/chosen": -95.24983978271484, | |
| "logps/rejected": -91.18234252929688, | |
| "loss": 0.6933, | |
| "rewards/accuracies": 0.4166666567325592, | |
| "rewards/chosen": 0.0005662046023644507, | |
| "rewards/margins": -0.006994906347244978, | |
| "rewards/rejected": 0.007561111822724342, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.9761904761904758e-08, | |
| "logits/chosen": -2.5795836448669434, | |
| "logits/rejected": -2.592409133911133, | |
| "logps/chosen": -124.33586120605469, | |
| "logps/rejected": -103.54573822021484, | |
| "loss": 0.6947, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.0065773227252066135, | |
| "rewards/margins": -0.0029559016693383455, | |
| "rewards/rejected": 0.009533221833407879, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.4642857142857145e-08, | |
| "logits/chosen": -2.579939126968384, | |
| "logits/rejected": -2.5497870445251465, | |
| "logps/chosen": -68.13322448730469, | |
| "logps/rejected": -66.37541961669922, | |
| "loss": 0.6921, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.01673651486635208, | |
| "rewards/margins": 0.00222357758320868, | |
| "rewards/rejected": 0.01451293658465147, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 5.9523809523809515e-08, | |
| "logits/chosen": -2.6564245223999023, | |
| "logits/rejected": -2.608503818511963, | |
| "logps/chosen": -83.7612533569336, | |
| "logps/rejected": -79.3699951171875, | |
| "loss": 0.6886, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.050556618720293045, | |
| "rewards/margins": 0.005645673722028732, | |
| "rewards/rejected": 0.04491094499826431, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 7.44047619047619e-08, | |
| "logits/chosen": -2.752234935760498, | |
| "logits/rejected": -2.6355555057525635, | |
| "logps/chosen": -127.2625503540039, | |
| "logps/rejected": -114.26876068115234, | |
| "loss": 0.6892, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": 0.09844812005758286, | |
| "rewards/margins": 0.0011480912799015641, | |
| "rewards/rejected": 0.09730003774166107, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 8.928571428571429e-08, | |
| "logits/chosen": -2.669374704360962, | |
| "logits/rejected": -2.652597188949585, | |
| "logps/chosen": -103.32049560546875, | |
| "logps/rejected": -105.29325103759766, | |
| "loss": 0.6817, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.19618754088878632, | |
| "rewards/margins": 0.012378268875181675, | |
| "rewards/rejected": 0.18380926549434662, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.0416666666666667e-07, | |
| "logits/chosen": -2.7530548572540283, | |
| "logits/rejected": -2.680541753768921, | |
| "logps/chosen": -84.53085327148438, | |
| "logps/rejected": -84.82635498046875, | |
| "loss": 0.6819, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.33352726697921753, | |
| "rewards/margins": 0.020224859938025475, | |
| "rewards/rejected": 0.3133023679256439, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.1904761904761903e-07, | |
| "logits/chosen": -2.572601079940796, | |
| "logits/rejected": -2.5415000915527344, | |
| "logps/chosen": -96.4114761352539, | |
| "logps/rejected": -84.30821228027344, | |
| "loss": 0.6708, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.4342936873435974, | |
| "rewards/margins": 0.0613841637969017, | |
| "rewards/rejected": 0.3729095458984375, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.3392857142857142e-07, | |
| "logits/chosen": -2.7009196281433105, | |
| "logits/rejected": -2.698122262954712, | |
| "logps/chosen": -78.68132781982422, | |
| "logps/rejected": -81.79669189453125, | |
| "loss": 0.6546, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.5974748730659485, | |
| "rewards/margins": 0.08051940053701401, | |
| "rewards/rejected": 0.5169554948806763, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.488095238095238e-07, | |
| "logits/chosen": -2.5833797454833984, | |
| "logits/rejected": -2.624276876449585, | |
| "logps/chosen": -77.67559814453125, | |
| "logps/rejected": -90.95040130615234, | |
| "loss": 0.6601, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.5890167355537415, | |
| "rewards/margins": 0.06459061056375504, | |
| "rewards/rejected": 0.5244261026382446, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.6369047619047617e-07, | |
| "logits/chosen": -2.5440800189971924, | |
| "logits/rejected": -2.536761522293091, | |
| "logps/chosen": -79.65280151367188, | |
| "logps/rejected": -77.1148681640625, | |
| "loss": 0.6643, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.48702484369277954, | |
| "rewards/margins": 0.02558879181742668, | |
| "rewards/rejected": 0.46143603324890137, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.7857142857142858e-07, | |
| "logits/chosen": -2.59000301361084, | |
| "logits/rejected": -2.6294052600860596, | |
| "logps/chosen": -98.95535278320312, | |
| "logps/rejected": -93.15876770019531, | |
| "loss": 0.6528, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.44851523637771606, | |
| "rewards/margins": 0.04791822284460068, | |
| "rewards/rejected": 0.4005970060825348, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.9345238095238096e-07, | |
| "logits/chosen": -2.5660836696624756, | |
| "logits/rejected": -2.532435894012451, | |
| "logps/chosen": -81.32213592529297, | |
| "logps/rejected": -86.37200927734375, | |
| "loss": 0.6286, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.6584704518318176, | |
| "rewards/margins": 0.1672821044921875, | |
| "rewards/rejected": 0.4911883771419525, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.0833333333333333e-07, | |
| "logits/chosen": -2.657209873199463, | |
| "logits/rejected": -2.620845079421997, | |
| "logps/chosen": -98.81898498535156, | |
| "logps/rejected": -91.02754974365234, | |
| "loss": 0.6596, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.8377985954284668, | |
| "rewards/margins": 0.049154218286275864, | |
| "rewards/rejected": 0.7886443138122559, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.232142857142857e-07, | |
| "logits/chosen": -2.594756603240967, | |
| "logits/rejected": -2.5098514556884766, | |
| "logps/chosen": -108.9326171875, | |
| "logps/rejected": -124.50955963134766, | |
| "loss": 0.6063, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": 0.6377179622650146, | |
| "rewards/margins": 0.7904380559921265, | |
| "rewards/rejected": -0.15272006392478943, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.3809523809523806e-07, | |
| "logits/chosen": -2.5515310764312744, | |
| "logits/rejected": -2.4522361755371094, | |
| "logps/chosen": -90.93934631347656, | |
| "logps/rejected": -106.53071594238281, | |
| "loss": 0.6199, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.8264306783676147, | |
| "rewards/margins": 0.5799387097358704, | |
| "rewards/rejected": 0.246491938829422, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.5297619047619046e-07, | |
| "logits/chosen": -2.511021137237549, | |
| "logits/rejected": -2.5456349849700928, | |
| "logps/chosen": -91.14982604980469, | |
| "logps/rejected": -99.70429992675781, | |
| "loss": 0.6079, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.7046107649803162, | |
| "rewards/margins": 0.36221450567245483, | |
| "rewards/rejected": 0.34239625930786133, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.6785714285714284e-07, | |
| "logits/chosen": -2.520282030105591, | |
| "logits/rejected": -2.503950595855713, | |
| "logps/chosen": -79.16224670410156, | |
| "logps/rejected": -89.08283233642578, | |
| "loss": 0.6324, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.8210590481758118, | |
| "rewards/margins": 0.16991613805294037, | |
| "rewards/rejected": 0.651142954826355, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 2.827380952380952e-07, | |
| "logits/chosen": -2.6823697090148926, | |
| "logits/rejected": -2.633678674697876, | |
| "logps/chosen": -104.0126724243164, | |
| "logps/rejected": -103.51971435546875, | |
| "loss": 0.5904, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 1.1408202648162842, | |
| "rewards/margins": 0.2863886058330536, | |
| "rewards/rejected": 0.8544318079948425, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 2.976190476190476e-07, | |
| "logits/chosen": -2.530428409576416, | |
| "logits/rejected": -2.50227689743042, | |
| "logps/chosen": -100.63572692871094, | |
| "logps/rejected": -94.46806335449219, | |
| "loss": 0.6018, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": 0.7896903157234192, | |
| "rewards/margins": 0.45959681272506714, | |
| "rewards/rejected": 0.33009350299835205, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.1249999999999997e-07, | |
| "logits/chosen": -2.4940271377563477, | |
| "logits/rejected": -2.5085806846618652, | |
| "logps/chosen": -92.1917724609375, | |
| "logps/rejected": -107.3184585571289, | |
| "loss": 0.5868, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.7482628226280212, | |
| "rewards/margins": 0.49973025918006897, | |
| "rewards/rejected": 0.24853253364562988, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.2738095238095235e-07, | |
| "logits/chosen": -2.5470972061157227, | |
| "logits/rejected": -2.5241191387176514, | |
| "logps/chosen": -113.54488372802734, | |
| "logps/rejected": -129.91867065429688, | |
| "loss": 0.5871, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.4822530746459961, | |
| "rewards/margins": 0.5863619446754456, | |
| "rewards/rejected": -0.10410883277654648, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.4226190476190473e-07, | |
| "logits/chosen": -2.5854454040527344, | |
| "logits/rejected": -2.427126169204712, | |
| "logps/chosen": -95.35980987548828, | |
| "logps/rejected": -81.82037353515625, | |
| "loss": 0.6183, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.8959482908248901, | |
| "rewards/margins": 0.8998041152954102, | |
| "rewards/rejected": -0.0038558482192456722, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.5714285714285716e-07, | |
| "logits/chosen": -2.5749735832214355, | |
| "logits/rejected": -2.58799409866333, | |
| "logps/chosen": -76.01658630371094, | |
| "logps/rejected": -77.50577545166016, | |
| "loss": 0.6595, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.28254395723342896, | |
| "rewards/margins": 0.4179397523403168, | |
| "rewards/rejected": -0.13539579510688782, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.7202380952380953e-07, | |
| "logits/chosen": -2.655733823776245, | |
| "logits/rejected": -2.6001226902008057, | |
| "logps/chosen": -112.2961654663086, | |
| "logps/rejected": -124.30081939697266, | |
| "loss": 0.5967, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 1.081606388092041, | |
| "rewards/margins": 0.3873857855796814, | |
| "rewards/rejected": 0.6942206025123596, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 3.869047619047619e-07, | |
| "logits/chosen": -2.3797781467437744, | |
| "logits/rejected": -2.3257176876068115, | |
| "logps/chosen": -100.49422454833984, | |
| "logps/rejected": -116.31571197509766, | |
| "loss": 0.5687, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.26938995718955994, | |
| "rewards/margins": 0.5422745943069458, | |
| "rewards/rejected": -0.27288463711738586, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.017857142857143e-07, | |
| "logits/chosen": -2.506838321685791, | |
| "logits/rejected": -2.5618858337402344, | |
| "logps/chosen": -103.68598937988281, | |
| "logps/rejected": -116.80242919921875, | |
| "loss": 0.6466, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.9570896029472351, | |
| "rewards/margins": 0.3445149064064026, | |
| "rewards/rejected": 0.6125746965408325, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.1666666666666667e-07, | |
| "logits/chosen": -2.5634925365448, | |
| "logits/rejected": -2.520244836807251, | |
| "logps/chosen": -102.6960678100586, | |
| "logps/rejected": -90.80632019042969, | |
| "loss": 0.5996, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.0112148523330688, | |
| "rewards/margins": 0.3889988362789154, | |
| "rewards/rejected": 0.6222161054611206, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.3154761904761904e-07, | |
| "logits/chosen": -2.569206714630127, | |
| "logits/rejected": -2.5652623176574707, | |
| "logps/chosen": -85.24828338623047, | |
| "logps/rejected": -93.45872497558594, | |
| "loss": 0.5347, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.3659771978855133, | |
| "rewards/margins": 0.7742798924446106, | |
| "rewards/rejected": -0.4083026945590973, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.464285714285714e-07, | |
| "logits/chosen": -2.363185167312622, | |
| "logits/rejected": -2.371516227722168, | |
| "logps/chosen": -99.2336654663086, | |
| "logps/rejected": -92.32693481445312, | |
| "loss": 0.5878, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.7069708704948425, | |
| "rewards/margins": 0.8502944111824036, | |
| "rewards/rejected": -0.1433234065771103, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.613095238095238e-07, | |
| "logits/chosen": -2.412259578704834, | |
| "logits/rejected": -2.4086456298828125, | |
| "logps/chosen": -96.43733978271484, | |
| "logps/rejected": -120.0870590209961, | |
| "loss": 0.5642, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": 0.19554999470710754, | |
| "rewards/margins": 1.321014404296875, | |
| "rewards/rejected": -1.1254642009735107, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.761904761904761e-07, | |
| "logits/chosen": -2.6165080070495605, | |
| "logits/rejected": -2.6191306114196777, | |
| "logps/chosen": -117.46064758300781, | |
| "logps/rejected": -122.75732421875, | |
| "loss": 0.5508, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.7428444623947144, | |
| "rewards/margins": 0.3669503331184387, | |
| "rewards/rejected": -1.1097948551177979, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.910714285714285e-07, | |
| "logits/chosen": -2.493110179901123, | |
| "logits/rejected": -2.4452643394470215, | |
| "logps/chosen": -91.34004211425781, | |
| "logps/rejected": -103.17684173583984, | |
| "loss": 0.5986, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.6157582402229309, | |
| "rewards/margins": 0.7595478892326355, | |
| "rewards/rejected": -1.375306248664856, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.993377483443708e-07, | |
| "logits/chosen": -2.547645092010498, | |
| "logits/rejected": -2.4399895668029785, | |
| "logps/chosen": -106.4365005493164, | |
| "logps/rejected": -109.07222747802734, | |
| "loss": 0.5639, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.36611366271972656, | |
| "rewards/margins": 0.6897183060646057, | |
| "rewards/rejected": -1.0558319091796875, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.97682119205298e-07, | |
| "logits/chosen": -2.5453834533691406, | |
| "logits/rejected": -2.5119881629943848, | |
| "logps/chosen": -108.45722961425781, | |
| "logps/rejected": -105.61241149902344, | |
| "loss": 0.5994, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.21297264099121094, | |
| "rewards/margins": 0.35165560245513916, | |
| "rewards/rejected": -0.13868291676044464, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.960264900662251e-07, | |
| "logits/chosen": -2.568861484527588, | |
| "logits/rejected": -2.552140712738037, | |
| "logps/chosen": -99.7040786743164, | |
| "logps/rejected": -109.383544921875, | |
| "loss": 0.5401, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.2864856421947479, | |
| "rewards/margins": 0.8699267506599426, | |
| "rewards/rejected": -1.1564123630523682, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.943708609271523e-07, | |
| "logits/chosen": -2.584989070892334, | |
| "logits/rejected": -2.524940013885498, | |
| "logps/chosen": -116.22591400146484, | |
| "logps/rejected": -132.27352905273438, | |
| "loss": 0.5816, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3508208990097046, | |
| "rewards/margins": 0.757738471031189, | |
| "rewards/rejected": -1.1085593700408936, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.927152317880794e-07, | |
| "logits/chosen": -2.5064499378204346, | |
| "logits/rejected": -2.520719528198242, | |
| "logps/chosen": -105.9725570678711, | |
| "logps/rejected": -106.05126953125, | |
| "loss": 0.6476, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.491422414779663, | |
| "rewards/margins": 0.45032089948654175, | |
| "rewards/rejected": -1.9417431354522705, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.910596026490066e-07, | |
| "logits/chosen": -2.4913430213928223, | |
| "logits/rejected": -2.5125203132629395, | |
| "logps/chosen": -124.0137710571289, | |
| "logps/rejected": -119.0078353881836, | |
| "loss": 0.6202, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.6193113327026367, | |
| "rewards/margins": 0.656644880771637, | |
| "rewards/rejected": -2.275956392288208, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.894039735099338e-07, | |
| "logits/chosen": -2.5196266174316406, | |
| "logits/rejected": -2.492640256881714, | |
| "logps/chosen": -108.40077209472656, | |
| "logps/rejected": -106.96036529541016, | |
| "loss": 0.5793, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.8650729060173035, | |
| "rewards/margins": 0.4100722372531891, | |
| "rewards/rejected": -1.275145173072815, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.877483443708609e-07, | |
| "logits/chosen": -2.3113367557525635, | |
| "logits/rejected": -2.363025426864624, | |
| "logps/chosen": -108.32320404052734, | |
| "logps/rejected": -96.14768981933594, | |
| "loss": 1.0008, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -2.168811559677124, | |
| "rewards/margins": -1.5380103588104248, | |
| "rewards/rejected": -0.6308012008666992, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.860927152317881e-07, | |
| "logits/chosen": -2.2521350383758545, | |
| "logits/rejected": -2.2686538696289062, | |
| "logps/chosen": -78.05595397949219, | |
| "logps/rejected": -93.2776107788086, | |
| "loss": 0.5595, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.5212607979774475, | |
| "rewards/margins": 0.686114490032196, | |
| "rewards/rejected": -1.207375407218933, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.844370860927152e-07, | |
| "logits/chosen": -2.2812628746032715, | |
| "logits/rejected": -2.29258394241333, | |
| "logps/chosen": -128.2143096923828, | |
| "logps/rejected": -135.92117309570312, | |
| "loss": 0.5525, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -2.4639101028442383, | |
| "rewards/margins": 1.0309460163116455, | |
| "rewards/rejected": -3.494856595993042, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.827814569536423e-07, | |
| "logits/chosen": -2.3497612476348877, | |
| "logits/rejected": -2.259904384613037, | |
| "logps/chosen": -126.2747802734375, | |
| "logps/rejected": -132.0948944091797, | |
| "loss": 0.5087, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.5596282482147217, | |
| "rewards/margins": 1.1223429441452026, | |
| "rewards/rejected": -2.681971549987793, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.811258278145695e-07, | |
| "logits/chosen": -2.3696093559265137, | |
| "logits/rejected": -2.355694055557251, | |
| "logps/chosen": -113.27628326416016, | |
| "logps/rejected": -120.5525131225586, | |
| "loss": 0.5239, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.5899262428283691, | |
| "rewards/margins": 0.7907289266586304, | |
| "rewards/rejected": -1.380655288696289, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.794701986754966e-07, | |
| "logits/chosen": -2.4090988636016846, | |
| "logits/rejected": -2.4314303398132324, | |
| "logps/chosen": -119.7711410522461, | |
| "logps/rejected": -138.52122497558594, | |
| "loss": 0.6907, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.2273411750793457, | |
| "rewards/margins": 1.0350992679595947, | |
| "rewards/rejected": -2.2624402046203613, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.778145695364238e-07, | |
| "logits/chosen": -2.414658546447754, | |
| "logits/rejected": -2.4013447761535645, | |
| "logps/chosen": -101.0434799194336, | |
| "logps/rejected": -102.90351867675781, | |
| "loss": 0.5651, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.9459658861160278, | |
| "rewards/margins": 0.6103629469871521, | |
| "rewards/rejected": -1.5563287734985352, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.76158940397351e-07, | |
| "logits/chosen": -2.42374849319458, | |
| "logits/rejected": -2.4381699562072754, | |
| "logps/chosen": -113.9575424194336, | |
| "logps/rejected": -121.03520202636719, | |
| "loss": 0.5268, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.7829158902168274, | |
| "rewards/margins": 1.2523859739303589, | |
| "rewards/rejected": -2.035301923751831, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.7450331125827815e-07, | |
| "logits/chosen": -2.4486849308013916, | |
| "logits/rejected": -2.4538803100585938, | |
| "logps/chosen": -97.44860076904297, | |
| "logps/rejected": -100.29484558105469, | |
| "loss": 0.5659, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.2252352237701416, | |
| "rewards/margins": 0.4965124726295471, | |
| "rewards/rejected": -0.7217476963996887, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.728476821192053e-07, | |
| "logits/chosen": -2.4106860160827637, | |
| "logits/rejected": -2.477334499359131, | |
| "logps/chosen": -87.63328552246094, | |
| "logps/rejected": -96.80977630615234, | |
| "loss": 0.5598, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.022742483764886856, | |
| "rewards/margins": 0.6748077273368835, | |
| "rewards/rejected": -0.6520652174949646, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_logits/chosen": -2.339754343032837, | |
| "eval_logits/rejected": -2.2989299297332764, | |
| "eval_logps/chosen": -104.51243591308594, | |
| "eval_logps/rejected": -112.7801513671875, | |
| "eval_loss": 0.6348409652709961, | |
| "eval_rewards/accuracies": 0.7120535969734192, | |
| "eval_rewards/chosen": -0.46458080410957336, | |
| "eval_rewards/margins": 1.1086541414260864, | |
| "eval_rewards/rejected": -1.5732349157333374, | |
| "eval_runtime": 528.3305, | |
| "eval_samples_per_second": 3.38, | |
| "eval_steps_per_second": 0.106, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.7119205298013243e-07, | |
| "logits/chosen": -2.3279285430908203, | |
| "logits/rejected": -2.2698190212249756, | |
| "logps/chosen": -91.65203094482422, | |
| "logps/rejected": -111.75373840332031, | |
| "loss": 0.5584, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.5981258153915405, | |
| "rewards/margins": 1.0291321277618408, | |
| "rewards/rejected": -1.6272579431533813, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.6953642384105957e-07, | |
| "logits/chosen": -2.432509183883667, | |
| "logits/rejected": -2.4649786949157715, | |
| "logps/chosen": -113.98470306396484, | |
| "logps/rejected": -131.01609802246094, | |
| "loss": 0.5481, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.3087894320487976, | |
| "rewards/margins": 1.156360387802124, | |
| "rewards/rejected": -1.4651498794555664, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.678807947019867e-07, | |
| "logits/chosen": -2.4623587131500244, | |
| "logits/rejected": -2.406970500946045, | |
| "logps/chosen": -111.83284759521484, | |
| "logps/rejected": -117.27791595458984, | |
| "loss": 0.5231, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3450089991092682, | |
| "rewards/margins": 1.3268024921417236, | |
| "rewards/rejected": -1.6718114614486694, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.662251655629139e-07, | |
| "logits/chosen": -2.434732675552368, | |
| "logits/rejected": -2.482849597930908, | |
| "logps/chosen": -82.09310150146484, | |
| "logps/rejected": -113.52314758300781, | |
| "loss": 0.5046, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.10573047399520874, | |
| "rewards/margins": 1.0775012969970703, | |
| "rewards/rejected": -1.1832319498062134, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.6456953642384104e-07, | |
| "logits/chosen": -2.495922565460205, | |
| "logits/rejected": -2.432220935821533, | |
| "logps/chosen": -123.1400375366211, | |
| "logps/rejected": -111.23506164550781, | |
| "loss": 1.2805, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.4564870297908783, | |
| "rewards/margins": 0.5551273226737976, | |
| "rewards/rejected": -1.0116143226623535, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.629139072847682e-07, | |
| "logits/chosen": -2.3615145683288574, | |
| "logits/rejected": -2.3742241859436035, | |
| "logps/chosen": -128.84971618652344, | |
| "logps/rejected": -140.29312133789062, | |
| "loss": 1.2493, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.166237235069275, | |
| "rewards/margins": 1.4496667385101318, | |
| "rewards/rejected": -2.615903854370117, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.612582781456953e-07, | |
| "logits/chosen": -2.471628189086914, | |
| "logits/rejected": -2.407003164291382, | |
| "logps/chosen": -106.4498291015625, | |
| "logps/rejected": -119.580078125, | |
| "loss": 0.4833, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -1.251348614692688, | |
| "rewards/margins": 1.0506912469863892, | |
| "rewards/rejected": -2.302039623260498, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.596026490066225e-07, | |
| "logits/chosen": -2.3577880859375, | |
| "logits/rejected": -2.3710594177246094, | |
| "logps/chosen": -109.6436996459961, | |
| "logps/rejected": -111.36781311035156, | |
| "loss": 0.6501, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.1875368356704712, | |
| "rewards/margins": 0.8005573153495789, | |
| "rewards/rejected": -1.9880939722061157, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.5794701986754965e-07, | |
| "logits/chosen": -2.3025927543640137, | |
| "logits/rejected": -2.412416934967041, | |
| "logps/chosen": -92.57754516601562, | |
| "logps/rejected": -125.9276123046875, | |
| "loss": 0.6228, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -1.1066559553146362, | |
| "rewards/margins": 0.5373567938804626, | |
| "rewards/rejected": -1.6440128087997437, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.562913907284768e-07, | |
| "logits/chosen": -2.315936326980591, | |
| "logits/rejected": -2.264455556869507, | |
| "logps/chosen": -111.17767333984375, | |
| "logps/rejected": -124.8282699584961, | |
| "loss": 0.5129, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.9577595591545105, | |
| "rewards/margins": 1.1823832988739014, | |
| "rewards/rejected": -2.1401429176330566, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.54635761589404e-07, | |
| "logits/chosen": -2.4507012367248535, | |
| "logits/rejected": -2.402617931365967, | |
| "logps/chosen": -112.44432067871094, | |
| "logps/rejected": -117.4054946899414, | |
| "loss": 0.7353, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.0404579639434814, | |
| "rewards/margins": 0.9835718870162964, | |
| "rewards/rejected": -2.0240299701690674, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.5298013245033113e-07, | |
| "logits/chosen": -2.410632610321045, | |
| "logits/rejected": -2.4103057384490967, | |
| "logps/chosen": -102.71327209472656, | |
| "logps/rejected": -118.40677642822266, | |
| "loss": 0.496, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.740358829498291, | |
| "rewards/margins": 1.1352561712265015, | |
| "rewards/rejected": -1.875615119934082, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.5132450331125827e-07, | |
| "logits/chosen": -2.2251460552215576, | |
| "logits/rejected": -2.2362751960754395, | |
| "logps/chosen": -106.41926574707031, | |
| "logps/rejected": -108.68377685546875, | |
| "loss": 0.9923, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.0975613594055176, | |
| "rewards/margins": 1.4764889478683472, | |
| "rewards/rejected": -2.5740504264831543, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.496688741721854e-07, | |
| "logits/chosen": -2.2884135246276855, | |
| "logits/rejected": -2.3148610591888428, | |
| "logps/chosen": -95.29539489746094, | |
| "logps/rejected": -102.16908264160156, | |
| "loss": 0.6218, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.0151549577713013, | |
| "rewards/margins": 0.7811011075973511, | |
| "rewards/rejected": -1.7962560653686523, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.4801324503311255e-07, | |
| "logits/chosen": -2.266324520111084, | |
| "logits/rejected": -2.1928133964538574, | |
| "logps/chosen": -94.09014892578125, | |
| "logps/rejected": -102.40970611572266, | |
| "loss": 0.525, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.572268009185791, | |
| "rewards/margins": 0.8905662298202515, | |
| "rewards/rejected": -1.462834358215332, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.463576158940397e-07, | |
| "logits/chosen": -2.2293038368225098, | |
| "logits/rejected": -2.1520204544067383, | |
| "logps/chosen": -119.3239974975586, | |
| "logps/rejected": -125.38603210449219, | |
| "loss": 0.7517, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.5861669182777405, | |
| "rewards/margins": 1.747180700302124, | |
| "rewards/rejected": -2.333347797393799, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.4470198675496683e-07, | |
| "logits/chosen": -2.3797130584716797, | |
| "logits/rejected": -2.3224523067474365, | |
| "logps/chosen": -103.2835922241211, | |
| "logps/rejected": -110.06852722167969, | |
| "loss": 0.6595, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.6205762028694153, | |
| "rewards/margins": 0.7353760600090027, | |
| "rewards/rejected": -1.355952262878418, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.43046357615894e-07, | |
| "logits/chosen": -2.286005973815918, | |
| "logits/rejected": -2.243605375289917, | |
| "logps/chosen": -122.601806640625, | |
| "logps/rejected": -152.8876190185547, | |
| "loss": 0.4932, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.38620439171791077, | |
| "rewards/margins": 1.5684607028961182, | |
| "rewards/rejected": -1.9546654224395752, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.4139072847682116e-07, | |
| "logits/chosen": -2.2788243293762207, | |
| "logits/rejected": -2.3132455348968506, | |
| "logps/chosen": -109.7626724243164, | |
| "logps/rejected": -121.210693359375, | |
| "loss": 0.5107, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.1420578509569168, | |
| "rewards/margins": 1.0154675245285034, | |
| "rewards/rejected": -1.1575253009796143, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.397350993377483e-07, | |
| "logits/chosen": -2.1815805435180664, | |
| "logits/rejected": -2.2088842391967773, | |
| "logps/chosen": -97.82100677490234, | |
| "logps/rejected": -110.3985595703125, | |
| "loss": 0.5536, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.8147605657577515, | |
| "rewards/margins": 1.0990091562271118, | |
| "rewards/rejected": -1.9137697219848633, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.380794701986755e-07, | |
| "logits/chosen": -2.1567564010620117, | |
| "logits/rejected": -2.213163375854492, | |
| "logps/chosen": -88.54952239990234, | |
| "logps/rejected": -115.46138000488281, | |
| "loss": 0.5308, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.8950099945068359, | |
| "rewards/margins": 0.986484169960022, | |
| "rewards/rejected": -1.881494164466858, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.3642384105960263e-07, | |
| "logits/chosen": -2.195145845413208, | |
| "logits/rejected": -2.1583914756774902, | |
| "logps/chosen": -89.95973205566406, | |
| "logps/rejected": -90.32757568359375, | |
| "loss": 0.5371, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.1613867282867432, | |
| "rewards/margins": 1.2194865942001343, | |
| "rewards/rejected": -2.380873203277588, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.347682119205298e-07, | |
| "logits/chosen": -2.157541513442993, | |
| "logits/rejected": -2.0622384548187256, | |
| "logps/chosen": -122.03218078613281, | |
| "logps/rejected": -133.05084228515625, | |
| "loss": 0.4753, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.2253652811050415, | |
| "rewards/margins": 1.0726807117462158, | |
| "rewards/rejected": -2.2980456352233887, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.3311258278145697e-07, | |
| "logits/chosen": -2.2362678050994873, | |
| "logits/rejected": -2.2174267768859863, | |
| "logps/chosen": -104.6390151977539, | |
| "logps/rejected": -108.64559173583984, | |
| "loss": 0.5309, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.9815710186958313, | |
| "rewards/margins": 0.686564564704895, | |
| "rewards/rejected": -1.6681352853775024, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.314569536423841e-07, | |
| "logits/chosen": -2.3051602840423584, | |
| "logits/rejected": -2.205004930496216, | |
| "logps/chosen": -112.1572494506836, | |
| "logps/rejected": -115.80439758300781, | |
| "loss": 3.3956, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.1993415355682373, | |
| "rewards/margins": 0.9256394505500793, | |
| "rewards/rejected": -2.124980926513672, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.2980132450331125e-07, | |
| "logits/chosen": -2.1093502044677734, | |
| "logits/rejected": -2.1304099559783936, | |
| "logps/chosen": -101.59135437011719, | |
| "logps/rejected": -121.8282241821289, | |
| "loss": 0.6244, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -1.4122694730758667, | |
| "rewards/margins": 1.0248304605484009, | |
| "rewards/rejected": -2.4371001720428467, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.281456953642384e-07, | |
| "logits/chosen": -2.1925549507141113, | |
| "logits/rejected": -2.2341551780700684, | |
| "logps/chosen": -125.73774719238281, | |
| "logps/rejected": -137.68995666503906, | |
| "loss": 0.5342, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.3582961559295654, | |
| "rewards/margins": 1.1268060207366943, | |
| "rewards/rejected": -2.4851021766662598, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.2649006622516553e-07, | |
| "logits/chosen": -2.1826648712158203, | |
| "logits/rejected": -2.0866520404815674, | |
| "logps/chosen": -112.77205657958984, | |
| "logps/rejected": -135.73634338378906, | |
| "loss": 0.6883, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.9496241807937622, | |
| "rewards/margins": 2.8248558044433594, | |
| "rewards/rejected": -3.774479627609253, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.2483443708609267e-07, | |
| "logits/chosen": -2.2106716632843018, | |
| "logits/rejected": -2.2418696880340576, | |
| "logps/chosen": -100.58189392089844, | |
| "logps/rejected": -122.01805114746094, | |
| "loss": 0.4801, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.5530904531478882, | |
| "rewards/margins": 1.249182939529419, | |
| "rewards/rejected": -2.802273750305176, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.231788079470198e-07, | |
| "logits/chosen": -2.1691789627075195, | |
| "logits/rejected": -2.082367181777954, | |
| "logps/chosen": -100.97856903076172, | |
| "logps/rejected": -102.23161315917969, | |
| "loss": 0.5207, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.1863991022109985, | |
| "rewards/margins": 1.5210940837860107, | |
| "rewards/rejected": -2.707493305206299, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.21523178807947e-07, | |
| "logits/chosen": -2.321969985961914, | |
| "logits/rejected": -2.2945773601531982, | |
| "logps/chosen": -95.80015563964844, | |
| "logps/rejected": -103.98514556884766, | |
| "loss": 0.5769, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.6277263760566711, | |
| "rewards/margins": 1.0614575147628784, | |
| "rewards/rejected": -1.6891838312149048, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.1986754966887414e-07, | |
| "logits/chosen": -2.16201114654541, | |
| "logits/rejected": -2.100698471069336, | |
| "logps/chosen": -107.64762878417969, | |
| "logps/rejected": -114.20783996582031, | |
| "loss": 0.5842, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.0765411853790283, | |
| "rewards/margins": 0.9841095209121704, | |
| "rewards/rejected": -2.060650587081909, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.1821192052980133e-07, | |
| "logits/chosen": -2.15731143951416, | |
| "logits/rejected": -2.1200685501098633, | |
| "logps/chosen": -94.93736267089844, | |
| "logps/rejected": -108.20992279052734, | |
| "loss": 0.502, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.5728567838668823, | |
| "rewards/margins": 1.2192682027816772, | |
| "rewards/rejected": -1.7921253442764282, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.165562913907285e-07, | |
| "logits/chosen": -2.228494644165039, | |
| "logits/rejected": -2.199162006378174, | |
| "logps/chosen": -119.44285583496094, | |
| "logps/rejected": -124.89945220947266, | |
| "loss": 0.5335, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.5043415427207947, | |
| "rewards/margins": 1.4671887159347534, | |
| "rewards/rejected": -1.9715303182601929, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.149006622516556e-07, | |
| "logits/chosen": -2.242833137512207, | |
| "logits/rejected": -2.193368673324585, | |
| "logps/chosen": -106.42388916015625, | |
| "logps/rejected": -115.7519302368164, | |
| "loss": 0.5458, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.2504803538322449, | |
| "rewards/margins": 1.4816687107086182, | |
| "rewards/rejected": -1.7321488857269287, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.1324503311258276e-07, | |
| "logits/chosen": -2.296274185180664, | |
| "logits/rejected": -2.233081340789795, | |
| "logps/chosen": -97.89036560058594, | |
| "logps/rejected": -118.38981628417969, | |
| "loss": 0.6251, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -0.856580376625061, | |
| "rewards/margins": 1.5226026773452759, | |
| "rewards/rejected": -2.379183053970337, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.1158940397350995e-07, | |
| "logits/chosen": -2.2974660396575928, | |
| "logits/rejected": -2.1640889644622803, | |
| "logps/chosen": -111.53731536865234, | |
| "logps/rejected": -109.1888656616211, | |
| "loss": 0.4891, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.9736050367355347, | |
| "rewards/margins": 1.2244486808776855, | |
| "rewards/rejected": -2.1980538368225098, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.099337748344371e-07, | |
| "logits/chosen": -2.130094289779663, | |
| "logits/rejected": -2.0237298011779785, | |
| "logps/chosen": -116.61064147949219, | |
| "logps/rejected": -123.98744201660156, | |
| "loss": 0.9585, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -2.352774143218994, | |
| "rewards/margins": 1.3028422594070435, | |
| "rewards/rejected": -3.6556167602539062, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.0827814569536423e-07, | |
| "logits/chosen": -2.1122946739196777, | |
| "logits/rejected": -2.1758933067321777, | |
| "logps/chosen": -92.36100769042969, | |
| "logps/rejected": -117.257080078125, | |
| "loss": 0.5243, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.1455414295196533, | |
| "rewards/margins": 1.1456917524337769, | |
| "rewards/rejected": -2.291233539581299, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.0662251655629137e-07, | |
| "logits/chosen": -2.1967172622680664, | |
| "logits/rejected": -2.163334369659424, | |
| "logps/chosen": -94.69267272949219, | |
| "logps/rejected": -106.30582427978516, | |
| "loss": 0.5839, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.9426735043525696, | |
| "rewards/margins": 0.8261833190917969, | |
| "rewards/rejected": -1.7688567638397217, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.049668874172185e-07, | |
| "logits/chosen": -2.2559750080108643, | |
| "logits/rejected": -2.2480287551879883, | |
| "logps/chosen": -114.98677825927734, | |
| "logps/rejected": -118.00787353515625, | |
| "loss": 0.6499, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -1.6318261623382568, | |
| "rewards/margins": 0.813581645488739, | |
| "rewards/rejected": -2.4454076290130615, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.0331125827814565e-07, | |
| "logits/chosen": -2.277569055557251, | |
| "logits/rejected": -2.2428252696990967, | |
| "logps/chosen": -106.87760162353516, | |
| "logps/rejected": -107.15045166015625, | |
| "loss": 0.7337, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.2901384830474854, | |
| "rewards/margins": 0.8376191854476929, | |
| "rewards/rejected": -2.1277577877044678, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.016556291390728e-07, | |
| "logits/chosen": -2.2305266857147217, | |
| "logits/rejected": -2.2446939945220947, | |
| "logps/chosen": -115.1706314086914, | |
| "logps/rejected": -132.69129943847656, | |
| "loss": 0.5205, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.4605052471160889, | |
| "rewards/margins": 1.3340156078338623, | |
| "rewards/rejected": -2.794520854949951, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4e-07, | |
| "logits/chosen": -2.3378500938415527, | |
| "logits/rejected": -2.1980865001678467, | |
| "logps/chosen": -124.11688232421875, | |
| "logps/rejected": -121.2197494506836, | |
| "loss": 0.5762, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.1835664510726929, | |
| "rewards/margins": 1.0963947772979736, | |
| "rewards/rejected": -2.279961109161377, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.983443708609271e-07, | |
| "logits/chosen": -2.2236156463623047, | |
| "logits/rejected": -2.2054903507232666, | |
| "logps/chosen": -122.0257568359375, | |
| "logps/rejected": -125.471923828125, | |
| "loss": 0.4677, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.3710222244262695, | |
| "rewards/margins": 1.238471269607544, | |
| "rewards/rejected": -2.6094937324523926, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.966887417218543e-07, | |
| "logits/chosen": -2.2760846614837646, | |
| "logits/rejected": -2.2383294105529785, | |
| "logps/chosen": -104.09146881103516, | |
| "logps/rejected": -120.87336730957031, | |
| "loss": 0.5848, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -1.4301923513412476, | |
| "rewards/margins": 1.626868486404419, | |
| "rewards/rejected": -3.0570602416992188, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.9503311258278146e-07, | |
| "logits/chosen": -2.304551839828491, | |
| "logits/rejected": -2.3333609104156494, | |
| "logps/chosen": -119.12831115722656, | |
| "logps/rejected": -128.80160522460938, | |
| "loss": 0.555, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -2.5498030185699463, | |
| "rewards/margins": 0.8728634715080261, | |
| "rewards/rejected": -3.422666072845459, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.933774834437086e-07, | |
| "logits/chosen": -2.2905325889587402, | |
| "logits/rejected": -2.175750255584717, | |
| "logps/chosen": -111.89952087402344, | |
| "logps/rejected": -112.72969055175781, | |
| "loss": 0.5745, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -2.818068742752075, | |
| "rewards/margins": 0.5174419283866882, | |
| "rewards/rejected": -3.335510730743408, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.9172185430463574e-07, | |
| "logits/chosen": -2.3529715538024902, | |
| "logits/rejected": -2.2983202934265137, | |
| "logps/chosen": -136.7278594970703, | |
| "logps/rejected": -129.16085815429688, | |
| "loss": 0.5891, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -2.4263627529144287, | |
| "rewards/margins": 0.8253445625305176, | |
| "rewards/rejected": -3.2517075538635254, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.9006622516556293e-07, | |
| "logits/chosen": -2.2374019622802734, | |
| "logits/rejected": -2.2284903526306152, | |
| "logps/chosen": -114.3366470336914, | |
| "logps/rejected": -110.65074157714844, | |
| "loss": 0.6708, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -2.3343544006347656, | |
| "rewards/margins": 1.0057871341705322, | |
| "rewards/rejected": -3.340141773223877, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_logits/chosen": -2.2622616291046143, | |
| "eval_logits/rejected": -2.215507745742798, | |
| "eval_logps/chosen": -119.37470245361328, | |
| "eval_logps/rejected": -125.0894546508789, | |
| "eval_loss": 0.5807133316993713, | |
| "eval_rewards/accuracies": 0.6830357313156128, | |
| "eval_rewards/chosen": -1.950809121131897, | |
| "eval_rewards/margins": 0.8533560633659363, | |
| "eval_rewards/rejected": -2.8041651248931885, | |
| "eval_runtime": 520.9457, | |
| "eval_samples_per_second": 3.428, | |
| "eval_steps_per_second": 0.107, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.8841059602649007e-07, | |
| "logits/chosen": -2.4846906661987305, | |
| "logits/rejected": -2.38966703414917, | |
| "logps/chosen": -122.82658386230469, | |
| "logps/rejected": -122.37986755371094, | |
| "loss": 0.5429, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -1.5582962036132812, | |
| "rewards/margins": 0.7036358118057251, | |
| "rewards/rejected": -2.261931896209717, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.867549668874172e-07, | |
| "logits/chosen": -2.413020133972168, | |
| "logits/rejected": -2.348389148712158, | |
| "logps/chosen": -146.7459716796875, | |
| "logps/rejected": -152.81591796875, | |
| "loss": 0.5503, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.9320647716522217, | |
| "rewards/margins": 1.321993112564087, | |
| "rewards/rejected": -3.2540581226348877, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.8509933774834435e-07, | |
| "logits/chosen": -2.335376262664795, | |
| "logits/rejected": -2.3727335929870605, | |
| "logps/chosen": -96.5339584350586, | |
| "logps/rejected": -102.97718811035156, | |
| "loss": 0.4738, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.347673773765564, | |
| "rewards/margins": 0.7935667037963867, | |
| "rewards/rejected": -2.141240358352661, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.834437086092715e-07, | |
| "logits/chosen": -2.474375009536743, | |
| "logits/rejected": -2.457411527633667, | |
| "logps/chosen": -100.12342071533203, | |
| "logps/rejected": -98.28324890136719, | |
| "loss": 0.5072, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.6612989902496338, | |
| "rewards/margins": 1.0141090154647827, | |
| "rewards/rejected": -1.675408124923706, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.8178807947019863e-07, | |
| "logits/chosen": -2.4102184772491455, | |
| "logits/rejected": -2.366565704345703, | |
| "logps/chosen": -94.41053771972656, | |
| "logps/rejected": -106.40338134765625, | |
| "loss": 0.4768, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.1945335865020752, | |
| "rewards/margins": 1.7779722213745117, | |
| "rewards/rejected": -2.972505807876587, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.8013245033112577e-07, | |
| "logits/chosen": -2.324781656265259, | |
| "logits/rejected": -2.265265703201294, | |
| "logps/chosen": -113.0925064086914, | |
| "logps/rejected": -116.36458587646484, | |
| "loss": 0.5291, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.6215614080429077, | |
| "rewards/margins": 0.970362663269043, | |
| "rewards/rejected": -1.5919239521026611, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.7847682119205296e-07, | |
| "logits/chosen": -2.4248404502868652, | |
| "logits/rejected": -2.3727540969848633, | |
| "logps/chosen": -112.99056243896484, | |
| "logps/rejected": -124.29933166503906, | |
| "loss": 1.2347, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.332558035850525, | |
| "rewards/margins": 1.7028182744979858, | |
| "rewards/rejected": -3.0353763103485107, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.7682119205298016e-07, | |
| "logits/chosen": -2.510585308074951, | |
| "logits/rejected": -2.4303643703460693, | |
| "logps/chosen": -122.59515380859375, | |
| "logps/rejected": -119.64692687988281, | |
| "loss": 0.5815, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.8100630044937134, | |
| "rewards/margins": 0.8500891923904419, | |
| "rewards/rejected": -2.660151958465576, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.751655629139073e-07, | |
| "logits/chosen": -2.48645281791687, | |
| "logits/rejected": -2.433279037475586, | |
| "logps/chosen": -131.58583068847656, | |
| "logps/rejected": -139.4903106689453, | |
| "loss": 0.4595, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -1.738173484802246, | |
| "rewards/margins": 0.9940687417984009, | |
| "rewards/rejected": -2.7322418689727783, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.7350993377483444e-07, | |
| "logits/chosen": -2.2750325202941895, | |
| "logits/rejected": -2.214141845703125, | |
| "logps/chosen": -92.43232727050781, | |
| "logps/rejected": -118.48176574707031, | |
| "loss": 0.4501, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -1.566083312034607, | |
| "rewards/margins": 1.913888931274414, | |
| "rewards/rejected": -3.4799721240997314, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.718543046357616e-07, | |
| "logits/chosen": -2.3589186668395996, | |
| "logits/rejected": -2.289020538330078, | |
| "logps/chosen": -116.14213562011719, | |
| "logps/rejected": -115.25, | |
| "loss": 0.5489, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.8134400844573975, | |
| "rewards/margins": 1.0903173685073853, | |
| "rewards/rejected": -2.9037575721740723, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.701986754966887e-07, | |
| "logits/chosen": -2.4153926372528076, | |
| "logits/rejected": -2.38564133644104, | |
| "logps/chosen": -198.99185180664062, | |
| "logps/rejected": -211.7269744873047, | |
| "loss": 0.4915, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -8.85645580291748, | |
| "rewards/margins": 1.3634490966796875, | |
| "rewards/rejected": -10.219904899597168, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.6854304635761586e-07, | |
| "logits/chosen": -2.3718574047088623, | |
| "logits/rejected": -2.323935031890869, | |
| "logps/chosen": -114.41487121582031, | |
| "logps/rejected": -115.03157806396484, | |
| "loss": 0.5742, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.747230887413025, | |
| "rewards/margins": 0.9782400131225586, | |
| "rewards/rejected": -2.725471019744873, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.6688741721854305e-07, | |
| "logits/chosen": -2.266796350479126, | |
| "logits/rejected": -2.279444456100464, | |
| "logps/chosen": -125.43962097167969, | |
| "logps/rejected": -138.60568237304688, | |
| "loss": 0.5559, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -1.7848097085952759, | |
| "rewards/margins": 0.5874557495117188, | |
| "rewards/rejected": -2.372265577316284, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.652317880794702e-07, | |
| "logits/chosen": -2.3460640907287598, | |
| "logits/rejected": -2.2017135620117188, | |
| "logps/chosen": -173.7471923828125, | |
| "logps/rejected": -175.39913940429688, | |
| "loss": 0.6409, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -8.320539474487305, | |
| "rewards/margins": 0.5577089190483093, | |
| "rewards/rejected": -8.87824821472168, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.6357615894039733e-07, | |
| "logits/chosen": -2.3931944370269775, | |
| "logits/rejected": -2.295135498046875, | |
| "logps/chosen": -117.7610855102539, | |
| "logps/rejected": -131.86878967285156, | |
| "loss": 0.5557, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.364652395248413, | |
| "rewards/margins": 0.9879738092422485, | |
| "rewards/rejected": -2.352626323699951, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.6192052980132447e-07, | |
| "logits/chosen": -2.3470005989074707, | |
| "logits/rejected": -2.314392328262329, | |
| "logps/chosen": -116.54869079589844, | |
| "logps/rejected": -121.33402252197266, | |
| "loss": 0.474, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.6547797918319702, | |
| "rewards/margins": 1.0188862085342407, | |
| "rewards/rejected": -2.6736655235290527, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.602649006622516e-07, | |
| "logits/chosen": -2.3665614128112793, | |
| "logits/rejected": -2.2760112285614014, | |
| "logps/chosen": -116.99346923828125, | |
| "logps/rejected": -194.17459106445312, | |
| "loss": 0.4616, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -1.1822216510772705, | |
| "rewards/margins": 7.195115089416504, | |
| "rewards/rejected": -8.377335548400879, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.5860927152317875e-07, | |
| "logits/chosen": -2.378209114074707, | |
| "logits/rejected": -2.3278615474700928, | |
| "logps/chosen": -119.82206726074219, | |
| "logps/rejected": -126.67927551269531, | |
| "loss": 0.5238, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.758419394493103, | |
| "rewards/margins": 1.255906343460083, | |
| "rewards/rejected": -3.0143258571624756, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.5695364238410594e-07, | |
| "logits/chosen": -2.376044750213623, | |
| "logits/rejected": -2.308166265487671, | |
| "logps/chosen": -113.3560562133789, | |
| "logps/rejected": -114.37657165527344, | |
| "loss": 0.5637, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -1.4774434566497803, | |
| "rewards/margins": 0.5793313384056091, | |
| "rewards/rejected": -2.056774616241455, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.5529801324503314e-07, | |
| "logits/chosen": -2.374824285507202, | |
| "logits/rejected": -2.3935980796813965, | |
| "logps/chosen": -108.83685302734375, | |
| "logps/rejected": -115.5961685180664, | |
| "loss": 0.5626, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.5476689338684082, | |
| "rewards/margins": 0.7745328545570374, | |
| "rewards/rejected": -2.322201728820801, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.536423841059603e-07, | |
| "logits/chosen": -2.339582681655884, | |
| "logits/rejected": -2.361855983734131, | |
| "logps/chosen": -121.9773941040039, | |
| "logps/rejected": -133.71356201171875, | |
| "loss": 0.5682, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -2.1847705841064453, | |
| "rewards/margins": 0.9190909266471863, | |
| "rewards/rejected": -3.1038613319396973, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.519867549668874e-07, | |
| "logits/chosen": -2.2750911712646484, | |
| "logits/rejected": -2.235349416732788, | |
| "logps/chosen": -83.35279846191406, | |
| "logps/rejected": -102.2212905883789, | |
| "loss": 0.5579, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.9985214471817017, | |
| "rewards/margins": 1.0258718729019165, | |
| "rewards/rejected": -3.024393081665039, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.5033112582781456e-07, | |
| "logits/chosen": -2.3950631618499756, | |
| "logits/rejected": -2.286043643951416, | |
| "logps/chosen": -112.0318603515625, | |
| "logps/rejected": -116.00035095214844, | |
| "loss": 0.9739, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.5054577589035034, | |
| "rewards/margins": 0.963812530040741, | |
| "rewards/rejected": -2.4692704677581787, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.486754966887417e-07, | |
| "logits/chosen": -2.2539525032043457, | |
| "logits/rejected": -2.280163526535034, | |
| "logps/chosen": -90.12135314941406, | |
| "logps/rejected": -103.78663635253906, | |
| "loss": 0.5341, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.0227272510528564, | |
| "rewards/margins": 0.8646720051765442, | |
| "rewards/rejected": -1.8873993158340454, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.4701986754966884e-07, | |
| "logits/chosen": -2.3163156509399414, | |
| "logits/rejected": -2.324432849884033, | |
| "logps/chosen": -91.22362518310547, | |
| "logps/rejected": -103.0667953491211, | |
| "loss": 0.5978, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.4652012288570404, | |
| "rewards/margins": 1.1732677221298218, | |
| "rewards/rejected": -1.6384689807891846, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.4536423841059603e-07, | |
| "logits/chosen": -2.3076674938201904, | |
| "logits/rejected": -2.3175816535949707, | |
| "logps/chosen": -100.41036224365234, | |
| "logps/rejected": -117.35456848144531, | |
| "loss": 0.648, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.2293812036514282, | |
| "rewards/margins": 0.8102920651435852, | |
| "rewards/rejected": -2.039673089981079, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.4370860927152317e-07, | |
| "logits/chosen": -2.3699378967285156, | |
| "logits/rejected": -2.3562657833099365, | |
| "logps/chosen": -119.7326889038086, | |
| "logps/rejected": -131.7585906982422, | |
| "loss": 0.4977, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.6823489665985107, | |
| "rewards/margins": 1.3262075185775757, | |
| "rewards/rejected": -2.008556604385376, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.420529801324503e-07, | |
| "logits/chosen": -2.1104941368103027, | |
| "logits/rejected": -2.0905330181121826, | |
| "logps/chosen": -171.0330810546875, | |
| "logps/rejected": -175.89886474609375, | |
| "loss": 0.9012, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -7.170141696929932, | |
| "rewards/margins": -0.5844208002090454, | |
| "rewards/rejected": -6.585721015930176, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.4039735099337745e-07, | |
| "logits/chosen": -2.3445873260498047, | |
| "logits/rejected": -2.2650883197784424, | |
| "logps/chosen": -130.222900390625, | |
| "logps/rejected": -132.6783447265625, | |
| "loss": 0.6484, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.4724981784820557, | |
| "rewards/margins": 0.9613865613937378, | |
| "rewards/rejected": -2.433884859085083, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.387417218543046e-07, | |
| "logits/chosen": -2.457414150238037, | |
| "logits/rejected": -2.5287423133850098, | |
| "logps/chosen": -135.46902465820312, | |
| "logps/rejected": -159.89739990234375, | |
| "loss": 0.4958, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.7360942363739014, | |
| "rewards/margins": 1.1658858060836792, | |
| "rewards/rejected": -2.901979923248291, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.3708609271523173e-07, | |
| "logits/chosen": -2.221667766571045, | |
| "logits/rejected": -2.208982467651367, | |
| "logps/chosen": -106.48121643066406, | |
| "logps/rejected": -104.82066345214844, | |
| "loss": 0.5892, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.4603312015533447, | |
| "rewards/margins": 0.8812816739082336, | |
| "rewards/rejected": -2.3416128158569336, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.35430463576159e-07, | |
| "logits/chosen": -2.2299439907073975, | |
| "logits/rejected": -2.225663423538208, | |
| "logps/chosen": -94.22245025634766, | |
| "logps/rejected": -100.85789489746094, | |
| "loss": 0.5178, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.32827529311180115, | |
| "rewards/margins": 1.4746736288070679, | |
| "rewards/rejected": -1.8029489517211914, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.337748344370861e-07, | |
| "logits/chosen": -2.2380738258361816, | |
| "logits/rejected": -2.2997546195983887, | |
| "logps/chosen": -98.22574615478516, | |
| "logps/rejected": -112.7544937133789, | |
| "loss": 0.5318, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.4563646912574768, | |
| "rewards/margins": 0.6618258953094482, | |
| "rewards/rejected": -1.1181905269622803, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.3211920529801326e-07, | |
| "logits/chosen": -2.405059814453125, | |
| "logits/rejected": -2.429863452911377, | |
| "logps/chosen": -107.7689437866211, | |
| "logps/rejected": -119.28629302978516, | |
| "loss": 0.5111, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.2023572474718094, | |
| "rewards/margins": 0.8103917241096497, | |
| "rewards/rejected": -1.0127489566802979, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.304635761589404e-07, | |
| "logits/chosen": -2.3326334953308105, | |
| "logits/rejected": -2.256371259689331, | |
| "logps/chosen": -111.0186767578125, | |
| "logps/rejected": -101.33964538574219, | |
| "loss": 0.5624, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.1809541881084442, | |
| "rewards/margins": 0.9960860013961792, | |
| "rewards/rejected": -1.177040457725525, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.2880794701986754e-07, | |
| "logits/chosen": -2.228715181350708, | |
| "logits/rejected": -2.2780203819274902, | |
| "logps/chosen": -82.96192932128906, | |
| "logps/rejected": -106.3135757446289, | |
| "loss": 0.5477, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -0.4178234040737152, | |
| "rewards/margins": 1.3403428792953491, | |
| "rewards/rejected": -1.7581663131713867, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.271523178807947e-07, | |
| "logits/chosen": -2.3673007488250732, | |
| "logits/rejected": -2.361161947250366, | |
| "logps/chosen": -110.33650970458984, | |
| "logps/rejected": -118.94313049316406, | |
| "loss": 0.6233, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.7891864776611328, | |
| "rewards/margins": 0.4793139100074768, | |
| "rewards/rejected": -1.2685004472732544, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.254966887417218e-07, | |
| "logits/chosen": -2.270993947982788, | |
| "logits/rejected": -2.3422646522521973, | |
| "logps/chosen": -110.1440200805664, | |
| "logps/rejected": -123.39158630371094, | |
| "loss": 0.5202, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.0290793180465698, | |
| "rewards/margins": 0.8790245056152344, | |
| "rewards/rejected": -1.9081039428710938, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.23841059602649e-07, | |
| "logits/chosen": -2.3637521266937256, | |
| "logits/rejected": -2.3246121406555176, | |
| "logps/chosen": -123.53662109375, | |
| "logps/rejected": -130.99119567871094, | |
| "loss": 0.4855, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.9286755323410034, | |
| "rewards/margins": 1.2442817687988281, | |
| "rewards/rejected": -2.172957181930542, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.2218543046357615e-07, | |
| "logits/chosen": -2.1791653633117676, | |
| "logits/rejected": -2.18937349319458, | |
| "logps/chosen": -166.4168243408203, | |
| "logps/rejected": -188.746826171875, | |
| "loss": 0.633, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -5.239639759063721, | |
| "rewards/margins": 1.5334604978561401, | |
| "rewards/rejected": -6.77310037612915, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.205298013245033e-07, | |
| "logits/chosen": -2.234860897064209, | |
| "logits/rejected": -2.235252857208252, | |
| "logps/chosen": -131.88731384277344, | |
| "logps/rejected": -147.35777282714844, | |
| "loss": 0.6159, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.8779850006103516, | |
| "rewards/margins": 1.1691521406173706, | |
| "rewards/rejected": -4.0471367835998535, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.1887417218543043e-07, | |
| "logits/chosen": -2.3836143016815186, | |
| "logits/rejected": -2.35886287689209, | |
| "logps/chosen": -122.30987548828125, | |
| "logps/rejected": -125.3285903930664, | |
| "loss": 0.536, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.5127007961273193, | |
| "rewards/margins": 1.0224969387054443, | |
| "rewards/rejected": -3.5351977348327637, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.1721854304635757e-07, | |
| "logits/chosen": -2.280726194381714, | |
| "logits/rejected": -2.2161917686462402, | |
| "logps/chosen": -118.85569763183594, | |
| "logps/rejected": -122.44981384277344, | |
| "loss": 0.5343, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.2102978229522705, | |
| "rewards/margins": 1.2170623540878296, | |
| "rewards/rejected": -3.4273605346679688, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.155629139072847e-07, | |
| "logits/chosen": -2.445349931716919, | |
| "logits/rejected": -2.4110920429229736, | |
| "logps/chosen": -110.351806640625, | |
| "logps/rejected": -118.65000915527344, | |
| "loss": 0.5877, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.997372031211853, | |
| "rewards/margins": 0.7109335660934448, | |
| "rewards/rejected": -2.708305835723877, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 3.1390728476821196e-07, | |
| "logits/chosen": -2.3366105556488037, | |
| "logits/rejected": -2.3466391563415527, | |
| "logps/chosen": -127.3154067993164, | |
| "logps/rejected": -124.52156829833984, | |
| "loss": 0.5383, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -2.18947172164917, | |
| "rewards/margins": 0.6396933794021606, | |
| "rewards/rejected": -2.829165458679199, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 3.122516556291391e-07, | |
| "logits/chosen": -2.260577917098999, | |
| "logits/rejected": -2.2590389251708984, | |
| "logps/chosen": -113.4861831665039, | |
| "logps/rejected": -108.08863830566406, | |
| "loss": 0.6422, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -2.5557265281677246, | |
| "rewards/margins": 0.4942797124385834, | |
| "rewards/rejected": -3.050006151199341, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 3.1059602649006624e-07, | |
| "logits/chosen": -2.3765158653259277, | |
| "logits/rejected": -2.3215491771698, | |
| "logps/chosen": -123.18157958984375, | |
| "logps/rejected": -119.86373138427734, | |
| "loss": 0.4739, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.5957121849060059, | |
| "rewards/margins": 0.9963384866714478, | |
| "rewards/rejected": -2.592050552368164, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 3.089403973509934e-07, | |
| "logits/chosen": -2.3044986724853516, | |
| "logits/rejected": -2.3184516429901123, | |
| "logps/chosen": -120.02888488769531, | |
| "logps/rejected": -125.82350158691406, | |
| "loss": 0.5382, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.5062299966812134, | |
| "rewards/margins": 1.3234798908233643, | |
| "rewards/rejected": -2.829709529876709, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 3.072847682119205e-07, | |
| "logits/chosen": -2.315985918045044, | |
| "logits/rejected": -2.262968063354492, | |
| "logps/chosen": -114.96397399902344, | |
| "logps/rejected": -129.48318481445312, | |
| "loss": 0.5984, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.5051807165145874, | |
| "rewards/margins": 1.5209523439407349, | |
| "rewards/rejected": -3.0261335372924805, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_logits/chosen": -2.1823971271514893, | |
| "eval_logits/rejected": -2.1383469104766846, | |
| "eval_logps/chosen": -114.31800079345703, | |
| "eval_logps/rejected": -123.8126220703125, | |
| "eval_loss": 0.5244069695472717, | |
| "eval_rewards/accuracies": 0.71875, | |
| "eval_rewards/chosen": -1.4451391696929932, | |
| "eval_rewards/margins": 1.2313430309295654, | |
| "eval_rewards/rejected": -2.6764819622039795, | |
| "eval_runtime": 522.8803, | |
| "eval_samples_per_second": 3.416, | |
| "eval_steps_per_second": 0.107, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 3.0562913907284766e-07, | |
| "logits/chosen": -2.313927173614502, | |
| "logits/rejected": -2.33535099029541, | |
| "logps/chosen": -118.8395767211914, | |
| "logps/rejected": -128.5199737548828, | |
| "loss": 0.5877, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.8590469360351562, | |
| "rewards/margins": 0.7332299947738647, | |
| "rewards/rejected": -2.5922768115997314, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 3.039735099337748e-07, | |
| "logits/chosen": -2.3866069316864014, | |
| "logits/rejected": -2.3465638160705566, | |
| "logps/chosen": -120.46064758300781, | |
| "logps/rejected": -116.50871276855469, | |
| "loss": 0.8307, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.8921232223510742, | |
| "rewards/margins": 0.763845682144165, | |
| "rewards/rejected": -2.6559691429138184, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 3.02317880794702e-07, | |
| "logits/chosen": -2.421853542327881, | |
| "logits/rejected": -2.318270206451416, | |
| "logps/chosen": -137.11500549316406, | |
| "logps/rejected": -134.6293487548828, | |
| "loss": 0.5303, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.621045708656311, | |
| "rewards/margins": 0.8223884701728821, | |
| "rewards/rejected": -2.443434238433838, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 3.0066225165562913e-07, | |
| "logits/chosen": -2.35496187210083, | |
| "logits/rejected": -2.2371764183044434, | |
| "logps/chosen": -111.7890853881836, | |
| "logps/rejected": -106.77983093261719, | |
| "loss": 0.5696, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.0801293849945068, | |
| "rewards/margins": 1.046350121498108, | |
| "rewards/rejected": -2.1264796257019043, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.9900662251655627e-07, | |
| "logits/chosen": -2.329745054244995, | |
| "logits/rejected": -2.2365243434906006, | |
| "logps/chosen": -112.671875, | |
| "logps/rejected": -102.41822814941406, | |
| "loss": 0.6113, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.2923063039779663, | |
| "rewards/margins": 1.320711612701416, | |
| "rewards/rejected": -2.613018035888672, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.973509933774834e-07, | |
| "logits/chosen": -2.2645044326782227, | |
| "logits/rejected": -2.1956381797790527, | |
| "logps/chosen": -104.55106353759766, | |
| "logps/rejected": -106.2977294921875, | |
| "loss": 0.7244, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.3651814460754395, | |
| "rewards/margins": 0.206166073679924, | |
| "rewards/rejected": -1.5713475942611694, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.9569536423841055e-07, | |
| "logits/chosen": -2.1943066120147705, | |
| "logits/rejected": -2.24649977684021, | |
| "logps/chosen": -89.5943374633789, | |
| "logps/rejected": -106.08259582519531, | |
| "loss": 0.5679, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.288975477218628, | |
| "rewards/margins": 0.5116127133369446, | |
| "rewards/rejected": -1.8005882501602173, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.940397350993377e-07, | |
| "logits/chosen": -2.607445240020752, | |
| "logits/rejected": -2.4970269203186035, | |
| "logps/chosen": -146.52468872070312, | |
| "logps/rejected": -140.1497344970703, | |
| "loss": 0.5597, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.5630671977996826, | |
| "rewards/margins": 0.6673210263252258, | |
| "rewards/rejected": -2.2303881645202637, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.9238410596026494e-07, | |
| "logits/chosen": -2.238361358642578, | |
| "logits/rejected": -2.1506645679473877, | |
| "logps/chosen": -99.36707305908203, | |
| "logps/rejected": -105.86885070800781, | |
| "loss": 0.5912, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.7212435007095337, | |
| "rewards/margins": 1.0883468389511108, | |
| "rewards/rejected": -2.8095905780792236, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.907284768211921e-07, | |
| "logits/chosen": -2.3251490592956543, | |
| "logits/rejected": -2.307288408279419, | |
| "logps/chosen": -115.70127868652344, | |
| "logps/rejected": -124.7564926147461, | |
| "loss": 0.6455, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -1.711801290512085, | |
| "rewards/margins": 1.0508122444152832, | |
| "rewards/rejected": -2.7626137733459473, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.890728476821192e-07, | |
| "logits/chosen": -2.2478513717651367, | |
| "logits/rejected": -2.2882168292999268, | |
| "logps/chosen": -104.28340911865234, | |
| "logps/rejected": -121.9701156616211, | |
| "loss": 0.5165, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.887414574623108, | |
| "rewards/margins": 0.925916850566864, | |
| "rewards/rejected": -2.8133316040039062, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.8741721854304636e-07, | |
| "logits/chosen": -2.296663999557495, | |
| "logits/rejected": -2.3100745677948, | |
| "logps/chosen": -158.80392456054688, | |
| "logps/rejected": -124.94380950927734, | |
| "loss": 1.3257, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -4.360524654388428, | |
| "rewards/margins": -2.369443655014038, | |
| "rewards/rejected": -1.9910815954208374, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.857615894039735e-07, | |
| "logits/chosen": -2.2816338539123535, | |
| "logits/rejected": -2.2055506706237793, | |
| "logps/chosen": -116.35295104980469, | |
| "logps/rejected": -125.78230285644531, | |
| "loss": 0.4623, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.8872613906860352, | |
| "rewards/margins": 1.402567982673645, | |
| "rewards/rejected": -3.2898292541503906, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.8410596026490064e-07, | |
| "logits/chosen": -2.3136837482452393, | |
| "logits/rejected": -2.327634334564209, | |
| "logps/chosen": -118.3811264038086, | |
| "logps/rejected": -131.34634399414062, | |
| "loss": 0.4969, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.959405243396759, | |
| "rewards/margins": 1.3146027326583862, | |
| "rewards/rejected": -2.27400803565979, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.824503311258278e-07, | |
| "logits/chosen": -2.2207038402557373, | |
| "logits/rejected": -2.2655978202819824, | |
| "logps/chosen": -122.19026184082031, | |
| "logps/rejected": -130.85289001464844, | |
| "loss": 0.5344, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.631255865097046, | |
| "rewards/margins": 1.0431182384490967, | |
| "rewards/rejected": -2.6743741035461426, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.8079470198675497e-07, | |
| "logits/chosen": -2.3095381259918213, | |
| "logits/rejected": -2.2248189449310303, | |
| "logps/chosen": -103.5934066772461, | |
| "logps/rejected": -116.8990249633789, | |
| "loss": 0.5347, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -1.2814357280731201, | |
| "rewards/margins": 1.4714066982269287, | |
| "rewards/rejected": -2.752842426300049, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.791390728476821e-07, | |
| "logits/chosen": -2.2992262840270996, | |
| "logits/rejected": -2.3474018573760986, | |
| "logps/chosen": -140.76292419433594, | |
| "logps/rejected": -168.6060333251953, | |
| "loss": 0.5155, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -2.161952018737793, | |
| "rewards/margins": 1.1705152988433838, | |
| "rewards/rejected": -3.3324673175811768, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.7748344370860925e-07, | |
| "logits/chosen": -2.1538851261138916, | |
| "logits/rejected": -2.1492209434509277, | |
| "logps/chosen": -85.61529541015625, | |
| "logps/rejected": -105.35960388183594, | |
| "loss": 0.4547, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.2468538284301758, | |
| "rewards/margins": 1.524840235710144, | |
| "rewards/rejected": -2.7716941833496094, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.758278145695364e-07, | |
| "logits/chosen": -2.220313549041748, | |
| "logits/rejected": -2.270676612854004, | |
| "logps/chosen": -116.72190856933594, | |
| "logps/rejected": -132.87937927246094, | |
| "loss": 0.4907, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -1.662656545639038, | |
| "rewards/margins": 1.4583295583724976, | |
| "rewards/rejected": -3.1209864616394043, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.7417218543046353e-07, | |
| "logits/chosen": -2.3201870918273926, | |
| "logits/rejected": -2.287921667098999, | |
| "logps/chosen": -119.7146987915039, | |
| "logps/rejected": -135.27894592285156, | |
| "loss": 0.4492, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.569067358970642, | |
| "rewards/margins": 1.5324440002441406, | |
| "rewards/rejected": -3.1015114784240723, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.725165562913907e-07, | |
| "logits/chosen": -2.2404065132141113, | |
| "logits/rejected": -2.2358851432800293, | |
| "logps/chosen": -134.36831665039062, | |
| "logps/rejected": -137.21890258789062, | |
| "loss": 0.5334, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.740677833557129, | |
| "rewards/margins": 1.5694725513458252, | |
| "rewards/rejected": -3.310150623321533, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.7086092715231786e-07, | |
| "logits/chosen": -2.3186452388763428, | |
| "logits/rejected": -2.2739059925079346, | |
| "logps/chosen": -118.93257141113281, | |
| "logps/rejected": -117.28021240234375, | |
| "loss": 0.5242, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.951216459274292, | |
| "rewards/margins": 1.0923653841018677, | |
| "rewards/rejected": -3.04358172416687, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.6920529801324506e-07, | |
| "logits/chosen": -2.230313777923584, | |
| "logits/rejected": -2.269009828567505, | |
| "logps/chosen": -152.952880859375, | |
| "logps/rejected": -145.59732055664062, | |
| "loss": 0.8317, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -4.0952677726745605, | |
| "rewards/margins": -0.3405976891517639, | |
| "rewards/rejected": -3.7546706199645996, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.675496688741722e-07, | |
| "logits/chosen": -2.276404857635498, | |
| "logits/rejected": -2.223013162612915, | |
| "logps/chosen": -126.49522399902344, | |
| "logps/rejected": -123.188720703125, | |
| "loss": 0.503, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -1.3149405717849731, | |
| "rewards/margins": 0.8885973691940308, | |
| "rewards/rejected": -2.203538179397583, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.6589403973509934e-07, | |
| "logits/chosen": -2.1905181407928467, | |
| "logits/rejected": -2.179508924484253, | |
| "logps/chosen": -105.01606750488281, | |
| "logps/rejected": -133.09783935546875, | |
| "loss": 0.5413, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -0.6002556085586548, | |
| "rewards/margins": 2.0444486141204834, | |
| "rewards/rejected": -2.6447041034698486, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.642384105960265e-07, | |
| "logits/chosen": -2.2174530029296875, | |
| "logits/rejected": -2.250398635864258, | |
| "logps/chosen": -101.74955749511719, | |
| "logps/rejected": -133.28713989257812, | |
| "loss": 0.5109, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.5314977765083313, | |
| "rewards/margins": 1.0308630466461182, | |
| "rewards/rejected": -1.5623606443405151, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.625827814569536e-07, | |
| "logits/chosen": -2.2077157497406006, | |
| "logits/rejected": -2.1879453659057617, | |
| "logps/chosen": -106.83088684082031, | |
| "logps/rejected": -117.6878662109375, | |
| "loss": 0.5311, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.922218918800354, | |
| "rewards/margins": 1.4254719018936157, | |
| "rewards/rejected": -3.3476905822753906, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.6092715231788076e-07, | |
| "logits/chosen": -2.236419677734375, | |
| "logits/rejected": -2.1945042610168457, | |
| "logps/chosen": -112.48036193847656, | |
| "logps/rejected": -125.71522521972656, | |
| "loss": 0.8311, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -1.3717401027679443, | |
| "rewards/margins": 1.444071888923645, | |
| "rewards/rejected": -2.8158118724823, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.5927152317880795e-07, | |
| "logits/chosen": -2.3207204341888428, | |
| "logits/rejected": -2.2542405128479004, | |
| "logps/chosen": -112.21119689941406, | |
| "logps/rejected": -117.85597229003906, | |
| "loss": 0.5143, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.3228784799575806, | |
| "rewards/margins": 0.836434543132782, | |
| "rewards/rejected": -2.159313201904297, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.576158940397351e-07, | |
| "logits/chosen": -2.1415367126464844, | |
| "logits/rejected": -2.173337936401367, | |
| "logps/chosen": -111.6198501586914, | |
| "logps/rejected": -131.82977294921875, | |
| "loss": 0.5845, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.362797498703003, | |
| "rewards/margins": 0.8809803128242493, | |
| "rewards/rejected": -2.2437777519226074, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.5596026490066223e-07, | |
| "logits/chosen": -2.169029951095581, | |
| "logits/rejected": -2.145346164703369, | |
| "logps/chosen": -102.84260559082031, | |
| "logps/rejected": -115.5724105834961, | |
| "loss": 0.778, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -1.428421974182129, | |
| "rewards/margins": 0.6596145629882812, | |
| "rewards/rejected": -2.08803653717041, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.5430463576158937e-07, | |
| "logits/chosen": -2.3364787101745605, | |
| "logits/rejected": -2.2011687755584717, | |
| "logps/chosen": -109.7977066040039, | |
| "logps/rejected": -112.79130554199219, | |
| "loss": 0.4779, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.1568695306777954, | |
| "rewards/margins": 1.5236353874206543, | |
| "rewards/rejected": -2.6805050373077393, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.526490066225165e-07, | |
| "logits/chosen": -2.270590305328369, | |
| "logits/rejected": -2.317115306854248, | |
| "logps/chosen": -131.15716552734375, | |
| "logps/rejected": -127.9232406616211, | |
| "loss": 0.5718, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.5472099781036377, | |
| "rewards/margins": 0.4068627953529358, | |
| "rewards/rejected": -1.9540729522705078, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.509933774834437e-07, | |
| "logits/chosen": -2.258516550064087, | |
| "logits/rejected": -2.215508222579956, | |
| "logps/chosen": -116.99686431884766, | |
| "logps/rejected": -119.55728912353516, | |
| "loss": 0.5443, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.1407909393310547, | |
| "rewards/margins": 1.8011624813079834, | |
| "rewards/rejected": -2.941953659057617, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.4933774834437084e-07, | |
| "logits/chosen": -2.301156997680664, | |
| "logits/rejected": -2.282895565032959, | |
| "logps/chosen": -134.67526245117188, | |
| "logps/rejected": -115.00638580322266, | |
| "loss": 0.9478, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -3.1763339042663574, | |
| "rewards/margins": -0.813465416431427, | |
| "rewards/rejected": -2.362868309020996, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.47682119205298e-07, | |
| "logits/chosen": -2.1597695350646973, | |
| "logits/rejected": -2.2584662437438965, | |
| "logps/chosen": -90.55691528320312, | |
| "logps/rejected": -123.76590728759766, | |
| "loss": 0.408, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -1.2247810363769531, | |
| "rewards/margins": 1.5576727390289307, | |
| "rewards/rejected": -2.782454013824463, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.460264900662252e-07, | |
| "logits/chosen": -2.3151791095733643, | |
| "logits/rejected": -2.3073203563690186, | |
| "logps/chosen": -110.79292297363281, | |
| "logps/rejected": -124.72319030761719, | |
| "loss": 0.4559, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.4885177612304688, | |
| "rewards/margins": 0.9933377504348755, | |
| "rewards/rejected": -2.481855630874634, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.443708609271523e-07, | |
| "logits/chosen": -2.3785767555236816, | |
| "logits/rejected": -2.32625150680542, | |
| "logps/chosen": -135.62266540527344, | |
| "logps/rejected": -147.58059692382812, | |
| "loss": 0.5509, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.6126123666763306, | |
| "rewards/margins": 1.325919508934021, | |
| "rewards/rejected": -2.9385318756103516, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.4271523178807946e-07, | |
| "logits/chosen": -2.067755937576294, | |
| "logits/rejected": -2.157957077026367, | |
| "logps/chosen": -100.20467376708984, | |
| "logps/rejected": -139.4010009765625, | |
| "loss": 0.4928, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -1.7971267700195312, | |
| "rewards/margins": 2.0710580348968506, | |
| "rewards/rejected": -3.8681845664978027, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.410596026490066e-07, | |
| "logits/chosen": -2.26438307762146, | |
| "logits/rejected": -2.1824748516082764, | |
| "logps/chosen": -107.44325256347656, | |
| "logps/rejected": -120.7808609008789, | |
| "loss": 0.5088, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.0230767726898193, | |
| "rewards/margins": 1.2027556896209717, | |
| "rewards/rejected": -2.22583270072937, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.394039735099338e-07, | |
| "logits/chosen": -2.202819585800171, | |
| "logits/rejected": -2.1675939559936523, | |
| "logps/chosen": -118.07059478759766, | |
| "logps/rejected": -127.72599792480469, | |
| "loss": 0.6344, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.6664397716522217, | |
| "rewards/margins": 0.9765909314155579, | |
| "rewards/rejected": -2.6430306434631348, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.377483443708609e-07, | |
| "logits/chosen": -2.2761118412017822, | |
| "logits/rejected": -2.2829902172088623, | |
| "logps/chosen": -125.0960464477539, | |
| "logps/rejected": -151.57437133789062, | |
| "loss": 0.4561, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.6365854740142822, | |
| "rewards/margins": 1.4987179040908813, | |
| "rewards/rejected": -3.135303497314453, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.3609271523178807e-07, | |
| "logits/chosen": -2.317108154296875, | |
| "logits/rejected": -2.367867946624756, | |
| "logps/chosen": -110.93936920166016, | |
| "logps/rejected": -124.30006408691406, | |
| "loss": 0.4998, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.5488382577896118, | |
| "rewards/margins": 1.2520344257354736, | |
| "rewards/rejected": -2.800872564315796, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.3443708609271524e-07, | |
| "logits/chosen": -2.283686399459839, | |
| "logits/rejected": -2.2000420093536377, | |
| "logps/chosen": -99.95622253417969, | |
| "logps/rejected": -107.75992584228516, | |
| "loss": 0.5787, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.1963920593261719, | |
| "rewards/margins": 1.1246505975723267, | |
| "rewards/rejected": -2.321042537689209, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.3278145695364238e-07, | |
| "logits/chosen": -2.3099443912506104, | |
| "logits/rejected": -2.293797016143799, | |
| "logps/chosen": -143.53994750976562, | |
| "logps/rejected": -156.98973083496094, | |
| "loss": 0.5597, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.4224615097045898, | |
| "rewards/margins": 2.5084261894226074, | |
| "rewards/rejected": -3.9308879375457764, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.3112582781456952e-07, | |
| "logits/chosen": -2.2922520637512207, | |
| "logits/rejected": -2.3070366382598877, | |
| "logps/chosen": -105.562744140625, | |
| "logps/rejected": -115.12934875488281, | |
| "loss": 0.4832, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.130995512008667, | |
| "rewards/margins": 1.4186890125274658, | |
| "rewards/rejected": -2.549685001373291, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.2947019867549669e-07, | |
| "logits/chosen": -2.32692289352417, | |
| "logits/rejected": -2.284797191619873, | |
| "logps/chosen": -110.09574127197266, | |
| "logps/rejected": -141.3235321044922, | |
| "loss": 0.5419, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.62274169921875, | |
| "rewards/margins": 2.5739684104919434, | |
| "rewards/rejected": -4.196709632873535, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.2781456953642383e-07, | |
| "logits/chosen": -2.2572226524353027, | |
| "logits/rejected": -2.3021931648254395, | |
| "logps/chosen": -103.7448501586914, | |
| "logps/rejected": -124.720947265625, | |
| "loss": 0.5929, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.1002720594406128, | |
| "rewards/margins": 1.6038618087768555, | |
| "rewards/rejected": -2.704134225845337, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.2615894039735097e-07, | |
| "logits/chosen": -2.1988630294799805, | |
| "logits/rejected": -2.1840052604675293, | |
| "logps/chosen": -97.76619720458984, | |
| "logps/rejected": -114.77903747558594, | |
| "loss": 0.536, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.6038196086883545, | |
| "rewards/margins": 1.3313450813293457, | |
| "rewards/rejected": -2.9351646900177, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.2450331125827813e-07, | |
| "logits/chosen": -2.2937569618225098, | |
| "logits/rejected": -2.1716551780700684, | |
| "logps/chosen": -122.71870422363281, | |
| "logps/rejected": -126.25750732421875, | |
| "loss": 0.5508, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.7580636739730835, | |
| "rewards/margins": 1.4850653409957886, | |
| "rewards/rejected": -3.243128538131714, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_logits/chosen": -2.1208481788635254, | |
| "eval_logits/rejected": -2.0760180950164795, | |
| "eval_logps/chosen": -117.771728515625, | |
| "eval_logps/rejected": -125.91642761230469, | |
| "eval_loss": 0.5643959641456604, | |
| "eval_rewards/accuracies": 0.6785714030265808, | |
| "eval_rewards/chosen": -1.7905113697052002, | |
| "eval_rewards/margins": 1.0963507890701294, | |
| "eval_rewards/rejected": -2.886862277984619, | |
| "eval_runtime": 519.2917, | |
| "eval_samples_per_second": 3.439, | |
| "eval_steps_per_second": 0.108, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.228476821192053e-07, | |
| "logits/chosen": -2.2932658195495605, | |
| "logits/rejected": -2.2196624279022217, | |
| "logps/chosen": -121.21055603027344, | |
| "logps/rejected": -114.63111877441406, | |
| "loss": 0.7063, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.775307059288025, | |
| "rewards/margins": 0.6449312567710876, | |
| "rewards/rejected": -2.420238494873047, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.2119205298013244e-07, | |
| "logits/chosen": -2.137760877609253, | |
| "logits/rejected": -2.1844496726989746, | |
| "logps/chosen": -138.58255004882812, | |
| "logps/rejected": -127.23612213134766, | |
| "loss": 0.5234, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.9617927074432373, | |
| "rewards/margins": 0.6043619513511658, | |
| "rewards/rejected": -2.5661544799804688, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.1953642384105958e-07, | |
| "logits/chosen": -2.2938625812530518, | |
| "logits/rejected": -2.268752336502075, | |
| "logps/chosen": -113.85087585449219, | |
| "logps/rejected": -149.66539001464844, | |
| "loss": 0.5987, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.5664665699005127, | |
| "rewards/margins": 0.8736650347709656, | |
| "rewards/rejected": -2.440131664276123, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 2.1788079470198675e-07, | |
| "logits/chosen": -2.3219776153564453, | |
| "logits/rejected": -2.350645065307617, | |
| "logps/chosen": -94.68901824951172, | |
| "logps/rejected": -106.27494812011719, | |
| "loss": 0.6865, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.4543777704238892, | |
| "rewards/margins": 0.8551589846611023, | |
| "rewards/rejected": -2.309536933898926, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 2.1622516556291389e-07, | |
| "logits/chosen": -2.2941012382507324, | |
| "logits/rejected": -2.2624030113220215, | |
| "logps/chosen": -125.8183822631836, | |
| "logps/rejected": -133.0880584716797, | |
| "loss": 0.5175, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.4116047620773315, | |
| "rewards/margins": 1.2253597974777222, | |
| "rewards/rejected": -2.6369645595550537, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 2.1456953642384105e-07, | |
| "logits/chosen": -2.176222562789917, | |
| "logits/rejected": -2.0717849731445312, | |
| "logps/chosen": -104.74139404296875, | |
| "logps/rejected": -124.4080581665039, | |
| "loss": 0.4433, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.621763825416565, | |
| "rewards/margins": 1.3861135244369507, | |
| "rewards/rejected": -3.0078773498535156, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 2.1291390728476822e-07, | |
| "logits/chosen": -2.244816541671753, | |
| "logits/rejected": -2.217611074447632, | |
| "logps/chosen": -126.18927001953125, | |
| "logps/rejected": -129.03915405273438, | |
| "loss": 0.559, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.9465411901474, | |
| "rewards/margins": 0.7713083028793335, | |
| "rewards/rejected": -2.7178492546081543, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 2.1125827814569536e-07, | |
| "logits/chosen": -2.331676721572876, | |
| "logits/rejected": -2.249488353729248, | |
| "logps/chosen": -115.93875885009766, | |
| "logps/rejected": -122.89765930175781, | |
| "loss": 0.5416, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.3803373575210571, | |
| "rewards/margins": 0.6847006678581238, | |
| "rewards/rejected": -2.065037965774536, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 2.096026490066225e-07, | |
| "logits/chosen": -2.2945587635040283, | |
| "logits/rejected": -2.2709438800811768, | |
| "logps/chosen": -102.5355453491211, | |
| "logps/rejected": -105.96309661865234, | |
| "loss": 0.6918, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -1.1539552211761475, | |
| "rewards/margins": 1.233269453048706, | |
| "rewards/rejected": -2.3872246742248535, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 2.0794701986754967e-07, | |
| "logits/chosen": -2.260633945465088, | |
| "logits/rejected": -2.257582187652588, | |
| "logps/chosen": -123.7904052734375, | |
| "logps/rejected": -139.2223663330078, | |
| "loss": 0.489, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.6961309909820557, | |
| "rewards/margins": 1.7095321416854858, | |
| "rewards/rejected": -3.405663251876831, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 2.062913907284768e-07, | |
| "logits/chosen": -2.4550869464874268, | |
| "logits/rejected": -2.369741439819336, | |
| "logps/chosen": -110.0873031616211, | |
| "logps/rejected": -110.52005767822266, | |
| "loss": 0.5515, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -1.5832288265228271, | |
| "rewards/margins": 0.5315386652946472, | |
| "rewards/rejected": -2.114767551422119, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 2.0463576158940397e-07, | |
| "logits/chosen": -2.1035804748535156, | |
| "logits/rejected": -2.0870535373687744, | |
| "logps/chosen": -108.03116607666016, | |
| "logps/rejected": -193.72422790527344, | |
| "loss": 0.4535, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.8296096324920654, | |
| "rewards/margins": 8.629142761230469, | |
| "rewards/rejected": -10.45875072479248, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 2.029801324503311e-07, | |
| "logits/chosen": -2.3085687160491943, | |
| "logits/rejected": -2.3340985774993896, | |
| "logps/chosen": -117.62290954589844, | |
| "logps/rejected": -126.44264221191406, | |
| "loss": 2.8703, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.7038648128509521, | |
| "rewards/margins": 1.1059377193450928, | |
| "rewards/rejected": -2.809802770614624, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 2.0132450331125828e-07, | |
| "logits/chosen": -2.4316937923431396, | |
| "logits/rejected": -2.3887412548065186, | |
| "logps/chosen": -138.1640625, | |
| "logps/rejected": -139.81886291503906, | |
| "loss": 0.5099, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.448891520500183, | |
| "rewards/margins": 0.9665302038192749, | |
| "rewards/rejected": -2.415421962738037, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.9966887417218542e-07, | |
| "logits/chosen": -2.5827786922454834, | |
| "logits/rejected": -2.53794002532959, | |
| "logps/chosen": -126.8006362915039, | |
| "logps/rejected": -126.6136474609375, | |
| "loss": 0.515, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.2876603603363037, | |
| "rewards/margins": 0.7639477849006653, | |
| "rewards/rejected": -2.051608085632324, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.9801324503311256e-07, | |
| "logits/chosen": -2.3453097343444824, | |
| "logits/rejected": -2.4177701473236084, | |
| "logps/chosen": -111.456787109375, | |
| "logps/rejected": -121.5312728881836, | |
| "loss": 0.5125, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.4392328262329102, | |
| "rewards/margins": 1.1764917373657227, | |
| "rewards/rejected": -2.615724563598633, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.9635761589403973e-07, | |
| "logits/chosen": -2.2641375064849854, | |
| "logits/rejected": -2.3049521446228027, | |
| "logps/chosen": -95.91242980957031, | |
| "logps/rejected": -111.0653305053711, | |
| "loss": 0.5322, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.325178861618042, | |
| "rewards/margins": 1.0794451236724854, | |
| "rewards/rejected": -2.4046239852905273, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.947019867549669e-07, | |
| "logits/chosen": -2.3387389183044434, | |
| "logits/rejected": -2.2360782623291016, | |
| "logps/chosen": -100.87395477294922, | |
| "logps/rejected": -111.1401138305664, | |
| "loss": 0.49, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.7546203136444092, | |
| "rewards/margins": 1.1646721363067627, | |
| "rewards/rejected": -2.919292449951172, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.9304635761589403e-07, | |
| "logits/chosen": -2.3461403846740723, | |
| "logits/rejected": -2.3300156593322754, | |
| "logps/chosen": -103.99101257324219, | |
| "logps/rejected": -122.7502212524414, | |
| "loss": 0.5215, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.1400959491729736, | |
| "rewards/margins": 1.375199556350708, | |
| "rewards/rejected": -2.5152957439422607, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.913907284768212e-07, | |
| "logits/chosen": -2.353959560394287, | |
| "logits/rejected": -2.2813894748687744, | |
| "logps/chosen": -110.76973724365234, | |
| "logps/rejected": -133.5187225341797, | |
| "loss": 0.5406, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.2461332082748413, | |
| "rewards/margins": 2.200319766998291, | |
| "rewards/rejected": -3.446453094482422, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.8973509933774834e-07, | |
| "logits/chosen": -2.483916759490967, | |
| "logits/rejected": -2.3655548095703125, | |
| "logps/chosen": -108.8669662475586, | |
| "logps/rejected": -116.64085388183594, | |
| "loss": 0.5907, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.9422400593757629, | |
| "rewards/margins": 0.9399551153182983, | |
| "rewards/rejected": -1.8821951150894165, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.8807947019867548e-07, | |
| "logits/chosen": -2.3532841205596924, | |
| "logits/rejected": -2.3442747592926025, | |
| "logps/chosen": -99.94935607910156, | |
| "logps/rejected": -115.6842269897461, | |
| "loss": 0.6063, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.1935023069381714, | |
| "rewards/margins": 1.022131323814392, | |
| "rewards/rejected": -2.2156338691711426, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.8642384105960262e-07, | |
| "logits/chosen": -2.473654270172119, | |
| "logits/rejected": -2.456444263458252, | |
| "logps/chosen": -128.30955505371094, | |
| "logps/rejected": -135.67520141601562, | |
| "loss": 0.5602, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.0379096269607544, | |
| "rewards/margins": 0.6246587038040161, | |
| "rewards/rejected": -1.6625683307647705, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.8476821192052979e-07, | |
| "logits/chosen": -2.4869556427001953, | |
| "logits/rejected": -2.449312686920166, | |
| "logps/chosen": -116.06538391113281, | |
| "logps/rejected": -120.8796615600586, | |
| "loss": 0.5219, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.9571272730827332, | |
| "rewards/margins": 1.021393060684204, | |
| "rewards/rejected": -1.978520154953003, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.8311258278145695e-07, | |
| "logits/chosen": -2.4670963287353516, | |
| "logits/rejected": -2.382390260696411, | |
| "logps/chosen": -123.17083740234375, | |
| "logps/rejected": -109.2813949584961, | |
| "loss": 0.5479, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.2968004941940308, | |
| "rewards/margins": 0.7737834453582764, | |
| "rewards/rejected": -2.0705838203430176, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.814569536423841e-07, | |
| "logits/chosen": -2.345423460006714, | |
| "logits/rejected": -2.3300931453704834, | |
| "logps/chosen": -92.75725555419922, | |
| "logps/rejected": -107.68856048583984, | |
| "loss": 0.4778, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -0.8894661068916321, | |
| "rewards/margins": 1.360033392906189, | |
| "rewards/rejected": -2.249499559402466, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.7980132450331126e-07, | |
| "logits/chosen": -2.2602181434631348, | |
| "logits/rejected": -2.1599280834198, | |
| "logps/chosen": -105.10438537597656, | |
| "logps/rejected": -131.7586669921875, | |
| "loss": 0.5319, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -1.113909125328064, | |
| "rewards/margins": 1.5821037292480469, | |
| "rewards/rejected": -2.6960129737854004, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.781456953642384e-07, | |
| "logits/chosen": -2.3455591201782227, | |
| "logits/rejected": -2.364595413208008, | |
| "logps/chosen": -130.79019165039062, | |
| "logps/rejected": -147.9118194580078, | |
| "loss": 0.4775, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.7606639862060547, | |
| "rewards/margins": 1.2344610691070557, | |
| "rewards/rejected": -2.9951250553131104, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.7649006622516554e-07, | |
| "logits/chosen": -2.167285442352295, | |
| "logits/rejected": -2.247238874435425, | |
| "logps/chosen": -139.6284942626953, | |
| "logps/rejected": -158.96109008789062, | |
| "loss": 0.7002, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -2.767961263656616, | |
| "rewards/margins": 1.04625403881073, | |
| "rewards/rejected": -3.8142154216766357, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.748344370860927e-07, | |
| "logits/chosen": -2.3983192443847656, | |
| "logits/rejected": -2.4019296169281006, | |
| "logps/chosen": -115.08357238769531, | |
| "logps/rejected": -129.3126220703125, | |
| "loss": 0.4762, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -1.6475099325180054, | |
| "rewards/margins": 1.2640306949615479, | |
| "rewards/rejected": -2.9115407466888428, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.7317880794701987e-07, | |
| "logits/chosen": -2.3797879219055176, | |
| "logits/rejected": -2.324965715408325, | |
| "logps/chosen": -116.2055435180664, | |
| "logps/rejected": -131.01705932617188, | |
| "loss": 0.5819, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.186680555343628, | |
| "rewards/margins": 1.4000349044799805, | |
| "rewards/rejected": -2.5867154598236084, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.71523178807947e-07, | |
| "logits/chosen": -2.227961778640747, | |
| "logits/rejected": -2.197960615158081, | |
| "logps/chosen": -106.0052490234375, | |
| "logps/rejected": -126.49810791015625, | |
| "loss": 0.5598, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -1.7543351650238037, | |
| "rewards/margins": 1.1439127922058105, | |
| "rewards/rejected": -2.8982481956481934, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.6986754966887418e-07, | |
| "logits/chosen": -2.34653902053833, | |
| "logits/rejected": -2.361428737640381, | |
| "logps/chosen": -93.7066879272461, | |
| "logps/rejected": -115.24361419677734, | |
| "loss": 0.5737, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.030475378036499, | |
| "rewards/margins": 1.2473831176757812, | |
| "rewards/rejected": -2.2778584957122803, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.6821192052980132e-07, | |
| "logits/chosen": -2.3806099891662598, | |
| "logits/rejected": -2.404531955718994, | |
| "logps/chosen": -108.77107238769531, | |
| "logps/rejected": -124.2442398071289, | |
| "loss": 0.5444, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.4443012475967407, | |
| "rewards/margins": 0.9878839254379272, | |
| "rewards/rejected": -2.432185411453247, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.6655629139072846e-07, | |
| "logits/chosen": -2.447935104370117, | |
| "logits/rejected": -2.4319026470184326, | |
| "logps/chosen": -116.2603988647461, | |
| "logps/rejected": -123.78592681884766, | |
| "loss": 0.4883, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.3556338548660278, | |
| "rewards/margins": 1.9813722372055054, | |
| "rewards/rejected": -3.337006092071533, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.649006622516556e-07, | |
| "logits/chosen": -2.3291103839874268, | |
| "logits/rejected": -2.3166141510009766, | |
| "logps/chosen": -111.0199203491211, | |
| "logps/rejected": -117.00514221191406, | |
| "loss": 0.4997, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.5363482236862183, | |
| "rewards/margins": 0.870397686958313, | |
| "rewards/rejected": -2.4067459106445312, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.632450331125828e-07, | |
| "logits/chosen": -2.2856314182281494, | |
| "logits/rejected": -2.264632225036621, | |
| "logps/chosen": -102.96492004394531, | |
| "logps/rejected": -126.04007720947266, | |
| "loss": 0.456, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.1887879371643066, | |
| "rewards/margins": 1.522206425666809, | |
| "rewards/rejected": -2.7109944820404053, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.6158940397350993e-07, | |
| "logits/chosen": -2.2702925205230713, | |
| "logits/rejected": -2.237971782684326, | |
| "logps/chosen": -99.81072998046875, | |
| "logps/rejected": -124.4561996459961, | |
| "loss": 0.5386, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.322842001914978, | |
| "rewards/margins": 1.6832103729248047, | |
| "rewards/rejected": -3.0060524940490723, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.5993377483443707e-07, | |
| "logits/chosen": -2.265434741973877, | |
| "logits/rejected": -2.2928626537323, | |
| "logps/chosen": -83.43587493896484, | |
| "logps/rejected": -114.23832702636719, | |
| "loss": 0.5913, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -0.8695703744888306, | |
| "rewards/margins": 2.3257930278778076, | |
| "rewards/rejected": -3.1953632831573486, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.5827814569536424e-07, | |
| "logits/chosen": -2.575456142425537, | |
| "logits/rejected": -2.395871162414551, | |
| "logps/chosen": -105.5760498046875, | |
| "logps/rejected": -97.21208190917969, | |
| "loss": 0.5091, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -1.1773121356964111, | |
| "rewards/margins": 0.5559796690940857, | |
| "rewards/rejected": -1.7332916259765625, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.5662251655629138e-07, | |
| "logits/chosen": -2.4753453731536865, | |
| "logits/rejected": -2.414577007293701, | |
| "logps/chosen": -143.94302368164062, | |
| "logps/rejected": -137.99838256835938, | |
| "loss": 0.5287, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.1706478595733643, | |
| "rewards/margins": 1.126072883605957, | |
| "rewards/rejected": -2.2967207431793213, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.5496688741721852e-07, | |
| "logits/chosen": -2.412086009979248, | |
| "logits/rejected": -2.3731260299682617, | |
| "logps/chosen": -106.2443618774414, | |
| "logps/rejected": -112.93099212646484, | |
| "loss": 0.4915, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.2554187774658203, | |
| "rewards/margins": 1.3224613666534424, | |
| "rewards/rejected": -2.5778801441192627, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.533112582781457e-07, | |
| "logits/chosen": -2.2778310775756836, | |
| "logits/rejected": -2.256308078765869, | |
| "logps/chosen": -120.01104736328125, | |
| "logps/rejected": -123.55589294433594, | |
| "loss": 0.465, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.085076093673706, | |
| "rewards/margins": 1.26901376247406, | |
| "rewards/rejected": -2.3540899753570557, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.5165562913907285e-07, | |
| "logits/chosen": -2.393749475479126, | |
| "logits/rejected": -2.3263931274414062, | |
| "logps/chosen": -117.22212219238281, | |
| "logps/rejected": -138.7658233642578, | |
| "loss": 0.5859, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.8770434856414795, | |
| "rewards/margins": 1.216582179069519, | |
| "rewards/rejected": -3.0936264991760254, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.5e-07, | |
| "logits/chosen": -2.4931600093841553, | |
| "logits/rejected": -2.4225075244903564, | |
| "logps/chosen": -125.9542236328125, | |
| "logps/rejected": -141.84219360351562, | |
| "loss": 0.5621, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.3538328409194946, | |
| "rewards/margins": 1.1864392757415771, | |
| "rewards/rejected": -2.5402722358703613, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.4834437086092716e-07, | |
| "logits/chosen": -2.3211989402770996, | |
| "logits/rejected": -2.3927392959594727, | |
| "logps/chosen": -94.21218872070312, | |
| "logps/rejected": -111.05567932128906, | |
| "loss": 0.5479, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.555248737335205, | |
| "rewards/margins": 0.7776703834533691, | |
| "rewards/rejected": -2.332918882369995, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.466887417218543e-07, | |
| "logits/chosen": -2.4537739753723145, | |
| "logits/rejected": -2.3887171745300293, | |
| "logps/chosen": -104.2787857055664, | |
| "logps/rejected": -113.91764831542969, | |
| "loss": 0.6143, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.352063775062561, | |
| "rewards/margins": 0.9023284912109375, | |
| "rewards/rejected": -2.254392147064209, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.4503311258278144e-07, | |
| "logits/chosen": -2.4015471935272217, | |
| "logits/rejected": -2.42402720451355, | |
| "logps/chosen": -114.39097595214844, | |
| "logps/rejected": -132.64340209960938, | |
| "loss": 0.5667, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.2820857763290405, | |
| "rewards/margins": 1.0415351390838623, | |
| "rewards/rejected": -2.323620557785034, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.4337748344370858e-07, | |
| "logits/chosen": -2.355255126953125, | |
| "logits/rejected": -2.277355909347534, | |
| "logps/chosen": -102.35648345947266, | |
| "logps/rejected": -109.78977966308594, | |
| "loss": 0.5125, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.6282755136489868, | |
| "rewards/margins": 1.255506992340088, | |
| "rewards/rejected": -2.8837826251983643, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.4172185430463577e-07, | |
| "logits/chosen": -2.5044684410095215, | |
| "logits/rejected": -2.3574650287628174, | |
| "logps/chosen": -130.39955139160156, | |
| "logps/rejected": -128.08071899414062, | |
| "loss": 0.5218, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.2382522821426392, | |
| "rewards/margins": 1.3744902610778809, | |
| "rewards/rejected": -2.6127424240112305, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_logits/chosen": -2.261568784713745, | |
| "eval_logits/rejected": -2.2171857357025146, | |
| "eval_logps/chosen": -113.09461975097656, | |
| "eval_logps/rejected": -122.51795959472656, | |
| "eval_loss": 0.5183302164077759, | |
| "eval_rewards/accuracies": 0.703125, | |
| "eval_rewards/chosen": -1.3228007555007935, | |
| "eval_rewards/margins": 1.2242145538330078, | |
| "eval_rewards/rejected": -2.54701566696167, | |
| "eval_runtime": 523.6034, | |
| "eval_samples_per_second": 3.411, | |
| "eval_steps_per_second": 0.107, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.4006622516556291e-07, | |
| "logits/chosen": -2.4229695796966553, | |
| "logits/rejected": -2.3659071922302246, | |
| "logps/chosen": -97.41563415527344, | |
| "logps/rejected": -107.28167724609375, | |
| "loss": 0.5285, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.0089452266693115, | |
| "rewards/margins": 1.1100685596466064, | |
| "rewards/rejected": -2.119013786315918, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.3841059602649005e-07, | |
| "logits/chosen": -2.368020534515381, | |
| "logits/rejected": -2.266580820083618, | |
| "logps/chosen": -107.78886413574219, | |
| "logps/rejected": -124.20140075683594, | |
| "loss": 0.4815, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.1261156797409058, | |
| "rewards/margins": 1.54592764377594, | |
| "rewards/rejected": -2.6720430850982666, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.3675496688741722e-07, | |
| "logits/chosen": -2.3915557861328125, | |
| "logits/rejected": -2.3538260459899902, | |
| "logps/chosen": -96.66950988769531, | |
| "logps/rejected": -107.39601135253906, | |
| "loss": 0.5181, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.2402719259262085, | |
| "rewards/margins": 1.5004689693450928, | |
| "rewards/rejected": -2.7407407760620117, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.3509933774834436e-07, | |
| "logits/chosen": -2.3380367755889893, | |
| "logits/rejected": -2.2895896434783936, | |
| "logps/chosen": -122.12117767333984, | |
| "logps/rejected": -122.6964111328125, | |
| "loss": 0.5068, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.7105709314346313, | |
| "rewards/margins": 1.117949366569519, | |
| "rewards/rejected": -2.8285202980041504, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.334437086092715e-07, | |
| "logits/chosen": -2.650242567062378, | |
| "logits/rejected": -2.575338840484619, | |
| "logps/chosen": -116.67132568359375, | |
| "logps/rejected": -121.65495300292969, | |
| "loss": 0.4781, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.0724276304244995, | |
| "rewards/margins": 0.7939808964729309, | |
| "rewards/rejected": -1.8664085865020752, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.317880794701987e-07, | |
| "logits/chosen": -2.4393889904022217, | |
| "logits/rejected": -2.356849431991577, | |
| "logps/chosen": -108.2217788696289, | |
| "logps/rejected": -105.802734375, | |
| "loss": 0.4569, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.9382781982421875, | |
| "rewards/margins": 0.9701493978500366, | |
| "rewards/rejected": -1.9084275960922241, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.3013245033112583e-07, | |
| "logits/chosen": -2.27262544631958, | |
| "logits/rejected": -2.2650160789489746, | |
| "logps/chosen": -82.49347686767578, | |
| "logps/rejected": -105.01361083984375, | |
| "loss": 0.4757, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.5115716457366943, | |
| "rewards/margins": 1.9998562335968018, | |
| "rewards/rejected": -2.511428117752075, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.2847682119205297e-07, | |
| "logits/chosen": -2.3641304969787598, | |
| "logits/rejected": -2.400428533554077, | |
| "logps/chosen": -95.62802124023438, | |
| "logps/rejected": -105.62736511230469, | |
| "loss": 0.5091, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.2022227048873901, | |
| "rewards/margins": 1.1635633707046509, | |
| "rewards/rejected": -2.365786075592041, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.2682119205298011e-07, | |
| "logits/chosen": -2.2362232208251953, | |
| "logits/rejected": -2.294517993927002, | |
| "logps/chosen": -111.7828140258789, | |
| "logps/rejected": -107.35648345947266, | |
| "loss": 0.5689, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.3542122840881348, | |
| "rewards/margins": 0.9707919359207153, | |
| "rewards/rejected": -2.3250043392181396, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.2516556291390728e-07, | |
| "logits/chosen": -2.4351532459259033, | |
| "logits/rejected": -2.3938307762145996, | |
| "logps/chosen": -116.37557220458984, | |
| "logps/rejected": -142.02877807617188, | |
| "loss": 0.4966, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.059444546699524, | |
| "rewards/margins": 1.6750872135162354, | |
| "rewards/rejected": -2.734531879425049, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.2350993377483442e-07, | |
| "logits/chosen": -2.130566358566284, | |
| "logits/rejected": -2.1427571773529053, | |
| "logps/chosen": -98.26994323730469, | |
| "logps/rejected": -125.13362121582031, | |
| "loss": 0.5217, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.1679749488830566, | |
| "rewards/margins": 2.0151760578155518, | |
| "rewards/rejected": -4.183150768280029, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.218543046357616e-07, | |
| "logits/chosen": -2.3847343921661377, | |
| "logits/rejected": -2.3289005756378174, | |
| "logps/chosen": -103.18563079833984, | |
| "logps/rejected": -106.60140228271484, | |
| "loss": 0.526, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.5415022373199463, | |
| "rewards/margins": 1.246010184288025, | |
| "rewards/rejected": -2.7875125408172607, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.2019867549668873e-07, | |
| "logits/chosen": -2.344989776611328, | |
| "logits/rejected": -2.2486376762390137, | |
| "logps/chosen": -111.1012954711914, | |
| "logps/rejected": -114.76014709472656, | |
| "loss": 0.4662, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.4294898509979248, | |
| "rewards/margins": 1.2511804103851318, | |
| "rewards/rejected": -2.6806702613830566, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.185430463576159e-07, | |
| "logits/chosen": -2.342101573944092, | |
| "logits/rejected": -2.3254072666168213, | |
| "logps/chosen": -114.9495620727539, | |
| "logps/rejected": -122.08809661865234, | |
| "loss": 0.4812, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.4265601634979248, | |
| "rewards/margins": 1.6178033351898193, | |
| "rewards/rejected": -3.0443637371063232, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.1688741721854305e-07, | |
| "logits/chosen": -2.329153537750244, | |
| "logits/rejected": -2.2368149757385254, | |
| "logps/chosen": -123.8796157836914, | |
| "logps/rejected": -119.62074279785156, | |
| "loss": 0.4744, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.5358905792236328, | |
| "rewards/margins": 1.2361419200897217, | |
| "rewards/rejected": -2.7720324993133545, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.1523178807947019e-07, | |
| "logits/chosen": -2.4591078758239746, | |
| "logits/rejected": -2.454157829284668, | |
| "logps/chosen": -116.4410629272461, | |
| "logps/rejected": -129.07809448242188, | |
| "loss": 0.5417, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.5216033458709717, | |
| "rewards/margins": 1.3263527154922485, | |
| "rewards/rejected": -2.8479561805725098, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.1357615894039735e-07, | |
| "logits/chosen": -2.287152051925659, | |
| "logits/rejected": -2.2752058506011963, | |
| "logps/chosen": -128.70211791992188, | |
| "logps/rejected": -141.4760284423828, | |
| "loss": 0.5571, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -1.9110784530639648, | |
| "rewards/margins": 1.6628735065460205, | |
| "rewards/rejected": -3.5739517211914062, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.119205298013245e-07, | |
| "logits/chosen": -2.3498637676239014, | |
| "logits/rejected": -2.3876309394836426, | |
| "logps/chosen": -121.30989074707031, | |
| "logps/rejected": -129.5779571533203, | |
| "loss": 0.4927, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -1.256763219833374, | |
| "rewards/margins": 1.8756353855133057, | |
| "rewards/rejected": -3.1323981285095215, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.1026490066225165e-07, | |
| "logits/chosen": -2.2559609413146973, | |
| "logits/rejected": -2.2900869846343994, | |
| "logps/chosen": -89.24148559570312, | |
| "logps/rejected": -104.9818115234375, | |
| "loss": 0.5639, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.5417016744613647, | |
| "rewards/margins": 1.3289363384246826, | |
| "rewards/rejected": -2.870638132095337, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.0860927152317881e-07, | |
| "logits/chosen": -2.3323373794555664, | |
| "logits/rejected": -2.4132628440856934, | |
| "logps/chosen": -112.27374267578125, | |
| "logps/rejected": -132.1646728515625, | |
| "loss": 0.5692, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.039150595664978, | |
| "rewards/margins": 1.5584369897842407, | |
| "rewards/rejected": -2.597587823867798, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.0695364238410595e-07, | |
| "logits/chosen": -2.2826695442199707, | |
| "logits/rejected": -2.232888698577881, | |
| "logps/chosen": -107.91890716552734, | |
| "logps/rejected": -114.87126159667969, | |
| "loss": 0.5245, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.3879740238189697, | |
| "rewards/margins": 1.4284284114837646, | |
| "rewards/rejected": -2.8164026737213135, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.0529801324503311e-07, | |
| "logits/chosen": -2.433330535888672, | |
| "logits/rejected": -2.3720269203186035, | |
| "logps/chosen": -122.9089584350586, | |
| "logps/rejected": -130.165771484375, | |
| "loss": 0.5503, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.7988684177398682, | |
| "rewards/margins": 1.0453321933746338, | |
| "rewards/rejected": -2.844200611114502, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.0364238410596025e-07, | |
| "logits/chosen": -2.432610511779785, | |
| "logits/rejected": -2.3609492778778076, | |
| "logps/chosen": -126.18135070800781, | |
| "logps/rejected": -137.95114135742188, | |
| "loss": 0.5377, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.1943086385726929, | |
| "rewards/margins": 1.0217430591583252, | |
| "rewards/rejected": -2.2160518169403076, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.0198675496688741e-07, | |
| "logits/chosen": -2.40020489692688, | |
| "logits/rejected": -2.333512783050537, | |
| "logps/chosen": -120.67720794677734, | |
| "logps/rejected": -123.46641540527344, | |
| "loss": 0.4558, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.2497332096099854, | |
| "rewards/margins": 1.2155460119247437, | |
| "rewards/rejected": -2.4652791023254395, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.0033112582781457e-07, | |
| "logits/chosen": -2.4432952404022217, | |
| "logits/rejected": -2.3959970474243164, | |
| "logps/chosen": -131.6014862060547, | |
| "logps/rejected": -145.7483673095703, | |
| "loss": 0.4373, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.9097809791564941, | |
| "rewards/margins": 1.4488131999969482, | |
| "rewards/rejected": -2.3585941791534424, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 9.867549668874171e-08, | |
| "logits/chosen": -2.2430427074432373, | |
| "logits/rejected": -2.2248117923736572, | |
| "logps/chosen": -99.05213928222656, | |
| "logps/rejected": -118.5693130493164, | |
| "loss": 0.5283, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.898768424987793, | |
| "rewards/margins": 1.2684627771377563, | |
| "rewards/rejected": -2.1672310829162598, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 9.701986754966887e-08, | |
| "logits/chosen": -2.4450364112854004, | |
| "logits/rejected": -2.3705830574035645, | |
| "logps/chosen": -107.76090240478516, | |
| "logps/rejected": -112.4260482788086, | |
| "loss": 0.4824, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.4488307237625122, | |
| "rewards/margins": 0.7578933835029602, | |
| "rewards/rejected": -2.206723928451538, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 9.536423841059603e-08, | |
| "logits/chosen": -2.4003443717956543, | |
| "logits/rejected": -2.348435878753662, | |
| "logps/chosen": -98.28638458251953, | |
| "logps/rejected": -100.79689025878906, | |
| "loss": 0.5439, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.41732919216156, | |
| "rewards/margins": 1.0273702144622803, | |
| "rewards/rejected": -2.444699764251709, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 9.370860927152317e-08, | |
| "logits/chosen": -2.3565890789031982, | |
| "logits/rejected": -2.3134591579437256, | |
| "logps/chosen": -122.64701080322266, | |
| "logps/rejected": -140.7588348388672, | |
| "loss": 0.54, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -1.8713737726211548, | |
| "rewards/margins": 0.9109483957290649, | |
| "rewards/rejected": -2.7823221683502197, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 9.205298013245033e-08, | |
| "logits/chosen": -2.4065792560577393, | |
| "logits/rejected": -2.343113422393799, | |
| "logps/chosen": -113.3506088256836, | |
| "logps/rejected": -118.6836166381836, | |
| "loss": 0.6089, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.640981912612915, | |
| "rewards/margins": 1.4465951919555664, | |
| "rewards/rejected": -3.0875768661499023, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 9.039735099337747e-08, | |
| "logits/chosen": -2.280989170074463, | |
| "logits/rejected": -2.2906501293182373, | |
| "logps/chosen": -108.36322021484375, | |
| "logps/rejected": -118.99311828613281, | |
| "loss": 0.4821, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.3466839790344238, | |
| "rewards/margins": 1.234621524810791, | |
| "rewards/rejected": -2.581305503845215, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 8.874172185430463e-08, | |
| "logits/chosen": -2.3098435401916504, | |
| "logits/rejected": -2.365722179412842, | |
| "logps/chosen": -142.2515411376953, | |
| "logps/rejected": -136.40847778320312, | |
| "loss": 0.6105, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.9295507669448853, | |
| "rewards/margins": 0.7886122465133667, | |
| "rewards/rejected": -2.718163013458252, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 8.70860927152318e-08, | |
| "logits/chosen": -2.4758474826812744, | |
| "logits/rejected": -2.4529106616973877, | |
| "logps/chosen": -102.67512512207031, | |
| "logps/rejected": -108.22530364990234, | |
| "loss": 0.4814, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.4004428386688232, | |
| "rewards/margins": 0.7858734130859375, | |
| "rewards/rejected": -2.1863162517547607, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 8.543046357615893e-08, | |
| "logits/chosen": -2.4003779888153076, | |
| "logits/rejected": -2.3763396739959717, | |
| "logps/chosen": -104.71977233886719, | |
| "logps/rejected": -117.16717529296875, | |
| "loss": 0.4928, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -1.1805320978164673, | |
| "rewards/margins": 1.6279674768447876, | |
| "rewards/rejected": -2.808499336242676, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 8.377483443708609e-08, | |
| "logits/chosen": -2.4015908241271973, | |
| "logits/rejected": -2.3405182361602783, | |
| "logps/chosen": -117.36273193359375, | |
| "logps/rejected": -124.22319030761719, | |
| "loss": 0.5652, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -1.3869013786315918, | |
| "rewards/margins": 0.9574357867240906, | |
| "rewards/rejected": -2.344337224960327, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 8.211920529801324e-08, | |
| "logits/chosen": -2.4349982738494873, | |
| "logits/rejected": -2.4097352027893066, | |
| "logps/chosen": -125.55684661865234, | |
| "logps/rejected": -132.021484375, | |
| "loss": 0.5082, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.4107874631881714, | |
| "rewards/margins": 0.8303612470626831, | |
| "rewards/rejected": -2.2411487102508545, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 8.04635761589404e-08, | |
| "logits/chosen": -2.265141248703003, | |
| "logits/rejected": -2.169220447540283, | |
| "logps/chosen": -102.09349060058594, | |
| "logps/rejected": -119.7810287475586, | |
| "loss": 0.553, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.4435564279556274, | |
| "rewards/margins": 1.294883131980896, | |
| "rewards/rejected": -2.7384393215179443, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 7.880794701986755e-08, | |
| "logits/chosen": -2.4385974407196045, | |
| "logits/rejected": -2.3579273223876953, | |
| "logps/chosen": -93.9774169921875, | |
| "logps/rejected": -96.58930969238281, | |
| "loss": 0.5111, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.5919442772865295, | |
| "rewards/margins": 0.9432849884033203, | |
| "rewards/rejected": -1.5352293252944946, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 7.71523178807947e-08, | |
| "logits/chosen": -2.4252941608428955, | |
| "logits/rejected": -2.308663845062256, | |
| "logps/chosen": -139.50485229492188, | |
| "logps/rejected": -134.99417114257812, | |
| "loss": 0.4841, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.1931045055389404, | |
| "rewards/margins": 1.3365012407302856, | |
| "rewards/rejected": -2.5296058654785156, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 7.549668874172185e-08, | |
| "logits/chosen": -2.3252806663513184, | |
| "logits/rejected": -2.2149767875671387, | |
| "logps/chosen": -119.28135681152344, | |
| "logps/rejected": -126.89034271240234, | |
| "loss": 0.4699, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.3043967485427856, | |
| "rewards/margins": 1.297178030014038, | |
| "rewards/rejected": -2.601574420928955, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 7.3841059602649e-08, | |
| "logits/chosen": -2.4337799549102783, | |
| "logits/rejected": -2.408616065979004, | |
| "logps/chosen": -105.0708236694336, | |
| "logps/rejected": -112.90872955322266, | |
| "loss": 0.5492, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.739833652973175, | |
| "rewards/margins": 1.0932036638259888, | |
| "rewards/rejected": -1.8330373764038086, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 7.218543046357616e-08, | |
| "logits/chosen": -2.474499225616455, | |
| "logits/rejected": -2.3793933391571045, | |
| "logps/chosen": -115.8188247680664, | |
| "logps/rejected": -119.8792953491211, | |
| "loss": 0.5534, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.7591004371643066, | |
| "rewards/margins": 1.390928864479065, | |
| "rewards/rejected": -2.150029182434082, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 7.052980132450331e-08, | |
| "logits/chosen": -2.342878580093384, | |
| "logits/rejected": -2.2635059356689453, | |
| "logps/chosen": -112.3121566772461, | |
| "logps/rejected": -118.00971984863281, | |
| "loss": 0.4827, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.1263339519500732, | |
| "rewards/margins": 0.7218903303146362, | |
| "rewards/rejected": -1.848224401473999, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 6.887417218543045e-08, | |
| "logits/chosen": -2.4378771781921387, | |
| "logits/rejected": -2.493478775024414, | |
| "logps/chosen": -101.32011413574219, | |
| "logps/rejected": -126.55435943603516, | |
| "loss": 0.4912, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.0950850248336792, | |
| "rewards/margins": 1.0815422534942627, | |
| "rewards/rejected": -2.1766273975372314, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 6.721854304635762e-08, | |
| "logits/chosen": -2.395272731781006, | |
| "logits/rejected": -2.352908134460449, | |
| "logps/chosen": -115.22686767578125, | |
| "logps/rejected": -114.85673522949219, | |
| "loss": 0.5139, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.910413384437561, | |
| "rewards/margins": 0.9333620071411133, | |
| "rewards/rejected": -1.8437751531600952, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 6.556291390728476e-08, | |
| "logits/chosen": -2.4603307247161865, | |
| "logits/rejected": -2.4367270469665527, | |
| "logps/chosen": -111.51399993896484, | |
| "logps/rejected": -120.80682373046875, | |
| "loss": 0.5692, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.1758167743682861, | |
| "rewards/margins": 0.8748563528060913, | |
| "rewards/rejected": -2.050673007965088, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 6.390728476821191e-08, | |
| "logits/chosen": -2.3244917392730713, | |
| "logits/rejected": -2.253732919692993, | |
| "logps/chosen": -108.8800277709961, | |
| "logps/rejected": -125.33662414550781, | |
| "loss": 0.4513, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -0.7894026637077332, | |
| "rewards/margins": 1.8209375143051147, | |
| "rewards/rejected": -2.610340118408203, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 6.225165562913907e-08, | |
| "logits/chosen": -2.387305974960327, | |
| "logits/rejected": -2.387345552444458, | |
| "logps/chosen": -107.43021392822266, | |
| "logps/rejected": -118.97044372558594, | |
| "loss": 0.6606, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.9636829495429993, | |
| "rewards/margins": 0.8673983812332153, | |
| "rewards/rejected": -1.8310810327529907, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 6.059602649006622e-08, | |
| "logits/chosen": -2.3770089149475098, | |
| "logits/rejected": -2.371371269226074, | |
| "logps/chosen": -123.25062561035156, | |
| "logps/rejected": -140.9857635498047, | |
| "loss": 0.5031, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.802879810333252, | |
| "rewards/margins": 1.1967840194702148, | |
| "rewards/rejected": -1.9996639490127563, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.8940397350993375e-08, | |
| "logits/chosen": -2.3844501972198486, | |
| "logits/rejected": -2.415923595428467, | |
| "logps/chosen": -96.17528533935547, | |
| "logps/rejected": -111.2402114868164, | |
| "loss": 0.4914, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.7955904006958008, | |
| "rewards/margins": 1.3297996520996094, | |
| "rewards/rejected": -2.125389814376831, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_logits/chosen": -2.2567203044891357, | |
| "eval_logits/rejected": -2.214937925338745, | |
| "eval_logps/chosen": -110.69182586669922, | |
| "eval_logps/rejected": -120.59849548339844, | |
| "eval_loss": 0.5078982710838318, | |
| "eval_rewards/accuracies": 0.7120535969734192, | |
| "eval_rewards/chosen": -1.0825201272964478, | |
| "eval_rewards/margins": 1.2725489139556885, | |
| "eval_rewards/rejected": -2.3550689220428467, | |
| "eval_runtime": 502.7018, | |
| "eval_samples_per_second": 3.553, | |
| "eval_steps_per_second": 0.111, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.728476821192053e-08, | |
| "logits/chosen": -2.398317575454712, | |
| "logits/rejected": -2.4122400283813477, | |
| "logps/chosen": -93.20875549316406, | |
| "logps/rejected": -113.88653564453125, | |
| "loss": 0.549, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.8295547366142273, | |
| "rewards/margins": 1.022578239440918, | |
| "rewards/rejected": -1.852132797241211, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.5629139072847675e-08, | |
| "logits/chosen": -2.414301633834839, | |
| "logits/rejected": -2.3872337341308594, | |
| "logps/chosen": -129.2257080078125, | |
| "logps/rejected": -136.29031372070312, | |
| "loss": 0.4718, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -1.3618860244750977, | |
| "rewards/margins": 1.9172807931900024, | |
| "rewards/rejected": -3.2791664600372314, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.397350993377483e-08, | |
| "logits/chosen": -2.446453809738159, | |
| "logits/rejected": -2.384152889251709, | |
| "logps/chosen": -120.69456481933594, | |
| "logps/rejected": -128.5080108642578, | |
| "loss": 0.4889, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.820598304271698, | |
| "rewards/margins": 1.557586908340454, | |
| "rewards/rejected": -2.3781850337982178, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 5.231788079470199e-08, | |
| "logits/chosen": -2.416982889175415, | |
| "logits/rejected": -2.296403646469116, | |
| "logps/chosen": -110.80255126953125, | |
| "logps/rejected": -113.04368591308594, | |
| "loss": 0.4946, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.1277140378952026, | |
| "rewards/margins": 1.060675859451294, | |
| "rewards/rejected": -2.188389778137207, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 5.0662251655629135e-08, | |
| "logits/chosen": -2.355494976043701, | |
| "logits/rejected": -2.2958462238311768, | |
| "logps/chosen": -113.16410064697266, | |
| "logps/rejected": -119.9725112915039, | |
| "loss": 0.4515, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.6746724247932434, | |
| "rewards/margins": 1.8009824752807617, | |
| "rewards/rejected": -2.4756548404693604, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.900662251655629e-08, | |
| "logits/chosen": -2.4485743045806885, | |
| "logits/rejected": -2.426466703414917, | |
| "logps/chosen": -110.64210510253906, | |
| "logps/rejected": -122.92867279052734, | |
| "loss": 0.4162, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.9479681849479675, | |
| "rewards/margins": 1.6344906091690063, | |
| "rewards/rejected": -2.582458972930908, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.735099337748344e-08, | |
| "logits/chosen": -2.279062509536743, | |
| "logits/rejected": -2.2378296852111816, | |
| "logps/chosen": -117.4856185913086, | |
| "logps/rejected": -126.33473205566406, | |
| "loss": 0.5187, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.9676671028137207, | |
| "rewards/margins": 1.4139858484268188, | |
| "rewards/rejected": -2.381652593612671, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.5695364238410595e-08, | |
| "logits/chosen": -2.27183198928833, | |
| "logits/rejected": -2.2195851802825928, | |
| "logps/chosen": -99.91886138916016, | |
| "logps/rejected": -139.50657653808594, | |
| "loss": 0.5204, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.060943603515625, | |
| "rewards/margins": 2.9951958656311035, | |
| "rewards/rejected": -4.056139945983887, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.403973509933775e-08, | |
| "logits/chosen": -2.413677215576172, | |
| "logits/rejected": -2.440647602081299, | |
| "logps/chosen": -118.7281723022461, | |
| "logps/rejected": -134.04771423339844, | |
| "loss": 0.5028, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.119231939315796, | |
| "rewards/margins": 1.4490314722061157, | |
| "rewards/rejected": -2.568263530731201, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.23841059602649e-08, | |
| "logits/chosen": -2.3565783500671387, | |
| "logits/rejected": -2.4461493492126465, | |
| "logps/chosen": -108.08616638183594, | |
| "logps/rejected": -132.34011840820312, | |
| "loss": 0.485, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.1898248195648193, | |
| "rewards/margins": 1.3966195583343506, | |
| "rewards/rejected": -2.58644437789917, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.072847682119205e-08, | |
| "logits/chosen": -2.396179437637329, | |
| "logits/rejected": -2.4256176948547363, | |
| "logps/chosen": -96.67437744140625, | |
| "logps/rejected": -101.86246490478516, | |
| "loss": 0.4582, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.4870302081108093, | |
| "rewards/margins": 1.127990484237671, | |
| "rewards/rejected": -1.615020751953125, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.90728476821192e-08, | |
| "logits/chosen": -2.3725028038024902, | |
| "logits/rejected": -2.322782039642334, | |
| "logps/chosen": -128.52896118164062, | |
| "logps/rejected": -129.73118591308594, | |
| "loss": 0.5572, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.3587214946746826, | |
| "rewards/margins": 1.5958476066589355, | |
| "rewards/rejected": -2.954568862915039, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.7417218543046355e-08, | |
| "logits/chosen": -2.378821611404419, | |
| "logits/rejected": -2.277832269668579, | |
| "logps/chosen": -87.0296630859375, | |
| "logps/rejected": -106.12138366699219, | |
| "loss": 0.5238, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.8327251672744751, | |
| "rewards/margins": 1.2228658199310303, | |
| "rewards/rejected": -2.055591106414795, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.576158940397351e-08, | |
| "logits/chosen": -2.549872398376465, | |
| "logits/rejected": -2.4757115840911865, | |
| "logps/chosen": -114.14369201660156, | |
| "logps/rejected": -116.66259765625, | |
| "loss": 0.5169, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.9791749119758606, | |
| "rewards/margins": 0.8427003026008606, | |
| "rewards/rejected": -1.821874976158142, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.410596026490066e-08, | |
| "logits/chosen": -2.433527708053589, | |
| "logits/rejected": -2.371525764465332, | |
| "logps/chosen": -103.0054931640625, | |
| "logps/rejected": -103.31925964355469, | |
| "loss": 0.5538, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.8271923065185547, | |
| "rewards/margins": 1.119652509689331, | |
| "rewards/rejected": -1.9468450546264648, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.245033112582781e-08, | |
| "logits/chosen": -2.337153434753418, | |
| "logits/rejected": -2.2308475971221924, | |
| "logps/chosen": -129.55728149414062, | |
| "logps/rejected": -122.7024917602539, | |
| "loss": 0.4763, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.9213937520980835, | |
| "rewards/margins": 1.0446635484695435, | |
| "rewards/rejected": -1.9660571813583374, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.079470198675496e-08, | |
| "logits/chosen": -2.2858211994171143, | |
| "logits/rejected": -2.313380002975464, | |
| "logps/chosen": -107.20402526855469, | |
| "logps/rejected": -136.98562622070312, | |
| "loss": 0.5288, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.9993183016777039, | |
| "rewards/margins": 1.509690284729004, | |
| "rewards/rejected": -2.5090086460113525, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.913907284768212e-08, | |
| "logits/chosen": -2.3693079948425293, | |
| "logits/rejected": -2.284874677658081, | |
| "logps/chosen": -106.6112289428711, | |
| "logps/rejected": -126.05074310302734, | |
| "loss": 0.4491, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.7449665665626526, | |
| "rewards/margins": 1.826768159866333, | |
| "rewards/rejected": -2.571734666824341, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.748344370860927e-08, | |
| "logits/chosen": -2.2911553382873535, | |
| "logits/rejected": -2.380384922027588, | |
| "logps/chosen": -102.5718765258789, | |
| "logps/rejected": -124.40003967285156, | |
| "loss": 0.4937, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.7534275650978088, | |
| "rewards/margins": 1.1661580801010132, | |
| "rewards/rejected": -1.9195858240127563, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.5827814569536422e-08, | |
| "logits/chosen": -2.4230473041534424, | |
| "logits/rejected": -2.4315543174743652, | |
| "logps/chosen": -117.46553802490234, | |
| "logps/rejected": -130.05776977539062, | |
| "loss": 0.4991, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -0.8374800682067871, | |
| "rewards/margins": 1.2378642559051514, | |
| "rewards/rejected": -2.0753445625305176, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.4172185430463576e-08, | |
| "logits/chosen": -2.417757034301758, | |
| "logits/rejected": -2.2985901832580566, | |
| "logps/chosen": -132.27774047851562, | |
| "logps/rejected": -133.81459045410156, | |
| "loss": 0.5058, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.378666877746582, | |
| "rewards/margins": 0.97132807970047, | |
| "rewards/rejected": -2.3499951362609863, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.2516556291390726e-08, | |
| "logits/chosen": -2.327725887298584, | |
| "logits/rejected": -2.290168046951294, | |
| "logps/chosen": -118.74835205078125, | |
| "logps/rejected": -132.76882934570312, | |
| "loss": 0.6159, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -1.3064758777618408, | |
| "rewards/margins": 1.0742686986923218, | |
| "rewards/rejected": -2.380744457244873, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.0860927152317882e-08, | |
| "logits/chosen": -2.3731508255004883, | |
| "logits/rejected": -2.367323398590088, | |
| "logps/chosen": -126.88232421875, | |
| "logps/rejected": -135.72384643554688, | |
| "loss": 0.5072, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.890730082988739, | |
| "rewards/margins": 1.7571513652801514, | |
| "rewards/rejected": -2.6478817462921143, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.9205298013245032e-08, | |
| "logits/chosen": -2.4219555854797363, | |
| "logits/rejected": -2.4555513858795166, | |
| "logps/chosen": -96.6889419555664, | |
| "logps/rejected": -114.50843811035156, | |
| "loss": 0.514, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.1261937618255615, | |
| "rewards/margins": 1.0691124200820923, | |
| "rewards/rejected": -2.1953060626983643, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.7549668874172186e-08, | |
| "logits/chosen": -2.3101606369018555, | |
| "logits/rejected": -2.3013217449188232, | |
| "logps/chosen": -95.89967346191406, | |
| "logps/rejected": -99.94120025634766, | |
| "loss": 0.4685, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.2933688163757324, | |
| "rewards/margins": 0.8693240880966187, | |
| "rewards/rejected": -2.1626930236816406, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.5894039735099336e-08, | |
| "logits/chosen": -2.22920823097229, | |
| "logits/rejected": -2.2497153282165527, | |
| "logps/chosen": -83.50569152832031, | |
| "logps/rejected": -98.3634033203125, | |
| "loss": 0.514, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.0309844017028809, | |
| "rewards/margins": 1.4945679903030396, | |
| "rewards/rejected": -2.525552272796631, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.4238410596026489e-08, | |
| "logits/chosen": -2.220327854156494, | |
| "logits/rejected": -2.2442502975463867, | |
| "logps/chosen": -105.8703842163086, | |
| "logps/rejected": -126.78196716308594, | |
| "loss": 0.4796, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.513962984085083, | |
| "rewards/margins": 1.4240639209747314, | |
| "rewards/rejected": -2.9380269050598145, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.2582781456953642e-08, | |
| "logits/chosen": -2.417300224304199, | |
| "logits/rejected": -2.3726484775543213, | |
| "logps/chosen": -126.7840576171875, | |
| "logps/rejected": -133.47689819335938, | |
| "loss": 0.4275, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.0579255819320679, | |
| "rewards/margins": 1.5306205749511719, | |
| "rewards/rejected": -2.58854603767395, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.0927152317880794e-08, | |
| "logits/chosen": -2.4346401691436768, | |
| "logits/rejected": -2.4542853832244873, | |
| "logps/chosen": -119.21122741699219, | |
| "logps/rejected": -128.86886596679688, | |
| "loss": 0.4999, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.2019822597503662, | |
| "rewards/margins": 1.384701132774353, | |
| "rewards/rejected": -2.5866830348968506, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 9.271523178807947e-09, | |
| "logits/chosen": -2.4030935764312744, | |
| "logits/rejected": -2.3885276317596436, | |
| "logps/chosen": -111.55142974853516, | |
| "logps/rejected": -113.03800964355469, | |
| "loss": 0.6577, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -1.222048282623291, | |
| "rewards/margins": 0.9595780372619629, | |
| "rewards/rejected": -2.181626558303833, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 7.6158940397351e-09, | |
| "logits/chosen": -2.2258238792419434, | |
| "logits/rejected": -2.1862361431121826, | |
| "logps/chosen": -92.22099304199219, | |
| "logps/rejected": -98.86279296875, | |
| "loss": 0.5937, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.713568925857544, | |
| "rewards/margins": 0.8357810974121094, | |
| "rewards/rejected": -2.5493500232696533, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 5.960264900662252e-09, | |
| "logits/chosen": -2.317258358001709, | |
| "logits/rejected": -2.3031933307647705, | |
| "logps/chosen": -109.45621490478516, | |
| "logps/rejected": -111.22418212890625, | |
| "loss": 0.8281, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.2969977855682373, | |
| "rewards/margins": 1.3577762842178345, | |
| "rewards/rejected": -2.6547741889953613, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.3046357615894034e-09, | |
| "logits/chosen": -2.2622385025024414, | |
| "logits/rejected": -2.2199172973632812, | |
| "logps/chosen": -98.4054946899414, | |
| "logps/rejected": -112.76808166503906, | |
| "loss": 0.4438, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.9782658815383911, | |
| "rewards/margins": 1.7501733303070068, | |
| "rewards/rejected": -2.7284390926361084, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 2.6490066225165564e-09, | |
| "logits/chosen": -2.3729500770568848, | |
| "logits/rejected": -2.432080030441284, | |
| "logps/chosen": -101.60713195800781, | |
| "logps/rejected": -131.0595703125, | |
| "loss": 0.5899, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.8371769189834595, | |
| "rewards/margins": 0.8700039982795715, | |
| "rewards/rejected": -1.7071807384490967, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 9.933774834437085e-10, | |
| "logits/chosen": -2.2028284072875977, | |
| "logits/rejected": -2.2098453044891357, | |
| "logps/chosen": -109.49913024902344, | |
| "logps/rejected": -121.43013763427734, | |
| "loss": 0.4436, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.7948501110076904, | |
| "rewards/margins": 2.1088409423828125, | |
| "rewards/rejected": -2.903691291809082, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 3356, | |
| "total_flos": 0.0, | |
| "train_loss": 0.58384587518933, | |
| "train_runtime": 30698.0699, | |
| "train_samples_per_second": 1.749, | |
| "train_steps_per_second": 0.109 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3356, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |