| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9985553308292401, | |
| "eval_steps": 100, | |
| "global_step": 432, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.023114706732158336, | |
| "grad_norm": 68.13874053955078, | |
| "learning_rate": 2.2727272727272726e-07, | |
| "logits/chosen": -0.33626726269721985, | |
| "logits/rejected": -0.31605297327041626, | |
| "logps/chosen": -269.3142395019531, | |
| "logps/rejected": -267.5635681152344, | |
| "loss": 2.9227, | |
| "nll_loss": 1.0585803985595703, | |
| "rewards/accuracies": 0.4906249940395355, | |
| "rewards/chosen": -26.93142318725586, | |
| "rewards/margins": -0.17506682872772217, | |
| "rewards/rejected": -26.756357192993164, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.04622941346431667, | |
| "grad_norm": 57.248863220214844, | |
| "learning_rate": 4.545454545454545e-07, | |
| "logits/chosen": -0.3509574234485626, | |
| "logits/rejected": -0.3329581320285797, | |
| "logps/chosen": -260.00225830078125, | |
| "logps/rejected": -266.528076171875, | |
| "loss": 2.8595, | |
| "nll_loss": 0.9751935005187988, | |
| "rewards/accuracies": 0.565625011920929, | |
| "rewards/chosen": -26.000228881835938, | |
| "rewards/margins": 0.6525786519050598, | |
| "rewards/rejected": -26.652807235717773, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.06934412019647501, | |
| "grad_norm": 57.46038055419922, | |
| "learning_rate": 6.818181818181817e-07, | |
| "logits/chosen": -0.38174495100975037, | |
| "logits/rejected": -0.3690889775753021, | |
| "logps/chosen": -243.15576171875, | |
| "logps/rejected": -246.4366455078125, | |
| "loss": 2.6573, | |
| "nll_loss": 1.0130800008773804, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -24.315576553344727, | |
| "rewards/margins": 0.32808613777160645, | |
| "rewards/rejected": -24.64366340637207, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.09245882692863334, | |
| "grad_norm": 51.81425476074219, | |
| "learning_rate": 9.09090909090909e-07, | |
| "logits/chosen": -0.6905701756477356, | |
| "logits/rejected": -0.673626184463501, | |
| "logps/chosen": -202.0510711669922, | |
| "logps/rejected": -203.85264587402344, | |
| "loss": 2.3834, | |
| "nll_loss": 0.8630102872848511, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -20.20510482788086, | |
| "rewards/margins": 0.18015719950199127, | |
| "rewards/rejected": -20.385265350341797, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.11557353366079168, | |
| "grad_norm": 46.2398796081543, | |
| "learning_rate": 9.845360824742267e-07, | |
| "logits/chosen": -0.8133252263069153, | |
| "logits/rejected": -0.7886686325073242, | |
| "logps/chosen": -176.6295623779297, | |
| "logps/rejected": -175.64236450195312, | |
| "loss": 2.1663, | |
| "nll_loss": 0.46288958191871643, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -17.662960052490234, | |
| "rewards/margins": -0.09872126579284668, | |
| "rewards/rejected": -17.56423568725586, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.13868824039295002, | |
| "grad_norm": 55.56767272949219, | |
| "learning_rate": 9.587628865979382e-07, | |
| "logits/chosen": -0.6391716003417969, | |
| "logits/rejected": -0.6422410607337952, | |
| "logps/chosen": -158.78402709960938, | |
| "logps/rejected": -159.15992736816406, | |
| "loss": 1.9369, | |
| "nll_loss": 0.4064036011695862, | |
| "rewards/accuracies": 0.5406249761581421, | |
| "rewards/chosen": -15.878405570983887, | |
| "rewards/margins": 0.037588153034448624, | |
| "rewards/rejected": -15.915992736816406, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.16180294712510834, | |
| "grad_norm": 53.74827575683594, | |
| "learning_rate": 9.329896907216495e-07, | |
| "logits/chosen": -0.4799535274505615, | |
| "logits/rejected": -0.45562925934791565, | |
| "logps/chosen": -153.95602416992188, | |
| "logps/rejected": -156.0488739013672, | |
| "loss": 1.8829, | |
| "nll_loss": 0.33092719316482544, | |
| "rewards/accuracies": 0.5218750238418579, | |
| "rewards/chosen": -15.395601272583008, | |
| "rewards/margins": 0.20928561687469482, | |
| "rewards/rejected": -15.604887008666992, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1849176538572667, | |
| "grad_norm": 45.28865432739258, | |
| "learning_rate": 9.072164948453608e-07, | |
| "logits/chosen": -0.39702308177948, | |
| "logits/rejected": -0.3713148832321167, | |
| "logps/chosen": -158.48983764648438, | |
| "logps/rejected": -161.58985900878906, | |
| "loss": 1.7248, | |
| "nll_loss": 0.2892971634864807, | |
| "rewards/accuracies": 0.515625, | |
| "rewards/chosen": -15.848983764648438, | |
| "rewards/margins": 0.31000271439552307, | |
| "rewards/rejected": -16.158987045288086, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.208032360589425, | |
| "grad_norm": 47.74916076660156, | |
| "learning_rate": 8.814432989690721e-07, | |
| "logits/chosen": -0.39265576004981995, | |
| "logits/rejected": -0.3789977431297302, | |
| "logps/chosen": -151.72657775878906, | |
| "logps/rejected": -159.8419189453125, | |
| "loss": 1.625, | |
| "nll_loss": 0.24877457320690155, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -15.17265796661377, | |
| "rewards/margins": 0.8115337491035461, | |
| "rewards/rejected": -15.98419189453125, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.23114706732158335, | |
| "grad_norm": 46.02494430541992, | |
| "learning_rate": 8.556701030927834e-07, | |
| "logits/chosen": -0.3637830317020416, | |
| "logits/rejected": -0.34602683782577515, | |
| "logps/chosen": -156.66448974609375, | |
| "logps/rejected": -159.7339630126953, | |
| "loss": 1.7848, | |
| "nll_loss": 0.3014821708202362, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -15.666448593139648, | |
| "rewards/margins": 0.306946337223053, | |
| "rewards/rejected": -15.973396301269531, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.23114706732158335, | |
| "eval_logits/chosen": -0.3793767988681793, | |
| "eval_logits/rejected": -0.35158297419548035, | |
| "eval_logps/chosen": -153.7521209716797, | |
| "eval_logps/rejected": -157.6624755859375, | |
| "eval_loss": 1.6452385187149048, | |
| "eval_nll_loss": 0.27188077569007874, | |
| "eval_rewards/accuracies": 0.5804347991943359, | |
| "eval_rewards/chosen": -15.375213623046875, | |
| "eval_rewards/margins": 0.39103466272354126, | |
| "eval_rewards/rejected": -15.766247749328613, | |
| "eval_runtime": 77.4102, | |
| "eval_samples_per_second": 23.589, | |
| "eval_steps_per_second": 1.486, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2542617740537417, | |
| "grad_norm": 47.49018096923828, | |
| "learning_rate": 8.298969072164948e-07, | |
| "logits/chosen": -0.35000625252723694, | |
| "logits/rejected": -0.31752458214759827, | |
| "logps/chosen": -150.97109985351562, | |
| "logps/rejected": -155.34848022460938, | |
| "loss": 1.5701, | |
| "nll_loss": 0.26048144698143005, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -15.0971097946167, | |
| "rewards/margins": 0.4377376139163971, | |
| "rewards/rejected": -15.534846305847168, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.27737648078590005, | |
| "grad_norm": 45.2691535949707, | |
| "learning_rate": 8.041237113402062e-07, | |
| "logits/chosen": -0.3519677221775055, | |
| "logits/rejected": -0.32791006565093994, | |
| "logps/chosen": -157.8858642578125, | |
| "logps/rejected": -159.35427856445312, | |
| "loss": 1.6168, | |
| "nll_loss": 0.3137766718864441, | |
| "rewards/accuracies": 0.534375011920929, | |
| "rewards/chosen": -15.788586616516113, | |
| "rewards/margins": 0.14684121310710907, | |
| "rewards/rejected": -15.93542766571045, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.30049118751805837, | |
| "grad_norm": 57.24121856689453, | |
| "learning_rate": 7.783505154639175e-07, | |
| "logits/chosen": -0.42790165543556213, | |
| "logits/rejected": -0.40469226241111755, | |
| "logps/chosen": -157.43922424316406, | |
| "logps/rejected": -165.82485961914062, | |
| "loss": 1.6817, | |
| "nll_loss": 0.3016583323478699, | |
| "rewards/accuracies": 0.590624988079071, | |
| "rewards/chosen": -15.743922233581543, | |
| "rewards/margins": 0.8385635614395142, | |
| "rewards/rejected": -16.58248519897461, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.3236058942502167, | |
| "grad_norm": 58.36777114868164, | |
| "learning_rate": 7.525773195876288e-07, | |
| "logits/chosen": -0.5298252105712891, | |
| "logits/rejected": -0.5220087170600891, | |
| "logps/chosen": -148.60704040527344, | |
| "logps/rejected": -154.1773681640625, | |
| "loss": 1.6595, | |
| "nll_loss": 0.2847565710544586, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -14.86070442199707, | |
| "rewards/margins": 0.5570319294929504, | |
| "rewards/rejected": -15.41773796081543, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.34672060098237506, | |
| "grad_norm": 68.2182846069336, | |
| "learning_rate": 7.268041237113402e-07, | |
| "logits/chosen": -0.5616439580917358, | |
| "logits/rejected": -0.5490089654922485, | |
| "logps/chosen": -149.12344360351562, | |
| "logps/rejected": -154.98716735839844, | |
| "loss": 1.7257, | |
| "nll_loss": 0.2902756631374359, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -14.912343978881836, | |
| "rewards/margins": 0.5863727331161499, | |
| "rewards/rejected": -15.49871826171875, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3698353077145334, | |
| "grad_norm": 39.74797821044922, | |
| "learning_rate": 7.010309278350515e-07, | |
| "logits/chosen": -0.5092633962631226, | |
| "logits/rejected": -0.480968177318573, | |
| "logps/chosen": -163.02232360839844, | |
| "logps/rejected": -166.95147705078125, | |
| "loss": 1.5552, | |
| "nll_loss": 0.28835493326187134, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -16.302234649658203, | |
| "rewards/margins": 0.39291518926620483, | |
| "rewards/rejected": -16.695148468017578, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3929500144466917, | |
| "grad_norm": 47.49101257324219, | |
| "learning_rate": 6.752577319587629e-07, | |
| "logits/chosen": -0.3924413323402405, | |
| "logits/rejected": -0.3796409070491791, | |
| "logps/chosen": -158.3751220703125, | |
| "logps/rejected": -165.7510223388672, | |
| "loss": 1.4469, | |
| "nll_loss": 0.2873372733592987, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -15.837512016296387, | |
| "rewards/margins": 0.7375894784927368, | |
| "rewards/rejected": -16.575103759765625, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.41606472117885, | |
| "grad_norm": 47.435874938964844, | |
| "learning_rate": 6.494845360824742e-07, | |
| "logits/chosen": -0.47805255651474, | |
| "logits/rejected": -0.4726547598838806, | |
| "logps/chosen": -152.86453247070312, | |
| "logps/rejected": -159.31887817382812, | |
| "loss": 1.4799, | |
| "nll_loss": 0.2764199376106262, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -15.286453247070312, | |
| "rewards/margins": 0.6454333066940308, | |
| "rewards/rejected": -15.93188762664795, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4391794279110084, | |
| "grad_norm": 53.53675842285156, | |
| "learning_rate": 6.237113402061855e-07, | |
| "logits/chosen": -0.5260998606681824, | |
| "logits/rejected": -0.5136414766311646, | |
| "logps/chosen": -162.34234619140625, | |
| "logps/rejected": -168.7532196044922, | |
| "loss": 1.5476, | |
| "nll_loss": 0.28763675689697266, | |
| "rewards/accuracies": 0.5843750238418579, | |
| "rewards/chosen": -16.234233856201172, | |
| "rewards/margins": 0.6410863995552063, | |
| "rewards/rejected": -16.875320434570312, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4622941346431667, | |
| "grad_norm": 45.457393646240234, | |
| "learning_rate": 5.979381443298969e-07, | |
| "logits/chosen": -0.4183273911476135, | |
| "logits/rejected": -0.4106271266937256, | |
| "logps/chosen": -158.37742614746094, | |
| "logps/rejected": -164.9311065673828, | |
| "loss": 1.5276, | |
| "nll_loss": 0.2878963351249695, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -15.837743759155273, | |
| "rewards/margins": 0.6553663015365601, | |
| "rewards/rejected": -16.49310874938965, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4622941346431667, | |
| "eval_logits/chosen": -0.4236670732498169, | |
| "eval_logits/rejected": -0.39834392070770264, | |
| "eval_logps/chosen": -158.0997314453125, | |
| "eval_logps/rejected": -164.43028259277344, | |
| "eval_loss": 1.5229449272155762, | |
| "eval_nll_loss": 0.27477577328681946, | |
| "eval_rewards/accuracies": 0.604347825050354, | |
| "eval_rewards/chosen": -15.809972763061523, | |
| "eval_rewards/margins": 0.6330567002296448, | |
| "eval_rewards/rejected": -16.443029403686523, | |
| "eval_runtime": 77.51, | |
| "eval_samples_per_second": 23.558, | |
| "eval_steps_per_second": 1.484, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.48540884137532503, | |
| "grad_norm": 51.9904899597168, | |
| "learning_rate": 5.721649484536082e-07, | |
| "logits/chosen": -0.412663996219635, | |
| "logits/rejected": -0.385576069355011, | |
| "logps/chosen": -155.5344696044922, | |
| "logps/rejected": -157.98362731933594, | |
| "loss": 1.5112, | |
| "nll_loss": 0.2793150544166565, | |
| "rewards/accuracies": 0.5406249761581421, | |
| "rewards/chosen": -15.553448677062988, | |
| "rewards/margins": 0.24491462111473083, | |
| "rewards/rejected": -15.798362731933594, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.5085235481074833, | |
| "grad_norm": 49.318214416503906, | |
| "learning_rate": 5.463917525773195e-07, | |
| "logits/chosen": -0.41389769315719604, | |
| "logits/rejected": -0.38615840673446655, | |
| "logps/chosen": -168.82632446289062, | |
| "logps/rejected": -174.5264129638672, | |
| "loss": 1.5696, | |
| "nll_loss": 0.2951294779777527, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -16.882633209228516, | |
| "rewards/margins": 0.5700088143348694, | |
| "rewards/rejected": -17.4526424407959, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5316382548396418, | |
| "grad_norm": 47.010562896728516, | |
| "learning_rate": 5.20618556701031e-07, | |
| "logits/chosen": -0.3984927237033844, | |
| "logits/rejected": -0.3791876435279846, | |
| "logps/chosen": -166.37368774414062, | |
| "logps/rejected": -173.02516174316406, | |
| "loss": 1.583, | |
| "nll_loss": 0.29688653349876404, | |
| "rewards/accuracies": 0.5843750238418579, | |
| "rewards/chosen": -16.637371063232422, | |
| "rewards/margins": 0.6651442050933838, | |
| "rewards/rejected": -17.302515029907227, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5547529615718001, | |
| "grad_norm": 50.885963439941406, | |
| "learning_rate": 4.948453608247422e-07, | |
| "logits/chosen": -0.37319958209991455, | |
| "logits/rejected": -0.35784170031547546, | |
| "logps/chosen": -166.8050537109375, | |
| "logps/rejected": -172.14004516601562, | |
| "loss": 1.4338, | |
| "nll_loss": 0.28680044412612915, | |
| "rewards/accuracies": 0.559374988079071, | |
| "rewards/chosen": -16.68050765991211, | |
| "rewards/margins": 0.5334986448287964, | |
| "rewards/rejected": -17.214006423950195, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5778676683039584, | |
| "grad_norm": 60.3321418762207, | |
| "learning_rate": 4.6907216494845357e-07, | |
| "logits/chosen": -0.41152358055114746, | |
| "logits/rejected": -0.3965645730495453, | |
| "logps/chosen": -161.13088989257812, | |
| "logps/rejected": -164.7410888671875, | |
| "loss": 1.5649, | |
| "nll_loss": 0.25410208106040955, | |
| "rewards/accuracies": 0.559374988079071, | |
| "rewards/chosen": -16.113088607788086, | |
| "rewards/margins": 0.361021488904953, | |
| "rewards/rejected": -16.474109649658203, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6009823750361167, | |
| "grad_norm": 49.894596099853516, | |
| "learning_rate": 4.432989690721649e-07, | |
| "logits/chosen": -0.518215537071228, | |
| "logits/rejected": -0.504486083984375, | |
| "logps/chosen": -157.352294921875, | |
| "logps/rejected": -166.1837921142578, | |
| "loss": 1.4283, | |
| "nll_loss": 0.2800624370574951, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -15.7352294921875, | |
| "rewards/margins": 0.8831487894058228, | |
| "rewards/rejected": -16.618377685546875, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.624097081768275, | |
| "grad_norm": 44.237037658691406, | |
| "learning_rate": 4.175257731958763e-07, | |
| "logits/chosen": -0.4858783185482025, | |
| "logits/rejected": -0.47672492265701294, | |
| "logps/chosen": -158.05084228515625, | |
| "logps/rejected": -165.44720458984375, | |
| "loss": 1.4637, | |
| "nll_loss": 0.2714413106441498, | |
| "rewards/accuracies": 0.628125011920929, | |
| "rewards/chosen": -15.805084228515625, | |
| "rewards/margins": 0.739635169506073, | |
| "rewards/rejected": -16.544719696044922, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.6472117885004334, | |
| "grad_norm": 52.68519973754883, | |
| "learning_rate": 3.917525773195876e-07, | |
| "logits/chosen": -0.4353243410587311, | |
| "logits/rejected": -0.4047181010246277, | |
| "logps/chosen": -162.55743408203125, | |
| "logps/rejected": -171.99114990234375, | |
| "loss": 1.4694, | |
| "nll_loss": 0.2985231876373291, | |
| "rewards/accuracies": 0.6468750238418579, | |
| "rewards/chosen": -16.2557430267334, | |
| "rewards/margins": 0.9433721303939819, | |
| "rewards/rejected": -17.199115753173828, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6703264952325917, | |
| "grad_norm": 54.75222396850586, | |
| "learning_rate": 3.659793814432989e-07, | |
| "logits/chosen": -0.4927333891391754, | |
| "logits/rejected": -0.46107035875320435, | |
| "logps/chosen": -165.6143035888672, | |
| "logps/rejected": -171.44073486328125, | |
| "loss": 1.4147, | |
| "nll_loss": 0.2945927381515503, | |
| "rewards/accuracies": 0.6031249761581421, | |
| "rewards/chosen": -16.561431884765625, | |
| "rewards/margins": 0.5826419591903687, | |
| "rewards/rejected": -17.144071578979492, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6934412019647501, | |
| "grad_norm": 44.112884521484375, | |
| "learning_rate": 3.402061855670103e-07, | |
| "logits/chosen": -0.5113806128501892, | |
| "logits/rejected": -0.5009027719497681, | |
| "logps/chosen": -168.73641967773438, | |
| "logps/rejected": -178.53773498535156, | |
| "loss": 1.4811, | |
| "nll_loss": 0.3009123206138611, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -16.873641967773438, | |
| "rewards/margins": 0.9801331758499146, | |
| "rewards/rejected": -17.853775024414062, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6934412019647501, | |
| "eval_logits/chosen": -0.4338991940021515, | |
| "eval_logits/rejected": -0.40692025423049927, | |
| "eval_logps/chosen": -160.7057342529297, | |
| "eval_logps/rejected": -168.00125122070312, | |
| "eval_loss": 1.463964819908142, | |
| "eval_nll_loss": 0.28037506341934204, | |
| "eval_rewards/accuracies": 0.613043487071991, | |
| "eval_rewards/chosen": -16.070573806762695, | |
| "eval_rewards/margins": 0.7295539975166321, | |
| "eval_rewards/rejected": -16.800127029418945, | |
| "eval_runtime": 77.4432, | |
| "eval_samples_per_second": 23.579, | |
| "eval_steps_per_second": 1.485, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7165559086969084, | |
| "grad_norm": 47.5919303894043, | |
| "learning_rate": 3.1443298969072163e-07, | |
| "logits/chosen": -0.47527360916137695, | |
| "logits/rejected": -0.4661695957183838, | |
| "logps/chosen": -170.21279907226562, | |
| "logps/rejected": -174.0437774658203, | |
| "loss": 1.5403, | |
| "nll_loss": 0.3031921982765198, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -17.021282196044922, | |
| "rewards/margins": 0.38309773802757263, | |
| "rewards/rejected": -17.404376983642578, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.7396706154290668, | |
| "grad_norm": 45.896297454833984, | |
| "learning_rate": 2.8865979381443296e-07, | |
| "logits/chosen": -0.4675866961479187, | |
| "logits/rejected": -0.4545253813266754, | |
| "logps/chosen": -159.13848876953125, | |
| "logps/rejected": -168.03915405273438, | |
| "loss": 1.5256, | |
| "nll_loss": 0.28240206837654114, | |
| "rewards/accuracies": 0.628125011920929, | |
| "rewards/chosen": -15.913850784301758, | |
| "rewards/margins": 0.8900658488273621, | |
| "rewards/rejected": -16.80391502380371, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.7627853221612251, | |
| "grad_norm": 46.80994415283203, | |
| "learning_rate": 2.6288659793814435e-07, | |
| "logits/chosen": -0.4704606533050537, | |
| "logits/rejected": -0.4714192748069763, | |
| "logps/chosen": -158.95199584960938, | |
| "logps/rejected": -165.59017944335938, | |
| "loss": 1.5045, | |
| "nll_loss": 0.2972811162471771, | |
| "rewards/accuracies": 0.6031249761581421, | |
| "rewards/chosen": -15.8951997756958, | |
| "rewards/margins": 0.6638190150260925, | |
| "rewards/rejected": -16.559019088745117, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7859000288933834, | |
| "grad_norm": 44.699893951416016, | |
| "learning_rate": 2.3711340206185566e-07, | |
| "logits/chosen": -0.47059255838394165, | |
| "logits/rejected": -0.45767131447792053, | |
| "logps/chosen": -162.30491638183594, | |
| "logps/rejected": -167.87808227539062, | |
| "loss": 1.3925, | |
| "nll_loss": 0.2751705050468445, | |
| "rewards/accuracies": 0.578125, | |
| "rewards/chosen": -16.230493545532227, | |
| "rewards/margins": 0.5573164820671082, | |
| "rewards/rejected": -16.787809371948242, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.8090147356255417, | |
| "grad_norm": 46.135292053222656, | |
| "learning_rate": 2.11340206185567e-07, | |
| "logits/chosen": -0.4483928680419922, | |
| "logits/rejected": -0.4473996162414551, | |
| "logps/chosen": -163.79013061523438, | |
| "logps/rejected": -172.65423583984375, | |
| "loss": 1.4903, | |
| "nll_loss": 0.2803964912891388, | |
| "rewards/accuracies": 0.609375, | |
| "rewards/chosen": -16.379013061523438, | |
| "rewards/margins": 0.8864116668701172, | |
| "rewards/rejected": -17.265422821044922, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.8321294423577, | |
| "grad_norm": 56.81486511230469, | |
| "learning_rate": 1.8556701030927835e-07, | |
| "logits/chosen": -0.4406563639640808, | |
| "logits/rejected": -0.4262049198150635, | |
| "logps/chosen": -168.64210510253906, | |
| "logps/rejected": -174.23231506347656, | |
| "loss": 1.535, | |
| "nll_loss": 0.27415817975997925, | |
| "rewards/accuracies": 0.590624988079071, | |
| "rewards/chosen": -16.86421012878418, | |
| "rewards/margins": 0.559019923210144, | |
| "rewards/rejected": -17.423233032226562, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.8552441490898585, | |
| "grad_norm": 50.85494613647461, | |
| "learning_rate": 1.5979381443298966e-07, | |
| "logits/chosen": -0.4312410354614258, | |
| "logits/rejected": -0.4031241834163666, | |
| "logps/chosen": -162.82699584960938, | |
| "logps/rejected": -172.81155395507812, | |
| "loss": 1.4015, | |
| "nll_loss": 0.2656095623970032, | |
| "rewards/accuracies": 0.671875, | |
| "rewards/chosen": -16.282699584960938, | |
| "rewards/margins": 0.9984554052352905, | |
| "rewards/rejected": -17.28115463256836, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8783588558220168, | |
| "grad_norm": 54.44953918457031, | |
| "learning_rate": 1.3402061855670102e-07, | |
| "logits/chosen": -0.38503849506378174, | |
| "logits/rejected": -0.36510857939720154, | |
| "logps/chosen": -159.20938110351562, | |
| "logps/rejected": -166.0490264892578, | |
| "loss": 1.4196, | |
| "nll_loss": 0.2949269711971283, | |
| "rewards/accuracies": 0.6031249761581421, | |
| "rewards/chosen": -15.920938491821289, | |
| "rewards/margins": 0.6839638352394104, | |
| "rewards/rejected": -16.604902267456055, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.9014735625541751, | |
| "grad_norm": 53.84355926513672, | |
| "learning_rate": 1.0824742268041237e-07, | |
| "logits/chosen": -0.4367571473121643, | |
| "logits/rejected": -0.41515684127807617, | |
| "logps/chosen": -162.70645141601562, | |
| "logps/rejected": -173.05177307128906, | |
| "loss": 1.4525, | |
| "nll_loss": 0.27491894364356995, | |
| "rewards/accuracies": 0.6156250238418579, | |
| "rewards/chosen": -16.27064323425293, | |
| "rewards/margins": 1.0345335006713867, | |
| "rewards/rejected": -17.30517578125, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.9245882692863334, | |
| "grad_norm": 51.0332145690918, | |
| "learning_rate": 8.24742268041237e-08, | |
| "logits/chosen": -0.4105447232723236, | |
| "logits/rejected": -0.38223332166671753, | |
| "logps/chosen": -167.57431030273438, | |
| "logps/rejected": -172.93923950195312, | |
| "loss": 1.4642, | |
| "nll_loss": 0.3052617907524109, | |
| "rewards/accuracies": 0.578125, | |
| "rewards/chosen": -16.757429122924805, | |
| "rewards/margins": 0.5364928841590881, | |
| "rewards/rejected": -17.293922424316406, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.9245882692863334, | |
| "eval_logits/chosen": -0.38119494915008545, | |
| "eval_logits/rejected": -0.35089170932769775, | |
| "eval_logps/chosen": -161.57652282714844, | |
| "eval_logps/rejected": -169.120361328125, | |
| "eval_loss": 1.4428884983062744, | |
| "eval_nll_loss": 0.2844657897949219, | |
| "eval_rewards/accuracies": 0.6304348111152649, | |
| "eval_rewards/chosen": -16.157651901245117, | |
| "eval_rewards/margins": 0.7543851137161255, | |
| "eval_rewards/rejected": -16.912038803100586, | |
| "eval_runtime": 77.5027, | |
| "eval_samples_per_second": 23.56, | |
| "eval_steps_per_second": 1.484, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.9477029760184917, | |
| "grad_norm": 46.96026611328125, | |
| "learning_rate": 5.670103092783505e-08, | |
| "logits/chosen": -0.3439410626888275, | |
| "logits/rejected": -0.3307141661643982, | |
| "logps/chosen": -154.14857482910156, | |
| "logps/rejected": -163.5908966064453, | |
| "loss": 1.3967, | |
| "nll_loss": 0.2838439345359802, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -15.4148588180542, | |
| "rewards/margins": 0.9442328214645386, | |
| "rewards/rejected": -16.35909080505371, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.9708176827506501, | |
| "grad_norm": 44.309818267822266, | |
| "learning_rate": 3.092783505154639e-08, | |
| "logits/chosen": -0.3803669214248657, | |
| "logits/rejected": -0.3566874861717224, | |
| "logps/chosen": -156.36138916015625, | |
| "logps/rejected": -162.5567626953125, | |
| "loss": 1.4043, | |
| "nll_loss": 0.28173336386680603, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -15.636138916015625, | |
| "rewards/margins": 0.6195372939109802, | |
| "rewards/rejected": -16.25567626953125, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9939323894828085, | |
| "grad_norm": 48.28641128540039, | |
| "learning_rate": 5.154639175257731e-09, | |
| "logits/chosen": -0.3961271345615387, | |
| "logits/rejected": -0.40863722562789917, | |
| "logps/chosen": -164.62960815429688, | |
| "logps/rejected": -171.9886474609375, | |
| "loss": 1.4854, | |
| "nll_loss": 0.2709375023841858, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -16.46295928955078, | |
| "rewards/margins": 0.7359048128128052, | |
| "rewards/rejected": -17.198863983154297, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9985553308292401, | |
| "step": 432, | |
| "total_flos": 0.0, | |
| "train_loss": 1.6653077276768509, | |
| "train_runtime": 9934.0195, | |
| "train_samples_per_second": 5.574, | |
| "train_steps_per_second": 0.043 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 432, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |