| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 249, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.060240963855421686, | |
| "grad_norm": 6.8344698068850365, | |
| "learning_rate": 6.25e-06, | |
| "logits/chosen": 0.20106494426727295, | |
| "logits/rejected": 0.19740548729896545, | |
| "logps/chosen": -7.359566688537598, | |
| "logps/rejected": -7.794281959533691, | |
| "loss": 0.7984, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": 0.0005805142573080957, | |
| "rewards/margins": 0.00020884368859697133, | |
| "rewards/rejected": 0.0003716700884979218, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.12048192771084337, | |
| "grad_norm": 11.077793932918919, | |
| "learning_rate": 9.998300813454981e-06, | |
| "logits/chosen": 0.14798372983932495, | |
| "logits/rejected": 0.14764083921909332, | |
| "logps/chosen": -9.343093872070312, | |
| "logps/rejected": -10.072733879089355, | |
| "loss": 0.804, | |
| "rewards/accuracies": 0.6500000357627869, | |
| "rewards/chosen": -0.12536723911762238, | |
| "rewards/margins": 0.08952564746141434, | |
| "rewards/rejected": -0.21489287912845612, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.18072289156626506, | |
| "grad_norm": 22.64666641244041, | |
| "learning_rate": 9.979198225579968e-06, | |
| "logits/chosen": 0.09950681030750275, | |
| "logits/rejected": 0.10036235302686691, | |
| "logps/chosen": -15.521926879882812, | |
| "logps/rejected": -17.505037307739258, | |
| "loss": 0.9697, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.8507131338119507, | |
| "rewards/margins": 0.12971031665802002, | |
| "rewards/rejected": -0.9804234504699707, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.24096385542168675, | |
| "grad_norm": 15.494932567768581, | |
| "learning_rate": 9.938950460359912e-06, | |
| "logits/chosen": 0.19120505452156067, | |
| "logits/rejected": 0.19118723273277283, | |
| "logps/chosen": -16.326614379882812, | |
| "logps/rejected": -21.430246353149414, | |
| "loss": 0.9114, | |
| "rewards/accuracies": 0.5625000596046448, | |
| "rewards/chosen": -0.8851631879806519, | |
| "rewards/margins": 0.45752018690109253, | |
| "rewards/rejected": -1.3426833152770996, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.30120481927710846, | |
| "grad_norm": 7.705967374303585, | |
| "learning_rate": 9.877728438110645e-06, | |
| "logits/chosen": 0.2934933602809906, | |
| "logits/rejected": 0.2933184504508972, | |
| "logps/chosen": -16.846477508544922, | |
| "logps/rejected": -20.12131690979004, | |
| "loss": 0.9228, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.9210986495018005, | |
| "rewards/margins": 0.357336163520813, | |
| "rewards/rejected": -1.2784347534179688, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.3614457831325301, | |
| "grad_norm": 6.557578354068986, | |
| "learning_rate": 9.795792150593739e-06, | |
| "logits/chosen": 0.39821481704711914, | |
| "logits/rejected": 0.3969055414199829, | |
| "logps/chosen": -16.84841537475586, | |
| "logps/rejected": -20.08253288269043, | |
| "loss": 0.9177, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.9185025691986084, | |
| "rewards/margins": 0.3549797832965851, | |
| "rewards/rejected": -1.273482322692871, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.42168674698795183, | |
| "grad_norm": 8.954398134887585, | |
| "learning_rate": 9.693489556908641e-06, | |
| "logits/chosen": 0.4319503605365753, | |
| "logits/rejected": 0.43394067883491516, | |
| "logps/chosen": -16.256404876708984, | |
| "logps/rejected": -19.0562686920166, | |
| "loss": 0.9048, | |
| "rewards/accuracies": 0.6187500357627869, | |
| "rewards/chosen": -0.8165277242660522, | |
| "rewards/margins": 0.362092524766922, | |
| "rewards/rejected": -1.1786202192306519, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.4819277108433735, | |
| "grad_norm": 5.470944347532639, | |
| "learning_rate": 9.571255105813632e-06, | |
| "logits/chosen": 0.4045424163341522, | |
| "logits/rejected": 0.40858352184295654, | |
| "logps/chosen": -13.871018409729004, | |
| "logps/rejected": -18.067955017089844, | |
| "loss": 0.8794, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.7279474139213562, | |
| "rewards/margins": 0.30724459886550903, | |
| "rewards/rejected": -1.0351920127868652, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.5421686746987951, | |
| "grad_norm": 11.381498960351852, | |
| "learning_rate": 9.429607890750863e-06, | |
| "logits/chosen": 0.4048815369606018, | |
| "logits/rejected": 0.4054288864135742, | |
| "logps/chosen": -14.333499908447266, | |
| "logps/rejected": -17.505741119384766, | |
| "loss": 0.9043, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.6680309772491455, | |
| "rewards/margins": 0.30727389454841614, | |
| "rewards/rejected": -0.9753048419952393, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.6024096385542169, | |
| "grad_norm": 7.883892084991124, | |
| "learning_rate": 9.269149445410545e-06, | |
| "logits/chosen": 0.4062591791152954, | |
| "logits/rejected": 0.4050535261631012, | |
| "logps/chosen": -12.784080505371094, | |
| "logps/rejected": -18.091772079467773, | |
| "loss": 0.822, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.6254960298538208, | |
| "rewards/margins": 0.4347843527793884, | |
| "rewards/rejected": -1.060280442237854, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.6626506024096386, | |
| "grad_norm": 6.279250929917772, | |
| "learning_rate": 9.09056118919587e-06, | |
| "logits/chosen": 0.33784574270248413, | |
| "logits/rejected": 0.3434804677963257, | |
| "logps/chosen": -12.477110862731934, | |
| "logps/rejected": -20.124866485595703, | |
| "loss": 0.7625, | |
| "rewards/accuracies": 0.6375000476837158, | |
| "rewards/chosen": -0.6189417839050293, | |
| "rewards/margins": 0.6243987083435059, | |
| "rewards/rejected": -1.2433404922485352, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.7228915662650602, | |
| "grad_norm": 8.046915696828538, | |
| "learning_rate": 8.894601533437e-06, | |
| "logits/chosen": 0.3401602506637573, | |
| "logits/rejected": 0.34562352299690247, | |
| "logps/chosen": -14.957855224609375, | |
| "logps/rejected": -19.155860900878906, | |
| "loss": 0.931, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.7567940950393677, | |
| "rewards/margins": 0.40305179357528687, | |
| "rewards/rejected": -1.1598458290100098, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.7831325301204819, | |
| "grad_norm": 7.80904642662881, | |
| "learning_rate": 8.682102660643196e-06, | |
| "logits/chosen": 0.4684681296348572, | |
| "logits/rejected": 0.4649001955986023, | |
| "logps/chosen": -13.015806198120117, | |
| "logps/rejected": -16.524856567382812, | |
| "loss": 0.8736, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.6119887232780457, | |
| "rewards/margins": 0.3549143671989441, | |
| "rewards/rejected": -0.9669030904769897, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.8433734939759037, | |
| "grad_norm": 5.157128287254778, | |
| "learning_rate": 8.453966990470656e-06, | |
| "logits/chosen": 0.5199801325798035, | |
| "logits/rejected": 0.5173701047897339, | |
| "logps/chosen": -14.613012313842773, | |
| "logps/rejected": -19.30719757080078, | |
| "loss": 0.8357, | |
| "rewards/accuracies": 0.6375000476837158, | |
| "rewards/chosen": -0.712582528591156, | |
| "rewards/margins": 0.5169816017150879, | |
| "rewards/rejected": -1.2295641899108887, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.9036144578313253, | |
| "grad_norm": 7.350972551804275, | |
| "learning_rate": 8.211163347414005e-06, | |
| "logits/chosen": 0.5550334453582764, | |
| "logits/rejected": 0.5541264414787292, | |
| "logps/chosen": -14.093545913696289, | |
| "logps/rejected": -18.224023818969727, | |
| "loss": 0.8537, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.7159973978996277, | |
| "rewards/margins": 0.4007648527622223, | |
| "rewards/rejected": -1.1167622804641724, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.963855421686747, | |
| "grad_norm": 6.599463284958903, | |
| "learning_rate": 7.95472284649615e-06, | |
| "logits/chosen": 0.5369991064071655, | |
| "logits/rejected": 0.5401480197906494, | |
| "logps/chosen": -14.460978507995605, | |
| "logps/rejected": -19.939844131469727, | |
| "loss": 0.89, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.7761189937591553, | |
| "rewards/margins": 0.39927852153778076, | |
| "rewards/rejected": -1.1753976345062256, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.0240963855421688, | |
| "grad_norm": 16.015900049544836, | |
| "learning_rate": 7.685734514428767e-06, | |
| "logits/chosen": 0.42177876830101013, | |
| "logits/rejected": 0.42529550194740295, | |
| "logps/chosen": -14.51510238647461, | |
| "logps/rejected": -17.642459869384766, | |
| "loss": 0.815, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.6715702414512634, | |
| "rewards/margins": 0.41218772530555725, | |
| "rewards/rejected": -1.0837578773498535, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.0843373493975903, | |
| "grad_norm": 4.065830749870838, | |
| "learning_rate": 7.405340664838994e-06, | |
| "logits/chosen": 0.23701924085617065, | |
| "logits/rejected": 0.24219843745231628, | |
| "logps/chosen": -11.591026306152344, | |
| "logps/rejected": -18.730134963989258, | |
| "loss": 0.6683, | |
| "rewards/accuracies": 0.7437500357627869, | |
| "rewards/chosen": -0.41484612226486206, | |
| "rewards/margins": 0.7188076376914978, | |
| "rewards/rejected": -1.1336537599563599, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.144578313253012, | |
| "grad_norm": 6.737624906291123, | |
| "learning_rate": 7.114732047202433e-06, | |
| "logits/chosen": 0.06327857077121735, | |
| "logits/rejected": 0.06292692571878433, | |
| "logps/chosen": -11.42073917388916, | |
| "logps/rejected": -19.14717674255371, | |
| "loss": 0.6966, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.4310266673564911, | |
| "rewards/margins": 0.7305406332015991, | |
| "rewards/rejected": -1.1615674495697021, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 1.2048192771084336, | |
| "grad_norm": 7.082800328690027, | |
| "learning_rate": 6.815142790083473e-06, | |
| "logits/chosen": -0.01345985010266304, | |
| "logits/rejected": -0.005416684318333864, | |
| "logps/chosen": -12.335067749023438, | |
| "logps/rejected": -20.66636848449707, | |
| "loss": 0.7393, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.5101935863494873, | |
| "rewards/margins": 0.8145975470542908, | |
| "rewards/rejected": -1.3247911930084229, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.2650602409638554, | |
| "grad_norm": 5.036833862784185, | |
| "learning_rate": 6.507845160157476e-06, | |
| "logits/chosen": -0.03185756877064705, | |
| "logits/rejected": -0.026032937690615654, | |
| "logps/chosen": -12.31982421875, | |
| "logps/rejected": -19.825225830078125, | |
| "loss": 0.6988, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.5740019083023071, | |
| "rewards/margins": 0.696465790271759, | |
| "rewards/rejected": -1.270467758178711, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 1.3253012048192772, | |
| "grad_norm": 5.560017374017693, | |
| "learning_rate": 6.1941441592717564e-06, | |
| "logits/chosen": -0.028415212407708168, | |
| "logits/rejected": -0.03128795325756073, | |
| "logps/chosen": -12.783760070800781, | |
| "logps/rejected": -21.475841522216797, | |
| "loss": 0.7107, | |
| "rewards/accuracies": 0.6875000596046448, | |
| "rewards/chosen": -0.6131666302680969, | |
| "rewards/margins": 0.7542582750320435, | |
| "rewards/rejected": -1.3674249649047852, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.3855421686746987, | |
| "grad_norm": 4.679243672883931, | |
| "learning_rate": 5.875371982489959e-06, | |
| "logits/chosen": -0.03955061361193657, | |
| "logits/rejected": -0.03935273364186287, | |
| "logps/chosen": -12.692768096923828, | |
| "logps/rejected": -20.340290069580078, | |
| "loss": 0.7095, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.5668330788612366, | |
| "rewards/margins": 0.7566565275192261, | |
| "rewards/rejected": -1.3234896659851074, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 1.4457831325301205, | |
| "grad_norm": 4.303668638983521, | |
| "learning_rate": 5.55288236065495e-06, | |
| "logits/chosen": -0.03517518565058708, | |
| "logits/rejected": -0.03254169970750809, | |
| "logps/chosen": -12.00464153289795, | |
| "logps/rejected": -20.043167114257812, | |
| "loss": 0.6784, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.45237845182418823, | |
| "rewards/margins": 0.8140364289283752, | |
| "rewards/rejected": -1.2664148807525635, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.5060240963855422, | |
| "grad_norm": 9.89407814357214, | |
| "learning_rate": 5.228044811495632e-06, | |
| "logits/chosen": 0.011906255967915058, | |
| "logits/rejected": 0.012696187943220139, | |
| "logps/chosen": -11.616506576538086, | |
| "logps/rejected": -20.238014221191406, | |
| "loss": 0.7106, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.44120335578918457, | |
| "rewards/margins": 0.8242735266685486, | |
| "rewards/rejected": -1.2654768228530884, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.5662650602409638, | |
| "grad_norm": 4.549821099257562, | |
| "learning_rate": 4.9022388236915306e-06, | |
| "logits/chosen": 0.04570060223340988, | |
| "logits/rejected": 0.04913484305143356, | |
| "logps/chosen": -10.814414978027344, | |
| "logps/rejected": -20.540760040283203, | |
| "loss": 0.6321, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.38592851161956787, | |
| "rewards/margins": 0.9144089818000793, | |
| "rewards/rejected": -1.300337553024292, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.6265060240963856, | |
| "grad_norm": 6.159969465605162, | |
| "learning_rate": 4.57684799859372e-06, | |
| "logits/chosen": 0.051878318190574646, | |
| "logits/rejected": 0.049009501934051514, | |
| "logps/chosen": -10.995405197143555, | |
| "logps/rejected": -20.4885196685791, | |
| "loss": 0.6566, | |
| "rewards/accuracies": 0.7062500715255737, | |
| "rewards/chosen": -0.399277925491333, | |
| "rewards/margins": 0.9130607843399048, | |
| "rewards/rejected": -1.3123387098312378, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 1.6867469879518073, | |
| "grad_norm": 7.807608973663333, | |
| "learning_rate": 4.253254174480462e-06, | |
| "logits/chosen": 0.06293821334838867, | |
| "logits/rejected": 0.06775335967540741, | |
| "logps/chosen": -12.86220932006836, | |
| "logps/rejected": -24.761363983154297, | |
| "loss": 0.6637, | |
| "rewards/accuracies": 0.7437500357627869, | |
| "rewards/chosen": -0.5232489705085754, | |
| "rewards/margins": 1.1066060066223145, | |
| "rewards/rejected": -1.6298549175262451, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.7469879518072289, | |
| "grad_norm": 6.450758798029886, | |
| "learning_rate": 3.932831558300074e-06, | |
| "logits/chosen": 0.03402797505259514, | |
| "logits/rejected": 0.03250021114945412, | |
| "logps/chosen": -12.925631523132324, | |
| "logps/rejected": -19.254789352416992, | |
| "loss": 0.7336, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.49226850271224976, | |
| "rewards/margins": 0.6960192322731018, | |
| "rewards/rejected": -1.1882877349853516, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.8072289156626506, | |
| "grad_norm": 4.3607756550439625, | |
| "learning_rate": 3.6169408898217973e-06, | |
| "logits/chosen": 0.039087243378162384, | |
| "logits/rejected": 0.04352856054902077, | |
| "logps/chosen": -12.372940063476562, | |
| "logps/rejected": -20.964872360229492, | |
| "loss": 0.7091, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.5094107389450073, | |
| "rewards/margins": 0.7860168218612671, | |
| "rewards/rejected": -1.295427680015564, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.8674698795180724, | |
| "grad_norm": 4.665085160330557, | |
| "learning_rate": 3.306923662977789e-06, | |
| "logits/chosen": 0.06090332567691803, | |
| "logits/rejected": 0.06559460610151291, | |
| "logps/chosen": -12.070850372314453, | |
| "logps/rejected": -21.903215408325195, | |
| "loss": 0.6518, | |
| "rewards/accuracies": 0.7062500715255737, | |
| "rewards/chosen": -0.46340861916542053, | |
| "rewards/margins": 1.006162405014038, | |
| "rewards/rejected": -1.4695709943771362, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 1.927710843373494, | |
| "grad_norm": 6.373734799601269, | |
| "learning_rate": 3.0040964289364618e-06, | |
| "logits/chosen": 0.10437053442001343, | |
| "logits/rejected": 0.09873737394809723, | |
| "logps/chosen": -11.04707145690918, | |
| "logps/rejected": -20.31425666809082, | |
| "loss": 0.6583, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.3955325484275818, | |
| "rewards/margins": 0.8207127451896667, | |
| "rewards/rejected": -1.2162452936172485, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.9879518072289155, | |
| "grad_norm": 6.76579535839464, | |
| "learning_rate": 2.7097452051003375e-06, | |
| "logits/chosen": 0.12389400601387024, | |
| "logits/rejected": 0.12251937389373779, | |
| "logps/chosen": -10.690954208374023, | |
| "logps/rejected": -17.01201057434082, | |
| "loss": 0.7035, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3639286160469055, | |
| "rewards/margins": 0.6530975103378296, | |
| "rewards/rejected": -1.0170261859893799, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 2.0481927710843375, | |
| "grad_norm": 3.0286043203118043, | |
| "learning_rate": 2.4251200137717545e-06, | |
| "logits/chosen": 0.12429633736610413, | |
| "logits/rejected": 0.13369517028331757, | |
| "logps/chosen": -9.702492713928223, | |
| "logps/rejected": -18.70101547241211, | |
| "loss": 0.5816, | |
| "rewards/accuracies": 0.7687499523162842, | |
| "rewards/chosen": -0.19249771535396576, | |
| "rewards/margins": 0.9518150091171265, | |
| "rewards/rejected": -1.144312858581543, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 2.108433734939759, | |
| "grad_norm": 3.4650296160280996, | |
| "learning_rate": 2.151429573679084e-06, | |
| "logits/chosen": 0.09276320040225983, | |
| "logits/rejected": 0.08381576836109161, | |
| "logps/chosen": -7.954525470733643, | |
| "logps/rejected": -18.878183364868164, | |
| "loss": 0.5543, | |
| "rewards/accuracies": 0.7937500476837158, | |
| "rewards/chosen": -0.13666492700576782, | |
| "rewards/margins": 1.0228486061096191, | |
| "rewards/rejected": -1.1595135927200317, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 2.1686746987951806, | |
| "grad_norm": 4.009672492571936, | |
| "learning_rate": 1.8898361669069497e-06, | |
| "logits/chosen": 0.012567025609314442, | |
| "logits/rejected": 0.013043450191617012, | |
| "logps/chosen": -8.962043762207031, | |
| "logps/rejected": -20.484146118164062, | |
| "loss": 0.5452, | |
| "rewards/accuracies": 0.7937500476837158, | |
| "rewards/chosen": -0.16434553265571594, | |
| "rewards/margins": 1.1564074754714966, | |
| "rewards/rejected": -1.3207528591156006, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.2289156626506026, | |
| "grad_norm": 4.442304788111209, | |
| "learning_rate": 1.6414507030291249e-06, | |
| "logits/chosen": -0.06221061572432518, | |
| "logits/rejected": -0.06934291869401932, | |
| "logps/chosen": -9.080927848815918, | |
| "logps/rejected": -23.444576263427734, | |
| "loss": 0.5333, | |
| "rewards/accuracies": 0.831250011920929, | |
| "rewards/chosen": -0.18415439128875732, | |
| "rewards/margins": 1.3432217836380005, | |
| "rewards/rejected": -1.5273760557174683, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 2.289156626506024, | |
| "grad_norm": 4.2271531800043585, | |
| "learning_rate": 1.4073280014052077e-06, | |
| "logits/chosen": -0.14234301447868347, | |
| "logits/rejected": -0.13763678073883057, | |
| "logps/chosen": -10.012588500976562, | |
| "logps/rejected": -22.59408187866211, | |
| "loss": 0.5441, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": -0.22738967835903168, | |
| "rewards/margins": 1.3349870443344116, | |
| "rewards/rejected": -1.562376618385315, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.3493975903614457, | |
| "grad_norm": 3.8404210034307105, | |
| "learning_rate": 1.1884623116758121e-06, | |
| "logits/chosen": -0.17886070907115936, | |
| "logits/rejected": -0.17571109533309937, | |
| "logps/chosen": -9.892772674560547, | |
| "logps/rejected": -26.796541213989258, | |
| "loss": 0.4638, | |
| "rewards/accuracies": 0.862500011920929, | |
| "rewards/chosen": -0.21987608075141907, | |
| "rewards/margins": 1.6867713928222656, | |
| "rewards/rejected": -1.9066474437713623, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 2.4096385542168672, | |
| "grad_norm": 4.953359294501781, | |
| "learning_rate": 9.857830914793827e-07, | |
| "logits/chosen": -0.20311155915260315, | |
| "logits/rejected": -0.19799628853797913, | |
| "logps/chosen": -10.871113777160645, | |
| "logps/rejected": -29.490276336669922, | |
| "loss": 0.4925, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -0.3226281404495239, | |
| "rewards/margins": 1.7889926433563232, | |
| "rewards/rejected": -2.1116209030151367, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.4698795180722892, | |
| "grad_norm": 4.64517618032467, | |
| "learning_rate": 8.001510593213946e-07, | |
| "logits/chosen": -0.22599855065345764, | |
| "logits/rejected": -0.22137105464935303, | |
| "logps/chosen": -8.583234786987305, | |
| "logps/rejected": -25.918903350830078, | |
| "loss": 0.4905, | |
| "rewards/accuracies": 0.862500011920929, | |
| "rewards/chosen": -0.14613361656665802, | |
| "rewards/margins": 1.7042248249053955, | |
| "rewards/rejected": -1.850358247756958, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 2.5301204819277108, | |
| "grad_norm": 3.152112838442371, | |
| "learning_rate": 6.323545393582847e-07, | |
| "logits/chosen": -0.22711718082427979, | |
| "logits/rejected": -0.2298433780670166, | |
| "logps/chosen": -9.491654396057129, | |
| "logps/rejected": -22.80984115600586, | |
| "loss": 0.5301, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": -0.1725078970193863, | |
| "rewards/margins": 1.4063202142715454, | |
| "rewards/rejected": -1.57882821559906, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.5903614457831328, | |
| "grad_norm": 4.4630924013044115, | |
| "learning_rate": 4.831061136186787e-07, | |
| "logits/chosen": -0.23690445721149445, | |
| "logits/rejected": -0.23807355761528015, | |
| "logps/chosen": -8.89923095703125, | |
| "logps/rejected": -23.76407814025879, | |
| "loss": 0.5269, | |
| "rewards/accuracies": 0.8250000476837158, | |
| "rewards/chosen": -0.21625632047653198, | |
| "rewards/margins": 1.421187400817871, | |
| "rewards/rejected": -1.6374436616897583, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 2.6506024096385543, | |
| "grad_norm": 5.625801336718983, | |
| "learning_rate": 3.53039595878959e-07, | |
| "logits/chosen": -0.24186240136623383, | |
| "logits/rejected": -0.24021676182746887, | |
| "logps/chosen": -10.780243873596191, | |
| "logps/rejected": -24.856246948242188, | |
| "loss": 0.6027, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.37535130977630615, | |
| "rewards/margins": 1.3886429071426392, | |
| "rewards/rejected": -1.7639942169189453, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.710843373493976, | |
| "grad_norm": 5.374747811868763, | |
| "learning_rate": 2.4270734004424643e-07, | |
| "logits/chosen": -0.24513426423072815, | |
| "logits/rejected": -0.24106302857398987, | |
| "logps/chosen": -9.546283721923828, | |
| "logps/rejected": -26.042221069335938, | |
| "loss": 0.4833, | |
| "rewards/accuracies": 0.8625000715255737, | |
| "rewards/chosen": -0.1909545361995697, | |
| "rewards/margins": 1.6568984985351562, | |
| "rewards/rejected": -1.8478529453277588, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 2.7710843373493974, | |
| "grad_norm": 4.71023027186897, | |
| "learning_rate": 1.5257789446526172e-07, | |
| "logits/chosen": -0.24886594712734222, | |
| "logits/rejected": -0.24283945560455322, | |
| "logps/chosen": -9.447517395019531, | |
| "logps/rejected": -25.348175048828125, | |
| "loss": 0.521, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -0.2334991991519928, | |
| "rewards/margins": 1.5295912027359009, | |
| "rewards/rejected": -1.7630903720855713, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.8313253012048194, | |
| "grad_norm": 3.618285262250259, | |
| "learning_rate": 8.303401215251583e-08, | |
| "logits/chosen": -0.23923178017139435, | |
| "logits/rejected": -0.2397070974111557, | |
| "logps/chosen": -9.20031452178955, | |
| "logps/rejected": -28.882890701293945, | |
| "loss": 0.4402, | |
| "rewards/accuracies": 0.887499988079071, | |
| "rewards/chosen": -0.24343208968639374, | |
| "rewards/margins": 1.849422812461853, | |
| "rewards/rejected": -2.0928549766540527, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 2.891566265060241, | |
| "grad_norm": 4.639163710743134, | |
| "learning_rate": 3.437102533785541e-08, | |
| "logits/chosen": -0.24758636951446533, | |
| "logits/rejected": -0.2467891424894333, | |
| "logps/chosen": -7.994501113891602, | |
| "logps/rejected": -24.78835678100586, | |
| "loss": 0.4808, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": -0.12487827241420746, | |
| "rewards/margins": 1.5749868154525757, | |
| "rewards/rejected": -1.6998651027679443, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.9518072289156625, | |
| "grad_norm": 3.9125816690429254, | |
| "learning_rate": 6.7955912861095155e-09, | |
| "logits/chosen": -0.2559281587600708, | |
| "logits/rejected": -0.253335177898407, | |
| "logps/chosen": -8.652589797973633, | |
| "logps/rejected": -21.75301170349121, | |
| "loss": 0.4927, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -0.1405271738767624, | |
| "rewards/margins": 1.3281803131103516, | |
| "rewards/rejected": -1.46870756149292, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 249, | |
| "total_flos": 11469721436160.0, | |
| "train_loss": 0.693384567896525, | |
| "train_runtime": 1185.9878, | |
| "train_samples_per_second": 6.716, | |
| "train_steps_per_second": 0.21 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 249, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 11469721436160.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |