{
  "epoch": 1.0,
  "eval_kl/ref_to_policy/chosen": -17.542104721069336,
  "eval_kl/ref_to_policy/mean": 39.619388580322266,
  "eval_kl/ref_to_policy/rejected": 96.7808837890625,
  "eval_logits/chosen": -1.413936734199524,
  "eval_logits/rejected": -1.7039226293563843,
  "eval_logps/chosen": -1506.69384765625,
  "eval_logps/rejected": -1620.5035400390625,
  "eval_loss": 4.123634338378906,
  "eval_nll_loss": 0.9376209378242493,
  "eval_rewards/accuracies": 0.9414893388748169,
  "eval_rewards/chosen": 0.17542102932929993,
  "eval_rewards/margins": 1.1432298421859741,
  "eval_rewards/rejected": -0.967808723449707,
  "eval_runtime": 112.3415,
  "eval_samples_per_second": 3.347,
  "eval_steps_per_second": 1.673
}