{ "epoch": 1.0, "eval_logits/chosen": -3.7611606121063232, "eval_logits/rejected": -5.544642925262451, "eval_logps/chosen": -89.77777862548828, "eval_logps/rejected": -263.28570556640625, "eval_loss": 0.39369189739227295, "eval_rewards/accuracies": 0.8066716194152832, "eval_rewards/chosen": -0.4649677574634552, "eval_rewards/margins": 1.7144097089767456, "eval_rewards/rejected": -2.180307626724243, "eval_runtime": 169.4341, "eval_samples_per_second": 47.216, "eval_steps_per_second": 0.372 }