| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 20, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "completion_length": 284.0, | |
| "epoch": 0.1, | |
| "grad_norm": 5.3337507247924805, | |
| "kl": 0.0, | |
| "learning_rate": 4.965903258506806e-07, | |
| "loss": -0.0, | |
| "reward": 1.9800817370414734, | |
| "reward_std": 1.3241489548236132, | |
| "rewards/concensus_correctness_reward_func": 0.27412500604987144, | |
| "rewards/consensus_reward_func": 0.25, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0625, | |
| "rewards/question_recreation_reward_func": 0.5504254810512066, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.8430312536656857, | |
| "step": 2 | |
| }, | |
| { | |
| "completion_length": 305.09375, | |
| "epoch": 0.2, | |
| "grad_norm": 7.926000595092773, | |
| "kl": 0.0017703269804769661, | |
| "learning_rate": 4.698684378016222e-07, | |
| "loss": 0.0, | |
| "reward": 1.3028377667069435, | |
| "reward_std": 1.2722213035449386, | |
| "rewards/concensus_correctness_reward_func": 0.09031249955296516, | |
| "rewards/consensus_reward_func": 0.125, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.125, | |
| "rewards/question_recreation_reward_func": 0.5440564770251513, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.41846876963973045, | |
| "step": 4 | |
| }, | |
| { | |
| "completion_length": 324.53125, | |
| "epoch": 0.3, | |
| "grad_norm": 4.630513668060303, | |
| "kl": 0.0023597670333401766, | |
| "learning_rate": 4.193203929064353e-07, | |
| "loss": -0.0, | |
| "reward": 2.616505871526897, | |
| "reward_std": 1.836945830451441, | |
| "rewards/concensus_correctness_reward_func": 0.8931249994784594, | |
| "rewards/consensus_reward_func": 0.375, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.125, | |
| "rewards/question_recreation_reward_func": 0.7252558525651693, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.49812499433755875, | |
| "step": 6 | |
| }, | |
| { | |
| "completion_length": 300.03125, | |
| "epoch": 0.4, | |
| "grad_norm": 6.0448408126831055, | |
| "kl": 0.006831302540376782, | |
| "learning_rate": 3.5042385616324236e-07, | |
| "loss": 0.0, | |
| "reward": 2.330481383949518, | |
| "reward_std": 1.3933904968434945, | |
| "rewards/concensus_correctness_reward_func": 0.5753750018775463, | |
| "rewards/consensus_reward_func": 0.4375, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.1875, | |
| "rewards/question_recreation_reward_func": 0.5330438492819667, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.5970625001937151, | |
| "step": 8 | |
| }, | |
| { | |
| "completion_length": 289.375, | |
| "epoch": 0.5, | |
| "grad_norm": 5.541182041168213, | |
| "kl": 0.008215808833483607, | |
| "learning_rate": 2.706448363680831e-07, | |
| "loss": 0.0, | |
| "reward": 3.2710629031062126, | |
| "reward_std": 0.6699612002703361, | |
| "rewards/concensus_correctness_reward_func": 0.6778750009834766, | |
| "rewards/consensus_reward_func": 0.625, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.375, | |
| "rewards/question_recreation_reward_func": 0.7284065950661898, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.864781254902482, | |
| "step": 10 | |
| }, | |
| { | |
| "completion_length": 274.875, | |
| "epoch": 0.6, | |
| "grad_norm": 4.919522762298584, | |
| "kl": 0.009143324597971514, | |
| "learning_rate": 1.886286282148002e-07, | |
| "loss": 0.0, | |
| "reward": 1.9645782262086868, | |
| "reward_std": 0.8224145476706326, | |
| "rewards/concensus_correctness_reward_func": 0.23931250348687172, | |
| "rewards/consensus_reward_func": 0.3125, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0625, | |
| "rewards/question_recreation_reward_func": 0.5546094560995698, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.7956562414765358, | |
| "step": 12 | |
| }, | |
| { | |
| "completion_length": 272.03125, | |
| "epoch": 0.7, | |
| "grad_norm": 9.62459945678711, | |
| "kl": 0.008210204628994688, | |
| "learning_rate": 1.1326296046939333e-07, | |
| "loss": 0.0, | |
| "reward": 2.2136378148570657, | |
| "reward_std": 1.3659028941765428, | |
| "rewards/concensus_correctness_reward_func": 0.5049999989569187, | |
| "rewards/consensus_reward_func": 0.4375, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0625, | |
| "rewards/question_recreation_reward_func": 0.689387796446681, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.5192499980330467, | |
| "step": 14 | |
| }, | |
| { | |
| "completion_length": 245.8125, | |
| "epoch": 0.8, | |
| "grad_norm": 7.016496181488037, | |
| "kl": 0.012462856859201565, | |
| "learning_rate": 5.271487265090163e-08, | |
| "loss": 0.0, | |
| "reward": 3.3639015331864357, | |
| "reward_std": 2.017711062449962, | |
| "rewards/concensus_correctness_reward_func": 1.087625004351139, | |
| "rewards/consensus_reward_func": 0.4375, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.3125, | |
| "rewards/question_recreation_reward_func": 0.7259328681975603, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.8003437630832195, | |
| "step": 16 | |
| }, | |
| { | |
| "completion_length": 278.6875, | |
| "epoch": 0.9, | |
| "grad_norm": 9.100286483764648, | |
| "kl": 0.025102579093072563, | |
| "learning_rate": 1.3545689574841341e-08, | |
| "loss": 0.0, | |
| "reward": 2.6848340295255184, | |
| "reward_std": 2.140879629761912, | |
| "rewards/concensus_correctness_reward_func": 0.9294374994933605, | |
| "rewards/consensus_reward_func": 0.3125, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0625, | |
| "rewards/question_recreation_reward_func": 0.5343964868225157, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.8460000082850456, | |
| "step": 18 | |
| }, | |
| { | |
| "completion_length": 278.28125, | |
| "epoch": 1.0, | |
| "grad_norm": 7.133706092834473, | |
| "kl": 0.010047315998235717, | |
| "learning_rate": 0.0, | |
| "loss": 0.0, | |
| "reward": 3.617217257618904, | |
| "reward_std": 1.1404688449110836, | |
| "rewards/concensus_correctness_reward_func": 0.9678124859929085, | |
| "rewards/consensus_reward_func": 0.625, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.4375, | |
| "rewards/question_recreation_reward_func": 0.6855921968817711, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.9013124946504831, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 20, | |
| "total_flos": 0.0, | |
| "train_loss": 7.92425125837326e-06, | |
| "train_runtime": 248.6923, | |
| "train_samples_per_second": 1.287, | |
| "train_steps_per_second": 0.08 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 20, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 25, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |