File size: 2,885 Bytes
d658edc 93df967 d658edc 93df967 d658edc 93df967 d658edc 3975322 93df967 d658edc 93df967 1285f6b 7ea5372 93df967 d658edc 93df967 8ad1e12 93df967 5c0c1b8 93df967 d658edc 93df967 d658edc 38a2206 93df967 d658edc 93df967 d658edc 93df967 d658edc 93df967 d658edc 93df967 cab08e8 d658edc 93df967 d658edc 93df967 d658edc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.5,
"eval_steps": 500,
"global_step": 5,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 140.125,
"epoch": 1.0,
"grad_norm": 25.24424934387207,
"kl": 0.0,
"learning_rate": 5e-07,
"loss": 0.0,
"reward": 0.052890727296471596,
"reward_std": 0.017691312765236944,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.052890727296471596,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.0,
"step": 2
},
{
"completion_length": 86.625,
"epoch": 2.0,
"grad_norm": 9.793798446655273,
"kl": 0.011194768259883858,
"learning_rate": 2.5e-07,
"loss": 0.0,
"reward": 0.10395016614347696,
"reward_std": 0.012995281023904681,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.10395016614347696,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.0,
"step": 4
},
{
"completion_length": 104.0,
"epoch": 2.5,
"kl": 0.005378011410357431,
"reward": 0.01932604657486081,
"reward_std": 0.00301999697512656,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.01932604657486081,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.0,
"step": 5,
"total_flos": 0.0,
"train_loss": 5.6818393204594034e-06,
"train_runtime": 3692.0926,
"train_samples_per_second": 0.005,
"train_steps_per_second": 0.001
}
],
"logging_steps": 2,
"max_steps": 5,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|