File size: 2,885 Bytes
d658edc
 
 
 
93df967
d658edc
93df967
d658edc
 
 
 
 
93df967
 
 
d658edc
 
3975322
93df967
 
 
 
d658edc
93df967
 
1285f6b
7ea5372
93df967
 
d658edc
 
93df967
8ad1e12
93df967
 
 
5c0c1b8
93df967
 
 
 
d658edc
93df967
 
d658edc
38a2206
93df967
 
d658edc
 
93df967
 
 
 
 
 
 
d658edc
93df967
 
d658edc
 
93df967
 
d658edc
93df967
 
 
cab08e8
d658edc
 
 
93df967
d658edc
93df967
 
d658edc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.5,
  "eval_steps": 500,
  "global_step": 5,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "completion_length": 140.125,
      "epoch": 1.0,
      "grad_norm": 25.24424934387207,
      "kl": 0.0,
      "learning_rate": 5e-07,
      "loss": 0.0,
      "reward": 0.052890727296471596,
      "reward_std": 0.017691312765236944,
      "rewards/concensus_correctness_reward_func": 0.0,
      "rewards/consensus_reward_func": 0.0,
      "rewards/cumulative_reward_2": 0.0,
      "rewards/final_correctness_reward_func": 0.0,
      "rewards/question_recreation_reward_func": 0.052890727296471596,
      "rewards/soft_format_reward_func": 0.0,
      "rewards/strict_format_reward_func": 0.0,
      "rewards/xmlcount_reward_func": 0.0,
      "step": 2
    },
    {
      "completion_length": 86.625,
      "epoch": 2.0,
      "grad_norm": 9.793798446655273,
      "kl": 0.011194768259883858,
      "learning_rate": 2.5e-07,
      "loss": 0.0,
      "reward": 0.10395016614347696,
      "reward_std": 0.012995281023904681,
      "rewards/concensus_correctness_reward_func": 0.0,
      "rewards/consensus_reward_func": 0.0,
      "rewards/cumulative_reward_2": 0.0,
      "rewards/final_correctness_reward_func": 0.0,
      "rewards/question_recreation_reward_func": 0.10395016614347696,
      "rewards/soft_format_reward_func": 0.0,
      "rewards/strict_format_reward_func": 0.0,
      "rewards/xmlcount_reward_func": 0.0,
      "step": 4
    },
    {
      "completion_length": 104.0,
      "epoch": 2.5,
      "kl": 0.005378011410357431,
      "reward": 0.01932604657486081,
      "reward_std": 0.00301999697512656,
      "rewards/concensus_correctness_reward_func": 0.0,
      "rewards/consensus_reward_func": 0.0,
      "rewards/cumulative_reward_2": 0.0,
      "rewards/final_correctness_reward_func": 0.0,
      "rewards/question_recreation_reward_func": 0.01932604657486081,
      "rewards/soft_format_reward_func": 0.0,
      "rewards/strict_format_reward_func": 0.0,
      "rewards/xmlcount_reward_func": 0.0,
      "step": 5,
      "total_flos": 0.0,
      "train_loss": 5.6818393204594034e-06,
      "train_runtime": 3692.0926,
      "train_samples_per_second": 0.005,
      "train_steps_per_second": 0.001
    }
  ],
  "logging_steps": 2,
  "max_steps": 5,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}