leolemon committed on
Commit
2f0853f
·
verified ·
1 Parent(s): 99cac50

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
  "train_loss": 0.0,
4
- "train_runtime": 44.7968,
5
  "train_samples": 160,
6
- "train_samples_per_second": 7.143,
7
- "train_steps_per_second": 0.446
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
  "train_loss": 0.0,
4
+ "train_runtime": 41.7979,
5
  "train_samples": 160,
6
+ "train_samples_per_second": 7.656,
7
+ "train_steps_per_second": 0.478
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d9d53ed4f15c7ac98f58f4f70b1f36e41a1562ea7a3e878e1962ac26eb90bdc
3
  size 1976163472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a81da7b371edae9af27e81b142b6cd80fe488b17ba084bf175f5098e2c24af53
3
  size 1976163472
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
  "train_loss": 0.0,
4
- "train_runtime": 44.7968,
5
  "train_samples": 160,
6
- "train_samples_per_second": 7.143,
7
- "train_steps_per_second": 0.446
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
  "train_loss": 0.0,
4
+ "train_runtime": 41.7979,
5
  "train_samples": 160,
6
+ "train_samples_per_second": 7.656,
7
+ "train_steps_per_second": 0.478
8
  }
trainer_state.json CHANGED
@@ -16,9 +16,9 @@
16
  "kl": 0.0,
17
  "learning_rate": 4.965903258506806e-07,
18
  "loss": 0.0,
19
- "reward": 4.07148377597332,
20
  "reward_std": 0.0,
21
- "rewards/concensus_correctness_reward_func": 2.046500012278557,
22
  "rewards/consensus_reward_func": 2.0,
23
  "rewards/cumulative_reward_2": 0.0,
24
  "rewards/final_correctness_reward_func": 0.0,
@@ -35,9 +35,9 @@
35
  "kl": 0.0,
36
  "learning_rate": 4.698684378016222e-07,
37
  "loss": 0.0,
38
- "reward": 4.081080943346024,
39
  "reward_std": 0.0,
40
- "rewards/concensus_correctness_reward_func": 2.045749992132187,
41
  "rewards/consensus_reward_func": 2.0,
42
  "rewards/cumulative_reward_2": 0.0,
43
  "rewards/final_correctness_reward_func": 0.0,
@@ -54,9 +54,9 @@
54
  "kl": 0.0,
55
  "learning_rate": 4.193203929064353e-07,
56
  "loss": 0.0,
57
- "reward": 4.075482279062271,
58
  "reward_std": 0.0,
59
- "rewards/concensus_correctness_reward_func": 2.0467500165104866,
60
  "rewards/consensus_reward_func": 2.0,
61
  "rewards/cumulative_reward_2": 0.0,
62
  "rewards/final_correctness_reward_func": 0.0,
@@ -73,9 +73,9 @@
73
  "kl": 0.0,
74
  "learning_rate": 3.5042385616324236e-07,
75
  "loss": 0.0,
76
- "reward": 4.197205916047096,
77
  "reward_std": 0.0,
78
- "rewards/concensus_correctness_reward_func": 2.170999988913536,
79
  "rewards/consensus_reward_func": 2.0,
80
  "rewards/cumulative_reward_2": 0.0,
81
  "rewards/final_correctness_reward_func": 0.0,
@@ -92,9 +92,9 @@
92
  "kl": 0.0,
93
  "learning_rate": 2.706448363680831e-07,
94
  "loss": 0.0,
95
- "reward": 4.330130845308304,
96
  "reward_std": 0.0,
97
- "rewards/concensus_correctness_reward_func": 2.296249993145466,
98
  "rewards/consensus_reward_func": 2.0,
99
  "rewards/cumulative_reward_2": 0.0,
100
  "rewards/final_correctness_reward_func": 0.0,
@@ -111,9 +111,9 @@
111
  "kl": 0.0,
112
  "learning_rate": 1.886286282148002e-07,
113
  "loss": 0.0,
114
- "reward": 4.200867220759392,
115
  "reward_std": 0.0,
116
- "rewards/concensus_correctness_reward_func": 2.1718750298023224,
117
  "rewards/consensus_reward_func": 2.0,
118
  "rewards/cumulative_reward_2": 0.0,
119
  "rewards/final_correctness_reward_func": 0.0,
@@ -130,9 +130,9 @@
130
  "kl": 0.0,
131
  "learning_rate": 1.1326296046939333e-07,
132
  "loss": 0.0,
133
- "reward": 4.07891184091568,
134
  "reward_std": 0.0,
135
- "rewards/concensus_correctness_reward_func": 2.0457500219345093,
136
  "rewards/consensus_reward_func": 2.0,
137
  "rewards/cumulative_reward_2": 0.0,
138
  "rewards/final_correctness_reward_func": 0.0,
@@ -149,9 +149,9 @@
149
  "kl": 0.0,
150
  "learning_rate": 5.271487265090163e-08,
151
  "loss": 0.0,
152
- "reward": 4.098081558942795,
153
  "reward_std": 0.0,
154
- "rewards/concensus_correctness_reward_func": 2.0510000064969063,
155
  "rewards/consensus_reward_func": 2.0,
156
  "rewards/cumulative_reward_2": 0.0,
157
  "rewards/final_correctness_reward_func": 0.0,
@@ -168,9 +168,9 @@
168
  "kl": 0.0,
169
  "learning_rate": 1.3545689574841341e-08,
170
  "loss": 0.0,
171
- "reward": 3.9656456112861633,
172
  "reward_std": 0.0,
173
- "rewards/concensus_correctness_reward_func": 1.9221250116825104,
174
  "rewards/consensus_reward_func": 2.0,
175
  "rewards/cumulative_reward_2": 0.0,
176
  "rewards/final_correctness_reward_func": 0.0,
@@ -187,9 +187,9 @@
187
  "kl": 0.0,
188
  "learning_rate": 0.0,
189
  "loss": 0.0,
190
- "reward": 4.329772099852562,
191
  "reward_std": 0.0,
192
- "rewards/concensus_correctness_reward_func": 2.2959999963641167,
193
  "rewards/consensus_reward_func": 2.0,
194
  "rewards/cumulative_reward_2": 0.0,
195
  "rewards/final_correctness_reward_func": 0.0,
@@ -204,9 +204,9 @@
204
  "step": 20,
205
  "total_flos": 0.0,
206
  "train_loss": 0.0,
207
- "train_runtime": 44.7968,
208
- "train_samples_per_second": 7.143,
209
- "train_steps_per_second": 0.446
210
  }
211
  ],
212
  "logging_steps": 2,
 
16
  "kl": 0.0,
17
  "learning_rate": 4.965903258506806e-07,
18
  "loss": 0.0,
19
+ "reward": 4.071608752012253,
20
  "reward_std": 0.0,
21
+ "rewards/concensus_correctness_reward_func": 2.046625018119812,
22
  "rewards/consensus_reward_func": 2.0,
23
  "rewards/cumulative_reward_2": 0.0,
24
  "rewards/final_correctness_reward_func": 0.0,
 
35
  "kl": 0.0,
36
  "learning_rate": 4.698684378016222e-07,
37
  "loss": 0.0,
38
+ "reward": 4.081330955028534,
39
  "reward_std": 0.0,
40
+ "rewards/concensus_correctness_reward_func": 2.0459999963641167,
41
  "rewards/consensus_reward_func": 2.0,
42
  "rewards/cumulative_reward_2": 0.0,
43
  "rewards/final_correctness_reward_func": 0.0,
 
54
  "kl": 0.0,
55
  "learning_rate": 4.193203929064353e-07,
56
  "loss": 0.0,
57
+ "reward": 4.075607255101204,
58
  "reward_std": 0.0,
59
+ "rewards/concensus_correctness_reward_func": 2.0468750298023224,
60
  "rewards/consensus_reward_func": 2.0,
61
  "rewards/cumulative_reward_2": 0.0,
62
  "rewards/final_correctness_reward_func": 0.0,
 
73
  "kl": 0.0,
74
  "learning_rate": 3.5042385616324236e-07,
75
  "loss": 0.0,
76
+ "reward": 3.947580948472023,
77
  "reward_std": 0.0,
78
+ "rewards/concensus_correctness_reward_func": 1.921374998986721,
79
  "rewards/consensus_reward_func": 2.0,
80
  "rewards/cumulative_reward_2": 0.0,
81
  "rewards/final_correctness_reward_func": 0.0,
 
92
  "kl": 0.0,
93
  "learning_rate": 2.706448363680831e-07,
94
  "loss": 0.0,
95
+ "reward": 4.2051308155059814,
96
  "reward_std": 0.0,
97
+ "rewards/concensus_correctness_reward_func": 2.1712500154972076,
98
  "rewards/consensus_reward_func": 2.0,
99
  "rewards/cumulative_reward_2": 0.0,
100
  "rewards/final_correctness_reward_func": 0.0,
 
111
  "kl": 0.0,
112
  "learning_rate": 1.886286282148002e-07,
113
  "loss": 0.0,
114
+ "reward": 4.200742214918137,
115
  "reward_std": 0.0,
116
+ "rewards/concensus_correctness_reward_func": 2.171750031411648,
117
  "rewards/consensus_reward_func": 2.0,
118
  "rewards/cumulative_reward_2": 0.0,
119
  "rewards/final_correctness_reward_func": 0.0,
 
130
  "kl": 0.0,
131
  "learning_rate": 1.1326296046939333e-07,
132
  "loss": 0.0,
133
+ "reward": 4.079286843538284,
134
  "reward_std": 0.0,
135
+ "rewards/concensus_correctness_reward_func": 2.0461250245571136,
136
  "rewards/consensus_reward_func": 2.0,
137
  "rewards/cumulative_reward_2": 0.0,
138
  "rewards/final_correctness_reward_func": 0.0,
 
149
  "kl": 0.0,
150
  "learning_rate": 5.271487265090163e-08,
151
  "loss": 0.0,
152
+ "reward": 3.9730815291404724,
153
  "reward_std": 0.0,
154
+ "rewards/concensus_correctness_reward_func": 1.9260000139474869,
155
  "rewards/consensus_reward_func": 2.0,
156
  "rewards/cumulative_reward_2": 0.0,
157
  "rewards/final_correctness_reward_func": 0.0,
 
168
  "kl": 0.0,
169
  "learning_rate": 1.3545689574841341e-08,
170
  "loss": 0.0,
171
+ "reward": 4.090770557522774,
172
  "reward_std": 0.0,
173
+ "rewards/concensus_correctness_reward_func": 2.047250024974346,
174
  "rewards/consensus_reward_func": 2.0,
175
  "rewards/cumulative_reward_2": 0.0,
176
  "rewards/final_correctness_reward_func": 0.0,
 
187
  "kl": 0.0,
188
  "learning_rate": 0.0,
189
  "loss": 0.0,
190
+ "reward": 4.3300221264362335,
191
  "reward_std": 0.0,
192
+ "rewards/concensus_correctness_reward_func": 2.2962500005960464,
193
  "rewards/consensus_reward_func": 2.0,
194
  "rewards/cumulative_reward_2": 0.0,
195
  "rewards/final_correctness_reward_func": 0.0,
 
204
  "step": 20,
205
  "total_flos": 0.0,
206
  "train_loss": 0.0,
207
+ "train_runtime": 41.7979,
208
+ "train_samples_per_second": 7.656,
209
+ "train_steps_per_second": 0.478
210
  }
211
  ],
212
  "logging_steps": 2,