j05hr3d committed on
Commit
920c72a
·
verified ·
1 Parent(s): 83e51b5

Model save

Browse files
Files changed (3) hide show
  1. README.md +11 -11
  2. adapter_model.safetensors +1 -1
  3. trainer_state.json +89 -89
README.md CHANGED
@@ -19,7 +19,7 @@ should probably proofread and complete it, then remove this comment. -->
19
 
20
  This model is a fine-tuned version of [Qwen/Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct) on the None dataset.
21
  It achieves the following results on the evaluation set:
22
- - Loss: 0.7603
23
 
24
  ## Model description
25
 
@@ -53,16 +53,16 @@ The following hyperparameters were used during training:
53
 
54
  | Training Loss | Epoch | Step | Validation Loss |
55
  |:-------------:|:------:|:----:|:---------------:|
56
- | 1.0044 | 0.2867 | 20 | 0.9259 |
57
- | 0.7707 | 0.5735 | 40 | 0.8609 |
58
- | 0.7489 | 0.8602 | 60 | 0.8249 |
59
- | 0.6848 | 1.1434 | 80 | 0.8104 |
60
- | 0.7167 | 1.4301 | 100 | 0.7897 |
61
- | 0.6183 | 1.7168 | 120 | 0.7741 |
62
- | 0.6007 | 2.0 | 140 | 0.7621 |
63
- | 0.5897 | 2.2867 | 160 | 0.7663 |
64
- | 0.5883 | 2.5735 | 180 | 0.7646 |
65
- | 0.5313 | 2.8602 | 200 | 0.7603 |
66
 
67
 
68
  ### Framework versions
 
19
 
20
  This model is a fine-tuned version of [Qwen/Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct) on the None dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 0.7061
23
 
24
  ## Model description
25
 
 
53
 
54
  | Training Loss | Epoch | Step | Validation Loss |
55
  |:-------------:|:------:|:----:|:---------------:|
56
+ | 0.9132 | 0.2857 | 20 | 0.8498 |
57
+ | 0.9442 | 0.5714 | 40 | 0.7973 |
58
+ | 0.7474 | 0.8571 | 60 | 0.7688 |
59
+ | 0.755 | 1.1429 | 80 | 0.7504 |
60
+ | 0.7374 | 1.4286 | 100 | 0.7385 |
61
+ | 0.6531 | 1.7143 | 120 | 0.7256 |
62
+ | 0.6193 | 2.0 | 140 | 0.7123 |
63
+ | 0.5379 | 2.2857 | 160 | 0.7121 |
64
+ | 0.5749 | 2.5714 | 180 | 0.7107 |
65
+ | 0.7175 | 2.8571 | 200 | 0.7061 |
66
 
67
 
68
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68ea47a57624c6dbd731e74788e484df083739fe089e65c857d9548c17b935a1
3
  size 323014168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:038f8c1e796fc1acb2f1fd738bd7a6d58f49ae512461f85f3c46ef8452e119df
3
  size 323014168
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 200,
3
- "best_metric": 0.760331392288208,
4
  "best_model_checkpoint": "j05hr3d/SFT-Qwen2.5-Coder-7B_v1.1/checkpoint-200",
5
  "epoch": 3.0,
6
  "eval_steps": 20,
@@ -10,170 +10,170 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "epoch": 0.2867383512544803,
14
- "grad_norm": 0.19278952479362488,
15
  "learning_rate": 9.408866995073891e-05,
16
- "loss": 1.0044,
17
  "step": 20
18
  },
19
  {
20
- "epoch": 0.2867383512544803,
21
- "eval_loss": 0.925897479057312,
22
- "eval_runtime": 13.8049,
23
- "eval_samples_per_second": 4.419,
24
- "eval_steps_per_second": 0.58,
25
  "step": 20
26
  },
27
  {
28
- "epoch": 0.5734767025089605,
29
- "grad_norm": 0.3084106743335724,
30
  "learning_rate": 8.423645320197044e-05,
31
- "loss": 0.7707,
32
  "step": 40
33
  },
34
  {
35
- "epoch": 0.5734767025089605,
36
- "eval_loss": 0.8608937859535217,
37
- "eval_runtime": 12.8413,
38
- "eval_samples_per_second": 4.75,
39
- "eval_steps_per_second": 0.623,
40
  "step": 40
41
  },
42
  {
43
- "epoch": 0.8602150537634409,
44
- "grad_norm": 0.2818623483181,
45
  "learning_rate": 7.438423645320197e-05,
46
- "loss": 0.7489,
47
  "step": 60
48
  },
49
  {
50
- "epoch": 0.8602150537634409,
51
- "eval_loss": 0.8248968720436096,
52
- "eval_runtime": 12.8428,
53
- "eval_samples_per_second": 4.75,
54
- "eval_steps_per_second": 0.623,
55
  "step": 60
56
  },
57
  {
58
- "epoch": 1.1433691756272402,
59
- "grad_norm": 0.4351920783519745,
60
  "learning_rate": 6.45320197044335e-05,
61
- "loss": 0.6848,
62
  "step": 80
63
  },
64
  {
65
- "epoch": 1.1433691756272402,
66
- "eval_loss": 0.8104078769683838,
67
- "eval_runtime": 12.8482,
68
- "eval_samples_per_second": 4.748,
69
- "eval_steps_per_second": 0.623,
70
  "step": 80
71
  },
72
  {
73
- "epoch": 1.4301075268817205,
74
- "grad_norm": 0.2989635765552521,
75
  "learning_rate": 5.467980295566503e-05,
76
- "loss": 0.7167,
77
  "step": 100
78
  },
79
  {
80
- "epoch": 1.4301075268817205,
81
- "eval_loss": 0.7897041440010071,
82
- "eval_runtime": 12.8586,
83
- "eval_samples_per_second": 4.744,
84
- "eval_steps_per_second": 0.622,
85
  "step": 100
86
  },
87
  {
88
- "epoch": 1.7168458781362008,
89
- "grad_norm": 0.4142651855945587,
90
  "learning_rate": 4.482758620689655e-05,
91
- "loss": 0.6183,
92
  "step": 120
93
  },
94
  {
95
- "epoch": 1.7168458781362008,
96
- "eval_loss": 0.7741073966026306,
97
- "eval_runtime": 12.8528,
98
- "eval_samples_per_second": 4.746,
99
- "eval_steps_per_second": 0.622,
100
  "step": 120
101
  },
102
  {
103
  "epoch": 2.0,
104
- "grad_norm": 0.9190816283226013,
105
  "learning_rate": 3.497536945812808e-05,
106
- "loss": 0.6007,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 2.0,
111
- "eval_loss": 0.7620603442192078,
112
- "eval_runtime": 12.8483,
113
- "eval_samples_per_second": 4.748,
114
- "eval_steps_per_second": 0.623,
115
  "step": 140
116
  },
117
  {
118
- "epoch": 2.2867383512544803,
119
- "grad_norm": 0.6895560026168823,
120
  "learning_rate": 2.512315270935961e-05,
121
- "loss": 0.5897,
122
  "step": 160
123
  },
124
  {
125
- "epoch": 2.2867383512544803,
126
- "eval_loss": 0.7662609219551086,
127
- "eval_runtime": 12.8451,
128
- "eval_samples_per_second": 4.749,
129
- "eval_steps_per_second": 0.623,
130
  "step": 160
131
  },
132
  {
133
- "epoch": 2.5734767025089607,
134
- "grad_norm": 0.3825511634349823,
135
  "learning_rate": 1.5270935960591133e-05,
136
- "loss": 0.5883,
137
  "step": 180
138
  },
139
  {
140
- "epoch": 2.5734767025089607,
141
- "eval_loss": 0.7646079063415527,
142
- "eval_runtime": 12.8504,
143
- "eval_samples_per_second": 4.747,
144
- "eval_steps_per_second": 0.623,
145
  "step": 180
146
  },
147
  {
148
- "epoch": 2.860215053763441,
149
- "grad_norm": 0.8453129529953003,
150
  "learning_rate": 5.418719211822661e-06,
151
- "loss": 0.5313,
152
  "step": 200
153
  },
154
  {
155
- "epoch": 2.860215053763441,
156
- "eval_loss": 0.760331392288208,
157
- "eval_runtime": 12.8476,
158
- "eval_samples_per_second": 4.748,
159
- "eval_steps_per_second": 0.623,
160
  "step": 200
161
  },
162
  {
163
  "epoch": 3.0,
164
  "step": 210,
165
- "total_flos": 8.231651003616768e+16,
166
- "train_loss": 0.6739955493382045,
167
- "train_runtime": 1174.5589,
168
- "train_samples_per_second": 1.423,
169
- "train_steps_per_second": 0.179
170
  },
171
  {
172
  "epoch": 3.0,
173
- "eval_loss": 0.760331392288208,
174
- "eval_runtime": 12.7995,
175
- "eval_samples_per_second": 4.766,
176
- "eval_steps_per_second": 0.625,
177
  "step": 210
178
  }
179
  ],
@@ -203,7 +203,7 @@
203
  "attributes": {}
204
  }
205
  },
206
- "total_flos": 8.231651003616768e+16,
207
  "train_batch_size": 2,
208
  "trial_name": null,
209
  "trial_params": null
 
1
  {
2
  "best_global_step": 200,
3
+ "best_metric": 0.7061131596565247,
4
  "best_model_checkpoint": "j05hr3d/SFT-Qwen2.5-Coder-7B_v1.1/checkpoint-200",
5
  "epoch": 3.0,
6
  "eval_steps": 20,
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "epoch": 0.2857142857142857,
14
+ "grad_norm": 0.24874337017536163,
15
  "learning_rate": 9.408866995073891e-05,
16
+ "loss": 0.9132,
17
  "step": 20
18
  },
19
  {
20
+ "epoch": 0.2857142857142857,
21
+ "eval_loss": 0.8498335480690002,
22
+ "eval_runtime": 20.8728,
23
+ "eval_samples_per_second": 2.97,
24
+ "eval_steps_per_second": 0.383,
25
  "step": 20
26
  },
27
  {
28
+ "epoch": 0.5714285714285714,
29
+ "grad_norm": 0.24814729392528534,
30
  "learning_rate": 8.423645320197044e-05,
31
+ "loss": 0.9442,
32
  "step": 40
33
  },
34
  {
35
+ "epoch": 0.5714285714285714,
36
+ "eval_loss": 0.7972759008407593,
37
+ "eval_runtime": 18.6915,
38
+ "eval_samples_per_second": 3.317,
39
+ "eval_steps_per_second": 0.428,
40
  "step": 40
41
  },
42
  {
43
+ "epoch": 0.8571428571428571,
44
+ "grad_norm": 0.3184037208557129,
45
  "learning_rate": 7.438423645320197e-05,
46
+ "loss": 0.7474,
47
  "step": 60
48
  },
49
  {
50
+ "epoch": 0.8571428571428571,
51
+ "eval_loss": 0.7688223123550415,
52
+ "eval_runtime": 18.6846,
53
+ "eval_samples_per_second": 3.318,
54
+ "eval_steps_per_second": 0.428,
55
  "step": 60
56
  },
57
  {
58
+ "epoch": 1.1428571428571428,
59
+ "grad_norm": 0.2819560468196869,
60
  "learning_rate": 6.45320197044335e-05,
61
+ "loss": 0.755,
62
  "step": 80
63
  },
64
  {
65
+ "epoch": 1.1428571428571428,
66
+ "eval_loss": 0.7504354119300842,
67
+ "eval_runtime": 18.6934,
68
+ "eval_samples_per_second": 3.317,
69
+ "eval_steps_per_second": 0.428,
70
  "step": 80
71
  },
72
  {
73
+ "epoch": 1.4285714285714286,
74
+ "grad_norm": 0.349388062953949,
75
  "learning_rate": 5.467980295566503e-05,
76
+ "loss": 0.7374,
77
  "step": 100
78
  },
79
  {
80
+ "epoch": 1.4285714285714286,
81
+ "eval_loss": 0.7384942770004272,
82
+ "eval_runtime": 18.7081,
83
+ "eval_samples_per_second": 3.314,
84
+ "eval_steps_per_second": 0.428,
85
  "step": 100
86
  },
87
  {
88
+ "epoch": 1.7142857142857144,
89
+ "grad_norm": 0.4439921975135803,
90
  "learning_rate": 4.482758620689655e-05,
91
+ "loss": 0.6531,
92
  "step": 120
93
  },
94
  {
95
+ "epoch": 1.7142857142857144,
96
+ "eval_loss": 0.7255800366401672,
97
+ "eval_runtime": 18.704,
98
+ "eval_samples_per_second": 3.315,
99
+ "eval_steps_per_second": 0.428,
100
  "step": 120
101
  },
102
  {
103
  "epoch": 2.0,
104
+ "grad_norm": 1.2030707597732544,
105
  "learning_rate": 3.497536945812808e-05,
106
+ "loss": 0.6193,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 2.0,
111
+ "eval_loss": 0.7122625112533569,
112
+ "eval_runtime": 18.6953,
113
+ "eval_samples_per_second": 3.316,
114
+ "eval_steps_per_second": 0.428,
115
  "step": 140
116
  },
117
  {
118
+ "epoch": 2.2857142857142856,
119
+ "grad_norm": 0.7790128588676453,
120
  "learning_rate": 2.512315270935961e-05,
121
+ "loss": 0.5379,
122
  "step": 160
123
  },
124
  {
125
+ "epoch": 2.2857142857142856,
126
+ "eval_loss": 0.7121083736419678,
127
+ "eval_runtime": 18.7059,
128
+ "eval_samples_per_second": 3.314,
129
+ "eval_steps_per_second": 0.428,
130
  "step": 160
131
  },
132
  {
133
+ "epoch": 2.571428571428571,
134
+ "grad_norm": 0.5455464720726013,
135
  "learning_rate": 1.5270935960591133e-05,
136
+ "loss": 0.5749,
137
  "step": 180
138
  },
139
  {
140
+ "epoch": 2.571428571428571,
141
+ "eval_loss": 0.71072918176651,
142
+ "eval_runtime": 18.7175,
143
+ "eval_samples_per_second": 3.312,
144
+ "eval_steps_per_second": 0.427,
145
  "step": 180
146
  },
147
  {
148
+ "epoch": 2.857142857142857,
149
+ "grad_norm": 0.8019999861717224,
150
  "learning_rate": 5.418719211822661e-06,
151
+ "loss": 0.7175,
152
  "step": 200
153
  },
154
  {
155
+ "epoch": 2.857142857142857,
156
+ "eval_loss": 0.7061131596565247,
157
+ "eval_runtime": 18.7094,
158
+ "eval_samples_per_second": 3.314,
159
+ "eval_steps_per_second": 0.428,
160
  "step": 200
161
  },
162
  {
163
  "epoch": 3.0,
164
  "step": 210,
165
+ "total_flos": 7.87369399526062e+16,
166
+ "train_loss": 0.7112103757404146,
167
+ "train_runtime": 1206.0601,
168
+ "train_samples_per_second": 1.39,
169
+ "train_steps_per_second": 0.174
170
  },
171
  {
172
  "epoch": 3.0,
173
+ "eval_loss": 0.7061131596565247,
174
+ "eval_runtime": 18.6112,
175
+ "eval_samples_per_second": 3.331,
176
+ "eval_steps_per_second": 0.43,
177
  "step": 210
178
  }
179
  ],
 
203
  "attributes": {}
204
  }
205
  },
206
+ "total_flos": 7.87369399526062e+16,
207
  "train_batch_size": 2,
208
  "trial_name": null,
209
  "trial_params": null