panda992 committed on
Commit
ed28b4c
·
verified ·
1 Parent(s): db3f9a7

🍻 cheers

Browse files
README.md CHANGED
@@ -3,6 +3,7 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: google/vit-base-patch16-224-in21k
5
  tags:
 
6
  - generated_from_trainer
7
  metrics:
8
  - accuracy
@@ -16,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # fish_disease_datasets
18
 
19
- This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.0866
22
  - Accuracy: 0.9728
 
3
  license: apache-2.0
4
  base_model: google/vit-base-patch16-224-in21k
5
  tags:
6
+ - image-classification
7
  - generated_from_trainer
8
  metrics:
9
  - accuracy
 
17
 
18
  # fish_disease_datasets
19
 
20
+ This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the fish_disease_datasets dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.0866
23
  - Accuracy: 0.9728
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.9809782608695652,
4
- "eval_loss": 0.07041697204113007,
5
- "eval_runtime": 1.7197,
6
- "eval_samples_per_second": 213.985,
7
- "eval_steps_per_second": 26.748,
8
  "total_flos": 6.45382209997357e+17,
9
- "train_loss": 0.27801662728986665,
10
- "train_runtime": 166.0361,
11
- "train_samples_per_second": 50.158,
12
- "train_steps_per_second": 3.156
13
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.9728260869565217,
4
+ "eval_loss": 0.08664274215698242,
5
+ "eval_runtime": 1.5646,
6
+ "eval_samples_per_second": 235.209,
7
+ "eval_steps_per_second": 29.401,
8
  "total_flos": 6.45382209997357e+17,
9
+ "train_loss": 0.2574100203186501,
10
+ "train_runtime": 180.8127,
11
+ "train_samples_per_second": 46.059,
12
+ "train_steps_per_second": 2.898
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.9809782608695652,
4
- "eval_loss": 0.07041697204113007,
5
- "eval_runtime": 1.7197,
6
- "eval_samples_per_second": 213.985,
7
- "eval_steps_per_second": 26.748
8
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.9728260869565217,
4
+ "eval_loss": 0.08664274215698242,
5
+ "eval_runtime": 1.5646,
6
+ "eval_samples_per_second": 235.209,
7
+ "eval_steps_per_second": 29.401
8
  }
runs/May29_06-11-43_24e856c6c156/events.out.tfevents.1748499812.24e856c6c156.378.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6173c982c40285fdac5012c21d6b1623b1e1421ebf6c821114c15b471aa399fe
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 6.45382209997357e+17,
4
- "train_loss": 0.27801662728986665,
5
- "train_runtime": 166.0361,
6
- "train_samples_per_second": 50.158,
7
- "train_steps_per_second": 3.156
8
  }
 
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 6.45382209997357e+17,
4
+ "train_loss": 0.2574100203186501,
5
+ "train_runtime": 180.8127,
6
+ "train_samples_per_second": 46.059,
7
+ "train_steps_per_second": 2.898
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 500,
3
- "best_metric": 0.07041697204113007,
4
  "best_model_checkpoint": "fish_disease_datasets/checkpoint-500",
5
  "epoch": 4.0,
6
  "eval_steps": 100,
@@ -11,421 +11,421 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.07633587786259542,
14
- "grad_norm": 1.8386459350585938,
15
  "learning_rate": 0.00019656488549618322,
16
- "loss": 1.8727,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.15267175572519084,
21
- "grad_norm": 2.027068614959717,
22
  "learning_rate": 0.00019274809160305345,
23
- "loss": 1.6646,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.22900763358778625,
28
- "grad_norm": 1.9189614057540894,
29
  "learning_rate": 0.00018893129770992367,
30
- "loss": 1.4059,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 0.3053435114503817,
35
- "grad_norm": 2.0967509746551514,
36
  "learning_rate": 0.0001851145038167939,
37
- "loss": 1.0754,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 0.3816793893129771,
42
- "grad_norm": 2.0452075004577637,
43
  "learning_rate": 0.00018129770992366412,
44
- "loss": 0.9438,
45
  "step": 50
46
  },
47
  {
48
  "epoch": 0.4580152671755725,
49
- "grad_norm": 2.0183463096618652,
50
  "learning_rate": 0.00017748091603053437,
51
- "loss": 0.8577,
52
  "step": 60
53
  },
54
  {
55
  "epoch": 0.5343511450381679,
56
- "grad_norm": 3.3413853645324707,
57
  "learning_rate": 0.0001736641221374046,
58
- "loss": 0.8169,
59
  "step": 70
60
  },
61
  {
62
  "epoch": 0.6106870229007634,
63
- "grad_norm": 2.440781593322754,
64
  "learning_rate": 0.00016984732824427482,
65
- "loss": 0.6525,
66
  "step": 80
67
  },
68
  {
69
  "epoch": 0.6870229007633588,
70
- "grad_norm": 3.064451217651367,
71
  "learning_rate": 0.00016603053435114505,
72
- "loss": 0.4708,
73
  "step": 90
74
  },
75
  {
76
  "epoch": 0.7633587786259542,
77
- "grad_norm": 2.395552396774292,
78
  "learning_rate": 0.00016221374045801527,
79
- "loss": 0.4357,
80
  "step": 100
81
  },
82
  {
83
  "epoch": 0.7633587786259542,
84
- "eval_accuracy": 0.9375,
85
- "eval_loss": 0.32088735699653625,
86
- "eval_runtime": 1.5155,
87
- "eval_samples_per_second": 242.823,
88
- "eval_steps_per_second": 30.353,
89
  "step": 100
90
  },
91
  {
92
  "epoch": 0.8396946564885496,
93
- "grad_norm": 4.234215259552002,
94
  "learning_rate": 0.0001583969465648855,
95
- "loss": 0.3343,
96
  "step": 110
97
  },
98
  {
99
  "epoch": 0.916030534351145,
100
- "grad_norm": 2.090794086456299,
101
  "learning_rate": 0.00015458015267175574,
102
- "loss": 0.3294,
103
  "step": 120
104
  },
105
  {
106
  "epoch": 0.9923664122137404,
107
- "grad_norm": 1.4856677055358887,
108
  "learning_rate": 0.00015076335877862594,
109
- "loss": 0.3227,
110
  "step": 130
111
  },
112
  {
113
  "epoch": 1.0687022900763359,
114
- "grad_norm": 0.4898907542228699,
115
  "learning_rate": 0.0001469465648854962,
116
- "loss": 0.1935,
117
  "step": 140
118
  },
119
  {
120
  "epoch": 1.1450381679389312,
121
- "grad_norm": 1.27410089969635,
122
  "learning_rate": 0.00014312977099236642,
123
- "loss": 0.2564,
124
  "step": 150
125
  },
126
  {
127
  "epoch": 1.2213740458015268,
128
- "grad_norm": 1.4080382585525513,
129
  "learning_rate": 0.00013931297709923664,
130
- "loss": 0.1208,
131
  "step": 160
132
  },
133
  {
134
  "epoch": 1.297709923664122,
135
- "grad_norm": 4.060358047485352,
136
  "learning_rate": 0.0001354961832061069,
137
- "loss": 0.1377,
138
  "step": 170
139
  },
140
  {
141
  "epoch": 1.3740458015267176,
142
- "grad_norm": 1.9385284185409546,
143
  "learning_rate": 0.0001316793893129771,
144
- "loss": 0.1576,
145
  "step": 180
146
  },
147
  {
148
  "epoch": 1.450381679389313,
149
- "grad_norm": 1.01824152469635,
150
  "learning_rate": 0.00012786259541984734,
151
- "loss": 0.1459,
152
  "step": 190
153
  },
154
  {
155
  "epoch": 1.5267175572519083,
156
- "grad_norm": 3.3385653495788574,
157
  "learning_rate": 0.00012404580152671757,
158
- "loss": 0.1859,
159
  "step": 200
160
  },
161
  {
162
  "epoch": 1.5267175572519083,
163
- "eval_accuracy": 0.970108695652174,
164
- "eval_loss": 0.13519759476184845,
165
- "eval_runtime": 1.5011,
166
- "eval_samples_per_second": 245.146,
167
- "eval_steps_per_second": 30.643,
168
  "step": 200
169
  },
170
  {
171
  "epoch": 1.6030534351145038,
172
- "grad_norm": 1.7121399641036987,
173
  "learning_rate": 0.0001202290076335878,
174
- "loss": 0.2204,
175
  "step": 210
176
  },
177
  {
178
  "epoch": 1.6793893129770994,
179
- "grad_norm": 0.9915915727615356,
180
  "learning_rate": 0.00011641221374045803,
181
- "loss": 0.1224,
182
  "step": 220
183
  },
184
  {
185
  "epoch": 1.7557251908396947,
186
- "grad_norm": 0.21471183001995087,
187
  "learning_rate": 0.00011259541984732824,
188
- "loss": 0.0638,
189
  "step": 230
190
  },
191
  {
192
  "epoch": 1.83206106870229,
193
- "grad_norm": 0.16611893475055695,
194
  "learning_rate": 0.00010877862595419848,
195
- "loss": 0.1206,
196
  "step": 240
197
  },
198
  {
199
  "epoch": 1.9083969465648853,
200
- "grad_norm": 1.0316003561019897,
201
  "learning_rate": 0.00010496183206106871,
202
- "loss": 0.1723,
203
  "step": 250
204
  },
205
  {
206
  "epoch": 1.984732824427481,
207
- "grad_norm": 5.562504291534424,
208
  "learning_rate": 0.00010114503816793894,
209
- "loss": 0.1555,
210
  "step": 260
211
  },
212
  {
213
  "epoch": 2.0610687022900764,
214
- "grad_norm": 0.12873513996601105,
215
  "learning_rate": 9.732824427480916e-05,
216
- "loss": 0.0993,
217
  "step": 270
218
  },
219
  {
220
  "epoch": 2.1374045801526718,
221
- "grad_norm": 0.1394704133272171,
222
  "learning_rate": 9.351145038167939e-05,
223
- "loss": 0.0892,
224
  "step": 280
225
  },
226
  {
227
  "epoch": 2.213740458015267,
228
- "grad_norm": 0.514519989490509,
229
  "learning_rate": 8.969465648854962e-05,
230
- "loss": 0.061,
231
  "step": 290
232
  },
233
  {
234
  "epoch": 2.2900763358778624,
235
- "grad_norm": 0.5804300904273987,
236
  "learning_rate": 8.587786259541986e-05,
237
- "loss": 0.0641,
238
  "step": 300
239
  },
240
  {
241
  "epoch": 2.2900763358778624,
242
- "eval_accuracy": 0.9782608695652174,
243
- "eval_loss": 0.09063434600830078,
244
- "eval_runtime": 1.5297,
245
- "eval_samples_per_second": 240.566,
246
- "eval_steps_per_second": 30.071,
247
  "step": 300
248
  },
249
  {
250
  "epoch": 2.366412213740458,
251
- "grad_norm": 0.3890454173088074,
252
  "learning_rate": 8.206106870229007e-05,
253
- "loss": 0.1259,
254
  "step": 310
255
  },
256
  {
257
  "epoch": 2.4427480916030535,
258
- "grad_norm": 1.0797548294067383,
259
  "learning_rate": 7.824427480916031e-05,
260
- "loss": 0.0578,
261
  "step": 320
262
  },
263
  {
264
  "epoch": 2.519083969465649,
265
- "grad_norm": 3.639617443084717,
266
  "learning_rate": 7.442748091603053e-05,
267
- "loss": 0.0914,
268
  "step": 330
269
  },
270
  {
271
  "epoch": 2.595419847328244,
272
- "grad_norm": 0.27799326181411743,
273
  "learning_rate": 7.061068702290077e-05,
274
- "loss": 0.0622,
275
  "step": 340
276
  },
277
  {
278
  "epoch": 2.67175572519084,
279
- "grad_norm": 0.09738999605178833,
280
  "learning_rate": 6.6793893129771e-05,
281
- "loss": 0.0516,
282
  "step": 350
283
  },
284
  {
285
  "epoch": 2.7480916030534353,
286
- "grad_norm": 0.08531022071838379,
287
  "learning_rate": 6.297709923664122e-05,
288
- "loss": 0.0362,
289
  "step": 360
290
  },
291
  {
292
  "epoch": 2.8244274809160306,
293
- "grad_norm": 0.08216961473226547,
294
  "learning_rate": 5.916030534351146e-05,
295
- "loss": 0.0482,
296
  "step": 370
297
  },
298
  {
299
  "epoch": 2.900763358778626,
300
- "grad_norm": 0.07119940966367722,
301
  "learning_rate": 5.534351145038168e-05,
302
- "loss": 0.0288,
303
  "step": 380
304
  },
305
  {
306
  "epoch": 2.9770992366412212,
307
- "grad_norm": 0.06432362645864487,
308
  "learning_rate": 5.152671755725191e-05,
309
- "loss": 0.0445,
310
  "step": 390
311
  },
312
  {
313
  "epoch": 3.053435114503817,
314
- "grad_norm": 0.09170796722173691,
315
  "learning_rate": 4.7709923664122144e-05,
316
- "loss": 0.0202,
317
  "step": 400
318
  },
319
  {
320
  "epoch": 3.053435114503817,
321
- "eval_accuracy": 0.9755434782608695,
322
- "eval_loss": 0.10268648713827133,
323
- "eval_runtime": 1.5318,
324
- "eval_samples_per_second": 240.238,
325
- "eval_steps_per_second": 30.03,
326
  "step": 400
327
  },
328
  {
329
  "epoch": 3.1297709923664123,
330
- "grad_norm": 0.5211504697799683,
331
  "learning_rate": 4.389312977099237e-05,
332
- "loss": 0.0262,
333
  "step": 410
334
  },
335
  {
336
  "epoch": 3.2061068702290076,
337
- "grad_norm": 0.23740150034427643,
338
  "learning_rate": 4.00763358778626e-05,
339
- "loss": 0.0276,
340
  "step": 420
341
  },
342
  {
343
  "epoch": 3.282442748091603,
344
- "grad_norm": 0.28564441204071045,
345
  "learning_rate": 3.625954198473282e-05,
346
- "loss": 0.0215,
347
  "step": 430
348
  },
349
  {
350
  "epoch": 3.3587786259541983,
351
- "grad_norm": 0.06320058554410934,
352
  "learning_rate": 3.2442748091603054e-05,
353
- "loss": 0.0306,
354
  "step": 440
355
  },
356
  {
357
  "epoch": 3.435114503816794,
358
- "grad_norm": 0.0660184696316719,
359
  "learning_rate": 2.862595419847328e-05,
360
- "loss": 0.0321,
361
  "step": 450
362
  },
363
  {
364
  "epoch": 3.5114503816793894,
365
- "grad_norm": 0.7349413633346558,
366
  "learning_rate": 2.4809160305343512e-05,
367
- "loss": 0.0331,
368
  "step": 460
369
  },
370
  {
371
  "epoch": 3.5877862595419847,
372
- "grad_norm": 0.057535506784915924,
373
  "learning_rate": 2.099236641221374e-05,
374
- "loss": 0.0188,
375
  "step": 470
376
  },
377
  {
378
  "epoch": 3.66412213740458,
379
- "grad_norm": 0.4652552902698517,
380
  "learning_rate": 1.717557251908397e-05,
381
- "loss": 0.0179,
382
  "step": 480
383
  },
384
  {
385
  "epoch": 3.7404580152671754,
386
- "grad_norm": 0.057850148528814316,
387
  "learning_rate": 1.3358778625954198e-05,
388
- "loss": 0.0539,
389
  "step": 490
390
  },
391
  {
392
  "epoch": 3.816793893129771,
393
- "grad_norm": 1.1574023962020874,
394
  "learning_rate": 9.541984732824428e-06,
395
- "loss": 0.0352,
396
  "step": 500
397
  },
398
  {
399
  "epoch": 3.816793893129771,
400
- "eval_accuracy": 0.9809782608695652,
401
- "eval_loss": 0.07041697204113007,
402
- "eval_runtime": 1.5657,
403
- "eval_samples_per_second": 235.036,
404
- "eval_steps_per_second": 29.379,
405
  "step": 500
406
  },
407
  {
408
  "epoch": 3.8931297709923665,
409
- "grad_norm": 0.06465106457471848,
410
  "learning_rate": 5.725190839694657e-06,
411
- "loss": 0.0173,
412
  "step": 510
413
  },
414
  {
415
  "epoch": 3.969465648854962,
416
- "grad_norm": 0.25634443759918213,
417
  "learning_rate": 1.908396946564886e-06,
418
- "loss": 0.0968,
419
  "step": 520
420
  },
421
  {
422
  "epoch": 4.0,
423
  "step": 524,
424
  "total_flos": 6.45382209997357e+17,
425
- "train_loss": 0.27801662728986665,
426
- "train_runtime": 166.0361,
427
- "train_samples_per_second": 50.158,
428
- "train_steps_per_second": 3.156
429
  }
430
  ],
431
  "logging_steps": 10,
 
1
  {
2
  "best_global_step": 500,
3
+ "best_metric": 0.08664274215698242,
4
  "best_model_checkpoint": "fish_disease_datasets/checkpoint-500",
5
  "epoch": 4.0,
6
  "eval_steps": 100,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.07633587786259542,
14
+ "grad_norm": 2.0032925605773926,
15
  "learning_rate": 0.00019656488549618322,
16
+ "loss": 1.8167,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.15267175572519084,
21
+ "grad_norm": 2.0694987773895264,
22
  "learning_rate": 0.00019274809160305345,
23
+ "loss": 1.5113,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.22900763358778625,
28
+ "grad_norm": 1.8945856094360352,
29
  "learning_rate": 0.00018893129770992367,
30
+ "loss": 1.3072,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 0.3053435114503817,
35
+ "grad_norm": 1.994113564491272,
36
  "learning_rate": 0.0001851145038167939,
37
+ "loss": 1.0743,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 0.3816793893129771,
42
+ "grad_norm": 2.0848724842071533,
43
  "learning_rate": 0.00018129770992366412,
44
+ "loss": 0.7789,
45
  "step": 50
46
  },
47
  {
48
  "epoch": 0.4580152671755725,
49
+ "grad_norm": 2.2140491008758545,
50
  "learning_rate": 0.00017748091603053437,
51
+ "loss": 0.7127,
52
  "step": 60
53
  },
54
  {
55
  "epoch": 0.5343511450381679,
56
+ "grad_norm": 2.714928388595581,
57
  "learning_rate": 0.0001736641221374046,
58
+ "loss": 0.592,
59
  "step": 70
60
  },
61
  {
62
  "epoch": 0.6106870229007634,
63
+ "grad_norm": 0.919693112373352,
64
  "learning_rate": 0.00016984732824427482,
65
+ "loss": 0.4939,
66
  "step": 80
67
  },
68
  {
69
  "epoch": 0.6870229007633588,
70
+ "grad_norm": 3.400426149368286,
71
  "learning_rate": 0.00016603053435114505,
72
+ "loss": 0.4973,
73
  "step": 90
74
  },
75
  {
76
  "epoch": 0.7633587786259542,
77
+ "grad_norm": 6.015654563903809,
78
  "learning_rate": 0.00016221374045801527,
79
+ "loss": 0.3865,
80
  "step": 100
81
  },
82
  {
83
  "epoch": 0.7633587786259542,
84
+ "eval_accuracy": 0.8913043478260869,
85
+ "eval_loss": 0.4161355495452881,
86
+ "eval_runtime": 1.7389,
87
+ "eval_samples_per_second": 211.632,
88
+ "eval_steps_per_second": 26.454,
89
  "step": 100
90
  },
91
  {
92
  "epoch": 0.8396946564885496,
93
+ "grad_norm": 2.465869665145874,
94
  "learning_rate": 0.0001583969465648855,
95
+ "loss": 0.3979,
96
  "step": 110
97
  },
98
  {
99
  "epoch": 0.916030534351145,
100
+ "grad_norm": 3.375448226928711,
101
  "learning_rate": 0.00015458015267175574,
102
+ "loss": 0.3569,
103
  "step": 120
104
  },
105
  {
106
  "epoch": 0.9923664122137404,
107
+ "grad_norm": 1.7015740871429443,
108
  "learning_rate": 0.00015076335877862594,
109
+ "loss": 0.4148,
110
  "step": 130
111
  },
112
  {
113
  "epoch": 1.0687022900763359,
114
+ "grad_norm": 0.402433305978775,
115
  "learning_rate": 0.0001469465648854962,
116
+ "loss": 0.2044,
117
  "step": 140
118
  },
119
  {
120
  "epoch": 1.1450381679389312,
121
+ "grad_norm": 2.2606430053710938,
122
  "learning_rate": 0.00014312977099236642,
123
+ "loss": 0.1364,
124
  "step": 150
125
  },
126
  {
127
  "epoch": 1.2213740458015268,
128
+ "grad_norm": 1.087276816368103,
129
  "learning_rate": 0.00013931297709923664,
130
+ "loss": 0.2527,
131
  "step": 160
132
  },
133
  {
134
  "epoch": 1.297709923664122,
135
+ "grad_norm": 3.1737782955169678,
136
  "learning_rate": 0.0001354961832061069,
137
+ "loss": 0.1463,
138
  "step": 170
139
  },
140
  {
141
  "epoch": 1.3740458015267176,
142
+ "grad_norm": 0.22228187322616577,
143
  "learning_rate": 0.0001316793893129771,
144
+ "loss": 0.136,
145
  "step": 180
146
  },
147
  {
148
  "epoch": 1.450381679389313,
149
+ "grad_norm": 6.465834617614746,
150
  "learning_rate": 0.00012786259541984734,
151
+ "loss": 0.1747,
152
  "step": 190
153
  },
154
  {
155
  "epoch": 1.5267175572519083,
156
+ "grad_norm": 1.6935511827468872,
157
  "learning_rate": 0.00012404580152671757,
158
+ "loss": 0.1206,
159
  "step": 200
160
  },
161
  {
162
  "epoch": 1.5267175572519083,
163
+ "eval_accuracy": 0.9456521739130435,
164
+ "eval_loss": 0.2170080989599228,
165
+ "eval_runtime": 1.5107,
166
+ "eval_samples_per_second": 243.59,
167
+ "eval_steps_per_second": 30.449,
168
  "step": 200
169
  },
170
  {
171
  "epoch": 1.6030534351145038,
172
+ "grad_norm": 2.237194061279297,
173
  "learning_rate": 0.0001202290076335878,
174
+ "loss": 0.1119,
175
  "step": 210
176
  },
177
  {
178
  "epoch": 1.6793893129770994,
179
+ "grad_norm": 0.3265667259693146,
180
  "learning_rate": 0.00011641221374045803,
181
+ "loss": 0.1954,
182
  "step": 220
183
  },
184
  {
185
  "epoch": 1.7557251908396947,
186
+ "grad_norm": 4.540020942687988,
187
  "learning_rate": 0.00011259541984732824,
188
+ "loss": 0.1225,
189
  "step": 230
190
  },
191
  {
192
  "epoch": 1.83206106870229,
193
+ "grad_norm": 4.758110046386719,
194
  "learning_rate": 0.00010877862595419848,
195
+ "loss": 0.1258,
196
  "step": 240
197
  },
198
  {
199
  "epoch": 1.9083969465648853,
200
+ "grad_norm": 0.1943335086107254,
201
  "learning_rate": 0.00010496183206106871,
202
+ "loss": 0.0803,
203
  "step": 250
204
  },
205
  {
206
  "epoch": 1.984732824427481,
207
+ "grad_norm": 3.112128734588623,
208
  "learning_rate": 0.00010114503816793894,
209
+ "loss": 0.0871,
210
  "step": 260
211
  },
212
  {
213
  "epoch": 2.0610687022900764,
214
+ "grad_norm": 2.004229784011841,
215
  "learning_rate": 9.732824427480916e-05,
216
+ "loss": 0.1014,
217
  "step": 270
218
  },
219
  {
220
  "epoch": 2.1374045801526718,
221
+ "grad_norm": 2.089447498321533,
222
  "learning_rate": 9.351145038167939e-05,
223
+ "loss": 0.0635,
224
  "step": 280
225
  },
226
  {
227
  "epoch": 2.213740458015267,
228
+ "grad_norm": 0.177068829536438,
229
  "learning_rate": 8.969465648854962e-05,
230
+ "loss": 0.0506,
231
  "step": 290
232
  },
233
  {
234
  "epoch": 2.2900763358778624,
235
+ "grad_norm": 1.701705813407898,
236
  "learning_rate": 8.587786259541986e-05,
237
+ "loss": 0.1132,
238
  "step": 300
239
  },
240
  {
241
  "epoch": 2.2900763358778624,
242
+ "eval_accuracy": 0.967391304347826,
243
+ "eval_loss": 0.13170665502548218,
244
+ "eval_runtime": 1.8599,
245
+ "eval_samples_per_second": 197.858,
246
+ "eval_steps_per_second": 24.732,
247
  "step": 300
248
  },
249
  {
250
  "epoch": 2.366412213740458,
251
+ "grad_norm": 0.22077667713165283,
252
  "learning_rate": 8.206106870229007e-05,
253
+ "loss": 0.0579,
254
  "step": 310
255
  },
256
  {
257
  "epoch": 2.4427480916030535,
258
+ "grad_norm": 4.027545928955078,
259
  "learning_rate": 7.824427480916031e-05,
260
+ "loss": 0.0684,
261
  "step": 320
262
  },
263
  {
264
  "epoch": 2.519083969465649,
265
+ "grad_norm": 0.08583887666463852,
266
  "learning_rate": 7.442748091603053e-05,
267
+ "loss": 0.0709,
268
  "step": 330
269
  },
270
  {
271
  "epoch": 2.595419847328244,
272
+ "grad_norm": 0.07327867299318314,
273
  "learning_rate": 7.061068702290077e-05,
274
+ "loss": 0.0301,
275
  "step": 340
276
  },
277
  {
278
  "epoch": 2.67175572519084,
279
+ "grad_norm": 0.08430969715118408,
280
  "learning_rate": 6.6793893129771e-05,
281
+ "loss": 0.0843,
282
  "step": 350
283
  },
284
  {
285
  "epoch": 2.7480916030534353,
286
+ "grad_norm": 0.08751753717660904,
287
  "learning_rate": 6.297709923664122e-05,
288
+ "loss": 0.0345,
289
  "step": 360
290
  },
291
  {
292
  "epoch": 2.8244274809160306,
293
+ "grad_norm": 0.06909404695034027,
294
  "learning_rate": 5.916030534351146e-05,
295
+ "loss": 0.0416,
296
  "step": 370
297
  },
298
  {
299
  "epoch": 2.900763358778626,
300
+ "grad_norm": 0.06500696390867233,
301
  "learning_rate": 5.534351145038168e-05,
302
+ "loss": 0.066,
303
  "step": 380
304
  },
305
  {
306
  "epoch": 2.9770992366412212,
307
+ "grad_norm": 0.08221684396266937,
308
  "learning_rate": 5.152671755725191e-05,
309
+ "loss": 0.0433,
310
  "step": 390
311
  },
312
  {
313
  "epoch": 3.053435114503817,
314
+ "grad_norm": 1.0126067399978638,
315
  "learning_rate": 4.7709923664122144e-05,
316
+ "loss": 0.0547,
317
  "step": 400
318
  },
319
  {
320
  "epoch": 3.053435114503817,
321
+ "eval_accuracy": 0.9809782608695652,
322
+ "eval_loss": 0.08785175532102585,
323
+ "eval_runtime": 1.8079,
324
+ "eval_samples_per_second": 203.548,
325
+ "eval_steps_per_second": 25.443,
326
  "step": 400
327
  },
328
  {
329
  "epoch": 3.1297709923664123,
330
+ "grad_norm": 0.07123812288045883,
331
  "learning_rate": 4.389312977099237e-05,
332
+ "loss": 0.0272,
333
  "step": 410
334
  },
335
  {
336
  "epoch": 3.2061068702290076,
337
+ "grad_norm": 0.06167351081967354,
338
  "learning_rate": 4.00763358778626e-05,
339
+ "loss": 0.0386,
340
  "step": 420
341
  },
342
  {
343
  "epoch": 3.282442748091603,
344
+ "grad_norm": 0.06654069572687149,
345
  "learning_rate": 3.625954198473282e-05,
346
+ "loss": 0.0312,
347
  "step": 430
348
  },
349
  {
350
  "epoch": 3.3587786259541983,
351
+ "grad_norm": 0.12739847600460052,
352
  "learning_rate": 3.2442748091603054e-05,
353
+ "loss": 0.0446,
354
  "step": 440
355
  },
356
  {
357
  "epoch": 3.435114503816794,
358
+ "grad_norm": 0.8813410997390747,
359
  "learning_rate": 2.862595419847328e-05,
360
+ "loss": 0.0417,
361
  "step": 450
362
  },
363
  {
364
  "epoch": 3.5114503816793894,
365
+ "grad_norm": 0.9467947483062744,
366
  "learning_rate": 2.4809160305343512e-05,
367
+ "loss": 0.027,
368
  "step": 460
369
  },
370
  {
371
  "epoch": 3.5877862595419847,
372
+ "grad_norm": 0.22488470375537872,
373
  "learning_rate": 2.099236641221374e-05,
374
+ "loss": 0.021,
375
  "step": 470
376
  },
377
  {
378
  "epoch": 3.66412213740458,
379
+ "grad_norm": 0.05164281651377678,
380
  "learning_rate": 1.717557251908397e-05,
381
+ "loss": 0.0254,
382
  "step": 480
383
  },
384
  {
385
  "epoch": 3.7404580152671754,
386
+ "grad_norm": 0.055942848324775696,
387
  "learning_rate": 1.3358778625954198e-05,
388
+ "loss": 0.0376,
389
  "step": 490
390
  },
391
  {
392
  "epoch": 3.816793893129771,
393
+ "grad_norm": 0.17778228223323822,
394
  "learning_rate": 9.541984732824428e-06,
395
+ "loss": 0.0209,
396
  "step": 500
397
  },
398
  {
399
  "epoch": 3.816793893129771,
400
+ "eval_accuracy": 0.9728260869565217,
401
+ "eval_loss": 0.08664274215698242,
402
+ "eval_runtime": 1.5113,
403
+ "eval_samples_per_second": 243.505,
404
+ "eval_steps_per_second": 30.438,
405
  "step": 500
406
  },
407
  {
408
  "epoch": 3.8931297709923665,
409
+ "grad_norm": 0.05874630808830261,
410
  "learning_rate": 5.725190839694657e-06,
411
+ "loss": 0.0193,
412
  "step": 510
413
  },
414
  {
415
  "epoch": 3.969465648854962,
416
+ "grad_norm": 0.07613587379455566,
417
  "learning_rate": 1.908396946564886e-06,
418
+ "loss": 0.0316,
419
  "step": 520
420
  },
421
  {
422
  "epoch": 4.0,
423
  "step": 524,
424
  "total_flos": 6.45382209997357e+17,
425
+ "train_loss": 0.2574100203186501,
426
+ "train_runtime": 180.8127,
427
+ "train_samples_per_second": 46.059,
428
+ "train_steps_per_second": 2.898
429
  }
430
  ],
431
  "logging_steps": 10,