JhonMR committed on
Commit
8b1cd12
·
verified ·
1 Parent(s): cf8665b
README.md ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: distilbert/distilbert-base-uncased
5
+ tags:
6
+ - generated_from_trainer
7
+ model-index:
8
+ - name: My_Model
9
+ results: []
10
+ ---
11
+
12
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
+ should probably proofread and complete it, then remove this comment. -->
14
+
15
+ # My_Model
16
+
17
+ This model is a fine-tuned version of [distilbert/distilbert-base-uncased](https://huggingface.co/distilbert/distilbert-base-uncased) on an unknown dataset (the dataset name was not recorded by the Trainer).
18
+ It achieves the following results on the evaluation set:
19
+ - Loss: 0.4353
20
+ - Accuracy@en: 0.8946
21
+ - F1@en: 0.8931
22
+ - Precision@en: 0.8965
23
+ - Recall@en: 0.8952
24
+ - Loss@en: 0.4353
25
+
26
+ ## Model description
27
+
28
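+ A DistilBERT sequence classifier (`DistilBertForSequenceClassification`, `single_label_classification`) that assigns one of 40 document-type labels (for example `ActaAudienciaConcentrada`, `OrdenCaptura`, `NoticiaCriminal`); the full label set is listed under `id2label` in `config.json`. More information needed.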
29
+
30
+ ## Intended uses & limitations
31
+
32
+ More information needed
33
+
34
+ ## Training and evaluation data
35
+
36
+ More information needed
37
+
38
+ ## Training procedure
39
+
40
+ ### Training hyperparameters
41
+
42
+ The following hyperparameters were used during training:
43
+ - learning_rate: 1e-05
44
+ - train_batch_size: 8
45
+ - eval_batch_size: 8
46
+ - seed: 42
47
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
48
+ - lr_scheduler_type: linear
49
+ - num_epochs: 10
50
+
51
+ ### Training results
52
+
53
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy@en | F1@en | Precision@en | Recall@en | Loss@en |
54
+ |:-------------:|:-----:|:----:|:---------------:|:-----------:|:------:|:------------:|:---------:|:-------:|
55
+ | 3.1352 | 1.0 | 700 | 2.5257 | 0.3279 | 0.2574 | 0.3241 | 0.3350 | 2.5257 |
56
+ | 2.1929 | 2.0 | 1400 | 1.6830 | 0.6121 | 0.5597 | 0.6589 | 0.6125 | 1.6830 |
57
+ | 1.4355 | 3.0 | 2100 | 1.0143 | 0.7929 | 0.7750 | 0.7990 | 0.7914 | 1.0143 |
58
+ | 0.9043 | 4.0 | 2800 | 0.6990 | 0.8242 | 0.8062 | 0.8239 | 0.8248 | 0.6990 |
59
+ | 0.6269 | 5.0 | 3500 | 0.5446 | 0.8796 | 0.8783 | 0.8845 | 0.8802 | 0.5446 |
60
+ | 0.4852 | 6.0 | 4200 | 0.4860 | 0.8862 | 0.8847 | 0.8901 | 0.8862 | 0.4860 |
61
+ | 0.4124 | 7.0 | 4900 | 0.4538 | 0.8892 | 0.8876 | 0.8925 | 0.8896 | 0.4538 |
62
+ | 0.3602 | 8.0 | 5600 | 0.4392 | 0.8900 | 0.8886 | 0.8925 | 0.8906 | 0.4392 |
63
+ | 0.3355 | 9.0 | 6300 | 0.4373 | 0.8912 | 0.8898 | 0.8934 | 0.8919 | 0.4373 |
64
+ | 0.3147 | 10.0 | 7000 | 0.4353 | 0.8946 | 0.8931 | 0.8965 | 0.8952 | 0.4353 |
65
+
66
+
67
+ ### Framework versions
68
+
69
+ - Transformers 4.44.2
70
+ - Pytorch 2.4.1+cu121
71
+ - Datasets 3.0.1
72
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "total_flos": 7423201443840000.0,
4
+ "train_loss": 1.0203040662493024,
5
+ "train_runtime": 4378.397,
6
+ "train_samples_per_second": 12.79,
7
+ "train_steps_per_second": 1.599
8
+ }
config.json ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert/distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "ActaAudienciaAplicacionPrincipioOportunidadRenuncia",
13
+ "1": "ActaAudienciaAplicacionPrincipioOportunidadSuspension",
14
+ "2": "ActaAudienciaConcentrada",
15
+ "3": "ActaAudienciaControlInterceptacionComunicaciones",
16
+ "4": "ActaAudienciaControlPosteriorBusquedaSelectivaBaseDatos",
17
+ "5": "ActaAudienciaControlPosteriorRecuperacionInformacionProductoLaTransmisionDatos",
18
+ "6": "ActaAudienciaControlPrevioBusquedaSelectivaBaseDatos",
19
+ "7": "ActaAudienciaFormulacionImputacion",
20
+ "8": "ActaAudienciaLegalizacionCaptura",
21
+ "9": "ActaAudienciaLibertadVencimientoTerminos",
22
+ "10": "ActaAudienciaOrdenCaptura",
23
+ "11": "ActaAudienciaProrrogaBusquedaSelectivaBaseDatos",
24
+ "12": "ActaAudienciaProrrogaOrdenCaptura",
25
+ "13": "ActaAudienciaRevocatoriaMedidaAseguramiento",
26
+ "14": "ActaAudienciaSustitucionMedidaAseguramiento",
27
+ "15": "ActaRepartoGarantias",
28
+ "16": "BoletaEncarcelacion",
29
+ "17": "ColillaCitaciones",
30
+ "18": "ConstanciaReprogramacion",
31
+ "19": "ConstanciaSecretarial",
32
+ "20": "NoticiaCriminal",
33
+ "21": "OficioArticulo97Imputacion",
34
+ "22": "OficioInformaCancelacionOrdenCaptura",
35
+ "23": "OficioInformaImposicionMedidaAseguramiento",
36
+ "24": "OficioInformaImposicionMedidasCautelares",
37
+ "25": "OficioInformaOrdenCaptura",
38
+ "26": "OrdenCaptura",
39
+ "27": "SolicitudAplicacionPrincipioOportunidadRenuncia",
40
+ "28": "SolicitudAudienciasConcentradas",
41
+ "29": "SolicitudControlInterceptacionComunicaciones",
42
+ "30": "SolicitudControlPosteriorBusquedaSelectivaBaseDatos",
43
+ "31": "SolicitudControlPrevioBusquedaSelectivaBaseDatos",
44
+ "32": "SolicitudEntregaDefinitivaVehiculo",
45
+ "33": "SolicitudFormulacionImputacion",
46
+ "34": "SolicitudLibertadVencimientoTerminos",
47
+ "35": "SolicitudOrdenCaptura",
48
+ "36": "SolicitudProrrogaBusquedaSelectivaBaseDatos",
49
+ "37": "SolicitudProrrogaOrdenCaptura",
50
+ "38": "SolicitudRevocatoriaMedidaAseguramiento",
51
+ "39": "SolicitudSustitucionMedidaAseguramiento"
52
+ },
53
+ "initializer_range": 0.02,
54
+ "label2id": {
55
+ "ActaAudienciaAplicacionPrincipioOportunidadRenuncia": 0,
56
+ "ActaAudienciaAplicacionPrincipioOportunidadSuspension": 1,
57
+ "ActaAudienciaConcentrada": 2,
58
+ "ActaAudienciaControlInterceptacionComunicaciones": 3,
59
+ "ActaAudienciaControlPosteriorBusquedaSelectivaBaseDatos": 4,
60
+ "ActaAudienciaControlPosteriorRecuperacionInformacionProductoLaTransmisionDatos": 5,
61
+ "ActaAudienciaControlPrevioBusquedaSelectivaBaseDatos": 6,
62
+ "ActaAudienciaFormulacionImputacion": 7,
63
+ "ActaAudienciaLegalizacionCaptura": 8,
64
+ "ActaAudienciaLibertadVencimientoTerminos": 9,
65
+ "ActaAudienciaOrdenCaptura": 10,
66
+ "ActaAudienciaProrrogaBusquedaSelectivaBaseDatos": 11,
67
+ "ActaAudienciaProrrogaOrdenCaptura": 12,
68
+ "ActaAudienciaRevocatoriaMedidaAseguramiento": 13,
69
+ "ActaAudienciaSustitucionMedidaAseguramiento": 14,
70
+ "ActaRepartoGarantias": 15,
71
+ "BoletaEncarcelacion": 16,
72
+ "ColillaCitaciones": 17,
73
+ "ConstanciaReprogramacion": 18,
74
+ "ConstanciaSecretarial": 19,
75
+ "NoticiaCriminal": 20,
76
+ "OficioArticulo97Imputacion": 21,
77
+ "OficioInformaCancelacionOrdenCaptura": 22,
78
+ "OficioInformaImposicionMedidaAseguramiento": 23,
79
+ "OficioInformaImposicionMedidasCautelares": 24,
80
+ "OficioInformaOrdenCaptura": 25,
81
+ "OrdenCaptura": 26,
82
+ "SolicitudAplicacionPrincipioOportunidadRenuncia": 27,
83
+ "SolicitudAudienciasConcentradas": 28,
84
+ "SolicitudControlInterceptacionComunicaciones": 29,
85
+ "SolicitudControlPosteriorBusquedaSelectivaBaseDatos": 30,
86
+ "SolicitudControlPrevioBusquedaSelectivaBaseDatos": 31,
87
+ "SolicitudEntregaDefinitivaVehiculo": 32,
88
+ "SolicitudFormulacionImputacion": 33,
89
+ "SolicitudLibertadVencimientoTerminos": 34,
90
+ "SolicitudOrdenCaptura": 35,
91
+ "SolicitudProrrogaBusquedaSelectivaBaseDatos": 36,
92
+ "SolicitudProrrogaOrdenCaptura": 37,
93
+ "SolicitudRevocatoriaMedidaAseguramiento": 38,
94
+ "SolicitudSustitucionMedidaAseguramiento": 39
95
+ },
96
+ "max_position_embeddings": 512,
97
+ "model_type": "distilbert",
98
+ "n_heads": 12,
99
+ "n_layers": 6,
100
+ "pad_token_id": 0,
101
+ "problem_type": "single_label_classification",
102
+ "qa_dropout": 0.1,
103
+ "seq_classif_dropout": 0.2,
104
+ "sinusoidal_pos_embds": false,
105
+ "tie_weights_": true,
106
+ "torch_dtype": "float32",
107
+ "transformers_version": "4.44.2",
108
+ "vocab_size": 30522
109
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95988a56d4d1ac218e38b000f70f40e25cab9b2ece1166bb8bae249dcb58f4c2
3
+ size 267949464
runs/Sep26_15-29-03_00ef1955dd4b/events.out.tfevents.1727364544.00ef1955dd4b.639.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55a1cdbd7a4792adf9b247eff416885664b82cceead22f9a562326790de5e906
3
+ size 22787
runs/Sep26_15-29-03_00ef1955dd4b/events.out.tfevents.1727369255.00ef1955dd4b.639.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a48fd90cbcbe82d6b733147c5440beecb33234afda6ef387cc46c9c9b741f754
3
+ size 1275
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "total_flos": 7423201443840000.0,
4
+ "train_loss": 1.0203040662493024,
5
+ "train_runtime": 4378.397,
6
+ "train_samples_per_second": 12.79,
7
+ "train_steps_per_second": 1.599
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,398 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "eval_steps": 500,
6
+ "global_step": 7000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "step": 700,
14
+ "train@en_accuracy@en": 0.34375,
15
+ "train@en_f1@en": 0.26433920874315414,
16
+ "train@en_loss": 2.507004976272583,
17
+ "train@en_loss@en": 2.507004976272583,
18
+ "train@en_precision@en": 0.31280481843195657,
19
+ "train@en_recall@en": 0.3408727980841251,
20
+ "train@en_runtime": 94.9519,
21
+ "train@en_samples_per_second": 58.977,
22
+ "train@en_steps_per_second": 7.372
23
+ },
24
+ {
25
+ "epoch": 1.0,
26
+ "grad_norm": 6.358036518096924,
27
+ "learning_rate": 9e-06,
28
+ "loss": 3.1352,
29
+ "step": 700
30
+ },
31
+ {
32
+ "epoch": 1.0,
33
+ "eval_accuracy@en": 0.3279166666666667,
34
+ "eval_f1@en": 0.2574237949125209,
35
+ "eval_loss": 2.5256826877593994,
36
+ "eval_loss@en": 2.5256826877593994,
37
+ "eval_precision@en": 0.32413309895477144,
38
+ "eval_recall@en": 0.33497029506802606,
39
+ "eval_runtime": 40.6414,
40
+ "eval_samples_per_second": 59.053,
41
+ "eval_steps_per_second": 7.382,
42
+ "step": 700
43
+ },
44
+ {
45
+ "epoch": 2.0,
46
+ "step": 1400,
47
+ "train@en_accuracy@en": 0.62375,
48
+ "train@en_f1@en": 0.5742986370992385,
49
+ "train@en_loss": 1.6666306257247925,
50
+ "train@en_loss@en": 1.6666306257247925,
51
+ "train@en_precision@en": 0.702336197256759,
52
+ "train@en_recall@en": 0.62328079736136,
53
+ "train@en_runtime": 94.9224,
54
+ "train@en_samples_per_second": 58.996,
55
+ "train@en_steps_per_second": 7.374
56
+ },
57
+ {
58
+ "epoch": 2.0,
59
+ "grad_norm": 12.081613540649414,
60
+ "learning_rate": 8.000000000000001e-06,
61
+ "loss": 2.1929,
62
+ "step": 1400
63
+ },
64
+ {
65
+ "epoch": 2.0,
66
+ "eval_accuracy@en": 0.6120833333333333,
67
+ "eval_f1@en": 0.5596697475213486,
68
+ "eval_loss": 1.6829949617385864,
69
+ "eval_loss@en": 1.6829948425292969,
70
+ "eval_precision@en": 0.6589140555735225,
71
+ "eval_recall@en": 0.6124881504058503,
72
+ "eval_runtime": 40.955,
73
+ "eval_samples_per_second": 58.601,
74
+ "eval_steps_per_second": 7.325,
75
+ "step": 1400
76
+ },
77
+ {
78
+ "epoch": 3.0,
79
+ "step": 2100,
80
+ "train@en_accuracy@en": 0.8026785714285715,
81
+ "train@en_f1@en": 0.787258335698847,
82
+ "train@en_loss": 0.9801780581474304,
83
+ "train@en_loss@en": 0.9801781177520752,
84
+ "train@en_precision@en": 0.8144774919753994,
85
+ "train@en_recall@en": 0.8032770135992633,
86
+ "train@en_runtime": 94.6841,
87
+ "train@en_samples_per_second": 59.144,
88
+ "train@en_steps_per_second": 7.393
89
+ },
90
+ {
91
+ "epoch": 3.0,
92
+ "grad_norm": 7.4003586769104,
93
+ "learning_rate": 7e-06,
94
+ "loss": 1.4355,
95
+ "step": 2100
96
+ },
97
+ {
98
+ "epoch": 3.0,
99
+ "eval_accuracy@en": 0.7929166666666667,
100
+ "eval_f1@en": 0.7750444717423886,
101
+ "eval_loss": 1.0143293142318726,
102
+ "eval_loss@en": 1.0143293142318726,
103
+ "eval_precision@en": 0.7990333939296972,
104
+ "eval_recall@en": 0.7914333335575858,
105
+ "eval_runtime": 40.6569,
106
+ "eval_samples_per_second": 59.031,
107
+ "eval_steps_per_second": 7.379,
108
+ "step": 2100
109
+ },
110
+ {
111
+ "epoch": 4.0,
112
+ "step": 2800,
113
+ "train@en_accuracy@en": 0.8451785714285714,
114
+ "train@en_f1@en": 0.8293494910105388,
115
+ "train@en_loss": 0.6264702677726746,
116
+ "train@en_loss@en": 0.6264702677726746,
117
+ "train@en_precision@en": 0.8630517375855116,
118
+ "train@en_recall@en": 0.8450169861202415,
119
+ "train@en_runtime": 95.0025,
120
+ "train@en_samples_per_second": 58.946,
121
+ "train@en_steps_per_second": 7.368
122
+ },
123
+ {
124
+ "epoch": 4.0,
125
+ "grad_norm": 17.754791259765625,
126
+ "learning_rate": 6e-06,
127
+ "loss": 0.9043,
128
+ "step": 2800
129
+ },
130
+ {
131
+ "epoch": 4.0,
132
+ "eval_accuracy@en": 0.8241666666666667,
133
+ "eval_f1@en": 0.8062219760834642,
134
+ "eval_loss": 0.6990054249763489,
135
+ "eval_loss@en": 0.6990054249763489,
136
+ "eval_precision@en": 0.8239392997219726,
137
+ "eval_recall@en": 0.8248465837554612,
138
+ "eval_runtime": 40.6254,
139
+ "eval_samples_per_second": 59.076,
140
+ "eval_steps_per_second": 7.385,
141
+ "step": 2800
142
+ },
143
+ {
144
+ "epoch": 5.0,
145
+ "step": 3500,
146
+ "train@en_accuracy@en": 0.9032142857142857,
147
+ "train@en_f1@en": 0.9027244042054093,
148
+ "train@en_loss": 0.4525674879550934,
149
+ "train@en_loss@en": 0.4525674879550934,
150
+ "train@en_precision@en": 0.9070092329699971,
151
+ "train@en_recall@en": 0.9029663965407628,
152
+ "train@en_runtime": 95.2629,
153
+ "train@en_samples_per_second": 58.785,
154
+ "train@en_steps_per_second": 7.348
155
+ },
156
+ {
157
+ "epoch": 5.0,
158
+ "grad_norm": 10.363593101501465,
159
+ "learning_rate": 5e-06,
160
+ "loss": 0.6269,
161
+ "step": 3500
162
+ },
163
+ {
164
+ "epoch": 5.0,
165
+ "eval_accuracy@en": 0.8795833333333334,
166
+ "eval_f1@en": 0.8782906102037042,
167
+ "eval_loss": 0.544560968875885,
168
+ "eval_loss@en": 0.5445610284805298,
169
+ "eval_precision@en": 0.884533616431302,
170
+ "eval_recall@en": 0.8802499769187758,
171
+ "eval_runtime": 40.8955,
172
+ "eval_samples_per_second": 58.686,
173
+ "eval_steps_per_second": 7.336,
174
+ "step": 3500
175
+ },
176
+ {
177
+ "epoch": 6.0,
178
+ "step": 4200,
179
+ "train@en_accuracy@en": 0.9125,
180
+ "train@en_f1@en": 0.9121300237302143,
181
+ "train@en_loss": 0.3839361071586609,
182
+ "train@en_loss@en": 0.3839361071586609,
183
+ "train@en_precision@en": 0.9170749755243058,
184
+ "train@en_recall@en": 0.9124407943873611,
185
+ "train@en_runtime": 95.1975,
186
+ "train@en_samples_per_second": 58.825,
187
+ "train@en_steps_per_second": 7.353
188
+ },
189
+ {
190
+ "epoch": 6.0,
191
+ "grad_norm": 1.9296058416366577,
192
+ "learning_rate": 4.000000000000001e-06,
193
+ "loss": 0.4852,
194
+ "step": 4200
195
+ },
196
+ {
197
+ "epoch": 6.0,
198
+ "eval_accuracy@en": 0.88625,
199
+ "eval_f1@en": 0.8846960621547091,
200
+ "eval_loss": 0.4860028028488159,
201
+ "eval_loss@en": 0.48600292205810547,
202
+ "eval_precision@en": 0.8900593967407658,
203
+ "eval_recall@en": 0.8861556208357062,
204
+ "eval_runtime": 40.9267,
205
+ "eval_samples_per_second": 58.641,
206
+ "eval_steps_per_second": 7.33,
207
+ "step": 4200
208
+ },
209
+ {
210
+ "epoch": 7.0,
211
+ "step": 4900,
212
+ "train@en_accuracy@en": 0.9217857142857143,
213
+ "train@en_f1@en": 0.9214453335435421,
214
+ "train@en_loss": 0.33172789216041565,
215
+ "train@en_loss@en": 0.33172792196273804,
216
+ "train@en_precision@en": 0.9249516156522037,
217
+ "train@en_recall@en": 0.9217162336632556,
218
+ "train@en_runtime": 94.8919,
219
+ "train@en_samples_per_second": 59.014,
220
+ "train@en_steps_per_second": 7.377
221
+ },
222
+ {
223
+ "epoch": 7.0,
224
+ "grad_norm": 5.603872299194336,
225
+ "learning_rate": 3e-06,
226
+ "loss": 0.4124,
227
+ "step": 4900
228
+ },
229
+ {
230
+ "epoch": 7.0,
231
+ "eval_accuracy@en": 0.8891666666666667,
232
+ "eval_f1@en": 0.8875957586953916,
233
+ "eval_loss": 0.4537738561630249,
234
+ "eval_loss@en": 0.4537737965583801,
235
+ "eval_precision@en": 0.8924955219230164,
236
+ "eval_recall@en": 0.8895945067696548,
237
+ "eval_runtime": 40.7161,
238
+ "eval_samples_per_second": 58.945,
239
+ "eval_steps_per_second": 7.368,
240
+ "step": 4900
241
+ },
242
+ {
243
+ "epoch": 8.0,
244
+ "step": 5600,
245
+ "train@en_accuracy@en": 0.9269642857142857,
246
+ "train@en_f1@en": 0.9268275154406451,
247
+ "train@en_loss": 0.3015599846839905,
248
+ "train@en_loss@en": 0.3015599846839905,
249
+ "train@en_precision@en": 0.929294504097796,
250
+ "train@en_recall@en": 0.9269746361541606,
251
+ "train@en_runtime": 94.8999,
252
+ "train@en_samples_per_second": 59.01,
253
+ "train@en_steps_per_second": 7.376
254
+ },
255
+ {
256
+ "epoch": 8.0,
257
+ "grad_norm": 14.972261428833008,
258
+ "learning_rate": 2.0000000000000003e-06,
259
+ "loss": 0.3602,
260
+ "step": 5600
261
+ },
262
+ {
263
+ "epoch": 8.0,
264
+ "eval_accuracy@en": 0.89,
265
+ "eval_f1@en": 0.8886400377574052,
266
+ "eval_loss": 0.4391521215438843,
267
+ "eval_loss@en": 0.4391521215438843,
268
+ "eval_precision@en": 0.8924797214528745,
269
+ "eval_recall@en": 0.8905594135731242,
270
+ "eval_runtime": 40.8791,
271
+ "eval_samples_per_second": 58.71,
272
+ "eval_steps_per_second": 7.339,
273
+ "step": 5600
274
+ },
275
+ {
276
+ "epoch": 9.0,
277
+ "step": 6300,
278
+ "train@en_accuracy@en": 0.9308928571428572,
279
+ "train@en_f1@en": 0.9307655706182029,
280
+ "train@en_loss": 0.2896050214767456,
281
+ "train@en_loss@en": 0.2896049916744232,
282
+ "train@en_precision@en": 0.9335190085867936,
283
+ "train@en_recall@en": 0.9309233485086119,
284
+ "train@en_runtime": 94.7133,
285
+ "train@en_samples_per_second": 59.126,
286
+ "train@en_steps_per_second": 7.391
287
+ },
288
+ {
289
+ "epoch": 9.0,
290
+ "grad_norm": 18.07584571838379,
291
+ "learning_rate": 1.0000000000000002e-06,
292
+ "loss": 0.3355,
293
+ "step": 6300
294
+ },
295
+ {
296
+ "epoch": 9.0,
297
+ "eval_accuracy@en": 0.89125,
298
+ "eval_f1@en": 0.8898241490623645,
299
+ "eval_loss": 0.43729308247566223,
300
+ "eval_loss@en": 0.43729308247566223,
301
+ "eval_precision@en": 0.8934240261027814,
302
+ "eval_recall@en": 0.891901632779242,
303
+ "eval_runtime": 40.7209,
304
+ "eval_samples_per_second": 58.938,
305
+ "eval_steps_per_second": 7.367,
306
+ "step": 6300
307
+ },
308
+ {
309
+ "epoch": 10.0,
310
+ "step": 7000,
311
+ "train@en_accuracy@en": 0.9308928571428572,
312
+ "train@en_f1@en": 0.930857547069914,
313
+ "train@en_loss": 0.28553587198257446,
314
+ "train@en_loss@en": 0.28553587198257446,
315
+ "train@en_precision@en": 0.9332947682664481,
316
+ "train@en_recall@en": 0.9309689846137742,
317
+ "train@en_runtime": 95.5143,
318
+ "train@en_samples_per_second": 58.63,
319
+ "train@en_steps_per_second": 7.329
320
+ },
321
+ {
322
+ "epoch": 10.0,
323
+ "grad_norm": 8.576262474060059,
324
+ "learning_rate": 0.0,
325
+ "loss": 0.3147,
326
+ "step": 7000
327
+ },
328
+ {
329
+ "epoch": 10.0,
330
+ "eval_accuracy@en": 0.8945833333333333,
331
+ "eval_f1@en": 0.8931269722333681,
332
+ "eval_loss": 0.4353380799293518,
333
+ "eval_loss@en": 0.4353380799293518,
334
+ "eval_precision@en": 0.8964579374461799,
335
+ "eval_recall@en": 0.8951529655905859,
336
+ "eval_runtime": 40.9416,
337
+ "eval_samples_per_second": 58.62,
338
+ "eval_steps_per_second": 7.328,
339
+ "step": 7000
340
+ },
341
+ {
342
+ "epoch": 10.0,
343
+ "step": 7000,
344
+ "total_flos": 7423201443840000.0,
345
+ "train_loss": 1.0203040662493024,
346
+ "train_runtime": 4378.397,
347
+ "train_samples_per_second": 12.79,
348
+ "train_steps_per_second": 1.599
349
+ },
350
+ {
351
+ "epoch": 10.0,
352
+ "step": 7000,
353
+ "train_en_accuracy@en": 0.9308928571428572,
354
+ "train_en_f1@en": 0.930857547069914,
355
+ "train_en_loss": 0.28553587198257446,
356
+ "train_en_loss@en": 0.28553587198257446,
357
+ "train_en_precision@en": 0.9332947682664481,
358
+ "train_en_recall@en": 0.9309689846137742,
359
+ "train_en_runtime": 96.5413,
360
+ "train_en_samples_per_second": 58.006,
361
+ "train_en_steps_per_second": 7.251
362
+ },
363
+ {
364
+ "epoch": 10.0,
365
+ "step": 7000,
366
+ "test_en_accuracy@en": 0.8945833333333333,
367
+ "test_en_f1@en": 0.8931269722333681,
368
+ "test_en_loss": 0.4353380799293518,
369
+ "test_en_loss@en": 0.4353380799293518,
370
+ "test_en_precision@en": 0.8964579374461799,
371
+ "test_en_recall@en": 0.8951529655905859,
372
+ "test_en_runtime": 41.8459,
373
+ "test_en_samples_per_second": 57.353,
374
+ "test_en_steps_per_second": 7.169
375
+ }
376
+ ],
377
+ "logging_steps": 500,
378
+ "max_steps": 7000,
379
+ "num_input_tokens_seen": 0,
380
+ "num_train_epochs": 10,
381
+ "save_steps": 500,
382
+ "stateful_callbacks": {
383
+ "TrainerControl": {
384
+ "args": {
385
+ "should_epoch_stop": false,
386
+ "should_evaluate": false,
387
+ "should_log": false,
388
+ "should_save": true,
389
+ "should_training_stop": true
390
+ },
391
+ "attributes": {}
392
+ }
393
+ },
394
+ "total_flos": 7423201443840000.0,
395
+ "train_batch_size": 8,
396
+ "trial_name": null,
397
+ "trial_params": null
398
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7342b6dddf9e6da6bfe04ec59898b109df0dc8b0d2c3a5c096e1656de97aeae4
3
+ size 5176