Dc-4nderson commited on
Commit
b81a2cb
·
verified ·
1 Parent(s): 71a2b91

Upload trained model folder

Browse files
checkpoint-540/config.json CHANGED
@@ -30,6 +30,6 @@
30
  "tie_word_embeddings": false,
31
  "torch_dtype": "float32",
32
  "transformers_version": "4.44.2",
33
- "use_cache": true,
34
  "vocab_size": 32101
35
  }
 
30
  "tie_word_embeddings": false,
31
  "torch_dtype": "float32",
32
  "transformers_version": "4.44.2",
33
+ "use_cache": false,
34
  "vocab_size": 32101
35
  }
checkpoint-540/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e31fef6d504293d684f1a4d4452f0d3682417cf43504ea8e8824df977c64382b
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbbdd93d08e8e26682bdef2370ccf671b467db0033ab57021b4569a5128240ec
3
  size 14645
checkpoint-540/tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 1024,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
checkpoint-540/trainer_state.json CHANGED
@@ -2,45 +2,132 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 5.966850828729282,
5
- "eval_steps": 500,
6
  "global_step": 540,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
11
  {
12
  "epoch": 1.1049723756906078,
13
  "grad_norm": NaN,
14
  "learning_rate": 0.0,
15
- "loss": 291.9891,
16
  "step": 100
17
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  {
19
  "epoch": 2.2099447513812156,
20
  "grad_norm": NaN,
21
  "learning_rate": 0.0,
22
- "loss": 0.3379,
23
  "step": 200
24
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  {
26
  "epoch": 3.314917127071823,
27
  "grad_norm": NaN,
28
  "learning_rate": 0.0,
29
- "loss": 451369.48,
30
  "step": 300
31
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  {
33
  "epoch": 4.419889502762431,
34
  "grad_norm": NaN,
35
  "learning_rate": 0.0,
36
- "loss": 0.6954,
37
  "step": 400
38
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  {
40
  "epoch": 5.524861878453039,
41
  "grad_norm": NaN,
42
  "learning_rate": 0.0,
43
- "loss": 31.9152,
44
  "step": 500
45
  },
46
  {
@@ -51,17 +138,17 @@
51
  "eval_rouge2": 0.23593705494621237,
52
  "eval_rougeL": 0.3235722263776485,
53
  "eval_rougeLsum": 0.3191904629985445,
54
- "eval_runtime": 9.3726,
55
- "eval_samples_per_second": 2.134,
56
- "eval_steps_per_second": 1.067,
57
  "step": 500
58
  }
59
  ],
60
- "logging_steps": 100,
61
  "max_steps": 540,
62
  "num_input_tokens_seen": 0,
63
  "num_train_epochs": 6,
64
- "save_steps": 500,
65
  "stateful_callbacks": {
66
  "TrainerControl": {
67
  "args": {
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 5.966850828729282,
5
+ "eval_steps": 100,
6
  "global_step": 540,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
+ {
12
+ "epoch": 0.5524861878453039,
13
+ "grad_norm": NaN,
14
+ "learning_rate": 0.0,
15
+ "loss": 38.3801,
16
+ "step": 50
17
+ },
18
  {
19
  "epoch": 1.1049723756906078,
20
  "grad_norm": NaN,
21
  "learning_rate": 0.0,
22
+ "loss": 0.065,
23
  "step": 100
24
  },
25
+ {
26
+ "epoch": 1.1049723756906078,
27
+ "eval_chunk_count_mae": 0.0,
28
+ "eval_loss": NaN,
29
+ "eval_rouge1": 0.3467557136477354,
30
+ "eval_rouge2": 0.23593705494621237,
31
+ "eval_rougeL": 0.3235722263776485,
32
+ "eval_rougeLsum": 0.3191904629985445,
33
+ "eval_runtime": 9.106,
34
+ "eval_samples_per_second": 2.196,
35
+ "eval_steps_per_second": 1.098,
36
+ "step": 100
37
+ },
38
+ {
39
+ "epoch": 1.6574585635359116,
40
+ "grad_norm": NaN,
41
+ "learning_rate": 0.0,
42
+ "loss": 0.0337,
43
+ "step": 150
44
+ },
45
  {
46
  "epoch": 2.2099447513812156,
47
  "grad_norm": NaN,
48
  "learning_rate": 0.0,
49
+ "loss": 3.5965,
50
  "step": 200
51
  },
52
+ {
53
+ "epoch": 2.2099447513812156,
54
+ "eval_chunk_count_mae": 0.0,
55
+ "eval_loss": NaN,
56
+ "eval_rouge1": 0.3467557136477354,
57
+ "eval_rouge2": 0.23593705494621237,
58
+ "eval_rougeL": 0.3235722263776485,
59
+ "eval_rougeLsum": 0.3191904629985445,
60
+ "eval_runtime": 9.0115,
61
+ "eval_samples_per_second": 2.219,
62
+ "eval_steps_per_second": 1.11,
63
+ "step": 200
64
+ },
65
+ {
66
+ "epoch": 2.7624309392265194,
67
+ "grad_norm": NaN,
68
+ "learning_rate": 0.0,
69
+ "loss": 58689.13,
70
+ "step": 250
71
+ },
72
  {
73
  "epoch": 3.314917127071823,
74
  "grad_norm": NaN,
75
  "learning_rate": 0.0,
76
+ "loss": 0.0,
77
  "step": 300
78
  },
79
+ {
80
+ "epoch": 3.314917127071823,
81
+ "eval_chunk_count_mae": 0.0,
82
+ "eval_loss": NaN,
83
+ "eval_rouge1": 0.3467557136477354,
84
+ "eval_rouge2": 0.23593705494621237,
85
+ "eval_rougeL": 0.3235722263776485,
86
+ "eval_rougeLsum": 0.3191904629985445,
87
+ "eval_runtime": 9.0259,
88
+ "eval_samples_per_second": 2.216,
89
+ "eval_steps_per_second": 1.108,
90
+ "step": 300
91
+ },
92
+ {
93
+ "epoch": 3.867403314917127,
94
+ "grad_norm": NaN,
95
+ "learning_rate": 0.0,
96
+ "loss": 0.0674,
97
+ "step": 350
98
+ },
99
  {
100
  "epoch": 4.419889502762431,
101
  "grad_norm": NaN,
102
  "learning_rate": 0.0,
103
+ "loss": 31.251,
104
  "step": 400
105
  },
106
+ {
107
+ "epoch": 4.419889502762431,
108
+ "eval_chunk_count_mae": 0.0,
109
+ "eval_loss": NaN,
110
+ "eval_rouge1": 0.3467557136477354,
111
+ "eval_rouge2": 0.23593705494621237,
112
+ "eval_rougeL": 0.3235722263776485,
113
+ "eval_rougeLsum": 0.3191904629985445,
114
+ "eval_runtime": 9.1266,
115
+ "eval_samples_per_second": 2.191,
116
+ "eval_steps_per_second": 1.096,
117
+ "step": 400
118
+ },
119
+ {
120
+ "epoch": 4.972375690607735,
121
+ "grad_norm": NaN,
122
+ "learning_rate": 0.0,
123
+ "loss": 4.2385,
124
+ "step": 450
125
+ },
126
  {
127
  "epoch": 5.524861878453039,
128
  "grad_norm": NaN,
129
  "learning_rate": 0.0,
130
+ "loss": 0.4142,
131
  "step": 500
132
  },
133
  {
 
138
  "eval_rouge2": 0.23593705494621237,
139
  "eval_rougeL": 0.3235722263776485,
140
  "eval_rougeLsum": 0.3191904629985445,
141
+ "eval_runtime": 9.1206,
142
+ "eval_samples_per_second": 2.193,
143
+ "eval_steps_per_second": 1.096,
144
  "step": 500
145
  }
146
  ],
147
+ "logging_steps": 50,
148
  "max_steps": 540,
149
  "num_input_tokens_seen": 0,
150
  "num_train_epochs": 6,
151
+ "save_steps": 200,
152
  "stateful_callbacks": {
153
  "TrainerControl": {
154
  "args": {
checkpoint-540/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e4cb08af4e70f4088787ea08084a28bd6e3d739bf04d1ec3840e727e8816d53
3
  size 5713
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0dffe3825cee4e1a0c6a604e07bdffebfe62b0c4a711a7b5489a16a25a87925
3
  size 5713