besimray commited on
Commit
4e07e40
·
verified ·
1 Parent(s): a6955f5

Training in progress, step 170, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:480d04c6a926444def1429dc33a1fe22e39da7d221159d43c99b539437465d19
3
  size 194563400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:777f88c0fb9cc4c42e43334f6d9fec0428b5b47952cada109214eb8aa3d44699
3
  size 194563400
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea4c7c222eac50c7b503e2aeddf3486cc8817aaf6f6a2db27055883f9fe0b1b1
3
  size 99235764
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5c5d9407152d844f63784c447d20896bc2e57aa3bf5775eed398b50d837bf04
3
  size 99235764
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41a3f09479c8db549f49ad67575d3a33d8f3d71007e65938dea57bdd9f47be60
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:beb0c18b6062425824cddd605e9c9b215ab24c7c036f0e4d279ccb5c974403e3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9037087ffb4f9832ca5c41a5341235731b38f400553502e7da55d0ac1d1965dd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f0eb49a8f40f69ba68a14a6468b61014042911a930d1cd0d9dc3c51ec8f4713
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.9662845730781555,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-140",
4
- "epoch": 0.7776427703523694,
5
  "eval_steps": 10,
6
- "global_step": 160,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1263,6 +1263,84 @@
1263
  "eval_samples_per_second": 3.448,
1264
  "eval_steps_per_second": 0.699,
1265
  "step": 160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1266
  }
1267
  ],
1268
  "logging_steps": 1,
@@ -1277,7 +1355,7 @@
1277
  "early_stopping_threshold": 0.0
1278
  },
1279
  "attributes": {
1280
- "early_stopping_patience_counter": 2
1281
  }
1282
  },
1283
  "TrainerControl": {
@@ -1286,12 +1364,12 @@
1286
  "should_evaluate": false,
1287
  "should_log": false,
1288
  "should_save": true,
1289
- "should_training_stop": false
1290
  },
1291
  "attributes": {}
1292
  }
1293
  },
1294
- "total_flos": 2.2585189683167232e+17,
1295
  "train_batch_size": 5,
1296
  "trial_name": null,
1297
  "trial_params": null
 
1
  {
2
  "best_metric": 0.9662845730781555,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-140",
4
+ "epoch": 0.8262454434993924,
5
  "eval_steps": 10,
6
+ "global_step": 170,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1263
  "eval_samples_per_second": 3.448,
1264
  "eval_steps_per_second": 0.699,
1265
  "step": 160
1266
+ },
1267
+ {
1268
+ "epoch": 0.7825030376670717,
1269
+ "grad_norm": 0.11883629858493805,
1270
+ "learning_rate": 0.0001887376873718548,
1271
+ "loss": 0.8172,
1272
+ "step": 161
1273
+ },
1274
+ {
1275
+ "epoch": 0.787363304981774,
1276
+ "grad_norm": 0.14962925016880035,
1277
+ "learning_rate": 0.00018859093629069058,
1278
+ "loss": 0.8627,
1279
+ "step": 162
1280
+ },
1281
+ {
1282
+ "epoch": 0.7922235722964763,
1283
+ "grad_norm": 0.17162087559700012,
1284
+ "learning_rate": 0.00018844329309978145,
1285
+ "loss": 0.825,
1286
+ "step": 163
1287
+ },
1288
+ {
1289
+ "epoch": 0.7970838396111786,
1290
+ "grad_norm": 0.14641453325748444,
1291
+ "learning_rate": 0.00018829475928589271,
1292
+ "loss": 0.8163,
1293
+ "step": 164
1294
+ },
1295
+ {
1296
+ "epoch": 0.8019441069258809,
1297
+ "grad_norm": 0.1582384705543518,
1298
+ "learning_rate": 0.00018814533634475822,
1299
+ "loss": 1.0263,
1300
+ "step": 165
1301
+ },
1302
+ {
1303
+ "epoch": 0.8068043742405833,
1304
+ "grad_norm": 0.15247194468975067,
1305
+ "learning_rate": 0.00018799502578106534,
1306
+ "loss": 0.8945,
1307
+ "step": 166
1308
+ },
1309
+ {
1310
+ "epoch": 0.8116646415552855,
1311
+ "grad_norm": 0.1615608185529709,
1312
+ "learning_rate": 0.00018784382910843976,
1313
+ "loss": 0.9895,
1314
+ "step": 167
1315
+ },
1316
+ {
1317
+ "epoch": 0.8165249088699879,
1318
+ "grad_norm": 0.1320793181657791,
1319
+ "learning_rate": 0.0001876917478494303,
1320
+ "loss": 0.8975,
1321
+ "step": 168
1322
+ },
1323
+ {
1324
+ "epoch": 0.8213851761846902,
1325
+ "grad_norm": 0.15704546868801117,
1326
+ "learning_rate": 0.00018753878353549357,
1327
+ "loss": 0.8848,
1328
+ "step": 169
1329
+ },
1330
+ {
1331
+ "epoch": 0.8262454434993924,
1332
+ "grad_norm": 0.15228189527988434,
1333
+ "learning_rate": 0.00018738493770697852,
1334
+ "loss": 1.062,
1335
+ "step": 170
1336
+ },
1337
+ {
1338
+ "epoch": 0.8262454434993924,
1339
+ "eval_loss": 0.9668231010437012,
1340
+ "eval_runtime": 62.8803,
1341
+ "eval_samples_per_second": 3.451,
1342
+ "eval_steps_per_second": 0.7,
1343
+ "step": 170
1344
  }
1345
  ],
1346
  "logging_steps": 1,
 
1355
  "early_stopping_threshold": 0.0
1356
  },
1357
  "attributes": {
1358
+ "early_stopping_patience_counter": 3
1359
  }
1360
  },
1361
  "TrainerControl": {
 
1364
  "should_evaluate": false,
1365
  "should_log": false,
1366
  "should_save": true,
1367
+ "should_training_stop": true
1368
  },
1369
  "attributes": {}
1370
  }
1371
  },
1372
+ "total_flos": 2.3994561894285312e+17,
1373
  "train_batch_size": 5,
1374
  "trial_name": null,
1375
  "trial_params": null