Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +84 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:beb3038837c41f22c7331d8e0203167cd2b26cfb8d2a725b93d7a8eea26aa1e5
 size 140815952

 version https://git-lfs.github.com/spec/v1
+oid sha256:073f6bd0e44861f9bbefb443566a168c81495bced39dd1b1d2a0be23cbbe9cb5
 size 140815952

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8b138152c8cc8a42ca5f8cd0600cf66ac1a69402919c839146f1b0aa8493476
 size 71878612

 version https://git-lfs.github.com/spec/v1
+oid sha256:689241204dabed053806b0006b9e37c3f782c5542b4a0e6383d0d2978eae10a6
 size 71878612

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:122a2936afa43ed66142df0bd3dcbe036fd722232978b87a762dea6e03beb670
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8a9275c62aa538dc59da4e92cab0e96aef321694e347e049b029bf91527188c8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7624744ddc571068835a96b66c68f20536621f83b9432ca68e2d5ee8eb961785
 size 1192

 version https://git-lfs.github.com/spec/v1
+oid sha256:513432b56c3d25d6cb2b5f5a8da383c67096421a340fb1793f248481156b1328
 size 1192

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.0360867977142334,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.054892273912446825,
   "eval_steps": 100,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -93,6 +93,84 @@
       "eval_samples_per_second": 19.366,
       "eval_steps_per_second": 9.683,
       "step": 100
     }
   ],
   "logging_steps": 10,
@@ -116,12 +194,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3864241215897600.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.9555625915527344,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.10978454782489365,
   "eval_steps": 100,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 19.366,
       "eval_steps_per_second": 9.683,
       "step": 100
+    },
+    {
+      "epoch": 0.06038150130369151,
+      "grad_norm": 2.6299073696136475,
+      "learning_rate": 5e-05,
+      "loss": 1.9692,
+      "step": 110
+    },
+    {
+      "epoch": 0.06587072869493618,
+      "grad_norm": 4.907375812530518,
+      "learning_rate": 5e-05,
+      "loss": 2.1752,
+      "step": 120
+    },
+    {
+      "epoch": 0.07135995608618087,
+      "grad_norm": 2.7539217472076416,
+      "learning_rate": 5e-05,
+      "loss": 1.9246,
+      "step": 130
+    },
+    {
+      "epoch": 0.07684918347742556,
+      "grad_norm": 5.6027302742004395,
+      "learning_rate": 5e-05,
+      "loss": 1.9031,
+      "step": 140
+    },
+    {
+      "epoch": 0.08233841086867023,
+      "grad_norm": 3.0815937519073486,
+      "learning_rate": 5e-05,
+      "loss": 2.0121,
+      "step": 150
+    },
+    {
+      "epoch": 0.08782763825991492,
+      "grad_norm": 3.3530800342559814,
+      "learning_rate": 5e-05,
+      "loss": 2.1147,
+      "step": 160
+    },
+    {
+      "epoch": 0.0933168656511596,
+      "grad_norm": 2.6608502864837646,
+      "learning_rate": 5e-05,
+      "loss": 2.1767,
+      "step": 170
+    },
+    {
+      "epoch": 0.09880609304240429,
+      "grad_norm": 4.09913444519043,
+      "learning_rate": 5e-05,
+      "loss": 1.931,
+      "step": 180
+    },
+    {
+      "epoch": 0.10429532043364896,
+      "grad_norm": 2.4433376789093018,
+      "learning_rate": 5e-05,
+      "loss": 2.0358,
+      "step": 190
+    },
+    {
+      "epoch": 0.10978454782489365,
+      "grad_norm": 2.8582210540771484,
+      "learning_rate": 5e-05,
+      "loss": 2.0232,
+      "step": 200
+    },
+    {
+      "epoch": 0.10978454782489365,
+      "eval_loss": 1.9555625915527344,
+      "eval_runtime": 39.601,
+      "eval_samples_per_second": 19.393,
+      "eval_steps_per_second": 9.697,
+      "step": 200
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7728482431795200.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null