Training in progress, step 69, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +172 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d62ae375b3d886ffba3d69638b096dc77f81cc0a4c0fd5ea40b88fbf6d33b20d
 size 45118424

 version https://git-lfs.github.com/spec/v1
+oid sha256:54b580ddbf09d3d9ae2addf973352cb11707da67742b7b282fde53fde7fbe740
 size 45118424

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:82741442944130d3aeea9f4f9e24b1ec333236448da46572d8457dcd974264a2
 size 90365754

 version https://git-lfs.github.com/spec/v1
+oid sha256:bf2da229f251b5f291f941c33cc14eed837857befbbc6c2ca7bccca1a3f0efe5
 size 90365754

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60e49d22bc4fd77c6162c14ee158f692a103103bc8c1d8f029e487221cbb7fd4
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:265ac61368277eb3ace99e1735a0a601a5a63864d2a441bc9b97a9c99e0f4c89
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:962c7c7a348e3bebdc1847dd4552e16a844922051e90382917c120dd6021a51a
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:c79d137864c2e1cdce26d7c2feb8d8fbfd9200a245e19ae914aa4b88e97a6687
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de6fc2d70040a6cdfe81cf5772990d667054ae9a80d1043cbf6bd19eea218f23
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a045dc546280f3d42f2dc4c02c23ea3301726496c8a8623025df0283d2e3d076
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5118219749652295,
   "eval_steps": 23,
-  "global_step": 46,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -353,6 +353,175 @@
       "eval_samples_per_second": 17.662,
       "eval_steps_per_second": 4.416,
       "step": 46
     }
   ],
   "logging_steps": 1,
@@ -372,7 +541,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.561219055996109e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7677329624478443,
   "eval_steps": 23,
+  "global_step": 69,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 17.662,
       "eval_steps_per_second": 4.416,
       "step": 46
+    },
+    {
+      "epoch": 0.5229485396383866,
+      "grad_norm": 0.2565418779850006,
+      "learning_rate": 0.000188,
+      "loss": 0.8667,
+      "step": 47
+    },
+    {
+      "epoch": 0.5340751043115438,
+      "grad_norm": 0.25146299600601196,
+      "learning_rate": 0.000192,
+      "loss": 0.7796,
+      "step": 48
+    },
+    {
+      "epoch": 0.545201668984701,
+      "grad_norm": 0.23373191058635712,
+      "learning_rate": 0.000196,
+      "loss": 0.7679,
+      "step": 49
+    },
+    {
+      "epoch": 0.5563282336578581,
+      "grad_norm": 0.26904454827308655,
+      "learning_rate": 0.0002,
+      "loss": 0.8682,
+      "step": 50
+    },
+    {
+      "epoch": 0.5674547983310153,
+      "grad_norm": 0.26204514503479004,
+      "learning_rate": 0.00019967573081342103,
+      "loss": 0.8317,
+      "step": 51
+    },
+    {
+      "epoch": 0.5785813630041725,
+      "grad_norm": 0.2280825525522232,
+      "learning_rate": 0.00019870502626379127,
+      "loss": 0.6542,
+      "step": 52
+    },
+    {
+      "epoch": 0.5897079276773296,
+      "grad_norm": 0.2595359683036804,
+      "learning_rate": 0.0001970941817426052,
+      "loss": 0.8229,
+      "step": 53
+    },
+    {
+      "epoch": 0.6008344923504868,
+      "grad_norm": 0.28588512539863586,
+      "learning_rate": 0.00019485364419471454,
+      "loss": 0.8858,
+      "step": 54
+    },
+    {
+      "epoch": 0.6119610570236439,
+      "grad_norm": 0.269365131855011,
+      "learning_rate": 0.00019199794436588243,
+      "loss": 0.8765,
+      "step": 55
+    },
+    {
+      "epoch": 0.6230876216968011,
+      "grad_norm": 0.2782859206199646,
+      "learning_rate": 0.000188545602565321,
+      "loss": 0.8199,
+      "step": 56
+    },
+    {
+      "epoch": 0.6342141863699583,
+      "grad_norm": 0.27628636360168457,
+      "learning_rate": 0.0001845190085543795,
+      "loss": 0.7886,
+      "step": 57
+    },
+    {
+      "epoch": 0.6453407510431154,
+      "grad_norm": 0.3029504716396332,
+      "learning_rate": 0.00017994427634035015,
+      "loss": 0.8383,
+      "step": 58
+    },
+    {
+      "epoch": 0.6564673157162726,
+      "grad_norm": 0.3166523277759552,
+      "learning_rate": 0.00017485107481711012,
+      "loss": 0.8833,
+      "step": 59
+    },
+    {
+      "epoch": 0.6675938803894298,
+      "grad_norm": 0.3032624423503876,
+      "learning_rate": 0.00016927243535095997,
+      "loss": 0.853,
+      "step": 60
+    },
+    {
+      "epoch": 0.6787204450625869,
+      "grad_norm": 0.3140091300010681,
+      "learning_rate": 0.00016324453755953773,
+      "loss": 0.8105,
+      "step": 61
+    },
+    {
+      "epoch": 0.6898470097357441,
+      "grad_norm": 0.34130093455314636,
+      "learning_rate": 0.00015680647467311557,
+      "loss": 0.8835,
+      "step": 62
+    },
+    {
+      "epoch": 0.7009735744089013,
+      "grad_norm": 0.34113407135009766,
+      "learning_rate": 0.00015000000000000001,
+      "loss": 0.7985,
+      "step": 63
+    },
+    {
+      "epoch": 0.7121001390820584,
+      "grad_norm": 0.3518664538860321,
+      "learning_rate": 0.00014286925614030542,
+      "loss": 0.7976,
+      "step": 64
+    },
+    {
+      "epoch": 0.7232267037552156,
+      "grad_norm": 0.42231667041778564,
+      "learning_rate": 0.00013546048870425356,
+      "loss": 0.8612,
+      "step": 65
+    },
+    {
+      "epoch": 0.7343532684283728,
+      "grad_norm": 0.5391589999198914,
+      "learning_rate": 0.0001278217463916453,
+      "loss": 0.8911,
+      "step": 66
+    },
+    {
+      "epoch": 0.7454798331015299,
+      "grad_norm": 0.17004971206188202,
+      "learning_rate": 0.00012000256937760445,
+      "loss": 0.6497,
+      "step": 67
+    },
+    {
+      "epoch": 0.7566063977746871,
+      "grad_norm": 0.25505462288856506,
+      "learning_rate": 0.0001120536680255323,
+      "loss": 0.8027,
+      "step": 68
+    },
+    {
+      "epoch": 0.7677329624478443,
+      "grad_norm": 0.20039665699005127,
+      "learning_rate": 0.00010402659401094152,
+      "loss": 0.6715,
+      "step": 69
+    },
+    {
+      "epoch": 0.7677329624478443,
+      "eval_loss": 0.8166666626930237,
+      "eval_runtime": 8.6948,
+      "eval_samples_per_second": 17.482,
+      "eval_steps_per_second": 4.37,
+      "step": 69
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 5.341828583994163e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null