Training in progress, step 600, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +178 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3d42a1eec1f943c1388cb61143c46218e9bd0013db67a2ec548bbe97540572f
 size 349243752

 version https://git-lfs.github.com/spec/v1
+oid sha256:98488c56f318dc2da3929b18ad2ad5a152e66efba66afbe4fc87cda337b6db57
 size 349243752

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a16c1c0142e72e1f8b2431f5f66c49b1fa75ebcfbf4b63b1248a95dd4e30017f
 size 177909253

 version https://git-lfs.github.com/spec/v1
+oid sha256:057c770e031ada072d6f11d80e9d3ef37634519804b1e3934feba9bcc0ac546d
 size 177909253

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa891e248cfec1e331e706e066d2fa515d3af505fa6d0b031f66d55ead042ba5
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:442b031d01f716fa595ec83da7a5b8b396b18c106796b82715fedbff217e57d5
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa5b5d1fc0dacb794f95a8e0653d3306763b02c6b66ddfe1486572f85ef2c3a0
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:24481c486beb14ce7d59d0586b23c806ec848e00bc91bbf21c23488cf27d188d
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.4975124378109453,
   "eval_steps": 500,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -883,6 +883,181 @@
       "learning_rate": 8.012803577096473e-06,
       "loss": 1.3037,
       "step": 500
     }
   ],
   "logging_steps": 4,
@@ -902,7 +1077,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.773277452453806e+17,
   "train_batch_size": 24,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5970149253731343,
   "eval_steps": 500,
+  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 8.012803577096473e-06,
       "loss": 1.3037,
       "step": 500
+    },
+    {
+      "epoch": 0.5014925373134328,
+      "grad_norm": 0.17158161103725433,
+      "learning_rate": 7.92168888781252e-06,
+      "loss": 1.409,
+      "step": 504
+    },
+    {
+      "epoch": 0.5054726368159204,
+      "grad_norm": 0.1840897649526596,
+      "learning_rate": 7.830051785587235e-06,
+      "loss": 1.3857,
+      "step": 508
+    },
+    {
+      "epoch": 0.5094527363184079,
+      "grad_norm": 0.16382519900798798,
+      "learning_rate": 7.737916550320155e-06,
+      "loss": 1.3431,
+      "step": 512
+    },
+    {
+      "epoch": 0.5134328358208955,
+      "grad_norm": 0.12751099467277527,
+      "learning_rate": 7.64530759389469e-06,
+      "loss": 1.3626,
+      "step": 516
+    },
+    {
+      "epoch": 0.5174129353233831,
+      "grad_norm": 0.16248376667499542,
+      "learning_rate": 7.552249453710032e-06,
+      "loss": 1.3129,
+      "step": 520
+    },
+    {
+      "epoch": 0.5213930348258706,
+      "grad_norm": 0.1406685709953308,
+      "learning_rate": 7.458766786179792e-06,
+      "loss": 1.3628,
+      "step": 524
+    },
+    {
+      "epoch": 0.5253731343283582,
+      "grad_norm": 0.13998349010944366,
+      "learning_rate": 7.364884360199107e-06,
+      "loss": 1.3887,
+      "step": 528
+    },
+    {
+      "epoch": 0.5293532338308458,
+      "grad_norm": 0.15993693470954895,
+      "learning_rate": 7.270627050581951e-06,
+      "loss": 1.3764,
+      "step": 532
+    },
+    {
+      "epoch": 0.5333333333333333,
+      "grad_norm": 0.21970954537391663,
+      "learning_rate": 7.176019831470373e-06,
+      "loss": 1.4067,
+      "step": 536
+    },
+    {
+      "epoch": 0.5373134328358209,
+      "grad_norm": 0.1592174619436264,
+      "learning_rate": 7.081087769717416e-06,
+      "loss": 1.4348,
+      "step": 540
+    },
+    {
+      "epoch": 0.5412935323383085,
+      "grad_norm": 0.1640401929616928,
+      "learning_rate": 6.985856018245494e-06,
+      "loss": 1.39,
+      "step": 544
+    },
+    {
+      "epoch": 0.545273631840796,
+      "grad_norm": 0.14530886709690094,
+      "learning_rate": 6.890349809381926e-06,
+      "loss": 1.4217,
+      "step": 548
+    },
+    {
+      "epoch": 0.5492537313432836,
+      "grad_norm": 0.14905086159706116,
+      "learning_rate": 6.7945944481734625e-06,
+      "loss": 1.3693,
+      "step": 552
+    },
+    {
+      "epoch": 0.5532338308457712,
+      "grad_norm": 0.1508338302373886,
+      "learning_rate": 6.698615305681538e-06,
+      "loss": 1.3794,
+      "step": 556
+    },
+    {
+      "epoch": 0.5572139303482587,
+      "grad_norm": 0.15846911072731018,
+      "learning_rate": 6.602437812260021e-06,
+      "loss": 1.439,
+      "step": 560
+    },
+    {
+      "epoch": 0.5611940298507463,
+      "grad_norm": 0.15680456161499023,
+      "learning_rate": 6.5060874508172626e-06,
+      "loss": 1.3706,
+      "step": 564
+    },
+    {
+      "epoch": 0.5651741293532339,
+      "grad_norm": 0.14353099465370178,
+      "learning_rate": 6.4095897500642245e-06,
+      "loss": 1.4015,
+      "step": 568
+    },
+    {
+      "epoch": 0.5691542288557214,
+      "grad_norm": 0.16101489961147308,
+      "learning_rate": 6.3129702777504585e-06,
+      "loss": 1.3364,
+      "step": 572
+    },
+    {
+      "epoch": 0.573134328358209,
+      "grad_norm": 0.13535454869270325,
+      "learning_rate": 6.216254633889758e-06,
+      "loss": 1.3294,
+      "step": 576
+    },
+    {
+      "epoch": 0.5771144278606966,
+      "grad_norm": 0.17043928802013397,
+      "learning_rate": 6.119468443977249e-06,
+      "loss": 1.4216,
+      "step": 580
+    },
+    {
+      "epoch": 0.5810945273631841,
+      "grad_norm": 0.15072950720787048,
+      "learning_rate": 6.02263735219973e-06,
+      "loss": 1.4152,
+      "step": 584
+    },
+    {
+      "epoch": 0.5850746268656717,
+      "grad_norm": 0.13807035982608795,
+      "learning_rate": 5.925787014641067e-06,
+      "loss": 1.369,
+      "step": 588
+    },
+    {
+      "epoch": 0.5890547263681593,
+      "grad_norm": 0.1548323780298233,
+      "learning_rate": 5.82894309248444e-06,
+      "loss": 1.4166,
+      "step": 592
+    },
+    {
+      "epoch": 0.5930348258706468,
+      "grad_norm": 0.16310498118400574,
+      "learning_rate": 5.732131245213214e-06,
+      "loss": 1.3644,
+      "step": 596
+    },
+    {
+      "epoch": 0.5970149253731343,
+      "grad_norm": 0.14257760345935822,
+      "learning_rate": 5.63537712381229e-06,
+      "loss": 1.3559,
+      "step": 600
     }
   ],
   "logging_steps": 4,
       "attributes": {}
     }
   },
+  "total_flos": 4.52163003620524e+17,
   "train_batch_size": 24,
   "trial_name": null,
   "trial_params": null