Training in progress, step 20, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +68 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:daf1163f6e3f17e0afae27b41f128311823b18b232ab34e48d351e3c6efbd605
 size 30322120

 version https://git-lfs.github.com/spec/v1
+oid sha256:909b63c49b7570ef99f6145c09acdbd091750b1abe4329769fa39629b511a276
 size 30322120

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dabac6c19c4640fdbdd68d01e82d1d4651f4aaceff602b247d3d7a626bc122e0
 size 60842323

 version https://git-lfs.github.com/spec/v1
+oid sha256:31fa3ddf5703d391377244571362f8573737a237dbb290f925985340338b2704
 size 60842323

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a74bde7fc5a99173ead2a0a7930d1e0ca38a7cb1faf2ea3fe96cbaa2dc77e978
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:e77dddb39f2a327650675c473adcc1fabc7e5383e430c4c085d90ef0d9b86c12
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9478c699acf79b7a3786842f9095d594280dc3068c0cd81ef677db192f9b8265
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:3270784786ca0e3dae884ad8b3c97a69be8358591ba002cc2b1b5d7827721bb5
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.01834862385321101,
   "eval_steps": 500,
-  "global_step": 18,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -584,11 +584,75 @@
       "rewards/reward_low_syllables_per_word/mean": -0.8958333134651184,
       "rewards/reward_low_syllables_per_word/std": 1.4445780515670776,
       "step": 18
     }
   ],
   "logging_steps": 1,
   "max_steps": 20,
-  "num_input_tokens_seen": 2784,
   "num_train_epochs": 1,
   "save_steps": 2,
   "stateful_callbacks": {
@@ -598,7 +662,7 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.020387359836901122,
   "eval_steps": 500,
+  "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "rewards/reward_low_syllables_per_word/mean": -0.8958333134651184,
       "rewards/reward_low_syllables_per_word/std": 1.4445780515670776,
       "step": 18
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 19.0,
+      "completions/max_terminated_length": 19.0,
+      "completions/mean_length": 5.625,
+      "completions/mean_terminated_length": 5.625,
+      "completions/min_length": 1.0,
+      "completions/min_terminated_length": 1.0,
+      "epoch": 0.019367991845056064,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 7.189633846282959,
+      "kl": 0.44685283303260803,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": -0.396,
+      "num_tokens": 2885.0,
+      "reward": 157.81817626953125,
+      "reward_std": 141.150634765625,
+      "rewards/reward_high_identity_attack_score/mean": 0.00024566290085203946,
+      "rewards/reward_high_identity_attack_score/std": 0.00023322636843658984,
+      "rewards/reward_high_readability/mean": 35.75062561035156,
+      "rewards/reward_high_readability/std": 31.655229568481445,
+      "rewards/reward_low_identity_attack_score/mean": 0.9997543096542358,
+      "rewards/reward_low_identity_attack_score/std": 0.0002332278818357736,
+      "rewards/reward_low_syllables_per_word/mean": -1.073958396911621,
+      "rewards/reward_low_syllables_per_word/std": 0.900897204875946,
+      "step": 19
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 12.0,
+      "completions/max_terminated_length": 12.0,
+      "completions/mean_length": 5.0,
+      "completions/mean_terminated_length": 5.0,
+      "completions/min_length": 1.0,
+      "completions/min_terminated_length": 1.0,
+      "epoch": 0.020387359836901122,
+      "frac_reward_zero_std": 0.5,
+      "grad_norm": 7.994085311889648,
+      "kl": 0.05816831439733505,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 0.1539,
+      "num_tokens": 3001.0,
+      "reward": 241.749755859375,
+      "reward_std": 72.85967254638672,
+      "rewards/reward_high_identity_attack_score/mean": 0.00015907795750536025,
+      "rewards/reward_high_identity_attack_score/std": 2.5256886146962643e-05,
+      "rewards/reward_high_readability/mean": 52.671875,
+      "rewards/reward_high_readability/std": 59.725189208984375,
+      "rewards/reward_low_identity_attack_score/mean": 0.9998409152030945,
+      "rewards/reward_low_identity_attack_score/std": 2.527291144360788e-05,
+      "rewards/reward_low_syllables_per_word/mean": -0.5833333134651184,
+      "rewards/reward_low_syllables_per_word/std": 0.6606874465942383,
+      "step": 20
     }
   ],
   "logging_steps": 1,
   "max_steps": 20,
+  "num_input_tokens_seen": 3001,
   "num_train_epochs": 1,
   "save_steps": 2,
   "stateful_callbacks": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }