aleegis committed on
Commit
aba550f
·
verified ·
1 Parent(s): 0bc518a

Training in progress, step 20, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:daf1163f6e3f17e0afae27b41f128311823b18b232ab34e48d351e3c6efbd605
3
  size 30322120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:909b63c49b7570ef99f6145c09acdbd091750b1abe4329769fa39629b511a276
3
  size 30322120
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dabac6c19c4640fdbdd68d01e82d1d4651f4aaceff602b247d3d7a626bc122e0
3
  size 60842323
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31fa3ddf5703d391377244571362f8573737a237dbb290f925985340338b2704
3
  size 60842323
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a74bde7fc5a99173ead2a0a7930d1e0ca38a7cb1faf2ea3fe96cbaa2dc77e978
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e77dddb39f2a327650675c473adcc1fabc7e5383e430c4c085d90ef0d9b86c12
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9478c699acf79b7a3786842f9095d594280dc3068c0cd81ef677db192f9b8265
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3270784786ca0e3dae884ad8b3c97a69be8358591ba002cc2b1b5d7827721bb5
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.01834862385321101,
6
  "eval_steps": 500,
7
- "global_step": 18,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -584,11 +584,75 @@
584
  "rewards/reward_low_syllables_per_word/mean": -0.8958333134651184,
585
  "rewards/reward_low_syllables_per_word/std": 1.4445780515670776,
586
  "step": 18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
587
  }
588
  ],
589
  "logging_steps": 1,
590
  "max_steps": 20,
591
- "num_input_tokens_seen": 2784,
592
  "num_train_epochs": 1,
593
  "save_steps": 2,
594
  "stateful_callbacks": {
@@ -598,7 +662,7 @@
598
  "should_evaluate": false,
599
  "should_log": false,
600
  "should_save": true,
601
- "should_training_stop": false
602
  },
603
  "attributes": {}
604
  }
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.020387359836901122,
6
  "eval_steps": 500,
7
+ "global_step": 20,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
584
  "rewards/reward_low_syllables_per_word/mean": -0.8958333134651184,
585
  "rewards/reward_low_syllables_per_word/std": 1.4445780515670776,
586
  "step": 18
587
+ },
588
+ {
589
+ "clip_ratio/high_max": 0.0,
590
+ "clip_ratio/high_mean": 0.0,
591
+ "clip_ratio/low_mean": 0.0,
592
+ "clip_ratio/low_min": 0.0,
593
+ "clip_ratio/region_mean": 0.0,
594
+ "completions/clipped_ratio": 0.0,
595
+ "completions/max_length": 19.0,
596
+ "completions/max_terminated_length": 19.0,
597
+ "completions/mean_length": 5.625,
598
+ "completions/mean_terminated_length": 5.625,
599
+ "completions/min_length": 1.0,
600
+ "completions/min_terminated_length": 1.0,
601
+ "epoch": 0.019367991845056064,
602
+ "frac_reward_zero_std": 0.0,
603
+ "grad_norm": 7.189633846282959,
604
+ "kl": 0.44685283303260803,
605
+ "learning_rate": 3.0153689607045845e-06,
606
+ "loss": -0.396,
607
+ "num_tokens": 2885.0,
608
+ "reward": 157.81817626953125,
609
+ "reward_std": 141.150634765625,
610
+ "rewards/reward_high_identity_attack_score/mean": 0.00024566290085203946,
611
+ "rewards/reward_high_identity_attack_score/std": 0.00023322636843658984,
612
+ "rewards/reward_high_readability/mean": 35.75062561035156,
613
+ "rewards/reward_high_readability/std": 31.655229568481445,
614
+ "rewards/reward_low_identity_attack_score/mean": 0.9997543096542358,
615
+ "rewards/reward_low_identity_attack_score/std": 0.0002332278818357736,
616
+ "rewards/reward_low_syllables_per_word/mean": -1.073958396911621,
617
+ "rewards/reward_low_syllables_per_word/std": 0.900897204875946,
618
+ "step": 19
619
+ },
620
+ {
621
+ "clip_ratio/high_max": 0.0,
622
+ "clip_ratio/high_mean": 0.0,
623
+ "clip_ratio/low_mean": 0.0,
624
+ "clip_ratio/low_min": 0.0,
625
+ "clip_ratio/region_mean": 0.0,
626
+ "completions/clipped_ratio": 0.0,
627
+ "completions/max_length": 12.0,
628
+ "completions/max_terminated_length": 12.0,
629
+ "completions/mean_length": 5.0,
630
+ "completions/mean_terminated_length": 5.0,
631
+ "completions/min_length": 1.0,
632
+ "completions/min_terminated_length": 1.0,
633
+ "epoch": 0.020387359836901122,
634
+ "frac_reward_zero_std": 0.5,
635
+ "grad_norm": 7.994085311889648,
636
+ "kl": 0.05816831439733505,
637
+ "learning_rate": 7.596123493895991e-07,
638
+ "loss": 0.1539,
639
+ "num_tokens": 3001.0,
640
+ "reward": 241.749755859375,
641
+ "reward_std": 72.85967254638672,
642
+ "rewards/reward_high_identity_attack_score/mean": 0.00015907795750536025,
643
+ "rewards/reward_high_identity_attack_score/std": 2.5256886146962643e-05,
644
+ "rewards/reward_high_readability/mean": 52.671875,
645
+ "rewards/reward_high_readability/std": 59.725189208984375,
646
+ "rewards/reward_low_identity_attack_score/mean": 0.9998409152030945,
647
+ "rewards/reward_low_identity_attack_score/std": 2.527291144360788e-05,
648
+ "rewards/reward_low_syllables_per_word/mean": -0.5833333134651184,
649
+ "rewards/reward_low_syllables_per_word/std": 0.6606874465942383,
650
+ "step": 20
651
  }
652
  ],
653
  "logging_steps": 1,
654
  "max_steps": 20,
655
+ "num_input_tokens_seen": 3001,
656
  "num_train_epochs": 1,
657
  "save_steps": 2,
658
  "stateful_callbacks": {
 
662
  "should_evaluate": false,
663
  "should_log": false,
664
  "should_save": true,
665
+ "should_training_stop": true
666
  },
667
  "attributes": {}
668
  }