apriasmoro commited on
Commit
0d740bf
·
verified ·
1 Parent(s): 600f99b

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3d42a1eec1f943c1388cb61143c46218e9bd0013db67a2ec548bbe97540572f
3
  size 349243752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98488c56f318dc2da3929b18ad2ad5a152e66efba66afbe4fc87cda337b6db57
3
  size 349243752
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a16c1c0142e72e1f8b2431f5f66c49b1fa75ebcfbf4b63b1248a95dd4e30017f
3
  size 177909253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:057c770e031ada072d6f11d80e9d3ef37634519804b1e3934feba9bcc0ac546d
3
  size 177909253
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa891e248cfec1e331e706e066d2fa515d3af505fa6d0b031f66d55ead042ba5
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:442b031d01f716fa595ec83da7a5b8b396b18c106796b82715fedbff217e57d5
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa5b5d1fc0dacb794f95a8e0653d3306763b02c6b66ddfe1486572f85ef2c3a0
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24481c486beb14ce7d59d0586b23c806ec848e00bc91bbf21c23488cf27d188d
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.4975124378109453,
6
  "eval_steps": 500,
7
- "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -883,6 +883,181 @@
883
  "learning_rate": 8.012803577096473e-06,
884
  "loss": 1.3037,
885
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
886
  }
887
  ],
888
  "logging_steps": 4,
@@ -902,7 +1077,7 @@
902
  "attributes": {}
903
  }
904
  },
905
- "total_flos": 3.773277452453806e+17,
906
  "train_batch_size": 24,
907
  "trial_name": null,
908
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.5970149253731343,
6
  "eval_steps": 500,
7
+ "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
883
  "learning_rate": 8.012803577096473e-06,
884
  "loss": 1.3037,
885
  "step": 500
886
+ },
887
+ {
888
+ "epoch": 0.5014925373134328,
889
+ "grad_norm": 0.17158161103725433,
890
+ "learning_rate": 7.92168888781252e-06,
891
+ "loss": 1.409,
892
+ "step": 504
893
+ },
894
+ {
895
+ "epoch": 0.5054726368159204,
896
+ "grad_norm": 0.1840897649526596,
897
+ "learning_rate": 7.830051785587235e-06,
898
+ "loss": 1.3857,
899
+ "step": 508
900
+ },
901
+ {
902
+ "epoch": 0.5094527363184079,
903
+ "grad_norm": 0.16382519900798798,
904
+ "learning_rate": 7.737916550320155e-06,
905
+ "loss": 1.3431,
906
+ "step": 512
907
+ },
908
+ {
909
+ "epoch": 0.5134328358208955,
910
+ "grad_norm": 0.12751099467277527,
911
+ "learning_rate": 7.64530759389469e-06,
912
+ "loss": 1.3626,
913
+ "step": 516
914
+ },
915
+ {
916
+ "epoch": 0.5174129353233831,
917
+ "grad_norm": 0.16248376667499542,
918
+ "learning_rate": 7.552249453710032e-06,
919
+ "loss": 1.3129,
920
+ "step": 520
921
+ },
922
+ {
923
+ "epoch": 0.5213930348258706,
924
+ "grad_norm": 0.1406685709953308,
925
+ "learning_rate": 7.458766786179792e-06,
926
+ "loss": 1.3628,
927
+ "step": 524
928
+ },
929
+ {
930
+ "epoch": 0.5253731343283582,
931
+ "grad_norm": 0.13998349010944366,
932
+ "learning_rate": 7.364884360199107e-06,
933
+ "loss": 1.3887,
934
+ "step": 528
935
+ },
936
+ {
937
+ "epoch": 0.5293532338308458,
938
+ "grad_norm": 0.15993693470954895,
939
+ "learning_rate": 7.270627050581951e-06,
940
+ "loss": 1.3764,
941
+ "step": 532
942
+ },
943
+ {
944
+ "epoch": 0.5333333333333333,
945
+ "grad_norm": 0.21970954537391663,
946
+ "learning_rate": 7.176019831470373e-06,
947
+ "loss": 1.4067,
948
+ "step": 536
949
+ },
950
+ {
951
+ "epoch": 0.5373134328358209,
952
+ "grad_norm": 0.1592174619436264,
953
+ "learning_rate": 7.081087769717416e-06,
954
+ "loss": 1.4348,
955
+ "step": 540
956
+ },
957
+ {
958
+ "epoch": 0.5412935323383085,
959
+ "grad_norm": 0.1640401929616928,
960
+ "learning_rate": 6.985856018245494e-06,
961
+ "loss": 1.39,
962
+ "step": 544
963
+ },
964
+ {
965
+ "epoch": 0.545273631840796,
966
+ "grad_norm": 0.14530886709690094,
967
+ "learning_rate": 6.890349809381926e-06,
968
+ "loss": 1.4217,
969
+ "step": 548
970
+ },
971
+ {
972
+ "epoch": 0.5492537313432836,
973
+ "grad_norm": 0.14905086159706116,
974
+ "learning_rate": 6.7945944481734625e-06,
975
+ "loss": 1.3693,
976
+ "step": 552
977
+ },
978
+ {
979
+ "epoch": 0.5532338308457712,
980
+ "grad_norm": 0.1508338302373886,
981
+ "learning_rate": 6.698615305681538e-06,
982
+ "loss": 1.3794,
983
+ "step": 556
984
+ },
985
+ {
986
+ "epoch": 0.5572139303482587,
987
+ "grad_norm": 0.15846911072731018,
988
+ "learning_rate": 6.602437812260021e-06,
989
+ "loss": 1.439,
990
+ "step": 560
991
+ },
992
+ {
993
+ "epoch": 0.5611940298507463,
994
+ "grad_norm": 0.15680456161499023,
995
+ "learning_rate": 6.5060874508172626e-06,
996
+ "loss": 1.3706,
997
+ "step": 564
998
+ },
999
+ {
1000
+ "epoch": 0.5651741293532339,
1001
+ "grad_norm": 0.14353099465370178,
1002
+ "learning_rate": 6.4095897500642245e-06,
1003
+ "loss": 1.4015,
1004
+ "step": 568
1005
+ },
1006
+ {
1007
+ "epoch": 0.5691542288557214,
1008
+ "grad_norm": 0.16101489961147308,
1009
+ "learning_rate": 6.3129702777504585e-06,
1010
+ "loss": 1.3364,
1011
+ "step": 572
1012
+ },
1013
+ {
1014
+ "epoch": 0.573134328358209,
1015
+ "grad_norm": 0.13535454869270325,
1016
+ "learning_rate": 6.216254633889758e-06,
1017
+ "loss": 1.3294,
1018
+ "step": 576
1019
+ },
1020
+ {
1021
+ "epoch": 0.5771144278606966,
1022
+ "grad_norm": 0.17043928802013397,
1023
+ "learning_rate": 6.119468443977249e-06,
1024
+ "loss": 1.4216,
1025
+ "step": 580
1026
+ },
1027
+ {
1028
+ "epoch": 0.5810945273631841,
1029
+ "grad_norm": 0.15072950720787048,
1030
+ "learning_rate": 6.02263735219973e-06,
1031
+ "loss": 1.4152,
1032
+ "step": 584
1033
+ },
1034
+ {
1035
+ "epoch": 0.5850746268656717,
1036
+ "grad_norm": 0.13807035982608795,
1037
+ "learning_rate": 5.925787014641067e-06,
1038
+ "loss": 1.369,
1039
+ "step": 588
1040
+ },
1041
+ {
1042
+ "epoch": 0.5890547263681593,
1043
+ "grad_norm": 0.1548323780298233,
1044
+ "learning_rate": 5.82894309248444e-06,
1045
+ "loss": 1.4166,
1046
+ "step": 592
1047
+ },
1048
+ {
1049
+ "epoch": 0.5930348258706468,
1050
+ "grad_norm": 0.16310498118400574,
1051
+ "learning_rate": 5.732131245213214e-06,
1052
+ "loss": 1.3644,
1053
+ "step": 596
1054
+ },
1055
+ {
1056
+ "epoch": 0.5970149253731343,
1057
+ "grad_norm": 0.14257760345935822,
1058
+ "learning_rate": 5.63537712381229e-06,
1059
+ "loss": 1.3559,
1060
+ "step": 600
1061
  }
1062
  ],
1063
  "logging_steps": 4,
 
1077
  "attributes": {}
1078
  }
1079
  },
1080
+ "total_flos": 4.52163003620524e+17,
1081
  "train_batch_size": 24,
1082
  "trial_name": null,
1083
  "trial_params": null