DrAliGomaa commited on
Commit
1a23a1c
·
verified ·
1 Parent(s): 96dccea

Training in progress, step 26840, checkpoint

Browse files
last-checkpoint/global_step26840/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6352feb7c350a62238622e0e788c88e5be55b9750c1d51f8302cd35c9553dde
3
+ size 2312371472
last-checkpoint/global_step26840/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d56362d42c54a0d2bb62d2e97838d22ecd01464a52ae7c75fc8fc1e6332ad0b
3
+ size 2312371472
last-checkpoint/global_step26840/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08ba8602308ec40a45363a91a1b8852fe1667dbde98ee7a820352bd0d362fb64
3
+ size 2312371472
last-checkpoint/global_step26840/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cf488272b732bcdee7cdbe6f749c49a2a75f7a04db9854f32ca40e6a84cbfbf
3
+ size 2312368912
last-checkpoint/global_step26840/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3171d7599d1cb0bcf24ce6742e8cfea0dd60af0b23741ec2101bcb89755bd1d5
3
+ size 2312373776
last-checkpoint/global_step26840/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a0dd81cb4997639e0a3c94957b34e3017bfa238110fea417571ac6865f1a1d7
3
+ size 2312374928
last-checkpoint/global_step26840/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e82f3a417fdc51e897f5ef589501668deab4dddb98090196a4bd85d553cd9098
3
+ size 2312374608
last-checkpoint/global_step26840/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12d2df4faf88886659ba484719ea6153a394e5bf686c86c21a6e450ede3be099
3
+ size 2312374352
last-checkpoint/global_step26840/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbffd1b2694b080e9059f0575316a7fa0e33c6c85eb9126c34f540ea81f6d8ba
3
+ size 3091157252
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step23485
 
1
+ global_step26840
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:714bbedeea9887f4a4175fd6ca7185f327bfb632f864ad8e2be90ecc389d5a56
3
  size 3219909288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0994eab9f09245b2604408956fb81c5bb1bbba22b4386ceb38dc21867b83b3d2
3
  size 3219909288
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77d8ab6905cd3a4e24d75f95e0ee11957f3affc8737a6ab6794d643c09553c81
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55e464ee5687df2468f6598f5fbaf5ecca79e676c224afe9b2b9c93e7de313e3
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:110c4f76ce40322613c082b35e2f64980aead421fe536dfcb89def5e8adf624e
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09c2768a057a5737f0dd811a2de2e72b92803061644a587506be3a1b78938712
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80e1f4315f94ccb9c260de1cd47266e51224bfc98c4655b866c79b57e5cf6db9
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77e4ad39a8765593ef6b70913f6cfd1e72a7a7481a92bc4a9779fe9c7952cf13
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:516f55b71bbd475c8c83b214d527ab1a6fc8c9cbf0f2471d183b7809c99f1dbf
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a45b7061361563da1cab22f3338e147ac5b6aaf172e88e9248f4f95e493cef24
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d449ea9890c8ccd230b1934042d174f2679448dd3ab3b039c7a39b6d4b5e0e2a
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db243ba40667e19f840cf467486fdee8150837809162868348f5b0a5a63faf87
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2d10c0b913462f38a1335aacdd7e667fbf36ea7e7efe0e6678d7d62ea77b2ba
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:648d594d4dbe02334ab30eec0ae10841e3ae65746846a43bb94d60b5366ef4c9
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7759888fa3895c36ed13c6ffe7b753a06b450fe3e08446c2d9d4e800a625cdd7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da7b5be279fb61e58c292b03764c230adec8a20a83b7cbe5e8f29d20187238b2
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c5eb6791e3c9c99e1287dfb1f51164085acc9747dbddb8dfeac1903f3fd1ab3
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d0f3cc458c5e8588ba44862c062dee2ec0f7322cdb2054e8c69057d17238686
3
  size 15984
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 23485,
3
  "best_metric": 0.2517834662190516,
4
  "best_model_checkpoint": "./whisper-large-v3-ar-test/checkpoint-23485",
5
- "epoch": 3.4989570917759236,
6
  "eval_steps": 3355,
7
- "global_step": 23485,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1790,6 +1790,280 @@
1790
  "eval_mgb2_wer": 9.467092337917485,
1791
  "eval_mgb2_wer_ortho": 13.79169224473092,
1792
  "step": 23485
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1793
  }
1794
  ],
1795
  "logging_steps": 100,
@@ -1804,7 +2078,7 @@
1804
  "early_stopping_threshold": 0.0
1805
  },
1806
  "attributes": {
1807
- "early_stopping_patience_counter": 0
1808
  }
1809
  },
1810
  "TrainerControl": {
@@ -1818,7 +2092,7 @@
1818
  "attributes": {}
1819
  }
1820
  },
1821
- "total_flos": 2.5532878087120287e+21,
1822
  "train_batch_size": 4,
1823
  "trial_name": null,
1824
  "trial_params": null
 
2
  "best_global_step": 23485,
3
  "best_metric": 0.2517834662190516,
4
  "best_model_checkpoint": "./whisper-large-v3-ar-test/checkpoint-23485",
5
+ "epoch": 3.99880810488677,
6
  "eval_steps": 3355,
7
+ "global_step": 26840,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1790
  "eval_mgb2_wer": 9.467092337917485,
1791
  "eval_mgb2_wer_ortho": 13.79169224473092,
1792
  "step": 23485
1793
+ },
1794
+ {
1795
+ "epoch": 3.50119189511323,
1796
+ "grad_norm": 0.37351053953170776,
1797
+ "learning_rate": 2.9153628371330657e-06,
1798
+ "loss": 0.0262,
1799
+ "step": 23500
1800
+ },
1801
+ {
1802
+ "epoch": 3.5160905840286056,
1803
+ "grad_norm": 1.6208069324493408,
1804
+ "learning_rate": 2.9029454130035267e-06,
1805
+ "loss": 0.024,
1806
+ "step": 23600
1807
+ },
1808
+ {
1809
+ "epoch": 3.530989272943981,
1810
+ "grad_norm": 0.6350374817848206,
1811
+ "learning_rate": 2.8905279888739885e-06,
1812
+ "loss": 0.0234,
1813
+ "step": 23700
1814
+ },
1815
+ {
1816
+ "epoch": 3.545887961859356,
1817
+ "grad_norm": 1.354862928390503,
1818
+ "learning_rate": 2.8781105647444494e-06,
1819
+ "loss": 0.025,
1820
+ "step": 23800
1821
+ },
1822
+ {
1823
+ "epoch": 3.5607866507747317,
1824
+ "grad_norm": 0.6370978355407715,
1825
+ "learning_rate": 2.8656931406149112e-06,
1826
+ "loss": 0.0245,
1827
+ "step": 23900
1828
+ },
1829
+ {
1830
+ "epoch": 3.575685339690107,
1831
+ "grad_norm": 0.7340075373649597,
1832
+ "learning_rate": 2.8532757164853726e-06,
1833
+ "loss": 0.0269,
1834
+ "step": 24000
1835
+ },
1836
+ {
1837
+ "epoch": 3.5905840286054826,
1838
+ "grad_norm": 0.6038511395454407,
1839
+ "learning_rate": 2.8408582923558344e-06,
1840
+ "loss": 0.025,
1841
+ "step": 24100
1842
+ },
1843
+ {
1844
+ "epoch": 3.605482717520858,
1845
+ "grad_norm": 0.6267861127853394,
1846
+ "learning_rate": 2.8284408682262953e-06,
1847
+ "loss": 0.0221,
1848
+ "step": 24200
1849
+ },
1850
+ {
1851
+ "epoch": 3.6203814064362336,
1852
+ "grad_norm": 1.2206661701202393,
1853
+ "learning_rate": 2.8160234440967567e-06,
1854
+ "loss": 0.0251,
1855
+ "step": 24300
1856
+ },
1857
+ {
1858
+ "epoch": 3.635280095351609,
1859
+ "grad_norm": 0.6847667098045349,
1860
+ "learning_rate": 2.803606019967218e-06,
1861
+ "loss": 0.024,
1862
+ "step": 24400
1863
+ },
1864
+ {
1865
+ "epoch": 3.6501787842669846,
1866
+ "grad_norm": 0.8292557597160339,
1867
+ "learning_rate": 2.7911885958376795e-06,
1868
+ "loss": 0.0246,
1869
+ "step": 24500
1870
+ },
1871
+ {
1872
+ "epoch": 3.66507747318236,
1873
+ "grad_norm": 0.6944147944450378,
1874
+ "learning_rate": 2.7787711717081413e-06,
1875
+ "loss": 0.0228,
1876
+ "step": 24600
1877
+ },
1878
+ {
1879
+ "epoch": 3.679976162097735,
1880
+ "grad_norm": 0.5313336849212646,
1881
+ "learning_rate": 2.766353747578602e-06,
1882
+ "loss": 0.0249,
1883
+ "step": 24700
1884
+ },
1885
+ {
1886
+ "epoch": 3.694874851013111,
1887
+ "grad_norm": 0.47161412239074707,
1888
+ "learning_rate": 2.753936323449064e-06,
1889
+ "loss": 0.026,
1890
+ "step": 24800
1891
+ },
1892
+ {
1893
+ "epoch": 3.709773539928486,
1894
+ "grad_norm": 0.9280988574028015,
1895
+ "learning_rate": 2.7415188993195254e-06,
1896
+ "loss": 0.0224,
1897
+ "step": 24900
1898
+ },
1899
+ {
1900
+ "epoch": 3.7246722288438616,
1901
+ "grad_norm": 0.9222899675369263,
1902
+ "learning_rate": 2.7291014751899867e-06,
1903
+ "loss": 0.0229,
1904
+ "step": 25000
1905
+ },
1906
+ {
1907
+ "epoch": 3.739570917759237,
1908
+ "grad_norm": 0.6520107388496399,
1909
+ "learning_rate": 2.716684051060448e-06,
1910
+ "loss": 0.0215,
1911
+ "step": 25100
1912
+ },
1913
+ {
1914
+ "epoch": 3.7544696066746126,
1915
+ "grad_norm": 0.5594351291656494,
1916
+ "learning_rate": 2.70426662693091e-06,
1917
+ "loss": 0.0227,
1918
+ "step": 25200
1919
+ },
1920
+ {
1921
+ "epoch": 3.769368295589988,
1922
+ "grad_norm": 0.8105785846710205,
1923
+ "learning_rate": 2.691849202801371e-06,
1924
+ "loss": 0.0231,
1925
+ "step": 25300
1926
+ },
1927
+ {
1928
+ "epoch": 3.7842669845053636,
1929
+ "grad_norm": 0.8062728643417358,
1930
+ "learning_rate": 2.6794317786718326e-06,
1931
+ "loss": 0.0212,
1932
+ "step": 25400
1933
+ },
1934
+ {
1935
+ "epoch": 3.799165673420739,
1936
+ "grad_norm": 0.6356725692749023,
1937
+ "learning_rate": 2.667014354542294e-06,
1938
+ "loss": 0.022,
1939
+ "step": 25500
1940
+ },
1941
+ {
1942
+ "epoch": 3.8140643623361146,
1943
+ "grad_norm": 0.8131894469261169,
1944
+ "learning_rate": 2.6545969304127554e-06,
1945
+ "loss": 0.0223,
1946
+ "step": 25600
1947
+ },
1948
+ {
1949
+ "epoch": 3.82896305125149,
1950
+ "grad_norm": 0.34240245819091797,
1951
+ "learning_rate": 2.6421795062832168e-06,
1952
+ "loss": 0.0211,
1953
+ "step": 25700
1954
+ },
1955
+ {
1956
+ "epoch": 3.843861740166865,
1957
+ "grad_norm": 0.7727413177490234,
1958
+ "learning_rate": 2.6297620821536786e-06,
1959
+ "loss": 0.0229,
1960
+ "step": 25800
1961
+ },
1962
+ {
1963
+ "epoch": 3.858760429082241,
1964
+ "grad_norm": 1.0136847496032715,
1965
+ "learning_rate": 2.6173446580241395e-06,
1966
+ "loss": 0.0225,
1967
+ "step": 25900
1968
+ },
1969
+ {
1970
+ "epoch": 3.873659117997616,
1971
+ "grad_norm": 0.603969931602478,
1972
+ "learning_rate": 2.6049272338946013e-06,
1973
+ "loss": 0.0221,
1974
+ "step": 26000
1975
+ },
1976
+ {
1977
+ "epoch": 3.8885578069129916,
1978
+ "grad_norm": 0.7807964086532593,
1979
+ "learning_rate": 2.5925098097650627e-06,
1980
+ "loss": 0.0228,
1981
+ "step": 26100
1982
+ },
1983
+ {
1984
+ "epoch": 3.903456495828367,
1985
+ "grad_norm": 0.5105961561203003,
1986
+ "learning_rate": 2.580092385635524e-06,
1987
+ "loss": 0.0194,
1988
+ "step": 26200
1989
+ },
1990
+ {
1991
+ "epoch": 3.9183551847437426,
1992
+ "grad_norm": 0.6721608638763428,
1993
+ "learning_rate": 2.5676749615059854e-06,
1994
+ "loss": 0.0221,
1995
+ "step": 26300
1996
+ },
1997
+ {
1998
+ "epoch": 3.933253873659118,
1999
+ "grad_norm": 1.0928480625152588,
2000
+ "learning_rate": 2.555257537376447e-06,
2001
+ "loss": 0.023,
2002
+ "step": 26400
2003
+ },
2004
+ {
2005
+ "epoch": 3.9481525625744935,
2006
+ "grad_norm": 0.8083678483963013,
2007
+ "learning_rate": 2.542840113246908e-06,
2008
+ "loss": 0.0235,
2009
+ "step": 26500
2010
+ },
2011
+ {
2012
+ "epoch": 3.963051251489869,
2013
+ "grad_norm": 0.6882435083389282,
2014
+ "learning_rate": 2.5304226891173695e-06,
2015
+ "loss": 0.0211,
2016
+ "step": 26600
2017
+ },
2018
+ {
2019
+ "epoch": 3.977949940405244,
2020
+ "grad_norm": 0.6654950380325317,
2021
+ "learning_rate": 2.5180052649878313e-06,
2022
+ "loss": 0.0202,
2023
+ "step": 26700
2024
+ },
2025
+ {
2026
+ "epoch": 3.99284862932062,
2027
+ "grad_norm": 0.31399238109588623,
2028
+ "learning_rate": 2.5055878408582923e-06,
2029
+ "loss": 0.0199,
2030
+ "step": 26800
2031
+ },
2032
+ {
2033
+ "epoch": 3.99880810488677,
2034
+ "eval_quran_cer": 0.559225892559226,
2035
+ "eval_quran_cer_ortho": 0.5632298965632299,
2036
+ "eval_quran_loss": 0.006387302652001381,
2037
+ "eval_quran_runtime": 884.8053,
2038
+ "eval_quran_samples_per_second": 1.13,
2039
+ "eval_quran_steps_per_second": 0.018,
2040
+ "eval_quran_wer": 0.8742481465939292,
2041
+ "eval_quran_wer_ortho": 0.8952301021121835,
2042
+ "step": 26840
2043
+ },
2044
+ {
2045
+ "epoch": 3.99880810488677,
2046
+ "eval_hadith_cer": 3.8342385316344125,
2047
+ "eval_hadith_cer_ortho": 3.9314555329436014,
2048
+ "eval_hadith_loss": 0.06706906855106354,
2049
+ "eval_hadith_runtime": 859.7812,
2050
+ "eval_hadith_samples_per_second": 1.163,
2051
+ "eval_hadith_steps_per_second": 0.019,
2052
+ "eval_hadith_wer": 6.03901938133744,
2053
+ "eval_hadith_wer_ortho": 6.417661404184314,
2054
+ "step": 26840
2055
+ },
2056
+ {
2057
+ "epoch": 3.99880810488677,
2058
+ "eval_mgb2_cer": 2.902677893823859,
2059
+ "eval_mgb2_cer_ortho": 3.678463284022172,
2060
+ "eval_mgb2_loss": 0.292144775390625,
2061
+ "eval_mgb2_runtime": 460.4502,
2062
+ "eval_mgb2_samples_per_second": 1.073,
2063
+ "eval_mgb2_steps_per_second": 0.017,
2064
+ "eval_mgb2_wer": 10.019646365422396,
2065
+ "eval_mgb2_wer_ortho": 13.971761817065683,
2066
+ "step": 26840
2067
  }
2068
  ],
2069
  "logging_steps": 100,
 
2078
  "early_stopping_threshold": 0.0
2079
  },
2080
  "attributes": {
2081
+ "early_stopping_patience_counter": 1
2082
  }
2083
  },
2084
  "TrainerControl": {
 
2092
  "attributes": {}
2093
  }
2094
  },
2095
+ "total_flos": 2.9180432098191653e+21,
2096
  "train_batch_size": 4,
2097
  "trial_name": null,
2098
  "trial_params": null