Commit 
							
							·
						
						ec7fcfd
	
1
								Parent(s):
							
							4ecfd3e
								
add files
Browse files- README.md +19 -0
- config.json +68 -0
- preprocessor_config.json +9 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.json +1 -0
    	
        README.md
    ADDED
    
    | @@ -0,0 +1,19 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            language: cs
         | 
| 3 | 
            +
            tags:
         | 
| 4 | 
            +
            - audio
         | 
| 5 | 
            +
            - automatic-speech-recognition
         | 
| 6 | 
            +
            - voxpopuli
         | 
| 7 | 
            +
            license: cc-by-nc-4.0
         | 
| 8 | 
            +
            ---
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            # Wav2Vec2-Base-VoxPopuli-Finetuned
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            [Facebook's Wav2Vec2](https://ai.facebook.com/blog/wav2vec-20-learning-the-structure-of-speech-from-raw-audio/) base model pretrained on the 10K unlabeled subset of [VoxPopuli corpus](https://arxiv.org/abs/2101.00390) and fine-tuned on the transcribed data in Czech (cs) (refer to Table 1 of the paper for more information).
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            **Paper**: *[VoxPopuli: A Large-Scale Multilingual Speech Corpus for Representation
         | 
| 15 | 
            +
            Learning, Semi-Supervised Learning and Interpretation](https://arxiv.org/abs/2101.00390)*
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            **Authors**: *Changhan Wang, Morgane Riviere, Ann Lee, Anne Wu, Chaitanya Talnikar, Daniel Haziza, Mary Williamson, Juan Pino, Emmanuel Dupoux* from *Facebook AI*
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            See the [official repository](https://github.com/facebookresearch/voxpopuli/) for more information.
         | 
    	
        config.json
    ADDED
    
    | @@ -0,0 +1,68 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "activation_dropout": 0.1,
         | 
| 3 | 
            +
              "apply_spec_augment": true,
         | 
| 4 | 
            +
              "architectures": [
         | 
| 5 | 
            +
                "Wav2Vec2ForCTC"
         | 
| 6 | 
            +
              ],
         | 
| 7 | 
            +
              "attention_dropout": 0.1,
         | 
| 8 | 
            +
              "bos_token_id": 0,
         | 
| 9 | 
            +
              "conv_bias": false,
         | 
| 10 | 
            +
              "conv_dim": [
         | 
| 11 | 
            +
                512,
         | 
| 12 | 
            +
                512,
         | 
| 13 | 
            +
                512,
         | 
| 14 | 
            +
                512,
         | 
| 15 | 
            +
                512,
         | 
| 16 | 
            +
                512,
         | 
| 17 | 
            +
                512
         | 
| 18 | 
            +
              ],
         | 
| 19 | 
            +
              "conv_kernel": [
         | 
| 20 | 
            +
                10,
         | 
| 21 | 
            +
                3,
         | 
| 22 | 
            +
                3,
         | 
| 23 | 
            +
                3,
         | 
| 24 | 
            +
                3,
         | 
| 25 | 
            +
                2,
         | 
| 26 | 
            +
                2
         | 
| 27 | 
            +
              ],
         | 
| 28 | 
            +
              "conv_stride": [
         | 
| 29 | 
            +
                5,
         | 
| 30 | 
            +
                2,
         | 
| 31 | 
            +
                2,
         | 
| 32 | 
            +
                2,
         | 
| 33 | 
            +
                2,
         | 
| 34 | 
            +
                2,
         | 
| 35 | 
            +
                2
         | 
| 36 | 
            +
              ],
         | 
| 37 | 
            +
              "ctc_loss_reduction": "sum",
         | 
| 38 | 
            +
              "ctc_zero_infinity": false,
         | 
| 39 | 
            +
              "do_stable_layer_norm": false,
         | 
| 40 | 
            +
              "eos_token_id": 2,
         | 
| 41 | 
            +
              "feat_extract_activation": "gelu",
         | 
| 42 | 
            +
              "feat_extract_dropout": 0.0,
         | 
| 43 | 
            +
              "feat_extract_norm": "group",
         | 
| 44 | 
            +
              "feat_proj_dropout": 0.1,
         | 
| 45 | 
            +
              "final_dropout": 0.1,
         | 
| 46 | 
            +
              "gradient_checkpointing": false,
         | 
| 47 | 
            +
              "hidden_act": "gelu",
         | 
| 48 | 
            +
              "hidden_dropout": 0.1,
         | 
| 49 | 
            +
              "hidden_dropout_prob": 0.1,
         | 
| 50 | 
            +
              "hidden_size": 768,
         | 
| 51 | 
            +
              "initializer_range": 0.02,
         | 
| 52 | 
            +
              "intermediate_size": 3072,
         | 
| 53 | 
            +
              "layer_norm_eps": 1e-05,
         | 
| 54 | 
            +
              "layerdrop": 0.1,
         | 
| 55 | 
            +
              "mask_feature_length": 10,
         | 
| 56 | 
            +
              "mask_feature_prob": 0.0,
         | 
| 57 | 
            +
              "mask_time_length": 10,
         | 
| 58 | 
            +
              "mask_time_prob": 0.05,
         | 
| 59 | 
            +
              "model_type": "wav2vec2",
         | 
| 60 | 
            +
              "num_attention_heads": 12,
         | 
| 61 | 
            +
              "num_conv_pos_embedding_groups": 16,
         | 
| 62 | 
            +
              "num_conv_pos_embeddings": 128,
         | 
| 63 | 
            +
              "num_feat_extract_layers": 7,
         | 
| 64 | 
            +
              "num_hidden_layers": 12,
         | 
| 65 | 
            +
              "pad_token_id": 1,
         | 
| 66 | 
            +
              "transformers_version": "4.6.0.dev0",
         | 
| 67 | 
            +
              "vocab_size": 46
         | 
| 68 | 
            +
            }
         | 
    	
        preprocessor_config.json
    ADDED
    
    | @@ -0,0 +1,9 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "do_normalize": true,
         | 
| 3 | 
            +
              "feature_extractor_type": "Wav2Vec2FeatureExtractor",
         | 
| 4 | 
            +
              "feature_size": 1,
         | 
| 5 | 
            +
              "padding_side": "right",
         | 
| 6 | 
            +
              "padding_value": 0,
         | 
| 7 | 
            +
              "return_attention_mask": false,
         | 
| 8 | 
            +
              "sampling_rate": 16000
         | 
| 9 | 
            +
            }
         | 
    	
        pytorch_model.bin
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:584de66885bcfc7a0a0652d9eff0030119022f1e7504d43ee0fd6870d1b4df66
         | 
| 3 | 
            +
            size 377715628
         | 
    	
        special_tokens_map.json
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
         | 
    	
        tokenizer_config.json
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|"}
         | 
    	
        vocab.json
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            {"<s>": 0, "<pad>": 1, "</s>": 2, "<unk>": 3, "|": 4, "o": 5, "e": 6, "n": 7, "a": 8, "t": 9, "i": 10, "s": 11, "v": 12, "r": 13, "p": 14, "m": 15, "d": 16, "l": 17, "k": 18, "í": 19, "u": 20, "c": 21, "á": 22, "j": 23, "h": 24, "z": 25, "y": 26, "b": 27, "ě": 28, "é": 29, "ř": 30, "ž": 31, "ý": 32, "č": 33, "š": 34, "ů": 35, "g": 36, "f": 37, "ú": 38, "ň": 39, "x": 40, "w": 41, "ť": 42, "ď": 43, "ó": 44, "q": 45}
         | 

