berkind committed on
Commit
1526c64
·
verified ·
1 Parent(s): 87ad05d

Canary-1b-v2 tokenizer backwards compatibility

Browse files
canary-1b-v2/config.json CHANGED
@@ -29,9 +29,10 @@
29
  "subsampling_conv_stride": 2,
30
  "subsampling_factor": 8
31
  },
 
 
 
32
  "initializer_range": 0.02,
33
- "model_type": "parakeet_ctc",
34
  "pad_token_id": 1024,
35
- "transformers_version": "4.57.1",
36
  "vocab_size": 16385
37
  }
 
29
  "subsampling_conv_stride": 2,
30
  "subsampling_factor": 8
31
  },
32
+ "bos_token_id": 4,
33
+ "eos_token_id": 3,
34
+ "nemo_model_type": "parakeet",
35
  "initializer_range": 0.02,
 
36
  "pad_token_id": 1024,
 
37
  "vocab_size": 16385
38
  }
canary-1b-v2/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
canary-1b-v2/tokenizer_config.json CHANGED
@@ -9310,6 +9310,6 @@
9310
  "model_max_length": 1000000000000000019884624838656,
9311
  "pad_token": "<pad>",
9312
  "processor_class": "ParakeetProcessor",
9313
- "tokenizer_class": "ParakeetTokenizerFast",
9314
  "unk_token": "<unk>"
9315
  }
 
9310
  "model_max_length": 1000000000000000019884624838656,
9311
  "pad_token": "<pad>",
9312
  "processor_class": "ParakeetProcessor",
9313
+ "tokenizer_class": "PreTrainedTokenizer",
9314
  "unk_token": "<unk>"
9315
  }