{
"model_name": "Efficient Conformer CTC Small",
"model_type": "CTC",
"encoder_params":
{
"arch": "Conformer",
"num_blocks": 15,
"dim_model": [120, 168, 240],
"ff_ratio": 4,
"num_heads": 4,
"kernel_size": 15,
"Pdrop": 0.1,
"conv_stride": 2,
"att_stride": 1,
"strided_blocks": [4, 9],
"expand_blocks": [4, 9],
"att_group_size": [3, 1, 1],
"relative_pos_enc": true,
"max_pos_encoding": 10000,
"subsampling_module": "Conv2d",
"subsampling_layers": 1,
"subsampling_filters": [120],
"subsampling_kernel_size": 3,
"subsampling_norm": "batch",
"subsampling_act": "swish",
"sample_rate": 16000,
"win_length_ms": 25,
"hop_length_ms": 10,
"n_fft": 512,
"n_mels": 80,
"normalize": false,
"mean": -5.6501,
"std": 4.2280,
"spec_augment": false,
"mF": 2,
"F": 27,
"mT": 5,
"pS": 0.05
},
"tokenizer_params":
{
"tokenizer_path": "datasets/Vietnamese/vi_bpe_1024.model",
"vocab_type": "bpe",
"vocab_size": 1024
},
"training_params":
{
"epochs": 450,
"batch_size": 64,
"accumulated_steps": 2,
"mixed_precision": true,
"optimizer": "Adam",
"beta1": 0.9,
"beta2": 0.98,
"eps": 1e-9,
"weight_decay": 1e-6,
"lr_schedule": "Transformer",
"schedule_dim": 240,
"warmup_steps": 10000,
"K": 2,
"train_audio_max_length": 256000,
"train_label_max_length": 256000,
"eval_audio_max_length": 256000,
"eval_label_max_length": 256000,
"training_dataset": "Vietnamese",
"training_dataset_path": "/mnt/c/Users/hyngu/Data/ASRDataset/",
"evaluation_dataset": "Vietnamese",
"evaluation_dataset_path": "/mnt/c/Users/hyngu/Data/ASRDataset/",
"callback_path": "callbacks/EfficientConformerCTCSmall/"
},
"decoding_params":
{
"beam_size": 15,
"tmp": 1,
"ngram_path": "data/6gram_lm_corpus.binary",
"ngram_alpha": 0.4,
"ngram_beta": 1.0,
"lm_config": "configs/LM-Transformer.json",
"lm_weight": 1,
"lm_tmp": 1
}
}