# File size: 1,114 bytes; ref d6ec671; 51 lines in the original listing.
# Model configuration in class_path / init_args form: each component names an
# importable class and (presumably) the keyword arguments handed to it.
# NOTE(review): the nesting below was reconstructed from the key names because
# the source had lost its indentation — confirm against the consuming loader.
model:
  class_path: vocos.experiment.VocosEncodecExp
  init_args:
    sample_rate: 24000
    # Exponent floats are written with an explicit mantissa dot so that
    # YAML 1.1 loaders (e.g. PyYAML) resolve them as floats, not strings.
    initial_learning_rate: 5.0e-4
    mel_loss_coeff: 45
    mrd_loss_coeff: 1.0
    num_warmup_steps: 0  # optimizer warmup steps
    pretrain_mel_steps: 0  # 0 means GAN objective from the first iteration

    # Automatic evaluation
    evaluate_utmos: true
    evaluate_pesq: true
    # NOTE(review): the key is spelled "periodicty"; it is kept as-is because
    # it has to match the parameter name of the consuming class — verify
    # before renaming.
    evaluate_periodicty: true

    feature_extractor:
      class_path: vocos.feature_extractors.EncodecFeatures
      init_args:
        encodec_model: encodec_24khz
        bandwidths: [1.5, 3.0, 6.0, 12.0]
        train_codebooks: false

    backbone:
      class_path: vocos.models.VocosBackbone
      init_args:
        input_channels: 128
        dim: 384
        intermediate_dim: 1152
        num_layers: 8
        adanorm_num_embeddings: 4  # len(bandwidths)

    head:
      class_path: vocos.heads.WaveNextHead
      init_args:
        dim: 384  # same value as backbone dim
        n_fft: 1280
        hop_length: 320
        padding: same

    # NOTE(review): placed under model.init_args next to head; the original
    # nesting level cannot be recovered from the source — confirm.
    melspec_loss:
      class_path: vocos.loss.MelSpecReconstructionLoss
      init_args:
        sample_rate: 24000
        n_fft: 1024
        hop_length: 256
        n_mels: 128
        f_min: 0
        f_max: 12000
        clip_val: 1.0e-7