PyTorch
ONNX
English
Catalan
File size: 1,114 Bytes
d6ec671
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Training experiment section.
# NOTE(review): class_path / init_args presumably follow the jsonargparse /
# LightningCLI convention (build `class_path` with `init_args` as keyword
# arguments) — confirm against the loader.
model:
  class_path: vocos.experiment.VocosEncodecExp
  init_args:
    sample_rate: 24000
    # Written with an explicit decimal point: YAML 1.1 loaders such as PyYAML
    # resolve a bare `5e-4` to the string "5e-4" instead of a float.
    initial_learning_rate: 5.0e-4
    mel_loss_coeff: 45
    mrd_loss_coeff: 1.0
    num_warmup_steps: 0  # optimizer warmup steps
    pretrain_mel_steps: 0  # 0 means GAN objective from the first iteration

    # automatic evaluation
    evaluate_utmos: true
    evaluate_pesq: true
    # NOTE(review): "periodicty" looks like a misspelling of "periodicity", but
    # the key has to match the parameter name the consumer expects — confirm
    # before renaming.
    evaluate_periodicty: true

# Feature extractor section.
# NOTE(review): class_path / init_args presumably follow the jsonargparse /
# LightningCLI instantiation convention — confirm against the loader.
feature_extractor:
  class_path: vocos.feature_extractors.EncodecFeatures
  init_args:
    encodec_model: encodec_24khz
    # Four entries; backbone.init_args.adanorm_num_embeddings is annotated as
    # len(bandwidths), so keep the two in step when editing this list.
    bandwidths:
      - 1.5
      - 3.0
      - 6.0
      - 12.0
    train_codebooks: false

# Backbone section.
# NOTE(review): class_path / init_args presumably follow the jsonargparse /
# LightningCLI instantiation convention — confirm against the loader.
backbone:
  class_path: vocos.models.VocosBackbone
  init_args:
    input_channels: 128
    # Same value as head.init_args.dim; presumably the two must agree — confirm.
    dim: 384
    intermediate_dim: 1152  # equals 3 * dim
    num_layers: 8
    # Must track the number of entries in feature_extractor.init_args.bandwidths.
    adanorm_num_embeddings: 4  # len(bandwidths)

# Output head section.
# NOTE(review): class_path / init_args presumably follow the jsonargparse /
# LightningCLI instantiation convention — confirm against the loader.
head:
  class_path: vocos.heads.WaveNextHead
  init_args:
    # Same value as backbone.init_args.dim; presumably the two must agree — confirm.
    dim: 384
    # n_fft is 4 * hop_length here; these values differ from the
    # n_fft / hop_length pair (1024 / 256) under melspec_loss.
    n_fft: 1280
    hop_length: 320
    padding: same

# Mel-spectrogram reconstruction loss section.
# NOTE(review): class_path / init_args presumably follow the jsonargparse /
# LightningCLI instantiation convention — confirm against the loader.
melspec_loss:
  class_path: vocos.loss.MelSpecReconstructionLoss
  init_args:
    sample_rate: 24000  # same value as model.init_args.sample_rate
    # Differs from the head's n_fft / hop_length pair (1280 / 320).
    n_fft: 1024
    hop_length: 256
    n_mels: 128
    f_min: 0
    f_max: 12000  # sample_rate / 2
    # Written with an explicit decimal point: YAML 1.1 loaders such as PyYAML
    # resolve a bare `1e-7` to the string "1e-7" instead of a float.
    clip_val: 1.0e-7