{
  "architectures": ["E2TTS"],
  "backbone": {
    "dim": 1024,
    "depth": 26,
    "heads": 16,
    "ff_mult": 4
  },
  "mel_spectrogram": {
    "n_mel_channels": 100,
    "n_fft": 1024,
    "hop_length": 256,
    "win_length": 1024,
    "target_sample_rate": 24000,
    "mel_spec_type": "vocos"
  },
  "odeint": {
    "method": "euler"
  },
  "sampling": {
    "nfe_step": 32,
    "cfg_strength": 2.0,
    "sway_sampling_coef": -1.0,
    "target_rms": 0.1
  }
}