fx
Browse files
- tts_harvard.py +8 -8
tts_harvard.py
CHANGED
|
@@ -17,9 +17,9 @@ LABELS = ['arousal', 'dominance', 'valence']
|
|
| 17 |
def load_speech(split=None):
|
| 18 |
DB = [
|
| 19 |
# [dataset, version, table, has_timdeltas_or_is_full_wavfile]
|
| 20 |
-
|
| 21 |
-
['librispeech', '3.1.0', 'test-clean', False],
|
| 22 |
-
|
| 23 |
# ['entertain-playtestcloud', '1.1.0', 'emotion.categories.train.gold_standard', True],
|
| 24 |
# ['erik', '2.2.0', 'emotion.categories.train.gold_standard', True],
|
| 25 |
# ['meld', '1.3.1', 'emotion.categories.train.gold_standard', False],
|
|
@@ -116,19 +116,19 @@ for audio_prompt in ['english',
|
|
| 116 |
# harvard.append(long_sentence.replace('.', ' '))
|
| 117 |
for text in list_of_10['sentences']:
|
| 118 |
if audio_prompt == 'english':
|
| 119 |
-
_p = synthetic_wav_paths[ix % 134]
|
| 120 |
style_vec = msinference.compute_style(_p)
|
| 121 |
elif audio_prompt == 'english_4x':
|
| 122 |
-
_p = synthetic_wav_paths_4x[ix % 134]
|
| 123 |
style_vec = msinference.compute_style(_p)
|
| 124 |
elif audio_prompt == 'human':
|
| 125 |
_p = natural_wav_paths[ix % len(natural_wav_paths)]
|
| 126 |
style_vec = msinference.compute_style(_p)
|
| 127 |
elif audio_prompt == 'foreign':
|
| 128 |
-
_p = synthetic_wav_paths_foreign[ix % 204]
|
| 129 |
style_vec = msinference.compute_style(_p)
|
| 130 |
elif audio_prompt == 'foreign_4x':
|
| 131 |
-
_p = synthetic_wav_paths_foreign_4x[ix %
|
| 132 |
style_vec = msinference.compute_style(_p)
|
| 133 |
else:
|
| 134 |
print('unknonw list of style vector')
|
|
@@ -154,4 +154,4 @@ for audio_prompt in ['english',
|
|
| 154 |
soundfile.write('_st_' + OUT_FILE, total_style, fsr) # take this fs from the loading
|
| 155 |
|
| 156 |
else:
|
| 157 |
-
print('\nALREADY EXISTS\n')
|
|
|
|
| 17 |
def load_speech(split=None):
|
| 18 |
DB = [
|
| 19 |
# [dataset, version, table, has_timdeltas_or_is_full_wavfile]
|
| 20 |
+
# ['crema-d', '1.1.1', 'emotion.voice.test', False],
|
| 21 |
+
#['librispeech', '3.1.0', 'test-clean', False],
|
| 22 |
+
['emodb', '1.2.0', 'emotion.categories.train.gold_standard', False],
|
| 23 |
# ['entertain-playtestcloud', '1.1.0', 'emotion.categories.train.gold_standard', True],
|
| 24 |
# ['erik', '2.2.0', 'emotion.categories.train.gold_standard', True],
|
| 25 |
# ['meld', '1.3.1', 'emotion.categories.train.gold_standard', False],
|
|
|
|
| 116 |
# harvard.append(long_sentence.replace('.', ' '))
|
| 117 |
for text in list_of_10['sentences']:
|
| 118 |
if audio_prompt == 'english':
|
| 119 |
+
_p = synthetic_wav_paths[ix % len(synthetic_wav_paths)] #134]
|
| 120 |
style_vec = msinference.compute_style(_p)
|
| 121 |
elif audio_prompt == 'english_4x':
|
| 122 |
+
_p = synthetic_wav_paths_4x[ix % len(synthetic_wav_paths_4x)] # 134]
|
| 123 |
style_vec = msinference.compute_style(_p)
|
| 124 |
elif audio_prompt == 'human':
|
| 125 |
_p = natural_wav_paths[ix % len(natural_wav_paths)]
|
| 126 |
style_vec = msinference.compute_style(_p)
|
| 127 |
elif audio_prompt == 'foreign':
|
| 128 |
+
_p = synthetic_wav_paths_foreign[ix % len(synthetic_wav_paths_foreign)] #179] # 204 some short styles are discarded
|
| 129 |
style_vec = msinference.compute_style(_p)
|
| 130 |
elif audio_prompt == 'foreign_4x':
|
| 131 |
+
_p = synthetic_wav_paths_foreign_4x[ix % len(synthetic_wav_paths_foreign_4x)] #179] # 204
|
| 132 |
style_vec = msinference.compute_style(_p)
|
| 133 |
else:
|
| 134 |
print('unknonw list of style vector')
|
|
|
|
| 154 |
soundfile.write('_st_' + OUT_FILE, total_style, fsr) # take this fs from the loading
|
| 155 |
|
| 156 |
else:
|
| 157 |
+
print('\nALREADY EXISTS\n')
|