In [None]:
!pip install deep-phonemizer

In [None]:
# Dowload and prepare a dataset
!wget https://raw.githubusercontent.com/CUNY-CL/wikipron/master/data/scrape/tsv/eng_latn_us_broad.tsv

with open('eng_latn_us_broad.tsv', 'r', encoding='utf-8') as f:
 lines = f.readlines()

# Prepare data as tuples (lang, word, phoneme)
lines = [l.replace(' ', '').replace('\n', '') for l in lines]
splits = [l.split('\t') for l in lines]
train_data = [('en_us', s[0], s[1]) for s in splits if len(s)==2]

for d in train_data[:10000:1000]:
 print(d)


In [None]:
# Start from the forward config shipped inside the installed `dp` package,
# override a few training parameters for a faster run, and write the result
# to config.yaml in the working directory.
import os

import dp
from dp.utils.io import read_config, save_config

config_file = f'{os.path.dirname(dp.__file__)}/configs/forward_config.yaml'
config = read_config(config_file)

# Speed-up overrides applied to the 'training' section of the config.
training_overrides = {
    'epochs': 10,
    'warmup_steps': 100,
    'generate_steps': 500,
    'validate_steps': 500,
}
for param, value in training_overrides.items():
    config['training'][param] = value

save_config(config, 'config.yaml')

# Show each top-level config section with its contents.
for section, content in config.items():
    print(f'{section} {content}')



In [None]:
# Load the TensorBoard notebook extension and open TensorBoard inline,
# pointed at /content/checkpoints as its log directory.
# NOTE(review): the absolute /content path presumably assumes a Colab runtime
# and that training writes its logs there -- confirm against the training config.
%load_ext tensorboard
%tensorboard --logdir /content/checkpoints

In [None]:
from dp.preprocess import preprocess
from dp.train import train

# Both steps are driven by the adjusted config saved earlier as config.yaml.
config_path = 'config.yaml'

# Preprocess the in-memory (lang, word, phoneme) tuples, then start training.
preprocess(config_file=config_path, train_data=train_data)
train(config_file=config_path)

In [None]:
# Restore a phonemizer (including the training data dictionary) from the
# best_model.pt checkpoint and run it on one demo sentence.
from dp.phonemizer import Phonemizer

checkpoint_path = '/content/checkpoints/best_model.pt'
demo_text = 'Phonemizing an English text is imposimpable!'

phonemizer = Phonemizer.from_checkpoint(checkpoint_path)
result = phonemizer(demo_text, lang='en_us')

print(result)

2021-05-12 10:15:19,916.916 DEBUG phonemizer: Initializing phonemizer with model step 18000


fənəmaɪzɪŋ ən ɪŋɡlɪʃ tɛkst ɪz ɪmpɑsɪmpəbəl!


In [None]:
# Phonemize a list of texts, then print each model prediction with its
# phonemes and confidence score.
texts = ['Phonemizing an US-English text is imposimpable!']
result = phonemizer.phonemise_list(texts, lang='en_us')

# NOTE(review): in the recorded output only some of the input words show up
# here -- presumably the others were resolved without a model prediction; confirm.
for word, prediction in result.predictions.items():
    print(f'{word} {prediction.phonemes} {prediction.confidence}')

imposimpable ɪmpɑsɪmpəbəl 0.2185952042855603
Phonemizing fənəmaɪzɪŋ 0.22222847233670942
