{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "name": "Training_Example.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "cells": [
  {
   "cell_type": "code",
   "metadata": {
    "id": "2zx09MSGFHjT"
   },
   "source": [
    "# Install DeepPhonemizer. %pip (unlike !pip) installs into the environment of the running kernel.\n",
    "%pip install deep-phonemizer"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "oZ1L1BlhOUMR"
   },
   "source": [
    "# Download and prepare a dataset\n",
    "# -nc (no-clobber) skips the download when the file is already present, so re-running this\n",
    "# cell does not pile up eng_latn_us_broad.tsv.1, .2, ... copies next to the file read below.\n",
    "!wget -nc https://raw.githubusercontent.com/CUNY-CL/wikipron/master/data/scrape/tsv/eng_latn_us_broad.tsv\n",
    "\n",
    "with open('eng_latn_us_broad.tsv', 'r', encoding='utf-8') as f:\n",
    "    lines = f.readlines()\n",
    "\n",
    "# Prepare data as tuples (lang, word, phoneme)\n",
    "# Every space and the trailing newline are removed from each line before splitting on tabs.\n",
    "lines = [line.replace(' ', '').replace('\\n', '') for line in lines]\n",
    "splits = [line.split('\\t') for line in lines]\n",
    "# Rows that do not have exactly two tab-separated fields are dropped.\n",
    "train_data = [('en_us', s[0], s[1]) for s in splits if len(s)==2]\n",
    "\n",
    "# Show a sample: every 1000th entry among the first 10000\n",
    "for d in train_data[:10000:1000]:\n",
    "    print(d)"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "MiV-OUi0FQ-O"
   },
   "source": [
    "# Read standard config and adjust some params for speedup\n",
    "from dp.utils.io import read_config, save_config\n",
    "import dp\n",
    "import os\n",
    "\n",
    "# The standard config is looked up inside the installed dp package directory.\n",
    "config_file = os.path.join(os.path.dirname(dp.__file__), 'configs', 'forward_config.yaml')\n",
    "config = read_config(config_file)\n",
    "config['training']['epochs'] = 10\n",
    "config['training']['warmup_steps'] = 100\n",
    "config['training']['generate_steps'] = 500\n",
    "config['training']['validate_steps'] = 500\n",
    "# Write the adjusted copy to the working directory as config.yaml.\n",
    "save_config(config, 'config.yaml')\n",
    "\n",
    "# Print each top-level config section for inspection\n",
    "for k, v in config.items():\n",
    "    print(f'{k} {v}')"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "m_SNVv_HN4fR"
   },
   "source": [
    "# Open TensorBoard on the checkpoint directory.\n",
    "# NOTE(review): presumably training writes its logs under /content/checkpoints - confirm against the config.\n",
    "%load_ext tensorboard\n",
    "%tensorboard --logdir /content/checkpoints"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "K09wG-ymXECZ"
},
   "source": [
    "# Run preprocessing on the prepared tuples, then training; both are given the same config file\n",
    "from dp.preprocess import preprocess\n",
    "from dp.train import train\n",
    "\n",
    "config_path = 'config.yaml'\n",
    "preprocess(config_file=config_path, train_data=train_data)\n",
    "train(config_file=config_path)"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "2KcIwL6QdvEJ",
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "outputId": "ed499929-1103-4a22-dce8-4a54d85f9b9e"
   },
   "source": [
    "# Load phonemizer (including the training data dictionary)\n",
    "from dp.phonemizer import Phonemizer\n",
    "\n",
    "checkpoint_path = '/content/checkpoints/best_model.pt'\n",
    "phonemizer = Phonemizer.from_checkpoint(checkpoint_path)\n",
    "\n",
    "# Phonemize one sentence and show the returned value\n",
    "sentence = 'Phonemizing an English text is imposimpable!'\n",
    "phonemized_sentence = phonemizer(sentence, lang='en_us')\n",
    "print(phonemized_sentence)"
   ],
   "execution_count": null,
   "outputs": [
    {
     "output_type": "stream",
     "text": [
      "2021-05-12 10:15:19,916.916 DEBUG phonemizer: Initializing phonemizer with model step 18000\n"
     ],
     "name": "stderr"
    },
    {
     "output_type": "stream",
     "text": [
      "fənəmaɪzɪŋ ən ɪŋɡlɪʃ tɛkst ɪz ɪmpɑsɪmpəbəl!\n"
     ],
     "name": "stdout"
    }
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "Xt85fzFneDno",
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "outputId": "bc1cb892-9cd2-4540-b930-e4443ddf69c2"
   },
   "source": [
    "# Phonemize a list of texts and pull out model predictions with confidence scores\n",
    "texts = ['Phonemizing an US-English text is imposimpable!']\n",
    "list_result = phonemizer.phonemise_list(texts, lang='en_us')\n",
    "\n",
    "# One output line per entry in predictions: word, phonemes, confidence\n",
    "for word, pred in list_result.predictions.items():\n",
    "    print(f'{word} {pred.phonemes} {pred.confidence}')"
   ],
   "execution_count": null,
   "outputs": [
    {
     "output_type": "stream",
     "text": [
      "imposimpable ɪmpɑsɪmpəbəl 0.2185952042855603\n",
      "Phonemizing fənəmaɪzɪŋ 0.22222847233670942\n"
     ],
     "name": "stdout"
    }
   ]
  }
 ]
}