Spaces:

ovieyra21
/

es_speecht5_tts_mabama

Paused

App Files Files Community

ovieyra21 committed on Jul 11, 2024

Commit

27e8b08

verified ·

1 Parent(s): 011aec2

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -10

app.py CHANGED Viewed

@@ -1,16 +1,14 @@
 import gradio as gr
 import torch
 from datasets import load_dataset
-from transformers import pipeline, SpeechT5Processor, SpeechT5HifiGan, SpeechT5ForTextToSpeech
 model_id = "ovieyra21/es_speecht5_tts_mabama"  # update with your model id
-# pipe = pipeline("automatic-speech-recognition", model=model_id)
 model = SpeechT5ForTextToSpeech.from_pretrained(model_id)
 vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
 embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
 speaker_embeddings = torch.tensor(embeddings_dataset[7440]["xvector"]).unsqueeze(0)
-# checkpoint = "microsoft/speecht5_tts"
 processor = SpeechT5Processor.from_pretrained(model_id)
 replacements = [
@@ -27,7 +25,6 @@ replacements = [
     ("ü", "u"),
 ]
 title = "Text-to-Speech"
 description = """
 Demo for text-to-speech translation in French. Demo uses [Sandiago21/speecht5_finetuned_facebook_voxpopuli_french](https://huggingface.co/Sandiago21/speecht5_finetuned_facebook_voxpopuli_french) checkpoint, which is based on Microsoft's
@@ -35,7 +32,6 @@ Demo for text-to-speech translation in French. Demo uses [Sandiago21/speecht5_fi
 ![Text-to-Speech (TTS)"](https://geekflare.com/wp-content/uploads/2021/07/texttospeech-1200x385.png "Diagram of Text-to-Speech (TTS)")
 """
 def cleanup_text(text):
     for src, dst in replacements:
         text = text.replace(src, dst)
@@ -44,16 +40,14 @@ def cleanup_text(text):
 def synthesize_speech(text):
     text = cleanup_text(text)
     inputs = processor(text=text, return_tensors="pt")
     speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
-    return gr.Audio.update(value=(16000, speech.cpu().numpy()))
 syntesize_speech_gradio = gr.Interface(
     synthesize_speech,
-    inputs = gr.Textbox(label="Text", placeholder="Type something here..."),
     outputs=gr.Audio(),
     examples=["Probando audio"],
     title=title,
     description=description,
-).launch()

 import gradio as gr
 import torch
 from datasets import load_dataset
+from transformers import SpeechT5Processor, SpeechT5HifiGan, SpeechT5ForTextToSpeech
 model_id = "ovieyra21/es_speecht5_tts_mabama"  # update with your model id
 model = SpeechT5ForTextToSpeech.from_pretrained(model_id)
 vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
 embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
 speaker_embeddings = torch.tensor(embeddings_dataset[7440]["xvector"]).unsqueeze(0)
 processor = SpeechT5Processor.from_pretrained(model_id)
 replacements = [
     ("ü", "u"),
 ]
 title = "Text-to-Speech"
 description = """
 Demo for text-to-speech translation in French. Demo uses [Sandiago21/speecht5_finetuned_facebook_voxpopuli_french](https://huggingface.co/Sandiago21/speecht5_finetuned_facebook_voxpopuli_french) checkpoint, which is based on Microsoft's
 ![Text-to-Speech (TTS)"](https://geekflare.com/wp-content/uploads/2021/07/texttospeech-1200x385.png "Diagram of Text-to-Speech (TTS)")
 """
 def cleanup_text(text):
     for src, dst in replacements:
         text = text.replace(src, dst)
 def synthesize_speech(text):
     text = cleanup_text(text)
     inputs = processor(text=text, return_tensors="pt")
     speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
+    return (16000, speech.cpu().numpy())  # Devuelve el audio directamente
 syntesize_speech_gradio = gr.Interface(
     synthesize_speech,
+    inputs=gr.Textbox(label="Text", placeholder="Type something here..."),
     outputs=gr.Audio(),
     examples=["Probando audio"],
     title=title,
     description=description,
+).launch()