Spaces:

kunalpro379
/

tts

Runtime error

App Files Community

kunalpro379 committed on Jun 4

Commit

46ab128

verified ·

1 Parent(s): 6fea906

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -28

app.py CHANGED Viewed

@@ -6,50 +6,49 @@ import numpy as np
 import tempfile
 # Load model and tokenizer
-device = "cpu"  # or "cuda" if available
 model = AutoModel.from_pretrained("ai4bharat/vits_rasa_13", trust_remote_code=True).to(device)
 tokenizer = AutoTokenizer.from_pretrained("ai4bharat/vits_rasa_13", trust_remote_code=True)
-# Mapping: language -> speaker_id
 LANG_SPEAKER_MAP = {
-    "asm": 0, "ben": 2, "brx": 4, "doi": 6,
-    "kan": 8, "mai": 10, "mal": 11,
-    "mar": 13, "nep": 14, "pan": 16,
-    "san": 17, "tam": 18, "tel": 19,
-    "hin": 13  # use Marathi Male voice for Hindi (close)
 }
-# Mapping: Style (fixed default)
 DEFAULT_STYLE_ID = 0  # ALEXA
-def tts_from_json(json_input):
-    try:
-        text = json_input["text"]
-        lang = json_input["language"].lower()
-        speaker_id = LANG_SPEAKER_MAP.get(lang)
-        if speaker_id is None:
-            return f"Language '{lang}' not supported."
-        inputs = tokenizer(text=text, return_tensors="pt").to(device)
         outputs = model(inputs['input_ids'], speaker_id=speaker_id, emotion_id=DEFAULT_STYLE_ID)
-        waveform = outputs.waveform.squeeze().cpu().numpy()
-        sample_rate = model.config.sampling_rate
-        # Save to temp file for Gradio playback
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
-            sf.write(f.name, waveform, sample_rate)
-            return sample_rate, waveform
-    except Exception as e:
-        return f"Error: {str(e)}"
 iface = gr.Interface(
-    fn=tts_from_json,
-    inputs=gr.JSON(label="Input JSON: {'text': '...', 'language': 'mar/hin/san'}"),
     outputs=gr.Audio(label="Generated Audio"),
     title="VITS TTS for Indian Languages (Marathi, Hindi, Sanskrit)",
-    description="Uses ai4bharat/vits_rasa_13. Supports Marathi, Hindi, and Sanskrit."
 )
 iface.launch()

 import tempfile
 # Load model and tokenizer
+device = "cpu"  # Change to "cuda" if you have GPU
 model = AutoModel.from_pretrained("ai4bharat/vits_rasa_13", trust_remote_code=True).to(device)
 tokenizer = AutoTokenizer.from_pretrained("ai4bharat/vits_rasa_13", trust_remote_code=True)
+# Speaker IDs for languages
 LANG_SPEAKER_MAP = {
+    "mar": 13,  # Marathi Male
+    "hin": 13,  # Reuse Marathi Male for Hindi
+    "san": 17   # Sanskrit Male
 }
 DEFAULT_STYLE_ID = 0  # ALEXA
+def generate_audio(text, language):
+    if not text.strip():
+        return "Error: Text cannot be empty."
+    speaker_id = LANG_SPEAKER_MAP.get(language.lower())
+    if speaker_id is None:
+        return f"Unsupported language: {language}"
+    inputs = tokenizer(text=text, return_tensors="pt").to(device)
+    with torch.no_grad():
         outputs = model(inputs['input_ids'], speaker_id=speaker_id, emotion_id=DEFAULT_STYLE_ID)
+    waveform = outputs.waveform.squeeze().cpu().numpy()
+    sample_rate = model.config.sampling_rate
+    # Save temp audio
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+        sf.write(f.name, waveform, sample_rate)
+        return sample_rate, waveform
+# Gradio Interface with clean inputs
 iface = gr.Interface(
+    fn=generate_audio,
+    inputs=[
+        gr.Textbox(label="Enter Text"),
+        gr.Dropdown(["mar", "hin", "san"], label="Select Language")
+    ],
     outputs=gr.Audio(label="Generated Audio"),
     title="VITS TTS for Indian Languages (Marathi, Hindi, Sanskrit)",
+    description="Uses ai4bharat/vits_rasa_13. Enter text and select a language."
 )
 iface.launch()