Spaces:
Runtime error
Runtime error
| import torch | |
| from transformers import AutoModel, AutoTokenizer | |
| import gradio as gr | |
| import soundfile as sf | |
| import numpy as np | |
| import tempfile | |
# Load the model and tokenizer once at import time so every request reuses them.
MODEL_ID = "ai4bharat/vits_rasa_13"

# Use the GPU when one is available; otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE: trust_remote_code=True runs Python code shipped in the model repository.
model = AutoModel.from_pretrained(MODEL_ID, trust_remote_code=True).to(device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Speaker ID passed to the model for each supported language code.
LANG_SPEAKER_MAP = {
    "mar": 13,  # Marathi Male
    "hin": 13,  # Reuse Marathi Male for Hindi
    "san": 17,  # Sanskrit Male
}

# Style/emotion ID passed to the model on every request.
DEFAULT_STYLE_ID = 0  # ALEXA
def generate_audio(text, language):
    """Synthesize speech for ``text`` using the speaker mapped to ``language``.

    Args:
        text: Text to speak; must contain at least one non-whitespace
            character.
        language: Language code, matched case-insensitively against the keys
            of ``LANG_SPEAKER_MAP``.

    Returns:
        A ``(sample_rate, waveform)`` tuple, where ``waveform`` is a NumPy
        array taken from the model output and ``sample_rate`` comes from
        ``model.config.sampling_rate``.

    Raises:
        gr.Error: If ``text`` is missing/blank or ``language`` is missing or
            not present in ``LANG_SPEAKER_MAP``.
    """
    # The result feeds a gr.Audio output, so a returned message string would
    # not be shown as a message; raise gr.Error so the UI reports the problem.
    if not text or not text.strip():
        raise gr.Error("Error: Text cannot be empty.")

    # The language dropdown has no default value, so `language` may arrive as
    # None when nothing is selected; treat that as unsupported, not a crash.
    speaker_id = LANG_SPEAKER_MAP.get(language.lower()) if language else None
    if speaker_id is None:
        raise gr.Error(f"Unsupported language: {language}")

    inputs = tokenizer(text=text, return_tensors="pt").to(device)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        outputs = model(
            inputs["input_ids"],
            speaker_id=speaker_id,
            emotion_id=DEFAULT_STYLE_ID,
        )

    waveform = outputs.waveform.squeeze().cpu().numpy()
    sample_rate = model.config.sampling_rate

    # The audio is returned in memory; no temporary file is written, since a
    # file created with delete=False and never removed would pile up per call.
    return sample_rate, waveform
# Build the Gradio UI: a text box and a language selector feed
# generate_audio, and the result is played back by an audio widget.
_text_input = gr.Textbox(label="Enter Text")
_language_input = gr.Dropdown(["mar", "hin", "san"], label="Select Language")
_audio_output = gr.Audio(label="Generated Audio")

iface = gr.Interface(
    fn=generate_audio,
    inputs=[_text_input, _language_input],
    outputs=_audio_output,
    title="VITS TTS for Indian Languages (Marathi, Hindi, Sanskrit)",
    description="Uses ai4bharat/vits_rasa_13. Enter text and select a language.",
)

# Start the web server for the interface.
iface.launch()