kunalpro379 committed on
Commit
46ab128
·
verified ·
1 Parent(s): 6fea906

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -28
app.py CHANGED
@@ -6,50 +6,49 @@ import numpy as np
6
  import tempfile
7
 
8
  # Load model and tokenizer
9
- device = "cpu" # or "cuda" if available
10
  model = AutoModel.from_pretrained("ai4bharat/vits_rasa_13", trust_remote_code=True).to(device)
11
  tokenizer = AutoTokenizer.from_pretrained("ai4bharat/vits_rasa_13", trust_remote_code=True)
12
 
13
- # Mapping: language -> speaker_id
14
  LANG_SPEAKER_MAP = {
15
- "asm": 0, "ben": 2, "brx": 4, "doi": 6,
16
- "kan": 8, "mai": 10, "mal": 11,
17
- "mar": 13, "nep": 14, "pan": 16,
18
- "san": 17, "tam": 18, "tel": 19,
19
- "hin": 13 # use Marathi Male voice for Hindi (close)
20
  }
21
 
22
- # Mapping: Style (fixed default)
23
  DEFAULT_STYLE_ID = 0 # ALEXA
24
 
25
- def tts_from_json(json_input):
26
- try:
27
- text = json_input["text"]
28
- lang = json_input["language"].lower()
29
 
30
- speaker_id = LANG_SPEAKER_MAP.get(lang)
31
- if speaker_id is None:
32
- return f"Language '{lang}' not supported."
33
 
34
- inputs = tokenizer(text=text, return_tensors="pt").to(device)
 
35
  outputs = model(inputs['input_ids'], speaker_id=speaker_id, emotion_id=DEFAULT_STYLE_ID)
 
 
36
 
37
- waveform = outputs.waveform.squeeze().cpu().numpy()
38
- sample_rate = model.config.sampling_rate
39
-
40
- # Save to temp file for Gradio playback
41
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
42
- sf.write(f.name, waveform, sample_rate)
43
- return sample_rate, waveform
44
- except Exception as e:
45
- return f"Error: {str(e)}"
46
 
 
47
  iface = gr.Interface(
48
- fn=tts_from_json,
49
- inputs=gr.JSON(label="Input JSON: {'text': '...', 'language': 'mar/hin/san'}"),
 
 
 
50
  outputs=gr.Audio(label="Generated Audio"),
51
  title="VITS TTS for Indian Languages (Marathi, Hindi, Sanskrit)",
52
- description="Uses ai4bharat/vits_rasa_13. Supports Marathi, Hindi, and Sanskrit."
53
  )
54
 
55
  iface.launch()
 
 
6
  import tempfile
7
 
8
# Checkpoint shared by the model and its tokenizer.
_MODEL_ID = "ai4bharat/vits_rasa_13"

# Inference device; switch to "cuda" when a GPU is available.
device = "cpu"

# Load the tokenizer and the model, then move the model onto the device.
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, trust_remote_code=True)
model = AutoModel.from_pretrained(_MODEL_ID, trust_remote_code=True).to(device)

# Language code -> speaker id passed to the model.
LANG_SPEAKER_MAP = dict(
    mar=13,  # Marathi male voice
    hin=13,  # Hindi reuses the Marathi male voice
    san=17,  # Sanskrit male voice
)

# Style/emotion id used for every request (ALEXA).
DEFAULT_STYLE_ID = 0
21
 
22
def generate_audio(text, language):
    """Synthesize speech for ``text`` using the voice mapped to ``language``.

    Args:
        text: Text to speak; must contain at least one non-whitespace
            character.
        language: Key of ``LANG_SPEAKER_MAP``; matched case-insensitively.

    Returns:
        A ``(sample_rate, waveform)`` tuple, where ``waveform`` is the NumPy
        array taken from the model output and ``sample_rate`` comes from
        ``model.config.sampling_rate``.

    Raises:
        gr.Error: If ``text`` is empty/blank or ``language`` is missing or
            not present in ``LANG_SPEAKER_MAP``.
    """
    # Function-scope import: the file's import header is outside the visible
    # region, and this is a no-op when torch is already imported there.
    import torch

    # Validation failures are raised rather than returned: this function feeds
    # a gr.Audio output, which cannot display a plain message string.
    if not text or not text.strip():
        raise gr.Error("Error: Text cannot be empty.")

    # `language` may be None when nothing is selected in the UI.
    speaker_id = LANG_SPEAKER_MAP.get((language or "").lower())
    if speaker_id is None:
        raise gr.Error(f"Unsupported language: {language}")

    inputs = tokenizer(text=text, return_tensors="pt").to(device)
    # Inference only, so skip gradient tracking.
    with torch.no_grad():
        outputs = model(
            inputs['input_ids'],
            speaker_id=speaker_id,
            emotion_id=DEFAULT_STYLE_ID,
        )
    waveform = outputs.waveform.squeeze().cpu().numpy()
    sample_rate = model.config.sampling_rate

    # The audio is handed back in memory; no temporary WAV file is written,
    # so nothing is left behind on disk per request.
    return sample_rate, waveform
 
 
 
 
 
40
 
41
# Gradio interface: a free-text box and a language dropdown feed
# generate_audio, whose result is rendered by an audio player.
iface = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.Textbox(label="Enter Text"),
        # Explicit default selection so the callback always receives one of
        # the listed language codes, even if the user never opens the menu.
        gr.Dropdown(["mar", "hin", "san"], value="mar", label="Select Language"),
    ],
    outputs=gr.Audio(label="Generated Audio"),
    title="VITS TTS for Indian Languages (Marathi, Hindi, Sanskrit)",
    description="Uses ai4bharat/vits_rasa_13. Enter text and select a language.",
)

iface.launch()
54
+