Rajkumar Pramanik "RJproz" committed on
Commit
77ab639
·
1 Parent(s): ffd626f
Files changed (2) hide show
  1. .DS_Store +0 -0
  2. app.py +28 -27
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
app.py CHANGED
@@ -6,7 +6,6 @@ import numpy as np
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
  from snac import SNAC
8
 
9
- print(gr.__version__)
10
  # Mock spaces module for local testing
11
  try:
12
  import spaces
@@ -132,19 +131,18 @@ def preset_selected(preset_name):
132
  return char["description"], char["example_text"]
133
  return "", ""
134
 
135
- def greet(name):
136
- print(f"called greet")
137
- return f"Hello {name}"
138
-
139
-
140
  @spaces.GPU
141
  def generate_speech(description, text, temperature, max_tokens):
142
  """Generate emotional speech from description and text using Transformers."""
143
- print(f"called generate_speech gpu")
144
  try:
145
-
146
  load_models()
147
-
 
 
 
 
 
148
  if not description or not text:
149
  return None, "Error: Please provide both description and text!"
150
 
@@ -212,7 +210,6 @@ def generate_speech(description, text, temperature, max_tokens):
212
  duration = len(audio) / AUDIO_SAMPLE_RATE
213
  status_msg = f"Generated {duration:.2f}s of emotional speech!"
214
 
215
-
216
  return tmp_path, status_msg
217
 
218
  except Exception as e:
@@ -236,28 +233,29 @@ with gr.Blocks(title="Maya1 - Open Source Emotional TTS", theme=gr.themes.Soft()
236
 
237
  with gr.Row():
238
  with gr.Column(scale=1):
239
-
240
- # gr.Markdown("### Character Selection")
241
 
242
- # preset_dropdown = gr.Dropdown(
243
- # choices=list(PRESET_CHARACTERS.keys()),
244
- # label="Preset Characters",
245
- # value=list(PRESET_CHARACTERS.keys())[0],
246
- # info="Quick pick from 4 preset characters"
247
- # )
248
 
249
- # gr.Markdown("### Voice Design")
250
 
251
  description_input = gr.Textbox(
252
  label="Voice Description",
253
  placeholder="E.g., Male voice in their 30s with american accent. Normal pitch, warm timbre...",
254
- lines=3
 
255
  )
256
 
257
  text_input = gr.Textbox(
258
  label="Text to Speak",
259
  placeholder="Enter text with <emotion> tags like <laugh>, <sigh>, <excited>...",
260
- lines=4
 
261
  )
262
 
263
  with gr.Accordion("Advanced Settings", open=False):
@@ -290,7 +288,6 @@ with gr.Blocks(title="Maya1 - Open Source Emotional TTS", theme=gr.themes.Soft()
290
  interactive=False
291
  )
292
 
293
-
294
  status_output = gr.Textbox(
295
  label="Status",
296
  lines=3,
@@ -305,15 +302,19 @@ with gr.Blocks(title="Maya1 - Open Source Emotional TTS", theme=gr.themes.Soft()
305
  `<sing>` `<whisper>`
306
  """)
307
 
308
-
 
 
 
 
 
309
 
310
  generate_btn.click(
311
  fn=generate_speech,
312
  inputs=[description_input, text_input, temperature_slider, max_tokens_slider],
313
- outputs=[audio_output, status_output],
314
- api_name="generate_speech"
315
  )
316
 
317
- demo.queue().launch(show_api=True)
318
-
319
 
 
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
  from snac import SNAC
8
 
 
9
  # Mock spaces module for local testing
10
  try:
11
  import spaces
 
131
  return char["description"], char["example_text"]
132
  return "", ""
133
 
 
 
 
 
 
134
  @spaces.GPU
135
  def generate_speech(description, text, temperature, max_tokens):
136
  """Generate emotional speech from description and text using Transformers."""
 
137
  try:
138
+ # Load models if not already loaded
139
  load_models()
140
+
141
+ # If using preset, override description
142
+ # if preset_name and preset_name in PRESET_CHARACTERS:
143
+ # description = PRESET_CHARACTERS[preset_name]["description"]
144
+
145
+ # Validate inputs
146
  if not description or not text:
147
  return None, "Error: Please provide both description and text!"
148
 
 
210
  duration = len(audio) / AUDIO_SAMPLE_RATE
211
  status_msg = f"Generated {duration:.2f}s of emotional speech!"
212
 
 
213
  return tmp_path, status_msg
214
 
215
  except Exception as e:
 
233
 
234
  with gr.Row():
235
  with gr.Column(scale=1):
236
+ gr.Markdown("### Character Selection")
 
237
 
238
+ preset_dropdown = gr.Dropdown(
239
+ choices=list(PRESET_CHARACTERS.keys()),
240
+ label="Preset Characters",
241
+ value=list(PRESET_CHARACTERS.keys())[0],
242
+ info="Quick pick from 4 preset characters"
243
+ )
244
 
245
+ gr.Markdown("### Voice Design")
246
 
247
  description_input = gr.Textbox(
248
  label="Voice Description",
249
  placeholder="E.g., Male voice in their 30s with american accent. Normal pitch, warm timbre...",
250
+ lines=3,
251
+ value=PRESET_CHARACTERS[list(PRESET_CHARACTERS.keys())[0]]["description"]
252
  )
253
 
254
  text_input = gr.Textbox(
255
  label="Text to Speak",
256
  placeholder="Enter text with <emotion> tags like <laugh>, <sigh>, <excited>...",
257
+ lines=4,
258
+ value=PRESET_CHARACTERS[list(PRESET_CHARACTERS.keys())[0]]["example_text"]
259
  )
260
 
261
  with gr.Accordion("Advanced Settings", open=False):
 
288
  interactive=False
289
  )
290
 
 
291
  status_output = gr.Textbox(
292
  label="Status",
293
  lines=3,
 
302
  `<sing>` `<whisper>`
303
  """)
304
 
305
+ # Event handlers
306
+ preset_dropdown.change(
307
+ fn=preset_selected,
308
+ inputs=[preset_dropdown],
309
+ outputs=[description_input, text_input]
310
+ )
311
 
312
  generate_btn.click(
313
  fn=generate_speech,
314
  inputs=[description_input, text_input, temperature_slider, max_tokens_slider],
315
+ outputs=[audio_output, status_output]
 
316
  )
317
 
318
+ if __name__ == "__main__":
319
+ demo.launch()
320