Rajkumar Pramanik (RJproz) committed
Commit · 77ab639
1 Parent(s): ffd626f

bug fixes
.DS_Store
CHANGED

Binary files a/.DS_Store and b/.DS_Store differ
app.py
CHANGED

@@ -6,7 +6,6 @@ import numpy as np
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from snac import SNAC
 
-print(gr.__version__)
 # Mock spaces module for local testing
 try:
     import spaces
@@ -132,19 +131,18 @@ def preset_selected(preset_name):
         return char["description"], char["example_text"]
     return "", ""
 
-def greet(name):
-    print(f"called greet")
-    return f"Hello {name}"
-
-
 @spaces.GPU
 def generate_speech(description, text, temperature, max_tokens):
     """Generate emotional speech from description and text using Transformers."""
-    print(f"called generate_speech gpu")
     try:
-
+        # Load models if not already loaded
         load_models()
-
+
+        # If using preset, override description
+        # if preset_name and preset_name in PRESET_CHARACTERS:
+        #     description = PRESET_CHARACTERS[preset_name]["description"]
+
+        # Validate inputs
         if not description or not text:
             return None, "Error: Please provide both description and text!"
 
@@ -212,7 +210,6 @@ def generate_speech(description, text, temperature, max_tokens):
         duration = len(audio) / AUDIO_SAMPLE_RATE
         status_msg = f"Generated {duration:.2f}s of emotional speech!"
 
-
         return tmp_path, status_msg
 
     except Exception as e:
@@ -236,28 +233,29 @@ with gr.Blocks(title="Maya1 - Open Source Emotional TTS", theme=gr.themes.Soft()
 
     with gr.Row():
         with gr.Column(scale=1):
-
-            # gr.Markdown("### Character Selection")
+            gr.Markdown("### Character Selection")
 
-
-
-
-
-
-
+            preset_dropdown = gr.Dropdown(
+                choices=list(PRESET_CHARACTERS.keys()),
+                label="Preset Characters",
+                value=list(PRESET_CHARACTERS.keys())[0],
+                info="Quick pick from 4 preset characters"
+            )
 
-
+            gr.Markdown("### Voice Design")
 
             description_input = gr.Textbox(
                 label="Voice Description",
                 placeholder="E.g., Male voice in their 30s with american accent. Normal pitch, warm timbre...",
-                lines=3
+                lines=3,
+                value=PRESET_CHARACTERS[list(PRESET_CHARACTERS.keys())[0]]["description"]
             )
 
             text_input = gr.Textbox(
                 label="Text to Speak",
                 placeholder="Enter text with <emotion> tags like <laugh>, <sigh>, <excited>...",
-                lines=4
+                lines=4,
+                value=PRESET_CHARACTERS[list(PRESET_CHARACTERS.keys())[0]]["example_text"]
            )
 
             with gr.Accordion("Advanced Settings", open=False):
@@ -290,7 +288,6 @@ with gr.Blocks(title="Maya1 - Open Source Emotional TTS", theme=gr.themes.Soft()
                 interactive=False
             )
 
-
            status_output = gr.Textbox(
                 label="Status",
                 lines=3,
@@ -305,15 +302,19 @@ with gr.Blocks(title="Maya1 - Open Source Emotional TTS", theme=gr.themes.Soft()
             `<sing>` `<whisper>`
             """)
 
-
+    # Event handlers
+    preset_dropdown.change(
+        fn=preset_selected,
+        inputs=[preset_dropdown],
+        outputs=[description_input, text_input]
+    )
 
     generate_btn.click(
         fn=generate_speech,
         inputs=[description_input, text_input, temperature_slider, max_tokens_slider],
-        outputs=[audio_output, status_output]
-        api_name="generate_speech"
+        outputs=[audio_output, status_output]
     )
 
-
-
+if __name__ == "__main__":
+    demo.launch()
 
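
Note on the fix in generate_btn.click: the previous call listed api_name="generate_speech" directly after outputs=[audio_output, status_output] with no separating comma, which is invalid Python; this commit drops the api_name argument and keeps only outputs. The preset_dropdown.change call added at the end of the Blocks context uses the standard Gradio event pattern. Below is a minimal, self-contained sketch of that pattern; the two-entry PRESET_CHARACTERS dict is only a placeholder, since the real presets are defined earlier in app.py.

import gradio as gr

# Placeholder presets; the real app defines richer character descriptions.
PRESET_CHARACTERS = {
    "Narrator": {
        "description": "Male voice in their 40s, calm and measured.",
        "example_text": "Once upon a time, in a quiet town...",
    },
    "Excited Host": {
        "description": "Female voice in their 20s, bright and energetic.",
        "example_text": "Welcome back everyone <excited> this is going to be great!",
    },
}

def preset_selected(preset_name):
    # Return the description/example pair for the chosen preset, or blanks.
    char = PRESET_CHARACTERS.get(preset_name)
    if char:
        return char["description"], char["example_text"]
    return "", ""

with gr.Blocks() as demo:
    preset_dropdown = gr.Dropdown(
        choices=list(PRESET_CHARACTERS.keys()),
        value=list(PRESET_CHARACTERS.keys())[0],
        label="Preset Characters",
    )
    description_input = gr.Textbox(label="Voice Description", lines=3)
    text_input = gr.Textbox(label="Text to Speak", lines=4)

    # Selecting a preset pushes its description and example text into both textboxes.
    preset_dropdown.change(
        fn=preset_selected,
        inputs=[preset_dropdown],
        outputs=[description_input, text_input],
    )

if __name__ == "__main__":
    demo.launch()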