Rajkumar Pramanik "RJproz" committed on
Commit
77ab639
·
1 Parent(s): ffd626f
Files changed (2) hide show
  1. .DS_Store +0 -0
  2. app.py +28 -27
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
app.py CHANGED
@@ -6,7 +6,6 @@ import numpy as np
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
  from snac import SNAC
8
 
9
- print(gr.__version__)
10
  # Mock spaces module for local testing
11
  try:
12
  import spaces
@@ -132,19 +131,18 @@ def preset_selected(preset_name):
132
  return char["description"], char["example_text"]
133
  return "", ""
134
 
135
- def greet(name):
136
- print(f"called greet")
137
- return f"Hello {name}"
138
-
139
-
140
  @spaces.GPU
141
  def generate_speech(description, text, temperature, max_tokens):
142
  """Generate emotional speech from description and text using Transformers."""
143
- print(f"called generate_speech gpu")
144
  try:
145
-
146
  load_models()
147
-
 
 
 
 
 
148
  if not description or not text:
149
  return None, "Error: Please provide both description and text!"
150
 
@@ -212,7 +210,6 @@ def generate_speech(description, text, temperature, max_tokens):
212
  duration = len(audio) / AUDIO_SAMPLE_RATE
213
  status_msg = f"Generated {duration:.2f}s of emotional speech!"
214
 
215
-
216
  return tmp_path, status_msg
217
 
218
  except Exception as e:
@@ -236,28 +233,29 @@ with gr.Blocks(title="Maya1 - Open Source Emotional TTS", theme=gr.themes.Soft()
236
 
237
  with gr.Row():
238
  with gr.Column(scale=1):
239
-
240
- # gr.Markdown("### Character Selection")
241
 
242
- # preset_dropdown = gr.Dropdown(
243
- # choices=list(PRESET_CHARACTERS.keys()),
244
- # label="Preset Characters",
245
- # value=list(PRESET_CHARACTERS.keys())[0],
246
- # info="Quick pick from 4 preset characters"
247
- # )
248
 
249
- # gr.Markdown("### Voice Design")
250
 
251
  description_input = gr.Textbox(
252
  label="Voice Description",
253
  placeholder="E.g., Male voice in their 30s with american accent. Normal pitch, warm timbre...",
254
- lines=3
 
255
  )
256
 
257
  text_input = gr.Textbox(
258
  label="Text to Speak",
259
  placeholder="Enter text with <emotion> tags like <laugh>, <sigh>, <excited>...",
260
- lines=4
 
261
  )
262
 
263
  with gr.Accordion("Advanced Settings", open=False):
@@ -290,7 +288,6 @@ with gr.Blocks(title="Maya1 - Open Source Emotional TTS", theme=gr.themes.Soft()
290
  interactive=False
291
  )
292
 
293
-
294
  status_output = gr.Textbox(
295
  label="Status",
296
  lines=3,
@@ -305,15 +302,19 @@ with gr.Blocks(title="Maya1 - Open Source Emotional TTS", theme=gr.themes.Soft()
305
  `<sing>` `<whisper>`
306
  """)
307
 
308
-
 
 
 
 
 
309
 
310
  generate_btn.click(
311
  fn=generate_speech,
312
  inputs=[description_input, text_input, temperature_slider, max_tokens_slider],
313
- outputs=[audio_output, status_output],
314
- api_name="generate_speech"
315
  )
316
 
317
- demo.queue().launch(show_api=True)
318
-
319
 
 
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
  from snac import SNAC
8
 
 
9
  # Mock spaces module for local testing
10
  try:
11
  import spaces
 
131
  return char["description"], char["example_text"]
132
  return "", ""
133
 
 
 
 
 
 
134
  @spaces.GPU
135
  def generate_speech(description, text, temperature, max_tokens):
136
  """Generate emotional speech from description and text using Transformers."""
 
137
  try:
138
+ # Load models if not already loaded
139
  load_models()
140
+
141
+ # If using preset, override description
142
+ # if preset_name and preset_name in PRESET_CHARACTERS:
143
+ # description = PRESET_CHARACTERS[preset_name]["description"]
144
+
145
+ # Validate inputs
146
  if not description or not text:
147
  return None, "Error: Please provide both description and text!"
148
 
 
210
  duration = len(audio) / AUDIO_SAMPLE_RATE
211
  status_msg = f"Generated {duration:.2f}s of emotional speech!"
212
 
 
213
  return tmp_path, status_msg
214
 
215
  except Exception as e:
 
233
 
234
  with gr.Row():
235
  with gr.Column(scale=1):
236
+ gr.Markdown("### Character Selection")
 
237
 
238
+ preset_dropdown = gr.Dropdown(
239
+ choices=list(PRESET_CHARACTERS.keys()),
240
+ label="Preset Characters",
241
+ value=list(PRESET_CHARACTERS.keys())[0],
242
+ info="Quick pick from 4 preset characters"
243
+ )
244
 
245
+ gr.Markdown("### Voice Design")
246
 
247
  description_input = gr.Textbox(
248
  label="Voice Description",
249
  placeholder="E.g., Male voice in their 30s with american accent. Normal pitch, warm timbre...",
250
+ lines=3,
251
+ value=PRESET_CHARACTERS[list(PRESET_CHARACTERS.keys())[0]]["description"]
252
  )
253
 
254
  text_input = gr.Textbox(
255
  label="Text to Speak",
256
  placeholder="Enter text with <emotion> tags like <laugh>, <sigh>, <excited>...",
257
+ lines=4,
258
+ value=PRESET_CHARACTERS[list(PRESET_CHARACTERS.keys())[0]]["example_text"]
259
  )
260
 
261
  with gr.Accordion("Advanced Settings", open=False):
 
288
  interactive=False
289
  )
290
 
 
291
  status_output = gr.Textbox(
292
  label="Status",
293
  lines=3,
 
302
  `<sing>` `<whisper>`
303
  """)
304
 
305
+ # Event handlers
306
+ preset_dropdown.change(
307
+ fn=preset_selected,
308
+ inputs=[preset_dropdown],
309
+ outputs=[description_input, text_input]
310
+ )
311
 
312
  generate_btn.click(
313
  fn=generate_speech,
314
  inputs=[description_input, text_input, temperature_slider, max_tokens_slider],
315
+ outputs=[audio_output, status_output]
 
316
  )
317
 
318
+ if __name__ == "__main__":
319
+ demo.launch()
320