Spaces:

mobenta
/

pdf_audio

Build error

App Files Files Community

mobenta committed on Sep 25, 2024

Commit

1b5a143

verified ·

1 Parent(s): 56d1631

Create app.py

Browse files

Files changed (1) hide show

app.py +175 -0

app.py ADDED Viewed

	@@ -0,0 +1,175 @@

+import concurrent.futures as cf
+import glob
+import io
+import os
+import time
+from pathlib import Path
+from tempfile import NamedTemporaryFile
+from typing import List, Literal
+import re
+from transformers import pipeline
+from pydantic import BaseModel
+# Load the Hugging Face pipelines once, at module import time
+text_generator = pipeline('text-generation', model='EleutherAI/gpt-neo-2.7B')  # called by generate_dialogue
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn")  # NOTE(review): not referenced anywhere in the visible code
+# Instruction templates: template name -> the five prompt sections that update_instructions returns
+INSTRUCTION_TEMPLATES = {
+    "podcast": {
+        "intro": """Your task is to take the input text provided and turn it into a lively, engaging, informative podcast dialogue, in the style of NPR...""",
+        "text_instructions": "First, carefully read through the input text...",
+        "scratch_pad": """Brainstorm creative ways to discuss the main topics...""",
+        "prelude": """Now that you have brainstormed ideas and created a rough outline...""",
+        "dialog": """Write a very long, engaging, informative podcast dialogue..."""
+    }
+}
+# Function to update instruction fields based on template selection
+def update_instructions(template):
+    return (
+        INSTRUCTION_TEMPLATES[template]["intro"],
+        INSTRUCTION_TEMPLATES[template]["text_instructions"],
+        INSTRUCTION_TEMPLATES[template]["scratch_pad"],
+        INSTRUCTION_TEMPLATES[template]["prelude"],
+        INSTRUCTION_TEMPLATES[template]["dialog"]
+    )
+# Define the structure of dialogue: one item is a piece of text attributed to a speaker
+class DialogueItem(BaseModel):
+    text: str  # text content of this item
+    speaker: Literal["speaker-1", "speaker-2"]  # restricted to these two labels by the Literal type
+class Dialogue(BaseModel):  # NOTE(review): not instantiated anywhere in the visible code
+    scratchpad: str  # presumably the model's brainstorming notes — confirm against the prompt format
+    dialogue: List[DialogueItem]  # list of DialogueItem entries
+# Function to read README.md
+def read_readme():
+    readme_path = Path("README.md")
+    if readme_path.exists():
+        with open(readme_path, "r") as file:
+            content = file.read()
+            content = re.sub(r'--.*?--', '', content, flags=re.DOTALL)
+            return content
+    else:
+        return "README.md not found. Please check the repository for more information."
+# Hugging Face-based dialogue generation function
+def generate_dialogue(text: str, intro_instructions: str, text_instructions: str,
+                      scratch_pad_instructions: str, prelude_dialog: str,
+                      podcast_dialog_instructions: str, edited_transcript: str = None,
+                      user_feedback: str = None) -> str:
+    # Combine instructions and text into a prompt
+    full_prompt = f"""
+    {intro_instructions}
+    Original text:
+    {text}
+    {text_instructions}
+    Brainstorming:
+    {scratch_pad_instructions}
+    Prelude:
+    {prelude_dialog}
+    Dialogue:
+    {podcast_dialog_instructions}
+    {edited_transcript if edited_transcript else ""}
+    {user_feedback if user_feedback else ""}
+    """
+    # Generate text using Hugging Face model
+    generated = text_generator(full_prompt, max_length=1000)  # Adjust max_length as needed
+    return generated[0]['generated_text']  # Extract generated text from the response
+# Function to handle audio generation (could be expanded later)
+def get_mp3(text: str, voice: str, audio_model: str) -> bytes:
+    # Placeholder for audio generation; currently not implemented
+    # You can use text-to-speech services or local TTS engines
+    return b""
+# Main audio generation function (adapted for Hugging Face text generation)
+def generate_audio(
+    files: list,
+    text_model: str = "EleutherAI/gpt-neo-2.7B",
+    audio_model: str = "tts-1",
+    speaker_1_voice: str = "alloy",
+    speaker_2_voice: str = "echo",
+    intro_instructions: str = '',
+    text_instructions: str = '',
+    scratch_pad_instructions: str = '',
+    prelude_dialog: str = '',
+    podcast_dialog_instructions: str = '',
+    edited_transcript: str = None,
+    user_feedback: str = None,
+    original_text: str = None,
+    debug = False,
+) -> tuple:
+    # Combine input text from files
+    combined_text = original_text or ""
+    if not combined_text:
+        for file in files:
+            with Path(file).open("rb") as f:
+                text = f.read().decode('utf-8')  # Assuming the PDF text is extracted as UTF-8
+                combined_text += text + "\n\n"
+    # Generate the dialogue using Hugging Face
+    llm_output = generate_dialogue(
+        combined_text,
+        intro_instructions=intro_instructions,
+        text_instructions=text_instructions,
+        scratch_pad_instructions=scratch_pad_instructions,
+        prelude_dialog=prelude_dialog,
+        podcast_dialog_instructions=podcast_dialog_instructions,
+        edited_transcript=edited_transcript,
+        user_feedback=user_feedback
+    )
+    # Placeholder for audio (since TTS implementation is omitted)
+    audio = b""
+    transcript = llm_output
+    characters = len(llm_output)
+    # Generating audio (placeholder logic)
+    with cf.ThreadPoolExecutor() as executor:
+        futures = []
+        for line in llm_output.split('\n'):
+            future = executor.submit(get_mp3, line, speaker_1_voice, audio_model)
+            futures.append(future)
+            characters += len(line)
+        for future in futures:
+            audio_chunk = future.result()
+            audio += audio_chunk
+    temporary_directory = "./tmp/"
+    os.makedirs(temporary_directory, exist_ok=True)
+    # Save audio to a temporary file
+    temporary_file = NamedTemporaryFile(dir=temporary_directory, delete=False, suffix=".mp3")
+    temporary_file.write(audio)
+    temporary_file.close()
+    return temporary_file.name, transcript, combined_text
+# Example call to generate audio; no `__main__` guard is visible, so this runs whenever the module is loaded
+files = ["sample.pdf"]  # Replace with your actual PDF file paths
+audio_file, transcript, original_text = generate_audio(
+    files=files,
+    intro_instructions="Your task is to create a podcast...",
+    text_instructions="Extract the main points...",
+    scratch_pad_instructions="Brainstorm how to present the topics...",
+    prelude_dialog="Now let's write the podcast dialogue...",
+    podcast_dialog_instructions="Write a long and engaging podcast dialogue."
+)
+# Print output transcript (or save it as needed)
+print(transcript)
+# Read and print README content (or the fixed "not found" message)
+print(read_readme())