Spaces:

barbaroo
/

ASR_Faroese

Paused

File size: 2,501 Bytes

7b18d60
 
502159a
eb134bd
d9a4d6b
7b18d60
68a9c43
 
882143b
68a9c43
 
0c0c610
 
68a9c43
0c0c610
 
0659665
 
 
 
 
 
 
 
 
a762ace
 
0659665
 
d9a4d6b
a762ace
ebd3d99
a762ace
d9a4d6b
ebd3d99
a762ace
2e0c6c8
d9a4d6b
 
ebd3d99
93c12ab
d9a4d6b
0c0c610
93c12ab
0c0c610
12974e0
1d903b8
d9a4d6b
 
0c0c610
 
a762ace
 
0c0c610
 
a762ace
93c12ab
12974e0
0c0c610
 
 
12974e0
0c0c610
 
a762ace
0c0c610
 
12974e0
0c0c610
 
 
 
 
12974e0
2e0c6c8
93c12ab

import logging
import os
import time

import ffmpeg  # Make sure it's ffmpeg-python
import gradio as gr
import torch
from transformers import pipeline

# Build the speech-recognition pipeline once at import time. When CUDA is
# available the model is placed on device 0; otherwise no device argument is
# passed and the pipeline falls back to its own default.
_MODEL_ID = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"

use_gpu = torch.cuda.is_available()
_device_kwargs = {"device": 0} if use_gpu else {}

p = pipeline("automatic-speech-recognition", model=_MODEL_ID, **_device_kwargs)

def extract_audio_from_m3u8(url):
    """Download the audio of a remote stream into a local AAC file.

    The URL originates from a user-editable textbox, so only ``http://`` and
    ``https://`` addresses are handed to ffmpeg; anything else (local paths,
    ``file:``, ``concat:`` and similar inputs) is rejected before ffmpeg runs.

    Args:
        url: Address of the stream, typically an m3u8 playlist.

    Returns:
        The path of the written file (``"output_audio.aac"``) on success.
        Nothing is raised on failure; instead a message starting with
        ``"An error occurred: "`` is returned, so callers must check the
        result before treating it as a path.
    """
    try:
        cleaned = url.strip() if isinstance(url, str) else ""
        # Refuse everything but web URLs so a crafted value cannot make
        # ffmpeg read files from the server's own disk.
        if not cleaned.lower().startswith(("http://", "https://")):
            raise ValueError("only http:// and https:// URLs are supported")

        output_file = "output_audio.aac"
        ffmpeg.input(cleaned).output(output_file).run(overwrite_output=True)
        return output_file
    except Exception as e:
        return f"An error occurred: {e}"

def transcribe_function(audio, state, m3u8_url):
    """Transcribe one audio input and append the text to the transcript.

    Args:
        audio: Path of the recorded or uploaded audio file, or a falsy value
            when none was provided.
        state: Transcript text accumulated so far.
        m3u8_url: Optional stream URL. When non-empty, its audio is extracted
            and used instead of ``audio``.

    Returns:
        A dict keyed by the ``state_var`` and ``transcription_var`` components
        with their new values. On success both hold the extended transcript.
        On an extraction or transcription failure the transcription box shows
        an error message and the stored transcript is left unchanged.
    """
    # If an m3u8 URL is provided, it takes precedence over the audio widget
    if m3u8_url:
        audio = extract_audio_from_m3u8(m3u8_url)
        # The extractor signals failure by returning a message instead of a
        # path. Surface that message rather than feeding it to the model as
        # if it were a file name.
        if not audio or not os.path.isfile(audio):
            message = audio or "An error occurred: no audio could be extracted."
            return {transcription_var: message, state_var: state}

    # If no audio is provided, return the current state
    if not audio:
        return {state_var: state, transcription_var: state}

    try:
        text = p(audio, chunk_length_s=50)["text"]
        state += text + "\n"
        return {state_var: state, transcription_var: state}
    except Exception:
        # Keep the user-facing message generic, but record the real cause.
        logging.getLogger(__name__).exception("Transcription failed for %r", audio)
        return {transcription_var: "An error occurred during transcription.", state_var: state}

def reset_output(transcription, state):
    """Return empty strings for both outputs, ignoring the current values.

    Args:
        transcription: Current content of the transcription box (unused).
        state: Current accumulated transcript (unused).

    Returns:
        A ``(transcription, state)`` pair of empty strings.
    """
    cleared = ""
    return cleared, cleared

# Assemble the interface: one row with an input column and an output column,
# then a second row with the two action buttons.
with gr.Blocks() as demo:
    # Carries the accumulated transcript from one button click to the next
    state_var = gr.State("")

    with gr.Row():
        with gr.Column():
            # One Audio widget covers both microphone recording and file upload
            audio_input = gr.Audio(
                type="filepath",
                label="Audio (Record or Upload)",
            )
            m3u8_url = gr.Textbox(
                label="m3u8 URL | E.g.: from kvf.fo or logting.fo",
            )
        with gr.Column():
            # interactive=False keeps the transcript box read-only
            transcription_var = gr.Textbox(
                type="text",
                label="Transcription",
                interactive=False,
            )

    with gr.Row():
        transcribe_button = gr.Button("Transcribe")
        reset_button = gr.Button("Reset output")

    # Both handlers write to the visible transcript and to the stored state
    transcribe_button.click(
        fn=transcribe_function,
        inputs=[audio_input, state_var, m3u8_url],
        outputs=[transcription_var, state_var],
    )
    reset_button.click(
        fn=reset_output,
        inputs=[transcription_var, state_var],
        outputs=[transcription_var, state_var],
    )

demo.launch()