Spaces:
Paused
Paused
File size: 2,501 Bytes
7b18d60 502159a eb134bd d9a4d6b 7b18d60 68a9c43 882143b 68a9c43 0c0c610 68a9c43 0c0c610 0659665 a762ace 0659665 d9a4d6b a762ace ebd3d99 a762ace d9a4d6b ebd3d99 a762ace 2e0c6c8 d9a4d6b ebd3d99 93c12ab d9a4d6b 0c0c610 93c12ab 0c0c610 12974e0 1d903b8 d9a4d6b 0c0c610 a762ace 0c0c610 a762ace 93c12ab 12974e0 0c0c610 12974e0 0c0c610 a762ace 0c0c610 12974e0 0c0c610 12974e0 2e0c6c8 93c12ab |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import logging
import os
import tempfile
import time
from urllib.parse import urlparse

import ffmpeg  # Make sure it's ffmpeg-python
import gradio as gr
import torch
from transformers import pipeline
# Detect CUDA once at start-up.
use_gpu = torch.cuda.is_available()

# Build the speech-recognition pipeline. The `device` argument is supplied
# only when a GPU is present; without one, no device is passed at all and the
# pipeline picks its own default.
p = pipeline(
    "automatic-speech-recognition",
    model="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h",
    **({"device": 0} if use_gpu else {}),
)
def extract_audio_from_m3u8(url):
    """Extract the audio of an m3u8 stream into a temporary ``.aac`` file.

    Args:
        url: Address of the stream. Only ``http``/``https`` URLs are accepted:
            the value comes from a user-editable textbox, and ffmpeg would
            otherwise also open local paths and other protocols.

    Returns:
        Path of the extracted audio file. The caller owns the file and is
        responsible for deleting it.

    Raises:
        ValueError: If ``url`` is not an http(s) URL with a host.
        RuntimeError: If ffmpeg could not be run or failed on the stream.
    """
    url = (url or "").strip()
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https") or not parsed.netloc:
        raise ValueError(f"Unsupported m3u8 URL: {url!r}")

    # One unique file per call, so concurrent requests cannot overwrite each
    # other's audio. The ".aac" suffix keeps ffmpeg's output-format choice the
    # same as before.
    fd, output_file = tempfile.mkstemp(suffix=".aac")
    os.close(fd)
    try:
        # mkstemp already created the file, hence overwrite_output=True.
        ffmpeg.input(url).output(output_file).run(overwrite_output=True)
    except (ffmpeg.Error, OSError) as err:
        os.remove(output_file)
        raise RuntimeError(f"An error occurred: {err}") from err
    return output_file


def transcribe_function(audio, state, m3u8_url):
    """Transcribe one audio clip and append the text to the transcript.

    Args:
        audio: File path of the recorded/uploaded audio, or a falsy value
            when none was provided.
        state: Transcript accumulated so far.
        m3u8_url: Optional stream URL. When non-blank it takes precedence
            over ``audio``.

    Returns:
        A dict mapping the ``state_var`` and ``transcription_var`` components
        to their new values. On failure the transcript state is left
        unchanged and the transcription box shows an error message.
    """
    log = logging.getLogger(__name__)
    extracted_file = None
    # Treat a whitespace-only URL the same as an empty one.
    m3u8_url = (m3u8_url or "").strip()

    if m3u8_url:
        try:
            extracted_file = extract_audio_from_m3u8(m3u8_url)
        except (ValueError, RuntimeError):
            log.exception("Could not extract audio from %r", m3u8_url)
            return {
                transcription_var: "An error occurred while extracting audio from the m3u8 URL.",
                state_var: state,
            }
        audio = extracted_file

    # Nothing to transcribe: show the transcript as it stands.
    if not audio:
        return {state_var: state, transcription_var: state}

    try:
        text = p(audio, chunk_length_s=50)["text"]
        updated_state = state + text + "\n"
    except Exception:
        # UI boundary: report a friendly message, but keep the traceback in the logs.
        log.exception("Transcription failed for %r", audio)
        return {transcription_var: "An error occurred during transcription.", state_var: state}
    finally:
        # The extracted stream audio is only needed for this one request.
        if extracted_file is not None:
            try:
                os.remove(extracted_file)
            except OSError:
                log.warning("Could not remove temporary file %r", extracted_file)

    return {state_var: updated_state, transcription_var: updated_state}
def reset_output(transcription, state):
    """Return empty strings for the transcription box and the state.

    Both arguments are accepted only because the click handler passes them;
    their values are ignored.
    """
    cleared = ""
    return cleared, cleared
with gr.Blocks() as demo:
    # Holds the accumulated transcript between button clicks.
    state_var = gr.State("")

    with gr.Row():
        with gr.Column():
            # One Audio component handles both microphone recording and file upload.
            audio_input = gr.Audio(type="filepath", label="Audio (Record or Upload)")
            m3u8_url = gr.Textbox(label="m3u8 URL | E.g.: from kvf.fo or logting.fo")

        with gr.Column():
            # interactive=False makes the textbox read-only.
            transcription_var = gr.Textbox(
                type="text",
                label="Transcription",
                interactive=False,
            )

    with gr.Row():
        transcribe_button = gr.Button("Transcribe")
        reset_button = gr.Button("Reset output")

    # Both handlers write to the visible transcription box and the hidden state.
    transcribe_button.click(
        fn=transcribe_function,
        inputs=[audio_input, state_var, m3u8_url],
        outputs=[transcription_var, state_var],
    )
    reset_button.click(
        fn=reset_output,
        inputs=[transcription_var, state_var],
        outputs=[transcription_var, state_var],
    )

demo.launch()
|