# Hugging Face Space (status when captured: Paused)
import os

# Point the Hugging Face cache at a writable location to avoid permission
# issues. This has to happen BEFORE `transformers` is imported: the Hugging
# Face libraries resolve their cache paths from the environment when they are
# first imported, so assigning HF_HOME afterwards has no effect.
os.environ['HF_HOME'] = '/tmp/.cache'

from io import BytesIO  # noqa: E402

import Levenshtein  # noqa: E402
from flask import Flask, request, jsonify, render_template  # noqa: E402
from flask_cors import CORS  # noqa: E402
from pydub import AudioSegment  # noqa: E402
from transformers import pipeline  # noqa: E402

# Set the FFmpeg paths explicitly so pydub does not rely on PATH lookup.
AudioSegment.converter = "/usr/bin/ffmpeg"
AudioSegment.ffprobe = "/usr/bin/ffprobe"

app = Flask(__name__)
CORS(app)

# Use Hugging Face ASR pipeline for automatic speech recognition.
# Built once at import time and shared by every request.
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="jonatasgrosman/wav2vec2-large-xlsr-53-arabic",
)
def convert_to_wav(audio_bytes):
    """Re-encode an in-memory audio clip as WAV using pydub.

    The input container/codec is auto-detected by pydub. On success a
    ``BytesIO`` positioned at offset 0 and holding the WAV data is returned.
    Any failure is printed and reported to the caller as ``None`` rather
    than raised.
    """
    try:
        source = BytesIO(audio_bytes)
        segment = AudioSegment.from_file(source)  # format is auto-detected
        buffer = BytesIO()
        segment.export(buffer, format="wav")
        # Rewind so the caller reads from the start of the WAV data.
        buffer.seek(0)
        return buffer
    except Exception as e:
        print(f"Error converting audio: {e}")
        return None
def transcribe_audio(audio_bytes):
    """Transcribe an audio clip with the Hugging Face ASR pipeline.

    Args:
        audio_bytes: Raw bytes of the uploaded audio in any format that
            ``convert_to_wav`` can decode.

    Returns:
        The recognised text with surrounding whitespace stripped.

    Raises:
        ValueError: If the audio could not be converted to WAV.
    """
    wav_io = convert_to_wav(audio_bytes)
    if wav_io is None:
        raise ValueError("Could not convert audio to WAV format")
    # The ASR pipeline accepts a filename, raw bytes, or a numpy array, but
    # not a file-like object, so hand it the WAV payload as bytes.
    result = asr_pipeline(wav_io.getvalue())
    return result["text"].strip()
def levenshtein_similarity(transcription1, transcription2):
    """Return a normalised similarity score between two transcriptions.

    The score is ``1 - distance / max_len``, where ``distance`` is the
    Levenshtein edit distance and ``max_len`` is the length of the longer
    string. Identical strings score ``1.0``.

    Args:
        transcription1: First transcription.
        transcription2: Second transcription.

    Returns:
        ``1.0`` for identical inputs (including two empty strings),
        otherwise ``1 - distance / max_len``.
    """
    # Identical inputs are a perfect match. Handling them first also covers
    # the case of two empty strings, where max_len would be 0 and the
    # division below would raise ZeroDivisionError.
    if transcription1 == transcription2:
        return 1.0
    distance = Levenshtein.distance(transcription1, transcription2)
    max_len = max(len(transcription1), len(transcription2))
    return 1 - distance / max_len
# Register the view with the app; without a route Flask never serves it.
@app.route('/')
def index():
    """Render the ``index.html`` template as the landing page."""
    return render_template('index.html')
# Register the view with the app; without a route Flask never serves it.
# NOTE(review): the path and method are assumed from the handler name and its
# use of uploaded files; confirm against the client code in index.html.
@app.route('/transcribe', methods=['POST'])
def transcribe():
    """Transcribe two uploaded recordings and score how similar they are.

    Expects multipart form fields ``original_audio`` and ``user_audio``.

    Returns:
        A JSON response with ``transcription_original``,
        ``transcription_user`` and ``similarity_score``. If either upload
        is missing, a JSON ``error`` with status 400. If transcription
        fails, a JSON ``error`` with status 500.
    """
    original_audio = request.files.get('original_audio')
    user_audio = request.files.get('user_audio')
    if original_audio is None or user_audio is None:
        # Report a missing upload in the same JSON shape as other errors.
        return jsonify({
            "error": "Both 'original_audio' and 'user_audio' files are required"
        }), 400

    original_audio_bytes = original_audio.read()
    user_audio_bytes = user_audio.read()

    try:
        transcription_original = transcribe_audio(original_audio_bytes)
        transcription_user = transcribe_audio(user_audio_bytes)
    except Exception as e:
        # Request boundary: surface any transcription failure to the client.
        return jsonify({"error": str(e)}), 500

    similarity_score = levenshtein_similarity(transcription_original, transcription_user)
    return jsonify({
        "transcription_original": transcription_original,
        "transcription_user": transcription_user,
        "similarity_score": similarity_score
    })
if __name__ == "__main__":
    # Start Flask's built-in server on all interfaces, port 7860, with the
    # debugger disabled.
    app.run(host="0.0.0.0", port=7860, debug=False)