Spaces:

dnsy
/

ALIA_Bilingual_Speech_Recognition

Sleeping

App Files Files Community

dnsy committed 20 days ago

Commit

b293249

1 Parent(s): 01e9ce2

HuggingFace

Browse files

Files changed (4) hide show

.python-version +1 -0
main.py +94 -0
pyproject.toml +7 -0
requirements.txt +8 -0

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.12

main.py ADDED Viewed

	@@ -0,0 +1,94 @@

+import os
+import shutil
+import tempfile
+import traceback
+import warnings
+
+import nemo.collections.asr as nemo_asr
+import torch
+import uvicorn
+from fastapi import FastAPI, UploadFile, File, HTTPException
+from starlette.concurrency import run_in_threadpool
+# Name handed to ``from_pretrained`` when the ASR model is loaded at startup.
+MODEL_NAME = "projecte-aina/stt_ca-es_conformer_transducer_large"
+# Port passed to ``uvicorn.run`` when this file is executed as a script.
+PORT = 34450
+# The FastAPI application; the loaded model is stored on ``app.state.model``.
+app = FastAPI(
+    version="1.0.0",
+    title="Audio Transcription API",
+    description="An API to transcribe audio files using the NeMo ASR model.",
+)
+# Startup hook: load the ASR model once and keep it on the application state.
+@app.on_event("startup")
+async def load_model():
+    """
+    Load the pretrained NeMo ASR model and store it on ``app.state.model``.
+
+    The model named by ``MODEL_NAME`` is fetched with ``from_pretrained``,
+    moved to ``"cuda"`` when ``torch.cuda.is_available()`` is true (``"cpu"``
+    otherwise) and switched to eval mode. If any of these steps raises, the
+    error is printed and ``app.state.model`` is set to ``None`` so that the
+    application still starts.
+    """
+    # Silence warnings whose message matches this pattern.
+    warnings.filterwarnings("ignore", ".*was not in range.*")
+    gpu_available = torch.cuda.is_available()
+    device = "cuda" if gpu_available else "cpu"
+    print(
+        "GPU found. The model will run on the GPU."
+        if gpu_available
+        else "Warning: No GPU found. Inference will be very slow on the CPU."
+    )
+    try:
+        asr_model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(model_name=MODEL_NAME)
+        asr_model.to(device)
+        asr_model.eval()
+        app.state.model = asr_model
+        print("Model loaded and ready.")
+    except Exception as load_error:
+        # Best effort: keep the server up and record that no model is available.
+        print(f"Error loading model: {load_error}")
+        app.state.model = None
+# Transcription endpoint.
+@app.post("/tenedor")
+async def transcribe_audio(file: UploadFile = File(...)):
+    """
+    Receives an audio file, transcribes it, and returns the transcription.
+
+    The upload is copied to a uniquely named temporary file, whose path is
+    passed to the model's ``transcribe`` method in a worker thread so the
+    event loop is not blocked. The temporary file is removed afterwards.
+
+    Args:
+        file: The uploaded audio file (multipart form field ``file``).
+
+    Returns:
+        The first transcription result: its ``.text`` attribute when it has
+        one, otherwise the result itself.
+
+    Raises:
+        HTTPException: 503 when no model is loaded, 400 when the model
+            returns no output, 500 for any other error during transcription.
+    """
+    # getattr: the attribute does not exist at all if the startup hook never ran.
+    model = getattr(app.state, "model", None)
+    if model is None:
+        raise HTTPException(status_code=503, detail="Model is not available.")
+    # Reuse only the extension of the client-supplied name. The name itself is
+    # untrusted (it may contain path separators or be missing) and identical
+    # names from concurrent requests must not share a file on disk.
+    suffix = os.path.splitext(os.path.basename(file.filename or ""))[1]
+    temp_audio_path = None
+    try:
+        # delete=False: the file must outlive the 'with' block so the model
+        # can open it by path; it is removed in 'finally' below.
+        with tempfile.NamedTemporaryFile(prefix="temp_", suffix=suffix, delete=False) as buffer:
+            temp_audio_path = buffer.name
+            shutil.copyfileobj(file.file, buffer)
+        # Run the blocking transcribe call in a separate thread.
+        transcriptions = await run_in_threadpool(model.transcribe, [temp_audio_path])
+        if not transcriptions:
+            raise HTTPException(status_code=400, detail="Transcription failed. The model returned no output or an empty result.")
+        # Depending on the NeMo version, each result is either a plain string
+        # or an object exposing the text as '.text'.
+        result_text = transcriptions[0]
+        return getattr(result_text, "text", result_text)
+    except HTTPException:
+        # Deliberate HTTP errors (the 400 above) must not be rewrapped as 500.
+        raise
+    except Exception as e:
+        # Log the full error to the console for debugging.
+        print(f"An error occurred during transcription: {e}")
+        traceback.print_exc()
+        raise HTTPException(status_code=500, detail=f"Error during transcription: {str(e)}") from e
+    finally:
+        # Clean up the temporary file if it was created.
+        if temp_audio_path is not None:
+            try:
+                os.remove(temp_audio_path)
+            except FileNotFoundError:
+                pass
+# Script entry point: serve the app with uvicorn on all interfaces at PORT.
+if __name__ == "__main__":
+    uvicorn.run(
+        app,
+        host="0.0.0.0",
+        port=PORT,
+    )

pyproject.toml ADDED Viewed

	@@ -0,0 +1,7 @@

+[project]
+name = "alia-bilingual-speech-recognition"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = []

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+torch
+torchvision
+nemo-toolkit[all]
+onnx==1.18
+ml-dtypes==0.4.1
+fastapi>=0.119.0
+uvicorn>=0.38.0
+python-multipart>=0.0.20