dnsy committed on
Commit
b293249
·
1 Parent(s): 01e9ce2

HuggingFace

Browse files
Files changed (4) hide show
  1. .python-version +1 -0
  2. main.py +94 -0
  3. pyproject.toml +7 -0
  4. requirements.txt +8 -0
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.12
main.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import shutil
import tempfile
import traceback  # for full error logging on failures
import warnings

import torch
import uvicorn
from fastapi import FastAPI, UploadFile, File, HTTPException
from starlette.concurrency import run_in_threadpool  # run blocking work off the event loop

import nemo.collections.asr as nemo_asr
10
+
11
# Pretrained bilingual (ca/es) Conformer-Transducer checkpoint pulled from the Hub.
MODEL_NAME = "projecte-aina/stt_ca-es_conformer_transducer_large"
# TCP port the uvicorn server listens on.
PORT = 34450

# ASGI application object; the ASR model is attached to app.state at startup.
app = FastAPI(
    title="Audio Transcription API",
    description="An API to transcribe audio files using the NeMo ASR model.",
    version="1.0.0",
)
19
+
20
@app.on_event("startup")
async def load_model():
    """Load the NeMo ASR model once at startup and stash it on ``app.state``.

    On any failure ``app.state.model`` is set to ``None`` so request handlers
    can answer with 503 instead of crashing the whole service.

    NOTE(review): ``@app.on_event`` is deprecated in recent FastAPI in favor
    of lifespan handlers; kept as-is to preserve the module's interface.
    """
    # NeMo emits noisy "... was not in range ..." warnings during inference.
    warnings.filterwarnings("ignore", ".*was not in range.*")

    # Prefer GPU when present; CPU inference works but is far slower.
    if torch.cuda.is_available():
        device = "cuda"
        print("GPU found. The model will run on the GPU.")
    else:
        device = "cpu"
        print("Warning: No GPU found. Inference will be very slow on the CPU.")

    try:
        nemo_asr_model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(model_name=MODEL_NAME)
        nemo_asr_model.to(device)
        nemo_asr_model.eval()  # inference mode: disable dropout etc.
        app.state.model = nemo_asr_model
        print("Model loaded and ready.")
    except Exception as e:
        # Fix: also print the full traceback -- ``traceback`` was imported
        # precisely for this but never used, leaving load failures with only
        # a one-line message.
        print(f"Error loading model: {e}")
        traceback.print_exc()
        app.state.model = None
44
+
45
# --- REVISED ENDPOINT ---
@app.post("/tenedor")
async def transcribe_audio(file: UploadFile = File(...)):
    """Receive an audio file, transcribe it, and return the transcription.

    Raises:
        HTTPException 503: the model failed to load at startup.
        HTTPException 400: the model returned no output.
        HTTPException 500: any unexpected error during transcription.
    """
    if not app.state.model:
        raise HTTPException(status_code=503, detail="Model is not available.")

    # Fix: never build the temp path from the client-supplied filename -- it
    # is untrusted input (path traversal, e.g. "../../x") and two concurrent
    # uploads with the same name would clobber each other. Keep only the
    # extension, which audio decoders may use to pick a backend.
    suffix = os.path.splitext(file.filename or "")[1]
    fd, temp_audio_path = tempfile.mkstemp(suffix=suffix)
    try:
        # Persist the upload to disk; NeMo's transcribe() wants a file path.
        with os.fdopen(fd, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # Synchronous transcription closure, executed in a worker thread.
        def do_transcribe():
            return app.state.model.transcribe([temp_audio_path])

        # Run the blocking call off the event loop; 'await' waits for the
        # result without stalling other requests.
        transcriptions = await run_in_threadpool(do_transcribe)

        if transcriptions and len(transcriptions) > 0:
            # Depending on the NeMo version the result is a list of plain
            # strings or of hypothesis objects carrying a '.text' attribute.
            result_text = transcriptions[0]
            if hasattr(result_text, 'text'):
                return result_text.text
            return result_text
        raise HTTPException(status_code=400, detail="Transcription failed. The model returned no output or an empty result.")

    except HTTPException:
        # Fix: the 400 raised above was previously swallowed by the generic
        # handler below and re-surfaced as a misleading 500. Let FastAPI's
        # own exception type propagate unchanged.
        raise
    except Exception as e:
        # Log the full error (with traceback) to the console for debugging.
        print(f"An error occurred during transcription: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"Error during transcription: {str(e)}")
    finally:
        # Clean up the temporary file.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
91
+
92
# Script entry point: start the ASGI server directly (binds all interfaces
# on PORT); when imported, only the `app` object is exposed.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=PORT)
pyproject.toml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
# Project metadata (PEP 621).
# NOTE(review): runtime dependencies are tracked in requirements.txt, so the
# `dependencies` list below is intentionally empty.
[project]
name = "alia-bilingual-speech-recognition"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.12"
dependencies = []
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
# Runtime dependencies for the transcription service (see main.py).
# onnx / ml-dtypes are pinned for compatibility with nemo-toolkit.
torch
torchvision
nemo-toolkit[all]
onnx==1.18
ml-dtypes==0.4.1
fastapi>=0.119.0
uvicorn>=0.38.0
python-multipart>=0.0.20