| import gradio as gr | |
| import os | |
| from SenseVoiceAx import SenseVoiceAx | |
| from print_utils import rich_transcription_postprocess | |
# Value forwarded to SenseVoiceAx as ``max_len``.
# NOTE(review): presumably the fixed input length the compiled model expects
# -- confirm against the SenseVoiceAx implementation.
max_len = 256

# Relative path: resolved against the current working directory at start-up.
model_path = os.path.join("sensevoice_ax650", "sensevoice.axmodel")

# Explicit raise instead of ``assert`` so the check still runs under
# ``python -O``, where assert statements are removed.
if not os.path.exists(model_path):
    raise FileNotFoundError(f"model {model_path} not exist")

# Single shared recognizer, built once at import time and reused by every
# request handled by speech_to_text().
pipeline = SenseVoiceAx(
    model_path,
    max_len=max_len,
    beam_size=3,
    # Initial language only; speech_to_text() calls choose_language() with the
    # user's selection before each inference.
    language="auto",
    hot_words=None,
    use_itn=True,
    streaming=False,
)
def speech_to_text(audio_path, lang):
    """Run the shared recognition pipeline on one audio file.

    Args:
        audio_path: Path of the audio file to transcribe. Any falsy value
            (``None``, empty string) is treated as "no audio supplied".
        lang: Language selector passed to ``pipeline.choose_language``;
            one of "auto", "zh", "en", "yue", "ja", "ko".

    Returns:
        Whatever ``pipeline.infer`` returns for the file, or the fixed
        placeholder string "无音频" when no audio path was given.
    """
    if audio_path:
        # The pipeline is a module-level object, so the language has to be
        # switched on it before every inference call.
        pipeline.choose_language(language=lang)
        return pipeline.infer(audio_path, print_rtf=False)

    # Nothing to transcribe: answer with the placeholder without touching
    # the pipeline.
    return "无音频"
def main():
    """Assemble the Gradio demo page and start serving it.

    The page has a result text box on top and, below it, an audio input
    next to a language dropdown. Whenever the audio input changes,
    ``speech_to_text`` is called with the current audio path and selected
    language, and its return value is written to the result box.
    """
    language_codes = ["auto", "zh", "en", "yue", "ja", "ko"]

    with gr.Blocks() as app:
        with gr.Row():
            result_box = gr.Textbox(label="识别结果", lines=5)

        with gr.Row():
            # NOTE(review): the label mentions recording, but only "upload"
            # is listed in ``sources`` -- confirm which one is intended.
            audio_widget = gr.Audio(
                sources=["upload"],
                type="filepath",
                label="录制或上传音频",
                format="mp3",
            )
            language_picker = gr.Dropdown(
                choices=language_codes,
                value="auto",
                label="选择音频语言",
            )

        # Only the audio widget triggers recognition; changing the dropdown
        # alone does not re-run it.
        audio_widget.change(
            fn=speech_to_text,
            inputs=[audio_widget, language_picker],
            outputs=result_box,
        )

    # "0.0.0.0" listens on every network interface, not just localhost.
    app.launch(server_name="0.0.0.0")
# Launch the demo only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()