Spaces:
Runtime error
Runtime error
| import time | |
| import torch | |
| from peft import PeftModel, PeftConfig | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSeq2SeqLM | |
| import gradio as gr | |
| import speech_recognition as sr | |
| from math import log2, pow | |
| import os | |
| #from scipy.fftpack import fft | |
| import gc | |
# Hub id of the PEFT adapter; its config names the base seq2seq checkpoint.
peft_model_id = 'hackathon-somos-nlp-2023/T5unami-small-v1'
config = PeftConfig.from_pretrained(peft_model_id)

# Load the base model first, then wrap it with the adapter weights below.
# The 8-bit loading options are kept here, disabled, as in the original.
model2 = AutoModelForSeq2SeqLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    # load_in_8bit=True,
    # load_in_8bit_fp32_cpu_offload=True,
    device_map='auto',
)
tokenizer2 = AutoTokenizer.from_pretrained(peft_model_id)
model2 = PeftModel.from_pretrained(model2, peft_model_id)

# Absolute path of the sample recording, offered as the single example row
# of the interface (one row containing one value, for the audio input).
Problema_tarjetaCredito = os.path.abspath("Problema_tarjetaCredito.ogg")
list_audios = [[Problema_tarjetaCredito]]
def gen_conversation(text, max_new_tokens=100):
    """Generate the agent's reply to a single instruction.

    The instruction is placed into the "<SN>instruction: " prompt, tokenized
    with ``tokenizer2`` and passed to ``model2.generate``.

    Args:
        text: Instruction text to send to the model.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The last line of the decoded output, with every occurrence of
        "output:" removed.
    """
    prompt = "<SN>instruction: " + text + "\n "
    encoded = tokenizer2(prompt, return_tensors='pt')

    generation_options = dict(
        max_new_tokens=max_new_tokens,
        eos_token_id=tokenizer2.eos_token_id,
        pad_token_id=tokenizer2.pad_token_id,
        bos_token_id=tokenizer2.bos_token_id,
        early_stopping=True,
        no_repeat_ngram_size=2,
        repetition_penalty=1.2,
        temperature=.9,
        num_beams=3,
    )
    generated = model2.generate(**encoded, **generation_options)

    # Run a garbage-collection pass after generation, before decoding.
    gc.collect()

    decoded = tokenizer2.decode(generated[0], skip_special_tokens=True)
    final_line = decoded.split("\n")[-1]
    return final_line.replace("output:", "")
# Accumulated HTML transcript. It is module-level state, so every call to
# speech_to_text appends to (and returns) the same string.
conversacion = ""


def speech_to_text(audio_file, texto_adicional):
    """Run one chat turn from audio or text and return the HTML transcript.

    When ``audio_file`` is given it is transcribed with Google speech
    recognition (Spanish, "es-ES") and the transcription is sent to
    ``gen_conversation``; otherwise ``texto_adicional`` is sent directly.
    The turn is appended to the module-level ``conversacion`` transcript.

    All user-provided and model-generated text is HTML-escaped before it is
    inserted into the transcript, because the result is rendered as HTML.

    Args:
        audio_file: Path to an audio file, or None to use the text input.
        texto_adicional: Text typed by the user; used only when
            ``audio_file`` is None.

    Returns:
        The full transcript as an HTML string. If the audio cannot be read
        or transcribed, the transcript followed by a (non-persisted) error
        notice. If there is no usable input, the transcript unchanged.
    """
    global conversacion
    # Local import so this block does not depend on the file's import header.
    import html

    if audio_file is not None:
        recognizer = sr.Recognizer()
        try:
            # NOTE(review): sr.AudioFile is documented to read WAV/AIFF/FLAC;
            # the bundled example is an .ogg file - confirm it is readable.
            with sr.AudioFile(audio_file) as source:
                audio = recognizer.record(source)
            entrada = recognizer.recognize_google(audio, language="es-ES")
        except (ValueError, sr.UnknownValueError, sr.RequestError):
            # Unreadable file, unintelligible speech, or recognition service
            # failure: report it instead of crashing the request, and keep
            # the stored transcript untouched.
            aviso = (
                "<div style='color: #ff6666;'>"
                "[|Audio a texto|]:no se pudo transcribir el audio"
                "</div><br>"
            )
            return conversacion + aviso
        etiqueta = "[|Audio a texto|]:"
        apertura = "<div style='color: #66b3ff;'>"
    else:
        entrada = texto_adicional
        etiqueta = "[|Solo texto|]:"
        apertura = "<div style='color: #66b3ff;'> "

    # Nothing to send to the model (no audio and empty/missing text).
    if not entrada or not entrada.strip():
        return conversacion

    respuesta = gen_conversation(entrada, max_new_tokens=500)

    turno = (
        apertura
        + f"{etiqueta}{html.escape(entrada)}\n"
        + "<br>[AGENTE]:"
        + html.escape(respuesta)
        + "</div><br>"
    )
    conversacion += turno
    return conversacion
# Input widgets, in the positional order expected by speech_to_text:
# the recorded/uploaded audio (delivered as a file path) and a free-text box.
componentes_entrada = [
    gr.inputs.Audio(label="Voz", type="filepath"),
    gr.inputs.Textbox(label="Texto adicional"),
]

# The accumulated conversation is rendered as HTML.
componente_salida = gr.outputs.HTML(label=["chatbot","state"])

# Build the web interface around speech_to_text and start serving it.
iface = gr.Interface(
    fn=speech_to_text,
    inputs=componentes_entrada,
    outputs=componente_salida,
    title="Chat bot para empresas.",
    description="Este modelo convierte la entrada de voz a texto e inferencia, texto a inferencia",
    examples=list_audios,
    theme="default",
    layout="vertical",
    allow_flagging=False,
    flagging_dir=None,
    server_name=None,
    server_port=None,
    live=False,
    capture_session=False,
)
iface.launch()