Spaces:

HenriqueBraz
/

AiiTServices

Sleeping

App Files Files Community

AiiTServices / app.py

HenriqueBraz

Update app.py

90b048e verified 7 days ago

raw

history blame contribute delete

22.6 kB

	import streamlit as st
	from transformers import pipeline
	import torch
	from PIL import Image
	import io
	import librosa
	import numpy as np
	import logging
	import tempfile
	import os
	from streamlit.runtime.uploaded_file_manager import UploadedFile
	from diffusers import StableDiffusionPipeline

	# Configure the Streamlit page: browser title, icon and wide layout.
	st.set_page_config(page_title="Aplicação de IA Multi-Modal", page_icon="🤖", layout="wide")

	# Send ERROR-and-above log records to a local file, prefixed with a timestamp.
	logging.basicConfig(
	    level=logging.ERROR,
	    filename='app_errors.log',
	    format='%(asctime)s - %(levelname)s - %(message)s',
	)

	# Cache the loaded models so they are not reloaded on every script run.
	@st.cache_resource(show_spinner=False)
	def load_models():
	    """Load every model used by the app and return them keyed by task name.

	    The device is chosen in this order: Apple MPS, then CUDA, then CPU.
	    All Hugging Face pipelines are created first, followed by the Stable
	    Diffusion text-to-image pipeline.

	    Returns:
	        dict: Maps each model key (e.g. ``'summarization'``,
	        ``'text_to_image'``) to its loaded pipeline. An empty dict is
	        returned if any model fails to load; in that case the error is
	        shown with ``st.error`` and logged with its traceback.
	    """
	    device = torch.device(
	        "mps" if torch.backends.mps.is_available()
	        else "cuda" if torch.cuda.is_available()
	        else "cpu"
	    )
	    logging.info(f"Usando dispositivo: {device}")

	    # (model key, pipeline task, checkpoint, extra pipeline kwargs), in load order.
	    pipeline_specs = [
	        # Text models
	        ('sentiment_analysis', "sentiment-analysis",
	         "cardiffnlp/twitter-roberta-base-sentiment-latest", {}),
	        ('text_classification', "text-classification",
	         "distilbert-base-uncased-finetuned-sst-2-english", {}),
	        ('summarization', "summarization", "facebook/bart-large-cnn", {}),
	        ('question_answering', "question-answering", "deepset/roberta-base-squad2", {}),
	        ('translation', "translation", "Helsinki-NLP/opus-mt-tc-big-en-pt", {}),
	        ('text_generation', "text-generation", "gpt2", {}),
	        ('ner', "ner", "dbmdz/bert-large-cased-finetuned-conll03-english",
	         {"aggregation_strategy": "simple"}),
	        # Image models
	        ('image_classification', "image-classification", "google/vit-base-patch16-224", {}),
	        ('object_detection', "object-detection", "facebook/detr-resnet-50", {}),
	        ('image_segmentation', "image-segmentation", "facebook/detr-resnet-50-panoptic", {}),
	        ('facial_recognition', "image-classification",
	         "mo-thecreator/vit-Facial-Expression-Recognition", {}),
	        # Audio models
	        ('speech_to_text', "automatic-speech-recognition", "openai/whisper-base", {}),
	        ('audio_classification', "audio-classification", "superb/hubert-base-superb-er", {}),
	    ]

	    models = {}
	    try:
	        for key, task, checkpoint, extra_kwargs in pipeline_specs:
	            models[key] = pipeline(task, model=checkpoint, device=device, **extra_kwargs)

	        # Generative model: Stable Diffusion, half precision only when CUDA is available.
	        sd_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

	        # Treat the presence of SPACE_ID or HF_HOME as "running on Hugging Face Spaces".
	        is_hf_spaces = "SPACE_ID" in os.environ or "HF_HOME" in os.environ

	        sd_kwargs = {"torch_dtype": sd_dtype, "use_safetensors": True}
	        if not is_hf_spaces:
	            # Local mode: disable the safety checker for performance.
	            sd_kwargs["safety_checker"] = None
	        # On Spaces the safety checker stays enabled. The previous code built it
	        # by hand from StableDiffusionSafetyChecker / AutoFeatureExtractor, which
	        # were never imported, so this branch always failed with NameError.
	        # Leaving `safety_checker` unset lets from_pretrained load the checker
	        # and feature extractor bundled with the checkpoint.
	        models['text_to_image'] = StableDiffusionPipeline.from_pretrained(
	            "runwayml/stable-diffusion-v1-5",
	            **sd_kwargs
	        )

	        # Best-effort device placement and memory optimisation; a failure here
	        # must not prevent the app from starting.
	        try:
	            models['text_to_image'] = models['text_to_image'].to(device)
	            models['text_to_image'].enable_attention_slicing()
	        except Exception:
	            logging.info("Otimizações de SD não aplicadas para este dispositivo.")

	    except Exception as e:
	        st.error(f"Erro crítico ao carregar modelos: {str(e)}")
	        logging.exception("Erro ao carregar modelos")
	        return {}

	    return models

	def validate_audio_file(file: UploadedFile) -> bool:
	    """Return True when the upload's file name ends with a supported audio extension.

	    The comparison is case-insensitive and only looks at the name, not the content.
	    """
	    accepted_suffixes = ('.wav', '.mp3', '.flac', '.m4a')
	    return file.name.lower().endswith(accepted_suffixes)

	def validate_image_file(file: UploadedFile) -> bool:
	    """Check that an upload has an accepted image extension and passes PIL's verify().

	    Returns False for an unsupported extension or when PIL cannot open or
	    verify the data. On success the stream is rewound (best effort) so it
	    can be read again.
	    """
	    allowed_suffixes = ('.jpg', '.jpeg', '.png', '.bmp')
	    if not file.name.lower().endswith(allowed_suffixes):
	        return False

	    try:
	        Image.open(file).verify()
	    except Exception:
	        return False

	    # Rewind the cursor so later reads start from the beginning of the file.
	    try:
	        file.seek(0)
	    except Exception:
	        pass
	    return True

	def process_audio_file(audio_file):
	    """Decode an uploaded audio file into a waveform loaded at 16 kHz.

	    The upload is copied to a temporary file (keeping its original extension)
	    and loaded with ``librosa.load(..., sr=16000)``. The temporary file is
	    always removed, including when decoding fails.

	    Args:
	        audio_file: File-like upload exposing ``name`` and ``read()``.

	    Returns:
	        The audio array returned by librosa, or ``None`` if processing failed
	        (the error is shown with ``st.error`` and logged).
	    """
	    tmp_file_path = None
	    try:
	        # The caller may already have handed this object to another reader;
	        # rewind (when supported) so the whole file is copied.
	        if hasattr(audio_file, "seek"):
	            audio_file.seek(0)

	        suffix = os.path.splitext(audio_file.name)[1]
	        # delete=False because librosa reopens the file by path after it is closed.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
	            # Record the path first so cleanup still happens if the write fails.
	            tmp_file_path = tmp_file.name
	            tmp_file.write(audio_file.read())

	        audio_array, _ = librosa.load(tmp_file_path, sr=16000)
	        return audio_array
	    except Exception as e:
	        st.error(f"Erro ao processar áudio: {str(e)}")
	        logging.error(f"Erro no processamento de áudio: {e}")
	        return None
	    finally:
	        # Previously the temp file leaked whenever loading raised.
	        if tmp_file_path is not None:
	            try:
	                os.unlink(tmp_file_path)
	            except OSError as cleanup_error:
	                logging.error(f"Erro ao remover arquivo temporário: {cleanup_error}")

	def process_image_file(image_file):
	    """Open an uploaded image with PIL and return it in RGB mode.

	    Returns None when the image cannot be opened or converted; the error is
	    shown in the UI and written to the log.
	    """
	    try:
	        picture = Image.open(image_file)
	        return picture if picture.mode == 'RGB' else picture.convert('RGB')
	    except Exception as e:
	        st.error(f"Erro ao processar imagem: {str(e)}")
	        logging.error(f"Erro no processamento de imagem: {e}")
	        return None

	def extract_images_from_sd_result(result):
	    """Normalize a StableDiffusionPipeline return value into a list of PIL images.

	    Accepts an object with an ``images`` attribute, a plain list, or a single
	    item. PIL images are kept, NumPy arrays are converted with
	    ``Image.fromarray`` (arrays that fail to convert are dropped), and
	    anything else is ignored.
	    """
	    if hasattr(result, 'images'):
	        candidates = result.images
	    elif isinstance(result, list):
	        candidates = result
	    else:
	        candidates = [result]

	    pil_images = []
	    for item in candidates:
	        if isinstance(item, Image.Image):
	            pil_images.append(item)
	            continue
	        if not isinstance(item, np.ndarray):
	            continue
	        try:
	            pil_images.append(Image.fromarray(item))
	        except Exception:
	            # Skip arrays PIL cannot interpret as an image.
	            pass
	    return pil_images

	def display_results(result, model_key):
	    """Render a model's output in the Streamlit page according to its task.

	    Args:
	        result: Raw output of the model identified by ``model_key``. It is
	            indexed as a list of dicts for most tasks, as a dict with a
	            ``'text'`` entry for ``'speech_to_text'``, and passed to
	            ``extract_images_from_sd_result`` for ``'text_to_image'``.
	        model_key: Internal model identifier (e.g. ``'summarization'``).
	            Unknown keys render nothing.
	    """
	    if model_key == 'summarization':
	        st.subheader("📝 Resumo Gerado")
	        st.info(result[0]['summary_text'])

	    elif model_key == 'translation':
	        st.subheader("🌍 Tradução")
	        st.success(result[0]['translation_text'])

	    elif model_key in ['sentiment_analysis', 'text_classification']:
	        st.subheader("📊 Resultados")
	        for res in result:
	            label = res['label']
	            score = res['score']
	            st.progress(float(score), text=f"{label} ({score:.2%})")

	    elif model_key == 'ner':
	        st.subheader("🔍 Entidades Reconhecidas")
	        if not result:
	            # Avoid showing a bare header when no entity was found.
	            st.info("Nenhuma entidade encontrada.")
	        for entity in result:
	            st.write(f"- {entity['word']}: {entity['entity_group']} (confiança: {entity['score']:.2%})")

	    elif model_key == 'text_generation':
	        st.subheader("🧠 Texto Gerado")
	        # The UI lets the user request several sequences; show all of them
	        # instead of only the first one.
	        show_index = len(result) > 1
	        for position, generated in enumerate(result, start=1):
	            if show_index:
	                st.markdown(f"**Resultado {position}**")
	            st.write(generated['generated_text'])

	    elif model_key == 'image_classification':
	        st.subheader("🏷️ Classificação")
	        for res in result[:5]:
	            st.write(f"- {res['label']}: {res['score']:.2%}")

	    elif model_key == 'object_detection':
	        st.subheader("📦 Objetos Detectados")
	        if not result:
	            st.info("Nenhum objeto detectado.")
	        for obj in result:
	            st.write(f"- {obj['label']} (confiança: {obj['score']:.2%})")

	    elif model_key == 'image_segmentation':
	        st.subheader("🧩 Segmentação")
	        if not result:
	            # Guard against IndexError on result[0] when nothing was segmented.
	            st.info("Nenhum segmento encontrado.")
	            return
	        st.image(result[0]['mask'], caption="Máscara de segmentação", use_container_width=True)

	    elif model_key == 'facial_recognition':
	        st.subheader("😊 Reconhecimento Facial")
	        top_result = result[0]
	        st.write(f"Emoção predominante: {top_result['label']} (confiança: {top_result['score']:.2%})")

	    elif model_key == 'speech_to_text':
	        st.subheader("🔈 Transcrição")
	        st.success(result['text'])

	    elif model_key == 'audio_classification':
	        st.subheader("🎧 Classificação de Áudio")
	        top_emotion = result[0]
	        st.write(f"Emoção detectada: {top_emotion['label']} (confiança: {top_emotion['score']:.2%})")

	    elif model_key == 'text_to_image':
	        st.subheader("🎨 Imagem Gerada")
	        images = extract_images_from_sd_result(result)
	        if not images:
	            st.error("Nenhuma imagem foi gerada.")
	            return
	        if len(images) == 1:
	            st.image(images[0], caption="Imagem gerada a partir do texto")
	        else:
	            st.image(images, caption=[f"Imagem {i+1}" for i in range(len(images))])

	def main():
	    """Render the app: load the models, build the sidebar and run the chosen handler."""
	    st.title("🤖 Aplicação de IA Multi-Modal Avançada")
	    st.markdown("---")

	    # Load (or fetch from cache) every model while a spinner is displayed.
	    with st.spinner("Carregando modelos de IA... (Isso pode levar alguns minutos na primeira execução)"):
	        models = load_models()

	    # load_models() returns an empty dict on failure; nothing else can run then.
	    if not models:
	        st.error("Falha crítica ao carregar os modelos. Verifique os logs para mais detalhes.")
	        return

	    # Sidebar: category selector followed by the models of that category.
	    st.sidebar.title("⚙️ Configurações")
	    model_categories = {
	        "📝 Processamento de Texto": [
	            ("Análise de Sentimento", "sentiment_analysis"),
	            ("Classificação de Texto", "text_classification"),
	            ("Resumo de Texto", "summarization"),
	            ("Perguntas e Respostas", "question_answering"),
	            ("Tradução (EN→PT)", "translation"),
	            ("Reconhecimento de Entidades", "ner"),
	            ("Geração de Texto", "text_generation"),
	        ],
	        "🖼️ Processamento de Imagem": [
	            ("Classificação de Imagem", "image_classification"),
	            ("Detecção de Objetos", "object_detection"),
	            ("Segmentação de Imagem", "image_segmentation"),
	            ("Reconhecimento Facial", "facial_recognition"),
	        ],
	        "🎵 Processamento de Áudio": [
	            ("Transcrição de Áudio", "speech_to_text"),
	            ("Classificação de Emoções", "audio_classification"),
	        ],
	        "✨ Modelos Generativos": [
	            ("Texto para Imagem", "text_to_image"),
	        ],
	    }

	    selected_category = st.sidebar.selectbox("Categoria", list(model_categories), index=0)
	    category_entries = model_categories[selected_category]

	    selected_model = st.sidebar.selectbox(
	        "Modelo",
	        [label for label, _ in category_entries],
	        format_func=str,
	        index=0,
	    )

	    # Map the displayed label back to the internal model key.
	    model_key = dict(category_entries)[selected_model]

	    # Main area
	    st.header(str(selected_model))

	    # One-line description of each model, shown in a collapsible panel.
	    model_info = {
	        'sentiment_analysis': "Analisa o sentimento expresso em um texto (positivo/negativo/neutro)",
	        'text_classification': "Classifica textos em categorias pré-definidas",
	        'summarization': "Gera um resumo conciso de um texto longo",
	        'question_answering': "Responde perguntas baseadas em um contexto fornecido",
	        'translation': "Traduz texto de inglês para português",
	        'ner': "Identifica e classifica entidades nomeadas (pessoas, lugares, organizações)",
	        'text_generation': "Gera texto criativo continuando a partir de um prompt",
	        'image_classification': "Identifica objetos e cenas em imagens",
	        'object_detection': "Detecta e localiza múltiplos objetos em uma imagem",
	        'image_segmentation': "Segmenta diferentes elementos em uma imagem",
	        'facial_recognition': "Reconhece características faciais e emoções",
	        'speech_to_text': "Transcreve fala em texto",
	        'audio_classification': "Classifica emoções em arquivos de áudio",
	        'text_to_image': "Gera imagens a partir de descrições textuais",
	    }
	    with st.expander("ℹ️ Sobre este modelo"):
	        st.info(model_info.get(model_key, "Informações detalhadas sobre este modelo."))

	    # Each model key maps to the handler that draws its inputs and runs it.
	    def run_text():
	        handle_text_models(models, model_key, selected_model)

	    def run_qa():
	        handle_qa_model(models, model_key)

	    def run_image():
	        handle_image_models(models, model_key, selected_model)

	    def run_audio():
	        handle_audio_models(models, model_key)

	    def run_generative():
	        handle_generative_models(models, model_key)

	    dispatch = {
	        'sentiment_analysis': run_text,
	        'text_classification': run_text,
	        'summarization': run_text,
	        'translation': run_text,
	        'text_generation': run_text,
	        'ner': run_text,
	        'question_answering': run_qa,
	        'image_classification': run_image,
	        'object_detection': run_image,
	        'image_segmentation': run_image,
	        'facial_recognition': run_image,
	        'speech_to_text': run_audio,
	        'audio_classification': run_audio,
	        'text_to_image': run_generative,
	    }

	    try:
	        runner = dispatch.get(model_key)
	        if runner is not None:
	            runner()
	    except Exception as e:
	        st.error(f"Erro inesperado durante a execução: {str(e)}")
	        logging.exception("Erro durante a execução do modelo")

	def handle_text_models(models, model_key, model_name):
	    """Draw the input widgets for a single-text model and run it on demand.

	    Args:
	        models: Dict of loaded pipelines keyed by model key.
	        model_key: Key of the text model to run (e.g. ``'summarization'``).
	        model_name: Display name used in the widget labels.

	    The pipeline is only called when the button is pressed and the text is
	    not blank. Errors raised by the model are shown in the UI and logged.
	    """
	    input_text = st.text_area(
	        f"Digite o texto para {model_name.lower()}:",
	        height=200,
	        placeholder="Cole ou digite seu texto aqui...",
	        key=f"text_input_{model_key}"
	    )

	    # Extra call-time parameters for the models that expose them.
	    advanced_params = {}
	    if model_key == 'summarization':
	        with st.expander("⚙️ Parâmetros Avançados"):
	            max_len = st.slider("Comprimento máximo", 50, 300, 150)
	            min_len = st.slider("Comprimento mínimo", 10, 100, 30)
	        advanced_params['max_length'] = max_len
	        # The sliders allow min > max (e.g. 100 vs 50); keep min strictly below max.
	        advanced_params['min_length'] = min(min_len, max_len - 1)
	        # Truncate inputs longer than the model's maximum input length
	        # instead of letting the model fail on them.
	        advanced_params['truncation'] = True

	    elif model_key == 'text_generation':
	        with st.expander("⚙️ Parâmetros Avançados"):
	            advanced_params['max_length'] = st.slider("Comprimento do texto", 50, 500, 100)
	            advanced_params['temperature'] = st.slider("Criatividade", 0.1, 1.0, 0.7)
	            advanced_params['num_return_sequences'] = st.slider("Número de resultados", 1, 5, 1)
	        # Temperature and multiple return sequences only take effect when
	        # sampling; request it explicitly rather than relying on checkpoint defaults.
	        advanced_params['do_sample'] = True

	    if not st.button(f"🚀 Executar {model_name}", type="primary", key=f"btn_{model_key}"):
	        return

	    if not input_text.strip():
	        st.warning("⚠️ Por favor, insira um texto válido.")
	        return

	    with st.spinner("Processando..."):
	        try:
	            # advanced_params is empty for models without extra options.
	            result = models[model_key](input_text, **advanced_params)
	            display_results(result, model_key)
	        except Exception as e:
	            st.error(f"Erro ao processar texto: {str(e)}")
	            logging.error(f"Erro no modelo {model_key}: {e}")

	def handle_qa_model(models, model_key):
	    """Draw the question-answering form and show the model's answer.

	    A warning is added when the answer's score is below the threshold chosen
	    in the advanced parameters; the answer itself is displayed either way.
	    """
	    left_col, right_col = st.columns(2)

	    with left_col:
	        context = st.text_area(
	            "Contexto:",
	            height=200,
	            placeholder="Cole o texto que contém a informação...",
	            key="qa_context",
	        )

	    with right_col:
	        question = st.text_area(
	            "Pergunta:",
	            height=150,
	            placeholder="Faça sua pergunta sobre o contexto...",
	            key="qa_question",
	        )

	    with st.expander("⚙️ Parâmetros Avançados"):
	        confidence_threshold = st.slider("Limite de confiança", 0.0, 1.0, 0.5, 0.01)

	    clicked = st.button("🚀 Executar Pergunta e Resposta", type="primary", key="btn_qa")
	    if not clicked:
	        return

	    # Both fields are required.
	    if not (context.strip() and question.strip()):
	        st.warning("⚠️ Por favor, forneça tanto o contexto quanto a pergunta.")
	        return

	    with st.spinner("Buscando resposta..."):
	        try:
	            answer = models[model_key](question=question, context=context)
	            score = answer['score']

	            if score < confidence_threshold:
	                st.warning(f"⚠️ Confiança baixa na resposta ({score:.2%})")

	            st.success("🔍 Resposta encontrada:")
	            st.markdown(f"Resposta: {answer['answer']}")
	            st.markdown(f"Confiança: {score:.2%}")

	        except Exception as e:
	            st.error(f"Erro ao processar Q&A: {str(e)}")
	            logging.error(f"Erro no modelo Q&A: {e}")

	def handle_image_models(models, model_key, model_name):
	    """Draw the image uploader, preview the image and run the chosen image model.

	    The upload is validated first; the original image is shown in the left
	    column and the model output in the right column once the button is pressed.
	    """
	    uploaded_file = st.file_uploader(
	        "Carregue uma imagem",
	        type=["jpg", "png", "jpeg", "bmp"],
	        help="Formatos suportados: JPG, PNG, JPEG, BMP",
	        key=f"img_upload_{model_key}",
	    )

	    # Nothing to do until a file has been uploaded.
	    if uploaded_file is None:
	        return

	    if not validate_image_file(uploaded_file):
	        st.error("⚠️ Formato de arquivo inválido ou arquivo corrompido.")
	        return

	    original_col, results_col = st.columns(2)

	    with original_col:
	        st.subheader("🖼️ Imagem Original")
	        image = process_image_file(uploaded_file)
	        if image:
	            st.image(image, use_container_width=True)

	    with results_col:
	        st.subheader("📊 Resultados")
	        run_clicked = st.button(f"🚀 Executar {model_name}", type="primary", key=f"btn_img_{model_key}")
	        # Only run when the button was pressed and the image loaded correctly.
	        if run_clicked and image:
	            with st.spinner("Analisando imagem..."):
	                try:
	                    display_results(models[model_key](image), model_key)
	                except Exception as e:
	                    st.error(f"Erro ao processar imagem: {str(e)}")
	                    logging.error(f"Erro no modelo {model_key}: {e}")

	def handle_audio_models(models, model_key):
	    """Draw the audio uploader and run the chosen audio model on the upload.

	    Args:
	        models: Dict of loaded pipelines keyed by model key.
	        model_key: ``'speech_to_text'`` or ``'audio_classification'``.

	    The upload is validated by extension, played back in the page, decoded
	    with ``process_audio_file`` and then passed to the pipeline when the
	    button is pressed. Errors are shown in the UI and logged.
	    """
	    model_name = "Transcrição de Áudio" if model_key == 'speech_to_text' else "Classificação de Áudio"

	    uploaded_file = st.file_uploader(
	        f"Carregue um arquivo de áudio para {model_name}",
	        type=["wav", "mp3", "flac", "m4a"],
	        help="Formatos suportados: WAV, MP3, FLAC, M4A",
	        key=f"audio_upload_{model_key}"
	    )

	    # Nothing to do until a file has been uploaded.
	    if uploaded_file is None:
	        return

	    if not validate_audio_file(uploaded_file):
	        st.error("⚠️ Formato de arquivo inválido ou não suportado.")
	        return

	    st.audio(uploaded_file)

	    if not st.button(f"🚀 Executar {model_name}", type="primary", key=f"btn_audio_{model_key}"):
	        return

	    with st.spinner("Processando áudio..."):
	        try:
	            audio_array = process_audio_file(uploaded_file)

	            if audio_array is None:
	                st.error("Não foi possível processar o arquivo de áudio.")
	                return

	            call_kwargs = {}
	            if model_key == 'speech_to_text':
	                # Whisper works on 30-second windows; enable the pipeline's
	                # chunking so longer recordings are transcribed in full.
	                call_kwargs['chunk_length_s'] = 30

	            # process_audio_file loads the audio at 16 kHz.
	            result = models[model_key](
	                {"array": audio_array, "sampling_rate": 16000},
	                **call_kwargs
	            )
	            display_results(result, model_key)

	        except Exception as e:
	            st.error(f"Erro ao processar áudio: {str(e)}")
	            logging.error(f"Erro no modelo {model_key}: {e}")

	def handle_generative_models(models, model_key):
	    """Draw the text-to-image form and generate images from the prompt.

	    Width, height, number of images and guidance scale come from the
	    advanced-parameters panel; generation runs when the button is pressed
	    and the prompt is not blank.
	    """
	    prompt = st.text_area(
	        "Descrição da imagem:",
	        height=150,
	        placeholder="Descreva a imagem que deseja gerar...",
	        key="text_to_image_prompt",
	    )

	    with st.expander("⚙️ Parâmetros Avançados"):
	        width_col, height_col = st.columns(2)
	        with width_col:
	            width = st.slider("Largura", 256, 1024, 512, 64)
	        with height_col:
	            height = st.slider("Altura", 256, 1024, 512, 64)
	        num_images = st.slider("Número de imagens", 1, 4, 1)
	        guidance_scale = st.slider("Escala de orientação", 1.0, 20.0, 7.5)

	    if not st.button("🚀 Gerar Imagem", type="primary", key="btn_text_to_image"):
	        return

	    if not prompt.strip():
	        st.warning("⚠️ Por favor, insira uma descrição para a imagem.")
	        return

	    generation_kwargs = {
	        "height": height,
	        "width": width,
	        "num_images_per_prompt": num_images,
	        "guidance_scale": guidance_scale,
	    }
	    with st.spinner("Criando imagem..."):
	        try:
	            result = models[model_key](prompt, **generation_kwargs)
	            display_results(result, model_key)
	        except Exception as e:
	            st.error(f"Erro ao gerar imagem: {str(e)}")
	            logging.error(f"Erro no modelo text-to-image: {e}")

	# Run the app only when this file is executed as a script.
	if __name__ == "__main__":
	    main()