import streamlit as st
from transformers import pipeline
import torch
from PIL import Image
import io
import librosa
import numpy as np
import logging
import tempfile
import os
from streamlit.runtime.uploaded_file_manager import UploadedFile
from diffusers import StableDiffusionPipeline

# Configure the Streamlit page (browser title, icon and wide layout)
st.set_page_config(
    page_title="Aplicação de IA Multi-Modal",
    page_icon="🤖",
    layout="wide"
)

# Configure logging: records go to app_errors.log. Because the level is
# ERROR, lower-severity records (INFO, WARNING) are not written.
logging.basicConfig(
    filename='app_errors.log',
    level=logging.ERROR,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Cached with st.cache_resource to avoid reloading the models on every run
@st.cache_resource(show_spinner=False)
def load_models():
    """Load every model used by the app and return them keyed by task name.

    The device is chosen in this order: MPS, then CUDA, then CPU.

    Returns:
        dict: maps a task key (e.g. ``'summarization'``, ``'text_to_image'``)
        to the loaded pipeline object. An empty dict is returned when any
        model fails to load; the error is shown in the UI and logged with
        its traceback.
    """
    # NOTE(review): the empty dict returned on failure is the value handed to
    # st.cache_resource, so a failed load is presumably kept until the cache
    # is cleared or the server restarts -- confirm.
    device = torch.device("mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu")
    logging.info(f"Usando dispositivo: {device}")
    models = {}

    # (key in the returned dict, pipeline task, model id, extra pipeline kwargs)
    pipeline_specs = [
        # Text models
        ('sentiment_analysis', "sentiment-analysis",
         "cardiffnlp/twitter-roberta-base-sentiment-latest", {}),
        ('text_classification', "text-classification",
         "distilbert-base-uncased-finetuned-sst-2-english", {}),
        ('summarization', "summarization",
         "facebook/bart-large-cnn", {}),
        ('question_answering', "question-answering",
         "deepset/roberta-base-squad2", {}),
        ('translation', "translation",
         "Helsinki-NLP/opus-mt-tc-big-en-pt", {}),
        ('text_generation', "text-generation",
         "gpt2", {}),
        ('ner', "ner",
         "dbmdz/bert-large-cased-finetuned-conll03-english",
         {'aggregation_strategy': "simple"}),
        # Image models
        ('image_classification', "image-classification",
         "google/vit-base-patch16-224", {}),
        ('object_detection', "object-detection",
         "facebook/detr-resnet-50", {}),
        ('image_segmentation', "image-segmentation",
         "facebook/detr-resnet-50-panoptic", {}),
        ('facial_recognition', "image-classification",
         "mo-thecreator/vit-Facial-Expression-Recognition", {}),
        # Audio models
        ('speech_to_text', "automatic-speech-recognition",
         "openai/whisper-base", {}),
        ('audio_classification', "audio-classification",
         "superb/hubert-base-superb-er", {}),
    ]

    try:
        for key, task, model_id, extra_kwargs in pipeline_specs:
            models[key] = pipeline(task, model=model_id, device=device, **extra_kwargs)

        # --- Stable Diffusion with environment-dependent safety checker ---
        sd_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

        # Treated as a Hugging Face Spaces (public) deployment when either
        # environment variable is present
        is_hf_spaces = "SPACE_ID" in os.environ or "HF_HOME" in os.environ

        sd_kwargs = {'torch_dtype': sd_dtype, 'use_safetensors': True}
        if not is_hf_spaces:
            # Local mode: disable the safety checker for performance
            sd_kwargs['safety_checker'] = None
        # On Spaces nothing is overridden, so from_pretrained loads the safety
        # checker and feature extractor declared by the checkpoint itself.
        # The previous code built them by hand from StableDiffusionSafetyChecker
        # and AutoFeatureExtractor, which are never imported in this file, so
        # that branch raised NameError and no model was returned at all.
        models['text_to_image'] = StableDiffusionPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5",
            **sd_kwargs
        )

        # Best effort: a failure here leaves the pipeline on its current device
        try:
            models['text_to_image'] = models['text_to_image'].to(device)
            models['text_to_image'].enable_attention_slicing()
        except Exception:
            logging.warning(
                "Otimizações de SD não aplicadas para este dispositivo.",
                exc_info=True
            )

    except Exception as e:
        st.error(f"Erro crítico ao carregar modelos: {str(e)}")
        logging.exception("Erro ao carregar modelos")
        return {}

    return models

def validate_audio_file(file: UploadedFile) -> bool:
    """Return True when the upload's name ends with a supported audio extension.

    The comparison is case-insensitive and looks only at ``file.name``; the
    file contents are not inspected.
    """
    supported_suffixes = ('.wav', '.mp3', '.flac', '.m4a')
    return file.name.lower().endswith(supported_suffixes)

def validate_image_file(file: UploadedFile) -> bool:
    """Return True when the upload has an image extension and PIL can verify it.

    The extension check is case-insensitive. After a successful verification
    the file cursor is moved back to the start (best effort) so the file can
    be read again.
    """
    allowed_suffixes = ('.jpg', '.jpeg', '.png', '.bmp')
    if not file.name.lower().endswith(allowed_suffixes):
        return False

    try:
        Image.open(file).verify()
    except Exception:
        return False

    # Rewind for later readers; a failure to seek does not invalidate the file
    try:
        file.seek(0)
    except Exception:
        pass
    return True

def process_audio_file(audio_file):
    """Decode an uploaded audio file into a waveform resampled to 16 kHz.

    The upload is copied to a temporary file that keeps the original
    extension, so librosa can open it by path. The temporary file is removed
    whether or not decoding succeeds.

    Args:
        audio_file: file-like upload exposing ``name`` and ``read()``.

    Returns:
        The audio array returned by ``librosa.load(path, sr=16000)``, or
        ``None`` when the file could not be processed (the error is shown in
        the UI and logged).
    """
    tmp_file_path = None
    try:
        # Rewind first: an earlier consumer of the same upload object may have
        # advanced the cursor, which would make read() return partial data
        if hasattr(audio_file, 'seek'):
            audio_file.seek(0)

        suffix = os.path.splitext(audio_file.name)[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_file_path = tmp_file.name
            tmp_file.write(audio_file.read())

        # Load (and resample) with librosa; the returned sample rate is unused
        audio_array, _ = librosa.load(tmp_file_path, sr=16000)
        return audio_array
    except Exception as e:
        st.error(f"Erro ao processar áudio: {str(e)}")
        logging.error(f"Erro no processamento de áudio: {e}")
        return None
    finally:
        # delete=False means nothing removes the file for us; previously it
        # was leaked whenever librosa.load raised
        if tmp_file_path is not None:
            try:
                os.unlink(tmp_file_path)
            except OSError:
                logging.warning(
                    "Could not remove temporary audio file %s",
                    tmp_file_path,
                    exc_info=True
                )

def process_image_file(image_file):
    """Open an uploaded image and return it in RGB mode.

    Returns ``None`` when opening or converting fails; the error is shown in
    the UI and logged.
    """
    try:
        opened = Image.open(image_file)
        return opened if opened.mode == 'RGB' else opened.convert('RGB')
    except Exception as e:
        st.error(f"Erro ao processar imagem: {str(e)}")
        logging.error(f"Erro no processamento de imagem: {e}")
        return None

def extract_images_from_sd_result(result):
    """Normalize a Stable Diffusion result into a list of PIL images.

    Accepts an object with an ``images`` attribute, a list, or a single item.
    PIL images are kept as they are, NumPy arrays are converted with
    ``Image.fromarray`` (arrays that fail to convert are skipped), and any
    other item is dropped.
    """
    if hasattr(result, 'images'):
        candidates = result.images
    elif isinstance(result, list):
        candidates = result
    else:
        candidates = [result]

    pil_images = []
    for candidate in candidates:
        if isinstance(candidate, Image.Image):
            pil_images.append(candidate)
            continue
        if not isinstance(candidate, np.ndarray):
            continue
        try:
            pil_images.append(Image.fromarray(candidate))
        except Exception:
            # Arrays PIL cannot interpret are skipped
            pass
    return pil_images

def display_results(result, model_key):
    """Render a model result in the layout that matches the model type.

    Args:
        result: raw output of the pipeline stored under ``model_key``.
        model_key: task key that selects the rendering branch; unknown keys
            render nothing.
    """
    if model_key == 'summarization':
        st.subheader("📝 Resumo Gerado")
        st.info(result[0]['summary_text'])

    elif model_key == 'translation':
        st.subheader("🌍 Tradução")
        st.success(result[0]['translation_text'])

    elif model_key in ['sentiment_analysis', 'text_classification']:
        st.subheader("📊 Resultados")
        for res in result:
            label = res['label']
            score = res['score']
            st.progress(float(score), text=f"{label} ({score:.2%})")

    elif model_key == 'ner':
        st.subheader("🔍 Entidades Reconhecidas")
        if not result:
            # Make "nothing found" explicit instead of an empty section
            st.info("Nenhuma entidade reconhecida.")
        for entity in result:
            st.write(f"- **{entity['word']}**: {entity['entity_group']} (confiança: {entity['score']:.2%})")

    elif model_key == 'text_generation':
        st.subheader("🧠 Texto Gerado")
        # The UI allows several return sequences; show all of them rather
        # than only the first one
        show_index = len(result) > 1
        for position, generated in enumerate(result, start=1):
            if show_index:
                st.markdown(f"**Resultado {position}**")
            st.write(generated['generated_text'])

    elif model_key == 'image_classification':
        st.subheader("🏷️ Classificação")
        for res in result[:5]:
            st.write(f"- **{res['label']}**: {res['score']:.2%}")

    elif model_key == 'object_detection':
        st.subheader("📦 Objetos Detectados")
        if not result:
            st.info("Nenhum objeto detectado.")
        for obj in result:
            st.write(f"- {obj['label']} (confiança: {obj['score']:.2%})")

    elif model_key == 'image_segmentation':
        st.subheader("🧩 Segmentação")
        if not result:
            # Avoid an IndexError when the model returns no segments
            st.info("Nenhum segmento encontrado.")
            return
        st.image(result[0]['mask'], caption="Máscara de segmentação", use_container_width=True)

    elif model_key == 'facial_recognition':
        st.subheader("😊 Reconhecimento Facial")
        top_result = result[0]
        st.write(f"**Emoção predominante**: {top_result['label']} (confiança: {top_result['score']:.2%})")

    elif model_key == 'speech_to_text':
        st.subheader("🔈 Transcrição")
        st.success(result['text'])

    elif model_key == 'audio_classification':
        st.subheader("🎧 Classificação de Áudio")
        top_emotion = result[0]
        st.write(f"**Emoção detectada**: {top_emotion['label']} (confiança: {top_emotion['score']:.2%})")

    elif model_key == 'text_to_image':
        st.subheader("🎨 Imagem Gerada")
        images = extract_images_from_sd_result(result)
        if not images:
            st.error("Nenhuma imagem foi gerada.")
            return
        if len(images) == 1:
            st.image(images[0], caption="Imagem gerada a partir do texto")
        else:
            st.image(images, caption=[f"Imagem {i+1}" for i in range(len(images))])

def main():
    """Render the app: load the models, build the sidebar and run the chosen model's UI."""
    st.title("🤖 Aplicação de IA Multi-Modal Avançada")
    st.markdown("---")

    # Load the models behind a spinner; an empty result aborts the page
    with st.spinner("Carregando modelos de IA... (Isso pode levar alguns minutos na primeira execução)"):
        models = load_models()

    if not models:
        st.error("Falha crítica ao carregar os modelos. Verifique os logs para mais detalhes.")
        return

    # Sidebar: category -> list of (display name, model key)
    st.sidebar.title("⚙️ Configurações")
    model_categories = {
        "📝 Processamento de Texto": [
            ("Análise de Sentimento", "sentiment_analysis"),
            ("Classificação de Texto", "text_classification"),
            ("Resumo de Texto", "summarization"),
            ("Perguntas e Respostas", "question_answering"),
            ("Tradução (EN→PT)", "translation"),
            ("Reconhecimento de Entidades", "ner"),
            ("Geração de Texto", "text_generation")
        ],
        "🖼️ Processamento de Imagem": [
            ("Classificação de Imagem", "image_classification"),
            ("Detecção de Objetos", "object_detection"),
            ("Segmentação de Imagem", "image_segmentation"),
            ("Reconhecimento Facial", "facial_recognition")
        ],
        "🎵 Processamento de Áudio": [
            ("Transcrição de Áudio", "speech_to_text"),
            ("Classificação de Emoções", "audio_classification")
        ],
        "✨ Modelos Generativos": [
            ("Texto para Imagem", "text_to_image")
        ]
    }

    selected_category = st.sidebar.selectbox(
        "Categoria",
        list(model_categories),
        index=0
    )
    category_entries = model_categories[selected_category]

    selected_model = st.sidebar.selectbox(
        "Modelo",
        [display_name for display_name, _ in category_entries],
        format_func=lambda x: x,
        index=0
    )

    # Resolve the key that belongs to the chosen display name
    model_key = next(
        entry_key
        for display_name, entry_key in category_entries
        if display_name == selected_model
    )

    # Main area
    st.header(f"{selected_model}")

    # Short description of each model, shown in a collapsible section
    model_info = {
        'sentiment_analysis': "Analisa o sentimento expresso em um texto (positivo/negativo/neutro)",
        'text_classification': "Classifica textos em categorias pré-definidas",
        'summarization': "Gera um resumo conciso de um texto longo",
        'question_answering': "Responde perguntas baseadas em um contexto fornecido",
        'translation': "Traduz texto de inglês para português",
        'ner': "Identifica e classifica entidades nomeadas (pessoas, lugares, organizações)",
        'text_generation': "Gera texto criativo continuando a partir de um prompt",
        'image_classification': "Identifica objetos e cenas em imagens",
        'object_detection': "Detecta e localiza múltiplos objetos em uma imagem",
        'image_segmentation': "Segmenta diferentes elementos em uma imagem",
        'facial_recognition': "Reconhece características faciais e emoções",
        'speech_to_text': "Transcreve fala em texto",
        'audio_classification': "Classifica emoções em arquivos de áudio",
        'text_to_image': "Gera imagens a partir de descrições textuais"
    }
    with st.expander("ℹ️ Sobre este modelo"):
        st.info(model_info.get(model_key, "Informações detalhadas sobre este modelo."))

    # Model keys grouped by the handler that serves them
    text_keys = ('sentiment_analysis', 'text_classification', 'summarization',
                 'translation', 'text_generation', 'ner')
    image_keys = ('image_classification', 'object_detection',
                  'image_segmentation', 'facial_recognition')
    audio_keys = ('speech_to_text', 'audio_classification')

    try:
        if model_key == 'question_answering':
            handle_qa_model(models, model_key)
        elif model_key == 'text_to_image':
            handle_generative_models(models, model_key)
        elif model_key in text_keys:
            handle_text_models(models, model_key, selected_model)
        elif model_key in image_keys:
            handle_image_models(models, model_key, selected_model)
        elif model_key in audio_keys:
            handle_audio_models(models, model_key)

    except Exception as e:
        st.error(f"Erro inesperado durante a execução: {str(e)}")
        logging.exception("Erro durante a execução do modelo")

def handle_text_models(models, model_key, model_name):
    """Render the input form for a text model and run it when requested.

    Args:
        models: dict of loaded pipelines, indexed by ``model_key``.
        model_key: task key of the pipeline to run.
        model_name: display name used in the widget labels.
    """
    input_text = st.text_area(
        f"Digite o texto para {model_name.lower()}:",
        height=200,
        placeholder="Cole ou digite seu texto aqui...",
        key=f"text_input_{model_key}"
    )

    # Extra keyword arguments forwarded to the pipeline call
    advanced_params = {}
    if model_key == 'summarization':
        with st.expander("⚙️ Parâmetros Avançados"):
            max_length = st.slider("Comprimento máximo", 50, 300, 150)
            min_length = st.slider("Comprimento mínimo", 10, 100, 30)
        advanced_params['max_length'] = max_length
        # The slider ranges overlap (minimum up to 100, maximum down to 50),
        # so clamp to never request min_length > max_length
        advanced_params['min_length'] = min(min_length, max_length)

    elif model_key == 'text_generation':
        with st.expander("⚙️ Parâmetros Avançados"):
            advanced_params['max_length'] = st.slider("Comprimento do texto", 50, 500, 100)
            advanced_params['temperature'] = st.slider("Criatividade", 0.1, 1.0, 0.7)
            advanced_params['num_return_sequences'] = st.slider("Número de resultados", 1, 5, 1)
        # temperature and several return sequences rely on sampling; request
        # it explicitly instead of depending on the model's default config
        advanced_params['do_sample'] = True

    if not st.button(f"🚀 Executar {model_name}", type="primary", key=f"btn_{model_key}"):
        return

    if not input_text.strip():
        st.warning("⚠️ Por favor, insira um texto válido.")
        return

    with st.spinner("Processando..."):
        try:
            # advanced_params is empty for models without extra parameters
            # (e.g. NER), so a single call covers every text model
            result = models[model_key](input_text, **advanced_params)
            display_results(result, model_key)
        except Exception as e:
            st.error(f"Erro ao processar texto: {str(e)}")
            # logging.exception keeps the traceback alongside the message
            logging.exception(f"Erro no modelo {model_key}: {e}")

def handle_qa_model(models, model_key):
    """Render the question-answering form and show the model's answer.

    A warning is shown in addition to the answer when the score is below the
    confidence threshold chosen in the advanced parameters.
    """
    context_col, question_col = st.columns(2)

    with context_col:
        context = st.text_area(
            "Contexto:",
            height=200,
            placeholder="Cole o texto que contém a informação...",
            key="qa_context"
        )

    with question_col:
        question = st.text_area(
            "Pergunta:",
            height=150,
            placeholder="Faça sua pergunta sobre o contexto...",
            key="qa_question"
        )

    with st.expander("⚙️ Parâmetros Avançados"):
        confidence_threshold = st.slider("Limite de confiança", 0.0, 1.0, 0.5, 0.01)

    if not st.button("🚀 Executar Pergunta e Resposta", type="primary", key="btn_qa"):
        return

    # Both fields must contain non-whitespace text
    if not (context.strip() and question.strip()):
        st.warning("⚠️ Por favor, forneça tanto o contexto quanto a pergunta.")
        return

    with st.spinner("Buscando resposta..."):
        try:
            answer = models[model_key](question=question, context=context)
            score = answer['score']

            if score < confidence_threshold:
                st.warning(f"⚠️ Confiança baixa na resposta ({score:.2%})")

            st.success("🔍 Resposta encontrada:")
            st.markdown(f"**Resposta:** {answer['answer']}")
            st.markdown(f"**Confiança:** {score:.2%}")

        except Exception as e:
            st.error(f"Erro ao processar Q&A: {str(e)}")
            logging.error(f"Erro no modelo Q&A: {e}")

def handle_image_models(models, model_key, model_name):
    """Render the image upload form, preview the image and run the model on demand."""
    uploaded_file = st.file_uploader(
        "Carregue uma imagem",
        type=["jpg", "png", "jpeg", "bmp"],
        help="Formatos suportados: JPG, PNG, JPEG, BMP",
        key=f"img_upload_{model_key}"
    )

    if uploaded_file is None:
        return

    if not validate_image_file(uploaded_file):
        st.error("⚠️ Formato de arquivo inválido ou arquivo corrompido.")
        return

    preview_col, results_col = st.columns(2)

    with preview_col:
        st.subheader("🖼️ Imagem Original")
        image = process_image_file(uploaded_file)
        if image:
            st.image(image, use_container_width=True)

    with results_col:
        st.subheader("📊 Resultados")
        run_clicked = st.button(
            f"🚀 Executar {model_name}",
            type="primary",
            key=f"btn_img_{model_key}"
        )
        # Nothing is run when the image could not be processed
        if run_clicked and image:
            with st.spinner("Analisando imagem..."):
                try:
                    prediction = models[model_key](image)
                    display_results(prediction, model_key)
                except Exception as e:
                    st.error(f"Erro ao processar imagem: {str(e)}")
                    logging.error(f"Erro no modelo {model_key}: {e}")

def handle_audio_models(models, model_key):
    """Render the audio upload form, play the file back and run the model on demand."""
    if model_key == 'speech_to_text':
        model_name = "Transcrição de Áudio"
    else:
        model_name = "Classificação de Áudio"

    uploaded_file = st.file_uploader(
        f"Carregue um arquivo de áudio para {model_name}",
        type=["wav", "mp3", "flac", "m4a"],
        help="Formatos suportados: WAV, MP3, FLAC, M4A",
        key=f"audio_upload_{model_key}"
    )

    if uploaded_file is None:
        return

    if not validate_audio_file(uploaded_file):
        st.error("⚠️ Formato de arquivo inválido ou não suportado.")
        return

    st.audio(uploaded_file)

    if not st.button(f"🚀 Executar {model_name}", type="primary", key=f"btn_audio_{model_key}"):
        return

    with st.spinner("Processando áudio..."):
        try:
            audio_array = process_audio_file(uploaded_file)

            if audio_array is None:
                st.error("Não foi possível processar o arquivo de áudio.")
            else:
                # The waveform is passed with the 16 kHz rate it was loaded at
                model_input = {"array": audio_array, "sampling_rate": 16000}
                prediction = models[model_key](model_input)
                display_results(prediction, model_key)

        except Exception as e:
            st.error(f"Erro ao processar áudio: {str(e)}")
            logging.error(f"Erro no modelo {model_key}: {e}")

def handle_generative_models(models, model_key):
    """Render the text-to-image form and generate images from the prompt on demand."""
    prompt = st.text_area(
        "Descrição da imagem:",
        height=150,
        placeholder="Descreva a imagem que deseja gerar...",
        key="text_to_image_prompt"
    )

    with st.expander("⚙️ Parâmetros Avançados"):
        width_col, height_col = st.columns(2)
        with width_col:
            width = st.slider("Largura", 256, 1024, 512, 64)
        with height_col:
            height = st.slider("Altura", 256, 1024, 512, 64)
        num_images = st.slider("Número de imagens", 1, 4, 1)
        guidance_scale = st.slider("Escala de orientação", 1.0, 20.0, 7.5)

    if not st.button("🚀 Gerar Imagem", type="primary", key="btn_text_to_image"):
        return

    if not prompt.strip():
        st.warning("⚠️ Por favor, insira uma descrição para a imagem.")
        return

    # Generation settings collected from the advanced parameters
    generation_kwargs = {
        'height': height,
        'width': width,
        'num_images_per_prompt': num_images,
        'guidance_scale': guidance_scale,
    }

    with st.spinner("Criando imagem..."):
        try:
            generated = models[model_key](prompt, **generation_kwargs)
            display_results(generated, model_key)
        except Exception as e:
            st.error(f"Erro ao gerar imagem: {str(e)}")
            logging.error(f"Erro no modelo text-to-image: {e}")

# Entry point: build the UI only when this file is executed as a script
if __name__ == "__main__":
    main()