Upload 2 files
Browse files
- app.py +250 -0
- enhanced_search_v2.py +471 -0
 
    	
app.py
ADDED
@@ -0,0 +1,250 @@
# app.py (FINAL version - fixed for the 401 error by renaming the Secret)

import pandas as pd
from flask import Flask, render_template, request, jsonify
import os
import sys
import traceback
import subprocess
from sentence_transformers import SentenceTransformer
import csv
from collections import defaultdict
import datetime
import re
from huggingface_hub import InferenceClient

# --- Feedback variables and constants ---
USER_FEEDBACK_FILE = 'user_feedback.csv'
USER_BEST_MATCHES_COUNTS = {}
USER_FEEDBACK_THRESHOLD = 3
FEEDBACK_CSV_COLUMNS = ['timestamp', 'query_original', 'query_normalized', 'tuss_code_submitted', 'tuss_code_raw_input', 'tuss_description_associated', 'rol_names_associated', 'feedback_type']

# --- START OF FIX: read the Secret under its correct name ---
# Fetch the key from the Secret that was renamed to 'NOVITA_API_KEY'.
api_key = os.environ.get("NOVITA_API_KEY")
if not api_key:
    print("--- [AVISO CRÍTICO] Secret 'NOVITA_API_KEY' não encontrado. As chamadas para a IA irão falhar. ---")
    client_ia = None
else:
    client_ia = InferenceClient(
        provider="novita",
        api_key=api_key,
    )
    print("--- [SUCESSO] Cliente de Inferência da IA configurado com a chave correta. ---")
# --- END OF FIX ---


# --- Feedback functions (unchanged) ---
def normalize_text_for_feedback(text):
    if pd.isna(text): return ""
    from enhanced_search_v2 import normalize_text as es_normalize_text
    return es_normalize_text(str(text).strip())

def load_user_feedback():
    global USER_BEST_MATCHES_COUNTS; USER_BEST_MATCHES_COUNTS = {}
    feedback_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), USER_FEEDBACK_FILE)
    if not os.path.exists(feedback_file_path):
        with open(feedback_file_path, 'w', newline='', encoding='utf-8') as f: csv.writer(f).writerow(FEEDBACK_CSV_COLUMNS)
        return
    try:
        with open(feedback_file_path, 'r', encoding='utf-8') as f:
            reader = csv.reader(f)
            header = next(reader)
            if [col.strip() for col in header] != FEEDBACK_CSV_COLUMNS: raise ValueError("Cabeçalho inválido")
            for row in reader:
                if len(row) == len(FEEDBACK_CSV_COLUMNS):
                    row_dict = dict(zip(FEEDBACK_CSV_COLUMNS, row))
                    query_norm, tuss_code = row_dict.get('query_normalized', ''), row_dict.get('tuss_code_submitted', '')
                    if query_norm and tuss_code:
                        if query_norm not in USER_BEST_MATCHES_COUNTS: USER_BEST_MATCHES_COUNTS[query_norm] = {}
                        USER_BEST_MATCHES_COUNTS[query_norm][tuss_code] = USER_BEST_MATCHES_COUNTS[query_norm].get(tuss_code, 0) + 1
        print(f"--- [SUCESSO] Feedback de usuário carregado. ---")
    except Exception as e: print(f"--- [ERRO] Falha ao carregar feedback: {e} ---"); traceback.print_exc()

def append_user_feedback(query_original, tuss_code_submitted, feedback_type, tuss_code_raw_input=''):
    feedback_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), USER_FEEDBACK_FILE)
    query_normalized = normalize_text_for_feedback(query_original)
    tuss_descriptions, rol_names = [], []
    if DF_ORIGINAL is not None and not DF_ORIGINAL.empty:
        matching_rows = DF_ORIGINAL[DF_ORIGINAL['Codigo_TUSS'].astype(str) == tuss_code_submitted]
        if not matching_rows.empty:
            tuss_descriptions = matching_rows['Descricao_TUSS'].unique().tolist()
            rol_names = matching_rows['Procedimento_Rol'].unique().tolist()
    tuss_desc_assoc = " | ".join(filter(None, tuss_descriptions)) or 'Não encontrado'
    rol_names_assoc = " | ".join(filter(None, rol_names)) or 'Não encontrado'
    try:
        with open(feedback_file_path, 'a', newline='', encoding='utf-8') as f:
            csv.writer(f).writerow([datetime.datetime.now().isoformat(), query_original, query_normalized, tuss_code_submitted, tuss_code_raw_input, tuss_desc_assoc, rol_names_assoc, feedback_type])
        global USER_BEST_MATCHES_COUNTS
        if query_normalized not in USER_BEST_MATCHES_COUNTS: USER_BEST_MATCHES_COUNTS[query_normalized] = {}
        USER_BEST_MATCHES_COUNTS[query_normalized][tuss_code_submitted] = USER_BEST_MATCHES_COUNTS[query_normalized].get(tuss_code_submitted, 0) + 1
    except Exception as e: print(f"--- [ERRO] Falha ao adicionar feedback: {e} ---"); traceback.print_exc()

# --- Script execution and imports (unchanged) ---
# ... (same code as before)

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
try:
    from enhanced_search_v2 import load_and_prepare_database, load_correction_corpus, load_general_dictionary, search_procedure_with_log
    print("--- [SUCESSO] Módulo 'enhanced_search_v2.py' importado. ---")
except Exception as e: print(f"--- [ERRO CRÍTICO] Não foi possível importar 'enhanced_search_v2.py': {e} ---"); traceback.print_exc(); sys.exit(1)

app = Flask(__name__)

# --- Data loading ---
DF_ORIGINAL, DF_NORMALIZED, FUZZY_CORPUS, IDF_SCORES, DB_WORD_SET = (None, None, None, None, set())
CORRECTION_CORPUS, NORMALIZED_CORRECTION_CORPUS = [], []
PORTUGUESE_WORD_SET = set()
SEMANTIC_MODEL = None

try:
    db_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'rol_procedures_database.csv')
    DF_ORIGINAL, DF_NORMALIZED, FUZZY_CORPUS, IDF_SCORES, DB_WORD_SET = load_and_prepare_database(db_path)

    dict_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'Dic.csv')
    CORRECTION_CORPUS, NORMALIZED_CORRECTION_CORPUS = load_correction_corpus(dict_path, column_name='Termo_Correto')

    general_dict_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'dicionario_ptbr.txt')
    PORTUGUESE_WORD_SET = load_general_dictionary(general_dict_path)

    load_user_feedback()

    print("\n--- [SETUP] Carregando modelo semântico... ---")
    model_name = 'sentence-transformers/all-MiniLM-L6-v2'

    # --- FIX: the call is now clean, with no extra parameters. ---
    # Since there is no longer an HF_TOKEN in the environment, the library will not try to authenticate.
    SEMANTIC_MODEL = SentenceTransformer(model_name, device='cpu')

    print(f"--- [SUCESSO] Modelo semântico '{model_name}' carregado. ---")

except Exception as e:
    print(f"--- [ERRO CRÍTICO] Falha fatal durante o setup: {e} ---"); traceback.print_exc(); sys.exit(1)


# --- Application routes (the rest of the file is unchanged) ---
@app.route('/')
def index(): return render_template('index.html')

@app.route('/favicon.ico')
def favicon(): return '', 204

@app.route('/search', methods=['POST'])
def search():
    try:
        data = request.get_json()
        query = data.get('query', '').strip()

        results = search_procedure_with_log(
            query=query,
            df_original=DF_ORIGINAL,
            df_normalized=DF_NORMALIZED,
            fuzzy_search_corpus=FUZZY_CORPUS,
            correction_corpus=(CORRECTION_CORPUS, NORMALIZED_CORRECTION_CORPUS),
            portuguese_word_set=PORTUGUESE_WORD_SET,
            idf_scores=IDF_SCORES,
            db_word_set=DB_WORD_SET,
            limit_per_layer=10,
            semantic_model=SEMANTIC_MODEL,
            user_best_matches_counts=USER_BEST_MATCHES_COUNTS,
            user_feedback_threshold=USER_FEEDBACK_THRESHOLD
        )
        return jsonify(results)
    except Exception as e:
        print("--- [ERRO FATAL DURANTE A BUSCA] ---"); traceback.print_exc()
        return jsonify({"error": "Ocorreu um erro interno no motor de busca."}), 500

@app.route('/submit_feedback', methods=['POST'])
def submit_feedback_route():
    try:
        data = request.get_json()
        query, tuss_code_submitted, feedback_type, tuss_code_raw_input = data.get('query'), data.get('tuss_code'), data.get('feedback_type', 'unknown'), data.get('tuss_code_raw_input', '')
        if not query or not tuss_code_submitted: return jsonify({"status": "error", "message": "Query e TUSS Code são obrigatórios."}), 400
        append_user_feedback(query, tuss_code_submitted, feedback_type, tuss_code_raw_input)
        return jsonify({"status": "success", "message": "Feedback recebido!"}), 200
    except Exception as e: print("--- [ERRO NO SUBMIT_FEEDBACK] ---"); traceback.print_exc(); return jsonify({"status": "error", "message": "Erro interno."}), 500

@app.route('/get_tuss_info', methods=['GET'])
def get_tuss_info():
    tuss_code_prefix = request.args.get('tuss_prefix', '').strip()
    if not tuss_code_prefix: return jsonify([])
    suggestions = []
    if DF_ORIGINAL is not None and not DF_ORIGINAL.empty:
        filtered_df = DF_ORIGINAL[DF_ORIGINAL['Codigo_TUSS'].astype(str).str.startswith(tuss_code_prefix)]
        tuss_grouped = filtered_df.groupby('Codigo_TUSS').agg(tuss_descriptions=('Descricao_TUSS', lambda x: list(x.unique())), rol_names=('Procedimento_Rol', lambda x: list(x.unique()))).reset_index()
        for index, row in tuss_grouped.head(10).iterrows():
            tuss_desc = " | ".join(filter(None, row['tuss_descriptions'])) or 'Sem descrição TUSS'
            rol_name = " | ".join(filter(None, row['rol_names'])) or 'Sem procedimento Rol'
            suggestions.append({'tuss_code': str(row['Codigo_TUSS']), 'tuss_description': tuss_desc, 'rol_name': rol_name})
    return jsonify(suggestions)


@app.route('/get_ai_suggestion', methods=['POST'])
def get_ai_suggestion():
    if not client_ia:
        return jsonify({"error": "O serviço de IA não está configurado no servidor (sem chave de API)."}), 503

    try:
        data = request.get_json()
        query = data.get('query')
        results = data.get('results', [])

        if not query or not results:
            return jsonify({"error": "A consulta e os resultados são necessários."}), 400

        formatted_results = []
        for r in results:
            tuss_code = r.get('Codigo_TUSS', 'N/A')
            tuss_desc = r.get('Descricao_TUSS', 'N/A')
            rol_desc = r.get('Procedimento_Rol', 'N/A')
            sinonimos = ", ".join(filter(None, [
                r.get('Sinonimo_1'), r.get('Sinonimo_2'), r.get('Sinonimo_3'), r.get('Sinonimo_4'), r.get('Semantico')
            ]))
            formatted_results.append(
                f"TUSS: {tuss_code}\nDescrição TUSS: {tuss_desc}\nDescrição Rol: {rol_desc}\nContexto/Sinônimos: {sinonimos}\n"
            )

        results_string = "\n".join(formatted_results)
        system_prompt = (
            "Você é um especialista em terminologia médica e na tabela TUSS brasileira. "
            "Sua tarefa é analisar uma consulta de busca e uma lista de procedimentos médicos retornados por um sistema. "
            "Você deve escolher o ÚNICO procedimento que melhor corresponde à intenção da consulta. "
            "Responda APENAS com o código TUSS do procedimento escolhido (ex: 40301741). Não inclua nenhuma outra palavra, explicação ou pontuação."
        )
        user_prompt = (
            f"Consulta do usuário: \"{query}\"\n\n"
            "Resultados da busca:\n"
            "----------------------\n"
            f"{results_string}"
            "\n----------------------\n\n"
            "Qual destes é a melhor correspondência para a consulta? Responda apenas com o código TUSS."
        )

        completion = client_ia.chat.completions.create(
            model="baidu/ERNIE-4.5-21B-A3B-PT",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            max_tokens=20,
            temperature=0.1,
        )

        suggested_content = completion.choices[0].message.content.strip()
        suggested_tuss = re.sub(r'\D', '', suggested_content)

        if not suggested_tuss:
            return jsonify({"error": "A IA não conseguiu determinar um melhor resultado.", "details": suggested_content}), 422

        return jsonify({"suggested_tuss_code": suggested_tuss})

    except Exception as e:
        print("--- [ERRO FATAL NA SUGESTÃO DA IA] ---")
        traceback.print_exc()
        error_message = f"Ocorreu um erro interno ao processar a sugestão da IA: {str(e)}"
        return jsonify({"error": error_message}), 500


if __name__ == '__main__':
    port = int(os.environ.get("PORT", 7860))
    app.run(host='0.0.0.0', port=port, debug=False)
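
Not part of the upload, just for orientation: a minimal client sketch against the routes defined above. It assumes the app is running locally on the default port 7860; the query, TUSS code and feedback_type values are placeholders, and the JSON fields simply mirror what /search, /submit_feedback and /get_tuss_info read from the request.

# Illustrative client only, not part of the uploaded files.
import requests

BASE_URL = "http://localhost:7860"  # assumption: local run on the default port

# /search takes a JSON body with a "query" field and returns the engine's response as JSON.
resp = requests.post(f"{BASE_URL}/search", json={"query": "ressonancia magnetica do cranio"})
resp.raise_for_status()
search_payload = resp.json()

# /submit_feedback takes "query", "tuss_code" and an optional "feedback_type"
# (the TUSS code below is a placeholder, not a real lookup).
requests.post(f"{BASE_URL}/submit_feedback",
              json={"query": "ressonancia magnetica do cranio",
                    "tuss_code": "00000000",
                    "feedback_type": "best_match"})

# /get_tuss_info is a GET endpoint filtered by a TUSS code prefix.
suggestions = requests.get(f"{BASE_URL}/get_tuss_info", params={"tuss_prefix": "403"}).json()
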
    	
enhanced_search_v2.py
ADDED
@@ -0,0 +1,471 @@
| 1 | 
         
            +
            # enhanced_search_v2.py
         
     | 
| 2 | 
         
            +
            # ---------------------
         
     | 
| 3 | 
         
            +
            # Motor de busca híbrido e em camadas para procedimentos médicos.
         
     | 
| 4 | 
         
            +
            # Versão final consolidada com todas as otimizações.
         
     | 
| 5 | 
         
            +
            #
         
     | 
| 6 | 
         
            +
            # Funcionalidades Principais:
         
     | 
| 7 | 
         
            +
            # 1. Correção Ortográfica PRÉ-BUSCA: Corrige termos inválidos ANTES de qualquer busca.
         
     | 
| 8 | 
         
            +
            # 2. Camada 0 para Busca Literal Robusta: Encontra correspondências exatas da frase,
         
     | 
| 9 | 
         
            +
            #    ignorando caixa, acentos, pontuação e espaçamento.
         
     | 
| 10 | 
         
            +
            # 3. Early Exit Otimizado: Interrompe a busca com log claro e correto.
         
     | 
| 11 | 
         
            +
            # 4. Busca em Múltiplas Camadas: Da mais restrita (literal) à mais abrangente (ponderada).
         
     | 
| 12 | 
         
            +
            # 5. Pontuação por Relevância (IDF): Palavras raras têm mais peso.
         
     | 
| 13 | 
         
            +
            # 6. Limpeza de Dados: Zera campos do Rol para procedimentos que não são do Rol.
         
     | 
| 14 | 
         
            +
            # 7. Reordenação Semântica: Usa o MiniLM-L6-v2 para entender o significado e reordenar.
         
     | 
| 15 | 
         
            +
            # 8. Feedback de Usuário: Prioriza resultados validados pela comunidade.
         
     | 
| 16 | 
         
            +
            # 9. Lida com múltiplos procedimentos (linhas do DB) para um mesmo código TUSS.
         
     | 
| 17 | 
         
            +
             
     | 
| 18 | 
         
            +
            import pandas as pd
         
     | 
| 19 | 
         
            +
            import re
         
     | 
| 20 | 
         
            +
            from thefuzz import process, fuzz
         
     | 
| 21 | 
         
            +
            from unidecode import unidecode
         
     | 
| 22 | 
         
            +
            import time
         
     | 
| 23 | 
         
            +
            from sentence_transformers import util
         
     | 
| 24 | 
         
            +
            import torch
         
     | 
| 25 | 
         
            +
            import math
         
     | 
| 26 | 
         
            +
            from collections import defaultdict
         
     | 
| 27 | 
         
            +
             
     | 
| 28 | 
         
            +
             
     | 
| 29 | 
         
            +
            # --- FUNÇÕES AUXILIARES DE NORMALIZAÇÃO ---
         
     | 
| 30 | 
         
            +
             
     | 
| 31 | 
         
            +
            def literal_normalize_text(text):
         
     | 
| 32 | 
         
            +
                """
         
     | 
| 33 | 
         
            +
                Normaliza o texto para busca literal (Camada 0): minúsculas, sem acentos,
         
     | 
| 34 | 
         
            +
                sem pontuação e com espaços padronizados.
         
     | 
| 35 | 
         
            +
                """
         
     | 
| 36 | 
         
            +
                if pd.isna(text): return ""
         
     | 
| 37 | 
         
            +
                normalized = unidecode(str(text).lower())
         
     | 
| 38 | 
         
            +
                normalized = re.sub(r'[^\w\s]', '', normalized)
         
     | 
| 39 | 
         
            +
                return re.sub(r'\s+', ' ', normalized).strip()
         
     | 
| 40 | 
         
            +
             
     | 
| 41 | 
         
            +
            def normalize_text(text):
         
     | 
| 42 | 
         
            +
                """Normaliza o texto para busca por tokens (palavras): minúsculas, sem acentos e espaços extras."""
         
     | 
| 43 | 
         
            +
                if pd.isna(text): return ""
         
     | 
| 44 | 
         
            +
                return unidecode(str(text).lower().strip())
         
     | 
| 45 | 
         
            +
             
     | 
| 46 | 
         
            +
            def get_longest_word(query_text):
         
     | 
| 47 | 
         
            +
                """Extrai a palavra mais longa de uma query (usado no fallback)."""
         
     | 
| 48 | 
         
            +
                words = re.findall(r'\b\w{4,}\b', query_text)
         
     | 
| 49 | 
         
            +
                if not words: return ""
         
     | 
| 50 | 
         
            +
                return max(words, key=len)
         
     | 
| 51 | 
         
            +
             
     | 
| 52 | 
         
            +
             
     | 
| 53 | 
         
            +
            # --- FUNÇÕES DE FORMATAÇÃO E DESTAQUE ---
         
     | 
| 54 | 
         
            +
             
     | 
| 55 | 
         
            +
            # No arquivo enhanced_search_v2.py
         
     | 
| 56 | 
         
            +
             
     | 
| 57 | 
         
            +
            def format_result(row_data, match_type="", score=0):
         
     | 
| 58 | 
         
            +
                """
         
     | 
| 59 | 
         
            +
                Formata uma linha do DataFrame em um dicionário de resultado padrão.
         
     | 
| 60 | 
         
            +
                Aplica a regra de negócio para limpar dados se o procedimento não for do Rol.
         
     | 
| 61 | 
         
            +
                """
         
     | 
| 62 | 
         
            +
                data = row_data.copy()
         
     | 
| 63 | 
         
            +
                if data.get('Correlacao_Rol', '').strip().lower() != 'sim':
         
     | 
| 64 | 
         
            +
                    data['Grupo'], data['Subgrupo'], data['Vigencia'], data['Resolucao_Normativa'] = '', '', '', ''
         
     | 
| 65 | 
         
            +
                    data['PAC'], data['DUT'] = '---', '---'
         
     | 
| 66 | 
         
            +
                else:
         
     | 
| 67 | 
         
            +
                    data['PAC'] = 'Sim' if data.get('PAC', '').strip().lower() == 'pac' else 'Não'
         
     | 
| 68 | 
         
            +
                    
         
     | 
| 69 | 
         
            +
                    # Lógica da DUT corrigida aqui
         
     | 
| 70 | 
         
            +
                    original_dut_value = data.get('DUT', '').strip()
         
     | 
| 71 | 
         
            +
                    # CORREÇÃO: A verificação agora aceita números com ponto decimal (ex: "65.1")
         
     | 
| 72 | 
         
            +
                    # A lógica é: se o valor, após remover o primeiro '.', for composto apenas de dígitos, é válido.
         
     | 
| 73 | 
         
            +
                    if original_dut_value and original_dut_value.replace('.', '', 1).isdigit():
         
     | 
| 74 | 
         
            +
                        data['DUT'] = f'Sim, DUT nº {original_dut_value}'
         
     | 
| 75 | 
         
            +
                    else:
         
     | 
| 76 | 
         
            +
                        data['DUT'] = 'Não'
         
     | 
| 77 | 
         
            +
                        
         
     | 
| 78 | 
         
            +
                standard_columns = [
         
     | 
| 79 | 
         
            +
                    'Codigo_TUSS', 'Descricao_TUSS', 'Correlacao_Rol', 'Procedimento_Rol',
         
     | 
| 80 | 
         
            +
                    'Resolucao_Normativa', 'Vigencia', 'OD', 'AMB', 'HCO', 'HSO', 'PAC',
         
     | 
| 81 | 
         
            +
                    'DUT', 'SUBGRUPO', 'GRUPO', 'CAPITULO', 'Sinonimo_1', 'Sinonimo_2',
         
     | 
| 82 | 
         
            +
                    'Sinonimo_3', 'Sinonimo_4', 'Semantico'
         
     | 
| 83 | 
         
            +
                ]
         
     | 
| 84 | 
         
            +
                formatted_data = {col: data.get(col, '') for col in standard_columns}
         
     | 
| 85 | 
         
            +
                result = {"score": round(score), "match_type": match_type}
         
     | 
| 86 | 
         
            +
                result.update(formatted_data)
         
     | 
| 87 | 
         
            +
                return result
         
     | 
| 88 | 
         
            +
             
     | 
| 89 | 
         
            +
            def _highlight_matches(results, query):
         
     | 
| 90 | 
         
            +
                """Adiciona tags <b></b> em volta das palavras da query nos resultados."""
         
     | 
| 91 | 
         
            +
                if not query or not results: return results
         
     | 
| 92 | 
         
            +
                stopwords = {'de', 'do', 'da', 'dos', 'das', 'a', 'o', 'e', 'em', 'um', 'uma', 'para', 'com'}
         
     | 
| 93 | 
         
            +
                query_words = {word for word in normalize_text(query).split() if len(word) > 2 and word not in stopwords}
         
     | 
| 94 | 
         
            +
                cols_to_highlight = ['Descricao_TUSS', 'Procedimento_Rol', 'Sinonimo_1', 'Sinonimo_2', 'Sinonimo_3', 'Sinonimo_4', 'Semantico']
         
     | 
| 95 | 
         
            +
                
         
     | 
| 96 | 
         
            +
                for result in results:
         
     | 
| 97 | 
         
            +
                    for col in cols_to_highlight:
         
     | 
| 98 | 
         
            +
                        original_text = result.get(col, '')
         
     | 
| 99 | 
         
            +
                        highlighted_text = original_text
         
     | 
| 100 | 
         
            +
                        if original_text and query_words:
         
     | 
| 101 | 
         
            +
                            for word in sorted(list(query_words), key=len, reverse=True):
         
     | 
| 102 | 
         
            +
                                pattern = r'\b(' + re.escape(word) + r')\b'
         
     | 
| 103 | 
         
            +
                                highlighted_text = re.sub(pattern, r'<b>\1</b>', highlighted_text, flags=re.IGNORECASE)
         
     | 
| 104 | 
         
            +
                        result[f"{col}_highlighted"] = highlighted_text
         
     | 
| 105 | 
         
            +
                return results
         
     | 
| 106 | 
         
            +
             
     | 
| 107 | 
         
            +
             
     | 
| 108 | 
         
            +
            # --- FUNÇÕES DE CARREGAMENTO DE DADOS ---
         
     | 
| 109 | 
         
            +
             
     | 
| 110 | 
         
            +
            def load_and_prepare_database(db_path):
         
     | 
| 111 | 
         
            +
                """
         
     | 
| 112 | 
         
            +
                Carrega o CSV, cria colunas normalizadas, campo de texto único, pesos IDF e um conjunto
         
     | 
| 113 | 
         
            +
                de todas as palavras únicas da base de dados.
         
     | 
| 114 | 
         
            +
                """
         
     | 
| 115 | 
         
            +
                try:
         
     | 
| 116 | 
         
            +
                    print(f"Carregando e preparando a base de dados de: {db_path}...")
         
     | 
| 117 | 
         
            +
                    df_original = pd.read_csv(db_path, dtype=str).fillna('')
         
     | 
| 118 | 
         
            +
                    search_cols = ['Descricao_TUSS', 'Procedimento_Rol', 'Sinonimo_1', 'Sinonimo_2', 'Sinonimo_3', 'Sinonimo_4', 'Semantico']
         
     | 
| 119 | 
         
            +
                    df_normalized = df_original.copy()
         
     | 
| 120 | 
         
            +
                    
         
     | 
| 121 | 
         
            +
                    df_normalized['Codigo_TUSS_literal'] = df_normalized['Codigo_TUSS'].apply(literal_normalize_text)
         
     | 
| 122 | 
         
            +
                    df_normalized['Codigo_TUSS_norm'] = df_normalized['Codigo_TUSS'].apply(normalize_text)
         
     | 
| 123 | 
         
            +
                    
         
     | 
| 124 | 
         
            +
                    df_normalized['full_text_norm'] = ""
         
     | 
| 125 | 
         
+        for col in search_cols:
+            if col in df_normalized.columns:
+                df_normalized[f'{col}_literal'] = df_normalized[col].apply(literal_normalize_text)
+                df_normalized[f'{col}_norm'] = df_normalized[col].apply(normalize_text)
+                df_normalized['full_text_norm'] += ' ' + df_normalized[f'{col}_norm']
+
+        print("Calculando pesos IDF e dicionário da base...")
+        num_documents = len(df_normalized)
+        doc_freq = defaultdict(int)
+        db_word_set = set()
+        for text in df_normalized['full_text_norm']:
+            words = set(text.split())
+            db_word_set.update(words)
+            for word in words:
+                if word: doc_freq[word] += 1
+        db_word_set.discard('')
+        print(f"Dicionário da base de dados criado com {len(db_word_set)} palavras únicas.")
+
+        idf_scores = {word: math.log(num_documents / (freq + 1)) for word, freq in doc_freq.items()}
+        print(f"Pesos IDF calculados para {len(idf_scores)} palavras.")
+
+        print("Criando corpus para busca fuzzy...")
+        fuzzy_search_corpus = []
+        for index, row in df_normalized.iterrows():
+            for col in search_cols:
+                if col in df_original.columns and f'{col}_norm' in row and pd.notna(row[f'{col}_norm']):
+                    val = row[f'{col}_norm']
+                    if val: fuzzy_search_corpus.append((val, index, f'{col}_norm'))
+
+        print(f"Base de dados pronta com {len(df_original)} procedimentos.")
+        return df_original, df_normalized, fuzzy_search_corpus, idf_scores, db_word_set
+    except Exception as e:
+        print(f"Erro crítico ao carregar/preparar a base de dados: {e}"); raise
+
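The dictionary comprehension above gives every word in the base a smoothed inverse-document-frequency weight, log(N / (df + 1)), which Camada 5 later uses to favour rare, informative terms. A minimal self-contained sketch of the same weighting; the toy corpus below is illustrative, not from the TUSS base:

import math
from collections import defaultdict

docs = ["ressonancia magnetica de cranio", "tomografia de cranio", "ressonancia magnetica de joelho"]
doc_freq = defaultdict(int)
for text in docs:
    for word in set(text.split()):
        doc_freq[word] += 1

num_documents = len(docs)
idf = {w: math.log(num_documents / (f + 1)) for w, f in doc_freq.items()}
# "tomografia" (df=1) -> log(3/2) ≈ 0.41, while "de" (df=3) -> log(3/4) ≈ -0.29:
# with the +1 smoothing, words present in every document can even go negative.
print(idf)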
         
+def load_general_dictionary(path):
+    try:
+        print(f"Carregando dicionário geral de português de: {path}...")
+        with open(path, 'r', encoding='utf-8') as f:
+            words = {normalize_text(line.strip()) for line in f if line.strip()}
+        print(f"Dicionário geral carregado com {len(words)} palavras.")
+        return words
+    except FileNotFoundError: return set()
+    except Exception as e: return set()
+
+def load_correction_corpus(dict_path, column_name='Termo_Correto'):
+    try:
+        print(f"Carregando corpus de correção de: {dict_path}...")
+        df_dict = pd.read_csv(dict_path, dtype=str).fillna('')
+        if column_name not in df_dict.columns: return [], []
+
+        original_corpus = df_dict[column_name].dropna().astype(str).tolist()
+        normalized_corpus = [normalize_text(term) for term in original_corpus]
+
+        print(f"Corpus de correção carregado com {len(original_corpus)} termos.")
+        return original_corpus, normalized_corpus
+    except FileNotFoundError: return [], []
+    except Exception as e: return [], []
+
+
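load_correction_corpus only needs a CSV exposing a Termo_Correto column; any other columns are ignored, and the function returns the terms both as written and normalized. A small usage sketch; the file name and terms are invented for illustration:

import pandas as pd

# Hypothetical dictionary file with the expected 'Termo_Correto' column.
pd.DataFrame({"Termo_Correto": ["ressonância magnética", "ultrassonografia", "colonoscopia"]}) \
    .to_csv("dicionario_exemplo.csv", index=False)

original_corpus, normalized_corpus = load_correction_corpus("dicionario_exemplo.csv")
# original_corpus keeps accents and case for display; normalized_corpus is what the
# pre-search spell correction matches against with fuzz.token_set_ratio.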
         
+# --- FUNÇÃO DE RECLASSIFICAÇÃO SEMÂNTICA ---
+
+def rerank_with_semantic_model(original_query, results_list, model):
+    if not model or not results_list: return results_list
+    semantic_columns = ['Descricao_TUSS', 'Procedimento_Rol', 'SUBGRUPO', 'Sinonimo_1', 'Sinonimo_2', 'Sinonimo_3', 'Sinonimo_4', 'Semantico']
+    corpus_texts = [". ".join(sorted(list({res.get(col) for col in semantic_columns if res.get(col) and isinstance(res.get(col), str)}))) for res in results_list]
+    try:
+        query_embedding = model.encode(original_query, convert_to_tensor=True, show_progress_bar=False)
+        corpus_embeddings = model.encode(corpus_texts, convert_to_tensor=True, show_progress_bar=False)
+        cosine_scores = util.cos_sim(query_embedding, corpus_embeddings)
+    except Exception as e: return results_list
+    for i, result in enumerate(results_list):
+        result['semantic_score'] = round(max(0, cosine_scores[0][i].item()) * 100)
+        result['hybrid_score'] = result['semantic_score'] + result.get('score', 0)
+    return sorted(results_list, key=lambda x: (x.get('score', 0) == 100, x.get('hybrid_score', 0)), reverse=True)
+
+
         
+# --- FUNÇÃO INTERNA DE BUSCA COM CAMADAS ---
+
+def _run_search_layers(literal_query, normalized_query, response, df_original, df_normalized, fuzzy_search_corpus, idf_scores, limit_per_layer):
+    """Executa as camadas de busca e retorna o nome da camada de saída em caso de early exit."""
+    matched_indices = set()
+    stopwords = {'de', 'do', 'da', 'dos', 'das', 'a', 'o', 'e', 'em', 'um', 'uma', 'para', 'com'}
+    query_words = [word for word in normalized_query.split() if word not in stopwords and len(word) > 1]
+
+    # --- CAMADA 0: Busca Literal ---
+    if literal_query:
+        temp_results = []
+        literal_cols = ['Codigo_TUSS_literal', 'Descricao_TUSS_literal', 'Procedimento_Rol_literal']
+        for col in literal_cols:
+            if col in df_normalized.columns:
+                mask = df_normalized[col].str.contains(r'\b' + re.escape(literal_query) + r'\b', na=False)
+                matches = df_normalized[mask]
+                for index, _ in matches.iterrows():
+                    if index not in matched_indices:
+                        match_type = "Código Literal" if "Codigo" in col else "Texto Literal"
+                        temp_results.append(format_result(df_original.loc[index], match_type, 100))
+                        matched_indices.add(index)
+        if temp_results:
+            response["results_by_layer"]["literal_matches"] = sorted(temp_results, key=lambda x: x['Codigo_TUSS'])[:limit_per_layer]
+            return "Busca Literal"
+
+    # --- CAMADA 1: Busca Normalizada Exata ---
+    temp_results = []
+    if normalized_query:
+        exact_code_matches = df_normalized[df_normalized['Codigo_TUSS_norm'] == normalized_query]
+        for index, _ in exact_code_matches.iterrows():
+            if index not in matched_indices:
+                temp_results.append(format_result(df_original.loc[index], "Código Exato (Normalizado)", 100))
+                matched_indices.add(index)
+        for col in ['Descricao_TUSS_norm', 'Procedimento_Rol_norm']:
+            if col in df_normalized.columns:
+                exact_text_matches = df_normalized[df_normalized[col] == normalized_query]
+                for index, _ in exact_text_matches.iterrows():
+                    if index not in matched_indices:
+                        temp_results.append(format_result(df_original.loc[index], "Exato (Normalizado)", 100))
+                        matched_indices.add(index)
+    if temp_results:
+        response["results_by_layer"]["exact_matches"] = sorted(temp_results, key=lambda x: x['Codigo_TUSS'])[:limit_per_layer]
+        return "Normalizada Exata"
+
+    # --- CAMADA 2: Busca Lógica 'E' ---
+    temp_results = []
+    if query_words:
+        mask = pd.Series(True, index=df_normalized.index)
+        for word in query_words:
+            mask &= df_normalized['full_text_norm'].str.contains(r'\b' + re.escape(word) + r'\b', na=False)
+        for index, row in df_normalized[mask & ~df_normalized.index.isin(matched_indices)].iterrows():
+            score = fuzz.WRatio(normalized_query, row.get('full_text_norm', ''))
+            if score > 85:
+                temp_results.append(format_result(df_original.loc[index], "Busca Lógica (E)", score))
+                matched_indices.add(index)
+    response["results_by_layer"]["logical_matches"] = sorted(temp_results, key=lambda x: x.get('score', 0), reverse=True)[:limit_per_layer]
+
+    # --- CAMADA 3: Busca 'Quase Exata' (Fuzzy) ---
+    temp_results = []
+    processed_indices_layer3 = set()
+    if fuzzy_search_corpus and normalized_query:
+        almost_exact_matches = process.extractBests(normalized_query, [item[0] for item in fuzzy_search_corpus], scorer=fuzz.token_set_ratio, limit=limit_per_layer * 3, score_cutoff=90)
+        for match_text, score in almost_exact_matches:
+            if score == 100 and match_text == normalized_query: continue
+            corpus_items = [item for item in fuzzy_search_corpus if item[0] == match_text]
+            for _, original_index, _ in corpus_items:
+                if original_index not in matched_indices and original_index not in processed_indices_layer3:
+                    temp_results.append(format_result(df_original.loc[original_index], "Quase Exato", 98))
+                    matched_indices.add(original_index)
+                    processed_indices_layer3.add(original_index)
+    response["results_by_layer"]["almost_exact_matches"] = sorted(temp_results, key=lambda x: x['Codigo_TUSS'])[:limit_per_layer]
+
+    # --- CAMADA 4: Busca por Termos Validados ---
+    temp_results = []
+    if query_words:
+        mask_c4 = pd.Series(True, index=df_normalized.index)
+        for word in query_words:
+            mask_c4 &= df_normalized['full_text_norm'].str.contains(r'\b' + re.escape(word) + r'\b', na=False)
+        for index, row in df_normalized[mask_c4 & ~df_normalized.index.isin(matched_indices)].iterrows():
+            score = fuzz.WRatio(normalized_query, row.get('Descricao_TUSS_norm', ''))
+            if score > 75:
+                temp_results.append(format_result(df_original.loc[index], "Termos Validados (E)", score))
+                matched_indices.add(index)
+    response["results_by_layer"]["contains_matches"] = sorted(temp_results, key=lambda x: x.get('score', 0), reverse=True)[:limit_per_layer]
+
+    # --- CAMADA 5: Busca Ponderada (IDF) ---
+    temp_results = []
+    if query_words and idf_scores:
+        max_idf = max(idf_scores.values()) if idf_scores else 1.0
+        total_query_idf = sum(idf_scores.get(word, max_idf) for word in query_words)
+        regex_pattern = '|'.join(re.escape(word) for word in query_words)
+        mask = df_normalized['full_text_norm'].str.contains(regex_pattern, na=False)
+        candidate_df = df_normalized[mask & ~df_normalized.index.isin(matched_indices)]
+        for index, row in candidate_df.iterrows():
+            weighted_score = sum(idf_scores.get(word, 0) for word in query_words if word in row['full_text_norm'].split())
+            normalized_score = (weighted_score / total_query_idf) * 90 if total_query_idf > 0 else 0
+            if query_words and row.get('full_text_norm', '').strip().startswith(query_words[0]):
+                normalized_score = min(normalized_score + 10, 95)
+            temp_results.append(format_result(df_original.loc[index], "Busca Ponderada (IDF)", normalized_score))
+            matched_indices.add(index)
+    response["results_by_layer"]["term_matches"] = sorted(temp_results, key=lambda x: x.get('score', 0), reverse=True)[:limit_per_layer * 4]
+
+    # --- CAMADA 6: Fallback ---
+    total_found_primary = sum(len(v) for v in response["results_by_layer"].values())
+    if total_found_primary == 0 and normalized_query:
+        temp_results = []
+        longest_word = get_longest_word(normalized_query)
+        if longest_word:
+            mask_c6 = df_normalized['full_text_norm'].str.contains(r'\b' + re.escape(longest_word) + r'\b', na=False)
+            for index, row in df_normalized[mask_c6 & ~df_normalized.index.isin(matched_indices)].iterrows():
+                temp_results.append(format_result(df_original.loc[index], f"Palavra-Chave '{longest_word}'", 80))
+        response["results_by_layer"]["keyword_matches"] = sorted(temp_results, key=lambda x: x['Codigo_TUSS'])[:limit_per_layer]
+
+    return None
+
+
         
+# --- FUNÇÃO PRINCIPAL QUE ORQUESTRA A BUSCA ---
+
+def search_procedure_with_log(query, df_original, df_normalized, fuzzy_search_corpus, correction_corpus,
+                              portuguese_word_set, idf_scores, db_word_set,
+                              limit_per_layer=10, semantic_model=None,
+                              user_best_matches_counts=None, user_feedback_threshold=10):
+    SEMANTIC_RERANK_LIMIT = 40
+    start_time = time.time()
+    original_query = str(query).strip()
+
+    response = {"search_log": [],
+                "results_by_layer": {"literal_matches": [], "exact_matches": [], "logical_matches": [],
+                                     "almost_exact_matches": [], "contains_matches": [],
+                                     "term_matches": [], "keyword_matches": []},
+                "final_semantic_results": [], "was_corrected": False, "original_query": original_query,
+                "corrected_query": ""}
+
+    if not original_query:
+        response["search_log"].append("Query vazia, busca não realizada.")
+        return response
+
+    response["search_log"].append(f"Buscando por: '{original_query}'")
+
+    # ETAPA 1: CORREÇÃO ORTOGRÁFICA PRÉ-BUSCA
+    stopwords = {'de', 'do', 'da', 'dos', 'das', 'a', 'o', 'e', 'em', 'um', 'uma', 'para', 'com'}
+    query_after_correction = original_query
+    original_correction_corpus, normalized_correction_corpus = correction_corpus
+    valid_words = portuguese_word_set.union(db_word_set)
+
+    if valid_words and original_correction_corpus:
+        words_from_query, corrected_words, made_correction = original_query.split(), [], False
+        for word in words_from_query:
+            norm_word = normalize_text(word)
+            if norm_word in stopwords or len(norm_word) < 4:
+                corrected_words.append(word)
+                continue
+
+            if norm_word not in valid_words:
+                match_norm, score = process.extractOne(norm_word, normalized_correction_corpus, scorer=fuzz.token_set_ratio)
+                if score >= 85:
+                    match_index = normalized_correction_corpus.index(match_norm)
+                    corrected_word_original = original_correction_corpus[match_index]
+
+                    corrected_word = corrected_word_original
+                    if word.istitle(): corrected_word = corrected_word.title()
+                    elif word.isupper(): corrected_word = corrected_word.upper()
+
+                    corrected_words.append(corrected_word)
+                    made_correction = True
+                else:
+                    corrected_words.append(word)
+            else:
+                corrected_words.append(word)
+        if made_correction:
+            query_after_correction = " ".join(corrected_words)
+            response["was_corrected"] = True
+            response["corrected_query"] = query_after_correction
+            response["search_log"].append(f"Query corrigida para: '{query_after_correction}'.")
+
         
+    # ETAPA 2: PREPARAÇÃO DAS QUERIES PARA AS CAMADAS
+    literal_query = literal_normalize_text(query_after_correction)
+    cleaned_query = " ".join([word for word in query_after_correction.split() if normalize_text(word) not in stopwords])
+    normalized_query = normalize_text(cleaned_query)
+
+    if not cleaned_query.strip() and not literal_query.strip():
+        response["search_log"].append("Query resultante é vazia. Busca não realizada.")
+        return response
+
+    if cleaned_query != query_after_correction:
+        response["search_log"].append(f"Query limpa (sem stop words): '{cleaned_query}'")
+
+    # Para proibir buscas com apenas um caractere
+    if len(cleaned_query.strip()) <= 1:
+        # Pega o termo que seria buscado para exibir no log. Pode ser vazio.
+        term_to_log = cleaned_query.strip()
+        response["search_log"].append(f"Busca por '{term_to_log}' ignorada. A busca deve conter no mínimo 2 caracteres.")
+        response["final_semantic_results"] = []
+        return response
+
+    # ETAPA 3: EXECUÇÃO DA BUSCA
+    exit_layer_name = _run_search_layers(literal_query, normalized_query, response, df_original, df_normalized, fuzzy_search_corpus,
+                                         idf_scores, limit_per_layer)
+
+    # ETAPA 4: AGREGAÇÃO E REORDENAÇÃO DOS RESULTADOS
+    all_candidates = []
+    layer_order = ["literal_matches", "exact_matches", "logical_matches", "almost_exact_matches", "contains_matches", "term_matches", "keyword_matches"]
+    layer_names_map = {"literal_matches": "0. Busca Literal", "exact_matches": "1. Normalizada Exata", "logical_matches": "2. Lógica 'E'",
+                       "almost_exact_matches": "3. Quase Exatos (Fuzzy)", "contains_matches": "4. Termos Validados",
+                       "term_matches": "5. Busca Ponderada (IDF)", "keyword_matches": "6. Fallback (Palavra-Chave)"}
+
+    if exit_layer_name:
+        response["search_log"].append(f"--- [OTIMIZAÇÃO] Resultado de alta confiança encontrado na camada '{exit_layer_name}'. Busca interrompida. ---")
+
+    response["search_log"].append("\n--- Detalhamento da Busca por Camadas ---")
+    for layer_key in layer_order:
+        layer_results = response["results_by_layer"].get(layer_key, [])
+        num_results = len(layer_results)
+        response["search_log"].append(f"[{layer_names_map.get(layer_key, layer_key)}]: {num_results} resultado(s)")
+        all_candidates.extend(layer_results)
+
+    # Lógica de feedback do usuário
+    feedback_prioritized_tuss_votes = {}
+    if user_best_matches_counts and all_candidates:
+        query_norm_for_feedback = normalize_text(response.get("corrected_query") or original_query)
+        feedback_for_query = user_best_matches_counts.get(query_norm_for_feedback, {})
+        for tuss_code, votes in feedback_for_query.items():
+            if votes >= user_feedback_threshold:
+                feedback_prioritized_tuss_votes[tuss_code] = votes
+        if feedback_prioritized_tuss_votes:
+            response["search_log"].append(f"\nFeedback de usuários qualificado encontrado.")
+            for result in all_candidates:
+                if result.get('Codigo_TUSS') in feedback_prioritized_tuss_votes:
+                    result['is_user_best_match'] = True
+                    result['feedback_votes'] = feedback_prioritized_tuss_votes[result.get('Codigo_TUSS')]
+
         
+    response["search_log"].append(f"\n--- Análise e Reordenação ---\nTotal de candidatos encontrados: {len(all_candidates)}")
+
+    query_for_highlight = response.get("corrected_query") or cleaned_query
+    all_candidates = _highlight_matches(all_candidates, query_for_highlight)
+    final_list = []
+
+    if all_candidates:
+        query_for_semantic = response.get("corrected_query") or cleaned_query
+        prioritized_candidates = [res for res in all_candidates if res.get('is_user_best_match')]
+        non_prioritized_candidates = [res for res in all_candidates if not res.get('is_user_best_match')]
+
+        if semantic_model and prioritized_candidates:
+            reranked_prioritized = rerank_with_semantic_model(query_for_semantic, prioritized_candidates, semantic_model)
+            prioritized_results_sorted = sorted(reranked_prioritized, key=lambda x: (x.get('feedback_votes', 0), x.get('semantic_score', 0)), reverse=True)
+        else:
+            prioritized_results_sorted = sorted(prioritized_candidates, key=lambda x: (x.get('feedback_votes', 0), x.get('score', 0)), reverse=True)
+        final_list.extend(prioritized_results_sorted)
+
+        if semantic_model and non_prioritized_candidates:
+            candidates_for_rerank = non_prioritized_candidates[:SEMANTIC_RERANK_LIMIT]
+            reranked_non_prioritized = rerank_with_semantic_model(query_for_semantic, candidates_for_rerank, semantic_model)
+            final_list.extend(reranked_non_prioritized)
+            seen_reranked_codes = {r.get('Codigo_TUSS') for r in reranked_non_prioritized}
+            for candidate in non_prioritized_candidates:
+                if candidate.get('Codigo_TUSS') not in seen_reranked_codes:
+                    final_list.append(candidate)
+        else:
+            final_list.extend(sorted(non_prioritized_candidates, key=lambda x: x.get('score', 0), reverse=True))
+
+        response["search_log"].append(f"Lista final de resultados combinada: {len(final_list)} itens antes do limite.")
+        response["final_semantic_results"] = final_list[:10]
+    else:
+        response["search_log"].append("Nenhum resultado final para exibir.")
+        response["final_semantic_results"] = []
+
+    end_time = time.time()
+    response["search_duration_seconds"] = round(end_time - start_time, 4)
+    response["search_log"].append(f"\nBusca completa em {response['search_duration_seconds']} segundos.")
+    print(f"\n\n==================== LOG DE DEPURAÇÃO (QUERY: '{original_query}') ====================")
+    return response
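A hedged sketch of how app.py is likely to wire these pieces together; the loader name load_database, the file paths, and the model checkpoint are assumptions, not taken from this commit:

from sentence_transformers import SentenceTransformer

# Hypothetical names; the real loader and paths live in app.py / earlier in this file.
df_original, df_normalized, fuzzy_corpus, idf_scores, db_words = load_database("base_tuss.csv")
pt_words = load_general_dictionary("portugues.txt")
correction_corpus = load_correction_corpus("dicionario_correcao.csv")
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")  # assumed checkpoint

resp = search_procedure_with_log("resonancia do cranio", df_original, df_normalized, fuzzy_corpus,
                                 correction_corpus, pt_words, idf_scores, db_words,
                                 limit_per_layer=10, semantic_model=model)
print("\n".join(resp["search_log"]))
for hit in resp["final_semantic_results"]:
    print(hit.get("Codigo_TUSS"), hit.get("Descricao_TUSS"), hit.get("hybrid_score"))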