Update alz_companion/agent.py

alz_companion/agent.py  (+51 -6)  CHANGED
@@ -677,23 +677,68 @@ def make_rag_chain(vs_general: FAISS, vs_personal: FAISS, *, for_evaluation: boo
     # This logic retrieves all documents from the personal FAISS store and then
     # filters them to include ONLY text-based sources, excluding media files.
     print("[DEBUG] Personal Memory Route Activated. Retrieving all personal text documents...")
-
+
+    # 1. check if the personal vector store is valid and has content.
     if vs_personal and vs_personal.docstore and len(vs_personal.index_to_docstore_id) > 0:
-        # Get all documents from the FAISS docstore
-        all_personal_docs = list(vs_personal.docstore._dict.values())
+
+        ## NEW Experiment
+        # 2. If it's valid, proceed with the upgraded retrieval logic.
+        print("[DEBUG] Personal Memory Route Activated. Expanding query...")
+
+        # Expand the original query to include synonyms and rephrasings.
+        search_queries = [query]
+        try:
+            expansion_prompt = QUERY_EXPANSION_PROMPT.format(question=query)
+            expansion_messages = [{"role": "user", "content": expansion_prompt}]
+            raw_expansion = call_llm(expansion_messages, temperature=0.0)
+            expanded = json.loads(raw_expansion)
+            if isinstance(expanded, list):
+                search_queries.extend(expanded)
+                print(f"[DEBUG] Expanded Search Queries: {search_queries}")
+        except Exception as e:
+            print(f"[DEBUG] Query expansion failed: {e}")
+
+        # Perform a similarity search for EACH query variant.
+        initial_results = []
+        for q in search_queries:
+            initial_results.extend(vs_personal.similarity_search_with_score(q, k=3))
+
+        initial_results = dedup_docs(initial_results)
+        initial_results.sort(key=lambda x: x[1])
+        # END new experiment
+
+        # Get all documents from the FAISS docstore
+        # Uncomment this line if we UNDO above experiment
+        # all_personal_docs = list(vs_personal.docstore._dict.values())
 
         # 2. Filter this list to keep only text-based files
         text_based_docs = []
         text_extensions = ('.txt', '.jsonl') # Define what counts as a text source
-        for doc in all_personal_docs:
+        # ORIG: for doc in all_personal_docs:
+        for doc, score in initial_results:
             source = doc.metadata.get("source", "").lower()
             # if source.endswith(text_extensions):
             # NEW: Include saved personal conversations
             if source.endswith(text_extensions) or source == "saved chat":
-                text_based_docs.append(doc)
+                # ORIG: text_based_docs.append(doc)
+                text_based_results.append((doc, score))
+
+        # Add the debug print to show the final, filtered results.
+        print("\n--- DEBUG: Filtered Personal Documents (Text-Only, with scores) ---")
+        if text_based_results:
+            for doc, score in text_based_results:
+                source = doc.metadata.get('source', 'N/A')
+                print(f" - Score: {score:.4f} | Source: {source}")
+        else:
+            print(" - No relevant text-based personal documents found.")
+        print("---------------------------------------------------------------------\n")
 
         # 3. Extend the final list with only the filtered, text-based documents
-        all_retrieved_docs.extend(text_based_docs)
+        # Select the final 5 (parameter tuning) documents for the context.
+        final_personal_docs = [doc for doc, score in text_based_results[:5]]
+        all_retrieved_docs.extend(final_personal_docs)
+        # ORIG code
+        # all_retrieved_docs.extend(text_based_docs)
         # --- END OF MODIFICATION ---
 
     else:
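
QUERY_EXPANSION_PROMPT and call_llm are defined elsewhere in alz_companion/agent.py and are not part of this hunk. Since the calling code passes the raw model output to json.loads and only uses the result when it is a list, the prompt presumably asks the model for a bare JSON array of rephrasings. A minimal sketch of what such a prompt could look like (hypothetical wording, not the project's actual prompt):

# Hypothetical sketch only -- the real QUERY_EXPANSION_PROMPT lives elsewhere
# in agent.py. The contract implied by the calling code is that the model
# returns a bare JSON array of strings for json.loads() to parse.
QUERY_EXPANSION_PROMPT = (
    "Rephrase the question below in 2-3 alternative ways, using synonyms a "
    "family member might use in everyday conversation. Return ONLY a JSON "
    "array of strings, with no surrounding text.\n\n"
    "Question: {question}"
)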
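
dedup_docs is also defined outside this hunk. Because the same personal store is searched once per query variant, the combined results can contain the same chunk several times; LangChain's FAISS.similarity_search_with_score returns (Document, distance) pairs where a lower distance means a closer match, which is why the results are sorted in ascending order and only the first few are kept. A minimal sketch of a deduplicator consistent with that usage (assumed behaviour, not the project's actual implementation):

# Assumed behaviour only -- the real dedup_docs is defined elsewhere in agent.py.
# Collapses duplicate (Document, score) pairs, keeping the lowest (best)
# distance seen for each unique chunk, keyed on source + content.
def dedup_docs(results):
    best = {}
    for doc, score in results:
        key = (doc.metadata.get("source", ""), doc.page_content)
        if key not in best or score < best[key][1]:
            best[key] = (doc, score)
    return list(best.values())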
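
One detail to double-check: within the lines shown, the filter loop appends to and later slices text_based_results, but only the legacy text_based_docs list is initialized. Unless text_based_results is created earlier in make_rag_chain, the loop would need an initialization like the line below before it runs (placement assumed):

# Assumes text_based_results is not already defined earlier in the function.
# Holds the (Document, score) pairs that pass the text-only source filter.
text_based_results = []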