KeenWoo committed on
Commit
41d9a09
·
verified ·
1 Parent(s): 47ca460

Update alz_companion/agent.py

Browse files
Files changed (1) hide show
  1. alz_companion/agent.py +51 -6
alz_companion/agent.py CHANGED
@@ -677,23 +677,68 @@ def make_rag_chain(vs_general: FAISS, vs_personal: FAISS, *, for_evaluation: boo
677
  # This logic retrieves all documents from the personal FAISS store and then
678
  # filters them to include ONLY text-based sources, excluding media files.
679
  print("[DEBUG] Personal Memory Route Activated. Retrieving all personal text documents...")
680
-
 
681
  if vs_personal and vs_personal.docstore and len(vs_personal.index_to_docstore_id) > 0:
682
- # 1. Get all documents from the FAISS docstore
683
- all_personal_docs = list(vs_personal.docstore._dict.values())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
684
 
685
  # 2. Filter this list to keep only text-based files
686
  text_based_docs = []
687
  text_extensions = ('.txt', '.jsonl') # Define what counts as a text source
688
- for doc in all_personal_docs:
 
689
  source = doc.metadata.get("source", "").lower()
690
  # if source.endswith(text_extensions):
691
  # NEW: Include saved personal conversations
692
  if source.endswith(text_extensions) or source == "saved chat":
693
- text_based_docs.append(doc)
 
 
 
 
 
 
 
 
 
 
 
694
 
695
  # 3. Extend the final list with only the filtered, text-based documents
696
- all_retrieved_docs.extend(text_based_docs)
 
 
 
 
697
  # --- END OF MODIFICATION ---
698
 
699
  else:
 
677
  # This logic retrieves all documents from the personal FAISS store and then
678
  # filters them to include ONLY text-based sources, excluding media files.
679
  print("[DEBUG] Personal Memory Route Activated. Retrieving all personal text documents...")
680
+
681
+ # 1. check if the personal vector store is valid and has content.
682
  if vs_personal and vs_personal.docstore and len(vs_personal.index_to_docstore_id) > 0:
683
+
684
+ ## NEW Experiment
685
+ # 2. If it's valid, proceed with the upgraded retrieval logic.
686
+ print("[DEBUG] Personal Memory Route Activated. Expanding query...")
687
+
688
+ # Expand the original query to include synonyms and rephrasings.
689
+ search_queries = [query]
690
+ try:
691
+ expansion_prompt = QUERY_EXPANSION_PROMPT.format(question=query)
692
+ expansion_messages = [{"role": "user", "content": expansion_prompt}]
693
+ raw_expansion = call_llm(expansion_messages, temperature=0.0)
694
+ expanded = json.loads(raw_expansion)
695
+ if isinstance(expanded, list):
696
+ search_queries.extend(expanded)
697
+ print(f"[DEBUG] Expanded Search Queries: {search_queries}")
698
+ except Exception as e:
699
+ print(f"[DEBUG] Query expansion failed: {e}")
700
+
701
+ # Perform a similarity search for EACH query variant.
702
+ initial_results = []
703
+ for q in search_queries:
704
+ initial_results.extend(vs_personal.similarity_search_with_score(q, k=3))
705
+
706
+ initial_results = dedup_docs(initial_results)
707
+ initial_results.sort(key=lambda x: x[1])
708
+ # END new experiment
709
+
710
+ # Get all documents from the FAISS docstore
711
+ # Uncomment this line if we UNDO above experiment
712
+ # all_personal_docs = list(vs_personal.docstore._dict.values())
713
 
714
  # 2. Filter this list to keep only text-based files
715
  text_based_docs = []
716
  text_extensions = ('.txt', '.jsonl') # Define what counts as a text source
717
+ # ORIG: for doc in all_personal_docs:
718
+ for doc, score in initial_results:
719
  source = doc.metadata.get("source", "").lower()
720
  # if source.endswith(text_extensions):
721
  # NEW: Include saved personal conversations
722
  if source.endswith(text_extensions) or source == "saved chat":
723
+ # ORIG: text_based_docs.append(doc)
724
+ text_based_results.append((doc, score))
725
+
726
+ # Add the debug print to show the final, filtered results.
727
+ print("\n--- DEBUG: Filtered Personal Documents (Text-Only, with scores) ---")
728
+ if text_based_results:
729
+ for doc, score in text_based_results:
730
+ source = doc.metadata.get('source', 'N/A')
731
+ print(f" - Score: {score:.4f} | Source: {source}")
732
+ else:
733
+ print(" - No relevant text-based personal documents found.")
734
+ print("---------------------------------------------------------------------\n")
735
 
736
  # 3. Extend the final list with only the filtered, text-based documents
737
+ # Select the final 5 (parameter tuning) documents for the context.
738
+ final_personal_docs = [doc for doc, score in text_based_results[:5]]
739
+ all_retrieved_docs.extend(final_personal_docs)
740
+ # ORIG code
741
+ # all_retrieved_docs.extend(text_based_docs)
742
  # --- END OF MODIFICATION ---
743
 
744
  else: