Update alz_companion/agent.py

alz_companion/agent.py  (+51 -6)  CHANGED
@@ -677,23 +677,68 @@ def make_rag_chain(vs_general: FAISS, vs_personal: FAISS, *, for_evaluation: boo
     # This logic retrieves all documents from the personal FAISS store and then
     # filters them to include ONLY text-based sources, excluding media files.
     print("[DEBUG] Personal Memory Route Activated. Retrieving all personal text documents...")
-
+
+    # 1. check if the personal vector store is valid and has content.
     if vs_personal and vs_personal.docstore and len(vs_personal.index_to_docstore_id) > 0:
-        # Get all documents from the FAISS docstore
-        all_personal_docs = list(vs_personal.docstore._dict.values())
+
+        ## NEW Experiment
+        # 2. If it's valid, proceed with the upgraded retrieval logic.
+        print("[DEBUG] Personal Memory Route Activated. Expanding query...")
+
+        # Expand the original query to include synonyms and rephrasings.
+        search_queries = [query]
+        try:
+            expansion_prompt = QUERY_EXPANSION_PROMPT.format(question=query)
+            expansion_messages = [{"role": "user", "content": expansion_prompt}]
+            raw_expansion = call_llm(expansion_messages, temperature=0.0)
+            expanded = json.loads(raw_expansion)
+            if isinstance(expanded, list):
+                search_queries.extend(expanded)
+                print(f"[DEBUG] Expanded Search Queries: {search_queries}")
+        except Exception as e:
+            print(f"[DEBUG] Query expansion failed: {e}")
+
+        # Perform a similarity search for EACH query variant.
+        initial_results = []
+        for q in search_queries:
+            initial_results.extend(vs_personal.similarity_search_with_score(q, k=3))
+
+        initial_results = dedup_docs(initial_results)
+        initial_results.sort(key=lambda x: x[1])
+        # END new experiment
+
+        # Get all documents from the FAISS docstore
+        # Uncomment this line if we UNDO above experiment
+        # all_personal_docs = list(vs_personal.docstore._dict.values())
 
         # 2. Filter this list to keep only text-based files
         text_based_docs = []
         text_extensions = ('.txt', '.jsonl') # Define what counts as a text source
-        for doc in all_personal_docs:
+        # ORIG: for doc in all_personal_docs:
+        for doc, score in initial_results:
             source = doc.metadata.get("source", "").lower()
             # if source.endswith(text_extensions):
             # NEW: Include saved personal conversations
             if source.endswith(text_extensions) or source == "saved chat":
-                text_based_docs.append(doc)
+                # ORIG: text_based_docs.append(doc)
+                text_based_results.append((doc, score))
+
+        # Add the debug print to show the final, filtered results.
+        print("\n--- DEBUG: Filtered Personal Documents (Text-Only, with scores) ---")
+        if text_based_results:
+            for doc, score in text_based_results:
+                source = doc.metadata.get('source', 'N/A')
+                print(f" - Score: {score:.4f} | Source: {source}")
+        else:
+            print(" - No relevant text-based personal documents found.")
+        print("---------------------------------------------------------------------\n")
 
         # 3. Extend the final list with only the filtered, text-based documents
-        all_retrieved_docs.extend(text_based_docs)
+        # Select the final 5 (parameter tuning) documents for the context.
+        final_personal_docs = [doc for doc, score in text_based_results[:5]]
+        all_retrieved_docs.extend(final_personal_docs)
+        # ORIG code
+        # all_retrieved_docs.extend(text_based_docs)
         # --- END OF MODIFICATION ---
 
     else:
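
QUERY_EXPANSION_PROMPT and call_llm are defined elsewhere in alz_companion/agent.py and are not part of this hunk. Since the calling code passes the raw model output to json.loads and only uses the result when it is a list, the prompt presumably asks the model for a bare JSON array of rephrasings. A minimal sketch of what such a prompt could look like (hypothetical wording, not the project's actual prompt):

# Hypothetical sketch only -- the real QUERY_EXPANSION_PROMPT lives elsewhere
# in agent.py. The contract implied by the calling code is that the model
# returns a bare JSON array of strings for json.loads() to parse.
QUERY_EXPANSION_PROMPT = (
    "Rephrase the question below in 2-3 alternative ways, using synonyms a "
    "family member might use in everyday conversation. Return ONLY a JSON "
    "array of strings, with no surrounding text.\n\n"
    "Question: {question}"
)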
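
dedup_docs is also defined outside this hunk. Because the same personal store is searched once per query variant, the combined results can contain the same chunk several times; LangChain's FAISS.similarity_search_with_score returns (Document, distance) pairs where a lower distance means a closer match, which is why the results are sorted in ascending order and only the first few are kept. A minimal sketch of a deduplicator consistent with that usage (assumed behaviour, not the project's actual implementation):

# Assumed behaviour only -- the real dedup_docs is defined elsewhere in agent.py.
# Collapses duplicate (Document, score) pairs, keeping the lowest (best)
# distance seen for each unique chunk, keyed on source + content.
def dedup_docs(results):
    best = {}
    for doc, score in results:
        key = (doc.metadata.get("source", ""), doc.page_content)
        if key not in best or score < best[key][1]:
            best[key] = (doc, score)
    return list(best.values())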
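
One detail to double-check: within the lines shown, the filter loop appends to and later slices text_based_results, but only the legacy text_based_docs list is initialized. Unless text_based_results is created earlier in make_rag_chain, the loop would need an initialization like the line below before it runs (placement assumed):

# Assumes text_based_results is not already defined earlier in the function.
# Holds the (Document, score) pairs that pass the text-only source filter.
text_based_results = []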