KeenWoo committed
Commit be01e15 · verified · 1 Parent(s): 9e6c5a6

Upload 2 files

Files changed (2)
  1. app.py +1034 -0
  2. evaluate.py +405 -0
app.py ADDED
@@ -0,0 +1,1034 @@
import os
import json
import shutil
import gradio as gr
import tempfile
from datetime import datetime
from typing import List, Dict, Any, Optional
from pytube import YouTube
from pathlib import Path
import re
import pandas as pd

# --- Agent Imports ---
try:
    from alz_companion.agent import (
        bootstrap_vectorstore, make_rag_chain, answer_query, synthesize_tts,
        transcribe_audio, detect_tags_from_query, describe_image, build_or_load_vectorstore,
        _default_embeddings, route_query_type, call_llm
    )
    from alz_companion.prompts import (
        BEHAVIOUR_TAGS, EMOTION_STYLES, FAITHFULNESS_JUDGE_PROMPT
    )
    from langchain.schema import Document
    from langchain_community.vectorstores import FAISS
    AGENT_OK = True
except Exception as e:
    AGENT_OK = False

    class Document:
        def __init__(self, page_content, metadata): self.page_content, self.metadata = page_content, metadata

    class FAISS:
        def __init__(self):
            self.docstore = type('obj', (object,), {'_dict': {}})()
        def add_documents(self, docs):
            start_idx = len(self.docstore._dict)
            for i, d in enumerate(docs, start_idx):
                self.docstore._dict[i] = d
        def save_local(self, path): pass
        @classmethod
        def from_documents(cls, docs, embeddings=None):
            inst = cls()
            inst.add_documents(docs)
            return inst

    def build_or_load_vectorstore(docs, index_path, is_personal=False): return FAISS.from_documents(docs or [], embeddings=None)
    def bootstrap_vectorstore(sample_paths=None, index_path="data/"): return object()
    def make_rag_chain(vs_general, vs_personal, **kwargs): return lambda q, **k: {"answer": f"(Demo) You asked: {q}", "sources": []}
    def answer_query(chain, q, **kwargs): return chain(q, **kwargs)
    def synthesize_tts(text: str, lang: str = "en"): return None
    def transcribe_audio(filepath: str, lang: str = "en"): return "This is a transcribed message."
    def detect_tags_from_query(*args, **kwargs): return {"detected_behavior": "None", "detected_emotion": "None"}
    def describe_image(image_path: str): return "This is a description of an image."
    def _default_embeddings(): return None
    def route_query_type(query: str): return "general_conversation"
    def call_llm(messages, **kwargs): return "Cannot call LLM in fallback mode."
    BEHAVIOUR_TAGS, EMOTION_STYLES, FAITHFULNESS_JUDGE_PROMPT = {"None": []}, {"None": {}}, ""
    print(f"WARNING: Could not import from alz_companion ({e}). Running in UI-only demo mode.")


# --- NEW: Import for Evaluation Logic ---
try:
    from evaluate import load_test_fixtures, run_comprehensive_evaluation
except ImportError:
    # Fallback if evaluate.py is not found
    def load_test_fixtures(): print("WARNING: evaluate.py not found.")
    def run_comprehensive_evaluation(*args, **kwargs): return "Evaluation module not found.", []


# --- Centralized Configuration ---
CONFIG = {
    "themes": ["All", "The Father", "Still Alice", "Away from Her", "Alive Inside", "General Caregiving"],
    "roles": ["patient", "caregiver"],
    "disease_stages": ["Default: Mild Stage", "Moderate Stage", "Advanced Stage"],
    "behavior_tags": ["None"] + list(BEHAVIOUR_TAGS.keys()),
    "emotion_tags": ["None"] + list(EMOTION_STYLES.keys()),
    "topic_tags": ["None", "caregiving_advice", "medical_fact", "personal_story", "research_update", "treatment_option:home_safety", "treatment_option:long_term_care", "treatment_option:music_therapy", "treatment_option:reassurance", "treatment_option:routine_structuring", "treatment_option:validation_therapy"],
    "context_tags": ["None", "disease_stage_mild", "disease_stage_moderate", "disease_stage_advanced", "disease_stage_unspecified", "interaction_mode_one_to_one", "interaction_mode_small_group", "interaction_mode_group_activity", "relationship_family", "relationship_spouse", "relationship_staff_or_caregiver", "relationship_unspecified", "setting_home_or_community", "setting_care_home", "setting_clinic_or_hospital"],
    "languages": {"English": "en", "Chinese": "zh", "Cantonese": "zh-yue", "Korean": "ko", "Japanese": "ja", "Malay": "ms", "French": "fr", "Spanish": "es", "Hindi": "hi", "Arabic": "ar"},
    "tones": ["warm", "empathetic", "caring", "reassuring", "calm", "optimistic", "motivating", "neutral", "formal", "humorous"],
    # --- ADD THIS NEW KEY AND LIST ---
    "music_moods": [
        "Confusion or Disorientation",
        "Reminiscence and Connection",
        "Sundowning or Restlessness",
        "Sadness or Longing",
        "Anxiety or Fear",
        "Agitation or Anger",
        "Joy or Affection"
    ]
    # --- END OF ADDITION ---
}

# --- File Management & Vector Store Logic ---
def _storage_root() -> Path:
    for p in [Path(os.getenv("SPACE_STORAGE", "")), Path("/data"), Path.home() / ".cache" / "alz_companion"]:
        if not p: continue
        try:
            p.mkdir(parents=True, exist_ok=True)
            (p / ".write_test").write_text("ok")
            (p / ".write_test").unlink(missing_ok=True)
            return p
        except Exception: continue
    tmp = Path(tempfile.gettempdir()) / "alz_companion"
    tmp.mkdir(parents=True, exist_ok=True)
    return tmp

STORAGE_ROOT = _storage_root()
INDEX_BASE = STORAGE_ROOT / "index"
# --- NEW: Define path for the auto-loading folder ---
PERSISTENT_MEMORY_PATH = Path(__file__).parent / "Personal Memory Bank"
# --- END NEW ---
PERSONAL_DATA_BASE = STORAGE_ROOT / "personal"
UPLOADS_BASE = INDEX_BASE / "uploads"
PERSONAL_INDEX_PATH = str(PERSONAL_DATA_BASE / "personal_faiss_index")
NLU_EXAMPLES_INDEX_PATH = str(INDEX_BASE / "nlu_examples_faiss_index")
THEME_PATHS = {t: str(INDEX_BASE / f"faiss_index_{t.replace(' ', '').lower()}") for t in CONFIG["themes"]}
os.makedirs(UPLOADS_BASE, exist_ok=True)
os.makedirs(PERSONAL_DATA_BASE, exist_ok=True)
# --- NEW: Create the folder on startup if it does not exist ---
os.makedirs(PERSISTENT_MEMORY_PATH, exist_ok=True)
# --- END NEW ---


for p in THEME_PATHS.values(): os.makedirs(p, exist_ok=True)
vectorstores = {}
personal_vectorstore = None
nlu_vectorstore = None

try:
    personal_vectorstore = build_or_load_vectorstore([], PERSONAL_INDEX_PATH, is_personal=True)
except Exception:
    personal_vectorstore = None

def bootstrap_nlu_vectorstore(example_file: str, index_path: str) -> FAISS:
    if not os.path.exists(example_file):
        print(f"WARNING: NLU example file not found at {example_file}. NLU will be less accurate.")
        return build_or_load_vectorstore([], index_path)
    docs = []
    with open(example_file, "r", encoding="utf-8") as f:
        for line in f:
            try:
                data = json.loads(line)
                doc = Document(page_content=data["query"], metadata=data)
                docs.append(doc)
            except (json.JSONDecodeError, KeyError): continue
    print(f"Found and loaded {len(docs)} NLU training examples.")
    if os.path.exists(index_path): shutil.rmtree(index_path)
    return build_or_load_vectorstore(docs, index_path)
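
# Illustrative only: an assumed shape for one line of nlu_training_examples.jsonl.
# The loader above requires a "query" key and stores all remaining fields as
# metadata, so the exact tag keys below are an assumption, not a confirmed schema:
# {"query": "He keeps asking when his mother is coming home.",
#  "detected_behaviors": ["repetitive_questioning"],
#  "detected_emotion": "anxious"}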


# In app.py, near the other path definitions
PERSONAL_MUSIC_BASE = PERSONAL_DATA_BASE / "music"
os.makedirs(PERSONAL_MUSIC_BASE, exist_ok=True)

# In app.py, replace your existing versions of these three functions with the code below.
# --- Function 1: Auto-loads non-music memories from the 'Personal Memory Bank' folder ---
def load_personal_files_from_folder():
    """
    Scans the 'Personal Memory Bank' folder and loads new multi-modal files
    (text, audio, video, images) into the personal vectorstore.
    """
    global personal_vectorstore
    print("Scanning 'Personal Memory Bank' folder for new files...")
    if not os.path.exists(PERSISTENT_MEMORY_PATH):
        return

    # Define supported file extensions
    TEXT_EXTENSIONS = (".txt",)
    AUDIO_EXTENSIONS = (".mp3", ".wav", ".m4a", ".flac")
    VIDEO_EXTENSIONS = (".mp4", ".mov", ".avi", ".mkv")
    IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".gif", ".bmp")

    # Get a list of sources already in the vectorstore to avoid re-processing files
    existing_sources = set()
    if personal_vectorstore and hasattr(personal_vectorstore.docstore, '_dict'):
        for doc in personal_vectorstore.docstore._dict.values():
            existing_sources.add(doc.metadata.get("source"))

    docs_to_add = []
    for filename in os.listdir(PERSISTENT_MEMORY_PATH):
        if filename in existing_sources:
            continue

        filepath = PERSISTENT_MEMORY_PATH / filename
        content_to_process = ""

        file_lower = filename.lower()

        if file_lower.endswith(TEXT_EXTENSIONS):
            print(f"  - Found new text file to load: {filename}")
            with open(filepath, "r", encoding="utf-8") as f:
                content_to_process = f.read()

        elif file_lower.endswith(AUDIO_EXTENSIONS) or file_lower.endswith(VIDEO_EXTENSIONS):
            media_type = "Audio" if file_lower.endswith(AUDIO_EXTENSIONS) else "Video"
            print(f"  - Found new {media_type} file to transcribe: {filename}")
            try:
                transcribed_text = transcribe_audio(str(filepath))
                title = os.path.splitext(filename)[0].replace('_', ' ').replace('-', ' ')
                content_to_process = f"Title: {title}\n\nContent: {transcribed_text}"
            except Exception as e:
                print(f"  - ERROR: Failed to transcribe {filename}. Reason: {e}")
                continue

        elif file_lower.endswith(IMAGE_EXTENSIONS):
            print(f"  - Found new Image file to describe: {filename}")
            try:
                description = describe_image(str(filepath))
                title = os.path.splitext(filename)[0].replace('_', ' ').replace('-', ' ')
                content_to_process = f"Title: {title}\n\nContent: {description}"
            except Exception as e:
                print(f"  - ERROR: Failed to describe {filename}. Reason: {e}")
                continue

        if content_to_process:
            docs_to_add.extend(parse_and_tag_entries(content_to_process, source=filename, settings={}))

    if docs_to_add:
        if personal_vectorstore is None:
            personal_vectorstore = build_or_load_vectorstore(docs_to_add, PERSONAL_INDEX_PATH, is_personal=True)
        else:
            personal_vectorstore.add_documents(docs_to_add)

        personal_vectorstore.save_local(PERSONAL_INDEX_PATH)
        print(f"Successfully added {len(docs_to_add)} new document(s) from the folder.")


# --- Function 2: Auto-syncs music from the 'Music Library' folder (Hybrid Approach) ---
def sync_music_library_from_folder():
    """Scans 'Music Library' folder, syncs manifest for playback, and adds lyrics to vectorstore."""
    global personal_vectorstore
    music_library_path = PERSISTENT_MEMORY_PATH / "Music Library"
    os.makedirs(music_library_path, exist_ok=True)

    manifest_path = PERSONAL_MUSIC_BASE / "music_manifest.json"
    manifest = {}
    if manifest_path.exists():
        with open(manifest_path, "r") as f: manifest = json.load(f)

    existing_sources = set()
    if personal_vectorstore and hasattr(personal_vectorstore.docstore, '_dict'):
        for doc in personal_vectorstore.docstore._dict.values():
            existing_sources.add(doc.metadata.get("source"))

    print("Scanning 'Music Library' folder for new songs...")
    filename_pattern = re.compile(r'^(.*?) - (.*?) - (.*?)\.(mp3|wav|m4a|ogg|flac)$', re.IGNORECASE)

    synced_count = 0
    docs_to_add = []
    for filename in os.listdir(music_library_path):
        song_id = filename.replace(" ", "_").lower()
        if song_id in manifest and filename in existing_sources:
            continue

        match = filename_pattern.match(filename)
        if match:
            print(f"  - Found new song to sync: {filename}")
            title, artist, tag = match.groups()[:3]

            source_path = music_library_path / filename
            dest_path = PERSONAL_MUSIC_BASE / filename
            if not os.path.exists(dest_path):
                shutil.copy2(str(source_path), str(dest_path))

            # Add to manifest for playback system
            song_metadata = {"title": title.strip(), "artist": artist.strip(), "moods": [tag.strip().lower()], "filepath": str(dest_path)}
            manifest[song_id] = song_metadata

            # --- NEW HYBRID LOGIC: Transcribe and prep for vectorstore ---
            # Transcribe and prep for the semantic memory system (vectorstore)
            if filename not in existing_sources:
                try:
                    print(f"  - Transcribing '{title}' for memory bank...")
                    lyrics = transcribe_audio(str(dest_path))
                    content_for_rag = (
                        f"Title: Song - {song_metadata['title']}\n"
                        f"Artist: {song_metadata['artist']}\n"
                        f"Moods: {', '.join(song_metadata['moods'])}\n\n"
                        f"Lyrics:\n{lyrics}"
                    )
                    docs_to_add.extend(parse_and_tag_entries(content_for_rag, source=filename, settings={}))
                except Exception as e:
                    print(f"  - WARNING: Failed to transcribe {filename} for memory bank. Error: {e}")
            # --- END OF NEW HYBRID LOGIC ---
            synced_count += 1

    if synced_count > 0:
        with open(manifest_path, "w") as f: json.dump(manifest, f, indent=2)
        print(f"Successfully synced {synced_count} new song(s) to the music manifest.")

    if docs_to_add:
        if personal_vectorstore is None:
            personal_vectorstore = build_or_load_vectorstore(docs_to_add, PERSONAL_INDEX_PATH, is_personal=True)
        else:
            personal_vectorstore.add_documents(docs_to_add)
        personal_vectorstore.save_local(PERSONAL_INDEX_PATH)
        print(f"Successfully added lyrics for {len(docs_to_add)} song(s) to the personal vectorstore.")
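
# Illustrative filename convention, as implied by the regex above (the song
# below is an assumed example, not a file shipped with the repo):
#   "My Way - Frank Sinatra - Reminiscence and Connection.mp3"
# would yield title="My Way", artist="Frank Sinatra", and a single mood tag
# of "reminiscence and connection" in the song's manifest entry.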


def canonical_theme(tk: str) -> str: return tk if tk in CONFIG["themes"] else "All"

def theme_upload_dir(theme: str) -> str:
    p = UPLOADS_BASE / f"theme_{canonical_theme(theme).replace(' ', '').lower()}"
    p.mkdir(exist_ok=True)
    return str(p)

def load_manifest(theme: str) -> Dict[str, Any]:
    p = os.path.join(theme_upload_dir(theme), "manifest.json")
    if os.path.exists(p):
        try:
            with open(p, "r", encoding="utf-8") as f: return json.load(f)
        except Exception: pass
    return {"files": {}}

def save_manifest(theme: str, man: Dict[str, Any]):
    with open(os.path.join(theme_upload_dir(theme), "manifest.json"), "w", encoding="utf-8") as f: json.dump(man, f, indent=2)

def list_theme_files(theme: str) -> List[tuple[str, bool]]:
    man = load_manifest(theme)
    base = theme_upload_dir(theme)
    found = [(n, bool(e)) for n, e in man.get("files", {}).items() if os.path.exists(os.path.join(base, n))]
    existing = {n for n, e in found}
    for name in sorted(os.listdir(base)):
        if name not in existing and os.path.isfile(os.path.join(base, name)): found.append((name, False))
    man["files"] = dict(found)
    save_manifest(theme, man)
    return found

def copy_into_theme(theme: str, src_path: str) -> str:
    fname = os.path.basename(src_path)
    dest = os.path.join(theme_upload_dir(theme), fname)
    shutil.copy2(src_path, dest)
    return dest

def seed_files_into_theme(theme: str):
    SEED_FILES = [("sample_data/caregiving_tips.txt", True), ("sample_data/the_father_segments_enriched_harmonized_plus.jsonl", True), ("sample_data/still_alice_enriched_harmonized_plus.jsonl", True), ("sample_data/away_from_her_enriched_harmonized_plus.jsonl", True), ("sample_data/alive_inside_enriched_harmonized.jsonl", True)]
    man, changed = load_manifest(theme), False
    for path, enable in SEED_FILES:
        if not os.path.exists(path): continue
        fname = os.path.basename(path)
        if not os.path.exists(os.path.join(theme_upload_dir(theme), fname)):
            copy_into_theme(theme, path)
            man["files"][fname] = bool(enable)
            changed = True
    if changed: save_manifest(theme, man)

def ensure_index(theme='All'):
    theme = canonical_theme(theme)
    if theme in vectorstores: return vectorstores[theme]
    upload_dir = theme_upload_dir(theme)
    enabled_files = [os.path.join(upload_dir, n) for n, enabled in list_theme_files(theme) if enabled]
    index_path = THEME_PATHS.get(theme)
    vectorstores[theme] = bootstrap_vectorstore(sample_paths=enabled_files, index_path=index_path)
    return vectorstores[theme]

# --- Gradio Callbacks ---
# In app.py, modify the collect_settings function

def collect_settings(*args):
    keys = ["role", "patient_name", "caregiver_name", "tone", "language", "tts_lang", "temperature",
            # --- ADD "disease_stage" to this list ---
            "disease_stage",
            "behaviour_tag", "emotion_tag", "topic_tag", "active_theme", "tts_on", "debug_mode"]
    return dict(zip(keys, args))


# In app.py, replace the entire parse_and_tag_entries function.
def parse_and_tag_entries(text_content: str, source: str, settings: dict = None) -> List[Document]:
    docs_to_add = []
    # This logic correctly handles both simple text and complex journal entries
    entries = re.split(r'\n(?:---|--|-|-\*-|-\.-)\n', text_content)
    if len(entries) == 1 and "title:" not in entries[0].lower() and "content:" not in entries[0].lower():
        entries = [text_content]  # Treat simple text as a single entry

    for entry in entries:
        if not entry.strip(): continue

        lines = entry.strip().split('\n')
        title_line = lines[0].split(':', 1)
        title = title_line[1].strip() if len(title_line) > 1 and "title:" in lines[0].lower() else "Untitled Text Entry"
        content_part = "\n".join(lines[1:])
        content = content_part.split(':', 1)[1].strip() if "content:" in content_part.lower() else content_part.strip() or entry.strip()

        if not content: continue

        full_content = f"Title: {title}\n\nContent: {content}"

        detected_tags = detect_tags_from_query(
            content, nlu_vectorstore=nlu_vectorstore,
            behavior_options=CONFIG["behavior_tags"], emotion_options=CONFIG["emotion_tags"],
            topic_options=CONFIG["topic_tags"], context_options=CONFIG["context_tags"],
            settings=settings
        )

        metadata = {"source": source, "title": title}

        # --- START: CORRECTED METADATA ASSIGNMENT ---
        if detected_tags.get("detected_behaviors"):
            metadata["behaviors"] = [b.lower() for b in detected_tags["detected_behaviors"]]
        detected_emotion = detected_tags.get("detected_emotion")
        if detected_emotion and detected_emotion != "None":
            metadata["emotion"] = detected_emotion.lower()

        # Correctly handle the plural "detected_topics" key and list value
        detected_topics = detected_tags.get("detected_topics")
        if detected_topics:
            metadata["topic_tags"] = [t.lower() for t in detected_topics]

        if detected_tags.get("detected_contexts"):
            metadata["context_tags"] = [c.lower() for c in detected_tags["detected_contexts"]]
        # --- END: CORRECTED METADATA ASSIGNMENT ---

        docs_to_add.append(Document(page_content=full_content, metadata=metadata))

    return docs_to_add
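
# Illustrative input for parse_and_tag_entries (assumed journal format: entries
# separated by a line containing "---", each with optional Title:/Content: fields):
#
#   Title: Trip to the lake
#   Content: We rowed out after breakfast and sang old songs.
#   ---
#   Title: Sunday visit
#   Content: Anna brought photos from the wedding.
#
# Each entry becomes one Document whose metadata carries any tags the NLU detects.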


def handle_add_knowledge(title, text_input, file_input, image_input, yt_url, settings):
    global personal_vectorstore
    docs_to_add = []
    source, content = "Unknown", ""
    if text_input and text_input.strip():
        source, content = "Text Input", f"Title: {title or 'Untitled'}\n\nContent: {text_input}"
    elif file_input:
        source = os.path.basename(file_input.name)
        if file_input.name.lower().endswith('.txt'):
            with open(file_input.name, 'r', encoding='utf-8') as f: content = f.read()
        else:
            transcribed = transcribe_audio(file_input.name)
            content = f"Title: {title or 'Audio/Video Note'}\n\nContent: {transcribed}"
    elif image_input:
        source, description = "Image Input", describe_image(image_input)
        content = f"Title: {title or 'Image Note'}\n\nContent: {description}"
    elif yt_url and ("youtube.com" in yt_url or "youtu.be" in yt_url):
        try:
            yt = YouTube(yt_url)
            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_audio_file:
                yt.streams.get_audio_only().download(filename=temp_audio_file.name)
            transcribed = transcribe_audio(temp_audio_file.name)
            os.remove(temp_audio_file.name)
            source, content = f"YouTube: {yt.title}", f"Title: {title or yt.title}\n\nContent: {transcribed}"
        except Exception as e:
            return f"Error processing YouTube link: {e}"
    else:
        return "Please provide content to add."
    if content:
        docs_to_add = parse_and_tag_entries(content, source, settings=settings)
    if not docs_to_add: return "No processable content found to add."
    if personal_vectorstore is None:
        personal_vectorstore = build_or_load_vectorstore(docs_to_add, PERSONAL_INDEX_PATH, is_personal=True)
    else:
        personal_vectorstore.add_documents(docs_to_add)
    personal_vectorstore.save_local(PERSONAL_INDEX_PATH)
    return f"Successfully added {len(docs_to_add)} new memory/memories."


# In app.py, add this new handler function

def handle_add_music(file, title, artist, mood):
    if not all([file, title, artist, mood]):
        return "Please fill out all fields."

    # Save the audio file
    filename = os.path.basename(file.name)
    dest_path = PERSONAL_MUSIC_BASE / filename
    shutil.copy2(file.name, str(dest_path))

    # Save the metadata to a manifest file
    manifest_path = PERSONAL_MUSIC_BASE / "music_manifest.json"
    manifest = {}
    if manifest_path.exists():
        with open(manifest_path, "r") as f:
            manifest = json.load(f)

    song_id = filename.replace(" ", "_").lower()
    manifest[song_id] = {
        "title": title.strip(),
        "artist": artist.strip(),
        # "moods": [m.strip().lower() for m in mood.split(",")],
        "moods": [m.lower() for m in mood],  # Correctly handles the list from the dropdown
        "filepath": str(dest_path)  # Store the full path for backend access
    }

    with open(manifest_path, "w") as f:
        json.dump(manifest, f, indent=2)

    return f"Successfully added '{title}' to the music library."
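
# Illustrative music_manifest.json entry produced by handle_add_music (the
# song_id key and all field values below are assumed examples, not real data):
# {
#   "my_way.mp3": {
#     "title": "My Way",
#     "artist": "Frank Sinatra",
#     "moods": ["joy or affection"],
#     "filepath": "/data/personal/music/my_way.mp3"
#   }
# }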

# In app.py, add these two new functions (e.g., after the handle_add_music function)

def list_music_library():
    """Loads the music manifest and formats it for the Gradio UI."""
    manifest_path = PERSONAL_MUSIC_BASE / "music_manifest.json"
    if not manifest_path.exists():
        return gr.update(value=[["Library is empty", "", ""]]), gr.update(choices=[], value=None)

    with open(manifest_path, "r") as f:
        manifest = json.load(f)

    if not manifest:
        return gr.update(value=[["Library is empty", "", ""]]), gr.update(choices=[], value=None)

    display_data = [[data['title'], data['artist'], ", ".join(data['moods'])] for data in manifest.values()]

    # Use the song's unique ID (the key in the manifest) for the delete dropdown
    delete_choices = list(manifest.keys())

    return gr.update(value=display_data), gr.update(choices=delete_choices, value=None)

def delete_music_from_library(song_id_to_delete):
    """Deletes a song from the manifest, the audio file, and the vectorstore."""
    global personal_vectorstore
    if not song_id_to_delete:
        return "No music selected to delete."

    # 1. Remove from manifest and delete audio file
    manifest_path = PERSONAL_MUSIC_BASE / "music_manifest.json"
    if not manifest_path.exists(): return "Error: Music manifest not found."

    with open(manifest_path, "r") as f: manifest = json.load(f)

    song_to_delete = manifest.pop(song_id_to_delete, None)
    if not song_to_delete: return f"Error: Could not find song ID {song_id_to_delete} in manifest."

    with open(manifest_path, "w") as f: json.dump(manifest, f, indent=2)

    try:
        os.remove(song_to_delete['filepath'])
    except OSError as e:
        print(f"Error deleting audio file {song_to_delete['filepath']}: {e}")

    # 2. Remove lyrics from the personal vectorstore
    if personal_vectorstore and hasattr(personal_vectorstore.docstore, '_dict'):
        filename_to_delete = os.path.basename(song_to_delete['filepath'])
        all_docs = list(personal_vectorstore.docstore._dict.values())

        # Keep every document whose source does not match the audio filename
        docs_to_keep = [d for d in all_docs if d.metadata.get("source") != filename_to_delete]

        if len(all_docs) > len(docs_to_keep):
            if not docs_to_keep:  # If it was the last doc
                if os.path.isdir(PERSONAL_INDEX_PATH): shutil.rmtree(PERSONAL_INDEX_PATH)
                personal_vectorstore = build_or_load_vectorstore([], PERSONAL_INDEX_PATH, is_personal=True)
            else:
                new_vs = FAISS.from_documents(docs_to_keep, _default_embeddings())
                new_vs.save_local(PERSONAL_INDEX_PATH)
                personal_vectorstore = new_vs
            return f"Successfully deleted '{song_to_delete['title']}' from the library and memory bank."

    return f"Successfully deleted '{song_to_delete['title']}' from the music library."


def chat_fn(user_text, audio_file, settings, chat_history):

    # --- ADD THIS DEBUG BLOCK AT THE TOP ---
    print("\n" + "="*50)
    print(f"[DEBUG app.py] chat_fn received settings: {settings}")
    print("="*50 + "\n")
    # --- END OF ADDITION ---

    global personal_vectorstore
    question = (user_text or "").strip()
    if audio_file and not question:
        try:
            question = transcribe_audio(audio_file, lang=CONFIG["languages"].get(settings.get("tts_lang", "English"), "en"))
        except Exception as e:
            err_msg = f"Audio Error: {e}" if settings.get("debug_mode") else "Sorry, I couldn't understand the audio."
            chat_history.append({"role": "assistant", "content": err_msg})
            return "", None, chat_history

    if not question:
        return "", None, chat_history

    # --- START FIX 1: Correctly process the incoming chat_history (list of dicts) ---
    # The incoming chat_history is already in the desired format for the API;
    # we just need to filter out our special system messages (like sources).
    api_chat_history = [
        msg for msg in chat_history
        if msg.get("content") and not msg["content"].strip().startswith("*(")
    ]

    # Append the new user question to the history that will be displayed in the UI
    chat_history.append({"role": "user", "content": question})
    # --- END FIX 1 ---

    # NEW
    query_type = route_query_type(question, severity=settings.get("disease_stage", "Default: Mild Stage"))
    # query_type = route_query_type(question)
    # --- ADD THIS DEBUG PRINT ---
    print(f"[DEBUG] Router classified query as: {query_type}")
    # --- END OF ADDITION ---


    final_tags = { "scenario_tag": None, "emotion_tag": None, "topic_tag": None, "context_tags": [] }
    manual_behavior = settings.get("behaviour_tag", "None")
    manual_emotion = settings.get("emotion_tag", "None")
    manual_topic = settings.get("topic_tag", "None")

    auto_detected_context = ""
    if not all(m == "None" for m in [manual_behavior, manual_emotion, manual_topic]):
        # --- ADD THIS DEBUG PRINT ---
        print(f"[DEBUG app.py] Manual override DETECTED. Behavior='{manual_behavior}', Emotion='{manual_emotion}', Topic='{manual_topic}'")
        # --- END OF ADDITION ---

        final_tags["scenario_tag"] = manual_behavior if manual_behavior != "None" else None
        final_tags["emotion_tag"] = manual_emotion if manual_emotion != "None" else None
        final_tags["topic_tag"] = manual_topic if manual_topic != "None" else None

    # NEW: Extend emotion/behavior detection from caregiving scenarios to music playback.
    # Whenever the user asks to play music, the system first analyzes the query to
    # detect an underlying emotion or behavior.
    elif "caregiving_scenario" in query_type or "play_music_request" in query_type:

        # --- NEW DEBUG BLOCK: Print inputs before calling NLU ---
        print("\n--- [DEBUG app.py] Preparing to call NLU ---")
        print(f"  - Query to Analyze: '{question}'")
        print(f"  - NLU Vectorstore Loaded: {nlu_vectorstore is not None}")
        print(f"  - Current Settings Passed: {settings}")
        print("------------------------------------------")
        # --- END OF NEW DEBUG BLOCK ---

        detected_tags = detect_tags_from_query(
            question, nlu_vectorstore=nlu_vectorstore, behavior_options=CONFIG["behavior_tags"],
            emotion_options=CONFIG["emotion_tags"], topic_options=CONFIG["topic_tags"],
            context_options=CONFIG["context_tags"], settings=settings)

        # --- ADD THIS DEBUG PRINT ---
        print(f"[DEBUG app.py] Raw NLU output: {detected_tags}")
        # --- END OF ADDITION ---

        behaviors = detected_tags.get("detected_behaviors")
        final_tags["scenario_tag"] = behaviors[0] if behaviors else None
        final_tags["emotion_tag"] = detected_tags.get("detected_emotion")
        final_tags["topic_tag"] = detected_tags.get("detected_topic")
        final_tags["context_tags"] = detected_tags.get("detected_contexts", [])

        # --- ADD THIS DEBUG PRINT ---
        print(f"[DEBUG] NLU detected tags: {final_tags}")
        # --- END OF ADDITION ---

    detected_parts = [f"{k.split('_')[1]}=`{v}`" for k, v in final_tags.items() if v and v != "None" and v != []]
    if detected_parts:
        auto_detected_context = f"*(Auto-detected context: {', '.join(detected_parts)})*"

    vs_general = ensure_index(settings.get("active_theme", "All"))
    if personal_vectorstore is None:
        personal_vectorstore = build_or_load_vectorstore([], PERSONAL_INDEX_PATH, is_personal=True)

    # OLD rag_settings = {k: settings.get(k) for k in ["role", "temperature", "language", "patient_name", "caregiver_name", "tone"]}
    # NEW add "disease_stage"
    # rag_settings = {k: settings.get(k) for k in ["role", "temperature", "language", "patient_name", "caregiver_name", "tone", "disease_stage"]}

    # First, construct the path to the manifest file.
    manifest_path_str = str(PERSONAL_MUSIC_BASE / "music_manifest.json")

    # Then, gather all the settings from the UI into the dictionary.
    rag_settings = {k: settings.get(k) for k in ["role", "temperature", "language", "patient_name", "caregiver_name", "tone", "disease_stage"]}

    # Finally, add the special manifest path to that same dictionary.
    rag_settings["music_manifest_path"] = manifest_path_str

    chain = make_rag_chain(vs_general, personal_vectorstore, **rag_settings)

    response = answer_query(chain, question, query_type=query_type, chat_history=api_chat_history, **final_tags)

    # --- MUSIC PLAYBACK LOGIC START ---

    # 1. Extract the text answer and the potential music file path from the agent's response.
    answer = response.get("answer", "[No answer found]")
    audio_playback_url = response.get("audio_playback_url")

    # 2. Append the text part of the response to the chat history so the user sees it.
    chat_history.append({"role": "assistant", "content": answer})

    if auto_detected_context:
        chat_history.append({"role": "assistant", "content": auto_detected_context})
    if response.get("sources"):
        chat_history.append({"role": "assistant", "content": f"*(Sources used: {', '.join(response['sources'])})*"})

    # 3. Decide what to play in the audio component: music takes priority over TTS.
    audio_out_update = None
    if audio_playback_url:
        # If a music URL was returned, update the audio component to play that music file.
        song_title = os.path.basename(audio_playback_url)
        audio_out_update = gr.update(value=audio_playback_url, visible=True, label=f"Now Playing: {song_title}", autoplay=True)
    elif settings.get("tts_on") and answer:
        # Otherwise, if no music is playing and TTS is on, fall back to reading the text answer aloud.
        tts_file = synthesize_tts(answer, lang=CONFIG["languages"].get(settings.get("tts_lang"), "en"))
        audio_out_update = gr.update(value=tts_file, visible=bool(tts_file), label="Response Audio", autoplay=True)

    # 4. Return all the updates for the Gradio UI.
    return "", audio_out_update, chat_history

    # --- MUSIC PLAYBACK LOGIC END ---
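
# Illustrative shape of chat_history in Gradio's "messages" format, which
# chat_fn both consumes and returns (the message content below is an assumed
# example):
# [
#   {"role": "user", "content": "Can you play something calming?"},
#   {"role": "assistant", "content": "Of course. Here is a song you love."},
#   {"role": "assistant", "content": "*(Auto-detected context: tag=`anxiety`)*"},
# ]
# Messages whose content starts with "*(" are UI-only annotations; they are
# filtered out before the history is sent back to the model.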


# The old save_chat_to_memory function incorrectly assumed the history was
# a list of tuples, like [(True, "..."), (False, "...")].
# However, chat_fn correctly builds chat_history as a list of dictionaries:
# [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]
# The version below correctly parses that list of dictionaries.
def save_chat_to_memory(chat_history):
    if not chat_history:
        return "Nothing to save."

    # --- START: MODIFIED LOGIC ---
    # Correctly processes the list of dictionaries from the chatbot
    formatted_chat = [
        f"{msg.get('role', 'assistant').capitalize()}: {msg.get('content', '').strip()}"
        for msg in chat_history
        if isinstance(msg, dict) and msg.get('content') and not msg.get('content', '').strip().startswith("*(")
    ]
    # --- END: MODIFIED LOGIC ---

    if not formatted_chat:
        return "No conversation to save."

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    title = f"Conversation from {timestamp}"
    full_content = f"Title: {title}\n\nContent:\n" + "\n".join(formatted_chat)
    doc = Document(page_content=full_content, metadata={"source": "Saved Chat", "title": title})

    global personal_vectorstore
    if personal_vectorstore is None:
        personal_vectorstore = build_or_load_vectorstore([doc], PERSONAL_INDEX_PATH, is_personal=True)
    else:
        personal_vectorstore.add_documents([doc])

    personal_vectorstore.save_local(PERSONAL_INDEX_PATH)
    return f"Conversation from {timestamp} saved."


def list_personal_memories():
    global personal_vectorstore
    if personal_vectorstore is None or not hasattr(personal_vectorstore.docstore, '_dict') or not personal_vectorstore.docstore._dict:
        return gr.update(value=[["No memories", "", ""]]), gr.update(choices=[], value=None)
    docs = list(personal_vectorstore.docstore._dict.values())
    return gr.update(value=[[d.metadata.get('title', '...'), d.metadata.get('source', '...'), d.page_content] for d in docs]), gr.update(choices=[d.page_content for d in docs])

def delete_personal_memory(memory_to_delete):
    global personal_vectorstore
    if personal_vectorstore is None or not memory_to_delete: return "No memory selected."
    all_docs = list(personal_vectorstore.docstore._dict.values())
    docs_to_keep = [d for d in all_docs if d.page_content != memory_to_delete]
    if len(all_docs) == len(docs_to_keep): return "Error: Could not find memory."
    if not docs_to_keep:
        if os.path.isdir(PERSONAL_INDEX_PATH): shutil.rmtree(PERSONAL_INDEX_PATH)
        personal_vectorstore = build_or_load_vectorstore([], PERSONAL_INDEX_PATH, is_personal=True)
    else:
        new_vs = FAISS.from_documents(docs_to_keep, _default_embeddings())
        new_vs.save_local(PERSONAL_INDEX_PATH)
        personal_vectorstore = new_vs
    return "Successfully deleted memory."

# --- EVALUATION FUNCTIONS: moved into evaluate.py ---
# def evaluate_nlu_tags(expected: Dict[str, Any], actual: Dict[str, Any], tag_key: str, expected_key_override: str = None) -> Dict[str, float]:
# def _parse_judge_json(raw_str: str) -> dict | None:
# def run_comprehensive_evaluation():

def upload_knowledge(files, theme):
    for f in files: copy_into_theme(theme, f.name)
    if theme in vectorstores: del vectorstores[theme]
    return f"Uploaded {len(files)} file(s)."

def save_file_selection(theme, enabled):
    man = load_manifest(theme)
    for fname in man['files']: man['files'][fname] = fname in enabled
    save_manifest(theme, man)
    if theme in vectorstores: del vectorstores[theme]
    return f"Settings saved for theme '{theme}'."

def refresh_file_list_ui(theme):
    files = list_theme_files(theme)
    return gr.update(choices=[f for f, _ in files], value=[f for f, en in files if en]), f"Found {len(files)} file(s)."

def auto_setup_on_load(theme):
    if not os.listdir(theme_upload_dir(theme)): seed_files_into_theme(theme)
    # Pass one positional value per key in collect_settings (14 in total,
    # including the newly added "disease_stage").
    settings = collect_settings("patient", "", "", "warm", "English", "English", 0.7,
                                "Default: Mild Stage", "None", "None", "None", "All", True, False)
    files_ui, status = refresh_file_list_ui(theme)
    return settings, files_ui, status

def test_save_file():
    try:
        path = PERSONAL_DATA_BASE / "persistence_test.txt"
        path.write_text(f"File saved at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        return f"✅ Success! Wrote test file to: {path}"
    except Exception as e: return f"❌ Error! Failed to write file: {e}"

def check_test_file():
    path = PERSONAL_DATA_BASE / "persistence_test.txt"
    if path.exists(): return f"✅ Success! Found test file. Contents: '{path.read_text()}'"
    return f"❌ Failure. Test file not found at: {path}"

# --- UI Definition ---
CSS = """
.gradio-container { font-size: 14px; }
#chatbot { min-height: 400px; }
#audio_in audio, #audio_out audio { max-height: 40px; }
#audio_in .waveform, #audio_out .waveform { display: none !important; }
#audio_in, #audio_out { min-height: 0px !important; }
"""

# OLD: add allowed_paths so the UI can access the music files
# with gr.Blocks(theme=gr.themes.Soft(), css=CSS, allowed_paths=[str(PERSONAL_MUSIC_BASE)]) as demo:
with gr.Blocks(theme=gr.themes.Soft(), css=CSS) as demo:
    settings_state = gr.State({})
    with gr.Tab("Chat"):
        with gr.Row():
            user_text = gr.Textbox(show_label=False, placeholder="Type your message here...", scale=7)
            submit_btn = gr.Button("Send", variant="primary", scale=1)
        with gr.Row():
            audio_in = gr.Audio(sources=["microphone"], type="filepath", label="Voice Input", elem_id="audio_in")
            audio_out = gr.Audio(label="Response Audio", autoplay=True, visible=True, elem_id="audio_out")

        chatbot = gr.Chatbot(elem_id="chatbot", label="Conversation", type="messages")
        chat_status = gr.Markdown()
        with gr.Row():
            clear_btn = gr.Button("Clear")
            save_btn = gr.Button("Save to Memory")

    with gr.Tab("Personalize"):
        gr.Markdown("### **Upload Personal Memory**")
        with gr.Accordion("Add Multimodal Data to Personal Memory Bank", open=True):
            personal_title = gr.Textbox(label="Title")
            personal_text = gr.Textbox(lines=5, label="Text Content")
            with gr.Row():
                personal_file = gr.File(label="Upload Audio/Video/Text File")
                personal_image = gr.Image(type="filepath", label="Upload Image")
            personal_yt_url = gr.Textbox(label="Or, provide a YouTube URL")
            personal_add_btn = gr.Button("Add Knowledge", variant="primary")
            personal_status = gr.Markdown()

        # In app.py, within the "Personalize" Tab
        gr.Markdown("### **Upload Personal Music Library**")
        with gr.Accordion("Add Music to Personal Memory Bank", open=False):
            music_file = gr.File(label="Upload Audio File (.mp3, .wav)", file_types=["audio"])
            music_title = gr.Textbox(label="Song Title (e.g., My Way)")
            music_artist = gr.Textbox(label="Artist (e.g., Frank Sinatra)")
            # music_mood = gr.Textbox(label="Mood Tags (comma-separated, e.g., calm, happy, nostalgic)")
            # NEW: A dropdown for music mood selection, based on the emotion and behavior tags
            music_mood = gr.Dropdown(
                CONFIG["music_moods"],
                label="Select Moods/Contexts for this Song",
                multiselect=True
            )
            music_add_btn = gr.Button("Add Music", variant="primary")
            music_status = gr.Markdown()

        gr.Markdown("### **Manage Personal Memory Bank**")
        with gr.Accordion("View/Hide Details", open=False):
            personal_memory_display = gr.DataFrame(headers=["Title", "Source", "Content"], label="Saved Memories", row_count=(5, "dynamic"))
            personal_refresh_btn = gr.Button("Refresh Memories")
            personal_delete_selector = gr.Dropdown(label="Select memory to delete", scale=3, interactive=True)
            personal_delete_btn = gr.Button("Delete Selected", variant="stop", scale=1)
            personal_delete_status = gr.Markdown()

        # --- NEW UI FOR MUSIC MANAGEMENT ---
        gr.Markdown("### **Manage Music Library**")
        with gr.Accordion("View/Hide Music Details", open=False):
            music_library_display = gr.DataFrame(
                headers=["Title", "Artist", "Moods"],
                label="Music Library",
                row_count=(5, "dynamic")
            )
            music_refresh_btn = gr.Button("Refresh Music List")
            music_delete_selector = gr.Dropdown(
                label="Select music to delete",
                scale=3,
                interactive=True
            )
            music_delete_btn = gr.Button("Delete Selected Music", variant="stop", scale=1)
            music_delete_status = gr.Markdown()
        # --- END OF NEW UI ---

    with gr.Tab("Settings"):
        with gr.Group():
            gr.Markdown("## Conversation & Persona Settings")
            with gr.Row():
                role = gr.Radio(CONFIG["roles"], value="patient", label="Your Role")
                patient_name = gr.Textbox(label="Patient's Name")
                caregiver_name = gr.Textbox(label="Caregiver's Name")
            with gr.Row():
                temperature = gr.Slider(0.0, 1.2, value=0.7, step=0.1, label="Creativity")
                tone = gr.Dropdown(CONFIG["tones"], value="warm", label="Response Tone")
            with gr.Row():
                # --- ADD THIS NEW DROPDOWN ---
                # disease_stage = gr.Dropdown(CONFIG["disease_stages"], value="Normal / Unspecified", label="Assumed Disease Stage")
                disease_stage = gr.Dropdown(CONFIG["disease_stages"], value="Default: Mild Stage", label="Assumed Disease Stage")
                # --- END OF ADDITION ---
                behaviour_tag = gr.Dropdown(CONFIG["behavior_tags"], value="None", label="Behaviour Filter (Manual)")
                emotion_tag = gr.Dropdown(CONFIG["emotion_tags"], value="None", label="Emotion Filter (Manual)")
                topic_tag = gr.Dropdown(CONFIG["topic_tags"], value="None", label="Topic Tag Filter (Manual)")
        with gr.Accordion("Language, Voice & Debugging", open=False):
            language = gr.Dropdown(list(CONFIG["languages"].keys()), value="English", label="Response Language")
            tts_lang = gr.Dropdown(list(CONFIG["languages"].keys()), value="English", label="Voice Language")
            tts_on = gr.Checkbox(True, label="Enable Voice Response")
            debug_mode = gr.Checkbox(False, label="Show Debug Info")
        gr.Markdown("--- \n ## General Knowledge Base Management")
        with gr.Row():
            with gr.Column(scale=1):
                files_in = gr.File(file_count="multiple", file_types=[".jsonl", ".txt"], label="Upload Knowledge Files")
                upload_btn = gr.Button("Upload to Theme")
                seed_btn = gr.Button("Import Sample Data")
                mgmt_status = gr.Markdown()
            with gr.Column(scale=2):
                active_theme = gr.Radio(CONFIG["themes"], value="All", label="Active Knowledge Theme")
                files_box = gr.CheckboxGroup(choices=[], label="Enable Files for Selected Theme")
                with gr.Row():
                    save_files_btn = gr.Button("Save Selection", variant="primary")
                    refresh_btn = gr.Button("Refresh List")
        with gr.Accordion("Persistence Test", open=False):
            test_save_btn = gr.Button("1. Run Persistence Test (Save File)")
            check_save_btn = gr.Button("3. Check for Test File")
            test_status = gr.Markdown()

    # --- UPDATED TESTING TAB ---
    with gr.Tab("Testing"):
        gr.Markdown("## Comprehensive Performance Evaluation")
        gr.Markdown("Click the button below to run a full evaluation on all test fixtures. This will test NLU (Routing & Tagging) and generate RAG responses for manual review.")

        run_comprehensive_btn = gr.Button("Run Comprehensive Evaluation", variant="primary")

        batch_summary_md = gr.Markdown("### Evaluation Summary: Not yet run.")

        comprehensive_results_df = gr.DataFrame(
            label="Detailed Evaluation Results",
            elem_id="comprehensive_results_df",
            headers=[
                "Test ID", "Title", "Route Correct?", "Expected Route", "Actual Route",
                "Behavior F1", "Emotion F1", "Topic F1", "Context F1",
                "Generated Answer", "Sources", "Source Count", "Latency (ms)", "Faithfulness"
            ],
            interactive=False
        )


    # --- Event Wiring ---
    all_settings = [
        # Chat Tab Settings
        role, patient_name, caregiver_name, tone, language, tts_lang, temperature,
        # Disease Stage & Manual Filters
        disease_stage, behaviour_tag, emotion_tag, topic_tag,
        # Knowledge Base & Debug
        active_theme, tts_on, debug_mode
    ]
    settings_state = gr.State({})

    # In app.py, replace the event wiring loop right after the all_settings list

    for component in all_settings:
        component.change(fn=collect_settings, inputs=all_settings, outputs=settings_state)

    submit_btn.click(fn=chat_fn, inputs=[user_text, audio_in, settings_state, chatbot], outputs=[user_text, audio_out, chatbot])

    # for c in all_settings: c.change(fn=collect_settings, inputs=all_settings, outputs=settings_state)
    # submit_btn.click(fn=chat_fn, inputs=[user_text, audio_in, settings_state, chatbot], outputs=[user_text, audio_out, chatbot])

    save_btn.click(fn=save_chat_to_memory, inputs=[chatbot], outputs=[chat_status])
    clear_btn.click(lambda: (None, None, [], None, "", ""), outputs=[user_text, audio_out, chatbot, audio_in, user_text, chat_status])

    personal_add_btn.click(fn=handle_add_knowledge, inputs=[personal_title, personal_text, personal_file, personal_image, personal_yt_url, settings_state], outputs=[personal_status]).then(lambda: (None, None, None, None, None), outputs=[personal_title, personal_text, personal_file, personal_image, personal_yt_url])
    # Wire the button to the function in the UI event wiring section
    music_add_btn.click(
        fn=handle_add_music,
        inputs=[music_file, music_title, music_artist, music_mood],
        outputs=[music_status]
    )
    # --- NEW EVENT WIRING FOR MUSIC MANAGEMENT ---
    music_refresh_btn.click(
        fn=list_music_library,
        inputs=None,
        outputs=[music_library_display, music_delete_selector]
    )
    music_delete_btn.click(
        fn=delete_music_from_library,
        inputs=[music_delete_selector],
        outputs=[music_delete_status]
    ).then(
        fn=list_music_library,
        inputs=None,
        outputs=[music_library_display, music_delete_selector]
    )
    # --- END OF NEW WIRING ---

    personal_refresh_btn.click(fn=list_personal_memories, inputs=None, outputs=[personal_memory_display, personal_delete_selector])
    personal_delete_btn.click(fn=delete_personal_memory, inputs=[personal_delete_selector], outputs=[personal_delete_status]).then(fn=list_personal_memories, inputs=None, outputs=[personal_memory_display, personal_delete_selector])

    upload_btn.click(upload_knowledge, inputs=[files_in, active_theme], outputs=[mgmt_status]).then(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])
    save_files_btn.click(save_file_selection, inputs=[active_theme, files_box], outputs=[mgmt_status])
    seed_btn.click(seed_files_into_theme, inputs=[active_theme]).then(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])
    refresh_btn.click(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])
    active_theme.change(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])

    # Then update the .click() event handler
    run_comprehensive_btn.click(
        fn=lambda: run_comprehensive_evaluation(
            vs_general=ensure_index("All"),
            vs_personal=personal_vectorstore,  # <-- This is correctly passed in
            nlu_vectorstore=nlu_vectorstore,
            config=CONFIG,
            storage_path=STORAGE_ROOT  # <-- ADD THIS ARGUMENT
        ),
        outputs=[batch_summary_md, comprehensive_results_df, comprehensive_results_df]
    )

    demo.load(auto_setup_on_load, inputs=[active_theme], outputs=[settings_state, files_box, mgmt_status])
    demo.load(load_test_fixtures)
    test_save_btn.click(fn=test_save_file, inputs=None, outputs=[test_status])
    check_save_btn.click(fn=check_test_file, inputs=None, outputs=[test_status])

# --- Startup Logic ---
# --- Function 3: The Startup Orchestrator ---
def pre_load_indexes():
    """Loads all data sources and runs the auto-loading functions at startup."""
    global personal_vectorstore, nlu_vectorstore
    print("Pre-loading all indexes at startup...")
    print("  - Loading NLU examples index...")
    nlu_vectorstore = bootstrap_nlu_vectorstore("nlu_training_examples.jsonl", NLU_EXAMPLES_INDEX_PATH)
    print("    ...NLU index loaded.")
    for theme in CONFIG["themes"]:
        print(f"  - Loading general index for theme: '{theme}'")
        try:
            ensure_index(theme)
            print(f"    ...'{theme}' theme loaded.")
        except Exception as e:
            print(f"    ...Error loading theme '{theme}': {e}")

    print("  - Loading personal knowledge index...")
    try:
        personal_vectorstore = build_or_load_vectorstore([], PERSONAL_INDEX_PATH, is_personal=True)
        print("    ...Personal knowledge loaded.")
    except Exception as e:
        print(f"    ...Error loading personal knowledge: {e}")

    # NEW: run the auto-loading and syncing functions so the app starts with a
    # small pre-loaded Personal Memory Bank
    load_personal_files_from_folder()
    sync_music_library_from_folder()

    print("All indexes and personal files loaded. Application is ready.")


if __name__ == "__main__":
    seed_files_into_theme('All')
    pre_load_indexes()
    demo.queue().launch(debug=True, allowed_paths=[str(PERSONAL_MUSIC_BASE)])
    # demo.queue().launch(debug=True)
evaluate.py ADDED
@@ -0,0 +1,405 @@
# evaluate.py

import os
import json
import time
import re  # <-- ADD THIS IMPORT
import pandas as pd
from typing import List, Dict, Any
from pathlib import Path

# --- Imports from the main application ---
# In evaluate.py

try:
    from alz_companion.agent import (
        make_rag_chain, route_query_type, detect_tags_from_query,
        answer_query, call_llm, build_or_load_vectorstore
    )
    from alz_companion.prompts import FAITHFULNESS_JUDGE_PROMPT
    from langchain_community.vectorstores import FAISS
    # --- Also move this import inside the try block for consistency ---
    from langchain.schema import Document

except ImportError:
    # --- START: FALLBACK DEFINITIONS ---
    class FAISS:
        def __init__(self): self.docstore = type('obj', (object,), {'_dict': {}})()
        def add_documents(self, docs): pass
        def save_local(self, path): pass
        @classmethod
        def from_documents(cls, docs, embeddings=None): return cls()

    class Document:
        def __init__(self, page_content, metadata=None):
            self.page_content = page_content
            self.metadata = metadata or {}

    def make_rag_chain(*args, **kwargs): return lambda q, **k: {"answer": f"(Eval Fallback) You asked: {q}", "sources": []}
    def route_query_type(q, **kwargs): return "general_conversation"
    def detect_tags_from_query(*args, **kwargs): return {}
    def answer_query(chain, q, **kwargs): return chain(q, **kwargs)
    def call_llm(*args, **kwargs): return "{}"

    # --- ADD FALLBACK DEFINITION FOR THE MISSING FUNCTION ---
    def build_or_load_vectorstore(docs, index_path, is_personal=False):
        return FAISS()
    # --- END OF ADDITION ---

    FAITHFULNESS_JUDGE_PROMPT = ""
    print("WARNING: Could not import from alz_companion. Evaluation functions will use fallbacks.")
    # --- END: FALLBACK DEFINITIONS ---
53
+
# --- LLM-as-a-Judge Prompt for Answer Correctness ---
ANSWER_CORRECTNESS_JUDGE_PROMPT = """You are an expert evaluator. Your task is to assess the factual correctness of a generated answer against a ground truth answer.

- GROUND_TRUTH_ANSWER: This is the gold-standard, correct answer.
- GENERATED_ANSWER: This is the answer produced by the AI model.

Evaluate whether the GENERATED_ANSWER is factually aligned with the GROUND_TRUTH_ANSWER. Ignore minor differences in phrasing, tone, or structure; the key is factual accuracy.

Respond with a single JSON object containing a float score from 0.0 to 1.0.
- 1.0: The generated answer is factually correct and aligns perfectly with the ground truth.
- 0.5: The generated answer is partially correct but misses key information or contains minor inaccuracies.
- 0.0: The generated answer is factually incorrect or contradicts the ground truth.

--- DATA TO EVALUATE ---
GROUND_TRUTH_ANSWER:
{ground_truth_answer}

GENERATED_ANSWER:
{generated_answer}
---

Return a single JSON object with your score:
{{
  "correctness_score": <float>
}}
"""

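# Illustrative example (invented, not captured judge output): for a ground
# truth of "She moved in 2015" and a generated answer of "I believe she moved
# around 2015", a well-behaved judge would return something like
#   {"correctness_score": 1.0}
# which _parse_judge_json() below can extract even when the model wraps the
# JSON in extra prose.
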
test_fixtures = []

def load_test_fixtures():
    """Loads test fixtures from a JSONL file into the global test_fixtures list."""
    global test_fixtures
    test_fixtures = []
    env_path = os.environ.get("TEST_FIXTURES_PATH", "").strip()
    candidates = [env_path] if env_path else ["conversation_test_fixtures_v10.jsonl", "conversation_test_fixtures_v8.jsonl"]
    path = next((p for p in candidates if p and os.path.exists(p)), None)
    if not path:
        print("Warning: No test fixtures file found for evaluation.")
        return

    # Prefer the corrected v10 file when it is available.
    if "conversation_test_fixtures_v10.jsonl" in path:
        print(f"Using corrected test fixtures: {path}")

    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            try:
                test_fixtures.append(json.loads(line))
            except json.JSONDecodeError:
                print(f"Skipping malformed JSON line in {path}")
    print(f"Loaded {len(test_fixtures)} fixtures for evaluation from {path}")

def evaluate_nlu_tags(expected: Dict[str, Any], actual: Dict[str, Any], tag_key: str, expected_key_override: str = None) -> Dict[str, float]:
    lookup_key = expected_key_override or tag_key
    expected_raw = expected.get(lookup_key, [])
    expected_set = set(expected_raw if isinstance(expected_raw, list) else [expected_raw]) if expected_raw and expected_raw != "None" else set()
    actual_raw = actual.get(tag_key, [])
    actual_set = set(actual_raw if isinstance(actual_raw, list) else [actual_raw]) if actual_raw and actual_raw != "None" else set()
    if not expected_set and not actual_set:
        return {"precision": 1.0, "recall": 1.0, "f1_score": 1.0}
    true_positives = len(expected_set.intersection(actual_set))
    precision = true_positives / len(actual_set) if actual_set else 0.0
    recall = true_positives / len(expected_set) if expected_set else 0.0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
    return {"precision": precision, "recall": recall, "f1_score": f1_score}

def _parse_judge_json(raw_str: str) -> dict | None:
    # Extract the first {...} span from the raw LLM output; judges sometimes
    # wrap their JSON in extra prose.
    try:
        start_brace = raw_str.find('{')
        end_brace = raw_str.rfind('}')
        if start_brace != -1 and end_brace > start_brace:
            json_str = raw_str[start_brace : end_brace + 1]
            return json.loads(json_str)
        return None
    except (json.JSONDecodeError, AttributeError):
        return None
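# Example: _parse_judge_json('Sure! {"correctness_score": 0.5} Hope that helps.')
# returns {'correctness_score': 0.5}; _parse_judge_json("no json here") returns None.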

# --- Helpers for categorisation and error-class labelling ---
def _categorize_test(test_id: str) -> str:
    tid = (test_id or "").lower()
    if "synonym" in tid: return "synonym"
    if "multi_fact" in tid or "multi-hop" in tid or "multihop" in tid: return "multi_fact"
    if "omission" in tid: return "omission"
    if "hallucination" in tid: return "hallucination"
    if "time" in tid or "temporal" in tid: return "temporal"
    if "context" in tid: return "context_disambig"
    return "baseline"

def _classify_error(gt: str, gen: str) -> str:
    gt = (gt or "").strip().lower()
    gen = (gen or "").strip().lower()
    if not gen:
        return "empty"
    if not gt:
        # No ground truth to check against, yet an answer was produced.
        return "hallucination"
    if gt in gen:
        return "paraphrase"
    # Token-overlap heuristic: high overlap with missing pieces reads as an
    # omission; low overlap reads as a contradiction.
    gt_tokens = {t for t in re.split(r'\W+', gt) if t}
    gen_tokens = {t for t in re.split(r'\W+', gen) if t}
    overlap = len(gt_tokens & gen_tokens) / max(1, len(gt_tokens))
    if overlap >= 0.3:
        return "omission"
    return "contradiction"

def run_comprehensive_evaluation(
    vs_general: FAISS,
    vs_personal: FAISS,
    nlu_vectorstore: FAISS,
    config: Dict[str, Any],
    storage_path: Path  # directory where the detailed evaluation log is written
):
    global test_fixtures
    if not test_fixtures:
        # Keep the 3-tuple return shape: summary text, table rows, headers.
        return "No test fixtures loaded.", [], []

    personal_context_docs = []
    personal_context_file = "sample_data/1 Complaints of a Dutiful Daughter.txt"

    if os.path.exists(personal_context_file):
        print(f"Found personal context file for evaluation: '{personal_context_file}'")
        with open(personal_context_file, "r", encoding="utf-8") as f:
            content = f.read()
        doc = Document(page_content=content, metadata={"source": os.path.basename(personal_context_file)})
        personal_context_docs.append(doc)
    else:
        print(f"WARNING: Personal context file not found at '{personal_context_file}'. Factual tests will likely fail.")

    # Build a temporary personal vectorstore from the sample document so each
    # evaluation run uses a known, reproducible personal context.
    vs_personal_test = build_or_load_vectorstore(
        personal_context_docs,
        index_path="tmp/eval_personal_index",
        is_personal=True
    )
    print(f"Successfully created temporary personal vectorstore with {len(personal_context_docs)} document(s) for this evaluation run.")

    def _norm(label: str) -> str:
        label = (label or "").strip().lower()
        return "factual_question" if "factual" in label else label

    results: List[Dict[str, Any]] = []
    total_fixtures = len(test_fixtures)
    print(f"\nπŸš€ STARTING EVALUATION on {total_fixtures} test cases...")

    for i, fx in enumerate(test_fixtures):
        test_id = fx.get("test_id", "N/A")
        print(f"--- Processing Test Case {i+1}/{total_fixtures}: ID = {test_id} ---")

        turns = fx.get("turns") or []
        api_chat_history = [{"role": t.get("role"), "content": t.get("text")} for t in turns]
        query = next((t["content"] for t in reversed(api_chat_history) if (t.get("role") or "user").lower() == "user"), "")
        if not query: continue

        print(f'Query: "{query}"')

        ground_truth = fx.get("ground_truth", {})
        expected_route = _norm(ground_truth.get("expected_route", "caregiving_scenario"))
        expected_tags = ground_truth.get("expected_tags", {})
        actual_route = _norm(route_query_type(query))
        route_correct = (actual_route == expected_route)

        # NLU tag detection only applies to caregiving scenarios.
        actual_tags: Dict[str, Any] = {}
        if "caregiving_scenario" in actual_route:
            actual_tags = detect_tags_from_query(
                query, nlu_vectorstore=nlu_vectorstore,
                behavior_options=config["behavior_tags"], emotion_options=config["emotion_tags"],
                topic_options=config["topic_tags"], context_options=config["context_tags"],
            )

        behavior_metrics = evaluate_nlu_tags(expected_tags, actual_tags, "detected_behaviors")
        emotion_metrics = evaluate_nlu_tags(expected_tags, actual_tags, "detected_emotion")
        topic_metrics = evaluate_nlu_tags(expected_tags, actual_tags, "detected_topics")
        context_metrics = evaluate_nlu_tags(expected_tags, actual_tags, "detected_contexts")

        final_tags = {}
        if "caregiving_scenario" in actual_route:
            final_tags = {
                "scenario_tag": (actual_tags.get("detected_behaviors") or [None])[0],
                "emotion_tag": actual_tags.get("detected_emotion"),
                "topic_tag": (actual_tags.get("detected_topics") or [None])[0],
                "context_tags": actual_tags.get("detected_contexts", [])
            }
241
+ current_test_role = fx.get("test_role", "patient")
242
+ rag_chain = make_rag_chain(
243
+ vs_general,
244
+ vs_personal,
245
+ role=current_test_role,
246
+ for_evaluation=True
247
+ )
248
+
249
+ t0 = time.time()
250
+ response = answer_query(rag_chain, query, query_type=actual_route, chat_history=api_chat_history, **final_tags)
251
+ latency_ms = round((time.time() - t0) * 1000.0, 1)
252
+ answer_text = response.get("answer", "ERROR")
253
+ ground_truth_answer = ground_truth.get("ground_truth_answer")
254
+
255
+ category = _categorize_test(test_id)
256
+ error_class = _classify_error(ground_truth_answer, answer_text)
257
+
258
+ expected_sources_set = set(map(str, ground_truth.get("expected_sources", [])))
259
+ raw_sources = response.get("sources", [])
260
+ actual_sources_set = set(map(str, raw_sources if isinstance(raw_sources, (list, tuple)) else [raw_sources]))
261
+
262
+ print("\n" + "-"*20 + " SOURCE EVALUATION " + "-"*20)
263
+ print(f" - Expected: {sorted(list(expected_sources_set))}")
264
+ print(f" - Actual: {sorted(list(actual_sources_set))}")
265
+
266
+ true_positives = expected_sources_set.intersection(actual_sources_set)
267
+ false_positives = actual_sources_set - expected_sources_set
268
+ false_negatives = expected_sources_set - actual_sources_set
269
+
270
+ if not false_positives and not false_negatives:
271
+ print(" - Result: βœ… Perfect Match!")
272
+ else:
273
+ if false_positives:
274
+ print(f" - πŸ”» False Positives (hurts precision): {sorted(list(false_positives))}")
275
+ if false_negatives:
276
+ print(f" - πŸ”» False Negatives (hurts recall): {sorted(list(false_negatives))}")
277
+ print("-"*59 + "\n")

        context_precision, context_recall = 0.0, 0.0
        if expected_sources_set or actual_sources_set:
            tp = len(expected_sources_set.intersection(actual_sources_set))
            if len(actual_sources_set) > 0: context_precision = tp / len(actual_sources_set)
            if len(expected_sources_set) > 0: context_recall = tp / len(expected_sources_set)
        else:
            # Neither expected nor retrieved sources: count as a perfect match.
            context_precision, context_recall = 1.0, 1.0

        print("\n" + "-"*20 + " ANSWER & CORRECTNESS EVALUATION " + "-"*20)
        print(f"  - Ground Truth Answer: {ground_truth_answer}")
        print(f"  - Generated Answer:    {answer_text}")
        print("-" * 59)

        answer_correctness_score = None
        if ground_truth_answer and "ERROR" not in answer_text:
            try:
                judge_msg = ANSWER_CORRECTNESS_JUDGE_PROMPT.format(ground_truth_answer=ground_truth_answer, generated_answer=answer_text)
                print(f"  - Judge Prompt Sent:\n{judge_msg}")
                raw_correctness = call_llm([{"role": "user", "content": judge_msg}], temperature=0.0)
                print(f"  - Judge Raw Response: {raw_correctness}")
                correctness_data = _parse_judge_json(raw_correctness)
                if correctness_data and "correctness_score" in correctness_data:
                    answer_correctness_score = float(correctness_data["correctness_score"])
                    print(f"  - Final Score: {answer_correctness_score}")
            except Exception as e:
                print(f"ERROR during answer correctness judging: {e}")
306
+ faithfulness = None
307
+ source_docs = response.get("source_documents", [])
308
+ if source_docs and "ERROR" not in answer_text:
309
+ context_blob = "\n---\n".join([doc.page_content for doc in source_docs])
310
+ judge_msg = FAITHFULNESS_JUDGE_PROMPT.format(query=query, answer=answer_text, sources=context_blob)
311
+ try:
312
+ if context_blob.strip():
313
+ raw = call_llm([{"role": "user", "content": judge_msg}], temperature=0.0)
314
+ data = _parse_judge_json(raw)
315
+ if data:
316
+ denom = data.get("supported", 0) + data.get("contradicted", 0) + data.get("not_enough_info", 0)
317
+ if denom > 0: faithfulness = round(data.get("supported", 0) / denom, 3)
318
+ elif data.get("ignored", 0) > 0: faithfulness = 1.0
319
+ except Exception as e:
320
+ print(f"ERROR during faithfulness judging: {e}")
321
+
322
+ sources_pretty = ", ".join(sorted(s)) if (s:=actual_sources_set) else ""
323
+ results.append({
324
+ "test_id": fx.get("test_id", "N/A"), "title": fx.get("title", "N/A"),
325
+ "route_correct": "βœ…" if route_correct else "❌", "expected_route": expected_route, "actual_route": actual_route,
326
+ "behavior_f1": f"{behavior_metrics['f1_score']:.2f}", "emotion_f1": f"{emotion_metrics['f1_score']:.2f}",
327
+ "topic_f1": f"{topic_metrics['f1_score']:.2f}", "context_f1": f"{context_metrics['f1_score']:.2f}",
328
+ "generated_answer": answer_text, "sources": sources_pretty, "source_count": len(actual_sources_set),
329
+ "latency_ms": latency_ms, "faithfulness": faithfulness,
330
+ "context_precision": context_precision, "context_recall": context_recall,
331
+ "answer_correctness": answer_correctness_score,
332
+ "category": category,
333
+ "error_class": error_class
334
+ })
335
+
336
+ df = pd.DataFrame(results)
337
+ summary_text, table_rows, headers = "No valid test fixtures found to evaluate.", [], []
338
+
339
+ if not df.empty:
340
+ cols = ["test_id", "title", "route_correct", "expected_route", "actual_route", "context_precision", "context_recall", "faithfulness", "answer_correctness", "behavior_f1", "emotion_f1", "topic_f1", "context_f1", "source_count", "latency_ms", "sources", "generated_answer", "category", "error_class"]
341
+ df = df[[c for c in cols if c in df.columns]]
342
+
343
+ # --- START OF MODIFICATION ---
344
+ pct = df["route_correct"].value_counts(normalize=True).get("βœ…", 0) * 100
345
+ to_f = lambda s: pd.to_numeric(s, errors="coerce")
346
+
347
+ # Calculate the mean for the NLU F1 scores
348
+ bf1_mean = to_f(df["behavior_f1"]).mean() * 100
349
+ ef1_mean = to_f(df["emotion_f1"]).mean() * 100
350
+ tf1_mean = to_f(df["topic_f1"]).mean() * 100
351
+ cf1_mean = to_f(df["context_f1"]).mean() * 100
352
+
353
+ # Add the NLU metrics to the summary f-string
354
+ summary_text = f"""## Evaluation Summary
355
+ - **Routing Accuracy**: {pct:.2f}%
356
+ - **Behaviour F1 (avg)**: {bf1_mean:.2f}%
357
+ - **Emotion F1 (avg)**: {ef1_mean:.2f}%
358
+ - **Topic F1 (avg)**: {tf1_mean:.2f}%
359
+ - **Context F1 (avg)**: {cf1_mean:.2f}%
360
+ - **RAG: Context Precision**: {(to_f(df["context_precision"]).mean() * 100):.1f}%
361
+ - **RAG: Context Recall**: {(to_f(df["context_recall"]).mean() * 100):.1f}%
362
+ - **RAG: Answer Correctness (LLM-judge)**: {(to_f(df["answer_correctness"]).mean() * 100):.1f}%"""
363
+ # --- END OF MODIFICATION ---
364
+
365
+
366
+ df_display = df.rename(columns={"context_precision": "Ctx. Precision", "context_recall": "Ctx. Recall"})
367
+ table_rows = df_display.values.tolist()
368
+ headers = df_display.columns.tolist()

    output_path = "evaluation_results.csv"
    df.to_csv(output_path, index=False, encoding="utf-8")
    print(f"Evaluation results saved to {output_path}")

    log_path = storage_path / "evaluation_log.txt"
    with open(log_path, "w", encoding="utf-8") as logf:
        logf.write("===== Detailed Evaluation Run =====\n")
        df_string = df.to_string(index=False)
        logf.write(df_string)
        logf.write("\n\n")
        # Per-category correctness breakdown (printed and logged).
        try:
            cat_means = df.groupby("category")["answer_correctness"].mean().reset_index()
            print("\nπŸ“Š Correctness by Category:")
            print(cat_means.to_string(index=False))
            logf.write("\nπŸ“Š Correctness by Category:\n")
            logf.write(cat_means.to_string(index=False))
            logf.write("\n")
        except Exception as e:
            print(f"WARNING: Could not compute category breakdown: {e}")

        # Error-class distribution per category, as a cross-tabulation.
        try:
            confusion = pd.crosstab(df["category"], df["error_class"], rownames=["Category"], colnames=["Error Class"], dropna=False)
            print("\nπŸ“Š Error Class Distribution by Category:")
            print(confusion.to_string())
            logf.write("\nπŸ“Š Error Class Distribution by Category:\n")
            logf.write(confusion.to_string())
            logf.write("\n")
        except Exception as e:
            print(f"WARNING: Could not build confusion matrix: {e}")

    return summary_text, table_rows, headers
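
# Example usage (illustrative sketch; the surrounding app is assumed to supply
# real vectorstores and a tag config it has already built):
#
#   from pathlib import Path
#   import evaluate
#
#   evaluate.load_test_fixtures()
#   summary, rows, headers = evaluate.run_comprehensive_evaluation(
#       vs_general, vs_personal, nlu_vectorstore,
#       config={"behavior_tags": [...], "emotion_tags": [...],
#               "topic_tags": [...], "context_tags": [...]},
#       storage_path=Path("tmp"),
#   )
#   print(summary)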