Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
0e45c9f
1
Parent(s):
c816ffa
Upd agent mcp
Browse files- gemini_mcp.py → agent.py +25 -7
- app.py +118 -95
gemini_mcp.py → agent.py
RENAMED
|
@@ -176,20 +176,38 @@ async def call_tool(name: str, arguments: dict) -> Sequence[TextContent | ImageC
|
|
| 176 |
if system_prompt:
|
| 177 |
contents = f"{system_prompt}\n\n{user_prompt}"
|
| 178 |
|
| 179 |
-
#
|
| 180 |
-
#
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
if files:
|
| 183 |
-
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
|
| 186 |
-
# Generate content using
|
| 187 |
try:
|
| 188 |
# Use asyncio.to_thread to make the blocking call async
|
|
|
|
| 189 |
response = await asyncio.to_thread(
|
| 190 |
gemini_client.models.generate_content,
|
| 191 |
model=model,
|
| 192 |
-
contents=
|
| 193 |
)
|
| 194 |
|
| 195 |
# Extract text from response
|
|
|
|
| 176 |
if system_prompt:
|
| 177 |
contents = f"{system_prompt}\n\n{user_prompt}"
|
| 178 |
|
| 179 |
+
# Prepare content for Gemini API
|
| 180 |
+
# The google-genai API expects contents as a list of parts
|
| 181 |
+
gemini_contents = []
|
| 182 |
+
|
| 183 |
+
# Add text content as first part
|
| 184 |
+
gemini_contents.append(contents)
|
| 185 |
+
|
| 186 |
+
# Add file content if provided
|
| 187 |
if files:
|
| 188 |
+
try:
|
| 189 |
+
file_parts = prepare_gemini_files(files)
|
| 190 |
+
# Convert file parts to the format expected by Gemini API
|
| 191 |
+
for file_part in file_parts:
|
| 192 |
+
# The API expects parts with inline_data for binary content
|
| 193 |
+
gemini_contents.append({
|
| 194 |
+
"inline_data": {
|
| 195 |
+
"mime_type": file_part["mime_type"],
|
| 196 |
+
"data": base64.b64encode(file_part["data"]).decode('utf-8')
|
| 197 |
+
}
|
| 198 |
+
})
|
| 199 |
+
logger.info(f"Added {len(file_parts)} file(s) to Gemini request")
|
| 200 |
+
except Exception as e:
|
| 201 |
+
logger.warning(f"Error preparing files: {e}, continuing with text only")
|
| 202 |
|
| 203 |
+
# Generate content using Gemini API
|
| 204 |
try:
|
| 205 |
# Use asyncio.to_thread to make the blocking call async
|
| 206 |
+
# The API accepts contents as a list
|
| 207 |
response = await asyncio.to_thread(
|
| 208 |
gemini_client.models.generate_content,
|
| 209 |
model=model,
|
| 210 |
+
contents=gemini_contents
|
| 211 |
)
|
| 212 |
|
| 213 |
# Extract text from response
|
app.py
CHANGED
|
@@ -200,18 +200,18 @@ global_mcp_session = None
|
|
| 200 |
global_mcp_stdio_ctx = None # Store stdio context to keep it alive
|
| 201 |
global_mcp_lock = threading.Lock() # Lock for thread-safe session access
|
| 202 |
# MCP server configuration via environment variables
|
| 203 |
-
# Gemini MCP server: Python-based server (
|
| 204 |
# This works on Hugging Face Spaces without requiring npm/Node.js
|
| 205 |
# Make sure GEMINI_API_KEY is set in environment variables
|
| 206 |
#
|
| 207 |
-
# Default configuration uses the bundled
|
| 208 |
# To override:
|
| 209 |
# export MCP_SERVER_COMMAND="python"
|
| 210 |
-
# export MCP_SERVER_ARGS="/path/to/
|
| 211 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
| 212 |
-
|
| 213 |
MCP_SERVER_COMMAND = os.environ.get("MCP_SERVER_COMMAND", "python")
|
| 214 |
-
MCP_SERVER_ARGS = os.environ.get("MCP_SERVER_ARGS",
|
| 215 |
|
| 216 |
async def get_mcp_session():
|
| 217 |
"""Get or create MCP client session with proper context management"""
|
|
@@ -277,16 +277,25 @@ async def get_mcp_session():
|
|
| 277 |
session = ClientSession(read, write)
|
| 278 |
await session.__aenter__()
|
| 279 |
|
| 280 |
-
# Wait
|
| 281 |
-
|
|
|
|
| 282 |
|
| 283 |
-
# Verify the session works by listing tools
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
|
| 291 |
# Store both the session and stdio context to keep them alive
|
| 292 |
global_mcp_session = session
|
|
@@ -301,7 +310,7 @@ async def get_mcp_session():
|
|
| 301 |
global_mcp_stdio_ctx = None
|
| 302 |
return None
|
| 303 |
|
| 304 |
-
async def
|
| 305 |
"""Call Gemini MCP generate_content tool"""
|
| 306 |
if not MCP_AVAILABLE:
|
| 307 |
logger.warning("MCP not available for Gemini call")
|
|
@@ -428,7 +437,7 @@ async def transcribe_audio_gemini(audio_path: str) -> str:
|
|
| 428 |
system_prompt = "You are a professional transcription service. Provide accurate, well-formatted transcripts."
|
| 429 |
user_prompt = "Please transcribe this audio file. Include speaker identification if multiple speakers are present, and format it with proper punctuation and paragraphs, remove mumble, ignore non-verbal noises."
|
| 430 |
|
| 431 |
-
result = await
|
| 432 |
user_prompt=user_prompt,
|
| 433 |
system_prompt=system_prompt,
|
| 434 |
files=files,
|
|
@@ -651,7 +660,7 @@ async def translate_text_gemini(text: str, target_lang: str = "en", source_lang:
|
|
| 651 |
# Use concise system prompt
|
| 652 |
system_prompt = "You are a professional translator. Translate accurately and concisely."
|
| 653 |
|
| 654 |
-
result = await
|
| 655 |
user_prompt=user_prompt,
|
| 656 |
system_prompt=system_prompt,
|
| 657 |
model=GEMINI_MODEL_LITE, # Use lite model for translation
|
|
@@ -688,94 +697,107 @@ def translate_text(text: str, target_lang: str = "en", source_lang: str = None)
|
|
| 688 |
# Return original text if translation fails
|
| 689 |
return text
|
| 690 |
|
| 691 |
-
async def
|
| 692 |
-
"""Search web using
|
| 693 |
if not MCP_AVAILABLE:
|
| 694 |
-
logger.warning("Gemini MCP not available for web search")
|
| 695 |
return []
|
| 696 |
|
| 697 |
try:
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
Return the search results in JSON format with the following structure:
|
| 702 |
-
{{
|
| 703 |
-
"results": [
|
| 704 |
-
{{
|
| 705 |
-
"title": "Result title",
|
| 706 |
-
"url": "Result URL",
|
| 707 |
-
"content": "Brief summary or snippet of the content"
|
| 708 |
-
}}
|
| 709 |
-
]
|
| 710 |
-
}}
|
| 711 |
-
|
| 712 |
-
Return up to {max_results} most relevant results. Focus on medical/health information if applicable."""
|
| 713 |
-
|
| 714 |
-
# Use concise system prompt
|
| 715 |
-
system_prompt = "You are a web search assistant. Search the web and return structured JSON results with titles, URLs, and content summaries."
|
| 716 |
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
|
| 722 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 723 |
|
| 724 |
-
if not
|
| 725 |
-
logger.warning("Gemini MCP returned empty search results")
|
| 726 |
return []
|
| 727 |
|
| 728 |
-
#
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
|
| 744 |
-
|
| 745 |
-
|
| 746 |
-
|
| 747 |
-
|
| 748 |
-
for
|
| 749 |
-
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
|
| 754 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 755 |
return web_content
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
# Fallback: treat as plain text result
|
| 759 |
-
return [{
|
| 760 |
-
'title': 'Web Search Result',
|
| 761 |
-
'url': '',
|
| 762 |
-
'content': result[:1000] # Limit content length
|
| 763 |
-
}]
|
| 764 |
|
| 765 |
return []
|
| 766 |
except Exception as e:
|
| 767 |
-
logger.error(f"
|
| 768 |
-
import traceback
|
| 769 |
-
logger.debug(traceback.format_exc())
|
| 770 |
return []
|
| 771 |
|
| 772 |
async def search_web_mcp(query: str, max_results: int = 5) -> list:
|
| 773 |
-
"""Search web using
|
| 774 |
-
|
|
|
|
| 775 |
if results:
|
| 776 |
return results
|
| 777 |
-
|
| 778 |
-
|
|
|
|
|
|
|
| 779 |
return search_web_fallback(query, max_results)
|
| 780 |
|
| 781 |
def search_web_fallback(query: str, max_results: int = 5) -> list:
|
|
@@ -893,7 +915,7 @@ Summary:"""
|
|
| 893 |
# Use concise system prompt
|
| 894 |
system_prompt = "You are a medical information summarizer. Extract and summarize key medical facts accurately."
|
| 895 |
|
| 896 |
-
result = await
|
| 897 |
user_prompt=user_prompt,
|
| 898 |
system_prompt=system_prompt,
|
| 899 |
model=GEMINI_MODEL, # Use full model for summarization
|
|
@@ -978,7 +1000,7 @@ Respond in JSON format:
|
|
| 978 |
# Use concise system prompt
|
| 979 |
system_prompt = "You are a medical reasoning system. Analyze queries systematically and provide structured JSON responses."
|
| 980 |
|
| 981 |
-
response = await
|
| 982 |
user_prompt=reasoning_prompt,
|
| 983 |
system_prompt=system_prompt,
|
| 984 |
model=GEMINI_MODEL, # Use full model for reasoning
|
|
@@ -1171,7 +1193,7 @@ Respond in JSON:
|
|
| 1171 |
# Use concise system prompt
|
| 1172 |
system_prompt = "You are a medical answer quality evaluator. Provide honest, constructive feedback."
|
| 1173 |
|
| 1174 |
-
response = await
|
| 1175 |
user_prompt=reflection_prompt,
|
| 1176 |
system_prompt=system_prompt,
|
| 1177 |
model=GEMINI_MODEL, # Use full model for reflection
|
|
@@ -1248,7 +1270,7 @@ async def parse_document_gemini(file_path: str, file_extension: str) -> str:
|
|
| 1248 |
system_prompt = "Extract all text content from the document accurately."
|
| 1249 |
user_prompt = "Extract all text content from this document. Return only the extracted text, preserving structure and formatting where possible."
|
| 1250 |
|
| 1251 |
-
result = await
|
| 1252 |
user_prompt=user_prompt,
|
| 1253 |
system_prompt=system_prompt,
|
| 1254 |
files=files,
|
|
@@ -1537,15 +1559,16 @@ def stream_chat(
|
|
| 1537 |
web_sources = []
|
| 1538 |
web_urls = [] # Store URLs for citations
|
| 1539 |
if final_use_web_search:
|
| 1540 |
-
logger.info("🌐 Performing web search (MCP)...")
|
| 1541 |
web_results = search_web(message, max_results=5)
|
| 1542 |
if web_results:
|
|
|
|
| 1543 |
web_summary = summarize_web_content(web_results, message)
|
| 1544 |
-
web_context = f"\n\nAdditional Web Sources
|
| 1545 |
web_sources = [r['title'] for r in web_results[:3]]
|
| 1546 |
# Extract unique URLs for citations
|
| 1547 |
web_urls = [r.get('url', '') for r in web_results if r.get('url')]
|
| 1548 |
-
logger.info(f"Web search completed, found {len(web_results)} results")
|
| 1549 |
|
| 1550 |
# Build final context
|
| 1551 |
context_parts = []
|
|
|
|
| 200 |
global_mcp_stdio_ctx = None # Store stdio context to keep it alive
|
| 201 |
global_mcp_lock = threading.Lock() # Lock for thread-safe session access
|
| 202 |
# MCP server configuration via environment variables
|
| 203 |
+
# Gemini MCP server: Python-based server (agent.py)
|
| 204 |
# This works on Hugging Face Spaces without requiring npm/Node.js
|
| 205 |
# Make sure GEMINI_API_KEY is set in environment variables
|
| 206 |
#
|
| 207 |
+
# Default configuration uses the bundled agent.py script
|
| 208 |
# To override:
|
| 209 |
# export MCP_SERVER_COMMAND="python"
|
| 210 |
+
# export MCP_SERVER_ARGS="/path/to/agent.py"
|
| 211 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
| 212 |
+
agent_path = os.path.join(script_dir, "agent.py")
|
| 213 |
MCP_SERVER_COMMAND = os.environ.get("MCP_SERVER_COMMAND", "python")
|
| 214 |
+
MCP_SERVER_ARGS = os.environ.get("MCP_SERVER_ARGS", agent_path).split() if os.environ.get("MCP_SERVER_ARGS") else [agent_path]
|
| 215 |
|
| 216 |
async def get_mcp_session():
|
| 217 |
"""Get or create MCP client session with proper context management"""
|
|
|
|
| 277 |
session = ClientSession(read, write)
|
| 278 |
await session.__aenter__()
|
| 279 |
|
| 280 |
+
# Wait longer for the server to fully initialize
|
| 281 |
+
# The server needs time to start up and be ready
|
| 282 |
+
await asyncio.sleep(1.0)
|
| 283 |
|
| 284 |
+
# Verify the session works by listing tools with retries
|
| 285 |
+
max_init_retries = 5
|
| 286 |
+
for init_attempt in range(max_init_retries):
|
| 287 |
+
try:
|
| 288 |
+
tools = await session.list_tools()
|
| 289 |
+
if tools and hasattr(tools, 'tools'):
|
| 290 |
+
logger.info(f"MCP server initialized with {len(tools.tools)} tools: {[t.name for t in tools.tools]}")
|
| 291 |
+
break
|
| 292 |
+
except Exception as e:
|
| 293 |
+
if init_attempt < max_init_retries - 1:
|
| 294 |
+
logger.debug(f"Initialization attempt {init_attempt + 1}/{max_init_retries} failed, retrying...")
|
| 295 |
+
await asyncio.sleep(0.5 * (init_attempt + 1))
|
| 296 |
+
else:
|
| 297 |
+
logger.warning(f"Could not list tools after {max_init_retries} attempts: {e}")
|
| 298 |
+
# Continue anyway, might work on first actual call
|
| 299 |
|
| 300 |
# Store both the session and stdio context to keep them alive
|
| 301 |
global_mcp_session = session
|
|
|
|
| 310 |
global_mcp_stdio_ctx = None
|
| 311 |
return None
|
| 312 |
|
| 313 |
+
async def call_agent(user_prompt: str, system_prompt: str = None, files: list = None, model: str = None, temperature: float = 0.2) -> str:
|
| 314 |
"""Call Gemini MCP generate_content tool"""
|
| 315 |
if not MCP_AVAILABLE:
|
| 316 |
logger.warning("MCP not available for Gemini call")
|
|
|
|
| 437 |
system_prompt = "You are a professional transcription service. Provide accurate, well-formatted transcripts."
|
| 438 |
user_prompt = "Please transcribe this audio file. Include speaker identification if multiple speakers are present, and format it with proper punctuation and paragraphs, remove mumble, ignore non-verbal noises."
|
| 439 |
|
| 440 |
+
result = await call_agent(
|
| 441 |
user_prompt=user_prompt,
|
| 442 |
system_prompt=system_prompt,
|
| 443 |
files=files,
|
|
|
|
| 660 |
# Use concise system prompt
|
| 661 |
system_prompt = "You are a professional translator. Translate accurately and concisely."
|
| 662 |
|
| 663 |
+
result = await call_agent(
|
| 664 |
user_prompt=user_prompt,
|
| 665 |
system_prompt=system_prompt,
|
| 666 |
model=GEMINI_MODEL_LITE, # Use lite model for translation
|
|
|
|
| 697 |
# Return original text if translation fails
|
| 698 |
return text
|
| 699 |
|
| 700 |
+
async def search_web_mcp_tool(query: str, max_results: int = 5) -> list:
|
| 701 |
+
"""Search web using MCP web search tool (e.g., DuckDuckGo MCP server)"""
|
| 702 |
if not MCP_AVAILABLE:
|
|
|
|
| 703 |
return []
|
| 704 |
|
| 705 |
try:
|
| 706 |
+
session = await get_mcp_session()
|
| 707 |
+
if session is None:
|
| 708 |
+
return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 709 |
|
| 710 |
+
# Retry listing tools if it fails the first time
|
| 711 |
+
max_retries = 3
|
| 712 |
+
tools = None
|
| 713 |
+
for attempt in range(max_retries):
|
| 714 |
+
try:
|
| 715 |
+
tools = await session.list_tools()
|
| 716 |
+
break
|
| 717 |
+
except Exception as e:
|
| 718 |
+
if attempt < max_retries - 1:
|
| 719 |
+
await asyncio.sleep(0.5 * (attempt + 1))
|
| 720 |
+
else:
|
| 721 |
+
logger.error(f"Failed to list MCP tools after {max_retries} attempts: {e}")
|
| 722 |
+
return []
|
| 723 |
|
| 724 |
+
if not tools or not hasattr(tools, 'tools'):
|
|
|
|
| 725 |
return []
|
| 726 |
|
| 727 |
+
# Look for web search tools (DuckDuckGo, search, etc.)
|
| 728 |
+
search_tool = None
|
| 729 |
+
for tool in tools.tools:
|
| 730 |
+
tool_name_lower = tool.name.lower()
|
| 731 |
+
if any(keyword in tool_name_lower for keyword in ["search", "duckduckgo", "ddg", "web"]):
|
| 732 |
+
search_tool = tool
|
| 733 |
+
logger.info(f"Found web search MCP tool: {tool.name}")
|
| 734 |
+
break
|
| 735 |
+
|
| 736 |
+
if search_tool:
|
| 737 |
+
try:
|
| 738 |
+
# Call the search tool
|
| 739 |
+
result = await session.call_tool(
|
| 740 |
+
search_tool.name,
|
| 741 |
+
arguments={"query": query, "max_results": max_results}
|
| 742 |
+
)
|
| 743 |
+
|
| 744 |
+
# Parse result
|
| 745 |
+
web_content = []
|
| 746 |
+
if hasattr(result, 'content') and result.content:
|
| 747 |
+
for item in result.content:
|
| 748 |
+
if hasattr(item, 'text'):
|
| 749 |
+
try:
|
| 750 |
+
data = json.loads(item.text)
|
| 751 |
+
if isinstance(data, list):
|
| 752 |
+
for entry in data[:max_results]:
|
| 753 |
+
web_content.append({
|
| 754 |
+
'title': entry.get('title', ''),
|
| 755 |
+
'url': entry.get('url', entry.get('href', '')),
|
| 756 |
+
'content': entry.get('body', entry.get('snippet', entry.get('content', '')))
|
| 757 |
+
})
|
| 758 |
+
elif isinstance(data, dict):
|
| 759 |
+
if 'results' in data:
|
| 760 |
+
for entry in data['results'][:max_results]:
|
| 761 |
+
web_content.append({
|
| 762 |
+
'title': entry.get('title', ''),
|
| 763 |
+
'url': entry.get('url', entry.get('href', '')),
|
| 764 |
+
'content': entry.get('body', entry.get('snippet', entry.get('content', '')))
|
| 765 |
+
})
|
| 766 |
+
else:
|
| 767 |
+
web_content.append({
|
| 768 |
+
'title': data.get('title', ''),
|
| 769 |
+
'url': data.get('url', data.get('href', '')),
|
| 770 |
+
'content': data.get('body', data.get('snippet', data.get('content', '')))
|
| 771 |
+
})
|
| 772 |
+
except json.JSONDecodeError:
|
| 773 |
+
# If not JSON, treat as plain text
|
| 774 |
+
web_content.append({
|
| 775 |
+
'title': '',
|
| 776 |
+
'url': '',
|
| 777 |
+
'content': item.text[:1000]
|
| 778 |
+
})
|
| 779 |
+
|
| 780 |
+
if web_content:
|
| 781 |
+
logger.info(f"Web search MCP returned {len(web_content)} results")
|
| 782 |
return web_content
|
| 783 |
+
except Exception as e:
|
| 784 |
+
logger.error(f"Error calling web search MCP tool: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 785 |
|
| 786 |
return []
|
| 787 |
except Exception as e:
|
| 788 |
+
logger.error(f"Web search MCP tool error: {e}")
|
|
|
|
|
|
|
| 789 |
return []
|
| 790 |
|
| 791 |
async def search_web_mcp(query: str, max_results: int = 5) -> list:
|
| 792 |
+
"""Search web using MCP tools - tries web search MCP tool first, then falls back to direct search"""
|
| 793 |
+
# First try to use a dedicated web search MCP tool (like DuckDuckGo MCP server)
|
| 794 |
+
results = await search_web_mcp_tool(query, max_results)
|
| 795 |
if results:
|
| 796 |
return results
|
| 797 |
+
|
| 798 |
+
# If no web search MCP tool available, use direct search (ddgs)
|
| 799 |
+
# This is the correct approach - Gemini MCP cannot search the web
|
| 800 |
+
logger.info("No web search MCP tool found, using direct DuckDuckGo search")
|
| 801 |
return search_web_fallback(query, max_results)
|
| 802 |
|
| 803 |
def search_web_fallback(query: str, max_results: int = 5) -> list:
|
|
|
|
| 915 |
# Use concise system prompt
|
| 916 |
system_prompt = "You are a medical information summarizer. Extract and summarize key medical facts accurately."
|
| 917 |
|
| 918 |
+
result = await call_agent(
|
| 919 |
user_prompt=user_prompt,
|
| 920 |
system_prompt=system_prompt,
|
| 921 |
model=GEMINI_MODEL, # Use full model for summarization
|
|
|
|
| 1000 |
# Use concise system prompt
|
| 1001 |
system_prompt = "You are a medical reasoning system. Analyze queries systematically and provide structured JSON responses."
|
| 1002 |
|
| 1003 |
+
response = await call_agent(
|
| 1004 |
user_prompt=reasoning_prompt,
|
| 1005 |
system_prompt=system_prompt,
|
| 1006 |
model=GEMINI_MODEL, # Use full model for reasoning
|
|
|
|
| 1193 |
# Use concise system prompt
|
| 1194 |
system_prompt = "You are a medical answer quality evaluator. Provide honest, constructive feedback."
|
| 1195 |
|
| 1196 |
+
response = await call_agent(
|
| 1197 |
user_prompt=reflection_prompt,
|
| 1198 |
system_prompt=system_prompt,
|
| 1199 |
model=GEMINI_MODEL, # Use full model for reflection
|
|
|
|
| 1270 |
system_prompt = "Extract all text content from the document accurately."
|
| 1271 |
user_prompt = "Extract all text content from this document. Return only the extracted text, preserving structure and formatting where possible."
|
| 1272 |
|
| 1273 |
+
result = await call_agent(
|
| 1274 |
user_prompt=user_prompt,
|
| 1275 |
system_prompt=system_prompt,
|
| 1276 |
files=files,
|
|
|
|
| 1559 |
web_sources = []
|
| 1560 |
web_urls = [] # Store URLs for citations
|
| 1561 |
if final_use_web_search:
|
| 1562 |
+
logger.info("🌐 Performing web search (MCP or direct ddgs)...")
|
| 1563 |
web_results = search_web(message, max_results=5)
|
| 1564 |
if web_results:
|
| 1565 |
+
logger.info(f"📊 Summarizing {len(web_results)} web search results using Gemini MCP...")
|
| 1566 |
web_summary = summarize_web_content(web_results, message)
|
| 1567 |
+
web_context = f"\n\nAdditional Web Sources:\n{web_summary}"
|
| 1568 |
web_sources = [r['title'] for r in web_results[:3]]
|
| 1569 |
# Extract unique URLs for citations
|
| 1570 |
web_urls = [r.get('url', '') for r in web_results if r.get('url')]
|
| 1571 |
+
logger.info(f"Web search completed, found {len(web_results)} results, summarized with Gemini MCP")
|
| 1572 |
|
| 1573 |
# Build final context
|
| 1574 |
context_parts = []
|