LiamKhoaLe committed on
Commit
0e45c9f
·
1 Parent(s): c816ffa

Upd agent mcp

Browse files
Files changed (2) hide show
  1. gemini_mcp.py → agent.py +25 -7
  2. app.py +118 -95
gemini_mcp.py → agent.py RENAMED
@@ -176,20 +176,38 @@ async def call_tool(name: str, arguments: dict) -> Sequence[TextContent | ImageC
176
  if system_prompt:
177
  contents = f"{system_prompt}\n\n{user_prompt}"
178
 
179
- # Note: The simple API doesn't support files or temperature directly
180
- # For files, we would need to use a different approach or encode them
181
- # For now, we'll handle text-only requests
 
 
 
 
 
182
  if files:
183
- logger.warning("File support not available in simple API mode. Processing text only.")
184
- # Could potentially encode files as base64 in the prompt, but keeping it simple for now
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
- # Generate content using the simple API
187
  try:
188
  # Use asyncio.to_thread to make the blocking call async
 
189
  response = await asyncio.to_thread(
190
  gemini_client.models.generate_content,
191
  model=model,
192
- contents=contents
193
  )
194
 
195
  # Extract text from response
 
176
  if system_prompt:
177
  contents = f"{system_prompt}\n\n{user_prompt}"
178
 
179
+ # Prepare content for Gemini API
180
+ # The google-genai API expects contents as a list of parts
181
+ gemini_contents = []
182
+
183
+ # Add text content as first part
184
+ gemini_contents.append(contents)
185
+
186
+ # Add file content if provided
187
  if files:
188
+ try:
189
+ file_parts = prepare_gemini_files(files)
190
+ # Convert file parts to the format expected by Gemini API
191
+ for file_part in file_parts:
192
+ # The API expects parts with inline_data for binary content
193
+ gemini_contents.append({
194
+ "inline_data": {
195
+ "mime_type": file_part["mime_type"],
196
+ "data": base64.b64encode(file_part["data"]).decode('utf-8')
197
+ }
198
+ })
199
+ logger.info(f"Added {len(file_parts)} file(s) to Gemini request")
200
+ except Exception as e:
201
+ logger.warning(f"Error preparing files: {e}, continuing with text only")
202
 
203
+ # Generate content using Gemini API
204
  try:
205
  # Use asyncio.to_thread to make the blocking call async
206
+ # The API accepts contents as a list
207
  response = await asyncio.to_thread(
208
  gemini_client.models.generate_content,
209
  model=model,
210
+ contents=gemini_contents
211
  )
212
 
213
  # Extract text from response
app.py CHANGED
@@ -200,18 +200,18 @@ global_mcp_session = None
200
  global_mcp_stdio_ctx = None # Store stdio context to keep it alive
201
  global_mcp_lock = threading.Lock() # Lock for thread-safe session access
202
  # MCP server configuration via environment variables
203
- # Gemini MCP server: Python-based server (gemini_mcp.py)
204
  # This works on Hugging Face Spaces without requiring npm/Node.js
205
  # Make sure GEMINI_API_KEY is set in environment variables
206
  #
207
- # Default configuration uses the bundled gemini_mcp.py script
208
  # To override:
209
  # export MCP_SERVER_COMMAND="python"
210
- # export MCP_SERVER_ARGS="/path/to/gemini_mcp.py"
211
  script_dir = os.path.dirname(os.path.abspath(__file__))
212
- gemini_mcp_path = os.path.join(script_dir, "gemini_mcp.py")
213
  MCP_SERVER_COMMAND = os.environ.get("MCP_SERVER_COMMAND", "python")
214
- MCP_SERVER_ARGS = os.environ.get("MCP_SERVER_ARGS", gemini_mcp_path).split() if os.environ.get("MCP_SERVER_ARGS") else [gemini_mcp_path]
215
 
216
  async def get_mcp_session():
217
  """Get or create MCP client session with proper context management"""
@@ -277,16 +277,25 @@ async def get_mcp_session():
277
  session = ClientSession(read, write)
278
  await session.__aenter__()
279
 
280
- # Wait a bit for the server to fully initialize
281
- await asyncio.sleep(0.5)
 
282
 
283
- # Verify the session works by listing tools
284
- try:
285
- tools = await session.list_tools()
286
- logger.info(f"MCP server initialized with {len(tools.tools)} tools")
287
- except Exception as e:
288
- logger.warning(f"Could not list tools immediately after session creation: {e}")
289
- # Continue anyway, might work on first actual call
 
 
 
 
 
 
 
 
290
 
291
  # Store both the session and stdio context to keep them alive
292
  global_mcp_session = session
@@ -301,7 +310,7 @@ async def get_mcp_session():
301
  global_mcp_stdio_ctx = None
302
  return None
303
 
304
- async def call_gemini_mcp(user_prompt: str, system_prompt: str = None, files: list = None, model: str = None, temperature: float = 0.2) -> str:
305
  """Call Gemini MCP generate_content tool"""
306
  if not MCP_AVAILABLE:
307
  logger.warning("MCP not available for Gemini call")
@@ -428,7 +437,7 @@ async def transcribe_audio_gemini(audio_path: str) -> str:
428
  system_prompt = "You are a professional transcription service. Provide accurate, well-formatted transcripts."
429
  user_prompt = "Please transcribe this audio file. Include speaker identification if multiple speakers are present, and format it with proper punctuation and paragraphs, remove mumble, ignore non-verbal noises."
430
 
431
- result = await call_gemini_mcp(
432
  user_prompt=user_prompt,
433
  system_prompt=system_prompt,
434
  files=files,
@@ -651,7 +660,7 @@ async def translate_text_gemini(text: str, target_lang: str = "en", source_lang:
651
  # Use concise system prompt
652
  system_prompt = "You are a professional translator. Translate accurately and concisely."
653
 
654
- result = await call_gemini_mcp(
655
  user_prompt=user_prompt,
656
  system_prompt=system_prompt,
657
  model=GEMINI_MODEL_LITE, # Use lite model for translation
@@ -688,94 +697,107 @@ def translate_text(text: str, target_lang: str = "en", source_lang: str = None)
688
  # Return original text if translation fails
689
  return text
690
 
691
- async def search_web_gemini(query: str, max_results: int = 5) -> list:
692
- """Search web using Gemini MCP generate_content tool"""
693
  if not MCP_AVAILABLE:
694
- logger.warning("Gemini MCP not available for web search")
695
  return []
696
 
697
  try:
698
- # Use Gemini MCP to search the web and get structured results
699
- user_prompt = f"""Search the web for: "{query}"
700
-
701
- Return the search results in JSON format with the following structure:
702
- {{
703
- "results": [
704
- {{
705
- "title": "Result title",
706
- "url": "Result URL",
707
- "content": "Brief summary or snippet of the content"
708
- }}
709
- ]
710
- }}
711
-
712
- Return up to {max_results} most relevant results. Focus on medical/health information if applicable."""
713
-
714
- # Use concise system prompt
715
- system_prompt = "You are a web search assistant. Search the web and return structured JSON results with titles, URLs, and content summaries."
716
 
717
- result = await call_gemini_mcp(
718
- user_prompt=user_prompt,
719
- system_prompt=system_prompt,
720
- model=GEMINI_MODEL, # Use full model for web search
721
- temperature=0.3
722
- )
 
 
 
 
 
 
 
723
 
724
- if not result:
725
- logger.warning("Gemini MCP returned empty search results")
726
  return []
727
 
728
- # Parse JSON response
729
- try:
730
- # Extract JSON from response
731
- json_start = result.find('{')
732
- json_end = result.rfind('}') + 1
733
- if json_start >= 0 and json_end > json_start:
734
- data = json.loads(result[json_start:json_end])
735
- if isinstance(data, dict) and "results" in data:
736
- web_content = []
737
- for entry in data["results"][:max_results]:
738
- web_content.append({
739
- 'title': entry.get('title', ''),
740
- 'url': entry.get('url', ''),
741
- 'content': entry.get('content', '')
742
- })
743
- logger.info(f"Gemini MCP search returned {len(web_content)} results")
744
- return web_content
745
- elif isinstance(data, list):
746
- # Handle case where results are directly in a list
747
- web_content = []
748
- for entry in data[:max_results]:
749
- web_content.append({
750
- 'title': entry.get('title', ''),
751
- 'url': entry.get('url', entry.get('href', '')),
752
- 'content': entry.get('content', entry.get('body', entry.get('snippet', '')))
753
- })
754
- logger.info(f"Gemini MCP search returned {len(web_content)} results")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
755
  return web_content
756
- except json.JSONDecodeError as e:
757
- logger.warning(f"Failed to parse Gemini search results as JSON: {e}")
758
- # Fallback: treat as plain text result
759
- return [{
760
- 'title': 'Web Search Result',
761
- 'url': '',
762
- 'content': result[:1000] # Limit content length
763
- }]
764
 
765
  return []
766
  except Exception as e:
767
- logger.error(f"Gemini MCP web search error: {e}")
768
- import traceback
769
- logger.debug(traceback.format_exc())
770
  return []
771
 
772
  async def search_web_mcp(query: str, max_results: int = 5) -> list:
773
- """Search web using Gemini MCP (wrapper for compatibility)"""
774
- results = await search_web_gemini(query, max_results)
 
775
  if results:
776
  return results
777
- # Fallback to direct search only if Gemini MCP fails
778
- logger.warning("Gemini MCP web search failed, falling back to direct search")
 
 
779
  return search_web_fallback(query, max_results)
780
 
781
  def search_web_fallback(query: str, max_results: int = 5) -> list:
@@ -893,7 +915,7 @@ Summary:"""
893
  # Use concise system prompt
894
  system_prompt = "You are a medical information summarizer. Extract and summarize key medical facts accurately."
895
 
896
- result = await call_gemini_mcp(
897
  user_prompt=user_prompt,
898
  system_prompt=system_prompt,
899
  model=GEMINI_MODEL, # Use full model for summarization
@@ -978,7 +1000,7 @@ Respond in JSON format:
978
  # Use concise system prompt
979
  system_prompt = "You are a medical reasoning system. Analyze queries systematically and provide structured JSON responses."
980
 
981
- response = await call_gemini_mcp(
982
  user_prompt=reasoning_prompt,
983
  system_prompt=system_prompt,
984
  model=GEMINI_MODEL, # Use full model for reasoning
@@ -1171,7 +1193,7 @@ Respond in JSON:
1171
  # Use concise system prompt
1172
  system_prompt = "You are a medical answer quality evaluator. Provide honest, constructive feedback."
1173
 
1174
- response = await call_gemini_mcp(
1175
  user_prompt=reflection_prompt,
1176
  system_prompt=system_prompt,
1177
  model=GEMINI_MODEL, # Use full model for reflection
@@ -1248,7 +1270,7 @@ async def parse_document_gemini(file_path: str, file_extension: str) -> str:
1248
  system_prompt = "Extract all text content from the document accurately."
1249
  user_prompt = "Extract all text content from this document. Return only the extracted text, preserving structure and formatting where possible."
1250
 
1251
- result = await call_gemini_mcp(
1252
  user_prompt=user_prompt,
1253
  system_prompt=system_prompt,
1254
  files=files,
@@ -1537,15 +1559,16 @@ def stream_chat(
1537
  web_sources = []
1538
  web_urls = [] # Store URLs for citations
1539
  if final_use_web_search:
1540
- logger.info("🌐 Performing web search (MCP)...")
1541
  web_results = search_web(message, max_results=5)
1542
  if web_results:
 
1543
  web_summary = summarize_web_content(web_results, message)
1544
- web_context = f"\n\nAdditional Web Sources (MCP):\n{web_summary}"
1545
  web_sources = [r['title'] for r in web_results[:3]]
1546
  # Extract unique URLs for citations
1547
  web_urls = [r.get('url', '') for r in web_results if r.get('url')]
1548
- logger.info(f"Web search completed, found {len(web_results)} results")
1549
 
1550
  # Build final context
1551
  context_parts = []
 
200
  global_mcp_stdio_ctx = None # Store stdio context to keep it alive
201
  global_mcp_lock = threading.Lock() # Lock for thread-safe session access
202
  # MCP server configuration via environment variables
203
+ # Gemini MCP server: Python-based server (agent.py)
204
  # This works on Hugging Face Spaces without requiring npm/Node.js
205
  # Make sure GEMINI_API_KEY is set in environment variables
206
  #
207
+ # Default configuration uses the bundled agent.py script
208
  # To override:
209
  # export MCP_SERVER_COMMAND="python"
210
+ # export MCP_SERVER_ARGS="/path/to/agent.py"
211
  script_dir = os.path.dirname(os.path.abspath(__file__))
212
+ agent_path = os.path.join(script_dir, "agent.py")
213
  MCP_SERVER_COMMAND = os.environ.get("MCP_SERVER_COMMAND", "python")
214
+ MCP_SERVER_ARGS = os.environ.get("MCP_SERVER_ARGS", agent_path).split() if os.environ.get("MCP_SERVER_ARGS") else [agent_path]
215
 
216
  async def get_mcp_session():
217
  """Get or create MCP client session with proper context management"""
 
277
  session = ClientSession(read, write)
278
  await session.__aenter__()
279
 
280
+ # Wait longer for the server to fully initialize
281
+ # The server needs time to start up and be ready
282
+ await asyncio.sleep(1.0)
283
 
284
+ # Verify the session works by listing tools with retries
285
+ max_init_retries = 5
286
+ for init_attempt in range(max_init_retries):
287
+ try:
288
+ tools = await session.list_tools()
289
+ if tools and hasattr(tools, 'tools'):
290
+ logger.info(f"MCP server initialized with {len(tools.tools)} tools: {[t.name for t in tools.tools]}")
291
+ break
292
+ except Exception as e:
293
+ if init_attempt < max_init_retries - 1:
294
+ logger.debug(f"Initialization attempt {init_attempt + 1}/{max_init_retries} failed, retrying...")
295
+ await asyncio.sleep(0.5 * (init_attempt + 1))
296
+ else:
297
+ logger.warning(f"Could not list tools after {max_init_retries} attempts: {e}")
298
+ # Continue anyway, might work on first actual call
299
 
300
  # Store both the session and stdio context to keep them alive
301
  global_mcp_session = session
 
310
  global_mcp_stdio_ctx = None
311
  return None
312
 
313
+ async def call_agent(user_prompt: str, system_prompt: str = None, files: list = None, model: str = None, temperature: float = 0.2) -> str:
314
  """Call Gemini MCP generate_content tool"""
315
  if not MCP_AVAILABLE:
316
  logger.warning("MCP not available for Gemini call")
 
437
  system_prompt = "You are a professional transcription service. Provide accurate, well-formatted transcripts."
438
  user_prompt = "Please transcribe this audio file. Include speaker identification if multiple speakers are present, and format it with proper punctuation and paragraphs, remove mumble, ignore non-verbal noises."
439
 
440
+ result = await call_agent(
441
  user_prompt=user_prompt,
442
  system_prompt=system_prompt,
443
  files=files,
 
660
  # Use concise system prompt
661
  system_prompt = "You are a professional translator. Translate accurately and concisely."
662
 
663
+ result = await call_agent(
664
  user_prompt=user_prompt,
665
  system_prompt=system_prompt,
666
  model=GEMINI_MODEL_LITE, # Use lite model for translation
 
697
  # Return original text if translation fails
698
  return text
699
 
700
+ async def search_web_mcp_tool(query: str, max_results: int = 5) -> list:
701
+ """Search web using MCP web search tool (e.g., DuckDuckGo MCP server)"""
702
  if not MCP_AVAILABLE:
 
703
  return []
704
 
705
  try:
706
+ session = await get_mcp_session()
707
+ if session is None:
708
+ return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
709
 
710
+ # Retry listing tools if it fails the first time
711
+ max_retries = 3
712
+ tools = None
713
+ for attempt in range(max_retries):
714
+ try:
715
+ tools = await session.list_tools()
716
+ break
717
+ except Exception as e:
718
+ if attempt < max_retries - 1:
719
+ await asyncio.sleep(0.5 * (attempt + 1))
720
+ else:
721
+ logger.error(f"Failed to list MCP tools after {max_retries} attempts: {e}")
722
+ return []
723
 
724
+ if not tools or not hasattr(tools, 'tools'):
 
725
  return []
726
 
727
+ # Look for web search tools (DuckDuckGo, search, etc.)
728
+ search_tool = None
729
+ for tool in tools.tools:
730
+ tool_name_lower = tool.name.lower()
731
+ if any(keyword in tool_name_lower for keyword in ["search", "duckduckgo", "ddg", "web"]):
732
+ search_tool = tool
733
+ logger.info(f"Found web search MCP tool: {tool.name}")
734
+ break
735
+
736
+ if search_tool:
737
+ try:
738
+ # Call the search tool
739
+ result = await session.call_tool(
740
+ search_tool.name,
741
+ arguments={"query": query, "max_results": max_results}
742
+ )
743
+
744
+ # Parse result
745
+ web_content = []
746
+ if hasattr(result, 'content') and result.content:
747
+ for item in result.content:
748
+ if hasattr(item, 'text'):
749
+ try:
750
+ data = json.loads(item.text)
751
+ if isinstance(data, list):
752
+ for entry in data[:max_results]:
753
+ web_content.append({
754
+ 'title': entry.get('title', ''),
755
+ 'url': entry.get('url', entry.get('href', '')),
756
+ 'content': entry.get('body', entry.get('snippet', entry.get('content', '')))
757
+ })
758
+ elif isinstance(data, dict):
759
+ if 'results' in data:
760
+ for entry in data['results'][:max_results]:
761
+ web_content.append({
762
+ 'title': entry.get('title', ''),
763
+ 'url': entry.get('url', entry.get('href', '')),
764
+ 'content': entry.get('body', entry.get('snippet', entry.get('content', '')))
765
+ })
766
+ else:
767
+ web_content.append({
768
+ 'title': data.get('title', ''),
769
+ 'url': data.get('url', data.get('href', '')),
770
+ 'content': data.get('body', data.get('snippet', data.get('content', '')))
771
+ })
772
+ except json.JSONDecodeError:
773
+ # If not JSON, treat as plain text
774
+ web_content.append({
775
+ 'title': '',
776
+ 'url': '',
777
+ 'content': item.text[:1000]
778
+ })
779
+
780
+ if web_content:
781
+ logger.info(f"Web search MCP returned {len(web_content)} results")
782
  return web_content
783
+ except Exception as e:
784
+ logger.error(f"Error calling web search MCP tool: {e}")
 
 
 
 
 
 
785
 
786
  return []
787
  except Exception as e:
788
+ logger.error(f"Web search MCP tool error: {e}")
 
 
789
  return []
790
 
791
  async def search_web_mcp(query: str, max_results: int = 5) -> list:
792
+ """Search web using MCP tools - tries web search MCP tool first, then falls back to direct search"""
793
+ # First try to use a dedicated web search MCP tool (like DuckDuckGo MCP server)
794
+ results = await search_web_mcp_tool(query, max_results)
795
  if results:
796
  return results
797
+
798
+ # If no web search MCP tool available, use direct search (ddgs)
799
+ # This is the correct approach - Gemini MCP cannot search the web
800
+ logger.info("No web search MCP tool found, using direct DuckDuckGo search")
801
  return search_web_fallback(query, max_results)
802
 
803
  def search_web_fallback(query: str, max_results: int = 5) -> list:
 
915
  # Use concise system prompt
916
  system_prompt = "You are a medical information summarizer. Extract and summarize key medical facts accurately."
917
 
918
+ result = await call_agent(
919
  user_prompt=user_prompt,
920
  system_prompt=system_prompt,
921
  model=GEMINI_MODEL, # Use full model for summarization
 
1000
  # Use concise system prompt
1001
  system_prompt = "You are a medical reasoning system. Analyze queries systematically and provide structured JSON responses."
1002
 
1003
+ response = await call_agent(
1004
  user_prompt=reasoning_prompt,
1005
  system_prompt=system_prompt,
1006
  model=GEMINI_MODEL, # Use full model for reasoning
 
1193
  # Use concise system prompt
1194
  system_prompt = "You are a medical answer quality evaluator. Provide honest, constructive feedback."
1195
 
1196
+ response = await call_agent(
1197
  user_prompt=reflection_prompt,
1198
  system_prompt=system_prompt,
1199
  model=GEMINI_MODEL, # Use full model for reflection
 
1270
  system_prompt = "Extract all text content from the document accurately."
1271
  user_prompt = "Extract all text content from this document. Return only the extracted text, preserving structure and formatting where possible."
1272
 
1273
+ result = await call_agent(
1274
  user_prompt=user_prompt,
1275
  system_prompt=system_prompt,
1276
  files=files,
 
1559
  web_sources = []
1560
  web_urls = [] # Store URLs for citations
1561
  if final_use_web_search:
1562
+ logger.info("🌐 Performing web search (MCP or direct ddgs)...")
1563
  web_results = search_web(message, max_results=5)
1564
  if web_results:
1565
+ logger.info(f"📊 Summarizing {len(web_results)} web search results using Gemini MCP...")
1566
  web_summary = summarize_web_content(web_results, message)
1567
+ web_context = f"\n\nAdditional Web Sources:\n{web_summary}"
1568
  web_sources = [r['title'] for r in web_results[:3]]
1569
  # Extract unique URLs for citations
1570
  web_urls = [r.get('url', '') for r in web_results if r.get('url')]
1571
+ logger.info(f"Web search completed, found {len(web_results)} results, summarized with Gemini MCP")
1572
 
1573
  # Build final context
1574
  context_parts = []