Spaces:
Sleeping
Sleeping
| # app.py | |
| import os | |
| import logging | |
| import asyncio | |
| import nest_asyncio | |
| from datetime import datetime | |
| import uuid | |
| import aiohttp | |
| import gradio as gr | |
| import requests | |
| import xml.etree.ElementTree as ET | |
| import json | |
| from langfuse.llama_index import LlamaIndexInstrumentor | |
| from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec | |
| from llama_index.tools.weather import OpenWeatherMapToolSpec | |
| from llama_index.tools.playwright import PlaywrightToolSpec | |
| from llama_index.core.tools import FunctionTool | |
| from llama_index.core.agent.workflow import AgentWorkflow | |
| from llama_index.core.workflow import Context | |
| from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI | |
| from llama_index.core.memory import ChatMemoryBuffer | |
| from llama_index.readers.web import RssReader, SimpleWebPageReader | |
| from llama_index.core import SummaryIndex | |
| # Import the event types for streaming | |
| from llama_index.core.agent.workflow import AgentStream, ToolCall, ToolCallResult | |
| import subprocess | |
# Download the Playwright browser binaries at startup.
# The Playwright-based tools in this file are currently commented out, so a
# missing CLI or a failed download must not prevent the app from starting;
# report the problem in the log instead of crashing or ignoring it silently.
try:
    _playwright_install = subprocess.run(["playwright", "install"], check=False)
except FileNotFoundError:
    logging.warning("playwright CLI not found; skipping browser install")
else:
    if _playwright_install.returncode != 0:
        logging.warning(
            "playwright install exited with code %s",
            _playwright_install.returncode,
        )
# allow nested loops in Spaces
nest_asyncio.apply()
# --- Langfuse ---
# Connection settings are read from the environment; a variable that is not
# set is passed through as None.
_langfuse_settings = {
    "public_key": os.environ.get("LANGFUSE_PUBLIC_KEY"),
    "secret_key": os.environ.get("LANGFUSE_SECRET_KEY"),
    "host": os.environ.get("LANGFUSE_HOST"),
}
instrumentor = LlamaIndexInstrumentor(**_langfuse_settings)
instrumentor.start()
# --- Secrets via env vars ---
# Each value is None when the corresponding variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENWEATHERMAP_KEY = os.environ.get("OPENWEATHERMAP_API_KEY")
SERPER_API_KEY = os.environ.get("SERPER_API_KEY")

# --- LLMs ---
_llm_options = {
    "model_name": "Qwen/Qwen2.5-Coder-32B-Instruct",
    "token": HF_TOKEN,
    "task": "conversational",
    "streaming": True,
}
llm = HuggingFaceInferenceAPI(**_llm_options)

memory = ChatMemoryBuffer.from_defaults(token_limit=8192)
# Date at import time, e.g. "January 05, 2025".
today_str = f"{datetime.now():%B %d, %Y}"
# User id attached to traces; a random one is generated when none is configured.
ANON_USER_ID = os.environ.get("ANON_USER_ID", uuid.uuid4().hex)
| # # OpenAI for pure function-calling | |
| # openai_llm = OpenAI( | |
| # model="gpt-4o", | |
| # api_key=OPENAI_API_KEY, | |
| # temperature=0.0, | |
| # streaming=False, | |
| # ) | |
| # --- Tools Setup --- | |
| # DuckDuckGo | |
| # duck_spec = DuckDuckGoSearchToolSpec() | |
| # search_tool = FunctionTool.from_defaults(duck_spec.duckduckgo_full_search) | |
| # Weather | |
openweather_api_key = OPENWEATHERMAP_KEY
weather_tool_spec = OpenWeatherMapToolSpec(key=openweather_api_key)

# (callable, tool name, tool description) for each weather tool exposed to the agent.
_weather_tool_defs = (
    (
        weather_tool_spec.weather_at_location,
        "current_weather",
        "Get the current weather at a specific location (city, country).",
    ),
    (
        weather_tool_spec.forecast_tommorrow_at_location,
        "weather_forecast",
        "Get tomorrow's weather forecast for a specific location (city, country).",
    ),
)
weather_tool, forecast_tool = (
    FunctionTool.from_defaults(tool_fn, name=tool_name, description=tool_description)
    for tool_fn, tool_name, tool_description in _weather_tool_defs
)
| # Playwright (synchronous start) | |
| # async def _start_browser(): | |
| # return await PlaywrightToolSpec.create_async_playwright_browser(headless=True) | |
| # browser = asyncio.get_event_loop().run_until_complete(_start_browser()) | |
| # playwright_tool_spec = PlaywrightToolSpec.from_async_browser(browser) | |
| # navigate_tool = FunctionTool.from_defaults( | |
| # playwright_tool_spec.navigate_to, | |
| # name="web_navigate", | |
| # description="Navigate to a specific URL." | |
| # ) | |
| # extract_text_tool = FunctionTool.from_defaults( | |
| # playwright_tool_spec.extract_text, | |
| # name="web_extract_text", | |
| # description="Extract all text from the current page." | |
| # ) | |
| # extract_links_tool = FunctionTool.from_defaults( | |
| # playwright_tool_spec.extract_hyperlinks, | |
| # name="web_extract_links", | |
| # description="Extract all hyperlinks from the current page." | |
| # ) | |
| # Google News RSS | |
| # def fetch_google_news_rss(): | |
| # docs = RssReader(html_to_text=True).load_data(["https://news.google.com/rss"]) | |
| # return [{"title":d.metadata.get("title",""), "url":d.metadata.get("link","")} for d in docs] | |
| # ----------------------------- | |
| # Google News RSS | |
| # ----------------------------- | |
def fetch_news_headlines() -> str:
    """Fetch the latest headlines from the Google News RSS feed.

    At most five items are reported. Each item is rendered as ``Title``,
    ``Published``, ``Link`` and ``Description`` lines followed by a ``---``
    separator; a field that is missing or empty is shown as ``N/A``.

    Returns:
        The formatted headlines, ``"No news articles found."`` when the feed
        has no items, or an error message if the request or parsing fails.
        Errors are returned as text rather than raised.
    """
    url = "https://news.google.com/rss"
    max_items = 5
    try:
        # A timeout keeps a stalled connection from blocking the agent forever.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        # Parse the XML content
        root = ET.fromstring(response.content)
        # Format the news articles into a readable string
        formatted_news = []
        for item in root.findall(".//item")[:max_items]:
            # findtext() yields None for a missing tag and "" for an empty
            # one; both fall back to "N/A".
            title = item.findtext("title") or "N/A"
            link = item.findtext("link") or "N/A"
            pub_date = item.findtext("pubDate") or "N/A"
            description = item.findtext("description") or "N/A"
            formatted_news.extend(
                [
                    f"Title: {title}",
                    f"Published: {pub_date}",
                    f"Link: {link}",
                    f"Description: {description}",
                    "---",
                ]
            )
        return "\n".join(formatted_news) if formatted_news else "No news articles found."
    except requests.exceptions.RequestException as e:
        return f"Error fetching news: {str(e)}"
    except ET.ParseError as e:
        return f"Error parsing news feed: {str(e)}"
    except Exception as e:
        # Last-resort guard: report the failure as text instead of raising.
        return f"An unexpected error occurred: {str(e)}"


google_rss_tool = FunctionTool.from_defaults(
    fn=fetch_news_headlines,
    name="fetch_google_news_rss",
    description="Fetch latest headlines."
)
| # ----------------------------- | |
| # SERPER API | |
| # ----------------------------- | |
def fetch_news_topics(query: str) -> str:
    """Fetch news articles about a specific topic using the Serper API.

    At most five articles are reported. Each article is rendered as
    ``Title``, ``Source``, ``Link`` and ``Snippet`` lines followed by a
    ``---`` separator; a missing field is shown as ``N/A``.

    Args:
        query: The topic to search for news about.

    Returns:
        The formatted articles, ``"No news articles found."`` when the
        response contains none, or an error message if the API key is not
        configured or the request fails. Errors are returned as text rather
        than raised.
    """
    url = "https://google.serper.dev/news"
    max_items = 5
    # Read at call time so the key can be configured after import.
    api_key = os.getenv('SERPER_API_KEY')
    if not api_key:
        # Fail with a clear message instead of sending a request without a key.
        return "Error fetching news: SERPER_API_KEY is not set."
    payload = json.dumps({
        "q": query
    })
    headers = {
        'X-API-KEY': api_key,
        'Content-Type': 'application/json'
    }
    try:
        # A timeout keeps a stalled connection from blocking the agent forever.
        response = requests.post(url, headers=headers, data=payload, timeout=10)
        response.raise_for_status()
        news_data = response.json()
        # Format the news articles into a readable string
        formatted_news = []
        # "or []" also covers a response where "news" is present but null.
        for article in (news_data.get('news') or [])[:max_items]:
            formatted_news.extend(
                [
                    f"Title: {article.get('title', 'N/A')}",
                    f"Source: {article.get('source', 'N/A')}",
                    f"Link: {article.get('link', 'N/A')}",
                    f"Snippet: {article.get('snippet', 'N/A')}",
                    "---",
                ]
            )
        return "\n".join(formatted_news) if formatted_news else "No news articles found."
    except requests.exceptions.RequestException as e:
        return f"Error fetching news: {str(e)}"
    except Exception as e:
        # Last-resort guard: report the failure as text instead of raising.
        return f"An unexpected error occurred: {str(e)}"


serper_news_tool = FunctionTool.from_defaults(
    fetch_news_topics,
    name="fetch_news_from_serper",
    description="Fetch news articles on a specific topic."
)
| # ----------------------------- | |
| # WEB PAGE READER | |
| # ----------------------------- | |
def summarize_webpage(url: str) -> str:
    """Fetch a web page and return a summary of its main points.

    Args:
        url: Address of the page to load.

    Returns:
        The summary text, a notice when no content could be loaded, or an
        error message if loading or summarizing fails. Errors are returned
        as text rather than raised.
    """
    try:
        # NOTE: the html_to_text=True option requires html2text to be installed
        documents = SimpleWebPageReader(html_to_text=True).load_data([url])
        if not documents:
            return "No content could be loaded from the provided URL."
        index = SummaryIndex.from_documents(documents)
        # Pass the app's LLM explicitly: without it the query engine uses the
        # library-wide default LLM rather than the Hugging Face model
        # configured for this app.
        query_engine = index.as_query_engine(llm=llm)
        response = query_engine.query("Summarize the main points of this page.")
        return str(response)
    except Exception as e:
        # Tool boundary: report any failure as text so the agent can relay it.
        return f"An error occurred while summarizing the web page: {str(e)}"


webpage_reader_tool = FunctionTool.from_defaults(
    summarize_webpage,
    name="summarize_webpage",
    description="Read and summarize the main points of a web page given its URL."
)
| # Create the agent workflow | |
# Tools handed to the agent. The commented-out entries belong to the
# DuckDuckGo and Playwright tools that are disabled earlier in this file.
tools = [
    # search_tool,
    # navigate_tool,
    # extract_text_tool,
    # extract_links_tool,
    weather_tool,
    forecast_tool,
    google_rss_tool,
    serper_news_tool,
    webpage_reader_tool,
]

# Instructions given to the agent: which tools exist, when to use each one,
# and the parameter formats they expect.
SYSTEM_PROMPT = """You are a helpful assistant with access to specialized tools for retrieving information about weather, and news.
AVAILABLE TOOLS:
1. current_weather - Get current weather conditions for a location
2. weather_forecast - Get tomorrow's weather forecast for a location
3. fetch_google_news_rss - Fetch the latest general news headlines
4. fetch_news_from_serper - Fetch news articles on a specific topic
5. summarize_webpage - Read and summarize the content of a web page
WHEN AND HOW TO USE EACH TOOL:
For weather information:
- Use current_weather when asked about present conditions
EXAMPLE: User asks "What's the weather in Tokyo?"
TOOL: current_weather
PARAMETERS: {"location": "Tokyo, JP"}
- Use weather_forecast when asked about future weather
EXAMPLE: User asks "What will the weather be like in Paris tomorrow?"
TOOL: weather_forecast
PARAMETERS: {"location": "Paris, FR"}
For news retrieval:
- Use fetch_google_news_rss for general headlines (requires NO parameters)
EXAMPLE: User asks "What's happening in the news today?"
TOOL: fetch_google_news_rss
PARAMETERS: {}
- Use fetch_news_from_serper for specific news topics
EXAMPLE: User asks "Any news about AI advancements?"
TOOL: fetch_news_from_serper
PARAMETERS: {"query": "artificial intelligence advancements"}
For web content:
- Use summarize_webpage to extract information from websites
EXAMPLE: User asks "Can you summarize the content on hf.co/learn?"
TOOL: summarize_webpage
PARAMETERS: {"url": "https://hf.co/learn"}
IMPORTANT GUIDELINES:
- Always verify the format of parameters before submitting
- For locations, use the format "City, Country Code" (e.g., "Montreal, CA")
- For URLs, include the full address with http:// or https://
- When multiple tools are needed to answer a complex question, use them in sequence
- If possible, provide clickable links for your sources in your final answer.
When you use a tool, explain to the user that you're retrieving information. After receiving the tool's output, provide a helpful summary of the information.
"""

web_agent = AgentWorkflow.from_tools_or_functions(
    tools,
    llm=llm,
    system_prompt=SYSTEM_PROMPT,
)

# One module-level context, passed to every agent run in this file.
ctx = Context(web_agent)
| # Async helper to run agent queries (kept for compatibility) | |
def run_query_sync(query: str):
    """Run the agent on *query* from synchronous code and return its result.

    Args:
        query: The user message to send to the agent.

    Returns:
        Whatever awaiting ``web_agent.run(query, ctx=ctx)`` produces.
    """

    async def _run():
        # Call run() from inside a coroutine so the handler it returns is
        # created while the event loop is running, not before the loop has
        # been started by run_until_complete().
        return await web_agent.run(query, ctx=ctx)

    return asyncio.get_event_loop().run_until_complete(_run())
| # Updated run_query function to use stream_events | |
async def run_query(query: str):
    """Run the agent on *query* and stream its output as text chunks.

    Yields:
        - each streamed text delta, except deltas whose stripped text starts
          with "Thought:", "Action:" or "Answer:";
        - a "Using tool" notice the first time each distinct tool call
          (tool name plus arguments) is seen;
        - on failure, an error line followed by the formatted traceback.

    The instrumentor is flushed when the generator finishes, whether or not
    an error occurred.
    """
    hidden_prefixes = ("Thought:", "Action:", "Answer:")
    name_attributes = ("name", "function_name", "tool_name")
    run_trace_id = f"agent-run-{uuid.uuid4().hex}"
    try:
        with instrumentor.observe(
            trace_id=run_trace_id,
            session_id="web-agent-session",
            user_id=ANON_USER_ID,
        ):
            handler = web_agent.run(query, ctx=ctx)
            # Keys of the tool calls already announced, to avoid duplicates.
            announced_calls = set()
            async for event in handler.stream_events():
                if isinstance(event, AgentStream):
                    text = getattr(event, "delta", None)
                    if not text:
                        continue
                    # Hide the agent's internal reasoning markers.
                    if text.strip().startswith(hidden_prefixes):
                        continue
                    yield text
                    continue
                if not isinstance(event, ToolCall):
                    # Other events, including ToolCallResult, are not shown:
                    # the agent folds tool results into its own response.
                    continue
                # Use the first name-like attribute the event provides.
                tool_name = "unknown tool"
                for attribute in name_attributes:
                    if hasattr(event, attribute):
                        tool_name = getattr(event, attribute)
                        break
                call_args = getattr(event, "args", "")
                call_key = f"{tool_name}_{hash(str(call_args))}"
                if call_key in announced_calls:
                    continue
                announced_calls.add(call_key)
                yield f"\n\n🔧 Using tool: {tool_name}...\n"
    except Exception as e:
        yield f"\n\n❌ Error: {str(e)}\n"
        import traceback
        yield f"Traceback: {traceback.format_exc()}"
    finally:
        instrumentor.flush()
| # Updated gradio_query function | |
async def gradio_query(user_input, chat_history=None):
    """Stream the agent's reply to *user_input* into the chat history.

    Args:
        user_input: Text typed by the user.
        chat_history: Existing list of ``{"role", "content"}`` message dicts,
            or None for a new conversation. The list is copied, not mutated.

    Yields:
        ``(history, history)`` tuples, one for each of the two outputs the
        event handlers bind to this function. The first tuple shows a
        "Processing..." placeholder, and each later one shows the reply
        accumulated so far.
    """
    # Work on a copy so the caller's list is never modified in place.
    history = list(chat_history or [])
    # Blank submissions leave the conversation untouched instead of being
    # sent to the agent.
    if not user_input or not user_input.strip():
        yield history, history
        return
    history.append({"role": "user", "content": user_input})
    # Add initial assistant message
    history.append({"role": "assistant", "content": "Processing..."})
    yield history, history
    # Get streaming response
    full_response = ""
    async for chunk in run_query(user_input):
        if chunk:
            full_response += chunk
            history[-1]["content"] = full_response
            yield history, history
    if not full_response:
        # Nothing was streamed; do not leave the placeholder on screen.
        history[-1]["content"] = "No response was generated."
        yield history, history
| # Build and launch Gradio app | |
def _clear_textbox():
    """Return the update that empties the input textbox."""
    return gr.update(value="")


# Build and launch Gradio app
with gr.Blocks() as grb:
    gr.Markdown("## Perspicacity")
    gr.Markdown(
        """
This bot can check the news, tell you the weather, and even browse websites to answer follow-up questions — all powered by a team of tiny AI tools working behind the scenes.\n\n
🧪 Built for fun during the [AI Agents course](https://huggingface.co/learn/agents-course/unit0/introduction) — it's just a demo to show what agents can do.\n
🙌 Got ideas or improvements? PRs welcome!\n\n
👉 Try asking 'What's the weather in Montreal?' or 'What's in the news today?'
"""
    )
    chatbot = gr.Chatbot(type="messages")
    txt = gr.Textbox(placeholder="Ask me anything...", show_label=False)
    send_btn = gr.Button("Send")

    # Pressing Enter in the textbox and clicking "Send" behave the same way:
    # stream the reply into the chatbot, then clear the textbox.
    for trigger in (txt.submit, send_btn.click):
        trigger(
            gradio_query,
            inputs=[txt, chatbot],
            outputs=[chatbot, chatbot],
        ).then(
            _clear_textbox,
            inputs=None,
            outputs=[txt],
        )

if __name__ == "__main__":
    grb.launch()