Spaces:

VanguardAI
/

MultiModal_OpenSource_AI

Paused

App Files Files Community

VanguardAI committed on Aug 14, 2024

Commit

038f995

verified ·

1 Parent(s): 03ff38e

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -21

app.py CHANGED Viewed

@@ -11,8 +11,9 @@ import soundfile as sf
 from langchain_community.embeddings import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.chains import RetrievalQA, LLMChain
-from langchain.agents import ZeroShotAgent, Tool, AgentExecutor
 from PIL import Image
 from decord import VideoReader, cpu
 from tavily import TavilyClient
@@ -56,7 +57,6 @@ def play_voice_output(response):
 # NumPy Code Calculator Tool
 def numpy_code_calculator(query):
-    """Generates and executes NumPy code for mathematical operations."""
     try:
         llm_response = client.chat.completions.create(
             model=MODEL,
@@ -77,20 +77,17 @@ def numpy_code_calculator(query):
 # Web Search Tool
 def web_search(query):
-    """Performs a web search using Tavily."""
     answer = tavily_client.qna_search(query=query)
     return answer
 # Image Generation Tool
 def image_generation(query):
-    """Generates an image based on the given prompt."""
     image = image_pipe(prompt=query, num_inference_steps=20, guidance_scale=7.5).images[0]
     image.save("output.jpg")
     return "output.jpg"
 # Document Question Answering Tool
 def doc_question_answering(query, file_path):
-    """Answers questions based on the content of a document."""
     with open(file_path, 'r') as f:
         file_content = f.read()
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
@@ -102,9 +99,7 @@ def doc_question_answering(query, file_path):
 # Function to handle different input types and choose the right tool
 def handle_input(user_prompt, image=None, video=None, audio=None, doc=None, websearch=False):
-    # Voice input handling
     if audio:
-        # Make sure 'audio' is a file object
         if isinstance(audio, str):
             audio = open(audio, "rb")
         transcription = client.audio.transcriptions.create(
@@ -113,7 +108,6 @@ def handle_input(user_prompt, image=None, video=None, audio=None, doc=None, webs
         )
         user_prompt = transcription.text
-    # Initialize tools
     tools = [
         Tool(
             name="Numpy Code Calculator",
@@ -132,7 +126,6 @@ def handle_input(user_prompt, image=None, video=None, audio=None, doc=None, webs
         ),
     ]
-    # Add document Q&A tool if a document is provided
     if doc:
         tools.append(
             Tool(
@@ -142,7 +135,6 @@ def handle_input(user_prompt, image=None, video=None, audio=None, doc=None, webs
             )
         )
-    # Function for the agent's LLM
     def llm_function(query):
         response = client.chat.completions.create(
             model=MODEL,
@@ -150,22 +142,15 @@ def handle_input(user_prompt, image=None, video=None, audio=None, doc=None, webs
         )
         return response.choices[0].message.content
-    # Initialize agent with an empty string prompt for LLMChain
-    agent = ZeroShotAgent(llm_chain=LLMChain(llm=llm_function, prompt=""), tools=tools, verbose=True)
     agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)
-    # Initialize agent
-    agent = ZeroShotAgent(llm_chain=LLMChain(llm=llm_function, prompt=None), tools=tools, verbose=True)
-    agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)
-    # If user uploaded an image and text, use MiniCPM model
     if image:
         image = Image.open(image).convert('RGB')
         messages = [{"role": "user", "content": [image, user_prompt]}]
         response = vqa_model.chat(image=None, msgs=messages, tokenizer=tokenizer)
         return response
-    # Use the agent to determine the best tool and get the response
     if websearch:
         response = agent_executor.run(f"{user_prompt} Use the Web Search tool if necessary.")
     else:
@@ -198,7 +183,6 @@ def create_ui():
             outputs=[output_label, audio_output]
         )
-        # Voice-only mode UI
         voice_only_mode.change(
             lambda x: gr.update(visible=not x),
             inputs=voice_only_mode,
@@ -230,4 +214,4 @@ def main_interface(user_prompt, image=None, audio=None, doc=None, voice_only=Fal
 # Launch the app
 demo = create_ui()
-demo.launch(inline=False)

 from langchain_community.embeddings import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.chains import RetrievalQA
+from langchain.agents import AgentExecutor, Tool
+from langchain.schema import RunnableSequence
 from PIL import Image
 from decord import VideoReader, cpu
 from tavily import TavilyClient
 # NumPy Code Calculator Tool
 def numpy_code_calculator(query):
     try:
         llm_response = client.chat.completions.create(
             model=MODEL,
 # Web Search Tool
 def web_search(query):
     """Return the result of ``tavily_client.qna_search`` for ``query``.

     The query string is forwarded unchanged as the ``query`` keyword
     argument, and whatever the client returns is handed back to the caller.
     """
     return tavily_client.qna_search(query=query)
 # Image Generation Tool
 def image_generation(query):
     """Run ``image_pipe`` on ``query`` and save the first image to disk.

     The pipeline is called with 20 inference steps and a guidance scale of
     7.5. The first entry of the result's ``images`` is saved under the
     relative path ``output.jpg``, and that path string is returned.
     """
     output_path = "output.jpg"
     result = image_pipe(prompt=query, num_inference_steps=20, guidance_scale=7.5)
     first_image = result.images[0]
     first_image.save(output_path)
     return output_path
 # Document Question Answering Tool
 def doc_question_answering(query, file_path):
     with open(file_path, 'r') as f:
         file_content = f.read()
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
 # Function to handle different input types and choose the right tool
 def handle_input(user_prompt, image=None, video=None, audio=None, doc=None, websearch=False):
     if audio:
         if isinstance(audio, str):
             audio = open(audio, "rb")
         transcription = client.audio.transcriptions.create(
         )
         user_prompt = transcription.text
     tools = [
         Tool(
             name="Numpy Code Calculator",
         ),
     ]
     if doc:
         tools.append(
             Tool(
             )
         )
     def llm_function(query):
         response = client.chat.completions.create(
             model=MODEL,
         )
         return response.choices[0].message.content
+    agent = ZeroShotAgent(llm_chain=RunnableSequence(prompt="", llm=llm_function), tools=tools, verbose=True)
     agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)
     if image:
         image = Image.open(image).convert('RGB')
         messages = [{"role": "user", "content": [image, user_prompt]}]
         response = vqa_model.chat(image=None, msgs=messages, tokenizer=tokenizer)
         return response
     if websearch:
         response = agent_executor.run(f"{user_prompt} Use the Web Search tool if necessary.")
     else:
             outputs=[output_label, audio_output]
         )
         voice_only_mode.change(
             lambda x: gr.update(visible=not x),
             inputs=voice_only_mode,
 # Launch the app
 demo = create_ui()
+demo.launch(inline=False)