Spaces:

Yiqin
/

ChatVID

Paused

App Files Files Community

Yiqin committed on Jun 13, 2023

Commit

f99efcc

1 Parent(s): 49b3986

split different users' data

Browse files

Files changed (6) hide show

.gitignore +1 -0
app.py +53 -47
examples/cook_720p.mp4 +3 -0
examples/references.txt +7 -0
examples/temple_of_heaven_720p.mp4 +3 -0
model/Vicuna.py +16 -91

.gitignore CHANGED Viewed

@@ -3,6 +3,7 @@ output_*/
 icl_inference_output/
 .vscode/
 tmp/
 # Byte-compiled / optimized / DLL files
 __pycache__/

 icl_inference_output/
 .vscode/
 tmp/
+gradio_cached_examples/
 # Byte-compiled / optimized / DLL files
 __pycache__/

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import argparse
 import time
 import gradio as gr
@@ -7,64 +6,73 @@ from config.config_utils import get_config
 from model import Captioner, VicunaHandler
-def set_example_video(example: list) -> dict:
-    return gr.Video.update(value=example[0])
-def upload_file(files):
-    file_paths = [file.name for file in files]
-    return file_paths
-def upload_video(video):
-    print(video)
-    return video
-def respond(input, chat_history):
-    bot_response = handler.gr_chat(input)
     chat_history.append((input, bot_response))
     time.sleep(0.1)
-    return "", chat_history
-def clear_chat(chat_history):
-    handler.chatbot.clear_conv_()
-    return "", []
 config = get_config('config/infer.yaml')
-captioner = Captioner(config)  # global
-global handler
 handler = VicunaHandler(config['vicuna'])
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("## <h1><center>ChatVID</center></h1>")
-    gr.Markdown("""
-    ChatVID is a video chatbot that can chat about any video.
-    """)
     with gr.Row():
         with gr.Column():
             video_path = gr.Video(label="Video")
             with gr.Column():
-                upload_button = gr.Button(
-                    "Upload & Watch. (Click once and wait 3min )")
-                chat_button = gr.Button("Let's Chat!", interactive=False)
                 num_frames = gr.Slider(
                     minimum=5,
                     value=12,
                     maximum=12,
                     step=1,
-                    label="Number of frames (no more than 12)")
         with gr.Column():
             chatbot = gr.Chatbot()
-            captions = gr.State("")
             with gr.Row(visible=False) as input:
                 with gr.Column(scale=0.7):
                     txt = gr.Textbox(
@@ -76,22 +84,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 with gr.Column(scale=0.15, min_width=0):
                     clear_button = gr.Button("CLEAR")
-    upload_button.click(
-        lambda: gr.update(interactive=False), None, chat_button).then(
-            lambda: gr.update(visible=False), None,
-            input).then(lambda: [], None, chatbot).then(
-                captioner.caption_video, [video_path, num_frames],
-                [captions]).then(lambda: gr.update(interactive=True), None,
-                                    chat_button)
-    chat_button.click(handler.gr_chatbot_init, [captions],
-                        None).then(lambda: gr.update(visible=True), None,
-                                    input)
-    txt.submit(respond, inputs=[txt, chatbot], outputs=[txt, chatbot])
     run_button.click(
-        respond, inputs=[txt, chatbot], outputs=[txt, chatbot])
     clear_button.click(
-        clear_chat, inputs=[chatbot], outputs=[txt, chatbot])
 demo.launch()

 import time
 import gradio as gr
 from model import Captioner, VicunaHandler
+def mirror(x):
+    return x
+def clear_chat(conv_template):
+    return "", [], conv_template
+def clear_four():
+    return [], [], [], []
+def respond(input, chat_history, conv):
+    bot_response, new_conv = handler.gr_chat(input, conv)
     chat_history.append((input, bot_response))
     time.sleep(0.1)
+    return "", chat_history, new_conv
+# global variables
 config = get_config('config/infer.yaml')
+captioner = Captioner(config)
 handler = VicunaHandler(config['vicuna'])
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown(
+        "## <h1><center><img src='https://github.com/InvincibleWyq/ChatVID/assets/37479394/1a7f47ca-ffbd-4720-b43a-4304fcaa8657' height=40/> ChatVID</center></h1>"
+    )
+    gr.Markdown("""🔥 [ChatVID](https://github.com/InvincibleWyq/ChatVID) is a
+    video chatbot. Please give us a ⭐ Star!""")
+    gr.Markdown("""🎥 You may use the example video by clicking it.""")
+    gr.Markdown("""🚀 For any questions or suggestions, feel free to drop Yiqin
+    an email at <a href="mailto:[email protected]">[email protected]</a>
+    or open an issue.""")
     with gr.Row():
         with gr.Column():
             video_path = gr.Video(label="Video")
             with gr.Column():
+                upload_button = gr.Button("""Upload & Process.
+                    (Click and wait 3min until dialog box appears)""")
                 num_frames = gr.Slider(
                     minimum=5,
                     value=12,
                     maximum=12,
                     step=1,
+                    label="Number of frames")
+                gr.Markdown("## Video Examples")
+                gr.Examples(
+                    examples=[
+                        "examples/cook_720p.mp4",
+                        "examples/temple_of_heaven_720p.mp4"
+                    ],
+                    inputs=video_path,
+                    outputs=video_path,
+                    fn=mirror,
+                    cache_examples=True,
+                )
         with gr.Column():
+            caption_box = gr.Textbox("")
             chatbot = gr.Chatbot()
+            conv_template = gr.State("")  # determined by the video
+            conv = gr.State("")  # updated throughout the conversation
             with gr.Row(visible=False) as input:
                 with gr.Column(scale=0.7):
                     txt = gr.Textbox(
                 with gr.Column(scale=0.15, min_width=0):
                     clear_button = gr.Button("CLEAR")
+    # conv_template and conv are `Conversation` objects
+    upload_button.click(lambda: gr.update(visible=False), None, input).then(
+        clear_four, None, [chatbot, conv, conv_template, caption_box]).then(
+            captioner.caption_video, [video_path, num_frames],
+            [conv_template]).then(mirror, [conv_template], [caption_box]).then(
+                handler.gr_chatbot_init, [conv_template],
+                [conv_template, conv]).then(lambda: gr.update(visible=True),
+                                            None, input)
+    txt.submit(
+        respond, inputs=[txt, chatbot, conv], outputs=[txt, chatbot, conv])
     run_button.click(
+        respond, inputs=[txt, chatbot, conv], outputs=[txt, chatbot, conv])
     clear_button.click(
+        clear_chat, inputs=[conv_template], outputs=[txt, chatbot, conv])
 demo.launch()

examples/cook_720p.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aa232686c22066b90fe099e9bb4f0ad093693685368eb7590ddd843deb40f574
+size 5320367

examples/references.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+The video [Temple of Heaven - UNESCO World Heritage Site] by YouTube creator [World Heritage Journey] used under Fair Use.
+Link:
+https://www.youtube.com/watch?v=9xLoyYY_5rc
+The video [做饭糊弄学 十分钟晚餐 今天吃 ：番茄西兰花炒蛋] from Bilibili user [香蕉柿子梨] used under Fair Use.
+Link:
+https://www.bilibili.com/video/BV1RY411e74Z

examples/temple_of_heaven_720p.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e60ce16122b0a6277c10efc5c37cc9b89c963913b2d382539fb7cc101bbd0851
+size 4217766

model/Vicuna.py CHANGED Viewed

@@ -1,8 +1,6 @@
 from model.fastchat.conversation import (Conversation, SeparatorStyle,
-                                         compute_skip_echo_len,
-                                         get_default_conv_template)
-from model.fastchat.serve.inference import (ChatIO, chat_loop, generate_stream,
-                                            load_model)
 class SimpleChatIO(ChatIO):
@@ -35,7 +33,6 @@ class VicunaChatBot:
         num_gpus: str,
         max_gpu_memory: str,
         load_8bit: bool,
-        conv_template,
         ChatIO: ChatIO,
         debug: bool,
     ):
@@ -48,25 +45,18 @@ class VicunaChatBot:
                                                 num_gpus, max_gpu_memory,
                                                 load_8bit, debug)
-        if conv_template:
-            self.conv = conv_template.copy()
-        else:
-            self.conv = get_default_conv_template(model_path).copy()
-        self.conv_template = self.conv.copy()
-    def chat(self, inp: str, temperature: float, max_new_tokens: int):
         """ Vicuna as a chatbot. """
-        self.conv.append_message(self.conv.roles[0], inp)
-        self.conv.append_message(self.conv.roles[1], None)
         generate_stream_func = generate_stream
-        prompt = self.conv.get_prompt()
-        skip_echo_len = compute_skip_echo_len(self.model_path, self.conv,
-                                              prompt)
         stop_str = (
-            self.conv.sep if self.conv.sep_style
             in [SeparatorStyle.SINGLE, SeparatorStyle.BAIZE] else None)
         params = {
             "model": self.model_path,
@@ -76,65 +66,13 @@ class VicunaChatBot:
             "stop": stop_str,
         }
         print(prompt)
-        self.chatio.prompt_for_output(self.conv.roles[1])
         output_stream = generate_stream_func(self.model, self.tokenizer,
                                              params, self.device)
         outputs = self.chatio.stream_output(output_stream, skip_echo_len)
         # NOTE: strip is important to align with the training data.
-        self.conv.messages[-1][-1] = outputs.strip()
-        return outputs
-    def summarise(self, caption: dict, temperature: float,
-                  max_new_tokens: int):
-        """ Vicuna as a summariser. """
-        questions = caption
-        captions = {}
-        for id, question in questions.items():
-            # Reset the conversation for each iteration
-            self.conv = get_default_conv_template(self.model_path).copy()
-            self.conv.append_message(self.conv.roles[0], question)
-            self.conv.append_message(self.conv.roles[1], None)
-            generate_stream_func = generate_stream
-            prompt = self.conv.get_prompt()
-            skip_echo_len = compute_skip_echo_len(self.model_path, self.conv,
-                                                  prompt)
-            stop_str = (
-                self.conv.sep if self.conv.sep_style
-                in [SeparatorStyle.SINGLE, SeparatorStyle.BAIZE] else None)
-            params = {
-                "model": self.model_path,
-                "prompt": prompt,
-                "temperature": temperature,
-                "max_new_tokens": max_new_tokens,
-                "stop": stop_str,
-            }
-            self.chatio.prompt_for_output(self.conv.roles[1])
-            output_stream = generate_stream_func(self.model, self.tokenizer,
-                                                 params, self.device)
-            outputs = self.chatio.stream_output(output_stream, skip_echo_len)
-            captions[id] = outputs
-            if self.debug:
-                print("\n", {"prompt": prompt, "outputs": outputs}, "\n")
-        print(captions)
-        return captions
-    def clear_conv_(self):
-        """ Clear the conversation. """
-        self.conv = self.conv_template.copy()
-    def change_conv_template_(self, conv_template):
-        self.conv_template = conv_template.copy()
-        self.conv = conv_template.copy()
-    def change_conv_(self, conv_template):
-        """ Change the conversation. """
-        self.conv = conv_template.copy()
 class VicunaHandler:
@@ -150,38 +88,25 @@ class VicunaHandler:
             self.config['num_gpus'],
             self.config['max_gpu_memory'],
             self.config['load_8bit'],
-            None,
             self.chat_io,
             self.config['debug'],
         )
     def chat(self):
         """ Chat with the Vicuna. """
-        template = self._construct_conversation("")
-        chat_loop(
-            self.config['model_path'],
-            self.config['device'],
-            self.config['num_gpus'],
-            self.config['max_gpu_memory'],
-            self.config['load_8bit'],
-            template,
-            self.config['temperature'],
-            self.config['max_new_tokens'],
-            self.chat_io,
-            self.config['debug'],
-        )
     def gr_chatbot_init(self, caption: str):
         """ Initialise the chatbot for gradio. """
         template = self._construct_conversation(caption)
-        self.chatbot.change_conv_template_(template)
         print("Chatbot initialised.")
-    def gr_chat(self, inp):
         """ Chat using gradio as the frontend. """
         return self.chatbot.chat(inp, self.config['temperature'],
-                                 self.config['max_new_tokens'])
     def _construct_conversation(self, prompt):
         """ Construct a conversation template.

 from model.fastchat.conversation import (Conversation, SeparatorStyle,
+                                         compute_skip_echo_len)
+from model.fastchat.serve.inference import ChatIO, generate_stream, load_model
 class SimpleChatIO(ChatIO):
         num_gpus: str,
         max_gpu_memory: str,
         load_8bit: bool,
         ChatIO: ChatIO,
         debug: bool,
     ):
                                                 num_gpus, max_gpu_memory,
                                                 load_8bit, debug)
+    def chat(self, inp: str, temperature: float, max_new_tokens: int,
+             conv: Conversation):
         """ Vicuna as a chatbot. """
+        conv.append_message(conv.roles[0], inp)
+        conv.append_message(conv.roles[1], None)
         generate_stream_func = generate_stream
+        prompt = conv.get_prompt()
+        skip_echo_len = compute_skip_echo_len(self.model_path, conv, prompt)
         stop_str = (
+            conv.sep if conv.sep_style
             in [SeparatorStyle.SINGLE, SeparatorStyle.BAIZE] else None)
         params = {
             "model": self.model_path,
             "stop": stop_str,
         }
         print(prompt)
+        self.chatio.prompt_for_output(conv.roles[1])
         output_stream = generate_stream_func(self.model, self.tokenizer,
                                              params, self.device)
         outputs = self.chatio.stream_output(output_stream, skip_echo_len)
         # NOTE: strip is important to align with the training data.
+        conv.messages[-1][-1] = outputs.strip()
+        return outputs, conv
 class VicunaHandler:
             self.config['num_gpus'],
             self.config['max_gpu_memory'],
             self.config['load_8bit'],
             self.chat_io,
             self.config['debug'],
         )
     def chat(self):
         """ Chat with the Vicuna. """
+        pass
     def gr_chatbot_init(self, caption: str):
         """ Initialise the chatbot for gradio. """
         template = self._construct_conversation(caption)
         print("Chatbot initialised.")
+        return template.copy(), template.copy()
+    def gr_chat(self, inp, conv: Conversation):
         """ Chat using gradio as the frontend. """
         return self.chatbot.chat(inp, self.config['temperature'],
+                                 self.config['max_new_tokens'], conv)
     def _construct_conversation(self, prompt):
         """ Construct a conversation template.