danhtran2mind committed on
Commit e8d5a56 · verified · 1 Parent(s): 9827a61

Upload 43 files

Files changed (39)
  1. .gitattributes +7 -0
  2. .python-version +1 -0
  3. LICENSE +21 -0
  4. apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/A_dog_is_running_with_Ghibli_style_42.mp4 +3 -0
  5. apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/config.json +15 -0
  6. apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/2/A_girl_is_walking_with_Ghibli_style_0.mp4 +0 -0
  7. apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/2/config.json +15 -0
  8. apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/Studio_Ghibli_style_Young_man_contemplates_walks_away_from_ivy-covered_yellow_building_12345.mp4 +3 -0
  9. apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/config.json +15 -0
  10. apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/Studio_Ghibli_style_Two_women_walk_down_coastal_village_path_toward_sea_passing_colorful_houses_sailboats_visible_100.mp4 +3 -0
  11. apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/config.json +15 -0
  12. apps/gradio_app.py +136 -0
  13. apps/gradio_app/__init__.py +0 -0
  14. apps/gradio_app/abc.py +0 -0
  15. apps/gradio_app/inference.py +104 -0
  16. apps/gradio_app/new-inference.py +104 -0
  17. apps/gradio_app/old-inference.py +73 -0
  18. apps/gradio_app/setup_scripts.py +46 -0
  19. apps/gradio_app/static/__init__.py +0 -0
  20. apps/gradio_app/static/scripts.js +50 -0
  21. apps/gradio_app/static/styles.css +154 -0
  22. assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/A_dog_is_running_with_Ghibli_style_42.gif +3 -0
  23. assets/examples/zeroscope_v2_576w-Ghibli-LoRA/2/A_girl_is_walking_with_Ghibli_style_0.gif +3 -0
  24. assets/examples/zeroscope_v2_576w-Ghibli-LoRA/2/config.json +1 -1
  25. assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/Studio_Ghibli_style_Young_man_contemplates_walks_away_from_ivy-covered_yellow_building_12345.gif +3 -0
  26. assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/config.json +1 -1
  27. assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/Studio_Ghibli_style_Two_women_walk_down_coastal_village_path_toward_sea_passing_colorful_houses_sailboats_visible_100.gif +3 -0
  28. assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/config.json +1 -1
  29. configs/config_multi_videos.yaml +131 -0
  30. notebooks/zeroscope_v2_576w_Ghibli_LoRA-Inference.ipynb +0 -0
  31. notebooks/zeroscope_v2_576w_Ghibli_LoRA-Training.ipynb +802 -0
  32. requirements/requirements.txt +26 -0
  33. requirements/requirements_compatible.txt +23 -0
  34. scripts/download_ckpts.py +96 -0
  35. scripts/process_dataset.py +48 -0
  36. scripts/setup_third_party.py +38 -0
  37. src/text2video_ghibli_style/inference.py +96 -0
  38. src/text2video_ghibli_style/train.py +73 -0
  39. src/third_party/.gitkeep +0 -0
.gitattributes CHANGED
@@ -39,3 +39,10 @@ assets/zeroscope_v2_576w-Ghibli-LoRA/examples/4/Studio_Ghibli_style_Two_women_wa
  assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/A_dog_is_running_with_Ghibli_style_42.mp4 filter=lfs diff=lfs merge=lfs -text
  assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/Studio_Ghibli_style_Young_man_contemplates_walks_away_from_ivy-covered_yellow_building_12345.mp4 filter=lfs diff=lfs merge=lfs -text
  assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/Studio_Ghibli_style_Two_women_walk_down_coastal_village_path_toward_sea_passing_colorful_houses_sailboats_visible_100.mp4 filter=lfs diff=lfs merge=lfs -text
+ apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/A_dog_is_running_with_Ghibli_style_42.mp4 filter=lfs diff=lfs merge=lfs -text
+ apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/Studio_Ghibli_style_Young_man_contemplates_walks_away_from_ivy-covered_yellow_building_12345.mp4 filter=lfs diff=lfs merge=lfs -text
+ apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/Studio_Ghibli_style_Two_women_walk_down_coastal_village_path_toward_sea_passing_colorful_houses_sailboats_visible_100.mp4 filter=lfs diff=lfs merge=lfs -text
+ assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/A_dog_is_running_with_Ghibli_style_42.gif filter=lfs diff=lfs merge=lfs -text
+ assets/examples/zeroscope_v2_576w-Ghibli-LoRA/2/A_girl_is_walking_with_Ghibli_style_0.gif filter=lfs diff=lfs merge=lfs -text
+ assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/Studio_Ghibli_style_Young_man_contemplates_walks_away_from_ivy-covered_yellow_building_12345.gif filter=lfs diff=lfs merge=lfs -text
+ assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/Studio_Ghibli_style_Two_women_walk_down_coastal_village_path_toward_sea_passing_colorful_houses_sailboats_visible_100.gif filter=lfs diff=lfs merge=lfs -text
.python-version ADDED
@@ -0,0 +1 @@
+ 3.11.13
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Danh Tran
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/A_dog_is_running_with_Ghibli_style_42.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a77fed344231dc9e9cf3b271646183b84c2edbe94cd15bf2d2b192cec9ac89ae
+ size 288959
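The three lines above are a Git LFS pointer file, not the video itself; the actual bytes are resolved through the `filter=lfs` rules added to `.gitattributes`. A minimal Python sketch, assuming only the three-field `version` / `oid` / `size` layout shown in this diff, for reading such a pointer:

```python
# Sketch only: parse a Git LFS pointer file like the one added above.
# Assumes the "version / oid / size" key-value layout shown in this diff.
def parse_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            if key and value:
                fields[key] = value
    return fields

# e.g. parse_lfs_pointer("apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/A_dog_is_running_with_Ghibli_style_42.mp4")
# -> {"version": "https://git-lfs.github.com/spec/v1", "oid": "sha256:a77f...", "size": "288959"}
```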
apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/config.json ADDED
@@ -0,0 +1,15 @@
+ {
+ "prompt": "A dog is running with Ghibli style",
+ "negative-prompt": "ugly, noise, fragment, blur, static video",
+ "height": 512,
+ "width": 288,
+ "num-frames": 24,
+ "num-steps": 50,
+ "guidance_scale": 12,
+ "fps": 16,
+ "lora_rank": 64,
+ "lora_scale": 1.0,
+ "noise_prior": 0.0,
+ "seed": 42,
+ "video": "A_dog_is_running_with_Ghibli_style_42.mp4"
+ }
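Each numbered example folder pairs a rendered video with a `config.json` like the one above. Note the mixed key styles ("num-frames", "negative-prompt" versus "guidance_scale", "lora_rank"); `apps/gradio_app.py` below reads both spellings verbatim with `config.get(...)`, so they must be preserved. A minimal sketch of loading one of these configs:

```python
import json

# Sketch only: load example 1's config the same way the Gradio app does.
path = "apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/config.json"
with open(path, "r") as f:
    config = json.load(f)

# Hyphenated and underscored keys coexist in these files.
print(config["prompt"], config["num-frames"], config["guidance_scale"])
```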
apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/2/A_girl_is_walking_with_Ghibli_style_0.mp4 ADDED
Binary file (60.9 kB).
apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/2/config.json ADDED
@@ -0,0 +1,15 @@
+ {
+ "prompt": "A girl is walking with Ghibli style",
+ "negative-prompt": "ugly, noise, fragment, blur, static video",
+ "height": 384,
+ "width": 384,
+ "num-frames": 28,
+ "num-steps": 50,
+ "guidance_scale": 15,
+ "fps": 16,
+ "lora_rank": 128,
+ "lora_scale": 0.8,
+ "noise_prior": 0.3,
+ "seed": 0,
+ "video": "A_girl_is_walking_with_Ghibli_style_0.mp4"
+ }
apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/Studio_Ghibli_style_Young_man_contemplates_walks_away_from_ivy-covered_yellow_building_12345.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6a833dba0ad2cd264556d40e586b3a1cb7656e0239f9cae30f82ea635ed75d3b
+ size 156033
apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/config.json ADDED
@@ -0,0 +1,15 @@
+ {
+ "prompt": "Studio Ghibli style. Young man contemplates, walks away from ivy-covered yellow building.",
+ "negative-prompt": "ugly, noise, fragment, blur, static video",
+ "height": 384,
+ "width": 384,
+ "num-frames": 28,
+ "num-steps": 50,
+ "guidance_scale": 15,
+ "fps": 16,
+ "lora_rank": 32,
+ "lora_scale": 0.9,
+ "noise_prior": 0.3,
+ "seed": 12345,
+ "video": "Studio_Ghibli_style_Young_man_contemplates_walks_away_from_ivy-covered_yellow_building_12345.mp4"
+ }
apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/Studio_Ghibli_style_Two_women_walk_down_coastal_village_path_toward_sea_passing_colorful_houses_sailboats_visible_100.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cd172829c209c5cc4fc064190891161a1145adceda7766676b8b8d8d57100156
+ size 134892
apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/config.json ADDED
@@ -0,0 +1,15 @@
+ {
+ "prompt": "Studio Ghibli style. Two women walk down coastal village path toward sea, passing colorful houses, sailboats visible.",
+ "negative-prompt": "ugly, noise, fragment, blur, static video",
+ "height": 512,
+ "width": 512,
+ "num-frames": 16,
+ "num-steps": 50,
+ "guidance_scale": 30,
+ "fps": 16,
+ "lora_rank": 96,
+ "lora_scale": 0.7,
+ "noise_prior": 0.1,
+ "seed": 100,
+ "video": "Studio_Ghibli_style_Two_women_walk_down_coastal_village_path_toward_sea_passing_colorful_houses_sailboats_visible_100.mp4"
+ }
apps/gradio_app.py ADDED
@@ -0,0 +1,136 @@
1
+ import os
2
+ import gradio as gr
3
+ import json
4
+ from gradio_app.inference import run_inference, run_setup_script
5
+
6
+ def create_app():
7
+ # Run setup script at startup
8
+ setup_output = run_setup_script()
9
+ # Load CSS file
10
+ CSS = open("apps/gradio_app/static/styles.css", "r").read()
11
+
12
+ with gr.Blocks(css=CSS) as app:
13
+ gr.HTML('<script src="file=apps/gradio_app/static/scripts.js"></script>')
14
+ gr.Markdown(
15
+ """
16
+ # Text to Video Ghibli style
17
+ Generate videos using the `zeroscope_v2_576w` model with Studio Ghibli style LoRA.
18
+ """
19
+ )
20
+
21
+ with gr.Row(elem_classes="row-container"):
22
+ with gr.Column(elem_classes="column-container"):
23
+ model_path = gr.Dropdown(
24
+ label="Base Model",
25
+ choices=["./ckpts/zeroscope_v2_576w"],
26
+ value="./ckpts/zeroscope_v2_576w"
27
+ )
28
+ checkpoint_folder = gr.Dropdown(
29
+ label="LoRA folder",
30
+ choices=["./ckpts/zeroscope_v2_576w-Ghibli-LoRA"],
31
+ value="./ckpts/zeroscope_v2_576w-Ghibli-LoRA"
32
+ )
33
+ prompt = gr.Textbox(
34
+ label="Prompt",
35
+ value="Studio Ghibli style. Two women walk down coastal village path toward sea, passing colorful houses, sailboats visible."
36
+ )
37
+ negative_prompt = gr.Textbox(
38
+ label="Negative Prompt",
39
+ value="ugly, noise, fragment, blur, static video"
40
+ )
41
+
42
+ # Video Dimensions & Timing
43
+ with gr.Row(elem_classes="slider-row"):
44
+ with gr.Group(elem_classes="slider-group"):
45
+ gr.Markdown("### Video Dimensions & Timing")
46
+ width = gr.Slider(label="Width", minimum=256, maximum=1024, step=8, value=512)
47
+ height = gr.Slider(label="Height", minimum=256, maximum=1024, step=8, value=512)
48
+ num_frames = gr.Slider(label="Number of Frames", minimum=8, maximum=64, step=1, value=16)
49
+ fps = gr.Slider(label="FPS", minimum=10, maximum=60, step=1, value=16)
50
+ seed = gr.Number(label="Seed", value=100)
51
+
52
+ generate_btn = gr.Button("Generate Video", elem_classes="generate-btn")
53
+
54
+ with gr.Column(elem_classes="column-container"):
55
+ video_output = gr.Video(label="Generated Video")
56
+ log_output = gr.Textbox(label="Logs", lines=3, max_lines=20)
57
+
58
+ # Model Parameters
59
+ with gr.Row(elem_classes="slider-row"):
60
+ with gr.Group(elem_classes="slider-group"):
61
+ gr.Markdown("### Model Parameters")
62
+ num_steps = gr.Slider(label="Number of Steps", minimum=10, maximum=100, step=1, value=50)
63
+ guidance_scale = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=50.0, step=0.1, value=30.0)
64
+ lora_rank = gr.Slider(label="LoRA Rank", minimum=16, maximum=128, step=8, value=96)
65
+ lora_scale = gr.Slider(label="LoRA Scale", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
66
+ noise_prior = gr.Slider(label="Noise Prior", minimum=0.0, maximum=1.0, step=0.01, value=0.1)
67
+
68
+ # Example Buttons Section
69
+ gr.Markdown("## Example Configurations")
70
+ example_base_path = "apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA"
71
+ example_buttons = []
72
+ configs = []
73
+
74
+ for i in range(1, 5):
75
+ example_dir = os.path.join(example_base_path, str(i))
76
+ config_path = os.path.join(example_dir, "config.json")
77
+ if os.path.exists(config_path):
78
+ with open(config_path, "r") as f:
79
+ config = json.load(f)
80
+ video_path = os.path.join(example_dir, config["video"])
81
+ if os.path.exists(video_path):
82
+ configs.append((config, video_path))
83
+ example_buttons.append(gr.Button(f"Load Example {i}"))
84
+
85
+ def create_example_fn(config, video_path):
86
+ def load_example():
87
+ return [
88
+ "./ckpts/zeroscope_v2_576w", # model_path
89
+ "./ckpts/zeroscope_v2_576w-Ghibli-LoRA", # checkpoint_folder
90
+ config.get("prompt", ""),
91
+ config.get("negative-prompt", ""),
92
+ config.get("width", 512),
93
+ config.get("height", 512),
94
+ config.get("num-frames", 16),
95
+ config.get("num-steps", 50),
96
+ config.get("guidance_scale", 30.0),
97
+ config.get("fps", 16),
98
+ config.get("lora_rank", 96),
99
+ config.get("lora_scale", 0.7),
100
+ config.get("noise_prior", 0.1),
101
+ config.get("seed", 100),
102
+ video_path, # video_output
103
+ f"Loaded example with prompt: {config.get('prompt', '')}" # log_output
104
+ ]
105
+ return load_example
106
+
107
+ for btn, (config, video_path) in zip(example_buttons, configs):
108
+ btn.click(
109
+ fn=create_example_fn(config, video_path),
110
+ inputs=[],
111
+ outputs=[
112
+ model_path, checkpoint_folder, prompt, negative_prompt,
113
+ width, height, num_frames, num_steps, guidance_scale,
114
+ fps, lora_rank, lora_scale, noise_prior, seed,
115
+ video_output, log_output
116
+ ]
117
+ )
118
+
119
+ generate_btn.click(
120
+ fn=run_inference,
121
+ inputs=[
122
+ model_path, checkpoint_folder, prompt, negative_prompt,
123
+ width, height, num_frames, num_steps, guidance_scale,
124
+ fps, lora_rank, lora_scale, noise_prior, seed
125
+ ],
126
+ outputs=[video_output, log_output]
127
+ )
128
+
129
+ gr.Markdown("""
130
+ This repository is trained from [![GitHub Repo](https://img.shields.io/badge/GitHub-danhtran2mind%2FMotionDirector-blue?style=flat)](https://github.com/danhtran2mind/MotionDirector), a fork of [![GitHub Repo](https://img.shields.io/badge/GitHub-showlab%2FMotionDirector-blue?style=flat)](https://github.com/showlab/MotionDirector), with numerous bug fixes and rewritten code for improved performance and stability.
131
+ """)
132
+ return app
133
+
134
+ if __name__ == "__main__":
135
+ app = create_app()
136
+ app.launch()
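One detail of `apps/gradio_app.py` worth noting: the example buttons are wired through `create_example_fn(config, video_path)`, a factory that returns the click handler, rather than a lambda defined inside the loop. This sidesteps Python's late-binding closures, where every in-loop lambda would otherwise capture the last `config`. A standalone sketch of the difference (illustrative values, not from the repo):

```python
# Sketch only: why a factory is used instead of an in-loop lambda.
configs = ["config_1", "config_2", "config_3"]

late_bound = [lambda: cfg for cfg in configs]                    # every call sees the last cfg
via_factory = [(lambda c: (lambda: c))(cfg) for cfg in configs]  # each call keeps its own cfg

print([f() for f in late_bound])   # ['config_3', 'config_3', 'config_3']
print([f() for f in via_factory])  # ['config_1', 'config_2', 'config_3']
```

Launching the demo locally is presumably just `python apps/gradio_app.py`, since the module runs the setup script inside `create_app()` and calls `app.launch()` under `__main__`.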
apps/gradio_app/__init__.py ADDED
File without changes
apps/gradio_app/abc.py ADDED
File without changes
apps/gradio_app/inference.py ADDED
@@ -0,0 +1,104 @@
1
+ import sys
2
+ import os
3
+ import subprocess
4
+ from pathlib import Path
5
+ import uuid
6
+ import torch
7
+
8
+ # Append the current directory to sys.path
9
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
10
+
11
+ def run_setup_script():
12
+ setup_script = os.path.join(os.path.dirname(__file__), "setup_scripts.py")
13
+ try:
14
+ result = subprocess.run(["python", setup_script], capture_output=True, text=True, check=True)
15
+ return result.stdout
16
+ except subprocess.CalledProcessError as e:
17
+ return f"Setup script failed: {e.stderr}"
18
+
19
+ def run_inference(
20
+ model_path="./ckpts/zeroscope_v2_576w",
21
+ checkpoint_folder="./ckpts/zeroscope_v2_576w-Ghibli-LoRA",
22
+ prompt="Studio Ghibli style. Two women walk down coastal village path toward sea, passing colorful houses, sailboats visible.",
23
+ negative_prompt="ugly, noise, fragment, blur, static video",
24
+ width=256,
25
+ height=256,
26
+ num_frames=8,
27
+ num_steps=30,
28
+ guidance_scale=30.0,
29
+ fps=8,
30
+ lora_rank=32,
31
+ lora_scale=0.7,
32
+ noise_prior=0.1,
33
+ # device="cuda",
34
+ seed=100
35
+ ):
36
+ print("Start Inference")
37
+ output_dir = "apps/gradio_app/temp_data"
38
+ os.makedirs(output_dir, exist_ok=True)
39
+
40
+ # Get list of files in output_dir
41
+ for file_name in os.listdir(output_dir):
42
+ # Check if file ends with .mp4
43
+ if file_name.endswith(".mp4"):
44
+ # Remove the file
45
+ os.remove(os.path.join(output_dir, file_name))
46
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
47
+ command = [
48
+ "python", "src/third_party/MotionDirector/main_inference.py",
49
+ "--model", model_path,
50
+ "--checkpoint_folder", checkpoint_folder,
51
+ "--prompt", prompt,
52
+ "--negative-prompt", negative_prompt,
53
+ "--width", str(width),
54
+ "--height", str(height),
55
+ "--num-frames", str(num_frames),
56
+ "--num-steps", str(num_steps),
57
+ "--guidance-scale", str(guidance_scale),
58
+ "--fps", str(fps),
59
+ "--lora_rank", str(lora_rank),
60
+ "--lora_scale", str(lora_scale),
61
+ "--noise_prior", str(noise_prior),
62
+ "--device", device,
63
+ "--seed", str(seed),
64
+ "--output_dir", output_dir,
65
+ "--no-prompt-name"
66
+ ]
67
+
68
+ # Use Popen to execute the command
69
+ process = subprocess.Popen(
70
+ command,
71
+ stdout=subprocess.PIPE,
72
+ stderr=subprocess.PIPE,
73
+ text=True,
74
+ bufsize=1 # Line buffering
75
+ )
76
+
77
+ # Read output line-by-line in real-time
78
+ output_lines = []
79
+ try:
80
+ for line in process.stdout:
81
+ output_lines.append(line.strip())
82
+ except Exception as e:
83
+ return None, f"Error reading output: {str(e)}"
84
+
85
+ # Capture stderr and wait for process to complete
86
+ stderr_output = process.communicate()[1]
87
+ if process.returncode != 0:
88
+ return None, f"Error: {stderr_output.strip()}"
89
+
90
+ # Check for MP4 files in output directory
91
+ output_file = [f for f in os.listdir(output_dir) if f.lower().endswith('.mp4')]
92
+ if output_file:
93
+ output_path = os.path.join(output_dir, output_file[-1])
94
+ if os.path.exists(output_path):
95
+ return output_path, "\n".join(output_lines)
96
+ else:
97
+ return None, f"Video file not found at {output_path}\nLogs:\n" + "\n".join(output_lines)
98
+ return None, f"No MP4 files found in {output_dir}\nLogs:\n" + "\n".join(output_lines)
99
+
100
+ if __name__ == "__main__":
101
+ # Example usage
102
+ video_path, logs = run_inference()
103
+ print(f"Generated Video: {video_path}")
104
+ print(f"Logs: {logs}")
apps/gradio_app/new-inference.py ADDED
@@ -0,0 +1,104 @@
1
+ import sys
2
+ import os
3
+ import subprocess
4
+ from pathlib import Path
5
+ import uuid
6
+ import torch
7
+
8
+ # Append the current directory to sys.path
9
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
10
+
11
+ def run_setup_script():
12
+ setup_script = os.path.join(os.path.dirname(__file__), "setup_scripts.py")
13
+ try:
14
+ result = subprocess.run(["python", setup_script], capture_output=True, text=True, check=True)
15
+ return result.stdout
16
+ except subprocess.CalledProcessError as e:
17
+ return f"Setup script failed: {e.stderr}"
18
+
19
+ def run_inference(
20
+ model_path="./ckpts/zeroscope_v2_576w",
21
+ checkpoint_folder="./ckpts/zeroscope_v2_576w-Ghibli-LoRA",
22
+ prompt="Studio Ghibli style. Two women walk down coastal village path toward sea, passing colorful houses, sailboats visible.",
23
+ negative_prompt="ugly, noise, fragment, blur, static video",
24
+ width=256,
25
+ height=256,
26
+ num_frames=8,
27
+ num_steps=30,
28
+ guidance_scale=30.0,
29
+ fps=8,
30
+ lora_rank=32,
31
+ lora_scale=0.7,
32
+ noise_prior=0.1,
33
+ device="cuda",
34
+ seed=100
35
+ ):
36
+ print("Start Inference")
37
+ output_dir = "apps/gradio_app/temp_data"
38
+ os.makedirs(output_dir, exist_ok=True)
39
+
40
+ # Get list of files in output_dir
41
+ for file_name in os.listdir(output_dir):
42
+ # Check if file ends with .mp4
43
+ if file_name.endswith(".mp4"):
44
+ # Remove the file
45
+ os.remove(os.path.join(output_dir, file_name))
46
+
47
+ command = [
48
+ "python", "src/third_party/MotionDirector/main_inference.py",
49
+ "--model", model_path,
50
+ "--checkpoint_folder", checkpoint_folder,
51
+ "--prompt", prompt,
52
+ "--negative-prompt", negative_prompt,
53
+ "--width", str(width),
54
+ "--height", str(height),
55
+ "--num-frames", str(num_frames),
56
+ "--num-steps", str(num_steps),
57
+ "--guidance-scale", str(guidance_scale),
58
+ "--fps", str(fps),
59
+ "--lora_rank", str(lora_rank),
60
+ "--lora_scale", str(lora_scale),
61
+ "--noise_prior", str(noise_prior),
62
+ "--device", device,
63
+ "--seed", str(seed),
64
+ "--output_dir", output_dir,
65
+ "--no-prompt-name"
66
+ ]
67
+
68
+ # Use Popen to execute the command
69
+ process = subprocess.Popen(
70
+ command,
71
+ stdout=subprocess.PIPE,
72
+ stderr=subprocess.PIPE,
73
+ text=True,
74
+ bufsize=1 # Line buffering
75
+ )
76
+
77
+ # Read output line-by-line in real-time
78
+ output_lines = []
79
+ try:
80
+ for line in process.stdout:
81
+ output_lines.append(line.strip())
82
+ except Exception as e:
83
+ return None, f"Error reading output: {str(e)}"
84
+
85
+ # Capture stderr and wait for process to complete
86
+ stderr_output = process.communicate()[1]
87
+ if process.returncode != 0:
88
+ return None, f"Error: {stderr_output.strip()}"
89
+
90
+ # Check for MP4 files in output directory
91
+ output_file = [f for f in os.listdir(output_dir) if f.lower().endswith('.mp4')]
92
+ if output_file:
93
+ output_path = os.path.join(output_dir, output_file[-1])
94
+ if os.path.exists(output_path):
95
+ return output_path, "\n".join(output_lines)
96
+ else:
97
+ return None, f"Video file not found at {output_path}\nLogs:\n" + "\n".join(output_lines)
98
+ return None, f"No MP4 files found in {output_dir}\nLogs:\n" + "\n".join(output_lines)
99
+
100
+ if __name__ == "__main__":
101
+ # Example usage
102
+ video_path, logs = run_inference(device="cpu" if not torch.cuda.is_available() else "cuda")
103
+ print(f"Generated Video: {video_path}")
104
+ print(f"Logs: {logs}")
apps/gradio_app/old-inference.py ADDED
@@ -0,0 +1,73 @@
1
+ import os
2
+ import sys
3
+ import subprocess
4
+ from pathlib import Path
5
+ import uuid
6
+ import torch
7
+
8
+ # Append the current directory to sys.path
9
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
10
+
11
+ def run_setup_script():
12
+ setup_script = os.path.join(os.path.dirname(__file__), "setup_scripts.py")
13
+ try:
14
+ result = subprocess.run(["python", setup_script], capture_output=True, text=True, check=True)
15
+ return result.stdout
16
+ except subprocess.CalledProcessError as e:
17
+ return f"Setup script failed: {e.stderr}"
18
+
19
+ def run_inference(
20
+ model_path="./ckpts/zeroscope_v2_576w",
21
+ checkpoint_folder="./ckpts/zeroscope_v2_576w-Ghibli-LoRA",
22
+ prompt="Studio Ghibli style. Two women walk down coastal village path toward sea, passing colorful houses, sailboats visible.",
23
+ negative_prompt="ugly, noise, fragment, blur, static video",
24
+ width=512,
25
+ height=512,
26
+ num_frames=16,
27
+ num_steps=50,
28
+ guidance_scale=30.0,
29
+ fps=16,
30
+ lora_rank=96,
31
+ lora_scale=0.7,
32
+ noise_prior=0.1,
33
+ device="cuda",
34
+ seed=100
35
+ ):
36
+ output_dir = "apps/gradio_app/temp_data"
37
+ os.makedirs(output_dir, exist_ok=True)
38
+
39
+ command = [
40
+ "python", "src/third_party/MotionDirector/main_inference.py",
41
+ "--model", model_path,
42
+ "--checkpoint_folder", checkpoint_folder,
43
+ "--prompt", prompt,
44
+ "--negative-prompt", negative_prompt,
45
+ "--width", str(width),
46
+ "--height", str(height),
47
+ "--num-frames", str(num_frames),
48
+ "--num-steps", str(num_steps),
49
+ "--guidance-scale", str(guidance_scale),
50
+ "--fps", str(fps),
51
+ "--lora_rank", str(lora_rank),
52
+ "--lora_scale", str(lora_scale),
53
+ "--noise_prior", str(noise_prior),
54
+ "--device", device,
55
+ "--seed", str(seed),
56
+ "--output_dir", output_dir,
57
+ "--no-prompt-name"
58
+ ]
59
+
60
+ output_file = [f for f in os.listdir(output_dir) if f.lower().endswith('.mp4')]
61
+ print(os.path.join(output_dir, output_file[0]) if output_file else "No MP4 files found.")
62
+
63
+ try:
64
+ result = subprocess.run(command, capture_output=True, text=True, check=True)
65
+ return str(output_file), result.stdout
66
+ except subprocess.CalledProcessError as e:
67
+ return None, f"Error: {e.stderr}"
68
+
69
+ if __name__ == "__main__":
70
+ # Example usage
71
+ video, logs = run_inference(device="cpu" if not torch.cuda.is_available() else "cuda")
72
+ print(f"Generated Video: {video}")
73
+ print(f"Logs: {logs}")
apps/gradio_app/setup_scripts.py ADDED
@@ -0,0 +1,46 @@
1
+ import subprocess
2
+ import sys
3
+ import os
4
+
5
+ def run_script(script_path):
6
+ """
7
+ Run a Python script using subprocess and handle potential errors.
8
+ Returns True if successful, False otherwise.
9
+ """
10
+ try:
11
+ result = subprocess.run(
12
+ [sys.executable, script_path],
13
+ check=True,
14
+ text=True,
15
+ capture_output=True
16
+ )
17
+ print(f"Successfully executed {script_path}")
18
+ print(result.stdout)
19
+ return True
20
+ except subprocess.CalledProcessError as e:
21
+ print(f"Error executing {script_path}:")
22
+ print(e.stderr)
23
+ return False
24
+ except FileNotFoundError:
25
+ print(f"Script not found: {script_path}")
26
+ return False
27
+
28
+ def main():
29
+ """
30
+ Main function to execute setup_third_party.py and download_ckpts.py in sequence.
31
+ """
32
+ scripts_dir = "scripts"
33
+ scripts = [
34
+ os.path.join(scripts_dir, "setup_third_party.py"),
35
+ os.path.join(scripts_dir, "download_ckpts.py")
36
+ ]
37
+
38
+ for script in scripts:
39
+ print(f"Start running {script}\n")
40
+ if not run_script(script):
41
+ print(f"Stopping execution due to error in {script}")
42
+ sys.exit(1)
43
+ print(f"Completed {script}\n")
44
+
45
+ if __name__ == "__main__":
46
+ main()
apps/gradio_app/static/__init__.py ADDED
File without changes
apps/gradio_app/static/scripts.js ADDED
@@ -0,0 +1,50 @@
1
+ document.addEventListener('DOMContentLoaded', () => {
2
+ // Add loading animation to generate button
3
+ const generateBtn = document.querySelector('.generate-btn');
4
+ if (generateBtn) {
5
+ generateBtn.addEventListener('click', () => {
6
+ generateBtn.textContent = 'Generating...';
7
+ generateBtn.disabled = true;
8
+ generateBtn.style.opacity = '0.7';
9
+
10
+ // Reset button after 2 seconds (simulating async operation)
11
+ setTimeout(() => {
12
+ generateBtn.textContent = 'Generate Video';
13
+ generateBtn.disabled = false;
14
+ generateBtn.style.opacity = '1';
15
+ }, 2000);
16
+ });
17
+ }
18
+
19
+ // Add input validation feedback
20
+ const inputs = document.querySelectorAll('input[type="text"]');
21
+ inputs.forEach(input => {
22
+ input.addEventListener('input', () => {
23
+ if (input.value.trim() === '') {
24
+ input.style.borderColor = '#e53e3e';
25
+ } else {
26
+ input.style.borderColor = '#4c51bf';
27
+ }
28
+ });
29
+ });
30
+
31
+ // Add subtle animation to sliders
32
+ const sliders = document.querySelectorAll('input[type="range"]');
33
+ sliders.forEach(slider => {
34
+ slider.addEventListener('input', () => {
35
+ slider.style.transform = 'scale(1.02)';
36
+ setTimeout(() => {
37
+ slider.style.transform = 'scale(1)';
38
+ }, 200);
39
+ });
40
+ });
41
+
42
+ // Auto-resize textarea
43
+ const textarea = document.querySelector('textarea');
44
+ if (textarea) {
45
+ textarea.addEventListener('input', () => {
46
+ textarea.style.height = 'auto';
47
+ textarea.style.height = `${textarea.scrollHeight}px`;
48
+ });
49
+ }
50
+ });
apps/gradio_app/static/styles.css ADDED
@@ -0,0 +1,154 @@
1
+ :root {
2
+ --primary-color: #007bff;
3
+ --secondary-color: #6c757d;
4
+ --background-light: #f8f9fa;
5
+ --background-dark: #1a1a1a;
6
+ --text-light: #212529;
7
+ --text-dark: #e9ecef;
8
+ --accent-color: #28a745;
9
+ --border-color-light: #dee2e6;
10
+ --border-color-dark: #343a40;
11
+ --button-hover-light: #0056b3;
12
+ --button-hover-dark: #4dabf7;
13
+ --shadow-light: rgba(0, 0, 0, 0.1);
14
+ --shadow-dark: rgba(255, 255, 255, 0.1);
15
+ }
16
+
17
+ body {
18
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
19
+ margin: 0;
20
+ padding: 20px;
21
+ transition: background-color 0.3s, color 0.3s;
22
+ }
23
+
24
+ .light-theme {
25
+ background-color: var(--background-light);
26
+ color: var(--text-light);
27
+ }
28
+
29
+ .dark-theme {
30
+ background-color: var(--background-dark);
31
+ color: var(--text-dark);
32
+ }
33
+
34
+ .row-container {
35
+ display: flex;
36
+ gap: 20px;
37
+ margin-bottom: 20px;
38
+ }
39
+
40
+ .column-container {
41
+ background: var(--background-light);
42
+ border: 1px solid var(--border-color-light);
43
+ border-radius: 8px;
44
+ padding: 20px;
45
+ box-shadow: 0 2px 4px var(--shadow-light);
46
+ transition: background-color 0.3s, border-color 0.3s, box-shadow 0.3s;
47
+ }
48
+
49
+ .dark-theme .column-container {
50
+ background: var(--background-dark);
51
+ border-color: var(--border-color-dark);
52
+ box-shadow: 0 2px 4px var(--shadow-dark);
53
+ }
54
+
55
+ .slider-row {
56
+ margin: 10px 0;
57
+ }
58
+
59
+ .slider-group {
60
+ background: rgba(255, 255, 255, 0.5);
61
+ border-radius: 6px;
62
+ padding: 15px;
63
+ border: 1px solid var(--border-color-light);
64
+ transition: background-color 0.3s, border-color 0.3s;
65
+ }
66
+
67
+ .dark-theme .slider-group {
68
+ background: rgba(0, 0, 0, 0.3);
69
+ border-color: var(--border-color-dark);
70
+ }
71
+
72
+ .generate-btn {
73
+ background-color: var(--primary-color);
74
+ color: white;
75
+ border: none;
76
+ padding: 12px 24px;
77
+ border-radius: 6px;
78
+ font-size: 16px;
79
+ font-weight: 500;
80
+ cursor: pointer;
81
+ transition: background-color 0.3s, transform 0.2s;
82
+ }
83
+
84
+ .generate-btn:hover {
85
+ background-color: var(--button-hover-light);
86
+ transform: translateY(-2px);
87
+ }
88
+
89
+ .dark-theme .generate-btn {
90
+ background-color: var(--primary-color);
91
+ }
92
+
93
+ .dark-theme .generate-btn:hover {
94
+ background-color: var(--button-hover-dark);
95
+ }
96
+
97
+ .gr-button, .gr-textbox, .gr-slider, .gr-dropdown, .gr-number, .gr-video, .gr-markdown {
98
+ border-radius: 6px !important;
99
+ border: 1px solid var(--border-color-light) !important;
100
+ transition: border-color 0.3s, background-color 0.3s;
101
+ }
102
+
103
+ .dark-theme .gr-button,
104
+ .dark-theme .gr-textbox,
105
+ .dark-theme .gr-slider,
106
+ .dark-theme .gr-dropdown,
107
+ .dark-theme .gr-number,
108
+ .dark-theme .gr-video,
109
+ .dark-theme .gr-markdown {
110
+ border-color: var(--border-color-dark) !important;
111
+ background-color: rgba(255, 255, 255, 0.05) !important;
112
+ }
113
+
114
+ .gr-textbox input, .gr-number input {
115
+ background: transparent !important;
116
+ color: inherit !important;
117
+ font-size: 14px;
118
+ }
119
+
120
+ .gr-slider input[type="range"] {
121
+ accent-color: var(--primary-color);
122
+ }
123
+
124
+ .gr-dropdown select {
125
+ background: transparent !important;
126
+ color: inherit !important;
127
+ padding: 8px;
128
+ }
129
+
130
+ .gr-markdown h1, .gr-markdown h2, .gr-markdown h3 {
131
+ font-weight: 600;
132
+ margin-bottom: 10px;
133
+ color: var(--primary-color);
134
+ }
135
+
136
+ .dark-theme .gr-markdown h1,
137
+ .dark-theme .gr-markdown h2,
138
+ .dark-theme .gr-markdown h3 {
139
+ color: var(--button-hover-dark);
140
+ }
141
+
142
+ @media (max-width: 768px) {
143
+ .row-container {
144
+ flex-direction: column;
145
+ }
146
+
147
+ .column-container {
148
+ padding: 15px;
149
+ }
150
+
151
+ .generate-btn {
152
+ width: 100%;
153
+ }
154
+ }
assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/A_dog_is_running_with_Ghibli_style_42.gif ADDED

Git LFS Details

  • SHA256: ca16026bdc19faed0d40507c059fc882455eee05f106a1ecca9e4438a366f68e
  • Pointer size: 132 Bytes
  • Size of remote file: 1.38 MB
assets/examples/zeroscope_v2_576w-Ghibli-LoRA/2/A_girl_is_walking_with_Ghibli_style_0.gif ADDED

Git LFS Details

  • SHA256: 0254e790e96f25d81910e2eedaedb628931294a38a9a2914f12a12694933c2a8
  • Pointer size: 131 Bytes
  • Size of remote file: 315 kB
assets/examples/zeroscope_v2_576w-Ghibli-LoRA/2/config.json CHANGED
@@ -4,7 +4,7 @@
  "height": 384,
  "width": 384,
  "num-frames": 28,
- "num-steps": 50
+ "num-steps": 50,
  "guidance_scale": 15,
  "fps": 16,
  "lora_rank": 128,
assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/Studio_Ghibli_style_Young_man_contemplates_walks_away_from_ivy-covered_yellow_building_12345.gif ADDED

Git LFS Details

  • SHA256: 130fffec21d318bf17095cb30f85fd36f7a639c70b6bbfa9333db96e254be0a1
  • Pointer size: 131 Bytes
  • Size of remote file: 921 kB
assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/config.json CHANGED
@@ -4,7 +4,7 @@
  "height": 384,
  "width": 384,
  "num-frames": 28,
- "num-steps": 50
+ "num-steps": 50,
  "guidance_scale": 15,
  "fps": 16,
  "lora_rank": 32,
assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/Studio_Ghibli_style_Two_women_walk_down_coastal_village_path_toward_sea_passing_colorful_houses_sailboats_visible_100.gif ADDED

Git LFS Details

  • SHA256: 1a81aa08926211ac88420151497ca422469cc102a332cfd5b3865693e7ee005a
  • Pointer size: 132 Bytes
  • Size of remote file: 1.07 MB
assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/config.json CHANGED
@@ -4,7 +4,7 @@
  "height": 512,
  "width": 512,
  "num-frames": 16,
- "num-steps": 50
+ "num-steps": 50,
  "guidance_scale": 30,
  "fps": 16,
  "lora_rank": 96,
configs/config_multi_videos.yaml ADDED
@@ -0,0 +1,131 @@
1
+ # Pretrained diffusers model path.
2
+ pretrained_model_path: "./ckpts/zeroscope_v2_576w"
3
+ # pretrained_model_path: "./ckpts/text-to-video-ms-1.7b"
4
+ # The folder where your training outputs will be placed.
5
+ output_dir: "./zeroscope_v2_576w-Ghibli-LoRA"
6
+ # resume_step: 500
7
+ # resume_from_checkpoint: "./zeroscope_v2_576w-Scenery_Anime_Bright-lora/train_2025-07-10T13-46-57"
8
+ # lora_path: "zeroscope_v2_576w-Scenery_Anime_Bright-lora/checkpoint-500" # This argument is used for training resumption
9
+ # lora_path: zeroscope_v2_576w-Ghibli-LoRA/train_2025-07-13T06-46-47/checkpoint-200
10
+
11
+ dataset_types:
12
+ - 'folder'
13
+
14
+ # Caches the latents (Frames-Image -> VAE -> Latent) to a HDD or SDD.
15
+ # The latents will be saved under your training folder, and loaded automatically for training.
16
+ # This both saves memory and speeds up training and takes very little disk space.
17
+ cache_latents: True
18
+
19
+
20
+ # If you have cached latents set to `True` and have a directory of cached latents,
21
+ # you can skip the caching process and load previously saved ones.
22
+ cached_latent_dir: null #/path/to/cached_latents
23
+ # cached_latent_dir: zeroscope_v2_576w-Ghibli-LoRA/train_2025-07-13T06-46-47/cached_latents
24
+
25
+ # Use LoRA for the UNET model.
26
+ use_unet_lora: True
27
+
28
+ # LoRA Dropout. This parameter adds the probability of randomly zeros out elements. Helps prevent overfitting.
29
+ # See: https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html
30
+ lora_unet_dropout: 0.1
31
+
32
+ # Choose whether or not ito save the full pretrained model weights for both checkpoints and after training.
33
+ # The only time you want this off is if you're doing full LoRA training.
34
+ save_pretrained_model: True
35
+ # save_pretrained_model: True
36
+
37
+ # The rank for LoRA training. With ModelScope, the maximum should be 1024.
38
+ # VRAM increases with higher rank, lower when decreased.
39
+ lora_rank: 16
40
+
41
+ # Training data parameters
42
+ train_data:
43
+ # 'multiple videos'
44
+ path: "./data/ghibli/videos"
45
+ # The width and height in which you want your training data to be resized to.
46
+ width: 384
47
+ height: 384
48
+
49
+ # This will find the closest aspect ratio to your input width and height.
50
+ # For example, 512x512 width and height with a video of resolution 1280x720 will be resized to 512x256
51
+ use_bucketing: True
52
+ gradient_accumulation_steps: 2
53
+ batch_size: 1
54
+ # The start frame index where your videos should start (Leave this at one for json and folder based training).
55
+ sample_start_idx: 1
56
+
57
+ # Used for 'folder'. The rate at which your frames are sampled. Does nothing for 'json' and 'single_video' dataset.
58
+ fps: 16
59
+
60
+ # For 'single_video' and 'json'. The number of frames to "step" (1,2,3,4) (frame_step=2) -> (1,3,5,7, ...).
61
+ frame_step: 1
62
+
63
+ # The number of frames to sample. The higher this number, the higher the VRAM (acts similar to batch size).
64
+ n_sample_frames: 24
65
+
66
+ # The prompt when using a a single video file
67
+ # fallback_prompt: "A person is riding a bicycle."
68
+
69
+ # Validation data parameters.
70
+ validation_data:
71
+ # A custom prompt that is different from your training dataset.
72
+ prompt:
73
+ - "Studio Ghibli style. The video showcases a vibrant and lively scene set in the early."
74
+ - "Studio Ghibli style. A woman with black hair is holding a gun in her hand."
75
+
76
+ # Whether or not to sample preview during training (Requires more VRAM).
77
+ # sample_preview: True
78
+ sample_preview: False
79
+
80
+ # The number of frames to sample during validation.
81
+ num_frames: 24
82
+
83
+ # Height and width of validation sample.
84
+ width: 384
85
+ height: 384
86
+
87
+ # Number of inference steps when generating the video.
88
+ num_inference_steps: 15
89
+
90
+ # CFG scale
91
+ guidance_scale: 12
92
+
93
+ # scale of spatial LoRAs, default is 0
94
+ spatial_scale: 0
95
+
96
+ # scale of noise prior, i.e. the scale of inversion noises
97
+ noise_prior: 0
98
+
99
+ use_offset_noise: False
100
+ offset_noise_strength: 0.
101
+
102
+ # Learning rate for AdamW
103
+ learning_rate: 5e-4
104
+
105
+ # Weight decay. Higher = more regularization. Lower = closer to dataset.
106
+ adam_weight_decay: 1e-4
107
+
108
+ # Maximum number of train steps. Model is saved after training.
109
+ max_train_steps: 5000
110
+
111
+ # Saves a model every nth step.
112
+ checkpointing_steps: 5000
113
+
114
+ # How many steps to do for validation if sample_preview is enabled.
115
+ validation_steps: 5000
116
+
117
+ # Whether or not we want to use mixed precision with accelerate
118
+ mixed_precision: "fp16"
119
+ # mixed_precision: "no"
120
+
121
+ # Trades VRAM usage for speed. You lose roughly 20% of training speed, but save a lot of VRAM.
122
+ # If you need to save more VRAM, it can also be enabled for the text encoder, but reduces speed x2.
123
+ gradient_checkpointing: True
124
+ text_encoder_gradient_checkpointing: True
125
+
126
+ # Xformers must be installed for best memory savings and performance (< Pytorch 2.0)
127
+ enable_xformers_memory_efficient_attention: True
128
+ use_8bit_adam: True
129
+
130
+ # Use scaled dot product attention (Only available with >= Torch 2.0)
131
+ enable_torch_2_attn: True
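`configs/config_multi_videos.yaml` is the multi-video LoRA training config, presumably consumed by the training entry point listed above (`src/text2video_ghibli_style/train.py`, which wraps MotionDirector). A minimal sketch, assuming PyYAML is available, for loading it and inspecting a few of the fields documented in the file before launching a run:

```python
import yaml  # PyYAML

# Sketch only: read the training config and print a few key fields.
with open("configs/config_multi_videos.yaml", "r") as f:
    cfg = yaml.safe_load(f)

print(cfg["pretrained_model_path"])            # ./ckpts/zeroscope_v2_576w
print(cfg["output_dir"])                       # ./zeroscope_v2_576w-Ghibli-LoRA
print(cfg["train_data"]["path"],
      cfg["train_data"]["n_sample_frames"])    # ./data/ghibli/videos 24
print(cfg["lora_rank"], cfg["max_train_steps"])  # 16 5000
```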
notebooks/zeroscope_v2_576w_Ghibli_LoRA-Inference.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/zeroscope_v2_576w_Ghibli_LoRA-Training.ipynb ADDED
@@ -0,0 +1,802 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {
7
+ "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
8
+ "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
9
+ "execution": {
10
+ "iopub.execute_input": "2025-07-16T05:27:38.872329Z",
11
+ "iopub.status.busy": "2025-07-16T05:27:38.872068Z",
12
+ "iopub.status.idle": "2025-07-16T05:29:50.846263Z",
13
+ "shell.execute_reply": "2025-07-16T05:29:50.845486Z",
14
+ "shell.execute_reply.started": "2025-07-16T05:27:38.872302Z"
15
+ },
16
+ "trusted": true
17
+ },
18
+ "outputs": [
19
+ {
20
+ "name": "stdout",
21
+ "output_type": "stream",
22
+ "text": [
23
+ "/content\n",
24
+ "Cloning into 'MotionDirector'...\n",
25
+ "remote: Enumerating objects: 657, done.\u001b[K\n",
26
+ "remote: Counting objects: 100% (163/163), done.\u001b[K\n",
27
+ "remote: Compressing objects: 100% (82/82), done.\u001b[K\n",
28
+ "remote: Total 657 (delta 108), reused 88 (delta 81), pack-reused 494 (from 1)\u001b[K\n",
29
+ "Receiving objects: 100% (657/657), 132.29 MiB | 50.34 MiB/s, done.\n",
30
+ "Resolving deltas: 100% (349/349), done.\n",
31
+ "/content/MotionDirector\n",
32
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
33
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
34
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m26.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
35
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
36
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
37
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
38
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m362.1/362.1 kB\u001b[0m \u001b[31m24.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
39
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
40
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m90.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
41
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
42
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m44.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
43
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
44
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
45
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m19.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
46
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
47
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
48
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m73.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
49
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.6/13.6 MB\u001b[0m \u001b[31m62.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
50
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m72.9/72.9 MB\u001b[0m \u001b[31m13.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
51
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.8/44.8 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
52
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.0/54.0 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
53
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m35.6/35.6 MB\u001b[0m \u001b[31m47.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
54
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m294.9/294.9 kB\u001b[0m \u001b[31m18.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
55
+ "\u001b[?25h Building wheel for deepspeed (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
56
+ " Building wheel for lora_diffusion (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
57
+ " Building wheel for loralib (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
58
+ " Building wheel for fire (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
59
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
60
+ "bigframes 2.8.0 requires google-cloud-bigquery-storage<3.0.0,>=2.30.0, which is not installed.\n",
61
+ "google-api-core 1.34.1 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<4.0.0dev,>=3.19.5, but you have protobuf 4.25.8 which is incompatible.\n",
62
+ "pandas-gbq 0.29.1 requires google-api-core<3.0.0,>=2.10.2, but you have google-api-core 1.34.1 which is incompatible.\n",
63
+ "google-cloud-storage 2.19.0 requires google-api-core<3.0.0dev,>=2.15.0, but you have google-api-core 1.34.1 which is incompatible.\n",
64
+ "dataproc-spark-connect 0.7.5 requires google-api-core>=2.19, but you have google-api-core 1.34.1 which is incompatible.\n",
65
+ "bigframes 2.8.0 requires google-cloud-bigquery[bqstorage,pandas]>=3.31.0, but you have google-cloud-bigquery 3.25.0 which is incompatible.\n",
66
+ "bigframes 2.8.0 requires rich<14,>=12.4.4, but you have rich 14.0.0 which is incompatible.\u001b[0m\u001b[31m\n",
67
+ "\u001b[0m"
68
+ ]
69
+ }
70
+ ],
71
+ "source": [
72
+ "# install packages\n",
73
+ "%cd /content\n",
74
+ "!git clone https://github.com/danhtran2mind/MotionDirector\n",
75
+ "%cd MotionDirector\n",
76
+ "!pip install -r requirements.txt -q"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": 2,
82
+ "metadata": {
83
+ "execution": {
84
+ "iopub.execute_input": "2025-07-16T05:29:50.848033Z",
85
+ "iopub.status.busy": "2025-07-16T05:29:50.847771Z",
86
+ "iopub.status.idle": "2025-07-16T05:29:54.955247Z",
87
+ "shell.execute_reply": "2025-07-16T05:29:54.954373Z",
88
+ "shell.execute_reply.started": "2025-07-16T05:29:50.848010Z"
89
+ },
90
+ "trusted": true
91
+ },
92
+ "outputs": [
93
+ {
94
+ "name": "stdout",
95
+ "output_type": "stream",
96
+ "text": [
97
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m235.8/235.8 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
98
+ "\u001b[?25h"
99
+ ]
100
+ }
101
+ ],
102
+ "source": [
103
+ "!pip install -q bitsandbytes unidecode"
104
+ ]
105
+ },
106
+ {
107
+ "cell_type": "code",
108
+ "execution_count": 3,
109
+ "metadata": {
110
+ "execution": {
111
+ "iopub.execute_input": "2025-07-16T05:29:54.956819Z",
112
+ "iopub.status.busy": "2025-07-16T05:29:54.956511Z",
113
+ "iopub.status.idle": "2025-07-16T05:29:54.963707Z",
114
+ "shell.execute_reply": "2025-07-16T05:29:54.962891Z",
115
+ "shell.execute_reply.started": "2025-07-16T05:29:54.956786Z"
116
+ },
117
+ "trusted": true
118
+ },
119
+ "outputs": [],
120
+ "source": [
121
+ "import os\n",
122
+ "import shutil\n",
123
+ "import random\n",
124
+ "\n",
125
+ "def copy_file_pairs(source_dir, dest_dir, max_pairs=20, seed=None):\n",
126
+ " if seed is not None:\n",
127
+ " random.seed(seed)\n",
128
+ " os.makedirs(dest_dir, exist_ok=True)\n",
129
+ " mp4_files = [f for f in os.listdir(source_dir) if f.endswith('.mp4')]\n",
130
+ " selected_mp4_files = random.sample(mp4_files, min(len(mp4_files), max_pairs))\n",
131
+ " for mp4 in selected_mp4_files:\n",
132
+ " base = os.path.splitext(mp4)[0]\n",
133
+ " txt = f\"{base}.txt\"\n",
134
+ " if os.path.exists(os.path.join(source_dir, txt)):\n",
135
+ " shutil.copy2(os.path.join(source_dir, mp4), os.path.join(dest_dir, mp4))\n",
136
+ " shutil.copy2(os.path.join(source_dir, txt), os.path.join(dest_dir, txt))\n",
137
+ " return len(selected_mp4_files)\n"
138
+ ]
139
+ },
140
+ {
141
+ "cell_type": "code",
142
+ "execution_count": 4,
143
+ "metadata": {
144
+ "execution": {
145
+ "iopub.execute_input": "2025-07-16T05:29:54.965605Z",
146
+ "iopub.status.busy": "2025-07-16T05:29:54.965374Z",
147
+ "iopub.status.idle": "2025-07-16T05:30:00.766653Z",
148
+ "shell.execute_reply": "2025-07-16T05:30:00.766019Z",
149
+ "shell.execute_reply.started": "2025-07-16T05:29:54.965578Z"
150
+ },
151
+ "trusted": true
152
+ },
153
+ "outputs": [
154
+ {
155
+ "data": {
156
+ "application/vnd.jupyter.widget-view+json": {
157
+ "model_id": "2b0f51df7a5047bd8d404fca30add463",
158
+ "version_major": 2,
159
+ "version_minor": 0
160
+ },
161
+ "text/plain": [
162
+ "Fetching 2 files: 0%| | 0/2 [00:00<?, ?it/s]"
163
+ ]
164
+ },
165
+ "metadata": {},
166
+ "output_type": "display_data"
167
+ },
168
+ {
169
+ "data": {
170
+ "application/vnd.jupyter.widget-view+json": {
171
+ "model_id": "b800f95e15fb4064854902602aa3a3dd",
172
+ "version_major": 2,
173
+ "version_minor": 0
174
+ },
175
+ "text/plain": [
176
+ ".gitattributes: 0.00B [00:00, ?B/s]"
177
+ ]
178
+ },
179
+ "metadata": {},
180
+ "output_type": "display_data"
181
+ },
182
+ {
183
+ "data": {
184
+ "application/vnd.jupyter.widget-view+json": {
185
+ "model_id": "6a6cc4ba523f4a18adbc9ffaa3525340",
186
+ "version_major": 2,
187
+ "version_minor": 0
188
+ },
189
+ "text/plain": [
190
+ "studio_ghibli_wan14b_t2v_v01_dataset.zip: 0%| | 0.00/300M [00:00<?, ?B/s]"
191
+ ]
192
+ },
193
+ "metadata": {},
194
+ "output_type": "display_data"
195
+ }
196
+ ],
197
+ "source": [
198
+ "import os\n",
199
+ "\n",
200
+ "from huggingface_hub import snapshot_download\n",
201
+ "\n",
202
+ "# Create directory if it doesn't exist\n",
203
+ "os.makedirs(\"data/ghibli/raw\", exist_ok=True)\n",
204
+ "\n",
205
+ "# Download the dataset using snapshot_download\n",
206
+ "snapshot_download(repo_id=\"raymondt/ghibi_t2v\", \n",
207
+ " local_dir=\"data/ghibli/raw\", \n",
208
+ " repo_type=\"dataset\")\n",
209
+ "\n",
210
+ "# Assuming the zip file is downloaded, unzip it to the target directory\n",
211
+ "import zipfile\n",
212
+ "zip_path = \"data/ghibli/raw/studio_ghibli_wan14b_t2v_v01_dataset.zip\"\n",
213
+ "extract_path = \"data/ghibli/raw\"\n",
214
+ "\n",
215
+ "with zipfile.ZipFile(zip_path, 'r') as zip_ref:\n",
216
+ " zip_ref.extractall(extract_path)\n",
217
+ "\n"
218
+ ]
219
+ },
220
+ {
221
+ "cell_type": "code",
222
+ "execution_count": 5,
223
+ "metadata": {
224
+ "execution": {
225
+ "iopub.execute_input": "2025-07-16T05:30:00.767663Z",
226
+ "iopub.status.busy": "2025-07-16T05:30:00.767419Z",
227
+ "iopub.status.idle": "2025-07-16T05:30:01.056828Z",
228
+ "shell.execute_reply": "2025-07-16T05:30:01.056142Z",
229
+ "shell.execute_reply.started": "2025-07-16T05:30:00.767643Z"
230
+ },
231
+ "trusted": true
232
+ },
233
+ "outputs": [
234
+ {
235
+ "name": "stdout",
236
+ "output_type": "stream",
237
+ "text": [
238
+ "Copied 240 pairs to data/ghibli/videos\n"
239
+ ]
240
+ }
241
+ ],
242
+ "source": [
243
+ "# Copy the videos directory to the desired location\n",
244
+ "source = \"data/ghibli/raw/videos/1920x1040\"\n",
245
+ "dest = \"data/ghibli/videos\"\n",
246
+ "\n",
247
+ "copied = copy_file_pairs(source, dest, max_pairs=240, seed=42)\n",
248
+ "print(f\"Copied {copied} pairs to {dest}\")"
249
+ ]
250
+ },
251
+ {
252
+ "cell_type": "code",
253
+ "execution_count": 6,
254
+ "metadata": {
255
+ "execution": {
256
+ "iopub.execute_input": "2025-07-16T05:30:01.057811Z",
257
+ "iopub.status.busy": "2025-07-16T05:30:01.057583Z",
258
+ "iopub.status.idle": "2025-07-16T05:30:29.547286Z",
259
+ "shell.execute_reply": "2025-07-16T05:30:29.546428Z",
260
+ "shell.execute_reply.started": "2025-07-16T05:30:01.057784Z"
261
+ },
262
+ "trusted": true
263
+ },
264
+ "outputs": [
265
+ {
266
+ "data": {
267
+ "application/vnd.jupyter.widget-view+json": {
268
+ "model_id": "98bbd6ea501745bcabdb7f89bdb7af95",
269
+ "version_major": 2,
270
+ "version_minor": 0
271
+ },
272
+ "text/plain": [
273
+ "Fetching 16 files: 0%| | 0/16 [00:00<?, ?it/s]"
274
+ ]
275
+ },
276
+ "metadata": {},
277
+ "output_type": "display_data"
278
+ },
279
+ {
280
+ "data": {
281
+ "application/vnd.jupyter.widget-view+json": {
282
+ "model_id": "6b20bb02614d4dfdb037484f1bbf06ed",
283
+ "version_major": 2,
284
+ "version_minor": 0
285
+ },
286
+ "text/plain": [
287
+ "merges.txt: 0.00B [00:00, ?B/s]"
288
+ ]
289
+ },
290
+ "metadata": {},
291
+ "output_type": "display_data"
292
+ },
293
+ {
294
+ "data": {
295
+ "application/vnd.jupyter.widget-view+json": {
296
+ "model_id": "d7d127ca77964c7a9ab58b55dced9701",
297
+ "version_major": 2,
298
+ "version_minor": 0
299
+ },
300
+ "text/plain": [
301
+ "model_index.json: 0%| | 0.00/384 [00:00<?, ?B/s]"
302
+ ]
303
+ },
304
+ "metadata": {},
305
+ "output_type": "display_data"
306
+ },
307
+ {
308
+ "data": {
309
+ "application/vnd.jupyter.widget-view+json": {
310
+ "model_id": "1b9272edd1784c12baaa2826a1effcd3",
311
+ "version_major": 2,
312
+ "version_minor": 0
313
+ },
314
+ "text/plain": [
315
+ "README.md: 0.00B [00:00, ?B/s]"
316
+ ]
317
+ },
318
+ "metadata": {},
319
+ "output_type": "display_data"
320
+ },
321
+ {
322
+ "data": {
323
+ "application/vnd.jupyter.widget-view+json": {
324
+ "model_id": "8f7a542514544640af6d140501a7a05b",
325
+ "version_major": 2,
326
+ "version_minor": 0
327
+ },
328
+ "text/plain": [
329
+ "special_tokens_map.json: 0%| | 0.00/460 [00:00<?, ?B/s]"
330
+ ]
331
+ },
332
+ "metadata": {},
333
+ "output_type": "display_data"
334
+ },
335
+ {
336
+ "data": {
337
+ "application/vnd.jupyter.widget-view+json": {
338
+ "model_id": "d33b170a93dd430386b8bd5f045f3419",
339
+ "version_major": 2,
340
+ "version_minor": 0
341
+ },
342
+ "text/plain": [
343
+ "scheduler_config.json: 0%| | 0.00/465 [00:00<?, ?B/s]"
344
+ ]
345
+ },
346
+ "metadata": {},
347
+ "output_type": "display_data"
348
+ },
349
+ {
350
+ "data": {
351
+ "application/vnd.jupyter.widget-view+json": {
352
+ "model_id": "d64f3a8914004c2283450052563fa0cd",
353
+ "version_major": 2,
354
+ "version_minor": 0
355
+ },
356
+ "text/plain": [
357
+ ".gitattributes: 0.00B [00:00, ?B/s]"
358
+ ]
359
+ },
360
+ "metadata": {},
361
+ "output_type": "display_data"
362
+ },
363
+ {
364
+ "data": {
365
+ "application/vnd.jupyter.widget-view+json": {
366
+ "model_id": "7909289ad6a747f4a6fbd81db836e7b5",
367
+ "version_major": 2,
368
+ "version_minor": 0
369
+ },
370
+ "text/plain": [
371
+ "config.json: 0%| | 0.00/609 [00:00<?, ?B/s]"
372
+ ]
373
+ },
374
+ "metadata": {},
375
+ "output_type": "display_data"
376
+ },
377
+ {
378
+ "data": {
379
+ "application/vnd.jupyter.widget-view+json": {
380
+ "model_id": "d562fad6588646f3929d7dfd805e001e",
381
+ "version_major": 2,
382
+ "version_minor": 0
383
+ },
384
+ "text/plain": [
385
+ "pytorch_model.bin: 0%| | 0.00/681M [00:00<?, ?B/s]"
386
+ ]
387
+ },
388
+ "metadata": {},
389
+ "output_type": "display_data"
390
+ },
391
+ {
392
+ "data": {
393
+ "application/vnd.jupyter.widget-view+json": {
394
+ "model_id": "0b4361967a684e7c99075ba14b7ec864",
395
+ "version_major": 2,
396
+ "version_minor": 0
397
+ },
398
+ "text/plain": [
399
+ "config.json: 0%| | 0.00/727 [00:00<?, ?B/s]"
400
+ ]
401
+ },
402
+ "metadata": {},
403
+ "output_type": "display_data"
404
+ },
405
+ {
406
+ "data": {
407
+ "application/vnd.jupyter.widget-view+json": {
408
+ "model_id": "55a17f9d827c4656b1ae272d679b0a26",
409
+ "version_major": 2,
410
+ "version_minor": 0
411
+ },
412
+ "text/plain": [
413
+ "tokenizer_config.json: 0%| | 0.00/737 [00:00<?, ?B/s]"
414
+ ]
415
+ },
416
+ "metadata": {},
417
+ "output_type": "display_data"
418
+ },
419
+ {
420
+ "data": {
421
+ "application/vnd.jupyter.widget-view+json": {
422
+ "model_id": "d5ebea88cba841d0a117156384a7af6d",
423
+ "version_major": 2,
424
+ "version_minor": 0
425
+ },
426
+ "text/plain": [
427
+ "config.json: 0%| | 0.00/636 [00:00<?, ?B/s]"
428
+ ]
429
+ },
430
+ "metadata": {},
431
+ "output_type": "display_data"
432
+ },
433
+ {
434
+ "data": {
435
+ "application/vnd.jupyter.widget-view+json": {
436
+ "model_id": "f7bc8fb35a9f44deb336bb1b109298ee",
437
+ "version_major": 2,
438
+ "version_minor": 0
439
+ },
440
+ "text/plain": [
441
+ "open_clip_pytorch_model.bin: 0%| | 0.00/1.97G [00:00<?, ?B/s]"
442
+ ]
443
+ },
444
+ "metadata": {},
445
+ "output_type": "display_data"
446
+ },
447
+ {
448
+ "data": {
449
+ "application/vnd.jupyter.widget-view+json": {
450
+ "model_id": "2ba9dca7dfb2455aaf4627d43390d550",
451
+ "version_major": 2,
452
+ "version_minor": 0
453
+ },
454
+ "text/plain": [
455
+ "diffusion_pytorch_model.bin: 0%| | 0.00/2.82G [00:00<?, ?B/s]"
456
+ ]
457
+ },
458
+ "metadata": {},
459
+ "output_type": "display_data"
460
+ },
461
+ {
462
+ "data": {
463
+ "application/vnd.jupyter.widget-view+json": {
464
+ "model_id": "aab2fd449c9b44a58ff281b30deb6e37",
465
+ "version_major": 2,
466
+ "version_minor": 0
467
+ },
468
+ "text/plain": [
469
+ "vocab.json: 0.00B [00:00, ?B/s]"
470
+ ]
471
+ },
472
+ "metadata": {},
473
+ "output_type": "display_data"
474
+ },
475
+ {
476
+ "data": {
477
+ "application/vnd.jupyter.widget-view+json": {
478
+ "model_id": "6aca918dcb4f4b4b9f6804859ea65ab6",
479
+ "version_major": 2,
480
+ "version_minor": 0
481
+ },
482
+ "text/plain": [
483
+ "text2video_pytorch_model.pth: 0%| | 0.00/2.82G [00:00<?, ?B/s]"
484
+ ]
485
+ },
486
+ "metadata": {},
487
+ "output_type": "display_data"
488
+ },
489
+ {
490
+ "data": {
491
+ "application/vnd.jupyter.widget-view+json": {
492
+ "model_id": "bf61eb3a364e455b89379e66e8f304d7",
493
+ "version_major": 2,
494
+ "version_minor": 0
495
+ },
496
+ "text/plain": [
497
+ "diffusion_pytorch_model.bin: 0%| | 0.00/167M [00:00<?, ?B/s]"
498
+ ]
499
+ },
500
+ "metadata": {},
501
+ "output_type": "display_data"
502
+ },
503
+ {
504
+ "data": {
505
+ "text/plain": [
506
+ "'/content/MotionDirector/ckpts/zeroscope_v2_576w'"
507
+ ]
508
+ },
509
+ "execution_count": 6,
510
+ "metadata": {},
511
+ "output_type": "execute_result"
512
+ }
513
+ ],
514
+ "source": [
515
+ "from huggingface_hub import snapshot_download\n",
516
+ "# Download ZeroScope model snapshot\n",
517
+ "repo_id = \"cerspense/zeroscope_v2_576w\"\n",
518
+ "snapshot_download(repo_id=repo_id,\n",
519
+ " local_dir=\"./ckpts/zeroscope_v2_576w\")"
520
+ ]
521
+ },
522
+ {
523
+ "cell_type": "code",
524
+ "execution_count": 9,
525
+ "metadata": {
526
+ "execution": {
527
+ "iopub.execute_input": "2025-07-15T14:12:26.903891Z",
528
+ "iopub.status.busy": "2025-07-15T14:12:26.903623Z",
529
+ "iopub.status.idle": "2025-07-15T14:12:26.907531Z",
530
+ "shell.execute_reply": "2025-07-15T14:12:26.906880Z",
531
+ "shell.execute_reply.started": "2025-07-15T14:12:26.903873Z"
532
+ },
533
+ "trusted": true
534
+ },
535
+ "outputs": [],
536
+ "source": [
537
+ "import torch\n",
538
+ "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
539
+ "# device"
540
+ ]
541
+ },
542
+ {
543
+ "cell_type": "code",
544
+ "execution_count": null,
545
+ "metadata": {
546
+ "execution": {
547
+ "iopub.execute_input": "2025-07-16T05:35:53.783879Z",
548
+ "iopub.status.busy": "2025-07-16T05:35:53.783504Z",
549
+ "iopub.status.idle": "2025-07-16T05:35:53.791697Z",
550
+ "shell.execute_reply": "2025-07-16T05:35:53.791129Z",
551
+ "shell.execute_reply.started": "2025-07-16T05:35:53.783849Z"
552
+ },
553
+ "trusted": true
554
+ },
555
+ "outputs": [
556
+ {
557
+ "name": "stdout",
558
+ "output_type": "stream",
559
+ "text": [
560
+ "Overwriting configs/config_multi_videos.yaml\n"
561
+ ]
562
+ }
563
+ ],
564
+ "source": [
565
+ "%%writefile configs/config_multi_videos.yaml\n",
566
+ "# Pretrained diffusers model path.\n",
567
+ "pretrained_model_path: \"./ckpts/zeroscope_v2_576w\"\n",
568
+ "# pretrained_model_path: \"./ckpts/text-to-video-ms-1.7b\"\n",
569
+ "# The folder where your training outputs will be placed.\n",
570
+ "output_dir: \"./zeroscope_v2_576w-Ghibli-LoRA\"\n",
571
+ "# resume_step: 500\n",
572
+ "# resume_from_checkpoint: \"./zeroscope_v2_576w-Scenery_Anime_Bright-lora/train_2025-07-10T13-46-57\"\n",
573
+ "# lora_path: \"zeroscope_v2_576w-Scenery_Anime_Bright-lora/checkpoint-500\" # This argument is used for training resumption\n",
574
+ "# lora_path: zeroscope_v2_576w-Ghibli-LoRA/train_2025-07-13T06-46-47/checkpoint-200\n",
575
+ "\n",
576
+ "dataset_types:\n",
577
+ " - 'folder'\n",
578
+ "\n",
579
+ "# Caches the latents (Frames-Image -> VAE -> Latent) to a HDD or SDD.\n",
580
+ "# The latents will be saved under your training folder, and loaded automatically for training.\n",
581
+ "# This both saves memory and speeds up training and takes very little disk space.\n",
582
+ "cache_latents: True\n",
583
+ "\n",
584
+ "\n",
585
+ "# If you have cached latents set to `True` and have a directory of cached latents,\n",
586
+ "# you can skip the caching process and load previously saved ones.\n",
587
+ "cached_latent_dir: null #/path/to/cached_latents\n",
588
+ "# cached_latent_dir: zeroscope_v2_576w-Ghibli-LoRA/train_2025-07-13T06-46-47/cached_latents\n",
589
+ "\n",
590
+ "# Use LoRA for the UNET model.\n",
591
+ "use_unet_lora: True\n",
592
+ "\n",
593
+ "# LoRA Dropout. This parameter adds the probability of randomly zeros out elements. Helps prevent overfitting.\n",
594
+ "# See: https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html\n",
595
+ "lora_unet_dropout: 0.1\n",
596
+ "\n",
597
+ "# Choose whether or not ito save the full pretrained model weights for both checkpoints and after training.\n",
598
+ "# The only time you want this off is if you're doing full LoRA training.\n",
599
+ "save_pretrained_model: True\n",
600
+ "# save_pretrained_model: True\n",
601
+ "\n",
602
+ "# The rank for LoRA training. With ModelScope, the maximum should be 1024.\n",
603
+ "# VRAM increases with higher rank, lower when decreased.\n",
604
+ "lora_rank: 16\n",
605
+ "\n",
606
+ "# Training data parameters\n",
607
+ "train_data:\n",
608
+ " # 'multiple videos'\n",
609
+ " path: \"./data/ghibli/videos\"\n",
610
+ " # The width and height in which you want your training data to be resized to.\n",
611
+ " width: 384\n",
612
+ " height: 384\n",
613
+ "\n",
614
+ " # This will find the closest aspect ratio to your input width and height.\n",
615
+ " # For example, 512x512 width and height with a video of resolution 1280x720 will be resized to 512x256\n",
616
+ " use_bucketing: True\n",
617
+ " gradient_accumulation_steps: 2\n",
618
+ " batch_size: 1\n",
619
+ " # The start frame index where your videos should start (Leave this at one for json and folder based training).\n",
620
+ " sample_start_idx: 1\n",
621
+ "\n",
622
+ " # Used for 'folder'. The rate at which your frames are sampled. Does nothing for 'json' and 'single_video' dataset.\n",
623
+ " fps: 16\n",
624
+ "\n",
625
+ " # For 'single_video' and 'json'. The number of frames to \"step\" (1,2,3,4) (frame_step=2) -> (1,3,5,7, ...).\n",
626
+ " frame_step: 1\n",
627
+ "\n",
628
+ " # The number of frames to sample. The higher this number, the higher the VRAM (acts similar to batch size).\n",
629
+ " n_sample_frames: 24\n",
630
+ "\n",
631
+ " # The prompt when using a a single video file\n",
632
+ " # fallback_prompt: \"A person is riding a bicycle.\"\n",
633
+ "\n",
634
+ "# Validation data parameters.\n",
635
+ "validation_data:\n",
636
+ " # A custom prompt that is different from your training dataset.\n",
637
+ " prompt:\n",
638
+ " - \"Studio Ghibli style. The video showcases a vibrant and lively scene set in the early.\"\n",
639
+ " - \"Studio Ghibli style. A woman with black hair is holding a gun in her hand.\"\n",
640
+ "\n",
641
+ " # Whether or not to sample preview during training (Requires more VRAM).\n",
642
+ " # sample_preview: True\n",
643
+ " sample_preview: False\n",
644
+ "\n",
645
+ " # The number of frames to sample during validation.\n",
646
+ " num_frames: 24\n",
647
+ "\n",
648
+ " # Height and width of validation sample.\n",
649
+ " width: 384\n",
650
+ " height: 384\n",
651
+ "\n",
652
+ " # Number of inference steps when generating the video.\n",
653
+ " num_inference_steps: 15\n",
654
+ "\n",
655
+ " # CFG scale\n",
656
+ " guidance_scale: 12\n",
657
+ "\n",
658
+ " # scale of spatial LoRAs, default is 0\n",
659
+ " spatial_scale: 0\n",
660
+ "\n",
661
+ " # scale of noise prior, i.e. the scale of inversion noises\n",
662
+ " noise_prior: 0\n",
663
+ "\n",
664
+ "use_offset_noise: False\n",
665
+ "offset_noise_strength: 0.\n",
666
+ "\n",
667
+ "# Learning rate for AdamW\n",
668
+ "learning_rate: 5e-4\n",
669
+ "\n",
670
+ "# Weight decay. Higher = more regularization. Lower = closer to dataset.\n",
671
+ "adam_weight_decay: 1e-4\n",
672
+ "\n",
673
+ "# Maximum number of train steps. Model is saved after training.\n",
674
+ "max_train_steps: 5000\n",
675
+ "\n",
676
+ "# Saves a model every nth step.\n",
677
+ "checkpointing_steps: 5000\n",
678
+ "\n",
679
+ "# How many steps to do for validation if sample_preview is enabled.\n",
680
+ "validation_steps: 5000\n",
681
+ "\n",
682
+ "# Whether or not we want to use mixed precision with accelerate\n",
683
+ "mixed_precision: \"fp16\"\n",
684
+ "# mixed_precision: \"no\"\n",
685
+ "\n",
686
+ "# Trades VRAM usage for speed. You lose roughly 20% of training speed, but save a lot of VRAM.\n",
687
+ "# If you need to save more VRAM, it can also be enabled for the text encoder, but reduces speed x2.\n",
688
+ "gradient_checkpointing: True\n",
689
+ "text_encoder_gradient_checkpointing: True\n",
690
+ "\n",
691
+ "# Xformers must be installed for best memory savings and performance (< Pytorch 2.0)\n",
692
+ "enable_xformers_memory_efficient_attention: True\n",
693
+ "use_8bit_adam: True\n",
694
+ "\n",
695
+ "# Use scaled dot product attention (Only available with >= Torch 2.0)\n",
696
+ "enable_torch_2_attn: True"
697
+ ]
698
+ },
699
+ {
700
+ "cell_type": "code",
701
+ "execution_count": 8,
702
+ "metadata": {
703
+ "execution": {
704
+ "iopub.execute_input": "2025-07-16T05:36:02.733520Z",
705
+ "iopub.status.busy": "2025-07-16T05:36:02.732856Z",
706
+ "iopub.status.idle": "2025-07-16T16:01:06.692095Z",
707
+ "shell.execute_reply": "2025-07-16T16:01:06.688451Z",
708
+ "shell.execute_reply.started": "2025-07-16T05:36:02.733496Z"
709
+ },
710
+ "trusted": true
711
+ },
712
+ "outputs": [
713
+ {
714
+ "name": "stdout",
715
+ "output_type": "stream",
716
+ "text": [
717
+ "2025-07-16 05:36:13.391674: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
718
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
719
+ "E0000 00:00:1752644173.574411 316 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
720
+ "E0000 00:00:1752644173.625685 316 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
721
+ "Initializing the conversion map\n",
722
+ "{'rescale_betas_zero_snr', 'timestep_spacing'} was not found in config. Values will be initialized to default values.\n",
723
+ "An error occurred while trying to fetch ./ckpts/zeroscope_v2_576w: Error no file named diffusion_pytorch_model.safetensors found in directory ./ckpts/zeroscope_v2_576w.\n",
724
+ "Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.\n",
725
+ "{'latents_mean', 'use_post_quant_conv', 'mid_block_add_attention', 'force_upcast', 'use_quant_conv', 'shift_factor', 'latents_std'} was not found in config. Values will be initialized to default values.\n",
726
+ "All model checkpoint weights were used when initializing AutoencoderKL.\n",
727
+ "\n",
728
+ "All the weights of AutoencoderKL were initialized from the model checkpoint at ./ckpts/zeroscope_v2_576w.\n",
729
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use AutoencoderKL for predictions without further training.\n",
730
+ "An error occurred while trying to fetch ./ckpts/zeroscope_v2_576w: Error no file named diffusion_pytorch_model.safetensors found in directory ./ckpts/zeroscope_v2_576w.\n",
731
+ "Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.\n",
732
+ "All model checkpoint weights were used when initializing UNet3DConditionModel.\n",
733
+ "\n",
734
+ "All the weights of UNet3DConditionModel were initialized from the model checkpoint at ./ckpts/zeroscope_v2_576w.\n",
735
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use UNet3DConditionModel for predictions without further training.\n",
736
+ "Could not enable memory efficient attention for xformers or Torch 2.0.\n",
737
+ "Loading pipeline components...: 0%| | 0/5 [00:00<?, ?it/s]Loaded text_encoder as CLIPTextModel from `text_encoder` subfolder of ./ckpts/zeroscope_v2_576w.\n",
738
+ "Loading pipeline components...: 40%|█████▏ | 2/5 [00:00<00:01, 2.52it/s]{'rescale_betas_zero_snr', 'timestep_spacing'} was not found in config. Values will be initialized to default values.\n",
739
+ "Loaded scheduler as DDIMScheduler from `scheduler` subfolder of ./ckpts/zeroscope_v2_576w.\n",
740
+ "Loaded tokenizer as CLIPTokenizer from `tokenizer` subfolder of ./ckpts/zeroscope_v2_576w.\n",
741
+ "Loading pipeline components...: 100%|█████████████| 5/5 [00:00<00:00, 5.76it/s]\n",
742
+ "Expected types for unet: (<class 'diffusers.models.unets.unet_3d_condition.UNet3DConditionModel'>,), got <class 'models.unet_3d_condition.UNet3DConditionModel'>.\n",
743
+ "The TextToVideoSDPipeline has been deprecated and will not receive bug fixes or feature updates after Diffusers version 0.33.1. \n",
744
+ "Caching Latents.: 100%|███████████████████████| 240/240 [09:41<00:00, 2.42s/it]\n",
745
+ "Lora successfully injected into UNet3DConditionModel.\n",
746
+ "Lora successfully injected into UNet3DConditionModel.\n",
747
+ "unet._set_gradient_checkpointing(unet_enable)\n",
748
+ "Steps: 0%| | 0/5000 [00:00<?, ?it/s]1942 params have been unfrozen for training.\n",
749
+ "/usr/local/lib/python3.11/dist-packages/diffusers/models/transformers/transformer_2d.py:35: FutureWarning: `Transformer2DModelOutput` is deprecated and will be removed in version 1.0.0. Importing `Transformer2DModelOutput` from `diffusers.models.transformer_2d` is deprecated and this will be removed in a future version. Please use `from diffusers.models.modeling_outputs import Transformer2DModelOutput`, instead.\n",
750
+ " deprecate(\"Transformer2DModelOutput\", \"1.0.0\", deprecation_message)\n",
751
+ "Steps: 100%|█████████████████████████████| 5000/5000 [10:14:13<00:00, 7.28s/it][2025-07-16 16:00:44,146] [INFO] [real_accelerator.py:254:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
752
+ "[2025-07-16 16:00:46,892] [INFO] [logging.py:107:log_dist] [Rank -1] [TorchCheckpointEngine] Initialized with serialization = False\n",
753
+ "\n",
754
+ "Loading pipeline components...: 0%| | 0/5 [00:00<?, ?it/s]\u001b[A{'rescale_betas_zero_snr', 'timestep_spacing'} was not found in config. Values will be initialized to default values.\n",
755
+ "Loaded scheduler as DDIMScheduler from `scheduler` subfolder of ./ckpts/zeroscope_v2_576w.\n",
756
+ "Loaded tokenizer as CLIPTokenizer from `tokenizer` subfolder of ./ckpts/zeroscope_v2_576w.\n",
757
+ "Loading pipeline components...: 100%|█████████████| 5/5 [00:00<00:00, 50.50it/s]\n",
758
+ "Expected types for unet: (<class 'diffusers.models.unets.unet_3d_condition.UNet3DConditionModel'>,), got <class 'models.unet_3d_condition.UNet3DConditionModel'>.\n",
759
+ "The TextToVideoSDPipeline has been deprecated and will not receive bug fixes or feature updates after Diffusers version 0.33.1. \n",
760
+ "Configuration saved in ./zeroscope_v2_576w-Ghibli-LoRA/train_2025-07-16T05-36-26/checkpoint-5000/vae/config.json\n",
761
+ "Model weights saved in ./zeroscope_v2_576w-Ghibli-LoRA/train_2025-07-16T05-36-26/checkpoint-5000/vae/diffusion_pytorch_model.safetensors\n"
762
+ ]
763
+ }
764
+ ],
765
+ "source": [
766
+ "# Train\n",
767
+ "!export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True\n",
768
+ "!python main_train.py --config ./configs/config_multi_videos.yaml"
769
+ ]
770
+ }
771
+ ],
772
+ "metadata": {
773
+ "kaggle": {
774
+ "accelerator": "gpu",
775
+ "dataSources": [],
776
+ "dockerImageVersionId": 31090,
777
+ "isGpuEnabled": true,
778
+ "isInternetEnabled": true,
779
+ "language": "python",
780
+ "sourceType": "notebook"
781
+ },
782
+ "kernelspec": {
783
+ "display_name": "Python 3",
784
+ "language": "python",
785
+ "name": "python3"
786
+ },
787
+ "language_info": {
788
+ "codemirror_mode": {
789
+ "name": "ipython",
790
+ "version": 3
791
+ },
792
+ "file_extension": ".py",
793
+ "mimetype": "text/x-python",
794
+ "name": "python",
795
+ "nbconvert_exporter": "python",
796
+ "pygments_lexer": "ipython3",
797
+ "version": "3.11.13"
798
+ }
799
+ },
800
+ "nbformat": 4,
801
+ "nbformat_minor": 4
802
+ }
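As a quick sanity check before launching training, the configuration written by the `%%writefile` cell above can be loaded and inspected programmatically. The sketch below is illustrative and not part of the notebook; it assumes `configs/config_multi_videos.yaml` exists on disk and that `omegaconf` (listed in the requirements below) is installed.

```python
# Minimal sketch: load the training config and print a few key fields.
from omegaconf import OmegaConf

cfg = OmegaConf.load("configs/config_multi_videos.yaml")
print("pretrained_model_path:", cfg.pretrained_model_path)
print("output_dir:", cfg.output_dir)
print("lora_rank:", cfg.lora_rank)
print("train resolution:", cfg.train_data.width, "x", cfg.train_data.height)
print("max_train_steps:", cfg.max_train_steps)
```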
requirements/requirements.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate>=1.7.0
2
+ deepspeed
3
+ diffusers>=0.33.0
4
+ huggingface-hub
5
+ git+https://github.com/cloneofsimo/lora.git
6
+ git+https://github.com/microsoft/LoRA
7
+ loralib
8
+ numpy==1.26.4
9
+ tqdm
10
+ einops
11
+ imageio
12
+ imageio-ffmpeg
13
+ torch>=2.6.0
14
+ torchvision>=0.21.0
15
+ torchaudio
16
+ transformers>=4.51.3
17
+ decord
18
+ safetensors
19
+ omegaconf
20
+ opencv-python
21
+ pydantic
22
+ triton
23
+ compel
24
+ peft>=0.15.0
25
+ pytorch_lightning>=2.5.0
26
+ bitsandbytes
requirements/requirements_compatible.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ deepspeed==0.17.3
2
+ diffusers==0.34.0
3
+ huggingface_hub==0.34.1
4
+ loralib==0.1.2
5
+ numpy==1.26.4
6
+ tqdm==4.67.1
7
+ einops==0.8.1
8
+ imageio==2.37.0
9
+ imageio_ffmpeg==0.6.0
10
+ torch==2.6.0
11
+ torchvision==0.21.0
12
+ torchaudio==2.6.0
13
+ transformers==4.54.0
14
+ decord==0.6.0
15
+ safetensors==0.5.3
16
+ omegaconf==2.3.0
17
+ opencv-python==4.11.0.86
18
+ pydantic==2.11.7
19
+ triton==3.2.0
20
+ compel==2.1.1
21
+ peft==0.16.0
22
+ pytorch_lightning==2.5.2
23
+ bitsandbytes==0.46.1
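To confirm that an environment actually matches these pins, a small optional check using only the standard library can be run. This is a sketch, not a tool shipped with the repo; the file path below assumes it is run from the repository root.

```python
# Sketch: compare installed package versions against the pins in
# requirements/requirements_compatible.txt; lines without "==" are skipped.
from importlib.metadata import version, PackageNotFoundError

def check_pins(path="requirements/requirements_compatible.txt"):
    with open(path) as fh:
        for line in fh:
            line = line.strip()
            if not line or line.startswith("#") or "==" not in line:
                continue
            name, pinned = line.split("==", 1)
            try:
                installed = version(name)
            except PackageNotFoundError:
                installed = "not installed"
            status = "OK" if installed == pinned else "MISMATCH"
            print(f"{name}: pinned {pinned}, installed {installed} -> {status}")

if __name__ == "__main__":
    check_pins()
```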
scripts/download_ckpts.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import HfApi, snapshot_download
2
+ import os
3
+ import torch
4
+ import argparse
5
+
6
+ def download_checkpoint(repo_id, save_path, repo_type="model"):
7
+ """
8
+ Download a model checkpoint from Hugging Face Hub to the specified local directory.
9
+
10
+ Args:
11
+ repo_id (str): The repository ID on Hugging Face Hub
12
+ save_path (str): Local directory path to save the checkpoint
13
+ repo_type (str): Type of repository (default: "model")
14
+ """
15
+ # Initialize Hugging Face API
16
+ api = HfApi()
17
+
18
+ # Create the directory if it doesn't exist
19
+ os.makedirs(save_path, exist_ok=True)
20
+
21
+ # Download the checkpoint
22
+ print(f"Downloading {repo_id} to {save_path}...")
23
+ snapshot_download(repo_id=repo_id, repo_type=repo_type, local_dir=save_path)
24
+ print(f"Successfully downloaded {repo_id}")
25
+
26
+ def main(args):
27
+ # Define checkpoint configurations
28
+ checkpoints = [
29
+ {
30
+ "repo_id": args.repo_id,
31
+ "save_path": args.save_path,
32
+ "repo_type": args.repo_type
33
+ }
34
+ ]
35
+
36
+ # Add LoRA checkpoint if provided
37
+ if args.lora_repo_id and args.lora_save_path:
38
+ checkpoints.append({
39
+ "repo_id": args.lora_repo_id,
40
+ "save_path": args.lora_save_path,
41
+ "repo_type": args.lora_repo_type
42
+ })
43
+
44
+ # Download each checkpoint
45
+ for checkpoint in checkpoints:
46
+ download_checkpoint(
47
+ repo_id=checkpoint["repo_id"],
48
+ save_path=checkpoint["save_path"],
49
+ repo_type=checkpoint["repo_type"]
50
+ )
51
+
52
+ if __name__ == "__main__":
53
+ # Set up argument parser
54
+ parser = argparse.ArgumentParser(description="Download model checkpoints from Hugging Face Hub")
55
+ parser.add_argument(
56
+ "--repo_id",
57
+ type=str,
58
+ default="cerspense/zeroscope_v2_576w",
59
+ help="Hugging Face repository ID for the checkpoint"
60
+ )
61
+ parser.add_argument(
62
+ "--save_path",
63
+ type=str,
64
+ default="./ckpts/zeroscope_v2_576w",
65
+ help="Local directory to save the checkpoint"
66
+ )
67
+ parser.add_argument(
68
+ "--repo_type",
69
+ type=str,
70
+ default="model",
71
+ help="Type of repository (e.g., model, dataset)"
72
+ )
73
+ parser.add_argument(
74
+ "--lora_repo_id",
75
+ type=str,
76
+ default="danhtran2mind/zeroscope_v2_576w-Ghibli-LoRA",
77
+ help="Hugging Face repository ID for the LoRA checkpoint"
78
+ )
79
+ parser.add_argument(
80
+ "--lora_save_path",
81
+ type=str,
82
+ default="./ckpts/zeroscope_v2_576w-Ghibli-LoRA",
83
+ help="Local directory to save the LoRA checkpoint"
84
+ )
85
+ parser.add_argument(
86
+ "--lora_repo_type",
87
+ type=str,
88
+ default="model",
89
+ help="Type of repository for the LoRA checkpoint (e.g., model, dataset)"
90
+ )
91
+
92
+ # Parse arguments
93
+ args = parser.parse_args()
94
+
95
+ # Call main with parsed arguments
96
+ main(args)
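Besides the CLI shown above, `download_checkpoint()` can be reused directly from Python. A minimal sketch, assuming the repository root is on the import path (e.g. the code is run from the repo root); the repository IDs are the script's own defaults.

```python
# Sketch: fetch the base model and the LoRA weights programmatically
# by reusing download_checkpoint() from scripts/download_ckpts.py.
from scripts.download_ckpts import download_checkpoint

download_checkpoint(
    repo_id="cerspense/zeroscope_v2_576w",
    save_path="./ckpts/zeroscope_v2_576w",
)
download_checkpoint(
    repo_id="danhtran2mind/zeroscope_v2_576w-Ghibli-LoRA",
    save_path="./ckpts/zeroscope_v2_576w-Ghibli-LoRA",
)
```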
scripts/process_dataset.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import random
4
+ import argparse
5
+ from huggingface_hub import snapshot_download
6
+ import zipfile
7
+
8
+ def copy_file_pairs(source_dir, dest_dir, max_pairs=20, seed=None):
9
+ if seed is not None:
10
+ random.seed(seed)
11
+ os.makedirs(dest_dir, exist_ok=True)
12
+ mp4_files = [f for f in os.listdir(source_dir) if f.endswith('.mp4')]
13
+ selected_mp4_files = random.sample(mp4_files, min(len(mp4_files), max_pairs))
14
+ for mp4 in selected_mp4_files:
15
+ base = os.path.splitext(mp4)[0]
16
+ txt = f"{base}.txt"
17
+ if os.path.exists(os.path.join(source_dir, txt)):
18
+ shutil.copy2(os.path.join(source_dir, mp4), os.path.join(dest_dir, mp4))
19
+ shutil.copy2(os.path.join(source_dir, txt), os.path.join(dest_dir, txt))
20
+ return sum(os.path.exists(os.path.join(source_dir, f"{os.path.splitext(m)[0]}.txt")) for m in selected_mp4_files)
21
+
22
+ if __name__ == "__main__":
23
+ parser = argparse.ArgumentParser(description="Process Studio Ghibli dataset by downloading, extracting, and copying file pairs.")
24
+ parser.add_argument("--source_dir", default="data/ghibli/raw/videos/1920x1040", help="Source directory containing video and text files")
25
+ parser.add_argument("--dest_dir", default="data/ghibli/videos", help="Destination directory for copied file pairs")
26
+ parser.add_argument("--max_pairs", type=int, default=240, help="Maximum number of file pairs to copy")
27
+ parser.add_argument("--seed", type=int, default=42, help="Random seed for reproducibility")
28
+ parser.add_argument("--repo_id", default="raymondt/ghibi_t2v", help="Hugging Face dataset repository ID")
29
+ parser.add_argument("--local_dir", default="data/ghibli/raw", help="Local directory to download the dataset")
30
+ parser.add_argument("--zip_path", default="data/ghibli/raw/studio_ghibli_wan14b_t2v_v01_dataset.zip", help="Path to the downloaded zip file")
31
+
32
+ args = parser.parse_args()
33
+
34
+ # Create directory if it doesn't exist
35
+ os.makedirs(args.local_dir, exist_ok=True)
36
+
37
+ # Download the dataset using snapshot_download
38
+ snapshot_download(repo_id=args.repo_id,
39
+ local_dir=args.local_dir,
40
+ repo_type="dataset")
41
+
42
+ # Unzip the dataset
43
+ with zipfile.ZipFile(args.zip_path, 'r') as zip_ref:
44
+ zip_ref.extractall(args.local_dir)
45
+
46
+ # Copy file pairs
47
+ copied = copy_file_pairs(args.source_dir, args.dest_dir, max_pairs=args.max_pairs, seed=args.seed)
48
+ print(f"Copied {copied} pairs to {args.dest_dir}")
scripts/setup_third_party.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ import argparse
4
+ import sys
5
+
6
+ def clone_repository(repo_url, target_dir, branch="main"):
7
+ """Clone a git repository to the specified directory with specific branch."""
8
+ if os.path.exists(target_dir):
9
+ print(f"Directory {target_dir} already exists. Skipping clone.")
10
+ return
11
+
12
+ os.makedirs(os.path.dirname(target_dir), exist_ok=True)
13
+
14
+ try:
15
+ subprocess.run(
16
+ ["git", "clone", "-b", branch, repo_url, target_dir],
17
+ check=True,
18
+ capture_output=True,
19
+ text=True
20
+ )
21
+ print(f"Successfully cloned {repo_url} (branch: {branch}) to {target_dir}")
22
+ except subprocess.CalledProcessError as e:
23
+ print(f"Failed to clone repository: {e.stderr}")
24
+ sys.exit(1)
25
+
26
+ def main(motiondirector_url="https://github.com/danhtran2mind/MotionDirector", branch="main"):
27
+ # Define target directory
28
+ target_dir = os.path.join("src", "third_party", "MotionDirector")
29
+
30
+ # Clone MotionDirector repository
31
+ clone_repository(motiondirector_url, target_dir, branch)
32
+
33
+ if __name__ == "__main__":
34
+ # Set arguments directly
35
+ main(
36
+ motiondirector_url="https://github.com/danhtran2mind/MotionDirector",
37
+ branch="main"
38
+ )
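The same setup can be driven from Python when a fork or a different branch of MotionDirector is needed. A sketch, assuming it is run from the repository root; the URL, target directory, and branch shown are the script's own defaults.

```python
# Sketch: clone MotionDirector into src/third_party/ by reusing
# clone_repository() from scripts/setup_third_party.py.
from scripts.setup_third_party import clone_repository

clone_repository(
    repo_url="https://github.com/danhtran2mind/MotionDirector",
    target_dir="src/third_party/MotionDirector",
    branch="main",
)
```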
src/text2video_ghibli_style/inference.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import subprocess
4
+ from pathlib import Path
5
+ import uuid
6
+ import torch
7
+
8
+ # Append the current directory to sys.path
9
+ # sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
10
+
11
+ def run_inference(
12
+ model_path="./ckpts/zeroscope_v2_576w",
13
+ checkpoint_folder="./ckpts/zeroscope_v2_576w-Ghibli-LoRA",
14
+ prompt="Studio Ghibli style. Two women walk down coastal village path toward sea, passing colorful houses, sailboats visible.",
15
+ negative_prompt="ugly, noise, fragment, blur, static video",
16
+ width=256,
17
+ height=256,
18
+ num_frames=8,
19
+ num_steps=30,
20
+ guidance_scale=30.0,
21
+ fps=8,
22
+ lora_rank=32,
23
+ lora_scale=0.7,
24
+ noise_prior=0.1,
25
+ device="cuda",
26
+ seed=100
27
+ ):
28
+ print("Start Inference")
29
+ output_dir = "apps/gradio_app/temp_data"
30
+ os.makedirs(output_dir, exist_ok=True)
31
+
32
+ # Get list of files in output_dir
33
+ for file_name in os.listdir(output_dir):
34
+ # Check if file ends with .mp4
35
+ if file_name.endswith(".mp4"):
36
+ # Remove the file
37
+ os.remove(os.path.join(output_dir, file_name))
38
+
39
+ command = [
40
+ "python", "src/third_party/MotionDirector/main_inference.py",
41
+ "--model", model_path,
42
+ "--checkpoint_folder", checkpoint_folder,
43
+ "--prompt", prompt,
44
+ "--negative-prompt", negative_prompt,
45
+ "--width", str(width),
46
+ "--height", str(height),
47
+ "--num-frames", str(num_frames),
48
+ "--num-steps", str(num_steps),
49
+ "--guidance-scale", str(guidance_scale),
50
+ "--fps", str(fps),
51
+ "--lora_rank", str(lora_rank),
52
+ "--lora_scale", str(lora_scale),
53
+ "--noise_prior", str(noise_prior),
54
+ "--device", device,
55
+ "--seed", str(seed),
56
+ "--output_dir", output_dir,
57
+ # "--no-prompt-name"
58
+ ]
59
+
60
+ # Use Popen to execute the command
61
+ process = subprocess.Popen(
62
+ command,
63
+ stdout=subprocess.PIPE,
64
+ stderr=subprocess.PIPE,
65
+ text=True,
66
+ bufsize=1 # Line buffering
67
+ )
68
+
69
+ # Read output line-by-line in real-time
70
+ output_lines = []
71
+ try:
72
+ for line in process.stdout:
73
+ output_lines.append(line.strip())
74
+ except Exception as e:
75
+ return None, f"Error reading output: {str(e)}"
76
+
77
+ # Capture stderr and wait for process to complete
78
+ stderr_output = process.communicate()[1]
79
+ if process.returncode != 0:
80
+ return None, f"Error: {stderr_output.strip()}"
81
+
82
+ # Check for MP4 files in output directory
83
+ output_file = [f for f in os.listdir(output_dir) if f.lower().endswith('.mp4')]
84
+ if output_file:
85
+ output_path = os.path.join(output_dir, output_file[-1])
86
+ if os.path.exists(output_path):
87
+ return output_path, "\n".join(output_lines)
88
+ else:
89
+ return None, f"Video file not found at {output_path}\nLogs:\n" + "\n".join(output_lines)
90
+ return None, f"No MP4 files found in {output_dir}\nLogs:\n" + "\n".join(output_lines)
91
+
92
+ if __name__ == "__main__":
93
+ # Example usage
94
+ video_path, logs = run_inference(device="cpu" if not torch.cuda.is_available() else "cuda")
95
+ print(f"Generated Video: {video_path}")
96
+ print(f"Logs: {logs}")
src/text2video_ghibli_style/train.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ import os
3
+ import sys
4
+ import argparse
5
+
6
+ # sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'third_party', 'MotionDirector')))
7
+
8
+ def run_training(config_path, pytorch_cuda_alloc_conf="expandable_segments:True"):
9
+ # Set the environment variable
10
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = pytorch_cuda_alloc_conf
11
+
12
+ # Command to execute
13
+ command = ["python", "src/third_party/MotionDirector/main_train.py", "--config", config_path]
14
+
15
+ try:
16
+ # Run the command using subprocess.Popen
17
+ process = subprocess.Popen(
18
+ command,
19
+ stdout=subprocess.PIPE,
20
+ stderr=subprocess.PIPE,
21
+ text=True,
22
+ env=os.environ.copy()
23
+ )
24
+
25
+ # Stream output in real-time
26
+ while True:
27
+ output = process.stdout.readline()
28
+ if output == '' and process.poll() is not None:
29
+ break
30
+ if output:
31
+ print(output.strip())
32
+
33
+ # Get any remaining output and errors
34
+ stdout, stderr = process.communicate()
35
+
36
+ # Print any errors
37
+ if stderr:
38
+ print("Errors:", stderr)
39
+
40
+ # Check the return code
41
+ if process.returncode == 0:
42
+ print("Training completed successfully")
43
+ else:
44
+ print(f"Training failed with return code: {process.returncode}")
45
+
46
+ except subprocess.SubprocessError as e:
47
+ print(f"Error running training: {e}")
48
+ except FileNotFoundError:
49
+ print("Error: main_train.py or config file not found")
50
+ except Exception as e:
51
+ print(f"Unexpected error: {e}")
52
+
53
+ if __name__ == "__main__":
54
+ # Set up argument parser
55
+ parser = argparse.ArgumentParser(description="Run training script with specified config")
56
+ parser.add_argument(
57
+ "--config",
58
+ type=str,
59
+ default="./configs/config_multi_videos.yaml",
60
+ help="Path to the config file"
61
+ )
62
+ parser.add_argument(
63
+ "--pytorch-cuda-alloc",
64
+ type=str,
65
+ default="expandable_segments:True",
66
+ help="Value for PYTORCH_CUDA_ALLOC_CONF environment variable"
67
+ )
68
+
69
+ # Parse arguments
70
+ args = parser.parse_args()
71
+
72
+ # Run training with provided arguments
73
+ run_training(args.config, args.pytorch_cuda_alloc)
src/third_party/.gitkeep ADDED
File without changes